Coverage Report

Created: 2026-06-12 11:13

/root/doris/cloud/src/recycler/recycler.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "recycler/recycler.h"
19
20
#include <brpc/builtin_service.pb.h>
21
#include <brpc/server.h>
22
#include <butil/endpoint.h>
23
#include <butil/strings/string_split.h>
24
#include <bvar/status.h>
25
#include <gen_cpp/cloud.pb.h>
26
#include <gen_cpp/olap_file.pb.h>
27
28
#include <algorithm>
29
#include <atomic>
30
#include <chrono>
31
#include <cstddef>
32
#include <cstdint>
33
#include <cstdlib>
34
#include <deque>
35
#include <functional>
36
#include <initializer_list>
37
#include <memory>
38
#include <numeric>
39
#include <optional>
40
#include <random>
41
#include <string>
42
#include <string_view>
43
#include <thread>
44
#include <unordered_map>
45
#include <utility>
46
#include <variant>
47
48
#include "common/defer.h"
49
#include "common/stopwatch.h"
50
#include "meta-service/meta_service.h"
51
#include "meta-service/meta_service_helper.h"
52
#include "meta-service/meta_service_schema.h"
53
#include "meta-store/blob_message.h"
54
#include "meta-store/meta_reader.h"
55
#include "meta-store/txn_kv.h"
56
#include "meta-store/txn_kv_error.h"
57
#include "meta-store/versioned_value.h"
58
#include "recycler/checker.h"
59
#ifdef ENABLE_HDFS_STORAGE_VAULT
60
#include "recycler/hdfs_accessor.h"
61
#endif
62
#include "recycler/s3_accessor.h"
63
#include "recycler/storage_vault_accessor.h"
64
#ifdef UNIT_TEST
65
#include "../test/mock_accessor.h"
66
#endif
67
#include "common/bvars.h"
68
#include "common/config.h"
69
#include "common/encryption_util.h"
70
#include "common/logging.h"
71
#include "common/simple_thread_pool.h"
72
#include "common/util.h"
73
#include "cpp/sync_point.h"
74
#include "meta-store/codec.h"
75
#include "meta-store/document_message.h"
76
#include "meta-store/keys.h"
77
#include "recycler/recycler_service.h"
78
#include "recycler/sync_executor.h"
79
#include "recycler/util.h"
80
#include "snapshot/snapshot_manager_factory.h"
81
82
namespace doris::cloud {
83
84
using namespace std::chrono;
85
86
namespace {
87
88
0
int64_t packed_file_retry_sleep_ms() {
89
0
    const int64_t min_ms = std::max<int64_t>(0, config::packed_file_txn_retry_sleep_min_ms);
90
0
    const int64_t max_ms = std::max<int64_t>(min_ms, config::packed_file_txn_retry_sleep_max_ms);
91
0
    thread_local std::mt19937_64 gen(std::random_device {}());
92
0
    std::uniform_int_distribution<int64_t> dist(min_ms, max_ms);
93
0
    return dist(gen);
94
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_126packed_file_retry_sleep_msEv
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_126packed_file_retry_sleep_msEv
95
96
0
void sleep_for_packed_file_retry() {
97
0
    std::this_thread::sleep_for(std::chrono::milliseconds(packed_file_retry_sleep_ms()));
98
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_127sleep_for_packed_file_retryEv
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_127sleep_for_packed_file_retryEv
99
100
869k
bool is_packed_slice_path(const doris::RowsetMetaCloudPB& rowset, const std::string& path) {
101
869k
    const auto& locations = rowset.packed_slice_locations();
102
869k
    auto it = locations.find(path);
103
869k
    return it != locations.end() && it->second.has_packed_file_path() &&
104
869k
           !it->second.packed_file_path().empty();
105
869k
}
recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_120is_packed_slice_pathERKNS_17RowsetMetaCloudPBERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
100
7
bool is_packed_slice_path(const doris::RowsetMetaCloudPB& rowset, const std::string& path) {
101
7
    const auto& locations = rowset.packed_slice_locations();
102
7
    auto it = locations.find(path);
103
7
    return it != locations.end() && it->second.has_packed_file_path() &&
104
7
           !it->second.packed_file_path().empty();
105
7
}
recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_120is_packed_slice_pathERKNS_17RowsetMetaCloudPBERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
100
869k
bool is_packed_slice_path(const doris::RowsetMetaCloudPB& rowset, const std::string& path) {
101
869k
    const auto& locations = rowset.packed_slice_locations();
102
869k
    auto it = locations.find(path);
103
869k
    return it != locations.end() && it->second.has_packed_file_path() &&
104
869k
           !it->second.packed_file_path().empty();
105
869k
}
106
107
void add_file_to_delete_if_not_packed(const doris::RowsetMetaCloudPB& rowset,
108
                                      const std::string& path,
109
868k
                                      std::vector<std::string>* file_paths) {
110
869k
    if (!is_packed_slice_path(rowset, path)) {
111
869k
        file_paths->push_back(path);
112
869k
    }
113
868k
}
recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_132add_file_to_delete_if_not_packedERKNS_17RowsetMetaCloudPBERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPSt6vectorISA_SaISA_EE
Line
Count
Source
109
7
                                      std::vector<std::string>* file_paths) {
110
7
    if (!is_packed_slice_path(rowset, path)) {
111
7
        file_paths->push_back(path);
112
7
    }
113
7
}
recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_132add_file_to_delete_if_not_packedERKNS_17RowsetMetaCloudPBERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPSt6vectorISA_SaISA_EE
Line
Count
Source
109
868k
                                      std::vector<std::string>* file_paths) {
110
869k
    if (!is_packed_slice_path(rowset, path)) {
111
869k
        file_paths->push_back(path);
112
869k
    }
113
868k
}
114
115
37
bool filter_out_instance(const std::string& instance_id) {
116
37
    if (config::recycle_whitelist.empty()) {
117
35
        return std::ranges::find(config::recycle_blacklist, instance_id) !=
118
35
               config::recycle_blacklist.end();
119
35
    }
120
2
    return std::ranges::find(config::recycle_whitelist, instance_id) ==
121
2
           config::recycle_whitelist.end();
122
37
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_119filter_out_instanceERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_119filter_out_instanceERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
115
37
bool filter_out_instance(const std::string& instance_id) {
116
37
    if (config::recycle_whitelist.empty()) {
117
35
        return std::ranges::find(config::recycle_blacklist, instance_id) !=
118
35
               config::recycle_blacklist.end();
119
35
    }
120
2
    return std::ranges::find(config::recycle_whitelist, instance_id) ==
121
2
           config::recycle_whitelist.end();
122
37
}
123
124
} // namespace
125
126
// return 0 for success get a key, 1 for key not found, negative for error
127
0
[[maybe_unused]] static int txn_get(TxnKv* txn_kv, std::string_view key, std::string& val) {
128
0
    std::unique_ptr<Transaction> txn;
129
0
    TxnErrorCode err = txn_kv->create_txn(&txn);
130
0
    if (err != TxnErrorCode::TXN_OK) {
131
0
        return -1;
132
0
    }
133
0
    switch (txn->get(key, &val, true)) {
134
0
    case TxnErrorCode::TXN_OK:
135
0
        return 0;
136
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
137
0
        return 1;
138
0
    default:
139
0
        return -1;
140
0
    };
141
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
142
143
// 0 for success, negative for error
144
static int txn_get(TxnKv* txn_kv, std::string_view begin, std::string_view end,
145
396
                   std::unique_ptr<RangeGetIterator>& it) {
146
396
    std::unique_ptr<Transaction> txn;
147
396
    TxnErrorCode err = txn_kv->create_txn(&txn);
148
396
    if (err != TxnErrorCode::TXN_OK) {
149
0
        return -1;
150
0
    }
151
396
    switch (txn->get(begin, end, &it, true)) {
152
396
    case TxnErrorCode::TXN_OK:
153
396
        return 0;
154
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
155
0
        return 1;
156
0
    default:
157
0
        return -1;
158
396
    };
159
0
}
recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
145
31
                   std::unique_ptr<RangeGetIterator>& it) {
146
31
    std::unique_ptr<Transaction> txn;
147
31
    TxnErrorCode err = txn_kv->create_txn(&txn);
148
31
    if (err != TxnErrorCode::TXN_OK) {
149
0
        return -1;
150
0
    }
151
31
    switch (txn->get(begin, end, &it, true)) {
152
31
    case TxnErrorCode::TXN_OK:
153
31
        return 0;
154
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
155
0
        return 1;
156
0
    default:
157
0
        return -1;
158
31
    };
159
0
}
recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
145
365
                   std::unique_ptr<RangeGetIterator>& it) {
146
365
    std::unique_ptr<Transaction> txn;
147
365
    TxnErrorCode err = txn_kv->create_txn(&txn);
148
365
    if (err != TxnErrorCode::TXN_OK) {
149
0
        return -1;
150
0
    }
151
365
    switch (txn->get(begin, end, &it, true)) {
152
365
    case TxnErrorCode::TXN_OK:
153
365
        return 0;
154
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
155
0
        return 1;
156
0
    default:
157
0
        return -1;
158
365
    };
159
0
}
160
161
// return 0 for success otherwise error
162
6
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
163
6
    std::unique_ptr<Transaction> txn;
164
6
    TxnErrorCode err = txn_kv->create_txn(&txn);
165
6
    if (err != TxnErrorCode::TXN_OK) {
166
0
        return -1;
167
0
    }
168
10
    for (auto k : keys) {
169
10
        txn->remove(k);
170
10
    }
171
6
    switch (txn->commit()) {
172
6
    case TxnErrorCode::TXN_OK:
173
6
        return 0;
174
0
    case TxnErrorCode::TXN_CONFLICT:
175
0
        return -1;
176
0
    default:
177
0
        return -1;
178
6
    }
179
6
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
162
1
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
163
1
    std::unique_ptr<Transaction> txn;
164
1
    TxnErrorCode err = txn_kv->create_txn(&txn);
165
1
    if (err != TxnErrorCode::TXN_OK) {
166
0
        return -1;
167
0
    }
168
1
    for (auto k : keys) {
169
1
        txn->remove(k);
170
1
    }
171
1
    switch (txn->commit()) {
172
1
    case TxnErrorCode::TXN_OK:
173
1
        return 0;
174
0
    case TxnErrorCode::TXN_CONFLICT:
175
0
        return -1;
176
0
    default:
177
0
        return -1;
178
1
    }
179
1
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
162
5
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
163
5
    std::unique_ptr<Transaction> txn;
164
5
    TxnErrorCode err = txn_kv->create_txn(&txn);
165
5
    if (err != TxnErrorCode::TXN_OK) {
166
0
        return -1;
167
0
    }
168
9
    for (auto k : keys) {
169
9
        txn->remove(k);
170
9
    }
171
5
    switch (txn->commit()) {
172
5
    case TxnErrorCode::TXN_OK:
173
5
        return 0;
174
0
    case TxnErrorCode::TXN_CONFLICT:
175
0
        return -1;
176
0
    default:
177
0
        return -1;
178
5
    }
179
5
}
180
181
// return 0 for success otherwise error
182
102
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
183
102
    std::unique_ptr<Transaction> txn;
184
102
    TxnErrorCode err = txn_kv->create_txn(&txn);
185
102
    if (err != TxnErrorCode::TXN_OK) {
186
0
        return -1;
187
0
    }
188
106k
    for (auto& k : keys) {
189
106k
        txn->remove(k);
190
106k
    }
191
102
    switch (txn->commit()) {
192
102
    case TxnErrorCode::TXN_OK:
193
102
        return 0;
194
0
    case TxnErrorCode::TXN_CONFLICT:
195
0
        return -1;
196
0
    default:
197
0
        return -1;
198
102
    }
199
102
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
182
26
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
183
26
    std::unique_ptr<Transaction> txn;
184
26
    TxnErrorCode err = txn_kv->create_txn(&txn);
185
26
    if (err != TxnErrorCode::TXN_OK) {
186
0
        return -1;
187
0
    }
188
26
    for (auto& k : keys) {
189
16
        txn->remove(k);
190
16
    }
191
26
    switch (txn->commit()) {
192
26
    case TxnErrorCode::TXN_OK:
193
26
        return 0;
194
0
    case TxnErrorCode::TXN_CONFLICT:
195
0
        return -1;
196
0
    default:
197
0
        return -1;
198
26
    }
199
26
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
182
76
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
183
76
    std::unique_ptr<Transaction> txn;
184
76
    TxnErrorCode err = txn_kv->create_txn(&txn);
185
76
    if (err != TxnErrorCode::TXN_OK) {
186
0
        return -1;
187
0
    }
188
106k
    for (auto& k : keys) {
189
106k
        txn->remove(k);
190
106k
    }
191
76
    switch (txn->commit()) {
192
76
    case TxnErrorCode::TXN_OK:
193
76
        return 0;
194
0
    case TxnErrorCode::TXN_CONFLICT:
195
0
        return -1;
196
0
    default:
197
0
        return -1;
198
76
    }
199
76
}
200
201
// return 0 for success otherwise error
202
[[maybe_unused]] static int txn_remove(TxnKv* txn_kv, std::string_view begin,
203
106k
                                       std::string_view end) {
204
106k
    std::unique_ptr<Transaction> txn;
205
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
206
106k
    if (err != TxnErrorCode::TXN_OK) {
207
0
        return -1;
208
0
    }
209
106k
    txn->remove(begin, end);
210
106k
    switch (txn->commit()) {
211
106k
    case TxnErrorCode::TXN_OK:
212
106k
        return 0;
213
0
    case TxnErrorCode::TXN_CONFLICT:
214
0
        return -1;
215
0
    default:
216
0
        return -1;
217
106k
    }
218
106k
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
203
16
                                       std::string_view end) {
204
16
    std::unique_ptr<Transaction> txn;
205
16
    TxnErrorCode err = txn_kv->create_txn(&txn);
206
16
    if (err != TxnErrorCode::TXN_OK) {
207
0
        return -1;
208
0
    }
209
16
    txn->remove(begin, end);
210
16
    switch (txn->commit()) {
211
16
    case TxnErrorCode::TXN_OK:
212
16
        return 0;
213
0
    case TxnErrorCode::TXN_CONFLICT:
214
0
        return -1;
215
0
    default:
216
0
        return -1;
217
16
    }
218
16
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
203
106k
                                       std::string_view end) {
204
106k
    std::unique_ptr<Transaction> txn;
205
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
206
106k
    if (err != TxnErrorCode::TXN_OK) {
207
0
        return -1;
208
0
    }
209
106k
    txn->remove(begin, end);
210
106k
    switch (txn->commit()) {
211
106k
    case TxnErrorCode::TXN_OK:
212
106k
        return 0;
213
0
    case TxnErrorCode::TXN_CONFLICT:
214
0
        return -1;
215
0
    default:
216
0
        return -1;
217
106k
    }
218
106k
}
219
220
void scan_restore_job_rowset(
221
        Transaction* txn, const std::string& instance_id, int64_t tablet_id, MetaServiceCode& code,
222
        std::string& msg,
223
        std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>>* restore_job_rs_metas);
224
225
static inline void check_recycle_task(const std::string& instance_id, const std::string& task_name,
226
                                      int64_t num_scanned, int64_t num_recycled,
227
51
                                      int64_t start_time) {
228
51
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
229
0
        int64_t cost =
230
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
231
0
        if (cost > config::recycle_task_threshold_seconds) {
232
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
233
0
                    .tag("instance_id", instance_id)
234
0
                    .tag("task", task_name)
235
0
                    .tag("num_scanned", num_scanned)
236
0
                    .tag("num_recycled", num_recycled);
237
0
        }
238
0
    }
239
51
    return;
240
51
}
recycler.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
227
2
                                      int64_t start_time) {
228
2
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
229
0
        int64_t cost =
230
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
231
0
        if (cost > config::recycle_task_threshold_seconds) {
232
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
233
0
                    .tag("instance_id", instance_id)
234
0
                    .tag("task", task_name)
235
0
                    .tag("num_scanned", num_scanned)
236
0
                    .tag("num_recycled", num_recycled);
237
0
        }
238
0
    }
239
2
    return;
240
2
}
recycler_test.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
227
49
                                      int64_t start_time) {
228
49
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
229
0
        int64_t cost =
230
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
231
0
        if (cost > config::recycle_task_threshold_seconds) {
232
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
233
0
                    .tag("instance_id", instance_id)
234
0
                    .tag("task", task_name)
235
0
                    .tag("num_scanned", num_scanned)
236
0
                    .tag("num_recycled", num_recycled);
237
0
        }
238
0
    }
239
49
    return;
240
49
}
241
242
6
Recycler::Recycler(std::shared_ptr<TxnKv> txn_kv) : txn_kv_(std::move(txn_kv)) {
243
6
    ip_port_ = std::string(butil::my_ip_cstr()) + ":" + std::to_string(config::brpc_listen_port);
244
245
6
    auto s3_producer_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
246
6
                                                               "s3_producer_pool");
247
6
    s3_producer_pool->start();
248
6
    auto recycle_tablet_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
249
6
                                                                  "recycle_tablet_pool");
250
6
    recycle_tablet_pool->start();
251
6
    auto group_recycle_function_pool = std::make_shared<SimpleThreadPool>(
252
6
            config::recycle_pool_parallelism, "group_recycle_function_pool");
253
6
    group_recycle_function_pool->start();
254
6
    _thread_pool_group =
255
6
            RecyclerThreadPoolGroup(std::move(s3_producer_pool), std::move(recycle_tablet_pool),
256
6
                                    std::move(group_recycle_function_pool));
257
258
6
    auto resource_mgr = std::make_shared<ResourceManager>(txn_kv_);
259
6
    txn_lazy_committer_ = std::make_shared<TxnLazyCommitter>(txn_kv_, std::move(resource_mgr));
260
6
    snapshot_manager_ = create_snapshot_manager(txn_kv_);
261
6
}
262
263
6
Recycler::~Recycler() {
264
6
    if (!stopped()) {
265
0
        stop();
266
0
    }
267
6
}
268
269
5
void Recycler::instance_scanner_callback() {
270
    // sleep 60 seconds before scheduling for the launch procedure to complete:
271
    // some bad hdfs connection may cause some log to stdout stderr
272
    // which may pollute .out file and affect the script to check success
273
5
    std::this_thread::sleep_for(
274
5
            std::chrono::seconds(config::recycler_sleep_before_scheduling_seconds));
275
1.25k
    while (!stopped()) {
276
1.24k
        if (config::enable_recycler) {
277
3
            std::vector<InstanceInfoPB> instances;
278
3
            get_all_instances(txn_kv_.get(), instances);
279
            // TODO(plat1ko): delete job recycle kv of non-existent instances
280
3
            LOG(INFO) << "Recycler get instances: " << [&instances] {
281
3
                std::stringstream ss;
282
30
                for (auto& i : instances) ss << ' ' << i.instance_id();
283
3
                return ss.str();
284
3
            }();
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
Line
Count
Source
280
3
            LOG(INFO) << "Recycler get instances: " << [&instances] {
281
3
                std::stringstream ss;
282
30
                for (auto& i : instances) ss << ' ' << i.instance_id();
283
3
                return ss.str();
284
3
            }();
285
3
            if (!instances.empty()) {
286
                // enqueue instances
287
3
                std::lock_guard lock(mtx_);
288
30
                for (auto& instance : instances) {
289
30
                    if (filter_out_instance(instance.instance_id())) continue;
290
30
                    auto [_, success] = pending_instance_set_.insert(instance.instance_id());
291
                    // skip instance already in pending queue
292
30
                    if (success) {
293
30
                        pending_instance_queue_.push_back(std::move(instance));
294
30
                    }
295
30
                }
296
3
                pending_instance_cond_.notify_all();
297
3
            }
298
1.24k
        } else {
299
1.24k
            LOG(WARNING) << "Skip recycler since enable_recycler is false";
300
1.24k
        }
301
1.24k
        {
302
1.24k
            std::unique_lock lock(mtx_);
303
1.24k
            notifier_.wait_for(lock, std::chrono::seconds(config::recycle_interval_seconds),
304
2.49k
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
Line
Count
Source
304
2.49k
                               [&]() { return stopped(); });
305
1.24k
        }
306
1.24k
    }
307
5
}
308
309
9
void Recycler::recycle_callback() {
310
40
    while (!stopped()) {
311
38
        InstanceInfoPB instance;
312
38
        {
313
38
            std::unique_lock lock(mtx_);
314
38
            pending_instance_cond_.wait(
315
51
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
Line
Count
Source
315
51
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
316
38
            if (stopped()) {
317
7
                return;
318
7
            }
319
31
            instance = std::move(pending_instance_queue_.front());
320
31
            pending_instance_queue_.pop_front();
321
31
            pending_instance_set_.erase(instance.instance_id());
322
31
        }
323
0
        auto& instance_id = instance.instance_id();
324
31
        {
325
31
            std::lock_guard lock(mtx_);
326
            // skip instance in recycling
327
31
            if (recycling_instance_map_.count(instance_id)) continue;
328
31
        }
329
31
        if (!config::enable_recycler) {
330
1
            LOG(WARNING) << "Skip recycle instance_id=" << instance_id
331
1
                         << " since enable_recycler is false";
332
1
            continue;
333
1
        }
334
30
        auto instance_recycler = std::make_shared<InstanceRecycler>(
335
30
                txn_kv_, instance, _thread_pool_group, txn_lazy_committer_);
336
337
30
        if (int r = instance_recycler->init(); r != 0) {
338
0
            LOG(WARNING) << "failed to init instance recycler, instance_id=" << instance_id
339
0
                         << " ret=" << r;
340
0
            continue;
341
0
        }
342
30
        std::string recycle_job_key;
343
30
        job_recycle_key({instance_id}, &recycle_job_key);
344
30
        int ret = prepare_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id,
345
30
                                               ip_port_, config::recycle_interval_seconds * 1000);
346
30
        if (ret != 0) { // Prepare failed
347
20
            LOG(WARNING) << "failed to prepare recycle_job, instance_id=" << instance_id
348
20
                         << " ret=" << ret;
349
20
            continue;
350
20
        } else {
351
10
            std::lock_guard lock(mtx_);
352
10
            recycling_instance_map_.emplace(instance_id, instance_recycler);
353
10
        }
354
10
        if (stopped()) return;
355
10
        LOG_WARNING("begin to recycle instance").tag("instance_id", instance_id);
356
10
        auto ctime_ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
357
10
        g_bvar_recycler_instance_recycle_start_ts.put({instance_id}, ctime_ms);
358
10
        g_bvar_recycler_instance_recycle_task_status.put({"submitted"}, 1);
359
10
        ret = instance_recycler->do_recycle();
360
        // If instance recycler has been aborted, don't finish this job
361
362
10
        if (!instance_recycler->stopped()) {
363
10
            finish_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, ip_port_,
364
10
                                        ret == 0, ctime_ms);
365
10
        }
366
10
        if (instance_recycler->stopped() || ret != 0) {
367
0
            g_bvar_recycler_instance_recycle_task_status.put({"error"}, 1);
368
0
        }
369
10
        {
370
10
            std::lock_guard lock(mtx_);
371
10
            recycling_instance_map_.erase(instance_id);
372
10
        }
373
374
10
        auto now = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
375
10
        auto elpased_ms = now - ctime_ms;
376
10
        g_bvar_recycler_instance_recycle_end_ts.put({instance_id}, now);
377
10
        g_bvar_recycler_instance_last_round_recycle_duration.put({instance_id}, elpased_ms);
378
10
        g_bvar_recycler_instance_next_ts.put({instance_id},
379
10
                                             now + config::recycle_interval_seconds * 1000);
380
10
        g_bvar_recycler_instance_recycle_task_status.put({"completed"}, 1);
381
10
        LOG(INFO) << "recycle instance done, "
382
10
                  << "instance_id=" << instance_id << " ret=" << ret << " ctime_ms: " << ctime_ms
383
10
                  << " now: " << now;
384
385
10
        g_bvar_recycler_instance_recycle_last_success_ts.put({instance_id}, now);
386
387
10
        LOG_WARNING("finish recycle instance")
388
10
                .tag("instance_id", instance_id)
389
10
                .tag("cost_ms", elpased_ms);
390
10
    }
391
9
}
392
393
4
void Recycler::lease_recycle_jobs() {
394
54
    while (!stopped()) {
395
50
        std::vector<std::string> instances;
396
50
        instances.reserve(recycling_instance_map_.size());
397
50
        {
398
50
            std::lock_guard lock(mtx_);
399
50
            for (auto& [id, _] : recycling_instance_map_) {
400
30
                instances.push_back(id);
401
30
            }
402
50
        }
403
50
        for (auto& i : instances) {
404
30
            std::string recycle_job_key;
405
30
            job_recycle_key({i}, &recycle_job_key);
406
30
            int ret = lease_instance_recycle_job(txn_kv_.get(), recycle_job_key, i, ip_port_);
407
30
            if (ret == 1) {
408
0
                std::lock_guard lock(mtx_);
409
0
                if (auto it = recycling_instance_map_.find(i);
410
0
                    it != recycling_instance_map_.end()) {
411
0
                    it->second->stop();
412
0
                }
413
0
            }
414
30
        }
415
50
        {
416
50
            std::unique_lock lock(mtx_);
417
50
            notifier_.wait_for(lock,
418
50
                               std::chrono::milliseconds(config::recycle_job_lease_expired_ms / 3),
419
100
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
Line
Count
Source
419
100
                               [&]() { return stopped(); });
420
50
        }
421
50
    }
422
4
}
423
424
4
void Recycler::check_recycle_tasks() {
425
7
    while (!stopped()) {
426
3
        std::unordered_map<std::string, std::shared_ptr<InstanceRecycler>> recycling_instance_map;
427
3
        {
428
3
            std::lock_guard lock(mtx_);
429
3
            recycling_instance_map = recycling_instance_map_;
430
3
        }
431
3
        for (auto& entry : recycling_instance_map) {
432
0
            entry.second->check_recycle_tasks();
433
0
        }
434
435
3
        std::unique_lock lock(mtx_);
436
3
        notifier_.wait_for(lock, std::chrono::seconds(config::check_recycle_task_interval_seconds),
437
6
                           [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
Line
Count
Source
437
6
                           [&]() { return stopped(); });
438
3
    }
439
4
}
440
441
4
// Starts the recycler.
//
// In order: publishes the configured concurrency, touches the S3 environment singleton,
// optionally starts the checker, registers the recycler RPC service on `server` (when one
// is given), spawns the worker threads, and optionally starts the snapshot data migrator
// and the snapshot chain compactor.
//
// Returns 0 on success, or the non-zero code returned by the component that failed to start.
int Recycler::start(brpc::Server* server) {
    g_bvar_recycler_task_max_concurrency.set_value(config::recycle_concurrency);
    S3Environment::getInstance();

    if (config::enable_checker) {
        checker_ = std::make_unique<Checker>(txn_kv_);
        if (const int rc = checker_->start(); rc != 0) {
            const std::string failure = "failed to start checker";
            LOG(ERROR) << failure;
            std::cerr << failure << std::endl;
            return rc;
        }
        const std::string started = "checker started";
        LOG(INFO) << started;
        std::cout << started << std::endl;
    }

    if (server != nullptr) {
        // The brpc server takes ownership of the service object (SERVER_OWNS_SERVICE).
        auto* service = new RecyclerServiceImpl(txn_kv_, this, checker_.get(), txn_lazy_committer_);
        server->AddService(service, brpc::SERVER_OWNS_SERVICE);
    }

    // One scanner thread, `recycle_concurrency` recycle threads, then the lease and
    // task-check threads.
    workers_.emplace_back([this] { instance_scanner_callback(); });
    for (int worker_idx = 0; worker_idx < config::recycle_concurrency; ++worker_idx) {
        workers_.emplace_back([this] { recycle_callback(); });
    }
    workers_.emplace_back([this] { lease_recycle_jobs(); });
    workers_.emplace_back([this] { check_recycle_tasks(); });

    if (config::enable_snapshot_data_migrator) {
        snapshot_data_migrator_ = std::make_shared<SnapshotDataMigrator>(txn_kv_);
        if (const int rc = snapshot_data_migrator_->start(); rc != 0) {
            LOG(ERROR) << "failed to start snapshot data migrator";
            return rc;
        }
        LOG(INFO) << "snapshot data migrator started";
    }

    if (config::enable_snapshot_chain_compactor) {
        snapshot_chain_compactor_ = std::make_shared<SnapshotChainCompactor>(txn_kv_);
        if (const int rc = snapshot_chain_compactor_->start(); rc != 0) {
            LOG(ERROR) << "failed to start snapshot chain compactor";
            return rc;
        }
        LOG(INFO) << "snapshot chain compactor started";
    }

    return 0;
}
497
498
4
void Recycler::stop() {
499
4
    stopped_ = true;
500
4
    notifier_.notify_all();
501
4
    pending_instance_cond_.notify_all();
502
4
    {
503
4
        std::lock_guard lock(mtx_);
504
4
        for (auto& [_, recycler] : recycling_instance_map_) {
505
0
            recycler->stop();
506
0
        }
507
4
    }
508
20
    for (auto& w : workers_) {
509
20
        if (w.joinable()) w.join();
510
20
    }
511
4
    if (checker_) {
512
0
        checker_->stop();
513
0
    }
514
4
    if (snapshot_data_migrator_) {
515
0
        snapshot_data_migrator_->stop();
516
0
    }
517
4
    if (snapshot_chain_compactor_) {
518
0
        snapshot_chain_compactor_->stop();
519
0
    }
520
4
}
521
522
// Per-instance cache of the inverted index info of each <index_id, schema_version>.
// Schemas that turn out to have no inverted index are remembered separately so they are
// not fetched from the kv store again.
class InstanceRecycler::InvertedIndexIdCache {
public:
    InvertedIndexIdCache(std::string instance_id, std::shared_ptr<TxnKv> txn_kv)
            : instance_id_(std::move(instance_id)), txn_kv_(std::move(txn_kv)) {}

    // Looks up the inverted index info of <index_id, schema_version>, reading the schema
    // from the kv store on a cache miss.
    //
    // Returns 0 if success, 1 if schema kv not found, negative for error.
    // On success `res` holds the inverted index info; it is left untouched when the schema
    // is already known to have no inverted index.
    //
    // The lookup and the later insert are not done under one lock, so two threads that miss
    // on the same key may both read the schema from kv and both try to insert it. This keeps
    // the critical section small at the cost of an occasional duplicated kv read; the
    // duplicated insert does not affect correctness.
    int get(int64_t index_id, int32_t schema_version, InvertedIndexInfo& res) {
        {
            std::lock_guard lock(mtx_);
            if (schemas_without_inverted_index_.count({index_id, schema_version})) {
                return 0;
            }
            if (auto it = inverted_index_id_map_.find({index_id, schema_version});
                it != inverted_index_id_map_.end()) {
                res = it->second;
                return 0;
            }
        }
        // Get schema from kv
        // TODO(plat1ko): Single flight
        std::unique_ptr<Transaction> txn;
        TxnErrorCode err = txn_kv_->create_txn(&txn);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to create txn, err=" << err;
            return -1;
        }
        auto schema_key = meta_schema_key({instance_id_, index_id, schema_version});
        ValueBuf val_buf;
        err = cloud::blob_get(txn.get(), schema_key, &val_buf);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to get schema, err=" << err;
            return static_cast<int>(err);
        }
        doris::TabletSchemaCloudPB schema;
        if (!parse_schema_value(val_buf, &schema)) {
            LOG(WARNING) << "malformed schema value, key=" << hex(schema_key);
            return -1;
        }
        if (schema.index_size() > 0) {
            // V1 is used when the schema does not record a storage format.
            InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
            if (schema.has_inverted_index_storage_format()) {
                index_format = schema.inverted_index_storage_format();
            }
            res.first = index_format;
            res.second.reserve(schema.index_size());
            for (const auto& i : schema.index()) {
                if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
                    res.second.push_back(std::make_pair(i.index_id(), i.index_suffix_name()));
                }
            }
        }
        insert(index_id, schema_version, res);
        return 0;
    }

    // Records `index_info` for <index_id, schema_version>.
    // An empty `index_info.second` means this schema has no inverted index.
    void insert(int64_t index_id, int32_t schema_version, const InvertedIndexInfo& index_info) {
        if (index_info.second.empty()) {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert1");
            std::lock_guard lock(mtx_);
            schemas_without_inverted_index_.emplace(index_id, schema_version);
        } else {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert2");
            std::lock_guard lock(mtx_);
            // try_emplace keeps the existing entry when another thread inserted first.
            inverted_index_id_map_.try_emplace({index_id, schema_version}, index_info);
        }
    }

private:
    std::string instance_id_;
    std::shared_ptr<TxnKv> txn_kv_;

    std::mutex mtx_;
    using Key = std::pair<int64_t, int32_t>; // <index_id, schema_version>
    struct HashOfKey {
        size_t operator()(const Key& key) const {
            // Mix both components. Hashing only `schema_version` would put every index that
            // shares a schema version into the same bucket.
            size_t seed = std::hash<int64_t> {}(key.first);
            seed ^= std::hash<int32_t> {}(key.second) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
            return seed;
        }
    };
    // <index_id, schema_version> -> inverted_index_ids
    std::unordered_map<Key, InvertedIndexInfo, HashOfKey> inverted_index_id_map_;
    // Store <index_id, schema_version> of schema which doesn't have inverted index
    std::unordered_set<Key, HashOfKey> schemas_without_inverted_index_;
};
614
615
// Builds the recycler of one instance: takes shared ownership of the kv store, thread pools
// and lazy committer, copies the instance info, and creates the per-instance helpers
// (inverted index cache, delete-bitmap lock white list, resource manager, snapshot manager).
InstanceRecycler::InstanceRecycler(std::shared_ptr<TxnKv> txn_kv, const InstanceInfoPB& instance,
                                   RecyclerThreadPoolGroup thread_pool_group,
                                   std::shared_ptr<TxnLazyCommitter> txn_lazy_committer)
        : txn_kv_(std::move(txn_kv)),
          instance_id_(instance.instance_id()),
          instance_info_(instance),
          // Built from the members initialized above, not from the (moved-from) parameters.
          inverted_index_id_cache_(std::make_unique<InvertedIndexIdCache>(instance_id_, txn_kv_)),
          _thread_pool_group(std::move(thread_pool_group)),
          txn_lazy_committer_(std::move(txn_lazy_committer)),
          delete_bitmap_lock_white_list_(std::make_shared<DeleteBitmapLockWhiteList>()),
          resource_mgr_(std::make_shared<ResourceManager>(txn_kv_)) {
    delete_bitmap_lock_white_list_->init();
    resource_mgr_->init();

    snapshot_manager_ = create_snapshot_manager(txn_kv_);

    // The recycler's resource manager is not notified when the instance info changes, so
    // push the instance info we were given into it here to make sure it is up to date.
    auto* lazy_committer_resource_mgr = txn_lazy_committer_->resource_manager();
    lazy_committer_resource_mgr->refresh_instance(instance_id_, instance);
}
635
636
140
// Defaulted in this translation unit, after the definition of the nested
// InvertedIndexIdCache class above.
// NOTE(review): presumably kept out-of-line so the smart-pointer member holding
// InvertedIndexIdCache is destroyed where that type is complete - confirm against the header.
InstanceRecycler::~InstanceRecycler() = default;
637
638
124
int InstanceRecycler::init_obj_store_accessors() {
639
124
    for (const auto& obj_info : instance_info_.obj_info()) {
640
84
#ifdef UNIT_TEST
641
84
        auto accessor = std::make_shared<MockAccessor>();
642
#else
643
        auto s3_conf = S3Conf::from_obj_store_info(obj_info);
644
        if (!s3_conf) {
645
            LOG(WARNING) << "failed to init object accessor, instance_id=" << instance_id_;
646
            return -1;
647
        }
648
649
        std::shared_ptr<S3Accessor> accessor;
650
        int ret = S3Accessor::create(std::move(*s3_conf), &accessor);
651
        if (ret != 0) {
652
            LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
653
                         << " resource_id=" << obj_info.id();
654
            return ret;
655
        }
656
#endif
657
84
        accessor_map_.emplace(obj_info.id(), std::move(accessor));
658
84
    }
659
660
124
    return 0;
661
124
}
662
663
124
int InstanceRecycler::init_storage_vault_accessors() {
664
124
    if (instance_info_.resource_ids().empty()) {
665
117
        return 0;
666
117
    }
667
668
7
    FullRangeGetOptions opts(txn_kv_);
669
7
    opts.prefetch = true;
670
7
    auto it = txn_kv_->full_range_get(storage_vault_key({instance_id_, ""}),
671
7
                                      storage_vault_key({instance_id_, "\xff"}), std::move(opts));
672
673
25
    for (auto kv = it->next(); kv.has_value(); kv = it->next()) {
674
18
        auto [k, v] = *kv;
675
18
        StorageVaultPB vault;
676
18
        if (!vault.ParseFromArray(v.data(), v.size())) {
677
0
            LOG(WARNING) << "malformed storage vault, unable to deserialize key=" << hex(k);
678
0
            return -1;
679
0
        }
680
18
        std::string recycler_storage_vault_white_list = accumulate(
681
18
                config::recycler_storage_vault_white_list.begin(),
682
18
                config::recycler_storage_vault_white_list.end(), std::string(),
683
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
Line
Count
Source
683
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
684
18
        LOG_INFO("config::recycler_storage_vault_white_list")
685
18
                .tag("", recycler_storage_vault_white_list);
686
18
        if (!config::recycler_storage_vault_white_list.empty()) {
687
8
            if (auto it = std::find(config::recycler_storage_vault_white_list.begin(),
688
8
                                    config::recycler_storage_vault_white_list.end(), vault.name());
689
8
                it == config::recycler_storage_vault_white_list.end()) {
690
2
                LOG_WARNING(
691
2
                        "failed to init accessor for vault because this vault is not in "
692
2
                        "config::recycler_storage_vault_white_list. ")
693
2
                        .tag(" vault name:", vault.name())
694
2
                        .tag(" config::recycler_storage_vault_white_list:",
695
2
                             recycler_storage_vault_white_list);
696
2
                continue;
697
2
            }
698
8
        }
699
16
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
700
16
                                 &accessor_map_, &vault);
701
16
        if (vault.has_hdfs_info()) {
702
9
#ifdef ENABLE_HDFS_STORAGE_VAULT
703
9
            auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
704
9
            int ret = accessor->init();
705
9
            if (ret != 0) {
706
4
                LOG(WARNING) << "failed to init hdfs accessor. instance_id=" << instance_id_
707
4
                             << " resource_id=" << vault.id() << " name=" << vault.name()
708
4
                             << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
709
4
                continue;
710
4
            }
711
5
            LOG(INFO) << "succeed to init hdfs accessor. instance_id=" << instance_id_
712
5
                      << " resource_id=" << vault.id() << " name=" << vault.name()
713
5
                      << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
714
5
            accessor_map_.emplace(vault.id(), std::move(accessor));
715
#else
716
            LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build option), "
717
                       << "but HDFS storage vaults were detected";
718
#endif
719
7
        } else if (vault.has_obj_info()) {
720
7
            auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info());
721
7
            if (!s3_conf) {
722
1
                LOG(WARNING) << "failed to init object accessor, invalid conf, instance_id="
723
1
                             << instance_id_ << " s3_vault=" << vault.obj_info().ShortDebugString();
724
1
                continue;
725
1
            }
726
727
6
            std::shared_ptr<S3Accessor> accessor;
728
6
            int ret = S3Accessor::create(*s3_conf, &accessor);
729
6
            if (ret != 0) {
730
0
                LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
731
0
                             << " resource_id=" << vault.id() << " name=" << vault.name()
732
0
                             << " ret=" << ret
733
0
                             << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
734
0
                continue;
735
0
            }
736
6
            LOG(INFO) << "succeed to init s3 accessor. instance_id=" << instance_id_
737
6
                      << " resource_id=" << vault.id() << " name=" << vault.name() << " ret=" << ret
738
6
                      << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
739
6
            accessor_map_.emplace(vault.id(), std::move(accessor));
740
6
        }
741
16
    }
742
743
7
    if (!it->is_valid()) {
744
0
        LOG_WARNING("failed to get storage vault kv");
745
0
        return -1;
746
0
    }
747
748
7
    if (accessor_map_.empty()) {
749
1
        LOG(WARNING) << "no accessors for instance=" << instance_id_;
750
1
        return -2;
751
1
    }
752
6
    LOG_INFO("finish init instance recycler number_accessors={} instance=", accessor_map_.size(),
753
6
             instance_id_);
754
755
6
    return 0;
756
7
}
757
758
124
int InstanceRecycler::init() {
759
124
    int ret = init_obj_store_accessors();
760
124
    if (ret != 0) {
761
0
        return ret;
762
0
    }
763
764
124
    return init_storage_vault_accessors();
765
124
}
766
767
// Bundles several `int()` callables into one task.
//
// The returned task runs every callable, in argument order, regardless of earlier failures.
// It returns 0 when all of them returned 0, otherwise the return value of the last callable
// that returned non-zero.
template <typename... Func>
auto task_wrapper(Func... funcs) -> std::function<int()> {
    return [funcs...]() {
        int last_failure = 0;
        [[maybe_unused]] auto record = [&last_failure](int rc) {
            if (rc != 0) {
                last_failure = rc;
            }
        };
        // A comma fold evaluates its operands strictly left to right.
        (record(funcs()), ...);
        return last_failure;
    };
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Line
Count
Source
768
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
769
10
    return [funcs...]() {
770
10
        return [](std::initializer_list<int> ret_vals) {
771
10
            int i = 0;
772
10
            for (int ret : ret_vals) {
773
10
                if (ret != 0) {
774
10
                    i = ret;
775
10
                }
776
10
            }
777
10
            return i;
778
10
        }({funcs()...});
779
10
    };
780
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Line
Count
Source
768
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
769
10
    return [funcs...]() {
770
10
        return [](std::initializer_list<int> ret_vals) {
771
10
            int i = 0;
772
10
            for (int ret : ret_vals) {
773
10
                if (ret != 0) {
774
10
                    i = ret;
775
10
                }
776
10
            }
777
10
            return i;
778
10
        }({funcs()...});
779
10
    };
780
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Line
Count
Source
768
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
769
10
    return [funcs...]() {
770
10
        return [](std::initializer_list<int> ret_vals) {
771
10
            int i = 0;
772
10
            for (int ret : ret_vals) {
773
10
                if (ret != 0) {
774
10
                    i = ret;
775
10
                }
776
10
            }
777
10
            return i;
778
10
        }({funcs()...});
779
10
    };
780
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Line
Count
Source
768
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
769
10
    return [funcs...]() {
770
10
        return [](std::initializer_list<int> ret_vals) {
771
10
            int i = 0;
772
10
            for (int ret : ret_vals) {
773
10
                if (ret != 0) {
774
10
                    i = ret;
775
10
                }
776
10
            }
777
10
            return i;
778
10
        }({funcs()...});
779
10
    };
780
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Line
Count
Source
768
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
769
10
    return [funcs...]() {
770
10
        return [](std::initializer_list<int> ret_vals) {
771
10
            int i = 0;
772
10
            for (int ret : ret_vals) {
773
10
                if (ret != 0) {
774
10
                    i = ret;
775
10
                }
776
10
            }
777
10
            return i;
778
10
        }({funcs()...});
779
10
    };
780
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Line
Count
Source
768
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
769
10
    return [funcs...]() {
770
10
        return [](std::initializer_list<int> ret_vals) {
771
10
            int i = 0;
772
10
            for (int ret : ret_vals) {
773
10
                if (ret != 0) {
774
10
                    i = ret;
775
10
                }
776
10
            }
777
10
            return i;
778
10
        }({funcs()...});
779
10
    };
780
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Line
Count
Source
768
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
769
10
    return [funcs...]() {
770
10
        return [](std::initializer_list<int> ret_vals) {
771
10
            int i = 0;
772
10
            for (int ret : ret_vals) {
773
10
                if (ret != 0) {
774
10
                    i = ret;
775
10
                }
776
10
            }
777
10
            return i;
778
10
        }({funcs()...});
779
10
    };
780
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Line
Count
Source
768
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
769
10
    return [funcs...]() {
770
10
        return [](std::initializer_list<int> ret_vals) {
771
10
            int i = 0;
772
10
            for (int ret : ret_vals) {
773
10
                if (ret != 0) {
774
10
                    i = ret;
775
10
                }
776
10
            }
777
10
            return i;
778
10
        }({funcs()...});
779
10
    };
780
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Line
Count
Source
768
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
769
10
    return [funcs...]() {
770
10
        return [](std::initializer_list<int> ret_vals) {
771
10
            int i = 0;
772
10
            for (int ret : ret_vals) {
773
10
                if (ret != 0) {
774
10
                    i = ret;
775
10
                }
776
10
            }
777
10
            return i;
778
10
        }({funcs()...});
779
10
    };
780
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Line
Count
Source
768
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
769
10
    return [funcs...]() {
770
10
        return [](std::initializer_list<int> ret_vals) {
771
10
            int i = 0;
772
10
            for (int ret : ret_vals) {
773
10
                if (ret != 0) {
774
10
                    i = ret;
775
10
                }
776
10
            }
777
10
            return i;
778
10
        }({funcs()...});
779
10
    };
780
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
Line
Count
Source
768
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
769
10
    return [funcs...]() {
770
10
        return [](std::initializer_list<int> ret_vals) {
771
10
            int i = 0;
772
10
            for (int ret : ret_vals) {
773
10
                if (ret != 0) {
774
10
                    i = ret;
775
10
                }
776
10
            }
777
10
            return i;
778
10
        }({funcs()...});
779
10
    };
780
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_
Line
Count
Source
768
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
769
10
    return [funcs...]() {
770
10
        return [](std::initializer_list<int> ret_vals) {
771
10
            int i = 0;
772
10
            for (int ret : ret_vals) {
773
10
                if (ret != 0) {
774
10
                    i = ret;
775
10
                }
776
10
            }
777
10
            return i;
778
10
        }({funcs()...});
779
10
    };
780
10
}
781
782
10
int InstanceRecycler::do_recycle() {
783
10
    TEST_SYNC_POINT("InstanceRecycler.do_recycle");
784
10
    tablet_metrics_context_.reset();
785
10
    segment_metrics_context_.reset();
786
10
    DORIS_CLOUD_DEFER {
787
10
        tablet_metrics_context_.finish_report();
788
10
        segment_metrics_context_.finish_report();
789
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
Line
Count
Source
786
10
    DORIS_CLOUD_DEFER {
787
10
        tablet_metrics_context_.finish_report();
788
10
        segment_metrics_context_.finish_report();
789
10
    };
790
10
    if (instance_info_.status() == InstanceInfoPB::DELETED) {
791
0
        int res = recycle_cluster_snapshots();
792
0
        if (res != 0) {
793
0
            return -1;
794
0
        }
795
0
        return recycle_deleted_instance();
796
10
    } else if (instance_info_.status() == InstanceInfoPB::NORMAL) {
797
10
        SyncExecutor<int> sync_executor(_thread_pool_group.group_recycle_function_pool,
798
10
                                        fmt::format("instance id {}", instance_id_),
799
120
                                        [](int r) { return r != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
Line
Count
Source
799
120
                                        [](int r) { return r != 0; });
800
10
        sync_executor
801
10
                .add(task_wrapper(
802
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
Line
Count
Source
802
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
803
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
Line
Count
Source
803
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
804
10
                .add(task_wrapper( // dropped table and dropped partition need to be recycled in series
805
                                   // becase they may both recycle the same set of tablets
806
                        // recycle dropped table or idexes(mv, rollup)
807
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
Line
Count
Source
807
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
808
                        // recycle dropped partitions
809
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
Line
Count
Source
809
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
810
10
                .add(task_wrapper(
811
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
Line
Count
Source
811
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
812
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
Line
Count
Source
812
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
813
10
                .add(task_wrapper(
814
10
                        [this]() -> int { return InstanceRecycler::recycle_packed_files(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
Line
Count
Source
814
10
                        [this]() -> int { return InstanceRecycler::recycle_packed_files(); }))
815
10
                .add(task_wrapper(
816
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
Line
Count
Source
816
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
817
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
Line
Count
Source
817
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
818
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
Line
Count
Source
818
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
819
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
Line
Count
Source
819
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
820
10
                .add(task_wrapper(
821
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
Line
Count
Source
821
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
822
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
Line
Count
Source
822
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
823
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_15clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_15clEv
Line
Count
Source
823
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
824
10
        bool finished = true;
825
10
        std::vector<int> rets = sync_executor.when_all(&finished);
826
120
        for (int ret : rets) {
827
120
            if (ret != 0) {
828
0
                return ret;
829
0
            }
830
120
        }
831
10
        return finished ? 0 : -1;
832
10
    } else {
833
0
        LOG(WARNING) << "invalid instance status: " << instance_info_.status()
834
0
                     << " instance_id=" << instance_id_;
835
0
        return -1;
836
0
    }
837
10
}
838
839
/**
840
* 1. delete all remote data
841
* 2. delete all kv
842
* 3. remove instance kv
843
*/
844
5
int InstanceRecycler::recycle_deleted_instance() {
845
5
    LOG_WARNING("begin to recycle deleted instance").tag("instance_id", instance_id_);
846
847
5
    int ret = 0;
848
5
    auto start_time = steady_clock::now();
849
850
5
    DORIS_CLOUD_DEFER {
851
5
        auto cost = duration<float>(steady_clock::now() - start_time).count();
852
5
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
853
5
                     << " recycle deleted instance, cost=" << cost
854
5
                     << "s, instance_id=" << instance_id_;
855
5
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
Line
Count
Source
850
5
    DORIS_CLOUD_DEFER {
851
5
        auto cost = duration<float>(steady_clock::now() - start_time).count();
852
5
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
853
5
                     << " recycle deleted instance, cost=" << cost
854
5
                     << "s, instance_id=" << instance_id_;
855
5
    };
856
857
    // Step 1: Recycle tmp rowsets (contains ref count but txn is not committed)
858
5
    auto recycle_tmp_rowsets_with_mark_delete_enabled = [&]() -> int {
859
5
        int res = recycle_tmp_rowsets();
860
5
        if (res == 0 && config::enable_mark_delete_rowset_before_recycle) {
861
            // If mark_delete_rowset_before_recycle is enabled, we will mark delete rowsets before recycling them,
862
            // so we need to recycle tmp rowsets again to make sure all rowsets in recycle space are marked for
863
            // deletion, otherwise we may meet some corner cases that some rowsets are not marked for deletion
864
            // and cannot be recycled.
865
5
            res = recycle_tmp_rowsets();
866
5
        }
867
5
        return res;
868
5
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_1clEv
Line
Count
Source
858
5
    auto recycle_tmp_rowsets_with_mark_delete_enabled = [&]() -> int {
859
5
        int res = recycle_tmp_rowsets();
860
5
        if (res == 0 && config::enable_mark_delete_rowset_before_recycle) {
861
            // If mark_delete_rowset_before_recycle is enabled, we will mark delete rowsets before recycling them,
862
            // so we need to recycle tmp rowsets again to make sure all rowsets in recycle space are marked for
863
            // deletion, otherwise we may meet some corner cases that some rowsets are not marked for deletion
864
            // and cannot be recycled.
865
5
            res = recycle_tmp_rowsets();
866
5
        }
867
5
        return res;
868
5
    };
869
5
    if (recycle_tmp_rowsets_with_mark_delete_enabled() != 0) {
870
0
        LOG_WARNING("failed to recycle tmp rowsets").tag("instance_id", instance_id_);
871
0
        ret = -1;
872
0
        return -1;
873
0
    }
874
875
    // Step 2: Recycle versioned rowsets in recycle space (already marked for deletion)
876
5
    if (recycle_versioned_rowsets() != 0) {
877
0
        LOG_WARNING("failed to recycle versioned rowsets").tag("instance_id", instance_id_);
878
0
        ret = -1;
879
0
        return -1;
880
0
    }
881
882
    // Step 3: Recycle operation logs (can recycle logs not referenced by snapshots)
883
5
    if (recycle_operation_logs() != 0) {
884
0
        LOG_WARNING("failed to recycle operation logs").tag("instance_id", instance_id_);
885
0
        ret = -1;
886
0
        return -1;
887
0
    }
888
889
    // Step 4: Check if there are still cluster snapshots
890
5
    bool has_snapshots = false;
891
5
    if (has_cluster_snapshots(&has_snapshots) != 0) {
892
0
        LOG(WARNING) << "check instance cluster snapshots failed, instance_id=" << instance_id_;
893
0
        ret = -1;
894
0
        return -1;
895
5
    } else if (has_snapshots) {
896
1
        LOG(INFO) << "instance has cluster snapshots, skip recycling, instance_id=" << instance_id_;
897
1
        return 0;
898
1
    }
899
900
4
    bool snapshot_enabled = instance_info().has_snapshot_switch_status() &&
901
4
                            instance_info().snapshot_switch_status() !=
902
1
                                    SnapshotSwitchStatus::SNAPSHOT_SWITCH_DISABLED;
903
4
    if (snapshot_enabled) {
904
1
        bool has_unrecycled_rowsets = false;
905
1
        if (recycle_ref_rowsets(&has_unrecycled_rowsets) != 0) {
906
0
            LOG_WARNING("failed to recycle ref rowsets").tag("instance_id", instance_id_);
907
0
            ret = -1;
908
0
            return -1;
909
1
        } else if (has_unrecycled_rowsets) {
910
0
            LOG_INFO("instance has referenced rowsets, skip recycling")
911
0
                    .tag("instance_id", instance_id_);
912
0
            return ret;
913
0
        }
914
3
    } else { // delete all remote data if snapshot is disabled
915
3
        for (auto& [_, accessor] : accessor_map_) {
916
3
            if (stopped()) {
917
0
                return ret;
918
0
            }
919
920
3
            LOG(INFO) << "begin to delete all objects in " << accessor->uri();
921
3
            int del_ret = accessor->delete_all();
922
3
            if (del_ret == 0) {
923
3
                LOG(INFO) << "successfully delete all objects in " << accessor->uri();
924
3
            } else if (del_ret != 1) { // no need to log, because S3Accessor has logged this error
925
                // If `del_ret == 1`, it can be considered that the object data has been recycled by cloud platform,
926
                // so the recycling has been successful.
927
0
                ret = -1;
928
0
            }
929
3
        }
930
931
3
        if (ret != 0) {
932
0
            LOG(WARNING) << "failed to delete all data of deleted instance=" << instance_id_;
933
0
            return ret;
934
0
        }
935
3
    }
936
937
    // Check successor instance, if exists, skip deleting kv because successor instance may still need the data in kv
938
4
    if (instance_info_.has_successor_instance_id() &&
939
4
        !instance_info_.successor_instance_id().empty()) {
940
0
        std::string key = instance_key(instance_info_.successor_instance_id());
941
0
        std::unique_ptr<Transaction> txn;
942
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
943
0
        if (err != TxnErrorCode::TXN_OK) {
944
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id_
945
0
                         << " successor_instance_id=" << instance_info_.successor_instance_id()
946
0
                         << " err=" << err;
947
0
            ret = -1;
948
0
            return -1;
949
0
        }
950
951
0
        std::string value;
952
0
        err = txn->get(key, &value);
953
0
        if (err == TxnErrorCode::TXN_OK) {
954
0
            LOG(INFO) << "instance successor instance is still exist, skip deleting kv,"
955
0
                      << " instance_id=" << instance_id_
956
0
                      << " successor_instance_id=" << instance_info_.successor_instance_id();
957
0
            return 0;
958
0
        } else if (err != TxnErrorCode::TXN_KEY_NOT_FOUND) {
959
0
            LOG(WARNING) << "failed to get successor instance, instance_id=" << instance_id_
960
0
                         << " successor_instance_id=" << instance_info_.successor_instance_id()
961
0
                         << " err=" << err;
962
0
            ret = -1;
963
0
            return -1;
964
0
        }
965
0
    }
966
967
    // delete all kv
968
4
    std::unique_ptr<Transaction> txn;
969
4
    TxnErrorCode err = txn_kv_->create_txn(&txn);
970
4
    if (err != TxnErrorCode::TXN_OK) {
971
0
        LOG(WARNING) << "failed to create txn";
972
0
        ret = -1;
973
0
        return -1;
974
0
    }
975
4
    LOG(INFO) << "begin to delete all kv, instance_id=" << instance_id_;
976
    // delete kv before deleting objects to prevent the checker from misjudging data loss
977
4
    std::string start_txn_key = txn_key_prefix(instance_id_);
978
4
    std::string end_txn_key = txn_key_prefix(instance_id_ + '\x00');
979
4
    txn->remove(start_txn_key, end_txn_key);
980
4
    std::string start_version_key = version_key_prefix(instance_id_);
981
4
    std::string end_version_key = version_key_prefix(instance_id_ + '\x00');
982
4
    txn->remove(start_version_key, end_version_key);
983
4
    std::string start_meta_key = meta_key_prefix(instance_id_);
984
4
    std::string end_meta_key = meta_key_prefix(instance_id_ + '\x00');
985
4
    txn->remove(start_meta_key, end_meta_key);
986
4
    std::string start_recycle_key = recycle_key_prefix(instance_id_);
987
4
    std::string end_recycle_key = recycle_key_prefix(instance_id_ + '\x00');
988
4
    txn->remove(start_recycle_key, end_recycle_key);
989
4
    std::string start_stats_tablet_key = stats_tablet_key({instance_id_, 0, 0, 0, 0});
990
4
    std::string end_stats_tablet_key = stats_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
991
4
    txn->remove(start_stats_tablet_key, end_stats_tablet_key);
992
4
    std::string start_copy_key = copy_key_prefix(instance_id_);
993
4
    std::string end_copy_key = copy_key_prefix(instance_id_ + '\x00');
994
4
    txn->remove(start_copy_key, end_copy_key);
995
    // should not remove job key range, because we need to reserve job recycle kv
996
    // 0:instance_id  1:table_id  2:index_id  3:part_id  4:tablet_id
997
4
    std::string start_job_tablet_key = job_tablet_key({instance_id_, 0, 0, 0, 0});
998
4
    std::string end_job_tablet_key = job_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
999
4
    txn->remove(start_job_tablet_key, end_job_tablet_key);
1000
4
    StorageVaultKeyInfo key_info0 {instance_id_, ""};
1001
4
    StorageVaultKeyInfo key_info1 {instance_id_, "\xff"};
1002
4
    std::string start_vault_key = storage_vault_key(key_info0);
1003
4
    std::string end_vault_key = storage_vault_key(key_info1);
1004
4
    txn->remove(start_vault_key, end_vault_key);
1005
4
    std::string versioned_version_key_start = versioned::version_key_prefix(instance_id_);
1006
4
    std::string versioned_version_key_end = versioned::version_key_prefix(instance_id_ + '\x00');
1007
4
    txn->remove(versioned_version_key_start, versioned_version_key_end);
1008
4
    std::string versioned_index_key_start = versioned::index_key_prefix(instance_id_);
1009
4
    std::string versioned_index_key_end = versioned::index_key_prefix(instance_id_ + '\x00');
1010
4
    txn->remove(versioned_index_key_start, versioned_index_key_end);
1011
4
    std::string versioned_stats_tablet_key_start = versioned::stats_key_prefix(instance_id_);
1012
4
    std::string versioned_stats_tablet_key_end = versioned::stats_key_prefix(instance_id_ + '\x00');
1013
4
    txn->remove(versioned_stats_tablet_key_start, versioned_stats_tablet_key_end);
1014
4
    std::string versioned_meta_key_start = versioned::meta_key_prefix(instance_id_);
1015
4
    std::string versioned_meta_key_end = versioned::meta_key_prefix(instance_id_ + '\x00');
1016
4
    txn->remove(versioned_meta_key_start, versioned_meta_key_end);
1017
4
    std::string versioned_data_key_start = versioned::data_key_prefix(instance_id_);
1018
4
    std::string versioned_data_key_end = versioned::data_key_prefix(instance_id_ + '\x00');
1019
4
    txn->remove(versioned_data_key_start, versioned_data_key_end);
1020
4
    std::string versioned_log_key_start = versioned::log_key_prefix(instance_id_);
1021
4
    std::string versioned_log_key_end = versioned::log_key_prefix(instance_id_ + '\x00');
1022
4
    txn->remove(versioned_log_key_start, versioned_log_key_end);
1023
4
    err = txn->commit();
1024
4
    if (err != TxnErrorCode::TXN_OK) {
1025
0
        LOG(WARNING) << "failed to delete all kv, instance_id=" << instance_id_ << ", err=" << err;
1026
0
        ret = -1;
1027
0
    }
1028
1029
4
    if (ret == 0) {
1030
        // remove instance kv
1031
        // ATTN: MUST ensure that cloud platform won't regenerate the same instance id
1032
4
        err = txn_kv_->create_txn(&txn);
1033
4
        if (err != TxnErrorCode::TXN_OK) {
1034
0
            LOG(WARNING) << "failed to create txn";
1035
0
            ret = -1;
1036
0
            return ret;
1037
0
        }
1038
4
        std::string key;
1039
4
        instance_key({instance_id_}, &key);
1040
4
        txn->atomic_add(system_meta_service_instance_update_key(), 1);
1041
4
        txn->remove(key);
1042
4
        err = txn->commit();
1043
4
        if (err != TxnErrorCode::TXN_OK) {
1044
0
            LOG(WARNING) << "failed to delete instance kv, instance_id=" << instance_id_
1045
0
                         << " err=" << err;
1046
0
            ret = -1;
1047
0
        }
1048
4
    }
1049
4
    return ret;
1050
4
}
1051
1052
int InstanceRecycler::check_rowset_exists(int64_t tablet_id, const std::string& rowset_id,
1053
9
                                          bool* exists, PackedFileRecycleStats* stats) {
1054
9
    if (exists == nullptr) {
1055
0
        return -1;
1056
0
    }
1057
9
    *exists = false;
1058
1059
9
    std::string begin = meta_rowset_key({instance_id_, tablet_id, 0});
1060
9
    std::string end = meta_rowset_key({instance_id_, tablet_id + 1, 0});
1061
9
    std::string scan_begin = begin;
1062
1063
9
    while (true) {
1064
9
        std::unique_ptr<RangeGetIterator> it_range;
1065
9
        int get_ret = txn_get(txn_kv_.get(), scan_begin, end, it_range);
1066
9
        if (get_ret < 0) {
1067
0
            LOG_WARNING("failed to scan rowset metas when recycling packed file")
1068
0
                    .tag("instance_id", instance_id_)
1069
0
                    .tag("tablet_id", tablet_id)
1070
0
                    .tag("ret", get_ret);
1071
0
            return -1;
1072
0
        }
1073
9
        if (get_ret == 1 || it_range == nullptr || !it_range->has_next()) {
1074
6
            return 0;
1075
6
        }
1076
1077
3
        std::string last_key;
1078
3
        while (it_range->has_next()) {
1079
3
            auto [k, v] = it_range->next();
1080
3
            last_key.assign(k.data(), k.size());
1081
3
            doris::RowsetMetaCloudPB rowset_meta;
1082
3
            if (!rowset_meta.ParseFromArray(v.data(), v.size())) {
1083
0
                LOG_WARNING("malformed rowset meta when checking packed file rowset existence")
1084
0
                        .tag("instance_id", instance_id_)
1085
0
                        .tag("tablet_id", tablet_id)
1086
0
                        .tag("key", hex(k));
1087
0
                continue;
1088
0
            }
1089
3
            if (stats) {
1090
3
                ++stats->rowset_scan_count;
1091
3
            }
1092
3
            if (rowset_meta.rowset_id_v2() == rowset_id) {
1093
3
                *exists = true;
1094
3
                return 0;
1095
3
            }
1096
3
        }
1097
1098
0
        if (!it_range->more()) {
1099
0
            return 0;
1100
0
        }
1101
1102
        // Continue scanning from the next key to keep each transaction short.
1103
0
        scan_begin = std::move(last_key);
1104
0
        scan_begin.push_back('\x00');
1105
0
    }
1106
9
}
1107
1108
int InstanceRecycler::check_recycle_and_tmp_rowset_exists(int64_t tablet_id,
1109
                                                          const std::string& rowset_id,
1110
                                                          int64_t txn_id, bool* recycle_exists,
1111
11
                                                          bool* tmp_exists) {
1112
11
    if (recycle_exists == nullptr || tmp_exists == nullptr) {
1113
0
        return -1;
1114
0
    }
1115
11
    *recycle_exists = false;
1116
11
    *tmp_exists = false;
1117
1118
11
    if (txn_id <= 0) {
1119
0
        LOG_WARNING("invalid txn id when checking recycle/tmp rowset existence")
1120
0
                .tag("instance_id", instance_id_)
1121
0
                .tag("tablet_id", tablet_id)
1122
0
                .tag("rowset_id", rowset_id)
1123
0
                .tag("txn_id", txn_id);
1124
0
        return -1;
1125
0
    }
1126
1127
11
    std::unique_ptr<Transaction> txn;
1128
11
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1129
11
    if (err != TxnErrorCode::TXN_OK) {
1130
0
        LOG_WARNING("failed to create txn when checking recycle/tmp rowset existence")
1131
0
                .tag("instance_id", instance_id_)
1132
0
                .tag("tablet_id", tablet_id)
1133
0
                .tag("rowset_id", rowset_id)
1134
0
                .tag("txn_id", txn_id)
1135
0
                .tag("err", err);
1136
0
        return -1;
1137
0
    }
1138
1139
11
    std::string recycle_key = recycle_rowset_key({instance_id_, tablet_id, rowset_id});
1140
11
    auto ret = key_exists(txn.get(), recycle_key, true);
1141
11
    if (ret == TxnErrorCode::TXN_OK) {
1142
1
        *recycle_exists = true;
1143
10
    } else if (ret != TxnErrorCode::TXN_KEY_NOT_FOUND) {
1144
0
        LOG_WARNING("failed to check recycle rowset existence")
1145
0
                .tag("instance_id", instance_id_)
1146
0
                .tag("tablet_id", tablet_id)
1147
0
                .tag("rowset_id", rowset_id)
1148
0
                .tag("key", hex(recycle_key))
1149
0
                .tag("err", ret);
1150
0
        return -1;
1151
0
    }
1152
1153
11
    std::string tmp_key = meta_rowset_tmp_key({instance_id_, txn_id, tablet_id});
1154
11
    ret = key_exists(txn.get(), tmp_key, true);
1155
11
    if (ret == TxnErrorCode::TXN_OK) {
1156
1
        *tmp_exists = true;
1157
10
    } else if (ret != TxnErrorCode::TXN_KEY_NOT_FOUND) {
1158
0
        LOG_WARNING("failed to check tmp rowset existence")
1159
0
                .tag("instance_id", instance_id_)
1160
0
                .tag("tablet_id", tablet_id)
1161
0
                .tag("txn_id", txn_id)
1162
0
                .tag("key", hex(tmp_key))
1163
0
                .tag("err", ret);
1164
0
        return -1;
1165
0
    }
1166
1167
11
    return 0;
1168
11
}
1169
1170
std::pair<std::string, std::shared_ptr<StorageVaultAccessor>>
1171
8
InstanceRecycler::resolve_packed_file_accessor(const std::string& hint) {
1172
8
    if (!hint.empty()) {
1173
8
        if (auto it = accessor_map_.find(hint); it != accessor_map_.end()) {
1174
8
            return {hint, it->second};
1175
8
        }
1176
8
    }
1177
1178
0
    return {"", nullptr};
1179
8
}
1180
1181
int InstanceRecycler::correct_packed_file_info(cloud::PackedFileInfoPB* packed_info, bool* changed,
1182
                                               const std::string& packed_file_path,
1183
3
                                               PackedFileRecycleStats* stats) {
1184
3
    bool local_changed = false;
1185
3
    int64_t left_num = 0;
1186
3
    int64_t left_bytes = 0;
1187
3
    bool all_small_files_confirmed = true;
1188
3
    LOG(INFO) << "begin to correct file: " << packed_file_path;
1189
1190
14
    auto log_small_file_status = [&](const cloud::PackedSlicePB& file, bool confirmed_this_round) {
1191
14
        int64_t tablet_id = file.has_tablet_id() ? file.tablet_id() : int64_t {-1};
1192
14
        std::string rowset_id = file.has_rowset_id() ? file.rowset_id() : std::string {};
1193
14
        int64_t txn_id = file.has_txn_id() ? file.txn_id() : int64_t {0};
1194
14
        LOG_INFO("packed slice correction status")
1195
14
                .tag("instance_id", instance_id_)
1196
14
                .tag("packed_file_path", packed_file_path)
1197
14
                .tag("small_file_path", file.path())
1198
14
                .tag("tablet_id", tablet_id)
1199
14
                .tag("rowset_id", rowset_id)
1200
14
                .tag("txn_id", txn_id)
1201
14
                .tag("size", file.size())
1202
14
                .tag("deleted", file.deleted())
1203
14
                .tag("corrected", file.corrected())
1204
14
                .tag("confirmed_this_round", confirmed_this_round);
1205
14
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24correct_packed_file_infoEPNS0_16PackedFileInfoPBEPbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS1_22PackedFileRecycleStatsEENK3$_0clERKNS0_13PackedSlicePBEb
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24correct_packed_file_infoEPNS0_16PackedFileInfoPBEPbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS1_22PackedFileRecycleStatsEENK3$_0clERKNS0_13PackedSlicePBEb
Line
Count
Source
1190
14
    auto log_small_file_status = [&](const cloud::PackedSlicePB& file, bool confirmed_this_round) {
1191
14
        int64_t tablet_id = file.has_tablet_id() ? file.tablet_id() : int64_t {-1};
1192
14
        std::string rowset_id = file.has_rowset_id() ? file.rowset_id() : std::string {};
1193
14
        int64_t txn_id = file.has_txn_id() ? file.txn_id() : int64_t {0};
1194
14
        LOG_INFO("packed slice correction status")
1195
14
                .tag("instance_id", instance_id_)
1196
14
                .tag("packed_file_path", packed_file_path)
1197
14
                .tag("small_file_path", file.path())
1198
14
                .tag("tablet_id", tablet_id)
1199
14
                .tag("rowset_id", rowset_id)
1200
14
                .tag("txn_id", txn_id)
1201
14
                .tag("size", file.size())
1202
14
                .tag("deleted", file.deleted())
1203
14
                .tag("corrected", file.corrected())
1204
14
                .tag("confirmed_this_round", confirmed_this_round);
1205
14
    };
1206
1207
17
    for (int i = 0; i < packed_info->slices_size(); ++i) {
1208
14
        auto* small_file = packed_info->mutable_slices(i);
1209
14
        if (small_file->deleted()) {
1210
3
            log_small_file_status(*small_file, small_file->corrected());
1211
3
            continue;
1212
3
        }
1213
1214
11
        if (small_file->corrected()) {
1215
0
            left_num++;
1216
0
            left_bytes += small_file->size();
1217
0
            log_small_file_status(*small_file, true);
1218
0
            continue;
1219
0
        }
1220
1221
11
        if (!small_file->has_tablet_id() || !small_file->has_rowset_id()) {
1222
0
            LOG_WARNING("packed file small file missing identifiers during correction")
1223
0
                    .tag("instance_id", instance_id_)
1224
0
                    .tag("small_file_path", small_file->path())
1225
0
                    .tag("index", i);
1226
0
            return -1;
1227
0
        }
1228
1229
11
        int64_t tablet_id = small_file->tablet_id();
1230
11
        const std::string& rowset_id = small_file->rowset_id();
1231
11
        if (!small_file->has_txn_id() || small_file->txn_id() <= 0) {
1232
0
            LOG_WARNING("packed file small file missing valid txn id during correction")
1233
0
                    .tag("instance_id", instance_id_)
1234
0
                    .tag("small_file_path", small_file->path())
1235
0
                    .tag("index", i)
1236
0
                    .tag("tablet_id", tablet_id)
1237
0
                    .tag("rowset_id", rowset_id)
1238
0
                    .tag("has_txn_id", small_file->has_txn_id())
1239
0
                    .tag("txn_id", small_file->has_txn_id() ? small_file->txn_id() : 0);
1240
0
            return -1;
1241
0
        }
1242
11
        int64_t txn_id = small_file->txn_id();
1243
11
        bool recycle_exists = false;
1244
11
        bool tmp_exists = false;
1245
11
        if (check_recycle_and_tmp_rowset_exists(tablet_id, rowset_id, txn_id, &recycle_exists,
1246
11
                                                &tmp_exists) != 0) {
1247
0
            return -1;
1248
0
        }
1249
1250
11
        bool small_file_confirmed = false;
1251
11
        if (tmp_exists) {
1252
1
            left_num++;
1253
1
            left_bytes += small_file->size();
1254
1
            small_file_confirmed = true;
1255
10
        } else if (recycle_exists) {
1256
1
            left_num++;
1257
1
            left_bytes += small_file->size();
1258
            // keep small_file_confirmed=false so the packed file remains uncorrected
1259
9
        } else {
1260
9
            bool rowset_exists = false;
1261
9
            if (check_rowset_exists(tablet_id, rowset_id, &rowset_exists, stats) != 0) {
1262
0
                return -1;
1263
0
            }
1264
1265
9
            if (!rowset_exists) {
1266
6
                if (!small_file->deleted()) {
1267
6
                    small_file->set_deleted(true);
1268
6
                    local_changed = true;
1269
6
                }
1270
6
                if (!small_file->corrected()) {
1271
6
                    small_file->set_corrected(true);
1272
6
                    local_changed = true;
1273
6
                }
1274
6
                small_file_confirmed = true;
1275
6
            } else {
1276
3
                left_num++;
1277
3
                left_bytes += small_file->size();
1278
3
                small_file_confirmed = true;
1279
3
            }
1280
9
        }
1281
1282
11
        if (!small_file_confirmed) {
1283
1
            all_small_files_confirmed = false;
1284
1
        }
1285
1286
11
        if (small_file->corrected() != small_file_confirmed) {
1287
4
            small_file->set_corrected(small_file_confirmed);
1288
4
            local_changed = true;
1289
4
        }
1290
1291
11
        log_small_file_status(*small_file, small_file_confirmed);
1292
11
    }
1293
1294
3
    if (packed_info->remaining_slice_bytes() != left_bytes) {
1295
3
        packed_info->set_remaining_slice_bytes(left_bytes);
1296
3
        local_changed = true;
1297
3
    }
1298
3
    if (packed_info->ref_cnt() != left_num) {
1299
3
        auto old_ref_cnt = packed_info->ref_cnt();
1300
3
        packed_info->set_ref_cnt(left_num);
1301
3
        LOG_INFO("corrected packed file ref count")
1302
3
                .tag("instance_id", instance_id_)
1303
3
                .tag("resource_id", packed_info->resource_id())
1304
3
                .tag("packed_file_path", packed_file_path)
1305
3
                .tag("old_ref_cnt", old_ref_cnt)
1306
3
                .tag("new_ref_cnt", left_num);
1307
3
        local_changed = true;
1308
3
    }
1309
3
    if (packed_info->corrected() != all_small_files_confirmed) {
1310
2
        packed_info->set_corrected(all_small_files_confirmed);
1311
2
        local_changed = true;
1312
2
    }
1313
3
    if (left_num == 0 && packed_info->state() != cloud::PackedFileInfoPB::RECYCLING) {
1314
1
        packed_info->set_state(cloud::PackedFileInfoPB::RECYCLING);
1315
1
        local_changed = true;
1316
1
    }
1317
1318
3
    if (changed != nullptr) {
1319
3
        *changed = local_changed;
1320
3
    }
1321
3
    return 0;
1322
3
}
1323
1324
int InstanceRecycler::process_single_packed_file(const std::string& packed_key,
1325
                                                 const std::string& packed_file_path,
1326
3
                                                 PackedFileRecycleStats* stats) {
1327
3
    const int max_retry_times = std::max(1, config::packed_file_txn_retry_times);
1328
3
    bool correction_ok = false;
1329
3
    cloud::PackedFileInfoPB packed_info;
1330
1331
3
    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
1332
3
        if (stopped()) {
1333
0
            LOG_WARNING("recycler stopped before processing packed file")
1334
0
                    .tag("instance_id", instance_id_)
1335
0
                    .tag("packed_file_path", packed_file_path)
1336
0
                    .tag("attempt", attempt);
1337
0
            return -1;
1338
0
        }
1339
1340
3
        std::unique_ptr<Transaction> txn;
1341
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1342
3
        if (err != TxnErrorCode::TXN_OK) {
1343
0
            LOG_WARNING("failed to create txn when processing packed file")
1344
0
                    .tag("instance_id", instance_id_)
1345
0
                    .tag("packed_file_path", packed_file_path)
1346
0
                    .tag("attempt", attempt)
1347
0
                    .tag("err", err);
1348
0
            return -1;
1349
0
        }
1350
1351
3
        std::string packed_val;
1352
3
        err = txn->get(packed_key, &packed_val);
1353
3
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1354
0
            return 0;
1355
0
        }
1356
3
        if (err != TxnErrorCode::TXN_OK) {
1357
0
            LOG_WARNING("failed to get packed file kv")
1358
0
                    .tag("instance_id", instance_id_)
1359
0
                    .tag("packed_file_path", packed_file_path)
1360
0
                    .tag("attempt", attempt)
1361
0
                    .tag("err", err);
1362
0
            return -1;
1363
0
        }
1364
1365
3
        if (!packed_info.ParseFromString(packed_val)) {
1366
0
            LOG_WARNING("failed to parse packed file info")
1367
0
                    .tag("instance_id", instance_id_)
1368
0
                    .tag("packed_file_path", packed_file_path)
1369
0
                    .tag("attempt", attempt);
1370
0
            return -1;
1371
0
        }
1372
1373
3
        int64_t now_sec = ::time(nullptr);
1374
3
        bool corrected = packed_info.corrected();
1375
3
        bool due = config::force_immediate_recycle ||
1376
3
                   now_sec - packed_info.created_at_sec() >=
1377
3
                           config::packed_file_correction_delay_seconds;
1378
1379
3
        if (!corrected && due) {
1380
3
            bool changed = false;
1381
3
            if (correct_packed_file_info(&packed_info, &changed, packed_file_path, stats) != 0) {
1382
0
                LOG_WARNING("correct_packed_file_info failed")
1383
0
                        .tag("instance_id", instance_id_)
1384
0
                        .tag("packed_file_path", packed_file_path)
1385
0
                        .tag("attempt", attempt);
1386
0
                return -1;
1387
0
            }
1388
3
            if (changed) {
1389
3
                std::string updated;
1390
3
                if (!packed_info.SerializeToString(&updated)) {
1391
0
                    LOG_WARNING("failed to serialize packed file info after correction")
1392
0
                            .tag("instance_id", instance_id_)
1393
0
                            .tag("packed_file_path", packed_file_path)
1394
0
                            .tag("attempt", attempt);
1395
0
                    return -1;
1396
0
                }
1397
3
                txn->put(packed_key, updated);
1398
3
                err = txn->commit();
1399
3
                if (err == TxnErrorCode::TXN_OK) {
1400
3
                    if (stats) {
1401
3
                        ++stats->num_corrected;
1402
3
                    }
1403
3
                } else {
1404
0
                    if (err == TxnErrorCode::TXN_CONFLICT && attempt < max_retry_times) {
1405
0
                        LOG_WARNING(
1406
0
                                "failed to commit correction for packed file due to conflict, "
1407
0
                                "retrying")
1408
0
                                .tag("instance_id", instance_id_)
1409
0
                                .tag("packed_file_path", packed_file_path)
1410
0
                                .tag("attempt", attempt);
1411
0
                        sleep_for_packed_file_retry();
1412
0
                        packed_info.Clear();
1413
0
                        continue;
1414
0
                    }
1415
0
                    LOG_WARNING("failed to commit correction for packed file")
1416
0
                            .tag("instance_id", instance_id_)
1417
0
                            .tag("packed_file_path", packed_file_path)
1418
0
                            .tag("attempt", attempt)
1419
0
                            .tag("err", err);
1420
0
                    return -1;
1421
0
                }
1422
3
            }
1423
3
        }
1424
1425
3
        correction_ok = true;
1426
3
        break;
1427
3
    }
1428
1429
3
    if (!correction_ok) {
1430
0
        return -1;
1431
0
    }
1432
1433
3
    if (!(packed_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
1434
3
          packed_info.ref_cnt() == 0)) {
1435
2
        return 0;
1436
2
    }
1437
1438
1
    if (!packed_info.has_resource_id() || packed_info.resource_id().empty()) {
1439
0
        LOG_WARNING("packed file missing resource id when recycling")
1440
0
                .tag("instance_id", instance_id_)
1441
0
                .tag("packed_file_path", packed_file_path);
1442
0
        return -1;
1443
0
    }
1444
1
    auto [resource_id, accessor] = resolve_packed_file_accessor(packed_info.resource_id());
1445
1
    if (!accessor) {
1446
0
        LOG_WARNING("no accessor available to delete packed file")
1447
0
                .tag("instance_id", instance_id_)
1448
0
                .tag("packed_file_path", packed_file_path)
1449
0
                .tag("resource_id", packed_info.resource_id());
1450
0
        return -1;
1451
0
    }
1452
1
    int del_ret = accessor->delete_file(packed_file_path);
1453
1
    if (del_ret != 0 && del_ret != 1) {
1454
0
        LOG_WARNING("failed to delete packed file")
1455
0
                .tag("instance_id", instance_id_)
1456
0
                .tag("packed_file_path", packed_file_path)
1457
0
                .tag("resource_id", resource_id)
1458
0
                .tag("ret", del_ret);
1459
0
        return -1;
1460
0
    }
1461
1
    if (del_ret == 1) {
1462
0
        LOG_INFO("packed file already removed")
1463
0
                .tag("instance_id", instance_id_)
1464
0
                .tag("packed_file_path", packed_file_path)
1465
0
                .tag("resource_id", resource_id);
1466
1
    } else {
1467
1
        LOG_INFO("deleted packed file")
1468
1
                .tag("instance_id", instance_id_)
1469
1
                .tag("packed_file_path", packed_file_path)
1470
1
                .tag("resource_id", resource_id);
1471
1
    }
1472
1473
1
    for (int del_attempt = 1; del_attempt <= max_retry_times; ++del_attempt) {
1474
1
        std::unique_ptr<Transaction> del_txn;
1475
1
        TxnErrorCode err = txn_kv_->create_txn(&del_txn);
1476
1
        if (err != TxnErrorCode::TXN_OK) {
1477
0
            LOG_WARNING("failed to create txn when removing packed file kv")
1478
0
                    .tag("instance_id", instance_id_)
1479
0
                    .tag("packed_file_path", packed_file_path)
1480
0
                    .tag("del_attempt", del_attempt)
1481
0
                    .tag("err", err);
1482
0
            return -1;
1483
0
        }
1484
1485
1
        std::string latest_val;
1486
1
        err = del_txn->get(packed_key, &latest_val);
1487
1
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1488
0
            return 0;
1489
0
        }
1490
1
        if (err != TxnErrorCode::TXN_OK) {
1491
0
            LOG_WARNING("failed to re-read packed file kv before removal")
1492
0
                    .tag("instance_id", instance_id_)
1493
0
                    .tag("packed_file_path", packed_file_path)
1494
0
                    .tag("del_attempt", del_attempt)
1495
0
                    .tag("err", err);
1496
0
            return -1;
1497
0
        }
1498
1499
1
        cloud::PackedFileInfoPB latest_info;
1500
1
        if (!latest_info.ParseFromString(latest_val)) {
1501
0
            LOG_WARNING("failed to parse packed file info before removal")
1502
0
                    .tag("instance_id", instance_id_)
1503
0
                    .tag("packed_file_path", packed_file_path)
1504
0
                    .tag("del_attempt", del_attempt);
1505
0
            return -1;
1506
0
        }
1507
1508
1
        if (!(latest_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
1509
1
              latest_info.ref_cnt() == 0)) {
1510
0
            LOG_INFO("packed file state changed before removal, skip deleting kv")
1511
0
                    .tag("instance_id", instance_id_)
1512
0
                    .tag("packed_file_path", packed_file_path)
1513
0
                    .tag("del_attempt", del_attempt);
1514
0
            return 0;
1515
0
        }
1516
1517
1
        del_txn->remove(packed_key);
1518
1
        err = del_txn->commit();
1519
1
        if (err == TxnErrorCode::TXN_OK) {
1520
1
            if (stats) {
1521
1
                ++stats->num_deleted;
1522
1
                stats->bytes_deleted += static_cast<int64_t>(packed_key.size()) +
1523
1
                                        static_cast<int64_t>(latest_val.size());
1524
1
                if (del_ret == 0 || del_ret == 1) {
1525
1
                    ++stats->num_object_deleted;
1526
1
                    int64_t object_size = latest_info.total_slice_bytes();
1527
1
                    if (object_size <= 0) {
1528
0
                        object_size = packed_info.total_slice_bytes();
1529
0
                    }
1530
1
                    stats->bytes_object_deleted += object_size;
1531
1
                }
1532
1
            }
1533
1
            LOG_INFO("removed packed file metadata")
1534
1
                    .tag("instance_id", instance_id_)
1535
1
                    .tag("packed_file_path", packed_file_path);
1536
1
            return 0;
1537
1
        }
1538
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
1539
0
            if (del_attempt >= max_retry_times) {
1540
0
                LOG_WARNING("failed to remove packed file kv due to conflict after max retry")
1541
0
                        .tag("instance_id", instance_id_)
1542
0
                        .tag("packed_file_path", packed_file_path)
1543
0
                        .tag("del_attempt", del_attempt);
1544
0
                return -1;
1545
0
            }
1546
0
            LOG_WARNING("failed to remove packed file kv due to conflict, retrying")
1547
0
                    .tag("instance_id", instance_id_)
1548
0
                    .tag("packed_file_path", packed_file_path)
1549
0
                    .tag("del_attempt", del_attempt);
1550
0
            sleep_for_packed_file_retry();
1551
0
            continue;
1552
0
        }
1553
0
        LOG_WARNING("failed to remove packed file kv")
1554
0
                .tag("instance_id", instance_id_)
1555
0
                .tag("packed_file_path", packed_file_path)
1556
0
                .tag("del_attempt", del_attempt)
1557
0
                .tag("err", err);
1558
0
        return -1;
1559
0
    }
1560
1561
0
    return -1;
1562
1
}
1563
1564
int InstanceRecycler::handle_packed_file_kv(std::string_view key, std::string_view value,
1565
4
                                            PackedFileRecycleStats* stats, int* ret) {
1566
4
    if (stats) {
1567
4
        ++stats->num_scanned;
1568
4
    }
1569
4
    std::string packed_file_path;
1570
4
    if (!decode_packed_file_key(key, &packed_file_path)) {
1571
0
        LOG_WARNING("failed to decode packed file key")
1572
0
                .tag("instance_id", instance_id_)
1573
0
                .tag("key", hex(key));
1574
0
        if (stats) {
1575
0
            ++stats->num_failed;
1576
0
        }
1577
0
        if (ret) {
1578
0
            *ret = -1;
1579
0
        }
1580
0
        return 0;
1581
0
    }
1582
1583
4
    cloud::PackedFileInfoPB packed_info;
1584
4
    if (!packed_info.ParseFromArray(value.data(), value.size())) {
1585
0
        LOG_WARNING("failed to parse packed file info from scan")
1586
0
                .tag("instance_id", instance_id_)
1587
0
                .tag("packed_file_path", packed_file_path);
1588
0
        if (stats) {
1589
0
            ++stats->num_failed;
1590
0
        }
1591
0
        if (ret) {
1592
0
            *ret = -1;
1593
0
        }
1594
0
        return 0;
1595
0
    }
1596
1597
4
    const int64_t now_sec = ::time(nullptr);
1598
4
    const bool due =
1599
4
            config::force_immediate_recycle ||
1600
4
            now_sec - packed_info.created_at_sec() >= config::packed_file_correction_delay_seconds;
1601
4
    const bool need_correction = !packed_info.corrected() && due;
1602
4
    const bool need_recycle =
1603
4
            packed_info.state() == cloud::PackedFileInfoPB::RECYCLING && packed_info.ref_cnt() == 0;
1604
4
    if (!need_correction && !need_recycle) {
1605
1
        return 0;
1606
1
    }
1607
1608
3
    std::string packed_key(key);
1609
3
    int process_ret = process_single_packed_file(packed_key, packed_file_path, stats);
1610
3
    if (process_ret != 0) {
1611
0
        if (stats) {
1612
0
            ++stats->num_failed;
1613
0
        }
1614
0
        if (ret) {
1615
0
            *ret = -1;
1616
0
        }
1617
0
    }
1618
3
    return 0;
1619
4
}
1620
1621
int64_t calculate_rowset_expired_time(const std::string& instance_id_, const RecycleRowsetPB& rs,
1622
9.85k
                                      int64_t* earlest_ts /* rowset earliest expiration ts */) {
1623
9.85k
    if (config::force_immediate_recycle) {
1624
15
        return 0L;
1625
15
    }
1626
    // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time
1627
9.83k
    int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time();
1628
9.83k
    int64_t retention_seconds = config::retention_seconds;
1629
9.83k
    if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) {
1630
7.88k
        retention_seconds = std::min(config::compacted_rowset_retention_seconds, retention_seconds);
1631
7.88k
    }
1632
9.83k
    int64_t final_expiration = expiration + retention_seconds;
1633
9.83k
    if (*earlest_ts > final_expiration) {
1634
27
        *earlest_ts = final_expiration;
1635
27
        g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, *earlest_ts);
1636
27
    }
1637
9.83k
    return final_expiration;
1638
9.85k
}
1639
1640
int64_t calculate_partition_expired_time(
1641
        const std::string& instance_id_, const RecyclePartitionPB& partition_meta_pb,
1642
9
        int64_t* earlest_ts /* partition earliest expiration ts */) {
1643
9
    if (config::force_immediate_recycle) {
1644
3
        return 0L;
1645
3
    }
1646
6
    int64_t expiration = partition_meta_pb.expiration() > 0 ? partition_meta_pb.expiration()
1647
6
                                                            : partition_meta_pb.creation_time();
1648
6
    int64_t retention_seconds = config::retention_seconds;
1649
6
    if (partition_meta_pb.state() == RecyclePartitionPB::DROPPED) {
1650
6
        retention_seconds =
1651
6
                std::min(config::dropped_partition_retention_seconds, retention_seconds);
1652
6
    }
1653
6
    int64_t final_expiration = expiration + retention_seconds;
1654
6
    if (*earlest_ts > final_expiration) {
1655
2
        *earlest_ts = final_expiration;
1656
2
        g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, *earlest_ts);
1657
2
    }
1658
6
    return final_expiration;
1659
9
}
1660
1661
int64_t calculate_index_expired_time(const std::string& instance_id_,
1662
                                     const RecycleIndexPB& index_meta_pb,
1663
10
                                     int64_t* earlest_ts /* index earliest expiration ts */) {
1664
10
    if (config::force_immediate_recycle) {
1665
4
        return 0L;
1666
4
    }
1667
6
    int64_t expiration = index_meta_pb.expiration() > 0 ? index_meta_pb.expiration()
1668
6
                                                        : index_meta_pb.creation_time();
1669
6
    int64_t retention_seconds = config::retention_seconds;
1670
6
    if (index_meta_pb.state() == RecycleIndexPB::DROPPED) {
1671
6
        retention_seconds = std::min(config::dropped_index_retention_seconds, retention_seconds);
1672
6
    }
1673
6
    int64_t final_expiration = expiration + retention_seconds;
1674
6
    if (*earlest_ts > final_expiration) {
1675
2
        *earlest_ts = final_expiration;
1676
2
        g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, *earlest_ts);
1677
2
    }
1678
6
    return final_expiration;
1679
10
}
1680
1681
int64_t calculate_tmp_rowset_expired_time(
1682
        const std::string& instance_id_, const doris::RowsetMetaCloudPB& tmp_rowset_meta_pb,
1683
106k
        int64_t* earlest_ts /* tmp_rowset earliest expiration ts */) {
1684
    // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment)
1685
    //  when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading
1686
    //  duration or timeout always < `retention_time` in practice.
1687
106k
    int64_t expiration = tmp_rowset_meta_pb.txn_expiration() > 0
1688
106k
                                 ? tmp_rowset_meta_pb.txn_expiration()
1689
106k
                                 : tmp_rowset_meta_pb.creation_time();
1690
106k
    expiration = config::force_immediate_recycle ? 0 : expiration;
1691
106k
    int64_t final_expiration = expiration + config::retention_seconds;
1692
106k
    if (*earlest_ts > final_expiration) {
1693
24
        *earlest_ts = final_expiration;
1694
24
        g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, *earlest_ts);
1695
24
    }
1696
106k
    return final_expiration;
1697
106k
}
1698
1699
int64_t calculate_txn_expired_time(const std::string& instance_id_, const RecycleTxnPB& txn_meta_pb,
1700
30.0k
                                   int64_t* earlest_ts /* txn earliest expiration ts */) {
1701
30.0k
    int64_t final_expiration = txn_meta_pb.creation_time() + config::label_keep_max_second * 1000L;
1702
30.0k
    if (*earlest_ts > final_expiration / 1000) {
1703
8
        *earlest_ts = final_expiration / 1000;
1704
8
        g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, *earlest_ts);
1705
8
    }
1706
30.0k
    return final_expiration;
1707
30.0k
}
1708
1709
int64_t calculate_restore_job_expired_time(
1710
        const std::string& instance_id_, const RestoreJobCloudPB& restore_job,
1711
41
        int64_t* earlest_ts /* restore job earliest expiration ts */) {
1712
41
    if (config::force_immediate_recycle || restore_job.state() == RestoreJobCloudPB::DROPPED ||
1713
41
        restore_job.state() == RestoreJobCloudPB::COMPLETED ||
1714
41
        restore_job.state() == RestoreJobCloudPB::RECYCLING) {
1715
        // final state, recycle immediately
1716
41
        return 0L;
1717
41
    }
1718
    // not final state, wait much longer than the FE's timeout(1 day)
1719
0
    int64_t last_modified_s =
1720
0
            restore_job.has_mtime_s() ? restore_job.mtime_s() : restore_job.ctime_s();
1721
0
    int64_t expiration = restore_job.expired_at_s() > 0
1722
0
                                 ? last_modified_s + restore_job.expired_at_s()
1723
0
                                 : last_modified_s;
1724
0
    int64_t final_expiration = expiration + config::retention_seconds;
1725
0
    if (*earlest_ts > final_expiration) {
1726
0
        *earlest_ts = final_expiration;
1727
0
        g_bvar_recycler_recycle_restore_job_earlest_ts.put(instance_id_, *earlest_ts);
1728
0
    }
1729
0
    return final_expiration;
1730
41
}
1731
1732
2
int InstanceRecycler::abort_txn_for_related_rowset(int64_t txn_id) {
1733
2
    AbortTxnRequest req;
1734
2
    TxnInfoPB txn_info;
1735
2
    MetaServiceCode code = MetaServiceCode::OK;
1736
2
    std::string msg;
1737
2
    std::stringstream ss;
1738
2
    std::unique_ptr<Transaction> txn;
1739
2
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1740
2
    if (err != TxnErrorCode::TXN_OK) {
1741
0
        LOG_WARNING("failed to create txn").tag("err", err);
1742
0
        return -1;
1743
0
    }
1744
1745
    // get txn index
1746
2
    TxnIndexPB txn_idx_pb;
1747
2
    auto index_key = txn_index_key({instance_id_, txn_id});
1748
2
    std::string index_val;
1749
2
    err = txn->get(index_key, &index_val);
1750
2
    if (err != TxnErrorCode::TXN_OK) {
1751
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1752
            // maybe recycled
1753
0
            LOG_INFO("txn index not found, txn_id={} instance_id={}", txn_id, instance_id_)
1754
0
                    .tag("key", hex(index_key))
1755
0
                    .tag("txn_id", txn_id);
1756
0
            return 0;
1757
0
        }
1758
0
        LOG_WARNING("failed to get txn index")
1759
0
                .tag("err", err)
1760
0
                .tag("key", hex(index_key))
1761
0
                .tag("txn_id", txn_id);
1762
0
        return -1;
1763
0
    }
1764
2
    if (!txn_idx_pb.ParseFromString(index_val)) {
1765
0
        LOG_WARNING("failed to parse txn index")
1766
0
                .tag("err", err)
1767
0
                .tag("key", hex(index_key))
1768
0
                .tag("txn_id", txn_id);
1769
0
        return -1;
1770
0
    }
1771
1772
2
    auto info_key = txn_info_key({instance_id_, txn_idx_pb.tablet_index().db_id(), txn_id});
1773
2
    std::string info_val;
1774
2
    err = txn->get(info_key, &info_val);
1775
2
    if (err != TxnErrorCode::TXN_OK) {
1776
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1777
            // maybe recycled
1778
0
            LOG_INFO("txn info not found, txn_id={} instance_id={}", txn_id, instance_id_)
1779
0
                    .tag("key", hex(info_key))
1780
0
                    .tag("txn_id", txn_id);
1781
0
            return 0;
1782
0
        }
1783
0
        LOG_WARNING("failed to get txn info")
1784
0
                .tag("err", err)
1785
0
                .tag("key", hex(info_key))
1786
0
                .tag("txn_id", txn_id);
1787
0
        return -1;
1788
0
    }
1789
2
    if (!txn_info.ParseFromString(info_val)) {
1790
0
        LOG_WARNING("failed to parse txn info")
1791
0
                .tag("err", err)
1792
0
                .tag("key", hex(info_key))
1793
0
                .tag("txn_id", txn_id);
1794
0
        return -1;
1795
0
    }
1796
1797
2
    if (txn_info.status() != TxnStatusPB::TXN_STATUS_PREPARED) {
1798
0
        LOG_INFO("txn is not prepared status, txn_id={} status={}", txn_id, txn_info.status())
1799
0
                .tag("key", hex(info_key))
1800
0
                .tag("txn_id", txn_id);
1801
0
        return 0;
1802
0
    }
1803
1804
2
    req.set_txn_id(txn_id);
1805
1806
2
    LOG(INFO) << "begin abort txn for related rowset, txn_id=" << txn_id
1807
2
              << " instance_id=" << instance_id_ << " txn_info=" << txn_info.ShortDebugString();
1808
1809
2
    _abort_txn(instance_id_, &req, txn.get(), txn_info, ss, code, msg);
1810
2
    err = txn->commit();
1811
2
    if (err != TxnErrorCode::TXN_OK) {
1812
0
        code = cast_as<ErrCategory::COMMIT>(err);
1813
0
        ss << "failed to commit kv txn, txn_id=" << txn_info.txn_id() << " err=" << err;
1814
0
        msg = ss.str();
1815
0
        return -1;
1816
0
    }
1817
1818
2
    LOG(INFO) << "finish abort txn for related rowset, txn_id=" << txn_id
1819
2
              << " instance_id=" << instance_id_ << " txn_info=" << txn_info.ShortDebugString()
1820
2
              << " code=" << code << " msg=" << msg;
1821
1822
2
    return 0;
1823
2
}
1824
1825
4
int InstanceRecycler::abort_job_for_related_rowset(const RowsetMetaCloudPB& rowset_meta) {
1826
4
    FinishTabletJobRequest req;
1827
4
    FinishTabletJobResponse res;
1828
4
    req.set_action(FinishTabletJobRequest::ABORT);
1829
4
    MetaServiceCode code = MetaServiceCode::OK;
1830
4
    std::string msg;
1831
4
    std::stringstream ss;
1832
1833
4
    TabletIndexPB tablet_idx;
1834
4
    int ret = get_tablet_idx(txn_kv_.get(), instance_id_, rowset_meta.tablet_id(), tablet_idx);
1835
4
    if (ret == 1) {
1836
        // tablet maybe recycled, directly return 0
1837
1
        return 0;
1838
3
    } else if (ret != 0) {
1839
0
        LOG(WARNING) << "failed to get tablet index, tablet_id=" << rowset_meta.tablet_id()
1840
0
                     << " instance_id=" << instance_id_ << " ret=" << ret;
1841
0
        return ret;
1842
0
    }
1843
1844
3
    std::unique_ptr<Transaction> txn;
1845
3
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1846
3
    if (err != TxnErrorCode::TXN_OK) {
1847
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id_ << " err=" << err;
1848
0
        return -1;
1849
0
    }
1850
1851
3
    std::string job_key =
1852
3
            job_tablet_key({instance_id_, tablet_idx.table_id(), tablet_idx.index_id(),
1853
3
                            tablet_idx.partition_id(), tablet_idx.tablet_id()});
1854
3
    std::string job_val;
1855
3
    err = txn->get(job_key, &job_val);
1856
3
    if (err != TxnErrorCode::TXN_OK) {
1857
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1858
0
            LOG(INFO) << "job not exists, instance_id=" << instance_id_
1859
0
                      << " tablet_id=" << tablet_idx.tablet_id() << " key=" << hex(job_key);
1860
0
            return 0;
1861
0
        }
1862
0
        LOG(WARNING) << "failed to get job, instance_id=" << instance_id_
1863
0
                     << " tablet_id=" << tablet_idx.tablet_id() << " err=" << err
1864
0
                     << " key=" << hex(job_key);
1865
0
        return -1;
1866
0
    }
1867
1868
3
    TabletJobInfoPB job_pb;
1869
3
    if (!job_pb.ParseFromString(job_val)) {
1870
0
        LOG(WARNING) << "failed to parse job, instance_id=" << instance_id_
1871
0
                     << " tablet_id=" << tablet_idx.tablet_id() << " key=" << hex(job_key);
1872
0
        return -1;
1873
0
    }
1874
1875
3
    std::string job_id {};
1876
3
    if (!job_pb.compaction().empty()) {
1877
2
        for (const auto& c : job_pb.compaction()) {
1878
2
            if (c.id() == rowset_meta.job_id()) {
1879
2
                job_id = c.id();
1880
2
                break;
1881
2
            }
1882
2
        }
1883
2
    } else if (job_pb.has_schema_change()) {
1884
1
        job_id = job_pb.schema_change().id();
1885
1
    }
1886
1887
3
    if (!job_id.empty() && rowset_meta.job_id() == job_id) {
1888
3
        LOG(INFO) << "begin to abort job for related rowset, job_id=" << rowset_meta.job_id()
1889
3
                  << " instance_id=" << instance_id_ << " tablet_id=" << tablet_idx.tablet_id();
1890
3
        req.mutable_job()->CopyFrom(job_pb);
1891
3
        req.set_action(FinishTabletJobRequest::ABORT);
1892
3
        _finish_tablet_job(&req, &res, instance_id_, txn, txn_kv_.get(),
1893
3
                           delete_bitmap_lock_white_list_.get(), resource_mgr_.get(), code, msg,
1894
3
                           ss);
1895
3
        if (code != MetaServiceCode::OK) {
1896
0
            LOG(WARNING) << "failed to abort job, instance_id=" << instance_id_
1897
0
                         << " tablet_id=" << tablet_idx.tablet_id() << " code=" << code
1898
0
                         << " msg=" << msg;
1899
0
            return -1;
1900
0
        }
1901
3
        LOG(INFO) << "finish abort job for related rowset, job_id=" << rowset_meta.job_id()
1902
3
                  << " instance_id=" << instance_id_ << " tablet_id=" << tablet_idx.tablet_id()
1903
3
                  << " code=" << code << " msg=" << msg;
1904
3
    } else {
1905
        // clang-format off
1906
0
        LOG(INFO) << "there is no job for related rowset, directly recycle rowset data"
1907
0
                  << ", instance_id=" << instance_id_ 
1908
0
                  << ", tablet_id=" << tablet_idx.tablet_id() 
1909
0
                  << ", job_id=" << job_id
1910
0
                  << ", rowset_id=" << rowset_meta.rowset_id_v2();
1911
        // clang-format on
1912
0
    }
1913
1914
3
    return 0;
1915
3
}
1916
1917
template <typename T>
1918
55.7k
RowsetMetaCloudPB* mutable_rowset_meta(T& rowset_meta_pb) {
1919
55.7k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1920
51.9k
        return rowset_meta_pb.mutable_rowset_meta();
1921
51.9k
    } else {
1922
51.9k
        return &rowset_meta_pb;
1923
51.9k
    }
1924
55.7k
}
_ZN5doris5cloud19mutable_rowset_metaINS0_15RecycleRowsetPBEEEPNS_17RowsetMetaCloudPBERT_
Line
Count
Source
1918
3.78k
RowsetMetaCloudPB* mutable_rowset_meta(T& rowset_meta_pb) {
1919
3.78k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1920
3.78k
        return rowset_meta_pb.mutable_rowset_meta();
1921
3.78k
    } else {
1922
3.78k
        return &rowset_meta_pb;
1923
3.78k
    }
1924
3.78k
}
_ZN5doris5cloud19mutable_rowset_metaINS_17RowsetMetaCloudPBEEEPS2_RT_
Line
Count
Source
1918
51.9k
RowsetMetaCloudPB* mutable_rowset_meta(T& rowset_meta_pb) {
1919
51.9k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1920
51.9k
        return rowset_meta_pb.mutable_rowset_meta();
1921
51.9k
    } else {
1922
51.9k
        return &rowset_meta_pb;
1923
51.9k
    }
1924
51.9k
}
1925
1926
template <typename T>
1927
224k
const RowsetMetaCloudPB& rowset_meta(const T& rowset_meta_pb) {
1928
224k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1929
212k
        return rowset_meta_pb.rowset_meta();
1930
212k
    } else {
1931
212k
        return rowset_meta_pb;
1932
212k
    }
1933
224k
}
_ZN5doris5cloud11rowset_metaINS0_15RecycleRowsetPBEEERKNS_17RowsetMetaCloudPBERKT_
Line
Count
Source
1927
12.0k
const RowsetMetaCloudPB& rowset_meta(const T& rowset_meta_pb) {
1928
12.0k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1929
12.0k
        return rowset_meta_pb.rowset_meta();
1930
12.0k
    } else {
1931
12.0k
        return rowset_meta_pb;
1932
12.0k
    }
1933
12.0k
}
_ZN5doris5cloud11rowset_metaINS_17RowsetMetaCloudPBEEERKS2_RKT_
Line
Count
Source
1927
212k
const RowsetMetaCloudPB& rowset_meta(const T& rowset_meta_pb) {
1928
212k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1929
212k
        return rowset_meta_pb.rowset_meta();
1930
212k
    } else {
1931
212k
        return rowset_meta_pb;
1932
212k
    }
1933
212k
}
1934
1935
// Plain record describing one pending abort (of a txn or of a tablet job) for a
// rowset that is about to be recycled.
struct DeferredRecycleAbortTask {
    // Selects which abort path the task takes.
    enum class Type : uint8_t {
        TXN,
        JOB,
    };

    Type type {Type::TXN};
    // Id of the txn to abort; filled for TXN tasks.
    int64_t txn_id {0};
    // Tablet and version range of the rowset the task was derived from.
    int64_t tablet_id {0};
    int64_t start_version {0};
    int64_t end_version {0};
    // Rowset id and job id; filled for JOB tasks.
    std::string rowset_id;
    std::string job_id;
};
1949
1950
// Plain record describing one PREPARE recycle-rowset entry whose deletion is
// deferred. Member order is part of the contract: instances are built with
// positional aggregate initialization ({key, resource_id, rowset_id, tablet_id}).
struct DeferredRecyclePrepareDeleteTask {
    // KV key of the recycle rowset entry.
    std::string key;
    // Resource id taken from the rowset meta.
    std::string resource_id;
    // Rowset id (rowset_id_v2) taken from the rowset meta.
    std::string rowset_id;
    // Tablet the rowset belongs to.
    int64_t tablet_id {0};
};
1956
1957
template <typename T>
1958
57.8k
std::optional<DeferredRecycleAbortTask> make_deferred_abort_task(const T& rowset_meta_pb) {
1959
57.8k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1960
3.79k
        if (rowset_meta_pb.type() != RecycleRowsetPB::PREPARE) {
1961
3.13k
            return std::nullopt;
1962
3.13k
        }
1963
3.79k
    }
1964
1965
654
    const auto& rs_meta = rowset_meta(rowset_meta_pb);
1966
654
    DeferredRecycleAbortTask task;
1967
654
    task.tablet_id = rs_meta.tablet_id();
1968
654
    task.start_version = rs_meta.start_version();
1969
654
    task.end_version = rs_meta.end_version();
1970
54.6k
    if (rs_meta.has_load_id()) {
1971
4
        task.type = DeferredRecycleAbortTask::Type::TXN;
1972
4
        task.txn_id = rs_meta.txn_id();
1973
4
        return task;
1974
4
    }
1975
54.6k
    if (rs_meta.has_job_id()) {
1976
6
        task.type = DeferredRecycleAbortTask::Type::JOB;
1977
6
        task.rowset_id = rs_meta.rowset_id_v2();
1978
6
        task.job_id = rs_meta.job_id();
1979
6
        return task;
1980
6
    }
1981
54.6k
    return std::nullopt;
1982
54.6k
}
_ZN5doris5cloud24make_deferred_abort_taskINS0_15RecycleRowsetPBEEESt8optionalINS0_24DeferredRecycleAbortTaskEERKT_
Line
Count
Source
1958
3.79k
std::optional<DeferredRecycleAbortTask> make_deferred_abort_task(const T& rowset_meta_pb) {
1959
3.79k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1960
3.79k
        if (rowset_meta_pb.type() != RecycleRowsetPB::PREPARE) {
1961
3.13k
            return std::nullopt;
1962
3.13k
        }
1963
3.79k
    }
1964
1965
654
    const auto& rs_meta = rowset_meta(rowset_meta_pb);
1966
654
    DeferredRecycleAbortTask task;
1967
654
    task.tablet_id = rs_meta.tablet_id();
1968
654
    task.start_version = rs_meta.start_version();
1969
654
    task.end_version = rs_meta.end_version();
1970
654
    if (rs_meta.has_load_id()) {
1971
2
        task.type = DeferredRecycleAbortTask::Type::TXN;
1972
2
        task.txn_id = rs_meta.txn_id();
1973
2
        return task;
1974
2
    }
1975
652
    if (rs_meta.has_job_id()) {
1976
2
        task.type = DeferredRecycleAbortTask::Type::JOB;
1977
2
        task.rowset_id = rs_meta.rowset_id_v2();
1978
2
        task.job_id = rs_meta.job_id();
1979
2
        return task;
1980
2
    }
1981
650
    return std::nullopt;
1982
652
}
_ZN5doris5cloud24make_deferred_abort_taskINS_17RowsetMetaCloudPBEEESt8optionalINS0_24DeferredRecycleAbortTaskEERKT_
Line
Count
Source
1958
54.0k
std::optional<DeferredRecycleAbortTask> make_deferred_abort_task(const T& rowset_meta_pb) {
1959
54.0k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1960
54.0k
        if (rowset_meta_pb.type() != RecycleRowsetPB::PREPARE) {
1961
54.0k
            return std::nullopt;
1962
54.0k
        }
1963
54.0k
    }
1964
1965
54.0k
    const auto& rs_meta = rowset_meta(rowset_meta_pb);
1966
54.0k
    DeferredRecycleAbortTask task;
1967
54.0k
    task.tablet_id = rs_meta.tablet_id();
1968
54.0k
    task.start_version = rs_meta.start_version();
1969
54.0k
    task.end_version = rs_meta.end_version();
1970
54.0k
    if (rs_meta.has_load_id()) {
1971
2
        task.type = DeferredRecycleAbortTask::Type::TXN;
1972
2
        task.txn_id = rs_meta.txn_id();
1973
2
        return task;
1974
2
    }
1975
54.0k
    if (rs_meta.has_job_id()) {
1976
4
        task.type = DeferredRecycleAbortTask::Type::JOB;
1977
4
        task.rowset_id = rs_meta.rowset_id_v2();
1978
4
        task.job_id = rs_meta.job_id();
1979
4
        return task;
1980
4
    }
1981
54.0k
    return std::nullopt;
1982
54.0k
}
1983
1984
template <typename T>
1985
169k
bool need_mark_rowset_as_recycled(const T& rowset_meta_pb) {
1986
169k
    const auto& rs_meta = rowset_meta(rowset_meta_pb);
1987
169k
    return !rs_meta.has_is_recycled() || !rs_meta.is_recycled();
1988
169k
}
_ZN5doris5cloud28need_mark_rowset_as_recycledINS0_15RecycleRowsetPBEEEbRKT_
Line
Count
Source
1985
11.3k
bool need_mark_rowset_as_recycled(const T& rowset_meta_pb) {
1986
11.3k
    const auto& rs_meta = rowset_meta(rowset_meta_pb);
1987
11.3k
    return !rs_meta.has_is_recycled() || !rs_meta.is_recycled();
1988
11.3k
}
_ZN5doris5cloud28need_mark_rowset_as_recycledINS_17RowsetMetaCloudPBEEEbRKT_
Line
Count
Source
1985
158k
bool need_mark_rowset_as_recycled(const T& rowset_meta_pb) {
1986
158k
    const auto& rs_meta = rowset_meta(rowset_meta_pb);
1987
158k
    return !rs_meta.has_is_recycled() || !rs_meta.is_recycled();
1988
158k
}
1989
1990
template <typename T>
1991
int batch_mark_rowsets_as_recycled(TxnKv* txn_kv, const std::string& instance_id,
1992
53
                                   const std::vector<std::string>& keys) {
1993
53
    std::unique_ptr<Transaction> txn;
1994
53
    TxnErrorCode err = txn_kv->create_txn(&txn);
1995
53
    if (err != TxnErrorCode::TXN_OK) {
1996
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1997
0
        return -1;
1998
0
    }
1999
53
    std::vector<std::optional<std::string>> values;
2000
53
    err = txn->batch_get(&values, keys);
2001
53
    if (err != TxnErrorCode::TXN_OK) {
2002
0
        LOG(WARNING) << "failed to batch get rowset meta, instance_id=" << instance_id << ' '
2003
0
                     << "keys size=" << keys.size() << ' ' << "err=" << err;
2004
0
        return -1;
2005
0
    }
2006
53
    size_t total_keys = keys.size();
2007
55.8k
    for (size_t i = 0; i < total_keys; i++) {
2008
55.8k
        if (!values[i].has_value()) {
2009
            // has already been removed by commit_rowset
2010
0
            continue;
2011
0
        }
2012
55.8k
        auto key = keys[i];
2013
55.8k
        auto val = values[i].value();
2014
55.8k
        T rowset_meta_pb;
2015
55.8k
        if (!rowset_meta_pb.ParseFromString(val)) {
2016
0
            LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id
2017
0
                         << " key=" << hex(key);
2018
0
            return -1;
2019
0
        }
2020
55.8k
        if (!need_mark_rowset_as_recycled(rowset_meta_pb)) {
2021
0
            continue;
2022
0
        }
2023
55.8k
        mutable_rowset_meta(rowset_meta_pb)->set_is_recycled(true);
2024
55.8k
        val.clear();
2025
55.8k
        rowset_meta_pb.SerializeToString(&val);
2026
55.8k
        txn->put(key, val);
2027
55.8k
    }
2028
53
    err = txn->commit();
2029
53
    if (err != TxnErrorCode::TXN_OK) {
2030
0
        LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id;
2031
0
        return -1;
2032
0
    }
2033
2034
53
    return 0;
2035
53
}
_ZN5doris5cloud30batch_mark_rowsets_as_recycledINS0_15RecycleRowsetPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKSt6vectorISA_SaISA_EE
Line
Count
Source
1992
37
                                   const std::vector<std::string>& keys) {
1993
37
    std::unique_ptr<Transaction> txn;
1994
37
    TxnErrorCode err = txn_kv->create_txn(&txn);
1995
37
    if (err != TxnErrorCode::TXN_OK) {
1996
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1997
0
        return -1;
1998
0
    }
1999
37
    std::vector<std::optional<std::string>> values;
2000
37
    err = txn->batch_get(&values, keys);
2001
37
    if (err != TxnErrorCode::TXN_OK) {
2002
0
        LOG(WARNING) << "failed to batch get rowset meta, instance_id=" << instance_id << ' '
2003
0
                     << "keys size=" << keys.size() << ' ' << "err=" << err;
2004
0
        return -1;
2005
0
    }
2006
37
    size_t total_keys = keys.size();
2007
3.82k
    for (size_t i = 0; i < total_keys; i++) {
2008
3.78k
        if (!values[i].has_value()) {
2009
            // has already been removed by commit_rowset
2010
0
            continue;
2011
0
        }
2012
3.78k
        auto key = keys[i];
2013
3.78k
        auto val = values[i].value();
2014
3.78k
        T rowset_meta_pb;
2015
3.78k
        if (!rowset_meta_pb.ParseFromString(val)) {
2016
0
            LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id
2017
0
                         << " key=" << hex(key);
2018
0
            return -1;
2019
0
        }
2020
3.78k
        if (!need_mark_rowset_as_recycled(rowset_meta_pb)) {
2021
0
            continue;
2022
0
        }
2023
3.78k
        mutable_rowset_meta(rowset_meta_pb)->set_is_recycled(true);
2024
3.78k
        val.clear();
2025
3.78k
        rowset_meta_pb.SerializeToString(&val);
2026
3.78k
        txn->put(key, val);
2027
3.78k
    }
2028
37
    err = txn->commit();
2029
37
    if (err != TxnErrorCode::TXN_OK) {
2030
0
        LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id;
2031
0
        return -1;
2032
0
    }
2033
2034
37
    return 0;
2035
37
}
_ZN5doris5cloud30batch_mark_rowsets_as_recycledINS_17RowsetMetaCloudPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKSt6vectorISA_SaISA_EE
Line
Count
Source
1992
16
                                   const std::vector<std::string>& keys) {
1993
16
    std::unique_ptr<Transaction> txn;
1994
16
    TxnErrorCode err = txn_kv->create_txn(&txn);
1995
16
    if (err != TxnErrorCode::TXN_OK) {
1996
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1997
0
        return -1;
1998
0
    }
1999
16
    std::vector<std::optional<std::string>> values;
2000
16
    err = txn->batch_get(&values, keys);
2001
16
    if (err != TxnErrorCode::TXN_OK) {
2002
0
        LOG(WARNING) << "failed to batch get rowset meta, instance_id=" << instance_id << ' '
2003
0
                     << "keys size=" << keys.size() << ' ' << "err=" << err;
2004
0
        return -1;
2005
0
    }
2006
16
    size_t total_keys = keys.size();
2007
52.0k
    for (size_t i = 0; i < total_keys; i++) {
2008
52.0k
        if (!values[i].has_value()) {
2009
            // has already been removed by commit_rowset
2010
0
            continue;
2011
0
        }
2012
52.0k
        auto key = keys[i];
2013
52.0k
        auto val = values[i].value();
2014
52.0k
        T rowset_meta_pb;
2015
52.0k
        if (!rowset_meta_pb.ParseFromString(val)) {
2016
0
            LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id
2017
0
                         << " key=" << hex(key);
2018
0
            return -1;
2019
0
        }
2020
52.0k
        if (!need_mark_rowset_as_recycled(rowset_meta_pb)) {
2021
0
            continue;
2022
0
        }
2023
52.0k
        mutable_rowset_meta(rowset_meta_pb)->set_is_recycled(true);
2024
52.0k
        val.clear();
2025
52.0k
        rowset_meta_pb.SerializeToString(&val);
2026
52.0k
        txn->put(key, val);
2027
52.0k
    }
2028
16
    err = txn->commit();
2029
16
    if (err != TxnErrorCode::TXN_OK) {
2030
0
        LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id;
2031
0
        return -1;
2032
0
    }
2033
2034
16
    return 0;
2035
16
}
2036
2037
template <typename T>
2038
int collect_deferred_abort_tasks(TxnKv* txn_kv, const std::string& instance_id,
2039
                                 const std::vector<std::string>& keys,
2040
                                 std::vector<DeferredRecycleAbortTask>* abort_tasks,
2041
5
                                 bool skip_base_version) {
2042
5
    constexpr size_t kAbortCheckBatchSize = 256;
2043
10
    for (size_t offset = 0; offset < keys.size(); offset += kAbortCheckBatchSize) {
2044
5
        size_t limit = std::min(keys.size(), offset + kAbortCheckBatchSize);
2045
5
        std::unique_ptr<Transaction> txn;
2046
5
        TxnErrorCode err = txn_kv->create_txn(&txn);
2047
5
        if (err != TxnErrorCode::TXN_OK) {
2048
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
2049
0
            return -1;
2050
0
        }
2051
10
        for (size_t idx = offset; idx < limit; ++idx) {
2052
5
            const std::string& key = keys[idx];
2053
5
            std::string val;
2054
5
            err = txn->get(key, &val);
2055
5
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
2056
                // has already been removed
2057
0
                continue;
2058
0
            }
2059
5
            if (err != TxnErrorCode::TXN_OK) {
2060
0
                LOG(WARNING) << "failed to get rowset meta, instance_id=" << instance_id
2061
0
                             << " key=" << hex(key);
2062
0
                return -1;
2063
0
            }
2064
5
            T rowset_meta_pb;
2065
5
            if (!rowset_meta_pb.ParseFromString(val)) {
2066
0
                LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id
2067
0
                             << " key=" << hex(key);
2068
0
                return -1;
2069
0
            }
2070
5
            if (skip_base_version && rowset_meta(rowset_meta_pb).end_version() == 1) {
2071
0
                continue;
2072
0
            }
2073
5
            if (auto abort_task = make_deferred_abort_task(rowset_meta_pb);
2074
5
                abort_task.has_value()) {
2075
5
                abort_tasks->emplace_back(std::move(*abort_task));
2076
5
            }
2077
5
        }
2078
5
    }
2079
5
    return 0;
2080
5
}
_ZN5doris5cloud28collect_deferred_abort_tasksINS0_15RecycleRowsetPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKSt6vectorISA_SaISA_EEPSD_INS0_24DeferredRecycleAbortTaskESaISI_EEb
Line
Count
Source
2041
2
                                 bool skip_base_version) {
2042
2
    constexpr size_t kAbortCheckBatchSize = 256;
2043
4
    for (size_t offset = 0; offset < keys.size(); offset += kAbortCheckBatchSize) {
2044
2
        size_t limit = std::min(keys.size(), offset + kAbortCheckBatchSize);
2045
2
        std::unique_ptr<Transaction> txn;
2046
2
        TxnErrorCode err = txn_kv->create_txn(&txn);
2047
2
        if (err != TxnErrorCode::TXN_OK) {
2048
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
2049
0
            return -1;
2050
0
        }
2051
4
        for (size_t idx = offset; idx < limit; ++idx) {
2052
2
            const std::string& key = keys[idx];
2053
2
            std::string val;
2054
2
            err = txn->get(key, &val);
2055
2
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
2056
                // has already been removed
2057
0
                continue;
2058
0
            }
2059
2
            if (err != TxnErrorCode::TXN_OK) {
2060
0
                LOG(WARNING) << "failed to get rowset meta, instance_id=" << instance_id
2061
0
                             << " key=" << hex(key);
2062
0
                return -1;
2063
0
            }
2064
2
            T rowset_meta_pb;
2065
2
            if (!rowset_meta_pb.ParseFromString(val)) {
2066
0
                LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id
2067
0
                             << " key=" << hex(key);
2068
0
                return -1;
2069
0
            }
2070
2
            if (skip_base_version && rowset_meta(rowset_meta_pb).end_version() == 1) {
2071
0
                continue;
2072
0
            }
2073
2
            if (auto abort_task = make_deferred_abort_task(rowset_meta_pb);
2074
2
                abort_task.has_value()) {
2075
2
                abort_tasks->emplace_back(std::move(*abort_task));
2076
2
            }
2077
2
        }
2078
2
    }
2079
2
    return 0;
2080
2
}
_ZN5doris5cloud28collect_deferred_abort_tasksINS_17RowsetMetaCloudPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKSt6vectorISA_SaISA_EEPSD_INS0_24DeferredRecycleAbortTaskESaISI_EEb
Line
Count
Source
2041
3
                                 bool skip_base_version) {
2042
3
    constexpr size_t kAbortCheckBatchSize = 256;
2043
6
    for (size_t offset = 0; offset < keys.size(); offset += kAbortCheckBatchSize) {
2044
3
        size_t limit = std::min(keys.size(), offset + kAbortCheckBatchSize);
2045
3
        std::unique_ptr<Transaction> txn;
2046
3
        TxnErrorCode err = txn_kv->create_txn(&txn);
2047
3
        if (err != TxnErrorCode::TXN_OK) {
2048
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
2049
0
            return -1;
2050
0
        }
2051
6
        for (size_t idx = offset; idx < limit; ++idx) {
2052
3
            const std::string& key = keys[idx];
2053
3
            std::string val;
2054
3
            err = txn->get(key, &val);
2055
3
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
2056
                // has already been removed
2057
0
                continue;
2058
0
            }
2059
3
            if (err != TxnErrorCode::TXN_OK) {
2060
0
                LOG(WARNING) << "failed to get rowset meta, instance_id=" << instance_id
2061
0
                             << " key=" << hex(key);
2062
0
                return -1;
2063
0
            }
2064
3
            T rowset_meta_pb;
2065
3
            if (!rowset_meta_pb.ParseFromString(val)) {
2066
0
                LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id
2067
0
                             << " key=" << hex(key);
2068
0
                return -1;
2069
0
            }
2070
3
            if (skip_base_version && rowset_meta(rowset_meta_pb).end_version() == 1) {
2071
0
                continue;
2072
0
            }
2073
3
            if (auto abort_task = make_deferred_abort_task(rowset_meta_pb);
2074
3
                abort_task.has_value()) {
2075
3
                abort_tasks->emplace_back(std::move(*abort_task));
2076
3
            }
2077
3
        }
2078
3
    }
2079
3
    return 0;
2080
3
}
2081
2082
template <typename T>
2083
int InstanceRecycler::batch_abort_txn_or_job_for_recycle(const std::vector<std::string>& keys,
2084
5
                                                         bool skip_base_version) {
2085
5
    std::vector<DeferredRecycleAbortTask> abort_tasks;
2086
5
    if (collect_deferred_abort_tasks<T>(txn_kv_.get(), instance_id_, keys, &abort_tasks,
2087
5
                                        skip_base_version) != 0) {
2088
0
        LOG(WARNING) << "failed to collect rowset abort tasks, instance_id=" << instance_id_;
2089
0
        return -1;
2090
0
    }
2091
5
    for (const auto& abort_task : abort_tasks) {
2092
5
        LOG(INFO) << "begin to abort txn or job for related rowset, instance_id=" << instance_id_
2093
5
                  << " tablet_id=" << abort_task.tablet_id << " version=["
2094
5
                  << abort_task.start_version << '-' << abort_task.end_version << "]";
2095
5
        int abort_ret = 0;
2096
5
        if (abort_task.type == DeferredRecycleAbortTask::Type::TXN) {
2097
2
            abort_ret = abort_txn_for_related_rowset(abort_task.txn_id);
2098
3
        } else {
2099
3
            RowsetMetaCloudPB rowset_meta;
2100
3
            rowset_meta.set_tablet_id(abort_task.tablet_id);
2101
3
            rowset_meta.set_rowset_id_v2(abort_task.rowset_id);
2102
3
            rowset_meta.set_job_id(abort_task.job_id);
2103
3
            abort_ret = abort_job_for_related_rowset(rowset_meta);
2104
3
        }
2105
5
        if (abort_ret != 0) {
2106
0
            LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
2107
0
                         << instance_id_ << " tablet_id=" << abort_task.tablet_id << " version=["
2108
0
                         << abort_task.start_version << '-' << abort_task.end_version << "]";
2109
0
            return abort_ret;
2110
0
        }
2111
5
    }
2112
5
    return 0;
2113
5
}
_ZN5doris5cloud16InstanceRecycler34batch_abort_txn_or_job_for_recycleINS0_15RecycleRowsetPBEEEiRKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaISA_EEb
Line
Count
Source
2084
2
                                                         bool skip_base_version) {
2085
2
    std::vector<DeferredRecycleAbortTask> abort_tasks;
2086
2
    if (collect_deferred_abort_tasks<T>(txn_kv_.get(), instance_id_, keys, &abort_tasks,
2087
2
                                        skip_base_version) != 0) {
2088
0
        LOG(WARNING) << "failed to collect rowset abort tasks, instance_id=" << instance_id_;
2089
0
        return -1;
2090
0
    }
2091
2
    for (const auto& abort_task : abort_tasks) {
2092
2
        LOG(INFO) << "begin to abort txn or job for related rowset, instance_id=" << instance_id_
2093
2
                  << " tablet_id=" << abort_task.tablet_id << " version=["
2094
2
                  << abort_task.start_version << '-' << abort_task.end_version << "]";
2095
2
        int abort_ret = 0;
2096
2
        if (abort_task.type == DeferredRecycleAbortTask::Type::TXN) {
2097
1
            abort_ret = abort_txn_for_related_rowset(abort_task.txn_id);
2098
1
        } else {
2099
1
            RowsetMetaCloudPB rowset_meta;
2100
1
            rowset_meta.set_tablet_id(abort_task.tablet_id);
2101
1
            rowset_meta.set_rowset_id_v2(abort_task.rowset_id);
2102
1
            rowset_meta.set_job_id(abort_task.job_id);
2103
1
            abort_ret = abort_job_for_related_rowset(rowset_meta);
2104
1
        }
2105
2
        if (abort_ret != 0) {
2106
0
            LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
2107
0
                         << instance_id_ << " tablet_id=" << abort_task.tablet_id << " version=["
2108
0
                         << abort_task.start_version << '-' << abort_task.end_version << "]";
2109
0
            return abort_ret;
2110
0
        }
2111
2
    }
2112
2
    return 0;
2113
2
}
_ZN5doris5cloud16InstanceRecycler34batch_abort_txn_or_job_for_recycleINS_17RowsetMetaCloudPBEEEiRKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaISA_EEb
Line
Count
Source
2084
3
                                                         bool skip_base_version) {
2085
3
    std::vector<DeferredRecycleAbortTask> abort_tasks;
2086
3
    if (collect_deferred_abort_tasks<T>(txn_kv_.get(), instance_id_, keys, &abort_tasks,
2087
3
                                        skip_base_version) != 0) {
2088
0
        LOG(WARNING) << "failed to collect rowset abort tasks, instance_id=" << instance_id_;
2089
0
        return -1;
2090
0
    }
2091
3
    for (const auto& abort_task : abort_tasks) {
2092
3
        LOG(INFO) << "begin to abort txn or job for related rowset, instance_id=" << instance_id_
2093
3
                  << " tablet_id=" << abort_task.tablet_id << " version=["
2094
3
                  << abort_task.start_version << '-' << abort_task.end_version << "]";
2095
3
        int abort_ret = 0;
2096
3
        if (abort_task.type == DeferredRecycleAbortTask::Type::TXN) {
2097
1
            abort_ret = abort_txn_for_related_rowset(abort_task.txn_id);
2098
2
        } else {
2099
2
            RowsetMetaCloudPB rowset_meta;
2100
2
            rowset_meta.set_tablet_id(abort_task.tablet_id);
2101
2
            rowset_meta.set_rowset_id_v2(abort_task.rowset_id);
2102
2
            rowset_meta.set_job_id(abort_task.job_id);
2103
2
            abort_ret = abort_job_for_related_rowset(rowset_meta);
2104
2
        }
2105
3
        if (abort_ret != 0) {
2106
0
            LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
2107
0
                         << instance_id_ << " tablet_id=" << abort_task.tablet_id << " version=["
2108
0
                         << abort_task.start_version << '-' << abort_task.end_version << "]";
2109
0
            return abort_ret;
2110
0
        }
2111
3
    }
2112
3
    return 0;
2113
3
}
2114
2115
int collect_prepare_delete_tasks(TxnKv* txn_kv, const std::string& instance_id,
2116
                                 const std::vector<std::string>& keys,
2117
0
                                 std::vector<DeferredRecyclePrepareDeleteTask>* delete_tasks) {
2118
0
    constexpr size_t kPrepareCheckBatchSize = 256;
2119
0
    for (size_t offset = 0; offset < keys.size(); offset += kPrepareCheckBatchSize) {
2120
0
        size_t limit = std::min(keys.size(), offset + kPrepareCheckBatchSize);
2121
0
        std::unique_ptr<Transaction> txn;
2122
0
        TxnErrorCode err = txn_kv->create_txn(&txn);
2123
0
        if (err != TxnErrorCode::TXN_OK) {
2124
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
2125
0
            return -1;
2126
0
        }
2127
0
        for (size_t idx = offset; idx < limit; ++idx) {
2128
0
            const std::string& key = keys[idx];
2129
0
            std::string val;
2130
0
            err = txn->get(key, &val);
2131
0
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
2132
                // has already been removed
2133
0
                continue;
2134
0
            }
2135
0
            if (err != TxnErrorCode::TXN_OK) {
2136
0
                LOG(WARNING) << "failed to get recycle rowset, instance_id=" << instance_id
2137
0
                             << " key=" << hex(key);
2138
0
                return -1;
2139
0
            }
2140
0
            RecycleRowsetPB rowset;
2141
0
            if (!rowset.ParseFromString(val)) {
2142
0
                LOG(WARNING) << "failed to parse recycle rowset, instance_id=" << instance_id
2143
0
                             << " key=" << hex(key);
2144
0
                return -1;
2145
0
            }
2146
0
            if (rowset.type() != RecycleRowsetPB::PREPARE) {
2147
0
                continue;
2148
0
            }
2149
0
            const auto& rs_meta = rowset.rowset_meta();
2150
0
            delete_tasks->push_back(
2151
0
                    {key, rs_meta.resource_id(), rs_meta.rowset_id_v2(), rs_meta.tablet_id()});
2152
0
        }
2153
0
    }
2154
0
    return 0;
2155
0
}
2156
2157
1
int InstanceRecycler::recycle_ref_rowsets(bool* has_unrecycled_rowsets) {
2158
1
    const std::string task_name = "recycle_ref_rowsets";
2159
1
    *has_unrecycled_rowsets = false;
2160
2161
1
    std::string data_rowset_ref_count_key_start =
2162
1
            versioned::data_rowset_ref_count_key({instance_id_, 0, ""});
2163
1
    std::string data_rowset_ref_count_key_end =
2164
1
            versioned::data_rowset_ref_count_key({instance_id_, INT64_MAX, ""});
2165
2166
1
    LOG_WARNING("begin to recycle ref rowsets").tag("instance_id", instance_id_);
2167
2168
1
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2169
1
    register_recycle_task(task_name, start_time);
2170
2171
1
    DORIS_CLOUD_DEFER {
2172
1
        unregister_recycle_task(task_name);
2173
1
        int64_t cost =
2174
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2175
1
        LOG_WARNING("recycle ref rowsets finished, cost={}s", cost)
2176
1
                .tag("instance_id", instance_id_);
2177
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_0clEv
Line
Count
Source
2171
1
    DORIS_CLOUD_DEFER {
2172
1
        unregister_recycle_task(task_name);
2173
1
        int64_t cost =
2174
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2175
1
        LOG_WARNING("recycle ref rowsets finished, cost={}s", cost)
2176
1
                .tag("instance_id", instance_id_);
2177
1
    };
2178
2179
    // Phase 1: Scan to collect all tablet_ids that have rowset ref counts
2180
1
    std::set<int64_t> tablets_with_refs;
2181
1
    int64_t num_scanned = 0;
2182
2183
1
    auto scan_func = [&](std::string_view k, std::string_view v) -> int {
2184
0
        ++num_scanned;
2185
0
        int64_t tablet_id;
2186
0
        std::string rowset_id;
2187
0
        std::string_view key(k);
2188
0
        if (!versioned::decode_data_rowset_ref_count_key(&key, &tablet_id, &rowset_id)) {
2189
0
            LOG_WARNING("failed to decode data rowset ref count key").tag("key", hex(k));
2190
0
            return 0; // Continue scanning
2191
0
        }
2192
2193
0
        tablets_with_refs.insert(tablet_id);
2194
0
        return 0;
2195
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES7_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES7_
2196
2197
1
    if (scan_and_recycle(data_rowset_ref_count_key_start, data_rowset_ref_count_key_end,
2198
1
                         std::move(scan_func)) != 0) {
2199
0
        LOG_WARNING("failed to scan data rowset ref count keys");
2200
0
        return -1;
2201
0
    }
2202
2203
1
    LOG_INFO("collected {} tablets with rowset refs, scanned {} ref count keys",
2204
1
             tablets_with_refs.size(), num_scanned)
2205
1
            .tag("instance_id", instance_id_);
2206
2207
    // Phase 2: Recycle each tablet
2208
1
    int64_t num_recycled_tablets = 0;
2209
1
    for (int64_t tablet_id : tablets_with_refs) {
2210
0
        if (stopped()) {
2211
0
            LOG_INFO("recycler stopped, skip remaining tablets")
2212
0
                    .tag("instance_id", instance_id_)
2213
0
                    .tag("tablets_processed", num_recycled_tablets)
2214
0
                    .tag("tablets_remaining", tablets_with_refs.size() - num_recycled_tablets);
2215
0
            break;
2216
0
        }
2217
2218
0
        RecyclerMetricsContext metrics_context(instance_id_, task_name);
2219
0
        if (recycle_versioned_tablet(tablet_id, metrics_context) != 0) {
2220
0
            LOG_WARNING("failed to recycle tablet")
2221
0
                    .tag("instance_id", instance_id_)
2222
0
                    .tag("tablet_id", tablet_id);
2223
0
            return -1;
2224
0
        }
2225
0
        ++num_recycled_tablets;
2226
0
    }
2227
2228
1
    LOG_INFO("recycled {} tablets", num_recycled_tablets)
2229
1
            .tag("instance_id", instance_id_)
2230
1
            .tag("total_tablets", tablets_with_refs.size());
2231
2232
    // Phase 3: Scan again to check if any ref count keys still exist
2233
1
    std::unique_ptr<Transaction> txn;
2234
1
    TxnErrorCode err = txn_kv_->create_txn(&txn);
2235
1
    if (err != TxnErrorCode::TXN_OK) {
2236
0
        LOG_WARNING("failed to create txn for final check")
2237
0
                .tag("instance_id", instance_id_)
2238
0
                .tag("err", err);
2239
0
        return -1;
2240
0
    }
2241
2242
1
    std::unique_ptr<RangeGetIterator> iter;
2243
1
    err = txn->get(data_rowset_ref_count_key_start, data_rowset_ref_count_key_end, &iter, true);
2244
1
    if (err != TxnErrorCode::TXN_OK) {
2245
0
        LOG_WARNING("failed to create range iterator for final check")
2246
0
                .tag("instance_id", instance_id_)
2247
0
                .tag("err", err);
2248
0
        return -1;
2249
0
    }
2250
2251
1
    *has_unrecycled_rowsets = iter->has_next();
2252
1
    if (*has_unrecycled_rowsets) {
2253
0
        LOG_INFO("still has unrecycled rowsets after recycle_ref_rowsets")
2254
0
                .tag("instance_id", instance_id_);
2255
0
    }
2256
2257
1
    return 0;
2258
1
}
2259
2260
17
int InstanceRecycler::recycle_indexes() {
2261
17
    const std::string task_name = "recycle_indexes";
2262
17
    int64_t num_scanned = 0;
2263
17
    int64_t num_expired = 0;
2264
17
    int64_t num_recycled = 0;
2265
17
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2266
2267
17
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
2268
17
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
2269
17
    std::string index_key0;
2270
17
    std::string index_key1;
2271
17
    recycle_index_key(index_key_info0, &index_key0);
2272
17
    recycle_index_key(index_key_info1, &index_key1);
2273
2274
17
    LOG_WARNING("begin to recycle indexes").tag("instance_id", instance_id_);
2275
2276
17
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2277
17
    register_recycle_task(task_name, start_time);
2278
2279
17
    DORIS_CLOUD_DEFER {
2280
17
        unregister_recycle_task(task_name);
2281
17
        int64_t cost =
2282
17
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2283
17
        metrics_context.finish_report();
2284
17
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
2285
17
                .tag("instance_id", instance_id_)
2286
17
                .tag("num_scanned", num_scanned)
2287
17
                .tag("num_expired", num_expired)
2288
17
                .tag("num_recycled", num_recycled);
2289
17
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
2279
2
    DORIS_CLOUD_DEFER {
2280
2
        unregister_recycle_task(task_name);
2281
2
        int64_t cost =
2282
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2283
2
        metrics_context.finish_report();
2284
2
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
2285
2
                .tag("instance_id", instance_id_)
2286
2
                .tag("num_scanned", num_scanned)
2287
2
                .tag("num_expired", num_expired)
2288
2
                .tag("num_recycled", num_recycled);
2289
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
2279
15
    DORIS_CLOUD_DEFER {
2280
15
        unregister_recycle_task(task_name);
2281
15
        int64_t cost =
2282
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2283
15
        metrics_context.finish_report();
2284
15
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
2285
15
                .tag("instance_id", instance_id_)
2286
15
                .tag("num_scanned", num_scanned)
2287
15
                .tag("num_expired", num_expired)
2288
15
                .tag("num_recycled", num_recycled);
2289
15
    };
2290
2291
17
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2292
2293
    // Elements in `index_keys` has the same lifetime as `it` in `scan_and_recycle`
2294
17
    std::vector<std::string_view> index_keys;
2295
17
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2296
10
        ++num_scanned;
2297
10
        RecycleIndexPB index_pb;
2298
10
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
2299
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2300
0
            return -1;
2301
0
        }
2302
10
        int64_t current_time = ::time(nullptr);
2303
10
        if (current_time <
2304
10
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
2305
0
            return 0;
2306
0
        }
2307
10
        ++num_expired;
2308
        // decode index_id
2309
10
        auto k1 = k;
2310
10
        k1.remove_prefix(1);
2311
10
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2312
10
        decode_key(&k1, &out);
2313
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
2314
10
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
2315
10
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
2316
10
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
2317
10
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
2318
        // Change state to RECYCLING
2319
10
        std::unique_ptr<Transaction> txn;
2320
10
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2321
10
        if (err != TxnErrorCode::TXN_OK) {
2322
0
            LOG_WARNING("failed to create txn").tag("err", err);
2323
0
            return -1;
2324
0
        }
2325
10
        std::string val;
2326
10
        err = txn->get(k, &val);
2327
10
        if (err ==
2328
10
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2329
0
            LOG_INFO("index {} has been recycled or committed", index_id);
2330
0
            return 0;
2331
0
        }
2332
10
        if (err != TxnErrorCode::TXN_OK) {
2333
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
2334
0
            return -1;
2335
0
        }
2336
10
        index_pb.Clear();
2337
10
        if (!index_pb.ParseFromString(val)) {
2338
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2339
0
            return -1;
2340
0
        }
2341
10
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
2342
9
            index_pb.set_state(RecycleIndexPB::RECYCLING);
2343
9
            txn->put(k, index_pb.SerializeAsString());
2344
9
            err = txn->commit();
2345
9
            if (err != TxnErrorCode::TXN_OK) {
2346
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2347
0
                return -1;
2348
0
            }
2349
9
        }
2350
10
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
2351
1
            LOG_WARNING("failed to recycle tablets under index")
2352
1
                    .tag("table_id", index_pb.table_id())
2353
1
                    .tag("instance_id", instance_id_)
2354
1
                    .tag("index_id", index_id);
2355
1
            return -1;
2356
1
        }
2357
2358
9
        if (index_pb.has_db_id()) {
2359
            // Recycle the versioned keys
2360
3
            std::unique_ptr<Transaction> txn;
2361
3
            err = txn_kv_->create_txn(&txn);
2362
3
            if (err != TxnErrorCode::TXN_OK) {
2363
0
                LOG_WARNING("failed to create txn").tag("err", err);
2364
0
                return -1;
2365
0
            }
2366
3
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
2367
3
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
2368
3
            std::string index_inverted_key = versioned::index_inverted_key(
2369
3
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
2370
3
            versioned_remove_all(txn.get(), meta_key);
2371
3
            txn->remove(index_key);
2372
3
            txn->remove(index_inverted_key);
2373
3
            err = txn->commit();
2374
3
            if (err != TxnErrorCode::TXN_OK) {
2375
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2376
0
                return -1;
2377
0
            }
2378
3
        }
2379
2380
9
        metrics_context.total_recycled_num = ++num_recycled;
2381
9
        metrics_context.report();
2382
9
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2383
9
        index_keys.push_back(k);
2384
9
        return 0;
2385
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2295
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2296
2
        ++num_scanned;
2297
2
        RecycleIndexPB index_pb;
2298
2
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
2299
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2300
0
            return -1;
2301
0
        }
2302
2
        int64_t current_time = ::time(nullptr);
2303
2
        if (current_time <
2304
2
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
2305
0
            return 0;
2306
0
        }
2307
2
        ++num_expired;
2308
        // decode index_id
2309
2
        auto k1 = k;
2310
2
        k1.remove_prefix(1);
2311
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2312
2
        decode_key(&k1, &out);
2313
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
2314
2
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
2315
2
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
2316
2
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
2317
2
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
2318
        // Change state to RECYCLING
2319
2
        std::unique_ptr<Transaction> txn;
2320
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2321
2
        if (err != TxnErrorCode::TXN_OK) {
2322
0
            LOG_WARNING("failed to create txn").tag("err", err);
2323
0
            return -1;
2324
0
        }
2325
2
        std::string val;
2326
2
        err = txn->get(k, &val);
2327
2
        if (err ==
2328
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2329
0
            LOG_INFO("index {} has been recycled or committed", index_id);
2330
0
            return 0;
2331
0
        }
2332
2
        if (err != TxnErrorCode::TXN_OK) {
2333
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
2334
0
            return -1;
2335
0
        }
2336
2
        index_pb.Clear();
2337
2
        if (!index_pb.ParseFromString(val)) {
2338
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2339
0
            return -1;
2340
0
        }
2341
2
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
2342
1
            index_pb.set_state(RecycleIndexPB::RECYCLING);
2343
1
            txn->put(k, index_pb.SerializeAsString());
2344
1
            err = txn->commit();
2345
1
            if (err != TxnErrorCode::TXN_OK) {
2346
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2347
0
                return -1;
2348
0
            }
2349
1
        }
2350
2
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
2351
1
            LOG_WARNING("failed to recycle tablets under index")
2352
1
                    .tag("table_id", index_pb.table_id())
2353
1
                    .tag("instance_id", instance_id_)
2354
1
                    .tag("index_id", index_id);
2355
1
            return -1;
2356
1
        }
2357
2358
1
        if (index_pb.has_db_id()) {
2359
            // Recycle the versioned keys
2360
1
            std::unique_ptr<Transaction> txn;
2361
1
            err = txn_kv_->create_txn(&txn);
2362
1
            if (err != TxnErrorCode::TXN_OK) {
2363
0
                LOG_WARNING("failed to create txn").tag("err", err);
2364
0
                return -1;
2365
0
            }
2366
1
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
2367
1
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
2368
1
            std::string index_inverted_key = versioned::index_inverted_key(
2369
1
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
2370
1
            versioned_remove_all(txn.get(), meta_key);
2371
1
            txn->remove(index_key);
2372
1
            txn->remove(index_inverted_key);
2373
1
            err = txn->commit();
2374
1
            if (err != TxnErrorCode::TXN_OK) {
2375
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2376
0
                return -1;
2377
0
            }
2378
1
        }
2379
2380
1
        metrics_context.total_recycled_num = ++num_recycled;
2381
1
        metrics_context.report();
2382
1
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2383
1
        index_keys.push_back(k);
2384
1
        return 0;
2385
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2295
8
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2296
8
        ++num_scanned;
2297
8
        RecycleIndexPB index_pb;
2298
8
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
2299
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2300
0
            return -1;
2301
0
        }
2302
8
        int64_t current_time = ::time(nullptr);
2303
8
        if (current_time <
2304
8
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
2305
0
            return 0;
2306
0
        }
2307
8
        ++num_expired;
2308
        // decode index_id
2309
8
        auto k1 = k;
2310
8
        k1.remove_prefix(1);
2311
8
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2312
8
        decode_key(&k1, &out);
2313
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
2314
8
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
2315
8
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
2316
8
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
2317
8
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
2318
        // Change state to RECYCLING
2319
8
        std::unique_ptr<Transaction> txn;
2320
8
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2321
8
        if (err != TxnErrorCode::TXN_OK) {
2322
0
            LOG_WARNING("failed to create txn").tag("err", err);
2323
0
            return -1;
2324
0
        }
2325
8
        std::string val;
2326
8
        err = txn->get(k, &val);
2327
8
        if (err ==
2328
8
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2329
0
            LOG_INFO("index {} has been recycled or committed", index_id);
2330
0
            return 0;
2331
0
        }
2332
8
        if (err != TxnErrorCode::TXN_OK) {
2333
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
2334
0
            return -1;
2335
0
        }
2336
8
        index_pb.Clear();
2337
8
        if (!index_pb.ParseFromString(val)) {
2338
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2339
0
            return -1;
2340
0
        }
2341
8
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
2342
8
            index_pb.set_state(RecycleIndexPB::RECYCLING);
2343
8
            txn->put(k, index_pb.SerializeAsString());
2344
8
            err = txn->commit();
2345
8
            if (err != TxnErrorCode::TXN_OK) {
2346
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2347
0
                return -1;
2348
0
            }
2349
8
        }
2350
8
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
2351
0
            LOG_WARNING("failed to recycle tablets under index")
2352
0
                    .tag("table_id", index_pb.table_id())
2353
0
                    .tag("instance_id", instance_id_)
2354
0
                    .tag("index_id", index_id);
2355
0
            return -1;
2356
0
        }
2357
2358
8
        if (index_pb.has_db_id()) {
2359
            // Recycle the versioned keys
2360
2
            std::unique_ptr<Transaction> txn;
2361
2
            err = txn_kv_->create_txn(&txn);
2362
2
            if (err != TxnErrorCode::TXN_OK) {
2363
0
                LOG_WARNING("failed to create txn").tag("err", err);
2364
0
                return -1;
2365
0
            }
2366
2
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
2367
2
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
2368
2
            std::string index_inverted_key = versioned::index_inverted_key(
2369
2
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
2370
2
            versioned_remove_all(txn.get(), meta_key);
2371
2
            txn->remove(index_key);
2372
2
            txn->remove(index_inverted_key);
2373
2
            err = txn->commit();
2374
2
            if (err != TxnErrorCode::TXN_OK) {
2375
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2376
0
                return -1;
2377
0
            }
2378
2
        }
2379
2380
8
        metrics_context.total_recycled_num = ++num_recycled;
2381
8
        metrics_context.report();
2382
8
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2383
8
        index_keys.push_back(k);
2384
8
        return 0;
2385
8
    };
2386
2387
17
    auto loop_done = [&index_keys, this]() -> int {
2388
6
        if (index_keys.empty()) return 0;
2389
5
        DORIS_CLOUD_DEFER {
2390
5
            index_keys.clear();
2391
5
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2389
1
        DORIS_CLOUD_DEFER {
2390
1
            index_keys.clear();
2391
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2389
4
        DORIS_CLOUD_DEFER {
2390
4
            index_keys.clear();
2391
4
        };
2392
5
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2393
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2394
0
            return -1;
2395
0
        }
2396
5
        return 0;
2397
5
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
2387
2
    auto loop_done = [&index_keys, this]() -> int {
2388
2
        if (index_keys.empty()) return 0;
2389
1
        DORIS_CLOUD_DEFER {
2390
1
            index_keys.clear();
2391
1
        };
2392
1
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2393
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2394
0
            return -1;
2395
0
        }
2396
1
        return 0;
2397
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
2387
4
    auto loop_done = [&index_keys, this]() -> int {
2388
4
        if (index_keys.empty()) return 0;
2389
4
        DORIS_CLOUD_DEFER {
2390
4
            index_keys.clear();
2391
4
        };
2392
4
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2393
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2394
0
            return -1;
2395
0
        }
2396
4
        return 0;
2397
4
    };
2398
2399
17
    if (config::enable_recycler_stats_metrics) {
2400
0
        scan_and_statistics_indexes();
2401
0
    }
2402
    // recycle_func and loop_done for scan and recycle
2403
17
    return scan_and_recycle(index_key0, index_key1, std::move(recycle_func), std::move(loop_done));
2404
17
}
2405
2406
bool check_lazy_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string instance_id,
2407
8.25k
                             int64_t tablet_id) {
2408
8.25k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("check_lazy_txn_finished::bypass_check", true);
2409
2410
8.25k
    std::unique_ptr<Transaction> txn;
2411
8.25k
    TxnErrorCode err = txn_kv->create_txn(&txn);
2412
8.25k
    if (err != TxnErrorCode::TXN_OK) {
2413
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id
2414
0
                     << " tablet_id=" << tablet_id << " err=" << err;
2415
0
        return false;
2416
0
    }
2417
2418
8.25k
    std::string tablet_idx_key = meta_tablet_idx_key({instance_id, tablet_id});
2419
8.25k
    std::string tablet_idx_val;
2420
8.25k
    err = txn->get(tablet_idx_key, &tablet_idx_val);
2421
8.25k
    if (TxnErrorCode::TXN_OK != err) {
2422
0
        LOG(WARNING) << "failed to get tablet index, instance_id=" << instance_id
2423
0
                     << " tablet_id=" << tablet_id << " err=" << err
2424
0
                     << " key=" << hex(tablet_idx_key);
2425
0
        return false;
2426
0
    }
2427
2428
8.25k
    TabletIndexPB tablet_idx_pb;
2429
8.25k
    if (!tablet_idx_pb.ParseFromString(tablet_idx_val)) {
2430
0
        LOG(WARNING) << "failed to parse tablet_idx_pb, instance_id=" << instance_id
2431
0
                     << " tablet_id=" << tablet_id;
2432
0
        return false;
2433
0
    }
2434
2435
8.25k
    if (!tablet_idx_pb.has_db_id()) {
2436
        // In the previous version, the db_id was not set in the index_pb.
2437
        // If updating to the version which enable txn lazy commit, the db_id will be set.
2438
0
        LOG(INFO) << "txn index has no db_id, tablet_id=" << tablet_id
2439
0
                  << " instance_id=" << instance_id
2440
0
                  << " tablet_idx_pb=" << tablet_idx_pb.ShortDebugString();
2441
0
        return true;
2442
0
    }
2443
2444
8.25k
    std::string ver_val;
2445
8.25k
    std::string ver_key =
2446
8.25k
            partition_version_key({instance_id, tablet_idx_pb.db_id(), tablet_idx_pb.table_id(),
2447
8.25k
                                   tablet_idx_pb.partition_id()});
2448
8.25k
    err = txn->get(ver_key, &ver_val);
2449
2450
8.25k
    if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
2451
214
        LOG(INFO) << ""
2452
214
                     "partition version not found, instance_id="
2453
214
                  << instance_id << " db_id=" << tablet_idx_pb.db_id()
2454
214
                  << " table_id=" << tablet_idx_pb.table_id()
2455
214
                  << " partition_id=" << tablet_idx_pb.partition_id() << " tablet_id=" << tablet_id
2456
214
                  << " key=" << hex(ver_key);
2457
214
        return true;
2458
214
    }
2459
2460
8.03k
    if (TxnErrorCode::TXN_OK != err) {
2461
0
        LOG(WARNING) << "failed to get partition version, instance_id=" << instance_id
2462
0
                     << " db_id=" << tablet_idx_pb.db_id()
2463
0
                     << " table_id=" << tablet_idx_pb.table_id()
2464
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
2465
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key) << " err=" << err;
2466
0
        return false;
2467
0
    }
2468
2469
8.03k
    VersionPB version_pb;
2470
8.03k
    if (!version_pb.ParseFromString(ver_val)) {
2471
0
        LOG(WARNING) << "failed to parse version_pb, instance_id=" << instance_id
2472
0
                     << " db_id=" << tablet_idx_pb.db_id()
2473
0
                     << " table_id=" << tablet_idx_pb.table_id()
2474
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
2475
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key);
2476
0
        return false;
2477
0
    }
2478
2479
8.03k
    if (version_pb.pending_txn_ids_size() > 0) {
2480
4.00k
        TEST_SYNC_POINT_CALLBACK("check_lazy_txn_finished::txn_not_finished");
2481
4.00k
        DCHECK(version_pb.pending_txn_ids_size() == 1);
2482
4.00k
        LOG(WARNING) << "lazy txn not finished, instance_id=" << instance_id
2483
4.00k
                     << " db_id=" << tablet_idx_pb.db_id()
2484
4.00k
                     << " table_id=" << tablet_idx_pb.table_id()
2485
4.00k
                     << " partition_id=" << tablet_idx_pb.partition_id()
2486
4.00k
                     << " tablet_id=" << tablet_id << " txn_id=" << version_pb.pending_txn_ids(0)
2487
4.00k
                     << " key=" << hex(ver_key);
2488
4.00k
        return false;
2489
4.00k
    }
2490
4.03k
    return true;
2491
8.03k
}
2492
2493
15
int InstanceRecycler::recycle_partitions() {
2494
15
    const std::string task_name = "recycle_partitions";
2495
15
    int64_t num_scanned = 0;
2496
15
    int64_t num_expired = 0;
2497
15
    int64_t num_recycled = 0;
2498
15
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2499
2500
15
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
2501
15
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
2502
15
    std::string part_key0;
2503
15
    std::string part_key1;
2504
15
    recycle_partition_key(part_key_info0, &part_key0);
2505
15
    recycle_partition_key(part_key_info1, &part_key1);
2506
2507
15
    LOG_WARNING("begin to recycle partitions").tag("instance_id", instance_id_);
2508
2509
15
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2510
15
    register_recycle_task(task_name, start_time);
2511
2512
15
    DORIS_CLOUD_DEFER {
2513
15
        unregister_recycle_task(task_name);
2514
15
        int64_t cost =
2515
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2516
15
        metrics_context.finish_report();
2517
15
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2518
15
                .tag("instance_id", instance_id_)
2519
15
                .tag("num_scanned", num_scanned)
2520
15
                .tag("num_expired", num_expired)
2521
15
                .tag("num_recycled", num_recycled);
2522
15
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
2512
2
    DORIS_CLOUD_DEFER {
2513
2
        unregister_recycle_task(task_name);
2514
2
        int64_t cost =
2515
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2516
2
        metrics_context.finish_report();
2517
2
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2518
2
                .tag("instance_id", instance_id_)
2519
2
                .tag("num_scanned", num_scanned)
2520
2
                .tag("num_expired", num_expired)
2521
2
                .tag("num_recycled", num_recycled);
2522
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
2512
13
    DORIS_CLOUD_DEFER {
2513
13
        unregister_recycle_task(task_name);
2514
13
        int64_t cost =
2515
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2516
13
        metrics_context.finish_report();
2517
13
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2518
13
                .tag("instance_id", instance_id_)
2519
13
                .tag("num_scanned", num_scanned)
2520
13
                .tag("num_expired", num_expired)
2521
13
                .tag("num_recycled", num_recycled);
2522
13
    };
2523
2524
15
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2525
2526
    // Elements in `partition_keys` has the same lifetime as `it` in `scan_and_recycle`
2527
15
    std::vector<std::string_view> partition_keys;
2528
15
    std::vector<std::string> partition_version_keys;
2529
15
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2530
9
        ++num_scanned;
2531
9
        RecyclePartitionPB part_pb;
2532
9
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2533
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2534
0
            return -1;
2535
0
        }
2536
9
        int64_t current_time = ::time(nullptr);
2537
9
        if (current_time < calculate_partition_expired_time(instance_id_, part_pb,
2538
9
                                                            &earlest_ts)) { // not expired
2539
0
            return 0;
2540
0
        }
2541
9
        ++num_expired;
2542
        // decode partition_id
2543
9
        auto k1 = k;
2544
9
        k1.remove_prefix(1);
2545
9
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2546
9
        decode_key(&k1, &out);
2547
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2548
9
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2549
9
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2550
9
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2551
9
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2552
        // Change state to RECYCLING
2553
9
        std::unique_ptr<Transaction> txn;
2554
9
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2555
9
        if (err != TxnErrorCode::TXN_OK) {
2556
0
            LOG_WARNING("failed to create txn").tag("err", err);
2557
0
            return -1;
2558
0
        }
2559
9
        std::string val;
2560
9
        err = txn->get(k, &val);
2561
9
        if (err ==
2562
9
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2563
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2564
0
            return 0;
2565
0
        }
2566
9
        if (err != TxnErrorCode::TXN_OK) {
2567
0
            LOG_WARNING("failed to get kv");
2568
0
            return -1;
2569
0
        }
2570
9
        part_pb.Clear();
2571
9
        if (!part_pb.ParseFromString(val)) {
2572
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2573
0
            return -1;
2574
0
        }
2575
        // Partitions with PREPARED state MUST have no data
2576
9
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2577
8
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2578
8
            txn->put(k, part_pb.SerializeAsString());
2579
8
            err = txn->commit();
2580
8
            if (err != TxnErrorCode::TXN_OK) {
2581
0
                LOG_WARNING("failed to commit txn: {}", err);
2582
0
                return -1;
2583
0
            }
2584
8
        }
2585
2586
9
        int ret = 0;
2587
33
        for (int64_t index_id : part_pb.index_id()) {
2588
33
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2589
1
                LOG_WARNING("failed to recycle tablets under partition")
2590
1
                        .tag("table_id", part_pb.table_id())
2591
1
                        .tag("instance_id", instance_id_)
2592
1
                        .tag("index_id", index_id)
2593
1
                        .tag("partition_id", partition_id);
2594
1
                ret = -1;
2595
1
            }
2596
33
        }
2597
9
        if (ret == 0 && part_pb.has_db_id()) {
2598
            // Recycle the versioned keys
2599
8
            std::unique_ptr<Transaction> txn;
2600
8
            err = txn_kv_->create_txn(&txn);
2601
8
            if (err != TxnErrorCode::TXN_OK) {
2602
0
                LOG_WARNING("failed to create txn").tag("err", err);
2603
0
                return -1;
2604
0
            }
2605
8
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2606
8
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2607
8
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2608
8
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2609
8
            std::string partition_version_key =
2610
8
                    versioned::partition_version_key({instance_id_, partition_id});
2611
8
            versioned_remove_all(txn.get(), meta_key);
2612
8
            txn->remove(index_key);
2613
8
            txn->remove(inverted_index_key);
2614
8
            versioned_remove_all(txn.get(), partition_version_key);
2615
8
            err = txn->commit();
2616
8
            if (err != TxnErrorCode::TXN_OK) {
2617
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2618
0
                return -1;
2619
0
            }
2620
8
        }
2621
2622
9
        if (ret == 0) {
2623
8
            ++num_recycled;
2624
8
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2625
8
            partition_keys.push_back(k);
2626
8
            if (part_pb.db_id() > 0) {
2627
8
                partition_version_keys.push_back(partition_version_key(
2628
8
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2629
8
            }
2630
8
            metrics_context.total_recycled_num = num_recycled;
2631
8
            metrics_context.report();
2632
8
        }
2633
9
        return ret;
2634
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2529
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2530
2
        ++num_scanned;
2531
2
        RecyclePartitionPB part_pb;
2532
2
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2533
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2534
0
            return -1;
2535
0
        }
2536
2
        int64_t current_time = ::time(nullptr);
2537
2
        if (current_time < calculate_partition_expired_time(instance_id_, part_pb,
2538
2
                                                            &earlest_ts)) { // not expired
2539
0
            return 0;
2540
0
        }
2541
2
        ++num_expired;
2542
        // decode partition_id
2543
2
        auto k1 = k;
2544
2
        k1.remove_prefix(1);
2545
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2546
2
        decode_key(&k1, &out);
2547
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2548
2
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2549
2
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2550
2
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2551
2
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2552
        // Change state to RECYCLING
2553
2
        std::unique_ptr<Transaction> txn;
2554
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2555
2
        if (err != TxnErrorCode::TXN_OK) {
2556
0
            LOG_WARNING("failed to create txn").tag("err", err);
2557
0
            return -1;
2558
0
        }
2559
2
        std::string val;
2560
2
        err = txn->get(k, &val);
2561
2
        if (err ==
2562
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2563
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2564
0
            return 0;
2565
0
        }
2566
2
        if (err != TxnErrorCode::TXN_OK) {
2567
0
            LOG_WARNING("failed to get kv");
2568
0
            return -1;
2569
0
        }
2570
2
        part_pb.Clear();
2571
2
        if (!part_pb.ParseFromString(val)) {
2572
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2573
0
            return -1;
2574
0
        }
2575
        // Partitions with PREPARED state MUST have no data
2576
2
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2577
1
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2578
1
            txn->put(k, part_pb.SerializeAsString());
2579
1
            err = txn->commit();
2580
1
            if (err != TxnErrorCode::TXN_OK) {
2581
0
                LOG_WARNING("failed to commit txn: {}", err);
2582
0
                return -1;
2583
0
            }
2584
1
        }
2585
2586
2
        int ret = 0;
2587
2
        for (int64_t index_id : part_pb.index_id()) {
2588
2
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2589
1
                LOG_WARNING("failed to recycle tablets under partition")
2590
1
                        .tag("table_id", part_pb.table_id())
2591
1
                        .tag("instance_id", instance_id_)
2592
1
                        .tag("index_id", index_id)
2593
1
                        .tag("partition_id", partition_id);
2594
1
                ret = -1;
2595
1
            }
2596
2
        }
2597
2
        if (ret == 0 && part_pb.has_db_id()) {
2598
            // Recycle the versioned keys
2599
1
            std::unique_ptr<Transaction> txn;
2600
1
            err = txn_kv_->create_txn(&txn);
2601
1
            if (err != TxnErrorCode::TXN_OK) {
2602
0
                LOG_WARNING("failed to create txn").tag("err", err);
2603
0
                return -1;
2604
0
            }
2605
1
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2606
1
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2607
1
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2608
1
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2609
1
            std::string partition_version_key =
2610
1
                    versioned::partition_version_key({instance_id_, partition_id});
2611
1
            versioned_remove_all(txn.get(), meta_key);
2612
1
            txn->remove(index_key);
2613
1
            txn->remove(inverted_index_key);
2614
1
            versioned_remove_all(txn.get(), partition_version_key);
2615
1
            err = txn->commit();
2616
1
            if (err != TxnErrorCode::TXN_OK) {
2617
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2618
0
                return -1;
2619
0
            }
2620
1
        }
2621
2622
2
        if (ret == 0) {
2623
1
            ++num_recycled;
2624
1
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2625
1
            partition_keys.push_back(k);
2626
1
            if (part_pb.db_id() > 0) {
2627
1
                partition_version_keys.push_back(partition_version_key(
2628
1
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2629
1
            }
2630
1
            metrics_context.total_recycled_num = num_recycled;
2631
1
            metrics_context.report();
2632
1
        }
2633
2
        return ret;
2634
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2529
7
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2530
7
        ++num_scanned;
2531
7
        RecyclePartitionPB part_pb;
2532
7
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2533
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2534
0
            return -1;
2535
0
        }
2536
7
        int64_t current_time = ::time(nullptr);
2537
7
        if (current_time < calculate_partition_expired_time(instance_id_, part_pb,
2538
7
                                                            &earlest_ts)) { // not expired
2539
0
            return 0;
2540
0
        }
2541
7
        ++num_expired;
2542
        // decode partition_id
2543
7
        auto k1 = k;
2544
7
        k1.remove_prefix(1);
2545
7
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2546
7
        decode_key(&k1, &out);
2547
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2548
7
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2549
7
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2550
7
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2551
7
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2552
        // Change state to RECYCLING
2553
7
        std::unique_ptr<Transaction> txn;
2554
7
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2555
7
        if (err != TxnErrorCode::TXN_OK) {
2556
0
            LOG_WARNING("failed to create txn").tag("err", err);
2557
0
            return -1;
2558
0
        }
2559
7
        std::string val;
2560
7
        err = txn->get(k, &val);
2561
7
        if (err ==
2562
7
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2563
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2564
0
            return 0;
2565
0
        }
2566
7
        if (err != TxnErrorCode::TXN_OK) {
2567
0
            LOG_WARNING("failed to get kv");
2568
0
            return -1;
2569
0
        }
2570
7
        part_pb.Clear();
2571
7
        if (!part_pb.ParseFromString(val)) {
2572
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2573
0
            return -1;
2574
0
        }
2575
        // Partitions with PREPARED state MUST have no data
2576
7
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2577
7
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2578
7
            txn->put(k, part_pb.SerializeAsString());
2579
7
            err = txn->commit();
2580
7
            if (err != TxnErrorCode::TXN_OK) {
2581
0
                LOG_WARNING("failed to commit txn: {}", err);
2582
0
                return -1;
2583
0
            }
2584
7
        }
2585
2586
7
        int ret = 0;
2587
31
        for (int64_t index_id : part_pb.index_id()) {
2588
31
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2589
0
                LOG_WARNING("failed to recycle tablets under partition")
2590
0
                        .tag("table_id", part_pb.table_id())
2591
0
                        .tag("instance_id", instance_id_)
2592
0
                        .tag("index_id", index_id)
2593
0
                        .tag("partition_id", partition_id);
2594
0
                ret = -1;
2595
0
            }
2596
31
        }
2597
7
        if (ret == 0 && part_pb.has_db_id()) {
2598
            // Recycle the versioned keys
2599
7
            std::unique_ptr<Transaction> txn;
2600
7
            err = txn_kv_->create_txn(&txn);
2601
7
            if (err != TxnErrorCode::TXN_OK) {
2602
0
                LOG_WARNING("failed to create txn").tag("err", err);
2603
0
                return -1;
2604
0
            }
2605
7
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2606
7
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2607
7
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2608
7
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2609
7
            std::string partition_version_key =
2610
7
                    versioned::partition_version_key({instance_id_, partition_id});
2611
7
            versioned_remove_all(txn.get(), meta_key);
2612
7
            txn->remove(index_key);
2613
7
            txn->remove(inverted_index_key);
2614
7
            versioned_remove_all(txn.get(), partition_version_key);
2615
7
            err = txn->commit();
2616
7
            if (err != TxnErrorCode::TXN_OK) {
2617
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2618
0
                return -1;
2619
0
            }
2620
7
        }
2621
2622
7
        if (ret == 0) {
2623
7
            ++num_recycled;
2624
7
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2625
7
            partition_keys.push_back(k);
2626
7
            if (part_pb.db_id() > 0) {
2627
7
                partition_version_keys.push_back(partition_version_key(
2628
7
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2629
7
            }
2630
7
            metrics_context.total_recycled_num = num_recycled;
2631
7
            metrics_context.report();
2632
7
        }
2633
7
        return ret;
2634
7
    };
2635
2636
15
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2637
5
        if (partition_keys.empty()) return 0;
2638
4
        DORIS_CLOUD_DEFER {
2639
4
            partition_keys.clear();
2640
4
            partition_version_keys.clear();
2641
4
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2638
1
        DORIS_CLOUD_DEFER {
2639
1
            partition_keys.clear();
2640
1
            partition_version_keys.clear();
2641
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2638
3
        DORIS_CLOUD_DEFER {
2639
3
            partition_keys.clear();
2640
3
            partition_version_keys.clear();
2641
3
        };
2642
4
        std::unique_ptr<Transaction> txn;
2643
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2644
4
        if (err != TxnErrorCode::TXN_OK) {
2645
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2646
0
            return -1;
2647
0
        }
2648
8
        for (auto& k : partition_keys) {
2649
8
            txn->remove(k);
2650
8
        }
2651
8
        for (auto& k : partition_version_keys) {
2652
8
            txn->remove(k);
2653
8
        }
2654
4
        err = txn->commit();
2655
4
        if (err != TxnErrorCode::TXN_OK) {
2656
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2657
0
                         << " err=" << err;
2658
0
            return -1;
2659
0
        }
2660
4
        return 0;
2661
4
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
2636
2
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2637
2
        if (partition_keys.empty()) return 0;
2638
1
        DORIS_CLOUD_DEFER {
2639
1
            partition_keys.clear();
2640
1
            partition_version_keys.clear();
2641
1
        };
2642
1
        std::unique_ptr<Transaction> txn;
2643
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2644
1
        if (err != TxnErrorCode::TXN_OK) {
2645
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2646
0
            return -1;
2647
0
        }
2648
1
        for (auto& k : partition_keys) {
2649
1
            txn->remove(k);
2650
1
        }
2651
1
        for (auto& k : partition_version_keys) {
2652
1
            txn->remove(k);
2653
1
        }
2654
1
        err = txn->commit();
2655
1
        if (err != TxnErrorCode::TXN_OK) {
2656
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2657
0
                         << " err=" << err;
2658
0
            return -1;
2659
0
        }
2660
1
        return 0;
2661
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
2636
3
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2637
3
        if (partition_keys.empty()) return 0;
2638
3
        DORIS_CLOUD_DEFER {
2639
3
            partition_keys.clear();
2640
3
            partition_version_keys.clear();
2641
3
        };
2642
3
        std::unique_ptr<Transaction> txn;
2643
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2644
3
        if (err != TxnErrorCode::TXN_OK) {
2645
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2646
0
            return -1;
2647
0
        }
2648
7
        for (auto& k : partition_keys) {
2649
7
            txn->remove(k);
2650
7
        }
2651
7
        for (auto& k : partition_version_keys) {
2652
7
            txn->remove(k);
2653
7
        }
2654
3
        err = txn->commit();
2655
3
        if (err != TxnErrorCode::TXN_OK) {
2656
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2657
0
                         << " err=" << err;
2658
0
            return -1;
2659
0
        }
2660
3
        return 0;
2661
3
    };
2662
2663
15
    if (config::enable_recycler_stats_metrics) {
2664
0
        scan_and_statistics_partitions();
2665
0
    }
2666
    // recycle_func and loop_done for scan and recycle
2667
15
    return scan_and_recycle(part_key0, part_key1, std::move(recycle_func), std::move(loop_done));
2668
15
}
2669
2670
14
int InstanceRecycler::recycle_versions() {
2671
14
    if (should_recycle_versioned_keys()) {
2672
2
        return recycle_orphan_partitions();
2673
2
    }
2674
2675
12
    int64_t num_scanned = 0;
2676
12
    int64_t num_recycled = 0;
2677
12
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
2678
2679
12
    LOG_WARNING("begin to recycle table and partition versions").tag("instance_id", instance_id_);
2680
2681
12
    auto start_time = steady_clock::now();
2682
2683
12
    DORIS_CLOUD_DEFER {
2684
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2685
12
        metrics_context.finish_report();
2686
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
2687
12
                .tag("instance_id", instance_id_)
2688
12
                .tag("num_scanned", num_scanned)
2689
12
                .tag("num_recycled", num_recycled);
2690
12
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
Line
Count
Source
2683
12
    DORIS_CLOUD_DEFER {
2684
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2685
12
        metrics_context.finish_report();
2686
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
2687
12
                .tag("instance_id", instance_id_)
2688
12
                .tag("num_scanned", num_scanned)
2689
12
                .tag("num_recycled", num_recycled);
2690
12
    };
2691
2692
12
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
2693
12
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
2694
12
    int64_t last_scanned_table_id = 0;
2695
12
    bool is_recycled = false; // Is last scanned kv recycled
2696
12
    auto recycle_func = [&num_scanned, &num_recycled, &last_scanned_table_id, &is_recycled,
2697
12
                         &metrics_context, this](std::string_view k, std::string_view) {
2698
2
        ++num_scanned;
2699
2
        auto k1 = k;
2700
2
        k1.remove_prefix(1);
2701
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
2702
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2703
2
        decode_key(&k1, &out);
2704
2
        DCHECK_EQ(out.size(), 6) << k;
2705
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
2706
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
2707
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
2708
0
            return 0;
2709
0
        }
2710
2
        last_scanned_table_id = table_id;
2711
2
        is_recycled = false;
2712
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
2713
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
2714
2
        std::unique_ptr<Transaction> txn;
2715
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2716
2
        if (err != TxnErrorCode::TXN_OK) {
2717
0
            return -1;
2718
0
        }
2719
2
        std::unique_ptr<RangeGetIterator> iter;
2720
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
2721
2
        if (err != TxnErrorCode::TXN_OK) {
2722
0
            return -1;
2723
0
        }
2724
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
2725
1
            return 0;
2726
1
        }
2727
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
2728
        // 1. Remove all partition version kvs of this table
2729
1
        auto partition_version_key_begin =
2730
1
                partition_version_key({instance_id_, db_id, table_id, 0});
2731
1
        auto partition_version_key_end =
2732
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
2733
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
2734
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
2735
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
2736
1
                     << " table_id=" << table_id;
2737
        // 2. Remove the table version kv of this table
2738
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
2739
1
        txn->remove(tbl_version_key);
2740
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
2741
        // 3. Remove mow delete bitmap update lock and tablet job lock
2742
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2743
1
        txn->remove(lock_key);
2744
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2745
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2746
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2747
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
2748
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2749
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2750
1
                     << " table_id=" << table_id;
2751
1
        err = txn->commit();
2752
1
        if (err != TxnErrorCode::TXN_OK) {
2753
0
            return -1;
2754
0
        }
2755
1
        metrics_context.total_recycled_num = ++num_recycled;
2756
1
        metrics_context.report();
2757
1
        is_recycled = true;
2758
1
        return 0;
2759
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2697
2
                         &metrics_context, this](std::string_view k, std::string_view) {
2698
2
        ++num_scanned;
2699
2
        auto k1 = k;
2700
2
        k1.remove_prefix(1);
2701
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
2702
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2703
2
        decode_key(&k1, &out);
2704
2
        DCHECK_EQ(out.size(), 6) << k;
2705
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
2706
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
2707
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
2708
0
            return 0;
2709
0
        }
2710
2
        last_scanned_table_id = table_id;
2711
2
        is_recycled = false;
2712
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
2713
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
2714
2
        std::unique_ptr<Transaction> txn;
2715
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2716
2
        if (err != TxnErrorCode::TXN_OK) {
2717
0
            return -1;
2718
0
        }
2719
2
        std::unique_ptr<RangeGetIterator> iter;
2720
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
2721
2
        if (err != TxnErrorCode::TXN_OK) {
2722
0
            return -1;
2723
0
        }
2724
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
2725
1
            return 0;
2726
1
        }
2727
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
2728
        // 1. Remove all partition version kvs of this table
2729
1
        auto partition_version_key_begin =
2730
1
                partition_version_key({instance_id_, db_id, table_id, 0});
2731
1
        auto partition_version_key_end =
2732
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
2733
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
2734
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
2735
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
2736
1
                     << " table_id=" << table_id;
2737
        // 2. Remove the table version kv of this table
2738
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
2739
1
        txn->remove(tbl_version_key);
2740
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
2741
        // 3. Remove mow delete bitmap update lock and tablet job lock
2742
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2743
1
        txn->remove(lock_key);
2744
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2745
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2746
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2747
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
2748
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2749
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2750
1
                     << " table_id=" << table_id;
2751
1
        err = txn->commit();
2752
1
        if (err != TxnErrorCode::TXN_OK) {
2753
0
            return -1;
2754
0
        }
2755
1
        metrics_context.total_recycled_num = ++num_recycled;
2756
1
        metrics_context.report();
2757
1
        is_recycled = true;
2758
1
        return 0;
2759
1
    };
2760
2761
12
    if (config::enable_recycler_stats_metrics) {
2762
0
        scan_and_statistics_versions();
2763
0
    }
2764
    // recycle_func and loop_done for scan and recycle
2765
12
    return scan_and_recycle(version_key_begin, version_key_end, std::move(recycle_func));
2766
14
}
2767
2768
3
int InstanceRecycler::recycle_orphan_partitions() {
2769
3
    int64_t num_scanned = 0;
2770
3
    int64_t num_recycled = 0;
2771
3
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_orphan_partitions");
2772
2773
3
    LOG_WARNING("begin to recycle orphan table and partition versions")
2774
3
            .tag("instance_id", instance_id_);
2775
2776
3
    auto start_time = steady_clock::now();
2777
2778
3
    DORIS_CLOUD_DEFER {
2779
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2780
3
        metrics_context.finish_report();
2781
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
2782
3
                .tag("instance_id", instance_id_)
2783
3
                .tag("num_scanned", num_scanned)
2784
3
                .tag("num_recycled", num_recycled);
2785
3
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
Line
Count
Source
2778
3
    DORIS_CLOUD_DEFER {
2779
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2780
3
        metrics_context.finish_report();
2781
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
2782
3
                .tag("instance_id", instance_id_)
2783
3
                .tag("num_scanned", num_scanned)
2784
3
                .tag("num_recycled", num_recycled);
2785
3
    };
2786
2787
3
    bool is_empty_table = false;        // whether the table has no indexes
2788
3
    bool is_table_kvs_recycled = false; // whether the table related kvs have been recycled
2789
3
    int64_t current_table_id = 0;       // current scanning table id
2790
3
    auto recycle_func = [&num_scanned, &num_recycled, &metrics_context, &is_empty_table,
2791
3
                         &current_table_id, &is_table_kvs_recycled,
2792
3
                         this](std::string_view k, std::string_view) {
2793
2
        ++num_scanned;
2794
2795
2
        std::string_view k1(k);
2796
2
        int64_t db_id, table_id, partition_id;
2797
2
        if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id,
2798
2
                                                            &partition_id)) {
2799
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
2800
0
            return -1;
2801
2
        } else if (table_id != current_table_id) {
2802
2
            current_table_id = table_id;
2803
2
            is_table_kvs_recycled = false;
2804
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
2805
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
2806
2
            if (err != TxnErrorCode::TXN_OK) {
2807
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
2808
0
                             << " table_id=" << table_id << " err=" << err;
2809
0
                return -1;
2810
0
            }
2811
2
        }
2812
2813
2
        if (!is_empty_table) {
2814
            // table is not empty, skip recycle
2815
1
            return 0;
2816
1
        }
2817
2818
1
        std::unique_ptr<Transaction> txn;
2819
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2820
1
        if (err != TxnErrorCode::TXN_OK) {
2821
0
            return -1;
2822
0
        }
2823
2824
        // 1. Remove all partition related kvs
2825
1
        std::string partition_meta_key =
2826
1
                versioned::meta_partition_key({instance_id_, partition_id});
2827
1
        std::string partition_index_key =
2828
1
                versioned::partition_index_key({instance_id_, partition_id});
2829
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
2830
1
                {instance_id_, db_id, table_id, partition_id});
2831
1
        std::string partition_version_key =
2832
1
                versioned::partition_version_key({instance_id_, partition_id});
2833
1
        txn->remove(partition_index_key);
2834
1
        txn->remove(partition_inverted_key);
2835
1
        versioned_remove_all(txn.get(), partition_meta_key);
2836
1
        versioned_remove_all(txn.get(), partition_version_key);
2837
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
2838
1
                     << " table_id=" << table_id << " db_id=" << db_id
2839
1
                     << " partition_meta_key=" << hex(partition_meta_key)
2840
1
                     << " partition_version_key=" << hex(partition_version_key);
2841
2842
1
        if (!is_table_kvs_recycled) {
2843
1
            is_table_kvs_recycled = true;
2844
2845
            // 2. Remove the table version kv of this table
2846
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
2847
1
            versioned_remove_all(txn.get(), table_version_key);
2848
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
2849
            // 3. Remove mow delete bitmap update lock and tablet job lock
2850
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2851
1
            txn->remove(lock_key);
2852
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2853
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2854
1
            std::string tablet_job_key_end =
2855
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2856
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
2857
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2858
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2859
1
                         << " table_id=" << table_id;
2860
1
        }
2861
2862
1
        err = txn->commit();
2863
1
        if (err != TxnErrorCode::TXN_OK) {
2864
0
            return -1;
2865
0
        }
2866
1
        metrics_context.total_recycled_num = ++num_recycled;
2867
1
        metrics_context.report();
2868
1
        return 0;
2869
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2792
2
                         this](std::string_view k, std::string_view) {
2793
2
        ++num_scanned;
2794
2795
2
        std::string_view k1(k);
2796
2
        int64_t db_id, table_id, partition_id;
2797
2
        if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id,
2798
2
                                                            &partition_id)) {
2799
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
2800
0
            return -1;
2801
2
        } else if (table_id != current_table_id) {
2802
2
            current_table_id = table_id;
2803
2
            is_table_kvs_recycled = false;
2804
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
2805
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
2806
2
            if (err != TxnErrorCode::TXN_OK) {
2807
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
2808
0
                             << " table_id=" << table_id << " err=" << err;
2809
0
                return -1;
2810
0
            }
2811
2
        }
2812
2813
2
        if (!is_empty_table) {
2814
            // table is not empty, skip recycle
2815
1
            return 0;
2816
1
        }
2817
2818
1
        std::unique_ptr<Transaction> txn;
2819
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2820
1
        if (err != TxnErrorCode::TXN_OK) {
2821
0
            return -1;
2822
0
        }
2823
2824
        // 1. Remove all partition related kvs
2825
1
        std::string partition_meta_key =
2826
1
                versioned::meta_partition_key({instance_id_, partition_id});
2827
1
        std::string partition_index_key =
2828
1
                versioned::partition_index_key({instance_id_, partition_id});
2829
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
2830
1
                {instance_id_, db_id, table_id, partition_id});
2831
1
        std::string partition_version_key =
2832
1
                versioned::partition_version_key({instance_id_, partition_id});
2833
1
        txn->remove(partition_index_key);
2834
1
        txn->remove(partition_inverted_key);
2835
1
        versioned_remove_all(txn.get(), partition_meta_key);
2836
1
        versioned_remove_all(txn.get(), partition_version_key);
2837
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
2838
1
                     << " table_id=" << table_id << " db_id=" << db_id
2839
1
                     << " partition_meta_key=" << hex(partition_meta_key)
2840
1
                     << " partition_version_key=" << hex(partition_version_key);
2841
2842
1
        if (!is_table_kvs_recycled) {
2843
1
            is_table_kvs_recycled = true;
2844
2845
            // 2. Remove the table version kv of this table
2846
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
2847
1
            versioned_remove_all(txn.get(), table_version_key);
2848
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
2849
            // 3. Remove mow delete bitmap update lock and tablet job lock
2850
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2851
1
            txn->remove(lock_key);
2852
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2853
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2854
1
            std::string tablet_job_key_end =
2855
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2856
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
2857
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2858
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2859
1
                         << " table_id=" << table_id;
2860
1
        }
2861
2862
1
        err = txn->commit();
2863
1
        if (err != TxnErrorCode::TXN_OK) {
2864
0
            return -1;
2865
0
        }
2866
1
        metrics_context.total_recycled_num = ++num_recycled;
2867
1
        metrics_context.report();
2868
1
        return 0;
2869
1
    };
2870
2871
    // recycle_func and loop_done for scan and recycle
2872
3
    return scan_and_recycle(
2873
3
            versioned::partition_inverted_index_key({instance_id_, 0, 0, 0}),
2874
3
            versioned::partition_inverted_index_key({instance_id_, INT64_MAX, 0, 0}),
2875
3
            std::move(recycle_func));
2876
3
}
2877
2878
int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id,
2879
                                      RecyclerMetricsContext& metrics_context,
2880
52
                                      int64_t partition_id) {
2881
52
    bool is_multi_version =
2882
52
            instance_info_.has_multi_version_status() &&
2883
52
            instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED;
2884
52
    int64_t num_scanned = 0;
2885
52
    std::atomic_long num_recycled = 0;
2886
2887
52
    std::string tablet_key_begin, tablet_key_end;
2888
52
    std::string stats_key_begin, stats_key_end;
2889
52
    std::string job_key_begin, job_key_end;
2890
2891
52
    std::string tablet_belongs;
2892
52
    if (partition_id > 0) {
2893
        // recycle tablets in a partition belonging to the index
2894
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
2895
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
2896
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &stats_key_begin);
2897
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &stats_key_end);
2898
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &job_key_begin);
2899
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &job_key_end);
2900
33
        tablet_belongs = "partition";
2901
33
    } else {
2902
        // recycle tablets in the index
2903
19
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
2904
19
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
2905
19
        stats_tablet_key({instance_id_, table_id, index_id, 0, 0}, &stats_key_begin);
2906
19
        stats_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &stats_key_end);
2907
19
        job_tablet_key({instance_id_, table_id, index_id, 0, 0}, &job_key_begin);
2908
19
        job_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &job_key_end);
2909
19
        tablet_belongs = "index";
2910
19
    }
2911
2912
52
    LOG_INFO("begin to recycle tablets of the " + tablet_belongs)
2913
52
            .tag("table_id", table_id)
2914
52
            .tag("index_id", index_id)
2915
52
            .tag("partition_id", partition_id);
2916
2917
52
    auto start_time = steady_clock::now();
2918
2919
52
    DORIS_CLOUD_DEFER {
2920
52
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2921
52
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2922
52
                .tag("instance_id", instance_id_)
2923
52
                .tag("table_id", table_id)
2924
52
                .tag("index_id", index_id)
2925
52
                .tag("partition_id", partition_id)
2926
52
                .tag("num_scanned", num_scanned)
2927
52
                .tag("num_recycled", num_recycled);
2928
52
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
2919
4
    DORIS_CLOUD_DEFER {
2920
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2921
4
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2922
4
                .tag("instance_id", instance_id_)
2923
4
                .tag("table_id", table_id)
2924
4
                .tag("index_id", index_id)
2925
4
                .tag("partition_id", partition_id)
2926
4
                .tag("num_scanned", num_scanned)
2927
4
                .tag("num_recycled", num_recycled);
2928
4
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
2919
48
    DORIS_CLOUD_DEFER {
2920
48
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2921
48
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2922
48
                .tag("instance_id", instance_id_)
2923
48
                .tag("table_id", table_id)
2924
48
                .tag("index_id", index_id)
2925
48
                .tag("partition_id", partition_id)
2926
48
                .tag("num_scanned", num_scanned)
2927
48
                .tag("num_recycled", num_recycled);
2928
48
    };
2929
2930
    // The tablet key and id which have been recycled.
2931
52
    struct TabletInfo {
2932
52
        std::string_view tablet_meta_key;
2933
52
        int64_t tablet_id;
2934
52
    };
2935
52
    SyncExecutor<TabletInfo> sync_executor(
2936
52
            _thread_pool_group.recycle_tablet_pool,
2937
52
            fmt::format("recycle tablets, tablet id {}, index id {}, partition id {}", table_id,
2938
52
                        index_id, partition_id),
2939
4.24k
            [](const TabletInfo& k) { return k.tablet_meta_key.empty(); });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKZNS1_15recycle_tabletsEllS3_lE10TabletInfo
Line
Count
Source
2939
4.00k
            [](const TabletInfo& k) { return k.tablet_meta_key.empty(); });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKZNS1_15recycle_tabletsEllS3_lE10TabletInfo
Line
Count
Source
2939
241
            [](const TabletInfo& k) { return k.tablet_meta_key.empty(); });
2940
2941
    // Elements in `tablets_info` has the same lifetime as `it` in `scan_and_recycle`
2942
52
    std::vector<std::string> init_rs_keys;
2943
52
    bool has_failure = false;
2944
8.25k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2945
8.25k
        ++num_scanned;
2946
8.25k
        doris::TabletMetaCloudPB tablet_meta_pb;
2947
8.25k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2948
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2949
0
            has_failure = true;
2950
0
            return -1;
2951
0
        }
2952
8.25k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2953
2954
8.25k
        if (config::enable_recycler_check_lazy_txn_finished &&
2955
8.25k
            !check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2956
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2957
4.00k
            has_failure = true;
2958
4.00k
            return -1;
2959
4.00k
        }
2960
2961
4.25k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2962
4.25k
        sync_executor.add(
2963
4.25k
                [this, &num_recycled, tid = tablet_id, &metrics_context, k]() -> TabletInfo {
2964
4.25k
                    if (recycle_tablet(tid, metrics_context) != 0) {
2965
2
                        LOG_WARNING("failed to recycle tablet")
2966
2
                                .tag("instance_id", instance_id_)
2967
2
                                .tag("tablet_id", tid);
2968
2
                        return {.tablet_meta_key = std::string_view(), .tablet_id = tid};
2969
2
                    }
2970
4.25k
                    ++num_recycled;
2971
4.25k
                    LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2972
4.25k
                    return {.tablet_meta_key = k, .tablet_id = tid};
2973
4.25k
                });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
Line
Count
Source
2963
4.00k
                [this, &num_recycled, tid = tablet_id, &metrics_context, k]() -> TabletInfo {
2964
4.00k
                    if (recycle_tablet(tid, metrics_context) != 0) {
2965
0
                        LOG_WARNING("failed to recycle tablet")
2966
0
                                .tag("instance_id", instance_id_)
2967
0
                                .tag("tablet_id", tid);
2968
0
                        return {.tablet_meta_key = std::string_view(), .tablet_id = tid};
2969
0
                    }
2970
4.00k
                    ++num_recycled;
2971
4.00k
                    LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2972
4.00k
                    return {.tablet_meta_key = k, .tablet_id = tid};
2973
4.00k
                });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
Line
Count
Source
2963
250
                [this, &num_recycled, tid = tablet_id, &metrics_context, k]() -> TabletInfo {
2964
250
                    if (recycle_tablet(tid, metrics_context) != 0) {
2965
2
                        LOG_WARNING("failed to recycle tablet")
2966
2
                                .tag("instance_id", instance_id_)
2967
2
                                .tag("tablet_id", tid);
2968
2
                        return {.tablet_meta_key = std::string_view(), .tablet_id = tid};
2969
2
                    }
2970
248
                    ++num_recycled;
2971
248
                    LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2972
248
                    return {.tablet_meta_key = k, .tablet_id = tid};
2973
250
                });
2974
4.25k
        return 0;
2975
4.25k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
2944
8.00k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2945
8.00k
        ++num_scanned;
2946
8.00k
        doris::TabletMetaCloudPB tablet_meta_pb;
2947
8.00k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2948
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2949
0
            has_failure = true;
2950
0
            return -1;
2951
0
        }
2952
8.00k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2953
2954
8.00k
        if (config::enable_recycler_check_lazy_txn_finished &&
2955
8.00k
            !check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2956
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2957
4.00k
            has_failure = true;
2958
4.00k
            return -1;
2959
4.00k
        }
2960
2961
4.00k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2962
4.00k
        sync_executor.add(
2963
4.00k
                [this, &num_recycled, tid = tablet_id, &metrics_context, k]() -> TabletInfo {
2964
4.00k
                    if (recycle_tablet(tid, metrics_context) != 0) {
2965
4.00k
                        LOG_WARNING("failed to recycle tablet")
2966
4.00k
                                .tag("instance_id", instance_id_)
2967
4.00k
                                .tag("tablet_id", tid);
2968
4.00k
                        return {.tablet_meta_key = std::string_view(), .tablet_id = tid};
2969
4.00k
                    }
2970
4.00k
                    ++num_recycled;
2971
4.00k
                    LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2972
4.00k
                    return {.tablet_meta_key = k, .tablet_id = tid};
2973
4.00k
                });
2974
4.00k
        return 0;
2975
4.00k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
2944
251
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2945
251
        ++num_scanned;
2946
251
        doris::TabletMetaCloudPB tablet_meta_pb;
2947
251
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2948
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2949
0
            has_failure = true;
2950
0
            return -1;
2951
0
        }
2952
251
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2953
2954
251
        if (config::enable_recycler_check_lazy_txn_finished &&
2955
251
            !check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2956
1
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2957
1
            has_failure = true;
2958
1
            return -1;
2959
1
        }
2960
2961
250
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2962
250
        sync_executor.add(
2963
250
                [this, &num_recycled, tid = tablet_id, &metrics_context, k]() -> TabletInfo {
2964
250
                    if (recycle_tablet(tid, metrics_context) != 0) {
2965
250
                        LOG_WARNING("failed to recycle tablet")
2966
250
                                .tag("instance_id", instance_id_)
2967
250
                                .tag("tablet_id", tid);
2968
250
                        return {.tablet_meta_key = std::string_view(), .tablet_id = tid};
2969
250
                    }
2970
250
                    ++num_recycled;
2971
250
                    LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2972
250
                    return {.tablet_meta_key = k, .tablet_id = tid};
2973
250
                });
2974
250
        return 0;
2975
250
    };
2976
2977
52
    auto loop_done = [&, this]() -> int {
2978
52
        int ret = 0;
2979
52
        bool finished = true;
2980
52
        bool has_empty_key = false;
2981
52
        DORIS_CLOUD_DEFER {
2982
52
            init_rs_keys.clear();
2983
52
            has_failure = false;
2984
52
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2981
4
        DORIS_CLOUD_DEFER {
2982
4
            init_rs_keys.clear();
2983
4
            has_failure = false;
2984
4
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2981
48
        DORIS_CLOUD_DEFER {
2982
48
            init_rs_keys.clear();
2983
48
            has_failure = false;
2984
48
        };
2985
52
        auto tablets_info = sync_executor.when_all(&finished);
2986
52
        if (!finished) {
2987
1
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2988
1
            return -1;
2989
1
        }
2990
2991
51
        size_t size_before_erase = tablets_info.size();
2992
4.25k
        std::erase_if(tablets_info, [](const TabletInfo& t) { return t.tablet_meta_key.empty(); });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKZNS1_15recycle_tabletsEllS3_lE10TabletInfoE_clES7_
Line
Count
Source
2992
4.00k
        std::erase_if(tablets_info, [](const TabletInfo& t) { return t.tablet_meta_key.empty(); });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKZNS1_15recycle_tabletsEllS3_lE10TabletInfoE_clES7_
Line
Count
Source
2992
249
        std::erase_if(tablets_info, [](const TabletInfo& t) { return t.tablet_meta_key.empty(); });
2993
51
        if (tablets_info.empty()) {
2994
2
            return size_before_erase == 0 ? 0 : -1;
2995
49
        } else if (size_before_erase != tablets_info.size()) {
2996
1
            has_empty_key = true;
2997
1
        }
2998
2999
49
        ret = has_empty_key ? -1 : 0;
3000
        // sort the vector using key's order
3001
49.4k
        std::ranges::sort(tablets_info, [](const auto& prev, const auto& last) {
3002
49.4k
            return prev.tablet_meta_key < last.tablet_meta_key;
3003
49.4k
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clIZNS1_15recycle_tabletsEllS3_lE10TabletInfoSD_EEDaS7_SA_
Line
Count
Source
3001
48.4k
        std::ranges::sort(tablets_info, [](const auto& prev, const auto& last) {
3002
48.4k
            return prev.tablet_meta_key < last.tablet_meta_key;
3003
48.4k
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clIZNS1_15recycle_tabletsEllS3_lE10TabletInfoSD_EEDaS7_SA_
Line
Count
Source
3001
958
        std::ranges::sort(tablets_info, [](const auto& prev, const auto& last) {
3002
958
            return prev.tablet_meta_key < last.tablet_meta_key;
3003
958
        });
3004
49
        std::unique_ptr<Transaction> txn;
3005
49
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3006
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
3007
0
            return -1;
3008
0
        }
3009
49
        std::string tablet_key_end;
3010
49
        if (!tablets_info.empty()) {
3011
49
            if (!has_empty_key && !has_failure) {
3012
47
                tablet_key_end = std::string(tablets_info.back().tablet_meta_key) + '\x00';
3013
47
                txn->remove(tablets_info.front().tablet_meta_key, tablet_key_end);
3014
47
            } else {
3015
8
                for (auto& tablet_info : tablets_info) {
3016
8
                    txn->remove(tablet_info.tablet_meta_key);
3017
8
                }
3018
2
            }
3019
49
        }
3020
49
        if (is_multi_version) {
3021
6
            for (auto& tablet_info : tablets_info) {
3022
                // Remove all versions of tablet compact stats for recycled tablet
3023
6
                auto k = versioned::tablet_compact_stats_key({instance_id_, tablet_info.tablet_id});
3024
6
                LOG_INFO("remove versioned tablet compact stats key")
3025
6
                        .tag("compact_stats_key", hex(k));
3026
6
                versioned_remove_all(txn.get(), k);
3027
6
            }
3028
6
            for (auto& tablet_info : tablets_info) {
3029
                // Remove all versions of tablet load stats for recycled tablet
3030
6
                auto k = versioned::tablet_load_stats_key({instance_id_, tablet_info.tablet_id});
3031
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
3032
6
                versioned_remove_all(txn.get(), k);
3033
6
            }
3034
6
            for (auto& tablet_info : tablets_info) {
3035
                // Remove all versions of meta tablet for recycled tablet
3036
6
                auto k = versioned::meta_tablet_key({instance_id_, tablet_info.tablet_id});
3037
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
3038
6
                versioned_remove_all(txn.get(), k);
3039
6
            }
3040
5
        }
3041
4.25k
        for (auto& tablet_info : tablets_info) {
3042
4.25k
            std::string k;
3043
4.25k
            meta_tablet_idx_key({instance_id_, tablet_info.tablet_id}, &k);
3044
4.25k
            txn->remove(k);
3045
4.25k
        }
3046
4.25k
        for (auto& tablet_info : tablets_info) {
3047
4.25k
            std::string k;
3048
4.25k
            job_restore_tablet_key({instance_id_, tablet_info.tablet_id}, &k);
3049
4.25k
            txn->remove(k);
3050
4.25k
        }
3051
49
        for (auto& k : init_rs_keys) {
3052
0
            txn->remove(k);
3053
0
        }
3054
49
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
3055
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
3056
0
                         << ", err=" << err;
3057
0
            return -1;
3058
0
        }
3059
49
        return ret;
3060
49
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
2977
4
    auto loop_done = [&, this]() -> int {
2978
4
        int ret = 0;
2979
4
        bool finished = true;
2980
4
        bool has_empty_key = false;
2981
4
        DORIS_CLOUD_DEFER {
2982
4
            init_rs_keys.clear();
2983
4
            has_failure = false;
2984
4
        };
2985
4
        auto tablets_info = sync_executor.when_all(&finished);
2986
4
        if (!finished) {
2987
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2988
0
            return -1;
2989
0
        }
2990
2991
4
        size_t size_before_erase = tablets_info.size();
2992
4
        std::erase_if(tablets_info, [](const TabletInfo& t) { return t.tablet_meta_key.empty(); });
2993
4
        if (tablets_info.empty()) {
2994
2
            return size_before_erase == 0 ? 0 : -1;
2995
2
        } else if (size_before_erase != tablets_info.size()) {
2996
0
            has_empty_key = true;
2997
0
        }
2998
2999
2
        ret = has_empty_key ? -1 : 0;
3000
        // sort the vector using key's order
3001
2
        std::ranges::sort(tablets_info, [](const auto& prev, const auto& last) {
3002
2
            return prev.tablet_meta_key < last.tablet_meta_key;
3003
2
        });
3004
2
        std::unique_ptr<Transaction> txn;
3005
2
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3006
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
3007
0
            return -1;
3008
0
        }
3009
2
        std::string tablet_key_end;
3010
2
        if (!tablets_info.empty()) {
3011
2
            if (!has_empty_key && !has_failure) {
3012
2
                tablet_key_end = std::string(tablets_info.back().tablet_meta_key) + '\x00';
3013
2
                txn->remove(tablets_info.front().tablet_meta_key, tablet_key_end);
3014
2
            } else {
3015
0
                for (auto& tablet_info : tablets_info) {
3016
0
                    txn->remove(tablet_info.tablet_meta_key);
3017
0
                }
3018
0
            }
3019
2
        }
3020
2
        if (is_multi_version) {
3021
0
            for (auto& tablet_info : tablets_info) {
3022
                // Remove all versions of tablet compact stats for recycled tablet
3023
0
                auto k = versioned::tablet_compact_stats_key({instance_id_, tablet_info.tablet_id});
3024
0
                LOG_INFO("remove versioned tablet compact stats key")
3025
0
                        .tag("compact_stats_key", hex(k));
3026
0
                versioned_remove_all(txn.get(), k);
3027
0
            }
3028
0
            for (auto& tablet_info : tablets_info) {
3029
                // Remove all versions of tablet load stats for recycled tablet
3030
0
                auto k = versioned::tablet_load_stats_key({instance_id_, tablet_info.tablet_id});
3031
0
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
3032
0
                versioned_remove_all(txn.get(), k);
3033
0
            }
3034
0
            for (auto& tablet_info : tablets_info) {
3035
                // Remove all versions of meta tablet for recycled tablet
3036
0
                auto k = versioned::meta_tablet_key({instance_id_, tablet_info.tablet_id});
3037
0
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
3038
0
                versioned_remove_all(txn.get(), k);
3039
0
            }
3040
0
        }
3041
4.00k
        for (auto& tablet_info : tablets_info) {
3042
4.00k
            std::string k;
3043
4.00k
            meta_tablet_idx_key({instance_id_, tablet_info.tablet_id}, &k);
3044
4.00k
            txn->remove(k);
3045
4.00k
        }
3046
4.00k
        for (auto& tablet_info : tablets_info) {
3047
4.00k
            std::string k;
3048
4.00k
            job_restore_tablet_key({instance_id_, tablet_info.tablet_id}, &k);
3049
4.00k
            txn->remove(k);
3050
4.00k
        }
3051
2
        for (auto& k : init_rs_keys) {
3052
0
            txn->remove(k);
3053
0
        }
3054
2
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
3055
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
3056
0
                         << ", err=" << err;
3057
0
            return -1;
3058
0
        }
3059
2
        return ret;
3060
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
2977
48
    auto loop_done = [&, this]() -> int {
2978
48
        int ret = 0;
2979
48
        bool finished = true;
2980
48
        bool has_empty_key = false;
2981
48
        DORIS_CLOUD_DEFER {
2982
48
            init_rs_keys.clear();
2983
48
            has_failure = false;
2984
48
        };
2985
48
        auto tablets_info = sync_executor.when_all(&finished);
2986
48
        if (!finished) {
2987
1
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2988
1
            return -1;
2989
1
        }
2990
2991
47
        size_t size_before_erase = tablets_info.size();
2992
47
        std::erase_if(tablets_info, [](const TabletInfo& t) { return t.tablet_meta_key.empty(); });
2993
47
        if (tablets_info.empty()) {
2994
0
            return size_before_erase == 0 ? 0 : -1;
2995
47
        } else if (size_before_erase != tablets_info.size()) {
2996
1
            has_empty_key = true;
2997
1
        }
2998
2999
47
        ret = has_empty_key ? -1 : 0;
3000
        // sort the vector using key's order
3001
47
        std::ranges::sort(tablets_info, [](const auto& prev, const auto& last) {
3002
47
            return prev.tablet_meta_key < last.tablet_meta_key;
3003
47
        });
3004
47
        std::unique_ptr<Transaction> txn;
3005
47
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3006
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
3007
0
            return -1;
3008
0
        }
3009
47
        std::string tablet_key_end;
3010
47
        if (!tablets_info.empty()) {
3011
47
            if (!has_empty_key && !has_failure) {
3012
45
                tablet_key_end = std::string(tablets_info.back().tablet_meta_key) + '\x00';
3013
45
                txn->remove(tablets_info.front().tablet_meta_key, tablet_key_end);
3014
45
            } else {
3015
8
                for (auto& tablet_info : tablets_info) {
3016
8
                    txn->remove(tablet_info.tablet_meta_key);
3017
8
                }
3018
2
            }
3019
47
        }
3020
47
        if (is_multi_version) {
3021
6
            for (auto& tablet_info : tablets_info) {
3022
                // Remove all versions of tablet compact stats for recycled tablet
3023
6
                auto k = versioned::tablet_compact_stats_key({instance_id_, tablet_info.tablet_id});
3024
6
                LOG_INFO("remove versioned tablet compact stats key")
3025
6
                        .tag("compact_stats_key", hex(k));
3026
6
                versioned_remove_all(txn.get(), k);
3027
6
            }
3028
6
            for (auto& tablet_info : tablets_info) {
3029
                // Remove all versions of tablet load stats for recycled tablet
3030
6
                auto k = versioned::tablet_load_stats_key({instance_id_, tablet_info.tablet_id});
3031
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
3032
6
                versioned_remove_all(txn.get(), k);
3033
6
            }
3034
6
            for (auto& tablet_info : tablets_info) {
3035
                // Remove all versions of meta tablet for recycled tablet
3036
6
                auto k = versioned::meta_tablet_key({instance_id_, tablet_info.tablet_id});
3037
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
3038
6
                versioned_remove_all(txn.get(), k);
3039
6
            }
3040
5
        }
3041
248
        for (auto& tablet_info : tablets_info) {
3042
248
            std::string k;
3043
248
            meta_tablet_idx_key({instance_id_, tablet_info.tablet_id}, &k);
3044
248
            txn->remove(k);
3045
248
        }
3046
248
        for (auto& tablet_info : tablets_info) {
3047
248
            std::string k;
3048
248
            job_restore_tablet_key({instance_id_, tablet_info.tablet_id}, &k);
3049
248
            txn->remove(k);
3050
248
        }
3051
47
        for (auto& k : init_rs_keys) {
3052
0
            txn->remove(k);
3053
0
        }
3054
47
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
3055
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
3056
0
                         << ", err=" << err;
3057
0
            return -1;
3058
0
        }
3059
47
        return ret;
3060
47
    };
3061
3062
52
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(recycle_func),
3063
52
                               std::move(loop_done));
3064
52
    if (ret != 0) {
3065
5
        LOG(WARNING) << "failed to scan_and_recycle, instance_id=" << instance_id_;
3066
5
        return ret;
3067
5
    }
3068
3069
    // directly remove tablet stats and tablet jobs of this dropped index or partition
3070
47
    std::unique_ptr<Transaction> txn;
3071
47
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3072
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_;
3073
0
        return -1;
3074
0
    }
3075
47
    txn->remove(stats_key_begin, stats_key_end);
3076
47
    LOG(WARNING) << "remove stats kv, begin=" << hex(stats_key_begin)
3077
47
                 << " end=" << hex(stats_key_end);
3078
47
    txn->remove(job_key_begin, job_key_end);
3079
47
    LOG(WARNING) << "remove job kv, begin=" << hex(job_key_begin) << " end=" << hex(job_key_end);
3080
47
    std::string schema_key_begin, schema_key_end;
3081
47
    std::string schema_dict_key;
3082
47
    std::string versioned_schema_key_begin, versioned_schema_key_end;
3083
47
    if (partition_id <= 0) {
3084
        // Delete schema kv of this index
3085
15
        meta_schema_key({instance_id_, index_id, 0}, &schema_key_begin);
3086
15
        meta_schema_key({instance_id_, index_id + 1, 0}, &schema_key_end);
3087
15
        txn->remove(schema_key_begin, schema_key_end);
3088
15
        LOG(WARNING) << "remove schema kv, begin=" << hex(schema_key_begin)
3089
15
                     << " end=" << hex(schema_key_end);
3090
15
        meta_schema_pb_dictionary_key({instance_id_, index_id}, &schema_dict_key);
3091
15
        txn->remove(schema_dict_key);
3092
15
        LOG(WARNING) << "remove schema dict kv, key=" << hex(schema_dict_key);
3093
15
        versioned::meta_schema_key({instance_id_, index_id, 0}, &versioned_schema_key_begin);
3094
15
        versioned::meta_schema_key({instance_id_, index_id + 1, 0}, &versioned_schema_key_end);
3095
15
        txn->remove(versioned_schema_key_begin, versioned_schema_key_end);
3096
15
        LOG(WARNING) << "remove versioned schema kv, begin=" << hex(versioned_schema_key_begin)
3097
15
                     << " end=" << hex(versioned_schema_key_end);
3098
15
    }
3099
3100
47
    TxnErrorCode err = txn->commit();
3101
47
    if (err != TxnErrorCode::TXN_OK) {
3102
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_
3103
0
                     << " err=" << err;
3104
0
        return -1;
3105
0
    }
3106
3107
47
    return ret;
3108
47
}
3109
3110
5.61k
int InstanceRecycler::delete_rowset_data(const RowsetMetaCloudPB& rs_meta_pb) {
3111
5.61k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("delete_rowset_data::bypass_check", true);
3112
5.61k
    int64_t num_segments = rs_meta_pb.num_segments();
3113
5.61k
    if (num_segments <= 0) return 0;
3114
3115
5.61k
    std::vector<std::string> file_paths;
3116
5.61k
    if (decrement_packed_file_ref_counts(rs_meta_pb) != 0) {
3117
0
        return -1;
3118
0
    }
3119
3120
    // Process inverted indexes
3121
5.61k
    std::vector<std::pair<int64_t, std::string>> index_ids;
3122
    // default format as v1.
3123
5.61k
    InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
3124
5.61k
    bool delete_rowset_data_by_prefix = false;
3125
5.61k
    if (rs_meta_pb.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
3126
        // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
3127
        // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
3128
0
        delete_rowset_data_by_prefix = true;
3129
5.61k
    } else if (rs_meta_pb.has_tablet_schema()) {
3130
10.0k
        for (const auto& index : rs_meta_pb.tablet_schema().index()) {
3131
10.0k
            if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
3132
10.0k
                index_ids.emplace_back(index.index_id(), index.index_suffix_name());
3133
10.0k
            }
3134
10.0k
        }
3135
4.80k
        if (rs_meta_pb.tablet_schema().has_inverted_index_storage_format()) {
3136
2.00k
            index_format = rs_meta_pb.tablet_schema().inverted_index_storage_format();
3137
2.00k
        }
3138
4.80k
    } else if (!rs_meta_pb.has_index_id() || !rs_meta_pb.has_schema_version()) {
3139
        // schema version and index id are not found, delete rowset data by prefix directly.
3140
0
        delete_rowset_data_by_prefix = true;
3141
809
    } else {
3142
        // otherwise, try to get schema kv
3143
809
        InvertedIndexInfo index_info;
3144
809
        int inverted_index_get_ret = inverted_index_id_cache_->get(
3145
809
                rs_meta_pb.index_id(), rs_meta_pb.schema_version(), index_info);
3146
809
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
3147
809
                                 &inverted_index_get_ret);
3148
809
        if (inverted_index_get_ret == 0) {
3149
809
            index_format = index_info.first;
3150
809
            index_ids = index_info.second;
3151
809
        } else if (inverted_index_get_ret == 1) {
3152
            // 1. Schema kv not found means tablet has been recycled
3153
            // Recycling of some tablets may have failed because of bugs
3154
            // We need to delete again to double check
3155
            // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
3156
            // because we are uncertain about the inverted index information.
3157
            // If there are inverted indexes, some data might not be deleted,
3158
            // but this is acceptable as we have made our best effort to delete the data.
3159
0
            LOG_INFO(
3160
0
                    "delete rowset data schema kv not found, need to delete again to double "
3161
0
                    "check")
3162
0
                    .tag("instance_id", instance_id_)
3163
0
                    .tag("tablet_id", rs_meta_pb.tablet_id())
3164
0
                    .tag("rowset", rs_meta_pb.ShortDebugString());
3165
            // Currently index_ids is guaranteed to be empty,
3166
            // but we clear it again here as a safeguard against future code changes
3167
            // that might cause index_ids to no longer be empty
3168
0
            index_format = InvertedIndexStorageFormatPB::V2;
3169
0
            index_ids.clear();
3170
0
        } else {
3171
            // failed to get schema kv, delete rowset data by prefix directly.
3172
0
            delete_rowset_data_by_prefix = true;
3173
0
        }
3174
809
    }
3175
3176
5.61k
    if (delete_rowset_data_by_prefix) {
3177
0
        return delete_rowset_data(rs_meta_pb.resource_id(), rs_meta_pb.tablet_id(),
3178
0
                                  rs_meta_pb.rowset_id_v2());
3179
0
    }
3180
3181
5.61k
    auto it = accessor_map_.find(rs_meta_pb.resource_id());
3182
5.61k
    if (it == accessor_map_.end()) {
3183
1.59k
        LOG_WARNING("instance has no such resource id")
3184
1.59k
                .tag("instance_id", instance_id_)
3185
1.59k
                .tag("resource_id", rs_meta_pb.resource_id());
3186
1.59k
        return -1;
3187
1.59k
    }
3188
4.01k
    auto& accessor = it->second;
3189
3190
4.01k
    int64_t tablet_id = rs_meta_pb.tablet_id();
3191
4.01k
    const auto& rowset_id = rs_meta_pb.rowset_id_v2();
3192
24.0k
    for (int64_t i = 0; i < num_segments; ++i) {
3193
20.0k
        add_file_to_delete_if_not_packed(rs_meta_pb, segment_path(tablet_id, rowset_id, i),
3194
20.0k
                                         &file_paths);
3195
20.0k
        if (index_format == InvertedIndexStorageFormatPB::V1) {
3196
40.0k
            for (const auto& index_id : index_ids) {
3197
40.0k
                add_file_to_delete_if_not_packed(
3198
40.0k
                        rs_meta_pb,
3199
40.0k
                        inverted_index_path_v1(tablet_id, rowset_id, i, index_id.first,
3200
40.0k
                                               index_id.second),
3201
40.0k
                        &file_paths);
3202
40.0k
            }
3203
20.0k
        } else if (!index_ids.empty()) {
3204
0
            add_file_to_delete_if_not_packed(
3205
0
                    rs_meta_pb, inverted_index_path_v2(tablet_id, rowset_id, i), &file_paths);
3206
0
        }
3207
20.0k
    }
3208
3209
    // Process delete bitmap - check where it's stored.
3210
4.01k
    DeleteBitmapStorageType delete_bitmap_storage_type = DeleteBitmapStorageType::NOT_FOUND;
3211
4.01k
    if (decrement_delete_bitmap_packed_file_ref_counts(tablet_id, rowset_id,
3212
4.01k
                                                       &delete_bitmap_storage_type) != 0) {
3213
0
        LOG_WARNING("failed to decrement delete bitmap packed file ref count")
3214
0
                .tag("instance_id", instance_id_)
3215
0
                .tag("tablet_id", tablet_id)
3216
0
                .tag("rowset_id", rowset_id);
3217
0
        return -1;
3218
0
    }
3219
4.01k
    if (delete_bitmap_storage_type == DeleteBitmapStorageType::STANDALONE_FILE) {
3220
2.00k
        file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
3221
2.00k
    }
3222
    // TODO(AlexYue): it seems this could be done in batches
3223
4.01k
    return accessor->delete_files(file_paths);
3224
4.01k
}
3225
3226
62.3k
int InstanceRecycler::decrement_packed_file_ref_counts(const doris::RowsetMetaCloudPB& rs_meta_pb) {
3227
62.3k
    LOG_INFO("begin process_packed_file_location_index")
3228
62.3k
            .tag("instance_id", instance_id_)
3229
62.3k
            .tag("tablet_id", rs_meta_pb.tablet_id())
3230
62.3k
            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3231
62.3k
            .tag("index_map_size", rs_meta_pb.packed_slice_locations_size());
3232
62.3k
    const auto& index_map = rs_meta_pb.packed_slice_locations();
3233
62.3k
    if (index_map.empty()) {
3234
62.3k
        LOG_INFO("skip merge file update: empty merge_file_segment_index")
3235
62.3k
                .tag("instance_id", instance_id_)
3236
62.3k
                .tag("tablet_id", rs_meta_pb.tablet_id())
3237
62.3k
                .tag("rowset_id", rs_meta_pb.rowset_id_v2());
3238
62.3k
        return 0;
3239
62.3k
    }
3240
3241
17
    struct PackedSmallFileInfo {
3242
17
        std::string small_file_path;
3243
17
    };
3244
17
    std::unordered_map<std::string, std::vector<PackedSmallFileInfo>> packed_file_updates;
3245
17
    packed_file_updates.reserve(index_map.size());
3246
27
    for (const auto& [small_path, index_pb] : index_map) {
3247
27
        if (!index_pb.has_packed_file_path() || index_pb.packed_file_path().empty()) {
3248
0
            continue;
3249
0
        }
3250
27
        packed_file_updates[index_pb.packed_file_path()].push_back(
3251
27
                PackedSmallFileInfo {small_path});
3252
27
    }
3253
17
    if (packed_file_updates.empty()) {
3254
0
        LOG_INFO("skip packed file update: no valid merge_file_path in merge_file_segment_index")
3255
0
                .tag("instance_id", instance_id_)
3256
0
                .tag("tablet_id", rs_meta_pb.tablet_id())
3257
0
                .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3258
0
                .tag("index_map_size", index_map.size());
3259
0
        return 0;
3260
0
    }
3261
3262
17
    const int max_retry_times = std::max(1, config::decrement_packed_file_ref_counts_retry_times);
3263
17
    int ret = 0;
3264
24
    for (auto& [packed_file_path, small_files] : packed_file_updates) {
3265
24
        if (small_files.empty()) {
3266
0
            continue;
3267
0
        }
3268
3269
24
        bool success = false;
3270
24
        for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
3271
24
            std::unique_ptr<Transaction> txn;
3272
24
            TxnErrorCode err = txn_kv_->create_txn(&txn);
3273
24
            if (err != TxnErrorCode::TXN_OK) {
3274
0
                LOG_WARNING("failed to create txn when updating packed file ref count")
3275
0
                        .tag("instance_id", instance_id_)
3276
0
                        .tag("packed_file_path", packed_file_path)
3277
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3278
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
3279
0
                        .tag("err", err);
3280
0
                ret = -1;
3281
0
                break;
3282
0
            }
3283
3284
24
            std::string packed_key = packed_file_key({instance_id_, packed_file_path});
3285
24
            std::string packed_val;
3286
24
            err = txn->get(packed_key, &packed_val);
3287
24
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3288
0
                LOG_WARNING("packed file info not found when recycling rowset")
3289
0
                        .tag("instance_id", instance_id_)
3290
0
                        .tag("packed_file_path", packed_file_path)
3291
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3292
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
3293
0
                        .tag("key", hex(packed_key))
3294
0
                        .tag("tablet id", rs_meta_pb.tablet_id());
3295
                // Skip this packed file entry and continue with others
3296
0
                success = true;
3297
0
                break;
3298
0
            }
3299
24
            if (err != TxnErrorCode::TXN_OK) {
3300
0
                LOG_WARNING("failed to get packed file info when recycling rowset")
3301
0
                        .tag("instance_id", instance_id_)
3302
0
                        .tag("packed_file_path", packed_file_path)
3303
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3304
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
3305
0
                        .tag("err", err);
3306
0
                ret = -1;
3307
0
                break;
3308
0
            }
3309
3310
24
            cloud::PackedFileInfoPB packed_info;
3311
24
            if (!packed_info.ParseFromString(packed_val)) {
3312
0
                LOG_WARNING("failed to parse packed file info when recycling rowset")
3313
0
                        .tag("instance_id", instance_id_)
3314
0
                        .tag("packed_file_path", packed_file_path)
3315
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3316
0
                        .tag("tablet_id", rs_meta_pb.tablet_id());
3317
0
                ret = -1;
3318
0
                break;
3319
0
            }
3320
3321
24
            LOG_INFO("packed file update check")
3322
24
                    .tag("instance_id", instance_id_)
3323
24
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3324
24
                    .tag("tablet_id", rs_meta_pb.tablet_id())
3325
24
                    .tag("merged_file_path", packed_file_path)
3326
24
                    .tag("requested_small_files", small_files.size())
3327
24
                    .tag("merge_entries", packed_info.slices_size());
3328
3329
24
            auto* small_file_entries = packed_info.mutable_slices();
3330
24
            int64_t changed_files = 0;
3331
24
            int64_t missing_entries = 0;
3332
24
            int64_t already_deleted = 0;
3333
27
            for (const auto& small_file_info : small_files) {
3334
27
                bool found = false;
3335
87
                for (auto& small_file_entry : *small_file_entries) {
3336
87
                    if (small_file_entry.path() == small_file_info.small_file_path) {
3337
27
                        if (!small_file_entry.deleted()) {
3338
27
                            small_file_entry.set_deleted(true);
3339
27
                            if (!small_file_entry.corrected()) {
3340
27
                                small_file_entry.set_corrected(true);
3341
27
                            }
3342
27
                            ++changed_files;
3343
27
                        } else {
3344
0
                            ++already_deleted;
3345
0
                        }
3346
27
                        found = true;
3347
27
                        break;
3348
27
                    }
3349
87
                }
3350
27
                if (!found) {
3351
0
                    ++missing_entries;
3352
0
                    LOG_WARNING("packed file info missing small file entry")
3353
0
                            .tag("instance_id", instance_id_)
3354
0
                            .tag("packed_file_path", packed_file_path)
3355
0
                            .tag("small_file_path", small_file_info.small_file_path)
3356
0
                            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3357
0
                            .tag("tablet_id", rs_meta_pb.tablet_id());
3358
0
                }
3359
27
            }
3360
3361
24
            if (changed_files == 0) {
3362
0
                LOG_INFO("skip merge file update: no merge entries changed")
3363
0
                        .tag("instance_id", instance_id_)
3364
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3365
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
3366
0
                        .tag("merged_file_path", packed_file_path)
3367
0
                        .tag("missing_entries", missing_entries)
3368
0
                        .tag("already_deleted", already_deleted)
3369
0
                        .tag("requested_small_files", small_files.size())
3370
0
                        .tag("merge_entries", packed_info.slices_size());
3371
0
                success = true;
3372
0
                break;
3373
0
            }
3374
3375
            // Calculate remaining files
3376
24
            int64_t left_file_count = 0;
3377
24
            int64_t left_file_bytes = 0;
3378
141
            for (const auto& small_file_entry : packed_info.slices()) {
3379
141
                if (!small_file_entry.deleted()) {
3380
57
                    ++left_file_count;
3381
57
                    left_file_bytes += small_file_entry.size();
3382
57
                }
3383
141
            }
3384
24
            packed_info.set_remaining_slice_bytes(left_file_bytes);
3385
24
            packed_info.set_ref_cnt(left_file_count);
3386
24
            LOG_INFO("updated packed file reference info")
3387
24
                    .tag("instance_id", instance_id_)
3388
24
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3389
24
                    .tag("tablet_id", rs_meta_pb.tablet_id())
3390
24
                    .tag("packed_file_path", packed_file_path)
3391
24
                    .tag("ref_cnt", left_file_count)
3392
24
                    .tag("left_file_bytes", left_file_bytes);
3393
3394
24
            if (left_file_count == 0) {
3395
7
                packed_info.set_state(cloud::PackedFileInfoPB::RECYCLING);
3396
7
            }
3397
3398
24
            std::string updated_val;
3399
24
            if (!packed_info.SerializeToString(&updated_val)) {
3400
0
                LOG_WARNING("failed to serialize packed file info when recycling rowset")
3401
0
                        .tag("instance_id", instance_id_)
3402
0
                        .tag("packed_file_path", packed_file_path)
3403
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3404
0
                        .tag("tablet_id", rs_meta_pb.tablet_id());
3405
0
                ret = -1;
3406
0
                break;
3407
0
            }
3408
3409
24
            txn->put(packed_key, updated_val);
3410
24
            err = txn->commit();
3411
24
            if (err == TxnErrorCode::TXN_OK) {
3412
24
                success = true;
3413
24
                if (left_file_count == 0) {
3414
7
                    LOG_INFO("packed file ready to delete, deleting immediately")
3415
7
                            .tag("instance_id", instance_id_)
3416
7
                            .tag("packed_file_path", packed_file_path);
3417
7
                    if (delete_packed_file_and_kv(packed_file_path, packed_key, packed_info) != 0) {
3418
0
                        ret = -1;
3419
0
                    }
3420
7
                }
3421
24
                break;
3422
24
            }
3423
0
            if (err == TxnErrorCode::TXN_CONFLICT) {
3424
0
                if (attempt >= max_retry_times) {
3425
0
                    LOG_WARNING("packed file info update conflict after max retry")
3426
0
                            .tag("instance_id", instance_id_)
3427
0
                            .tag("packed_file_path", packed_file_path)
3428
0
                            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3429
0
                            .tag("tablet_id", rs_meta_pb.tablet_id())
3430
0
                            .tag("changed_files", changed_files)
3431
0
                            .tag("attempt", attempt);
3432
0
                    ret = -1;
3433
0
                    break;
3434
0
                }
3435
0
                LOG_WARNING("packed file info update conflict, retrying")
3436
0
                        .tag("instance_id", instance_id_)
3437
0
                        .tag("packed_file_path", packed_file_path)
3438
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3439
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
3440
0
                        .tag("changed_files", changed_files)
3441
0
                        .tag("attempt", attempt);
3442
0
                sleep_for_packed_file_retry();
3443
0
                continue;
3444
0
            }
3445
3446
0
            LOG_WARNING("failed to commit packed file info update")
3447
0
                    .tag("instance_id", instance_id_)
3448
0
                    .tag("packed_file_path", packed_file_path)
3449
0
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3450
0
                    .tag("tablet_id", rs_meta_pb.tablet_id())
3451
0
                    .tag("err", err)
3452
0
                    .tag("changed_files", changed_files);
3453
0
            ret = -1;
3454
0
            break;
3455
0
        }
3456
3457
24
        if (!success) {
3458
0
            ret = -1;
3459
0
        }
3460
24
    }
3461
3462
17
    return ret;
3463
17
}
3464
3465
int InstanceRecycler::decrement_delete_bitmap_packed_file_ref_counts(
3466
        int64_t tablet_id, const std::string& rowset_id,
3467
58.2k
        DeleteBitmapStorageType* out_storage_type) {
3468
58.2k
    if (out_storage_type) {
3469
58.2k
        *out_storage_type = DeleteBitmapStorageType::NOT_FOUND;
3470
58.2k
    }
3471
3472
    // Get delete bitmap storage info from FDB
3473
58.2k
    std::string dbm_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
3474
58.2k
    std::unique_ptr<Transaction> txn;
3475
58.2k
    TxnErrorCode err = txn_kv_->create_txn(&txn);
3476
58.2k
    if (err != TxnErrorCode::TXN_OK) {
3477
0
        LOG_WARNING("failed to create txn when getting delete bitmap storage")
3478
0
                .tag("instance_id", instance_id_)
3479
0
                .tag("tablet_id", tablet_id)
3480
0
                .tag("rowset_id", rowset_id)
3481
0
                .tag("err", err);
3482
0
        return -1;
3483
0
    }
3484
3485
58.2k
    std::string dbm_val;
3486
58.2k
    err = txn->get(dbm_key, &dbm_val);
3487
58.2k
    if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3488
        // No delete bitmap for this rowset, nothing to do
3489
4.66k
        LOG_INFO("delete bitmap not found, skip packed file ref count decrement")
3490
4.66k
                .tag("instance_id", instance_id_)
3491
4.66k
                .tag("tablet_id", tablet_id)
3492
4.66k
                .tag("rowset_id", rowset_id);
3493
4.66k
        return 0;
3494
4.66k
    }
3495
53.5k
    if (err != TxnErrorCode::TXN_OK) {
3496
0
        LOG_WARNING("failed to get delete bitmap storage")
3497
0
                .tag("instance_id", instance_id_)
3498
0
                .tag("tablet_id", tablet_id)
3499
0
                .tag("rowset_id", rowset_id)
3500
0
                .tag("err", err);
3501
0
        return -1;
3502
0
    }
3503
3504
53.5k
    DeleteBitmapStoragePB storage;
3505
53.5k
    if (!storage.ParseFromString(dbm_val)) {
3506
0
        LOG_WARNING("failed to parse delete bitmap storage")
3507
0
                .tag("instance_id", instance_id_)
3508
0
                .tag("tablet_id", tablet_id)
3509
0
                .tag("rowset_id", rowset_id);
3510
0
        return -1;
3511
0
    }
3512
3513
53.5k
    if (storage.store_in_fdb()) {
3514
0
        if (out_storage_type) {
3515
0
            *out_storage_type = DeleteBitmapStorageType::IN_FDB;
3516
0
        }
3517
0
        return 0;
3518
0
    }
3519
3520
    // Check if delete bitmap is stored in standalone file.
3521
53.5k
    if (!storage.has_packed_slice_location() ||
3522
53.5k
        storage.packed_slice_location().packed_file_path().empty()) {
3523
53.5k
        if (out_storage_type) {
3524
53.5k
            *out_storage_type = DeleteBitmapStorageType::STANDALONE_FILE;
3525
53.5k
        }
3526
53.5k
        return 0;
3527
53.5k
    }
3528
3529
18.4E
    if (out_storage_type) {
3530
0
        *out_storage_type = DeleteBitmapStorageType::PACKED_FILE;
3531
0
    }
3532
3533
18.4E
    const auto& packed_loc = storage.packed_slice_location();
3534
18.4E
    const std::string& packed_file_path = packed_loc.packed_file_path();
3535
3536
18.4E
    LOG_INFO("decrementing delete bitmap packed file ref count")
3537
18.4E
            .tag("instance_id", instance_id_)
3538
18.4E
            .tag("tablet_id", tablet_id)
3539
18.4E
            .tag("rowset_id", rowset_id)
3540
18.4E
            .tag("packed_file_path", packed_file_path);
3541
3542
18.4E
    const int max_retry_times = std::max(1, config::decrement_packed_file_ref_counts_retry_times);
3543
18.4E
    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
3544
0
        std::unique_ptr<Transaction> update_txn;
3545
0
        err = txn_kv_->create_txn(&update_txn);
3546
0
        if (err != TxnErrorCode::TXN_OK) {
3547
0
            LOG_WARNING("failed to create txn for delete bitmap packed file update")
3548
0
                    .tag("instance_id", instance_id_)
3549
0
                    .tag("tablet_id", tablet_id)
3550
0
                    .tag("rowset_id", rowset_id)
3551
0
                    .tag("err", err);
3552
0
            return -1;
3553
0
        }
3554
3555
0
        std::string packed_key = packed_file_key({instance_id_, packed_file_path});
3556
0
        std::string packed_val;
3557
0
        err = update_txn->get(packed_key, &packed_val);
3558
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3559
0
            LOG_WARNING("packed file info not found for delete bitmap")
3560
0
                    .tag("instance_id", instance_id_)
3561
0
                    .tag("tablet_id", tablet_id)
3562
0
                    .tag("rowset_id", rowset_id)
3563
0
                    .tag("packed_file_path", packed_file_path);
3564
0
            return 0;
3565
0
        }
3566
0
        if (err != TxnErrorCode::TXN_OK) {
3567
0
            LOG_WARNING("failed to get packed file info for delete bitmap")
3568
0
                    .tag("instance_id", instance_id_)
3569
0
                    .tag("tablet_id", tablet_id)
3570
0
                    .tag("rowset_id", rowset_id)
3571
0
                    .tag("packed_file_path", packed_file_path)
3572
0
                    .tag("err", err);
3573
0
            return -1;
3574
0
        }
3575
3576
0
        cloud::PackedFileInfoPB packed_info;
3577
0
        if (!packed_info.ParseFromString(packed_val)) {
3578
0
            LOG_WARNING("failed to parse packed file info for delete bitmap")
3579
0
                    .tag("instance_id", instance_id_)
3580
0
                    .tag("tablet_id", tablet_id)
3581
0
                    .tag("rowset_id", rowset_id)
3582
0
                    .tag("packed_file_path", packed_file_path);
3583
0
            return -1;
3584
0
        }
3585
3586
        // Find and mark the small file entry as deleted
3587
        // Use tablet_id and rowset_id to match entry instead of path,
3588
        // because path format may vary with path_version (with or without shard prefix)
3589
0
        auto* entries = packed_info.mutable_slices();
3590
0
        bool found = false;
3591
0
        bool already_deleted = false;
3592
0
        for (auto& entry : *entries) {
3593
0
            if (entry.tablet_id() == tablet_id && entry.rowset_id() == rowset_id) {
3594
0
                if (!entry.deleted()) {
3595
0
                    entry.set_deleted(true);
3596
0
                    if (!entry.corrected()) {
3597
0
                        entry.set_corrected(true);
3598
0
                    }
3599
0
                } else {
3600
0
                    already_deleted = true;
3601
0
                }
3602
0
                found = true;
3603
0
                break;
3604
0
            }
3605
0
        }
3606
3607
0
        if (!found) {
3608
0
            LOG_WARNING("delete bitmap entry not found in packed file")
3609
0
                    .tag("instance_id", instance_id_)
3610
0
                    .tag("tablet_id", tablet_id)
3611
0
                    .tag("rowset_id", rowset_id)
3612
0
                    .tag("packed_file_path", packed_file_path);
3613
0
            return 0;
3614
0
        }
3615
3616
0
        if (already_deleted) {
3617
0
            LOG_INFO("delete bitmap entry already deleted in packed file")
3618
0
                    .tag("instance_id", instance_id_)
3619
0
                    .tag("tablet_id", tablet_id)
3620
0
                    .tag("rowset_id", rowset_id)
3621
0
                    .tag("packed_file_path", packed_file_path);
3622
0
            return 0;
3623
0
        }
3624
3625
        // Calculate remaining files
3626
0
        int64_t left_file_count = 0;
3627
0
        int64_t left_file_bytes = 0;
3628
0
        for (const auto& entry : packed_info.slices()) {
3629
0
            if (!entry.deleted()) {
3630
0
                ++left_file_count;
3631
0
                left_file_bytes += entry.size();
3632
0
            }
3633
0
        }
3634
0
        packed_info.set_remaining_slice_bytes(left_file_bytes);
3635
0
        packed_info.set_ref_cnt(left_file_count);
3636
3637
0
        if (left_file_count == 0) {
3638
0
            packed_info.set_state(cloud::PackedFileInfoPB::RECYCLING);
3639
0
        }
3640
3641
0
        std::string updated_val;
3642
0
        if (!packed_info.SerializeToString(&updated_val)) {
3643
0
            LOG_WARNING("failed to serialize packed file info for delete bitmap")
3644
0
                    .tag("instance_id", instance_id_)
3645
0
                    .tag("tablet_id", tablet_id)
3646
0
                    .tag("rowset_id", rowset_id)
3647
0
                    .tag("packed_file_path", packed_file_path);
3648
0
            return -1;
3649
0
        }
3650
3651
0
        update_txn->put(packed_key, updated_val);
3652
0
        err = update_txn->commit();
3653
0
        if (err == TxnErrorCode::TXN_OK) {
3654
0
            LOG_INFO("delete bitmap packed file ref count decremented")
3655
0
                    .tag("instance_id", instance_id_)
3656
0
                    .tag("tablet_id", tablet_id)
3657
0
                    .tag("rowset_id", rowset_id)
3658
0
                    .tag("packed_file_path", packed_file_path)
3659
0
                    .tag("left_file_count", left_file_count);
3660
0
            if (left_file_count == 0) {
3661
0
                if (delete_packed_file_and_kv(packed_file_path, packed_key, packed_info) != 0) {
3662
0
                    return -1;
3663
0
                }
3664
0
            }
3665
0
            return 0;
3666
0
        }
3667
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
3668
0
            if (attempt >= max_retry_times) {
3669
0
                LOG_WARNING("delete bitmap packed file update conflict after max retry")
3670
0
                        .tag("instance_id", instance_id_)
3671
0
                        .tag("tablet_id", tablet_id)
3672
0
                        .tag("rowset_id", rowset_id)
3673
0
                        .tag("packed_file_path", packed_file_path)
3674
0
                        .tag("attempt", attempt);
3675
0
                return -1;
3676
0
            }
3677
0
            sleep_for_packed_file_retry();
3678
0
            continue;
3679
0
        }
3680
3681
0
        LOG_WARNING("failed to commit delete bitmap packed file update")
3682
0
                .tag("instance_id", instance_id_)
3683
0
                .tag("tablet_id", tablet_id)
3684
0
                .tag("rowset_id", rowset_id)
3685
0
                .tag("packed_file_path", packed_file_path)
3686
0
                .tag("err", err);
3687
0
        return -1;
3688
0
    }
3689
3690
18.4E
    return -1;
3691
18.4E
}
3692
3693
int InstanceRecycler::delete_packed_file_and_kv(const std::string& packed_file_path,
3694
                                                const std::string& packed_key,
3695
7
                                                const cloud::PackedFileInfoPB& packed_info) {
3696
7
    if (!packed_info.has_resource_id() || packed_info.resource_id().empty()) {
3697
0
        LOG_WARNING("packed file missing resource id when recycling")
3698
0
                .tag("instance_id", instance_id_)
3699
0
                .tag("packed_file_path", packed_file_path);
3700
0
        return -1;
3701
0
    }
3702
3703
7
    auto [resource_id, accessor] = resolve_packed_file_accessor(packed_info.resource_id());
3704
7
    if (!accessor) {
3705
0
        LOG_WARNING("no accessor available to delete packed file")
3706
0
                .tag("instance_id", instance_id_)
3707
0
                .tag("packed_file_path", packed_file_path)
3708
0
                .tag("resource_id", packed_info.resource_id());
3709
0
        return -1;
3710
0
    }
3711
3712
7
    int del_ret = accessor->delete_file(packed_file_path);
3713
7
    if (del_ret != 0 && del_ret != 1) {
3714
0
        LOG_WARNING("failed to delete packed file")
3715
0
                .tag("instance_id", instance_id_)
3716
0
                .tag("packed_file_path", packed_file_path)
3717
0
                .tag("resource_id", resource_id)
3718
0
                .tag("ret", del_ret);
3719
0
        return -1;
3720
0
    }
3721
7
    if (del_ret == 1) {
3722
0
        LOG_INFO("packed file already removed")
3723
0
                .tag("instance_id", instance_id_)
3724
0
                .tag("packed_file_path", packed_file_path)
3725
0
                .tag("resource_id", resource_id);
3726
7
    } else {
3727
7
        LOG_INFO("deleted packed file")
3728
7
                .tag("instance_id", instance_id_)
3729
7
                .tag("packed_file_path", packed_file_path)
3730
7
                .tag("resource_id", resource_id);
3731
7
    }
3732
3733
7
    const int max_retry_times = std::max(1, config::packed_file_txn_retry_times);
3734
7
    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
3735
7
        std::unique_ptr<Transaction> del_txn;
3736
7
        TxnErrorCode err = txn_kv_->create_txn(&del_txn);
3737
7
        if (err != TxnErrorCode::TXN_OK) {
3738
0
            LOG_WARNING("failed to create txn when removing packed file kv")
3739
0
                    .tag("instance_id", instance_id_)
3740
0
                    .tag("packed_file_path", packed_file_path)
3741
0
                    .tag("attempt", attempt)
3742
0
                    .tag("err", err);
3743
0
            return -1;
3744
0
        }
3745
3746
7
        std::string latest_val;
3747
7
        err = del_txn->get(packed_key, &latest_val);
3748
7
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3749
0
            return 0;
3750
0
        }
3751
7
        if (err != TxnErrorCode::TXN_OK) {
3752
0
            LOG_WARNING("failed to re-read packed file kv before removal")
3753
0
                    .tag("instance_id", instance_id_)
3754
0
                    .tag("packed_file_path", packed_file_path)
3755
0
                    .tag("attempt", attempt)
3756
0
                    .tag("err", err);
3757
0
            return -1;
3758
0
        }
3759
3760
7
        cloud::PackedFileInfoPB latest_info;
3761
7
        if (!latest_info.ParseFromString(latest_val)) {
3762
0
            LOG_WARNING("failed to parse packed file info before removal")
3763
0
                    .tag("instance_id", instance_id_)
3764
0
                    .tag("packed_file_path", packed_file_path)
3765
0
                    .tag("attempt", attempt);
3766
0
            return -1;
3767
0
        }
3768
3769
7
        if (!(latest_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
3770
7
              latest_info.ref_cnt() == 0)) {
3771
0
            LOG_INFO("packed file state changed before removal, skip deleting kv")
3772
0
                    .tag("instance_id", instance_id_)
3773
0
                    .tag("packed_file_path", packed_file_path)
3774
0
                    .tag("attempt", attempt);
3775
0
            return 0;
3776
0
        }
3777
3778
7
        del_txn->remove(packed_key);
3779
7
        err = del_txn->commit();
3780
7
        if (err == TxnErrorCode::TXN_OK) {
3781
7
            LOG_INFO("removed packed file metadata")
3782
7
                    .tag("instance_id", instance_id_)
3783
7
                    .tag("packed_file_path", packed_file_path);
3784
7
            return 0;
3785
7
        }
3786
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
3787
0
            if (attempt >= max_retry_times) {
3788
0
                LOG_WARNING("failed to remove packed file kv due to conflict after max retry")
3789
0
                        .tag("instance_id", instance_id_)
3790
0
                        .tag("packed_file_path", packed_file_path)
3791
0
                        .tag("attempt", attempt);
3792
0
                return -1;
3793
0
            }
3794
0
            LOG_WARNING("failed to remove packed file kv due to conflict, retrying")
3795
0
                    .tag("instance_id", instance_id_)
3796
0
                    .tag("packed_file_path", packed_file_path)
3797
0
                    .tag("attempt", attempt);
3798
0
            sleep_for_packed_file_retry();
3799
0
            continue;
3800
0
        }
3801
0
        LOG_WARNING("failed to remove packed file kv")
3802
0
                .tag("instance_id", instance_id_)
3803
0
                .tag("packed_file_path", packed_file_path)
3804
0
                .tag("attempt", attempt)
3805
0
                .tag("err", err);
3806
0
        return -1;
3807
0
    }
3808
0
    return -1;
3809
7
}
3810
3811
int InstanceRecycler::delete_rowset_data(
3812
        const std::map<std::string, doris::RowsetMetaCloudPB>& rowsets, RowsetRecyclingState type,
3813
67
        RecyclerMetricsContext& metrics_context) {
3814
67
    int ret = 0;
3815
    // resource_id -> file_paths
3816
67
    std::map<std::string, std::vector<std::string>> resource_file_paths;
3817
    // (resource_id, tablet_id, rowset_id)
3818
67
    std::vector<std::tuple<std::string, int64_t, std::string>> rowsets_delete_by_prefix;
3819
67
    bool is_formal_rowset = (type == RowsetRecyclingState::FORMAL_ROWSET);
3820
3821
57.2k
    for (const auto& [_, rs] : rowsets) {
3822
        // we have to treat tmp rowset as "orphans" that may not related to any existing tablets
3823
        // due to aborted schema change.
3824
57.2k
        if (is_formal_rowset) {
3825
3.19k
            std::lock_guard lock(recycled_tablets_mtx_);
3826
3.19k
            if (recycled_tablets_.count(rs.tablet_id()) && rs.packed_slice_locations_size() == 0) {
3827
                // Tablet has been recycled and this rowset has no packed slices, so file data
3828
                // should already be gone; skip to avoid redundant deletes. Rowsets with packed
3829
                // slice info must still run to decrement packed file ref counts.
3830
0
                continue;
3831
0
            }
3832
3.19k
        }
3833
3834
57.2k
        int64_t num_segments = rs.num_segments();
3835
        // Check num_segments before accessor lookup, because empty rowsets
3836
        // (e.g. base compaction output of empty rowsets) may have no resource_id
3837
        // set. Skipping them early avoids a spurious "no such resource id" error
3838
        // that marks the entire batch as failed and prevents txn_remove from
3839
        // cleaning up recycle KV keys.
3840
57.2k
        if (num_segments <= 0) {
3841
0
            metrics_context.total_recycled_num++;
3842
0
            metrics_context.total_recycled_data_size += rs.total_disk_size();
3843
0
            continue;
3844
0
        }
3845
3846
57.2k
        auto it = accessor_map_.find(rs.resource_id());
3847
        // possible if the accessor is not initilized correctly
3848
57.2k
        if (it == accessor_map_.end()) [[unlikely]] {
3849
3.00k
            LOG_WARNING("instance has no such resource id")
3850
3.00k
                    .tag("instance_id", instance_id_)
3851
3.00k
                    .tag("resource_id", rs.resource_id());
3852
3.00k
            ret = -1;
3853
3.00k
            continue;
3854
3.00k
        }
3855
3856
54.2k
        auto& file_paths = resource_file_paths[rs.resource_id()];
3857
54.2k
        const auto& rowset_id = rs.rowset_id_v2();
3858
54.2k
        int64_t tablet_id = rs.tablet_id();
3859
54.2k
        LOG_INFO("recycle rowset merge index size")
3860
54.2k
                .tag("instance_id", instance_id_)
3861
54.2k
                .tag("tablet_id", tablet_id)
3862
54.2k
                .tag("rowset_id", rowset_id)
3863
54.2k
                .tag("merge_index_size", rs.packed_slice_locations_size());
3864
54.2k
        if (decrement_packed_file_ref_counts(rs) != 0) {
3865
0
            ret = -1;
3866
0
            continue;
3867
0
        }
3868
3869
        // Process delete bitmap - check where it's stored.
3870
54.2k
        DeleteBitmapStorageType delete_bitmap_storage_type = DeleteBitmapStorageType::NOT_FOUND;
3871
54.2k
        if (decrement_delete_bitmap_packed_file_ref_counts(tablet_id, rowset_id,
3872
54.2k
                                                           &delete_bitmap_storage_type) != 0) {
3873
0
            LOG_WARNING("failed to decrement delete bitmap packed file ref count")
3874
0
                    .tag("instance_id", instance_id_)
3875
0
                    .tag("tablet_id", tablet_id)
3876
0
                    .tag("rowset_id", rowset_id);
3877
0
            ret = -1;
3878
0
            continue;
3879
0
        }
3880
54.2k
        if (delete_bitmap_storage_type == DeleteBitmapStorageType::STANDALONE_FILE) {
3881
51.5k
            file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
3882
51.5k
        }
3883
3884
        // Process inverted indexes
3885
54.2k
        std::vector<std::pair<int64_t, std::string>> index_ids;
3886
        // default format as v1.
3887
54.2k
        InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
3888
54.2k
        int inverted_index_get_ret = 0;
3889
54.2k
        if (rs.has_tablet_schema()) {
3890
53.5k
            for (const auto& index : rs.tablet_schema().index()) {
3891
53.5k
                if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
3892
53.5k
                    index_ids.emplace_back(index.index_id(), index.index_suffix_name());
3893
53.5k
                }
3894
53.5k
            }
3895
26.6k
            if (rs.tablet_schema().has_inverted_index_storage_format()) {
3896
26.5k
                index_format = rs.tablet_schema().inverted_index_storage_format();
3897
26.5k
            }
3898
27.6k
        } else {
3899
27.6k
            if (!rs.has_index_id() || !rs.has_schema_version()) {
3900
0
                LOG(WARNING) << "rowset must have either schema or schema_version and index_id, "
3901
0
                                "instance_id="
3902
0
                             << instance_id_ << " tablet_id=" << tablet_id
3903
0
                             << " rowset_id=" << rowset_id;
3904
0
                ret = -1;
3905
0
                continue;
3906
0
            }
3907
27.6k
            InvertedIndexInfo index_info;
3908
27.6k
            inverted_index_get_ret =
3909
27.6k
                    inverted_index_id_cache_->get(rs.index_id(), rs.schema_version(), index_info);
3910
27.6k
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
3911
27.6k
                                     &inverted_index_get_ret);
3912
27.6k
            if (inverted_index_get_ret == 0) {
3913
27.1k
                index_format = index_info.first;
3914
27.1k
                index_ids = index_info.second;
3915
27.1k
            } else if (inverted_index_get_ret == 1) {
3916
                // 1. Schema kv not found means tablet has been recycled
3917
                // Maybe some tablet recycle failed by some bugs
3918
                // We need to delete again to double check
3919
                // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
3920
                // because we are uncertain about the inverted index information.
3921
                // If there are inverted indexes, some data might not be deleted,
3922
                // but this is acceptable as we have made our best effort to delete the data.
3923
507
                LOG_INFO(
3924
507
                        "delete rowset data schema kv not found, need to delete again to "
3925
507
                        "double "
3926
507
                        "check")
3927
507
                        .tag("instance_id", instance_id_)
3928
507
                        .tag("tablet_id", tablet_id)
3929
507
                        .tag("rowset", rs.ShortDebugString());
3930
                // Currently index_ids is guaranteed to be empty,
3931
                // but we clear it again here as a safeguard against future code changes
3932
                // that might cause index_ids to no longer be empty
3933
507
                index_format = InvertedIndexStorageFormatPB::V2;
3934
507
                index_ids.clear();
3935
18.4E
            } else {
3936
18.4E
                LOG(WARNING) << "failed to get schema kv for rowset, instance_id=" << instance_id_
3937
18.4E
                             << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id;
3938
18.4E
                ret = -1;
3939
18.4E
                continue;
3940
18.4E
            }
3941
27.6k
        }
3942
54.2k
        if (rs.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
3943
            // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
3944
            // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
3945
10
            rowsets_delete_by_prefix.emplace_back(rs.resource_id(), tablet_id, rs.rowset_id_v2());
3946
10
            continue;
3947
10
        }
3948
324k
        for (int64_t i = 0; i < num_segments; ++i) {
3949
270k
            add_file_to_delete_if_not_packed(rs, segment_path(tablet_id, rowset_id, i),
3950
270k
                                             &file_paths);
3951
270k
            if (index_format == InvertedIndexStorageFormatPB::V1) {
3952
536k
                for (const auto& index_id : index_ids) {
3953
536k
                    add_file_to_delete_if_not_packed(
3954
536k
                            rs,
3955
536k
                            inverted_index_path_v1(tablet_id, rowset_id, i, index_id.first,
3956
536k
                                                   index_id.second),
3957
536k
                            &file_paths);
3958
536k
                }
3959
268k
            } else if (!index_ids.empty() || inverted_index_get_ret == 1) {
3960
                // try to recycle inverted index v2 when get_ret == 1
3961
                // we treat schema not found as if it has a v2 format inverted index
3962
                // to reduce chance of data leakage
3963
2.50k
                if (inverted_index_get_ret == 1) {
3964
2.50k
                    LOG_INFO("delete rowset data schema kv not found, try to delete index file")
3965
2.50k
                            .tag("instance_id", instance_id_)
3966
2.50k
                            .tag("inverted index v2 path",
3967
2.50k
                                 inverted_index_path_v2(tablet_id, rowset_id, i));
3968
2.50k
                }
3969
2.50k
                add_file_to_delete_if_not_packed(
3970
2.50k
                        rs, inverted_index_path_v2(tablet_id, rowset_id, i), &file_paths);
3971
2.50k
            }
3972
270k
        }
3973
54.2k
    }
3974
3975
67
    SyncExecutor<int> concurrent_delete_executor(_thread_pool_group.s3_producer_pool,
3976
67
                                                 "delete_rowset_data",
3977
67
                                                 [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
3977
5
                                                 [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
3977
53
                                                 [](const int& ret) { return ret != 0; });
3978
67
    for (auto& [resource_id, file_paths] : resource_file_paths) {
3979
48
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3980
48
            DCHECK(accessor_map_.count(*rid))
3981
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3982
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3983
48
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3984
48
                                     &accessor_map_);
3985
48
            if (!accessor_map_.contains(*rid)) {
3986
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3987
0
                        .tag("resource_id", resource_id)
3988
0
                        .tag("instance_id", instance_id_);
3989
0
                return -1;
3990
0
            }
3991
48
            auto& accessor = accessor_map_[*rid];
3992
48
            int ret = accessor->delete_files(*paths);
3993
48
            if (!ret) {
3994
                // deduplication of different files with the same rowset id
3995
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3996
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3997
48
                std::set<std::string> deleted_rowset_id;
3998
3999
48
                std::for_each(paths->begin(), paths->end(),
4000
48
                              [&metrics_context, &rowsets, &deleted_rowset_id,
4001
858k
                               this](const std::string& path) {
4002
858k
                                  std::vector<std::string> str;
4003
858k
                                  butil::SplitString(path, '/', &str);
4004
858k
                                  std::string rowset_id;
4005
858k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
4006
854k
                                      rowset_id = str.back().substr(0, pos);
4007
854k
                                  } else {
4008
3.74k
                                      if (path.find("packed_file/") != std::string::npos) {
4009
0
                                          return; // packed files do not have rowset_id encoded
4010
0
                                      }
4011
3.74k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
4012
3.74k
                                      return;
4013
3.74k
                                  }
4014
854k
                                  auto rs_meta = rowsets.find(rowset_id);
4015
854k
                                  if (rs_meta != rowsets.end() &&
4016
857k
                                      !deleted_rowset_id.contains(rowset_id)) {
4017
54.2k
                                      deleted_rowset_id.emplace(rowset_id);
4018
54.2k
                                      metrics_context.total_recycled_data_size +=
4019
54.2k
                                              rs_meta->second.total_disk_size();
4020
54.2k
                                      segment_metrics_context_.total_recycled_num +=
4021
54.2k
                                              rs_meta->second.num_segments();
4022
54.2k
                                      segment_metrics_context_.total_recycled_data_size +=
4023
54.2k
                                              rs_meta->second.total_disk_size();
4024
54.2k
                                      metrics_context.total_recycled_num++;
4025
54.2k
                                  }
4026
854k
                              });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
Line
Count
Source
4001
7
                               this](const std::string& path) {
4002
7
                                  std::vector<std::string> str;
4003
7
                                  butil::SplitString(path, '/', &str);
4004
7
                                  std::string rowset_id;
4005
7
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
4006
7
                                      rowset_id = str.back().substr(0, pos);
4007
7
                                  } else {
4008
0
                                      if (path.find("packed_file/") != std::string::npos) {
4009
0
                                          return; // packed files do not have rowset_id encoded
4010
0
                                      }
4011
0
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
4012
0
                                      return;
4013
0
                                  }
4014
7
                                  auto rs_meta = rowsets.find(rowset_id);
4015
7
                                  if (rs_meta != rowsets.end() &&
4016
7
                                      !deleted_rowset_id.contains(rowset_id)) {
4017
7
                                      deleted_rowset_id.emplace(rowset_id);
4018
7
                                      metrics_context.total_recycled_data_size +=
4019
7
                                              rs_meta->second.total_disk_size();
4020
7
                                      segment_metrics_context_.total_recycled_num +=
4021
7
                                              rs_meta->second.num_segments();
4022
7
                                      segment_metrics_context_.total_recycled_data_size +=
4023
7
                                              rs_meta->second.total_disk_size();
4024
7
                                      metrics_context.total_recycled_num++;
4025
7
                                  }
4026
7
                              });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
Line
Count
Source
4001
858k
                               this](const std::string& path) {
4002
858k
                                  std::vector<std::string> str;
4003
858k
                                  butil::SplitString(path, '/', &str);
4004
858k
                                  std::string rowset_id;
4005
858k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
4006
854k
                                      rowset_id = str.back().substr(0, pos);
4007
854k
                                  } else {
4008
3.74k
                                      if (path.find("packed_file/") != std::string::npos) {
4009
0
                                          return; // packed files do not have rowset_id encoded
4010
0
                                      }
4011
3.74k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
4012
3.74k
                                      return;
4013
3.74k
                                  }
4014
854k
                                  auto rs_meta = rowsets.find(rowset_id);
4015
854k
                                  if (rs_meta != rowsets.end() &&
4016
857k
                                      !deleted_rowset_id.contains(rowset_id)) {
4017
54.1k
                                      deleted_rowset_id.emplace(rowset_id);
4018
54.1k
                                      metrics_context.total_recycled_data_size +=
4019
54.1k
                                              rs_meta->second.total_disk_size();
4020
54.1k
                                      segment_metrics_context_.total_recycled_num +=
4021
54.1k
                                              rs_meta->second.num_segments();
4022
54.1k
                                      segment_metrics_context_.total_recycled_data_size +=
4023
54.1k
                                              rs_meta->second.total_disk_size();
4024
54.1k
                                      metrics_context.total_recycled_num++;
4025
54.1k
                                  }
4026
854k
                              });
4027
48
            }
4028
48
            return ret;
4029
48
        });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
3979
5
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3980
5
            DCHECK(accessor_map_.count(*rid))
3981
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3982
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3983
5
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3984
5
                                     &accessor_map_);
3985
5
            if (!accessor_map_.contains(*rid)) {
3986
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3987
0
                        .tag("resource_id", resource_id)
3988
0
                        .tag("instance_id", instance_id_);
3989
0
                return -1;
3990
0
            }
3991
5
            auto& accessor = accessor_map_[*rid];
3992
5
            int ret = accessor->delete_files(*paths);
3993
5
            if (!ret) {
3994
                // deduplication of different files with the same rowset id
3995
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3996
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3997
5
                std::set<std::string> deleted_rowset_id;
3998
3999
5
                std::for_each(paths->begin(), paths->end(),
4000
5
                              [&metrics_context, &rowsets, &deleted_rowset_id,
4001
5
                               this](const std::string& path) {
4002
5
                                  std::vector<std::string> str;
4003
5
                                  butil::SplitString(path, '/', &str);
4004
5
                                  std::string rowset_id;
4005
5
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
4006
5
                                      rowset_id = str.back().substr(0, pos);
4007
5
                                  } else {
4008
5
                                      if (path.find("packed_file/") != std::string::npos) {
4009
5
                                          return; // packed files do not have rowset_id encoded
4010
5
                                      }
4011
5
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
4012
5
                                      return;
4013
5
                                  }
4014
5
                                  auto rs_meta = rowsets.find(rowset_id);
4015
5
                                  if (rs_meta != rowsets.end() &&
4016
5
                                      !deleted_rowset_id.contains(rowset_id)) {
4017
5
                                      deleted_rowset_id.emplace(rowset_id);
4018
5
                                      metrics_context.total_recycled_data_size +=
4019
5
                                              rs_meta->second.total_disk_size();
4020
5
                                      segment_metrics_context_.total_recycled_num +=
4021
5
                                              rs_meta->second.num_segments();
4022
5
                                      segment_metrics_context_.total_recycled_data_size +=
4023
5
                                              rs_meta->second.total_disk_size();
4024
5
                                      metrics_context.total_recycled_num++;
4025
5
                                  }
4026
5
                              });
4027
5
            }
4028
5
            return ret;
4029
5
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
3979
43
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3980
43
            DCHECK(accessor_map_.count(*rid))
3981
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3982
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3983
43
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3984
43
                                     &accessor_map_);
3985
43
            if (!accessor_map_.contains(*rid)) {
3986
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3987
0
                        .tag("resource_id", resource_id)
3988
0
                        .tag("instance_id", instance_id_);
3989
0
                return -1;
3990
0
            }
3991
43
            auto& accessor = accessor_map_[*rid];
3992
43
            int ret = accessor->delete_files(*paths);
3993
43
            if (!ret) {
3994
                // deduplication of different files with the same rowset id
3995
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3996
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3997
43
                std::set<std::string> deleted_rowset_id;
3998
3999
43
                std::for_each(paths->begin(), paths->end(),
4000
43
                              [&metrics_context, &rowsets, &deleted_rowset_id,
4001
43
                               this](const std::string& path) {
4002
43
                                  std::vector<std::string> str;
4003
43
                                  butil::SplitString(path, '/', &str);
4004
43
                                  std::string rowset_id;
4005
43
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
4006
43
                                      rowset_id = str.back().substr(0, pos);
4007
43
                                  } else {
4008
43
                                      if (path.find("packed_file/") != std::string::npos) {
4009
43
                                          return; // packed files do not have rowset_id encoded
4010
43
                                      }
4011
43
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
4012
43
                                      return;
4013
43
                                  }
4014
43
                                  auto rs_meta = rowsets.find(rowset_id);
4015
43
                                  if (rs_meta != rowsets.end() &&
4016
43
                                      !deleted_rowset_id.contains(rowset_id)) {
4017
43
                                      deleted_rowset_id.emplace(rowset_id);
4018
43
                                      metrics_context.total_recycled_data_size +=
4019
43
                                              rs_meta->second.total_disk_size();
4020
43
                                      segment_metrics_context_.total_recycled_num +=
4021
43
                                              rs_meta->second.num_segments();
4022
43
                                      segment_metrics_context_.total_recycled_data_size +=
4023
43
                                              rs_meta->second.total_disk_size();
4024
43
                                      metrics_context.total_recycled_num++;
4025
43
                                  }
4026
43
                              });
4027
43
            }
4028
43
            return ret;
4029
43
        });
4030
48
    }
4031
67
    for (const auto& [resource_id, tablet_id, rowset_id] : rowsets_delete_by_prefix) {
4032
10
        LOG_INFO(
4033
10
                "delete rowset {} by prefix because it's in BEGIN_PARTIAL_UPDATE state, "
4034
10
                "resource_id={}, tablet_id={}, instance_id={}, task_type={}",
4035
10
                rowset_id, resource_id, tablet_id, instance_id_, metrics_context.operation_type);
4036
10
        concurrent_delete_executor.add([&]() -> int {
4037
10
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
4038
10
            if (!ret) {
4039
10
                auto rs = rowsets.at(rowset_id);
4040
10
                metrics_context.total_recycled_data_size += rs.total_disk_size();
4041
10
                metrics_context.total_recycled_num++;
4042
10
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
4043
10
                segment_metrics_context_.total_recycled_num += rs.num_segments();
4044
10
            }
4045
10
            return ret;
4046
10
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
Line
Count
Source
4036
10
        concurrent_delete_executor.add([&]() -> int {
4037
10
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
4038
10
            if (!ret) {
4039
10
                auto rs = rowsets.at(rowset_id);
4040
10
                metrics_context.total_recycled_data_size += rs.total_disk_size();
4041
10
                metrics_context.total_recycled_num++;
4042
10
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
4043
10
                segment_metrics_context_.total_recycled_num += rs.num_segments();
4044
10
            }
4045
10
            return ret;
4046
10
        });
4047
10
    }
4048
4049
67
    bool finished = true;
4050
67
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
4051
67
    for (int r : rets) {
4052
58
        if (r != 0) {
4053
0
            ret = -1;
4054
0
            break;
4055
0
        }
4056
58
    }
4057
67
    ret = finished ? ret : -1;
4058
67
    return ret;
4059
67
}
4060
4061
int InstanceRecycler::delete_rowset_data(const std::string& resource_id, int64_t tablet_id,
4062
3.31k
                                         const std::string& rowset_id) {
4063
3.31k
    auto it = accessor_map_.find(resource_id);
4064
3.31k
    if (it == accessor_map_.end()) {
4065
400
        LOG_WARNING("instance has no such resource id")
4066
400
                .tag("instance_id", instance_id_)
4067
400
                .tag("resource_id", resource_id)
4068
400
                .tag("tablet_id", tablet_id)
4069
400
                .tag("rowset_id", rowset_id);
4070
400
        return -1;
4071
400
    }
4072
2.91k
    auto& accessor = it->second;
4073
2.91k
    return accessor->delete_prefix(rowset_path_prefix(tablet_id, rowset_id));
4074
3.31k
}
4075
4076
4
bool InstanceRecycler::decode_packed_file_key(std::string_view key, std::string* packed_path) {
4077
4
    if (key.empty()) {
4078
0
        return false;
4079
0
    }
4080
4
    std::string_view key_view = key;
4081
4
    key_view.remove_prefix(1); // remove keyspace prefix
4082
4
    std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> decoded;
4083
4
    if (decode_key(&key_view, &decoded) != 0) {
4084
0
        return false;
4085
0
    }
4086
4
    if (decoded.size() < 4) {
4087
0
        return false;
4088
0
    }
4089
4
    try {
4090
4
        *packed_path = std::get<std::string>(std::get<0>(decoded.back()));
4091
4
    } catch (const std::bad_variant_access&) {
4092
0
        return false;
4093
0
    }
4094
4
    return true;
4095
4
}
4096
4097
14
int InstanceRecycler::recycle_packed_files() {
4098
14
    const std::string task_name = "recycle_packed_files";
4099
14
    auto start_tp = steady_clock::now();
4100
14
    int64_t start_time = duration_cast<seconds>(start_tp.time_since_epoch()).count();
4101
14
    int ret = 0;
4102
14
    PackedFileRecycleStats stats;
4103
4104
14
    register_recycle_task(task_name, start_time);
4105
14
    DORIS_CLOUD_DEFER {
4106
14
        unregister_recycle_task(task_name);
4107
14
        int64_t cost =
4108
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4109
14
        int64_t cost_ms = duration_cast<milliseconds>(steady_clock::now() - start_tp).count();
4110
14
        g_bvar_recycler_packed_file_recycled_kv_num.put(instance_id_, stats.num_deleted);
4111
14
        g_bvar_recycler_packed_file_recycled_kv_bytes.put(instance_id_, stats.bytes_deleted);
4112
14
        g_bvar_recycler_packed_file_recycle_cost_ms.put(instance_id_, cost_ms);
4113
14
        g_bvar_recycler_packed_file_scanned_kv_num.put(instance_id_, stats.num_scanned);
4114
14
        g_bvar_recycler_packed_file_corrected_kv_num.put(instance_id_, stats.num_corrected);
4115
14
        g_bvar_recycler_packed_file_recycled_object_num.put(instance_id_, stats.num_object_deleted);
4116
14
        g_bvar_recycler_packed_file_bytes_object_deleted.put(instance_id_,
4117
14
                                                             stats.bytes_object_deleted);
4118
14
        g_bvar_recycler_packed_file_rowset_scanned_num.put(instance_id_, stats.rowset_scan_count);
4119
14
        LOG_INFO("recycle packed files finished, cost={}s", cost)
4120
14
                .tag("instance_id", instance_id_)
4121
14
                .tag("num_scanned", stats.num_scanned)
4122
14
                .tag("num_corrected", stats.num_corrected)
4123
14
                .tag("num_deleted", stats.num_deleted)
4124
14
                .tag("num_failed", stats.num_failed)
4125
14
                .tag("num_objects_deleted", stats.num_object_deleted)
4126
14
                .tag("bytes_object_deleted", stats.bytes_object_deleted)
4127
14
                .tag("rowset_scan_count", stats.rowset_scan_count)
4128
14
                .tag("bytes_deleted", stats.bytes_deleted)
4129
14
                .tag("ret", ret);
4130
14
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_0clEv
Line
Count
Source
4105
14
    DORIS_CLOUD_DEFER {
4106
14
        unregister_recycle_task(task_name);
4107
14
        int64_t cost =
4108
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4109
14
        int64_t cost_ms = duration_cast<milliseconds>(steady_clock::now() - start_tp).count();
4110
14
        g_bvar_recycler_packed_file_recycled_kv_num.put(instance_id_, stats.num_deleted);
4111
14
        g_bvar_recycler_packed_file_recycled_kv_bytes.put(instance_id_, stats.bytes_deleted);
4112
14
        g_bvar_recycler_packed_file_recycle_cost_ms.put(instance_id_, cost_ms);
4113
14
        g_bvar_recycler_packed_file_scanned_kv_num.put(instance_id_, stats.num_scanned);
4114
14
        g_bvar_recycler_packed_file_corrected_kv_num.put(instance_id_, stats.num_corrected);
4115
14
        g_bvar_recycler_packed_file_recycled_object_num.put(instance_id_, stats.num_object_deleted);
4116
14
        g_bvar_recycler_packed_file_bytes_object_deleted.put(instance_id_,
4117
14
                                                             stats.bytes_object_deleted);
4118
14
        g_bvar_recycler_packed_file_rowset_scanned_num.put(instance_id_, stats.rowset_scan_count);
4119
14
        LOG_INFO("recycle packed files finished, cost={}s", cost)
4120
14
                .tag("instance_id", instance_id_)
4121
14
                .tag("num_scanned", stats.num_scanned)
4122
14
                .tag("num_corrected", stats.num_corrected)
4123
14
                .tag("num_deleted", stats.num_deleted)
4124
14
                .tag("num_failed", stats.num_failed)
4125
14
                .tag("num_objects_deleted", stats.num_object_deleted)
4126
14
                .tag("bytes_object_deleted", stats.bytes_object_deleted)
4127
14
                .tag("rowset_scan_count", stats.rowset_scan_count)
4128
14
                .tag("bytes_deleted", stats.bytes_deleted)
4129
14
                .tag("ret", ret);
4130
14
    };
4131
4132
14
    auto recycle_func = [this, &stats, &ret](auto&& key, auto&& value) {
4133
4
        return handle_packed_file_kv(std::forward<decltype(key)>(key),
4134
4
                                     std::forward<decltype(value)>(value), &stats, &ret);
4135
4
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_1clISt17basic_string_viewIcSt11char_traitsIcEES7_EEDaOT_OT0_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_1clISt17basic_string_viewIcSt11char_traitsIcEES7_EEDaOT_OT0_
Line
Count
Source
4132
4
    auto recycle_func = [this, &stats, &ret](auto&& key, auto&& value) {
4133
4
        return handle_packed_file_kv(std::forward<decltype(key)>(key),
4134
4
                                     std::forward<decltype(value)>(value), &stats, &ret);
4135
4
    };
4136
4137
14
    LOG_INFO("begin to recycle packed file").tag("instance_id", instance_id_);
4138
4139
14
    std::string begin = packed_file_key({instance_id_, ""});
4140
14
    std::string end = packed_file_key({instance_id_, "\xff"});
4141
14
    if (scan_and_recycle(begin, end, recycle_func) != 0) {
4142
0
        ret = -1;
4143
0
    }
4144
4145
14
    return ret;
4146
14
}
4147
4148
int InstanceRecycler::scan_tablets_and_statistics(int64_t table_id, int64_t index_id,
4149
                                                  RecyclerMetricsContext& metrics_context,
4150
0
                                                  int64_t partition_id, bool is_empty_tablet) {
4151
0
    std::string tablet_key_begin, tablet_key_end;
4152
4153
0
    if (partition_id > 0) {
4154
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
4155
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
4156
0
    } else {
4157
0
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
4158
0
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
4159
0
    }
4160
    // for calculating the total num or bytes of recycled objects
4161
0
    auto scan_and_statistics = [&, is_empty_tablet, this](std::string_view k,
4162
0
                                                          std::string_view v) -> int {
4163
0
        doris::TabletMetaCloudPB tablet_meta_pb;
4164
0
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
4165
0
            return 0;
4166
0
        }
4167
0
        int64_t tablet_id = tablet_meta_pb.tablet_id();
4168
4169
0
        if (config::enable_recycler_check_lazy_txn_finished &&
4170
0
            !check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
4171
0
            return 0;
4172
0
        }
4173
4174
0
        if (!is_empty_tablet) {
4175
0
            if (scan_tablet_and_statistics(tablet_id, metrics_context) != 0) {
4176
0
                return 0;
4177
0
            }
4178
0
            tablet_metrics_context_.total_need_recycle_num++;
4179
0
        }
4180
0
        return 0;
4181
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
4182
0
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(scan_and_statistics));
4183
0
    metrics_context.report(true);
4184
0
    tablet_metrics_context_.report(true);
4185
0
    segment_metrics_context_.report(true);
4186
0
    return ret;
4187
0
}
4188
4189
int InstanceRecycler::scan_tablet_and_statistics(int64_t tablet_id,
4190
0
                                                 RecyclerMetricsContext& metrics_context) {
4191
0
    int ret = 0;
4192
0
    std::map<std::string, RowsetMetaCloudPB> rowset_meta_map;
4193
0
    std::unique_ptr<Transaction> txn;
4194
0
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4195
0
        LOG_WARNING("failed to recycle tablet ")
4196
0
                .tag("tablet id", tablet_id)
4197
0
                .tag("instance_id", instance_id_)
4198
0
                .tag("reason", "failed to create txn");
4199
0
        ret = -1;
4200
0
    }
4201
0
    GetRowsetResponse resp;
4202
0
    std::string msg;
4203
0
    MetaServiceCode code = MetaServiceCode::OK;
4204
    // get rowsets in tablet
4205
0
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
4206
0
                        tablet_id, code, msg, &resp);
4207
0
    if (code != MetaServiceCode::OK) {
4208
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
4209
0
                .tag("tablet id", tablet_id)
4210
0
                .tag("msg", msg)
4211
0
                .tag("code", code)
4212
0
                .tag("instance id", instance_id_);
4213
0
        ret = -1;
4214
0
    }
4215
0
    for (const auto& rs_meta : resp.rowset_meta()) {
4216
        /*
4217
        * For compatibility, we skip the loop for [0-1] here.
4218
        * The purpose of this loop is to delete object files,
4219
        * and since [0-1] only has meta and doesn't have object files,
4220
        * skipping it doesn't affect system correctness.
4221
        *
4222
        * If not skipped, the check "if (!rs_meta.has_resource_id())" below
4223
        * would return error -1 directly, causing the recycle operation to fail.
4224
        *
4225
        * The fact that [0-1] doesn't have a resource id is a bug.
4226
        * In the future, we will fix this problem, after that,
4227
        * we can remove this if statement.
4228
        *
4229
        * TODO(Yukang-Lian): remove this if statement when [0-1] has resource id in the future.
4230
        */
4231
4232
0
        if (rs_meta.end_version() == 1) {
4233
            // Assert that [0-1] has no resource_id to make sure
4234
            // we do not forget to remove this if statement
4235
            // when the resource id bug is fixed
4236
0
            DCHECK(!rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
4237
0
            continue;
4238
0
        }
4239
0
        if (!rs_meta.has_resource_id()) {
4240
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
4241
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
4242
0
                    .tag("instance_id", instance_id_)
4243
0
                    .tag("tablet_id", tablet_id);
4244
0
            continue;
4245
0
        }
4246
0
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
4247
0
        auto it = accessor_map_.find(rs_meta.resource_id());
4248
        // possible if the accessor is not initialized correctly
4249
0
        if (it == accessor_map_.end()) [[unlikely]] {
4250
0
            LOG_WARNING(
4251
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
4252
0
                    "recycle process")
4253
0
                    .tag("tablet id", tablet_id)
4254
0
                    .tag("instance_id", instance_id_)
4255
0
                    .tag("resource_id", rs_meta.resource_id())
4256
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
4257
0
            continue;
4258
0
        }
4259
4260
0
        metrics_context.total_need_recycle_data_size += rs_meta.total_disk_size();
4261
0
        tablet_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
4262
0
        segment_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
4263
0
        segment_metrics_context_.total_need_recycle_num += rs_meta.num_segments();
4264
0
    }
4265
0
    return ret;
4266
0
}
4267
4268
4.26k
int InstanceRecycler::recycle_tablet(int64_t tablet_id, RecyclerMetricsContext& metrics_context) {
4269
4.26k
    LOG_INFO("begin to recycle rowsets in a dropped tablet")
4270
4.26k
            .tag("instance_id", instance_id_)
4271
4.26k
            .tag("tablet_id", tablet_id);
4272
4273
4.26k
    if (should_recycle_versioned_keys()) {
4274
14
        int ret = recycle_versioned_tablet(tablet_id, metrics_context);
4275
14
        if (ret != 0) {
4276
0
            return ret;
4277
0
        }
4278
        // Continue to recycle non-versioned rowsets, if multi-version is set to DISABLED
4279
        // during the recycle_versioned_tablet process.
4280
        //
4281
        // .. And remove restore job rowsets of this tablet too
4282
14
    }
4283
4284
4.26k
    int ret = 0;
4285
4.26k
    auto start_time = steady_clock::now();
4286
4287
4.26k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
4288
4289
    // collect resource ids
4290
258
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
4291
258
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
4292
258
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
4293
258
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
4294
258
    std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
4295
258
    std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
4296
4297
258
    std::set<std::string> resource_ids;
4298
258
    int64_t recycle_rowsets_number = 0;
4299
258
    int64_t recycle_segments_number = 0;
4300
258
    int64_t recycle_rowsets_data_size = 0;
4301
258
    int64_t recycle_rowsets_index_size = 0;
4302
258
    int64_t recycle_restore_job_rowsets_number = 0;
4303
258
    int64_t recycle_restore_job_segments_number = 0;
4304
258
    int64_t recycle_restore_job_rowsets_data_size = 0;
4305
258
    int64_t recycle_restore_job_rowsets_index_size = 0;
4306
258
    int64_t max_rowset_version = 0;
4307
258
    int64_t min_rowset_creation_time = INT64_MAX;
4308
258
    int64_t max_rowset_creation_time = 0;
4309
258
    int64_t min_rowset_expiration_time = INT64_MAX;
4310
258
    int64_t max_rowset_expiration_time = 0;
4311
4312
258
    DORIS_CLOUD_DEFER {
4313
258
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4314
258
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4315
258
                .tag("instance_id", instance_id_)
4316
258
                .tag("tablet_id", tablet_id)
4317
258
                .tag("recycle rowsets number", recycle_rowsets_number)
4318
258
                .tag("recycle segments number", recycle_segments_number)
4319
258
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4320
258
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4321
258
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
4322
258
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
4323
258
                .tag("all restore job rowsets recycle data size",
4324
258
                     recycle_restore_job_rowsets_data_size)
4325
258
                .tag("all restore job rowsets recycle index size",
4326
258
                     recycle_restore_job_rowsets_index_size)
4327
258
                .tag("max rowset version", max_rowset_version)
4328
258
                .tag("min rowset creation time", min_rowset_creation_time)
4329
258
                .tag("max rowset creation time", max_rowset_creation_time)
4330
258
                .tag("min rowset expiration time", min_rowset_expiration_time)
4331
258
                .tag("max rowset expiration time", max_rowset_expiration_time)
4332
258
                .tag("task type", metrics_context.operation_type)
4333
258
                .tag("ret", ret);
4334
258
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
4312
258
    DORIS_CLOUD_DEFER {
4313
258
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4314
258
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4315
258
                .tag("instance_id", instance_id_)
4316
258
                .tag("tablet_id", tablet_id)
4317
258
                .tag("recycle rowsets number", recycle_rowsets_number)
4318
258
                .tag("recycle segments number", recycle_segments_number)
4319
258
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4320
258
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4321
258
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
4322
258
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
4323
258
                .tag("all restore job rowsets recycle data size",
4324
258
                     recycle_restore_job_rowsets_data_size)
4325
258
                .tag("all restore job rowsets recycle index size",
4326
258
                     recycle_restore_job_rowsets_index_size)
4327
258
                .tag("max rowset version", max_rowset_version)
4328
258
                .tag("min rowset creation time", min_rowset_creation_time)
4329
258
                .tag("max rowset creation time", max_rowset_creation_time)
4330
258
                .tag("min rowset expiration time", min_rowset_expiration_time)
4331
258
                .tag("max rowset expiration time", max_rowset_expiration_time)
4332
258
                .tag("task type", metrics_context.operation_type)
4333
258
                .tag("ret", ret);
4334
258
    };
4335
4336
258
    std::unique_ptr<Transaction> txn;
4337
258
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4338
0
        LOG_WARNING("failed to recycle tablet ")
4339
0
                .tag("tablet id", tablet_id)
4340
0
                .tag("instance_id", instance_id_)
4341
0
                .tag("reason", "failed to create txn");
4342
0
        ret = -1;
4343
0
    }
4344
258
    GetRowsetResponse resp;
4345
258
    std::string msg;
4346
258
    MetaServiceCode code = MetaServiceCode::OK;
4347
    // get rowsets in tablet
4348
258
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
4349
258
                        tablet_id, code, msg, &resp);
4350
258
    if (code != MetaServiceCode::OK) {
4351
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
4352
0
                .tag("tablet id", tablet_id)
4353
0
                .tag("msg", msg)
4354
0
                .tag("code", code)
4355
0
                .tag("instance id", instance_id_);
4356
0
        ret = -1;
4357
0
    }
4358
258
    TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", &resp);
4359
4360
2.54k
    for (const auto& rs_meta : resp.rowset_meta()) {
4361
        // The rowset has no resource id and segments when it was generated by compaction
4362
        // with multiple hole rowsets or it's version is [0-1], so we can skip it.
4363
2.54k
        if (!rs_meta.has_resource_id() && rs_meta.num_segments() == 0) {
4364
0
            LOG_INFO("rowset meta does not have a resource id and no segments, skip this rowset")
4365
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
4366
0
                    .tag("instance_id", instance_id_)
4367
0
                    .tag("tablet_id", tablet_id);
4368
0
            recycle_rowsets_number += 1;
4369
0
            continue;
4370
0
        }
4371
2.54k
        if (!rs_meta.has_resource_id()) {
4372
1
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
4373
1
                    .tag("rs_meta", rs_meta.ShortDebugString())
4374
1
                    .tag("instance_id", instance_id_)
4375
1
                    .tag("tablet_id", tablet_id);
4376
1
            return -1;
4377
1
        }
4378
18.4E
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
4379
2.54k
        auto it = accessor_map_.find(rs_meta.resource_id());
4380
        // possible if the accessor is not initilized correctly
4381
2.54k
        if (it == accessor_map_.end()) [[unlikely]] {
4382
1
            LOG_WARNING(
4383
1
                    "failed to find resource id when recycle tablet, skip this vault accessor "
4384
1
                    "recycle process")
4385
1
                    .tag("tablet id", tablet_id)
4386
1
                    .tag("instance_id", instance_id_)
4387
1
                    .tag("resource_id", rs_meta.resource_id())
4388
1
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
4389
1
            return -1;
4390
1
        }
4391
2.54k
        if (decrement_packed_file_ref_counts(rs_meta) != 0) {
4392
0
            LOG_WARNING("failed to update packed file info when recycling tablet")
4393
0
                    .tag("instance_id", instance_id_)
4394
0
                    .tag("tablet_id", tablet_id)
4395
0
                    .tag("rowset_id", rs_meta.rowset_id_v2());
4396
0
            return -1;
4397
0
        }
4398
2.54k
        recycle_rowsets_number += 1;
4399
2.54k
        recycle_segments_number += rs_meta.num_segments();
4400
2.54k
        recycle_rowsets_data_size += rs_meta.data_disk_size();
4401
2.54k
        recycle_rowsets_index_size += rs_meta.index_disk_size();
4402
2.54k
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
4403
2.54k
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
4404
2.54k
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
4405
2.54k
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
4406
2.54k
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
4407
2.54k
        resource_ids.emplace(rs_meta.resource_id());
4408
2.54k
    }
4409
4410
    // get restore job rowset in tablet
4411
256
    std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>> restore_job_rs_metas;
4412
256
    scan_restore_job_rowset(txn.get(), instance_id_, tablet_id, code, msg, &restore_job_rs_metas);
4413
256
    if (code != MetaServiceCode::OK) {
4414
0
        LOG_WARNING("scan restore job rowsets failed when recycle tablet")
4415
0
                .tag("tablet id", tablet_id)
4416
0
                .tag("msg", msg)
4417
0
                .tag("code", code)
4418
0
                .tag("instance id", instance_id_);
4419
0
        return -1;
4420
0
    }
4421
4422
256
    for (auto& [_, rs_meta] : restore_job_rs_metas) {
4423
0
        if (!rs_meta.has_resource_id()) {
4424
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
4425
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
4426
0
                    .tag("instance_id", instance_id_)
4427
0
                    .tag("tablet_id", tablet_id);
4428
0
            return -1;
4429
0
        }
4430
4431
0
        auto it = accessor_map_.find(rs_meta.resource_id());
4432
        // possible if the accessor is not initilized correctly
4433
0
        if (it == accessor_map_.end()) [[unlikely]] {
4434
0
            LOG_WARNING(
4435
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
4436
0
                    "recycle process")
4437
0
                    .tag("tablet id", tablet_id)
4438
0
                    .tag("instance_id", instance_id_)
4439
0
                    .tag("resource_id", rs_meta.resource_id())
4440
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
4441
0
            return -1;
4442
0
        }
4443
0
        if (decrement_packed_file_ref_counts(rs_meta) != 0) {
4444
0
            LOG_WARNING("failed to update packed file info when recycling restore job rowset")
4445
0
                    .tag("instance_id", instance_id_)
4446
0
                    .tag("tablet_id", tablet_id)
4447
0
                    .tag("rowset_id", rs_meta.rowset_id_v2());
4448
0
            return -1;
4449
0
        }
4450
0
        recycle_restore_job_rowsets_number += 1;
4451
0
        recycle_restore_job_segments_number += rs_meta.num_segments();
4452
0
        recycle_restore_job_rowsets_data_size += rs_meta.data_disk_size();
4453
0
        recycle_restore_job_rowsets_index_size += rs_meta.index_disk_size();
4454
0
        resource_ids.emplace(rs_meta.resource_id());
4455
0
    }
4456
4457
256
    LOG_INFO("recycle tablet start to delete object")
4458
256
            .tag("instance id", instance_id_)
4459
256
            .tag("tablet id", tablet_id)
4460
256
            .tag("recycle tablet resource ids are",
4461
256
                 std::accumulate(resource_ids.begin(), resource_ids.end(), std::string(),
4462
256
                                 [](std::string rs_id, const auto& it) {
4463
216
                                     return rs_id.empty() ? it : rs_id + ", " + it;
4464
216
                                 }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
Line
Count
Source
4462
216
                                 [](std::string rs_id, const auto& it) {
4463
216
                                     return rs_id.empty() ? it : rs_id + ", " + it;
4464
216
                                 }));
4465
4466
256
    SyncExecutor<std::pair<int, std::string>> concurrent_delete_executor(
4467
256
            _thread_pool_group.s3_producer_pool,
4468
256
            fmt::format("delete tablet {} s3 rowset", tablet_id),
4469
256
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
Line
Count
Source
4469
207
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
4470
4471
    // delete all rowset data in this tablet
4472
    // ATTN: there may be data leak if not all accessor initilized successfully
4473
    //       partial data deleted if the tablet is stored cross-storage vault
4474
    //       vault id is not attached to TabletMeta...
4475
256
    for (const auto& resource_id : resource_ids) {
4476
216
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, resource_id, "submitted"}, 1);
4477
216
        concurrent_delete_executor.add(
4478
216
                [&, rs_id = resource_id,
4479
216
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
4480
216
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
4481
216
                    if (res != 0) {
4482
3
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
4483
3
                                     << " path=" << accessor_ptr->uri()
4484
3
                                     << " task type=" << metrics_context.operation_type;
4485
3
                        return std::make_pair(-1, rs_id);
4486
3
                    }
4487
213
                    return std::make_pair(0, rs_id);
4488
216
                });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
Line
Count
Source
4479
216
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
4480
216
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
4481
216
                    if (res != 0) {
4482
3
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
4483
3
                                     << " path=" << accessor_ptr->uri()
4484
3
                                     << " task type=" << metrics_context.operation_type;
4485
3
                        return std::make_pair(-1, rs_id);
4486
3
                    }
4487
213
                    return std::make_pair(0, rs_id);
4488
216
                });
4489
216
    }
4490
4491
256
    bool finished = true;
4492
256
    std::vector<std::pair<int, std::string>> rets = concurrent_delete_executor.when_all(&finished);
4493
256
    for (auto& r : rets) {
4494
216
        if (r.first != 0) {
4495
3
            g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "error"}, 1);
4496
3
            ret = -1;
4497
3
        }
4498
216
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "completed"}, 1);
4499
216
    }
4500
256
    ret = finished ? ret : -1;
4501
4502
256
    if (ret != 0) { // failed recycle tablet data
4503
3
        LOG_WARNING("ret!=0")
4504
3
                .tag("finished", finished)
4505
3
                .tag("ret", ret)
4506
3
                .tag("instance_id", instance_id_)
4507
3
                .tag("tablet_id", tablet_id);
4508
3
        return ret;
4509
3
    }
4510
4511
253
    tablet_metrics_context_.total_recycled_data_size +=
4512
253
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4513
253
    tablet_metrics_context_.total_recycled_num += 1;
4514
253
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
4515
253
    segment_metrics_context_.total_recycled_data_size +=
4516
253
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4517
253
    metrics_context.total_recycled_data_size +=
4518
253
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4519
253
    tablet_metrics_context_.report();
4520
253
    segment_metrics_context_.report();
4521
253
    metrics_context.report();
4522
4523
253
    txn.reset();
4524
253
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4525
0
        LOG_WARNING("failed to recycle tablet ")
4526
0
                .tag("tablet id", tablet_id)
4527
0
                .tag("instance_id", instance_id_)
4528
0
                .tag("reason", "failed to create txn");
4529
0
        ret = -1;
4530
0
    }
4531
    // delete all rowset kv in this tablet
4532
253
    txn->remove(rs_key0, rs_key1);
4533
253
    txn->remove(recyc_rs_key0, recyc_rs_key1);
4534
253
    txn->remove(restore_job_rs_key0, restore_job_rs_key1);
4535
4536
    // remove delete bitmap for MoW table
4537
253
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
4538
253
    txn->remove(pending_key);
4539
253
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
4540
253
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
4541
253
    txn->remove(delete_bitmap_start, delete_bitmap_end);
4542
4543
253
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
4544
253
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
4545
253
    txn->remove(dbm_start_key, dbm_end_key);
4546
253
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
4547
253
              << " end=" << hex(dbm_end_key);
4548
4549
253
    TxnErrorCode err = txn->commit();
4550
253
    if (err != TxnErrorCode::TXN_OK) {
4551
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
4552
0
        ret = -1;
4553
0
    }
4554
4555
253
    if (ret == 0) {
4556
        // All object files under tablet have been deleted
4557
253
        std::lock_guard lock(recycled_tablets_mtx_);
4558
253
        recycled_tablets_.insert(tablet_id);
4559
253
    }
4560
4561
253
    return ret;
4562
256
}
4563
4564
int InstanceRecycler::recycle_versioned_tablet(int64_t tablet_id,
4565
14
                                               RecyclerMetricsContext& metrics_context) {
4566
14
    int ret = 0;
4567
14
    auto start_time = steady_clock::now();
4568
4569
14
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
4570
4571
    // collect resource ids
4572
11
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
4573
11
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
4574
11
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
4575
11
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
4576
4577
11
    int64_t recycle_rowsets_number = 0;
4578
11
    int64_t recycle_segments_number = 0;
4579
11
    int64_t recycle_rowsets_data_size = 0;
4580
11
    int64_t recycle_rowsets_index_size = 0;
4581
11
    int64_t max_rowset_version = 0;
4582
11
    int64_t min_rowset_creation_time = INT64_MAX;
4583
11
    int64_t max_rowset_creation_time = 0;
4584
11
    int64_t min_rowset_expiration_time = INT64_MAX;
4585
11
    int64_t max_rowset_expiration_time = 0;
4586
4587
11
    DORIS_CLOUD_DEFER {
4588
11
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4589
11
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4590
11
                .tag("instance_id", instance_id_)
4591
11
                .tag("tablet_id", tablet_id)
4592
11
                .tag("recycle rowsets number", recycle_rowsets_number)
4593
11
                .tag("recycle segments number", recycle_segments_number)
4594
11
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4595
11
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4596
11
                .tag("max rowset version", max_rowset_version)
4597
11
                .tag("min rowset creation time", min_rowset_creation_time)
4598
11
                .tag("max rowset creation time", max_rowset_creation_time)
4599
11
                .tag("min rowset expiration time", min_rowset_expiration_time)
4600
11
                .tag("max rowset expiration time", max_rowset_expiration_time)
4601
11
                .tag("ret", ret);
4602
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
4587
11
    DORIS_CLOUD_DEFER {
4588
11
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4589
11
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4590
11
                .tag("instance_id", instance_id_)
4591
11
                .tag("tablet_id", tablet_id)
4592
11
                .tag("recycle rowsets number", recycle_rowsets_number)
4593
11
                .tag("recycle segments number", recycle_segments_number)
4594
11
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4595
11
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4596
11
                .tag("max rowset version", max_rowset_version)
4597
11
                .tag("min rowset creation time", min_rowset_creation_time)
4598
11
                .tag("max rowset creation time", max_rowset_creation_time)
4599
11
                .tag("min rowset expiration time", min_rowset_expiration_time)
4600
11
                .tag("max rowset expiration time", max_rowset_expiration_time)
4601
11
                .tag("ret", ret);
4602
11
    };
4603
4604
11
    std::unique_ptr<Transaction> txn;
4605
11
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4606
0
        LOG_WARNING("failed to recycle tablet ")
4607
0
                .tag("tablet id", tablet_id)
4608
0
                .tag("instance_id", instance_id_)
4609
0
                .tag("reason", "failed to create txn");
4610
0
        ret = -1;
4611
0
    }
4612
4613
    // Read the last version of load and compact rowsets, the previous rowsets will be recycled
4614
    // by the related operation logs.
4615
11
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> load_rowset_metas;
4616
11
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> compact_rowset_metas;
4617
11
    MetaReader meta_reader(instance_id_);
4618
11
    TxnErrorCode err = meta_reader.get_load_rowset_metas(txn.get(), tablet_id, &load_rowset_metas);
4619
11
    if (err == TxnErrorCode::TXN_OK) {
4620
11
        err = meta_reader.get_compact_rowset_metas(txn.get(), tablet_id, &compact_rowset_metas);
4621
11
    }
4622
11
    if (err != TxnErrorCode::TXN_OK) {
4623
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
4624
0
                .tag("tablet id", tablet_id)
4625
0
                .tag("err", err)
4626
0
                .tag("instance id", instance_id_);
4627
0
        ret = -1;
4628
0
    }
4629
4630
11
    LOG_INFO("recycle versioned tablet get {} load rowsets and {} compact rowsets",
4631
11
             load_rowset_metas.size(), compact_rowset_metas.size())
4632
11
            .tag("instance_id", instance_id_)
4633
11
            .tag("tablet_id", tablet_id);
4634
4635
11
    SyncExecutor<int> concurrent_delete_executor(
4636
11
            _thread_pool_group.s3_producer_pool,
4637
11
            fmt::format("delete tablet {} s3 rowset", tablet_id),
4638
11
            [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
4639
4640
60
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
4641
60
        recycle_rowsets_number += 1;
4642
60
        recycle_segments_number += rs_meta.num_segments();
4643
60
        recycle_rowsets_data_size += rs_meta.data_disk_size();
4644
60
        recycle_rowsets_index_size += rs_meta.index_disk_size();
4645
60
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
4646
60
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
4647
60
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
4648
60
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
4649
60
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
4650
60
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
Line
Count
Source
4640
60
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
4641
60
        recycle_rowsets_number += 1;
4642
60
        recycle_segments_number += rs_meta.num_segments();
4643
60
        recycle_rowsets_data_size += rs_meta.data_disk_size();
4644
60
        recycle_rowsets_index_size += rs_meta.index_disk_size();
4645
60
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
4646
60
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
4647
60
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
4648
60
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
4649
60
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
4650
60
    };
4651
4652
11
    std::vector<RowsetDeleteTask> all_tasks;
4653
60
    for (const auto& [rs_meta, versionstamp] : load_rowset_metas) {
4654
60
        update_rowset_stats(rs_meta);
4655
        // Version 0-1 rowset has no resource_id and no actual data files,
4656
        // but still needs ref_count key cleanup, so we add it to all_tasks.
4657
        // It will be filtered out in Phase 2 when building rowsets_to_delete.
4658
60
        RowsetDeleteTask task;
4659
60
        task.rowset_meta = rs_meta;
4660
60
        task.versioned_rowset_key =
4661
60
                versioned::meta_rowset_load_key({instance_id_, tablet_id, rs_meta.end_version()});
4662
60
        task.non_versioned_rowset_key =
4663
60
                meta_rowset_key({instance_id_, tablet_id, rs_meta.end_version()});
4664
60
        task.versionstamp = versionstamp;
4665
60
        all_tasks.push_back(std::move(task));
4666
60
    }
4667
4668
11
    for (const auto& [rs_meta, versionstamp] : compact_rowset_metas) {
4669
0
        update_rowset_stats(rs_meta);
4670
        // Version 0-1 rowset has no resource_id and no actual data files,
4671
        // but still needs ref_count key cleanup, so we add it to all_tasks.
4672
        // It will be filtered out in Phase 2 when building rowsets_to_delete.
4673
0
        RowsetDeleteTask task;
4674
0
        task.rowset_meta = rs_meta;
4675
0
        task.versioned_rowset_key = versioned::meta_rowset_compact_key(
4676
0
                {instance_id_, tablet_id, rs_meta.end_version()});
4677
0
        task.non_versioned_rowset_key =
4678
0
                meta_rowset_key({instance_id_, tablet_id, rs_meta.end_version()});
4679
0
        task.versionstamp = versionstamp;
4680
0
        all_tasks.push_back(std::move(task));
4681
0
    }
4682
4683
11
    auto handle_recycle_rowset_kv = [&](std::string_view k, std::string_view v) {
4684
0
        RecycleRowsetPB recycle_rowset;
4685
0
        if (!recycle_rowset.ParseFromArray(v.data(), v.size())) {
4686
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4687
0
            return -1;
4688
0
        }
4689
0
        if (!recycle_rowset.has_type()) { // compatible with old version `RecycleRowsetPB`
4690
0
            if (!recycle_rowset.has_resource_id()) [[unlikely]] { // impossible
4691
                // in old version, keep this key-value pair and it needs to be checked manually
4692
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4693
0
                return -1;
4694
0
            }
4695
0
            if (recycle_rowset.resource_id().empty()) [[unlikely]] {
4696
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4697
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4698
0
                          << hex(k) << " value=" << proto_to_json(recycle_rowset);
4699
0
                return -1;
4700
0
            }
4701
            // decode rowset_id
4702
0
            auto k1 = k;
4703
0
            k1.remove_prefix(1);
4704
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4705
0
            decode_key(&k1, &out);
4706
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4707
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4708
0
            LOG_INFO("delete old-version rowset data")
4709
0
                    .tag("instance_id", instance_id_)
4710
0
                    .tag("tablet_id", tablet_id)
4711
0
                    .tag("rowset_id", rowset_id);
4712
4713
            // Old version RecycleRowsetPB lacks full rowset_meta info (num_segments, schema, etc.),
4714
            // so we must use prefix deletion directly instead of batch delete.
4715
0
            concurrent_delete_executor.add(
4716
0
                    [tablet_id, resource_id = recycle_rowset.resource_id(), rowset_id, this]() {
4717
                        // delete by prefix, the recycle rowset key will be deleted by range later.
4718
0
                        return delete_rowset_data(resource_id, tablet_id, rowset_id);
4719
0
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
4720
0
        } else {
4721
0
            const auto& rowset_meta = recycle_rowset.rowset_meta();
4722
            // Version 0-1 rowset has no resource_id and no actual data files,
4723
            // but still needs ref_count key cleanup, so we add it to all_tasks.
4724
            // It will be filtered out in Phase 2 when building rowsets_to_delete.
4725
0
            RowsetDeleteTask task;
4726
0
            task.rowset_meta = rowset_meta;
4727
0
            task.recycle_rowset_key = k;
4728
0
            all_tasks.push_back(std::move(task));
4729
0
        }
4730
0
        return 0;
4731
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
4732
4733
11
    if (scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_recycle_rowset_kv))) {
4734
0
        LOG_WARNING("failed to recycle rowset kv of tablet")
4735
0
                .tag("tablet id", tablet_id)
4736
0
                .tag("instance_id", instance_id_)
4737
0
                .tag("reason", "failed to scan and recycle RecycleRowsetPB");
4738
0
        ret = -1;
4739
0
    }
4740
4741
    // Phase 1: Classify tasks by ref_count
4742
11
    std::vector<RowsetDeleteTask> batch_delete_tasks;
4743
60
    for (auto& task : all_tasks) {
4744
60
        int classify_ret = classify_rowset_task_by_ref_count(task, batch_delete_tasks);
4745
60
        if (classify_ret < 0) {
4746
0
            LOG_WARNING("failed to classify rowset task, fallback to old logic")
4747
0
                    .tag("instance_id", instance_id_)
4748
0
                    .tag("tablet_id", tablet_id)
4749
0
                    .tag("rowset_id", task.rowset_meta.rowset_id_v2());
4750
0
            concurrent_delete_executor.add([this, t = std::move(task)]() mutable {
4751
0
                return recycle_rowset_meta_and_data(t);
4752
0
            });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEEN3$_4clEv
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEEN3$_4clEv
4753
0
        }
4754
60
    }
4755
4756
11
    g_bvar_recycler_batch_delete_rowset_plan_count.put(instance_id_, batch_delete_tasks.size());
4757
4758
11
    LOG_INFO("batch delete plan created")
4759
11
            .tag("instance_id", instance_id_)
4760
11
            .tag("tablet_id", tablet_id)
4761
11
            .tag("plan_count", batch_delete_tasks.size());
4762
4763
    // Phase 2: Execute batch delete using existing delete_rowset_data
4764
11
    if (!batch_delete_tasks.empty()) {
4765
10
        std::map<std::string, RowsetMetaCloudPB> rowsets_to_delete;
4766
49
        for (const auto& task : batch_delete_tasks) {
4767
            // Version 0-1 rowset has no resource_id and no actual data files, skip it
4768
49
            if (task.rowset_meta.resource_id().empty()) {
4769
10
                LOG_INFO("skip rowset with empty resource_id in batch delete")
4770
10
                        .tag("instance_id", instance_id_)
4771
10
                        .tag("tablet_id", tablet_id)
4772
10
                        .tag("rowset_id", task.rowset_meta.rowset_id_v2());
4773
10
                continue;
4774
10
            }
4775
39
            rowsets_to_delete[task.rowset_meta.rowset_id_v2()] = task.rowset_meta;
4776
39
        }
4777
4778
        // Only call delete_rowset_data if there are rowsets with actual data to delete
4779
10
        bool delete_success = true;
4780
10
        if (!rowsets_to_delete.empty()) {
4781
9
            RecyclerMetricsContext batch_metrics_context(instance_id_,
4782
9
                                                         "batch_delete_versioned_tablet");
4783
9
            int delete_ret = delete_rowset_data(
4784
9
                    rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET, batch_metrics_context);
4785
9
            if (delete_ret != 0) {
4786
0
                LOG_WARNING("batch delete execution failed")
4787
0
                        .tag("instance_id", instance_id_)
4788
0
                        .tag("tablet_id", tablet_id);
4789
0
                g_bvar_recycler_batch_delete_failures.put(instance_id_, 1);
4790
0
                ret = -1;
4791
0
                delete_success = false;
4792
0
            }
4793
9
        }
4794
4795
        // Phase 3: Only cleanup metadata if data deletion succeeded.
4796
        // If deletion failed, keep recycle_rowset_key so next round will retry.
4797
10
        if (delete_success) {
4798
10
            int cleanup_ret = cleanup_rowset_metadata(batch_delete_tasks);
4799
10
            if (cleanup_ret != 0) {
4800
0
                LOG_WARNING("batch delete cleanup failed")
4801
0
                        .tag("instance_id", instance_id_)
4802
0
                        .tag("tablet_id", tablet_id);
4803
0
                ret = -1;
4804
0
            }
4805
10
        }
4806
10
    }
4807
4808
    // Always wait for fallback tasks to complete before returning
4809
11
    bool finished = true;
4810
11
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
4811
11
    for (int r : rets) {
4812
0
        if (r != 0) {
4813
0
            ret = -1;
4814
0
        }
4815
0
    }
4816
4817
11
    ret = finished ? ret : -1;
4818
4819
11
    if (ret != 0) { // failed recycle tablet data
4820
0
        LOG_WARNING("recycle versioned tablet failed")
4821
0
                .tag("finished", finished)
4822
0
                .tag("ret", ret)
4823
0
                .tag("instance_id", instance_id_)
4824
0
                .tag("tablet_id", tablet_id);
4825
0
        return ret;
4826
0
    }
4827
4828
11
    tablet_metrics_context_.total_recycled_data_size +=
4829
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4830
11
    tablet_metrics_context_.total_recycled_num += 1;
4831
11
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
4832
11
    segment_metrics_context_.total_recycled_data_size +=
4833
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4834
11
    metrics_context.total_recycled_data_size +=
4835
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4836
11
    tablet_metrics_context_.report();
4837
11
    segment_metrics_context_.report();
4838
11
    metrics_context.report();
4839
4840
11
    txn.reset();
4841
11
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4842
0
        LOG_WARNING("failed to recycle tablet ")
4843
0
                .tag("tablet id", tablet_id)
4844
0
                .tag("instance_id", instance_id_)
4845
0
                .tag("reason", "failed to create txn");
4846
0
        ret = -1;
4847
0
    }
4848
    // delete all rowset kv in this tablet
4849
11
    txn->remove(rs_key0, rs_key1);
4850
11
    txn->remove(recyc_rs_key0, recyc_rs_key1);
4851
4852
    // remove delete bitmap for MoW table
4853
11
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
4854
11
    txn->remove(pending_key);
4855
11
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
4856
11
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
4857
11
    txn->remove(delete_bitmap_start, delete_bitmap_end);
4858
4859
11
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
4860
11
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
4861
11
    txn->remove(dbm_start_key, dbm_end_key);
4862
11
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
4863
11
              << " end=" << hex(dbm_end_key);
4864
4865
11
    std::string versioned_idx_key = versioned::tablet_index_key({instance_id_, tablet_id});
4866
11
    std::string tablet_index_val;
4867
11
    err = txn->get(versioned_idx_key, &tablet_index_val);
4868
11
    if (err != TxnErrorCode::TXN_KEY_NOT_FOUND && err != TxnErrorCode::TXN_OK) {
4869
0
        LOG_WARNING("failed to get tablet index kv")
4870
0
                .tag("instance_id", instance_id_)
4871
0
                .tag("tablet_id", tablet_id)
4872
0
                .tag("err", err);
4873
0
        ret = -1;
4874
11
    } else if (err == TxnErrorCode::TXN_OK) {
4875
        // If the tablet index kv exists, we need to delete it
4876
10
        TabletIndexPB tablet_index_pb;
4877
10
        if (!tablet_index_pb.ParseFromString(tablet_index_val)) {
4878
0
            LOG_WARNING("failed to parse tablet index pb")
4879
0
                    .tag("instance_id", instance_id_)
4880
0
                    .tag("tablet_id", tablet_id);
4881
0
            ret = -1;
4882
10
        } else {
4883
10
            std::string versioned_inverted_idx_key = versioned::tablet_inverted_index_key(
4884
10
                    {instance_id_, tablet_index_pb.db_id(), tablet_index_pb.table_id(),
4885
10
                     tablet_index_pb.index_id(), tablet_index_pb.partition_id(), tablet_id});
4886
10
            txn->remove(versioned_inverted_idx_key);
4887
10
            txn->remove(versioned_idx_key);
4888
10
        }
4889
10
    }
4890
4891
11
    err = txn->commit();
4892
11
    if (err != TxnErrorCode::TXN_OK) {
4893
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
4894
0
        ret = -1;
4895
0
    }
4896
4897
11
    if (ret == 0) {
4898
        // All object files under tablet have been deleted
4899
11
        std::lock_guard lock(recycled_tablets_mtx_);
4900
11
        recycled_tablets_.insert(tablet_id);
4901
11
    }
4902
4903
11
    return ret;
4904
11
}
4905
4906
70
int InstanceRecycler::recycle_rowsets() {
4907
70
    if (should_recycle_versioned_keys()) {
4908
5
        return recycle_versioned_rowsets();
4909
5
    }
4910
4911
65
    const std::string task_name = "recycle_rowsets";
4912
65
    int64_t num_scanned = 0;
4913
65
    int64_t num_expired = 0;
4914
65
    int64_t num_prepare = 0;
4915
65
    int64_t num_compacted = 0;
4916
65
    int64_t num_empty_rowset = 0;
4917
65
    size_t total_rowset_key_size = 0;
4918
65
    size_t total_rowset_value_size = 0;
4919
65
    size_t expired_rowset_size = 0;
4920
65
    std::atomic_long num_recycled = 0;
4921
65
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4922
4923
65
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
4924
65
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
4925
65
    std::string recyc_rs_key0;
4926
65
    std::string recyc_rs_key1;
4927
65
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
4928
65
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
4929
4930
65
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
4931
4932
65
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4933
65
    register_recycle_task(task_name, start_time);
4934
4935
65
    DORIS_CLOUD_DEFER {
4936
65
        unregister_recycle_task(task_name);
4937
65
        int64_t cost =
4938
65
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4939
65
        metrics_context.finish_report();
4940
65
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4941
65
                .tag("instance_id", instance_id_)
4942
65
                .tag("num_scanned", num_scanned)
4943
65
                .tag("num_expired", num_expired)
4944
65
                .tag("num_recycled", num_recycled)
4945
65
                .tag("num_recycled.prepare", num_prepare)
4946
65
                .tag("num_recycled.compacted", num_compacted)
4947
65
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4948
65
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4949
65
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4950
65
                .tag("expired_rowset_meta_size", expired_rowset_size);
4951
65
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
Line
Count
Source
4935
7
    DORIS_CLOUD_DEFER {
4936
7
        unregister_recycle_task(task_name);
4937
7
        int64_t cost =
4938
7
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4939
7
        metrics_context.finish_report();
4940
7
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4941
7
                .tag("instance_id", instance_id_)
4942
7
                .tag("num_scanned", num_scanned)
4943
7
                .tag("num_expired", num_expired)
4944
7
                .tag("num_recycled", num_recycled)
4945
7
                .tag("num_recycled.prepare", num_prepare)
4946
7
                .tag("num_recycled.compacted", num_compacted)
4947
7
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4948
7
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4949
7
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4950
7
                .tag("expired_rowset_meta_size", expired_rowset_size);
4951
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
Line
Count
Source
4935
58
    DORIS_CLOUD_DEFER {
4936
58
        unregister_recycle_task(task_name);
4937
58
        int64_t cost =
4938
58
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4939
58
        metrics_context.finish_report();
4940
58
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4941
58
                .tag("instance_id", instance_id_)
4942
58
                .tag("num_scanned", num_scanned)
4943
58
                .tag("num_expired", num_expired)
4944
58
                .tag("num_recycled", num_recycled)
4945
58
                .tag("num_recycled.prepare", num_prepare)
4946
58
                .tag("num_recycled.compacted", num_compacted)
4947
58
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4948
58
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4949
58
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4950
58
                .tag("expired_rowset_meta_size", expired_rowset_size);
4951
58
    };
4952
4953
65
    struct RecycleRowsetEntry {
4954
65
        std::string key;
4955
65
        doris::RowsetMetaCloudPB meta;
4956
65
    };
4957
65
    struct RecycleRowsetDeleteJob {
4958
65
        std::vector<std::string> keys;
4959
65
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets;
4960
65
    };
4961
    // Store the scanned recycle key with rowset meta. The scanned key is the actual KV key to delete.
4962
65
    std::vector<RecycleRowsetEntry> rowsets;
4963
65
    int64_t current_tablet_id = -1;
4964
65
    int64_t recycled_rowset_count_for_current_tablet = 0;
4965
65
    bool current_tablet_skip_logged = false;
4966
65
    std::string next_scan_begin;
4967
65
    const int64_t rowset_batch_size_per_tablet =
4968
65
            std::max(1, config::recycle_rowsets_per_tablet_batch_size);
4969
65
    const int64_t delete_rowset_batch_size =
4970
65
            std::min(500000, config::recycle_rowsets_delete_batch_size);
4971
7.84k
    auto try_reserve_tablet_recycle_slot = [&](int64_t tablet_id) -> bool {
4972
7.84k
        if (current_tablet_id != tablet_id) {
4973
44
            current_tablet_id = tablet_id;
4974
44
            recycled_rowset_count_for_current_tablet = 0;
4975
44
            current_tablet_skip_logged = false;
4976
44
        }
4977
7.84k
        if (recycled_rowset_count_for_current_tablet >= rowset_batch_size_per_tablet) {
4978
13
            if (!current_tablet_skip_logged) {
4979
13
                LOG_INFO(
4980
13
                        "skip recycle rowsets for tablet because per-tablet batch limit is reached")
4981
13
                        .tag("instance_id", instance_id_)
4982
13
                        .tag("tablet_id", tablet_id)
4983
13
                        .tag("limit", rowset_batch_size_per_tablet);
4984
13
                current_tablet_skip_logged = true;
4985
13
            }
4986
13
            const int64_t next_tablet_id = tablet_id == INT64_MAX ? INT64_MAX : tablet_id + 1;
4987
13
            recycle_rowset_key({instance_id_, next_tablet_id, ""}, &next_scan_begin);
4988
13
            return false;
4989
13
        }
4990
7.82k
        ++recycled_rowset_count_for_current_tablet;
4991
7.82k
        return true;
4992
7.84k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_8clEl
Line
Count
Source
4971
7
    auto try_reserve_tablet_recycle_slot = [&](int64_t tablet_id) -> bool {
4972
7
        if (current_tablet_id != tablet_id) {
4973
7
            current_tablet_id = tablet_id;
4974
7
            recycled_rowset_count_for_current_tablet = 0;
4975
7
            current_tablet_skip_logged = false;
4976
7
        }
4977
7
        if (recycled_rowset_count_for_current_tablet >= rowset_batch_size_per_tablet) {
4978
0
            if (!current_tablet_skip_logged) {
4979
0
                LOG_INFO(
4980
0
                        "skip recycle rowsets for tablet because per-tablet batch limit is reached")
4981
0
                        .tag("instance_id", instance_id_)
4982
0
                        .tag("tablet_id", tablet_id)
4983
0
                        .tag("limit", rowset_batch_size_per_tablet);
4984
0
                current_tablet_skip_logged = true;
4985
0
            }
4986
0
            const int64_t next_tablet_id = tablet_id == INT64_MAX ? INT64_MAX : tablet_id + 1;
4987
0
            recycle_rowset_key({instance_id_, next_tablet_id, ""}, &next_scan_begin);
4988
0
            return false;
4989
0
        }
4990
7
        ++recycled_rowset_count_for_current_tablet;
4991
7
        return true;
4992
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_8clEl
Line
Count
Source
4971
7.83k
    auto try_reserve_tablet_recycle_slot = [&](int64_t tablet_id) -> bool {
4972
7.83k
        if (current_tablet_id != tablet_id) {
4973
37
            current_tablet_id = tablet_id;
4974
37
            recycled_rowset_count_for_current_tablet = 0;
4975
37
            current_tablet_skip_logged = false;
4976
37
        }
4977
7.83k
        if (recycled_rowset_count_for_current_tablet >= rowset_batch_size_per_tablet) {
4978
13
            if (!current_tablet_skip_logged) {
4979
13
                LOG_INFO(
4980
13
                        "skip recycle rowsets for tablet because per-tablet batch limit is reached")
4981
13
                        .tag("instance_id", instance_id_)
4982
13
                        .tag("tablet_id", tablet_id)
4983
13
                        .tag("limit", rowset_batch_size_per_tablet);
4984
13
                current_tablet_skip_logged = true;
4985
13
            }
4986
13
            const int64_t next_tablet_id = tablet_id == INT64_MAX ? INT64_MAX : tablet_id + 1;
4987
13
            recycle_rowset_key({instance_id_, next_tablet_id, ""}, &next_scan_begin);
4988
13
            return false;
4989
13
        }
4990
7.82k
        ++recycled_rowset_count_for_current_tablet;
4991
7.82k
        return true;
4992
7.83k
    };
4993
7.84k
    auto next_scan_begin_getter = [&](std::string* begin) -> bool {
4994
7.84k
        if (next_scan_begin.empty()) {
4995
7.82k
            return false;
4996
7.82k
        }
4997
13
        *begin = std::move(next_scan_begin);
4998
13
        next_scan_begin.clear();
4999
13
        return true;
5000
7.84k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clEPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4993
7
    auto next_scan_begin_getter = [&](std::string* begin) -> bool {
4994
7
        if (next_scan_begin.empty()) {
4995
7
            return false;
4996
7
        }
4997
0
        *begin = std::move(next_scan_begin);
4998
0
        next_scan_begin.clear();
4999
0
        return true;
5000
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clEPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4993
7.83k
    auto next_scan_begin_getter = [&](std::string* begin) -> bool {
4994
7.83k
        if (next_scan_begin.empty()) {
4995
7.82k
            return false;
4996
7.82k
        }
4997
13
        *begin = std::move(next_scan_begin);
4998
13
        next_scan_begin.clear();
4999
13
        return true;
5000
7.83k
    };
5001
5002
65
    std::vector<std::string> rowset_keys_to_mark_recycled;
5003
65
    std::vector<std::string> rowset_keys_to_abort;
5004
65
    std::vector<std::string> prepare_rowset_keys_to_delete;
5005
5006
    // Store keys of rowset recycled by background workers
5007
65
    std::mutex async_recycled_rowset_keys_mutex;
5008
65
    std::vector<std::string> async_recycled_rowset_keys;
5009
65
    std::vector<std::string> rowset_keys_without_data;
5010
65
    auto worker_pool = std::make_unique<SimpleThreadPool>(
5011
65
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
5012
65
    worker_pool->start();
5013
65
    auto mark_abort_worker_pool = std::make_unique<SimpleThreadPool>(
5014
65
            config::instance_recycler_worker_pool_size, "recycle_rs_mark_abort");
5015
65
    mark_abort_worker_pool->start();
5016
    // TODO batch delete
5017
4.03k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5018
4.03k
        std::string dbm_start_key =
5019
4.03k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5020
4.03k
        std::string dbm_end_key = dbm_start_key;
5021
4.03k
        encode_int64(INT64_MAX, &dbm_end_key);
5022
4.03k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5023
4.03k
        if (ret != 0) {
5024
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5025
0
                         << instance_id_;
5026
0
        }
5027
4.03k
        return ret;
5028
4.03k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_7clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5017
2
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5018
2
        std::string dbm_start_key =
5019
2
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5020
2
        std::string dbm_end_key = dbm_start_key;
5021
2
        encode_int64(INT64_MAX, &dbm_end_key);
5022
2
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5023
2
        if (ret != 0) {
5024
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5025
0
                         << instance_id_;
5026
0
        }
5027
2
        return ret;
5028
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_7clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5017
4.03k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5018
4.03k
        std::string dbm_start_key =
5019
4.03k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5020
4.03k
        std::string dbm_end_key = dbm_start_key;
5021
4.03k
        encode_int64(INT64_MAX, &dbm_end_key);
5022
4.03k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5023
4.03k
        if (ret != 0) {
5024
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5025
0
                         << instance_id_;
5026
0
        }
5027
4.03k
        return ret;
5028
4.03k
    };
5029
65
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
5030
902
                                            int64_t tablet_id, const std::string& rowset_id) {
5031
        // Try to delete rowset data in background thread
5032
902
        int ret = worker_pool->submit_with_timeout(
5033
902
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5034
854
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5035
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5036
0
                        return;
5037
0
                    }
5038
854
                    std::vector<std::string> keys;
5039
854
                    {
5040
854
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5041
854
                        async_recycled_rowset_keys.push_back(std::move(key));
5042
854
                        if (async_recycled_rowset_keys.size() > 100) {
5043
6
                            keys.swap(async_recycled_rowset_keys);
5044
6
                        }
5045
854
                    }
5046
854
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
5047
854
                    if (keys.empty()) return;
5048
6
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5049
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5050
0
                                     << instance_id_;
5051
6
                    } else {
5052
6
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5053
6
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5054
6
                                           num_recycled, start_time);
5055
6
                    }
5056
6
                },
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_5clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
5033
2
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5034
2
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5035
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5036
0
                        return;
5037
0
                    }
5038
2
                    std::vector<std::string> keys;
5039
2
                    {
5040
2
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5041
2
                        async_recycled_rowset_keys.push_back(std::move(key));
5042
2
                        if (async_recycled_rowset_keys.size() > 100) {
5043
0
                            keys.swap(async_recycled_rowset_keys);
5044
0
                        }
5045
2
                    }
5046
2
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
5047
2
                    if (keys.empty()) return;
5048
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5049
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5050
0
                                     << instance_id_;
5051
0
                    } else {
5052
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5053
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5054
0
                                           num_recycled, start_time);
5055
0
                    }
5056
0
                },
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_5clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
5033
852
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5034
852
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5035
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5036
0
                        return;
5037
0
                    }
5038
852
                    std::vector<std::string> keys;
5039
852
                    {
5040
852
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5041
852
                        async_recycled_rowset_keys.push_back(std::move(key));
5042
852
                        if (async_recycled_rowset_keys.size() > 100) {
5043
6
                            keys.swap(async_recycled_rowset_keys);
5044
6
                        }
5045
852
                    }
5046
852
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
5047
852
                    if (keys.empty()) return;
5048
6
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5049
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5050
0
                                     << instance_id_;
5051
6
                    } else {
5052
6
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5053
6
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5054
6
                                           num_recycled, start_time);
5055
6
                    }
5056
6
                },
5057
902
                0);
5058
902
        if (ret == 0) return 0;
5059
        // Submit task failed, delete rowset data in current thread
5060
48
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5061
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5062
0
            return -1;
5063
0
        }
5064
48
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
5065
0
            return -1;
5066
0
        }
5067
48
        rowset_keys_without_data.push_back(std::move(key));
5068
48
        return 0;
5069
48
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_5clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
5030
2
                                            int64_t tablet_id, const std::string& rowset_id) {
5031
        // Try to delete rowset data in background thread
5032
2
        int ret = worker_pool->submit_with_timeout(
5033
2
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5034
2
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5035
2
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5036
2
                        return;
5037
2
                    }
5038
2
                    std::vector<std::string> keys;
5039
2
                    {
5040
2
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5041
2
                        async_recycled_rowset_keys.push_back(std::move(key));
5042
2
                        if (async_recycled_rowset_keys.size() > 100) {
5043
2
                            keys.swap(async_recycled_rowset_keys);
5044
2
                        }
5045
2
                    }
5046
2
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
5047
2
                    if (keys.empty()) return;
5048
2
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5049
2
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5050
2
                                     << instance_id_;
5051
2
                    } else {
5052
2
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5053
2
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5054
2
                                           num_recycled, start_time);
5055
2
                    }
5056
2
                },
5057
2
                0);
5058
2
        if (ret == 0) return 0;
5059
        // Submit task failed, delete rowset data in current thread
5060
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5061
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5062
0
            return -1;
5063
0
        }
5064
0
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
5065
0
            return -1;
5066
0
        }
5067
0
        rowset_keys_without_data.push_back(std::move(key));
5068
0
        return 0;
5069
0
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_5clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
5030
900
                                            int64_t tablet_id, const std::string& rowset_id) {
5031
        // Try to delete rowset data in background thread
5032
900
        int ret = worker_pool->submit_with_timeout(
5033
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5034
900
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5035
900
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5036
900
                        return;
5037
900
                    }
5038
900
                    std::vector<std::string> keys;
5039
900
                    {
5040
900
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5041
900
                        async_recycled_rowset_keys.push_back(std::move(key));
5042
900
                        if (async_recycled_rowset_keys.size() > 100) {
5043
900
                            keys.swap(async_recycled_rowset_keys);
5044
900
                        }
5045
900
                    }
5046
900
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
5047
900
                    if (keys.empty()) return;
5048
900
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5049
900
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5050
900
                                     << instance_id_;
5051
900
                    } else {
5052
900
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5053
900
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5054
900
                                           num_recycled, start_time);
5055
900
                    }
5056
900
                },
5057
900
                0);
5058
900
        if (ret == 0) return 0;
5059
        // Submit task failed, delete rowset data in current thread
5060
48
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5061
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5062
0
            return -1;
5063
0
        }
5064
48
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
5065
0
            return -1;
5066
0
        }
5067
48
        rowset_keys_without_data.push_back(std::move(key));
5068
48
        return 0;
5069
48
    };
5070
5071
65
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5072
5073
7.84k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
5074
7.84k
        ++num_scanned;
5075
7.84k
        total_rowset_key_size += k.size();
5076
7.84k
        total_rowset_value_size += v.size();
5077
7.84k
        RecycleRowsetPB rowset;
5078
7.84k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5079
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
5080
0
            return -1;
5081
0
        }
5082
5083
7.84k
        int64_t current_time = ::time(nullptr);
5084
7.84k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5085
5086
7.84k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5087
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5088
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
5089
7.84k
        if (current_time < expiration) { // not expired
5090
0
            return 0;
5091
0
        }
5092
7.84k
        ++num_expired;
5093
7.84k
        expired_rowset_size += v.size();
5094
5095
7.84k
        int64_t tablet_id =
5096
7.84k
                rowset.has_type() ? rowset.rowset_meta().tablet_id() : rowset.tablet_id();
5097
7.84k
        if (!try_reserve_tablet_recycle_slot(tablet_id)) {
5098
13
            return 0;
5099
13
        }
5100
5101
7.82k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
5102
252
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
5103
                // in old version, keep this key-value pair and it needs to be checked manually
5104
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5105
0
                return -1;
5106
0
            }
5107
252
            if (rowset.resource_id().empty()) [[unlikely]] {
5108
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
5109
2
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
5110
2
                          << hex(k) << " value=" << proto_to_json(rowset);
5111
2
                rowset_keys_without_data.emplace_back(k);
5112
2
                return 0;
5113
2
            }
5114
            // decode rowset_id
5115
250
            auto k1 = k;
5116
250
            k1.remove_prefix(1);
5117
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5118
250
            decode_key(&k1, &out);
5119
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5120
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5121
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5122
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
5123
250
                      << " task_type=" << metrics_context.operation_type;
5124
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5125
250
                                             rowset.tablet_id(), rowset_id) != 0) {
5126
0
                return -1;
5127
0
            }
5128
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
5129
250
            metrics_context.total_recycled_num++;
5130
250
            segment_metrics_context_.total_recycled_data_size +=
5131
250
                    rowset.rowset_meta().total_disk_size();
5132
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
5133
250
            return 0;
5134
250
        }
5135
5136
7.57k
        auto* rowset_meta = rowset.mutable_rowset_meta();
5137
7.57k
        if (config::enable_mark_delete_rowset_before_recycle) {
5138
7.56k
            if (need_mark_rowset_as_recycled(rowset)) {
5139
3.78k
                rowset_keys_to_mark_recycled.emplace_back(k);
5140
3.78k
                LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv "
5141
3.78k
                             "at next turn, instance_id="
5142
3.78k
                          << instance_id_ << " tablet_id=" << rowset_meta->tablet_id()
5143
3.78k
                          << " version=[" << rowset_meta->start_version() << '-'
5144
3.78k
                          << rowset_meta->end_version() << "]";
5145
3.78k
                return 0;
5146
3.78k
            }
5147
7.56k
        }
5148
5149
3.79k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle &&
5150
3.79k
            rowset_meta->end_version() != 1) {
5151
3.79k
            if (make_deferred_abort_task(rowset).has_value()) {
5152
2
                LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, "
5153
2
                             "instance_id="
5154
2
                          << instance_id_ << " tablet_id=" << rowset_meta->tablet_id()
5155
2
                          << " version=[" << rowset_meta->start_version() << '-'
5156
2
                          << rowset_meta->end_version() << "]";
5157
2
                rowset_keys_to_abort.emplace_back(k);
5158
2
            }
5159
3.79k
        }
5160
5161
        // TODO(plat1ko): check rowset not referenced
5162
3.79k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5163
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5164
0
                LOG_INFO("recycle rowset that has empty resource id");
5165
0
            } else {
5166
                // other situations, keep this key-value pair and it needs to be checked manually
5167
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5168
0
                return -1;
5169
0
            }
5170
0
        }
5171
3.79k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5172
3.79k
                  << " tablet_id=" << rowset_meta->tablet_id()
5173
3.79k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5174
3.79k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5175
3.79k
                  << "] txn_id=" << rowset_meta->txn_id()
5176
3.79k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5177
3.79k
                  << " rowset_meta_size=" << v.size()
5178
3.79k
                  << " creation_time=" << rowset_meta->creation_time()
5179
3.79k
                  << " task_type=" << metrics_context.operation_type;
5180
3.79k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5181
            // unable to calculate file path, can only be deleted by rowset id prefix
5182
652
            num_prepare += 1;
5183
652
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
5184
652
                                             rowset_meta->tablet_id(),
5185
652
                                             rowset_meta->rowset_id_v2()) != 0) {
5186
0
                return -1;
5187
0
            }
5188
3.13k
        } else {
5189
3.13k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
5190
3.13k
            if (rowset_meta->num_segments() > 0) { // Skip empty rowset
5191
3.13k
                rowsets.emplace_back(std::string(k), std::move(*rowset_meta));
5192
3.13k
            } else {
5193
3
                ++num_empty_rowset;
5194
3
                rowset_keys_without_data.emplace_back(k);
5195
3
            }
5196
3.13k
        }
5197
3.79k
        return 0;
5198
3.79k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5073
7
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
5074
7
        ++num_scanned;
5075
7
        total_rowset_key_size += k.size();
5076
7
        total_rowset_value_size += v.size();
5077
7
        RecycleRowsetPB rowset;
5078
7
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5079
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
5080
0
            return -1;
5081
0
        }
5082
5083
7
        int64_t current_time = ::time(nullptr);
5084
7
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5085
5086
7
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5087
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5088
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
5089
7
        if (current_time < expiration) { // not expired
5090
0
            return 0;
5091
0
        }
5092
7
        ++num_expired;
5093
7
        expired_rowset_size += v.size();
5094
5095
7
        int64_t tablet_id =
5096
7
                rowset.has_type() ? rowset.rowset_meta().tablet_id() : rowset.tablet_id();
5097
7
        if (!try_reserve_tablet_recycle_slot(tablet_id)) {
5098
0
            return 0;
5099
0
        }
5100
5101
7
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
5102
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
5103
                // in old version, keep this key-value pair and it needs to be checked manually
5104
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5105
0
                return -1;
5106
0
            }
5107
0
            if (rowset.resource_id().empty()) [[unlikely]] {
5108
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
5109
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
5110
0
                          << hex(k) << " value=" << proto_to_json(rowset);
5111
0
                rowset_keys_without_data.emplace_back(k);
5112
0
                return 0;
5113
0
            }
5114
            // decode rowset_id
5115
0
            auto k1 = k;
5116
0
            k1.remove_prefix(1);
5117
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5118
0
            decode_key(&k1, &out);
5119
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5120
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5121
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5122
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
5123
0
                      << " task_type=" << metrics_context.operation_type;
5124
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5125
0
                                             rowset.tablet_id(), rowset_id) != 0) {
5126
0
                return -1;
5127
0
            }
5128
0
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
5129
0
            metrics_context.total_recycled_num++;
5130
0
            segment_metrics_context_.total_recycled_data_size +=
5131
0
                    rowset.rowset_meta().total_disk_size();
5132
0
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
5133
0
            return 0;
5134
0
        }
5135
5136
7
        auto* rowset_meta = rowset.mutable_rowset_meta();
5137
7
        if (config::enable_mark_delete_rowset_before_recycle) {
5138
7
            if (need_mark_rowset_as_recycled(rowset)) {
5139
5
                rowset_keys_to_mark_recycled.emplace_back(k);
5140
5
                LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv "
5141
5
                             "at next turn, instance_id="
5142
5
                          << instance_id_ << " tablet_id=" << rowset_meta->tablet_id()
5143
5
                          << " version=[" << rowset_meta->start_version() << '-'
5144
5
                          << rowset_meta->end_version() << "]";
5145
5
                return 0;
5146
5
            }
5147
7
        }
5148
5149
2
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle &&
5150
2
            rowset_meta->end_version() != 1) {
5151
2
            if (make_deferred_abort_task(rowset).has_value()) {
5152
2
                LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, "
5153
2
                             "instance_id="
5154
2
                          << instance_id_ << " tablet_id=" << rowset_meta->tablet_id()
5155
2
                          << " version=[" << rowset_meta->start_version() << '-'
5156
2
                          << rowset_meta->end_version() << "]";
5157
2
                rowset_keys_to_abort.emplace_back(k);
5158
2
            }
5159
2
        }
5160
5161
        // TODO(plat1ko): check rowset not referenced
5162
2
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5163
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5164
0
                LOG_INFO("recycle rowset that has empty resource id");
5165
0
            } else {
5166
                // other situations, keep this key-value pair and it needs to be checked manually
5167
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5168
0
                return -1;
5169
0
            }
5170
0
        }
5171
2
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5172
2
                  << " tablet_id=" << rowset_meta->tablet_id()
5173
2
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5174
2
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5175
2
                  << "] txn_id=" << rowset_meta->txn_id()
5176
2
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5177
2
                  << " rowset_meta_size=" << v.size()
5178
2
                  << " creation_time=" << rowset_meta->creation_time()
5179
2
                  << " task_type=" << metrics_context.operation_type;
5180
2
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5181
            // unable to calculate file path, can only be deleted by rowset id prefix
5182
2
            num_prepare += 1;
5183
2
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
5184
2
                                             rowset_meta->tablet_id(),
5185
2
                                             rowset_meta->rowset_id_v2()) != 0) {
5186
0
                return -1;
5187
0
            }
5188
2
        } else {
5189
0
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
5190
0
            if (rowset_meta->num_segments() > 0) { // Skip empty rowset
5191
0
                rowsets.emplace_back(std::string(k), std::move(*rowset_meta));
5192
0
            } else {
5193
0
                ++num_empty_rowset;
5194
0
                rowset_keys_without_data.emplace_back(k);
5195
0
            }
5196
0
        }
5197
2
        return 0;
5198
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5073
7.83k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
5074
7.83k
        ++num_scanned;
5075
7.83k
        total_rowset_key_size += k.size();
5076
7.83k
        total_rowset_value_size += v.size();
5077
7.83k
        RecycleRowsetPB rowset;
5078
7.83k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5079
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
5080
0
            return -1;
5081
0
        }
5082
5083
7.83k
        int64_t current_time = ::time(nullptr);
5084
7.83k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5085
5086
7.83k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5087
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5088
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
5089
7.83k
        if (current_time < expiration) { // not expired
5090
0
            return 0;
5091
0
        }
5092
7.83k
        ++num_expired;
5093
7.83k
        expired_rowset_size += v.size();
5094
5095
7.83k
        int64_t tablet_id =
5096
7.83k
                rowset.has_type() ? rowset.rowset_meta().tablet_id() : rowset.tablet_id();
5097
7.83k
        if (!try_reserve_tablet_recycle_slot(tablet_id)) {
5098
13
            return 0;
5099
13
        }
5100
5101
7.82k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
5102
252
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
5103
                // in old version, keep this key-value pair and it needs to be checked manually
5104
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5105
0
                return -1;
5106
0
            }
5107
252
            if (rowset.resource_id().empty()) [[unlikely]] {
5108
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
5109
2
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
5110
2
                          << hex(k) << " value=" << proto_to_json(rowset);
5111
2
                rowset_keys_without_data.emplace_back(k);
5112
2
                return 0;
5113
2
            }
5114
            // decode rowset_id
5115
250
            auto k1 = k;
5116
250
            k1.remove_prefix(1);
5117
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5118
250
            decode_key(&k1, &out);
5119
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5120
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5121
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5122
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
5123
250
                      << " task_type=" << metrics_context.operation_type;
5124
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5125
250
                                             rowset.tablet_id(), rowset_id) != 0) {
5126
0
                return -1;
5127
0
            }
5128
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
5129
250
            metrics_context.total_recycled_num++;
5130
250
            segment_metrics_context_.total_recycled_data_size +=
5131
250
                    rowset.rowset_meta().total_disk_size();
5132
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
5133
250
            return 0;
5134
250
        }
5135
5136
7.56k
        auto* rowset_meta = rowset.mutable_rowset_meta();
5137
7.56k
        if (config::enable_mark_delete_rowset_before_recycle) {
5138
7.56k
            if (need_mark_rowset_as_recycled(rowset)) {
5139
3.78k
                rowset_keys_to_mark_recycled.emplace_back(k);
5140
3.78k
                LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv "
5141
3.78k
                             "at next turn, instance_id="
5142
3.78k
                          << instance_id_ << " tablet_id=" << rowset_meta->tablet_id()
5143
3.78k
                          << " version=[" << rowset_meta->start_version() << '-'
5144
3.78k
                          << rowset_meta->end_version() << "]";
5145
3.78k
                return 0;
5146
3.78k
            }
5147
7.56k
        }
5148
5149
3.78k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle &&
5150
3.78k
            rowset_meta->end_version() != 1) {
5151
3.78k
            if (make_deferred_abort_task(rowset).has_value()) {
5152
0
                LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, "
5153
0
                             "instance_id="
5154
0
                          << instance_id_ << " tablet_id=" << rowset_meta->tablet_id()
5155
0
                          << " version=[" << rowset_meta->start_version() << '-'
5156
0
                          << rowset_meta->end_version() << "]";
5157
0
                rowset_keys_to_abort.emplace_back(k);
5158
0
            }
5159
3.78k
        }
5160
5161
        // TODO(plat1ko): check rowset not referenced
5162
3.78k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5163
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5164
0
                LOG_INFO("recycle rowset that has empty resource id");
5165
0
            } else {
5166
                // other situations, keep this key-value pair and it needs to be checked manually
5167
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5168
0
                return -1;
5169
0
            }
5170
0
        }
5171
3.78k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5172
3.78k
                  << " tablet_id=" << rowset_meta->tablet_id()
5173
3.78k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5174
3.78k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5175
3.78k
                  << "] txn_id=" << rowset_meta->txn_id()
5176
3.78k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5177
3.78k
                  << " rowset_meta_size=" << v.size()
5178
3.78k
                  << " creation_time=" << rowset_meta->creation_time()
5179
3.78k
                  << " task_type=" << metrics_context.operation_type;
5180
3.78k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5181
            // unable to calculate file path, can only be deleted by rowset id prefix
5182
650
            num_prepare += 1;
5183
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
5184
650
                                             rowset_meta->tablet_id(),
5185
650
                                             rowset_meta->rowset_id_v2()) != 0) {
5186
0
                return -1;
5187
0
            }
5188
3.13k
        } else {
5189
3.13k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
5190
3.13k
            if (rowset_meta->num_segments() > 0) { // Skip empty rowset
5191
3.13k
                rowsets.emplace_back(std::string(k), std::move(*rowset_meta));
5192
3.13k
            } else {
5193
3
                ++num_empty_rowset;
5194
3
                rowset_keys_without_data.emplace_back(k);
5195
3
            }
5196
3.13k
        }
5197
3.78k
        return 0;
5198
3.78k
    };
5199
5200
65
    auto submit_delete_rowset_data_job = [&](std::vector<std::string> rowset_keys,
5201
65
                                             std::map<std::string, RowsetMetaCloudPB> rowsets) {
5202
24
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys),
5203
24
                             rowsets_to_delete = std::move(rowsets)]() {
5204
24
            if (!rowsets_to_delete.empty() &&
5205
24
                delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
5206
18
                                   metrics_context) != 0) {
5207
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
5208
0
                return;
5209
0
            }
5210
3.13k
            for (const auto& [_, rs] : rowsets_to_delete) {
5211
3.13k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5212
0
                    return;
5213
0
                }
5214
3.13k
            }
5215
24
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
5216
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5217
0
                return;
5218
0
            }
5219
5220
24
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
5221
24
        });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_6clESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EESt3mapIS9_NS_17RowsetMetaCloudPBESt4lessIS9_ESaISt4pairIKS9_SD_EEEENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_6clESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EESt3mapIS9_NS_17RowsetMetaCloudPBESt4lessIS9_ESaISt4pairIKS9_SD_EEEENKUlvE_clEv
Line
Count
Source
5203
24
                             rowsets_to_delete = std::move(rowsets)]() {
5204
24
            if (!rowsets_to_delete.empty() &&
5205
24
                delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
5206
18
                                   metrics_context) != 0) {
5207
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
5208
0
                return;
5209
0
            }
5210
3.13k
            for (const auto& [_, rs] : rowsets_to_delete) {
5211
3.13k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5212
0
                    return;
5213
0
                }
5214
3.13k
            }
5215
24
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
5216
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5217
0
                return;
5218
0
            }
5219
5220
24
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
5221
24
        });
5222
24
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_6clESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EESt3mapIS9_NS_17RowsetMetaCloudPBESt4lessIS9_ESaISt4pairIKS9_SD_EEE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_6clESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EESt3mapIS9_NS_17RowsetMetaCloudPBESt4lessIS9_ESaISt4pairIKS9_SD_EEE
Line
Count
Source
5201
24
                                             std::map<std::string, RowsetMetaCloudPB> rowsets) {
5202
24
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys),
5203
24
                             rowsets_to_delete = std::move(rowsets)]() {
5204
24
            if (!rowsets_to_delete.empty() &&
5205
24
                delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
5206
24
                                   metrics_context) != 0) {
5207
24
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
5208
24
                return;
5209
24
            }
5210
24
            for (const auto& [_, rs] : rowsets_to_delete) {
5211
24
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5212
24
                    return;
5213
24
                }
5214
24
            }
5215
24
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
5216
24
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5217
24
                return;
5218
24
            }
5219
5220
24
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
5221
24
        });
5222
24
    };
5223
5224
65
    auto submit_mark_abort_rowset_job = [&](std::vector<std::string> rowset_keys_to_mark,
5225
139
                                            std::vector<std::string> rowset_keys_to_abort) {
5226
139
        if (rowset_keys_to_mark.empty() && rowset_keys_to_abort.empty()) {
5227
100
            return;
5228
100
        }
5229
39
        mark_abort_worker_pool->submit([&, rowset_keys_to_mark = std::move(rowset_keys_to_mark),
5230
39
                                        rowset_keys_to_abort =
5231
39
                                                std::move(rowset_keys_to_abort)]() mutable {
5232
39
            auto start = steady_clock::now();
5233
39
            DORIS_CLOUD_DEFER {
5234
39
                auto cost = duration_cast<milliseconds>(steady_clock::now() - start).count();
5235
39
                LOG(INFO) << "finish mark and abort rowset job, instance_id=" << instance_id_ << ' '
5236
39
                          << "cost_ms=" << cost << ' '
5237
39
                          << "rowset_keys_to_mark.size()=" << rowset_keys_to_mark.size() << ' '
5238
39
                          << "rowset_keys_to_abort.size()=" << rowset_keys_to_abort.size();
5239
39
            };
recycler.cpp:_ZZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EESB_ENUlvE_clEvENKUlvE_clEv
Line
Count
Source
5233
7
            DORIS_CLOUD_DEFER {
5234
7
                auto cost = duration_cast<milliseconds>(steady_clock::now() - start).count();
5235
7
                LOG(INFO) << "finish mark and abort rowset job, instance_id=" << instance_id_ << ' '
5236
7
                          << "cost_ms=" << cost << ' '
5237
7
                          << "rowset_keys_to_mark.size()=" << rowset_keys_to_mark.size() << ' '
5238
7
                          << "rowset_keys_to_abort.size()=" << rowset_keys_to_abort.size();
5239
7
            };
recycler_test.cpp:_ZZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EESB_ENUlvE_clEvENKUlvE_clEv
Line
Count
Source
5233
32
            DORIS_CLOUD_DEFER {
5234
32
                auto cost = duration_cast<milliseconds>(steady_clock::now() - start).count();
5235
32
                LOG(INFO) << "finish mark and abort rowset job, instance_id=" << instance_id_ << ' '
5236
32
                          << "cost_ms=" << cost << ' '
5237
32
                          << "rowset_keys_to_mark.size()=" << rowset_keys_to_mark.size() << ' '
5238
32
                          << "rowset_keys_to_abort.size()=" << rowset_keys_to_abort.size();
5239
32
            };
5240
39
            if (!rowset_keys_to_mark.empty() &&
5241
39
                batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_,
5242
37
                                                                rowset_keys_to_mark) != 0) {
5243
0
                LOG(WARNING) << "failed to batch mark rowsets as recycled, instance_id="
5244
0
                             << instance_id_ << ' '
5245
0
                             << "rowset_keys_to_mark.size()=" << rowset_keys_to_mark.size();
5246
0
                return;
5247
0
            }
5248
39
            if (!rowset_keys_to_abort.empty() &&
5249
39
                batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(rowset_keys_to_abort, true) !=
5250
2
                        0) {
5251
0
                LOG(WARNING) << "failed to batch abort txn or job for related rowset, "
5252
0
                                "instance_id="
5253
0
                             << instance_id_ << ' '
5254
0
                             << "rowset_keys_to_abort.size()=" << rowset_keys_to_abort.size();
5255
0
                return;
5256
0
            }
5257
39
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EESB_ENUlvE_clEv
Line
Count
Source
5231
7
                                                std::move(rowset_keys_to_abort)]() mutable {
5232
7
            auto start = steady_clock::now();
5233
7
            DORIS_CLOUD_DEFER {
5234
7
                auto cost = duration_cast<milliseconds>(steady_clock::now() - start).count();
5235
7
                LOG(INFO) << "finish mark and abort rowset job, instance_id=" << instance_id_ << ' '
5236
7
                          << "cost_ms=" << cost << ' '
5237
7
                          << "rowset_keys_to_mark.size()=" << rowset_keys_to_mark.size() << ' '
5238
7
                          << "rowset_keys_to_abort.size()=" << rowset_keys_to_abort.size();
5239
7
            };
5240
7
            if (!rowset_keys_to_mark.empty() &&
5241
7
                batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_,
5242
5
                                                                rowset_keys_to_mark) != 0) {
5243
0
                LOG(WARNING) << "failed to batch mark rowsets as recycled, instance_id="
5244
0
                             << instance_id_ << ' '
5245
0
                             << "rowset_keys_to_mark.size()=" << rowset_keys_to_mark.size();
5246
0
                return;
5247
0
            }
5248
7
            if (!rowset_keys_to_abort.empty() &&
5249
7
                batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(rowset_keys_to_abort, true) !=
5250
2
                        0) {
5251
0
                LOG(WARNING) << "failed to batch abort txn or job for related rowset, "
5252
0
                                "instance_id="
5253
0
                             << instance_id_ << ' '
5254
0
                             << "rowset_keys_to_abort.size()=" << rowset_keys_to_abort.size();
5255
0
                return;
5256
0
            }
5257
7
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EESB_ENUlvE_clEv
Line
Count
Source
5231
32
                                                std::move(rowset_keys_to_abort)]() mutable {
5232
32
            auto start = steady_clock::now();
5233
32
            DORIS_CLOUD_DEFER {
5234
32
                auto cost = duration_cast<milliseconds>(steady_clock::now() - start).count();
5235
32
                LOG(INFO) << "finish mark and abort rowset job, instance_id=" << instance_id_ << ' '
5236
32
                          << "cost_ms=" << cost << ' '
5237
32
                          << "rowset_keys_to_mark.size()=" << rowset_keys_to_mark.size() << ' '
5238
32
                          << "rowset_keys_to_abort.size()=" << rowset_keys_to_abort.size();
5239
32
            };
5240
32
            if (!rowset_keys_to_mark.empty() &&
5241
32
                batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_,
5242
32
                                                                rowset_keys_to_mark) != 0) {
5243
0
                LOG(WARNING) << "failed to batch mark rowsets as recycled, instance_id="
5244
0
                             << instance_id_ << ' '
5245
0
                             << "rowset_keys_to_mark.size()=" << rowset_keys_to_mark.size();
5246
0
                return;
5247
0
            }
5248
32
            if (!rowset_keys_to_abort.empty() &&
5249
32
                batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(rowset_keys_to_abort, true) !=
5250
0
                        0) {
5251
0
                LOG(WARNING) << "failed to batch abort txn or job for related rowset, "
5252
0
                                "instance_id="
5253
0
                             << instance_id_ << ' '
5254
0
                             << "rowset_keys_to_abort.size()=" << rowset_keys_to_abort.size();
5255
0
                return;
5256
0
            }
5257
32
        });
5258
39
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EESB_
Line
Count
Source
5225
14
                                            std::vector<std::string> rowset_keys_to_abort) {
5226
14
        if (rowset_keys_to_mark.empty() && rowset_keys_to_abort.empty()) {
5227
7
            return;
5228
7
        }
5229
7
        mark_abort_worker_pool->submit([&, rowset_keys_to_mark = std::move(rowset_keys_to_mark),
5230
7
                                        rowset_keys_to_abort =
5231
7
                                                std::move(rowset_keys_to_abort)]() mutable {
5232
7
            auto start = steady_clock::now();
5233
7
            DORIS_CLOUD_DEFER {
5234
7
                auto cost = duration_cast<milliseconds>(steady_clock::now() - start).count();
5235
7
                LOG(INFO) << "finish mark and abort rowset job, instance_id=" << instance_id_ << ' '
5236
7
                          << "cost_ms=" << cost << ' '
5237
7
                          << "rowset_keys_to_mark.size()=" << rowset_keys_to_mark.size() << ' '
5238
7
                          << "rowset_keys_to_abort.size()=" << rowset_keys_to_abort.size();
5239
7
            };
5240
7
            if (!rowset_keys_to_mark.empty() &&
5241
7
                batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_,
5242
7
                                                                rowset_keys_to_mark) != 0) {
5243
7
                LOG(WARNING) << "failed to batch mark rowsets as recycled, instance_id="
5244
7
                             << instance_id_ << ' '
5245
7
                             << "rowset_keys_to_mark.size()=" << rowset_keys_to_mark.size();
5246
7
                return;
5247
7
            }
5248
7
            if (!rowset_keys_to_abort.empty() &&
5249
7
                batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(rowset_keys_to_abort, true) !=
5250
7
                        0) {
5251
7
                LOG(WARNING) << "failed to batch abort txn or job for related rowset, "
5252
7
                                "instance_id="
5253
7
                             << instance_id_ << ' '
5254
7
                             << "rowset_keys_to_abort.size()=" << rowset_keys_to_abort.size();
5255
7
                return;
5256
7
            }
5257
7
        });
5258
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EESB_
Line
Count
Source
5225
125
                                            std::vector<std::string> rowset_keys_to_abort) {
5226
125
        if (rowset_keys_to_mark.empty() && rowset_keys_to_abort.empty()) {
5227
93
            return;
5228
93
        }
5229
32
        mark_abort_worker_pool->submit([&, rowset_keys_to_mark = std::move(rowset_keys_to_mark),
5230
32
                                        rowset_keys_to_abort =
5231
32
                                                std::move(rowset_keys_to_abort)]() mutable {
5232
32
            auto start = steady_clock::now();
5233
32
            DORIS_CLOUD_DEFER {
5234
32
                auto cost = duration_cast<milliseconds>(steady_clock::now() - start).count();
5235
32
                LOG(INFO) << "finish mark and abort rowset job, instance_id=" << instance_id_ << ' '
5236
32
                          << "cost_ms=" << cost << ' '
5237
32
                          << "rowset_keys_to_mark.size()=" << rowset_keys_to_mark.size() << ' '
5238
32
                          << "rowset_keys_to_abort.size()=" << rowset_keys_to_abort.size();
5239
32
            };
5240
32
            if (!rowset_keys_to_mark.empty() &&
5241
32
                batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_,
5242
32
                                                                rowset_keys_to_mark) != 0) {
5243
32
                LOG(WARNING) << "failed to batch mark rowsets as recycled, instance_id="
5244
32
                             << instance_id_ << ' '
5245
32
                             << "rowset_keys_to_mark.size()=" << rowset_keys_to_mark.size();
5246
32
                return;
5247
32
            }
5248
32
            if (!rowset_keys_to_abort.empty() &&
5249
32
                batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(rowset_keys_to_abort, true) !=
5250
32
                        0) {
5251
32
                LOG(WARNING) << "failed to batch abort txn or job for related rowset, "
5252
32
                                "instance_id="
5253
32
                             << instance_id_ << ' '
5254
32
                             << "rowset_keys_to_abort.size()=" << rowset_keys_to_abort.size();
5255
32
                return;
5256
32
            }
5257
32
        });
5258
32
    };
5259
5260
65
    bool scan_finished = false;
5261
139
    auto loop_done = [&]() -> int {
5262
139
        std::vector<std::string> mark_keys_to_process;
5263
139
        std::vector<std::string> abort_keys_to_process;
5264
139
        mark_keys_to_process.swap(rowset_keys_to_mark_recycled);
5265
139
        abort_keys_to_process.swap(rowset_keys_to_abort);
5266
139
        submit_mark_abort_rowset_job(std::move(mark_keys_to_process),
5267
139
                                     std::move(abort_keys_to_process));
5268
139
        if (!scan_finished && rowsets.size() < delete_rowset_batch_size) {
5269
74
            return 0;
5270
74
        }
5271
5272
65
        DORIS_CLOUD_DEFER {
5273
            // if return -1 in loop done, rowset info in memory is not cleared,
5274
            // it can lead to memory accumulation
5275
65
            rowset_keys_without_data.clear();
5276
65
            rowsets.clear();
5277
65
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
Line
Count
Source
5272
7
        DORIS_CLOUD_DEFER {
5273
            // if return -1 in loop done, rowset info in memory is not cleared,
5274
            // it can lead to memory accumulation
5275
7
            rowset_keys_without_data.clear();
5276
7
            rowsets.clear();
5277
7
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
Line
Count
Source
5272
58
        DORIS_CLOUD_DEFER {
5273
            // if return -1 in loop done, rowset info in memory is not cleared,
5274
            // it can lead to memory accumulation
5275
58
            rowset_keys_without_data.clear();
5276
58
            rowsets.clear();
5277
58
        };
5278
65
        std::random_device rd;
5279
65
        std::mt19937 g(rd());
5280
65
        std::ranges::shuffle(rowsets, g);
5281
5282
65
        std::vector<std::string> rowset_keys_to_delete;
5283
65
        rowset_keys_to_delete.reserve(rowset_batch_size_per_tablet);
5284
        // rowset_id -> rowset_meta
5285
        // store rowset id and meta for statistics rs size when delete
5286
65
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
5287
5288
65
        size_t rowsets_per_batch_size = 0;
5289
3.13k
        for (auto& rowset : rowsets) {
5290
3.13k
            rowset_keys_to_delete.emplace_back(std::move(rowset.key));
5291
3.13k
            rowsets_to_delete.emplace(rowset.meta.rowset_id_v2(), std::move(rowset.meta));
5292
3.13k
            if (++rowsets_per_batch_size < rowset_batch_size_per_tablet) {
5293
3.12k
                continue;
5294
3.12k
            }
5295
5296
8
            submit_delete_rowset_data_job(std::move(rowset_keys_to_delete),
5297
8
                                          std::move(rowsets_to_delete));
5298
8
            rowsets_per_batch_size = 0;
5299
8
            rowset_keys_to_delete.clear();
5300
8
            rowsets_to_delete.clear();
5301
8
        }
5302
5303
65
        if (!rowset_keys_to_delete.empty() || !rowsets_to_delete.empty()) {
5304
10
            submit_delete_rowset_data_job(std::move(rowset_keys_to_delete),
5305
10
                                          std::move(rowsets_to_delete));
5306
10
        }
5307
5308
71
        for (size_t i = 0; i < rowset_keys_without_data.size(); i += rowset_batch_size_per_tablet) {
5309
6
            auto begin = rowset_keys_without_data.begin() + i;
5310
6
            auto end = rowset_keys_without_data.begin() +
5311
6
                       std::min(i + rowset_batch_size_per_tablet, rowset_keys_without_data.size());
5312
6
            std::vector<std::string> rowset_keys_to_remove(std::make_move_iterator(begin),
5313
6
                                                           std::make_move_iterator(end));
5314
6
            submit_delete_rowset_data_job(std::move(rowset_keys_to_remove), {});
5315
6
        }
5316
5317
65
        return 0;
5318
139
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
5261
14
    auto loop_done = [&]() -> int {
5262
14
        std::vector<std::string> mark_keys_to_process;
5263
14
        std::vector<std::string> abort_keys_to_process;
5264
14
        mark_keys_to_process.swap(rowset_keys_to_mark_recycled);
5265
14
        abort_keys_to_process.swap(rowset_keys_to_abort);
5266
14
        submit_mark_abort_rowset_job(std::move(mark_keys_to_process),
5267
14
                                     std::move(abort_keys_to_process));
5268
14
        if (!scan_finished && rowsets.size() < delete_rowset_batch_size) {
5269
7
            return 0;
5270
7
        }
5271
5272
7
        DORIS_CLOUD_DEFER {
5273
            // if return -1 in loop done, rowset info in memory is not cleared,
5274
            // it can lead to memory accumulation
5275
7
            rowset_keys_without_data.clear();
5276
7
            rowsets.clear();
5277
7
        };
5278
7
        std::random_device rd;
5279
7
        std::mt19937 g(rd());
5280
7
        std::ranges::shuffle(rowsets, g);
5281
5282
7
        std::vector<std::string> rowset_keys_to_delete;
5283
7
        rowset_keys_to_delete.reserve(rowset_batch_size_per_tablet);
5284
        // rowset_id -> rowset_meta
5285
        // store rowset id and meta for statistics rs size when delete
5286
7
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
5287
5288
7
        size_t rowsets_per_batch_size = 0;
5289
7
        for (auto& rowset : rowsets) {
5290
0
            rowset_keys_to_delete.emplace_back(std::move(rowset.key));
5291
0
            rowsets_to_delete.emplace(rowset.meta.rowset_id_v2(), std::move(rowset.meta));
5292
0
            if (++rowsets_per_batch_size < rowset_batch_size_per_tablet) {
5293
0
                continue;
5294
0
            }
5295
5296
0
            submit_delete_rowset_data_job(std::move(rowset_keys_to_delete),
5297
0
                                          std::move(rowsets_to_delete));
5298
0
            rowsets_per_batch_size = 0;
5299
0
            rowset_keys_to_delete.clear();
5300
0
            rowsets_to_delete.clear();
5301
0
        }
5302
5303
7
        if (!rowset_keys_to_delete.empty() || !rowsets_to_delete.empty()) {
5304
0
            submit_delete_rowset_data_job(std::move(rowset_keys_to_delete),
5305
0
                                          std::move(rowsets_to_delete));
5306
0
        }
5307
5308
7
        for (size_t i = 0; i < rowset_keys_without_data.size(); i += rowset_batch_size_per_tablet) {
5309
0
            auto begin = rowset_keys_without_data.begin() + i;
5310
0
            auto end = rowset_keys_without_data.begin() +
5311
0
                       std::min(i + rowset_batch_size_per_tablet, rowset_keys_without_data.size());
5312
0
            std::vector<std::string> rowset_keys_to_remove(std::make_move_iterator(begin),
5313
0
                                                           std::make_move_iterator(end));
5314
0
            submit_delete_rowset_data_job(std::move(rowset_keys_to_remove), {});
5315
0
        }
5316
5317
7
        return 0;
5318
14
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
5261
125
    auto loop_done = [&]() -> int {
5262
125
        std::vector<std::string> mark_keys_to_process;
5263
125
        std::vector<std::string> abort_keys_to_process;
5264
125
        mark_keys_to_process.swap(rowset_keys_to_mark_recycled);
5265
125
        abort_keys_to_process.swap(rowset_keys_to_abort);
5266
125
        submit_mark_abort_rowset_job(std::move(mark_keys_to_process),
5267
125
                                     std::move(abort_keys_to_process));
5268
125
        if (!scan_finished && rowsets.size() < delete_rowset_batch_size) {
5269
67
            return 0;
5270
67
        }
5271
5272
58
        DORIS_CLOUD_DEFER {
5273
            // if return -1 in loop done, rowset info in memory is not cleared,
5274
            // it can lead to memory accumulation
5275
58
            rowset_keys_without_data.clear();
5276
58
            rowsets.clear();
5277
58
        };
5278
58
        std::random_device rd;
5279
58
        std::mt19937 g(rd());
5280
58
        std::ranges::shuffle(rowsets, g);
5281
5282
58
        std::vector<std::string> rowset_keys_to_delete;
5283
58
        rowset_keys_to_delete.reserve(rowset_batch_size_per_tablet);
5284
        // rowset_id -> rowset_meta
5285
        // store rowset id and meta for statistics rs size when delete
5286
58
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
5287
5288
58
        size_t rowsets_per_batch_size = 0;
5289
3.13k
        for (auto& rowset : rowsets) {
5290
3.13k
            rowset_keys_to_delete.emplace_back(std::move(rowset.key));
5291
3.13k
            rowsets_to_delete.emplace(rowset.meta.rowset_id_v2(), std::move(rowset.meta));
5292
3.13k
            if (++rowsets_per_batch_size < rowset_batch_size_per_tablet) {
5293
3.12k
                continue;
5294
3.12k
            }
5295
5296
8
            submit_delete_rowset_data_job(std::move(rowset_keys_to_delete),
5297
8
                                          std::move(rowsets_to_delete));
5298
8
            rowsets_per_batch_size = 0;
5299
8
            rowset_keys_to_delete.clear();
5300
8
            rowsets_to_delete.clear();
5301
8
        }
5302
5303
58
        if (!rowset_keys_to_delete.empty() || !rowsets_to_delete.empty()) {
5304
10
            submit_delete_rowset_data_job(std::move(rowset_keys_to_delete),
5305
10
                                          std::move(rowsets_to_delete));
5306
10
        }
5307
5308
64
        for (size_t i = 0; i < rowset_keys_without_data.size(); i += rowset_batch_size_per_tablet) {
5309
6
            auto begin = rowset_keys_without_data.begin() + i;
5310
6
            auto end = rowset_keys_without_data.begin() +
5311
6
                       std::min(i + rowset_batch_size_per_tablet, rowset_keys_without_data.size());
5312
6
            std::vector<std::string> rowset_keys_to_remove(std::make_move_iterator(begin),
5313
6
                                                           std::make_move_iterator(end));
5314
6
            submit_delete_rowset_data_job(std::move(rowset_keys_to_remove), {});
5315
6
        }
5316
5317
58
        return 0;
5318
125
    };
5319
5320
65
    if (config::enable_recycler_stats_metrics) {
5321
0
        scan_and_statistics_rowsets();
5322
0
    }
5323
    // recycle_func and loop_done for scan and recycle
5324
65
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv), loop_done,
5325
65
                               std::move(next_scan_begin_getter));
5326
65
    scan_finished = true;
5327
    // if the size of rowsets is always less than delete_rowset_batch_size
5328
    // it need to submit the task directly
5329
    // else if the size of rowsets is greater than delete_rowset_batch_size,
5330
    // but there are residual, whether due to failed or unsuccessful cleanup, this behavior is idempotent
5331
65
    if (loop_done() != 0) {
5332
0
        ret = -1;
5333
0
    }
5334
5335
65
    mark_abort_worker_pool->stop();
5336
65
    worker_pool->stop();
5337
5338
65
    if (!async_recycled_rowset_keys.empty()) {
5339
8
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
5340
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5341
0
            return -1;
5342
8
        } else {
5343
8
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
5344
8
        }
5345
8
    }
5346
5347
    // Report final metrics after all concurrent tasks completed
5348
65
    segment_metrics_context_.report();
5349
65
    metrics_context.report();
5350
5351
65
    return ret;
5352
65
}
5353
5354
int InstanceRecycler::next_recycle_rowset_tablet_key(const std::string& instance_id,
5355
1
                                                     int64_t tablet_id, std::string* next_key) {
5356
1
    DCHECK(next_key != nullptr);
5357
1
    if (tablet_id == std::numeric_limits<int64_t>::max()) {
5358
0
        return -1;
5359
0
    }
5360
1
    *next_key = recycle_rowset_key({instance_id, tablet_id + 1, ""});
5361
1
    return 0;
5362
1
}
5363
5364
13
int InstanceRecycler::recycle_restore_jobs() {
5365
13
    const std::string task_name = "recycle_restore_jobs";
5366
13
    int64_t num_scanned = 0;
5367
13
    int64_t num_expired = 0;
5368
13
    int64_t num_recycled = 0;
5369
13
    int64_t num_aborted = 0;
5370
5371
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5372
5373
13
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
5374
13
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
5375
13
    std::string restore_job_key0;
5376
13
    std::string restore_job_key1;
5377
13
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
5378
13
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
5379
5380
13
    LOG_INFO("begin to recycle restore jobs").tag("instance_id", instance_id_);
5381
5382
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5383
13
    register_recycle_task(task_name, start_time);
5384
5385
13
    DORIS_CLOUD_DEFER {
5386
13
        unregister_recycle_task(task_name);
5387
13
        int64_t cost =
5388
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5389
13
        metrics_context.finish_report();
5390
5391
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
5392
13
                .tag("instance_id", instance_id_)
5393
13
                .tag("num_scanned", num_scanned)
5394
13
                .tag("num_expired", num_expired)
5395
13
                .tag("num_recycled", num_recycled)
5396
13
                .tag("num_aborted", num_aborted);
5397
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
Line
Count
Source
5385
13
    DORIS_CLOUD_DEFER {
5386
13
        unregister_recycle_task(task_name);
5387
13
        int64_t cost =
5388
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5389
13
        metrics_context.finish_report();
5390
5391
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
5392
13
                .tag("instance_id", instance_id_)
5393
13
                .tag("num_scanned", num_scanned)
5394
13
                .tag("num_expired", num_expired)
5395
13
                .tag("num_recycled", num_recycled)
5396
13
                .tag("num_aborted", num_aborted);
5397
13
    };
5398
5399
13
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5400
5401
13
    std::vector<std::string_view> restore_job_keys;
5402
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
5403
41
        ++num_scanned;
5404
41
        RestoreJobCloudPB restore_job_pb;
5405
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
5406
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
5407
0
            return -1;
5408
0
        }
5409
41
        int64_t expiration =
5410
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
5411
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
5412
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
5413
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
5414
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
5415
0
                   << " state=" << restore_job_pb.state();
5416
41
        int64_t current_time = ::time(nullptr);
5417
41
        if (current_time < expiration) { // not expired
5418
0
            return 0;
5419
0
        }
5420
41
        ++num_expired;
5421
5422
41
        int64_t tablet_id = restore_job_pb.tablet_id();
5423
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
5424
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
5425
5426
41
        std::unique_ptr<Transaction> txn;
5427
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5428
41
        if (err != TxnErrorCode::TXN_OK) {
5429
0
            LOG_WARNING("failed to recycle restore job")
5430
0
                    .tag("err", err)
5431
0
                    .tag("tablet id", tablet_id)
5432
0
                    .tag("instance_id", instance_id_)
5433
0
                    .tag("reason", "failed to create txn");
5434
0
            return -1;
5435
0
        }
5436
5437
41
        std::string val;
5438
41
        err = txn->get(k, &val);
5439
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
5440
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
5441
0
            return 0;
5442
0
        }
5443
41
        if (err != TxnErrorCode::TXN_OK) {
5444
0
            LOG_WARNING("failed to get kv");
5445
0
            return -1;
5446
0
        }
5447
41
        restore_job_pb.Clear();
5448
41
        if (!restore_job_pb.ParseFromString(val)) {
5449
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
5450
0
            return -1;
5451
0
        }
5452
5453
        // PREPARED or COMMITTED, change state to DROPPED and return
5454
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
5455
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
5456
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
5457
0
            restore_job_pb.set_need_recycle_data(true);
5458
0
            txn->put(k, restore_job_pb.SerializeAsString());
5459
0
            err = txn->commit();
5460
0
            if (err != TxnErrorCode::TXN_OK) {
5461
0
                LOG_WARNING("failed to commit txn: {}", err);
5462
0
                return -1;
5463
0
            }
5464
0
            num_aborted++;
5465
0
            return 0;
5466
0
        }
5467
5468
        // Change state to RECYCLING
5469
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
5470
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
5471
21
            txn->put(k, restore_job_pb.SerializeAsString());
5472
21
            err = txn->commit();
5473
21
            if (err != TxnErrorCode::TXN_OK) {
5474
0
                LOG_WARNING("failed to commit txn: {}", err);
5475
0
                return -1;
5476
0
            }
5477
21
            return 0;
5478
21
        }
5479
5480
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
5481
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
5482
5483
        // Recycle all data associated with the restore job.
5484
        // This includes rowsets, segments, and related resources.
5485
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
5486
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
5487
0
            LOG_WARNING("failed to recycle tablet")
5488
0
                    .tag("tablet_id", tablet_id)
5489
0
                    .tag("instance_id", instance_id_);
5490
0
            return -1;
5491
0
        }
5492
5493
        // delete all restore job rowset kv
5494
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
5495
5496
20
        err = txn->commit();
5497
20
        if (err != TxnErrorCode::TXN_OK) {
5498
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
5499
0
                    .tag("err", err)
5500
0
                    .tag("tablet id", tablet_id)
5501
0
                    .tag("instance_id", instance_id_)
5502
0
                    .tag("reason", "failed to commit txn");
5503
0
            return -1;
5504
0
        }
5505
5506
20
        metrics_context.total_recycled_num = ++num_recycled;
5507
20
        metrics_context.report();
5508
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
5509
20
        restore_job_keys.push_back(k);
5510
5511
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
5512
20
                  << " tablet_id=" << tablet_id;
5513
20
        return 0;
5514
20
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5402
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
5403
41
        ++num_scanned;
5404
41
        RestoreJobCloudPB restore_job_pb;
5405
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
5406
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
5407
0
            return -1;
5408
0
        }
5409
41
        int64_t expiration =
5410
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
5411
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
5412
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
5413
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
5414
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
5415
0
                   << " state=" << restore_job_pb.state();
5416
41
        int64_t current_time = ::time(nullptr);
5417
41
        if (current_time < expiration) { // not expired
5418
0
            return 0;
5419
0
        }
5420
41
        ++num_expired;
5421
5422
41
        int64_t tablet_id = restore_job_pb.tablet_id();
5423
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
5424
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
5425
5426
41
        std::unique_ptr<Transaction> txn;
5427
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5428
41
        if (err != TxnErrorCode::TXN_OK) {
5429
0
            LOG_WARNING("failed to recycle restore job")
5430
0
                    .tag("err", err)
5431
0
                    .tag("tablet id", tablet_id)
5432
0
                    .tag("instance_id", instance_id_)
5433
0
                    .tag("reason", "failed to create txn");
5434
0
            return -1;
5435
0
        }
5436
5437
41
        std::string val;
5438
41
        err = txn->get(k, &val);
5439
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
5440
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
5441
0
            return 0;
5442
0
        }
5443
41
        if (err != TxnErrorCode::TXN_OK) {
5444
0
            LOG_WARNING("failed to get kv");
5445
0
            return -1;
5446
0
        }
5447
41
        restore_job_pb.Clear();
5448
41
        if (!restore_job_pb.ParseFromString(val)) {
5449
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
5450
0
            return -1;
5451
0
        }
5452
5453
        // PREPARED or COMMITTED, change state to DROPPED and return
5454
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
5455
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
5456
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
5457
0
            restore_job_pb.set_need_recycle_data(true);
5458
0
            txn->put(k, restore_job_pb.SerializeAsString());
5459
0
            err = txn->commit();
5460
0
            if (err != TxnErrorCode::TXN_OK) {
5461
0
                LOG_WARNING("failed to commit txn: {}", err);
5462
0
                return -1;
5463
0
            }
5464
0
            num_aborted++;
5465
0
            return 0;
5466
0
        }
5467
5468
        // Change state to RECYCLING
5469
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
5470
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
5471
21
            txn->put(k, restore_job_pb.SerializeAsString());
5472
21
            err = txn->commit();
5473
21
            if (err != TxnErrorCode::TXN_OK) {
5474
0
                LOG_WARNING("failed to commit txn: {}", err);
5475
0
                return -1;
5476
0
            }
5477
21
            return 0;
5478
21
        }
5479
5480
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
5481
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
5482
5483
        // Recycle all data associated with the restore job.
5484
        // This includes rowsets, segments, and related resources.
5485
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
5486
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
5487
0
            LOG_WARNING("failed to recycle tablet")
5488
0
                    .tag("tablet_id", tablet_id)
5489
0
                    .tag("instance_id", instance_id_);
5490
0
            return -1;
5491
0
        }
5492
5493
        // delete all restore job rowset kv
5494
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
5495
5496
20
        err = txn->commit();
5497
20
        if (err != TxnErrorCode::TXN_OK) {
5498
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
5499
0
                    .tag("err", err)
5500
0
                    .tag("tablet id", tablet_id)
5501
0
                    .tag("instance_id", instance_id_)
5502
0
                    .tag("reason", "failed to commit txn");
5503
0
            return -1;
5504
0
        }
5505
5506
20
        metrics_context.total_recycled_num = ++num_recycled;
5507
20
        metrics_context.report();
5508
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
5509
20
        restore_job_keys.push_back(k);
5510
5511
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
5512
20
                  << " tablet_id=" << tablet_id;
5513
20
        return 0;
5514
20
    };
5515
5516
13
    auto loop_done = [&restore_job_keys, this]() -> int {
5517
3
        if (restore_job_keys.empty()) return 0;
5518
1
        DORIS_CLOUD_DEFER {
5519
1
            restore_job_keys.clear();
5520
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5518
1
        DORIS_CLOUD_DEFER {
5519
1
            restore_job_keys.clear();
5520
1
        };
5521
5522
1
        std::unique_ptr<Transaction> txn;
5523
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5524
1
        if (err != TxnErrorCode::TXN_OK) {
5525
0
            LOG_WARNING("failed to recycle restore job")
5526
0
                    .tag("err", err)
5527
0
                    .tag("instance_id", instance_id_)
5528
0
                    .tag("reason", "failed to create txn");
5529
0
            return -1;
5530
0
        }
5531
20
        for (auto& k : restore_job_keys) {
5532
20
            txn->remove(k);
5533
20
        }
5534
1
        err = txn->commit();
5535
1
        if (err != TxnErrorCode::TXN_OK) {
5536
0
            LOG_WARNING("failed to recycle restore job")
5537
0
                    .tag("err", err)
5538
0
                    .tag("instance_id", instance_id_)
5539
0
                    .tag("reason", "failed to commit txn");
5540
0
            return -1;
5541
0
        }
5542
1
        return 0;
5543
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
Line
Count
Source
5516
3
    auto loop_done = [&restore_job_keys, this]() -> int {
5517
3
        if (restore_job_keys.empty()) return 0;
5518
1
        DORIS_CLOUD_DEFER {
5519
1
            restore_job_keys.clear();
5520
1
        };
5521
5522
1
        std::unique_ptr<Transaction> txn;
5523
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5524
1
        if (err != TxnErrorCode::TXN_OK) {
5525
0
            LOG_WARNING("failed to recycle restore job")
5526
0
                    .tag("err", err)
5527
0
                    .tag("instance_id", instance_id_)
5528
0
                    .tag("reason", "failed to create txn");
5529
0
            return -1;
5530
0
        }
5531
20
        for (auto& k : restore_job_keys) {
5532
20
            txn->remove(k);
5533
20
        }
5534
1
        err = txn->commit();
5535
1
        if (err != TxnErrorCode::TXN_OK) {
5536
0
            LOG_WARNING("failed to recycle restore job")
5537
0
                    .tag("err", err)
5538
0
                    .tag("instance_id", instance_id_)
5539
0
                    .tag("reason", "failed to commit txn");
5540
0
            return -1;
5541
0
        }
5542
1
        return 0;
5543
1
    };
5544
5545
13
    if (config::enable_recycler_stats_metrics) {
5546
0
        scan_and_statistics_restore_jobs();
5547
0
    }
5548
5549
13
    return scan_and_recycle(restore_job_key0, restore_job_key1, std::move(recycle_func),
5550
13
                            std::move(loop_done));
5551
13
}
5552
5553
10
int InstanceRecycler::recycle_versioned_rowsets() {
5554
10
    const std::string task_name = "recycle_rowsets";
5555
10
    int64_t num_scanned = 0;
5556
10
    int64_t num_expired = 0;
5557
10
    int64_t num_prepare = 0;
5558
10
    int64_t num_compacted = 0;
5559
10
    int64_t num_empty_rowset = 0;
5560
10
    size_t total_rowset_key_size = 0;
5561
10
    size_t total_rowset_value_size = 0;
5562
10
    size_t expired_rowset_size = 0;
5563
10
    std::atomic_long num_recycled = 0;
5564
10
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5565
5566
10
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
5567
10
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
5568
10
    std::string recyc_rs_key0;
5569
10
    std::string recyc_rs_key1;
5570
10
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
5571
10
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
5572
5573
10
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
5574
5575
10
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5576
10
    register_recycle_task(task_name, start_time);
5577
5578
10
    DORIS_CLOUD_DEFER {
5579
10
        unregister_recycle_task(task_name);
5580
10
        int64_t cost =
5581
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5582
10
        metrics_context.finish_report();
5583
10
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
5584
10
                .tag("instance_id", instance_id_)
5585
10
                .tag("num_scanned", num_scanned)
5586
10
                .tag("num_expired", num_expired)
5587
10
                .tag("num_recycled", num_recycled)
5588
10
                .tag("num_recycled.prepare", num_prepare)
5589
10
                .tag("num_recycled.compacted", num_compacted)
5590
10
                .tag("num_recycled.empty_rowset", num_empty_rowset)
5591
10
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5592
10
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5593
10
                .tag("expired_rowset_meta_size", expired_rowset_size);
5594
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
Line
Count
Source
5578
10
    DORIS_CLOUD_DEFER {
5579
10
        unregister_recycle_task(task_name);
5580
10
        int64_t cost =
5581
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5582
10
        metrics_context.finish_report();
5583
10
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
5584
10
                .tag("instance_id", instance_id_)
5585
10
                .tag("num_scanned", num_scanned)
5586
10
                .tag("num_expired", num_expired)
5587
10
                .tag("num_recycled", num_recycled)
5588
10
                .tag("num_recycled.prepare", num_prepare)
5589
10
                .tag("num_recycled.compacted", num_compacted)
5590
10
                .tag("num_recycled.empty_rowset", num_empty_rowset)
5591
10
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5592
10
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5593
10
                .tag("expired_rowset_meta_size", expired_rowset_size);
5594
10
    };
5595
5596
10
    std::vector<std::string> orphan_rowset_keys;
5597
5598
    // Store keys of rowset recycled by background workers
5599
10
    std::mutex async_recycled_rowset_keys_mutex;
5600
10
    std::vector<std::string> async_recycled_rowset_keys;
5601
10
    auto worker_pool = std::make_unique<SimpleThreadPool>(
5602
10
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
5603
10
    worker_pool->start();
5604
10
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
5605
400
                                            int64_t tablet_id, const std::string& rowset_id) {
5606
        // Try to delete rowset data in background thread
5607
400
        int ret = worker_pool->submit_with_timeout(
5608
400
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5609
400
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5610
400
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5611
400
                        return;
5612
400
                    }
5613
                    // The async recycled rowsets are staled format or has not been used,
5614
                    // so we don't need to check the rowset ref count key.
5615
0
                    std::vector<std::string> keys;
5616
0
                    {
5617
0
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5618
0
                        async_recycled_rowset_keys.push_back(std::move(key));
5619
0
                        if (async_recycled_rowset_keys.size() > 100) {
5620
0
                            keys.swap(async_recycled_rowset_keys);
5621
0
                        }
5622
0
                    }
5623
0
                    if (keys.empty()) return;
5624
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5625
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5626
0
                                     << instance_id_;
5627
0
                    } else {
5628
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5629
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5630
0
                                           num_recycled, start_time);
5631
0
                    }
5632
0
                },
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
5608
400
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5609
400
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5610
400
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5611
400
                        return;
5612
400
                    }
5613
                    // The async recycled rowsets are staled format or has not been used,
5614
                    // so we don't need to check the rowset ref count key.
5615
0
                    std::vector<std::string> keys;
5616
0
                    {
5617
0
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5618
0
                        async_recycled_rowset_keys.push_back(std::move(key));
5619
0
                        if (async_recycled_rowset_keys.size() > 100) {
5620
0
                            keys.swap(async_recycled_rowset_keys);
5621
0
                        }
5622
0
                    }
5623
0
                    if (keys.empty()) return;
5624
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5625
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5626
0
                                     << instance_id_;
5627
0
                    } else {
5628
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5629
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5630
0
                                           num_recycled, start_time);
5631
0
                    }
5632
0
                },
5633
400
                0);
5634
400
        if (ret == 0) return 0;
5635
        // Submit task failed, delete rowset data in current thread
5636
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5637
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5638
0
            return -1;
5639
0
        }
5640
0
        orphan_rowset_keys.push_back(std::move(key));
5641
0
        return 0;
5642
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
5605
400
                                            int64_t tablet_id, const std::string& rowset_id) {
5606
        // Try to delete rowset data in background thread
5607
400
        int ret = worker_pool->submit_with_timeout(
5608
400
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5609
400
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5610
400
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5611
400
                        return;
5612
400
                    }
5613
                    // The async recycled rowsets are staled format or has not been used,
5614
                    // so we don't need to check the rowset ref count key.
5615
400
                    std::vector<std::string> keys;
5616
400
                    {
5617
400
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5618
400
                        async_recycled_rowset_keys.push_back(std::move(key));
5619
400
                        if (async_recycled_rowset_keys.size() > 100) {
5620
400
                            keys.swap(async_recycled_rowset_keys);
5621
400
                        }
5622
400
                    }
5623
400
                    if (keys.empty()) return;
5624
400
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5625
400
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5626
400
                                     << instance_id_;
5627
400
                    } else {
5628
400
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5629
400
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5630
400
                                           num_recycled, start_time);
5631
400
                    }
5632
400
                },
5633
400
                0);
5634
400
        if (ret == 0) return 0;
5635
        // Submit task failed, delete rowset data in current thread
5636
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5637
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5638
0
            return -1;
5639
0
        }
5640
0
        orphan_rowset_keys.push_back(std::move(key));
5641
0
        return 0;
5642
0
    };
5643
5644
10
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5645
5646
2.01k
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
5647
2.01k
        ++num_scanned;
5648
2.01k
        total_rowset_key_size += k.size();
5649
2.01k
        total_rowset_value_size += v.size();
5650
2.01k
        RecycleRowsetPB rowset;
5651
2.01k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5652
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
5653
0
            return -1;
5654
0
        }
5655
5656
2.01k
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5657
5658
2.01k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5659
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
5660
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
5661
2.01k
        int64_t current_time = ::time(nullptr);
5662
2.01k
        if (current_time < final_expiration) { // not expired
5663
0
            return 0;
5664
0
        }
5665
2.01k
        ++num_expired;
5666
2.01k
        expired_rowset_size += v.size();
5667
2.01k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
5668
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
5669
                // in old version, keep this key-value pair and it needs to be checked manually
5670
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5671
0
                return -1;
5672
0
            }
5673
0
            if (rowset.resource_id().empty()) [[unlikely]] {
5674
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
5675
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
5676
0
                          << hex(k) << " value=" << proto_to_json(rowset);
5677
0
                orphan_rowset_keys.emplace_back(k);
5678
0
                return -1;
5679
0
            }
5680
            // decode rowset_id
5681
0
            auto k1 = k;
5682
0
            k1.remove_prefix(1);
5683
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5684
0
            decode_key(&k1, &out);
5685
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5686
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5687
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5688
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
5689
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5690
0
                                             rowset.tablet_id(), rowset_id) != 0) {
5691
0
                return -1;
5692
0
            }
5693
0
            return 0;
5694
0
        }
5695
        // TODO(plat1ko): check rowset not referenced
5696
2.01k
        auto rowset_meta = rowset.mutable_rowset_meta();
5697
2.01k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5698
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5699
0
                LOG_INFO("recycle rowset that has empty resource id");
5700
0
            } else {
5701
                // other situations, keep this key-value pair and it needs to be checked manually
5702
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5703
0
                return -1;
5704
0
            }
5705
0
        }
5706
2.01k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5707
2.01k
                  << " tablet_id=" << rowset_meta->tablet_id()
5708
2.01k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5709
2.01k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5710
2.01k
                  << "] txn_id=" << rowset_meta->txn_id()
5711
2.01k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5712
2.01k
                  << " rowset_meta_size=" << v.size()
5713
2.01k
                  << " creation_time=" << rowset_meta->creation_time();
5714
2.01k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5715
            // unable to calculate file path, can only be deleted by rowset id prefix
5716
400
            num_prepare += 1;
5717
400
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
5718
400
                                             rowset_meta->tablet_id(),
5719
400
                                             rowset_meta->rowset_id_v2()) != 0) {
5720
0
                return -1;
5721
0
            }
5722
1.61k
        } else {
5723
1.61k
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
5724
1.61k
            worker_pool->submit(
5725
1.61k
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
5726
                        // The load & compact rowset keys are recycled during recycling operation logs.
5727
1.61k
                        RowsetDeleteTask task;
5728
1.61k
                        task.rowset_meta = rowset_meta;
5729
1.61k
                        task.recycle_rowset_key = k;
5730
1.61k
                        if (recycle_rowset_meta_and_data(task) != 0) {
5731
1.60k
                            return;
5732
1.60k
                        }
5733
13
                        num_compacted += is_compacted;
5734
13
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
5735
13
                        if (rowset_meta.num_segments() == 0) {
5736
0
                            ++num_empty_rowset;
5737
0
                        }
5738
13
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
Line
Count
Source
5725
1.61k
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
5726
                        // The load & compact rowset keys are recycled during recycling operation logs.
5727
1.61k
                        RowsetDeleteTask task;
5728
1.61k
                        task.rowset_meta = rowset_meta;
5729
1.61k
                        task.recycle_rowset_key = k;
5730
1.61k
                        if (recycle_rowset_meta_and_data(task) != 0) {
5731
1.60k
                            return;
5732
1.60k
                        }
5733
13
                        num_compacted += is_compacted;
5734
13
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
5735
13
                        if (rowset_meta.num_segments() == 0) {
5736
0
                            ++num_empty_rowset;
5737
0
                        }
5738
13
                    });
5739
1.61k
        }
5740
2.01k
        return 0;
5741
2.01k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5646
2.01k
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
5647
2.01k
        ++num_scanned;
5648
2.01k
        total_rowset_key_size += k.size();
5649
2.01k
        total_rowset_value_size += v.size();
5650
2.01k
        RecycleRowsetPB rowset;
5651
2.01k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5652
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
5653
0
            return -1;
5654
0
        }
5655
5656
2.01k
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5657
5658
2.01k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5659
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
5660
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
5661
2.01k
        int64_t current_time = ::time(nullptr);
5662
2.01k
        if (current_time < final_expiration) { // not expired
5663
0
            return 0;
5664
0
        }
5665
2.01k
        ++num_expired;
5666
2.01k
        expired_rowset_size += v.size();
5667
2.01k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
5668
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
5669
                // in old version, keep this key-value pair and it needs to be checked manually
5670
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5671
0
                return -1;
5672
0
            }
5673
0
            if (rowset.resource_id().empty()) [[unlikely]] {
5674
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
5675
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
5676
0
                          << hex(k) << " value=" << proto_to_json(rowset);
5677
0
                orphan_rowset_keys.emplace_back(k);
5678
0
                return -1;
5679
0
            }
5680
            // decode rowset_id
5681
0
            auto k1 = k;
5682
0
            k1.remove_prefix(1);
5683
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5684
0
            decode_key(&k1, &out);
5685
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5686
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5687
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5688
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
5689
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5690
0
                                             rowset.tablet_id(), rowset_id) != 0) {
5691
0
                return -1;
5692
0
            }
5693
0
            return 0;
5694
0
        }
5695
        // TODO(plat1ko): check rowset not referenced
5696
2.01k
        auto rowset_meta = rowset.mutable_rowset_meta();
5697
2.01k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5698
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5699
0
                LOG_INFO("recycle rowset that has empty resource id");
5700
0
            } else {
5701
                // other situations, keep this key-value pair and it needs to be checked manually
5702
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5703
0
                return -1;
5704
0
            }
5705
0
        }
5706
2.01k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5707
2.01k
                  << " tablet_id=" << rowset_meta->tablet_id()
5708
2.01k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5709
2.01k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5710
2.01k
                  << "] txn_id=" << rowset_meta->txn_id()
5711
2.01k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5712
2.01k
                  << " rowset_meta_size=" << v.size()
5713
2.01k
                  << " creation_time=" << rowset_meta->creation_time();
5714
2.01k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5715
            // unable to calculate file path, can only be deleted by rowset id prefix
5716
400
            num_prepare += 1;
5717
400
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
5718
400
                                             rowset_meta->tablet_id(),
5719
400
                                             rowset_meta->rowset_id_v2()) != 0) {
5720
0
                return -1;
5721
0
            }
5722
1.61k
        } else {
5723
1.61k
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
5724
1.61k
            worker_pool->submit(
5725
1.61k
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
5726
                        // The load & compact rowset keys are recycled during recycling operation logs.
5727
1.61k
                        RowsetDeleteTask task;
5728
1.61k
                        task.rowset_meta = rowset_meta;
5729
1.61k
                        task.recycle_rowset_key = k;
5730
1.61k
                        if (recycle_rowset_meta_and_data(task) != 0) {
5731
1.61k
                            return;
5732
1.61k
                        }
5733
1.61k
                        num_compacted += is_compacted;
5734
1.61k
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
5735
1.61k
                        if (rowset_meta.num_segments() == 0) {
5736
1.61k
                            ++num_empty_rowset;
5737
1.61k
                        }
5738
1.61k
                    });
5739
1.61k
        }
5740
2.01k
        return 0;
5741
2.01k
    };
5742
5743
10
    if (config::enable_recycler_stats_metrics) {
5744
0
        scan_and_statistics_rowsets();
5745
0
    }
5746
5747
10
    auto loop_done = [&]() -> int {
5748
6
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
5749
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5750
0
        }
5751
6
        orphan_rowset_keys.clear();
5752
6
        return 0;
5753
6
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
Line
Count
Source
5747
6
    auto loop_done = [&]() -> int {
5748
6
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
5749
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5750
0
        }
5751
6
        orphan_rowset_keys.clear();
5752
6
        return 0;
5753
6
    };
5754
5755
    // recycle_func and loop_done for scan and recycle
5756
10
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
5757
10
                               std::move(loop_done));
5758
5759
10
    worker_pool->stop();
5760
5761
10
    if (!async_recycled_rowset_keys.empty()) {
5762
0
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
5763
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5764
0
            return -1;
5765
0
        } else {
5766
0
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
5767
0
        }
5768
0
    }
5769
5770
    // Report final metrics after all concurrent tasks completed
5771
10
    segment_metrics_context_.report();
5772
10
    metrics_context.report();
5773
5774
10
    return ret;
5775
10
}
5776
5777
1.61k
int InstanceRecycler::recycle_rowset_meta_and_data(const RowsetDeleteTask& task) {
5778
1.61k
    constexpr int MAX_RETRY = 10;
5779
1.61k
    const RowsetMetaCloudPB& rowset_meta = task.rowset_meta;
5780
1.61k
    int64_t tablet_id = rowset_meta.tablet_id();
5781
1.61k
    const std::string& rowset_id = rowset_meta.rowset_id_v2();
5782
1.61k
    std::string_view reference_instance_id = instance_id_;
5783
1.61k
    if (rowset_meta.has_reference_instance_id()) {
5784
8
        reference_instance_id = rowset_meta.reference_instance_id();
5785
8
    }
5786
5787
1.61k
    AnnotateTag tablet_id_tag("tablet_id", tablet_id);
5788
1.61k
    AnnotateTag rowset_id_tag("rowset_id", rowset_id);
5789
1.61k
    AnnotateTag rowset_key_tag("recycle_rowset_key", hex(task.recycle_rowset_key));
5790
1.61k
    AnnotateTag instance_id_tag("instance_id", instance_id_);
5791
1.61k
    AnnotateTag ref_instance_id_tag("ref_instance_id", reference_instance_id);
5792
1.61k
    for (int i = 0; i < MAX_RETRY; ++i) {
5793
1.61k
        std::unique_ptr<Transaction> txn;
5794
1.61k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5795
1.61k
        if (err != TxnErrorCode::TXN_OK) {
5796
0
            LOG_WARNING("failed to create txn").tag("err", err);
5797
0
            return -1;
5798
0
        }
5799
5800
1.61k
        std::string rowset_ref_count_key =
5801
1.61k
                versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id});
5802
1.61k
        int64_t ref_count = 0;
5803
1.61k
        {
5804
1.61k
            std::string value;
5805
1.61k
            TxnErrorCode err = txn->get(rowset_ref_count_key, &value);
5806
1.61k
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
5807
                // This is the old version rowset, we could recycle it directly.
5808
1.60k
                ref_count = 1;
5809
1.60k
            } else if (err != TxnErrorCode::TXN_OK) {
5810
0
                LOG_WARNING("failed to get rowset ref count key").tag("err", err);
5811
0
                return -1;
5812
9
            } else if (!txn->decode_atomic_int(value, &ref_count)) {
5813
0
                LOG_WARNING("failed to decode rowset data ref count").tag("value", hex(value));
5814
0
                return -1;
5815
0
            }
5816
1.61k
        }
5817
5818
1.61k
        if (ref_count == 1) {
5819
            // It would not be added since it is recycling.
5820
1.61k
            if (delete_rowset_data(rowset_meta) != 0) {
5821
1.60k
                LOG_WARNING("failed to delete rowset data");
5822
1.60k
                return -1;
5823
1.60k
            }
5824
5825
            // Reset the transaction to avoid timeout.
5826
10
            err = txn_kv_->create_txn(&txn);
5827
10
            if (err != TxnErrorCode::TXN_OK) {
5828
0
                LOG_WARNING("failed to create txn").tag("err", err);
5829
0
                return -1;
5830
0
            }
5831
10
            txn->remove(rowset_ref_count_key);
5832
10
            LOG_INFO("delete rowset data ref count key")
5833
10
                    .tag("txn_id", rowset_meta.txn_id())
5834
10
                    .tag("ref_count_key", hex(rowset_ref_count_key));
5835
5836
10
            std::string dbm_start_key =
5837
10
                    meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id, 0, 0});
5838
10
            std::string dbm_end_key = meta_delete_bitmap_key(
5839
10
                    {reference_instance_id, tablet_id, rowset_id,
5840
10
                     std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()});
5841
10
            txn->remove(dbm_start_key, dbm_end_key);
5842
10
            LOG_INFO("remove delete bitmap kv")
5843
10
                    .tag("begin", hex(dbm_start_key))
5844
10
                    .tag("end", hex(dbm_end_key));
5845
5846
10
            std::string versioned_dbm_start_key = versioned::meta_delete_bitmap_key(
5847
10
                    {reference_instance_id, tablet_id, rowset_id});
5848
10
            std::string versioned_dbm_end_key = versioned_dbm_start_key;
5849
10
            encode_int64(INT64_MAX, &versioned_dbm_end_key);
5850
10
            txn->remove(versioned_dbm_start_key, versioned_dbm_end_key);
5851
10
            LOG_INFO("remove versioned delete bitmap kv")
5852
10
                    .tag("begin", hex(versioned_dbm_start_key))
5853
10
                    .tag("end", hex(versioned_dbm_end_key));
5854
10
        } else {
5855
            // Decrease the rowset ref count.
5856
            //
5857
            // The read conflict range will protect the rowset ref count key, if any conflict happens,
5858
            // we will retry and check whether the rowset ref count is 1 and the data need to be deleted.
5859
1
            txn->atomic_add(rowset_ref_count_key, -1);
5860
1
            LOG_INFO("decrease rowset data ref count")
5861
1
                    .tag("txn_id", rowset_meta.txn_id())
5862
1
                    .tag("ref_count", ref_count - 1)
5863
1
                    .tag("ref_count_key", hex(rowset_ref_count_key));
5864
1
        }
5865
5866
11
        if (!task.versioned_rowset_key.empty()) {
5867
0
            versioned::document_remove<RowsetMetaCloudPB>(txn.get(), task.versioned_rowset_key,
5868
0
                                                          task.versionstamp);
5869
0
            LOG_INFO("remove versioned meta rowset key").tag("key", hex(task.versioned_rowset_key));
5870
0
        }
5871
5872
11
        if (!task.non_versioned_rowset_key.empty()) {
5873
0
            txn->remove(task.non_versioned_rowset_key);
5874
0
            LOG_INFO("remove non versioned rowset key")
5875
0
                    .tag("key", hex(task.non_versioned_rowset_key));
5876
0
        }
5877
5878
        // empty when recycle ref rowsets for deleted instance
5879
13
        if (!task.recycle_rowset_key.empty()) {
5880
13
            txn->remove(task.recycle_rowset_key);
5881
13
            LOG_INFO("remove recycle rowset key").tag("key", hex(task.recycle_rowset_key));
5882
13
        }
5883
5884
11
        err = txn->commit();
5885
11
        if (err == TxnErrorCode::TXN_CONFLICT) { // unlikely
5886
            // The rowset ref count key has been changed, we need to retry.
5887
0
            VLOG_DEBUG << "decrease rowset ref count but txn conflict, retry"
5888
0
                       << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id
5889
0
                       << ", ref_count=" << ref_count << ", retry=" << i;
5890
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
5891
0
            continue;
5892
11
        } else if (err != TxnErrorCode::TXN_OK) {
5893
0
            LOG_WARNING("failed to recycle rowset meta and data").tag("err", err);
5894
0
            return -1;
5895
0
        }
5896
11
        LOG_INFO("recycle rowset meta and data success");
5897
11
        return 0;
5898
11
    }
5899
2
    LOG_WARNING("failed to recycle rowset meta and data after retry")
5900
2
            .tag("tablet_id", tablet_id)
5901
2
            .tag("rowset_id", rowset_id)
5902
2
            .tag("retry", MAX_RETRY);
5903
2
    return -1;
5904
1.61k
}
5905
5906
39
int InstanceRecycler::recycle_tmp_rowsets() {
5907
39
    const std::string task_name = "recycle_tmp_rowsets";
5908
39
    int64_t num_scanned = 0;
5909
39
    int64_t num_expired = 0;
5910
39
    std::atomic_long num_recycled = 0;
5911
39
    size_t expired_rowset_size = 0;
5912
39
    size_t total_rowset_key_size = 0;
5913
39
    size_t total_rowset_value_size = 0;
5914
39
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5915
5916
39
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
5917
39
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
5918
39
    std::string tmp_rs_key0;
5919
39
    std::string tmp_rs_key1;
5920
39
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
5921
39
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
5922
5923
39
    LOG_WARNING("begin to recycle tmp rowsets").tag("instance_id", instance_id_);
5924
5925
39
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5926
39
    register_recycle_task(task_name, start_time);
5927
5928
39
    DORIS_CLOUD_DEFER {
5929
39
        unregister_recycle_task(task_name);
5930
39
        int64_t cost =
5931
39
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5932
39
        metrics_context.finish_report();
5933
39
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5934
39
                .tag("instance_id", instance_id_)
5935
39
                .tag("num_scanned", num_scanned)
5936
39
                .tag("num_expired", num_expired)
5937
39
                .tag("num_recycled", num_recycled)
5938
39
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5939
39
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5940
39
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5941
39
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
5928
12
    DORIS_CLOUD_DEFER {
5929
12
        unregister_recycle_task(task_name);
5930
12
        int64_t cost =
5931
12
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5932
12
        metrics_context.finish_report();
5933
12
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5934
12
                .tag("instance_id", instance_id_)
5935
12
                .tag("num_scanned", num_scanned)
5936
12
                .tag("num_expired", num_expired)
5937
12
                .tag("num_recycled", num_recycled)
5938
12
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5939
12
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5940
12
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5941
12
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
5928
27
    DORIS_CLOUD_DEFER {
5929
27
        unregister_recycle_task(task_name);
5930
27
        int64_t cost =
5931
27
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5932
27
        metrics_context.finish_report();
5933
27
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5934
27
                .tag("instance_id", instance_id_)
5935
27
                .tag("num_scanned", num_scanned)
5936
27
                .tag("num_expired", num_expired)
5937
27
                .tag("num_recycled", num_recycled)
5938
27
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5939
27
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5940
27
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5941
27
    };
5942
5943
    // Elements in `tmp_rowset_keys` has the same lifetime as `it`
5944
5945
39
    std::vector<std::string> tmp_rowset_keys;
5946
39
    std::vector<std::string> tmp_rowset_ref_count_keys;
5947
39
    std::vector<std::string> tmp_rowset_keys_to_mark_recycled;
5948
39
    std::vector<std::string> tmp_rowset_keys_to_abort;
5949
5950
    // rowset_id -> rowset_meta
5951
    // store tmp_rowset id and meta for statistics rs size when delete
5952
39
    std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets;
5953
39
    auto worker_pool = std::make_unique<SimpleThreadPool>(
5954
39
            config::instance_recycler_worker_pool_size, "recycle_tmp_rowsets");
5955
39
    worker_pool->start();
5956
5957
39
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5958
5959
39
    auto handle_rowset_kv = [&num_scanned, &num_expired, &tmp_rowset_keys, &tmp_rowsets,
5960
39
                             &expired_rowset_size, &total_rowset_key_size, &total_rowset_value_size,
5961
39
                             &earlest_ts, &tmp_rowset_ref_count_keys,
5962
39
                             &tmp_rowset_keys_to_mark_recycled, &tmp_rowset_keys_to_abort, this,
5963
106k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5964
106k
        ++num_scanned;
5965
106k
        total_rowset_key_size += k.size();
5966
106k
        total_rowset_value_size += v.size();
5967
106k
        doris::RowsetMetaCloudPB rowset;
5968
106k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5969
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5970
0
            return -1;
5971
0
        }
5972
106k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5973
106k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5974
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5975
0
                   << " txn_expiration=" << rowset.txn_expiration()
5976
0
                   << " rowset_creation_time=" << rowset.creation_time();
5977
106k
        int64_t current_time = ::time(nullptr);
5978
106k
        if (current_time < expiration) { // not expired
5979
0
            return 0;
5980
0
        }
5981
5982
106k
        if (config::enable_mark_delete_rowset_before_recycle) {
5983
106k
            if (need_mark_rowset_as_recycled(rowset)) {
5984
52.0k
                tmp_rowset_keys_to_mark_recycled.emplace_back(k);
5985
52.0k
                LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv "
5986
52.0k
                             "at next turn, instance_id="
5987
52.0k
                          << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5988
52.0k
                          << rowset.start_version() << '-' << rowset.end_version() << "]";
5989
52.0k
                return 0;
5990
52.0k
            }
5991
106k
        }
5992
5993
54.0k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
5994
54.0k
            if (make_deferred_abort_task(rowset).has_value()) {
5995
3
                LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, "
5996
3
                             "instance_id="
5997
3
                          << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5998
3
                          << rowset.start_version() << '-' << rowset.end_version() << "]";
5999
3
                tmp_rowset_keys_to_abort.emplace_back(k);
6000
3
            }
6001
54.0k
        }
6002
6003
54.0k
        ++num_expired;
6004
54.0k
        expired_rowset_size += v.size();
6005
54.0k
        if (!rowset.has_resource_id()) {
6006
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
6007
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
6008
0
                return -1;
6009
0
            }
6010
            // might be a delete pred rowset
6011
0
            tmp_rowset_keys.emplace_back(k);
6012
0
            return 0;
6013
0
        }
6014
        // TODO(plat1ko): check rowset not referenced
6015
54.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
6016
54.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
6017
54.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
6018
54.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
6019
54.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
6020
54.0k
                  << " num_expired=" << num_expired
6021
54.0k
                  << " task_type=" << metrics_context.operation_type;
6022
6023
54.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
6024
        // Remove the rowset ref count key directly since it has not been used.
6025
54.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
6026
54.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
6027
54.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
6028
54.0k
                  << "key=" << hex(rowset_ref_count_key);
6029
54.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
6030
6031
54.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
6032
54.0k
        return 0;
6033
54.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5963
16
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5964
16
        ++num_scanned;
5965
16
        total_rowset_key_size += k.size();
5966
16
        total_rowset_value_size += v.size();
5967
16
        doris::RowsetMetaCloudPB rowset;
5968
16
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5969
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5970
0
            return -1;
5971
0
        }
5972
16
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5973
16
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5974
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5975
0
                   << " txn_expiration=" << rowset.txn_expiration()
5976
0
                   << " rowset_creation_time=" << rowset.creation_time();
5977
16
        int64_t current_time = ::time(nullptr);
5978
16
        if (current_time < expiration) { // not expired
5979
0
            return 0;
5980
0
        }
5981
5982
16
        if (config::enable_mark_delete_rowset_before_recycle) {
5983
16
            if (need_mark_rowset_as_recycled(rowset)) {
5984
9
                tmp_rowset_keys_to_mark_recycled.emplace_back(k);
5985
9
                LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv "
5986
9
                             "at next turn, instance_id="
5987
9
                          << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5988
9
                          << rowset.start_version() << '-' << rowset.end_version() << "]";
5989
9
                return 0;
5990
9
            }
5991
16
        }
5992
5993
7
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
5994
7
            if (make_deferred_abort_task(rowset).has_value()) {
5995
3
                LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, "
5996
3
                             "instance_id="
5997
3
                          << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5998
3
                          << rowset.start_version() << '-' << rowset.end_version() << "]";
5999
3
                tmp_rowset_keys_to_abort.emplace_back(k);
6000
3
            }
6001
7
        }
6002
6003
7
        ++num_expired;
6004
7
        expired_rowset_size += v.size();
6005
7
        if (!rowset.has_resource_id()) {
6006
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
6007
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
6008
0
                return -1;
6009
0
            }
6010
            // might be a delete pred rowset
6011
0
            tmp_rowset_keys.emplace_back(k);
6012
0
            return 0;
6013
0
        }
6014
        // TODO(plat1ko): check rowset not referenced
6015
7
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
6016
7
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
6017
7
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
6018
7
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
6019
7
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
6020
7
                  << " num_expired=" << num_expired
6021
7
                  << " task_type=" << metrics_context.operation_type;
6022
6023
7
        tmp_rowset_keys.emplace_back(k.data(), k.size());
6024
        // Remove the rowset ref count key directly since it has not been used.
6025
7
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
6026
7
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
6027
7
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
6028
7
                  << "key=" << hex(rowset_ref_count_key);
6029
7
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
6030
6031
7
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
6032
7
        return 0;
6033
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5963
106k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5964
106k
        ++num_scanned;
5965
106k
        total_rowset_key_size += k.size();
5966
106k
        total_rowset_value_size += v.size();
5967
106k
        doris::RowsetMetaCloudPB rowset;
5968
106k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5969
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5970
0
            return -1;
5971
0
        }
5972
106k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5973
106k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5974
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5975
0
                   << " txn_expiration=" << rowset.txn_expiration()
5976
0
                   << " rowset_creation_time=" << rowset.creation_time();
5977
106k
        int64_t current_time = ::time(nullptr);
5978
106k
        if (current_time < expiration) { // not expired
5979
0
            return 0;
5980
0
        }
5981
5982
106k
        if (config::enable_mark_delete_rowset_before_recycle) {
5983
106k
            if (need_mark_rowset_as_recycled(rowset)) {
5984
52.0k
                tmp_rowset_keys_to_mark_recycled.emplace_back(k);
5985
52.0k
                LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv "
5986
52.0k
                             "at next turn, instance_id="
5987
52.0k
                          << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5988
52.0k
                          << rowset.start_version() << '-' << rowset.end_version() << "]";
5989
52.0k
                return 0;
5990
52.0k
            }
5991
106k
        }
5992
5993
54.0k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
5994
54.0k
            if (make_deferred_abort_task(rowset).has_value()) {
5995
0
                LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, "
5996
0
                             "instance_id="
5997
0
                          << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5998
0
                          << rowset.start_version() << '-' << rowset.end_version() << "]";
5999
0
                tmp_rowset_keys_to_abort.emplace_back(k);
6000
0
            }
6001
54.0k
        }
6002
6003
54.0k
        ++num_expired;
6004
54.0k
        expired_rowset_size += v.size();
6005
54.0k
        if (!rowset.has_resource_id()) {
6006
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
6007
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
6008
0
                return -1;
6009
0
            }
6010
            // might be a delete pred rowset
6011
0
            tmp_rowset_keys.emplace_back(k);
6012
0
            return 0;
6013
0
        }
6014
        // TODO(plat1ko): check rowset not referenced
6015
54.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
6016
54.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
6017
54.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
6018
54.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
6019
54.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
6020
54.0k
                  << " num_expired=" << num_expired
6021
54.0k
                  << " task_type=" << metrics_context.operation_type;
6022
6023
54.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
6024
        // Remove the rowset ref count key directly since it has not been used.
6025
54.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
6026
54.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
6027
54.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
6028
54.0k
                  << "key=" << hex(rowset_ref_count_key);
6029
54.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
6030
6031
54.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
6032
54.0k
        return 0;
6033
54.0k
    };
6034
6035
    // TODO bacth delete
6036
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
6037
51.0k
        std::string dbm_start_key =
6038
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
6039
51.0k
        std::string dbm_end_key = dbm_start_key;
6040
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
6041
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
6042
51.0k
        if (ret != 0) {
6043
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
6044
0
                         << instance_id_ << ", tablet_id=" << tablet_id
6045
0
                         << ", rowset_id=" << rowset_id;
6046
0
        }
6047
51.0k
        return ret;
6048
51.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
6036
7
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
6037
7
        std::string dbm_start_key =
6038
7
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
6039
7
        std::string dbm_end_key = dbm_start_key;
6040
7
        encode_int64(INT64_MAX, &dbm_end_key);
6041
7
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
6042
7
        if (ret != 0) {
6043
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
6044
0
                         << instance_id_ << ", tablet_id=" << tablet_id
6045
0
                         << ", rowset_id=" << rowset_id;
6046
0
        }
6047
7
        return ret;
6048
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
6036
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
6037
51.0k
        std::string dbm_start_key =
6038
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
6039
51.0k
        std::string dbm_end_key = dbm_start_key;
6040
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
6041
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
6042
51.0k
        if (ret != 0) {
6043
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
6044
0
                         << instance_id_ << ", tablet_id=" << tablet_id
6045
0
                         << ", rowset_id=" << rowset_id;
6046
0
        }
6047
51.0k
        return ret;
6048
51.0k
    };
6049
6050
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
6051
51.0k
        auto delete_bitmap_start =
6052
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
6053
51.0k
        auto delete_bitmap_end =
6054
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
6055
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
6056
51.0k
        if (ret != 0) {
6057
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
6058
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
6059
0
        }
6060
51.0k
        return ret;
6061
51.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
6050
7
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
6051
7
        auto delete_bitmap_start =
6052
7
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
6053
7
        auto delete_bitmap_end =
6054
7
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
6055
7
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
6056
7
        if (ret != 0) {
6057
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
6058
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
6059
0
        }
6060
7
        return ret;
6061
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
6050
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
6051
51.0k
        auto delete_bitmap_start =
6052
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
6053
51.0k
        auto delete_bitmap_end =
6054
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
6055
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
6056
51.0k
        if (ret != 0) {
6057
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
6058
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
6059
0
        }
6060
51.0k
        return ret;
6061
51.0k
    };
6062
6063
39
    auto loop_done = [&]() -> int {
6064
32
        std::vector<std::string> tmp_rowset_keys_to_delete;
6065
32
        std::vector<std::string> tmp_rowset_ref_count_keys_to_delete;
6066
32
        std::vector<std::string> mark_keys_to_process;
6067
32
        std::vector<std::string> abort_keys_to_process;
6068
32
        std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets_to_delete;
6069
32
        tmp_rowset_keys_to_delete.swap(tmp_rowset_keys);
6070
32
        tmp_rowsets_to_delete.swap(tmp_rowsets);
6071
32
        tmp_rowset_ref_count_keys_to_delete.swap(tmp_rowset_ref_count_keys);
6072
32
        mark_keys_to_process.swap(tmp_rowset_keys_to_mark_recycled);
6073
32
        abort_keys_to_process.swap(tmp_rowset_keys_to_abort);
6074
32
        worker_pool->submit([&, tmp_rowset_keys_to_delete = std::move(tmp_rowset_keys_to_delete),
6075
32
                             tmp_rowsets_to_delete = std::move(tmp_rowsets_to_delete),
6076
32
                             tmp_rowset_ref_count_keys_to_delete =
6077
32
                                     std::move(tmp_rowset_ref_count_keys_to_delete),
6078
32
                             mark_keys_to_process = std::move(mark_keys_to_process),
6079
32
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
6080
32
            if (!mark_keys_to_process.empty() &&
6081
32
                batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_,
6082
16
                                                                  mark_keys_to_process) != 0) {
6083
0
                LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id="
6084
0
                             << instance_id_;
6085
0
                return;
6086
0
            }
6087
32
            if (!abort_keys_to_process.empty() &&
6088
32
                batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process,
6089
3
                                                                      false) != 0) {
6090
0
                LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id="
6091
0
                             << instance_id_;
6092
0
                return;
6093
0
            }
6094
32
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
6095
32
                                   metrics_context) != 0) {
6096
3
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
6097
3
                return;
6098
3
            }
6099
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
6100
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
6101
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
6102
0
                                 << rs.ShortDebugString();
6103
0
                    return;
6104
0
                }
6105
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
6106
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
6107
0
                                 << rs.ShortDebugString();
6108
0
                    return;
6109
0
                }
6110
51.0k
            }
6111
29
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
6112
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
6113
0
                return;
6114
0
            }
6115
29
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
6116
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
6117
0
                return;
6118
0
            }
6119
29
            num_recycled += tmp_rowset_keys_to_delete.size();
6120
29
            return;
6121
29
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEvENUlvE_clEv
Line
Count
Source
6079
12
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
6080
12
            if (!mark_keys_to_process.empty() &&
6081
12
                batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_,
6082
7
                                                                  mark_keys_to_process) != 0) {
6083
0
                LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id="
6084
0
                             << instance_id_;
6085
0
                return;
6086
0
            }
6087
12
            if (!abort_keys_to_process.empty() &&
6088
12
                batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process,
6089
3
                                                                      false) != 0) {
6090
0
                LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id="
6091
0
                             << instance_id_;
6092
0
                return;
6093
0
            }
6094
12
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
6095
12
                                   metrics_context) != 0) {
6096
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
6097
0
                return;
6098
0
            }
6099
12
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
6100
7
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
6101
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
6102
0
                                 << rs.ShortDebugString();
6103
0
                    return;
6104
0
                }
6105
7
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
6106
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
6107
0
                                 << rs.ShortDebugString();
6108
0
                    return;
6109
0
                }
6110
7
            }
6111
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
6112
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
6113
0
                return;
6114
0
            }
6115
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
6116
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
6117
0
                return;
6118
0
            }
6119
12
            num_recycled += tmp_rowset_keys_to_delete.size();
6120
12
            return;
6121
12
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEvENUlvE_clEv
Line
Count
Source
6079
20
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
6080
20
            if (!mark_keys_to_process.empty() &&
6081
20
                batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_,
6082
9
                                                                  mark_keys_to_process) != 0) {
6083
0
                LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id="
6084
0
                             << instance_id_;
6085
0
                return;
6086
0
            }
6087
20
            if (!abort_keys_to_process.empty() &&
6088
20
                batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process,
6089
0
                                                                      false) != 0) {
6090
0
                LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id="
6091
0
                             << instance_id_;
6092
0
                return;
6093
0
            }
6094
20
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
6095
20
                                   metrics_context) != 0) {
6096
3
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
6097
3
                return;
6098
3
            }
6099
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
6100
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
6101
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
6102
0
                                 << rs.ShortDebugString();
6103
0
                    return;
6104
0
                }
6105
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
6106
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
6107
0
                                 << rs.ShortDebugString();
6108
0
                    return;
6109
0
                }
6110
51.0k
            }
6111
17
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
6112
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
6113
0
                return;
6114
0
            }
6115
17
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
6116
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
6117
0
                return;
6118
0
            }
6119
17
            num_recycled += tmp_rowset_keys_to_delete.size();
6120
17
            return;
6121
17
        });
6122
32
        return 0;
6123
32
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEv
Line
Count
Source
6063
12
    auto loop_done = [&]() -> int {
6064
12
        std::vector<std::string> tmp_rowset_keys_to_delete;
6065
12
        std::vector<std::string> tmp_rowset_ref_count_keys_to_delete;
6066
12
        std::vector<std::string> mark_keys_to_process;
6067
12
        std::vector<std::string> abort_keys_to_process;
6068
12
        std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets_to_delete;
6069
12
        tmp_rowset_keys_to_delete.swap(tmp_rowset_keys);
6070
12
        tmp_rowsets_to_delete.swap(tmp_rowsets);
6071
12
        tmp_rowset_ref_count_keys_to_delete.swap(tmp_rowset_ref_count_keys);
6072
12
        mark_keys_to_process.swap(tmp_rowset_keys_to_mark_recycled);
6073
12
        abort_keys_to_process.swap(tmp_rowset_keys_to_abort);
6074
12
        worker_pool->submit([&, tmp_rowset_keys_to_delete = std::move(tmp_rowset_keys_to_delete),
6075
12
                             tmp_rowsets_to_delete = std::move(tmp_rowsets_to_delete),
6076
12
                             tmp_rowset_ref_count_keys_to_delete =
6077
12
                                     std::move(tmp_rowset_ref_count_keys_to_delete),
6078
12
                             mark_keys_to_process = std::move(mark_keys_to_process),
6079
12
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
6080
12
            if (!mark_keys_to_process.empty() &&
6081
12
                batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_,
6082
12
                                                                  mark_keys_to_process) != 0) {
6083
12
                LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id="
6084
12
                             << instance_id_;
6085
12
                return;
6086
12
            }
6087
12
            if (!abort_keys_to_process.empty() &&
6088
12
                batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process,
6089
12
                                                                      false) != 0) {
6090
12
                LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id="
6091
12
                             << instance_id_;
6092
12
                return;
6093
12
            }
6094
12
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
6095
12
                                   metrics_context) != 0) {
6096
12
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
6097
12
                return;
6098
12
            }
6099
12
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
6100
12
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
6101
12
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
6102
12
                                 << rs.ShortDebugString();
6103
12
                    return;
6104
12
                }
6105
12
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
6106
12
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
6107
12
                                 << rs.ShortDebugString();
6108
12
                    return;
6109
12
                }
6110
12
            }
6111
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
6112
12
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
6113
12
                return;
6114
12
            }
6115
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
6116
12
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
6117
12
                return;
6118
12
            }
6119
12
            num_recycled += tmp_rowset_keys_to_delete.size();
6120
12
            return;
6121
12
        });
6122
12
        return 0;
6123
12
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEv
Line
Count
Source
6063
20
    auto loop_done = [&]() -> int {
6064
20
        std::vector<std::string> tmp_rowset_keys_to_delete;
6065
20
        std::vector<std::string> tmp_rowset_ref_count_keys_to_delete;
6066
20
        std::vector<std::string> mark_keys_to_process;
6067
20
        std::vector<std::string> abort_keys_to_process;
6068
20
        std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets_to_delete;
6069
20
        tmp_rowset_keys_to_delete.swap(tmp_rowset_keys);
6070
20
        tmp_rowsets_to_delete.swap(tmp_rowsets);
6071
20
        tmp_rowset_ref_count_keys_to_delete.swap(tmp_rowset_ref_count_keys);
6072
20
        mark_keys_to_process.swap(tmp_rowset_keys_to_mark_recycled);
6073
20
        abort_keys_to_process.swap(tmp_rowset_keys_to_abort);
6074
20
        worker_pool->submit([&, tmp_rowset_keys_to_delete = std::move(tmp_rowset_keys_to_delete),
6075
20
                             tmp_rowsets_to_delete = std::move(tmp_rowsets_to_delete),
6076
20
                             tmp_rowset_ref_count_keys_to_delete =
6077
20
                                     std::move(tmp_rowset_ref_count_keys_to_delete),
6078
20
                             mark_keys_to_process = std::move(mark_keys_to_process),
6079
20
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
6080
20
            if (!mark_keys_to_process.empty() &&
6081
20
                batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_,
6082
20
                                                                  mark_keys_to_process) != 0) {
6083
20
                LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id="
6084
20
                             << instance_id_;
6085
20
                return;
6086
20
            }
6087
20
            if (!abort_keys_to_process.empty() &&
6088
20
                batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process,
6089
20
                                                                      false) != 0) {
6090
20
                LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id="
6091
20
                             << instance_id_;
6092
20
                return;
6093
20
            }
6094
20
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
6095
20
                                   metrics_context) != 0) {
6096
20
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
6097
20
                return;
6098
20
            }
6099
20
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
6100
20
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
6101
20
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
6102
20
                                 << rs.ShortDebugString();
6103
20
                    return;
6104
20
                }
6105
20
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
6106
20
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
6107
20
                                 << rs.ShortDebugString();
6108
20
                    return;
6109
20
                }
6110
20
            }
6111
20
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
6112
20
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
6113
20
                return;
6114
20
            }
6115
20
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
6116
20
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
6117
20
                return;
6118
20
            }
6119
20
            num_recycled += tmp_rowset_keys_to_delete.size();
6120
20
            return;
6121
20
        });
6122
20
        return 0;
6123
20
    };
6124
6125
39
    if (config::enable_recycler_stats_metrics) {
6126
0
        scan_and_statistics_tmp_rowsets();
6127
0
    }
6128
    // recycle_func and loop_done for scan and recycle
6129
39
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_rowset_kv),
6130
39
                               std::move(loop_done));
6131
6132
39
    worker_pool->stop();
6133
6134
    // Report final metrics after all concurrent tasks completed
6135
39
    segment_metrics_context_.report();
6136
39
    metrics_context.report();
6137
6138
39
    return ret;
6139
39
}
6140
6141
int InstanceRecycler::scan_and_recycle(
6142
        std::string begin, std::string_view end,
6143
        std::function<int(std::string_view k, std::string_view v)> recycle_func,
6144
314
        std::function<int()> loop_done, std::function<bool(std::string*)> next_begin_getter) {
6145
314
    LOG(INFO) << "begin scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) << ")";
6146
314
    int ret = 0;
6147
314
    int64_t cnt = 0;
6148
314
    int get_range_retried = 0;
6149
314
    std::string err;
6150
314
    DORIS_CLOUD_DEFER_COPY(begin, end) {
6151
314
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
6152
314
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
6153
314
                  << " ret=" << ret << " err=" << err;
6154
314
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEESA_IFbPS7_EEENK3$_0clEv
Line
Count
Source
6150
31
    DORIS_CLOUD_DEFER_COPY(begin, end) {
6151
31
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
6152
31
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
6153
31
                  << " ret=" << ret << " err=" << err;
6154
31
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEESA_IFbPS7_EEENK3$_0clEv
Line
Count
Source
6150
283
    DORIS_CLOUD_DEFER_COPY(begin, end) {
6151
283
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
6152
283
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
6153
283
                  << " ret=" << ret << " err=" << err;
6154
283
    };
6155
6156
314
    std::unique_ptr<RangeGetIterator> it;
6157
523
    while (it == nullptr /* may be not init */ || (it->more() && !stopped())) {
6158
380
        if (get_range_retried > 1000) {
6159
0
            err = "txn_get exceeds max retry(1000), may not scan all keys";
6160
0
            ret = -3;
6161
0
            return ret;
6162
0
        }
6163
380
        int get_ret = txn_get(txn_kv_.get(), begin, end, it);
6164
380
        if (get_ret != 0) { // txn kv may complain "Request for future version"
6165
0
            LOG(WARNING) << "failed to get kv, range=[" << hex(begin) << "," << hex(end)
6166
0
                         << ") num_scanned=" << cnt << " txn_get_ret=" << get_ret
6167
0
                         << " get_range_retried=" << get_range_retried;
6168
0
            ++get_range_retried;
6169
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
6170
0
            continue; // try again
6171
0
        }
6172
380
        if (!it->has_next()) {
6173
171
            LOG(INFO) << "no keys in the given range=[" << hex(begin) << "," << hex(end) << ")";
6174
171
            break; // scan finished
6175
171
        }
6176
209
        bool begin_updated = false;
6177
209
        LOG(INFO) << "scan_and_recycle iterator key_range=[" << hex(begin) << "," << hex(end)
6178
209
                  << ") iterator->size()=" << it->size();
6179
154k
        while (it->has_next()) {
6180
154k
            ++cnt;
6181
            // recycle corresponding resources
6182
154k
            auto [k, v] = it->next();
6183
154k
            if (!it->has_next()) {
6184
198
                begin = k;
6185
198
                VLOG_DEBUG << "iterator has no more kvs. key=" << hex(k);
6186
198
            }
6187
            // FIXME(gavin): if we want to continue scanning, the recycle_func should not return non-zero
6188
154k
            if (recycle_func(k, v) != 0) {
6189
4.00k
                err = "recycle_func error";
6190
4.00k
                ret = -1;
6191
4.00k
            }
6192
154k
            if (next_begin_getter) {
6193
7.84k
                std::string next_begin;
6194
7.84k
                if (next_begin_getter(&next_begin)) {
6195
13
                    if (next_begin > k) {
6196
13
                        begin = std::move(next_begin);
6197
13
                        begin_updated = true;
6198
13
                        VLOG_DEBUG << "scan_and_recycle updates begin to " << hex(begin)
6199
0
                                   << " after key=" << hex(k);
6200
13
                        break;
6201
13
                    }
6202
0
                    LOG_WARNING("ignore invalid next begin in scan_and_recycle")
6203
0
                            .tag("next_begin", hex(next_begin))
6204
0
                            .tag("current_key", hex(k));
6205
0
                }
6206
7.84k
            }
6207
154k
        }
6208
209
        if (!begin_updated) {
6209
196
            begin.push_back('\x00'); // Update to next smallest key for iteration
6210
196
        } else {
6211
13
            it.reset();
6212
13
        }
6213
6214
        // FIXME(gavin): if we want to continue scanning, the loop_done should not return non-zero
6215
        // if we want to continue scanning, the recycle_func should not return non-zero
6216
209
        if (loop_done && loop_done() != 0) {
6217
5
            err = "loop_done error";
6218
5
            ret = -1;
6219
5
        }
6220
209
    }
6221
314
    return ret;
6222
314
}
6223
6224
19
int InstanceRecycler::abort_timeout_txn() {
6225
19
    const std::string task_name = "abort_timeout_txn";
6226
19
    int64_t num_scanned = 0;
6227
19
    int64_t num_timeout = 0;
6228
19
    int64_t num_abort = 0;
6229
19
    int64_t num_advance = 0;
6230
19
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
6231
6232
19
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
6233
19
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
6234
19
    std::string begin_txn_running_key;
6235
19
    std::string end_txn_running_key;
6236
19
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
6237
19
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
6238
6239
19
    LOG_WARNING("begin to abort timeout txn").tag("instance_id", instance_id_);
6240
6241
19
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6242
19
    register_recycle_task(task_name, start_time);
6243
6244
19
    DORIS_CLOUD_DEFER {
6245
19
        unregister_recycle_task(task_name);
6246
19
        int64_t cost =
6247
19
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6248
19
        metrics_context.finish_report();
6249
19
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
6250
19
                .tag("instance_id", instance_id_)
6251
19
                .tag("num_scanned", num_scanned)
6252
19
                .tag("num_timeout", num_timeout)
6253
19
                .tag("num_abort", num_abort)
6254
19
                .tag("num_advance", num_advance);
6255
19
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
6244
3
    DORIS_CLOUD_DEFER {
6245
3
        unregister_recycle_task(task_name);
6246
3
        int64_t cost =
6247
3
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6248
3
        metrics_context.finish_report();
6249
3
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
6250
3
                .tag("instance_id", instance_id_)
6251
3
                .tag("num_scanned", num_scanned)
6252
3
                .tag("num_timeout", num_timeout)
6253
3
                .tag("num_abort", num_abort)
6254
3
                .tag("num_advance", num_advance);
6255
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
6244
16
    DORIS_CLOUD_DEFER {
6245
16
        unregister_recycle_task(task_name);
6246
16
        int64_t cost =
6247
16
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6248
16
        metrics_context.finish_report();
6249
16
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
6250
16
                .tag("instance_id", instance_id_)
6251
16
                .tag("num_scanned", num_scanned)
6252
16
                .tag("num_timeout", num_timeout)
6253
16
                .tag("num_abort", num_abort)
6254
16
                .tag("num_advance", num_advance);
6255
16
    };
6256
6257
19
    int64_t current_time =
6258
19
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6259
6260
19
    auto handle_txn_running_kv = [&num_scanned, &num_timeout, &num_abort, &num_advance,
6261
19
                                  &current_time, &metrics_context,
6262
19
                                  this](std::string_view k, std::string_view v) -> int {
6263
9
        ++num_scanned;
6264
6265
9
        std::unique_ptr<Transaction> txn;
6266
9
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6267
9
        if (err != TxnErrorCode::TXN_OK) {
6268
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6269
0
            return -1;
6270
0
        }
6271
9
        std::string_view k1 = k;
6272
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
6273
9
        k1.remove_prefix(1); // Remove key space
6274
9
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6275
9
        if (decode_key(&k1, &out) != 0) {
6276
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
6277
0
            return -1;
6278
0
        }
6279
9
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6280
9
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6281
9
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6282
        // Update txn_info
6283
9
        std::string txn_inf_key, txn_inf_val;
6284
9
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
6285
9
        err = txn->get(txn_inf_key, &txn_inf_val);
6286
9
        if (err != TxnErrorCode::TXN_OK) {
6287
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
6288
0
            return -1;
6289
0
        }
6290
9
        TxnInfoPB txn_info;
6291
9
        if (!txn_info.ParseFromString(txn_inf_val)) {
6292
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
6293
0
            return -1;
6294
0
        }
6295
6296
9
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
6297
3
            txn.reset();
6298
3
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
6299
3
            std::shared_ptr<TxnLazyCommitTask> task =
6300
3
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
6301
3
            std::pair<MetaServiceCode, std::string> ret = task->wait();
6302
3
            if (ret.first != MetaServiceCode::OK) {
6303
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
6304
0
                             << "msg=" << ret.second;
6305
0
                return -1;
6306
0
            }
6307
3
            ++num_advance;
6308
3
            return 0;
6309
6
        } else {
6310
6
            TxnRunningPB txn_running_pb;
6311
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
6312
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6313
0
                return -1;
6314
0
            }
6315
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
6316
4
                return 0;
6317
4
            }
6318
2
            ++num_timeout;
6319
6320
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
6321
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
6322
2
            txn_info.set_finish_time(current_time);
6323
2
            txn_info.set_reason("timeout");
6324
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
6325
2
            txn_inf_val.clear();
6326
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
6327
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
6328
0
                return -1;
6329
0
            }
6330
2
            txn->put(txn_inf_key, txn_inf_val);
6331
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
6332
            // Put recycle txn key
6333
2
            std::string recyc_txn_key, recyc_txn_val;
6334
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
6335
2
            RecycleTxnPB recycle_txn_pb;
6336
2
            recycle_txn_pb.set_creation_time(current_time);
6337
2
            recycle_txn_pb.set_label(txn_info.label());
6338
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
6339
0
                LOG_WARNING("failed to serialize txn recycle info")
6340
0
                        .tag("key", hex(k))
6341
0
                        .tag("db_id", db_id)
6342
0
                        .tag("txn_id", txn_id);
6343
0
                return -1;
6344
0
            }
6345
2
            txn->put(recyc_txn_key, recyc_txn_val);
6346
            // Remove txn running key
6347
2
            txn->remove(k);
6348
2
            err = txn->commit();
6349
2
            if (err != TxnErrorCode::TXN_OK) {
6350
0
                LOG_WARNING("failed to commit txn err={}", err)
6351
0
                        .tag("key", hex(k))
6352
0
                        .tag("db_id", db_id)
6353
0
                        .tag("txn_id", txn_id);
6354
0
                return -1;
6355
0
            }
6356
2
            metrics_context.total_recycled_num = ++num_abort;
6357
2
            metrics_context.report();
6358
2
        }
6359
6360
2
        return 0;
6361
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6262
3
                                  this](std::string_view k, std::string_view v) -> int {
6263
3
        ++num_scanned;
6264
6265
3
        std::unique_ptr<Transaction> txn;
6266
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6267
3
        if (err != TxnErrorCode::TXN_OK) {
6268
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6269
0
            return -1;
6270
0
        }
6271
3
        std::string_view k1 = k;
6272
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
6273
3
        k1.remove_prefix(1); // Remove key space
6274
3
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6275
3
        if (decode_key(&k1, &out) != 0) {
6276
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
6277
0
            return -1;
6278
0
        }
6279
3
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6280
3
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6281
3
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6282
        // Update txn_info
6283
3
        std::string txn_inf_key, txn_inf_val;
6284
3
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
6285
3
        err = txn->get(txn_inf_key, &txn_inf_val);
6286
3
        if (err != TxnErrorCode::TXN_OK) {
6287
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
6288
0
            return -1;
6289
0
        }
6290
3
        TxnInfoPB txn_info;
6291
3
        if (!txn_info.ParseFromString(txn_inf_val)) {
6292
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
6293
0
            return -1;
6294
0
        }
6295
6296
3
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
6297
3
            txn.reset();
6298
3
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
6299
3
            std::shared_ptr<TxnLazyCommitTask> task =
6300
3
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
6301
3
            std::pair<MetaServiceCode, std::string> ret = task->wait();
6302
3
            if (ret.first != MetaServiceCode::OK) {
6303
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
6304
0
                             << "msg=" << ret.second;
6305
0
                return -1;
6306
0
            }
6307
3
            ++num_advance;
6308
3
            return 0;
6309
3
        } else {
6310
0
            TxnRunningPB txn_running_pb;
6311
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
6312
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6313
0
                return -1;
6314
0
            }
6315
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
6316
0
                return 0;
6317
0
            }
6318
0
            ++num_timeout;
6319
6320
0
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
6321
0
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
6322
0
            txn_info.set_finish_time(current_time);
6323
0
            txn_info.set_reason("timeout");
6324
0
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
6325
0
            txn_inf_val.clear();
6326
0
            if (!txn_info.SerializeToString(&txn_inf_val)) {
6327
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
6328
0
                return -1;
6329
0
            }
6330
0
            txn->put(txn_inf_key, txn_inf_val);
6331
0
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
6332
            // Put recycle txn key
6333
0
            std::string recyc_txn_key, recyc_txn_val;
6334
0
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
6335
0
            RecycleTxnPB recycle_txn_pb;
6336
0
            recycle_txn_pb.set_creation_time(current_time);
6337
0
            recycle_txn_pb.set_label(txn_info.label());
6338
0
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
6339
0
                LOG_WARNING("failed to serialize txn recycle info")
6340
0
                        .tag("key", hex(k))
6341
0
                        .tag("db_id", db_id)
6342
0
                        .tag("txn_id", txn_id);
6343
0
                return -1;
6344
0
            }
6345
0
            txn->put(recyc_txn_key, recyc_txn_val);
6346
            // Remove txn running key
6347
0
            txn->remove(k);
6348
0
            err = txn->commit();
6349
0
            if (err != TxnErrorCode::TXN_OK) {
6350
0
                LOG_WARNING("failed to commit txn err={}", err)
6351
0
                        .tag("key", hex(k))
6352
0
                        .tag("db_id", db_id)
6353
0
                        .tag("txn_id", txn_id);
6354
0
                return -1;
6355
0
            }
6356
0
            metrics_context.total_recycled_num = ++num_abort;
6357
0
            metrics_context.report();
6358
0
        }
6359
6360
0
        return 0;
6361
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6262
6
                                  this](std::string_view k, std::string_view v) -> int {
6263
6
        ++num_scanned;
6264
6265
6
        std::unique_ptr<Transaction> txn;
6266
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6267
6
        if (err != TxnErrorCode::TXN_OK) {
6268
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6269
0
            return -1;
6270
0
        }
6271
6
        std::string_view k1 = k;
6272
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
6273
6
        k1.remove_prefix(1); // Remove key space
6274
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6275
6
        if (decode_key(&k1, &out) != 0) {
6276
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
6277
0
            return -1;
6278
0
        }
6279
6
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6280
6
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6281
6
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6282
        // Update txn_info
6283
6
        std::string txn_inf_key, txn_inf_val;
6284
6
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
6285
6
        err = txn->get(txn_inf_key, &txn_inf_val);
6286
6
        if (err != TxnErrorCode::TXN_OK) {
6287
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
6288
0
            return -1;
6289
0
        }
6290
6
        TxnInfoPB txn_info;
6291
6
        if (!txn_info.ParseFromString(txn_inf_val)) {
6292
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
6293
0
            return -1;
6294
0
        }
6295
6296
6
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
6297
0
            txn.reset();
6298
0
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
6299
0
            std::shared_ptr<TxnLazyCommitTask> task =
6300
0
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
6301
0
            std::pair<MetaServiceCode, std::string> ret = task->wait();
6302
0
            if (ret.first != MetaServiceCode::OK) {
6303
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
6304
0
                             << "msg=" << ret.second;
6305
0
                return -1;
6306
0
            }
6307
0
            ++num_advance;
6308
0
            return 0;
6309
6
        } else {
6310
6
            TxnRunningPB txn_running_pb;
6311
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
6312
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6313
0
                return -1;
6314
0
            }
6315
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
6316
4
                return 0;
6317
4
            }
6318
2
            ++num_timeout;
6319
6320
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
6321
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
6322
2
            txn_info.set_finish_time(current_time);
6323
2
            txn_info.set_reason("timeout");
6324
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
6325
2
            txn_inf_val.clear();
6326
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
6327
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
6328
0
                return -1;
6329
0
            }
6330
2
            txn->put(txn_inf_key, txn_inf_val);
6331
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
6332
            // Put recycle txn key
6333
2
            std::string recyc_txn_key, recyc_txn_val;
6334
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
6335
2
            RecycleTxnPB recycle_txn_pb;
6336
2
            recycle_txn_pb.set_creation_time(current_time);
6337
2
            recycle_txn_pb.set_label(txn_info.label());
6338
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
6339
0
                LOG_WARNING("failed to serialize txn recycle info")
6340
0
                        .tag("key", hex(k))
6341
0
                        .tag("db_id", db_id)
6342
0
                        .tag("txn_id", txn_id);
6343
0
                return -1;
6344
0
            }
6345
2
            txn->put(recyc_txn_key, recyc_txn_val);
6346
            // Remove txn running key
6347
2
            txn->remove(k);
6348
2
            err = txn->commit();
6349
2
            if (err != TxnErrorCode::TXN_OK) {
6350
0
                LOG_WARNING("failed to commit txn err={}", err)
6351
0
                        .tag("key", hex(k))
6352
0
                        .tag("db_id", db_id)
6353
0
                        .tag("txn_id", txn_id);
6354
0
                return -1;
6355
0
            }
6356
2
            metrics_context.total_recycled_num = ++num_abort;
6357
2
            metrics_context.report();
6358
2
        }
6359
6360
2
        return 0;
6361
6
    };
6362
6363
19
    if (config::enable_recycler_stats_metrics) {
6364
0
        scan_and_statistics_abort_timeout_txn();
6365
0
    }
6366
    // recycle_func and loop_done for scan and recycle
6367
19
    return scan_and_recycle(begin_txn_running_key, end_txn_running_key,
6368
19
                            std::move(handle_txn_running_kv));
6369
19
}
6370
6371
19
int InstanceRecycler::recycle_expired_txn_label() {
6372
19
    const std::string task_name = "recycle_expired_txn_label";
6373
19
    int64_t num_scanned = 0;
6374
19
    int64_t num_expired = 0;
6375
19
    std::atomic_long num_recycled = 0;
6376
19
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
6377
19
    int ret = 0;
6378
6379
19
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
6380
19
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
6381
19
    std::string begin_recycle_txn_key;
6382
19
    std::string end_recycle_txn_key;
6383
19
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
6384
19
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
6385
19
    std::vector<std::string> recycle_txn_info_keys;
6386
6387
19
    LOG_WARNING("begin to recycle expired txn").tag("instance_id", instance_id_);
6388
6389
19
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6390
19
    register_recycle_task(task_name, start_time);
6391
19
    DORIS_CLOUD_DEFER {
6392
19
        unregister_recycle_task(task_name);
6393
19
        int64_t cost =
6394
19
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6395
19
        metrics_context.finish_report();
6396
19
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
6397
19
                .tag("instance_id", instance_id_)
6398
19
                .tag("num_scanned", num_scanned)
6399
19
                .tag("num_expired", num_expired)
6400
19
                .tag("num_recycled", num_recycled);
6401
19
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
6391
1
    DORIS_CLOUD_DEFER {
6392
1
        unregister_recycle_task(task_name);
6393
1
        int64_t cost =
6394
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6395
1
        metrics_context.finish_report();
6396
1
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
6397
1
                .tag("instance_id", instance_id_)
6398
1
                .tag("num_scanned", num_scanned)
6399
1
                .tag("num_expired", num_expired)
6400
1
                .tag("num_recycled", num_recycled);
6401
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
6391
18
    DORIS_CLOUD_DEFER {
6392
18
        unregister_recycle_task(task_name);
6393
18
        int64_t cost =
6394
18
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6395
18
        metrics_context.finish_report();
6396
18
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
6397
18
                .tag("instance_id", instance_id_)
6398
18
                .tag("num_scanned", num_scanned)
6399
18
                .tag("num_expired", num_expired)
6400
18
                .tag("num_recycled", num_recycled);
6401
18
    };
6402
6403
19
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6404
6405
19
    SyncExecutor<int> concurrent_delete_executor(
6406
19
            _thread_pool_group.s3_producer_pool,
6407
19
            fmt::format("recycle expired txn label, instance id {}", instance_id_),
6408
23.0k
            [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
6408
1
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
6408
23.0k
            [](const int& ret) { return ret != 0; });
6409
6410
19
    int64_t current_time_ms =
6411
19
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6412
6413
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
6414
30.0k
        ++num_scanned;
6415
30.0k
        RecycleTxnPB recycle_txn_pb;
6416
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
6417
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6418
0
            return -1;
6419
0
        }
6420
30.0k
        if ((config::force_immediate_recycle) ||
6421
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
6422
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
6423
30.0k
             current_time_ms)) {
6424
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
6425
23.0k
            num_expired++;
6426
23.0k
            recycle_txn_info_keys.emplace_back(k);
6427
23.0k
        }
6428
30.0k
        return 0;
6429
30.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6413
1
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
6414
1
        ++num_scanned;
6415
1
        RecycleTxnPB recycle_txn_pb;
6416
1
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
6417
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6418
0
            return -1;
6419
0
        }
6420
1
        if ((config::force_immediate_recycle) ||
6421
1
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
6422
1
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
6423
1
             current_time_ms)) {
6424
1
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
6425
1
            num_expired++;
6426
1
            recycle_txn_info_keys.emplace_back(k);
6427
1
        }
6428
1
        return 0;
6429
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6413
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
6414
30.0k
        ++num_scanned;
6415
30.0k
        RecycleTxnPB recycle_txn_pb;
6416
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
6417
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6418
0
            return -1;
6419
0
        }
6420
30.0k
        if ((config::force_immediate_recycle) ||
6421
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
6422
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
6423
30.0k
             current_time_ms)) {
6424
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
6425
23.0k
            num_expired++;
6426
23.0k
            recycle_txn_info_keys.emplace_back(k);
6427
23.0k
        }
6428
30.0k
        return 0;
6429
30.0k
    };
6430
6431
    // int 0 for success, 1 for conflict, -1 for error
6432
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
6433
23.0k
        std::string_view k1 = k;
6434
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
6435
23.0k
        k1.remove_prefix(1); // Remove key space
6436
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6437
23.0k
        int ret = decode_key(&k1, &out);
6438
23.0k
        if (ret != 0) {
6439
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
6440
0
            return -1;
6441
0
        }
6442
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6443
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6444
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6445
23.0k
        std::unique_ptr<Transaction> txn;
6446
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6447
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6448
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6449
0
            return -1;
6450
0
        }
6451
        // Remove txn index kv
6452
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
6453
23.0k
        txn->remove(index_key);
6454
        // Remove txn info kv
6455
23.0k
        std::string info_key, info_val;
6456
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
6457
23.0k
        err = txn->get(info_key, &info_val);
6458
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6459
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
6460
0
            return -1;
6461
0
        }
6462
23.0k
        TxnInfoPB txn_info;
6463
23.0k
        if (!txn_info.ParseFromString(info_val)) {
6464
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
6465
0
            return -1;
6466
0
        }
6467
23.0k
        txn->remove(info_key);
6468
        // Remove sub txn index kvs
6469
23.0k
        std::vector<std::string> sub_txn_index_keys;
6470
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
6471
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
6472
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
6473
22.9k
        }
6474
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
6475
22.9k
            txn->remove(sub_txn_index_key);
6476
22.9k
        }
6477
        // Update txn label
6478
23.0k
        std::string label_key, label_val;
6479
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
6480
23.0k
        err = txn->get(label_key, &label_val);
6481
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6482
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
6483
0
                         << " err=" << err;
6484
0
            return -1;
6485
0
        }
6486
23.0k
        TxnLabelPB txn_label;
6487
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
6488
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
6489
0
            return -1;
6490
0
        }
6491
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
6492
23.0k
        if (it != txn_label.txn_ids().end()) {
6493
23.0k
            txn_label.mutable_txn_ids()->erase(it);
6494
23.0k
        }
6495
23.0k
        if (txn_label.txn_ids().empty()) {
6496
23.0k
            txn->remove(label_key);
6497
23.0k
            TEST_SYNC_POINT_CALLBACK(
6498
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
6499
23.0k
        } else {
6500
73
            if (!txn_label.SerializeToString(&label_val)) {
6501
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
6502
0
                return -1;
6503
0
            }
6504
73
            TEST_SYNC_POINT_CALLBACK(
6505
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
6506
73
            txn->atomic_set_ver_value(label_key, label_val);
6507
73
            TEST_SYNC_POINT_CALLBACK(
6508
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
6509
73
        }
6510
        // Remove recycle txn kv
6511
23.0k
        txn->remove(k);
6512
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
6513
23.0k
        err = txn->commit();
6514
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6515
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
6516
62
                TEST_SYNC_POINT_CALLBACK(
6517
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
6518
                // log the txn_id and label
6519
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
6520
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
6521
62
                             << " txn_label=" << txn_info.label();
6522
62
                return 1;
6523
62
            }
6524
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
6525
0
            return -1;
6526
62
        }
6527
23.0k
        ++num_recycled;
6528
6529
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
6530
23.0k
        return 0;
6531
23.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
6432
1
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
6433
1
        std::string_view k1 = k;
6434
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
6435
1
        k1.remove_prefix(1); // Remove key space
6436
1
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6437
1
        int ret = decode_key(&k1, &out);
6438
1
        if (ret != 0) {
6439
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
6440
0
            return -1;
6441
0
        }
6442
1
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6443
1
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6444
1
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6445
1
        std::unique_ptr<Transaction> txn;
6446
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6447
1
        if (err != TxnErrorCode::TXN_OK) {
6448
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6449
0
            return -1;
6450
0
        }
6451
        // Remove txn index kv
6452
1
        auto index_key = txn_index_key({instance_id_, txn_id});
6453
1
        txn->remove(index_key);
6454
        // Remove txn info kv
6455
1
        std::string info_key, info_val;
6456
1
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
6457
1
        err = txn->get(info_key, &info_val);
6458
1
        if (err != TxnErrorCode::TXN_OK) {
6459
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
6460
0
            return -1;
6461
0
        }
6462
1
        TxnInfoPB txn_info;
6463
1
        if (!txn_info.ParseFromString(info_val)) {
6464
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
6465
0
            return -1;
6466
0
        }
6467
1
        txn->remove(info_key);
6468
        // Remove sub txn index kvs
6469
1
        std::vector<std::string> sub_txn_index_keys;
6470
1
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
6471
0
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
6472
0
            sub_txn_index_keys.push_back(sub_txn_index_key);
6473
0
        }
6474
1
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
6475
0
            txn->remove(sub_txn_index_key);
6476
0
        }
6477
        // Update txn label
6478
1
        std::string label_key, label_val;
6479
1
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
6480
1
        err = txn->get(label_key, &label_val);
6481
1
        if (err != TxnErrorCode::TXN_OK) {
6482
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
6483
0
                         << " err=" << err;
6484
0
            return -1;
6485
0
        }
6486
1
        TxnLabelPB txn_label;
6487
1
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
6488
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
6489
0
            return -1;
6490
0
        }
6491
1
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
6492
1
        if (it != txn_label.txn_ids().end()) {
6493
1
            txn_label.mutable_txn_ids()->erase(it);
6494
1
        }
6495
1
        if (txn_label.txn_ids().empty()) {
6496
1
            txn->remove(label_key);
6497
1
            TEST_SYNC_POINT_CALLBACK(
6498
1
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
6499
1
        } else {
6500
0
            if (!txn_label.SerializeToString(&label_val)) {
6501
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
6502
0
                return -1;
6503
0
            }
6504
0
            TEST_SYNC_POINT_CALLBACK(
6505
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
6506
0
            txn->atomic_set_ver_value(label_key, label_val);
6507
0
            TEST_SYNC_POINT_CALLBACK(
6508
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
6509
0
        }
6510
        // Remove recycle txn kv
6511
1
        txn->remove(k);
6512
1
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
6513
1
        err = txn->commit();
6514
1
        if (err != TxnErrorCode::TXN_OK) {
6515
0
            if (err == TxnErrorCode::TXN_CONFLICT) {
6516
0
                TEST_SYNC_POINT_CALLBACK(
6517
0
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
6518
                // log the txn_id and label
6519
0
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
6520
0
                             << " txn_label_pb=" << txn_label.ShortDebugString()
6521
0
                             << " txn_label=" << txn_info.label();
6522
0
                return 1;
6523
0
            }
6524
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
6525
0
            return -1;
6526
0
        }
6527
1
        ++num_recycled;
6528
6529
1
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
6530
1
        return 0;
6531
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
6432
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
6433
23.0k
        std::string_view k1 = k;
6434
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
6435
23.0k
        k1.remove_prefix(1); // Remove key space
6436
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6437
23.0k
        int ret = decode_key(&k1, &out);
6438
23.0k
        if (ret != 0) {
6439
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
6440
0
            return -1;
6441
0
        }
6442
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6443
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6444
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6445
23.0k
        std::unique_ptr<Transaction> txn;
6446
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6447
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6448
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6449
0
            return -1;
6450
0
        }
6451
        // Remove txn index kv
6452
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
6453
23.0k
        txn->remove(index_key);
6454
        // Remove txn info kv
6455
23.0k
        std::string info_key, info_val;
6456
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
6457
23.0k
        err = txn->get(info_key, &info_val);
6458
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6459
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
6460
0
            return -1;
6461
0
        }
6462
23.0k
        TxnInfoPB txn_info;
6463
23.0k
        if (!txn_info.ParseFromString(info_val)) {
6464
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
6465
0
            return -1;
6466
0
        }
6467
23.0k
        txn->remove(info_key);
6468
        // Remove sub txn index kvs
6469
23.0k
        std::vector<std::string> sub_txn_index_keys;
6470
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
6471
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
6472
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
6473
22.9k
        }
6474
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
6475
22.9k
            txn->remove(sub_txn_index_key);
6476
22.9k
        }
6477
        // Update txn label
6478
23.0k
        std::string label_key, label_val;
6479
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
6480
23.0k
        err = txn->get(label_key, &label_val);
6481
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6482
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
6483
0
                         << " err=" << err;
6484
0
            return -1;
6485
0
        }
6486
23.0k
        TxnLabelPB txn_label;
6487
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
6488
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
6489
0
            return -1;
6490
0
        }
6491
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
6492
23.0k
        if (it != txn_label.txn_ids().end()) {
6493
23.0k
            txn_label.mutable_txn_ids()->erase(it);
6494
23.0k
        }
6495
23.0k
        if (txn_label.txn_ids().empty()) {
6496
23.0k
            txn->remove(label_key);
6497
23.0k
            TEST_SYNC_POINT_CALLBACK(
6498
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
6499
23.0k
        } else {
6500
73
            if (!txn_label.SerializeToString(&label_val)) {
6501
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
6502
0
                return -1;
6503
0
            }
6504
73
            TEST_SYNC_POINT_CALLBACK(
6505
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
6506
73
            txn->atomic_set_ver_value(label_key, label_val);
6507
73
            TEST_SYNC_POINT_CALLBACK(
6508
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
6509
73
        }
6510
        // Remove recycle txn kv
6511
23.0k
        txn->remove(k);
6512
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
6513
23.0k
        err = txn->commit();
6514
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6515
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
6516
62
                TEST_SYNC_POINT_CALLBACK(
6517
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
6518
                // log the txn_id and label
6519
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
6520
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
6521
62
                             << " txn_label=" << txn_info.label();
6522
62
                return 1;
6523
62
            }
6524
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
6525
0
            return -1;
6526
62
        }
6527
23.0k
        ++num_recycled;
6528
6529
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
6530
23.0k
        return 0;
6531
23.0k
    };
6532
6533
19
    auto loop_done = [&]() -> int {
6534
10
        DORIS_CLOUD_DEFER {
6535
10
            recycle_txn_info_keys.clear();
6536
10
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
6534
1
        DORIS_CLOUD_DEFER {
6535
1
            recycle_txn_info_keys.clear();
6536
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
6534
9
        DORIS_CLOUD_DEFER {
6535
9
            recycle_txn_info_keys.clear();
6536
9
        };
6537
10
        TEST_SYNC_POINT_CALLBACK(
6538
10
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
6539
10
                &recycle_txn_info_keys);
6540
23.0k
        for (const auto& k : recycle_txn_info_keys) {
6541
23.0k
            concurrent_delete_executor.add([&]() {
6542
23.0k
                int ret = delete_recycle_txn_kv(k);
6543
23.0k
                if (ret == 1) {
6544
18
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6545
54
                    for (int i = 1; i <= max_retry; ++i) {
6546
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6547
54
                        ret = delete_recycle_txn_kv(k);
6548
                        // clang-format off
6549
54
                        TEST_SYNC_POINT_CALLBACK(
6550
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6551
                        // clang-format off
6552
54
                        if (ret != 1) {
6553
18
                            break;
6554
18
                        }
6555
                        // random sleep 0-100 ms to retry
6556
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6557
36
                    }
6558
18
                }
6559
23.0k
                if (ret != 0) {
6560
9
                    LOG_WARNING("failed to delete recycle txn kv")
6561
9
                            .tag("instance id", instance_id_)
6562
9
                            .tag("key", hex(k));
6563
9
                    return -1;
6564
9
                }
6565
23.0k
                return 0;
6566
23.0k
            });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
6541
1
            concurrent_delete_executor.add([&]() {
6542
1
                int ret = delete_recycle_txn_kv(k);
6543
1
                if (ret == 1) {
6544
0
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6545
0
                    for (int i = 1; i <= max_retry; ++i) {
6546
0
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6547
0
                        ret = delete_recycle_txn_kv(k);
6548
                        // clang-format off
6549
0
                        TEST_SYNC_POINT_CALLBACK(
6550
0
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6551
                        // clang-format off
6552
0
                        if (ret != 1) {
6553
0
                            break;
6554
0
                        }
6555
                        // random sleep 0-100 ms to retry
6556
0
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6557
0
                    }
6558
0
                }
6559
1
                if (ret != 0) {
6560
0
                    LOG_WARNING("failed to delete recycle txn kv")
6561
0
                            .tag("instance id", instance_id_)
6562
0
                            .tag("key", hex(k));
6563
0
                    return -1;
6564
0
                }
6565
1
                return 0;
6566
1
            });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
6541
23.0k
            concurrent_delete_executor.add([&]() {
6542
23.0k
                int ret = delete_recycle_txn_kv(k);
6543
23.0k
                if (ret == 1) {
6544
18
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6545
54
                    for (int i = 1; i <= max_retry; ++i) {
6546
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6547
54
                        ret = delete_recycle_txn_kv(k);
6548
                        // clang-format off
6549
54
                        TEST_SYNC_POINT_CALLBACK(
6550
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6551
                        // clang-format off
6552
54
                        if (ret != 1) {
6553
18
                            break;
6554
18
                        }
6555
                        // random sleep 0-100 ms to retry
6556
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6557
36
                    }
6558
18
                }
6559
23.0k
                if (ret != 0) {
6560
9
                    LOG_WARNING("failed to delete recycle txn kv")
6561
9
                            .tag("instance id", instance_id_)
6562
9
                            .tag("key", hex(k));
6563
9
                    return -1;
6564
9
                }
6565
23.0k
                return 0;
6566
23.0k
            });
6567
23.0k
        }
6568
10
        bool finished = true;
6569
10
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
6570
23.0k
        for (int r : rets) {
6571
23.0k
            if (r != 0) {
6572
9
                ret = -1;
6573
9
            }
6574
23.0k
        }
6575
6576
10
        ret = finished ? ret : -1;
6577
6578
        // Update metrics after all concurrent tasks completed
6579
10
        metrics_context.total_recycled_num = num_recycled.load();
6580
10
        metrics_context.report();
6581
6582
10
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
6583
6584
10
        if (ret != 0) {
6585
3
            LOG_WARNING("recycle txn kv ret!=0")
6586
3
                    .tag("finished", finished)
6587
3
                    .tag("ret", ret)
6588
3
                    .tag("instance_id", instance_id_);
6589
3
            return ret;
6590
3
        }
6591
7
        return ret;
6592
10
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
6533
1
    auto loop_done = [&]() -> int {
6534
1
        DORIS_CLOUD_DEFER {
6535
1
            recycle_txn_info_keys.clear();
6536
1
        };
6537
1
        TEST_SYNC_POINT_CALLBACK(
6538
1
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
6539
1
                &recycle_txn_info_keys);
6540
1
        for (const auto& k : recycle_txn_info_keys) {
6541
1
            concurrent_delete_executor.add([&]() {
6542
1
                int ret = delete_recycle_txn_kv(k);
6543
1
                if (ret == 1) {
6544
1
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6545
1
                    for (int i = 1; i <= max_retry; ++i) {
6546
1
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6547
1
                        ret = delete_recycle_txn_kv(k);
6548
                        // clang-format off
6549
1
                        TEST_SYNC_POINT_CALLBACK(
6550
1
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6551
                        // clang-format off
6552
1
                        if (ret != 1) {
6553
1
                            break;
6554
1
                        }
6555
                        // random sleep 0-100 ms to retry
6556
1
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6557
1
                    }
6558
1
                }
6559
1
                if (ret != 0) {
6560
1
                    LOG_WARNING("failed to delete recycle txn kv")
6561
1
                            .tag("instance id", instance_id_)
6562
1
                            .tag("key", hex(k));
6563
1
                    return -1;
6564
1
                }
6565
1
                return 0;
6566
1
            });
6567
1
        }
6568
1
        bool finished = true;
6569
1
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
6570
1
        for (int r : rets) {
6571
1
            if (r != 0) {
6572
0
                ret = -1;
6573
0
            }
6574
1
        }
6575
6576
1
        ret = finished ? ret : -1;
6577
6578
        // Update metrics after all concurrent tasks completed
6579
1
        metrics_context.total_recycled_num = num_recycled.load();
6580
1
        metrics_context.report();
6581
6582
1
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
6583
6584
1
        if (ret != 0) {
6585
0
            LOG_WARNING("recycle txn kv ret!=0")
6586
0
                    .tag("finished", finished)
6587
0
                    .tag("ret", ret)
6588
0
                    .tag("instance_id", instance_id_);
6589
0
            return ret;
6590
0
        }
6591
1
        return ret;
6592
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
6533
9
    auto loop_done = [&]() -> int {
6534
9
        DORIS_CLOUD_DEFER {
6535
9
            recycle_txn_info_keys.clear();
6536
9
        };
6537
9
        TEST_SYNC_POINT_CALLBACK(
6538
9
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
6539
9
                &recycle_txn_info_keys);
6540
23.0k
        for (const auto& k : recycle_txn_info_keys) {
6541
23.0k
            concurrent_delete_executor.add([&]() {
6542
23.0k
                int ret = delete_recycle_txn_kv(k);
6543
23.0k
                if (ret == 1) {
6544
23.0k
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6545
23.0k
                    for (int i = 1; i <= max_retry; ++i) {
6546
23.0k
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6547
23.0k
                        ret = delete_recycle_txn_kv(k);
6548
                        // clang-format off
6549
23.0k
                        TEST_SYNC_POINT_CALLBACK(
6550
23.0k
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6551
                        // clang-format off
6552
23.0k
                        if (ret != 1) {
6553
23.0k
                            break;
6554
23.0k
                        }
6555
                        // random sleep 0-100 ms to retry
6556
23.0k
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6557
23.0k
                    }
6558
23.0k
                }
6559
23.0k
                if (ret != 0) {
6560
23.0k
                    LOG_WARNING("failed to delete recycle txn kv")
6561
23.0k
                            .tag("instance id", instance_id_)
6562
23.0k
                            .tag("key", hex(k));
6563
23.0k
                    return -1;
6564
23.0k
                }
6565
23.0k
                return 0;
6566
23.0k
            });
6567
23.0k
        }
6568
9
        bool finished = true;
6569
9
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
6570
23.0k
        for (int r : rets) {
6571
23.0k
            if (r != 0) {
6572
9
                ret = -1;
6573
9
            }
6574
23.0k
        }
6575
6576
9
        ret = finished ? ret : -1;
6577
6578
        // Update metrics after all concurrent tasks completed
6579
9
        metrics_context.total_recycled_num = num_recycled.load();
6580
9
        metrics_context.report();
6581
6582
9
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
6583
6584
9
        if (ret != 0) {
6585
3
            LOG_WARNING("recycle txn kv ret!=0")
6586
3
                    .tag("finished", finished)
6587
3
                    .tag("ret", ret)
6588
3
                    .tag("instance_id", instance_id_);
6589
3
            return ret;
6590
3
        }
6591
6
        return ret;
6592
9
    };
6593
6594
19
    if (config::enable_recycler_stats_metrics) {
6595
0
        scan_and_statistics_expired_txn_label();
6596
0
    }
6597
    // recycle_func and loop_done for scan and recycle
6598
19
    return scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key,
6599
19
                            std::move(handle_recycle_txn_kv), std::move(loop_done));
6600
19
}
6601
6602
/**
 * Identifying fields of one copy job, used to build copy-file keys and log traces.
 *
 * The member order is part of the contract: the struct is brace-initialized and
 * decomposed with structured bindings as
 * {instance_id, stage_id, table_id, copy_id, stage_path}.
 */
struct CopyJobIdTuple {
    std::string instance_id;
    std::string stage_id;
    // Zero-initialized so a default-constructed tuple never carries an indeterminate id.
    long table_id = 0;
    std::string copy_id;
    std::string stage_path;
};
6609
struct BatchObjStoreAccessor {
6610
    BatchObjStoreAccessor(std::shared_ptr<StorageVaultAccessor> accessor, uint64_t& batch_count,
6611
                          TxnKv* txn_kv)
6612
3
            : accessor_(std::move(accessor)), batch_count_(batch_count), txn_kv_(txn_kv) {};
6613
3
    ~BatchObjStoreAccessor() {
6614
3
        if (!paths_.empty()) {
6615
3
            consume();
6616
3
        }
6617
3
    }
6618
6619
    /**
6620
    * To implicitely do batch work and submit the batch delete task to s3
6621
    * The s3 delete opreations would be done in batches, and then delete CopyJobPB key one by one
6622
    *
6623
    * @param copy_job The protubuf struct consists of the copy job files.
6624
    * @param key The copy job's key on fdb, the key is originally occupied by fdb range iterator, to make sure
6625
    *            it would last until we finish the delete task, here we need pass one string value
6626
    * @param cope_job_id_tuple One tuple {log_trace instance_id, stage_id, table_id, query_id, stage_path} to print log
6627
    */
6628
5
    void add(CopyJobPB copy_job, std::string key, const CopyJobIdTuple cope_job_id_tuple) {
6629
5
        auto& [instance_id, stage_id, table_id, copy_id, path] = cope_job_id_tuple;
6630
5
        auto& file_keys = copy_file_keys_[key];
6631
5
        file_keys.log_trace =
6632
5
                fmt::format("instance_id={}, stage_id={}, table_id={}, query_id={}, path={}",
6633
5
                            instance_id, stage_id, table_id, copy_id, path);
6634
5
        std::string_view log_trace = file_keys.log_trace;
6635
2.03k
        for (const auto& file : copy_job.object_files()) {
6636
2.03k
            auto relative_path = file.relative_path();
6637
2.03k
            paths_.push_back(relative_path);
6638
2.03k
            file_keys.keys.push_back(copy_file_key(
6639
2.03k
                    {instance_id, stage_id, table_id, file.relative_path(), file.etag()}));
6640
2.03k
            LOG_INFO(log_trace)
6641
2.03k
                    .tag("relative_path", relative_path)
6642
2.03k
                    .tag("batch_count", batch_count_);
6643
2.03k
        }
6644
5
        LOG_INFO(log_trace)
6645
5
                .tag("objects_num", copy_job.object_files().size())
6646
5
                .tag("batch_count", batch_count_);
6647
        // TODO(AlexYue): If the size is 1001, it would be one delete with 1000 objects and one delete request with only one object(**ATTN**: DOESN'T
6648
        // recommend using delete objects when objects num is less than 10)
6649
5
        if (paths_.size() < 1000) {
6650
3
            return;
6651
3
        }
6652
2
        consume();
6653
2
    }
6654
6655
private:
6656
5
    void consume() {
6657
5
        DORIS_CLOUD_DEFER {
6658
5
            paths_.clear();
6659
5
            copy_file_keys_.clear();
6660
5
            batch_count_++;
6661
6662
5
            LOG_WARNING("begin to delete {} internal stage objects in batch {}", paths_.size(),
6663
5
                        batch_count_);
6664
5
        };
6665
6666
5
        StopWatch sw;
6667
        // TODO(yuejing): 在accessor的delete_objets的实现里可以考虑如果_paths数量不超过10个的话,就直接发10个delete objection operation而不是发post
6668
5
        if (0 != accessor_->delete_files(paths_)) {
6669
2
            LOG_WARNING("failed to delete {} internal stage objects in batch {} and it takes {} us",
6670
2
                        paths_.size(), batch_count_, sw.elapsed_us());
6671
2
            return;
6672
2
        }
6673
3
        LOG_WARNING("succeed to delete {} internal stage objects in batch {} and it takes {} us",
6674
3
                    paths_.size(), batch_count_, sw.elapsed_us());
6675
        // delete fdb's keys
6676
3
        for (auto& file_keys : copy_file_keys_) {
6677
3
            auto& [log_trace, keys] = file_keys.second;
6678
3
            std::unique_ptr<Transaction> txn;
6679
3
            if (txn_kv_->create_txn(&txn) != cloud::TxnErrorCode::TXN_OK) {
6680
0
                LOG(WARNING) << "failed to create txn";
6681
0
                continue;
6682
0
            }
6683
            // FIXME: We have already limited the file num and file meta size when selecting file in FE.
6684
            // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
6685
            // limited, should not cause the txn commit failed.
6686
1.02k
            for (const auto& key : keys) {
6687
1.02k
                txn->remove(key);
6688
1.02k
                LOG_INFO("remove copy_file_key={}, {}", hex(key), log_trace);
6689
1.02k
            }
6690
3
            txn->remove(file_keys.first);
6691
3
            if (auto ret = txn->commit(); ret != cloud::TxnErrorCode::TXN_OK) {
6692
0
                LOG(WARNING) << "failed to commit txn ret is " << ret;
6693
0
                continue;
6694
0
            }
6695
3
        }
6696
3
    }
6697
    std::shared_ptr<StorageVaultAccessor> accessor_;
6698
    // the path of the s3 files to be deleted
6699
    std::vector<std::string> paths_;
6700
    struct CopyFiles {
6701
        std::string log_trace;
6702
        std::vector<std::string> keys;
6703
    };
6704
    // pair<std::string, std::vector<std::string>>
6705
    // first: instance_id_ stage_id table_id query_id
6706
    // second: keys to be deleted
6707
    // <fdb key, <{instance_id_ stage_id table_id query_id}, file keys to be deleted>>
6708
    std::unordered_map<std::string, CopyFiles> copy_file_keys_;
6709
    // used to distinguish different batch tasks, the task log consists of thread ID and batch number
6710
    // which can together uniquely identifies different tasks for tracing log
6711
    uint64_t& batch_count_;
6712
    TxnKv* txn_kv_;
6713
};
6714
6715
13
int InstanceRecycler::recycle_copy_jobs() {
6716
13
    int64_t num_scanned = 0;
6717
13
    int64_t num_finished = 0;
6718
13
    int64_t num_expired = 0;
6719
13
    int64_t num_recycled = 0;
6720
    // Used for INTERNAL stage's copy jobs to tag each batch for log trace
6721
13
    uint64_t batch_count = 0;
6722
13
    const std::string task_name = "recycle_copy_jobs";
6723
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
6724
6725
13
    LOG_WARNING("begin to recycle copy jobs").tag("instance_id", instance_id_);
6726
6727
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6728
13
    register_recycle_task(task_name, start_time);
6729
6730
13
    DORIS_CLOUD_DEFER {
6731
13
        unregister_recycle_task(task_name);
6732
13
        int64_t cost =
6733
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6734
13
        metrics_context.finish_report();
6735
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
6736
13
                .tag("instance_id", instance_id_)
6737
13
                .tag("num_scanned", num_scanned)
6738
13
                .tag("num_finished", num_finished)
6739
13
                .tag("num_expired", num_expired)
6740
13
                .tag("num_recycled", num_recycled);
6741
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
Line
Count
Source
6730
13
    DORIS_CLOUD_DEFER {
6731
13
        unregister_recycle_task(task_name);
6732
13
        int64_t cost =
6733
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6734
13
        metrics_context.finish_report();
6735
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
6736
13
                .tag("instance_id", instance_id_)
6737
13
                .tag("num_scanned", num_scanned)
6738
13
                .tag("num_finished", num_finished)
6739
13
                .tag("num_expired", num_expired)
6740
13
                .tag("num_recycled", num_recycled);
6741
13
    };
6742
6743
13
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
6744
13
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
6745
13
    std::string key0;
6746
13
    std::string key1;
6747
13
    copy_job_key(key_info0, &key0);
6748
13
    copy_job_key(key_info1, &key1);
6749
13
    std::unordered_map<std::string, std::shared_ptr<BatchObjStoreAccessor>> stage_accessor_map;
6750
13
    auto recycle_func = [&start_time, &num_scanned, &num_finished, &num_expired, &num_recycled,
6751
13
                         &batch_count, &stage_accessor_map, &task_name, &metrics_context,
6752
16
                         this](std::string_view k, std::string_view v) -> int {
6753
16
        ++num_scanned;
6754
16
        CopyJobPB copy_job;
6755
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
6756
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
6757
0
            return -1;
6758
0
        }
6759
6760
        // decode copy job key
6761
16
        auto k1 = k;
6762
16
        k1.remove_prefix(1);
6763
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6764
16
        decode_key(&k1, &out);
6765
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
6766
        // -> CopyJobPB
6767
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
6768
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
6769
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
6770
6771
16
        bool check_storage = true;
6772
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
6773
12
            ++num_finished;
6774
6775
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
6776
7
                auto it = stage_accessor_map.find(stage_id);
6777
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
6778
7
                std::string_view path;
6779
7
                if (it != stage_accessor_map.end()) {
6780
2
                    accessor = it->second;
6781
5
                } else {
6782
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
6783
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
6784
5
                                                      &inner_accessor);
6785
5
                    if (ret < 0) { // error
6786
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
6787
0
                        return -1;
6788
5
                    } else if (ret == 0) {
6789
3
                        path = inner_accessor->uri();
6790
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
6791
3
                                inner_accessor, batch_count, txn_kv_.get());
6792
3
                        stage_accessor_map.emplace(stage_id, accessor);
6793
3
                    } else { // stage not found, skip check storage
6794
2
                        check_storage = false;
6795
2
                    }
6796
5
                }
6797
7
                if (check_storage) {
6798
                    // TODO delete objects with key and etag is not supported
6799
5
                    accessor->add(std::move(copy_job), std::string(k),
6800
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
6801
5
                    return 0;
6802
5
                }
6803
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
6804
5
                int64_t current_time =
6805
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6806
5
                if (copy_job.finish_time_ms() > 0) {
6807
2
                    if (!config::force_immediate_recycle &&
6808
2
                        current_time < copy_job.finish_time_ms() +
6809
2
                                               config::copy_job_max_retention_second * 1000) {
6810
1
                        return 0;
6811
1
                    }
6812
3
                } else {
6813
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
6814
3
                    if (!config::force_immediate_recycle &&
6815
3
                        current_time < copy_job.start_time_ms() +
6816
3
                                               config::copy_job_max_retention_second * 1000) {
6817
1
                        return 0;
6818
1
                    }
6819
3
                }
6820
5
            }
6821
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
6822
4
            int64_t current_time =
6823
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6824
            // if copy job is timeout: delete all copy file kvs and copy job kv
6825
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
6826
2
                return 0;
6827
2
            }
6828
2
            ++num_expired;
6829
2
        }
6830
6831
        // delete all copy files
6832
7
        std::vector<std::string> copy_file_keys;
6833
70
        for (auto& file : copy_job.object_files()) {
6834
70
            copy_file_keys.push_back(copy_file_key(
6835
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
6836
70
        }
6837
7
        std::unique_ptr<Transaction> txn;
6838
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
6839
0
            LOG(WARNING) << "failed to create txn";
6840
0
            return -1;
6841
0
        }
6842
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
6843
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
6844
        // limited, should not cause the txn commit failed.
6845
70
        for (const auto& key : copy_file_keys) {
6846
70
            txn->remove(key);
6847
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
6848
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
6849
70
                      << ", query_id=" << copy_id;
6850
70
        }
6851
7
        txn->remove(k);
6852
7
        TxnErrorCode err = txn->commit();
6853
7
        if (err != TxnErrorCode::TXN_OK) {
6854
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
6855
0
            return -1;
6856
0
        }
6857
6858
7
        metrics_context.total_recycled_num = ++num_recycled;
6859
7
        metrics_context.report();
6860
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
6861
7
        return 0;
6862
7
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6752
16
                         this](std::string_view k, std::string_view v) -> int {
6753
16
        ++num_scanned;
6754
16
        CopyJobPB copy_job;
6755
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
6756
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
6757
0
            return -1;
6758
0
        }
6759
6760
        // decode copy job key
6761
16
        auto k1 = k;
6762
16
        k1.remove_prefix(1);
6763
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6764
16
        decode_key(&k1, &out);
6765
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
6766
        // -> CopyJobPB
6767
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
6768
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
6769
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
6770
6771
16
        bool check_storage = true;
6772
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
6773
12
            ++num_finished;
6774
6775
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
6776
7
                auto it = stage_accessor_map.find(stage_id);
6777
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
6778
7
                std::string_view path;
6779
7
                if (it != stage_accessor_map.end()) {
6780
2
                    accessor = it->second;
6781
5
                } else {
6782
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
6783
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
6784
5
                                                      &inner_accessor);
6785
5
                    if (ret < 0) { // error
6786
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
6787
0
                        return -1;
6788
5
                    } else if (ret == 0) {
6789
3
                        path = inner_accessor->uri();
6790
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
6791
3
                                inner_accessor, batch_count, txn_kv_.get());
6792
3
                        stage_accessor_map.emplace(stage_id, accessor);
6793
3
                    } else { // stage not found, skip check storage
6794
2
                        check_storage = false;
6795
2
                    }
6796
5
                }
6797
7
                if (check_storage) {
6798
                    // TODO delete objects with key and etag is not supported
6799
5
                    accessor->add(std::move(copy_job), std::string(k),
6800
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
6801
5
                    return 0;
6802
5
                }
6803
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
6804
5
                int64_t current_time =
6805
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6806
5
                if (copy_job.finish_time_ms() > 0) {
6807
2
                    if (!config::force_immediate_recycle &&
6808
2
                        current_time < copy_job.finish_time_ms() +
6809
2
                                               config::copy_job_max_retention_second * 1000) {
6810
1
                        return 0;
6811
1
                    }
6812
3
                } else {
6813
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
6814
3
                    if (!config::force_immediate_recycle &&
6815
3
                        current_time < copy_job.start_time_ms() +
6816
3
                                               config::copy_job_max_retention_second * 1000) {
6817
1
                        return 0;
6818
1
                    }
6819
3
                }
6820
5
            }
6821
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
6822
4
            int64_t current_time =
6823
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6824
            // if copy job is timeout: delete all copy file kvs and copy job kv
6825
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
6826
2
                return 0;
6827
2
            }
6828
2
            ++num_expired;
6829
2
        }
6830
6831
        // delete all copy files
6832
7
        std::vector<std::string> copy_file_keys;
6833
70
        for (auto& file : copy_job.object_files()) {
6834
70
            copy_file_keys.push_back(copy_file_key(
6835
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
6836
70
        }
6837
7
        std::unique_ptr<Transaction> txn;
6838
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
6839
0
            LOG(WARNING) << "failed to create txn";
6840
0
            return -1;
6841
0
        }
6842
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
6843
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
6844
        // limited, should not cause the txn commit failed.
6845
70
        for (const auto& key : copy_file_keys) {
6846
70
            txn->remove(key);
6847
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
6848
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
6849
70
                      << ", query_id=" << copy_id;
6850
70
        }
6851
7
        txn->remove(k);
6852
7
        TxnErrorCode err = txn->commit();
6853
7
        if (err != TxnErrorCode::TXN_OK) {
6854
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
6855
0
            return -1;
6856
0
        }
6857
6858
7
        metrics_context.total_recycled_num = ++num_recycled;
6859
7
        metrics_context.report();
6860
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
6861
7
        return 0;
6862
7
    };
6863
6864
13
    if (config::enable_recycler_stats_metrics) {
6865
0
        scan_and_statistics_copy_jobs();
6866
0
    }
6867
    // recycle_func and loop_done for scan and recycle
6868
13
    return scan_and_recycle(key0, key1, std::move(recycle_func));
6869
13
}
6870
6871
int InstanceRecycler::init_copy_job_accessor(const std::string& stage_id,
6872
                                             const StagePB::StageType& stage_type,
6873
5
                                             std::shared_ptr<StorageVaultAccessor>* accessor) {
6874
5
#ifdef UNIT_TEST
6875
    // In unit test, external use the same accessor as the internal stage
6876
5
    auto it = accessor_map_.find(stage_id);
6877
5
    if (it != accessor_map_.end()) {
6878
3
        *accessor = it->second;
6879
3
    } else {
6880
2
        std::cout << "UT can not find accessor with stage_id: " << stage_id << std::endl;
6881
2
        return 1;
6882
2
    }
6883
#else
6884
    // init s3 accessor and add to accessor map
6885
    auto stage_it =
6886
            std::find_if(instance_info_.stages().begin(), instance_info_.stages().end(),
6887
                         [&stage_id](auto&& stage) { return stage.stage_id() == stage_id; });
6888
6889
    if (stage_it == instance_info_.stages().end()) {
6890
        LOG(INFO) << "Recycle nonexisted stage copy jobs. instance_id=" << instance_id_
6891
                  << ", stage_id=" << stage_id << ", stage_type=" << stage_type;
6892
        return 1;
6893
    }
6894
6895
    const auto& object_store_info = stage_it->obj_info();
6896
    auto stage_access_type = stage_it->has_access_type() ? stage_it->access_type() : StagePB::AKSK;
6897
6898
    S3Conf s3_conf;
6899
    if (stage_type == StagePB::EXTERNAL) {
6900
        if (stage_access_type == StagePB::AKSK) {
6901
            auto conf = S3Conf::from_obj_store_info(object_store_info);
6902
            if (!conf) {
6903
                return -1;
6904
            }
6905
6906
            s3_conf = std::move(*conf);
6907
        } else if (stage_access_type == StagePB::BUCKET_ACL) {
6908
            auto conf = S3Conf::from_obj_store_info(object_store_info, true /* skip_aksk */);
6909
            if (!conf) {
6910
                return -1;
6911
            }
6912
6913
            s3_conf = std::move(*conf);
6914
            if (instance_info_.ram_user().has_encryption_info()) {
6915
                AkSkPair plain_ak_sk_pair;
6916
                int ret = decrypt_ak_sk_helper(
6917
                        instance_info_.ram_user().ak(), instance_info_.ram_user().sk(),
6918
                        instance_info_.ram_user().encryption_info(), &plain_ak_sk_pair);
6919
                if (ret != 0) {
6920
                    LOG(WARNING) << "fail to decrypt ak sk. instance_id: " << instance_id_
6921
                                 << " ram_user: " << proto_to_json(instance_info_.ram_user());
6922
                    return -1;
6923
                }
6924
                s3_conf.ak = std::move(plain_ak_sk_pair.first);
6925
                s3_conf.sk = std::move(plain_ak_sk_pair.second);
6926
            } else {
6927
                s3_conf.ak = instance_info_.ram_user().ak();
6928
                s3_conf.sk = instance_info_.ram_user().sk();
6929
            }
6930
        } else {
6931
            LOG(INFO) << "Unsupported stage access type=" << stage_access_type
6932
                      << ", instance_id=" << instance_id_ << ", stage_id=" << stage_id;
6933
            return -1;
6934
        }
6935
    } else if (stage_type == StagePB::INTERNAL) {
6936
        int idx = stoi(object_store_info.id());
6937
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6938
            LOG(WARNING) << "invalid idx: " << idx;
6939
            return -1;
6940
        }
6941
6942
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
6943
        auto conf = S3Conf::from_obj_store_info(old_obj);
6944
        if (!conf) {
6945
            return -1;
6946
        }
6947
6948
        s3_conf = std::move(*conf);
6949
        s3_conf.prefix = object_store_info.prefix();
6950
    } else {
6951
        LOG(WARNING) << "unknown stage type " << stage_type;
6952
        return -1;
6953
    }
6954
6955
    std::shared_ptr<S3Accessor> s3_accessor;
6956
    int ret = S3Accessor::create(std::move(s3_conf), &s3_accessor);
6957
    if (ret != 0) {
6958
        LOG(WARNING) << "failed to init s3 accessor ret=" << ret;
6959
        return -1;
6960
    }
6961
6962
    *accessor = std::move(s3_accessor);
6963
#endif
6964
3
    return 0;
6965
5
}
6966
6967
11
int InstanceRecycler::recycle_stage() {
6968
11
    int64_t num_scanned = 0;
6969
11
    int64_t num_recycled = 0;
6970
11
    const std::string task_name = "recycle_stage";
6971
11
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
6972
6973
11
    LOG_WARNING("begin to recycle stage").tag("instance_id", instance_id_);
6974
6975
11
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6976
11
    register_recycle_task(task_name, start_time);
6977
6978
11
    DORIS_CLOUD_DEFER {
6979
11
        unregister_recycle_task(task_name);
6980
11
        int64_t cost =
6981
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6982
11
        metrics_context.finish_report();
6983
11
        LOG_WARNING("recycle stage, cost={}s", cost)
6984
11
                .tag("instance_id", instance_id_)
6985
11
                .tag("num_scanned", num_scanned)
6986
11
                .tag("num_recycled", num_recycled);
6987
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
Line
Count
Source
6978
11
    DORIS_CLOUD_DEFER {
6979
11
        unregister_recycle_task(task_name);
6980
11
        int64_t cost =
6981
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6982
11
        metrics_context.finish_report();
6983
11
        LOG_WARNING("recycle stage, cost={}s", cost)
6984
11
                .tag("instance_id", instance_id_)
6985
11
                .tag("num_scanned", num_scanned)
6986
11
                .tag("num_recycled", num_recycled);
6987
11
    };
6988
6989
11
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
6990
11
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
6991
11
    std::string key0 = recycle_stage_key(key_info0);
6992
11
    std::string key1 = recycle_stage_key(key_info1);
6993
6994
11
    std::vector<std::string_view> stage_keys;
6995
11
    auto recycle_func = [&start_time, &num_scanned, &num_recycled, &stage_keys, &metrics_context,
6996
11
                         this](std::string_view k, std::string_view v) -> int {
6997
1
        ++num_scanned;
6998
1
        RecycleStagePB recycle_stage;
6999
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
7000
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
7001
0
            return -1;
7002
0
        }
7003
7004
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
7005
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
7006
0
            LOG(WARNING) << "invalid idx: " << idx;
7007
0
            return -1;
7008
0
        }
7009
7010
1
        std::shared_ptr<StorageVaultAccessor> accessor;
7011
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
7012
1
                [&] {
7013
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
7014
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
7015
1
                    if (!s3_conf) {
7016
1
                        return -1;
7017
1
                    }
7018
7019
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
7020
1
                    std::shared_ptr<S3Accessor> s3_accessor;
7021
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
7022
1
                    if (ret != 0) {
7023
1
                        return -1;
7024
1
                    }
7025
7026
1
                    accessor = std::move(s3_accessor);
7027
1
                    return 0;
7028
1
                }(),
7029
1
                "recycle_stage:get_accessor", &accessor);
7030
7031
1
        if (ret != 0) {
7032
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
7033
0
            return ret;
7034
0
        }
7035
7036
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
7037
1
                .tag("instance_id", instance_id_)
7038
1
                .tag("stage_id", recycle_stage.stage().stage_id())
7039
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
7040
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
7041
1
                .tag("obj_info_id", idx)
7042
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
7043
1
        ret = accessor->delete_all();
7044
1
        if (ret != 0) {
7045
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
7046
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
7047
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
7048
0
                         << ", ret=" << ret;
7049
0
            return -1;
7050
0
        }
7051
1
        metrics_context.total_recycled_num = ++num_recycled;
7052
1
        metrics_context.report();
7053
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
7054
1
        stage_keys.push_back(k);
7055
1
        return 0;
7056
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6996
1
                         this](std::string_view k, std::string_view v) -> int {
6997
1
        ++num_scanned;
6998
1
        RecycleStagePB recycle_stage;
6999
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
7000
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
7001
0
            return -1;
7002
0
        }
7003
7004
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
7005
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
7006
0
            LOG(WARNING) << "invalid idx: " << idx;
7007
0
            return -1;
7008
0
        }
7009
7010
1
        std::shared_ptr<StorageVaultAccessor> accessor;
7011
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
7012
1
                [&] {
7013
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
7014
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
7015
1
                    if (!s3_conf) {
7016
1
                        return -1;
7017
1
                    }
7018
7019
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
7020
1
                    std::shared_ptr<S3Accessor> s3_accessor;
7021
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
7022
1
                    if (ret != 0) {
7023
1
                        return -1;
7024
1
                    }
7025
7026
1
                    accessor = std::move(s3_accessor);
7027
1
                    return 0;
7028
1
                }(),
7029
1
                "recycle_stage:get_accessor", &accessor);
7030
7031
1
        if (ret != 0) {
7032
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
7033
0
            return ret;
7034
0
        }
7035
7036
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
7037
1
                .tag("instance_id", instance_id_)
7038
1
                .tag("stage_id", recycle_stage.stage().stage_id())
7039
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
7040
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
7041
1
                .tag("obj_info_id", idx)
7042
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
7043
1
        ret = accessor->delete_all();
7044
1
        if (ret != 0) {
7045
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
7046
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
7047
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
7048
0
                         << ", ret=" << ret;
7049
0
            return -1;
7050
0
        }
7051
1
        metrics_context.total_recycled_num = ++num_recycled;
7052
1
        metrics_context.report();
7053
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
7054
1
        stage_keys.push_back(k);
7055
1
        return 0;
7056
1
    };
7057
7058
11
    auto loop_done = [&stage_keys, this]() -> int {
7059
1
        if (stage_keys.empty()) return 0;
7060
1
        DORIS_CLOUD_DEFER {
7061
1
            stage_keys.clear();
7062
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
7060
1
        DORIS_CLOUD_DEFER {
7061
1
            stage_keys.clear();
7062
1
        };
7063
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
7064
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
7065
0
            return -1;
7066
0
        }
7067
1
        return 0;
7068
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
Line
Count
Source
7058
1
    auto loop_done = [&stage_keys, this]() -> int {
7059
1
        if (stage_keys.empty()) return 0;
7060
1
        DORIS_CLOUD_DEFER {
7061
1
            stage_keys.clear();
7062
1
        };
7063
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
7064
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
7065
0
            return -1;
7066
0
        }
7067
1
        return 0;
7068
1
    };
7069
11
    if (config::enable_recycler_stats_metrics) {
7070
0
        scan_and_statistics_stage();
7071
0
    }
7072
    // recycle_func and loop_done for scan and recycle
7073
11
    return scan_and_recycle(key0, key1, std::move(recycle_func), std::move(loop_done));
7074
11
}
7075
7076
10
int InstanceRecycler::recycle_expired_stage_objects() {
7077
10
    LOG_WARNING("begin to recycle expired stage objects").tag("instance_id", instance_id_);
7078
7079
10
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
7080
10
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
7081
7082
10
    DORIS_CLOUD_DEFER {
7083
10
        int64_t cost =
7084
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
7085
10
        metrics_context.finish_report();
7086
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
7087
10
                .tag("instance_id", instance_id_);
7088
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
Line
Count
Source
7082
10
    DORIS_CLOUD_DEFER {
7083
10
        int64_t cost =
7084
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
7085
10
        metrics_context.finish_report();
7086
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
7087
10
                .tag("instance_id", instance_id_);
7088
10
    };
7089
7090
10
    int ret = 0;
7091
7092
10
    if (config::enable_recycler_stats_metrics) {
7093
0
        scan_and_statistics_expired_stage_objects();
7094
0
    }
7095
7096
10
    for (const auto& stage : instance_info_.stages()) {
7097
0
        std::stringstream ss;
7098
0
        ss << "instance_id=" << instance_id_ << ", stage_id=" << stage.stage_id() << ", user_name="
7099
0
           << (stage.mysql_user_name().empty() ? "null" : stage.mysql_user_name().at(0))
7100
0
           << ", user_id=" << (stage.mysql_user_id().empty() ? "null" : stage.mysql_user_id().at(0))
7101
0
           << ", prefix=" << stage.obj_info().prefix();
7102
7103
0
        if (stopped()) {
7104
0
            break;
7105
0
        }
7106
0
        if (stage.type() == StagePB::EXTERNAL) {
7107
0
            continue;
7108
0
        }
7109
0
        int idx = stoi(stage.obj_info().id());
7110
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
7111
0
            LOG(WARNING) << "invalid idx: " << idx << ", id: " << stage.obj_info().id();
7112
0
            continue;
7113
0
        }
7114
7115
0
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
7116
0
        auto s3_conf = S3Conf::from_obj_store_info(old_obj);
7117
0
        if (!s3_conf) {
7118
0
            LOG(WARNING) << "failed to init s3_conf with obj_info=" << old_obj.ShortDebugString();
7119
0
            continue;
7120
0
        }
7121
7122
0
        s3_conf->prefix = stage.obj_info().prefix();
7123
0
        std::shared_ptr<S3Accessor> accessor;
7124
0
        int ret1 = S3Accessor::create(*s3_conf, &accessor);
7125
0
        if (ret1 != 0) {
7126
0
            LOG(WARNING) << "failed to init s3 accessor ret=" << ret1 << " " << ss.str();
7127
0
            ret = -1;
7128
0
            continue;
7129
0
        }
7130
7131
0
        if (s3_conf->prefix.find("/stage/") == std::string::npos) {
7132
0
            LOG(WARNING) << "try to delete illegal prefix, which is catastrophic, " << ss.str();
7133
0
            ret = -1;
7134
0
            continue;
7135
0
        }
7136
7137
0
        LOG(INFO) << "recycle expired stage objects, " << ss.str();
7138
0
        int64_t expiration_time =
7139
0
                duration_cast<seconds>(system_clock::now().time_since_epoch()).count() -
7140
0
                config::internal_stage_objects_expire_time_second;
7141
0
        if (config::force_immediate_recycle) {
7142
0
            expiration_time = INT64_MAX;
7143
0
        }
7144
0
        ret1 = accessor->delete_all(expiration_time);
7145
0
        if (ret1 != 0) {
7146
0
            LOG(WARNING) << "failed to recycle expired stage objects, ret=" << ret1 << " "
7147
0
                         << ss.str();
7148
0
            ret = -1;
7149
0
            continue;
7150
0
        }
7151
0
        metrics_context.total_recycled_num++;
7152
0
        metrics_context.report();
7153
0
    }
7154
10
    return ret;
7155
10
}
7156
7157
236
void InstanceRecycler::register_recycle_task(const std::string& task_name, int64_t start_time) {
7158
236
    std::lock_guard lock(recycle_tasks_mutex);
7159
236
    running_recycle_tasks[task_name] = start_time;
7160
236
}
7161
7162
236
void InstanceRecycler::unregister_recycle_task(const std::string& task_name) {
7163
236
    std::lock_guard lock(recycle_tasks_mutex);
7164
236
    DCHECK(running_recycle_tasks[task_name] > 0);
7165
236
    running_recycle_tasks.erase(task_name);
7166
236
}
7167
7168
3
bool InstanceRecycler::check_recycle_tasks() {
7169
3
    std::map<std::string, int64_t> tmp_running_recycle_tasks;
7170
3
    {
7171
3
        std::lock_guard lock(recycle_tasks_mutex);
7172
3
        tmp_running_recycle_tasks = running_recycle_tasks;
7173
3
    }
7174
7175
3
    bool found = false;
7176
3
    int64_t now = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
7177
3
    for (auto& [task_name, start_time] : tmp_running_recycle_tasks) {
7178
2
        int64_t cost = now - start_time;
7179
2
        if (cost > config::recycle_task_threshold_seconds) [[unlikely]] {
7180
2
            LOG_INFO("recycle task cost too much time cost={}s", cost)
7181
2
                    .tag("instance_id", instance_id_)
7182
2
                    .tag("task", task_name);
7183
2
            found = true;
7184
2
        }
7185
2
    }
7186
7187
3
    return found;
7188
3
}
7189
7190
// Scan and statistics indexes that need to be recycled
7191
0
int InstanceRecycler::scan_and_statistics_indexes() {
7192
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_indexes");
7193
7194
0
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
7195
0
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
7196
0
    std::string index_key0;
7197
0
    std::string index_key1;
7198
0
    recycle_index_key(index_key_info0, &index_key0);
7199
0
    recycle_index_key(index_key_info1, &index_key1);
7200
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7201
7202
0
    auto handle_index_kv = [&, this](std::string_view k, std::string_view v) -> int {
7203
0
        RecycleIndexPB index_pb;
7204
0
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
7205
0
            return 0;
7206
0
        }
7207
0
        int64_t current_time = ::time(nullptr);
7208
0
        if (current_time <
7209
0
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
7210
0
            return 0;
7211
0
        }
7212
        // decode index_id
7213
0
        auto k1 = k;
7214
0
        k1.remove_prefix(1);
7215
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
7216
0
        decode_key(&k1, &out);
7217
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
7218
0
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
7219
0
        std::unique_ptr<Transaction> txn;
7220
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7221
0
        if (err != TxnErrorCode::TXN_OK) {
7222
0
            return 0;
7223
0
        }
7224
0
        std::string val;
7225
0
        err = txn->get(k, &val);
7226
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
7227
0
            return 0;
7228
0
        }
7229
0
        if (err != TxnErrorCode::TXN_OK) {
7230
0
            return 0;
7231
0
        }
7232
0
        index_pb.Clear();
7233
0
        if (!index_pb.ParseFromString(val)) {
7234
0
            return 0;
7235
0
        }
7236
0
        if (scan_tablets_and_statistics(index_pb.table_id(), index_id, metrics_context) != 0) {
7237
0
            return 0;
7238
0
        }
7239
0
        metrics_context.total_need_recycle_num++;
7240
0
        return 0;
7241
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7242
7243
0
    int ret = scan_and_recycle(index_key0, index_key1, std::move(handle_index_kv));
7244
0
    metrics_context.report(true);
7245
0
    segment_metrics_context_.report(true);
7246
0
    tablet_metrics_context_.report(true);
7247
0
    return ret;
7248
0
}
7249
7250
// Scan and statistics partitions that need to be recycled
7251
0
int InstanceRecycler::scan_and_statistics_partitions() {
7252
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_partitions");
7253
7254
0
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
7255
0
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
7256
0
    std::string part_key0;
7257
0
    std::string part_key1;
7258
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7259
7260
0
    recycle_partition_key(part_key_info0, &part_key0);
7261
0
    recycle_partition_key(part_key_info1, &part_key1);
7262
0
    auto handle_partition_kv = [&, this](std::string_view k, std::string_view v) -> int {
7263
0
        RecyclePartitionPB part_pb;
7264
0
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
7265
0
            return 0;
7266
0
        }
7267
0
        int64_t current_time = ::time(nullptr);
7268
0
        if (current_time <
7269
0
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
7270
0
            return 0;
7271
0
        }
7272
        // decode partition_id
7273
0
        auto k1 = k;
7274
0
        k1.remove_prefix(1);
7275
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
7276
0
        decode_key(&k1, &out);
7277
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
7278
0
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
7279
        // Change state to RECYCLING
7280
0
        std::unique_ptr<Transaction> txn;
7281
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7282
0
        if (err != TxnErrorCode::TXN_OK) {
7283
0
            return 0;
7284
0
        }
7285
0
        std::string val;
7286
0
        err = txn->get(k, &val);
7287
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
7288
0
            return 0;
7289
0
        }
7290
0
        if (err != TxnErrorCode::TXN_OK) {
7291
0
            return 0;
7292
0
        }
7293
0
        part_pb.Clear();
7294
0
        if (!part_pb.ParseFromString(val)) {
7295
0
            return 0;
7296
0
        }
7297
        // Partitions with PREPARED state MUST have no data
7298
0
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
7299
0
        int ret = 0;
7300
0
        for (int64_t index_id : part_pb.index_id()) {
7301
0
            if (scan_tablets_and_statistics(part_pb.table_id(), index_id, metrics_context,
7302
0
                                            partition_id, is_empty_tablet) != 0) {
7303
0
                ret = 0;
7304
0
            }
7305
0
        }
7306
0
        metrics_context.total_need_recycle_num++;
7307
0
        return ret;
7308
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7309
7310
0
    int ret = scan_and_recycle(part_key0, part_key1, std::move(handle_partition_kv));
7311
0
    metrics_context.report(true);
7312
0
    segment_metrics_context_.report(true);
7313
0
    tablet_metrics_context_.report(true);
7314
0
    return ret;
7315
0
}
7316
7317
// Scan and statistics rowsets that need to be recycled
7318
0
int InstanceRecycler::scan_and_statistics_rowsets() {
7319
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_rowsets");
7320
0
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
7321
0
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
7322
0
    std::string recyc_rs_key0;
7323
0
    std::string recyc_rs_key1;
7324
0
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
7325
0
                recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
7326
0
       int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7327
7328
0
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
7329
0
        RecycleRowsetPB rowset;
7330
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
7331
0
            return 0;
7332
0
        }
7333
0
        auto* rowset_meta = rowset.mutable_rowset_meta();
7334
0
        int64_t current_time = ::time(nullptr);
7335
0
        if (current_time <
7336
0
            calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts)) { // not expired
7337
0
            return 0;
7338
0
        }
7339
7340
0
        if (!rowset.has_type()) {
7341
0
            if (!rowset.has_resource_id()) [[unlikely]] {
7342
0
                return 0;
7343
0
            }
7344
0
            if (rowset.resource_id().empty()) [[unlikely]] {
7345
0
                return 0;
7346
0
            }
7347
0
            metrics_context.total_need_recycle_num++;
7348
0
            metrics_context.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
7349
0
            segment_metrics_context_.total_need_recycle_num += rowset.rowset_meta().num_segments();
7350
0
            segment_metrics_context_.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
7351
0
            return 0;
7352
0
        }
7353
7354
0
        if(!rowset_meta->has_is_recycled() || !rowset_meta->is_recycled()) {
7355
0
            return 0;
7356
0
        }
7357
7358
0
        if (!rowset_meta->has_resource_id()) [[unlikely]] {
7359
0
            if (rowset.type() == RecycleRowsetPB::PREPARE || rowset_meta->num_segments() != 0) {
7360
0
                return 0;
7361
0
            }
7362
0
        }
7363
0
        metrics_context.total_need_recycle_num++;
7364
0
        metrics_context.total_need_recycle_data_size += rowset_meta->total_disk_size();
7365
0
        segment_metrics_context_.total_need_recycle_num += rowset_meta->num_segments();
7366
0
        segment_metrics_context_.total_need_recycle_data_size += rowset_meta->total_disk_size();
7367
0
        return 0;
7368
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7369
0
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv));
7370
0
    metrics_context.report(true);
7371
0
    segment_metrics_context_.report(true);
7372
0
    return ret;
7373
0
}
7374
7375
// Scan and statistics tmp_rowsets that need to be recycled
7376
0
int InstanceRecycler::scan_and_statistics_tmp_rowsets() {
7377
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_tmp_rowsets");
7378
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
7379
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
7380
0
    std::string tmp_rs_key0;
7381
0
    std::string tmp_rs_key1;
7382
0
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
7383
0
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
7384
7385
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7386
7387
0
    auto handle_tmp_rowsets_kv = [&, this](std::string_view k, std::string_view v) -> int {
7388
0
        doris::RowsetMetaCloudPB rowset;
7389
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
7390
0
            return 0;
7391
0
        }
7392
0
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
7393
0
        int64_t current_time = ::time(nullptr);
7394
0
        if (current_time < expiration) {
7395
0
            return 0;
7396
0
        }
7397
7398
0
        DCHECK_GT(rowset.txn_id(), 0)
7399
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
7400
7401
0
        if(!rowset.has_is_recycled() || !rowset.is_recycled()) {
7402
0
            return 0;
7403
0
        }
7404
7405
0
        if (!rowset.has_resource_id()) {
7406
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
7407
0
                return 0;
7408
0
            }
7409
0
            return 0;
7410
0
        }
7411
7412
0
        metrics_context.total_need_recycle_num++;
7413
0
        metrics_context.total_need_recycle_data_size += rowset.total_disk_size();
7414
0
        segment_metrics_context_.total_need_recycle_data_size += rowset.total_disk_size();
7415
0
        segment_metrics_context_.total_need_recycle_num += rowset.num_segments();
7416
0
        return 0;
7417
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7418
0
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_tmp_rowsets_kv));
7419
0
    metrics_context.report(true);
7420
0
    segment_metrics_context_.report(true);
7421
0
    return ret;
7422
0
}
7423
7424
// Scan and statistics abort_timeout_txn that need to be recycled
7425
0
int InstanceRecycler::scan_and_statistics_abort_timeout_txn() {
7426
0
    RecyclerMetricsContext metrics_context(instance_id_, "abort_timeout_txn");
7427
7428
0
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
7429
0
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
7430
0
    std::string begin_txn_running_key;
7431
0
    std::string end_txn_running_key;
7432
0
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
7433
0
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
7434
7435
0
    int64_t current_time =
7436
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
7437
7438
0
    auto handle_abort_timeout_txn_kv = [&metrics_context, &current_time, this](
7439
0
                                               std::string_view k, std::string_view v) -> int {
7440
0
        std::unique_ptr<Transaction> txn;
7441
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7442
0
        if (err != TxnErrorCode::TXN_OK) {
7443
0
            return 0;
7444
0
        }
7445
0
        std::string_view k1 = k;
7446
0
        k1.remove_prefix(1);
7447
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
7448
0
        if (decode_key(&k1, &out) != 0) {
7449
0
            return 0;
7450
0
        }
7451
0
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
7452
0
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
7453
        // Update txn_info
7454
0
        std::string txn_inf_key, txn_inf_val;
7455
0
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
7456
0
        err = txn->get(txn_inf_key, &txn_inf_val);
7457
0
        if (err != TxnErrorCode::TXN_OK) {
7458
0
            return 0;
7459
0
        }
7460
0
        TxnInfoPB txn_info;
7461
0
        if (!txn_info.ParseFromString(txn_inf_val)) {
7462
0
            return 0;
7463
0
        }
7464
7465
0
        if (TxnStatusPB::TXN_STATUS_COMMITTED != txn_info.status()) {
7466
0
            TxnRunningPB txn_running_pb;
7467
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
7468
0
                return 0;
7469
0
            }
7470
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
7471
0
                return 0;
7472
0
            }
7473
0
            metrics_context.total_need_recycle_num++;
7474
0
        }
7475
0
        return 0;
7476
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7477
7478
0
    int ret = scan_and_recycle(begin_txn_running_key, end_txn_running_key, std::move(handle_abort_timeout_txn_kv));
7479
0
    metrics_context.report(true);
7480
0
    return ret;
7481
0
}
7482
7483
// Scan and statistics expired_txn_label that need to be recycled
7484
0
int InstanceRecycler::scan_and_statistics_expired_txn_label() {
7485
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_txn_label");
7486
7487
0
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
7488
0
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
7489
0
    std::string begin_recycle_txn_key;
7490
0
    std::string end_recycle_txn_key;
7491
0
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
7492
0
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
7493
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7494
0
    int64_t current_time_ms =
7495
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
7496
7497
    // for calculate the total num or bytes of recyled objects
7498
0
    auto handle_expired_txn_label_kv = [&, this](std::string_view k, std::string_view v) -> int {
7499
0
        RecycleTxnPB recycle_txn_pb;
7500
0
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
7501
0
            return 0;
7502
0
        }
7503
0
        if ((config::force_immediate_recycle) ||
7504
0
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
7505
0
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
7506
0
             current_time_ms)) {
7507
0
            metrics_context.total_need_recycle_num++;
7508
0
        }
7509
0
        return 0;
7510
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7511
7512
0
    int ret = scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key, std::move(handle_expired_txn_label_kv));
7513
0
    metrics_context.report(true);
7514
0
    return ret;
7515
0
}
7516
7517
// Scan and statistics copy_jobs that need to be recycled
7518
0
int InstanceRecycler::scan_and_statistics_copy_jobs() {
7519
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_copy_jobs");
7520
0
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
7521
0
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
7522
0
    std::string key0;
7523
0
    std::string key1;
7524
0
    copy_job_key(key_info0, &key0);
7525
0
    copy_job_key(key_info1, &key1);
7526
7527
    // for calculate the total num or bytes of recyled objects
7528
0
    auto scan_and_statistics = [&metrics_context](std::string_view k, std::string_view v) -> int {
7529
0
        CopyJobPB copy_job;
7530
0
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
7531
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
7532
0
            return 0;
7533
0
        }
7534
7535
0
        if (copy_job.job_status() == CopyJobPB::FINISH) {
7536
0
            if (copy_job.stage_type() == StagePB::EXTERNAL) {
7537
0
                int64_t current_time =
7538
0
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
7539
0
                if (copy_job.finish_time_ms() > 0) {
7540
0
                    if (!config::force_immediate_recycle &&
7541
0
                        current_time < copy_job.finish_time_ms() +
7542
0
                                               config::copy_job_max_retention_second * 1000) {
7543
0
                        return 0;
7544
0
                    }
7545
0
                } else {
7546
0
                    if (!config::force_immediate_recycle &&
7547
0
                        current_time < copy_job.start_time_ms() +
7548
0
                                               config::copy_job_max_retention_second * 1000) {
7549
0
                        return 0;
7550
0
                    }
7551
0
                }
7552
0
            }
7553
0
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
7554
0
            int64_t current_time =
7555
0
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
7556
0
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
7557
0
                return 0;
7558
0
            }
7559
0
        }
7560
0
        metrics_context.total_need_recycle_num++;
7561
0
        return 0;
7562
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7563
7564
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
7565
0
    metrics_context.report(true);
7566
0
    return ret;
7567
0
}
7568
7569
// Scan and statistics stage that need to be recycled
7570
0
int InstanceRecycler::scan_and_statistics_stage() {
7571
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_stage");
7572
0
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
7573
0
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
7574
0
    std::string key0 = recycle_stage_key(key_info0);
7575
0
    std::string key1 = recycle_stage_key(key_info1);
7576
7577
    // for calculate the total num or bytes of recyled objects
7578
0
    auto scan_and_statistics = [&metrics_context, this](std::string_view k,
7579
0
                                                        std::string_view v) -> int {
7580
0
        RecycleStagePB recycle_stage;
7581
0
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
7582
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
7583
0
            return 0;
7584
0
        }
7585
7586
0
        int idx = stoi(recycle_stage.stage().obj_info().id());
7587
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
7588
0
            LOG(WARNING) << "invalid idx: " << idx;
7589
0
            return 0;
7590
0
        }
7591
7592
0
        std::shared_ptr<StorageVaultAccessor> accessor;
7593
0
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
7594
0
                [&] {
7595
0
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
7596
0
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
7597
0
                    if (!s3_conf) {
7598
0
                        return 0;
7599
0
                    }
7600
7601
0
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
7602
0
                    std::shared_ptr<S3Accessor> s3_accessor;
7603
0
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
7604
0
                    if (ret != 0) {
7605
0
                        return 0;
7606
0
                    }
7607
7608
0
                    accessor = std::move(s3_accessor);
7609
0
                    return 0;
7610
0
                }(),
7611
0
                "recycle_stage:get_accessor", &accessor);
7612
7613
0
        if (ret != 0) {
7614
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
7615
0
            return 0;
7616
0
        }
7617
7618
0
        metrics_context.total_need_recycle_num++;
7619
0
        return 0;
7620
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7621
7622
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
7623
0
    metrics_context.report(true);
7624
0
    return ret;
7625
0
}
7626
7627
// Scan and statistics expired_stage_objects that need to be recycled
7628
0
int InstanceRecycler::scan_and_statistics_expired_stage_objects() {
7629
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
7630
7631
    // for calculate the total num or bytes of recyled objects
7632
0
    auto scan_and_statistics = [&metrics_context, this]() {
7633
0
        for (const auto& stage : instance_info_.stages()) {
7634
0
            if (stopped()) {
7635
0
                break;
7636
0
            }
7637
0
            if (stage.type() == StagePB::EXTERNAL) {
7638
0
                continue;
7639
0
            }
7640
0
            int idx = stoi(stage.obj_info().id());
7641
0
            if (idx > instance_info_.obj_info().size() || idx < 1) {
7642
0
                continue;
7643
0
            }
7644
0
            const auto& old_obj = instance_info_.obj_info()[idx - 1];
7645
0
            auto s3_conf = S3Conf::from_obj_store_info(old_obj);
7646
0
            if (!s3_conf) {
7647
0
                continue;
7648
0
            }
7649
0
            s3_conf->prefix = stage.obj_info().prefix();
7650
0
            std::shared_ptr<S3Accessor> accessor;
7651
0
            int ret1 = S3Accessor::create(*s3_conf, &accessor);
7652
0
            if (ret1 != 0) {
7653
0
                continue;
7654
0
            }
7655
0
            if (s3_conf->prefix.find("/stage/") == std::string::npos) {
7656
0
                continue;
7657
0
            }
7658
0
            metrics_context.total_need_recycle_num++;
7659
0
        }
7660
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
7661
7662
0
    scan_and_statistics();
7663
0
    metrics_context.report(true);
7664
0
    return 0;
7665
0
}
7666
7667
// Scan and statistics versions that need to be recycled
7668
0
int InstanceRecycler::scan_and_statistics_versions() {
7669
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
7670
0
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
7671
0
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
7672
7673
0
    int64_t last_scanned_table_id = 0;
7674
0
    bool is_recycled = false; // Is last scanned kv recycled
7675
    // for calculate the total num or bytes of recyled objects
7676
0
    auto scan_and_statistics = [&metrics_context, &last_scanned_table_id, &is_recycled, this](
7677
0
                                       std::string_view k, std::string_view) {
7678
0
        auto k1 = k;
7679
0
        k1.remove_prefix(1);
7680
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
7681
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
7682
0
        decode_key(&k1, &out);
7683
0
        DCHECK_EQ(out.size(), 6) << k;
7684
0
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
7685
0
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
7686
0
            metrics_context.total_need_recycle_num +=
7687
0
                    is_recycled; // Version kv of this table has been recycled
7688
0
            return 0;
7689
0
        }
7690
0
        last_scanned_table_id = table_id;
7691
0
        is_recycled = false;
7692
0
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
7693
0
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
7694
0
        std::unique_ptr<Transaction> txn;
7695
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7696
0
        if (err != TxnErrorCode::TXN_OK) {
7697
0
            return 0;
7698
0
        }
7699
0
        std::unique_ptr<RangeGetIterator> iter;
7700
0
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
7701
0
        if (err != TxnErrorCode::TXN_OK) {
7702
0
            return 0;
7703
0
        }
7704
0
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
7705
0
            return 0;
7706
0
        }
7707
0
        metrics_context.total_need_recycle_num++;
7708
0
        is_recycled = true;
7709
0
        return 0;
7710
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7711
7712
0
    int ret = scan_and_recycle(version_key_begin, version_key_end, std::move(scan_and_statistics));
7713
0
    metrics_context.report(true);
7714
0
    return ret;
7715
0
}
7716
7717
// Scan and statistics restore jobs that need to be recycled
7718
0
int InstanceRecycler::scan_and_statistics_restore_jobs() {
7719
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_restore_jobs");
7720
0
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
7721
0
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
7722
0
    std::string restore_job_key0;
7723
0
    std::string restore_job_key1;
7724
0
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
7725
0
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
7726
7727
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7728
7729
    // for calculate the total num or bytes of recyled objects
7730
0
    auto scan_and_statistics = [&](std::string_view k, std::string_view v) -> int {
7731
0
        RestoreJobCloudPB restore_job_pb;
7732
0
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
7733
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
7734
0
            return 0;
7735
0
        }
7736
0
        int64_t expiration =
7737
0
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
7738
0
        int64_t current_time = ::time(nullptr);
7739
0
        if (current_time < expiration) { // not expired
7740
0
            return 0;
7741
0
        }
7742
0
        metrics_context.total_need_recycle_num++;
7743
0
        if(restore_job_pb.need_recycle_data()) {
7744
0
            scan_tablet_and_statistics(restore_job_pb.tablet_id(), metrics_context);
7745
0
        }
7746
0
        return 0;
7747
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7748
7749
0
    int ret = scan_and_recycle(restore_job_key0, restore_job_key1, std::move(scan_and_statistics));
7750
0
    metrics_context.report(true);
7751
0
    return ret;
7752
0
}
7753
7754
3
void InstanceRecycler::scan_and_statistics_operation_logs() {
7755
3
    if (!should_recycle_versioned_keys()) {
7756
0
        return;
7757
0
    }
7758
7759
3
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_operation_logs");
7760
7761
3
    OperationLogRecycleChecker recycle_checker(instance_id_, txn_kv_.get(), instance_info_);
7762
3
    if (recycle_checker.init() != 0) {
7763
0
        return;
7764
0
    }
7765
7766
3
    std::string log_key_prefix = versioned::log_key(instance_id_);
7767
3
    std::string begin_key = encode_versioned_key(log_key_prefix, Versionstamp::min());
7768
3
    std::string end_key = encode_versioned_key(log_key_prefix, Versionstamp::max());
7769
7770
3
    std::unique_ptr<BlobIterator> iter = blob_get_range(txn_kv_, begin_key, end_key);
7771
8
    for (; iter->valid(); iter->next()) {
7772
5
        OperationLogPB operation_log;
7773
5
        if (!iter->parse_value(&operation_log)) {
7774
0
            continue;
7775
0
        }
7776
7777
5
        std::string_view key = iter->key();
7778
5
        Versionstamp log_versionstamp;
7779
5
        if (!decode_versioned_key(&key, &log_versionstamp)) {
7780
0
            continue;
7781
0
        }
7782
7783
5
        OperationLogReferenceInfo ref_info;
7784
5
        if (recycle_checker.can_recycle(log_versionstamp, operation_log.min_timestamp(),
7785
5
                                         &ref_info)) {
7786
4
            metrics_context.total_need_recycle_num++;
7787
4
            metrics_context.total_need_recycle_data_size += operation_log.ByteSizeLong();
7788
4
        }
7789
5
    }
7790
7791
3
    metrics_context.report(true);
7792
3
}
7793
7794
int InstanceRecycler::classify_rowset_task_by_ref_count(
7795
60
        RowsetDeleteTask& task, std::vector<RowsetDeleteTask>& batch_delete_tasks) {
7796
60
    constexpr int MAX_RETRY = 10;
7797
60
    const auto& rowset_meta = task.rowset_meta;
7798
60
    int64_t tablet_id = rowset_meta.tablet_id();
7799
60
    const std::string& rowset_id = rowset_meta.rowset_id_v2();
7800
60
    std::string_view reference_instance_id = instance_id_;
7801
60
    if (rowset_meta.has_reference_instance_id()) {
7802
5
        reference_instance_id = rowset_meta.reference_instance_id();
7803
5
    }
7804
7805
61
    for (int i = 0; i < MAX_RETRY; ++i) {
7806
61
        std::unique_ptr<Transaction> txn;
7807
61
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7808
61
        if (err != TxnErrorCode::TXN_OK) {
7809
0
            LOG_WARNING("failed to create txn when classifying rowset task")
7810
0
                    .tag("instance_id", instance_id_)
7811
0
                    .tag("tablet_id", tablet_id)
7812
0
                    .tag("rowset_id", rowset_id)
7813
0
                    .tag("err", err);
7814
0
            return -1;
7815
0
        }
7816
7817
61
        std::string rowset_ref_count_key =
7818
61
                versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id});
7819
61
        task.rowset_ref_count_key = rowset_ref_count_key;
7820
7821
61
        int64_t ref_count = 0;
7822
61
        {
7823
61
            std::string value;
7824
61
            TxnErrorCode err = txn->get(rowset_ref_count_key, &value);
7825
61
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
7826
0
                ref_count = 1;
7827
61
            } else if (err != TxnErrorCode::TXN_OK) {
7828
0
                LOG_WARNING("failed to get rowset ref count key when classifying")
7829
0
                        .tag("instance_id", instance_id_)
7830
0
                        .tag("tablet_id", tablet_id)
7831
0
                        .tag("rowset_id", rowset_id)
7832
0
                        .tag("err", err);
7833
0
                return -1;
7834
61
            } else if (!txn->decode_atomic_int(value, &ref_count)) {
7835
0
                LOG_WARNING("failed to decode rowset data ref count when classifying")
7836
0
                        .tag("instance_id", instance_id_)
7837
0
                        .tag("tablet_id", tablet_id)
7838
0
                        .tag("rowset_id", rowset_id)
7839
0
                        .tag("value", hex(value));
7840
0
                return -1;
7841
0
            }
7842
61
        }
7843
7844
61
        if (ref_count > 1) {
7845
            // ref_count > 1: decrement count, remove recycle keys, don't add to batch delete
7846
12
            txn->atomic_add(rowset_ref_count_key, -1);
7847
12
            LOG_INFO("decrease rowset data ref count in classification phase")
7848
12
                    .tag("instance_id", instance_id_)
7849
12
                    .tag("tablet_id", tablet_id)
7850
12
                    .tag("rowset_id", rowset_id)
7851
12
                    .tag("ref_count", ref_count - 1)
7852
12
                    .tag("ref_count_key", hex(rowset_ref_count_key));
7853
7854
12
            if (!task.recycle_rowset_key.empty()) {
7855
0
                txn->remove(task.recycle_rowset_key);
7856
0
                LOG_INFO("remove recycle rowset key in classification phase")
7857
0
                        .tag("key", hex(task.recycle_rowset_key));
7858
0
            }
7859
12
            if (!task.non_versioned_rowset_key.empty()) {
7860
12
                txn->remove(task.non_versioned_rowset_key);
7861
12
                LOG_INFO("remove non versioned rowset key in classification phase")
7862
12
                        .tag("key", hex(task.non_versioned_rowset_key));
7863
12
            }
7864
7865
12
            err = txn->commit();
7866
12
            if (err == TxnErrorCode::TXN_CONFLICT) {
7867
1
                VLOG_DEBUG << "decrease rowset ref count but txn conflict in classification, retry"
7868
0
                           << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id
7869
0
                           << ", ref_count=" << ref_count << ", retry=" << i;
7870
1
                std::this_thread::sleep_for(std::chrono::milliseconds(500));
7871
1
                continue;
7872
11
            } else if (err != TxnErrorCode::TXN_OK) {
7873
0
                LOG_WARNING("failed to commit txn when classifying rowset task")
7874
0
                        .tag("instance_id", instance_id_)
7875
0
                        .tag("tablet_id", tablet_id)
7876
0
                        .tag("rowset_id", rowset_id)
7877
0
                        .tag("err", err);
7878
0
                return -1;
7879
0
            }
7880
11
            return 1; // handled, not added to batch delete
7881
49
        } else {
7882
            // ref_count == 1: Add to batch delete plan without modifying any KV.
7883
            // Keep recycle_rowset_key as "pending recycle" marker until data is actually deleted.
7884
49
            LOG_INFO("add rowset to batch delete plan")
7885
49
                    .tag("instance_id", instance_id_)
7886
49
                    .tag("tablet_id", tablet_id)
7887
49
                    .tag("rowset_id", rowset_id)
7888
49
                    .tag("resource_id", rowset_meta.resource_id())
7889
49
                    .tag("ref_count", ref_count);
7890
7891
49
            batch_delete_tasks.push_back(std::move(task));
7892
49
            return 0; // added to batch delete
7893
49
        }
7894
61
    }
7895
7896
0
    LOG_WARNING("failed to classify rowset task after retry")
7897
0
            .tag("instance_id", instance_id_)
7898
0
            .tag("tablet_id", tablet_id)
7899
0
            .tag("rowset_id", rowset_id)
7900
0
            .tag("retry", MAX_RETRY);
7901
0
    return -1;
7902
60
}
7903
7904
10
int InstanceRecycler::cleanup_rowset_metadata(const std::vector<RowsetDeleteTask>& tasks) {
7905
10
    int ret = 0;
7906
49
    for (const auto& task : tasks) {
7907
49
        int64_t tablet_id = task.rowset_meta.tablet_id();
7908
49
        const std::string& rowset_id = task.rowset_meta.rowset_id_v2();
7909
7910
        // Note: decrement_packed_file_ref_counts is already called in delete_rowset_data,
7911
        // so we don't need to call it again here.
7912
7913
        // Remove all metadata keys in one transaction
7914
49
        std::unique_ptr<Transaction> txn;
7915
49
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7916
49
        if (err != TxnErrorCode::TXN_OK) {
7917
0
            LOG_WARNING("failed to create txn when cleaning up metadata")
7918
0
                    .tag("instance_id", instance_id_)
7919
0
                    .tag("tablet_id", tablet_id)
7920
0
                    .tag("rowset_id", rowset_id)
7921
0
                    .tag("err", err);
7922
0
            ret = -1;
7923
0
            continue;
7924
0
        }
7925
7926
49
        std::string_view reference_instance_id = instance_id_;
7927
49
        if (task.rowset_meta.has_reference_instance_id()) {
7928
5
            reference_instance_id = task.rowset_meta.reference_instance_id();
7929
5
        }
7930
7931
49
        txn->remove(task.rowset_ref_count_key);
7932
49
        LOG_INFO("delete rowset data ref count key in cleanup phase")
7933
49
                .tag("instance_id", instance_id_)
7934
49
                .tag("tablet_id", tablet_id)
7935
49
                .tag("rowset_id", rowset_id)
7936
49
                .tag("ref_count_key", hex(task.rowset_ref_count_key));
7937
7938
49
        std::string dbm_start_key =
7939
49
                meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id, 0, 0});
7940
49
        std::string dbm_end_key = meta_delete_bitmap_key(
7941
49
                {reference_instance_id, tablet_id, rowset_id,
7942
49
                 std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()});
7943
49
        txn->remove(dbm_start_key, dbm_end_key);
7944
49
        LOG_INFO("remove delete bitmap kv in cleanup phase")
7945
49
                .tag("instance_id", instance_id_)
7946
49
                .tag("tablet_id", tablet_id)
7947
49
                .tag("rowset_id", rowset_id)
7948
49
                .tag("begin", hex(dbm_start_key))
7949
49
                .tag("end", hex(dbm_end_key));
7950
7951
49
        std::string versioned_dbm_start_key =
7952
49
                versioned::meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id});
7953
49
        std::string versioned_dbm_end_key = versioned_dbm_start_key;
7954
49
        encode_int64(INT64_MAX, &versioned_dbm_end_key);
7955
49
        txn->remove(versioned_dbm_start_key, versioned_dbm_end_key);
7956
49
        LOG_INFO("remove versioned delete bitmap kv in cleanup phase")
7957
49
                .tag("instance_id", instance_id_)
7958
49
                .tag("tablet_id", tablet_id)
7959
49
                .tag("rowset_id", rowset_id)
7960
49
                .tag("begin", hex(versioned_dbm_start_key))
7961
49
                .tag("end", hex(versioned_dbm_end_key));
7962
7963
        // Remove versioned meta rowset key
7964
49
        if (!task.versioned_rowset_key.empty()) {
7965
49
            versioned::document_remove<RowsetMetaCloudPB>(
7966
49
                txn.get(), task.versioned_rowset_key, task.versionstamp);
7967
49
            LOG_INFO("remove versioned meta rowset key in cleanup phase")
7968
49
                    .tag("instance_id", instance_id_)
7969
49
                    .tag("tablet_id", tablet_id)
7970
49
                    .tag("rowset_id", rowset_id)
7971
49
                    .tag("key_prefix", hex(task.versioned_rowset_key));
7972
49
        }
7973
7974
49
        if (!task.non_versioned_rowset_key.empty()) {
7975
49
            txn->remove(task.non_versioned_rowset_key);
7976
49
            LOG_INFO("remove non versioned rowset key in cleanup phase")
7977
49
                    .tag("instance_id", instance_id_)
7978
49
                    .tag("tablet_id", tablet_id)
7979
49
                    .tag("rowset_id", rowset_id)
7980
49
                    .tag("key", hex(task.non_versioned_rowset_key));
7981
49
        }
7982
7983
        // Remove recycle_rowset_key last to ensure retry safety:
7984
        // if cleanup fails, this key remains and triggers next round retry.
7985
49
        if (!task.recycle_rowset_key.empty()) {
7986
0
            txn->remove(task.recycle_rowset_key);
7987
0
            LOG_INFO("remove recycle rowset key in cleanup phase")
7988
0
                    .tag("instance_id", instance_id_)
7989
0
                    .tag("tablet_id", tablet_id)
7990
0
                    .tag("rowset_id", rowset_id)
7991
0
                    .tag("key", hex(task.recycle_rowset_key));
7992
0
        }
7993
7994
49
        err = txn->commit();
7995
49
        if (err != TxnErrorCode::TXN_OK) {
7996
            // Metadata cleanup failed. recycle_rowset_key remains, next round will retry.
7997
0
            LOG_WARNING("failed to commit cleanup metadata txn, will retry next round")
7998
0
                    .tag("instance_id", instance_id_)
7999
0
                    .tag("tablet_id", tablet_id)
8000
0
                    .tag("rowset_id", rowset_id)
8001
0
                    .tag("err", err);
8002
0
            ret = -1;
8003
0
            continue;
8004
0
        }
8005
8006
49
        LOG_INFO("cleanup rowset metadata success")
8007
49
                .tag("instance_id", instance_id_)
8008
49
                .tag("tablet_id", tablet_id)
8009
49
                .tag("rowset_id", rowset_id);
8010
49
    }
8011
10
    return ret;
8012
10
}
8013
8014
} // namespace doris::cloud