/root/doris/cloud/src/recycler/recycler.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "recycler/recycler.h" |
19 | | |
20 | | #include <brpc/builtin_service.pb.h> |
21 | | #include <brpc/server.h> |
22 | | #include <butil/endpoint.h> |
23 | | #include <bvar/status.h> |
24 | | #include <gen_cpp/cloud.pb.h> |
25 | | #include <gen_cpp/olap_file.pb.h> |
26 | | |
27 | | #include <atomic> |
28 | | #include <chrono> |
29 | | #include <cstddef> |
30 | | #include <cstdint> |
31 | | #include <deque> |
32 | | #include <initializer_list> |
33 | | #include <numeric> |
34 | | #include <string> |
35 | | #include <string_view> |
36 | | #include <utility> |
37 | | |
38 | | #include "common/stopwatch.h" |
39 | | #include "meta-service/meta_service.h" |
40 | | #include "meta-service/meta_service_helper.h" |
41 | | #include "meta-service/meta_service_schema.h" |
42 | | #include "meta-service/txn_kv.h" |
43 | | #include "meta-service/txn_kv_error.h" |
44 | | #include "recycler/checker.h" |
45 | | #include "recycler/hdfs_accessor.h" |
46 | | #include "recycler/s3_accessor.h" |
47 | | #include "recycler/storage_vault_accessor.h" |
48 | | #ifdef UNIT_TEST |
49 | | #include "../test/mock_accessor.h" |
50 | | #endif |
51 | | #include "common/bvars.h" |
52 | | #include "common/config.h" |
53 | | #include "common/encryption_util.h" |
54 | | #include "common/logging.h" |
55 | | #include "common/simple_thread_pool.h" |
56 | | #include "common/util.h" |
57 | | #include "cpp/sync_point.h" |
58 | | #include "meta-service/keys.h" |
59 | | #include "recycler/recycler_service.h" |
60 | | #include "recycler/sync_executor.h" |
61 | | #include "recycler/util.h" |
62 | | |
63 | | namespace doris::cloud { |
64 | | |
65 | | using namespace std::chrono; |
66 | | |
67 | | // return 0 for success get a key, 1 for key not found, negative for error |
68 | 0 | [[maybe_unused]] static int txn_get(TxnKv* txn_kv, std::string_view key, std::string& val) { |
69 | 0 | std::unique_ptr<Transaction> txn; |
70 | 0 | TxnErrorCode err = txn_kv->create_txn(&txn); |
71 | 0 | if (err != TxnErrorCode::TXN_OK) { |
72 | 0 | return -1; |
73 | 0 | } |
74 | 0 | switch (txn->get(key, &val, true)) { |
75 | 0 | case TxnErrorCode::TXN_OK: |
76 | 0 | return 0; |
77 | 0 | case TxnErrorCode::TXN_KEY_NOT_FOUND: |
78 | 0 | return 1; |
79 | 0 | default: |
80 | 0 | return -1; |
81 | 0 | }; |
82 | 0 | } Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE |
83 | | |
84 | | // 0 for success, negative for error |
85 | | static int txn_get(TxnKv* txn_kv, std::string_view begin, std::string_view end, |
86 | 199 | std::unique_ptr<RangeGetIterator>& it) { |
87 | 199 | std::unique_ptr<Transaction> txn; |
88 | 199 | TxnErrorCode err = txn_kv->create_txn(&txn); |
89 | 199 | if (err != TxnErrorCode::TXN_OK) { |
90 | 0 | return -1; |
91 | 0 | } |
92 | 199 | switch (txn->get(begin, end, &it, true)) { |
93 | 199 | case TxnErrorCode::TXN_OK: |
94 | 199 | return 0; |
95 | 0 | case TxnErrorCode::TXN_KEY_NOT_FOUND: |
96 | 0 | return 1; |
97 | 0 | default: |
98 | 0 | return -1; |
99 | 199 | }; |
100 | 0 | } recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE Line | Count | Source | 86 | 180 | std::unique_ptr<RangeGetIterator>& it) { | 87 | 180 | std::unique_ptr<Transaction> txn; | 88 | 180 | TxnErrorCode err = txn_kv->create_txn(&txn); | 89 | 180 | if (err != TxnErrorCode::TXN_OK) { | 90 | 0 | return -1; | 91 | 0 | } | 92 | 180 | switch (txn->get(begin, end, &it, true)) { | 93 | 180 | case TxnErrorCode::TXN_OK: | 94 | 180 | return 0; | 95 | 0 | case TxnErrorCode::TXN_KEY_NOT_FOUND: | 96 | 0 | return 1; | 97 | 0 | default: | 98 | 0 | return -1; | 99 | 180 | }; | 100 | 0 | } |
recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE Line | Count | Source | 86 | 19 | std::unique_ptr<RangeGetIterator>& it) { | 87 | 19 | std::unique_ptr<Transaction> txn; | 88 | 19 | TxnErrorCode err = txn_kv->create_txn(&txn); | 89 | 19 | if (err != TxnErrorCode::TXN_OK) { | 90 | 0 | return -1; | 91 | 0 | } | 92 | 19 | switch (txn->get(begin, end, &it, true)) { | 93 | 19 | case TxnErrorCode::TXN_OK: | 94 | 19 | return 0; | 95 | 0 | case TxnErrorCode::TXN_KEY_NOT_FOUND: | 96 | 0 | return 1; | 97 | 0 | default: | 98 | 0 | return -1; | 99 | 19 | }; | 100 | 0 | } |
|
101 | | |
102 | | // return 0 for success otherwise error |
103 | 10 | static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) { |
104 | 10 | std::unique_ptr<Transaction> txn; |
105 | 10 | TxnErrorCode err = txn_kv->create_txn(&txn); |
106 | 10 | if (err != TxnErrorCode::TXN_OK) { |
107 | 0 | return -1; |
108 | 0 | } |
109 | 3.04k | for (auto k : keys) { |
110 | 3.04k | txn->remove(k); |
111 | 3.04k | } |
112 | 10 | switch (txn->commit()) { |
113 | 10 | case TxnErrorCode::TXN_OK: |
114 | 10 | return 0; |
115 | 0 | case TxnErrorCode::TXN_CONFLICT: |
116 | 0 | return -1; |
117 | 0 | default: |
118 | 0 | return -1; |
119 | 10 | } |
120 | 10 | } recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE Line | Count | Source | 103 | 6 | static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) { | 104 | 6 | std::unique_ptr<Transaction> txn; | 105 | 6 | TxnErrorCode err = txn_kv->create_txn(&txn); | 106 | 6 | if (err != TxnErrorCode::TXN_OK) { | 107 | 0 | return -1; | 108 | 0 | } | 109 | 3.02k | for (auto k : keys) { | 110 | 3.02k | txn->remove(k); | 111 | 3.02k | } | 112 | 6 | switch (txn->commit()) { | 113 | 6 | case TxnErrorCode::TXN_OK: | 114 | 6 | return 0; | 115 | 0 | case TxnErrorCode::TXN_CONFLICT: | 116 | 0 | return -1; | 117 | 0 | default: | 118 | 0 | return -1; | 119 | 6 | } | 120 | 6 | } |
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE Line | Count | Source | 103 | 4 | static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) { | 104 | 4 | std::unique_ptr<Transaction> txn; | 105 | 4 | TxnErrorCode err = txn_kv->create_txn(&txn); | 106 | 4 | if (err != TxnErrorCode::TXN_OK) { | 107 | 0 | return -1; | 108 | 0 | } | 109 | 21 | for (auto k : keys) { | 110 | 21 | txn->remove(k); | 111 | 21 | } | 112 | 4 | switch (txn->commit()) { | 113 | 4 | case TxnErrorCode::TXN_OK: | 114 | 4 | return 0; | 115 | 0 | case TxnErrorCode::TXN_CONFLICT: | 116 | 0 | return -1; | 117 | 0 | default: | 118 | 0 | return -1; | 119 | 4 | } | 120 | 4 | } |
|
121 | | |
122 | | // return 0 for success otherwise error |
123 | 30 | static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) { |
124 | 30 | std::unique_ptr<Transaction> txn; |
125 | 30 | TxnErrorCode err = txn_kv->create_txn(&txn); |
126 | 30 | if (err != TxnErrorCode::TXN_OK) { |
127 | 0 | return -1; |
128 | 0 | } |
129 | 4.00k | for (auto& k : keys) { |
130 | 4.00k | txn->remove(k); |
131 | 4.00k | } |
132 | 30 | switch (txn->commit()) { |
133 | 30 | case TxnErrorCode::TXN_OK: |
134 | 30 | return 0; |
135 | 0 | case TxnErrorCode::TXN_CONFLICT: |
136 | 0 | return -1; |
137 | 0 | default: |
138 | 0 | return -1; |
139 | 30 | } |
140 | 30 | } recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE Line | Count | Source | 123 | 30 | static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) { | 124 | 30 | std::unique_ptr<Transaction> txn; | 125 | 30 | TxnErrorCode err = txn_kv->create_txn(&txn); | 126 | 30 | if (err != TxnErrorCode::TXN_OK) { | 127 | 0 | return -1; | 128 | 0 | } | 129 | 4.00k | for (auto& k : keys) { | 130 | 4.00k | txn->remove(k); | 131 | 4.00k | } | 132 | 30 | switch (txn->commit()) { | 133 | 30 | case TxnErrorCode::TXN_OK: | 134 | 30 | return 0; | 135 | 0 | case TxnErrorCode::TXN_CONFLICT: | 136 | 0 | return -1; | 137 | 0 | default: | 138 | 0 | return -1; | 139 | 30 | } | 140 | 30 | } |
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE |
141 | | |
142 | | // return 0 for success otherwise error |
143 | | [[maybe_unused]] static int txn_remove(TxnKv* txn_kv, std::string_view begin, |
144 | 0 | std::string_view end) { |
145 | 0 | std::unique_ptr<Transaction> txn; |
146 | 0 | TxnErrorCode err = txn_kv->create_txn(&txn); |
147 | 0 | if (err != TxnErrorCode::TXN_OK) { |
148 | 0 | return -1; |
149 | 0 | } |
150 | 0 | txn->remove(begin, end); |
151 | 0 | switch (txn->commit()) { |
152 | 0 | case TxnErrorCode::TXN_OK: |
153 | 0 | return 0; |
154 | 0 | case TxnErrorCode::TXN_CONFLICT: |
155 | 0 | return -1; |
156 | 0 | default: |
157 | 0 | return -1; |
158 | 0 | } |
159 | 0 | } Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_ |
160 | | |
161 | | static inline void check_recycle_task(const std::string& instance_id, const std::string& task_name, |
162 | | int64_t num_scanned, int64_t num_recycled, |
163 | 29 | int64_t start_time) { |
164 | 29 | if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] { |
165 | 0 | int64_t cost = |
166 | 0 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
167 | 0 | if (cost > config::recycle_task_threshold_seconds) { |
168 | 0 | LOG_INFO("recycle task cost too much time cost={}s", cost) |
169 | 0 | .tag("instance_id", instance_id) |
170 | 0 | .tag("task", task_name) |
171 | 0 | .tag("num_scanned", num_scanned) |
172 | 0 | .tag("num_recycled", num_recycled); |
173 | 0 | } |
174 | 0 | } |
175 | 29 | return; |
176 | 29 | } recycler_test.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll Line | Count | Source | 163 | 27 | int64_t start_time) { | 164 | 27 | if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] { | 165 | 0 | int64_t cost = | 166 | 0 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 167 | 0 | if (cost > config::recycle_task_threshold_seconds) { | 168 | 0 | LOG_INFO("recycle task cost too much time cost={}s", cost) | 169 | 0 | .tag("instance_id", instance_id) | 170 | 0 | .tag("task", task_name) | 171 | 0 | .tag("num_scanned", num_scanned) | 172 | 0 | .tag("num_recycled", num_recycled); | 173 | 0 | } | 174 | 0 | } | 175 | 27 | return; | 176 | 27 | } |
recycler.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll Line | Count | Source | 163 | 2 | int64_t start_time) { | 164 | 2 | if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] { | 165 | 0 | int64_t cost = | 166 | 0 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 167 | 0 | if (cost > config::recycle_task_threshold_seconds) { | 168 | 0 | LOG_INFO("recycle task cost too much time cost={}s", cost) | 169 | 0 | .tag("instance_id", instance_id) | 170 | 0 | .tag("task", task_name) | 171 | 0 | .tag("num_scanned", num_scanned) | 172 | 0 | .tag("num_recycled", num_recycled); | 173 | 0 | } | 174 | 0 | } | 175 | 2 | return; | 176 | 2 | } |
|
177 | | |
178 | 4 | Recycler::Recycler(std::shared_ptr<TxnKv> txn_kv) : txn_kv_(std::move(txn_kv)) { |
179 | 4 | ip_port_ = std::string(butil::my_ip_cstr()) + ":" + std::to_string(config::brpc_listen_port); |
180 | | |
181 | 4 | auto s3_producer_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism, |
182 | 4 | "s3_producer_pool"); |
183 | 4 | s3_producer_pool->start(); |
184 | 4 | auto recycle_tablet_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism, |
185 | 4 | "recycle_tablet_pool"); |
186 | 4 | recycle_tablet_pool->start(); |
187 | 4 | auto group_recycle_function_pool = std::make_shared<SimpleThreadPool>( |
188 | 4 | config::recycle_pool_parallelism, "group_recycle_function_pool"); |
189 | 4 | group_recycle_function_pool->start(); |
190 | 4 | _thread_pool_group = |
191 | 4 | RecyclerThreadPoolGroup(std::move(s3_producer_pool), std::move(recycle_tablet_pool), |
192 | 4 | std::move(group_recycle_function_pool)); |
193 | | |
194 | 4 | txn_lazy_committer_ = std::make_shared<TxnLazyCommitter>(txn_kv_); |
195 | 4 | } |
196 | | |
197 | 4 | Recycler::~Recycler() { |
198 | 4 | if (!stopped()) { |
199 | 0 | stop(); |
200 | 0 | } |
201 | 4 | } |
202 | | |
203 | 4 | void Recycler::instance_scanner_callback() { |
204 | | // sleep 60 seconds before scheduling for the launch procedure to complete: |
205 | | // some bad hdfs connection may cause some log to stdout stderr |
206 | | // which may pollute .out file and affect the script to check success |
207 | 4 | std::this_thread::sleep_for( |
208 | 4 | std::chrono::seconds(config::recycler_sleep_before_scheduling_seconds)); |
209 | 8 | while (!stopped()) { |
210 | 4 | std::vector<InstanceInfoPB> instances; |
211 | 4 | get_all_instances(txn_kv_.get(), instances); |
212 | | // TODO(plat1ko): delete job recycle kv of non-existent instances |
213 | 4 | LOG(INFO) << "Recycler get instances: " << [&instances] { |
214 | 4 | std::stringstream ss; |
215 | 30 | for (auto& i : instances) ss << ' ' << i.instance_id(); |
216 | 4 | return ss.str(); |
217 | 4 | }(); recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev Line | Count | Source | 213 | 4 | LOG(INFO) << "Recycler get instances: " << [&instances] { | 214 | 4 | std::stringstream ss; | 215 | 30 | for (auto& i : instances) ss << ' ' << i.instance_id(); | 216 | 4 | return ss.str(); | 217 | 4 | }(); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev |
218 | 4 | if (!instances.empty()) { |
219 | | // enqueue instances |
220 | 3 | std::lock_guard lock(mtx_); |
221 | 30 | for (auto& instance : instances) { |
222 | 30 | if (instance_filter_.filter_out(instance.instance_id())) continue; |
223 | 30 | auto [_, success] = pending_instance_set_.insert(instance.instance_id()); |
224 | | // skip instance already in pending queue |
225 | 30 | if (success) { |
226 | 30 | pending_instance_queue_.push_back(std::move(instance)); |
227 | 30 | } |
228 | 30 | } |
229 | 3 | pending_instance_cond_.notify_all(); |
230 | 3 | } |
231 | 4 | { |
232 | 4 | std::unique_lock lock(mtx_); |
233 | 4 | notifier_.wait_for(lock, std::chrono::seconds(config::recycle_interval_seconds), |
234 | 7 | [&]() { return stopped(); }); recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv Line | Count | Source | 234 | 7 | [&]() { return stopped(); }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv |
235 | 4 | } |
236 | 4 | } |
237 | 4 | } |
238 | | |
239 | 8 | void Recycler::recycle_callback() { |
240 | 37 | while (!stopped()) { |
241 | 35 | InstanceInfoPB instance; |
242 | 35 | { |
243 | 35 | std::unique_lock lock(mtx_); |
244 | 35 | pending_instance_cond_.wait( |
245 | 48 | lock, [&]() { return !pending_instance_queue_.empty() || stopped(); }); recycler_test.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv Line | Count | Source | 245 | 48 | lock, [&]() { return !pending_instance_queue_.empty() || stopped(); }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv |
246 | 35 | if (stopped()) { |
247 | 6 | return; |
248 | 6 | } |
249 | 29 | instance = std::move(pending_instance_queue_.front()); |
250 | 29 | pending_instance_queue_.pop_front(); |
251 | 29 | pending_instance_set_.erase(instance.instance_id()); |
252 | 29 | } |
253 | 0 | auto& instance_id = instance.instance_id(); |
254 | 29 | { |
255 | 29 | std::lock_guard lock(mtx_); |
256 | | // skip instance in recycling |
257 | 29 | if (recycling_instance_map_.count(instance_id)) continue; |
258 | 29 | } |
259 | 29 | auto instance_recycler = std::make_shared<InstanceRecycler>( |
260 | 29 | txn_kv_, instance, _thread_pool_group, txn_lazy_committer_); |
261 | | |
262 | 29 | if (int r = instance_recycler->init(); r != 0) { |
263 | 0 | LOG(WARNING) << "failed to init instance recycler, instance_id=" << instance_id |
264 | 0 | << " ret=" << r; |
265 | 0 | continue; |
266 | 0 | } |
267 | 29 | std::string recycle_job_key; |
268 | 29 | job_recycle_key({instance_id}, &recycle_job_key); |
269 | 29 | int ret = prepare_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, |
270 | 29 | ip_port_, config::recycle_interval_seconds * 1000); |
271 | 29 | if (ret != 0) { // Prepare failed |
272 | 20 | LOG(WARNING) << "failed to prepare recycle_job, instance_id=" << instance_id |
273 | 20 | << " ret=" << ret; |
274 | 20 | continue; |
275 | 20 | } else { |
276 | 9 | std::lock_guard lock(mtx_); |
277 | 9 | recycling_instance_map_.emplace(instance_id, instance_recycler); |
278 | 9 | } |
279 | 9 | if (stopped()) return; |
280 | 9 | LOG_INFO("begin to recycle instance").tag("instance_id", instance_id); |
281 | 9 | auto ctime_ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
282 | 9 | g_bvar_recycler_task_concurrency << 1; |
283 | 9 | g_bvar_recycler_instance_running.put({instance_id}, 1); |
284 | 9 | g_bvar_recycler_instance_recycle_times.put({instance_id}, std::make_pair(ctime_ms, -1)); |
285 | 9 | ret = instance_recycler->do_recycle(); |
286 | 9 | g_bvar_recycler_task_concurrency << -1; |
287 | 9 | g_bvar_recycler_instance_running.put({instance_id}, -1); |
288 | | // If instance recycler has been aborted, don't finish this job |
289 | 10 | if (!instance_recycler->stopped()) { |
290 | 10 | finish_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, ip_port_, |
291 | 10 | ret == 0, ctime_ms); |
292 | 10 | } |
293 | 9 | { |
294 | 9 | std::lock_guard lock(mtx_); |
295 | 9 | recycling_instance_map_.erase(instance_id); |
296 | 9 | } |
297 | 9 | auto now = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
298 | 9 | auto elpased_ms = now - ctime_ms; |
299 | 9 | g_bvar_recycler_instance_recycle_times.put({instance_id}, std::make_pair(ctime_ms, now)); |
300 | 9 | g_bvar_recycler_instance_last_recycle_duration.put({instance_id}, elpased_ms); |
301 | 9 | g_bvar_recycler_instance_next_time.put({instance_id}, |
302 | 9 | now + config::recycle_interval_seconds * 1000); |
303 | 9 | LOG(INFO) << "recycle instance done, " |
304 | 9 | << "instance_id=" << instance_id << " ret=" << ret << " ctime_ms: " << ctime_ms |
305 | 9 | << " now: " << now; |
306 | | |
307 | 9 | g_bvar_recycler_instance_recycle_last_success_times.put({instance_id}, now); |
308 | | |
309 | 9 | LOG_INFO("finish recycle instance") |
310 | 9 | .tag("instance_id", instance_id) |
311 | 9 | .tag("cost_ms", elpased_ms); |
312 | 9 | } |
313 | 8 | } |
314 | | |
315 | 4 | void Recycler::lease_recycle_jobs() { |
316 | 54 | while (!stopped()) { |
317 | 50 | std::vector<std::string> instances; |
318 | 50 | instances.reserve(recycling_instance_map_.size()); |
319 | 50 | { |
320 | 50 | std::lock_guard lock(mtx_); |
321 | 50 | for (auto& [id, _] : recycling_instance_map_) { |
322 | 30 | instances.push_back(id); |
323 | 30 | } |
324 | 50 | } |
325 | 50 | for (auto& i : instances) { |
326 | 30 | std::string recycle_job_key; |
327 | 30 | job_recycle_key({i}, &recycle_job_key); |
328 | 30 | int ret = lease_instance_recycle_job(txn_kv_.get(), recycle_job_key, i, ip_port_); |
329 | 30 | if (ret == 1) { |
330 | 0 | std::lock_guard lock(mtx_); |
331 | 0 | if (auto it = recycling_instance_map_.find(i); |
332 | 0 | it != recycling_instance_map_.end()) { |
333 | 0 | it->second->stop(); |
334 | 0 | } |
335 | 0 | } |
336 | 30 | } |
337 | 50 | { |
338 | 50 | std::unique_lock lock(mtx_); |
339 | 50 | notifier_.wait_for(lock, |
340 | 50 | std::chrono::milliseconds(config::recycle_job_lease_expired_ms / 3), |
341 | 100 | [&]() { return stopped(); }); recycler_test.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv Line | Count | Source | 341 | 100 | [&]() { return stopped(); }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv |
342 | 50 | } |
343 | 50 | } |
344 | 4 | } |
345 | | |
346 | 4 | void Recycler::check_recycle_tasks() { |
347 | 7 | while (!stopped()) { |
348 | 3 | std::unordered_map<std::string, std::shared_ptr<InstanceRecycler>> recycling_instance_map; |
349 | 3 | { |
350 | 3 | std::lock_guard lock(mtx_); |
351 | 3 | recycling_instance_map = recycling_instance_map_; |
352 | 3 | } |
353 | 3 | for (auto& entry : recycling_instance_map) { |
354 | 0 | entry.second->check_recycle_tasks(); |
355 | 0 | } |
356 | | |
357 | 3 | std::unique_lock lock(mtx_); |
358 | 3 | notifier_.wait_for(lock, std::chrono::seconds(config::check_recycle_task_interval_seconds), |
359 | 6 | [&]() { return stopped(); }); recycler_test.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv Line | Count | Source | 359 | 6 | [&]() { return stopped(); }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv |
360 | 3 | } |
361 | 4 | } |
362 | | |
363 | 4 | int Recycler::start(brpc::Server* server) { |
364 | 4 | instance_filter_.reset(config::recycle_whitelist, config::recycle_blacklist); |
365 | 4 | g_bvar_recycler_task_max_concurrency.set_value(config::recycle_concurrency); |
366 | | |
367 | 4 | if (config::enable_checker) { |
368 | 0 | checker_ = std::make_unique<Checker>(txn_kv_); |
369 | 0 | int ret = checker_->start(); |
370 | 0 | std::string msg; |
371 | 0 | if (ret != 0) { |
372 | 0 | msg = "failed to start checker"; |
373 | 0 | LOG(ERROR) << msg; |
374 | 0 | std::cerr << msg << std::endl; |
375 | 0 | return ret; |
376 | 0 | } |
377 | 0 | msg = "checker started"; |
378 | 0 | LOG(INFO) << msg; |
379 | 0 | std::cout << msg << std::endl; |
380 | 0 | } |
381 | | |
382 | 4 | if (server) { |
383 | | // Add service |
384 | 1 | auto recycler_service = |
385 | 1 | new RecyclerServiceImpl(txn_kv_, this, checker_.get(), txn_lazy_committer_); |
386 | 1 | server->AddService(recycler_service, brpc::SERVER_OWNS_SERVICE); |
387 | 1 | } |
388 | | |
389 | 4 | workers_.emplace_back([this] { instance_scanner_callback(); }); recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv Line | Count | Source | 389 | 4 | workers_.emplace_back([this] { instance_scanner_callback(); }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv |
390 | 12 | for (int i = 0; i < config::recycle_concurrency; ++i) { |
391 | 8 | workers_.emplace_back([this] { recycle_callback(); }); recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv Line | Count | Source | 391 | 8 | workers_.emplace_back([this] { recycle_callback(); }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv |
392 | 8 | } |
393 | | |
394 | 4 | workers_.emplace_back(std::mem_fn(&Recycler::lease_recycle_jobs), this); |
395 | 4 | workers_.emplace_back(std::mem_fn(&Recycler::check_recycle_tasks), this); |
396 | 4 | return 0; |
397 | 4 | } |
398 | | |
399 | 4 | void Recycler::stop() { |
400 | 4 | stopped_ = true; |
401 | 4 | notifier_.notify_all(); |
402 | 4 | pending_instance_cond_.notify_all(); |
403 | 4 | { |
404 | 4 | std::lock_guard lock(mtx_); |
405 | 4 | for (auto& [_, recycler] : recycling_instance_map_) { |
406 | 0 | recycler->stop(); |
407 | 0 | } |
408 | 4 | } |
409 | 20 | for (auto& w : workers_) { |
410 | 20 | if (w.joinable()) w.join(); |
411 | 20 | } |
412 | 4 | if (checker_) { |
413 | 0 | checker_->stop(); |
414 | 0 | } |
415 | 4 | } |
416 | | |
417 | | class InstanceRecycler::InvertedIndexIdCache { |
418 | | public: |
419 | | InvertedIndexIdCache(std::string instance_id, std::shared_ptr<TxnKv> txn_kv) |
420 | 73 | : instance_id_(std::move(instance_id)), txn_kv_(std::move(txn_kv)) {} |
421 | | |
422 | | // Return 0 if success, 1 if schema kv not found, negative for error |
423 | 3.55k | int get(int64_t index_id, int32_t schema_version, InvertedIndexInfo& res) { |
424 | 3.55k | { |
425 | 3.55k | std::lock_guard lock(mtx_); |
426 | 3.55k | if (schemas_without_inverted_index_.count({index_id, schema_version})) { |
427 | 644 | return 0; |
428 | 644 | } |
429 | 2.90k | if (auto it = inverted_index_id_map_.find({index_id, schema_version}); |
430 | 2.90k | it != inverted_index_id_map_.end()) { |
431 | 2.37k | res = it->second; |
432 | 2.37k | return 0; |
433 | 2.37k | } |
434 | 2.90k | } |
435 | | // Get schema from kv |
436 | | // TODO(plat1ko): Single flight |
437 | 532 | std::unique_ptr<Transaction> txn; |
438 | 532 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
439 | 532 | if (err != TxnErrorCode::TXN_OK) { |
440 | 0 | LOG(WARNING) << "failed to create txn, err=" << err; |
441 | 0 | return -1; |
442 | 0 | } |
443 | 532 | auto schema_key = meta_schema_key({instance_id_, index_id, schema_version}); |
444 | 532 | ValueBuf val_buf; |
445 | 532 | err = cloud::get(txn.get(), schema_key, &val_buf); |
446 | 532 | if (err != TxnErrorCode::TXN_OK) { |
447 | 500 | LOG(WARNING) << "failed to get schema, err=" << err; |
448 | 500 | return static_cast<int>(err); |
449 | 500 | } |
450 | 32 | doris::TabletSchemaCloudPB schema; |
451 | 32 | if (!parse_schema_value(val_buf, &schema)) { |
452 | 0 | LOG(WARNING) << "malformed schema value, key=" << hex(schema_key); |
453 | 0 | return -1; |
454 | 0 | } |
455 | 32 | if (schema.index_size() > 0) { |
456 | 26 | InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1; |
457 | 26 | if (schema.has_inverted_index_storage_format()) { |
458 | 23 | index_format = schema.inverted_index_storage_format(); |
459 | 23 | } |
460 | 26 | res.first = index_format; |
461 | 26 | res.second.reserve(schema.index_size()); |
462 | 62 | for (auto& i : schema.index()) { |
463 | 62 | if (i.has_index_type() && i.index_type() == IndexType::INVERTED) { |
464 | 62 | res.second.push_back(std::make_pair(i.index_id(), i.index_suffix_name())); |
465 | 62 | } |
466 | 62 | } |
467 | 26 | } |
468 | 32 | insert(index_id, schema_version, res); |
469 | 32 | return 0; |
470 | 32 | } |
471 | | |
472 | | // Empty `ids` means this schema has no inverted index |
473 | 32 | void insert(int64_t index_id, int32_t schema_version, const InvertedIndexInfo& index_info) { |
474 | 32 | if (index_info.second.empty()) { |
475 | 6 | TEST_SYNC_POINT("InvertedIndexIdCache::insert1"); |
476 | 6 | std::lock_guard lock(mtx_); |
477 | 6 | schemas_without_inverted_index_.emplace(index_id, schema_version); |
478 | 26 | } else { |
479 | 26 | TEST_SYNC_POINT("InvertedIndexIdCache::insert2"); |
480 | 26 | std::lock_guard lock(mtx_); |
481 | 26 | inverted_index_id_map_.try_emplace({index_id, schema_version}, index_info); |
482 | 26 | } |
483 | 32 | } |
484 | | |
485 | | private: |
486 | | std::string instance_id_; |
487 | | std::shared_ptr<TxnKv> txn_kv_; |
488 | | |
489 | | std::mutex mtx_; |
490 | | using Key = std::pair<int64_t, int32_t>; // <index_id, schema_version> |
491 | | struct HashOfKey { |
492 | 6.49k | size_t operator()(const Key& key) const { |
493 | 6.49k | size_t seed = 0; |
494 | 6.49k | seed = std::hash<int64_t> {}(key.first); |
495 | 6.49k | seed = std::hash<int32_t> {}(key.second); |
496 | 6.49k | return seed; |
497 | 6.49k | } |
498 | | }; |
499 | | // <index_id, schema_version> -> inverted_index_ids |
500 | | std::unordered_map<Key, InvertedIndexInfo, HashOfKey> inverted_index_id_map_; |
501 | | // Store <index_id, schema_version> of schema which doesn't have inverted index |
502 | | std::unordered_set<Key, HashOfKey> schemas_without_inverted_index_; |
503 | | }; |
504 | | |
505 | | InstanceRecycler::InstanceRecycler(std::shared_ptr<TxnKv> txn_kv, const InstanceInfoPB& instance, |
506 | | RecyclerThreadPoolGroup thread_pool_group, |
507 | | std::shared_ptr<TxnLazyCommitter> txn_lazy_committer) |
508 | | : txn_kv_(std::move(txn_kv)), |
509 | | instance_id_(instance.instance_id()), |
510 | | instance_info_(instance), |
511 | | inverted_index_id_cache_(std::make_unique<InvertedIndexIdCache>(instance_id_, txn_kv_)), |
512 | | _thread_pool_group(std::move(thread_pool_group)), |
513 | 73 | txn_lazy_committer_(std::move(txn_lazy_committer)) {}; |
514 | | |
515 | 73 | InstanceRecycler::~InstanceRecycler() = default; |
516 | | |
517 | 73 | int InstanceRecycler::init_obj_store_accessors() { |
518 | 73 | for (const auto& obj_info : instance_info_.obj_info()) { |
519 | 54 | #ifdef UNIT_TEST |
520 | 54 | auto accessor = std::make_shared<MockAccessor>(); |
521 | | #else |
522 | | auto s3_conf = S3Conf::from_obj_store_info(obj_info); |
523 | | if (!s3_conf) { |
524 | | LOG(WARNING) << "failed to init object accessor, instance_id=" << instance_id_; |
525 | | return -1; |
526 | | } |
527 | | |
528 | | std::shared_ptr<S3Accessor> accessor; |
529 | | int ret = S3Accessor::create(std::move(*s3_conf), &accessor); |
530 | | if (ret != 0) { |
531 | | LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_ |
532 | | << " resource_id=" << obj_info.id(); |
533 | | return ret; |
534 | | } |
535 | | #endif |
536 | 54 | accessor_map_.emplace(obj_info.id(), std::move(accessor)); |
537 | 54 | } |
538 | | |
539 | 73 | return 0; |
540 | 73 | } |
541 | | |
542 | 73 | int InstanceRecycler::init_storage_vault_accessors() { |
543 | 73 | if (instance_info_.resource_ids().empty()) { |
544 | 66 | return 0; |
545 | 66 | } |
546 | | |
547 | 7 | FullRangeGetIteratorOptions opts(txn_kv_); |
548 | 7 | opts.prefetch = true; |
549 | 7 | auto it = txn_kv_->full_range_get(storage_vault_key({instance_id_, ""}), |
550 | 7 | storage_vault_key({instance_id_, "\xff"}), std::move(opts)); |
551 | | |
552 | 25 | for (auto kv = it->next(); kv.has_value(); kv = it->next()) { |
553 | 18 | auto [k, v] = *kv; |
554 | 18 | StorageVaultPB vault; |
555 | 18 | if (!vault.ParseFromArray(v.data(), v.size())) { |
556 | 0 | LOG(WARNING) << "malformed storage vault, unable to deserialize key=" << hex(k); |
557 | 0 | return -1; |
558 | 0 | } |
559 | 18 | std::string recycler_storage_vault_white_list = accumulate( |
560 | 18 | config::recycler_storage_vault_white_list.begin(), |
561 | 18 | config::recycler_storage_vault_white_list.end(), std::string(), |
562 | 24 | [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_ Line | Count | Source | 562 | 24 | [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_ |
563 | 18 | LOG_INFO("config::recycler_storage_vault_white_list") |
564 | 18 | .tag("", recycler_storage_vault_white_list); |
565 | 18 | if (!config::recycler_storage_vault_white_list.empty()) { |
566 | 8 | if (auto it = std::find(config::recycler_storage_vault_white_list.begin(), |
567 | 8 | config::recycler_storage_vault_white_list.end(), vault.name()); |
568 | 8 | it == config::recycler_storage_vault_white_list.end()) { |
569 | 2 | LOG_WARNING( |
570 | 2 | "failed to init accessor for vault because this vault is not in " |
571 | 2 | "config::recycler_storage_vault_white_list. ") |
572 | 2 | .tag(" vault name:", vault.name()) |
573 | 2 | .tag(" config::recycler_storage_vault_white_list:", |
574 | 2 | recycler_storage_vault_white_list); |
575 | 2 | continue; |
576 | 2 | } |
577 | 8 | } |
578 | 16 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault", |
579 | 16 | &accessor_map_, &vault); |
580 | 16 | if (vault.has_hdfs_info()) { |
581 | 9 | auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info()); |
582 | 9 | int ret = accessor->init(); |
583 | 9 | if (ret != 0) { |
584 | 4 | LOG(WARNING) << "failed to init hdfs accessor. instance_id=" << instance_id_ |
585 | 4 | << " resource_id=" << vault.id() << " name=" << vault.name() |
586 | 4 | << " hdfs_vault=" << vault.hdfs_info().ShortDebugString(); |
587 | 4 | continue; |
588 | 4 | } |
589 | 5 | LOG(INFO) << "succeed to init hdfs accessor. instance_id=" << instance_id_ |
590 | 5 | << " resource_id=" << vault.id() << " name=" << vault.name() |
591 | 5 | << " hdfs_vault=" << vault.hdfs_info().ShortDebugString(); |
592 | 5 | accessor_map_.emplace(vault.id(), std::move(accessor)); |
593 | 7 | } else if (vault.has_obj_info()) { |
594 | 7 | auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info()); |
595 | 7 | if (!s3_conf) { |
596 | 1 | LOG(WARNING) << "failed to init object accessor, invalid conf, instance_id=" |
597 | 1 | << instance_id_ << " s3_vault=" << vault.obj_info().ShortDebugString(); |
598 | 1 | continue; |
599 | 1 | } |
600 | | |
601 | 6 | std::shared_ptr<S3Accessor> accessor; |
602 | 6 | int ret = S3Accessor::create(*s3_conf, &accessor); |
603 | 6 | if (ret != 0) { |
604 | 0 | LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_ |
605 | 0 | << " resource_id=" << vault.id() << " name=" << vault.name() |
606 | 0 | << " ret=" << ret |
607 | 0 | << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString()); |
608 | 0 | continue; |
609 | 0 | } |
610 | 6 | LOG(INFO) << "succeed to init s3 accessor. instance_id=" << instance_id_ |
611 | 6 | << " resource_id=" << vault.id() << " name=" << vault.name() << " ret=" << ret |
612 | 6 | << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString()); |
613 | 6 | accessor_map_.emplace(vault.id(), std::move(accessor)); |
614 | 6 | } |
615 | 16 | } |
616 | | |
617 | 7 | if (!it->is_valid()) { |
618 | 0 | LOG_WARNING("failed to get storage vault kv"); |
619 | 0 | return -1; |
620 | 0 | } |
621 | | |
622 | 7 | if (accessor_map_.empty()) { |
623 | 1 | LOG(WARNING) << "no accessors for instance=" << instance_id_; |
624 | 1 | return -2; |
625 | 1 | } |
626 | 6 | LOG_INFO("finish init instance recycler number_accessors={} instance=", accessor_map_.size(), |
627 | 6 | instance_id_); |
628 | | |
629 | 6 | return 0; |
630 | 7 | } |
631 | | |
632 | 73 | int InstanceRecycler::init() { |
633 | 73 | int ret = init_obj_store_accessors(); |
634 | 73 | if (ret != 0) { |
635 | 0 | return ret; |
636 | 0 | } |
637 | | |
638 | 73 | return init_storage_vault_accessors(); |
639 | 73 | } |
640 | | |
/**
 * Bundles several `int()` callables into a single task that runs them one after another.
 *
 * The callables are copied into the returned task. When the task is invoked, every callable
 * is executed in argument order (none is skipped after a failure).
 *
 * @param funcs callables returning an int status, 0 meaning success
 * @return a task yielding 0 if every callable returned 0, otherwise the non-zero status of
 *         the last callable that failed
 */
template <typename... Func>
auto task_wrapper(Func... funcs) -> std::function<int()> {
    return [funcs...]() -> int {
        // A braced list evaluates its elements left to right, which is what keeps the
        // callables running in the order they were passed.
        const std::initializer_list<int> statuses = {funcs()...};
        int last_failure = 0;
        for (const int status : statuses) {
            if (status != 0) {
                last_failure = status;
            }
        }
        return last_failure;
    };
}
655 | | |
656 | 10 | int InstanceRecycler::do_recycle() { |
657 | 10 | TEST_SYNC_POINT("InstanceRecycler.do_recycle"); |
658 | 10 | if (instance_info_.status() == InstanceInfoPB::DELETED) { |
659 | 0 | return recycle_deleted_instance(); |
660 | 10 | } else if (instance_info_.status() == InstanceInfoPB::NORMAL) { |
661 | 10 | SyncExecutor<int> sync_executor(_thread_pool_group.group_recycle_function_pool, |
662 | 10 | fmt::format("instance id {}", instance_id_), |
663 | 80 | [](int r) { return r != 0; }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEi Line | Count | Source | 663 | 80 | [](int r) { return r != 0; }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEi |
664 | 10 | sync_executor |
665 | 10 | .add(task_wrapper( // dropped table and dropped partition need to be recycled in series |
666 | | // becase they may both recycle the same set of tablets |
667 | | // recycle dropped table or idexes(mv, rollup) |
668 | 10 | [this]() -> int { return InstanceRecycler::recycle_indexes(); }, recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEv Line | Count | Source | 668 | 10 | [this]() -> int { return InstanceRecycler::recycle_indexes(); }, |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEv |
669 | | // recycle dropped partitions |
670 | 10 | [this]() -> int { return InstanceRecycler::recycle_partitions(); })) recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv Line | Count | Source | 670 | 10 | [this]() -> int { return InstanceRecycler::recycle_partitions(); })) |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv |
671 | 10 | .add(task_wrapper( |
672 | 10 | [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); })) recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv Line | Count | Source | 672 | 10 | [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); })) |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv |
673 | 10 | .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); })) recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv Line | Count | Source | 673 | 10 | .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); })) |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv |
674 | 10 | .add(task_wrapper( |
675 | 10 | [this]() { return InstanceRecycler::abort_timeout_txn(); }, recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv Line | Count | Source | 675 | 10 | [this]() { return InstanceRecycler::abort_timeout_txn(); }, |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv |
676 | 10 | [this]() { return InstanceRecycler::recycle_expired_txn_label(); })) recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv Line | Count | Source | 676 | 10 | [this]() { return InstanceRecycler::recycle_expired_txn_label(); })) |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv |
677 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); })) recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv Line | Count | Source | 677 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); })) |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv |
678 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); })) recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv Line | Count | Source | 678 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); })) |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv |
679 | 10 | .add(task_wrapper( |
680 | 10 | [this]() { return InstanceRecycler::recycle_expired_stage_objects(); })) recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv Line | Count | Source | 680 | 10 | [this]() { return InstanceRecycler::recycle_expired_stage_objects(); })) |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv |
681 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); })); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv Line | Count | Source | 681 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); })); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv |
682 | 10 | bool finished = true; |
683 | 10 | std::vector<int> rets = sync_executor.when_all(&finished); |
684 | 80 | for (int ret : rets) { |
685 | 80 | if (ret != 0) { |
686 | 0 | return ret; |
687 | 0 | } |
688 | 80 | } |
689 | 10 | return finished ? 0 : -1; |
690 | 10 | } else { |
691 | 0 | LOG(WARNING) << "invalid instance status: " << instance_info_.status() |
692 | 0 | << " instance_id=" << instance_id_; |
693 | 0 | return -1; |
694 | 0 | } |
695 | 10 | } |
696 | | |
697 | | /** |
698 | | * 1. delete all remote data |
699 | | * 2. delete all kv |
700 | | * 3. remove instance kv |
701 | | */ |
702 | 1 | int InstanceRecycler::recycle_deleted_instance() { |
703 | 1 | LOG_INFO("begin to recycle deleted instance").tag("instance_id", instance_id_); |
704 | | |
705 | 1 | int ret = 0; |
706 | 1 | auto start_time = steady_clock::now(); |
707 | | |
708 | 1 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { |
709 | 1 | auto cost = duration<float>(steady_clock::now() - start_time).count(); |
710 | 1 | LOG(INFO) << (ret == 0 ? "successfully" : "failed to") |
711 | 1 | << " recycle deleted instance, cost=" << cost |
712 | 1 | << "s, instance_id=" << instance_id_; |
713 | 1 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEPi Line | Count | Source | 708 | 1 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { | 709 | 1 | auto cost = duration<float>(steady_clock::now() - start_time).count(); | 710 | 1 | LOG(INFO) << (ret == 0 ? "successfully" : "failed to") | 711 | 1 | << " recycle deleted instance, cost=" << cost | 712 | 1 | << "s, instance_id=" << instance_id_; | 713 | 1 | }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEPi |
714 | | |
715 | | // delete all remote data |
716 | 2 | for (auto& [_, accessor] : accessor_map_) { |
717 | 2 | if (stopped()) { |
718 | 0 | return ret; |
719 | 0 | } |
720 | | |
721 | 2 | LOG(INFO) << "begin to delete all objects in " << accessor->uri(); |
722 | 2 | int del_ret = accessor->delete_all(); |
723 | 2 | if (del_ret == 0) { |
724 | 2 | LOG(INFO) << "successfully delete all objects in " << accessor->uri(); |
725 | 2 | } else if (del_ret != 1) { // no need to log, because S3Accessor has logged this error |
726 | | // If `del_ret == 1`, it can be considered that the object data has been recycled by cloud platform, |
727 | | // so the recycling has been successful. |
728 | 0 | ret = -1; |
729 | 0 | } |
730 | 2 | } |
731 | | |
732 | 1 | if (ret != 0) { |
733 | 0 | LOG(WARNING) << "failed to delete all data of deleted instance=" << instance_id_; |
734 | 0 | return ret; |
735 | 0 | } |
736 | | |
737 | | // delete all kv |
738 | 1 | std::unique_ptr<Transaction> txn; |
739 | 1 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
740 | 1 | if (err != TxnErrorCode::TXN_OK) { |
741 | 0 | LOG(WARNING) << "failed to create txn"; |
742 | 0 | ret = -1; |
743 | 0 | return -1; |
744 | 0 | } |
745 | 1 | LOG(INFO) << "begin to delete all kv, instance_id=" << instance_id_; |
746 | | // delete kv before deleting objects to prevent the checker from misjudging data loss |
747 | 1 | std::string start_txn_key = txn_key_prefix(instance_id_); |
748 | 1 | std::string end_txn_key = txn_key_prefix(instance_id_ + '\x00'); |
749 | 1 | txn->remove(start_txn_key, end_txn_key); |
750 | 1 | std::string start_version_key = version_key_prefix(instance_id_); |
751 | 1 | std::string end_version_key = version_key_prefix(instance_id_ + '\x00'); |
752 | 1 | txn->remove(start_version_key, end_version_key); |
753 | 1 | std::string start_meta_key = meta_key_prefix(instance_id_); |
754 | 1 | std::string end_meta_key = meta_key_prefix(instance_id_ + '\x00'); |
755 | 1 | txn->remove(start_meta_key, end_meta_key); |
756 | 1 | std::string start_recycle_key = recycle_key_prefix(instance_id_); |
757 | 1 | std::string end_recycle_key = recycle_key_prefix(instance_id_ + '\x00'); |
758 | 1 | txn->remove(start_recycle_key, end_recycle_key); |
759 | 1 | std::string start_stats_tablet_key = stats_tablet_key({instance_id_, 0, 0, 0, 0}); |
760 | 1 | std::string end_stats_tablet_key = stats_tablet_key({instance_id_, INT64_MAX, 0, 0, 0}); |
761 | 1 | txn->remove(start_stats_tablet_key, end_stats_tablet_key); |
762 | 1 | std::string start_copy_key = copy_key_prefix(instance_id_); |
763 | 1 | std::string end_copy_key = copy_key_prefix(instance_id_ + '\x00'); |
764 | 1 | txn->remove(start_copy_key, end_copy_key); |
765 | | // should not remove job key range, because we need to reserve job recycle kv |
766 | | // 0:instance_id 1:table_id 2:index_id 3:part_id 4:tablet_id |
767 | 1 | std::string start_job_tablet_key = job_tablet_key({instance_id_, 0, 0, 0, 0}); |
768 | 1 | std::string end_job_tablet_key = job_tablet_key({instance_id_, INT64_MAX, 0, 0, 0}); |
769 | 1 | txn->remove(start_job_tablet_key, end_job_tablet_key); |
770 | 1 | StorageVaultKeyInfo key_info0 {instance_id_, ""}; |
771 | 1 | StorageVaultKeyInfo key_info1 {instance_id_, "\xff"}; |
772 | 1 | std::string start_vault_key = storage_vault_key(key_info0); |
773 | 1 | std::string end_vault_key = storage_vault_key(key_info1); |
774 | 1 | txn->remove(start_vault_key, end_vault_key); |
775 | 1 | err = txn->commit(); |
776 | 1 | if (err != TxnErrorCode::TXN_OK) { |
777 | 0 | LOG(WARNING) << "failed to delete all kv, instance_id=" << instance_id_ << ", err=" << err; |
778 | 0 | ret = -1; |
779 | 0 | } |
780 | | |
781 | 1 | if (ret == 0) { |
782 | | // remove instance kv |
783 | | // ATTN: MUST ensure that cloud platform won't regenerate the same instance id |
784 | 1 | err = txn_kv_->create_txn(&txn); |
785 | 1 | if (err != TxnErrorCode::TXN_OK) { |
786 | 0 | LOG(WARNING) << "failed to create txn"; |
787 | 0 | ret = -1; |
788 | 0 | return ret; |
789 | 0 | } |
790 | 1 | std::string key; |
791 | 1 | instance_key({instance_id_}, &key); |
792 | 1 | txn->remove(key); |
793 | 1 | err = txn->commit(); |
794 | 1 | if (err != TxnErrorCode::TXN_OK) { |
795 | 0 | LOG(WARNING) << "failed to delete instance kv, instance_id=" << instance_id_ |
796 | 0 | << " err=" << err; |
797 | 0 | ret = -1; |
798 | 0 | } |
799 | 1 | } |
800 | 1 | return ret; |
801 | 1 | } |
802 | | |
803 | 14 | int InstanceRecycler::recycle_indexes() { |
804 | 14 | const std::string task_name = "recycle_indexes"; |
805 | 14 | int64_t num_scanned = 0; |
806 | 14 | int64_t num_expired = 0; |
807 | 14 | int64_t num_recycled = 0; |
808 | | |
809 | 14 | RecycleIndexKeyInfo index_key_info0 {instance_id_, 0}; |
810 | 14 | RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX}; |
811 | 14 | std::string index_key0; |
812 | 14 | std::string index_key1; |
813 | 14 | recycle_index_key(index_key_info0, &index_key0); |
814 | 14 | recycle_index_key(index_key_info1, &index_key1); |
815 | | |
816 | 14 | LOG_INFO("begin to recycle indexes").tag("instance_id", instance_id_); |
817 | | |
818 | 14 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
819 | 14 | register_recycle_task(task_name, start_time); |
820 | | |
821 | 14 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { |
822 | 14 | unregister_recycle_task(task_name); |
823 | 14 | int64_t cost = |
824 | 14 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
825 | 14 | LOG_INFO("recycle indexes finished, cost={}s", cost) |
826 | 14 | .tag("instance_id", instance_id_) |
827 | 14 | .tag("num_scanned", num_scanned) |
828 | 14 | .tag("num_expired", num_expired) |
829 | 14 | .tag("num_recycled", num_recycled); |
830 | 14 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEPi Line | Count | Source | 821 | 12 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { | 822 | 12 | unregister_recycle_task(task_name); | 823 | 12 | int64_t cost = | 824 | 12 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 825 | 12 | LOG_INFO("recycle indexes finished, cost={}s", cost) | 826 | 12 | .tag("instance_id", instance_id_) | 827 | 12 | .tag("num_scanned", num_scanned) | 828 | 12 | .tag("num_expired", num_expired) | 829 | 12 | .tag("num_recycled", num_recycled); | 830 | 12 | }); |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEPi Line | Count | Source | 821 | 2 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { | 822 | 2 | unregister_recycle_task(task_name); | 823 | 2 | int64_t cost = | 824 | 2 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 825 | 2 | LOG_INFO("recycle indexes finished, cost={}s", cost) | 826 | 2 | .tag("instance_id", instance_id_) | 827 | 2 | .tag("num_scanned", num_scanned) | 828 | 2 | .tag("num_expired", num_expired) | 829 | 2 | .tag("num_recycled", num_recycled); | 830 | 2 | }); |
|
831 | | |
832 | 14 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
833 | | |
834 | 14 | auto calc_expiration = [&earlest_ts, this](const RecycleIndexPB& index) { |
835 | 8 | if (config::force_immediate_recycle) { |
836 | 2 | return 0L; |
837 | 2 | } |
838 | 6 | int64_t expiration = index.expiration() > 0 ? index.expiration() : index.creation_time(); |
839 | 6 | int64_t retention_seconds = config::retention_seconds; |
840 | 6 | if (index.state() == RecycleIndexPB::DROPPED) { |
841 | 6 | retention_seconds = |
842 | 6 | std::min(config::dropped_index_retention_seconds, retention_seconds); |
843 | 6 | } |
844 | 6 | int64_t final_expiration = expiration + retention_seconds; |
845 | 6 | if (earlest_ts > final_expiration) { |
846 | 2 | earlest_ts = final_expiration; |
847 | 2 | g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, earlest_ts); |
848 | 2 | } |
849 | 6 | return final_expiration; |
850 | 8 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_3clERKNS0_14RecycleIndexPBE Line | Count | Source | 834 | 6 | auto calc_expiration = [&earlest_ts, this](const RecycleIndexPB& index) { | 835 | 6 | if (config::force_immediate_recycle) { | 836 | 0 | return 0L; | 837 | 0 | } | 838 | 6 | int64_t expiration = index.expiration() > 0 ? index.expiration() : index.creation_time(); | 839 | 6 | int64_t retention_seconds = config::retention_seconds; | 840 | 6 | if (index.state() == RecycleIndexPB::DROPPED) { | 841 | 6 | retention_seconds = | 842 | 6 | std::min(config::dropped_index_retention_seconds, retention_seconds); | 843 | 6 | } | 844 | 6 | int64_t final_expiration = expiration + retention_seconds; | 845 | 6 | if (earlest_ts > final_expiration) { | 846 | 2 | earlest_ts = final_expiration; | 847 | 2 | g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, earlest_ts); | 848 | 2 | } | 849 | 6 | return final_expiration; | 850 | 6 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_3clERKNS0_14RecycleIndexPBE Line | Count | Source | 834 | 2 | auto calc_expiration = [&earlest_ts, this](const RecycleIndexPB& index) { | 835 | 2 | if (config::force_immediate_recycle) { | 836 | 2 | return 0L; | 837 | 2 | } | 838 | 0 | int64_t expiration = index.expiration() > 0 ? index.expiration() : index.creation_time(); | 839 | 0 | int64_t retention_seconds = config::retention_seconds; | 840 | 0 | if (index.state() == RecycleIndexPB::DROPPED) { | 841 | 0 | retention_seconds = | 842 | 0 | std::min(config::dropped_index_retention_seconds, retention_seconds); | 843 | 0 | } | 844 | 0 | int64_t final_expiration = expiration + retention_seconds; | 845 | 0 | if (earlest_ts > final_expiration) { | 846 | 0 | earlest_ts = final_expiration; | 847 | 0 | g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, earlest_ts); | 848 | 0 | } | 849 | 0 | return final_expiration; | 850 | 2 | }; |
|
851 | | |
852 | | // Elements in `index_keys` has the same lifetime as `it` in `scan_and_recycle` |
853 | 14 | std::vector<std::string_view> index_keys; |
854 | 14 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { |
855 | 8 | ++num_scanned; |
856 | 8 | RecycleIndexPB index_pb; |
857 | 8 | if (!index_pb.ParseFromArray(v.data(), v.size())) { |
858 | 0 | LOG_WARNING("malformed recycle index value").tag("key", hex(k)); |
859 | 0 | return -1; |
860 | 0 | } |
861 | 8 | int64_t current_time = ::time(nullptr); |
862 | 8 | if (current_time < calc_expiration(index_pb)) { // not expired |
863 | 0 | return 0; |
864 | 0 | } |
865 | 8 | ++num_expired; |
866 | | // decode index_id |
867 | 8 | auto k1 = k; |
868 | 8 | k1.remove_prefix(1); |
869 | 8 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
870 | 8 | decode_key(&k1, &out); |
871 | | // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB |
872 | 8 | auto index_id = std::get<int64_t>(std::get<0>(out[3])); |
873 | 8 | LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_ |
874 | 8 | << " table_id=" << index_pb.table_id() << " index_id=" << index_id |
875 | 8 | << " state=" << RecycleIndexPB::State_Name(index_pb.state()); |
876 | | // Change state to RECYCLING |
877 | 8 | std::unique_ptr<Transaction> txn; |
878 | 8 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
879 | 8 | if (err != TxnErrorCode::TXN_OK) { |
880 | 0 | LOG_WARNING("failed to create txn").tag("err", err); |
881 | 0 | return -1; |
882 | 0 | } |
883 | 8 | std::string val; |
884 | 8 | err = txn->get(k, &val); |
885 | 8 | if (err == |
886 | 8 | TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it |
887 | 0 | LOG_INFO("index {} has been recycled or committed", index_id); |
888 | 0 | return 0; |
889 | 0 | } |
890 | 8 | if (err != TxnErrorCode::TXN_OK) { |
891 | 0 | LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err); |
892 | 0 | return -1; |
893 | 0 | } |
894 | 8 | index_pb.Clear(); |
895 | 8 | if (!index_pb.ParseFromString(val)) { |
896 | 0 | LOG_WARNING("malformed recycle index value").tag("key", hex(k)); |
897 | 0 | return -1; |
898 | 0 | } |
899 | 8 | if (index_pb.state() != RecycleIndexPB::RECYCLING) { |
900 | 7 | index_pb.set_state(RecycleIndexPB::RECYCLING); |
901 | 7 | txn->put(k, index_pb.SerializeAsString()); |
902 | 7 | err = txn->commit(); |
903 | 7 | if (err != TxnErrorCode::TXN_OK) { |
904 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); |
905 | 0 | return -1; |
906 | 0 | } |
907 | 7 | } |
908 | 8 | if (recycle_tablets(index_pb.table_id(), index_id) != 0) { |
909 | 1 | LOG_WARNING("failed to recycle tablets under index") |
910 | 1 | .tag("table_id", index_pb.table_id()) |
911 | 1 | .tag("instance_id", instance_id_) |
912 | 1 | .tag("index_id", index_id); |
913 | 1 | return -1; |
914 | 1 | } |
915 | 7 | ++num_recycled; |
916 | 7 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); |
917 | 7 | index_keys.push_back(k); |
918 | 7 | return 0; |
919 | 8 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 854 | 6 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { | 855 | 6 | ++num_scanned; | 856 | 6 | RecycleIndexPB index_pb; | 857 | 6 | if (!index_pb.ParseFromArray(v.data(), v.size())) { | 858 | 0 | LOG_WARNING("malformed recycle index value").tag("key", hex(k)); | 859 | 0 | return -1; | 860 | 0 | } | 861 | 6 | int64_t current_time = ::time(nullptr); | 862 | 6 | if (current_time < calc_expiration(index_pb)) { // not expired | 863 | 0 | return 0; | 864 | 0 | } | 865 | 6 | ++num_expired; | 866 | | // decode index_id | 867 | 6 | auto k1 = k; | 868 | 6 | k1.remove_prefix(1); | 869 | 6 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 870 | 6 | decode_key(&k1, &out); | 871 | | // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB | 872 | 6 | auto index_id = std::get<int64_t>(std::get<0>(out[3])); | 873 | 6 | LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_ | 874 | 6 | << " table_id=" << index_pb.table_id() << " index_id=" << index_id | 875 | 6 | << " state=" << RecycleIndexPB::State_Name(index_pb.state()); | 876 | | // Change state to RECYCLING | 877 | 6 | std::unique_ptr<Transaction> txn; | 878 | 6 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 879 | 6 | if (err != TxnErrorCode::TXN_OK) { | 880 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 881 | 0 | return -1; | 882 | 0 | } | 883 | 6 | std::string val; | 884 | 6 | err = txn->get(k, &val); | 885 | 6 | if (err == | 886 | 6 | TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it | 887 | 0 | LOG_INFO("index {} has been recycled or committed", index_id); | 888 | 0 | return 0; | 889 | 0 | } | 890 | 6 | if (err != TxnErrorCode::TXN_OK) { | 891 | 0 | LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err); | 892 | 
0 | return -1; | 893 | 0 | } | 894 | 6 | index_pb.Clear(); | 895 | 6 | if (!index_pb.ParseFromString(val)) { | 896 | 0 | LOG_WARNING("malformed recycle index value").tag("key", hex(k)); | 897 | 0 | return -1; | 898 | 0 | } | 899 | 6 | if (index_pb.state() != RecycleIndexPB::RECYCLING) { | 900 | 6 | index_pb.set_state(RecycleIndexPB::RECYCLING); | 901 | 6 | txn->put(k, index_pb.SerializeAsString()); | 902 | 6 | err = txn->commit(); | 903 | 6 | if (err != TxnErrorCode::TXN_OK) { | 904 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); | 905 | 0 | return -1; | 906 | 0 | } | 907 | 6 | } | 908 | 6 | if (recycle_tablets(index_pb.table_id(), index_id) != 0) { | 909 | 0 | LOG_WARNING("failed to recycle tablets under index") | 910 | 0 | .tag("table_id", index_pb.table_id()) | 911 | 0 | .tag("instance_id", instance_id_) | 912 | 0 | .tag("index_id", index_id); | 913 | 0 | return -1; | 914 | 0 | } | 915 | 6 | ++num_recycled; | 916 | 6 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); | 917 | 6 | index_keys.push_back(k); | 918 | 6 | return 0; | 919 | 6 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 854 | 2 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { | 855 | 2 | ++num_scanned; | 856 | 2 | RecycleIndexPB index_pb; | 857 | 2 | if (!index_pb.ParseFromArray(v.data(), v.size())) { | 858 | 0 | LOG_WARNING("malformed recycle index value").tag("key", hex(k)); | 859 | 0 | return -1; | 860 | 0 | } | 861 | 2 | int64_t current_time = ::time(nullptr); | 862 | 2 | if (current_time < calc_expiration(index_pb)) { // not expired | 863 | 0 | return 0; | 864 | 0 | } | 865 | 2 | ++num_expired; | 866 | | // decode index_id | 867 | 2 | auto k1 = k; | 868 | 2 | k1.remove_prefix(1); | 869 | 2 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 870 | 2 | decode_key(&k1, &out); | 871 | | // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB | 872 | 2 | auto index_id = std::get<int64_t>(std::get<0>(out[3])); | 873 | 2 | LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_ | 874 | 2 | << " table_id=" << index_pb.table_id() << " index_id=" << index_id | 875 | 2 | << " state=" << RecycleIndexPB::State_Name(index_pb.state()); | 876 | | // Change state to RECYCLING | 877 | 2 | std::unique_ptr<Transaction> txn; | 878 | 2 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 879 | 2 | if (err != TxnErrorCode::TXN_OK) { | 880 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 881 | 0 | return -1; | 882 | 0 | } | 883 | 2 | std::string val; | 884 | 2 | err = txn->get(k, &val); | 885 | 2 | if (err == | 886 | 2 | TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it | 887 | 0 | LOG_INFO("index {} has been recycled or committed", index_id); | 888 | 0 | return 0; | 889 | 0 | } | 890 | 2 | if (err != TxnErrorCode::TXN_OK) { | 891 | 0 | LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err); | 892 | 0 | return -1; | 
893 | 0 | } | 894 | 2 | index_pb.Clear(); | 895 | 2 | if (!index_pb.ParseFromString(val)) { | 896 | 0 | LOG_WARNING("malformed recycle index value").tag("key", hex(k)); | 897 | 0 | return -1; | 898 | 0 | } | 899 | 2 | if (index_pb.state() != RecycleIndexPB::RECYCLING) { | 900 | 1 | index_pb.set_state(RecycleIndexPB::RECYCLING); | 901 | 1 | txn->put(k, index_pb.SerializeAsString()); | 902 | 1 | err = txn->commit(); | 903 | 1 | if (err != TxnErrorCode::TXN_OK) { | 904 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); | 905 | 0 | return -1; | 906 | 0 | } | 907 | 1 | } | 908 | 2 | if (recycle_tablets(index_pb.table_id(), index_id) != 0) { | 909 | 1 | LOG_WARNING("failed to recycle tablets under index") | 910 | 1 | .tag("table_id", index_pb.table_id()) | 911 | 1 | .tag("instance_id", instance_id_) | 912 | 1 | .tag("index_id", index_id); | 913 | 1 | return -1; | 914 | 1 | } | 915 | 1 | ++num_recycled; | 916 | 1 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); | 917 | 1 | index_keys.push_back(k); | 918 | 1 | return 0; | 919 | 2 | }; |
|
920 | | |
921 | 14 | auto loop_done = [&index_keys, this]() -> int { |
922 | 4 | if (index_keys.empty()) return 0; |
923 | 3 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, |
924 | 3 | [&](int*) { index_keys.clear(); }); recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clEvENKUlPiE_clES3_ Line | Count | Source | 924 | 2 | [&](int*) { index_keys.clear(); }); |
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clEvENKUlPiE_clES3_ Line | Count | Source | 924 | 1 | [&](int*) { index_keys.clear(); }); |
|
925 | 3 | if (0 != txn_remove(txn_kv_.get(), index_keys)) { |
926 | 0 | LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_; |
927 | 0 | return -1; |
928 | 0 | } |
929 | 3 | return 0; |
930 | 3 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clEv Line | Count | Source | 921 | 2 | auto loop_done = [&index_keys, this]() -> int { | 922 | 2 | if (index_keys.empty()) return 0; | 923 | 2 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, | 924 | 2 | [&](int*) { index_keys.clear(); }); | 925 | 2 | if (0 != txn_remove(txn_kv_.get(), index_keys)) { | 926 | 0 | LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_; | 927 | 0 | return -1; | 928 | 0 | } | 929 | 2 | return 0; | 930 | 2 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clEv Line | Count | Source | 921 | 2 | auto loop_done = [&index_keys, this]() -> int { | 922 | 2 | if (index_keys.empty()) return 0; | 923 | 1 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, | 924 | 1 | [&](int*) { index_keys.clear(); }); | 925 | 1 | if (0 != txn_remove(txn_kv_.get(), index_keys)) { | 926 | 0 | LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_; | 927 | 0 | return -1; | 928 | 0 | } | 929 | 1 | return 0; | 930 | 1 | }; |
|
931 | | |
932 | 14 | return scan_and_recycle(index_key0, index_key1, std::move(recycle_func), std::move(loop_done)); |
933 | 14 | } |
934 | | |
935 | | bool check_lazy_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string instance_id, |
936 | 271 | int64_t tablet_id) { |
937 | 271 | std::unique_ptr<Transaction> txn; |
938 | 271 | TxnErrorCode err = txn_kv->create_txn(&txn); |
939 | 271 | if (err != TxnErrorCode::TXN_OK) { |
940 | 0 | LOG(WARNING) << "failed to create txn, instance_id=" << instance_id |
941 | 0 | << " tablet_id=" << tablet_id << " err=" << err; |
942 | 0 | return false; |
943 | 0 | } |
944 | | |
945 | 271 | std::string tablet_idx_key = meta_tablet_idx_key({instance_id, tablet_id}); |
946 | 271 | std::string tablet_idx_val; |
947 | 271 | err = txn->get(tablet_idx_key, &tablet_idx_val); |
948 | 271 | if (TxnErrorCode::TXN_OK != err) { |
949 | 0 | LOG(WARNING) << "failed to get tablet index, instance_id=" << instance_id |
950 | 0 | << " tablet_id=" << tablet_id << " err=" << err |
951 | 0 | << " key=" << hex(tablet_idx_key); |
952 | 0 | return false; |
953 | 0 | } |
954 | | |
955 | 271 | TabletIndexPB tablet_idx_pb; |
956 | 271 | if (!tablet_idx_pb.ParseFromString(tablet_idx_val)) { |
957 | 0 | LOG(WARNING) << "failed to parse tablet_idx_pb, instance_id=" << instance_id |
958 | 0 | << " tablet_id=" << tablet_id; |
959 | 0 | return false; |
960 | 0 | } |
961 | | |
962 | 271 | if (!tablet_idx_pb.has_db_id()) { |
963 | | // In the previous version, the db_id was not set in the index_pb. |
964 | | // If updating to the version which enable txn lazy commit, the db_id will be set. |
965 | 0 | LOG(INFO) << "txn index has no db_id, tablet_id=" << tablet_id |
966 | 0 | << " instance_id=" << instance_id |
967 | 0 | << " tablet_idx_pb=" << tablet_idx_pb.ShortDebugString(); |
968 | 0 | return true; |
969 | 0 | } |
970 | | |
971 | 271 | std::string ver_val; |
972 | 271 | std::string ver_key = |
973 | 271 | partition_version_key({instance_id, tablet_idx_pb.db_id(), tablet_idx_pb.table_id(), |
974 | 271 | tablet_idx_pb.partition_id()}); |
975 | 271 | err = txn->get(ver_key, &ver_val); |
976 | | |
977 | 271 | if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) { |
978 | 0 | LOG(INFO) << "" |
979 | 0 | "partition version not found, instance_id=" |
980 | 0 | << instance_id << " db_id=" << tablet_idx_pb.db_id() |
981 | 0 | << " table_id=" << tablet_idx_pb.table_id() |
982 | 0 | << " partition_id=" << tablet_idx_pb.partition_id() << " tablet_id=" << tablet_id |
983 | 0 | << " key=" << hex(ver_key); |
984 | 0 | return true; |
985 | 0 | } |
986 | | |
987 | 271 | if (TxnErrorCode::TXN_OK != err) { |
988 | 0 | LOG(WARNING) << "failed to get partition version, instance_id=" << instance_id |
989 | 0 | << " db_id=" << tablet_idx_pb.db_id() |
990 | 0 | << " table_id=" << tablet_idx_pb.table_id() |
991 | 0 | << " partition_id=" << tablet_idx_pb.partition_id() |
992 | 0 | << " tablet_id=" << tablet_id << " key=" << hex(ver_key) << " err=" << err; |
993 | 0 | return false; |
994 | 0 | } |
995 | | |
996 | 271 | VersionPB version_pb; |
997 | 271 | if (!version_pb.ParseFromString(ver_val)) { |
998 | 0 | LOG(WARNING) << "failed to parse version_pb, instance_id=" << instance_id |
999 | 0 | << " db_id=" << tablet_idx_pb.db_id() |
1000 | 0 | << " table_id=" << tablet_idx_pb.table_id() |
1001 | 0 | << " partition_id=" << tablet_idx_pb.partition_id() |
1002 | 0 | << " tablet_id=" << tablet_id << " key=" << hex(ver_key); |
1003 | 0 | return false; |
1004 | 0 | } |
1005 | | |
1006 | 271 | if (version_pb.pending_txn_ids_size() > 0) { |
1007 | 20 | TEST_SYNC_POINT_CALLBACK("check_lazy_txn_finished::txn_not_finished"); |
1008 | 20 | DCHECK(version_pb.pending_txn_ids_size() == 1); |
1009 | 20 | LOG(WARNING) << "lazy txn not finished, instance_id=" << instance_id |
1010 | 20 | << " db_id=" << tablet_idx_pb.db_id() |
1011 | 20 | << " table_id=" << tablet_idx_pb.table_id() |
1012 | 20 | << " partition_id=" << tablet_idx_pb.partition_id() |
1013 | 20 | << " tablet_id=" << tablet_id << " txn_id=" << version_pb.pending_txn_ids(0) |
1014 | 20 | << " key=" << hex(ver_key); |
1015 | 20 | return false; |
1016 | 20 | } |
1017 | 251 | return true; |
1018 | 271 | } |
1019 | | |
1020 | 14 | int InstanceRecycler::recycle_partitions() { |
1021 | 14 | const std::string task_name = "recycle_partitions"; |
1022 | 14 | int64_t num_scanned = 0; |
1023 | 14 | int64_t num_expired = 0; |
1024 | 14 | int64_t num_recycled = 0; |
1025 | | |
1026 | 14 | RecyclePartKeyInfo part_key_info0 {instance_id_, 0}; |
1027 | 14 | RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX}; |
1028 | 14 | std::string part_key0; |
1029 | 14 | std::string part_key1; |
1030 | 14 | recycle_partition_key(part_key_info0, &part_key0); |
1031 | 14 | recycle_partition_key(part_key_info1, &part_key1); |
1032 | | |
1033 | 14 | LOG_INFO("begin to recycle partitions").tag("instance_id", instance_id_); |
1034 | | |
1035 | 14 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
1036 | 14 | register_recycle_task(task_name, start_time); |
1037 | | |
1038 | 14 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { |
1039 | 14 | unregister_recycle_task(task_name); |
1040 | 14 | int64_t cost = |
1041 | 14 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
1042 | 14 | LOG_INFO("recycle partitions finished, cost={}s", cost) |
1043 | 14 | .tag("instance_id", instance_id_) |
1044 | 14 | .tag("num_scanned", num_scanned) |
1045 | 14 | .tag("num_expired", num_expired) |
1046 | 14 | .tag("num_recycled", num_recycled); |
1047 | 14 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEPi Line | Count | Source | 1038 | 12 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { | 1039 | 12 | unregister_recycle_task(task_name); | 1040 | 12 | int64_t cost = | 1041 | 12 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 1042 | 12 | LOG_INFO("recycle partitions finished, cost={}s", cost) | 1043 | 12 | .tag("instance_id", instance_id_) | 1044 | 12 | .tag("num_scanned", num_scanned) | 1045 | 12 | .tag("num_expired", num_expired) | 1046 | 12 | .tag("num_recycled", num_recycled); | 1047 | 12 | }); |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEPi Line | Count | Source | 1038 | 2 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { | 1039 | 2 | unregister_recycle_task(task_name); | 1040 | 2 | int64_t cost = | 1041 | 2 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 1042 | 2 | LOG_INFO("recycle partitions finished, cost={}s", cost) | 1043 | 2 | .tag("instance_id", instance_id_) | 1044 | 2 | .tag("num_scanned", num_scanned) | 1045 | 2 | .tag("num_expired", num_expired) | 1046 | 2 | .tag("num_recycled", num_recycled); | 1047 | 2 | }); |
|
1048 | | |
1049 | 14 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
1050 | | |
1051 | 14 | auto calc_expiration = [&earlest_ts, this](const RecyclePartitionPB& partition) { |
1052 | 8 | if (config::force_immediate_recycle) { |
1053 | 2 | return 0L; |
1054 | 2 | } |
1055 | 6 | int64_t expiration = |
1056 | 6 | partition.expiration() > 0 ? partition.expiration() : partition.creation_time(); |
1057 | 6 | int64_t retention_seconds = config::retention_seconds; |
1058 | 6 | if (partition.state() == RecyclePartitionPB::DROPPED) { |
1059 | 6 | retention_seconds = |
1060 | 6 | std::min(config::dropped_partition_retention_seconds, retention_seconds); |
1061 | 6 | } |
1062 | 6 | int64_t final_expiration = expiration + retention_seconds; |
1063 | 6 | if (earlest_ts > final_expiration) { |
1064 | 2 | earlest_ts = final_expiration; |
1065 | 2 | g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, earlest_ts); |
1066 | 2 | } |
1067 | 6 | return final_expiration; |
1068 | 8 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_3clERKNS0_18RecyclePartitionPBE Line | Count | Source | 1051 | 6 | auto calc_expiration = [&earlest_ts, this](const RecyclePartitionPB& partition) { | 1052 | 6 | if (config::force_immediate_recycle) { | 1053 | 0 | return 0L; | 1054 | 0 | } | 1055 | 6 | int64_t expiration = | 1056 | 6 | partition.expiration() > 0 ? partition.expiration() : partition.creation_time(); | 1057 | 6 | int64_t retention_seconds = config::retention_seconds; | 1058 | 6 | if (partition.state() == RecyclePartitionPB::DROPPED) { | 1059 | 6 | retention_seconds = | 1060 | 6 | std::min(config::dropped_partition_retention_seconds, retention_seconds); | 1061 | 6 | } | 1062 | 6 | int64_t final_expiration = expiration + retention_seconds; | 1063 | 6 | if (earlest_ts > final_expiration) { | 1064 | 2 | earlest_ts = final_expiration; | 1065 | 2 | g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, earlest_ts); | 1066 | 2 | } | 1067 | 6 | return final_expiration; | 1068 | 6 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_3clERKNS0_18RecyclePartitionPBE Line | Count | Source | 1051 | 2 | auto calc_expiration = [&earlest_ts, this](const RecyclePartitionPB& partition) { | 1052 | 2 | if (config::force_immediate_recycle) { | 1053 | 2 | return 0L; | 1054 | 2 | } | 1055 | 0 | int64_t expiration = | 1056 | 0 | partition.expiration() > 0 ? partition.expiration() : partition.creation_time(); | 1057 | 0 | int64_t retention_seconds = config::retention_seconds; | 1058 | 0 | if (partition.state() == RecyclePartitionPB::DROPPED) { | 1059 | 0 | retention_seconds = | 1060 | 0 | std::min(config::dropped_partition_retention_seconds, retention_seconds); | 1061 | 0 | } | 1062 | 0 | int64_t final_expiration = expiration + retention_seconds; | 1063 | 0 | if (earlest_ts > final_expiration) { | 1064 | 0 | earlest_ts = final_expiration; | 1065 | 0 | g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, earlest_ts); | 1066 | 0 | } | 1067 | 0 | return final_expiration; | 1068 | 2 | }; |
|
1069 | | |
1070 | | // Elements in `partition_keys` has the same lifetime as `it` in `scan_and_recycle` |
1071 | 14 | std::vector<std::string_view> partition_keys; |
1072 | 14 | std::vector<std::string> partition_version_keys; |
1073 | 14 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { |
1074 | 8 | ++num_scanned; |
1075 | 8 | RecyclePartitionPB part_pb; |
1076 | 8 | if (!part_pb.ParseFromArray(v.data(), v.size())) { |
1077 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); |
1078 | 0 | return -1; |
1079 | 0 | } |
1080 | 8 | int64_t current_time = ::time(nullptr); |
1081 | 8 | if (current_time < calc_expiration(part_pb)) { // not expired |
1082 | 0 | return 0; |
1083 | 0 | } |
1084 | 8 | ++num_expired; |
1085 | | // decode partition_id |
1086 | 8 | auto k1 = k; |
1087 | 8 | k1.remove_prefix(1); |
1088 | 8 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
1089 | 8 | decode_key(&k1, &out); |
1090 | | // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB |
1091 | 8 | auto partition_id = std::get<int64_t>(std::get<0>(out[3])); |
1092 | 8 | LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_ |
1093 | 8 | << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id |
1094 | 8 | << " state=" << RecyclePartitionPB::State_Name(part_pb.state()); |
1095 | | // Change state to RECYCLING |
1096 | 8 | std::unique_ptr<Transaction> txn; |
1097 | 8 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
1098 | 8 | if (err != TxnErrorCode::TXN_OK) { |
1099 | 0 | LOG_WARNING("failed to create txn").tag("err", err); |
1100 | 0 | return -1; |
1101 | 0 | } |
1102 | 8 | std::string val; |
1103 | 8 | err = txn->get(k, &val); |
1104 | 8 | if (err == |
1105 | 8 | TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it |
1106 | 0 | LOG_INFO("partition {} has been recycled or committed", partition_id); |
1107 | 0 | return 0; |
1108 | 0 | } |
1109 | 8 | if (err != TxnErrorCode::TXN_OK) { |
1110 | 0 | LOG_WARNING("failed to get kv"); |
1111 | 0 | return -1; |
1112 | 0 | } |
1113 | 8 | part_pb.Clear(); |
1114 | 8 | if (!part_pb.ParseFromString(val)) { |
1115 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); |
1116 | 0 | return -1; |
1117 | 0 | } |
1118 | | // Partitions with PREPARED state MUST have no data |
1119 | 8 | bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED; |
1120 | 8 | if (part_pb.state() != RecyclePartitionPB::RECYCLING) { |
1121 | 7 | part_pb.set_state(RecyclePartitionPB::RECYCLING); |
1122 | 7 | txn->put(k, part_pb.SerializeAsString()); |
1123 | 7 | err = txn->commit(); |
1124 | 7 | if (err != TxnErrorCode::TXN_OK) { |
1125 | 0 | LOG_WARNING("failed to commit txn: {}", err); |
1126 | 0 | return -1; |
1127 | 0 | } |
1128 | 7 | } |
1129 | | |
1130 | 8 | int ret = 0; |
1131 | 32 | for (int64_t index_id : part_pb.index_id()) { |
1132 | 32 | if (recycle_tablets(part_pb.table_id(), index_id, partition_id, is_empty_tablet) != 0) { |
1133 | 1 | LOG_WARNING("failed to recycle tablets under partition") |
1134 | 1 | .tag("table_id", part_pb.table_id()) |
1135 | 1 | .tag("instance_id", instance_id_) |
1136 | 1 | .tag("index_id", index_id) |
1137 | 1 | .tag("partition_id", partition_id); |
1138 | 1 | ret = -1; |
1139 | 1 | } |
1140 | 32 | } |
1141 | 8 | if (ret == 0) { |
1142 | 7 | ++num_recycled; |
1143 | 7 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); |
1144 | 7 | partition_keys.push_back(k); |
1145 | 7 | if (part_pb.db_id() > 0) { |
1146 | 7 | partition_version_keys.push_back(partition_version_key( |
1147 | 7 | {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id})); |
1148 | 7 | } |
1149 | 7 | } |
1150 | 8 | return ret; |
1151 | 8 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 1073 | 6 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { | 1074 | 6 | ++num_scanned; | 1075 | 6 | RecyclePartitionPB part_pb; | 1076 | 6 | if (!part_pb.ParseFromArray(v.data(), v.size())) { | 1077 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); | 1078 | 0 | return -1; | 1079 | 0 | } | 1080 | 6 | int64_t current_time = ::time(nullptr); | 1081 | 6 | if (current_time < calc_expiration(part_pb)) { // not expired | 1082 | 0 | return 0; | 1083 | 0 | } | 1084 | 6 | ++num_expired; | 1085 | | // decode partition_id | 1086 | 6 | auto k1 = k; | 1087 | 6 | k1.remove_prefix(1); | 1088 | 6 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 1089 | 6 | decode_key(&k1, &out); | 1090 | | // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB | 1091 | 6 | auto partition_id = std::get<int64_t>(std::get<0>(out[3])); | 1092 | 6 | LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_ | 1093 | 6 | << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id | 1094 | 6 | << " state=" << RecyclePartitionPB::State_Name(part_pb.state()); | 1095 | | // Change state to RECYCLING | 1096 | 6 | std::unique_ptr<Transaction> txn; | 1097 | 6 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 1098 | 6 | if (err != TxnErrorCode::TXN_OK) { | 1099 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 1100 | 0 | return -1; | 1101 | 0 | } | 1102 | 6 | std::string val; | 1103 | 6 | err = txn->get(k, &val); | 1104 | 6 | if (err == | 1105 | 6 | TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it | 1106 | 0 | LOG_INFO("partition {} has been recycled or committed", partition_id); | 1107 | 0 | return 0; | 1108 | 0 | } | 1109 | 6 | if (err != TxnErrorCode::TXN_OK) { 
| 1110 | 0 | LOG_WARNING("failed to get kv"); | 1111 | 0 | return -1; | 1112 | 0 | } | 1113 | 6 | part_pb.Clear(); | 1114 | 6 | if (!part_pb.ParseFromString(val)) { | 1115 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); | 1116 | 0 | return -1; | 1117 | 0 | } | 1118 | | // Partitions with PREPARED state MUST have no data | 1119 | 6 | bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED; | 1120 | 6 | if (part_pb.state() != RecyclePartitionPB::RECYCLING) { | 1121 | 6 | part_pb.set_state(RecyclePartitionPB::RECYCLING); | 1122 | 6 | txn->put(k, part_pb.SerializeAsString()); | 1123 | 6 | err = txn->commit(); | 1124 | 6 | if (err != TxnErrorCode::TXN_OK) { | 1125 | 0 | LOG_WARNING("failed to commit txn: {}", err); | 1126 | 0 | return -1; | 1127 | 0 | } | 1128 | 6 | } | 1129 | | | 1130 | 6 | int ret = 0; | 1131 | 30 | for (int64_t index_id : part_pb.index_id()) { | 1132 | 30 | if (recycle_tablets(part_pb.table_id(), index_id, partition_id, is_empty_tablet) != 0) { | 1133 | 0 | LOG_WARNING("failed to recycle tablets under partition") | 1134 | 0 | .tag("table_id", part_pb.table_id()) | 1135 | 0 | .tag("instance_id", instance_id_) | 1136 | 0 | .tag("index_id", index_id) | 1137 | 0 | .tag("partition_id", partition_id); | 1138 | 0 | ret = -1; | 1139 | 0 | } | 1140 | 30 | } | 1141 | 6 | if (ret == 0) { | 1142 | 6 | ++num_recycled; | 1143 | 6 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); | 1144 | 6 | partition_keys.push_back(k); | 1145 | 6 | if (part_pb.db_id() > 0) { | 1146 | 6 | partition_version_keys.push_back(partition_version_key( | 1147 | 6 | {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id})); | 1148 | 6 | } | 1149 | 6 | } | 1150 | 6 | return ret; | 1151 | 6 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 1073 | 2 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { | 1074 | 2 | ++num_scanned; | 1075 | 2 | RecyclePartitionPB part_pb; | 1076 | 2 | if (!part_pb.ParseFromArray(v.data(), v.size())) { | 1077 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); | 1078 | 0 | return -1; | 1079 | 0 | } | 1080 | 2 | int64_t current_time = ::time(nullptr); | 1081 | 2 | if (current_time < calc_expiration(part_pb)) { // not expired | 1082 | 0 | return 0; | 1083 | 0 | } | 1084 | 2 | ++num_expired; | 1085 | | // decode partition_id | 1086 | 2 | auto k1 = k; | 1087 | 2 | k1.remove_prefix(1); | 1088 | 2 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 1089 | 2 | decode_key(&k1, &out); | 1090 | | // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB | 1091 | 2 | auto partition_id = std::get<int64_t>(std::get<0>(out[3])); | 1092 | 2 | LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_ | 1093 | 2 | << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id | 1094 | 2 | << " state=" << RecyclePartitionPB::State_Name(part_pb.state()); | 1095 | | // Change state to RECYCLING | 1096 | 2 | std::unique_ptr<Transaction> txn; | 1097 | 2 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 1098 | 2 | if (err != TxnErrorCode::TXN_OK) { | 1099 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 1100 | 0 | return -1; | 1101 | 0 | } | 1102 | 2 | std::string val; | 1103 | 2 | err = txn->get(k, &val); | 1104 | 2 | if (err == | 1105 | 2 | TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it | 1106 | 0 | LOG_INFO("partition {} has been recycled or committed", partition_id); | 1107 | 0 | return 0; | 1108 | 0 | } | 1109 | 2 | if (err != TxnErrorCode::TXN_OK) { | 1110 | 0 | 
LOG_WARNING("failed to get kv"); | 1111 | 0 | return -1; | 1112 | 0 | } | 1113 | 2 | part_pb.Clear(); | 1114 | 2 | if (!part_pb.ParseFromString(val)) { | 1115 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); | 1116 | 0 | return -1; | 1117 | 0 | } | 1118 | | // Partitions with PREPARED state MUST have no data | 1119 | 2 | bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED; | 1120 | 2 | if (part_pb.state() != RecyclePartitionPB::RECYCLING) { | 1121 | 1 | part_pb.set_state(RecyclePartitionPB::RECYCLING); | 1122 | 1 | txn->put(k, part_pb.SerializeAsString()); | 1123 | 1 | err = txn->commit(); | 1124 | 1 | if (err != TxnErrorCode::TXN_OK) { | 1125 | 0 | LOG_WARNING("failed to commit txn: {}", err); | 1126 | 0 | return -1; | 1127 | 0 | } | 1128 | 1 | } | 1129 | | | 1130 | 2 | int ret = 0; | 1131 | 2 | for (int64_t index_id : part_pb.index_id()) { | 1132 | 2 | if (recycle_tablets(part_pb.table_id(), index_id, partition_id, is_empty_tablet) != 0) { | 1133 | 1 | LOG_WARNING("failed to recycle tablets under partition") | 1134 | 1 | .tag("table_id", part_pb.table_id()) | 1135 | 1 | .tag("instance_id", instance_id_) | 1136 | 1 | .tag("index_id", index_id) | 1137 | 1 | .tag("partition_id", partition_id); | 1138 | 1 | ret = -1; | 1139 | 1 | } | 1140 | 2 | } | 1141 | 2 | if (ret == 0) { | 1142 | 1 | ++num_recycled; | 1143 | 1 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); | 1144 | 1 | partition_keys.push_back(k); | 1145 | 1 | if (part_pb.db_id() > 0) { | 1146 | 1 | partition_version_keys.push_back(partition_version_key( | 1147 | 1 | {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id})); | 1148 | 1 | } | 1149 | 1 | } | 1150 | 2 | return ret; | 1151 | 2 | }; |
|
1152 | | |
1153 | 14 | auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int { |
1154 | 4 | if (partition_keys.empty()) return 0; |
1155 | 3 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) { |
1156 | 3 | partition_keys.clear(); |
1157 | 3 | partition_version_keys.clear(); |
1158 | 3 | }); recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clEvENKUlPiE_clES3_ Line | Count | Source | 1155 | 2 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) { | 1156 | 2 | partition_keys.clear(); | 1157 | 2 | partition_version_keys.clear(); | 1158 | 2 | }); |
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clEvENKUlPiE_clES3_ Line | Count | Source | 1155 | 1 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) { | 1156 | 1 | partition_keys.clear(); | 1157 | 1 | partition_version_keys.clear(); | 1158 | 1 | }); |
|
1159 | 3 | std::unique_ptr<Transaction> txn; |
1160 | 3 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
1161 | 3 | if (err != TxnErrorCode::TXN_OK) { |
1162 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_; |
1163 | 0 | return -1; |
1164 | 0 | } |
1165 | 7 | for (auto& k : partition_keys) { |
1166 | 7 | txn->remove(k); |
1167 | 7 | } |
1168 | 7 | for (auto& k : partition_version_keys) { |
1169 | 7 | txn->remove(k); |
1170 | 7 | } |
1171 | 3 | err = txn->commit(); |
1172 | 3 | if (err != TxnErrorCode::TXN_OK) { |
1173 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_ |
1174 | 0 | << " err=" << err; |
1175 | 0 | return -1; |
1176 | 0 | } |
1177 | 3 | return 0; |
1178 | 3 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clEv Line | Count | Source | 1153 | 2 | auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int { | 1154 | 2 | if (partition_keys.empty()) return 0; | 1155 | 2 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) { | 1156 | 2 | partition_keys.clear(); | 1157 | 2 | partition_version_keys.clear(); | 1158 | 2 | }); | 1159 | 2 | std::unique_ptr<Transaction> txn; | 1160 | 2 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 1161 | 2 | if (err != TxnErrorCode::TXN_OK) { | 1162 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_; | 1163 | 0 | return -1; | 1164 | 0 | } | 1165 | 6 | for (auto& k : partition_keys) { | 1166 | 6 | txn->remove(k); | 1167 | 6 | } | 1168 | 6 | for (auto& k : partition_version_keys) { | 1169 | 6 | txn->remove(k); | 1170 | 6 | } | 1171 | 2 | err = txn->commit(); | 1172 | 2 | if (err != TxnErrorCode::TXN_OK) { | 1173 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_ | 1174 | 0 | << " err=" << err; | 1175 | 0 | return -1; | 1176 | 0 | } | 1177 | 2 | return 0; | 1178 | 2 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clEv Line | Count | Source | 1153 | 2 | auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int { | 1154 | 2 | if (partition_keys.empty()) return 0; | 1155 | 1 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) { | 1156 | 1 | partition_keys.clear(); | 1157 | 1 | partition_version_keys.clear(); | 1158 | 1 | }); | 1159 | 1 | std::unique_ptr<Transaction> txn; | 1160 | 1 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 1161 | 1 | if (err != TxnErrorCode::TXN_OK) { | 1162 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_; | 1163 | 0 | return -1; | 1164 | 0 | } | 1165 | 1 | for (auto& k : partition_keys) { | 1166 | 1 | txn->remove(k); | 1167 | 1 | } | 1168 | 1 | for (auto& k : partition_version_keys) { | 1169 | 1 | txn->remove(k); | 1170 | 1 | } | 1171 | 1 | err = txn->commit(); | 1172 | 1 | if (err != TxnErrorCode::TXN_OK) { | 1173 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_ | 1174 | 0 | << " err=" << err; | 1175 | 0 | return -1; | 1176 | 0 | } | 1177 | 1 | return 0; | 1178 | 1 | }; |
|
1179 | | |
1180 | 14 | return scan_and_recycle(part_key0, part_key1, std::move(recycle_func), std::move(loop_done)); |
1181 | 14 | } |
1182 | | |
1183 | 12 | int InstanceRecycler::recycle_versions() { |
1184 | 12 | int64_t num_scanned = 0; |
1185 | 12 | int64_t num_recycled = 0; |
1186 | | |
1187 | 12 | LOG_INFO("begin to recycle table and partition versions").tag("instance_id", instance_id_); |
1188 | | |
1189 | 12 | auto start_time = steady_clock::now(); |
1190 | | |
1191 | 12 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { |
1192 | 12 | auto cost = duration<float>(steady_clock::now() - start_time).count(); |
1193 | 12 | LOG_INFO("recycle table and partition versions finished, cost={}s", cost) |
1194 | 12 | .tag("instance_id", instance_id_) |
1195 | 12 | .tag("num_scanned", num_scanned) |
1196 | 12 | .tag("num_recycled", num_recycled); |
1197 | 12 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEPi Line | Count | Source | 1191 | 12 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { | 1192 | 12 | auto cost = duration<float>(steady_clock::now() - start_time).count(); | 1193 | 12 | LOG_INFO("recycle table and partition versions finished, cost={}s", cost) | 1194 | 12 | .tag("instance_id", instance_id_) | 1195 | 12 | .tag("num_scanned", num_scanned) | 1196 | 12 | .tag("num_recycled", num_recycled); | 1197 | 12 | }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEPi |
1198 | | |
1199 | 12 | auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0}); |
1200 | 12 | auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0}); |
1201 | 12 | int64_t last_scanned_table_id = 0; |
1202 | 12 | bool is_recycled = false; // Is last scanned kv recycled |
1203 | 12 | auto recycle_func = [&num_scanned, &num_recycled, &last_scanned_table_id, &is_recycled, this]( |
1204 | 12 | std::string_view k, std::string_view) { |
1205 | 2 | ++num_scanned; |
1206 | 2 | auto k1 = k; |
1207 | 2 | k1.remove_prefix(1); |
1208 | | // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id} |
1209 | 2 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
1210 | 2 | decode_key(&k1, &out); |
1211 | 2 | DCHECK_EQ(out.size(), 6) << k; |
1212 | 2 | auto table_id = std::get<int64_t>(std::get<0>(out[4])); |
1213 | 2 | if (table_id == last_scanned_table_id) { // Already handle kvs of this table |
1214 | 0 | num_recycled += is_recycled; // Version kv of this table has been recycled |
1215 | 0 | return 0; |
1216 | 0 | } |
1217 | 2 | last_scanned_table_id = table_id; |
1218 | 2 | is_recycled = false; |
1219 | 2 | auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0}); |
1220 | 2 | auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0}); |
1221 | 2 | std::unique_ptr<Transaction> txn; |
1222 | 2 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
1223 | 2 | if (err != TxnErrorCode::TXN_OK) { |
1224 | 0 | return -1; |
1225 | 0 | } |
1226 | 2 | std::unique_ptr<RangeGetIterator> iter; |
1227 | 2 | err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1); |
1228 | 2 | if (err != TxnErrorCode::TXN_OK) { |
1229 | 0 | return -1; |
1230 | 0 | } |
1231 | 2 | if (iter->has_next()) { // Table is useful, should not recycle table and partition versions |
1232 | 1 | return 0; |
1233 | 1 | } |
1234 | 1 | auto db_id = std::get<int64_t>(std::get<0>(out[3])); |
1235 | | // 1. Remove all partition version kvs of this table |
1236 | 1 | auto partition_version_key_begin = |
1237 | 1 | partition_version_key({instance_id_, db_id, table_id, 0}); |
1238 | 1 | auto partition_version_key_end = |
1239 | 1 | partition_version_key({instance_id_, db_id, table_id, INT64_MAX}); |
1240 | 1 | txn->remove(partition_version_key_begin, partition_version_key_end); |
1241 | 1 | LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin) |
1242 | 1 | << " end=" << hex(partition_version_key_end) << " db_id=" << db_id |
1243 | 1 | << " table_id=" << table_id; |
1244 | | // 2. Remove the table version kv of this table |
1245 | 1 | auto tbl_version_key = table_version_key({instance_id_, db_id, table_id}); |
1246 | 1 | txn->remove(tbl_version_key); |
1247 | 1 | LOG(WARNING) << "remove table version kv " << hex(tbl_version_key); |
1248 | | // 3. Remove mow delete bitmap update lock and tablet compaction lock |
1249 | 1 | std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1}); |
1250 | 1 | txn->remove(lock_key); |
1251 | 1 | LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key); |
1252 | 1 | std::string tablet_compaction_key_begin = |
1253 | 1 | mow_tablet_compaction_key({instance_id_, table_id, 0}); |
1254 | 1 | std::string tablet_compaction_key_end = |
1255 | 1 | mow_tablet_compaction_key({instance_id_, table_id, INT64_MAX}); |
1256 | 1 | txn->remove(tablet_compaction_key_begin, tablet_compaction_key_end); |
1257 | 1 | LOG(WARNING) << "remove mow tablet compaction kv, begin=" |
1258 | 1 | << hex(tablet_compaction_key_begin) |
1259 | 1 | << " end=" << hex(tablet_compaction_key_end) << " db_id=" << db_id |
1260 | 1 | << " table_id=" << table_id; |
1261 | 1 | err = txn->commit(); |
1262 | 1 | if (err != TxnErrorCode::TXN_OK) { |
1263 | 0 | return -1; |
1264 | 0 | } |
1265 | 1 | ++num_recycled; |
1266 | 1 | is_recycled = true; |
1267 | 1 | return 0; |
1268 | 1 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 1204 | 2 | std::string_view k, std::string_view) { | 1205 | 2 | ++num_scanned; | 1206 | 2 | auto k1 = k; | 1207 | 2 | k1.remove_prefix(1); | 1208 | | // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id} | 1209 | 2 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 1210 | 2 | decode_key(&k1, &out); | 1211 | 2 | DCHECK_EQ(out.size(), 6) << k; | 1212 | 2 | auto table_id = std::get<int64_t>(std::get<0>(out[4])); | 1213 | 2 | if (table_id == last_scanned_table_id) { // Already handle kvs of this table | 1214 | 0 | num_recycled += is_recycled; // Version kv of this table has been recycled | 1215 | 0 | return 0; | 1216 | 0 | } | 1217 | 2 | last_scanned_table_id = table_id; | 1218 | 2 | is_recycled = false; | 1219 | 2 | auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0}); | 1220 | 2 | auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0}); | 1221 | 2 | std::unique_ptr<Transaction> txn; | 1222 | 2 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 1223 | 2 | if (err != TxnErrorCode::TXN_OK) { | 1224 | 0 | return -1; | 1225 | 0 | } | 1226 | 2 | std::unique_ptr<RangeGetIterator> iter; | 1227 | 2 | err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1); | 1228 | 2 | if (err != TxnErrorCode::TXN_OK) { | 1229 | 0 | return -1; | 1230 | 0 | } | 1231 | 2 | if (iter->has_next()) { // Table is useful, should not recycle table and partition versions | 1232 | 1 | return 0; | 1233 | 1 | } | 1234 | 1 | auto db_id = std::get<int64_t>(std::get<0>(out[3])); | 1235 | | // 1. 
Remove all partition version kvs of this table | 1236 | 1 | auto partition_version_key_begin = | 1237 | 1 | partition_version_key({instance_id_, db_id, table_id, 0}); | 1238 | 1 | auto partition_version_key_end = | 1239 | 1 | partition_version_key({instance_id_, db_id, table_id, INT64_MAX}); | 1240 | 1 | txn->remove(partition_version_key_begin, partition_version_key_end); | 1241 | 1 | LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin) | 1242 | 1 | << " end=" << hex(partition_version_key_end) << " db_id=" << db_id | 1243 | 1 | << " table_id=" << table_id; | 1244 | | // 2. Remove the table version kv of this table | 1245 | 1 | auto tbl_version_key = table_version_key({instance_id_, db_id, table_id}); | 1246 | 1 | txn->remove(tbl_version_key); | 1247 | 1 | LOG(WARNING) << "remove table version kv " << hex(tbl_version_key); | 1248 | | // 3. Remove mow delete bitmap update lock and tablet compaction lock | 1249 | 1 | std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1}); | 1250 | 1 | txn->remove(lock_key); | 1251 | 1 | LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key); | 1252 | 1 | std::string tablet_compaction_key_begin = | 1253 | 1 | mow_tablet_compaction_key({instance_id_, table_id, 0}); | 1254 | 1 | std::string tablet_compaction_key_end = | 1255 | 1 | mow_tablet_compaction_key({instance_id_, table_id, INT64_MAX}); | 1256 | 1 | txn->remove(tablet_compaction_key_begin, tablet_compaction_key_end); | 1257 | 1 | LOG(WARNING) << "remove mow tablet compaction kv, begin=" | 1258 | 1 | << hex(tablet_compaction_key_begin) | 1259 | 1 | << " end=" << hex(tablet_compaction_key_end) << " db_id=" << db_id | 1260 | 1 | << " table_id=" << table_id; | 1261 | 1 | err = txn->commit(); | 1262 | 1 | if (err != TxnErrorCode::TXN_OK) { | 1263 | 0 | return -1; | 1264 | 0 | } | 1265 | 1 | ++num_recycled; | 1266 | 1 | is_recycled = true; | 1267 | 1 | return 0; | 1268 | 1 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
1269 | | |
1270 | 12 | return scan_and_recycle(version_key_begin, version_key_end, std::move(recycle_func)); |
1271 | 12 | } |
1272 | | |
1273 | | int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id, int64_t partition_id, |
1274 | 41 | bool is_empty_tablet) { |
1275 | 41 | int64_t num_scanned = 0; |
1276 | 41 | std::atomic_long num_recycled = 0; |
1277 | | |
1278 | 41 | std::string tablet_key_begin, tablet_key_end; |
1279 | 41 | std::string stats_key_begin, stats_key_end; |
1280 | 41 | std::string job_key_begin, job_key_end; |
1281 | | |
1282 | 41 | std::string tablet_belongs; |
1283 | 41 | if (partition_id > 0) { |
1284 | | // recycle tablets in a partition belonging to the index |
1285 | 32 | meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin); |
1286 | 32 | meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end); |
1287 | 32 | stats_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &stats_key_begin); |
1288 | 32 | stats_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &stats_key_end); |
1289 | 32 | job_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &job_key_begin); |
1290 | 32 | job_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &job_key_end); |
1291 | 32 | tablet_belongs = "partition"; |
1292 | 32 | } else { |
1293 | | // recycle tablets in the index |
1294 | 9 | meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin); |
1295 | 9 | meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end); |
1296 | 9 | stats_tablet_key({instance_id_, table_id, index_id, 0, 0}, &stats_key_begin); |
1297 | 9 | stats_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &stats_key_end); |
1298 | 9 | job_tablet_key({instance_id_, table_id, index_id, 0, 0}, &job_key_begin); |
1299 | 9 | job_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &job_key_end); |
1300 | 9 | tablet_belongs = "index"; |
1301 | 9 | } |
1302 | | |
1303 | 41 | LOG_INFO("begin to recycle tablets of the " + tablet_belongs) |
1304 | 41 | .tag("table_id", table_id) |
1305 | 41 | .tag("index_id", index_id) |
1306 | 41 | .tag("partition_id", partition_id); |
1307 | | |
1308 | 41 | auto start_time = steady_clock::now(); |
1309 | | |
1310 | 41 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { |
1311 | 41 | auto cost = duration<float>(steady_clock::now() - start_time).count(); |
1312 | 41 | LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost) |
1313 | 41 | .tag("instance_id", instance_id_) |
1314 | 41 | .tag("table_id", table_id) |
1315 | 41 | .tag("index_id", index_id) |
1316 | 41 | .tag("partition_id", partition_id) |
1317 | 41 | .tag("num_scanned", num_scanned) |
1318 | 41 | .tag("num_recycled", num_recycled); |
1319 | 41 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_1clEPi Line | Count | Source | 1310 | 37 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { | 1311 | 37 | auto cost = duration<float>(steady_clock::now() - start_time).count(); | 1312 | 37 | LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost) | 1313 | 37 | .tag("instance_id", instance_id_) | 1314 | 37 | .tag("table_id", table_id) | 1315 | 37 | .tag("index_id", index_id) | 1316 | 37 | .tag("partition_id", partition_id) | 1317 | 37 | .tag("num_scanned", num_scanned) | 1318 | 37 | .tag("num_recycled", num_recycled); | 1319 | 37 | }); |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_1clEPi Line | Count | Source | 1310 | 4 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { | 1311 | 4 | auto cost = duration<float>(steady_clock::now() - start_time).count(); | 1312 | 4 | LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost) | 1313 | 4 | .tag("instance_id", instance_id_) | 1314 | 4 | .tag("table_id", table_id) | 1315 | 4 | .tag("index_id", index_id) | 1316 | 4 | .tag("partition_id", partition_id) | 1317 | 4 | .tag("num_scanned", num_scanned) | 1318 | 4 | .tag("num_recycled", num_recycled); | 1319 | 4 | }); |
|
1320 | | |
1321 | | // The first string_view represents the tablet key which has been recycled |
1322 | | // The second bool represents whether the following fdb's tablet key deletion could be done using range move or not |
1323 | 41 | using TabletKeyPair = std::pair<std::string_view, bool>; |
1324 | 41 | SyncExecutor<TabletKeyPair> sync_executor( |
1325 | 41 | _thread_pool_group.recycle_tablet_pool, |
1326 | 41 | fmt::format("recycle tablets, tablet id {}, index id {}, partition id {}", table_id, |
1327 | 41 | index_id, partition_id), |
1328 | 251 | [](const TabletKeyPair& k) { return k.first.empty(); }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE Line | Count | Source | 1328 | 231 | [](const TabletKeyPair& k) { return k.first.empty(); }); |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE Line | Count | Source | 1328 | 20 | [](const TabletKeyPair& k) { return k.first.empty(); }); |
|
1329 | | |
1330 | | // Elements in `tablet_keys` has the same lifetime as `it` in `scan_and_recycle` |
1331 | 41 | std::vector<std::string> tablet_idx_keys; |
1332 | 41 | std::vector<std::string> init_rs_keys; |
1333 | 271 | auto recycle_func = [&, is_empty_tablet, this](std::string_view k, std::string_view v) -> int { |
1334 | 271 | bool use_range_remove = true; |
1335 | 271 | ++num_scanned; |
1336 | 271 | doris::TabletMetaCloudPB tablet_meta_pb; |
1337 | 271 | if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) { |
1338 | 0 | LOG_WARNING("malformed tablet meta").tag("key", hex(k)); |
1339 | 0 | use_range_remove = false; |
1340 | 0 | return -1; |
1341 | 0 | } |
1342 | 271 | int64_t tablet_id = tablet_meta_pb.tablet_id(); |
1343 | | |
1344 | 271 | if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) { |
1345 | 20 | LOG(WARNING) << "lazy txn not finished tablet_meta_pb=" << tablet_meta_pb.tablet_id(); |
1346 | 20 | return -1; |
1347 | 20 | } |
1348 | | |
1349 | 251 | tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id})); |
1350 | 251 | if (!is_empty_tablet) { |
1351 | 251 | sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove, |
1352 | 251 | k]() mutable -> TabletKeyPair { |
1353 | 251 | if (recycle_tablet(tid) != 0) { |
1354 | 0 | LOG_WARNING("failed to recycle tablet") |
1355 | 0 | .tag("instance_id", instance_id_) |
1356 | 0 | .tag("tablet_id", tid); |
1357 | 0 | range_move = false; |
1358 | 0 | return {std::string_view(), range_move}; |
1359 | 0 | } |
1360 | 251 | ++num_recycled; |
1361 | 251 | LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k)); |
1362 | 251 | return {k, range_move}; |
1363 | 251 | }); recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_ENUlvE_clEv Line | Count | Source | 1352 | 231 | k]() mutable -> TabletKeyPair { | 1353 | 231 | if (recycle_tablet(tid) != 0) { | 1354 | 0 | LOG_WARNING("failed to recycle tablet") | 1355 | 0 | .tag("instance_id", instance_id_) | 1356 | 0 | .tag("tablet_id", tid); | 1357 | 0 | range_move = false; | 1358 | 0 | return {std::string_view(), range_move}; | 1359 | 0 | } | 1360 | 231 | ++num_recycled; | 1361 | 231 | LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k)); | 1362 | 231 | return {k, range_move}; | 1363 | 231 | }); |
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_ENUlvE_clEv Line | Count | Source | 1352 | 20 | k]() mutable -> TabletKeyPair { | 1353 | 20 | if (recycle_tablet(tid) != 0) { | 1354 | 0 | LOG_WARNING("failed to recycle tablet") | 1355 | 0 | .tag("instance_id", instance_id_) | 1356 | 0 | .tag("tablet_id", tid); | 1357 | 0 | range_move = false; | 1358 | 0 | return {std::string_view(), range_move}; | 1359 | 0 | } | 1360 | 20 | ++num_recycled; | 1361 | 20 | LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k)); | 1362 | 20 | return {k, range_move}; | 1363 | 20 | }); |
|
1364 | 251 | } else { |
1365 | | // Empty tablet only has a [0-1] init rowset |
1366 | 0 | init_rs_keys.push_back(meta_rowset_key({instance_id_, tablet_id, 1})); |
1367 | 0 | DCHECK([&]() { |
1368 | 0 | std::unique_ptr<Transaction> txn; |
1369 | 0 | if (TxnErrorCode err = txn_kv_->create_txn(&txn); err != TxnErrorCode::TXN_OK) { |
1370 | 0 | LOG_ERROR("failed to create txn").tag("err", err); |
1371 | 0 | return false; |
1372 | 0 | } |
1373 | 0 | auto rs_key_begin = meta_rowset_key({instance_id_, tablet_id, 2}); |
1374 | 0 | auto rs_key_end = meta_rowset_key({instance_id_, tablet_id, INT64_MAX}); |
1375 | 0 | std::unique_ptr<RangeGetIterator> iter; |
1376 | 0 | if (TxnErrorCode err = txn->get(rs_key_begin, rs_key_end, &iter, true, 1); |
1377 | 0 | err != TxnErrorCode::TXN_OK) { |
1378 | 0 | LOG_ERROR("failed to get kv").tag("err", err); |
1379 | 0 | return false; |
1380 | 0 | } |
1381 | 0 | if (iter->has_next()) { |
1382 | 0 | LOG_ERROR("tablet is not empty").tag("tablet_id", tablet_id); |
1383 | 0 | return false; |
1384 | 0 | } |
1385 | 0 | return true; |
1386 | 0 | }()); |
1387 | 0 | sync_executor.add([k]() mutable -> TabletKeyPair { |
1388 | 0 | LOG_INFO("k is {}, is empty {}", k, k.empty()); |
1389 | 0 | return {k, true}; |
1390 | 0 | }); Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_ENUlvE1_clEv Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_ENUlvE1_clEv |
1391 | 0 | ++num_recycled; |
1392 | 0 | } |
1393 | 251 | return 0; |
1394 | 271 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 1333 | 231 | auto recycle_func = [&, is_empty_tablet, this](std::string_view k, std::string_view v) -> int { | 1334 | 231 | bool use_range_remove = true; | 1335 | 231 | ++num_scanned; | 1336 | 231 | doris::TabletMetaCloudPB tablet_meta_pb; | 1337 | 231 | if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) { | 1338 | 0 | LOG_WARNING("malformed tablet meta").tag("key", hex(k)); | 1339 | 0 | use_range_remove = false; | 1340 | 0 | return -1; | 1341 | 0 | } | 1342 | 231 | int64_t tablet_id = tablet_meta_pb.tablet_id(); | 1343 | | | 1344 | 231 | if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) { | 1345 | 0 | LOG(WARNING) << "lazy txn not finished tablet_meta_pb=" << tablet_meta_pb.tablet_id(); | 1346 | 0 | return -1; | 1347 | 0 | } | 1348 | | | 1349 | 231 | tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id})); | 1350 | 231 | if (!is_empty_tablet) { | 1351 | 231 | sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove, | 1352 | 231 | k]() mutable -> TabletKeyPair { | 1353 | 231 | if (recycle_tablet(tid) != 0) { | 1354 | 231 | LOG_WARNING("failed to recycle tablet") | 1355 | 231 | .tag("instance_id", instance_id_) | 1356 | 231 | .tag("tablet_id", tid); | 1357 | 231 | range_move = false; | 1358 | 231 | return {std::string_view(), range_move}; | 1359 | 231 | } | 1360 | 231 | ++num_recycled; | 1361 | 231 | LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? 
"(empty)" : hex(k)); | 1362 | 231 | return {k, range_move}; | 1363 | 231 | }); | 1364 | 231 | } else { | 1365 | | // Empty tablet only has a [0-1] init rowset | 1366 | 0 | init_rs_keys.push_back(meta_rowset_key({instance_id_, tablet_id, 1})); | 1367 | 0 | DCHECK([&]() { | 1368 | 0 | std::unique_ptr<Transaction> txn; | 1369 | 0 | if (TxnErrorCode err = txn_kv_->create_txn(&txn); err != TxnErrorCode::TXN_OK) { | 1370 | 0 | LOG_ERROR("failed to create txn").tag("err", err); | 1371 | 0 | return false; | 1372 | 0 | } | 1373 | 0 | auto rs_key_begin = meta_rowset_key({instance_id_, tablet_id, 2}); | 1374 | 0 | auto rs_key_end = meta_rowset_key({instance_id_, tablet_id, INT64_MAX}); | 1375 | 0 | std::unique_ptr<RangeGetIterator> iter; | 1376 | 0 | if (TxnErrorCode err = txn->get(rs_key_begin, rs_key_end, &iter, true, 1); | 1377 | 0 | err != TxnErrorCode::TXN_OK) { | 1378 | 0 | LOG_ERROR("failed to get kv").tag("err", err); | 1379 | 0 | return false; | 1380 | 0 | } | 1381 | 0 | if (iter->has_next()) { | 1382 | 0 | LOG_ERROR("tablet is not empty").tag("tablet_id", tablet_id); | 1383 | 0 | return false; | 1384 | 0 | } | 1385 | 0 | return true; | 1386 | 0 | }()); | 1387 | 0 | sync_executor.add([k]() mutable -> TabletKeyPair { | 1388 | 0 | LOG_INFO("k is {}, is empty {}", k, k.empty()); | 1389 | 0 | return {k, true}; | 1390 | 0 | }); | 1391 | 0 | ++num_recycled; | 1392 | 0 | } | 1393 | 231 | return 0; | 1394 | 231 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 1333 | 40 | auto recycle_func = [&, is_empty_tablet, this](std::string_view k, std::string_view v) -> int { | 1334 | 40 | bool use_range_remove = true; | 1335 | 40 | ++num_scanned; | 1336 | 40 | doris::TabletMetaCloudPB tablet_meta_pb; | 1337 | 40 | if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) { | 1338 | 0 | LOG_WARNING("malformed tablet meta").tag("key", hex(k)); | 1339 | 0 | use_range_remove = false; | 1340 | 0 | return -1; | 1341 | 0 | } | 1342 | 40 | int64_t tablet_id = tablet_meta_pb.tablet_id(); | 1343 | | | 1344 | 40 | if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) { | 1345 | 20 | LOG(WARNING) << "lazy txn not finished tablet_meta_pb=" << tablet_meta_pb.tablet_id(); | 1346 | 20 | return -1; | 1347 | 20 | } | 1348 | | | 1349 | 20 | tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id})); | 1350 | 20 | if (!is_empty_tablet) { | 1351 | 20 | sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove, | 1352 | 20 | k]() mutable -> TabletKeyPair { | 1353 | 20 | if (recycle_tablet(tid) != 0) { | 1354 | 20 | LOG_WARNING("failed to recycle tablet") | 1355 | 20 | .tag("instance_id", instance_id_) | 1356 | 20 | .tag("tablet_id", tid); | 1357 | 20 | range_move = false; | 1358 | 20 | return {std::string_view(), range_move}; | 1359 | 20 | } | 1360 | 20 | ++num_recycled; | 1361 | 20 | LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? 
"(empty)" : hex(k)); | 1362 | 20 | return {k, range_move}; | 1363 | 20 | }); | 1364 | 20 | } else { | 1365 | | // Empty tablet only has a [0-1] init rowset | 1366 | 0 | init_rs_keys.push_back(meta_rowset_key({instance_id_, tablet_id, 1})); | 1367 | 0 | DCHECK([&]() { | 1368 | 0 | std::unique_ptr<Transaction> txn; | 1369 | 0 | if (TxnErrorCode err = txn_kv_->create_txn(&txn); err != TxnErrorCode::TXN_OK) { | 1370 | 0 | LOG_ERROR("failed to create txn").tag("err", err); | 1371 | 0 | return false; | 1372 | 0 | } | 1373 | 0 | auto rs_key_begin = meta_rowset_key({instance_id_, tablet_id, 2}); | 1374 | 0 | auto rs_key_end = meta_rowset_key({instance_id_, tablet_id, INT64_MAX}); | 1375 | 0 | std::unique_ptr<RangeGetIterator> iter; | 1376 | 0 | if (TxnErrorCode err = txn->get(rs_key_begin, rs_key_end, &iter, true, 1); | 1377 | 0 | err != TxnErrorCode::TXN_OK) { | 1378 | 0 | LOG_ERROR("failed to get kv").tag("err", err); | 1379 | 0 | return false; | 1380 | 0 | } | 1381 | 0 | if (iter->has_next()) { | 1382 | 0 | LOG_ERROR("tablet is not empty").tag("tablet_id", tablet_id); | 1383 | 0 | return false; | 1384 | 0 | } | 1385 | 0 | return true; | 1386 | 0 | }()); | 1387 | 0 | sync_executor.add([k]() mutable -> TabletKeyPair { | 1388 | 0 | LOG_INFO("k is {}, is empty {}", k, k.empty()); | 1389 | 0 | return {k, true}; | 1390 | 0 | }); | 1391 | 0 | ++num_recycled; | 1392 | 0 | } | 1393 | 20 | return 0; | 1394 | 40 | }; |
|
1395 | | |
1396 | | // TODO(AlexYue): Add one ut to cover use_range_remove = false |
1397 | 41 | auto loop_done = [&, this]() -> int { |
1398 | 41 | bool finished = true; |
1399 | 41 | auto tablet_keys = sync_executor.when_all(&finished); |
1400 | 41 | if (!finished) { |
1401 | 0 | LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_); |
1402 | 0 | return -1; |
1403 | 0 | } |
1404 | 41 | if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0; |
1405 | | // sort the vector using key's order |
1406 | 39 | std::sort(tablet_keys.begin(), tablet_keys.end(), |
1407 | 980 | [](const auto& prev, const auto& last) { return prev.first < last.first; }); recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_0clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESG_EEDaS5_S8_ Line | Count | Source | 1407 | 944 | [](const auto& prev, const auto& last) { return prev.first < last.first; }); |
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_0clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESG_EEDaS5_S8_ Line | Count | Source | 1407 | 36 | [](const auto& prev, const auto& last) { return prev.first < last.first; }); |
|
1408 | 39 | bool use_range_remove = true; |
1409 | 251 | for (auto& [_, remove] : tablet_keys) { |
1410 | 251 | if (!remove) { |
1411 | 0 | use_range_remove = remove; |
1412 | 0 | break; |
1413 | 0 | } |
1414 | 251 | } |
1415 | 39 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) { |
1416 | 39 | tablet_idx_keys.clear(); |
1417 | 39 | init_rs_keys.clear(); |
1418 | 39 | }); recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_0clEvENKUlPiE_clES3_ Line | Count | Source | 1415 | 37 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) { | 1416 | 37 | tablet_idx_keys.clear(); | 1417 | 37 | init_rs_keys.clear(); | 1418 | 37 | }); |
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_0clEvENKUlPiE_clES3_ Line | Count | Source | 1415 | 2 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) { | 1416 | 2 | tablet_idx_keys.clear(); | 1417 | 2 | init_rs_keys.clear(); | 1418 | 2 | }); |
|
1419 | 39 | std::unique_ptr<Transaction> txn; |
1420 | 39 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
1421 | 0 | LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_; |
1422 | 0 | return -1; |
1423 | 0 | } |
1424 | 39 | std::string tablet_key_end; |
1425 | 39 | if (!tablet_keys.empty()) { |
1426 | 39 | if (use_range_remove) { |
1427 | 39 | tablet_key_end = std::string(tablet_keys.back().first) + '\x00'; |
1428 | 39 | txn->remove(tablet_keys.front().first, tablet_key_end); |
1429 | 39 | } else { |
1430 | 0 | for (auto& [k, _] : tablet_keys) { |
1431 | 0 | txn->remove(k); |
1432 | 0 | } |
1433 | 0 | } |
1434 | 39 | } |
1435 | 251 | for (auto& k : tablet_idx_keys) { |
1436 | 251 | txn->remove(k); |
1437 | 251 | } |
1438 | 39 | for (auto& k : init_rs_keys) { |
1439 | 0 | txn->remove(k); |
1440 | 0 | } |
1441 | 39 | if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) { |
1442 | 0 | LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_ |
1443 | 0 | << ", err=" << err; |
1444 | 0 | return -1; |
1445 | 0 | } |
1446 | 39 | return 0; |
1447 | 39 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_0clEv Line | Count | Source | 1397 | 37 | auto loop_done = [&, this]() -> int { | 1398 | 37 | bool finished = true; | 1399 | 37 | auto tablet_keys = sync_executor.when_all(&finished); | 1400 | 37 | if (!finished) { | 1401 | 0 | LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_); | 1402 | 0 | return -1; | 1403 | 0 | } | 1404 | 37 | if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0; | 1405 | | // sort the vector using key's order | 1406 | 37 | std::sort(tablet_keys.begin(), tablet_keys.end(), | 1407 | 37 | [](const auto& prev, const auto& last) { return prev.first < last.first; }); | 1408 | 37 | bool use_range_remove = true; | 1409 | 231 | for (auto& [_, remove] : tablet_keys) { | 1410 | 231 | if (!remove) { | 1411 | 0 | use_range_remove = remove; | 1412 | 0 | break; | 1413 | 0 | } | 1414 | 231 | } | 1415 | 37 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) { | 1416 | 37 | tablet_idx_keys.clear(); | 1417 | 37 | init_rs_keys.clear(); | 1418 | 37 | }); | 1419 | 37 | std::unique_ptr<Transaction> txn; | 1420 | 37 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { | 1421 | 0 | LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_; | 1422 | 0 | return -1; | 1423 | 0 | } | 1424 | 37 | std::string tablet_key_end; | 1425 | 37 | if (!tablet_keys.empty()) { | 1426 | 37 | if (use_range_remove) { | 1427 | 37 | tablet_key_end = std::string(tablet_keys.back().first) + '\x00'; | 1428 | 37 | txn->remove(tablet_keys.front().first, tablet_key_end); | 1429 | 37 | } else { | 1430 | 0 | for (auto& [k, _] : tablet_keys) { | 1431 | 0 | txn->remove(k); | 1432 | 0 | } | 1433 | 0 | } | 1434 | 37 | } | 1435 | 231 | for (auto& k : tablet_idx_keys) { | 1436 | 231 | txn->remove(k); | 1437 | 231 | } | 1438 | 37 | for (auto& k : init_rs_keys) { | 1439 | 0 | txn->remove(k); | 1440 | 0 | } | 1441 | 37 | if 
(TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) { | 1442 | 0 | LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_ | 1443 | 0 | << ", err=" << err; | 1444 | 0 | return -1; | 1445 | 0 | } | 1446 | 37 | return 0; | 1447 | 37 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_0clEv Line | Count | Source | 1397 | 4 | auto loop_done = [&, this]() -> int { | 1398 | 4 | bool finished = true; | 1399 | 4 | auto tablet_keys = sync_executor.when_all(&finished); | 1400 | 4 | if (!finished) { | 1401 | 0 | LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_); | 1402 | 0 | return -1; | 1403 | 0 | } | 1404 | 4 | if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0; | 1405 | | // sort the vector using key's order | 1406 | 2 | std::sort(tablet_keys.begin(), tablet_keys.end(), | 1407 | 2 | [](const auto& prev, const auto& last) { return prev.first < last.first; }); | 1408 | 2 | bool use_range_remove = true; | 1409 | 20 | for (auto& [_, remove] : tablet_keys) { | 1410 | 20 | if (!remove) { | 1411 | 0 | use_range_remove = remove; | 1412 | 0 | break; | 1413 | 0 | } | 1414 | 20 | } | 1415 | 2 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) { | 1416 | 2 | tablet_idx_keys.clear(); | 1417 | 2 | init_rs_keys.clear(); | 1418 | 2 | }); | 1419 | 2 | std::unique_ptr<Transaction> txn; | 1420 | 2 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { | 1421 | 0 | LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_; | 1422 | 0 | return -1; | 1423 | 0 | } | 1424 | 2 | std::string tablet_key_end; | 1425 | 2 | if (!tablet_keys.empty()) { | 1426 | 2 | if (use_range_remove) { | 1427 | 2 | tablet_key_end = std::string(tablet_keys.back().first) + '\x00'; | 1428 | 2 | txn->remove(tablet_keys.front().first, tablet_key_end); | 1429 | 2 | } else { | 1430 | 0 | for (auto& [k, _] : tablet_keys) { | 1431 | 0 | txn->remove(k); | 1432 | 0 | } | 1433 | 0 | } | 1434 | 2 | } | 1435 | 20 | for (auto& k : tablet_idx_keys) { | 1436 | 20 | txn->remove(k); | 1437 | 20 | } | 1438 | 2 | for (auto& k : init_rs_keys) { | 1439 | 0 | txn->remove(k); | 1440 | 0 | } | 1441 | 2 | if (TxnErrorCode err = txn->commit(); err != 
TxnErrorCode::TXN_OK) { | 1442 | 0 | LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_ | 1443 | 0 | << ", err=" << err; | 1444 | 0 | return -1; | 1445 | 0 | } | 1446 | 2 | return 0; | 1447 | 2 | }; |
|
1448 | | |
1449 | 41 | int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(recycle_func), |
1450 | 41 | std::move(loop_done)); |
1451 | 41 | if (ret != 0) { |
1452 | 2 | LOG(WARNING) << "failed to scan_and_recycle, instance_id=" << instance_id_; |
1453 | 2 | return ret; |
1454 | 2 | } |
1455 | | |
1456 | | // directly remove tablet stats and tablet jobs of these dropped index or partition |
1457 | 39 | std::unique_ptr<Transaction> txn; |
1458 | 39 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
1459 | 0 | LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_; |
1460 | 0 | return -1; |
1461 | 0 | } |
1462 | 39 | txn->remove(stats_key_begin, stats_key_end); |
1463 | 39 | LOG(WARNING) << "remove stats kv, begin=" << hex(stats_key_begin) |
1464 | 39 | << " end=" << hex(stats_key_end); |
1465 | 39 | txn->remove(job_key_begin, job_key_end); |
1466 | 39 | LOG(WARNING) << "remove job kv, begin=" << hex(job_key_begin) << " end=" << hex(job_key_end); |
1467 | 39 | std::string schema_key_begin, schema_key_end; |
1468 | 39 | std::string schema_dict_key; |
1469 | 39 | if (partition_id <= 0) { |
1470 | | // Delete schema kv of this index |
1471 | 8 | meta_schema_key({instance_id_, index_id, 0}, &schema_key_begin); |
1472 | 8 | meta_schema_key({instance_id_, index_id + 1, 0}, &schema_key_end); |
1473 | 8 | txn->remove(schema_key_begin, schema_key_end); |
1474 | 8 | LOG(WARNING) << "remove schema kv, begin=" << hex(schema_key_begin) |
1475 | 8 | << " end=" << hex(schema_key_end); |
1476 | 8 | meta_schema_pb_dictionary_key({instance_id_, index_id}, &schema_dict_key); |
1477 | 8 | txn->remove(schema_dict_key); |
1478 | 8 | LOG(WARNING) << "remove schema dict kv, key=" << hex(schema_dict_key); |
1479 | 8 | } |
1480 | | |
1481 | 39 | TxnErrorCode err = txn->commit(); |
1482 | 39 | if (err != TxnErrorCode::TXN_OK) { |
1483 | 0 | LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_ |
1484 | 0 | << " err=" << err; |
1485 | 0 | return -1; |
1486 | 0 | } |
1487 | | |
1488 | 39 | return ret; |
1489 | 39 | } |
1490 | | |
1491 | 4.00k | int InstanceRecycler::delete_rowset_data(const doris::RowsetMetaCloudPB& rs_meta_pb) { |
1492 | 4.00k | int64_t num_segments = rs_meta_pb.num_segments(); |
1493 | 4.00k | if (num_segments <= 0) return 0; |
1494 | 4.00k | if (!rs_meta_pb.has_tablet_schema()) { |
1495 | 0 | return delete_rowset_data(rs_meta_pb.resource_id(), rs_meta_pb.tablet_id(), |
1496 | 0 | rs_meta_pb.rowset_id_v2()); |
1497 | 0 | } |
1498 | 4.00k | auto it = accessor_map_.find(rs_meta_pb.resource_id()); |
1499 | 4.00k | if (it == accessor_map_.end()) { |
1500 | 0 | LOG_WARNING("instance has no such resource id") |
1501 | 0 | .tag("instance_id", instance_id_) |
1502 | 0 | .tag("resource_id", rs_meta_pb.resource_id()); |
1503 | 0 | return -1; |
1504 | 0 | } |
1505 | 4.00k | auto& accessor = it->second; |
1506 | 4.00k | const auto& rowset_id = rs_meta_pb.rowset_id_v2(); |
1507 | 4.00k | int64_t tablet_id = rs_meta_pb.tablet_id(); |
1508 | | // process inverted indexes |
1509 | 4.00k | std::vector<std::pair<int64_t, std::string>> index_ids; |
1510 | 4.00k | index_ids.reserve(rs_meta_pb.tablet_schema().index_size()); |
1511 | 8.00k | for (auto& i : rs_meta_pb.tablet_schema().index()) { |
1512 | 8.00k | if (i.has_index_type() && i.index_type() == IndexType::INVERTED) { |
1513 | 8.00k | index_ids.push_back(std::make_pair(i.index_id(), i.index_suffix_name())); |
1514 | 8.00k | } |
1515 | 8.00k | } |
1516 | 4.00k | std::vector<std::string> file_paths; |
1517 | 4.00k | auto tablet_schema = rs_meta_pb.tablet_schema(); |
1518 | 4.00k | auto index_storage_format = InvertedIndexStorageFormatPB::V1; |
1519 | 24.0k | for (int64_t i = 0; i < num_segments; ++i) { |
1520 | 20.0k | file_paths.push_back(segment_path(tablet_id, rowset_id, i)); |
1521 | 20.0k | if (tablet_schema.has_inverted_index_storage_format()) { |
1522 | 10.0k | index_storage_format = tablet_schema.inverted_index_storage_format(); |
1523 | 10.0k | } |
1524 | 20.0k | if (index_storage_format == InvertedIndexStorageFormatPB::V1) { |
1525 | 40.0k | for (const auto& index_id : index_ids) { |
1526 | 40.0k | file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i, index_id.first, |
1527 | 40.0k | index_id.second)); |
1528 | 40.0k | } |
1529 | 20.0k | } else if (!index_ids.empty()) { |
1530 | 0 | file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i)); |
1531 | 0 | } |
1532 | 20.0k | } |
1533 | | // TODO(AlexYue): seems could do do batch |
1534 | 4.00k | return accessor->delete_files(file_paths); |
1535 | 4.00k | } |
1536 | | |
1537 | | int InstanceRecycler::delete_rowset_data(const std::vector<doris::RowsetMetaCloudPB>& rowsets, |
1538 | 32 | RowsetRecyclingState type) { |
1539 | 32 | int ret = 0; |
1540 | | // resource_id -> file_paths |
1541 | 32 | std::map<std::string, std::vector<std::string>> resource_file_paths; |
1542 | | // (resource_id, tablet_id, rowset_id) |
1543 | 32 | std::vector<std::tuple<std::string, int64_t, std::string>> rowsets_delete_by_prefix; |
1544 | | |
1545 | 6.14k | for (const auto& rs : rowsets) { |
1546 | | // we have to treat tmp rowset as "orphans" that may not related to any existing tablets |
1547 | | // due to aborted schema change. |
1548 | 6.14k | if (type == RowsetRecyclingState::FORMAL_ROWSET) { |
1549 | 3.12k | std::lock_guard lock(recycled_tablets_mtx_); |
1550 | 3.12k | if (recycled_tablets_.count(rs.tablet_id())) { |
1551 | 0 | continue; // Rowset data has already been deleted |
1552 | 0 | } |
1553 | 3.12k | } |
1554 | | |
1555 | 6.14k | auto it = accessor_map_.find(rs.resource_id()); |
1556 | | // possible if the accessor is not initilized correctly |
1557 | 6.14k | if (it == accessor_map_.end()) [[unlikely]] { |
1558 | 1 | LOG_WARNING("instance has no such resource id") |
1559 | 1 | .tag("instance_id", instance_id_) |
1560 | 1 | .tag("resource_id", rs.resource_id()); |
1561 | 1 | ret = -1; |
1562 | 1 | continue; |
1563 | 1 | } |
1564 | | |
1565 | 6.14k | auto& file_paths = resource_file_paths[rs.resource_id()]; |
1566 | 6.14k | const auto& rowset_id = rs.rowset_id_v2(); |
1567 | 6.14k | int64_t tablet_id = rs.tablet_id(); |
1568 | 6.14k | int64_t num_segments = rs.num_segments(); |
1569 | 6.14k | if (num_segments <= 0) continue; |
1570 | | |
1571 | | // Process inverted indexes |
1572 | 6.14k | std::vector<std::pair<int64_t, std::string>> index_ids; |
1573 | | // default format as v1. |
1574 | 6.14k | InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1; |
1575 | 6.14k | int inverted_index_get_ret = 0; |
1576 | 6.14k | if (rs.has_tablet_schema()) { |
1577 | 5.54k | for (const auto& index : rs.tablet_schema().index()) { |
1578 | 5.54k | if (index.has_index_type() && index.index_type() == IndexType::INVERTED) { |
1579 | 5.54k | index_ids.emplace_back(index.index_id(), index.index_suffix_name()); |
1580 | 5.54k | } |
1581 | 5.54k | } |
1582 | 2.59k | if (rs.tablet_schema().has_inverted_index_storage_format()) { |
1583 | 2.56k | index_format = rs.tablet_schema().inverted_index_storage_format(); |
1584 | 2.56k | } |
1585 | 3.55k | } else { |
1586 | 3.55k | if (!rs.has_index_id() || !rs.has_schema_version()) { |
1587 | 0 | LOG(WARNING) << "rowset must have either schema or schema_version and index_id, " |
1588 | 0 | "instance_id=" |
1589 | 0 | << instance_id_ << " tablet_id=" << tablet_id |
1590 | 0 | << " rowset_id=" << rowset_id; |
1591 | 0 | ret = -1; |
1592 | 0 | continue; |
1593 | 0 | } |
1594 | 3.55k | InvertedIndexInfo index_info; |
1595 | 3.55k | inverted_index_get_ret = |
1596 | 3.55k | inverted_index_id_cache_->get(rs.index_id(), rs.schema_version(), index_info); |
1597 | 3.55k | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset", |
1598 | 3.55k | &inverted_index_get_ret); |
1599 | 3.55k | if (inverted_index_get_ret == 0) { |
1600 | 3.05k | index_format = index_info.first; |
1601 | 3.05k | index_ids = index_info.second; |
1602 | 3.05k | } else if (inverted_index_get_ret == 1) { |
1603 | | // 1. Schema kv not found means tablet has been recycled |
1604 | | // Maybe some tablet recycle failed by some bugs |
1605 | | // We need to delete again to double check |
1606 | | // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes, |
1607 | | // because we are uncertain about the inverted index information. |
1608 | | // If there are inverted indexes, some data might not be deleted, |
1609 | | // but this is acceptable as we have made our best effort to delete the data. |
1610 | 503 | LOG_INFO( |
1611 | 503 | "delete rowset data schema kv not found, need to delete again to double " |
1612 | 503 | "check") |
1613 | 503 | .tag("instance_id", instance_id_) |
1614 | 503 | .tag("tablet_id", tablet_id) |
1615 | 503 | .tag("rowset", rs.ShortDebugString()); |
1616 | | // Currently index_ids is guaranteed to be empty, |
1617 | | // but we clear it again here as a safeguard against future code changes |
1618 | | // that might cause index_ids to no longer be empty |
1619 | 503 | index_format = InvertedIndexStorageFormatPB::V2; |
1620 | 503 | index_ids.clear(); |
1621 | 503 | } else { |
1622 | 0 | LOG(WARNING) << "failed to get schema kv for rowset, instance_id=" << instance_id_ |
1623 | 0 | << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id; |
1624 | 0 | ret = -1; |
1625 | 0 | continue; |
1626 | 0 | } |
1627 | 3.55k | } |
1628 | 6.14k | if (rs.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) { |
1629 | | // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data |
1630 | | // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix |
1631 | 5 | rowsets_delete_by_prefix.emplace_back(rs.resource_id(), tablet_id, rs.rowset_id_v2()); |
1632 | 5 | continue; |
1633 | 5 | } |
1634 | 36.8k | for (int64_t i = 0; i < num_segments; ++i) { |
1635 | 30.6k | file_paths.push_back(segment_path(tablet_id, rowset_id, i)); |
1636 | 30.6k | if (index_format == InvertedIndexStorageFormatPB::V1) { |
1637 | 59.2k | for (const auto& index_id : index_ids) { |
1638 | 59.2k | file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i, |
1639 | 59.2k | index_id.first, index_id.second)); |
1640 | 59.2k | } |
1641 | 28.1k | } else if (!index_ids.empty() || inverted_index_get_ret == 1) { |
1642 | | // try to recycle inverted index v2 when get_ret == 1 |
1643 | | // we treat schema not found as if it has a v2 format inverted index |
1644 | | // to reduce chance of data leakage |
1645 | 2.50k | if (inverted_index_get_ret == 1) { |
1646 | 2.50k | LOG_INFO("delete rowset data schema kv not found, try to delete index file") |
1647 | 2.50k | .tag("instance_id", instance_id_) |
1648 | 2.50k | .tag("inverted index v2 path", |
1649 | 2.50k | inverted_index_path_v2(tablet_id, rowset_id, i)); |
1650 | 2.50k | } |
1651 | 2.50k | file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i)); |
1652 | 2.50k | } |
1653 | 30.6k | } |
1654 | 6.13k | } |
1655 | | |
1656 | 32 | SyncExecutor<int> concurrent_delete_executor(_thread_pool_group.s3_producer_pool, |
1657 | 32 | "delete_rowset_data", |
1658 | 34 | [](const int& ret) { return ret != 0; }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt6vectorINS_17RowsetMetaCloudPBESaIS3_EENS0_20RowsetRecyclingStateEENK3$_0clERKi Line | Count | Source | 1658 | 34 | [](const int& ret) { return ret != 0; }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt6vectorINS_17RowsetMetaCloudPBESaIS3_EENS0_20RowsetRecyclingStateEENK3$_0clERKi |
1659 | 32 | for (auto& [resource_id, file_paths] : resource_file_paths) { |
1660 | 29 | concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int { |
1661 | 29 | DCHECK(accessor_map_.count(*rid)) |
1662 | 0 | << "uninitilized accessor, instance_id=" << instance_id_ |
1663 | 0 | << " resource_id=" << resource_id << " path[0]=" << (*paths)[0]; |
1664 | 29 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id", |
1665 | 29 | &accessor_map_); |
1666 | 29 | if (!accessor_map_.contains(*rid)) { |
1667 | 0 | LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id") |
1668 | 0 | .tag("resource_id", resource_id) |
1669 | 0 | .tag("instance_id", instance_id_); |
1670 | 0 | return -1; |
1671 | 0 | } |
1672 | 29 | auto& accessor = accessor_map_[*rid]; |
1673 | 29 | return accessor->delete_files(*paths); |
1674 | 29 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt6vectorINS_17RowsetMetaCloudPBESaIS3_EENS0_20RowsetRecyclingStateEENK3$_1clEv Line | Count | Source | 1660 | 29 | concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int { | 1661 | 29 | DCHECK(accessor_map_.count(*rid)) | 1662 | 0 | << "uninitilized accessor, instance_id=" << instance_id_ | 1663 | 0 | << " resource_id=" << resource_id << " path[0]=" << (*paths)[0]; | 1664 | 29 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id", | 1665 | 29 | &accessor_map_); | 1666 | 29 | if (!accessor_map_.contains(*rid)) { | 1667 | 0 | LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id") | 1668 | 0 | .tag("resource_id", resource_id) | 1669 | 0 | .tag("instance_id", instance_id_); | 1670 | 0 | return -1; | 1671 | 0 | } | 1672 | 29 | auto& accessor = accessor_map_[*rid]; | 1673 | 29 | return accessor->delete_files(*paths); | 1674 | 29 | }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt6vectorINS_17RowsetMetaCloudPBESaIS3_EENS0_20RowsetRecyclingStateEENK3$_1clEv |
1675 | 29 | } |
1676 | 32 | for (const auto& [resource_id, tablet_id, rowset_id] : rowsets_delete_by_prefix) { |
1677 | 5 | LOG_INFO( |
1678 | 5 | "delete rowset {} by prefix because it's in BEGIN_PARTIAL_UPDATE state, " |
1679 | 5 | "resource_id={}, tablet_id={}, instance_id={}", |
1680 | 5 | rowset_id, resource_id, tablet_id, instance_id_); |
1681 | 5 | concurrent_delete_executor.add( |
1682 | 5 | [&]() -> int { return delete_rowset_data(resource_id, tablet_id, rowset_id); }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt6vectorINS_17RowsetMetaCloudPBESaIS3_EENS0_20RowsetRecyclingStateEENK3$_2clEv Line | Count | Source | 1682 | 5 | [&]() -> int { return delete_rowset_data(resource_id, tablet_id, rowset_id); }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt6vectorINS_17RowsetMetaCloudPBESaIS3_EENS0_20RowsetRecyclingStateEENK3$_2clEv |
1683 | 5 | } |
1684 | 32 | bool finished = true; |
1685 | 32 | std::vector<int> rets = concurrent_delete_executor.when_all(&finished); |
1686 | 34 | for (int r : rets) { |
1687 | 34 | if (r != 0) { |
1688 | 0 | ret = -1; |
1689 | 0 | break; |
1690 | 0 | } |
1691 | 34 | } |
1692 | 32 | ret = finished ? ret : -1; |
1693 | 32 | return ret; |
1694 | 32 | } |
1695 | | |
1696 | | int InstanceRecycler::delete_rowset_data(const std::string& resource_id, int64_t tablet_id, |
1697 | 2.90k | const std::string& rowset_id) { |
1698 | 2.90k | auto it = accessor_map_.find(resource_id); |
1699 | 2.90k | if (it == accessor_map_.end()) { |
1700 | 0 | LOG_WARNING("instance has no such resource id") |
1701 | 0 | .tag("instance_id", instance_id_) |
1702 | 0 | .tag("resource_id", resource_id) |
1703 | 0 | .tag("tablet_id", tablet_id) |
1704 | 0 | .tag("rowset_id", rowset_id); |
1705 | 0 | return -1; |
1706 | 0 | } |
1707 | 2.90k | auto& accessor = it->second; |
1708 | 2.90k | return accessor->delete_prefix(rowset_path_prefix(tablet_id, rowset_id)); |
1709 | 2.90k | } |
1710 | | |
1711 | 254 | int InstanceRecycler::recycle_tablet(int64_t tablet_id) { |
1712 | 254 | LOG_INFO("begin to recycle rowsets in a dropped tablet") |
1713 | 254 | .tag("instance_id", instance_id_) |
1714 | 254 | .tag("tablet_id", tablet_id); |
1715 | | |
1716 | 254 | int ret = 0; |
1717 | 254 | auto start_time = steady_clock::now(); |
1718 | | |
1719 | 254 | TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0); |
1720 | | |
1721 | | // collect resource ids |
1722 | 234 | std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0}); |
1723 | 234 | std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0}); |
1724 | 234 | std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""}); |
1725 | 234 | std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""}); |
1726 | | |
1727 | 234 | std::set<std::string> resource_ids; |
1728 | 234 | int64_t recycle_rowsets_number = 0; |
1729 | 234 | int64_t recycle_segments_number = 0; |
1730 | 234 | int64_t recycle_rowsets_data_size = 0; |
1731 | 234 | int64_t recycle_rowsets_index_size = 0; |
1732 | 234 | int64_t max_rowset_version = 0; |
1733 | 234 | int64_t min_rowset_creation_time = INT64_MAX; |
1734 | 234 | int64_t max_rowset_creation_time = 0; |
1735 | 234 | int64_t min_rowset_expiration_time = INT64_MAX; |
1736 | 234 | int64_t max_rowset_expiration_time = 0; |
1737 | | |
1738 | 234 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { |
1739 | 234 | auto cost = duration<float>(steady_clock::now() - start_time).count(); |
1740 | 234 | LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost) |
1741 | 234 | .tag("instance_id", instance_id_) |
1742 | 234 | .tag("tablet_id", tablet_id) |
1743 | 234 | .tag("recycle rowsets number", recycle_rowsets_number) |
1744 | 234 | .tag("recycle segments number", recycle_segments_number) |
1745 | 234 | .tag("all rowsets recycle data size", recycle_rowsets_data_size) |
1746 | 234 | .tag("all rowsets recycle index size", recycle_rowsets_index_size) |
1747 | 234 | .tag("max rowset version", max_rowset_version) |
1748 | 234 | .tag("min rowset creation time", min_rowset_creation_time) |
1749 | 234 | .tag("max rowset creation time", max_rowset_creation_time) |
1750 | 234 | .tag("min rowset expiration time", min_rowset_expiration_time) |
1751 | 234 | .tag("max rowset expiration time", max_rowset_expiration_time) |
1752 | 234 | .tag("ret", ret); |
1753 | 234 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_1clEPi Line | Count | Source | 1738 | 234 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { | 1739 | 234 | auto cost = duration<float>(steady_clock::now() - start_time).count(); | 1740 | 234 | LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost) | 1741 | 234 | .tag("instance_id", instance_id_) | 1742 | 234 | .tag("tablet_id", tablet_id) | 1743 | 234 | .tag("recycle rowsets number", recycle_rowsets_number) | 1744 | 234 | .tag("recycle segments number", recycle_segments_number) | 1745 | 234 | .tag("all rowsets recycle data size", recycle_rowsets_data_size) | 1746 | 234 | .tag("all rowsets recycle index size", recycle_rowsets_index_size) | 1747 | 234 | .tag("max rowset version", max_rowset_version) | 1748 | 234 | .tag("min rowset creation time", min_rowset_creation_time) | 1749 | 234 | .tag("max rowset creation time", max_rowset_creation_time) | 1750 | 234 | .tag("min rowset expiration time", min_rowset_expiration_time) | 1751 | 234 | .tag("max rowset expiration time", max_rowset_expiration_time) | 1752 | 234 | .tag("ret", ret); | 1753 | 234 | }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_1clEPi |
1754 | | |
1755 | 234 | std::unique_ptr<Transaction> txn; |
1756 | 234 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
1757 | 0 | LOG_WARNING("failed to recycle tablet ") |
1758 | 0 | .tag("tablet id", tablet_id) |
1759 | 0 | .tag("instance_id", instance_id_) |
1760 | 0 | .tag("reason", "failed to create txn"); |
1761 | 0 | ret = -1; |
1762 | 0 | } |
1763 | 234 | GetRowsetResponse resp; |
1764 | 234 | std::string msg; |
1765 | 234 | MetaServiceCode code = MetaServiceCode::OK; |
1766 | | // get rowsets in tablet |
1767 | 234 | internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_, |
1768 | 234 | tablet_id, code, msg, &resp); |
1769 | 234 | if (code != MetaServiceCode::OK) { |
1770 | 0 | LOG_WARNING("failed to get rowsets of tablet when recycle tablet") |
1771 | 0 | .tag("tablet id", tablet_id) |
1772 | 0 | .tag("msg", msg) |
1773 | 0 | .tag("code", code) |
1774 | 0 | .tag("instance id", instance_id_); |
1775 | 0 | ret = -1; |
1776 | 0 | } |
1777 | 234 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", &resp); |
1778 | | |
1779 | 2.50k | for (const auto& rs_meta : resp.rowset_meta()) { |
1780 | | /* |
1781 | | * For compatibility, we skip the loop for [0-1] here. |
1782 | | * The purpose of this loop is to delete object files, |
1783 | | * and since [0-1] only has meta and doesn't have object files, |
1784 | | * skipping it doesn't affect system correctness. |
1785 | | * |
1786 | | * If not skipped, the check "if (!rs_meta.has_resource_id())" below |
1787 | | * would return error -1 directly, causing the recycle operation to fail. |
1788 | | * |
1789 | | * [0-1] doesn't have resource id is a bug. |
1790 | | * In the future, we will fix this problem, after that, |
1791 | | * we can remove this if statement. |
1792 | | * |
1793 | | * TODO(Yukang-Lian): remove this if statement when [0-1] has resource id in the future. |
1794 | | */ |
1795 | | |
1796 | 2.50k | if (rs_meta.end_version() == 1) { |
1797 | | // Assert that [0-1] has no resource_id to make sure |
1798 | | // this if statement will not be forgetted to remove |
1799 | | // when the resource id bug is fixed |
1800 | 0 | DCHECK(!rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString(); |
1801 | 0 | recycle_rowsets_number += 1; |
1802 | 0 | continue; |
1803 | 0 | } |
1804 | 2.50k | if (!rs_meta.has_resource_id()) { |
1805 | 1 | LOG_WARNING("rowset meta does not have a resource id, impossible!") |
1806 | 1 | .tag("rs_meta", rs_meta.ShortDebugString()) |
1807 | 1 | .tag("instance_id", instance_id_) |
1808 | 1 | .tag("tablet_id", tablet_id); |
1809 | 1 | return -1; |
1810 | 1 | } |
1811 | 2.50k | DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString(); |
1812 | 2.50k | auto it = accessor_map_.find(rs_meta.resource_id()); |
1813 | | // possible if the accessor is not initilized correctly |
1814 | 2.50k | if (it == accessor_map_.end()) [[unlikely]] { |
1815 | 1 | LOG_WARNING( |
1816 | 1 | "failed to find resource id when recycle tablet, skip this vault accessor " |
1817 | 1 | "recycle process") |
1818 | 1 | .tag("tablet id", tablet_id) |
1819 | 1 | .tag("instance_id", instance_id_) |
1820 | 1 | .tag("resource_id", rs_meta.resource_id()) |
1821 | 1 | .tag("rowset meta pb", rs_meta.ShortDebugString()); |
1822 | 1 | return -1; |
1823 | 1 | } |
1824 | 2.50k | recycle_rowsets_number += 1; |
1825 | 2.50k | recycle_segments_number += rs_meta.num_segments(); |
1826 | 2.50k | recycle_rowsets_data_size += rs_meta.data_disk_size(); |
1827 | 2.50k | recycle_rowsets_index_size += rs_meta.index_disk_size(); |
1828 | 2.50k | max_rowset_version = std::max(max_rowset_version, rs_meta.end_version()); |
1829 | 2.50k | min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time()); |
1830 | 2.50k | max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time()); |
1831 | 2.50k | min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration()); |
1832 | 2.50k | max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration()); |
1833 | 2.50k | resource_ids.emplace(rs_meta.resource_id()); |
1834 | 2.50k | } |
1835 | | |
1836 | 232 | LOG_INFO("recycle tablet start to delete object") |
1837 | 232 | .tag("instance id", instance_id_) |
1838 | 232 | .tag("tablet id", tablet_id) |
1839 | 232 | .tag("recycle tablet resource ids are", |
1840 | 232 | std::accumulate(resource_ids.begin(), resource_ids.end(), std::string(), |
1841 | 232 | [](const std::string& a, const std::string& b) { |
1842 | 203 | return a.empty() ? b : a + "," + b; |
1843 | 203 | })); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_0clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESA_ Line | Count | Source | 1841 | 203 | [](const std::string& a, const std::string& b) { | 1842 | 203 | return a.empty() ? b : a + "," + b; | 1843 | 203 | })); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_0clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESA_ |
1844 | | |
1845 | 232 | SyncExecutor<int> concurrent_delete_executor( |
1846 | 232 | _thread_pool_group.s3_producer_pool, |
1847 | 232 | fmt::format("delete tablet {} s3 rowset", tablet_id), |
1848 | 232 | [](const int& ret) { return ret != 0; }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_2clERKi Line | Count | Source | 1848 | 203 | [](const int& ret) { return ret != 0; }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_2clERKi |
1849 | | |
1850 | | // delete all rowset data in this tablet |
1851 | | // ATTN: there may be data leak if not all accessor initilized successfully |
1852 | | // partial data deleted if the tablet is stored cross-storage vault |
1853 | | // vault id is not attached to TabletMeta... |
1854 | 232 | for (const auto& resource_id : resource_ids) { |
1855 | 203 | concurrent_delete_executor.add([&, accessor_ptr = accessor_map_[resource_id]]() { |
1856 | 203 | if (accessor_ptr->delete_directory(tablet_path_prefix(tablet_id)) != 0) { |
1857 | 1 | LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id |
1858 | 1 | << " path=" << accessor_ptr->uri(); |
1859 | 1 | return -1; |
1860 | 1 | } |
1861 | 202 | return 0; |
1862 | 203 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_3clEv Line | Count | Source | 1855 | 203 | concurrent_delete_executor.add([&, accessor_ptr = accessor_map_[resource_id]]() { | 1856 | 203 | if (accessor_ptr->delete_directory(tablet_path_prefix(tablet_id)) != 0) { | 1857 | 1 | LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id | 1858 | 1 | << " path=" << accessor_ptr->uri(); | 1859 | 1 | return -1; | 1860 | 1 | } | 1861 | 202 | return 0; | 1862 | 203 | }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_3clEv |
1863 | 203 | } |
1864 | | |
1865 | 232 | bool finished = true; |
1866 | 232 | std::vector<int> rets = concurrent_delete_executor.when_all(&finished); |
1867 | 232 | for (int r : rets) { |
1868 | 203 | if (r != 0) { |
1869 | 1 | ret = -1; |
1870 | 1 | } |
1871 | 203 | } |
1872 | | |
1873 | 232 | ret = finished ? ret : -1; |
1874 | | |
1875 | 232 | if (ret != 0) { // failed recycle tablet data |
1876 | 1 | LOG_WARNING("ret!=0") |
1877 | 1 | .tag("finished", finished) |
1878 | 1 | .tag("ret", ret) |
1879 | 1 | .tag("instance_id", instance_id_) |
1880 | 1 | .tag("tablet_id", tablet_id); |
1881 | 1 | return ret; |
1882 | 1 | } |
1883 | | |
1884 | 231 | txn.reset(); |
1885 | 231 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
1886 | 0 | LOG_WARNING("failed to recycle tablet ") |
1887 | 0 | .tag("tablet id", tablet_id) |
1888 | 0 | .tag("instance_id", instance_id_) |
1889 | 0 | .tag("reason", "failed to create txn"); |
1890 | 0 | ret = -1; |
1891 | 0 | } |
1892 | | // delete all rowset kv in this tablet |
1893 | 231 | txn->remove(rs_key0, rs_key1); |
1894 | 231 | txn->remove(recyc_rs_key0, recyc_rs_key1); |
1895 | | |
1896 | | // remove delete bitmap for MoW table |
1897 | 231 | std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id}); |
1898 | 231 | txn->remove(pending_key); |
1899 | 231 | std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0}); |
1900 | 231 | std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0}); |
1901 | 231 | txn->remove(delete_bitmap_start, delete_bitmap_end); |
1902 | | |
1903 | 231 | TxnErrorCode err = txn->commit(); |
1904 | 231 | if (err != TxnErrorCode::TXN_OK) { |
1905 | 0 | LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err; |
1906 | 0 | ret = -1; |
1907 | 0 | } |
1908 | | |
1909 | 231 | if (ret == 0) { |
1910 | | // All object files under tablet have been deleted |
1911 | 231 | std::lock_guard lock(recycled_tablets_mtx_); |
1912 | 231 | recycled_tablets_.insert(tablet_id); |
1913 | 231 | } |
1914 | | |
1915 | 231 | return ret; |
1916 | 232 | } |
1917 | | |
1918 | 13 | int InstanceRecycler::recycle_rowsets() { |
1919 | 13 | const std::string task_name = "recycle_rowsets"; |
1920 | 13 | int64_t num_scanned = 0; |
1921 | 13 | int64_t num_expired = 0; |
1922 | 13 | int64_t num_prepare = 0; |
1923 | 13 | int64_t num_compacted = 0; |
1924 | 13 | int64_t num_empty_rowset = 0; |
1925 | 13 | size_t total_rowset_key_size = 0; |
1926 | 13 | size_t total_rowset_value_size = 0; |
1927 | 13 | size_t expired_rowset_size = 0; |
1928 | 13 | std::atomic_long num_recycled = 0; |
1929 | | |
1930 | 13 | RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""}; |
1931 | 13 | RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""}; |
1932 | 13 | std::string recyc_rs_key0; |
1933 | 13 | std::string recyc_rs_key1; |
1934 | 13 | recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0); |
1935 | 13 | recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1); |
1936 | | |
1937 | 13 | LOG_INFO("begin to recycle rowsets").tag("instance_id", instance_id_); |
1938 | | |
1939 | 13 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
1940 | 13 | register_recycle_task(task_name, start_time); |
1941 | | |
1942 | 13 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { |
1943 | 13 | unregister_recycle_task(task_name); |
1944 | 13 | int64_t cost = |
1945 | 13 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
1946 | 13 | LOG_INFO("recycle rowsets finished, cost={}s", cost) |
1947 | 13 | .tag("instance_id", instance_id_) |
1948 | 13 | .tag("num_scanned", num_scanned) |
1949 | 13 | .tag("num_expired", num_expired) |
1950 | 13 | .tag("num_recycled", num_recycled) |
1951 | 13 | .tag("num_recycled.prepare", num_prepare) |
1952 | 13 | .tag("num_recycled.compacted", num_compacted) |
1953 | 13 | .tag("num_recycled.empty_rowset", num_empty_rowset) |
1954 | 13 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) |
1955 | 13 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) |
1956 | 13 | .tag("expired_rowset_meta_size", expired_rowset_size); |
1957 | 13 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEPi Line | Count | Source | 1942 | 13 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { | 1943 | 13 | unregister_recycle_task(task_name); | 1944 | 13 | int64_t cost = | 1945 | 13 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 1946 | 13 | LOG_INFO("recycle rowsets finished, cost={}s", cost) | 1947 | 13 | .tag("instance_id", instance_id_) | 1948 | 13 | .tag("num_scanned", num_scanned) | 1949 | 13 | .tag("num_expired", num_expired) | 1950 | 13 | .tag("num_recycled", num_recycled) | 1951 | 13 | .tag("num_recycled.prepare", num_prepare) | 1952 | 13 | .tag("num_recycled.compacted", num_compacted) | 1953 | 13 | .tag("num_recycled.empty_rowset", num_empty_rowset) | 1954 | 13 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) | 1955 | 13 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) | 1956 | 13 | .tag("expired_rowset_meta_size", expired_rowset_size); | 1957 | 13 | }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEPi |
1958 | | |
1959 | 13 | std::vector<std::string> rowset_keys; |
1960 | 13 | std::vector<doris::RowsetMetaCloudPB> rowsets; |
1961 | | |
1962 | | // Store keys of rowset recycled by background workers |
1963 | 13 | std::mutex async_recycled_rowset_keys_mutex; |
1964 | 13 | std::vector<std::string> async_recycled_rowset_keys; |
1965 | 13 | auto worker_pool = std::make_unique<SimpleThreadPool>( |
1966 | 13 | config::instance_recycler_worker_pool_size, "recycle_rowsets"); |
1967 | 13 | worker_pool->start(); |
1968 | 13 | auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id, |
1969 | 900 | int64_t tablet_id, const std::string& rowset_id) { |
1970 | | // Try to delete rowset data in background thread |
1971 | 900 | int ret = worker_pool->submit_with_timeout( |
1972 | 900 | [&, resource_id, tablet_id, rowset_id, key]() mutable { |
1973 | 780 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { |
1974 | 0 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); |
1975 | 0 | return; |
1976 | 0 | } |
1977 | 780 | std::vector<std::string> keys; |
1978 | 780 | { |
1979 | 780 | std::lock_guard lock(async_recycled_rowset_keys_mutex); |
1980 | 780 | async_recycled_rowset_keys.push_back(std::move(key)); |
1981 | 780 | if (async_recycled_rowset_keys.size() > 100) { |
1982 | 7 | keys.swap(async_recycled_rowset_keys); |
1983 | 7 | } |
1984 | 780 | } |
1985 | 780 | if (keys.empty()) return; |
1986 | 7 | if (txn_remove(txn_kv_.get(), keys) != 0) { |
1987 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" |
1988 | 0 | << instance_id_; |
1989 | 7 | } else { |
1990 | 7 | num_recycled.fetch_add(keys.size(), std::memory_order_relaxed); |
1991 | 7 | check_recycle_task(instance_id_, "recycle_rowsets", num_scanned, |
1992 | 7 | num_recycled, start_time); |
1993 | 7 | } |
1994 | 7 | }, recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv Line | Count | Source | 1972 | 780 | [&, resource_id, tablet_id, rowset_id, key]() mutable { | 1973 | 780 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { | 1974 | 0 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); | 1975 | 0 | return; | 1976 | 0 | } | 1977 | 780 | std::vector<std::string> keys; | 1978 | 780 | { | 1979 | 780 | std::lock_guard lock(async_recycled_rowset_keys_mutex); | 1980 | 780 | async_recycled_rowset_keys.push_back(std::move(key)); | 1981 | 780 | if (async_recycled_rowset_keys.size() > 100) { | 1982 | 7 | keys.swap(async_recycled_rowset_keys); | 1983 | 7 | } | 1984 | 780 | } | 1985 | 780 | if (keys.empty()) return; | 1986 | 7 | if (txn_remove(txn_kv_.get(), keys) != 0) { | 1987 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" | 1988 | 0 | << instance_id_; | 1989 | 7 | } else { | 1990 | 7 | num_recycled.fetch_add(keys.size(), std::memory_order_relaxed); | 1991 | 7 | check_recycle_task(instance_id_, "recycle_rowsets", num_scanned, | 1992 | 7 | num_recycled, start_time); | 1993 | 7 | } | 1994 | 7 | }, |
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv |
1995 | 900 | 0); |
1996 | 900 | if (ret == 0) return 0; |
1997 | | // Submit task failed, delete rowset data in current thread |
1998 | 120 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { |
1999 | 0 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); |
2000 | 0 | return -1; |
2001 | 0 | } |
2002 | 120 | rowset_keys.push_back(std::move(key)); |
2003 | 120 | return 0; |
2004 | 120 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ Line | Count | Source | 1969 | 900 | int64_t tablet_id, const std::string& rowset_id) { | 1970 | | // Try to delete rowset data in background thread | 1971 | 900 | int ret = worker_pool->submit_with_timeout( | 1972 | 900 | [&, resource_id, tablet_id, rowset_id, key]() mutable { | 1973 | 900 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { | 1974 | 900 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); | 1975 | 900 | return; | 1976 | 900 | } | 1977 | 900 | std::vector<std::string> keys; | 1978 | 900 | { | 1979 | 900 | std::lock_guard lock(async_recycled_rowset_keys_mutex); | 1980 | 900 | async_recycled_rowset_keys.push_back(std::move(key)); | 1981 | 900 | if (async_recycled_rowset_keys.size() > 100) { | 1982 | 900 | keys.swap(async_recycled_rowset_keys); | 1983 | 900 | } | 1984 | 900 | } | 1985 | 900 | if (keys.empty()) return; | 1986 | 900 | if (txn_remove(txn_kv_.get(), keys) != 0) { | 1987 | 900 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" | 1988 | 900 | << instance_id_; | 1989 | 900 | } else { | 1990 | 900 | num_recycled.fetch_add(keys.size(), std::memory_order_relaxed); | 1991 | 900 | check_recycle_task(instance_id_, "recycle_rowsets", num_scanned, | 1992 | 900 | num_recycled, start_time); | 1993 | 900 | } | 1994 | 900 | }, | 1995 | 900 | 0); | 1996 | 900 | if (ret == 0) return 0; | 1997 | | // Submit task failed, delete rowset data in current thread | 1998 | 120 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { | 1999 | 0 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); | 2000 | 0 | return -1; | 2001 | 0 | } | 2002 | 120 | rowset_keys.push_back(std::move(key)); | 2003 | 120 | return 0; | 2004 | 120 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ |
2005 | | |
2006 | 13 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
2007 | | |
2008 | 4.00k | auto calc_expiration = [&earlest_ts, this](const RecycleRowsetPB& rs) { |
2009 | 4.00k | if (config::force_immediate_recycle) { |
2010 | 0 | return 0L; |
2011 | 0 | } |
2012 | | // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time |
2013 | 4.00k | int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time(); |
2014 | 4.00k | int64_t retention_seconds = config::retention_seconds; |
2015 | 4.00k | if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) { |
2016 | 3.10k | retention_seconds = |
2017 | 3.10k | std::min(config::compacted_rowset_retention_seconds, retention_seconds); |
2018 | 3.10k | } |
2019 | 4.00k | int64_t final_expiration = expiration + retention_seconds; |
2020 | 4.00k | if (earlest_ts > final_expiration) { |
2021 | 2 | earlest_ts = final_expiration; |
2022 | 2 | g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, earlest_ts); |
2023 | 2 | } |
2024 | 4.00k | return final_expiration; |
2025 | 4.00k | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clERKNS0_15RecycleRowsetPBE Line | Count | Source | 2008 | 4.00k | auto calc_expiration = [&earlest_ts, this](const RecycleRowsetPB& rs) { | 2009 | 4.00k | if (config::force_immediate_recycle) { | 2010 | 0 | return 0L; | 2011 | 0 | } | 2012 | | // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time | 2013 | 4.00k | int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time(); | 2014 | 4.00k | int64_t retention_seconds = config::retention_seconds; | 2015 | 4.00k | if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) { | 2016 | 3.10k | retention_seconds = | 2017 | 3.10k | std::min(config::compacted_rowset_retention_seconds, retention_seconds); | 2018 | 3.10k | } | 2019 | 4.00k | int64_t final_expiration = expiration + retention_seconds; | 2020 | 4.00k | if (earlest_ts > final_expiration) { | 2021 | 2 | earlest_ts = final_expiration; | 2022 | 2 | g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, earlest_ts); | 2023 | 2 | } | 2024 | 4.00k | return final_expiration; | 2025 | 4.00k | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clERKNS0_15RecycleRowsetPBE |
2026 | | |
2027 | 4.00k | auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int { |
2028 | 4.00k | ++num_scanned; |
2029 | 4.00k | total_rowset_key_size += k.size(); |
2030 | 4.00k | total_rowset_value_size += v.size(); |
2031 | 4.00k | RecycleRowsetPB rowset; |
2032 | 4.00k | if (!rowset.ParseFromArray(v.data(), v.size())) { |
2033 | 0 | LOG_WARNING("malformed recycle rowset").tag("key", hex(k)); |
2034 | 0 | return -1; |
2035 | 0 | } |
2036 | | |
2037 | 4.00k | VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned |
2038 | 0 | << " num_expired=" << num_expired << " expiration=" << calc_expiration(rowset) |
2039 | 0 | << " RecycleRowsetPB=" << rowset.ShortDebugString(); |
2040 | 4.00k | int64_t current_time = ::time(nullptr); |
2041 | 4.00k | if (current_time < calc_expiration(rowset)) { // not expired |
2042 | 0 | return 0; |
2043 | 0 | } |
2044 | 4.00k | ++num_expired; |
2045 | 4.00k | expired_rowset_size += v.size(); |
2046 | 4.00k | if (!rowset.has_type()) { // old version `RecycleRowsetPB` |
2047 | 250 | if (!rowset.has_resource_id()) [[unlikely]] { // impossible |
2048 | | // in old version, keep this key-value pair and it needs to be checked manually |
2049 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); |
2050 | 0 | return -1; |
2051 | 0 | } |
2052 | 250 | if (rowset.resource_id().empty()) [[unlikely]] { |
2053 | | // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv. |
2054 | 0 | LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key=" |
2055 | 0 | << hex(k) << " value=" << proto_to_json(rowset); |
2056 | 0 | rowset_keys.emplace_back(k); |
2057 | 0 | return -1; |
2058 | 0 | } |
2059 | | // decode rowset_id |
2060 | 250 | auto k1 = k; |
2061 | 250 | k1.remove_prefix(1); |
2062 | 250 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
2063 | 250 | decode_key(&k1, &out); |
2064 | | // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB |
2065 | 250 | const auto& rowset_id = std::get<std::string>(std::get<0>(out[4])); |
2066 | 250 | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ |
2067 | 250 | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id; |
2068 | 250 | if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(), |
2069 | 250 | rowset.tablet_id(), rowset_id) != 0) { |
2070 | 0 | return -1; |
2071 | 0 | } |
2072 | 250 | return 0; |
2073 | 250 | } |
2074 | | // TODO(plat1ko): check rowset not referenced |
2075 | 3.75k | auto rowset_meta = rowset.mutable_rowset_meta(); |
2076 | 3.75k | if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible |
2077 | 0 | if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) { |
2078 | 0 | LOG_INFO("recycle rowset that has empty resource id"); |
2079 | 0 | } else { |
2080 | | // other situations, keep this key-value pair and it needs to be checked manually |
2081 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); |
2082 | 0 | return -1; |
2083 | 0 | } |
2084 | 0 | } |
2085 | 3.75k | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ |
2086 | 3.75k | << " tablet_id=" << rowset_meta->tablet_id() |
2087 | 3.75k | << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=[" |
2088 | 3.75k | << rowset_meta->start_version() << '-' << rowset_meta->end_version() |
2089 | 3.75k | << "] txn_id=" << rowset_meta->txn_id() |
2090 | 3.75k | << " type=" << RecycleRowsetPB_Type_Name(rowset.type()) |
2091 | 3.75k | << " rowset_meta_size=" << v.size() |
2092 | 3.75k | << " creation_time=" << rowset_meta->creation_time(); |
2093 | 3.75k | if (rowset.type() == RecycleRowsetPB::PREPARE) { |
2094 | | // unable to calculate file path, can only be deleted by rowset id prefix |
2095 | 650 | num_prepare += 1; |
2096 | 650 | if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(), |
2097 | 650 | rowset_meta->tablet_id(), |
2098 | 650 | rowset_meta->rowset_id_v2()) != 0) { |
2099 | 0 | return -1; |
2100 | 0 | } |
2101 | 3.10k | } else { |
2102 | 3.10k | num_compacted += rowset.type() == RecycleRowsetPB::COMPACT; |
2103 | 3.10k | rowset_keys.emplace_back(k); |
2104 | 3.10k | if (rowset_meta->num_segments() > 0) { // Skip empty rowset |
2105 | 3.10k | rowsets.push_back(std::move(*rowset_meta)); |
2106 | 3.10k | } else { |
2107 | 0 | ++num_empty_rowset; |
2108 | 0 | } |
2109 | 3.10k | } |
2110 | 3.75k | return 0; |
2111 | 3.75k | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 2027 | 4.00k | auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int { | 2028 | 4.00k | ++num_scanned; | 2029 | 4.00k | total_rowset_key_size += k.size(); | 2030 | 4.00k | total_rowset_value_size += v.size(); | 2031 | 4.00k | RecycleRowsetPB rowset; | 2032 | 4.00k | if (!rowset.ParseFromArray(v.data(), v.size())) { | 2033 | 0 | LOG_WARNING("malformed recycle rowset").tag("key", hex(k)); | 2034 | 0 | return -1; | 2035 | 0 | } | 2036 | | | 2037 | 4.00k | VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned | 2038 | 0 | << " num_expired=" << num_expired << " expiration=" << calc_expiration(rowset) | 2039 | 0 | << " RecycleRowsetPB=" << rowset.ShortDebugString(); | 2040 | 4.00k | int64_t current_time = ::time(nullptr); | 2041 | 4.00k | if (current_time < calc_expiration(rowset)) { // not expired | 2042 | 0 | return 0; | 2043 | 0 | } | 2044 | 4.00k | ++num_expired; | 2045 | 4.00k | expired_rowset_size += v.size(); | 2046 | 4.00k | if (!rowset.has_type()) { // old version `RecycleRowsetPB` | 2047 | 250 | if (!rowset.has_resource_id()) [[unlikely]] { // impossible | 2048 | | // in old version, keep this key-value pair and it needs to be checked manually | 2049 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); | 2050 | 0 | return -1; | 2051 | 0 | } | 2052 | 250 | if (rowset.resource_id().empty()) [[unlikely]] { | 2053 | | // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv. 
| 2054 | 0 | LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key=" | 2055 | 0 | << hex(k) << " value=" << proto_to_json(rowset); | 2056 | 0 | rowset_keys.emplace_back(k); | 2057 | 0 | return -1; | 2058 | 0 | } | 2059 | | // decode rowset_id | 2060 | 250 | auto k1 = k; | 2061 | 250 | k1.remove_prefix(1); | 2062 | 250 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 2063 | 250 | decode_key(&k1, &out); | 2064 | | // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB | 2065 | 250 | const auto& rowset_id = std::get<std::string>(std::get<0>(out[4])); | 2066 | 250 | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ | 2067 | 250 | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id; | 2068 | 250 | if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(), | 2069 | 250 | rowset.tablet_id(), rowset_id) != 0) { | 2070 | 0 | return -1; | 2071 | 0 | } | 2072 | 250 | return 0; | 2073 | 250 | } | 2074 | | // TODO(plat1ko): check rowset not referenced | 2075 | 3.75k | auto rowset_meta = rowset.mutable_rowset_meta(); | 2076 | 3.75k | if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible | 2077 | 0 | if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) { | 2078 | 0 | LOG_INFO("recycle rowset that has empty resource id"); | 2079 | 0 | } else { | 2080 | | // other situations, keep this key-value pair and it needs to be checked manually | 2081 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); | 2082 | 0 | return -1; | 2083 | 0 | } | 2084 | 0 | } | 2085 | 3.75k | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ | 2086 | 3.75k | << " tablet_id=" << rowset_meta->tablet_id() | 2087 | 3.75k | << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=[" | 2088 | 3.75k | << rowset_meta->start_version() << '-' << rowset_meta->end_version() | 2089 | 3.75k | << "] txn_id=" << 
rowset_meta->txn_id() | 2090 | 3.75k | << " type=" << RecycleRowsetPB_Type_Name(rowset.type()) | 2091 | 3.75k | << " rowset_meta_size=" << v.size() | 2092 | 3.75k | << " creation_time=" << rowset_meta->creation_time(); | 2093 | 3.75k | if (rowset.type() == RecycleRowsetPB::PREPARE) { | 2094 | | // unable to calculate file path, can only be deleted by rowset id prefix | 2095 | 650 | num_prepare += 1; | 2096 | 650 | if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(), | 2097 | 650 | rowset_meta->tablet_id(), | 2098 | 650 | rowset_meta->rowset_id_v2()) != 0) { | 2099 | 0 | return -1; | 2100 | 0 | } | 2101 | 3.10k | } else { | 2102 | 3.10k | num_compacted += rowset.type() == RecycleRowsetPB::COMPACT; | 2103 | 3.10k | rowset_keys.emplace_back(k); | 2104 | 3.10k | if (rowset_meta->num_segments() > 0) { // Skip empty rowset | 2105 | 3.10k | rowsets.push_back(std::move(*rowset_meta)); | 2106 | 3.10k | } else { | 2107 | 0 | ++num_empty_rowset; | 2108 | 0 | } | 2109 | 3.10k | } | 2110 | 3.75k | return 0; | 2111 | 3.75k | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
2112 | | |
2113 | 21 | auto loop_done = [&]() -> int { |
2114 | 21 | std::vector<std::string> rowset_keys_to_delete; |
2115 | 21 | std::vector<doris::RowsetMetaCloudPB> rowsets_to_delete; |
2116 | 21 | rowset_keys_to_delete.swap(rowset_keys); |
2117 | 21 | rowsets_to_delete.swap(rowsets); |
2118 | 21 | worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete), |
2119 | 21 | rowsets_to_delete = std::move(rowsets_to_delete)]() { |
2120 | 21 | if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET) != 0) { |
2121 | 0 | LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_; |
2122 | 0 | return; |
2123 | 0 | } |
2124 | 21 | if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) { |
2125 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_; |
2126 | 0 | return; |
2127 | 0 | } |
2128 | 21 | num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed); |
2129 | 21 | }); recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv Line | Count | Source | 2119 | 21 | rowsets_to_delete = std::move(rowsets_to_delete)]() { | 2120 | 21 | if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET) != 0) { | 2121 | 0 | LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_; | 2122 | 0 | return; | 2123 | 0 | } | 2124 | 21 | if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) { | 2125 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_; | 2126 | 0 | return; | 2127 | 0 | } | 2128 | 21 | num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed); | 2129 | 21 | }); |
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv |
2130 | 21 | return 0; |
2131 | 21 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv Line | Count | Source | 2113 | 21 | auto loop_done = [&]() -> int { | 2114 | 21 | std::vector<std::string> rowset_keys_to_delete; | 2115 | 21 | std::vector<doris::RowsetMetaCloudPB> rowsets_to_delete; | 2116 | 21 | rowset_keys_to_delete.swap(rowset_keys); | 2117 | 21 | rowsets_to_delete.swap(rowsets); | 2118 | 21 | worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete), | 2119 | 21 | rowsets_to_delete = std::move(rowsets_to_delete)]() { | 2120 | 21 | if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET) != 0) { | 2121 | 21 | LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_; | 2122 | 21 | return; | 2123 | 21 | } | 2124 | 21 | if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) { | 2125 | 21 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_; | 2126 | 21 | return; | 2127 | 21 | } | 2128 | 21 | num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed); | 2129 | 21 | }); | 2130 | 21 | return 0; | 2131 | 21 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv |
2132 | | |
2133 | 13 | int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv), |
2134 | 13 | std::move(loop_done)); |
2135 | 13 | worker_pool->stop(); |
2136 | | |
2137 | 13 | if (!async_recycled_rowset_keys.empty()) { |
2138 | 2 | if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) { |
2139 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_; |
2140 | 0 | return -1; |
2141 | 2 | } else { |
2142 | 2 | num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed); |
2143 | 2 | } |
2144 | 2 | } |
2145 | 13 | return ret; |
2146 | 13 | } |
2147 | | |
2148 | | bool is_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string& instance_id, |
2149 | 3.05k | int64_t txn_id) { |
2150 | 3.05k | std::unique_ptr<Transaction> txn; |
2151 | 3.05k | TxnErrorCode err = txn_kv->create_txn(&txn); |
2152 | 3.05k | if (err != TxnErrorCode::TXN_OK) { |
2153 | 0 | LOG(WARNING) << "failed to create txn, txn_id=" << txn_id << " instance_id=" << instance_id; |
2154 | 0 | return false; |
2155 | 0 | } |
2156 | | |
2157 | 3.05k | std::string index_val; |
2158 | 3.05k | const std::string index_key = txn_index_key({instance_id, txn_id}); |
2159 | 3.05k | err = txn->get(index_key, &index_val); |
2160 | 3.05k | if (err != TxnErrorCode::TXN_OK) { |
2161 | 3.03k | if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) { |
2162 | 3.03k | TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_has_been_recycled"); |
2163 | | // txn has been recycled; |
2164 | 3.03k | LOG(INFO) << "txn index key has been recycled, txn_id=" << txn_id |
2165 | 3.03k | << " instance_id=" << instance_id; |
2166 | 3.03k | return true; |
2167 | 3.03k | } |
2168 | 0 | LOG(WARNING) << "failed to get txn index key, txn_id=" << txn_id |
2169 | 0 | << " instance_id=" << instance_id << " key=" << hex(index_key) |
2170 | 0 | << " err=" << err; |
2171 | 0 | return false; |
2172 | 3.03k | } |
2173 | | |
2174 | 20 | TxnIndexPB index_pb; |
2175 | 20 | if (!index_pb.ParseFromString(index_val)) { |
2176 | 0 | LOG(WARNING) << "failed to parse txn_index_pb, txn_id=" << txn_id |
2177 | 0 | << " instance_id=" << instance_id; |
2178 | 0 | return false; |
2179 | 0 | } |
2180 | | |
2181 | 20 | DCHECK(index_pb.has_tablet_index() == true); |
2182 | 20 | if (!index_pb.tablet_index().has_db_id()) { |
2183 | | // In the previous version, the db_id was not set in the index_pb. |
2184 | | // If updating to the version which enable txn lazy commit, the db_id will be set. |
2185 | 0 | LOG(INFO) << "txn index has no db_id, txn_id=" << txn_id << " instance_id=" << instance_id |
2186 | 0 | << " index=" << index_pb.ShortDebugString(); |
2187 | 0 | return true; |
2188 | 0 | } |
2189 | | |
2190 | 20 | int64_t db_id = index_pb.tablet_index().db_id(); |
2191 | 20 | DCHECK_GT(db_id, 0) << "db_id=" << db_id << " txn_id=" << txn_id |
2192 | 0 | << " instance_id=" << instance_id; |
2193 | | |
2194 | 20 | std::string info_val; |
2195 | 20 | const std::string info_key = txn_info_key({instance_id, db_id, txn_id}); |
2196 | 20 | err = txn->get(info_key, &info_val); |
2197 | 20 | if (err != TxnErrorCode::TXN_OK) { |
2198 | 0 | if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) { |
2199 | | // txn info has been recycled; |
2200 | 0 | LOG(INFO) << "txn info key has been recycled, db_id=" << db_id << " txn_id=" << txn_id |
2201 | 0 | << " instance_id=" << instance_id; |
2202 | 0 | return true; |
2203 | 0 | } |
2204 | | |
2205 | 0 | DCHECK(err != TxnErrorCode::TXN_KEY_NOT_FOUND); |
2206 | 0 | LOG(WARNING) << "failed to get txn info key, txn_id=" << txn_id |
2207 | 0 | << " instance_id=" << instance_id << " key=" << hex(info_key) |
2208 | 0 | << " err=" << err; |
2209 | 0 | return false; |
2210 | 0 | } |
2211 | | |
2212 | 20 | TxnInfoPB txn_info; |
2213 | 20 | if (!txn_info.ParseFromString(info_val)) { |
2214 | 0 | LOG(WARNING) << "failed to parse txn_info, txn_id=" << txn_id |
2215 | 0 | << " instance_id=" << instance_id; |
2216 | 0 | return false; |
2217 | 0 | } |
2218 | | |
2219 | 20 | DCHECK(txn_info.txn_id() == txn_id) << "txn_id=" << txn_id << " instance_id=" << instance_id |
2220 | 0 | << " txn_info=" << txn_info.ShortDebugString(); |
2221 | | |
2222 | 20 | if (TxnStatusPB::TXN_STATUS_ABORTED == txn_info.status() || |
2223 | 20 | TxnStatusPB::TXN_STATUS_VISIBLE == txn_info.status()) { |
2224 | 10 | TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_has_been_aborted", &txn_info); |
2225 | 10 | return true; |
2226 | 10 | } |
2227 | | |
2228 | 10 | TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_not_finished", &txn_info); |
2229 | 10 | return false; |
2230 | 20 | } |
2231 | | |
2232 | 17 | int InstanceRecycler::recycle_tmp_rowsets() { |
2233 | 17 | const std::string task_name = "recycle_tmp_rowsets"; |
2234 | 17 | int64_t num_scanned = 0; |
2235 | 17 | int64_t num_expired = 0; |
2236 | 17 | int64_t num_recycled = 0; |
2237 | 17 | size_t expired_rowset_size = 0; |
2238 | 17 | size_t total_rowset_key_size = 0; |
2239 | 17 | size_t total_rowset_value_size = 0; |
2240 | | |
2241 | 17 | MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0}; |
2242 | 17 | MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0}; |
2243 | 17 | std::string tmp_rs_key0; |
2244 | 17 | std::string tmp_rs_key1; |
2245 | 17 | meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0); |
2246 | 17 | meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1); |
2247 | | |
2248 | 17 | LOG_INFO("begin to recycle tmp rowsets").tag("instance_id", instance_id_); |
2249 | | |
2250 | 17 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
2251 | 17 | register_recycle_task(task_name, start_time); |
2252 | | |
2253 | 17 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { |
2254 | 17 | unregister_recycle_task(task_name); |
2255 | 17 | int64_t cost = |
2256 | 17 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
2257 | 17 | LOG_INFO("recycle tmp rowsets finished, cost={}s", cost) |
2258 | 17 | .tag("instance_id", instance_id_) |
2259 | 17 | .tag("num_scanned", num_scanned) |
2260 | 17 | .tag("num_expired", num_expired) |
2261 | 17 | .tag("num_recycled", num_recycled) |
2262 | 17 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) |
2263 | 17 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) |
2264 | 17 | .tag("expired_rowset_meta_size_recycled", expired_rowset_size); |
2265 | 17 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEPi Line | Count | Source | 2253 | 13 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { | 2254 | 13 | unregister_recycle_task(task_name); | 2255 | 13 | int64_t cost = | 2256 | 13 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 2257 | 13 | LOG_INFO("recycle tmp rowsets finished, cost={}s", cost) | 2258 | 13 | .tag("instance_id", instance_id_) | 2259 | 13 | .tag("num_scanned", num_scanned) | 2260 | 13 | .tag("num_expired", num_expired) | 2261 | 13 | .tag("num_recycled", num_recycled) | 2262 | 13 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) | 2263 | 13 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) | 2264 | 13 | .tag("expired_rowset_meta_size_recycled", expired_rowset_size); | 2265 | 13 | }); |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEPi Line | Count | Source | 2253 | 4 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { | 2254 | 4 | unregister_recycle_task(task_name); | 2255 | 4 | int64_t cost = | 2256 | 4 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 2257 | 4 | LOG_INFO("recycle tmp rowsets finished, cost={}s", cost) | 2258 | 4 | .tag("instance_id", instance_id_) | 2259 | 4 | .tag("num_scanned", num_scanned) | 2260 | 4 | .tag("num_expired", num_expired) | 2261 | 4 | .tag("num_recycled", num_recycled) | 2262 | 4 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) | 2263 | 4 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) | 2264 | 4 | .tag("expired_rowset_meta_size_recycled", expired_rowset_size); | 2265 | 4 | }); |
|
2266 | | |
2267 | | // Elements in `tmp_rowset_keys` has the same lifetime as `it` |
2268 | 17 | std::vector<std::string_view> tmp_rowset_keys; |
2269 | 17 | std::vector<doris::RowsetMetaCloudPB> tmp_rowsets; |
2270 | | |
2271 | 17 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
2272 | 3.05k | auto calc_expiration = [&earlest_ts, this](const doris::RowsetMetaCloudPB& rowset) { |
2273 | | // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment) |
2274 | | // when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading |
2275 | | // duration or timeout always < `retention_time` in practice. |
2276 | 3.05k | int64_t expiration = |
2277 | 3.05k | rowset.txn_expiration() > 0 ? rowset.txn_expiration() : rowset.creation_time(); |
2278 | 3.05k | expiration = config::force_immediate_recycle ? 0 : expiration; |
2279 | 3.05k | int64_t final_expiration = expiration + config::retention_seconds; |
2280 | 3.05k | if (earlest_ts > final_expiration) { |
2281 | 6 | earlest_ts = final_expiration; |
2282 | 6 | g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, earlest_ts); |
2283 | 6 | } |
2284 | 3.05k | return final_expiration; |
2285 | 3.05k | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clERKNS_17RowsetMetaCloudPBE Line | Count | Source | 2272 | 3.02k | auto calc_expiration = [&earlest_ts, this](const doris::RowsetMetaCloudPB& rowset) { | 2273 | | // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment) | 2274 | | // when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading | 2275 | | // duration or timeout always < `retention_time` in practice. | 2276 | 3.02k | int64_t expiration = | 2277 | 3.02k | rowset.txn_expiration() > 0 ? rowset.txn_expiration() : rowset.creation_time(); | 2278 | 3.02k | expiration = config::force_immediate_recycle ? 0 : expiration; | 2279 | 3.02k | int64_t final_expiration = expiration + config::retention_seconds; | 2280 | 3.02k | if (earlest_ts > final_expiration) { | 2281 | 3 | earlest_ts = final_expiration; | 2282 | 3 | g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, earlest_ts); | 2283 | 3 | } | 2284 | 3.02k | return final_expiration; | 2285 | 3.02k | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clERKNS_17RowsetMetaCloudPBE Line | Count | Source | 2272 | 30 | auto calc_expiration = [&earlest_ts, this](const doris::RowsetMetaCloudPB& rowset) { | 2273 | | // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment) | 2274 | | // when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading | 2275 | | // duration or timeout always < `retention_time` in practice. | 2276 | 30 | int64_t expiration = | 2277 | 30 | rowset.txn_expiration() > 0 ? rowset.txn_expiration() : rowset.creation_time(); | 2278 | 30 | expiration = config::force_immediate_recycle ? 0 : expiration; | 2279 | 30 | int64_t final_expiration = expiration + config::retention_seconds; | 2280 | 30 | if (earlest_ts > final_expiration) { | 2281 | 3 | earlest_ts = final_expiration; | 2282 | 3 | g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, earlest_ts); | 2283 | 3 | } | 2284 | 30 | return final_expiration; | 2285 | 30 | }; |
|
2286 | | |
2287 | 17 | auto handle_rowset_kv = [&num_scanned, &num_expired, &tmp_rowset_keys, &tmp_rowsets, |
2288 | 17 | &expired_rowset_size, &total_rowset_key_size, &total_rowset_value_size, |
2289 | 17 | &calc_expiration, |
2290 | 3.05k | this](std::string_view k, std::string_view v) -> int { |
2291 | 3.05k | ++num_scanned; |
2292 | 3.05k | total_rowset_key_size += k.size(); |
2293 | 3.05k | total_rowset_value_size += v.size(); |
2294 | 3.05k | doris::RowsetMetaCloudPB rowset; |
2295 | 3.05k | if (!rowset.ParseFromArray(v.data(), v.size())) { |
2296 | 0 | LOG_WARNING("malformed rowset meta").tag("key", hex(k)); |
2297 | 0 | return -1; |
2298 | 0 | } |
2299 | 3.05k | int64_t expiration = calc_expiration(rowset); |
2300 | 3.05k | VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned |
2301 | 0 | << " num_expired=" << num_expired << " expiration=" << expiration |
2302 | 0 | << " txn_expiration=" << rowset.txn_expiration() |
2303 | 0 | << " rowset_creation_time=" << rowset.creation_time(); |
2304 | 3.05k | int64_t current_time = ::time(nullptr); |
2305 | 3.05k | if (current_time < expiration) { // not expired |
2306 | 0 | return 0; |
2307 | 0 | } |
2308 | | |
2309 | 3.05k | DCHECK_GT(rowset.txn_id(), 0) |
2310 | 0 | << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString(); |
2311 | 3.05k | if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) { |
2312 | 10 | LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id=" |
2313 | 10 | << instance_id_ << " tablet_id=" << rowset.tablet_id() |
2314 | 10 | << " rowset_id=" << rowset.rowset_id_v2() << " version=[" |
2315 | 10 | << rowset.start_version() << '-' << rowset.end_version() |
2316 | 10 | << "] txn_id=" << rowset.txn_id() |
2317 | 10 | << " creation_time=" << rowset.creation_time() << " expiration=" << expiration |
2318 | 10 | << " txn_expiration=" << rowset.txn_expiration(); |
2319 | 10 | return 0; |
2320 | 10 | } |
2321 | | |
2322 | 3.04k | ++num_expired; |
2323 | 3.04k | expired_rowset_size += v.size(); |
2324 | 3.04k | if (!rowset.has_resource_id()) { |
2325 | 20 | if (rowset.num_segments() > 0) [[unlikely]] { // impossible |
2326 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", k); |
2327 | 0 | return -1; |
2328 | 0 | } |
2329 | | // might be a delete pred rowset |
2330 | 20 | tmp_rowset_keys.push_back(k); |
2331 | 20 | return 0; |
2332 | 20 | } |
2333 | | // TODO(plat1ko): check rowset not referenced |
2334 | 3.02k | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ |
2335 | 3.02k | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2() |
2336 | 3.02k | << " version=[" << rowset.start_version() << '-' << rowset.end_version() |
2337 | 3.02k | << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size() |
2338 | 3.02k | << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned |
2339 | 3.02k | << " num_expired=" << num_expired; |
2340 | | |
2341 | 3.02k | tmp_rowset_keys.push_back(k); |
2342 | 3.02k | if (rowset.num_segments() > 0) { // Skip empty rowset |
2343 | 3.02k | tmp_rowsets.push_back(std::move(rowset)); |
2344 | 3.02k | } |
2345 | 3.02k | return 0; |
2346 | 3.04k | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 2290 | 3.02k | this](std::string_view k, std::string_view v) -> int { | 2291 | 3.02k | ++num_scanned; | 2292 | 3.02k | total_rowset_key_size += k.size(); | 2293 | 3.02k | total_rowset_value_size += v.size(); | 2294 | 3.02k | doris::RowsetMetaCloudPB rowset; | 2295 | 3.02k | if (!rowset.ParseFromArray(v.data(), v.size())) { | 2296 | 0 | LOG_WARNING("malformed rowset meta").tag("key", hex(k)); | 2297 | 0 | return -1; | 2298 | 0 | } | 2299 | 3.02k | int64_t expiration = calc_expiration(rowset); | 2300 | 3.02k | VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned | 2301 | 0 | << " num_expired=" << num_expired << " expiration=" << expiration | 2302 | 0 | << " txn_expiration=" << rowset.txn_expiration() | 2303 | 0 | << " rowset_creation_time=" << rowset.creation_time(); | 2304 | 3.02k | int64_t current_time = ::time(nullptr); | 2305 | 3.02k | if (current_time < expiration) { // not expired | 2306 | 0 | return 0; | 2307 | 0 | } | 2308 | | | 2309 | 3.02k | DCHECK_GT(rowset.txn_id(), 0) | 2310 | 0 | << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString(); | 2311 | 3.02k | if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) { | 2312 | 0 | LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id=" | 2313 | 0 | << instance_id_ << " tablet_id=" << rowset.tablet_id() | 2314 | 0 | << " rowset_id=" << rowset.rowset_id_v2() << " version=[" | 2315 | 0 | << rowset.start_version() << '-' << rowset.end_version() | 2316 | 0 | << "] txn_id=" << rowset.txn_id() | 2317 | 0 | << " creation_time=" << rowset.creation_time() << " expiration=" << expiration | 2318 | 0 | << " txn_expiration=" << rowset.txn_expiration(); | 2319 | 0 | return 0; | 2320 | 0 | } | 2321 | | | 2322 | 3.02k | ++num_expired; | 2323 | 3.02k | expired_rowset_size += 
v.size(); | 2324 | 3.02k | if (!rowset.has_resource_id()) { | 2325 | 0 | if (rowset.num_segments() > 0) [[unlikely]] { // impossible | 2326 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", k); | 2327 | 0 | return -1; | 2328 | 0 | } | 2329 | | // might be a delete pred rowset | 2330 | 0 | tmp_rowset_keys.push_back(k); | 2331 | 0 | return 0; | 2332 | 0 | } | 2333 | | // TODO(plat1ko): check rowset not referenced | 2334 | 3.02k | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ | 2335 | 3.02k | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2() | 2336 | 3.02k | << " version=[" << rowset.start_version() << '-' << rowset.end_version() | 2337 | 3.02k | << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size() | 2338 | 3.02k | << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned | 2339 | 3.02k | << " num_expired=" << num_expired; | 2340 | | | 2341 | 3.02k | tmp_rowset_keys.push_back(k); | 2342 | 3.02k | if (rowset.num_segments() > 0) { // Skip empty rowset | 2343 | 3.02k | tmp_rowsets.push_back(std::move(rowset)); | 2344 | 3.02k | } | 2345 | 3.02k | return 0; | 2346 | 3.02k | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 2290 | 30 | this](std::string_view k, std::string_view v) -> int { | 2291 | 30 | ++num_scanned; | 2292 | 30 | total_rowset_key_size += k.size(); | 2293 | 30 | total_rowset_value_size += v.size(); | 2294 | 30 | doris::RowsetMetaCloudPB rowset; | 2295 | 30 | if (!rowset.ParseFromArray(v.data(), v.size())) { | 2296 | 0 | LOG_WARNING("malformed rowset meta").tag("key", hex(k)); | 2297 | 0 | return -1; | 2298 | 0 | } | 2299 | 30 | int64_t expiration = calc_expiration(rowset); | 2300 | 30 | VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned | 2301 | 0 | << " num_expired=" << num_expired << " expiration=" << expiration | 2302 | 0 | << " txn_expiration=" << rowset.txn_expiration() | 2303 | 0 | << " rowset_creation_time=" << rowset.creation_time(); | 2304 | 30 | int64_t current_time = ::time(nullptr); | 2305 | 30 | if (current_time < expiration) { // not expired | 2306 | 0 | return 0; | 2307 | 0 | } | 2308 | | | 2309 | 30 | DCHECK_GT(rowset.txn_id(), 0) | 2310 | 0 | << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString(); | 2311 | 30 | if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) { | 2312 | 10 | LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id=" | 2313 | 10 | << instance_id_ << " tablet_id=" << rowset.tablet_id() | 2314 | 10 | << " rowset_id=" << rowset.rowset_id_v2() << " version=[" | 2315 | 10 | << rowset.start_version() << '-' << rowset.end_version() | 2316 | 10 | << "] txn_id=" << rowset.txn_id() | 2317 | 10 | << " creation_time=" << rowset.creation_time() << " expiration=" << expiration | 2318 | 10 | << " txn_expiration=" << rowset.txn_expiration(); | 2319 | 10 | return 0; | 2320 | 10 | } | 2321 | | | 2322 | 20 | ++num_expired; | 2323 | 20 | expired_rowset_size += v.size(); | 2324 | 20 | if (!rowset.has_resource_id()) { | 
2325 | 20 | if (rowset.num_segments() > 0) [[unlikely]] { // impossible | 2326 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", k); | 2327 | 0 | return -1; | 2328 | 0 | } | 2329 | | // might be a delete pred rowset | 2330 | 20 | tmp_rowset_keys.push_back(k); | 2331 | 20 | return 0; | 2332 | 20 | } | 2333 | | // TODO(plat1ko): check rowset not referenced | 2334 | 0 | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ | 2335 | 0 | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2() | 2336 | 0 | << " version=[" << rowset.start_version() << '-' << rowset.end_version() | 2337 | 0 | << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size() | 2338 | 0 | << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned | 2339 | 0 | << " num_expired=" << num_expired; | 2340 | |
| 2341 | 0 | tmp_rowset_keys.push_back(k); | 2342 | 0 | if (rowset.num_segments() > 0) { // Skip empty rowset | 2343 | 0 | tmp_rowsets.push_back(std::move(rowset)); | 2344 | 0 | } | 2345 | 0 | return 0; | 2346 | 20 | }; |
|
2347 | | |
2348 | 17 | auto loop_done = [&tmp_rowset_keys, &tmp_rowsets, &num_recycled, this]() -> int { |
2349 | 6 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) { |
2350 | 6 | tmp_rowset_keys.clear(); |
2351 | 6 | tmp_rowsets.clear(); |
2352 | 6 | }); recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEvENKUlPiE_clES3_ Line | Count | Source | 2349 | 3 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) { | 2350 | 3 | tmp_rowset_keys.clear(); | 2351 | 3 | tmp_rowsets.clear(); | 2352 | 3 | }); |
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEvENKUlPiE_clES3_ Line | Count | Source | 2349 | 3 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) { | 2350 | 3 | tmp_rowset_keys.clear(); | 2351 | 3 | tmp_rowsets.clear(); | 2352 | 3 | }); |
|
2353 | 6 | if (delete_rowset_data(tmp_rowsets, RowsetRecyclingState::TMP_ROWSET) != 0) { |
2354 | 0 | LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_; |
2355 | 0 | return -1; |
2356 | 0 | } |
2357 | 6 | if (txn_remove(txn_kv_.get(), tmp_rowset_keys) != 0) { |
2358 | 0 | LOG(WARNING) << "failed to delete tmp rowset kv, instance_id=" << instance_id_; |
2359 | 0 | return -1; |
2360 | 0 | } |
2361 | 6 | num_recycled += tmp_rowset_keys.size(); |
2362 | 6 | return 0; |
2363 | 6 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEv Line | Count | Source | 2348 | 3 | auto loop_done = [&tmp_rowset_keys, &tmp_rowsets, &num_recycled, this]() -> int { | 2349 | 3 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) { | 2350 | 3 | tmp_rowset_keys.clear(); | 2351 | 3 | tmp_rowsets.clear(); | 2352 | 3 | }); | 2353 | 3 | if (delete_rowset_data(tmp_rowsets, RowsetRecyclingState::TMP_ROWSET) != 0) { | 2354 | 0 | LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_; | 2355 | 0 | return -1; | 2356 | 0 | } | 2357 | 3 | if (txn_remove(txn_kv_.get(), tmp_rowset_keys) != 0) { | 2358 | 0 | LOG(WARNING) << "failed to delete tmp rowset kv, instance_id=" << instance_id_; | 2359 | 0 | return -1; | 2360 | 0 | } | 2361 | 3 | num_recycled += tmp_rowset_keys.size(); | 2362 | 3 | return 0; | 2363 | 3 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEv Line | Count | Source | 2348 | 3 | auto loop_done = [&tmp_rowset_keys, &tmp_rowsets, &num_recycled, this]() -> int { | 2349 | 3 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) { | 2350 | 3 | tmp_rowset_keys.clear(); | 2351 | 3 | tmp_rowsets.clear(); | 2352 | 3 | }); | 2353 | 3 | if (delete_rowset_data(tmp_rowsets, RowsetRecyclingState::TMP_ROWSET) != 0) { | 2354 | 0 | LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_; | 2355 | 0 | return -1; | 2356 | 0 | } | 2357 | 3 | if (txn_remove(txn_kv_.get(), tmp_rowset_keys) != 0) { | 2358 | 0 | LOG(WARNING) << "failed to delete tmp rowset kv, instance_id=" << instance_id_; | 2359 | 0 | return -1; | 2360 | 0 | } | 2361 | 3 | num_recycled += tmp_rowset_keys.size(); | 2362 | 3 | return 0; | 2363 | 3 | }; |
|
2364 | | |
2365 | 17 | return scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_rowset_kv), |
2366 | 17 | std::move(loop_done)); |
2367 | 17 | } |
2368 | | |
2369 | | int InstanceRecycler::scan_and_recycle( |
2370 | | std::string begin, std::string_view end, |
2371 | | std::function<int(std::string_view k, std::string_view v)> recycle_func, |
2372 | 174 | std::function<int()> loop_done) { |
2373 | 174 | LOG(INFO) << "begin scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) << ")"; |
2374 | 174 | int ret = 0; |
2375 | 174 | int64_t cnt = 0; |
2376 | 174 | int get_range_retried = 0; |
2377 | 174 | std::string err; |
2378 | 174 | std::unique_ptr<int, std::function<void(int*)>> defer_log( |
2379 | 174 | (int*)0x01, [begin, end, &err, &ret, &cnt, &get_range_retried](int*) { |
2380 | 174 | LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) |
2381 | 174 | << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried |
2382 | 174 | << " ret=" << ret << " err=" << err; |
2383 | 174 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEPi Line | Count | Source | 2379 | 155 | (int*)0x01, [begin, end, &err, &ret, &cnt, &get_range_retried](int*) { | 2380 | 155 | LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) | 2381 | 155 | << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried | 2382 | 155 | << " ret=" << ret << " err=" << err; | 2383 | 155 | }); |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEPi Line | Count | Source | 2379 | 19 | (int*)0x01, [begin, end, &err, &ret, &cnt, &get_range_retried](int*) { | 2380 | 19 | LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) | 2381 | 19 | << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried | 2382 | 19 | << " ret=" << ret << " err=" << err; | 2383 | 19 | }); |
|
2384 | | |
2385 | 174 | std::unique_ptr<RangeGetIterator> it; |
2386 | 194 | do { |
2387 | 194 | if (get_range_retried > 1000) { |
2388 | 0 | err = "txn_get exceeds max retry, may not scan all keys"; |
2389 | 0 | ret = -1; |
2390 | 0 | return -1; |
2391 | 0 | } |
2392 | 194 | int get_ret = txn_get(txn_kv_.get(), begin, end, it); |
2393 | 194 | if (get_ret != 0) { // txn kv may complain "Request for future version" |
2394 | 0 | LOG(WARNING) << "failed to get kv, range=[" << hex(begin) << "," << hex(end) |
2395 | 0 | << ") num_scanned=" << cnt << " txn_get_ret=" << get_ret |
2396 | 0 | << " get_range_retried=" << get_range_retried; |
2397 | 0 | ++get_range_retried; |
2398 | 0 | std::this_thread::sleep_for(std::chrono::milliseconds(500)); |
2399 | 0 | continue; // try again |
2400 | 0 | } |
2401 | 194 | if (!it->has_next()) { |
2402 | 91 | LOG(INFO) << "no keys in the given range=[" << hex(begin) << "," << hex(end) << ")"; |
2403 | 91 | break; // scan finished |
2404 | 91 | } |
2405 | 37.4k | while (it->has_next()) { |
2406 | 37.3k | ++cnt; |
2407 | | // recycle corresponding resources |
2408 | 37.3k | auto [k, v] = it->next(); |
2409 | 37.3k | if (!it->has_next()) { |
2410 | 102 | begin = k; |
2411 | 102 | VLOG_DEBUG << "iterator has no more kvs. key=" << hex(k); |
2412 | 102 | } |
2413 | | // if we want to continue scanning, the recycle_func should not return non-zero |
2414 | 37.3k | if (recycle_func(k, v) != 0) { |
2415 | 22 | err = "recycle_func error"; |
2416 | 22 | ret = -1; |
2417 | 22 | } |
2418 | 37.3k | } |
2419 | 103 | begin.push_back('\x00'); // Update to next smallest key for iteration |
2420 | | // if we want to continue scanning, the recycle_func should not return non-zero |
2421 | 103 | if (loop_done && loop_done() != 0) { |
2422 | 2 | err = "loop_done error"; |
2423 | 2 | ret = -1; |
2424 | 2 | } |
2425 | 103 | } while (it->more() && !stopped()); |
2426 | 174 | return ret; |
2427 | 174 | } |
2428 | | |
2429 | 20 | int InstanceRecycler::abort_timeout_txn() { |
2430 | 20 | const std::string task_name = "abort_timeout_txn"; |
2431 | 20 | int64_t num_scanned = 0; |
2432 | 20 | int64_t num_timeout = 0; |
2433 | 20 | int64_t num_abort = 0; |
2434 | 20 | int64_t num_advance = 0; |
2435 | | |
2436 | 20 | TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0}; |
2437 | 20 | TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX}; |
2438 | 20 | std::string begin_txn_running_key; |
2439 | 20 | std::string end_txn_running_key; |
2440 | 20 | txn_running_key(txn_running_key_info0, &begin_txn_running_key); |
2441 | 20 | txn_running_key(txn_running_key_info1, &end_txn_running_key); |
2442 | | |
2443 | 20 | LOG_INFO("begin to abort timeout txn").tag("instance_id", instance_id_); |
2444 | | |
2445 | 20 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
2446 | 20 | register_recycle_task(task_name, start_time); |
2447 | | |
2448 | 20 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { |
2449 | 20 | unregister_recycle_task(task_name); |
2450 | 20 | int64_t cost = |
2451 | 20 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
2452 | 20 | LOG_INFO("end to abort timeout txn, cost={}s", cost) |
2453 | 20 | .tag("instance_id", instance_id_) |
2454 | 20 | .tag("num_scanned", num_scanned) |
2455 | 20 | .tag("num_timeout", num_timeout) |
2456 | 20 | .tag("num_abort", num_abort) |
2457 | 20 | .tag("num_advance", num_advance); |
2458 | 20 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEPi Line | Count | Source | 2448 | 16 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { | 2449 | 16 | unregister_recycle_task(task_name); | 2450 | 16 | int64_t cost = | 2451 | 16 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 2452 | 16 | LOG_INFO("end to abort timeout txn, cost={}s", cost) | 2453 | 16 | .tag("instance_id", instance_id_) | 2454 | 16 | .tag("num_scanned", num_scanned) | 2455 | 16 | .tag("num_timeout", num_timeout) | 2456 | 16 | .tag("num_abort", num_abort) | 2457 | 16 | .tag("num_advance", num_advance); | 2458 | 16 | }); |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEPi Line | Count | Source | 2448 | 4 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { | 2449 | 4 | unregister_recycle_task(task_name); | 2450 | 4 | int64_t cost = | 2451 | 4 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 2452 | 4 | LOG_INFO("end to abort timeout txn, cost={}s", cost) | 2453 | 4 | .tag("instance_id", instance_id_) | 2454 | 4 | .tag("num_scanned", num_scanned) | 2455 | 4 | .tag("num_timeout", num_timeout) | 2456 | 4 | .tag("num_abort", num_abort) | 2457 | 4 | .tag("num_advance", num_advance); | 2458 | 4 | }); |
|
2459 | | |
2460 | 20 | int64_t current_time = |
2461 | 20 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
2462 | | |
2463 | 20 | auto handle_txn_running_kv = [&num_scanned, &num_timeout, &num_abort, &num_advance, |
2464 | 20 | &current_time, |
2465 | 20 | this](std::string_view k, std::string_view v) -> int { |
2466 | 10 | ++num_scanned; |
2467 | | |
2468 | 10 | std::unique_ptr<Transaction> txn; |
2469 | 10 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
2470 | 10 | if (err != TxnErrorCode::TXN_OK) { |
2471 | 0 | LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k)); |
2472 | 0 | return -1; |
2473 | 0 | } |
2474 | 10 | std::string_view k1 = k; |
2475 | | //TxnRunningKeyInfo 0:instance_id 1:db_id 2:txn_id |
2476 | 10 | k1.remove_prefix(1); // Remove key space |
2477 | 10 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
2478 | 10 | if (decode_key(&k1, &out) != 0) { |
2479 | 0 | LOG_ERROR("failed to decode key").tag("key", hex(k)); |
2480 | 0 | return -1; |
2481 | 0 | } |
2482 | 10 | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); |
2483 | 10 | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); |
2484 | 10 | VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id; |
2485 | | // Update txn_info |
2486 | 10 | std::string txn_inf_key, txn_inf_val; |
2487 | 10 | txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key); |
2488 | 10 | err = txn->get(txn_inf_key, &txn_inf_val); |
2489 | 10 | if (err != TxnErrorCode::TXN_OK) { |
2490 | 0 | LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key)); |
2491 | 0 | return -1; |
2492 | 0 | } |
2493 | 10 | TxnInfoPB txn_info; |
2494 | 10 | if (!txn_info.ParseFromString(txn_inf_val)) { |
2495 | 0 | LOG_WARNING("failed to parse txn info").tag("key", hex(k)); |
2496 | 0 | return -1; |
2497 | 0 | } |
2498 | | |
2499 | 10 | if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) { |
2500 | 4 | txn.reset(); |
2501 | 4 | TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info); |
2502 | 4 | std::shared_ptr<TxnLazyCommitTask> task = |
2503 | 4 | txn_lazy_committer_->submit(instance_id_, txn_info.txn_id()); |
2504 | 4 | std::pair<MetaServiceCode, std::string> ret = task->wait(); |
2505 | 4 | if (ret.first != MetaServiceCode::OK) { |
2506 | 0 | LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first |
2507 | 0 | << "msg=" << ret.second; |
2508 | 0 | return -1; |
2509 | 0 | } |
2510 | 4 | ++num_advance; |
2511 | 4 | return 0; |
2512 | 6 | } else { |
2513 | 6 | TxnRunningPB txn_running_pb; |
2514 | 6 | if (!txn_running_pb.ParseFromArray(v.data(), v.size())) { |
2515 | 0 | LOG_WARNING("malformed txn_running_pb").tag("key", hex(k)); |
2516 | 0 | return -1; |
2517 | 0 | } |
2518 | 6 | if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) { |
2519 | 4 | return 0; |
2520 | 4 | } |
2521 | 2 | ++num_timeout; |
2522 | | |
2523 | 2 | DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE); |
2524 | 2 | txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED); |
2525 | 2 | txn_info.set_finish_time(current_time); |
2526 | 2 | txn_info.set_reason("timeout"); |
2527 | 2 | VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString(); |
2528 | 2 | txn_inf_val.clear(); |
2529 | 2 | if (!txn_info.SerializeToString(&txn_inf_val)) { |
2530 | 0 | LOG_WARNING("failed to serialize txn info").tag("key", hex(k)); |
2531 | 0 | return -1; |
2532 | 0 | } |
2533 | 2 | txn->put(txn_inf_key, txn_inf_val); |
2534 | 2 | VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key); |
2535 | | // Put recycle txn key |
2536 | 2 | std::string recyc_txn_key, recyc_txn_val; |
2537 | 2 | recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key); |
2538 | 2 | RecycleTxnPB recycle_txn_pb; |
2539 | 2 | recycle_txn_pb.set_creation_time(current_time); |
2540 | 2 | recycle_txn_pb.set_label(txn_info.label()); |
2541 | 2 | if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) { |
2542 | 0 | LOG_WARNING("failed to serialize txn recycle info") |
2543 | 0 | .tag("key", hex(k)) |
2544 | 0 | .tag("db_id", db_id) |
2545 | 0 | .tag("txn_id", txn_id); |
2546 | 0 | return -1; |
2547 | 0 | } |
2548 | 2 | txn->put(recyc_txn_key, recyc_txn_val); |
2549 | | // Remove txn running key |
2550 | 2 | txn->remove(k); |
2551 | 2 | err = txn->commit(); |
2552 | 2 | if (err != TxnErrorCode::TXN_OK) { |
2553 | 0 | LOG_WARNING("failed to commit txn err={}", err) |
2554 | 0 | .tag("key", hex(k)) |
2555 | 0 | .tag("db_id", db_id) |
2556 | 0 | .tag("txn_id", txn_id); |
2557 | 0 | return -1; |
2558 | 0 | } |
2559 | 2 | ++num_abort; |
2560 | 2 | } |
2561 | | |
2562 | 2 | return 0; |
2563 | 10 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 2465 | 6 | this](std::string_view k, std::string_view v) -> int { | 2466 | 6 | ++num_scanned; | 2467 | | | 2468 | 6 | std::unique_ptr<Transaction> txn; | 2469 | 6 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 2470 | 6 | if (err != TxnErrorCode::TXN_OK) { | 2471 | 0 | LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k)); | 2472 | 0 | return -1; | 2473 | 0 | } | 2474 | 6 | std::string_view k1 = k; | 2475 | | //TxnRunningKeyInfo 0:instance_id 1:db_id 2:txn_id | 2476 | 6 | k1.remove_prefix(1); // Remove key space | 2477 | 6 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 2478 | 6 | if (decode_key(&k1, &out) != 0) { | 2479 | 0 | LOG_ERROR("failed to decode key").tag("key", hex(k)); | 2480 | 0 | return -1; | 2481 | 0 | } | 2482 | 6 | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); | 2483 | 6 | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); | 2484 | 6 | VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id; | 2485 | | // Update txn_info | 2486 | 6 | std::string txn_inf_key, txn_inf_val; | 2487 | 6 | txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key); | 2488 | 6 | err = txn->get(txn_inf_key, &txn_inf_val); | 2489 | 6 | if (err != TxnErrorCode::TXN_OK) { | 2490 | 0 | LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key)); | 2491 | 0 | return -1; | 2492 | 0 | } | 2493 | 6 | TxnInfoPB txn_info; | 2494 | 6 | if (!txn_info.ParseFromString(txn_inf_val)) { | 2495 | 0 | LOG_WARNING("failed to parse txn info").tag("key", hex(k)); | 2496 | 0 | return -1; | 2497 | 0 | } | 2498 | | | 2499 | 6 | if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) { | 2500 | 0 | txn.reset(); | 2501 | 0 | TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info); | 2502 | 0 
| std::shared_ptr<TxnLazyCommitTask> task = | 2503 | 0 | txn_lazy_committer_->submit(instance_id_, txn_info.txn_id()); | 2504 | 0 | std::pair<MetaServiceCode, std::string> ret = task->wait(); | 2505 | 0 | if (ret.first != MetaServiceCode::OK) { | 2506 | 0 | LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first | 2507 | 0 | << "msg=" << ret.second; | 2508 | 0 | return -1; | 2509 | 0 | } | 2510 | 0 | ++num_advance; | 2511 | 0 | return 0; | 2512 | 6 | } else { | 2513 | 6 | TxnRunningPB txn_running_pb; | 2514 | 6 | if (!txn_running_pb.ParseFromArray(v.data(), v.size())) { | 2515 | 0 | LOG_WARNING("malformed txn_running_pb").tag("key", hex(k)); | 2516 | 0 | return -1; | 2517 | 0 | } | 2518 | 6 | if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) { | 2519 | 4 | return 0; | 2520 | 4 | } | 2521 | 2 | ++num_timeout; | 2522 | | | 2523 | 2 | DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE); | 2524 | 2 | txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED); | 2525 | 2 | txn_info.set_finish_time(current_time); | 2526 | 2 | txn_info.set_reason("timeout"); | 2527 | 2 | VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString(); | 2528 | 2 | txn_inf_val.clear(); | 2529 | 2 | if (!txn_info.SerializeToString(&txn_inf_val)) { | 2530 | 0 | LOG_WARNING("failed to serialize txn info").tag("key", hex(k)); | 2531 | 0 | return -1; | 2532 | 0 | } | 2533 | 2 | txn->put(txn_inf_key, txn_inf_val); | 2534 | 2 | VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key); | 2535 | | // Put recycle txn key | 2536 | 2 | std::string recyc_txn_key, recyc_txn_val; | 2537 | 2 | recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key); | 2538 | 2 | RecycleTxnPB recycle_txn_pb; | 2539 | 2 | recycle_txn_pb.set_creation_time(current_time); | 2540 | 2 | recycle_txn_pb.set_label(txn_info.label()); | 2541 | 2 | if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) { | 2542 | 0 | LOG_WARNING("failed to serialize txn recycle 
info") | 2543 | 0 | .tag("key", hex(k)) | 2544 | 0 | .tag("db_id", db_id) | 2545 | 0 | .tag("txn_id", txn_id); | 2546 | 0 | return -1; | 2547 | 0 | } | 2548 | 2 | txn->put(recyc_txn_key, recyc_txn_val); | 2549 | | // Remove txn running key | 2550 | 2 | txn->remove(k); | 2551 | 2 | err = txn->commit(); | 2552 | 2 | if (err != TxnErrorCode::TXN_OK) { | 2553 | 0 | LOG_WARNING("failed to commit txn err={}", err) | 2554 | 0 | .tag("key", hex(k)) | 2555 | 0 | .tag("db_id", db_id) | 2556 | 0 | .tag("txn_id", txn_id); | 2557 | 0 | return -1; | 2558 | 0 | } | 2559 | 2 | ++num_abort; | 2560 | 2 | } | 2561 | | | 2562 | 2 | return 0; | 2563 | 6 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 2465 | 4 | this](std::string_view k, std::string_view v) -> int { | 2466 | 4 | ++num_scanned; | 2467 | | | 2468 | 4 | std::unique_ptr<Transaction> txn; | 2469 | 4 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 2470 | 4 | if (err != TxnErrorCode::TXN_OK) { | 2471 | 0 | LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k)); | 2472 | 0 | return -1; | 2473 | 0 | } | 2474 | 4 | std::string_view k1 = k; | 2475 | | //TxnRunningKeyInfo 0:instance_id 1:db_id 2:txn_id | 2476 | 4 | k1.remove_prefix(1); // Remove key space | 2477 | 4 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 2478 | 4 | if (decode_key(&k1, &out) != 0) { | 2479 | 0 | LOG_ERROR("failed to decode key").tag("key", hex(k)); | 2480 | 0 | return -1; | 2481 | 0 | } | 2482 | 4 | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); | 2483 | 4 | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); | 2484 | 4 | VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id; | 2485 | | // Update txn_info | 2486 | 4 | std::string txn_inf_key, txn_inf_val; | 2487 | 4 | txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key); | 2488 | 4 | err = txn->get(txn_inf_key, &txn_inf_val); | 2489 | 4 | if (err != TxnErrorCode::TXN_OK) { | 2490 | 0 | LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key)); | 2491 | 0 | return -1; | 2492 | 0 | } | 2493 | 4 | TxnInfoPB txn_info; | 2494 | 4 | if (!txn_info.ParseFromString(txn_inf_val)) { | 2495 | 0 | LOG_WARNING("failed to parse txn info").tag("key", hex(k)); | 2496 | 0 | return -1; | 2497 | 0 | } | 2498 | | | 2499 | 4 | if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) { | 2500 | 4 | txn.reset(); | 2501 | 4 | TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info); | 2502 | 4 | 
std::shared_ptr<TxnLazyCommitTask> task = | 2503 | 4 | txn_lazy_committer_->submit(instance_id_, txn_info.txn_id()); | 2504 | 4 | std::pair<MetaServiceCode, std::string> ret = task->wait(); | 2505 | 4 | if (ret.first != MetaServiceCode::OK) { | 2506 | 0 | LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first | 2507 | 0 | << "msg=" << ret.second; | 2508 | 0 | return -1; | 2509 | 0 | } | 2510 | 4 | ++num_advance; | 2511 | 4 | return 0; | 2512 | 4 | } else { | 2513 | 0 | TxnRunningPB txn_running_pb; | 2514 | 0 | if (!txn_running_pb.ParseFromArray(v.data(), v.size())) { | 2515 | 0 | LOG_WARNING("malformed txn_running_pb").tag("key", hex(k)); | 2516 | 0 | return -1; | 2517 | 0 | } | 2518 | 0 | if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) { | 2519 | 0 | return 0; | 2520 | 0 | } | 2521 | 0 | ++num_timeout; | 2522 | |
| 2523 | 0 | DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE); | 2524 | 0 | txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED); | 2525 | 0 | txn_info.set_finish_time(current_time); | 2526 | 0 | txn_info.set_reason("timeout"); | 2527 | 0 | VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString(); | 2528 | 0 | txn_inf_val.clear(); | 2529 | 0 | if (!txn_info.SerializeToString(&txn_inf_val)) { | 2530 | 0 | LOG_WARNING("failed to serialize txn info").tag("key", hex(k)); | 2531 | 0 | return -1; | 2532 | 0 | } | 2533 | 0 | txn->put(txn_inf_key, txn_inf_val); | 2534 | 0 | VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key); | 2535 | | // Put recycle txn key | 2536 | 0 | std::string recyc_txn_key, recyc_txn_val; | 2537 | 0 | recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key); | 2538 | 0 | RecycleTxnPB recycle_txn_pb; | 2539 | 0 | recycle_txn_pb.set_creation_time(current_time); | 2540 | 0 | recycle_txn_pb.set_label(txn_info.label()); | 2541 | 0 | if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) { | 2542 | 0 | LOG_WARNING("failed to serialize txn recycle info") | 2543 | 0 | .tag("key", hex(k)) | 2544 | 0 | .tag("db_id", db_id) | 2545 | 0 | .tag("txn_id", txn_id); | 2546 | 0 | return -1; | 2547 | 0 | } | 2548 | 0 | txn->put(recyc_txn_key, recyc_txn_val); | 2549 | | // Remove txn running key | 2550 | 0 | txn->remove(k); | 2551 | 0 | err = txn->commit(); | 2552 | 0 | if (err != TxnErrorCode::TXN_OK) { | 2553 | 0 | LOG_WARNING("failed to commit txn err={}", err) | 2554 | 0 | .tag("key", hex(k)) | 2555 | 0 | .tag("db_id", db_id) | 2556 | 0 | .tag("txn_id", txn_id); | 2557 | 0 | return -1; | 2558 | 0 | } | 2559 | 0 | ++num_abort; | 2560 | 0 | } | 2561 | | | 2562 | 0 | return 0; | 2563 | 4 | }; |
|
2564 | | |
2565 | 20 | return scan_and_recycle(begin_txn_running_key, end_txn_running_key, |
2566 | 20 | std::move(handle_txn_running_kv)); |
2567 | 20 | } |
2568 | | |
2569 | 19 | int InstanceRecycler::recycle_expired_txn_label() { |
2570 | 19 | const std::string task_name = "recycle_expired_txn_label"; |
2571 | 19 | int64_t num_scanned = 0; |
2572 | 19 | int64_t num_expired = 0; |
2573 | 19 | int64_t num_recycled = 0; |
2574 | 19 | int ret = 0; |
2575 | | |
2576 | 19 | RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0}; |
2577 | 19 | RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX}; |
2578 | 19 | std::string begin_recycle_txn_key; |
2579 | 19 | std::string end_recycle_txn_key; |
2580 | 19 | recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key); |
2581 | 19 | recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key); |
2582 | 19 | std::vector<std::string> recycle_txn_info_keys; |
2583 | | |
2584 | 19 | LOG_INFO("begin to recycle expired txn").tag("instance_id", instance_id_); |
2585 | | |
2586 | 19 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
2587 | 19 | register_recycle_task(task_name, start_time); |
2588 | 19 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { |
2589 | 19 | unregister_recycle_task(task_name); |
2590 | 19 | int64_t cost = |
2591 | 19 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
2592 | 19 | LOG_INFO("end to recycle expired txn, cost={}s", cost) |
2593 | 19 | .tag("instance_id", instance_id_) |
2594 | 19 | .tag("num_scanned", num_scanned) |
2595 | 19 | .tag("num_expired", num_expired) |
2596 | 19 | .tag("num_recycled", num_recycled); |
2597 | 19 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEPi Line | Count | Source | 2588 | 16 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { | 2589 | 16 | unregister_recycle_task(task_name); | 2590 | 16 | int64_t cost = | 2591 | 16 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 2592 | 16 | LOG_INFO("end to recycle expired txn, cost={}s", cost) | 2593 | 16 | .tag("instance_id", instance_id_) | 2594 | 16 | .tag("num_scanned", num_scanned) | 2595 | 16 | .tag("num_expired", num_expired) | 2596 | 16 | .tag("num_recycled", num_recycled); | 2597 | 16 | }); |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEPi Line | Count | Source | 2588 | 3 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { | 2589 | 3 | unregister_recycle_task(task_name); | 2590 | 3 | int64_t cost = | 2591 | 3 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 2592 | 3 | LOG_INFO("end to recycle expired txn, cost={}s", cost) | 2593 | 3 | .tag("instance_id", instance_id_) | 2594 | 3 | .tag("num_scanned", num_scanned) | 2595 | 3 | .tag("num_expired", num_expired) | 2596 | 3 | .tag("num_recycled", num_recycled); | 2597 | 3 | }); |
|
2598 | | |
2599 | 19 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
2600 | 30.0k | auto calc_expiration = [&earlest_ts, this](const RecycleTxnPB& recycle_txn_pb) { |
2601 | 30.0k | int64_t final_expiration = |
2602 | 30.0k | recycle_txn_pb.creation_time() + config::label_keep_max_second * 1000L; |
2603 | 30.0k | if (earlest_ts > final_expiration / 1000) { |
2604 | 6 | earlest_ts = final_expiration / 1000; |
2605 | 6 | g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, earlest_ts); |
2606 | 6 | } |
2607 | 30.0k | return final_expiration; |
2608 | 30.0k | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNS0_12RecycleTxnPBE Line | Count | Source | 2600 | 30.0k | auto calc_expiration = [&earlest_ts, this](const RecycleTxnPB& recycle_txn_pb) { | 2601 | 30.0k | int64_t final_expiration = | 2602 | 30.0k | recycle_txn_pb.creation_time() + config::label_keep_max_second * 1000L; | 2603 | 30.0k | if (earlest_ts > final_expiration / 1000) { | 2604 | 6 | earlest_ts = final_expiration / 1000; | 2605 | 6 | g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, earlest_ts); | 2606 | 6 | } | 2607 | 30.0k | return final_expiration; | 2608 | 30.0k | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNS0_12RecycleTxnPBE |
2609 | | |
2610 | 19 | SyncExecutor<int> concurrent_delete_executor( |
2611 | 19 | _thread_pool_group.s3_producer_pool, |
2612 | 19 | fmt::format("recycle expired txn label, instance id {}", instance_id_), |
2613 | 23.0k | [](const int& ret) { return ret != 0; }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clERKi Line | Count | Source | 2613 | 23.0k | [](const int& ret) { return ret != 0; }); |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clERKi Line | Count | Source | 2613 | 3 | [](const int& ret) { return ret != 0; }); |
|
2614 | | |
2615 | 19 | int64_t current_time_ms = |
2616 | 19 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
2617 | | |
2618 | 30.0k | auto handle_recycle_txn_kv = [&](std::string_view k, std::string_view v) -> int { |
2619 | 30.0k | ++num_scanned; |
2620 | 30.0k | RecycleTxnPB recycle_txn_pb; |
2621 | 30.0k | if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) { |
2622 | 0 | LOG_WARNING("malformed txn_running_pb").tag("key", hex(k)); |
2623 | 0 | return -1; |
2624 | 0 | } |
2625 | 30.0k | if ((config::force_immediate_recycle) || |
2626 | 30.0k | (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) || |
2627 | 30.0k | (calc_expiration(recycle_txn_pb) <= current_time_ms)) { |
2628 | 23.0k | VLOG_DEBUG << "found recycle txn, key=" << hex(k); |
2629 | 23.0k | num_expired++; |
2630 | 23.0k | recycle_txn_info_keys.emplace_back(k); |
2631 | 23.0k | } |
2632 | 30.0k | return 0; |
2633 | 30.0k | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 2618 | 30.0k | auto handle_recycle_txn_kv = [&](std::string_view k, std::string_view v) -> int { | 2619 | 30.0k | ++num_scanned; | 2620 | 30.0k | RecycleTxnPB recycle_txn_pb; | 2621 | 30.0k | if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) { | 2622 | 0 | LOG_WARNING("malformed txn_running_pb").tag("key", hex(k)); | 2623 | 0 | return -1; | 2624 | 0 | } | 2625 | 30.0k | if ((config::force_immediate_recycle) || | 2626 | 30.0k | (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) || | 2627 | 30.0k | (calc_expiration(recycle_txn_pb) <= current_time_ms)) { | 2628 | 23.0k | VLOG_DEBUG << "found recycle txn, key=" << hex(k); | 2629 | 23.0k | num_expired++; | 2630 | 23.0k | recycle_txn_info_keys.emplace_back(k); | 2631 | 23.0k | } | 2632 | 30.0k | return 0; | 2633 | 30.0k | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 2618 | 3 | auto handle_recycle_txn_kv = [&](std::string_view k, std::string_view v) -> int { | 2619 | 3 | ++num_scanned; | 2620 | 3 | RecycleTxnPB recycle_txn_pb; | 2621 | 3 | if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) { | 2622 | 0 | LOG_WARNING("malformed txn_running_pb").tag("key", hex(k)); | 2623 | 0 | return -1; | 2624 | 0 | } | 2625 | 3 | if ((config::force_immediate_recycle) || | 2626 | 3 | (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) || | 2627 | 3 | (calc_expiration(recycle_txn_pb) <= current_time_ms)) { | 2628 | 3 | VLOG_DEBUG << "found recycle txn, key=" << hex(k); | 2629 | 3 | num_expired++; | 2630 | 3 | recycle_txn_info_keys.emplace_back(k); | 2631 | 3 | } | 2632 | 3 | return 0; | 2633 | 3 | }; |
|
2634 | | |
2635 | 23.0k | auto delete_recycle_txn_kv = [&](const std::string& k) -> int { |
2636 | 23.0k | std::string_view k1 = k; |
2637 | | //RecycleTxnKeyInfo 0:instance_id 1:db_id 2:txn_id |
2638 | 23.0k | k1.remove_prefix(1); // Remove key space |
2639 | 23.0k | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
2640 | 23.0k | int ret = decode_key(&k1, &out); |
2641 | 23.0k | if (ret != 0) { |
2642 | 0 | LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k)); |
2643 | 0 | return -1; |
2644 | 0 | } |
2645 | 23.0k | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); |
2646 | 23.0k | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); |
2647 | 23.0k | VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id; |
2648 | 23.0k | std::unique_ptr<Transaction> txn; |
2649 | 23.0k | TxnErrorCode err = txn_kv_->create_txn(&txn); |
2650 | 23.0k | if (err != TxnErrorCode::TXN_OK) { |
2651 | 0 | LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k)); |
2652 | 0 | return -1; |
2653 | 0 | } |
2654 | | // Remove txn index kv |
2655 | 23.0k | auto index_key = txn_index_key({instance_id_, txn_id}); |
2656 | 23.0k | txn->remove(index_key); |
2657 | | // Remove txn info kv |
2658 | 23.0k | std::string info_key, info_val; |
2659 | 23.0k | txn_info_key({instance_id_, db_id, txn_id}, &info_key); |
2660 | 23.0k | err = txn->get(info_key, &info_val); |
2661 | 23.0k | if (err != TxnErrorCode::TXN_OK) { |
2662 | 0 | LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key)); |
2663 | 0 | return -1; |
2664 | 0 | } |
2665 | 23.0k | TxnInfoPB txn_info; |
2666 | 23.0k | if (!txn_info.ParseFromString(info_val)) { |
2667 | 0 | LOG_WARNING("failed to parse txn info").tag("key", hex(info_key)); |
2668 | 0 | return -1; |
2669 | 0 | } |
2670 | 23.0k | txn->remove(info_key); |
2671 | | // Remove sub txn index kvs |
2672 | 23.0k | std::vector<std::string> sub_txn_index_keys; |
2673 | 23.0k | for (auto sub_txn_id : txn_info.sub_txn_ids()) { |
2674 | 22.9k | auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id}); |
2675 | 22.9k | sub_txn_index_keys.push_back(sub_txn_index_key); |
2676 | 22.9k | } |
2677 | 23.0k | for (auto& sub_txn_index_key : sub_txn_index_keys) { |
2678 | 22.9k | txn->remove(sub_txn_index_key); |
2679 | 22.9k | } |
2680 | | // Update txn label |
2681 | 23.0k | std::string label_key, label_val; |
2682 | 23.0k | txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key); |
2683 | 23.0k | err = txn->get(label_key, &label_val); |
2684 | 23.0k | if (err != TxnErrorCode::TXN_OK) { |
2685 | 0 | LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key |
2686 | 0 | << " err=" << err; |
2687 | 0 | return -1; |
2688 | 0 | } |
2689 | 23.0k | TxnLabelPB txn_label; |
2690 | 23.0k | if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) { |
2691 | 0 | LOG_WARNING("failed to parse txn label").tag("key", hex(label_key)); |
2692 | 0 | return -1; |
2693 | 0 | } |
2694 | 23.0k | auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id); |
2695 | 23.0k | if (it != txn_label.txn_ids().end()) { |
2696 | 23.0k | txn_label.mutable_txn_ids()->erase(it); |
2697 | 23.0k | } |
2698 | 23.0k | if (txn_label.txn_ids().empty()) { |
2699 | 23.0k | txn->remove(label_key); |
2700 | 23.0k | } else { |
2701 | 0 | if (!txn_label.SerializeToString(&label_val)) { |
2702 | 0 | LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key); |
2703 | 0 | return -1; |
2704 | 0 | } |
2705 | 0 | txn->atomic_set_ver_value(label_key, label_val); |
2706 | 0 | } |
2707 | | // Remove recycle txn kv |
2708 | 23.0k | txn->remove(k); |
2709 | 23.0k | err = txn->commit(); |
2710 | 23.0k | if (err != TxnErrorCode::TXN_OK) { |
2711 | 0 | LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k); |
2712 | 0 | return -1; |
2713 | 0 | } |
2714 | 23.0k | ++num_recycled; |
2715 | 23.0k | LOG(INFO) << "recycle expired txn, key=" << hex(k); |
2716 | 23.0k | return 0; |
2717 | 23.0k | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_5clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 2635 | 23.0k | auto delete_recycle_txn_kv = [&](const std::string& k) -> int { | 2636 | 23.0k | std::string_view k1 = k; | 2637 | | //RecycleTxnKeyInfo 0:instance_id 1:db_id 2:txn_id | 2638 | 23.0k | k1.remove_prefix(1); // Remove key space | 2639 | 23.0k | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 2640 | 23.0k | int ret = decode_key(&k1, &out); | 2641 | 23.0k | if (ret != 0) { | 2642 | 0 | LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k)); | 2643 | 0 | return -1; | 2644 | 0 | } | 2645 | 23.0k | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); | 2646 | 23.0k | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); | 2647 | 23.0k | VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id; | 2648 | 23.0k | std::unique_ptr<Transaction> txn; | 2649 | 23.0k | TxnErrorCode err = txn_kv_->create_txn(&txn); | 2650 | 23.0k | if (err != TxnErrorCode::TXN_OK) { | 2651 | 0 | LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k)); | 2652 | 0 | return -1; | 2653 | 0 | } | 2654 | | // Remove txn index kv | 2655 | 23.0k | auto index_key = txn_index_key({instance_id_, txn_id}); | 2656 | 23.0k | txn->remove(index_key); | 2657 | | // Remove txn info kv | 2658 | 23.0k | std::string info_key, info_val; | 2659 | 23.0k | txn_info_key({instance_id_, db_id, txn_id}, &info_key); | 2660 | 23.0k | err = txn->get(info_key, &info_val); | 2661 | 23.0k | if (err != TxnErrorCode::TXN_OK) { | 2662 | 0 | LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key)); | 2663 | 0 | return -1; | 2664 | 0 | } | 2665 | 23.0k | TxnInfoPB txn_info; | 2666 | 23.0k | if (!txn_info.ParseFromString(info_val)) { | 2667 | 0 | LOG_WARNING("failed to parse txn info").tag("key", hex(info_key)); | 2668 | 0 | return 
-1; | 2669 | 0 | } | 2670 | 23.0k | txn->remove(info_key); | 2671 | | // Remove sub txn index kvs | 2672 | 23.0k | std::vector<std::string> sub_txn_index_keys; | 2673 | 23.0k | for (auto sub_txn_id : txn_info.sub_txn_ids()) { | 2674 | 22.9k | auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id}); | 2675 | 22.9k | sub_txn_index_keys.push_back(sub_txn_index_key); | 2676 | 22.9k | } | 2677 | 23.0k | for (auto& sub_txn_index_key : sub_txn_index_keys) { | 2678 | 22.9k | txn->remove(sub_txn_index_key); | 2679 | 22.9k | } | 2680 | | // Update txn label | 2681 | 23.0k | std::string label_key, label_val; | 2682 | 23.0k | txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key); | 2683 | 23.0k | err = txn->get(label_key, &label_val); | 2684 | 23.0k | if (err != TxnErrorCode::TXN_OK) { | 2685 | 0 | LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key | 2686 | 0 | << " err=" << err; | 2687 | 0 | return -1; | 2688 | 0 | } | 2689 | 23.0k | TxnLabelPB txn_label; | 2690 | 23.0k | if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) { | 2691 | 0 | LOG_WARNING("failed to parse txn label").tag("key", hex(label_key)); | 2692 | 0 | return -1; | 2693 | 0 | } | 2694 | 23.0k | auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id); | 2695 | 23.0k | if (it != txn_label.txn_ids().end()) { | 2696 | 23.0k | txn_label.mutable_txn_ids()->erase(it); | 2697 | 23.0k | } | 2698 | 23.0k | if (txn_label.txn_ids().empty()) { | 2699 | 23.0k | txn->remove(label_key); | 2700 | 23.0k | } else { | 2701 | 0 | if (!txn_label.SerializeToString(&label_val)) { | 2702 | 0 | LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key); | 2703 | 0 | return -1; | 2704 | 0 | } | 2705 | 0 | txn->atomic_set_ver_value(label_key, label_val); | 2706 | 0 | } | 2707 | | // Remove recycle txn kv | 2708 | 23.0k | txn->remove(k); | 2709 | 23.0k | err = txn->commit(); | 2710 | 23.0k | if (err != 
TxnErrorCode::TXN_OK) { | 2711 | 0 | LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k); | 2712 | 0 | return -1; | 2713 | 0 | } | 2714 | 23.0k | ++num_recycled; | 2715 | 23.0k | LOG(INFO) << "recycle expired txn, key=" << hex(k); | 2716 | 23.0k | return 0; | 2717 | 23.0k | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_5clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 2635 | 3 | auto delete_recycle_txn_kv = [&](const std::string& k) -> int { | 2636 | 3 | std::string_view k1 = k; | 2637 | | //RecycleTxnKeyInfo 0:instance_id 1:db_id 2:txn_id | 2638 | 3 | k1.remove_prefix(1); // Remove key space | 2639 | 3 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 2640 | 3 | int ret = decode_key(&k1, &out); | 2641 | 3 | if (ret != 0) { | 2642 | 0 | LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k)); | 2643 | 0 | return -1; | 2644 | 0 | } | 2645 | 3 | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); | 2646 | 3 | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); | 2647 | 3 | VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id; | 2648 | 3 | std::unique_ptr<Transaction> txn; | 2649 | 3 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 2650 | 3 | if (err != TxnErrorCode::TXN_OK) { | 2651 | 0 | LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k)); | 2652 | 0 | return -1; | 2653 | 0 | } | 2654 | | // Remove txn index kv | 2655 | 3 | auto index_key = txn_index_key({instance_id_, txn_id}); | 2656 | 3 | txn->remove(index_key); | 2657 | | // Remove txn info kv | 2658 | 3 | std::string info_key, info_val; | 2659 | 3 | txn_info_key({instance_id_, db_id, txn_id}, &info_key); | 2660 | 3 | err = txn->get(info_key, &info_val); | 2661 | 3 | if (err != TxnErrorCode::TXN_OK) { | 2662 | 0 | LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key)); | 2663 | 0 | return -1; | 2664 | 0 | } | 2665 | 3 | TxnInfoPB txn_info; | 2666 | 3 | if (!txn_info.ParseFromString(info_val)) { | 2667 | 0 | LOG_WARNING("failed to parse txn info").tag("key", hex(info_key)); | 2668 | 0 | return -1; | 2669 | 0 | } | 2670 | 3 | txn->remove(info_key); | 2671 | | // Remove sub txn index kvs | 2672 | 3 
| std::vector<std::string> sub_txn_index_keys; | 2673 | 3 | for (auto sub_txn_id : txn_info.sub_txn_ids()) { | 2674 | 0 | auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id}); | 2675 | 0 | sub_txn_index_keys.push_back(sub_txn_index_key); | 2676 | 0 | } | 2677 | 3 | for (auto& sub_txn_index_key : sub_txn_index_keys) { | 2678 | 0 | txn->remove(sub_txn_index_key); | 2679 | 0 | } | 2680 | | // Update txn label | 2681 | 3 | std::string label_key, label_val; | 2682 | 3 | txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key); | 2683 | 3 | err = txn->get(label_key, &label_val); | 2684 | 3 | if (err != TxnErrorCode::TXN_OK) { | 2685 | 0 | LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key | 2686 | 0 | << " err=" << err; | 2687 | 0 | return -1; | 2688 | 0 | } | 2689 | 3 | TxnLabelPB txn_label; | 2690 | 3 | if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) { | 2691 | 0 | LOG_WARNING("failed to parse txn label").tag("key", hex(label_key)); | 2692 | 0 | return -1; | 2693 | 0 | } | 2694 | 3 | auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id); | 2695 | 3 | if (it != txn_label.txn_ids().end()) { | 2696 | 3 | txn_label.mutable_txn_ids()->erase(it); | 2697 | 3 | } | 2698 | 3 | if (txn_label.txn_ids().empty()) { | 2699 | 3 | txn->remove(label_key); | 2700 | 3 | } else { | 2701 | 0 | if (!txn_label.SerializeToString(&label_val)) { | 2702 | 0 | LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key); | 2703 | 0 | return -1; | 2704 | 0 | } | 2705 | 0 | txn->atomic_set_ver_value(label_key, label_val); | 2706 | 0 | } | 2707 | | // Remove recycle txn kv | 2708 | 3 | txn->remove(k); | 2709 | 3 | err = txn->commit(); | 2710 | 3 | if (err != TxnErrorCode::TXN_OK) { | 2711 | 0 | LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k); | 2712 | 0 | return -1; | 2713 | 0 | } | 2714 | 3 | ++num_recycled; | 2715 | 3 | LOG(INFO) 
<< "recycle expired txn, key=" << hex(k); | 2716 | 3 | return 0; | 2717 | 3 | }; |
|
2718 | | |
2719 | 19 | auto loop_done = [&]() -> int { |
2720 | 10 | std::unique_ptr<int, std::function<void(int*)>> defer( |
2721 | 10 | (int*)0x01, [&](int*) { recycle_txn_info_keys.clear(); }); recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clEvENKUlPiE_clES3_ Line | Count | Source | 2721 | 7 | (int*)0x01, [&](int*) { recycle_txn_info_keys.clear(); }); |
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clEvENKUlPiE_clES3_ Line | Count | Source | 2721 | 3 | (int*)0x01, [&](int*) { recycle_txn_info_keys.clear(); }); |
|
2722 | 10 | TEST_SYNC_POINT_CALLBACK( |
2723 | 10 | "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys", |
2724 | 10 | &recycle_txn_info_keys); |
2725 | 23.0k | for (const auto& k : recycle_txn_info_keys) { |
2726 | 23.0k | concurrent_delete_executor.add([&]() { |
2727 | 23.0k | if (delete_recycle_txn_kv(k) != 0) { |
2728 | 0 | LOG_WARNING("failed to delete recycle txn kv") |
2729 | 0 | .tag("instance id", instance_id_) |
2730 | 0 | .tag("key", hex(k)); |
2731 | 0 | return -1; |
2732 | 0 | } |
2733 | 23.0k | return 0; |
2734 | 23.0k | }); recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clEvENKUlvE_clEv Line | Count | Source | 2726 | 23.0k | concurrent_delete_executor.add([&]() { | 2727 | 23.0k | if (delete_recycle_txn_kv(k) != 0) { | 2728 | 0 | LOG_WARNING("failed to delete recycle txn kv") | 2729 | 0 | .tag("instance id", instance_id_) | 2730 | 0 | .tag("key", hex(k)); | 2731 | 0 | return -1; | 2732 | 0 | } | 2733 | 23.0k | return 0; | 2734 | 23.0k | }); |
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clEvENKUlvE_clEv Line | Count | Source | 2726 | 3 | concurrent_delete_executor.add([&]() { | 2727 | 3 | if (delete_recycle_txn_kv(k) != 0) { | 2728 | 0 | LOG_WARNING("failed to delete recycle txn kv") | 2729 | 0 | .tag("instance id", instance_id_) | 2730 | 0 | .tag("key", hex(k)); | 2731 | 0 | return -1; | 2732 | 0 | } | 2733 | 3 | return 0; | 2734 | 3 | }); |
|
2735 | 23.0k | } |
2736 | 10 | bool finished = true; |
2737 | 10 | std::vector<int> rets = concurrent_delete_executor.when_all(&finished); |
2738 | 23.0k | for (int r : rets) { |
2739 | 23.0k | if (r != 0) { |
2740 | 0 | ret = -1; |
2741 | 0 | } |
2742 | 23.0k | } |
2743 | | |
2744 | 10 | ret = finished ? ret : -1; |
2745 | | |
2746 | 10 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret); |
2747 | | |
2748 | 10 | if (ret != 0) { |
2749 | 2 | LOG_WARNING("recycle txn kv ret!=0") |
2750 | 2 | .tag("finished", finished) |
2751 | 2 | .tag("ret", ret) |
2752 | 2 | .tag("instance_id", instance_id_); |
2753 | 2 | return ret; |
2754 | 2 | } |
2755 | 8 | return ret; |
2756 | 10 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clEv Line | Count | Source | 2719 | 7 | auto loop_done = [&]() -> int { | 2720 | 7 | std::unique_ptr<int, std::function<void(int*)>> defer( | 2721 | 7 | (int*)0x01, [&](int*) { recycle_txn_info_keys.clear(); }); | 2722 | 7 | TEST_SYNC_POINT_CALLBACK( | 2723 | 7 | "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys", | 2724 | 7 | &recycle_txn_info_keys); | 2725 | 23.0k | for (const auto& k : recycle_txn_info_keys) { | 2726 | 23.0k | concurrent_delete_executor.add([&]() { | 2727 | 23.0k | if (delete_recycle_txn_kv(k) != 0) { | 2728 | 23.0k | LOG_WARNING("failed to delete recycle txn kv") | 2729 | 23.0k | .tag("instance id", instance_id_) | 2730 | 23.0k | .tag("key", hex(k)); | 2731 | 23.0k | return -1; | 2732 | 23.0k | } | 2733 | 23.0k | return 0; | 2734 | 23.0k | }); | 2735 | 23.0k | } | 2736 | 7 | bool finished = true; | 2737 | 7 | std::vector<int> rets = concurrent_delete_executor.when_all(&finished); | 2738 | 23.0k | for (int r : rets) { | 2739 | 23.0k | if (r != 0) { | 2740 | 0 | ret = -1; | 2741 | 0 | } | 2742 | 23.0k | } | 2743 | | | 2744 | 7 | ret = finished ? ret : -1; | 2745 | | | 2746 | 7 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret); | 2747 | | | 2748 | 7 | if (ret != 0) { | 2749 | 2 | LOG_WARNING("recycle txn kv ret!=0") | 2750 | 2 | .tag("finished", finished) | 2751 | 2 | .tag("ret", ret) | 2752 | 2 | .tag("instance_id", instance_id_); | 2753 | 2 | return ret; | 2754 | 2 | } | 2755 | 5 | return ret; | 2756 | 7 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clEv Line | Count | Source | 2719 | 3 | auto loop_done = [&]() -> int { | 2720 | 3 | std::unique_ptr<int, std::function<void(int*)>> defer( | 2721 | 3 | (int*)0x01, [&](int*) { recycle_txn_info_keys.clear(); }); | 2722 | 3 | TEST_SYNC_POINT_CALLBACK( | 2723 | 3 | "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys", | 2724 | 3 | &recycle_txn_info_keys); | 2725 | 3 | for (const auto& k : recycle_txn_info_keys) { | 2726 | 3 | concurrent_delete_executor.add([&]() { | 2727 | 3 | if (delete_recycle_txn_kv(k) != 0) { | 2728 | 3 | LOG_WARNING("failed to delete recycle txn kv") | 2729 | 3 | .tag("instance id", instance_id_) | 2730 | 3 | .tag("key", hex(k)); | 2731 | 3 | return -1; | 2732 | 3 | } | 2733 | 3 | return 0; | 2734 | 3 | }); | 2735 | 3 | } | 2736 | 3 | bool finished = true; | 2737 | 3 | std::vector<int> rets = concurrent_delete_executor.when_all(&finished); | 2738 | 3 | for (int r : rets) { | 2739 | 3 | if (r != 0) { | 2740 | 0 | ret = -1; | 2741 | 0 | } | 2742 | 3 | } | 2743 | | | 2744 | 3 | ret = finished ? ret : -1; | 2745 | | | 2746 | 3 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret); | 2747 | | | 2748 | 3 | if (ret != 0) { | 2749 | 0 | LOG_WARNING("recycle txn kv ret!=0") | 2750 | 0 | .tag("finished", finished) | 2751 | 0 | .tag("ret", ret) | 2752 | 0 | .tag("instance_id", instance_id_); | 2753 | 0 | return ret; | 2754 | 0 | } | 2755 | 3 | return ret; | 2756 | 3 | }; |
|
2757 | | |
2758 | 19 | return scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key, |
2759 | 19 | std::move(handle_recycle_txn_kv), std::move(loop_done)); |
2760 | 19 | } |
2761 | | |
// Identifies one copy job; the fields are only used to build copy file keys and
// the log trace of a batch.
//
// The member order is part of the contract: instances are brace-initialized
// positionally and unpacked with a five-element structured binding.
struct CopyJobIdTuple {
    std::string instance_id;
    std::string stage_id;
    // Fixed-width on purpose: the value comes from an int64_t key field, and
    // `long` is not guaranteed to be 64 bits wide on every platform.
    int64_t table_id;
    // Logged under the name "query_id".
    std::string copy_id;
    std::string stage_path;
};
2769 | | struct BatchObjStoreAccessor { |
2770 | | BatchObjStoreAccessor(std::shared_ptr<StorageVaultAccessor> accessor, uint64_t& batch_count, |
2771 | | TxnKv* txn_kv) |
2772 | 3 | : accessor_(std::move(accessor)), batch_count_(batch_count), txn_kv_(txn_kv) {}; |
2773 | 3 | ~BatchObjStoreAccessor() { |
2774 | 3 | if (!paths_.empty()) { |
2775 | 3 | consume(); |
2776 | 3 | } |
2777 | 3 | } |
2778 | | |
2779 | | /** |
2780 | | * To implicitely do batch work and submit the batch delete task to s3 |
2781 | | * The s3 delete opreations would be done in batches, and then delete CopyJobPB key one by one |
2782 | | * |
2783 | | * @param copy_job The protubuf struct consists of the copy job files. |
2784 | | * @param key The copy job's key on fdb, the key is originally occupied by fdb range iterator, to make sure |
2785 | | * it would last until we finish the delete task, here we need pass one string value |
2786 | | * @param cope_job_id_tuple One tuple {log_trace instance_id, stage_id, table_id, query_id, stage_path} to print log |
2787 | | */ |
2788 | 5 | void add(CopyJobPB copy_job, std::string key, const CopyJobIdTuple cope_job_id_tuple) { |
2789 | 5 | auto& [instance_id, stage_id, table_id, copy_id, path] = cope_job_id_tuple; |
2790 | 5 | auto& file_keys = copy_file_keys_[key]; |
2791 | 5 | file_keys.log_trace = |
2792 | 5 | fmt::format("instance_id={}, stage_id={}, table_id={}, query_id={}, path={}", |
2793 | 5 | instance_id, stage_id, table_id, copy_id, path); |
2794 | 5 | std::string_view log_trace = file_keys.log_trace; |
2795 | 2.03k | for (const auto& file : copy_job.object_files()) { |
2796 | 2.03k | auto relative_path = file.relative_path(); |
2797 | 2.03k | paths_.push_back(relative_path); |
2798 | 2.03k | file_keys.keys.push_back(copy_file_key( |
2799 | 2.03k | {instance_id, stage_id, table_id, file.relative_path(), file.etag()})); |
2800 | 2.03k | LOG_INFO(log_trace) |
2801 | 2.03k | .tag("relative_path", relative_path) |
2802 | 2.03k | .tag("batch_count", batch_count_); |
2803 | 2.03k | } |
2804 | 5 | LOG_INFO(log_trace) |
2805 | 5 | .tag("objects_num", copy_job.object_files().size()) |
2806 | 5 | .tag("batch_count", batch_count_); |
2807 | | // TODO(AlexYue): If the size is 1001, it would be one delete with 1000 objects and one delete request with only one object(**ATTN**: DOESN'T |
2808 | | // recommend using delete objects when objects num is less than 10) |
2809 | 5 | if (paths_.size() < 1000) { |
2810 | 3 | return; |
2811 | 3 | } |
2812 | 2 | consume(); |
2813 | 2 | } |
2814 | | |
2815 | | private: |
2816 | 5 | void consume() { |
2817 | 5 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [this](int*) { |
2818 | 5 | paths_.clear(); |
2819 | 5 | copy_file_keys_.clear(); |
2820 | 5 | batch_count_++; |
2821 | 5 | }); |
2822 | 5 | LOG_INFO("begin to delete {} internal stage objects in batch {}", paths_.size(), |
2823 | 5 | batch_count_); |
2824 | 5 | StopWatch sw; |
2825 | | // TODO(yuejing): 在accessor的delete_objets的实现里可以考虑如果_paths数量不超过10个的话,就直接发10个delete objection operation而不是发post |
2826 | 5 | if (0 != accessor_->delete_files(paths_)) { |
2827 | 2 | LOG_WARNING("failed to delete {} internal stage objects in batch {} and it takes {} us", |
2828 | 2 | paths_.size(), batch_count_, sw.elapsed_us()); |
2829 | 2 | return; |
2830 | 2 | } |
2831 | 3 | LOG_INFO("succeed to delete {} internal stage objects in batch {} and it takes {} us", |
2832 | 3 | paths_.size(), batch_count_, sw.elapsed_us()); |
2833 | | // delete fdb's keys |
2834 | 3 | for (auto& file_keys : copy_file_keys_) { |
2835 | 3 | auto& [log_trace, keys] = file_keys.second; |
2836 | 3 | std::unique_ptr<Transaction> txn; |
2837 | 3 | if (txn_kv_->create_txn(&txn) != cloud::TxnErrorCode::TXN_OK) { |
2838 | 0 | LOG(WARNING) << "failed to create txn"; |
2839 | 0 | continue; |
2840 | 0 | } |
2841 | | // FIXME: We have already limited the file num and file meta size when selecting file in FE. |
2842 | | // And if too many copy files, begin_copy failed commit too. So here the copy file keys are |
2843 | | // limited, should not cause the txn commit failed. |
2844 | 1.02k | for (const auto& key : keys) { |
2845 | 1.02k | txn->remove(key); |
2846 | 1.02k | LOG_INFO("remove copy_file_key={}, {}", hex(key), log_trace); |
2847 | 1.02k | } |
2848 | 3 | txn->remove(file_keys.first); |
2849 | 3 | if (auto ret = txn->commit(); ret != cloud::TxnErrorCode::TXN_OK) { |
2850 | 0 | LOG(WARNING) << "failed to commit txn ret is " << ret; |
2851 | 0 | continue; |
2852 | 0 | } |
2853 | 3 | } |
2854 | 3 | } |
2855 | | std::shared_ptr<StorageVaultAccessor> accessor_; |
2856 | | // the path of the s3 files to be deleted |
2857 | | std::vector<std::string> paths_; |
2858 | | struct CopyFiles { |
2859 | | std::string log_trace; |
2860 | | std::vector<std::string> keys; |
2861 | | }; |
2862 | | // pair<std::string, std::vector<std::string>> |
2863 | | // first: instance_id_ stage_id table_id query_id |
2864 | | // second: keys to be deleted |
2865 | | // <fdb key, <{instance_id_ stage_id table_id query_id}, file keys to be deleted>> |
2866 | | std::unordered_map<std::string, CopyFiles> copy_file_keys_; |
2867 | | // used to distinguish different batch tasks, the task log consists of thread ID and batch number |
2868 | | // which can together uniquely identifies different tasks for tracing log |
2869 | | uint64_t& batch_count_; |
2870 | | TxnKv* txn_kv_; |
2871 | | }; |
2872 | | |
2873 | 13 | int InstanceRecycler::recycle_copy_jobs() { |
2874 | 13 | int64_t num_scanned = 0; |
2875 | 13 | int64_t num_finished = 0; |
2876 | 13 | int64_t num_expired = 0; |
2877 | 13 | int64_t num_recycled = 0; |
2878 | | // Used for INTERNAL stage's copy jobs to tag each batch for log trace |
2879 | 13 | uint64_t batch_count = 0; |
2880 | 13 | const std::string task_name = "recycle_copy_jobs"; |
2881 | | |
2882 | 13 | LOG_INFO("begin to recycle copy jobs").tag("instance_id", instance_id_); |
2883 | | |
2884 | 13 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
2885 | 13 | register_recycle_task(task_name, start_time); |
2886 | | |
2887 | 13 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { |
2888 | 13 | unregister_recycle_task(task_name); |
2889 | 13 | int64_t cost = |
2890 | 13 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
2891 | 13 | LOG_INFO("recycle copy jobs finished, cost={}s", cost) |
2892 | 13 | .tag("instance_id", instance_id_) |
2893 | 13 | .tag("num_scanned", num_scanned) |
2894 | 13 | .tag("num_finished", num_finished) |
2895 | 13 | .tag("num_expired", num_expired) |
2896 | 13 | .tag("num_recycled", num_recycled); |
2897 | 13 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEPi Line | Count | Source | 2887 | 13 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { | 2888 | 13 | unregister_recycle_task(task_name); | 2889 | 13 | int64_t cost = | 2890 | 13 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 2891 | 13 | LOG_INFO("recycle copy jobs finished, cost={}s", cost) | 2892 | 13 | .tag("instance_id", instance_id_) | 2893 | 13 | .tag("num_scanned", num_scanned) | 2894 | 13 | .tag("num_finished", num_finished) | 2895 | 13 | .tag("num_expired", num_expired) | 2896 | 13 | .tag("num_recycled", num_recycled); | 2897 | 13 | }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEPi |
2898 | | |
2899 | 13 | CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0}; |
2900 | 13 | CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0}; |
2901 | 13 | std::string key0; |
2902 | 13 | std::string key1; |
2903 | 13 | copy_job_key(key_info0, &key0); |
2904 | 13 | copy_job_key(key_info1, &key1); |
2905 | 13 | std::unordered_map<std::string, std::shared_ptr<BatchObjStoreAccessor>> stage_accessor_map; |
2906 | 13 | auto recycle_func = [&start_time, &num_scanned, &num_finished, &num_expired, &num_recycled, |
2907 | 13 | &batch_count, &stage_accessor_map, &task_name, |
2908 | 16 | this](std::string_view k, std::string_view v) -> int { |
2909 | 16 | ++num_scanned; |
2910 | 16 | CopyJobPB copy_job; |
2911 | 16 | if (!copy_job.ParseFromArray(v.data(), v.size())) { |
2912 | 0 | LOG_WARNING("malformed copy job").tag("key", hex(k)); |
2913 | 0 | return -1; |
2914 | 0 | } |
2915 | | |
2916 | | // decode copy job key |
2917 | 16 | auto k1 = k; |
2918 | 16 | k1.remove_prefix(1); |
2919 | 16 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
2920 | 16 | decode_key(&k1, &out); |
2921 | | // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id} |
2922 | | // -> CopyJobPB |
2923 | 16 | const auto& stage_id = std::get<std::string>(std::get<0>(out[3])); |
2924 | 16 | const auto& table_id = std::get<int64_t>(std::get<0>(out[4])); |
2925 | 16 | const auto& copy_id = std::get<std::string>(std::get<0>(out[5])); |
2926 | | |
2927 | 16 | bool check_storage = true; |
2928 | 16 | if (copy_job.job_status() == CopyJobPB::FINISH) { |
2929 | 12 | ++num_finished; |
2930 | | |
2931 | 12 | if (copy_job.stage_type() == StagePB::INTERNAL) { |
2932 | 7 | auto it = stage_accessor_map.find(stage_id); |
2933 | 7 | std::shared_ptr<BatchObjStoreAccessor> accessor; |
2934 | 7 | std::string_view path; |
2935 | 7 | if (it != stage_accessor_map.end()) { |
2936 | 2 | accessor = it->second; |
2937 | 5 | } else { |
2938 | 5 | std::shared_ptr<StorageVaultAccessor> inner_accessor; |
2939 | 5 | auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(), |
2940 | 5 | &inner_accessor); |
2941 | 5 | if (ret < 0) { // error |
2942 | 0 | LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret); |
2943 | 0 | return -1; |
2944 | 5 | } else if (ret == 0) { |
2945 | 3 | path = inner_accessor->uri(); |
2946 | 3 | accessor = std::make_shared<BatchObjStoreAccessor>( |
2947 | 3 | inner_accessor, batch_count, txn_kv_.get()); |
2948 | 3 | stage_accessor_map.emplace(stage_id, accessor); |
2949 | 3 | } else { // stage not found, skip check storage |
2950 | 2 | check_storage = false; |
2951 | 2 | } |
2952 | 5 | } |
2953 | 7 | if (check_storage) { |
2954 | | // TODO delete objects with key and etag is not supported |
2955 | 5 | accessor->add(std::move(copy_job), std::string(k), |
2956 | 5 | {instance_id_, stage_id, table_id, copy_id, std::string(path)}); |
2957 | 5 | return 0; |
2958 | 5 | } |
2959 | 7 | } else if (copy_job.stage_type() == StagePB::EXTERNAL) { |
2960 | 5 | int64_t current_time = |
2961 | 5 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
2962 | 5 | if (copy_job.finish_time_ms() > 0) { |
2963 | 2 | if (!config::force_immediate_recycle && |
2964 | 2 | current_time < copy_job.finish_time_ms() + |
2965 | 2 | config::copy_job_max_retention_second * 1000) { |
2966 | 1 | return 0; |
2967 | 1 | } |
2968 | 3 | } else { |
2969 | | // For compatibility, copy job does not contain finish time before 2.2.2, use start time |
2970 | 3 | if (!config::force_immediate_recycle && |
2971 | 3 | current_time < copy_job.start_time_ms() + |
2972 | 3 | config::copy_job_max_retention_second * 1000) { |
2973 | 1 | return 0; |
2974 | 1 | } |
2975 | 3 | } |
2976 | 5 | } |
2977 | 12 | } else if (copy_job.job_status() == CopyJobPB::LOADING) { |
2978 | 4 | int64_t current_time = |
2979 | 4 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
2980 | | // if copy job is timeout: delete all copy file kvs and copy job kv |
2981 | 4 | if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) { |
2982 | 2 | return 0; |
2983 | 2 | } |
2984 | 2 | ++num_expired; |
2985 | 2 | } |
2986 | | |
2987 | | // delete all copy files |
2988 | 7 | std::vector<std::string> copy_file_keys; |
2989 | 70 | for (auto& file : copy_job.object_files()) { |
2990 | 70 | copy_file_keys.push_back(copy_file_key( |
2991 | 70 | {instance_id_, stage_id, table_id, file.relative_path(), file.etag()})); |
2992 | 70 | } |
2993 | 7 | std::unique_ptr<Transaction> txn; |
2994 | 7 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
2995 | 0 | LOG(WARNING) << "failed to create txn"; |
2996 | 0 | return -1; |
2997 | 0 | } |
2998 | | // FIXME: We have already limited the file num and file meta size when selecting file in FE. |
2999 | | // And if too many copy files, begin_copy failed commit too. So here the copy file keys are |
3000 | | // limited, should not cause the txn commit failed. |
3001 | 70 | for (const auto& key : copy_file_keys) { |
3002 | 70 | txn->remove(key); |
3003 | 70 | LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_ |
3004 | 70 | << ", stage_id=" << stage_id << ", table_id=" << table_id |
3005 | 70 | << ", query_id=" << copy_id; |
3006 | 70 | } |
3007 | 7 | txn->remove(k); |
3008 | 7 | TxnErrorCode err = txn->commit(); |
3009 | 7 | if (err != TxnErrorCode::TXN_OK) { |
3010 | 0 | LOG(WARNING) << "failed to commit txn, err=" << err; |
3011 | 0 | return -1; |
3012 | 0 | } |
3013 | | |
3014 | 7 | ++num_recycled; |
3015 | 7 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); |
3016 | 7 | return 0; |
3017 | 7 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 2908 | 16 | this](std::string_view k, std::string_view v) -> int { | 2909 | 16 | ++num_scanned; | 2910 | 16 | CopyJobPB copy_job; | 2911 | 16 | if (!copy_job.ParseFromArray(v.data(), v.size())) { | 2912 | 0 | LOG_WARNING("malformed copy job").tag("key", hex(k)); | 2913 | 0 | return -1; | 2914 | 0 | } | 2915 | | | 2916 | | // decode copy job key | 2917 | 16 | auto k1 = k; | 2918 | 16 | k1.remove_prefix(1); | 2919 | 16 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 2920 | 16 | decode_key(&k1, &out); | 2921 | | // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id} | 2922 | | // -> CopyJobPB | 2923 | 16 | const auto& stage_id = std::get<std::string>(std::get<0>(out[3])); | 2924 | 16 | const auto& table_id = std::get<int64_t>(std::get<0>(out[4])); | 2925 | 16 | const auto& copy_id = std::get<std::string>(std::get<0>(out[5])); | 2926 | | | 2927 | 16 | bool check_storage = true; | 2928 | 16 | if (copy_job.job_status() == CopyJobPB::FINISH) { | 2929 | 12 | ++num_finished; | 2930 | | | 2931 | 12 | if (copy_job.stage_type() == StagePB::INTERNAL) { | 2932 | 7 | auto it = stage_accessor_map.find(stage_id); | 2933 | 7 | std::shared_ptr<BatchObjStoreAccessor> accessor; | 2934 | 7 | std::string_view path; | 2935 | 7 | if (it != stage_accessor_map.end()) { | 2936 | 2 | accessor = it->second; | 2937 | 5 | } else { | 2938 | 5 | std::shared_ptr<StorageVaultAccessor> inner_accessor; | 2939 | 5 | auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(), | 2940 | 5 | &inner_accessor); | 2941 | 5 | if (ret < 0) { // error | 2942 | 0 | LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret); | 2943 | 0 | return -1; | 2944 | 5 | } else if (ret == 0) { | 2945 | 3 | path = inner_accessor->uri(); | 2946 | 3 | accessor = 
std::make_shared<BatchObjStoreAccessor>( | 2947 | 3 | inner_accessor, batch_count, txn_kv_.get()); | 2948 | 3 | stage_accessor_map.emplace(stage_id, accessor); | 2949 | 3 | } else { // stage not found, skip check storage | 2950 | 2 | check_storage = false; | 2951 | 2 | } | 2952 | 5 | } | 2953 | 7 | if (check_storage) { | 2954 | | // TODO delete objects with key and etag is not supported | 2955 | 5 | accessor->add(std::move(copy_job), std::string(k), | 2956 | 5 | {instance_id_, stage_id, table_id, copy_id, std::string(path)}); | 2957 | 5 | return 0; | 2958 | 5 | } | 2959 | 7 | } else if (copy_job.stage_type() == StagePB::EXTERNAL) { | 2960 | 5 | int64_t current_time = | 2961 | 5 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); | 2962 | 5 | if (copy_job.finish_time_ms() > 0) { | 2963 | 2 | if (!config::force_immediate_recycle && | 2964 | 2 | current_time < copy_job.finish_time_ms() + | 2965 | 2 | config::copy_job_max_retention_second * 1000) { | 2966 | 1 | return 0; | 2967 | 1 | } | 2968 | 3 | } else { | 2969 | | // For compatibility, copy job does not contain finish time before 2.2.2, use start time | 2970 | 3 | if (!config::force_immediate_recycle && | 2971 | 3 | current_time < copy_job.start_time_ms() + | 2972 | 3 | config::copy_job_max_retention_second * 1000) { | 2973 | 1 | return 0; | 2974 | 1 | } | 2975 | 3 | } | 2976 | 5 | } | 2977 | 12 | } else if (copy_job.job_status() == CopyJobPB::LOADING) { | 2978 | 4 | int64_t current_time = | 2979 | 4 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); | 2980 | | // if copy job is timeout: delete all copy file kvs and copy job kv | 2981 | 4 | if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) { | 2982 | 2 | return 0; | 2983 | 2 | } | 2984 | 2 | ++num_expired; | 2985 | 2 | } | 2986 | | | 2987 | | // delete all copy files | 2988 | 7 | std::vector<std::string> copy_file_keys; | 2989 | 70 | for (auto& file : copy_job.object_files()) { 
| 2990 | 70 | copy_file_keys.push_back(copy_file_key( | 2991 | 70 | {instance_id_, stage_id, table_id, file.relative_path(), file.etag()})); | 2992 | 70 | } | 2993 | 7 | std::unique_ptr<Transaction> txn; | 2994 | 7 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { | 2995 | 0 | LOG(WARNING) << "failed to create txn"; | 2996 | 0 | return -1; | 2997 | 0 | } | 2998 | | // FIXME: We have already limited the file num and file meta size when selecting file in FE. | 2999 | | // And if too many copy files, begin_copy failed commit too. So here the copy file keys are | 3000 | | // limited, should not cause the txn commit failed. | 3001 | 70 | for (const auto& key : copy_file_keys) { | 3002 | 70 | txn->remove(key); | 3003 | 70 | LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_ | 3004 | 70 | << ", stage_id=" << stage_id << ", table_id=" << table_id | 3005 | 70 | << ", query_id=" << copy_id; | 3006 | 70 | } | 3007 | 7 | txn->remove(k); | 3008 | 7 | TxnErrorCode err = txn->commit(); | 3009 | 7 | if (err != TxnErrorCode::TXN_OK) { | 3010 | 0 | LOG(WARNING) << "failed to commit txn, err=" << err; | 3011 | 0 | return -1; | 3012 | 0 | } | 3013 | | | 3014 | 7 | ++num_recycled; | 3015 | 7 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); | 3016 | 7 | return 0; | 3017 | 7 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
3018 | | |
3019 | 13 | return scan_and_recycle(key0, key1, std::move(recycle_func)); |
3020 | 13 | } |
3021 | | |
3022 | | int InstanceRecycler::init_copy_job_accessor(const std::string& stage_id, |
3023 | | const StagePB::StageType& stage_type, |
3024 | 5 | std::shared_ptr<StorageVaultAccessor>* accessor) { |
3025 | 5 | #ifdef UNIT_TEST |
3026 | | // In unit test, external use the same accessor as the internal stage |
3027 | 5 | auto it = accessor_map_.find(stage_id); |
3028 | 5 | if (it != accessor_map_.end()) { |
3029 | 3 | *accessor = it->second; |
3030 | 3 | } else { |
3031 | 2 | std::cout << "UT can not find accessor with stage_id: " << stage_id << std::endl; |
3032 | 2 | return 1; |
3033 | 2 | } |
3034 | | #else |
3035 | | // init s3 accessor and add to accessor map |
3036 | | auto stage_it = |
3037 | | std::find_if(instance_info_.stages().begin(), instance_info_.stages().end(), |
3038 | | [&stage_id](auto&& stage) { return stage.stage_id() == stage_id; }); |
3039 | | |
3040 | | if (stage_it == instance_info_.stages().end()) { |
3041 | | LOG(INFO) << "Recycle nonexisted stage copy jobs. instance_id=" << instance_id_ |
3042 | | << ", stage_id=" << stage_id << ", stage_type=" << stage_type; |
3043 | | return 1; |
3044 | | } |
3045 | | |
3046 | | const auto& object_store_info = stage_it->obj_info(); |
3047 | | auto stage_access_type = stage_it->has_access_type() ? stage_it->access_type() : StagePB::AKSK; |
3048 | | |
3049 | | S3Conf s3_conf; |
3050 | | if (stage_type == StagePB::EXTERNAL) { |
3051 | | if (stage_access_type == StagePB::AKSK) { |
3052 | | auto conf = S3Conf::from_obj_store_info(object_store_info); |
3053 | | if (!conf) { |
3054 | | return -1; |
3055 | | } |
3056 | | |
3057 | | s3_conf = std::move(*conf); |
3058 | | } else if (stage_access_type == StagePB::BUCKET_ACL) { |
3059 | | auto conf = S3Conf::from_obj_store_info(object_store_info, true /* skip_aksk */); |
3060 | | if (!conf) { |
3061 | | return -1; |
3062 | | } |
3063 | | |
3064 | | s3_conf = std::move(*conf); |
3065 | | if (instance_info_.ram_user().has_encryption_info()) { |
3066 | | AkSkPair plain_ak_sk_pair; |
3067 | | int ret = decrypt_ak_sk_helper( |
3068 | | instance_info_.ram_user().ak(), instance_info_.ram_user().sk(), |
3069 | | instance_info_.ram_user().encryption_info(), &plain_ak_sk_pair); |
3070 | | if (ret != 0) { |
3071 | | LOG(WARNING) << "fail to decrypt ak sk. instance_id: " << instance_id_ |
3072 | | << " ram_user: " << proto_to_json(instance_info_.ram_user()); |
3073 | | return -1; |
3074 | | } |
3075 | | s3_conf.ak = std::move(plain_ak_sk_pair.first); |
3076 | | s3_conf.sk = std::move(plain_ak_sk_pair.second); |
3077 | | } else { |
3078 | | s3_conf.ak = instance_info_.ram_user().ak(); |
3079 | | s3_conf.sk = instance_info_.ram_user().sk(); |
3080 | | } |
3081 | | } else { |
3082 | | LOG(INFO) << "Unsupported stage access type=" << stage_access_type |
3083 | | << ", instance_id=" << instance_id_ << ", stage_id=" << stage_id; |
3084 | | return -1; |
3085 | | } |
3086 | | } else if (stage_type == StagePB::INTERNAL) { |
3087 | | int idx = stoi(object_store_info.id()); |
3088 | | if (idx > instance_info_.obj_info().size() || idx < 1) { |
3089 | | LOG(WARNING) << "invalid idx: " << idx; |
3090 | | return -1; |
3091 | | } |
3092 | | |
3093 | | const auto& old_obj = instance_info_.obj_info()[idx - 1]; |
3094 | | auto conf = S3Conf::from_obj_store_info(old_obj); |
3095 | | if (!conf) { |
3096 | | return -1; |
3097 | | } |
3098 | | |
3099 | | s3_conf = std::move(*conf); |
3100 | | s3_conf.prefix = object_store_info.prefix(); |
3101 | | } else { |
3102 | | LOG(WARNING) << "unknown stage type " << stage_type; |
3103 | | return -1; |
3104 | | } |
3105 | | |
3106 | | std::shared_ptr<S3Accessor> s3_accessor; |
3107 | | int ret = S3Accessor::create(std::move(s3_conf), &s3_accessor); |
3108 | | if (ret != 0) { |
3109 | | LOG(WARNING) << "failed to init s3 accessor ret=" << ret; |
3110 | | return -1; |
3111 | | } |
3112 | | |
3113 | | *accessor = std::move(s3_accessor); |
3114 | | #endif |
3115 | 3 | return 0; |
3116 | 5 | } |
3117 | | |
3118 | 11 | int InstanceRecycler::recycle_stage() { |
3119 | 11 | int64_t num_scanned = 0; |
3120 | 11 | int64_t num_recycled = 0; |
3121 | 11 | const std::string task_name = "recycle_stage"; |
3122 | | |
3123 | 11 | LOG_INFO("begin to recycle stage").tag("instance_id", instance_id_); |
3124 | | |
3125 | 11 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
3126 | 11 | register_recycle_task(task_name, start_time); |
3127 | | |
3128 | 11 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { |
3129 | 11 | unregister_recycle_task(task_name); |
3130 | 11 | int64_t cost = |
3131 | 11 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
3132 | 11 | LOG_INFO("recycle stage, cost={}s", cost) |
3133 | 11 | .tag("instance_id", instance_id_) |
3134 | 11 | .tag("num_scanned", num_scanned) |
3135 | 11 | .tag("num_recycled", num_recycled); |
3136 | 11 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEPi Line | Count | Source | 3128 | 11 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { | 3129 | 11 | unregister_recycle_task(task_name); | 3130 | 11 | int64_t cost = | 3131 | 11 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 3132 | 11 | LOG_INFO("recycle stage, cost={}s", cost) | 3133 | 11 | .tag("instance_id", instance_id_) | 3134 | 11 | .tag("num_scanned", num_scanned) | 3135 | 11 | .tag("num_recycled", num_recycled); | 3136 | 11 | }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEPi |
3137 | | |
3138 | 11 | RecycleStageKeyInfo key_info0 {instance_id_, ""}; |
3139 | 11 | RecycleStageKeyInfo key_info1 {instance_id_, "\xff"}; |
3140 | 11 | std::string key0 = recycle_stage_key(key_info0); |
3141 | 11 | std::string key1 = recycle_stage_key(key_info1); |
3142 | | |
3143 | 11 | std::vector<std::string_view> stage_keys; |
3144 | 11 | auto recycle_func = [&start_time, &num_scanned, &num_recycled, &stage_keys, this]( |
3145 | 11 | std::string_view k, std::string_view v) -> int { |
3146 | 1 | ++num_scanned; |
3147 | 1 | RecycleStagePB recycle_stage; |
3148 | 1 | if (!recycle_stage.ParseFromArray(v.data(), v.size())) { |
3149 | 0 | LOG_WARNING("malformed recycle stage").tag("key", hex(k)); |
3150 | 0 | return -1; |
3151 | 0 | } |
3152 | | |
3153 | 1 | int idx = stoi(recycle_stage.stage().obj_info().id()); |
3154 | 1 | if (idx > instance_info_.obj_info().size() || idx < 1) { |
3155 | 0 | LOG(WARNING) << "invalid idx: " << idx; |
3156 | 0 | return -1; |
3157 | 0 | } |
3158 | | |
3159 | 1 | std::shared_ptr<StorageVaultAccessor> accessor; |
3160 | 1 | int ret = SYNC_POINT_HOOK_RETURN_VALUE( |
3161 | 1 | [&] { |
3162 | 1 | auto& old_obj = instance_info_.obj_info()[idx - 1]; |
3163 | 1 | auto s3_conf = S3Conf::from_obj_store_info(old_obj); |
3164 | 1 | if (!s3_conf) { |
3165 | 1 | return -1; |
3166 | 1 | } |
3167 | | |
3168 | 1 | s3_conf->prefix = recycle_stage.stage().obj_info().prefix(); |
3169 | 1 | std::shared_ptr<S3Accessor> s3_accessor; |
3170 | 1 | int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor); |
3171 | 1 | if (ret != 0) { |
3172 | 1 | return -1; |
3173 | 1 | } |
3174 | | |
3175 | 1 | accessor = std::move(s3_accessor); |
3176 | 1 | return 0; |
3177 | 1 | }(), |
3178 | 1 | "recycle_stage:get_accessor", &accessor); |
3179 | | |
3180 | 1 | if (ret != 0) { |
3181 | 0 | LOG(WARNING) << "failed to init accessor ret=" << ret; |
3182 | 0 | return ret; |
3183 | 0 | } |
3184 | | |
3185 | 1 | LOG_INFO("begin to delete objects of dropped internal stage") |
3186 | 1 | .tag("instance_id", instance_id_) |
3187 | 1 | .tag("stage_id", recycle_stage.stage().stage_id()) |
3188 | 1 | .tag("user_name", recycle_stage.stage().mysql_user_name()[0]) |
3189 | 1 | .tag("user_id", recycle_stage.stage().mysql_user_id()[0]) |
3190 | 1 | .tag("obj_info_id", idx) |
3191 | 1 | .tag("prefix", recycle_stage.stage().obj_info().prefix()); |
3192 | 1 | ret = accessor->delete_all(); |
3193 | 1 | if (ret != 0) { |
3194 | 0 | LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id=" |
3195 | 0 | << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id() |
3196 | 0 | << ", prefix=" << recycle_stage.stage().obj_info().prefix() |
3197 | 0 | << ", ret=" << ret; |
3198 | 0 | return -1; |
3199 | 0 | } |
3200 | 1 | ++num_recycled; |
3201 | 1 | check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time); |
3202 | 1 | stage_keys.push_back(k); |
3203 | 1 | return 0; |
3204 | 1 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 3145 | 1 | std::string_view k, std::string_view v) -> int { | 3146 | 1 | ++num_scanned; | 3147 | 1 | RecycleStagePB recycle_stage; | 3148 | 1 | if (!recycle_stage.ParseFromArray(v.data(), v.size())) { | 3149 | 0 | LOG_WARNING("malformed recycle stage").tag("key", hex(k)); | 3150 | 0 | return -1; | 3151 | 0 | } | 3152 | | | 3153 | 1 | int idx = stoi(recycle_stage.stage().obj_info().id()); | 3154 | 1 | if (idx > instance_info_.obj_info().size() || idx < 1) { | 3155 | 0 | LOG(WARNING) << "invalid idx: " << idx; | 3156 | 0 | return -1; | 3157 | 0 | } | 3158 | | | 3159 | 1 | std::shared_ptr<StorageVaultAccessor> accessor; | 3160 | 1 | int ret = SYNC_POINT_HOOK_RETURN_VALUE( | 3161 | 1 | [&] { | 3162 | 1 | auto& old_obj = instance_info_.obj_info()[idx - 1]; | 3163 | 1 | auto s3_conf = S3Conf::from_obj_store_info(old_obj); | 3164 | 1 | if (!s3_conf) { | 3165 | 1 | return -1; | 3166 | 1 | } | 3167 | | | 3168 | 1 | s3_conf->prefix = recycle_stage.stage().obj_info().prefix(); | 3169 | 1 | std::shared_ptr<S3Accessor> s3_accessor; | 3170 | 1 | int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor); | 3171 | 1 | if (ret != 0) { | 3172 | 1 | return -1; | 3173 | 1 | } | 3174 | | | 3175 | 1 | accessor = std::move(s3_accessor); | 3176 | 1 | return 0; | 3177 | 1 | }(), | 3178 | 1 | "recycle_stage:get_accessor", &accessor); | 3179 | | | 3180 | 1 | if (ret != 0) { | 3181 | 0 | LOG(WARNING) << "failed to init accessor ret=" << ret; | 3182 | 0 | return ret; | 3183 | 0 | } | 3184 | | | 3185 | 1 | LOG_INFO("begin to delete objects of dropped internal stage") | 3186 | 1 | .tag("instance_id", instance_id_) | 3187 | 1 | .tag("stage_id", recycle_stage.stage().stage_id()) | 3188 | 1 | .tag("user_name", recycle_stage.stage().mysql_user_name()[0]) | 3189 | 1 | .tag("user_id", recycle_stage.stage().mysql_user_id()[0]) | 
3190 | 1 | .tag("obj_info_id", idx) | 3191 | 1 | .tag("prefix", recycle_stage.stage().obj_info().prefix()); | 3192 | 1 | ret = accessor->delete_all(); | 3193 | 1 | if (ret != 0) { | 3194 | 0 | LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id=" | 3195 | 0 | << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id() | 3196 | 0 | << ", prefix=" << recycle_stage.stage().obj_info().prefix() | 3197 | 0 | << ", ret=" << ret; | 3198 | 0 | return -1; | 3199 | 0 | } | 3200 | 1 | ++num_recycled; | 3201 | 1 | check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time); | 3202 | 1 | stage_keys.push_back(k); | 3203 | 1 | return 0; | 3204 | 1 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
3205 | | |
3206 | 11 | auto loop_done = [&stage_keys, this]() -> int { |
3207 | 1 | if (stage_keys.empty()) return 0; |
3208 | 1 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, |
3209 | 1 | [&](int*) { stage_keys.clear(); }); recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clEvENKUlPiE_clES3_ Line | Count | Source | 3209 | 1 | [&](int*) { stage_keys.clear(); }); |
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clEvENKUlPiE_clES3_ |
3210 | 1 | if (0 != txn_remove(txn_kv_.get(), stage_keys)) { |
3211 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_; |
3212 | 0 | return -1; |
3213 | 0 | } |
3214 | 1 | return 0; |
3215 | 1 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clEv Line | Count | Source | 3206 | 1 | auto loop_done = [&stage_keys, this]() -> int { | 3207 | 1 | if (stage_keys.empty()) return 0; | 3208 | 1 | std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, | 3209 | 1 | [&](int*) { stage_keys.clear(); }); | 3210 | 1 | if (0 != txn_remove(txn_kv_.get(), stage_keys)) { | 3211 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_; | 3212 | 0 | return -1; | 3213 | 0 | } | 3214 | 1 | return 0; | 3215 | 1 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clEv |
3216 | | |
3217 | 11 | return scan_and_recycle(key0, key1, std::move(recycle_func), std::move(loop_done)); |
3218 | 11 | } |
3219 | | |
3220 | 10 | int InstanceRecycler::recycle_expired_stage_objects() { |
3221 | 10 | LOG_INFO("begin to recycle expired stage objects").tag("instance_id", instance_id_); |
3222 | | |
3223 | 10 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
3224 | | |
3225 | 10 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { |
3226 | 10 | int64_t cost = |
3227 | 10 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
3228 | 10 | LOG_INFO("recycle expired stage objects, cost={}s", cost).tag("instance_id", instance_id_); |
3229 | 10 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEPi Line | Count | Source | 3225 | 10 | std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { | 3226 | 10 | int64_t cost = | 3227 | 10 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 3228 | 10 | LOG_INFO("recycle expired stage objects, cost={}s", cost).tag("instance_id", instance_id_); | 3229 | 10 | }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEPi |
3230 | 10 | int ret = 0; |
3231 | 10 | for (const auto& stage : instance_info_.stages()) { |
3232 | 0 | std::stringstream ss; |
3233 | 0 | ss << "instance_id=" << instance_id_ << ", stage_id=" << stage.stage_id() << ", user_name=" |
3234 | 0 | << (stage.mysql_user_name().empty() ? "null" : stage.mysql_user_name().at(0)) |
3235 | 0 | << ", user_id=" << (stage.mysql_user_id().empty() ? "null" : stage.mysql_user_id().at(0)) |
3236 | 0 | << ", prefix=" << stage.obj_info().prefix(); |
3237 | |
|
3238 | 0 | if (stopped()) break; |
3239 | 0 | if (stage.type() == StagePB::EXTERNAL) { |
3240 | 0 | continue; |
3241 | 0 | } |
3242 | 0 | int idx = stoi(stage.obj_info().id()); |
3243 | 0 | if (idx > instance_info_.obj_info().size() || idx < 1) { |
3244 | 0 | LOG(WARNING) << "invalid idx: " << idx << ", id: " << stage.obj_info().id(); |
3245 | 0 | continue; |
3246 | 0 | } |
3247 | | |
3248 | 0 | const auto& old_obj = instance_info_.obj_info()[idx - 1]; |
3249 | 0 | auto s3_conf = S3Conf::from_obj_store_info(old_obj); |
3250 | 0 | if (!s3_conf) { |
3251 | 0 | LOG(WARNING) << "failed to init s3_conf with obj_info=" << old_obj.ShortDebugString(); |
3252 | 0 | continue; |
3253 | 0 | } |
3254 | | |
3255 | 0 | s3_conf->prefix = stage.obj_info().prefix(); |
3256 | 0 | std::shared_ptr<S3Accessor> accessor; |
3257 | 0 | int ret1 = S3Accessor::create(*s3_conf, &accessor); |
3258 | 0 | if (ret1 != 0) { |
3259 | 0 | LOG(WARNING) << "failed to init s3 accessor ret=" << ret1 << " " << ss.str(); |
3260 | 0 | ret = -1; |
3261 | 0 | continue; |
3262 | 0 | } |
3263 | | |
3264 | 0 | if (s3_conf->prefix.find("/stage/") == std::string::npos) { |
3265 | 0 | LOG(WARNING) << "try to delete illegal prefix, which is catastrophic, " << ss.str(); |
3266 | 0 | ret = -1; |
3267 | 0 | continue; |
3268 | 0 | } |
3269 | | |
3270 | 0 | LOG(INFO) << "recycle expired stage objects, " << ss.str(); |
3271 | 0 | int64_t expiration_time = |
3272 | 0 | duration_cast<seconds>(system_clock::now().time_since_epoch()).count() - |
3273 | 0 | config::internal_stage_objects_expire_time_second; |
3274 | 0 | if (config::force_immediate_recycle) { |
3275 | 0 | expiration_time = INT64_MAX; |
3276 | 0 | } |
3277 | 0 | ret1 = accessor->delete_all(expiration_time); |
3278 | 0 | if (ret1 != 0) { |
3279 | 0 | LOG(WARNING) << "failed to recycle expired stage objects, ret=" << ret1 << " " |
3280 | 0 | << ss.str(); |
3281 | 0 | ret = -1; |
3282 | 0 | continue; |
3283 | 0 | } |
3284 | 0 | } |
3285 | 10 | return ret; |
3286 | 10 | } |
3287 | | |
3288 | 121 | void InstanceRecycler::register_recycle_task(const std::string& task_name, int64_t start_time) { |
3289 | 121 | std::lock_guard lock(recycle_tasks_mutex); |
3290 | 121 | running_recycle_tasks[task_name] = start_time; |
3291 | 121 | } |
3292 | | |
3293 | 121 | void InstanceRecycler::unregister_recycle_task(const std::string& task_name) { |
3294 | 121 | std::lock_guard lock(recycle_tasks_mutex); |
3295 | 121 | DCHECK(running_recycle_tasks[task_name] > 0); |
3296 | 121 | running_recycle_tasks.erase(task_name); |
3297 | 121 | } |
3298 | | |
3299 | 21 | bool InstanceRecycler::check_recycle_tasks() { |
3300 | 21 | std::map<std::string, int64_t> tmp_running_recycle_tasks; |
3301 | 21 | { |
3302 | 21 | std::lock_guard lock(recycle_tasks_mutex); |
3303 | 21 | tmp_running_recycle_tasks = running_recycle_tasks; |
3304 | 21 | } |
3305 | | |
3306 | 21 | bool found = false; |
3307 | 21 | int64_t now = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
3308 | 21 | for (auto& [task_name, start_time] : tmp_running_recycle_tasks) { |
3309 | 20 | int64_t cost = now - start_time; |
3310 | 20 | if (cost > config::recycle_task_threshold_seconds) [[unlikely]] { |
3311 | 20 | LOG_INFO("recycle task cost too much time cost={}s", cost) |
3312 | 20 | .tag("instance_id", instance_id_) |
3313 | 20 | .tag("task", task_name); |
3314 | 20 | found = true; |
3315 | 20 | } |
3316 | 20 | } |
3317 | | |
3318 | 21 | return found; |
3319 | 21 | } |
3320 | | |
3321 | | } // namespace doris::cloud |