/root/doris/cloud/src/recycler/recycler.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "recycler/recycler.h" |
19 | | |
20 | | #include <brpc/builtin_service.pb.h> |
21 | | #include <brpc/server.h> |
22 | | #include <butil/endpoint.h> |
23 | | #include <butil/strings/string_split.h> |
24 | | #include <bvar/status.h> |
25 | | #include <gen_cpp/cloud.pb.h> |
26 | | #include <gen_cpp/olap_file.pb.h> |
27 | | |
28 | | #include <algorithm> |
29 | | #include <atomic> |
30 | | #include <chrono> |
31 | | #include <cstddef> |
32 | | #include <cstdint> |
33 | | #include <cstdlib> |
34 | | #include <deque> |
35 | | #include <initializer_list> |
36 | | #include <memory> |
37 | | #include <numeric> |
38 | | #include <string> |
39 | | #include <string_view> |
40 | | #include <utility> |
41 | | |
42 | | #include "common/defer.h" |
43 | | #include "common/stopwatch.h" |
44 | | #include "meta-service/meta_service.h" |
45 | | #include "meta-service/meta_service_helper.h" |
46 | | #include "meta-service/meta_service_schema.h" |
47 | | #include "meta-store/blob_message.h" |
48 | | #include "meta-store/meta_reader.h" |
49 | | #include "meta-store/txn_kv.h" |
50 | | #include "meta-store/txn_kv_error.h" |
51 | | #include "meta-store/versioned_value.h" |
52 | | #include "recycler/checker.h" |
53 | | #ifdef ENABLE_HDFS_STORAGE_VAULT |
54 | | #include "recycler/hdfs_accessor.h" |
55 | | #endif |
56 | | #include "recycler/s3_accessor.h" |
57 | | #include "recycler/storage_vault_accessor.h" |
58 | | #ifdef UNIT_TEST |
59 | | #include "../test/mock_accessor.h" |
60 | | #endif |
61 | | #include "common/bvars.h" |
62 | | #include "common/config.h" |
63 | | #include "common/encryption_util.h" |
64 | | #include "common/logging.h" |
65 | | #include "common/simple_thread_pool.h" |
66 | | #include "common/util.h" |
67 | | #include "cpp/sync_point.h" |
68 | | #include "meta-store/codec.h" |
69 | | #include "meta-store/keys.h" |
70 | | #include "recycler/recycler_service.h" |
71 | | #include "recycler/sync_executor.h" |
72 | | #include "recycler/util.h" |
73 | | |
74 | | namespace doris::cloud { |
75 | | |
76 | | using namespace std::chrono; |
77 | | |
78 | | // return 0 for success get a key, 1 for key not found, negative for error |
79 | 0 | [[maybe_unused]] static int txn_get(TxnKv* txn_kv, std::string_view key, std::string& val) { |
80 | 0 | std::unique_ptr<Transaction> txn; |
81 | 0 | TxnErrorCode err = txn_kv->create_txn(&txn); |
82 | 0 | if (err != TxnErrorCode::TXN_OK) { |
83 | 0 | return -1; |
84 | 0 | } |
85 | 0 | switch (txn->get(key, &val, true)) { |
86 | 0 | case TxnErrorCode::TXN_OK: |
87 | 0 | return 0; |
88 | 0 | case TxnErrorCode::TXN_KEY_NOT_FOUND: |
89 | 0 | return 1; |
90 | 0 | default: |
91 | 0 | return -1; |
92 | 0 | }; |
93 | 0 | } Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE |
94 | | |
95 | | // 0 for success, negative for error |
96 | | static int txn_get(TxnKv* txn_kv, std::string_view begin, std::string_view end, |
97 | 270 | std::unique_ptr<RangeGetIterator>& it) { |
98 | 270 | std::unique_ptr<Transaction> txn; |
99 | 270 | TxnErrorCode err = txn_kv->create_txn(&txn); |
100 | 270 | if (err != TxnErrorCode::TXN_OK) { |
101 | 0 | return -1; |
102 | 0 | } |
103 | 270 | switch (txn->get(begin, end, &it, true)) { |
104 | 270 | case TxnErrorCode::TXN_OK: |
105 | 270 | return 0; |
106 | 0 | case TxnErrorCode::TXN_KEY_NOT_FOUND: |
107 | 0 | return 1; |
108 | 0 | default: |
109 | 0 | return -1; |
110 | 270 | }; |
111 | 0 | } recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE Line | Count | Source | 97 | 251 | std::unique_ptr<RangeGetIterator>& it) { | 98 | 251 | std::unique_ptr<Transaction> txn; | 99 | 251 | TxnErrorCode err = txn_kv->create_txn(&txn); | 100 | 251 | if (err != TxnErrorCode::TXN_OK) { | 101 | 0 | return -1; | 102 | 0 | } | 103 | 251 | switch (txn->get(begin, end, &it, true)) { | 104 | 251 | case TxnErrorCode::TXN_OK: | 105 | 251 | return 0; | 106 | 0 | case TxnErrorCode::TXN_KEY_NOT_FOUND: | 107 | 0 | return 1; | 108 | 0 | default: | 109 | 0 | return -1; | 110 | 251 | }; | 111 | 0 | } |
recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE Line | Count | Source | 97 | 19 | std::unique_ptr<RangeGetIterator>& it) { | 98 | 19 | std::unique_ptr<Transaction> txn; | 99 | 19 | TxnErrorCode err = txn_kv->create_txn(&txn); | 100 | 19 | if (err != TxnErrorCode::TXN_OK) { | 101 | 0 | return -1; | 102 | 0 | } | 103 | 19 | switch (txn->get(begin, end, &it, true)) { | 104 | 19 | case TxnErrorCode::TXN_OK: | 105 | 19 | return 0; | 106 | 0 | case TxnErrorCode::TXN_KEY_NOT_FOUND: | 107 | 0 | return 1; | 108 | 0 | default: | 109 | 0 | return -1; | 110 | 19 | }; | 111 | 0 | } |
|
112 | | |
113 | | // return 0 for success otherwise error |
114 | 6 | static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) { |
115 | 6 | std::unique_ptr<Transaction> txn; |
116 | 6 | TxnErrorCode err = txn_kv->create_txn(&txn); |
117 | 6 | if (err != TxnErrorCode::TXN_OK) { |
118 | 0 | return -1; |
119 | 0 | } |
120 | 10 | for (auto k : keys) { |
121 | 10 | txn->remove(k); |
122 | 10 | } |
123 | 6 | switch (txn->commit()) { |
124 | 6 | case TxnErrorCode::TXN_OK: |
125 | 6 | return 0; |
126 | 0 | case TxnErrorCode::TXN_CONFLICT: |
127 | 0 | return -1; |
128 | 0 | default: |
129 | 0 | return -1; |
130 | 6 | } |
131 | 6 | } recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE Line | Count | Source | 114 | 5 | static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) { | 115 | 5 | std::unique_ptr<Transaction> txn; | 116 | 5 | TxnErrorCode err = txn_kv->create_txn(&txn); | 117 | 5 | if (err != TxnErrorCode::TXN_OK) { | 118 | 0 | return -1; | 119 | 0 | } | 120 | 9 | for (auto k : keys) { | 121 | 9 | txn->remove(k); | 122 | 9 | } | 123 | 5 | switch (txn->commit()) { | 124 | 5 | case TxnErrorCode::TXN_OK: | 125 | 5 | return 0; | 126 | 0 | case TxnErrorCode::TXN_CONFLICT: | 127 | 0 | return -1; | 128 | 0 | default: | 129 | 0 | return -1; | 130 | 5 | } | 131 | 5 | } |
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE Line | Count | Source | 114 | 1 | static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) { | 115 | 1 | std::unique_ptr<Transaction> txn; | 116 | 1 | TxnErrorCode err = txn_kv->create_txn(&txn); | 117 | 1 | if (err != TxnErrorCode::TXN_OK) { | 118 | 0 | return -1; | 119 | 0 | } | 120 | 1 | for (auto k : keys) { | 121 | 1 | txn->remove(k); | 122 | 1 | } | 123 | 1 | switch (txn->commit()) { | 124 | 1 | case TxnErrorCode::TXN_OK: | 125 | 1 | return 0; | 126 | 0 | case TxnErrorCode::TXN_CONFLICT: | 127 | 0 | return -1; | 128 | 0 | default: | 129 | 0 | return -1; | 130 | 1 | } | 131 | 1 | } |
|
132 | | |
133 | | // return 0 for success otherwise error |
134 | 54 | static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) { |
135 | 54 | std::unique_ptr<Transaction> txn; |
136 | 54 | TxnErrorCode err = txn_kv->create_txn(&txn); |
137 | 54 | if (err != TxnErrorCode::TXN_OK) { |
138 | 0 | return -1; |
139 | 0 | } |
140 | 109k | for (auto& k : keys) { |
141 | 109k | txn->remove(k); |
142 | 109k | } |
143 | 54 | switch (txn->commit()) { |
144 | 54 | case TxnErrorCode::TXN_OK: |
145 | 54 | return 0; |
146 | 0 | case TxnErrorCode::TXN_CONFLICT: |
147 | 0 | return -1; |
148 | 0 | default: |
149 | 0 | return -1; |
150 | 54 | } |
151 | 54 | } recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE Line | Count | Source | 134 | 48 | static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) { | 135 | 48 | std::unique_ptr<Transaction> txn; | 136 | 48 | TxnErrorCode err = txn_kv->create_txn(&txn); | 137 | 48 | if (err != TxnErrorCode::TXN_OK) { | 138 | 0 | return -1; | 139 | 0 | } | 140 | 105k | for (auto& k : keys) { | 141 | 105k | txn->remove(k); | 142 | 105k | } | 143 | 48 | switch (txn->commit()) { | 144 | 48 | case TxnErrorCode::TXN_OK: | 145 | 48 | return 0; | 146 | 0 | case TxnErrorCode::TXN_CONFLICT: | 147 | 0 | return -1; | 148 | 0 | default: | 149 | 0 | return -1; | 150 | 48 | } | 151 | 48 | } |
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE Line | Count | Source | 134 | 6 | static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) { | 135 | 6 | std::unique_ptr<Transaction> txn; | 136 | 6 | TxnErrorCode err = txn_kv->create_txn(&txn); | 137 | 6 | if (err != TxnErrorCode::TXN_OK) { | 138 | 0 | return -1; | 139 | 0 | } | 140 | 4.00k | for (auto& k : keys) { | 141 | 4.00k | txn->remove(k); | 142 | 4.00k | } | 143 | 6 | switch (txn->commit()) { | 144 | 6 | case TxnErrorCode::TXN_OK: | 145 | 6 | return 0; | 146 | 0 | case TxnErrorCode::TXN_CONFLICT: | 147 | 0 | return -1; | 148 | 0 | default: | 149 | 0 | return -1; | 150 | 6 | } | 151 | 6 | } |
|
152 | | |
153 | | // return 0 for success otherwise error |
154 | | [[maybe_unused]] static int txn_remove(TxnKv* txn_kv, std::string_view begin, |
155 | 106k | std::string_view end) { |
156 | 106k | std::unique_ptr<Transaction> txn; |
157 | 106k | TxnErrorCode err = txn_kv->create_txn(&txn); |
158 | 106k | if (err != TxnErrorCode::TXN_OK) { |
159 | 0 | return -1; |
160 | 0 | } |
161 | 106k | txn->remove(begin, end); |
162 | 106k | switch (txn->commit()) { |
163 | 106k | case TxnErrorCode::TXN_OK: |
164 | 106k | return 0; |
165 | 0 | case TxnErrorCode::TXN_CONFLICT: |
166 | 0 | return -1; |
167 | 0 | default: |
168 | 0 | return -1; |
169 | 106k | } |
170 | 106k | } recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 155 | 106k | std::string_view end) { | 156 | 106k | std::unique_ptr<Transaction> txn; | 157 | 106k | TxnErrorCode err = txn_kv->create_txn(&txn); | 158 | 106k | if (err != TxnErrorCode::TXN_OK) { | 159 | 0 | return -1; | 160 | 0 | } | 161 | 106k | txn->remove(begin, end); | 162 | 106k | switch (txn->commit()) { | 163 | 106k | case TxnErrorCode::TXN_OK: | 164 | 106k | return 0; | 165 | 0 | case TxnErrorCode::TXN_CONFLICT: | 166 | 0 | return -1; | 167 | 0 | default: | 168 | 0 | return -1; | 169 | 106k | } | 170 | 106k | } |
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_ |
171 | | |
172 | | void scan_restore_job_rowset( |
173 | | Transaction* txn, const std::string& instance_id, int64_t tablet_id, MetaServiceCode& code, |
174 | | std::string& msg, |
175 | | std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>>* restore_job_rs_metas); |
176 | | |
177 | | static inline void check_recycle_task(const std::string& instance_id, const std::string& task_name, |
178 | | int64_t num_scanned, int64_t num_recycled, |
179 | 52 | int64_t start_time) { |
180 | 52 | if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] { |
181 | 0 | int64_t cost = |
182 | 0 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
183 | 0 | if (cost > config::recycle_task_threshold_seconds) { |
184 | 0 | LOG_WARNING("recycle task cost too much time cost={}s", cost) |
185 | 0 | .tag("instance_id", instance_id) |
186 | 0 | .tag("task", task_name) |
187 | 0 | .tag("num_scanned", num_scanned) |
188 | 0 | .tag("num_recycled", num_recycled); |
189 | 0 | } |
190 | 0 | } |
191 | 52 | return; |
192 | 52 | } recycler_test.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll Line | Count | Source | 179 | 50 | int64_t start_time) { | 180 | 50 | if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] { | 181 | 0 | int64_t cost = | 182 | 0 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 183 | 0 | if (cost > config::recycle_task_threshold_seconds) { | 184 | 0 | LOG_WARNING("recycle task cost too much time cost={}s", cost) | 185 | 0 | .tag("instance_id", instance_id) | 186 | 0 | .tag("task", task_name) | 187 | 0 | .tag("num_scanned", num_scanned) | 188 | 0 | .tag("num_recycled", num_recycled); | 189 | 0 | } | 190 | 0 | } | 191 | 50 | return; | 192 | 50 | } |
recycler.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll Line | Count | Source | 179 | 2 | int64_t start_time) { | 180 | 2 | if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] { | 181 | 0 | int64_t cost = | 182 | 0 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 183 | 0 | if (cost > config::recycle_task_threshold_seconds) { | 184 | 0 | LOG_WARNING("recycle task cost too much time cost={}s", cost) | 185 | 0 | .tag("instance_id", instance_id) | 186 | 0 | .tag("task", task_name) | 187 | 0 | .tag("num_scanned", num_scanned) | 188 | 0 | .tag("num_recycled", num_recycled); | 189 | 0 | } | 190 | 0 | } | 191 | 2 | return; | 192 | 2 | } |
|
193 | | |
194 | 4 | Recycler::Recycler(std::shared_ptr<TxnKv> txn_kv) : txn_kv_(std::move(txn_kv)) { |
195 | 4 | ip_port_ = std::string(butil::my_ip_cstr()) + ":" + std::to_string(config::brpc_listen_port); |
196 | | |
197 | 4 | auto s3_producer_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism, |
198 | 4 | "s3_producer_pool"); |
199 | 4 | s3_producer_pool->start(); |
200 | 4 | auto recycle_tablet_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism, |
201 | 4 | "recycle_tablet_pool"); |
202 | 4 | recycle_tablet_pool->start(); |
203 | 4 | auto group_recycle_function_pool = std::make_shared<SimpleThreadPool>( |
204 | 4 | config::recycle_pool_parallelism, "group_recycle_function_pool"); |
205 | 4 | group_recycle_function_pool->start(); |
206 | 4 | _thread_pool_group = |
207 | 4 | RecyclerThreadPoolGroup(std::move(s3_producer_pool), std::move(recycle_tablet_pool), |
208 | 4 | std::move(group_recycle_function_pool)); |
209 | | |
210 | 4 | auto resource_mgr = std::make_shared<ResourceManager>(txn_kv_); |
211 | 4 | txn_lazy_committer_ = std::make_shared<TxnLazyCommitter>(txn_kv_, std::move(resource_mgr)); |
212 | 4 | snapshot_manager_ = std::make_shared<SnapshotManager>(txn_kv_); |
213 | 4 | } |
214 | | |
215 | 4 | Recycler::~Recycler() { |
216 | 4 | if (!stopped()) { |
217 | 0 | stop(); |
218 | 0 | } |
219 | 4 | } |
220 | | |
221 | 4 | void Recycler::instance_scanner_callback() { |
222 | | // sleep 60 seconds before scheduling for the launch procedure to complete: |
223 | | // some bad hdfs connection may cause some log to stdout stderr |
224 | | // which may pollute .out file and affect the script to check success |
225 | 4 | std::this_thread::sleep_for( |
226 | 4 | std::chrono::seconds(config::recycler_sleep_before_scheduling_seconds)); |
227 | 8 | while (!stopped()) { |
228 | 4 | std::vector<InstanceInfoPB> instances; |
229 | 4 | get_all_instances(txn_kv_.get(), instances); |
230 | | // TODO(plat1ko): delete job recycle kv of non-existent instances |
231 | 4 | LOG(INFO) << "Recycler get instances: " << [&instances] { |
232 | 4 | std::stringstream ss; |
233 | 30 | for (auto& i : instances) ss << ' ' << i.instance_id(); |
234 | 4 | return ss.str(); |
235 | 4 | }(); recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev Line | Count | Source | 231 | 4 | LOG(INFO) << "Recycler get instances: " << [&instances] { | 232 | 4 | std::stringstream ss; | 233 | 30 | for (auto& i : instances) ss << ' ' << i.instance_id(); | 234 | 4 | return ss.str(); | 235 | 4 | }(); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev |
236 | 4 | if (!instances.empty()) { |
237 | | // enqueue instances |
238 | 3 | std::lock_guard lock(mtx_); |
239 | 30 | for (auto& instance : instances) { |
240 | 30 | if (instance_filter_.filter_out(instance.instance_id())) continue; |
241 | 30 | auto [_, success] = pending_instance_set_.insert(instance.instance_id()); |
242 | | // skip instance already in pending queue |
243 | 30 | if (success) { |
244 | 30 | pending_instance_queue_.push_back(std::move(instance)); |
245 | 30 | } |
246 | 30 | } |
247 | 3 | pending_instance_cond_.notify_all(); |
248 | 3 | } |
249 | 4 | { |
250 | 4 | std::unique_lock lock(mtx_); |
251 | 4 | notifier_.wait_for(lock, std::chrono::seconds(config::recycle_interval_seconds), |
252 | 7 | [&]() { return stopped(); }); recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv Line | Count | Source | 252 | 7 | [&]() { return stopped(); }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv |
253 | 4 | } |
254 | 4 | } |
255 | 4 | } |
256 | | |
257 | 8 | void Recycler::recycle_callback() { |
258 | 38 | while (!stopped()) { |
259 | 36 | InstanceInfoPB instance; |
260 | 36 | { |
261 | 36 | std::unique_lock lock(mtx_); |
262 | 36 | pending_instance_cond_.wait( |
263 | 48 | lock, [&]() { return !pending_instance_queue_.empty() || stopped(); }); recycler_test.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv Line | Count | Source | 263 | 48 | lock, [&]() { return !pending_instance_queue_.empty() || stopped(); }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv |
264 | 36 | if (stopped()) { |
265 | 6 | return; |
266 | 6 | } |
267 | 30 | instance = std::move(pending_instance_queue_.front()); |
268 | 30 | pending_instance_queue_.pop_front(); |
269 | 30 | pending_instance_set_.erase(instance.instance_id()); |
270 | 30 | } |
271 | 0 | auto& instance_id = instance.instance_id(); |
272 | 30 | { |
273 | 30 | std::lock_guard lock(mtx_); |
274 | | // skip instance in recycling |
275 | 30 | if (recycling_instance_map_.count(instance_id)) continue; |
276 | 30 | } |
277 | 30 | auto instance_recycler = std::make_shared<InstanceRecycler>( |
278 | 30 | txn_kv_, instance, _thread_pool_group, txn_lazy_committer_); |
279 | | |
280 | 30 | if (int r = instance_recycler->init(); r != 0) { |
281 | 0 | LOG(WARNING) << "failed to init instance recycler, instance_id=" << instance_id |
282 | 0 | << " ret=" << r; |
283 | 0 | continue; |
284 | 0 | } |
285 | 30 | std::string recycle_job_key; |
286 | 30 | job_recycle_key({instance_id}, &recycle_job_key); |
287 | 30 | int ret = prepare_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, |
288 | 30 | ip_port_, config::recycle_interval_seconds * 1000); |
289 | 30 | if (ret != 0) { // Prepare failed |
290 | 20 | LOG(WARNING) << "failed to prepare recycle_job, instance_id=" << instance_id |
291 | 20 | << " ret=" << ret; |
292 | 20 | continue; |
293 | 20 | } else { |
294 | 10 | std::lock_guard lock(mtx_); |
295 | 10 | recycling_instance_map_.emplace(instance_id, instance_recycler); |
296 | 10 | } |
297 | 10 | if (stopped()) return; |
298 | 10 | LOG_WARNING("begin to recycle instance").tag("instance_id", instance_id); |
299 | 10 | auto ctime_ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
300 | 10 | g_bvar_recycler_instance_recycle_start_ts.put({instance_id}, ctime_ms); |
301 | 10 | g_bvar_recycler_instance_recycle_task_status.put({"submitted"}, 1); |
302 | 10 | ret = instance_recycler->do_recycle(); |
303 | | // If instance recycler has been aborted, don't finish this job |
304 | | |
305 | 10 | if (!instance_recycler->stopped()) { |
306 | 10 | finish_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, ip_port_, |
307 | 10 | ret == 0, ctime_ms); |
308 | 10 | } |
309 | 10 | if (instance_recycler->stopped() || ret != 0) { |
310 | 0 | g_bvar_recycler_instance_recycle_task_status.put({"error"}, 1); |
311 | 0 | } |
312 | 10 | { |
313 | 10 | std::lock_guard lock(mtx_); |
314 | 10 | recycling_instance_map_.erase(instance_id); |
315 | 10 | } |
316 | | |
317 | 10 | auto now = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
318 | 10 | auto elpased_ms = now - ctime_ms; |
319 | 10 | g_bvar_recycler_instance_recycle_end_ts.put({instance_id}, now); |
320 | 10 | g_bvar_recycler_instance_last_round_recycle_duration.put({instance_id}, elpased_ms); |
321 | 10 | g_bvar_recycler_instance_next_ts.put({instance_id}, |
322 | 10 | now + config::recycle_interval_seconds * 1000); |
323 | 10 | g_bvar_recycler_instance_recycle_task_status.put({"completed"}, 1); |
324 | 10 | LOG(INFO) << "recycle instance done, " |
325 | 10 | << "instance_id=" << instance_id << " ret=" << ret << " ctime_ms: " << ctime_ms |
326 | 10 | << " now: " << now; |
327 | | |
328 | 10 | g_bvar_recycler_instance_recycle_last_success_ts.put({instance_id}, now); |
329 | | |
330 | 10 | LOG_WARNING("finish recycle instance") |
331 | 10 | .tag("instance_id", instance_id) |
332 | 10 | .tag("cost_ms", elpased_ms); |
333 | 10 | } |
334 | 8 | } |
335 | | |
336 | 4 | void Recycler::lease_recycle_jobs() { |
337 | 54 | while (!stopped()) { |
338 | 50 | std::vector<std::string> instances; |
339 | 50 | instances.reserve(recycling_instance_map_.size()); |
340 | 50 | { |
341 | 50 | std::lock_guard lock(mtx_); |
342 | 50 | for (auto& [id, _] : recycling_instance_map_) { |
343 | 30 | instances.push_back(id); |
344 | 30 | } |
345 | 50 | } |
346 | 50 | for (auto& i : instances) { |
347 | 30 | std::string recycle_job_key; |
348 | 30 | job_recycle_key({i}, &recycle_job_key); |
349 | 30 | int ret = lease_instance_recycle_job(txn_kv_.get(), recycle_job_key, i, ip_port_); |
350 | 30 | if (ret == 1) { |
351 | 0 | std::lock_guard lock(mtx_); |
352 | 0 | if (auto it = recycling_instance_map_.find(i); |
353 | 0 | it != recycling_instance_map_.end()) { |
354 | 0 | it->second->stop(); |
355 | 0 | } |
356 | 0 | } |
357 | 30 | } |
358 | 50 | { |
359 | 50 | std::unique_lock lock(mtx_); |
360 | 50 | notifier_.wait_for(lock, |
361 | 50 | std::chrono::milliseconds(config::recycle_job_lease_expired_ms / 3), |
362 | 100 | [&]() { return stopped(); }); recycler_test.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv Line | Count | Source | 362 | 100 | [&]() { return stopped(); }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv |
363 | 50 | } |
364 | 50 | } |
365 | 4 | } |
366 | | |
367 | 4 | void Recycler::check_recycle_tasks() { |
368 | 7 | while (!stopped()) { |
369 | 3 | std::unordered_map<std::string, std::shared_ptr<InstanceRecycler>> recycling_instance_map; |
370 | 3 | { |
371 | 3 | std::lock_guard lock(mtx_); |
372 | 3 | recycling_instance_map = recycling_instance_map_; |
373 | 3 | } |
374 | 3 | for (auto& entry : recycling_instance_map) { |
375 | 0 | entry.second->check_recycle_tasks(); |
376 | 0 | } |
377 | | |
378 | 3 | std::unique_lock lock(mtx_); |
379 | 3 | notifier_.wait_for(lock, std::chrono::seconds(config::check_recycle_task_interval_seconds), |
380 | 6 | [&]() { return stopped(); }); recycler_test.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv Line | Count | Source | 380 | 6 | [&]() { return stopped(); }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv |
381 | 3 | } |
382 | 4 | } |
383 | | |
384 | 4 | int Recycler::start(brpc::Server* server) { |
385 | 4 | instance_filter_.reset(config::recycle_whitelist, config::recycle_blacklist); |
386 | 4 | g_bvar_recycler_task_max_concurrency.set_value(config::recycle_concurrency); |
387 | 4 | S3Environment::getInstance(); |
388 | | |
389 | 4 | if (config::enable_checker) { |
390 | 0 | checker_ = std::make_unique<Checker>(txn_kv_); |
391 | 0 | int ret = checker_->start(); |
392 | 0 | std::string msg; |
393 | 0 | if (ret != 0) { |
394 | 0 | msg = "failed to start checker"; |
395 | 0 | LOG(ERROR) << msg; |
396 | 0 | std::cerr << msg << std::endl; |
397 | 0 | return ret; |
398 | 0 | } |
399 | 0 | msg = "checker started"; |
400 | 0 | LOG(INFO) << msg; |
401 | 0 | std::cout << msg << std::endl; |
402 | 0 | } |
403 | | |
404 | 4 | if (server) { |
405 | | // Add service |
406 | 1 | auto recycler_service = |
407 | 1 | new RecyclerServiceImpl(txn_kv_, this, checker_.get(), txn_lazy_committer_); |
408 | 1 | server->AddService(recycler_service, brpc::SERVER_OWNS_SERVICE); |
409 | 1 | } |
410 | | |
411 | 4 | workers_.emplace_back([this] { instance_scanner_callback(); }); recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv Line | Count | Source | 411 | 4 | workers_.emplace_back([this] { instance_scanner_callback(); }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv |
412 | 12 | for (int i = 0; i < config::recycle_concurrency; ++i) { |
413 | 8 | workers_.emplace_back([this] { recycle_callback(); }); recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv Line | Count | Source | 413 | 8 | workers_.emplace_back([this] { recycle_callback(); }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv |
414 | 8 | } |
415 | | |
416 | 4 | workers_.emplace_back(std::mem_fn(&Recycler::lease_recycle_jobs), this); |
417 | 4 | workers_.emplace_back(std::mem_fn(&Recycler::check_recycle_tasks), this); |
418 | 4 | return 0; |
419 | 4 | } |
420 | | |
421 | 4 | void Recycler::stop() { |
422 | 4 | stopped_ = true; |
423 | 4 | notifier_.notify_all(); |
424 | 4 | pending_instance_cond_.notify_all(); |
425 | 4 | { |
426 | 4 | std::lock_guard lock(mtx_); |
427 | 4 | for (auto& [_, recycler] : recycling_instance_map_) { |
428 | 0 | recycler->stop(); |
429 | 0 | } |
430 | 4 | } |
431 | 20 | for (auto& w : workers_) { |
432 | 20 | if (w.joinable()) w.join(); |
433 | 20 | } |
434 | 4 | if (checker_) { |
435 | 0 | checker_->stop(); |
436 | 0 | } |
437 | 4 | } |
438 | | |
439 | | class InstanceRecycler::InvertedIndexIdCache { |
440 | | public: |
441 | | InvertedIndexIdCache(std::string instance_id, std::shared_ptr<TxnKv> txn_kv) |
442 | 101 | : instance_id_(std::move(instance_id)), txn_kv_(std::move(txn_kv)) {} |
443 | | |
444 | | // Return 0 if success, 1 if schema kv not found, negative for error |
445 | | // For the same index_id, schema_version, res, since `get` is not completely atomic |
446 | | // one thread has not finished inserting, and another thread has not get the index_id and schema_version, |
447 | | // resulting in repeated addition and inaccuracy. |
448 | | // however, this approach can reduce the lock range and sacrifice a bit of meta repeated get to improve concurrency performance. |
449 | | // repeated addition does not affect correctness. |
450 | 27.5k | int get(int64_t index_id, int32_t schema_version, InvertedIndexInfo& res) { |
451 | 27.5k | { |
452 | 27.5k | std::lock_guard lock(mtx_); |
453 | 27.5k | if (schemas_without_inverted_index_.count({index_id, schema_version})) { |
454 | 4.45k | return 0; |
455 | 4.45k | } |
456 | 23.1k | if (auto it = inverted_index_id_map_.find({index_id, schema_version}); |
457 | 23.1k | it != inverted_index_id_map_.end()) { |
458 | 17.5k | res = it->second; |
459 | 17.5k | return 0; |
460 | 17.5k | } |
461 | 23.1k | } |
462 | | // Get schema from kv |
463 | | // TODO(plat1ko): Single flight |
464 | 5.53k | std::unique_ptr<Transaction> txn; |
465 | 5.53k | TxnErrorCode err = txn_kv_->create_txn(&txn); |
466 | 5.53k | if (err != TxnErrorCode::TXN_OK) { |
467 | 0 | LOG(WARNING) << "failed to create txn, err=" << err; |
468 | 0 | return -1; |
469 | 0 | } |
470 | 5.53k | auto schema_key = meta_schema_key({instance_id_, index_id, schema_version}); |
471 | 5.53k | ValueBuf val_buf; |
472 | 5.53k | err = cloud::blob_get(txn.get(), schema_key, &val_buf); |
473 | 5.53k | if (err != TxnErrorCode::TXN_OK) { |
474 | 500 | LOG(WARNING) << "failed to get schema, err=" << err; |
475 | 500 | return static_cast<int>(err); |
476 | 500 | } |
477 | 5.03k | doris::TabletSchemaCloudPB schema; |
478 | 5.03k | if (!parse_schema_value(val_buf, &schema)) { |
479 | 0 | LOG(WARNING) << "malformed schema value, key=" << hex(schema_key); |
480 | 0 | return -1; |
481 | 0 | } |
482 | 5.03k | if (schema.index_size() > 0) { |
483 | 4.01k | InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1; |
484 | 4.01k | if (schema.has_inverted_index_storage_format()) { |
485 | 4.00k | index_format = schema.inverted_index_storage_format(); |
486 | 4.00k | } |
487 | 4.01k | res.first = index_format; |
488 | 4.01k | res.second.reserve(schema.index_size()); |
489 | 10.0k | for (auto& i : schema.index()) { |
490 | 10.0k | if (i.has_index_type() && i.index_type() == IndexType::INVERTED) { |
491 | 10.0k | res.second.push_back(std::make_pair(i.index_id(), i.index_suffix_name())); |
492 | 10.0k | } |
493 | 10.0k | } |
494 | 4.01k | } |
495 | 5.03k | insert(index_id, schema_version, res); |
496 | 5.03k | return 0; |
497 | 5.03k | } |
498 | | |
499 | | // Empty `ids` means this schema has no inverted index |
500 | 5.03k | void insert(int64_t index_id, int32_t schema_version, const InvertedIndexInfo& index_info) { |
501 | 5.03k | if (index_info.second.empty()) { |
502 | 1.02k | TEST_SYNC_POINT("InvertedIndexIdCache::insert1"); |
503 | 1.02k | std::lock_guard lock(mtx_); |
504 | 1.02k | schemas_without_inverted_index_.emplace(index_id, schema_version); |
505 | 4.01k | } else { |
506 | 4.01k | TEST_SYNC_POINT("InvertedIndexIdCache::insert2"); |
507 | 4.01k | std::lock_guard lock(mtx_); |
508 | 4.01k | inverted_index_id_map_.try_emplace({index_id, schema_version}, index_info); |
509 | 4.01k | } |
510 | 5.03k | } |
511 | | |
512 | | private: |
513 | | std::string instance_id_; |
514 | | std::shared_ptr<TxnKv> txn_kv_; |
515 | | |
516 | | std::mutex mtx_; |
517 | | using Key = std::pair<int64_t, int32_t>; // <index_id, schema_version> |
518 | | struct HashOfKey { |
519 | 55.7k | size_t operator()(const Key& key) const { |
520 | 55.7k | size_t seed = 0; |
521 | 55.7k | seed = std::hash<int64_t> {}(key.first); |
522 | 55.7k | seed = std::hash<int32_t> {}(key.second); |
523 | 55.7k | return seed; |
524 | 55.7k | } |
525 | | }; |
526 | | // <index_id, schema_version> -> inverted_index_ids |
527 | | std::unordered_map<Key, InvertedIndexInfo, HashOfKey> inverted_index_id_map_; |
528 | | // Store <index_id, schema_version> of schema which doesn't have inverted index |
529 | | std::unordered_set<Key, HashOfKey> schemas_without_inverted_index_; |
530 | | }; |
531 | | |
532 | | InstanceRecycler::InstanceRecycler(std::shared_ptr<TxnKv> txn_kv, const InstanceInfoPB& instance, |
533 | | RecyclerThreadPoolGroup thread_pool_group, |
534 | | std::shared_ptr<TxnLazyCommitter> txn_lazy_committer) |
535 | | : txn_kv_(std::move(txn_kv)), |
536 | | instance_id_(instance.instance_id()), |
537 | | instance_info_(instance), |
538 | | inverted_index_id_cache_(std::make_unique<InvertedIndexIdCache>(instance_id_, txn_kv_)), |
539 | | _thread_pool_group(std::move(thread_pool_group)), |
540 | 101 | txn_lazy_committer_(std::move(txn_lazy_committer)) { |
541 | 101 | snapshot_manager_ = std::make_shared<SnapshotManager>(txn_kv_); |
542 | | |
543 | | // Since the recycler's resource manager could not be notified when instance info changes, |
544 | | // we need to refresh the instance info here to ensure the resource manager has the latest info. |
545 | 101 | txn_lazy_committer_->resource_manager()->refresh_instance(instance_id_, instance); |
546 | 101 | }; |
547 | | |
548 | 101 | InstanceRecycler::~InstanceRecycler() = default; |
549 | | |
550 | 89 | int InstanceRecycler::init_obj_store_accessors() { |
551 | 89 | for (const auto& obj_info : instance_info_.obj_info()) { |
552 | 58 | #ifdef UNIT_TEST |
553 | 58 | auto accessor = std::make_shared<MockAccessor>(); |
554 | | #else |
555 | | auto s3_conf = S3Conf::from_obj_store_info(obj_info); |
556 | | if (!s3_conf) { |
557 | | LOG(WARNING) << "failed to init object accessor, instance_id=" << instance_id_; |
558 | | return -1; |
559 | | } |
560 | | |
561 | | std::shared_ptr<S3Accessor> accessor; |
562 | | int ret = S3Accessor::create(std::move(*s3_conf), &accessor); |
563 | | if (ret != 0) { |
564 | | LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_ |
565 | | << " resource_id=" << obj_info.id(); |
566 | | return ret; |
567 | | } |
568 | | #endif |
569 | 58 | accessor_map_.emplace(obj_info.id(), std::move(accessor)); |
570 | 58 | } |
571 | | |
572 | 89 | return 0; |
573 | 89 | } |
574 | | |
575 | 89 | int InstanceRecycler::init_storage_vault_accessors() { |
576 | 89 | if (instance_info_.resource_ids().empty()) { |
577 | 82 | return 0; |
578 | 82 | } |
579 | | |
580 | 7 | FullRangeGetOptions opts(txn_kv_); |
581 | 7 | opts.prefetch = true; |
582 | 7 | auto it = txn_kv_->full_range_get(storage_vault_key({instance_id_, ""}), |
583 | 7 | storage_vault_key({instance_id_, "\xff"}), std::move(opts)); |
584 | | |
585 | 25 | for (auto kv = it->next(); kv.has_value(); kv = it->next()) { |
586 | 18 | auto [k, v] = *kv; |
587 | 18 | StorageVaultPB vault; |
588 | 18 | if (!vault.ParseFromArray(v.data(), v.size())) { |
589 | 0 | LOG(WARNING) << "malformed storage vault, unable to deserialize key=" << hex(k); |
590 | 0 | return -1; |
591 | 0 | } |
592 | 18 | std::string recycler_storage_vault_white_list = accumulate( |
593 | 18 | config::recycler_storage_vault_white_list.begin(), |
594 | 18 | config::recycler_storage_vault_white_list.end(), std::string(), |
595 | 24 | [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_ Line | Count | Source | 595 | 24 | [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_ |
596 | 18 | LOG_INFO("config::recycler_storage_vault_white_list") |
597 | 18 | .tag("", recycler_storage_vault_white_list); |
598 | 18 | if (!config::recycler_storage_vault_white_list.empty()) { |
599 | 8 | if (auto it = std::find(config::recycler_storage_vault_white_list.begin(), |
600 | 8 | config::recycler_storage_vault_white_list.end(), vault.name()); |
601 | 8 | it == config::recycler_storage_vault_white_list.end()) { |
602 | 2 | LOG_WARNING( |
603 | 2 | "failed to init accessor for vault because this vault is not in " |
604 | 2 | "config::recycler_storage_vault_white_list. ") |
605 | 2 | .tag(" vault name:", vault.name()) |
606 | 2 | .tag(" config::recycler_storage_vault_white_list:", |
607 | 2 | recycler_storage_vault_white_list); |
608 | 2 | continue; |
609 | 2 | } |
610 | 8 | } |
611 | 16 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault", |
612 | 16 | &accessor_map_, &vault); |
613 | 16 | if (vault.has_hdfs_info()) { |
614 | 9 | #ifdef ENABLE_HDFS_STORAGE_VAULT |
615 | 9 | auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info()); |
616 | 9 | int ret = accessor->init(); |
617 | 9 | if (ret != 0) { |
618 | 4 | LOG(WARNING) << "failed to init hdfs accessor. instance_id=" << instance_id_ |
619 | 4 | << " resource_id=" << vault.id() << " name=" << vault.name() |
620 | 4 | << " hdfs_vault=" << vault.hdfs_info().ShortDebugString(); |
621 | 4 | continue; |
622 | 4 | } |
623 | 5 | LOG(INFO) << "succeed to init hdfs accessor. instance_id=" << instance_id_ |
624 | 5 | << " resource_id=" << vault.id() << " name=" << vault.name() |
625 | 5 | << " hdfs_vault=" << vault.hdfs_info().ShortDebugString(); |
626 | 5 | accessor_map_.emplace(vault.id(), std::move(accessor)); |
627 | | #else |
628 | | LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build option), " |
629 | | << "but HDFS storage vaults were detected"; |
630 | | #endif |
631 | 7 | } else if (vault.has_obj_info()) { |
632 | 7 | auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info()); |
633 | 7 | if (!s3_conf) { |
634 | 1 | LOG(WARNING) << "failed to init object accessor, invalid conf, instance_id=" |
635 | 1 | << instance_id_ << " s3_vault=" << vault.obj_info().ShortDebugString(); |
636 | 1 | continue; |
637 | 1 | } |
638 | | |
639 | 6 | std::shared_ptr<S3Accessor> accessor; |
640 | 6 | int ret = S3Accessor::create(*s3_conf, &accessor); |
641 | 6 | if (ret != 0) { |
642 | 0 | LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_ |
643 | 0 | << " resource_id=" << vault.id() << " name=" << vault.name() |
644 | 0 | << " ret=" << ret |
645 | 0 | << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString()); |
646 | 0 | continue; |
647 | 0 | } |
648 | 6 | LOG(INFO) << "succeed to init s3 accessor. instance_id=" << instance_id_ |
649 | 6 | << " resource_id=" << vault.id() << " name=" << vault.name() << " ret=" << ret |
650 | 6 | << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString()); |
651 | 6 | accessor_map_.emplace(vault.id(), std::move(accessor)); |
652 | 6 | } |
653 | 16 | } |
654 | | |
655 | 7 | if (!it->is_valid()) { |
656 | 0 | LOG_WARNING("failed to get storage vault kv"); |
657 | 0 | return -1; |
658 | 0 | } |
659 | | |
660 | 7 | if (accessor_map_.empty()) { |
661 | 1 | LOG(WARNING) << "no accessors for instance=" << instance_id_; |
662 | 1 | return -2; |
663 | 1 | } |
664 | 6 | LOG_INFO("finish init instance recycler number_accessors={} instance=", accessor_map_.size(), |
665 | 6 | instance_id_); |
666 | | |
667 | 6 | return 0; |
668 | 7 | } |
669 | | |
670 | 89 | int InstanceRecycler::init() { |
671 | 89 | int ret = init_obj_store_accessors(); |
672 | 89 | if (ret != 0) { |
673 | 0 | return ret; |
674 | 0 | } |
675 | | |
676 | 89 | return init_storage_vault_accessors(); |
677 | 89 | } |
678 | | |
679 | | template <typename... Func> |
680 | 110 | auto task_wrapper(Func... funcs) -> std::function<int()> { |
681 | 110 | return [funcs...]() { |
682 | 110 | return [](std::initializer_list<int> ret_vals) { |
683 | 110 | int i = 0; |
684 | 130 | for (int ret : ret_vals) { |
685 | 130 | if (ret != 0) { |
686 | 0 | i = ret; |
687 | 0 | } |
688 | 130 | } |
689 | 110 | return i; |
690 | 110 | }({funcs()...}); recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_ Line | Count | Source | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 0 | i = ret; | 687 | 0 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); |
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_ Line | Count | Source | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 0 | i = ret; | 687 | 0 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); |
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_ Line | Count | Source | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 20 | for (int ret : ret_vals) { | 685 | 20 | if (ret != 0) { | 686 | 0 | i = ret; | 687 | 0 | } | 688 | 20 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); |
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_ Line | Count | Source | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 0 | i = ret; | 687 | 0 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); |
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_ Line | Count | Source | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 0 | i = ret; | 687 | 0 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); |
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8ZNS2_10do_recycleEvE3$_9EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_ Line | Count | Source | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 20 | for (int ret : ret_vals) { | 685 | 20 | if (ret != 0) { | 686 | 0 | i = ret; | 687 | 0 | } | 688 | 20 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); |
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_ Line | Count | Source | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 0 | i = ret; | 687 | 0 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); |
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_ Line | Count | Source | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 0 | i = ret; | 687 | 0 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); |
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_ Line | Count | Source | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 0 | i = ret; | 687 | 0 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); |
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_ Line | Count | Source | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 0 | i = ret; | 687 | 0 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); |
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_ Line | Count | Source | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 0 | i = ret; | 687 | 0 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); |
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_ Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_ Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_ Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_ Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_ Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8ZNS2_10do_recycleEvE3$_9EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_ Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_ Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_ Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_ Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_ Unexecuted instantiation: 
recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_ |
691 | 110 | }; recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEv Line | Count | Source | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; |
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEv Line | Count | Source | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; |
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_ENKUlvE_clEv Line | Count | Source | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; |
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEv Line | Count | Source | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; |
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEv Line | Count | Source | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; |
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8ZNS2_10do_recycleEvE3$_9EEESt8functionIFivEEDpT_ENKUlvE_clEv Line | Count | Source | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; |
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEv Line | Count | Source | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; |
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEv Line | Count | Source | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; |
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEv Line | Count | Source | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; |
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_ENKUlvE_clEv Line | Count | Source | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; |
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_ENKUlvE_clEv Line | Count | Source | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEv Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEv Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_ENKUlvE_clEv Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEv Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEv Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8ZNS2_10do_recycleEvE3$_9EEESt8functionIFivEEDpT_ENKUlvE_clEv Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEv Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEv Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEv Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_ENKUlvE_clEv Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_ENKUlvE_clEv |
692 | 110 | } recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ Line | Count | Source | 680 | 10 | auto task_wrapper(Func... funcs) -> std::function<int()> { | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; | 692 | 10 | } |
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ Line | Count | Source | 680 | 10 | auto task_wrapper(Func... funcs) -> std::function<int()> { | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; | 692 | 10 | } |
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_ Line | Count | Source | 680 | 10 | auto task_wrapper(Func... funcs) -> std::function<int()> { | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; | 692 | 10 | } |
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ Line | Count | Source | 680 | 10 | auto task_wrapper(Func... funcs) -> std::function<int()> { | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; | 692 | 10 | } |
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ Line | Count | Source | 680 | 10 | auto task_wrapper(Func... funcs) -> std::function<int()> { | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; | 692 | 10 | } |
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8ZNS2_10do_recycleEvE3$_9EEESt8functionIFivEEDpT_ Line | Count | Source | 680 | 10 | auto task_wrapper(Func... funcs) -> std::function<int()> { | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; | 692 | 10 | } |
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ Line | Count | Source | 680 | 10 | auto task_wrapper(Func... funcs) -> std::function<int()> { | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; | 692 | 10 | } |
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ Line | Count | Source | 680 | 10 | auto task_wrapper(Func... funcs) -> std::function<int()> { | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; | 692 | 10 | } |
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ Line | Count | Source | 680 | 10 | auto task_wrapper(Func... funcs) -> std::function<int()> { | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; | 692 | 10 | } |
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_ Line | Count | Source | 680 | 10 | auto task_wrapper(Func... funcs) -> std::function<int()> { | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; | 692 | 10 | } |
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_ Line | Count | Source | 680 | 10 | auto task_wrapper(Func... funcs) -> std::function<int()> { | 681 | 10 | return [funcs...]() { | 682 | 10 | return [](std::initializer_list<int> ret_vals) { | 683 | 10 | int i = 0; | 684 | 10 | for (int ret : ret_vals) { | 685 | 10 | if (ret != 0) { | 686 | 10 | i = ret; | 687 | 10 | } | 688 | 10 | } | 689 | 10 | return i; | 690 | 10 | }({funcs()...}); | 691 | 10 | }; | 692 | 10 | } |
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_ Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8ZNS2_10do_recycleEvE3$_9EEESt8functionIFivEEDpT_ Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_ Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_ |
693 | | |
694 | 10 | int InstanceRecycler::do_recycle() { |
695 | 10 | TEST_SYNC_POINT("InstanceRecycler.do_recycle"); |
696 | 10 | tablet_metrics_context_.reset(); |
697 | 10 | segment_metrics_context_.reset(); |
698 | 10 | DORIS_CLOUD_DEFER { |
699 | 10 | tablet_metrics_context_.finish_report(); |
700 | 10 | segment_metrics_context_.finish_report(); |
701 | 10 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv Line | Count | Source | 698 | 10 | DORIS_CLOUD_DEFER { | 699 | 10 | tablet_metrics_context_.finish_report(); | 700 | 10 | segment_metrics_context_.finish_report(); | 701 | 10 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv |
702 | 10 | if (instance_info_.status() == InstanceInfoPB::DELETED) { |
703 | 0 | int res = recycle_cluster_snapshots(); |
704 | 0 | if (res != 0) { |
705 | 0 | return -1; |
706 | 0 | } |
707 | 0 | return recycle_deleted_instance(); |
708 | 10 | } else if (instance_info_.status() == InstanceInfoPB::NORMAL) { |
709 | 10 | SyncExecutor<int> sync_executor(_thread_pool_group.group_recycle_function_pool, |
710 | 10 | fmt::format("instance id {}", instance_id_), |
711 | 109 | [](int r) { return r != 0; }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi Line | Count | Source | 711 | 109 | [](int r) { return r != 0; }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi |
712 | 10 | sync_executor |
713 | 10 | .add(task_wrapper( |
714 | 10 | [this]() { return InstanceRecycler::recycle_cluster_snapshots(); })) recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv Line | Count | Source | 714 | 10 | [this]() { return InstanceRecycler::recycle_cluster_snapshots(); })) |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv |
715 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); })) recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv Line | Count | Source | 715 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); })) |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv |
716 | 10 | .add(task_wrapper( // dropped table and dropped partition need to be recycled in series |
717 | | // becase they may both recycle the same set of tablets |
718 | | // recycle dropped table or idexes(mv, rollup) |
719 | 10 | [this]() -> int { return InstanceRecycler::recycle_indexes(); }, recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv Line | Count | Source | 719 | 10 | [this]() -> int { return InstanceRecycler::recycle_indexes(); }, |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv |
720 | | // recycle dropped partitions |
721 | 10 | [this]() -> int { return InstanceRecycler::recycle_partitions(); })) recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv Line | Count | Source | 721 | 10 | [this]() -> int { return InstanceRecycler::recycle_partitions(); })) |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv |
722 | 10 | .add(task_wrapper( |
723 | 10 | [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); })) recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv Line | Count | Source | 723 | 10 | [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); })) |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv |
724 | 10 | .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); })) recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv Line | Count | Source | 724 | 10 | .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); })) |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv |
725 | 10 | .add(task_wrapper( |
726 | 10 | [this]() { return InstanceRecycler::abort_timeout_txn(); }, recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv Line | Count | Source | 726 | 10 | [this]() { return InstanceRecycler::abort_timeout_txn(); }, |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv |
727 | 10 | [this]() { return InstanceRecycler::recycle_expired_txn_label(); })) recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv Line | Count | Source | 727 | 10 | [this]() { return InstanceRecycler::recycle_expired_txn_label(); })) |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv |
728 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); })) recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv Line | Count | Source | 728 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); })) |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv |
729 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); })) recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv Line | Count | Source | 729 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); })) |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv |
730 | 10 | .add(task_wrapper( |
731 | 10 | [this]() { return InstanceRecycler::recycle_expired_stage_objects(); })) recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv Line | Count | Source | 731 | 10 | [this]() { return InstanceRecycler::recycle_expired_stage_objects(); })) |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv |
732 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); })) recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv Line | Count | Source | 732 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); })) |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv |
733 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); })); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv Line | Count | Source | 733 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); })); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv |
734 | 10 | bool finished = true; |
735 | 10 | std::vector<int> rets = sync_executor.when_all(&finished); |
736 | 110 | for (int ret : rets) { |
737 | 110 | if (ret != 0) { |
738 | 0 | return ret; |
739 | 0 | } |
740 | 110 | } |
741 | 10 | return finished ? 0 : -1; |
742 | 10 | } else { |
743 | 0 | LOG(WARNING) << "invalid instance status: " << instance_info_.status() |
744 | 0 | << " instance_id=" << instance_id_; |
745 | 0 | return -1; |
746 | 0 | } |
747 | 10 | } |
748 | | |
749 | | /** |
750 | | * 1. delete all remote data |
751 | | * 2. delete all kv |
752 | | * 3. remove instance kv |
753 | | */ |
754 | 4 | int InstanceRecycler::recycle_deleted_instance() { |
755 | 4 | LOG_WARNING("begin to recycle deleted instance").tag("instance_id", instance_id_); |
756 | | |
757 | 4 | int ret = 0; |
758 | 4 | auto start_time = steady_clock::now(); |
759 | | |
760 | 4 | DORIS_CLOUD_DEFER { |
761 | 4 | auto cost = duration<float>(steady_clock::now() - start_time).count(); |
762 | 4 | LOG(WARNING) << (ret == 0 ? "successfully" : "failed to") |
763 | 4 | << " recycle deleted instance, cost=" << cost |
764 | 4 | << "s, instance_id=" << instance_id_; |
765 | 4 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv Line | Count | Source | 760 | 4 | DORIS_CLOUD_DEFER { | 761 | 4 | auto cost = duration<float>(steady_clock::now() - start_time).count(); | 762 | 4 | LOG(WARNING) << (ret == 0 ? "successfully" : "failed to") | 763 | 4 | << " recycle deleted instance, cost=" << cost | 764 | 4 | << "s, instance_id=" << instance_id_; | 765 | 4 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv |
766 | | |
767 | 4 | bool has_snapshots = false; |
768 | 4 | if (has_cluster_snapshots(&has_snapshots) != 0) { |
769 | 0 | LOG(WARNING) << "check instance cluster snapshots failed, instance_id=" << instance_id_; |
770 | 0 | return -1; |
771 | 4 | } else if (has_snapshots) { |
772 | 1 | LOG(INFO) << "instance has cluster snapshots, skip recycling, instance_id=" << instance_id_; |
773 | 1 | return 0; |
774 | 1 | } |
775 | | |
776 | | // delete all remote data |
777 | 3 | for (auto& [_, accessor] : accessor_map_) { |
778 | 3 | if (stopped()) { |
779 | 0 | return ret; |
780 | 0 | } |
781 | | |
782 | 3 | LOG(INFO) << "begin to delete all objects in " << accessor->uri(); |
783 | 3 | int del_ret = accessor->delete_all(); |
784 | 3 | if (del_ret == 0) { |
785 | 3 | LOG(INFO) << "successfully delete all objects in " << accessor->uri(); |
786 | 3 | } else if (del_ret != 1) { // no need to log, because S3Accessor has logged this error |
787 | | // If `del_ret == 1`, it can be considered that the object data has been recycled by cloud platform, |
788 | | // so the recycling has been successful. |
789 | 0 | ret = -1; |
790 | 0 | } |
791 | 3 | } |
792 | | |
793 | 3 | if (ret != 0) { |
794 | 0 | LOG(WARNING) << "failed to delete all data of deleted instance=" << instance_id_; |
795 | 0 | return ret; |
796 | 0 | } |
797 | | |
798 | | // delete all kv |
799 | 3 | std::unique_ptr<Transaction> txn; |
800 | 3 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
801 | 3 | if (err != TxnErrorCode::TXN_OK) { |
802 | 0 | LOG(WARNING) << "failed to create txn"; |
803 | 0 | ret = -1; |
804 | 0 | return -1; |
805 | 0 | } |
806 | 3 | LOG(INFO) << "begin to delete all kv, instance_id=" << instance_id_; |
807 | | // delete kv before deleting objects to prevent the checker from misjudging data loss |
808 | 3 | std::string start_txn_key = txn_key_prefix(instance_id_); |
809 | 3 | std::string end_txn_key = txn_key_prefix(instance_id_ + '\x00'); |
810 | 3 | txn->remove(start_txn_key, end_txn_key); |
811 | 3 | std::string start_version_key = version_key_prefix(instance_id_); |
812 | 3 | std::string end_version_key = version_key_prefix(instance_id_ + '\x00'); |
813 | 3 | txn->remove(start_version_key, end_version_key); |
814 | 3 | std::string start_meta_key = meta_key_prefix(instance_id_); |
815 | 3 | std::string end_meta_key = meta_key_prefix(instance_id_ + '\x00'); |
816 | 3 | txn->remove(start_meta_key, end_meta_key); |
817 | 3 | std::string start_recycle_key = recycle_key_prefix(instance_id_); |
818 | 3 | std::string end_recycle_key = recycle_key_prefix(instance_id_ + '\x00'); |
819 | 3 | txn->remove(start_recycle_key, end_recycle_key); |
820 | 3 | std::string start_stats_tablet_key = stats_tablet_key({instance_id_, 0, 0, 0, 0}); |
821 | 3 | std::string end_stats_tablet_key = stats_tablet_key({instance_id_, INT64_MAX, 0, 0, 0}); |
822 | 3 | txn->remove(start_stats_tablet_key, end_stats_tablet_key); |
823 | 3 | std::string start_copy_key = copy_key_prefix(instance_id_); |
824 | 3 | std::string end_copy_key = copy_key_prefix(instance_id_ + '\x00'); |
825 | 3 | txn->remove(start_copy_key, end_copy_key); |
826 | | // should not remove job key range, because we need to reserve job recycle kv |
827 | | // 0:instance_id 1:table_id 2:index_id 3:part_id 4:tablet_id |
828 | 3 | std::string start_job_tablet_key = job_tablet_key({instance_id_, 0, 0, 0, 0}); |
829 | 3 | std::string end_job_tablet_key = job_tablet_key({instance_id_, INT64_MAX, 0, 0, 0}); |
830 | 3 | txn->remove(start_job_tablet_key, end_job_tablet_key); |
831 | 3 | StorageVaultKeyInfo key_info0 {instance_id_, ""}; |
832 | 3 | StorageVaultKeyInfo key_info1 {instance_id_, "\xff"}; |
833 | 3 | std::string start_vault_key = storage_vault_key(key_info0); |
834 | 3 | std::string end_vault_key = storage_vault_key(key_info1); |
835 | 3 | txn->remove(start_vault_key, end_vault_key); |
836 | 3 | std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, 0, ""}); |
837 | 3 | std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, INT64_MAX, ""}); |
838 | 3 | txn->remove(dbm_start_key, dbm_end_key); |
839 | 3 | std::string versioned_version_key_start = versioned::version_key_prefix(instance_id_); |
840 | 3 | std::string versioned_version_key_end = versioned::version_key_prefix(instance_id_ + '\x00'); |
841 | 3 | txn->remove(versioned_version_key_start, versioned_version_key_end); |
842 | 3 | std::string versioned_index_key_start = versioned::index_key_prefix(instance_id_); |
843 | 3 | std::string versioned_index_key_end = versioned::index_key_prefix(instance_id_ + '\x00'); |
844 | 3 | txn->remove(versioned_index_key_start, versioned_index_key_end); |
845 | 3 | std::string versioned_stats_tablet_key_start = versioned::stats_key_prefix(instance_id_); |
846 | 3 | std::string versioned_stats_tablet_key_end = versioned::stats_key_prefix(instance_id_ + '\x00'); |
847 | 3 | txn->remove(versioned_stats_tablet_key_start, versioned_stats_tablet_key_end); |
848 | 3 | std::string versioned_meta_key_start = versioned::meta_key_prefix(instance_id_); |
849 | 3 | std::string versioned_meta_key_end = versioned::meta_key_prefix(instance_id_ + '\x00'); |
850 | 3 | txn->remove(versioned_meta_key_start, versioned_meta_key_end); |
851 | 3 | std::string versioned_data_key_start = versioned::data_key_prefix(instance_id_); |
852 | 3 | std::string versioned_data_key_end = versioned::data_key_prefix(instance_id_ + '\x00'); |
853 | 3 | txn->remove(versioned_data_key_start, versioned_data_key_end); |
854 | 3 | std::string versioned_log_key_start = versioned::log_key_prefix(instance_id_); |
855 | 3 | std::string versioned_log_key_end = versioned::log_key_prefix(instance_id_ + '\x00'); |
856 | 3 | txn->remove(versioned_log_key_start, versioned_log_key_end); |
857 | 3 | err = txn->commit(); |
858 | 3 | if (err != TxnErrorCode::TXN_OK) { |
859 | 0 | LOG(WARNING) << "failed to delete all kv, instance_id=" << instance_id_ << ", err=" << err; |
860 | 0 | ret = -1; |
861 | 0 | } |
862 | | |
863 | 3 | if (ret == 0) { |
864 | | // remove instance kv |
865 | | // ATTN: MUST ensure that cloud platform won't regenerate the same instance id |
866 | 3 | err = txn_kv_->create_txn(&txn); |
867 | 3 | if (err != TxnErrorCode::TXN_OK) { |
868 | 0 | LOG(WARNING) << "failed to create txn"; |
869 | 0 | ret = -1; |
870 | 0 | return ret; |
871 | 0 | } |
872 | 3 | std::string key; |
873 | 3 | instance_key({instance_id_}, &key); |
874 | 3 | txn->remove(key); |
875 | 3 | err = txn->commit(); |
876 | 3 | if (err != TxnErrorCode::TXN_OK) { |
877 | 0 | LOG(WARNING) << "failed to delete instance kv, instance_id=" << instance_id_ |
878 | 0 | << " err=" << err; |
879 | 0 | ret = -1; |
880 | 0 | } |
881 | 3 | } |
882 | 3 | return ret; |
883 | 3 | } |
884 | | |
885 | | bool is_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string& instance_id, |
886 | 57.0k | int64_t txn_id) { |
887 | 57.0k | std::unique_ptr<Transaction> txn; |
888 | 57.0k | TxnErrorCode err = txn_kv->create_txn(&txn); |
889 | 57.0k | if (err != TxnErrorCode::TXN_OK) { |
890 | 0 | LOG(WARNING) << "failed to create txn, txn_id=" << txn_id << " instance_id=" << instance_id; |
891 | 0 | return false; |
892 | 0 | } |
893 | | |
894 | 57.0k | std::string index_val; |
895 | 57.0k | const std::string index_key = txn_index_key({instance_id, txn_id}); |
896 | 57.0k | err = txn->get(index_key, &index_val); |
897 | 57.0k | if (err != TxnErrorCode::TXN_OK) { |
898 | 53.0k | if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) { |
899 | 53.0k | TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_has_been_recycled"); |
900 | | // txn has been recycled; |
901 | 53.0k | LOG(INFO) << "txn index key has been recycled, txn_id=" << txn_id |
902 | 53.0k | << " instance_id=" << instance_id; |
903 | 53.0k | return true; |
904 | 53.0k | } |
905 | 0 | LOG(WARNING) << "failed to get txn index key, txn_id=" << txn_id |
906 | 0 | << " instance_id=" << instance_id << " key=" << hex(index_key) |
907 | 0 | << " err=" << err; |
908 | 0 | return false; |
909 | 53.0k | } |
910 | | |
911 | 4.00k | TxnIndexPB index_pb; |
912 | 4.00k | if (!index_pb.ParseFromString(index_val)) { |
913 | 0 | LOG(WARNING) << "failed to parse txn_index_pb, txn_id=" << txn_id |
914 | 0 | << " instance_id=" << instance_id; |
915 | 0 | return false; |
916 | 0 | } |
917 | | |
918 | 4.00k | DCHECK(index_pb.has_tablet_index() == true); |
919 | 4.00k | if (!index_pb.tablet_index().has_db_id()) { |
920 | | // In the previous version, the db_id was not set in the index_pb. |
921 | | // If updating to the version which enable txn lazy commit, the db_id will be set. |
922 | 0 | LOG(INFO) << "txn index has no db_id, txn_id=" << txn_id << " instance_id=" << instance_id |
923 | 0 | << " index=" << index_pb.ShortDebugString(); |
924 | 0 | return true; |
925 | 0 | } |
926 | | |
927 | 4.00k | int64_t db_id = index_pb.tablet_index().db_id(); |
928 | 4.00k | DCHECK_GT(db_id, 0) << "db_id=" << db_id << " txn_id=" << txn_id |
929 | 0 | << " instance_id=" << instance_id; |
930 | | |
931 | 4.00k | std::string info_val; |
932 | 4.00k | const std::string info_key = txn_info_key({instance_id, db_id, txn_id}); |
933 | 4.00k | err = txn->get(info_key, &info_val); |
934 | 4.00k | if (err != TxnErrorCode::TXN_OK) { |
935 | 0 | if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) { |
936 | | // txn info has been recycled; |
937 | 0 | LOG(INFO) << "txn info key has been recycled, db_id=" << db_id << " txn_id=" << txn_id |
938 | 0 | << " instance_id=" << instance_id; |
939 | 0 | return true; |
940 | 0 | } |
941 | | |
942 | 0 | DCHECK(err != TxnErrorCode::TXN_KEY_NOT_FOUND); |
943 | 0 | LOG(WARNING) << "failed to get txn info key, txn_id=" << txn_id |
944 | 0 | << " instance_id=" << instance_id << " key=" << hex(info_key) |
945 | 0 | << " err=" << err; |
946 | 0 | return false; |
947 | 0 | } |
948 | | |
949 | 4.00k | TxnInfoPB txn_info; |
950 | 4.00k | if (!txn_info.ParseFromString(info_val)) { |
951 | 0 | LOG(WARNING) << "failed to parse txn_info, txn_id=" << txn_id |
952 | 0 | << " instance_id=" << instance_id; |
953 | 0 | return false; |
954 | 0 | } |
955 | | |
956 | 4.00k | DCHECK(txn_info.txn_id() == txn_id) << "txn_id=" << txn_id << " instance_id=" << instance_id |
957 | 0 | << " txn_info=" << txn_info.ShortDebugString(); |
958 | | |
959 | 4.00k | if (TxnStatusPB::TXN_STATUS_ABORTED == txn_info.status() || |
960 | 4.00k | TxnStatusPB::TXN_STATUS_VISIBLE == txn_info.status()) { |
961 | 2.00k | TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_has_been_aborted", &txn_info); |
962 | 2.00k | return true; |
963 | 2.00k | } |
964 | | |
965 | 2.00k | TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_not_finished", &txn_info); |
966 | 2.00k | return false; |
967 | 4.00k | } |
968 | | |
969 | | int64_t calculate_rowset_expired_time(const std::string& instance_id_, const RecycleRowsetPB& rs, |
970 | 4.01k | int64_t* earlest_ts /* rowset earliest expiration ts */) { |
971 | 4.01k | if (config::force_immediate_recycle) { |
972 | 8 | return 0L; |
973 | 8 | } |
974 | | // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time |
975 | 4.00k | int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time(); |
976 | 4.00k | int64_t retention_seconds = config::retention_seconds; |
977 | 4.00k | if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) { |
978 | 3.10k | retention_seconds = std::min(config::compacted_rowset_retention_seconds, retention_seconds); |
979 | 3.10k | } |
980 | 4.00k | int64_t final_expiration = expiration + retention_seconds; |
981 | 4.00k | if (*earlest_ts > final_expiration) { |
982 | 3 | *earlest_ts = final_expiration; |
983 | 3 | g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, *earlest_ts); |
984 | 3 | } |
985 | 4.00k | return final_expiration; |
986 | 4.01k | } |
987 | | |
988 | | int64_t calculate_partition_expired_time( |
989 | | const std::string& instance_id_, const RecyclePartitionPB& partition_meta_pb, |
990 | 9 | int64_t* earlest_ts /* partition earliest expiration ts */) { |
991 | 9 | if (config::force_immediate_recycle) { |
992 | 3 | return 0L; |
993 | 3 | } |
994 | 6 | int64_t expiration = partition_meta_pb.expiration() > 0 ? partition_meta_pb.expiration() |
995 | 6 | : partition_meta_pb.creation_time(); |
996 | 6 | int64_t retention_seconds = config::retention_seconds; |
997 | 6 | if (partition_meta_pb.state() == RecyclePartitionPB::DROPPED) { |
998 | 6 | retention_seconds = |
999 | 6 | std::min(config::dropped_partition_retention_seconds, retention_seconds); |
1000 | 6 | } |
1001 | 6 | int64_t final_expiration = expiration + retention_seconds; |
1002 | 6 | if (*earlest_ts > final_expiration) { |
1003 | 2 | *earlest_ts = final_expiration; |
1004 | 2 | g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, *earlest_ts); |
1005 | 2 | } |
1006 | 6 | return final_expiration; |
1007 | 9 | } |
1008 | | |
1009 | | int64_t calculate_index_expired_time(const std::string& instance_id_, |
1010 | | const RecycleIndexPB& index_meta_pb, |
1011 | 10 | int64_t* earlest_ts /* index earliest expiration ts */) { |
1012 | 10 | if (config::force_immediate_recycle) { |
1013 | 4 | return 0L; |
1014 | 4 | } |
1015 | 6 | int64_t expiration = index_meta_pb.expiration() > 0 ? index_meta_pb.expiration() |
1016 | 6 | : index_meta_pb.creation_time(); |
1017 | 6 | int64_t retention_seconds = config::retention_seconds; |
1018 | 6 | if (index_meta_pb.state() == RecycleIndexPB::DROPPED) { |
1019 | 6 | retention_seconds = std::min(config::dropped_index_retention_seconds, retention_seconds); |
1020 | 6 | } |
1021 | 6 | int64_t final_expiration = expiration + retention_seconds; |
1022 | 6 | if (*earlest_ts > final_expiration) { |
1023 | 2 | *earlest_ts = final_expiration; |
1024 | 2 | g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, *earlest_ts); |
1025 | 2 | } |
1026 | 6 | return final_expiration; |
1027 | 10 | } |
1028 | | |
1029 | | int64_t calculate_tmp_rowset_expired_time( |
1030 | | const std::string& instance_id_, const doris::RowsetMetaCloudPB& tmp_rowset_meta_pb, |
1031 | 57.0k | int64_t* earlest_ts /* tmp_rowset earliest expiration ts */) { |
1032 | | // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment) |
1033 | | // when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading |
1034 | | // duration or timeout always < `retention_time` in practice. |
1035 | 57.0k | int64_t expiration = tmp_rowset_meta_pb.txn_expiration() > 0 |
1036 | 57.0k | ? tmp_rowset_meta_pb.txn_expiration() |
1037 | 57.0k | : tmp_rowset_meta_pb.creation_time(); |
1038 | 57.0k | expiration = config::force_immediate_recycle ? 0 : expiration; |
1039 | 57.0k | int64_t final_expiration = expiration + config::retention_seconds; |
1040 | 57.0k | if (*earlest_ts > final_expiration) { |
1041 | 6 | *earlest_ts = final_expiration; |
1042 | 6 | g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, *earlest_ts); |
1043 | 6 | } |
1044 | 57.0k | return final_expiration; |
1045 | 57.0k | } |
1046 | | |
1047 | | int64_t calculate_txn_expired_time(const std::string& instance_id_, const RecycleTxnPB& txn_meta_pb, |
1048 | 30.0k | int64_t* earlest_ts /* txn earliest expiration ts */) { |
1049 | 30.0k | int64_t final_expiration = txn_meta_pb.creation_time() + config::label_keep_max_second * 1000L; |
1050 | 30.0k | if (*earlest_ts > final_expiration / 1000) { |
1051 | 8 | *earlest_ts = final_expiration / 1000; |
1052 | 8 | g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, *earlest_ts); |
1053 | 8 | } |
1054 | 30.0k | return final_expiration; |
1055 | 30.0k | } |
1056 | | |
1057 | | int64_t calculate_restore_job_expired_time( |
1058 | | const std::string& instance_id_, const RestoreJobCloudPB& restore_job, |
1059 | 41 | int64_t* earlest_ts /* restore job earliest expiration ts */) { |
1060 | 41 | if (config::force_immediate_recycle || restore_job.state() == RestoreJobCloudPB::DROPPED || |
1061 | 41 | restore_job.state() == RestoreJobCloudPB::COMPLETED || |
1062 | 41 | restore_job.state() == RestoreJobCloudPB::RECYCLING) { |
1063 | | // final state, recycle immediately |
1064 | 41 | return 0L; |
1065 | 41 | } |
1066 | | // not final state, wait much longer than the FE's timeout(1 day) |
1067 | 0 | int64_t last_modified_s = |
1068 | 0 | restore_job.has_mtime_s() ? restore_job.mtime_s() : restore_job.ctime_s(); |
1069 | 0 | int64_t expiration = restore_job.expired_at_s() > 0 |
1070 | 0 | ? last_modified_s + restore_job.expired_at_s() |
1071 | 0 | : last_modified_s; |
1072 | 0 | int64_t final_expiration = expiration + config::retention_seconds; |
1073 | 0 | if (*earlest_ts > final_expiration) { |
1074 | 0 | *earlest_ts = final_expiration; |
1075 | 0 | g_bvar_recycler_recycle_restore_job_earlest_ts.put(instance_id_, *earlest_ts); |
1076 | 0 | } |
1077 | 0 | return final_expiration; |
1078 | 41 | } |
1079 | | |
1080 | 17 | int InstanceRecycler::recycle_indexes() { |
1081 | 17 | const std::string task_name = "recycle_indexes"; |
1082 | 17 | int64_t num_scanned = 0; |
1083 | 17 | int64_t num_expired = 0; |
1084 | 17 | int64_t num_recycled = 0; |
1085 | 17 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
1086 | | |
1087 | 17 | RecycleIndexKeyInfo index_key_info0 {instance_id_, 0}; |
1088 | 17 | RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX}; |
1089 | 17 | std::string index_key0; |
1090 | 17 | std::string index_key1; |
1091 | 17 | recycle_index_key(index_key_info0, &index_key0); |
1092 | 17 | recycle_index_key(index_key_info1, &index_key1); |
1093 | | |
1094 | 17 | LOG_WARNING("begin to recycle indexes").tag("instance_id", instance_id_); |
1095 | | |
1096 | 17 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
1097 | 17 | register_recycle_task(task_name, start_time); |
1098 | | |
1099 | 17 | DORIS_CLOUD_DEFER { |
1100 | 17 | unregister_recycle_task(task_name); |
1101 | 17 | int64_t cost = |
1102 | 17 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
1103 | 17 | metrics_context.finish_report(); |
1104 | 17 | LOG_WARNING("recycle indexes finished, cost={}s", cost) |
1105 | 17 | .tag("instance_id", instance_id_) |
1106 | 17 | .tag("num_scanned", num_scanned) |
1107 | 17 | .tag("num_expired", num_expired) |
1108 | 17 | .tag("num_recycled", num_recycled); |
1109 | 17 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv Line | Count | Source | 1099 | 15 | DORIS_CLOUD_DEFER { | 1100 | 15 | unregister_recycle_task(task_name); | 1101 | 15 | int64_t cost = | 1102 | 15 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 1103 | 15 | metrics_context.finish_report(); | 1104 | 15 | LOG_WARNING("recycle indexes finished, cost={}s", cost) | 1105 | 15 | .tag("instance_id", instance_id_) | 1106 | 15 | .tag("num_scanned", num_scanned) | 1107 | 15 | .tag("num_expired", num_expired) | 1108 | 15 | .tag("num_recycled", num_recycled); | 1109 | 15 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv Line | Count | Source | 1099 | 2 | DORIS_CLOUD_DEFER { | 1100 | 2 | unregister_recycle_task(task_name); | 1101 | 2 | int64_t cost = | 1102 | 2 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 1103 | 2 | metrics_context.finish_report(); | 1104 | 2 | LOG_WARNING("recycle indexes finished, cost={}s", cost) | 1105 | 2 | .tag("instance_id", instance_id_) | 1106 | 2 | .tag("num_scanned", num_scanned) | 1107 | 2 | .tag("num_expired", num_expired) | 1108 | 2 | .tag("num_recycled", num_recycled); | 1109 | 2 | }; |
|
1110 | | |
1111 | 17 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
1112 | | |
1113 | | // Elements in `index_keys` has the same lifetime as `it` in `scan_and_recycle` |
1114 | 17 | std::vector<std::string_view> index_keys; |
1115 | 17 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { |
1116 | 10 | ++num_scanned; |
1117 | 10 | RecycleIndexPB index_pb; |
1118 | 10 | if (!index_pb.ParseFromArray(v.data(), v.size())) { |
1119 | 0 | LOG_WARNING("malformed recycle index value").tag("key", hex(k)); |
1120 | 0 | return -1; |
1121 | 0 | } |
1122 | 10 | int64_t current_time = ::time(nullptr); |
1123 | 10 | if (current_time < |
1124 | 10 | calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired |
1125 | 0 | return 0; |
1126 | 0 | } |
1127 | 10 | ++num_expired; |
1128 | | // decode index_id |
1129 | 10 | auto k1 = k; |
1130 | 10 | k1.remove_prefix(1); |
1131 | 10 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
1132 | 10 | decode_key(&k1, &out); |
1133 | | // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB |
1134 | 10 | auto index_id = std::get<int64_t>(std::get<0>(out[3])); |
1135 | 10 | LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_ |
1136 | 10 | << " table_id=" << index_pb.table_id() << " index_id=" << index_id |
1137 | 10 | << " state=" << RecycleIndexPB::State_Name(index_pb.state()); |
1138 | | // Change state to RECYCLING |
1139 | 10 | std::unique_ptr<Transaction> txn; |
1140 | 10 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
1141 | 10 | if (err != TxnErrorCode::TXN_OK) { |
1142 | 0 | LOG_WARNING("failed to create txn").tag("err", err); |
1143 | 0 | return -1; |
1144 | 0 | } |
1145 | 10 | std::string val; |
1146 | 10 | err = txn->get(k, &val); |
1147 | 10 | if (err == |
1148 | 10 | TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it |
1149 | 0 | LOG_INFO("index {} has been recycled or committed", index_id); |
1150 | 0 | return 0; |
1151 | 0 | } |
1152 | 10 | if (err != TxnErrorCode::TXN_OK) { |
1153 | 0 | LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err); |
1154 | 0 | return -1; |
1155 | 0 | } |
1156 | 10 | index_pb.Clear(); |
1157 | 10 | if (!index_pb.ParseFromString(val)) { |
1158 | 0 | LOG_WARNING("malformed recycle index value").tag("key", hex(k)); |
1159 | 0 | return -1; |
1160 | 0 | } |
1161 | 10 | if (index_pb.state() != RecycleIndexPB::RECYCLING) { |
1162 | 9 | index_pb.set_state(RecycleIndexPB::RECYCLING); |
1163 | 9 | txn->put(k, index_pb.SerializeAsString()); |
1164 | 9 | err = txn->commit(); |
1165 | 9 | if (err != TxnErrorCode::TXN_OK) { |
1166 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); |
1167 | 0 | return -1; |
1168 | 0 | } |
1169 | 9 | } |
1170 | 10 | if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) { |
1171 | 1 | LOG_WARNING("failed to recycle tablets under index") |
1172 | 1 | .tag("table_id", index_pb.table_id()) |
1173 | 1 | .tag("instance_id", instance_id_) |
1174 | 1 | .tag("index_id", index_id); |
1175 | 1 | return -1; |
1176 | 1 | } |
1177 | | |
1178 | 9 | if (index_pb.has_db_id()) { |
1179 | | // Recycle the versioned keys |
1180 | 3 | std::unique_ptr<Transaction> txn; |
1181 | 3 | err = txn_kv_->create_txn(&txn); |
1182 | 3 | if (err != TxnErrorCode::TXN_OK) { |
1183 | 0 | LOG_WARNING("failed to create txn").tag("err", err); |
1184 | 0 | return -1; |
1185 | 0 | } |
1186 | 3 | std::string meta_key = versioned::meta_index_key({instance_id_, index_id}); |
1187 | 3 | std::string index_key = versioned::index_index_key({instance_id_, index_id}); |
1188 | 3 | std::string index_inverted_key = versioned::index_inverted_key( |
1189 | 3 | {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id}); |
1190 | 3 | versioned_remove_all(txn.get(), meta_key); |
1191 | 3 | txn->remove(index_key); |
1192 | 3 | txn->remove(index_inverted_key); |
1193 | 3 | err = txn->commit(); |
1194 | 3 | if (err != TxnErrorCode::TXN_OK) { |
1195 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); |
1196 | 0 | return -1; |
1197 | 0 | } |
1198 | 3 | } |
1199 | | |
1200 | 9 | metrics_context.total_recycled_num = ++num_recycled; |
1201 | 9 | metrics_context.report(); |
1202 | 9 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); |
1203 | 9 | index_keys.push_back(k); |
1204 | 9 | return 0; |
1205 | 9 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 1115 | 8 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { | 1116 | 8 | ++num_scanned; | 1117 | 8 | RecycleIndexPB index_pb; | 1118 | 8 | if (!index_pb.ParseFromArray(v.data(), v.size())) { | 1119 | 0 | LOG_WARNING("malformed recycle index value").tag("key", hex(k)); | 1120 | 0 | return -1; | 1121 | 0 | } | 1122 | 8 | int64_t current_time = ::time(nullptr); | 1123 | 8 | if (current_time < | 1124 | 8 | calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired | 1125 | 0 | return 0; | 1126 | 0 | } | 1127 | 8 | ++num_expired; | 1128 | | // decode index_id | 1129 | 8 | auto k1 = k; | 1130 | 8 | k1.remove_prefix(1); | 1131 | 8 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 1132 | 8 | decode_key(&k1, &out); | 1133 | | // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB | 1134 | 8 | auto index_id = std::get<int64_t>(std::get<0>(out[3])); | 1135 | 8 | LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_ | 1136 | 8 | << " table_id=" << index_pb.table_id() << " index_id=" << index_id | 1137 | 8 | << " state=" << RecycleIndexPB::State_Name(index_pb.state()); | 1138 | | // Change state to RECYCLING | 1139 | 8 | std::unique_ptr<Transaction> txn; | 1140 | 8 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 1141 | 8 | if (err != TxnErrorCode::TXN_OK) { | 1142 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 1143 | 0 | return -1; | 1144 | 0 | } | 1145 | 8 | std::string val; | 1146 | 8 | err = txn->get(k, &val); | 1147 | 8 | if (err == | 1148 | 8 | TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it | 1149 | 0 | LOG_INFO("index {} has been recycled or committed", index_id); | 1150 | 0 | return 0; | 1151 | 0 | } | 1152 | 8 | if (err != TxnErrorCode::TXN_OK) 
{ | 1153 | 0 | LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err); | 1154 | 0 | return -1; | 1155 | 0 | } | 1156 | 8 | index_pb.Clear(); | 1157 | 8 | if (!index_pb.ParseFromString(val)) { | 1158 | 0 | LOG_WARNING("malformed recycle index value").tag("key", hex(k)); | 1159 | 0 | return -1; | 1160 | 0 | } | 1161 | 8 | if (index_pb.state() != RecycleIndexPB::RECYCLING) { | 1162 | 8 | index_pb.set_state(RecycleIndexPB::RECYCLING); | 1163 | 8 | txn->put(k, index_pb.SerializeAsString()); | 1164 | 8 | err = txn->commit(); | 1165 | 8 | if (err != TxnErrorCode::TXN_OK) { | 1166 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); | 1167 | 0 | return -1; | 1168 | 0 | } | 1169 | 8 | } | 1170 | 8 | if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) { | 1171 | 0 | LOG_WARNING("failed to recycle tablets under index") | 1172 | 0 | .tag("table_id", index_pb.table_id()) | 1173 | 0 | .tag("instance_id", instance_id_) | 1174 | 0 | .tag("index_id", index_id); | 1175 | 0 | return -1; | 1176 | 0 | } | 1177 | | | 1178 | 8 | if (index_pb.has_db_id()) { | 1179 | | // Recycle the versioned keys | 1180 | 2 | std::unique_ptr<Transaction> txn; | 1181 | 2 | err = txn_kv_->create_txn(&txn); | 1182 | 2 | if (err != TxnErrorCode::TXN_OK) { | 1183 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 1184 | 0 | return -1; | 1185 | 0 | } | 1186 | 2 | std::string meta_key = versioned::meta_index_key({instance_id_, index_id}); | 1187 | 2 | std::string index_key = versioned::index_index_key({instance_id_, index_id}); | 1188 | 2 | std::string index_inverted_key = versioned::index_inverted_key( | 1189 | 2 | {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id}); | 1190 | 2 | versioned_remove_all(txn.get(), meta_key); | 1191 | 2 | txn->remove(index_key); | 1192 | 2 | txn->remove(index_inverted_key); | 1193 | 2 | err = txn->commit(); | 1194 | 2 | if (err != TxnErrorCode::TXN_OK) { | 1195 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); 
| 1196 | 0 | return -1; | 1197 | 0 | } | 1198 | 2 | } | 1199 | | | 1200 | 8 | metrics_context.total_recycled_num = ++num_recycled; | 1201 | 8 | metrics_context.report(); | 1202 | 8 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); | 1203 | 8 | index_keys.push_back(k); | 1204 | 8 | return 0; | 1205 | 8 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 1115 | 2 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { | 1116 | 2 | ++num_scanned; | 1117 | 2 | RecycleIndexPB index_pb; | 1118 | 2 | if (!index_pb.ParseFromArray(v.data(), v.size())) { | 1119 | 0 | LOG_WARNING("malformed recycle index value").tag("key", hex(k)); | 1120 | 0 | return -1; | 1121 | 0 | } | 1122 | 2 | int64_t current_time = ::time(nullptr); | 1123 | 2 | if (current_time < | 1124 | 2 | calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired | 1125 | 0 | return 0; | 1126 | 0 | } | 1127 | 2 | ++num_expired; | 1128 | | // decode index_id | 1129 | 2 | auto k1 = k; | 1130 | 2 | k1.remove_prefix(1); | 1131 | 2 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 1132 | 2 | decode_key(&k1, &out); | 1133 | | // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB | 1134 | 2 | auto index_id = std::get<int64_t>(std::get<0>(out[3])); | 1135 | 2 | LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_ | 1136 | 2 | << " table_id=" << index_pb.table_id() << " index_id=" << index_id | 1137 | 2 | << " state=" << RecycleIndexPB::State_Name(index_pb.state()); | 1138 | | // Change state to RECYCLING | 1139 | 2 | std::unique_ptr<Transaction> txn; | 1140 | 2 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 1141 | 2 | if (err != TxnErrorCode::TXN_OK) { | 1142 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 1143 | 0 | return -1; | 1144 | 0 | } | 1145 | 2 | std::string val; | 1146 | 2 | err = txn->get(k, &val); | 1147 | 2 | if (err == | 1148 | 2 | TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it | 1149 | 0 | LOG_INFO("index {} has been recycled or committed", index_id); | 1150 | 0 | return 0; | 1151 | 0 | } | 1152 | 2 | if (err != TxnErrorCode::TXN_OK) { | 1153 | 0 | 
LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err); | 1154 | 0 | return -1; | 1155 | 0 | } | 1156 | 2 | index_pb.Clear(); | 1157 | 2 | if (!index_pb.ParseFromString(val)) { | 1158 | 0 | LOG_WARNING("malformed recycle index value").tag("key", hex(k)); | 1159 | 0 | return -1; | 1160 | 0 | } | 1161 | 2 | if (index_pb.state() != RecycleIndexPB::RECYCLING) { | 1162 | 1 | index_pb.set_state(RecycleIndexPB::RECYCLING); | 1163 | 1 | txn->put(k, index_pb.SerializeAsString()); | 1164 | 1 | err = txn->commit(); | 1165 | 1 | if (err != TxnErrorCode::TXN_OK) { | 1166 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); | 1167 | 0 | return -1; | 1168 | 0 | } | 1169 | 1 | } | 1170 | 2 | if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) { | 1171 | 1 | LOG_WARNING("failed to recycle tablets under index") | 1172 | 1 | .tag("table_id", index_pb.table_id()) | 1173 | 1 | .tag("instance_id", instance_id_) | 1174 | 1 | .tag("index_id", index_id); | 1175 | 1 | return -1; | 1176 | 1 | } | 1177 | | | 1178 | 1 | if (index_pb.has_db_id()) { | 1179 | | // Recycle the versioned keys | 1180 | 1 | std::unique_ptr<Transaction> txn; | 1181 | 1 | err = txn_kv_->create_txn(&txn); | 1182 | 1 | if (err != TxnErrorCode::TXN_OK) { | 1183 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 1184 | 0 | return -1; | 1185 | 0 | } | 1186 | 1 | std::string meta_key = versioned::meta_index_key({instance_id_, index_id}); | 1187 | 1 | std::string index_key = versioned::index_index_key({instance_id_, index_id}); | 1188 | 1 | std::string index_inverted_key = versioned::index_inverted_key( | 1189 | 1 | {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id}); | 1190 | 1 | versioned_remove_all(txn.get(), meta_key); | 1191 | 1 | txn->remove(index_key); | 1192 | 1 | txn->remove(index_inverted_key); | 1193 | 1 | err = txn->commit(); | 1194 | 1 | if (err != TxnErrorCode::TXN_OK) { | 1195 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); | 1196 | 0 | 
return -1; | 1197 | 0 | } | 1198 | 1 | } | 1199 | | | 1200 | 1 | metrics_context.total_recycled_num = ++num_recycled; | 1201 | 1 | metrics_context.report(); | 1202 | 1 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); | 1203 | 1 | index_keys.push_back(k); | 1204 | 1 | return 0; | 1205 | 1 | }; |
|
1206 | | |
1207 | 17 | auto loop_done = [&index_keys, this]() -> int { |
1208 | 6 | if (index_keys.empty()) return 0; |
1209 | 5 | DORIS_CLOUD_DEFER { |
1210 | 5 | index_keys.clear(); |
1211 | 5 | }; recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv Line | Count | Source | 1209 | 4 | DORIS_CLOUD_DEFER { | 1210 | 4 | index_keys.clear(); | 1211 | 4 | }; |
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv Line | Count | Source | 1209 | 1 | DORIS_CLOUD_DEFER { | 1210 | 1 | index_keys.clear(); | 1211 | 1 | }; |
|
1212 | 5 | if (0 != txn_remove(txn_kv_.get(), index_keys)) { |
1213 | 0 | LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_; |
1214 | 0 | return -1; |
1215 | 0 | } |
1216 | 5 | return 0; |
1217 | 5 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv Line | Count | Source | 1207 | 4 | auto loop_done = [&index_keys, this]() -> int { | 1208 | 4 | if (index_keys.empty()) return 0; | 1209 | 4 | DORIS_CLOUD_DEFER { | 1210 | 4 | index_keys.clear(); | 1211 | 4 | }; | 1212 | 4 | if (0 != txn_remove(txn_kv_.get(), index_keys)) { | 1213 | 0 | LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_; | 1214 | 0 | return -1; | 1215 | 0 | } | 1216 | 4 | return 0; | 1217 | 4 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv Line | Count | Source | 1207 | 2 | auto loop_done = [&index_keys, this]() -> int { | 1208 | 2 | if (index_keys.empty()) return 0; | 1209 | 1 | DORIS_CLOUD_DEFER { | 1210 | 1 | index_keys.clear(); | 1211 | 1 | }; | 1212 | 1 | if (0 != txn_remove(txn_kv_.get(), index_keys)) { | 1213 | 0 | LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_; | 1214 | 0 | return -1; | 1215 | 0 | } | 1216 | 1 | return 0; | 1217 | 1 | }; |
|
1218 | | |
1219 | 17 | if (config::enable_recycler_stats_metrics) { |
1220 | 0 | scan_and_statistics_indexes(); |
1221 | 0 | } |
1222 | | // recycle_func and loop_done for scan and recycle |
1223 | 17 | return scan_and_recycle(index_key0, index_key1, std::move(recycle_func), std::move(loop_done)); |
1224 | 17 | } |
1225 | | |
1226 | | bool check_lazy_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string instance_id, |
1227 | 8.24k | int64_t tablet_id) { |
1228 | 8.24k | TEST_SYNC_POINT_RETURN_WITH_VALUE("check_lazy_txn_finished::bypass_check", true); |
1229 | | |
1230 | 8.23k | std::unique_ptr<Transaction> txn; |
1231 | 8.23k | TxnErrorCode err = txn_kv->create_txn(&txn); |
1232 | 8.23k | if (err != TxnErrorCode::TXN_OK) { |
1233 | 0 | LOG(WARNING) << "failed to create txn, instance_id=" << instance_id |
1234 | 0 | << " tablet_id=" << tablet_id << " err=" << err; |
1235 | 0 | return false; |
1236 | 0 | } |
1237 | | |
1238 | 8.23k | std::string tablet_idx_key = meta_tablet_idx_key({instance_id, tablet_id}); |
1239 | 8.23k | std::string tablet_idx_val; |
1240 | 8.23k | err = txn->get(tablet_idx_key, &tablet_idx_val); |
1241 | 8.23k | if (TxnErrorCode::TXN_OK != err) { |
1242 | 0 | LOG(WARNING) << "failed to get tablet index, instance_id=" << instance_id |
1243 | 0 | << " tablet_id=" << tablet_id << " err=" << err |
1244 | 0 | << " key=" << hex(tablet_idx_key); |
1245 | 0 | return false; |
1246 | 0 | } |
1247 | | |
1248 | 8.23k | TabletIndexPB tablet_idx_pb; |
1249 | 8.23k | if (!tablet_idx_pb.ParseFromString(tablet_idx_val)) { |
1250 | 0 | LOG(WARNING) << "failed to parse tablet_idx_pb, instance_id=" << instance_id |
1251 | 0 | << " tablet_id=" << tablet_id; |
1252 | 0 | return false; |
1253 | 0 | } |
1254 | | |
1255 | 8.23k | if (!tablet_idx_pb.has_db_id()) { |
1256 | | // In the previous version, the db_id was not set in the index_pb. |
1257 | | // If updating to the version which enable txn lazy commit, the db_id will be set. |
1258 | 0 | LOG(INFO) << "txn index has no db_id, tablet_id=" << tablet_id |
1259 | 0 | << " instance_id=" << instance_id |
1260 | 0 | << " tablet_idx_pb=" << tablet_idx_pb.ShortDebugString(); |
1261 | 0 | return true; |
1262 | 0 | } |
1263 | | |
1264 | 8.23k | std::string ver_val; |
1265 | 8.23k | std::string ver_key = |
1266 | 8.23k | partition_version_key({instance_id, tablet_idx_pb.db_id(), tablet_idx_pb.table_id(), |
1267 | 8.23k | tablet_idx_pb.partition_id()}); |
1268 | 8.23k | err = txn->get(ver_key, &ver_val); |
1269 | | |
1270 | 8.23k | if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) { |
1271 | 201 | LOG(INFO) << "" |
1272 | 201 | "partition version not found, instance_id=" |
1273 | 201 | << instance_id << " db_id=" << tablet_idx_pb.db_id() |
1274 | 201 | << " table_id=" << tablet_idx_pb.table_id() |
1275 | 201 | << " partition_id=" << tablet_idx_pb.partition_id() << " tablet_id=" << tablet_id |
1276 | 201 | << " key=" << hex(ver_key); |
1277 | 201 | return true; |
1278 | 201 | } |
1279 | | |
1280 | 8.03k | if (TxnErrorCode::TXN_OK != err) { |
1281 | 0 | LOG(WARNING) << "failed to get partition version, instance_id=" << instance_id |
1282 | 0 | << " db_id=" << tablet_idx_pb.db_id() |
1283 | 0 | << " table_id=" << tablet_idx_pb.table_id() |
1284 | 0 | << " partition_id=" << tablet_idx_pb.partition_id() |
1285 | 0 | << " tablet_id=" << tablet_id << " key=" << hex(ver_key) << " err=" << err; |
1286 | 0 | return false; |
1287 | 0 | } |
1288 | | |
1289 | 8.03k | VersionPB version_pb; |
1290 | 8.03k | if (!version_pb.ParseFromString(ver_val)) { |
1291 | 0 | LOG(WARNING) << "failed to parse version_pb, instance_id=" << instance_id |
1292 | 0 | << " db_id=" << tablet_idx_pb.db_id() |
1293 | 0 | << " table_id=" << tablet_idx_pb.table_id() |
1294 | 0 | << " partition_id=" << tablet_idx_pb.partition_id() |
1295 | 0 | << " tablet_id=" << tablet_id << " key=" << hex(ver_key); |
1296 | 0 | return false; |
1297 | 0 | } |
1298 | | |
1299 | 8.03k | if (version_pb.pending_txn_ids_size() > 0) { |
1300 | 4.00k | TEST_SYNC_POINT_CALLBACK("check_lazy_txn_finished::txn_not_finished"); |
1301 | 4.00k | DCHECK(version_pb.pending_txn_ids_size() == 1); |
1302 | 4.00k | LOG(WARNING) << "lazy txn not finished, instance_id=" << instance_id |
1303 | 4.00k | << " db_id=" << tablet_idx_pb.db_id() |
1304 | 4.00k | << " table_id=" << tablet_idx_pb.table_id() |
1305 | 4.00k | << " partition_id=" << tablet_idx_pb.partition_id() |
1306 | 4.00k | << " tablet_id=" << tablet_id << " txn_id=" << version_pb.pending_txn_ids(0) |
1307 | 4.00k | << " key=" << hex(ver_key); |
1308 | 4.00k | return false; |
1309 | 4.00k | } |
1310 | 4.03k | return true; |
1311 | 8.03k | } |
1312 | | |
1313 | 15 | int InstanceRecycler::recycle_partitions() { |
1314 | 15 | const std::string task_name = "recycle_partitions"; |
1315 | 15 | int64_t num_scanned = 0; |
1316 | 15 | int64_t num_expired = 0; |
1317 | 15 | int64_t num_recycled = 0; |
1318 | 15 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
1319 | | |
1320 | 15 | RecyclePartKeyInfo part_key_info0 {instance_id_, 0}; |
1321 | 15 | RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX}; |
1322 | 15 | std::string part_key0; |
1323 | 15 | std::string part_key1; |
1324 | 15 | recycle_partition_key(part_key_info0, &part_key0); |
1325 | 15 | recycle_partition_key(part_key_info1, &part_key1); |
1326 | | |
1327 | 15 | LOG_WARNING("begin to recycle partitions").tag("instance_id", instance_id_); |
1328 | | |
1329 | 15 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
1330 | 15 | register_recycle_task(task_name, start_time); |
1331 | | |
1332 | 15 | DORIS_CLOUD_DEFER { |
1333 | 15 | unregister_recycle_task(task_name); |
1334 | 15 | int64_t cost = |
1335 | 15 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
1336 | 15 | metrics_context.finish_report(); |
1337 | 15 | LOG_WARNING("recycle partitions finished, cost={}s", cost) |
1338 | 15 | .tag("instance_id", instance_id_) |
1339 | 15 | .tag("num_scanned", num_scanned) |
1340 | 15 | .tag("num_expired", num_expired) |
1341 | 15 | .tag("num_recycled", num_recycled); |
1342 | 15 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv Line | Count | Source | 1332 | 13 | DORIS_CLOUD_DEFER { | 1333 | 13 | unregister_recycle_task(task_name); | 1334 | 13 | int64_t cost = | 1335 | 13 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 1336 | 13 | metrics_context.finish_report(); | 1337 | 13 | LOG_WARNING("recycle partitions finished, cost={}s", cost) | 1338 | 13 | .tag("instance_id", instance_id_) | 1339 | 13 | .tag("num_scanned", num_scanned) | 1340 | 13 | .tag("num_expired", num_expired) | 1341 | 13 | .tag("num_recycled", num_recycled); | 1342 | 13 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv Line | Count | Source | 1332 | 2 | DORIS_CLOUD_DEFER { | 1333 | 2 | unregister_recycle_task(task_name); | 1334 | 2 | int64_t cost = | 1335 | 2 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 1336 | 2 | metrics_context.finish_report(); | 1337 | 2 | LOG_WARNING("recycle partitions finished, cost={}s", cost) | 1338 | 2 | .tag("instance_id", instance_id_) | 1339 | 2 | .tag("num_scanned", num_scanned) | 1340 | 2 | .tag("num_expired", num_expired) | 1341 | 2 | .tag("num_recycled", num_recycled); | 1342 | 2 | }; |
|
1343 | | |
1344 | 15 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
1345 | | |
1346 | | // Elements in `partition_keys` has the same lifetime as `it` in `scan_and_recycle` |
1347 | 15 | std::vector<std::string_view> partition_keys; |
1348 | 15 | std::vector<std::string> partition_version_keys; |
1349 | 15 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { |
1350 | 9 | ++num_scanned; |
1351 | 9 | RecyclePartitionPB part_pb; |
1352 | 9 | if (!part_pb.ParseFromArray(v.data(), v.size())) { |
1353 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); |
1354 | 0 | return -1; |
1355 | 0 | } |
1356 | 9 | int64_t current_time = ::time(nullptr); |
1357 | 9 | if (current_time < |
1358 | 9 | calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired |
1359 | 0 | return 0; |
1360 | 0 | } |
1361 | 9 | ++num_expired; |
1362 | | // decode partition_id |
1363 | 9 | auto k1 = k; |
1364 | 9 | k1.remove_prefix(1); |
1365 | 9 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
1366 | 9 | decode_key(&k1, &out); |
1367 | | // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB |
1368 | 9 | auto partition_id = std::get<int64_t>(std::get<0>(out[3])); |
1369 | 9 | LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_ |
1370 | 9 | << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id |
1371 | 9 | << " state=" << RecyclePartitionPB::State_Name(part_pb.state()); |
1372 | | // Change state to RECYCLING |
1373 | 9 | std::unique_ptr<Transaction> txn; |
1374 | 9 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
1375 | 9 | if (err != TxnErrorCode::TXN_OK) { |
1376 | 0 | LOG_WARNING("failed to create txn").tag("err", err); |
1377 | 0 | return -1; |
1378 | 0 | } |
1379 | 9 | std::string val; |
1380 | 9 | err = txn->get(k, &val); |
1381 | 9 | if (err == |
1382 | 9 | TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it |
1383 | 0 | LOG_INFO("partition {} has been recycled or committed", partition_id); |
1384 | 0 | return 0; |
1385 | 0 | } |
1386 | 9 | if (err != TxnErrorCode::TXN_OK) { |
1387 | 0 | LOG_WARNING("failed to get kv"); |
1388 | 0 | return -1; |
1389 | 0 | } |
1390 | 9 | part_pb.Clear(); |
1391 | 9 | if (!part_pb.ParseFromString(val)) { |
1392 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); |
1393 | 0 | return -1; |
1394 | 0 | } |
1395 | | // Partitions with PREPARED state MUST have no data |
1396 | 9 | if (part_pb.state() != RecyclePartitionPB::RECYCLING) { |
1397 | 8 | part_pb.set_state(RecyclePartitionPB::RECYCLING); |
1398 | 8 | txn->put(k, part_pb.SerializeAsString()); |
1399 | 8 | err = txn->commit(); |
1400 | 8 | if (err != TxnErrorCode::TXN_OK) { |
1401 | 0 | LOG_WARNING("failed to commit txn: {}", err); |
1402 | 0 | return -1; |
1403 | 0 | } |
1404 | 8 | } |
1405 | | |
1406 | 9 | int ret = 0; |
1407 | 33 | for (int64_t index_id : part_pb.index_id()) { |
1408 | 33 | if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) { |
1409 | 1 | LOG_WARNING("failed to recycle tablets under partition") |
1410 | 1 | .tag("table_id", part_pb.table_id()) |
1411 | 1 | .tag("instance_id", instance_id_) |
1412 | 1 | .tag("index_id", index_id) |
1413 | 1 | .tag("partition_id", partition_id); |
1414 | 1 | ret = -1; |
1415 | 1 | } |
1416 | 33 | } |
1417 | 9 | if (ret == 0 && part_pb.has_db_id()) { |
1418 | | // Recycle the versioned keys |
1419 | 8 | std::unique_ptr<Transaction> txn; |
1420 | 8 | err = txn_kv_->create_txn(&txn); |
1421 | 8 | if (err != TxnErrorCode::TXN_OK) { |
1422 | 0 | LOG_WARNING("failed to create txn").tag("err", err); |
1423 | 0 | return -1; |
1424 | 0 | } |
1425 | 8 | std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id}); |
1426 | 8 | std::string index_key = versioned::partition_index_key({instance_id_, partition_id}); |
1427 | 8 | std::string inverted_index_key = versioned::partition_inverted_index_key( |
1428 | 8 | {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}); |
1429 | 8 | std::string partition_version_key = |
1430 | 8 | versioned::partition_version_key({instance_id_, partition_id}); |
1431 | 8 | versioned_remove_all(txn.get(), meta_key); |
1432 | 8 | txn->remove(index_key); |
1433 | 8 | txn->remove(inverted_index_key); |
1434 | 8 | versioned_remove_all(txn.get(), partition_version_key); |
1435 | 8 | err = txn->commit(); |
1436 | 8 | if (err != TxnErrorCode::TXN_OK) { |
1437 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); |
1438 | 0 | return -1; |
1439 | 0 | } |
1440 | 8 | } |
1441 | | |
1442 | 9 | if (ret == 0) { |
1443 | 8 | ++num_recycled; |
1444 | 8 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); |
1445 | 8 | partition_keys.push_back(k); |
1446 | 8 | if (part_pb.db_id() > 0) { |
1447 | 8 | partition_version_keys.push_back(partition_version_key( |
1448 | 8 | {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id})); |
1449 | 8 | } |
1450 | 8 | metrics_context.total_recycled_num = num_recycled; |
1451 | 8 | metrics_context.report(); |
1452 | 8 | } |
1453 | 9 | return ret; |
1454 | 9 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 1349 | 7 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { | 1350 | 7 | ++num_scanned; | 1351 | 7 | RecyclePartitionPB part_pb; | 1352 | 7 | if (!part_pb.ParseFromArray(v.data(), v.size())) { | 1353 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); | 1354 | 0 | return -1; | 1355 | 0 | } | 1356 | 7 | int64_t current_time = ::time(nullptr); | 1357 | 7 | if (current_time < | 1358 | 7 | calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired | 1359 | 0 | return 0; | 1360 | 0 | } | 1361 | 7 | ++num_expired; | 1362 | | // decode partition_id | 1363 | 7 | auto k1 = k; | 1364 | 7 | k1.remove_prefix(1); | 1365 | 7 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 1366 | 7 | decode_key(&k1, &out); | 1367 | | // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB | 1368 | 7 | auto partition_id = std::get<int64_t>(std::get<0>(out[3])); | 1369 | 7 | LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_ | 1370 | 7 | << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id | 1371 | 7 | << " state=" << RecyclePartitionPB::State_Name(part_pb.state()); | 1372 | | // Change state to RECYCLING | 1373 | 7 | std::unique_ptr<Transaction> txn; | 1374 | 7 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 1375 | 7 | if (err != TxnErrorCode::TXN_OK) { | 1376 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 1377 | 0 | return -1; | 1378 | 0 | } | 1379 | 7 | std::string val; | 1380 | 7 | err = txn->get(k, &val); | 1381 | 7 | if (err == | 1382 | 7 | TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it | 1383 | 0 | LOG_INFO("partition {} has been recycled or committed", partition_id); | 1384 | 0 | return 0; | 1385 
| 0 | } | 1386 | 7 | if (err != TxnErrorCode::TXN_OK) { | 1387 | 0 | LOG_WARNING("failed to get kv"); | 1388 | 0 | return -1; | 1389 | 0 | } | 1390 | 7 | part_pb.Clear(); | 1391 | 7 | if (!part_pb.ParseFromString(val)) { | 1392 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); | 1393 | 0 | return -1; | 1394 | 0 | } | 1395 | | // Partitions with PREPARED state MUST have no data | 1396 | 7 | if (part_pb.state() != RecyclePartitionPB::RECYCLING) { | 1397 | 7 | part_pb.set_state(RecyclePartitionPB::RECYCLING); | 1398 | 7 | txn->put(k, part_pb.SerializeAsString()); | 1399 | 7 | err = txn->commit(); | 1400 | 7 | if (err != TxnErrorCode::TXN_OK) { | 1401 | 0 | LOG_WARNING("failed to commit txn: {}", err); | 1402 | 0 | return -1; | 1403 | 0 | } | 1404 | 7 | } | 1405 | | | 1406 | 7 | int ret = 0; | 1407 | 31 | for (int64_t index_id : part_pb.index_id()) { | 1408 | 31 | if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) { | 1409 | 0 | LOG_WARNING("failed to recycle tablets under partition") | 1410 | 0 | .tag("table_id", part_pb.table_id()) | 1411 | 0 | .tag("instance_id", instance_id_) | 1412 | 0 | .tag("index_id", index_id) | 1413 | 0 | .tag("partition_id", partition_id); | 1414 | 0 | ret = -1; | 1415 | 0 | } | 1416 | 31 | } | 1417 | 7 | if (ret == 0 && part_pb.has_db_id()) { | 1418 | | // Recycle the versioned keys | 1419 | 7 | std::unique_ptr<Transaction> txn; | 1420 | 7 | err = txn_kv_->create_txn(&txn); | 1421 | 7 | if (err != TxnErrorCode::TXN_OK) { | 1422 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 1423 | 0 | return -1; | 1424 | 0 | } | 1425 | 7 | std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id}); | 1426 | 7 | std::string index_key = versioned::partition_index_key({instance_id_, partition_id}); | 1427 | 7 | std::string inverted_index_key = versioned::partition_inverted_index_key( | 1428 | 7 | {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}); 
| 1429 | 7 | std::string partition_version_key = | 1430 | 7 | versioned::partition_version_key({instance_id_, partition_id}); | 1431 | 7 | versioned_remove_all(txn.get(), meta_key); | 1432 | 7 | txn->remove(index_key); | 1433 | 7 | txn->remove(inverted_index_key); | 1434 | 7 | versioned_remove_all(txn.get(), partition_version_key); | 1435 | 7 | err = txn->commit(); | 1436 | 7 | if (err != TxnErrorCode::TXN_OK) { | 1437 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); | 1438 | 0 | return -1; | 1439 | 0 | } | 1440 | 7 | } | 1441 | | | 1442 | 7 | if (ret == 0) { | 1443 | 7 | ++num_recycled; | 1444 | 7 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); | 1445 | 7 | partition_keys.push_back(k); | 1446 | 7 | if (part_pb.db_id() > 0) { | 1447 | 7 | partition_version_keys.push_back(partition_version_key( | 1448 | 7 | {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id})); | 1449 | 7 | } | 1450 | 7 | metrics_context.total_recycled_num = num_recycled; | 1451 | 7 | metrics_context.report(); | 1452 | 7 | } | 1453 | 7 | return ret; | 1454 | 7 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 1349 | 2 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { | 1350 | 2 | ++num_scanned; | 1351 | 2 | RecyclePartitionPB part_pb; | 1352 | 2 | if (!part_pb.ParseFromArray(v.data(), v.size())) { | 1353 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); | 1354 | 0 | return -1; | 1355 | 0 | } | 1356 | 2 | int64_t current_time = ::time(nullptr); | 1357 | 2 | if (current_time < | 1358 | 2 | calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired | 1359 | 0 | return 0; | 1360 | 0 | } | 1361 | 2 | ++num_expired; | 1362 | | // decode partition_id | 1363 | 2 | auto k1 = k; | 1364 | 2 | k1.remove_prefix(1); | 1365 | 2 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 1366 | 2 | decode_key(&k1, &out); | 1367 | | // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB | 1368 | 2 | auto partition_id = std::get<int64_t>(std::get<0>(out[3])); | 1369 | 2 | LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_ | 1370 | 2 | << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id | 1371 | 2 | << " state=" << RecyclePartitionPB::State_Name(part_pb.state()); | 1372 | | // Change state to RECYCLING | 1373 | 2 | std::unique_ptr<Transaction> txn; | 1374 | 2 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 1375 | 2 | if (err != TxnErrorCode::TXN_OK) { | 1376 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 1377 | 0 | return -1; | 1378 | 0 | } | 1379 | 2 | std::string val; | 1380 | 2 | err = txn->get(k, &val); | 1381 | 2 | if (err == | 1382 | 2 | TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it | 1383 | 0 | LOG_INFO("partition {} has been recycled or committed", partition_id); | 1384 | 0 | return 0; | 1385 | 0 | } | 1386 | 2 
| if (err != TxnErrorCode::TXN_OK) { | 1387 | 0 | LOG_WARNING("failed to get kv"); | 1388 | 0 | return -1; | 1389 | 0 | } | 1390 | 2 | part_pb.Clear(); | 1391 | 2 | if (!part_pb.ParseFromString(val)) { | 1392 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); | 1393 | 0 | return -1; | 1394 | 0 | } | 1395 | | // Partitions with PREPARED state MUST have no data | 1396 | 2 | if (part_pb.state() != RecyclePartitionPB::RECYCLING) { | 1397 | 1 | part_pb.set_state(RecyclePartitionPB::RECYCLING); | 1398 | 1 | txn->put(k, part_pb.SerializeAsString()); | 1399 | 1 | err = txn->commit(); | 1400 | 1 | if (err != TxnErrorCode::TXN_OK) { | 1401 | 0 | LOG_WARNING("failed to commit txn: {}", err); | 1402 | 0 | return -1; | 1403 | 0 | } | 1404 | 1 | } | 1405 | | | 1406 | 2 | int ret = 0; | 1407 | 2 | for (int64_t index_id : part_pb.index_id()) { | 1408 | 2 | if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) { | 1409 | 1 | LOG_WARNING("failed to recycle tablets under partition") | 1410 | 1 | .tag("table_id", part_pb.table_id()) | 1411 | 1 | .tag("instance_id", instance_id_) | 1412 | 1 | .tag("index_id", index_id) | 1413 | 1 | .tag("partition_id", partition_id); | 1414 | 1 | ret = -1; | 1415 | 1 | } | 1416 | 2 | } | 1417 | 2 | if (ret == 0 && part_pb.has_db_id()) { | 1418 | | // Recycle the versioned keys | 1419 | 1 | std::unique_ptr<Transaction> txn; | 1420 | 1 | err = txn_kv_->create_txn(&txn); | 1421 | 1 | if (err != TxnErrorCode::TXN_OK) { | 1422 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 1423 | 0 | return -1; | 1424 | 0 | } | 1425 | 1 | std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id}); | 1426 | 1 | std::string index_key = versioned::partition_index_key({instance_id_, partition_id}); | 1427 | 1 | std::string inverted_index_key = versioned::partition_inverted_index_key( | 1428 | 1 | {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}); | 1429 | 1 | 
std::string partition_version_key = | 1430 | 1 | versioned::partition_version_key({instance_id_, partition_id}); | 1431 | 1 | versioned_remove_all(txn.get(), meta_key); | 1432 | 1 | txn->remove(index_key); | 1433 | 1 | txn->remove(inverted_index_key); | 1434 | 1 | versioned_remove_all(txn.get(), partition_version_key); | 1435 | 1 | err = txn->commit(); | 1436 | 1 | if (err != TxnErrorCode::TXN_OK) { | 1437 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); | 1438 | 0 | return -1; | 1439 | 0 | } | 1440 | 1 | } | 1441 | | | 1442 | 2 | if (ret == 0) { | 1443 | 1 | ++num_recycled; | 1444 | 1 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); | 1445 | 1 | partition_keys.push_back(k); | 1446 | 1 | if (part_pb.db_id() > 0) { | 1447 | 1 | partition_version_keys.push_back(partition_version_key( | 1448 | 1 | {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id})); | 1449 | 1 | } | 1450 | 1 | metrics_context.total_recycled_num = num_recycled; | 1451 | 1 | metrics_context.report(); | 1452 | 1 | } | 1453 | 2 | return ret; | 1454 | 2 | }; |
|
1455 | | |
1456 | 15 | auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int { |
1457 | 5 | if (partition_keys.empty()) return 0; |
1458 | 4 | DORIS_CLOUD_DEFER { |
1459 | 4 | partition_keys.clear(); |
1460 | 4 | partition_version_keys.clear(); |
1461 | 4 | }; recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv Line | Count | Source | 1458 | 3 | DORIS_CLOUD_DEFER { | 1459 | 3 | partition_keys.clear(); | 1460 | 3 | partition_version_keys.clear(); | 1461 | 3 | }; |
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv Line | Count | Source | 1458 | 1 | DORIS_CLOUD_DEFER { | 1459 | 1 | partition_keys.clear(); | 1460 | 1 | partition_version_keys.clear(); | 1461 | 1 | }; |
|
1462 | 4 | std::unique_ptr<Transaction> txn; |
1463 | 4 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
1464 | 4 | if (err != TxnErrorCode::TXN_OK) { |
1465 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_; |
1466 | 0 | return -1; |
1467 | 0 | } |
1468 | 8 | for (auto& k : partition_keys) { |
1469 | 8 | txn->remove(k); |
1470 | 8 | } |
1471 | 8 | for (auto& k : partition_version_keys) { |
1472 | 8 | txn->remove(k); |
1473 | 8 | } |
1474 | 4 | err = txn->commit(); |
1475 | 4 | if (err != TxnErrorCode::TXN_OK) { |
1476 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_ |
1477 | 0 | << " err=" << err; |
1478 | 0 | return -1; |
1479 | 0 | } |
1480 | 4 | return 0; |
1481 | 4 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv Line | Count | Source | 1456 | 3 | auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int { | 1457 | 3 | if (partition_keys.empty()) return 0; | 1458 | 3 | DORIS_CLOUD_DEFER { | 1459 | 3 | partition_keys.clear(); | 1460 | 3 | partition_version_keys.clear(); | 1461 | 3 | }; | 1462 | 3 | std::unique_ptr<Transaction> txn; | 1463 | 3 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 1464 | 3 | if (err != TxnErrorCode::TXN_OK) { | 1465 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_; | 1466 | 0 | return -1; | 1467 | 0 | } | 1468 | 7 | for (auto& k : partition_keys) { | 1469 | 7 | txn->remove(k); | 1470 | 7 | } | 1471 | 7 | for (auto& k : partition_version_keys) { | 1472 | 7 | txn->remove(k); | 1473 | 7 | } | 1474 | 3 | err = txn->commit(); | 1475 | 3 | if (err != TxnErrorCode::TXN_OK) { | 1476 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_ | 1477 | 0 | << " err=" << err; | 1478 | 0 | return -1; | 1479 | 0 | } | 1480 | 3 | return 0; | 1481 | 3 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv Line | Count | Source | 1456 | 2 | auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int { | 1457 | 2 | if (partition_keys.empty()) return 0; | 1458 | 1 | DORIS_CLOUD_DEFER { | 1459 | 1 | partition_keys.clear(); | 1460 | 1 | partition_version_keys.clear(); | 1461 | 1 | }; | 1462 | 1 | std::unique_ptr<Transaction> txn; | 1463 | 1 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 1464 | 1 | if (err != TxnErrorCode::TXN_OK) { | 1465 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_; | 1466 | 0 | return -1; | 1467 | 0 | } | 1468 | 1 | for (auto& k : partition_keys) { | 1469 | 1 | txn->remove(k); | 1470 | 1 | } | 1471 | 1 | for (auto& k : partition_version_keys) { | 1472 | 1 | txn->remove(k); | 1473 | 1 | } | 1474 | 1 | err = txn->commit(); | 1475 | 1 | if (err != TxnErrorCode::TXN_OK) { | 1476 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_ | 1477 | 0 | << " err=" << err; | 1478 | 0 | return -1; | 1479 | 0 | } | 1480 | 1 | return 0; | 1481 | 1 | }; |
|
1482 | | |
1483 | 15 | if (config::enable_recycler_stats_metrics) { |
1484 | 0 | scan_and_statistics_partitions(); |
1485 | 0 | } |
1486 | | // recycle_func and loop_done for scan and recycle |
1487 | 15 | return scan_and_recycle(part_key0, part_key1, std::move(recycle_func), std::move(loop_done)); |
1488 | 15 | } |
1489 | | |
1490 | 14 | int InstanceRecycler::recycle_versions() { |
1491 | 14 | if (instance_info_.has_multi_version_status() && |
1492 | 14 | instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED) { |
1493 | 2 | return recycle_orphan_partitions(); |
1494 | 2 | } |
1495 | | |
1496 | 12 | int64_t num_scanned = 0; |
1497 | 12 | int64_t num_recycled = 0; |
1498 | 12 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions"); |
1499 | | |
1500 | 12 | LOG_WARNING("begin to recycle table and partition versions").tag("instance_id", instance_id_); |
1501 | | |
1502 | 12 | auto start_time = steady_clock::now(); |
1503 | | |
1504 | 12 | DORIS_CLOUD_DEFER { |
1505 | 12 | auto cost = duration<float>(steady_clock::now() - start_time).count(); |
1506 | 12 | metrics_context.finish_report(); |
1507 | 12 | LOG_WARNING("recycle table and partition versions finished, cost={}s", cost) |
1508 | 12 | .tag("instance_id", instance_id_) |
1509 | 12 | .tag("num_scanned", num_scanned) |
1510 | 12 | .tag("num_recycled", num_recycled); |
1511 | 12 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv Line | Count | Source | 1504 | 12 | DORIS_CLOUD_DEFER { | 1505 | 12 | auto cost = duration<float>(steady_clock::now() - start_time).count(); | 1506 | 12 | metrics_context.finish_report(); | 1507 | 12 | LOG_WARNING("recycle table and partition versions finished, cost={}s", cost) | 1508 | 12 | .tag("instance_id", instance_id_) | 1509 | 12 | .tag("num_scanned", num_scanned) | 1510 | 12 | .tag("num_recycled", num_recycled); | 1511 | 12 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv |
1512 | | |
1513 | 12 | auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0}); |
1514 | 12 | auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0}); |
1515 | 12 | int64_t last_scanned_table_id = 0; |
1516 | 12 | bool is_recycled = false; // Is last scanned kv recycled |
1517 | 12 | auto recycle_func = [&num_scanned, &num_recycled, &last_scanned_table_id, &is_recycled, |
1518 | 12 | &metrics_context, this](std::string_view k, std::string_view) { |
1519 | 2 | ++num_scanned; |
1520 | 2 | auto k1 = k; |
1521 | 2 | k1.remove_prefix(1); |
1522 | | // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id} |
1523 | 2 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
1524 | 2 | decode_key(&k1, &out); |
1525 | 2 | DCHECK_EQ(out.size(), 6) << k; |
1526 | 2 | auto table_id = std::get<int64_t>(std::get<0>(out[4])); |
1527 | 2 | if (table_id == last_scanned_table_id) { // Already handle kvs of this table |
1528 | 0 | num_recycled += is_recycled; // Version kv of this table has been recycled |
1529 | 0 | return 0; |
1530 | 0 | } |
1531 | 2 | last_scanned_table_id = table_id; |
1532 | 2 | is_recycled = false; |
1533 | 2 | auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0}); |
1534 | 2 | auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0}); |
1535 | 2 | std::unique_ptr<Transaction> txn; |
1536 | 2 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
1537 | 2 | if (err != TxnErrorCode::TXN_OK) { |
1538 | 0 | return -1; |
1539 | 0 | } |
1540 | 2 | std::unique_ptr<RangeGetIterator> iter; |
1541 | 2 | err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1); |
1542 | 2 | if (err != TxnErrorCode::TXN_OK) { |
1543 | 0 | return -1; |
1544 | 0 | } |
1545 | 2 | if (iter->has_next()) { // Table is useful, should not recycle table and partition versions |
1546 | 1 | return 0; |
1547 | 1 | } |
1548 | 1 | auto db_id = std::get<int64_t>(std::get<0>(out[3])); |
1549 | | // 1. Remove all partition version kvs of this table |
1550 | 1 | auto partition_version_key_begin = |
1551 | 1 | partition_version_key({instance_id_, db_id, table_id, 0}); |
1552 | 1 | auto partition_version_key_end = |
1553 | 1 | partition_version_key({instance_id_, db_id, table_id, INT64_MAX}); |
1554 | 1 | txn->remove(partition_version_key_begin, partition_version_key_end); |
1555 | 1 | LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin) |
1556 | 1 | << " end=" << hex(partition_version_key_end) << " db_id=" << db_id |
1557 | 1 | << " table_id=" << table_id; |
1558 | | // 2. Remove the table version kv of this table |
1559 | 1 | auto tbl_version_key = table_version_key({instance_id_, db_id, table_id}); |
1560 | 1 | txn->remove(tbl_version_key); |
1561 | 1 | LOG(WARNING) << "remove table version kv " << hex(tbl_version_key); |
1562 | | // 3. Remove mow delete bitmap update lock and tablet job lock |
1563 | 1 | std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1}); |
1564 | 1 | txn->remove(lock_key); |
1565 | 1 | LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key); |
1566 | 1 | std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0}); |
1567 | 1 | std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX}); |
1568 | 1 | txn->remove(tablet_job_key_begin, tablet_job_key_end); |
1569 | 1 | LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin) |
1570 | 1 | << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id |
1571 | 1 | << " table_id=" << table_id; |
1572 | 1 | err = txn->commit(); |
1573 | 1 | if (err != TxnErrorCode::TXN_OK) { |
1574 | 0 | return -1; |
1575 | 0 | } |
1576 | 1 | metrics_context.total_recycled_num = ++num_recycled; |
1577 | 1 | metrics_context.report(); |
1578 | 1 | is_recycled = true; |
1579 | 1 | return 0; |
1580 | 1 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 1518 | 2 | &metrics_context, this](std::string_view k, std::string_view) { | 1519 | 2 | ++num_scanned; | 1520 | 2 | auto k1 = k; | 1521 | 2 | k1.remove_prefix(1); | 1522 | | // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id} | 1523 | 2 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 1524 | 2 | decode_key(&k1, &out); | 1525 | 2 | DCHECK_EQ(out.size(), 6) << k; | 1526 | 2 | auto table_id = std::get<int64_t>(std::get<0>(out[4])); | 1527 | 2 | if (table_id == last_scanned_table_id) { // Already handle kvs of this table | 1528 | 0 | num_recycled += is_recycled; // Version kv of this table has been recycled | 1529 | 0 | return 0; | 1530 | 0 | } | 1531 | 2 | last_scanned_table_id = table_id; | 1532 | 2 | is_recycled = false; | 1533 | 2 | auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0}); | 1534 | 2 | auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0}); | 1535 | 2 | std::unique_ptr<Transaction> txn; | 1536 | 2 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 1537 | 2 | if (err != TxnErrorCode::TXN_OK) { | 1538 | 0 | return -1; | 1539 | 0 | } | 1540 | 2 | std::unique_ptr<RangeGetIterator> iter; | 1541 | 2 | err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1); | 1542 | 2 | if (err != TxnErrorCode::TXN_OK) { | 1543 | 0 | return -1; | 1544 | 0 | } | 1545 | 2 | if (iter->has_next()) { // Table is useful, should not recycle table and partition versions | 1546 | 1 | return 0; | 1547 | 1 | } | 1548 | 1 | auto db_id = std::get<int64_t>(std::get<0>(out[3])); | 1549 | | // 1. 
Remove all partition version kvs of this table | 1550 | 1 | auto partition_version_key_begin = | 1551 | 1 | partition_version_key({instance_id_, db_id, table_id, 0}); | 1552 | 1 | auto partition_version_key_end = | 1553 | 1 | partition_version_key({instance_id_, db_id, table_id, INT64_MAX}); | 1554 | 1 | txn->remove(partition_version_key_begin, partition_version_key_end); | 1555 | 1 | LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin) | 1556 | 1 | << " end=" << hex(partition_version_key_end) << " db_id=" << db_id | 1557 | 1 | << " table_id=" << table_id; | 1558 | | // 2. Remove the table version kv of this table | 1559 | 1 | auto tbl_version_key = table_version_key({instance_id_, db_id, table_id}); | 1560 | 1 | txn->remove(tbl_version_key); | 1561 | 1 | LOG(WARNING) << "remove table version kv " << hex(tbl_version_key); | 1562 | | // 3. Remove mow delete bitmap update lock and tablet job lock | 1563 | 1 | std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1}); | 1564 | 1 | txn->remove(lock_key); | 1565 | 1 | LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key); | 1566 | 1 | std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0}); | 1567 | 1 | std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX}); | 1568 | 1 | txn->remove(tablet_job_key_begin, tablet_job_key_end); | 1569 | 1 | LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin) | 1570 | 1 | << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id | 1571 | 1 | << " table_id=" << table_id; | 1572 | 1 | err = txn->commit(); | 1573 | 1 | if (err != TxnErrorCode::TXN_OK) { | 1574 | 0 | return -1; | 1575 | 0 | } | 1576 | 1 | metrics_context.total_recycled_num = ++num_recycled; | 1577 | 1 | metrics_context.report(); | 1578 | 1 | is_recycled = true; | 1579 | 1 | return 0; | 1580 | 1 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
1581 | | |
1582 | 12 | if (config::enable_recycler_stats_metrics) { |
1583 | 0 | scan_and_statistics_versions(); |
1584 | 0 | } |
1585 | | // recycle_func and loop_done for scan and recycle |
1586 | 12 | return scan_and_recycle(version_key_begin, version_key_end, std::move(recycle_func)); |
1587 | 14 | } |
1588 | | |
1589 | 3 | int InstanceRecycler::recycle_orphan_partitions() { |
1590 | 3 | int64_t num_scanned = 0; |
1591 | 3 | int64_t num_recycled = 0; |
1592 | 3 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_orphan_partitions"); |
1593 | | |
1594 | 3 | LOG_WARNING("begin to recycle orphan table and partition versions") |
1595 | 3 | .tag("instance_id", instance_id_); |
1596 | | |
1597 | 3 | auto start_time = steady_clock::now(); |
1598 | | |
1599 | 3 | DORIS_CLOUD_DEFER { |
1600 | 3 | auto cost = duration<float>(steady_clock::now() - start_time).count(); |
1601 | 3 | metrics_context.finish_report(); |
1602 | 3 | LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost) |
1603 | 3 | .tag("instance_id", instance_id_) |
1604 | 3 | .tag("num_scanned", num_scanned) |
1605 | 3 | .tag("num_recycled", num_recycled); |
1606 | 3 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv Line | Count | Source | 1599 | 3 | DORIS_CLOUD_DEFER { | 1600 | 3 | auto cost = duration<float>(steady_clock::now() - start_time).count(); | 1601 | 3 | metrics_context.finish_report(); | 1602 | 3 | LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost) | 1603 | 3 | .tag("instance_id", instance_id_) | 1604 | 3 | .tag("num_scanned", num_scanned) | 1605 | 3 | .tag("num_recycled", num_recycled); | 1606 | 3 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv |
1607 | | |
1608 | 3 | bool is_empty_table = false; // whether the table has no indexes |
1609 | 3 | bool is_table_kvs_recycled = false; // whether the table related kvs have been recycled |
1610 | 3 | int64_t current_table_id = 0; // current scanning table id |
1611 | 3 | auto recycle_func = [&num_scanned, &num_recycled, &metrics_context, &is_empty_table, |
1612 | 3 | ¤t_table_id, &is_table_kvs_recycled, |
1613 | 3 | this](std::string_view k, std::string_view) { |
1614 | 2 | ++num_scanned; |
1615 | | |
1616 | 2 | std::string_view k1(k); |
1617 | 2 | int64_t db_id, table_id, partition_id; |
1618 | 2 | if (versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id, &partition_id) != |
1619 | 2 | 0) { |
1620 | 0 | LOG(WARNING) << "malformed partition inverted index key " << hex(k); |
1621 | 0 | return -1; |
1622 | 2 | } else if (table_id != current_table_id) { |
1623 | 2 | current_table_id = table_id; |
1624 | 2 | is_table_kvs_recycled = false; |
1625 | 2 | MetaReader meta_reader(instance_id_, txn_kv_.get()); |
1626 | 2 | TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table); |
1627 | 2 | if (err != TxnErrorCode::TXN_OK) { |
1628 | 0 | LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id |
1629 | 0 | << " table_id=" << table_id << " err=" << err; |
1630 | 0 | return -1; |
1631 | 0 | } |
1632 | 2 | } |
1633 | | |
1634 | 2 | if (!is_empty_table) { |
1635 | | // table is not empty, skip recycle |
1636 | 1 | return 0; |
1637 | 1 | } |
1638 | | |
1639 | 1 | std::unique_ptr<Transaction> txn; |
1640 | 1 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
1641 | 1 | if (err != TxnErrorCode::TXN_OK) { |
1642 | 0 | return -1; |
1643 | 0 | } |
1644 | | |
1645 | | // 1. Remove all partition related kvs |
1646 | 1 | std::string partition_meta_key = |
1647 | 1 | versioned::meta_partition_key({instance_id_, partition_id}); |
1648 | 1 | std::string partition_index_key = |
1649 | 1 | versioned::partition_index_key({instance_id_, partition_id}); |
1650 | 1 | std::string partition_inverted_key = versioned::partition_inverted_index_key( |
1651 | 1 | {instance_id_, db_id, table_id, partition_id}); |
1652 | 1 | std::string partition_version_key = |
1653 | 1 | versioned::partition_version_key({instance_id_, partition_id}); |
1654 | 1 | txn->remove(partition_index_key); |
1655 | 1 | txn->remove(partition_inverted_key); |
1656 | 1 | versioned_remove_all(txn.get(), partition_meta_key); |
1657 | 1 | versioned_remove_all(txn.get(), partition_version_key); |
1658 | 1 | LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id |
1659 | 1 | << " table_id=" << table_id << " db_id=" << db_id |
1660 | 1 | << " partition_meta_key=" << hex(partition_meta_key) |
1661 | 1 | << " partition_version_key=" << hex(partition_version_key); |
1662 | | |
1663 | 1 | if (!is_table_kvs_recycled) { |
1664 | 1 | is_table_kvs_recycled = true; |
1665 | | |
1666 | | // 2. Remove the table version kv of this table |
1667 | 1 | std::string table_version_key = versioned::table_version_key({instance_id_, table_id}); |
1668 | 1 | versioned_remove_all(txn.get(), table_version_key); |
1669 | 1 | LOG(WARNING) << "remove table version kv " << hex(table_version_key); |
1670 | | // 3. Remove mow delete bitmap update lock and tablet job lock |
1671 | 1 | std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1}); |
1672 | 1 | txn->remove(lock_key); |
1673 | 1 | LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key); |
1674 | 1 | std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0}); |
1675 | 1 | std::string tablet_job_key_end = |
1676 | 1 | mow_tablet_job_key({instance_id_, table_id, INT64_MAX}); |
1677 | 1 | txn->remove(tablet_job_key_begin, tablet_job_key_end); |
1678 | 1 | LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin) |
1679 | 1 | << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id |
1680 | 1 | << " table_id=" << table_id; |
1681 | 1 | } |
1682 | | |
1683 | 1 | err = txn->commit(); |
1684 | 1 | if (err != TxnErrorCode::TXN_OK) { |
1685 | 0 | return -1; |
1686 | 0 | } |
1687 | 1 | metrics_context.total_recycled_num = ++num_recycled; |
1688 | 1 | metrics_context.report(); |
1689 | 1 | return 0; |
1690 | 1 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 1613 | 2 | this](std::string_view k, std::string_view) { | 1614 | 2 | ++num_scanned; | 1615 | | | 1616 | 2 | std::string_view k1(k); | 1617 | 2 | int64_t db_id, table_id, partition_id; | 1618 | 2 | if (versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id, &partition_id) != | 1619 | 2 | 0) { | 1620 | 0 | LOG(WARNING) << "malformed partition inverted index key " << hex(k); | 1621 | 0 | return -1; | 1622 | 2 | } else if (table_id != current_table_id) { | 1623 | 2 | current_table_id = table_id; | 1624 | 2 | is_table_kvs_recycled = false; | 1625 | 2 | MetaReader meta_reader(instance_id_, txn_kv_.get()); | 1626 | 2 | TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table); | 1627 | 2 | if (err != TxnErrorCode::TXN_OK) { | 1628 | 0 | LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id | 1629 | 0 | << " table_id=" << table_id << " err=" << err; | 1630 | 0 | return -1; | 1631 | 0 | } | 1632 | 2 | } | 1633 | | | 1634 | 2 | if (!is_empty_table) { | 1635 | | // table is not empty, skip recycle | 1636 | 1 | return 0; | 1637 | 1 | } | 1638 | | | 1639 | 1 | std::unique_ptr<Transaction> txn; | 1640 | 1 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 1641 | 1 | if (err != TxnErrorCode::TXN_OK) { | 1642 | 0 | return -1; | 1643 | 0 | } | 1644 | | | 1645 | | // 1. 
Remove all partition related kvs | 1646 | 1 | std::string partition_meta_key = | 1647 | 1 | versioned::meta_partition_key({instance_id_, partition_id}); | 1648 | 1 | std::string partition_index_key = | 1649 | 1 | versioned::partition_index_key({instance_id_, partition_id}); | 1650 | 1 | std::string partition_inverted_key = versioned::partition_inverted_index_key( | 1651 | 1 | {instance_id_, db_id, table_id, partition_id}); | 1652 | 1 | std::string partition_version_key = | 1653 | 1 | versioned::partition_version_key({instance_id_, partition_id}); | 1654 | 1 | txn->remove(partition_index_key); | 1655 | 1 | txn->remove(partition_inverted_key); | 1656 | 1 | versioned_remove_all(txn.get(), partition_meta_key); | 1657 | 1 | versioned_remove_all(txn.get(), partition_version_key); | 1658 | 1 | LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id | 1659 | 1 | << " table_id=" << table_id << " db_id=" << db_id | 1660 | 1 | << " partition_meta_key=" << hex(partition_meta_key) | 1661 | 1 | << " partition_version_key=" << hex(partition_version_key); | 1662 | | | 1663 | 1 | if (!is_table_kvs_recycled) { | 1664 | 1 | is_table_kvs_recycled = true; | 1665 | | | 1666 | | // 2. Remove the table version kv of this table | 1667 | 1 | std::string table_version_key = versioned::table_version_key({instance_id_, table_id}); | 1668 | 1 | versioned_remove_all(txn.get(), table_version_key); | 1669 | 1 | LOG(WARNING) << "remove table version kv " << hex(table_version_key); | 1670 | | // 3. 
Remove mow delete bitmap update lock and tablet job lock | 1671 | 1 | std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1}); | 1672 | 1 | txn->remove(lock_key); | 1673 | 1 | LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key); | 1674 | 1 | std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0}); | 1675 | 1 | std::string tablet_job_key_end = | 1676 | 1 | mow_tablet_job_key({instance_id_, table_id, INT64_MAX}); | 1677 | 1 | txn->remove(tablet_job_key_begin, tablet_job_key_end); | 1678 | 1 | LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin) | 1679 | 1 | << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id | 1680 | 1 | << " table_id=" << table_id; | 1681 | 1 | } | 1682 | | | 1683 | 1 | err = txn->commit(); | 1684 | 1 | if (err != TxnErrorCode::TXN_OK) { | 1685 | 0 | return -1; | 1686 | 0 | } | 1687 | 1 | metrics_context.total_recycled_num = ++num_recycled; | 1688 | 1 | metrics_context.report(); | 1689 | 1 | return 0; | 1690 | 1 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
1691 | | |
1692 | | // recycle_func and loop_done for scan and recycle |
1693 | 3 | return scan_and_recycle( |
1694 | 3 | versioned::partition_inverted_index_key({instance_id_, 0, 0, 0}), |
1695 | 3 | versioned::partition_inverted_index_key({instance_id_, INT64_MAX, 0, 0}), |
1696 | 3 | std::move(recycle_func)); |
1697 | 3 | } |
1698 | | |
1699 | | int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id, |
1700 | | RecyclerMetricsContext& metrics_context, |
1701 | 46 | int64_t partition_id) { |
1702 | 46 | bool is_multi_version = |
1703 | 46 | instance_info_.has_multi_version_status() && |
1704 | 46 | instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED; |
1705 | 46 | int64_t num_scanned = 0; |
1706 | 46 | std::atomic_long num_recycled = 0; |
1707 | | |
1708 | 46 | std::string tablet_key_begin, tablet_key_end; |
1709 | 46 | std::string stats_key_begin, stats_key_end; |
1710 | 46 | std::string job_key_begin, job_key_end; |
1711 | | |
1712 | 46 | std::string tablet_belongs; |
1713 | 46 | if (partition_id > 0) { |
1714 | | // recycle tablets in a partition belonging to the index |
1715 | 33 | meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin); |
1716 | 33 | meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end); |
1717 | 33 | stats_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &stats_key_begin); |
1718 | 33 | stats_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &stats_key_end); |
1719 | 33 | job_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &job_key_begin); |
1720 | 33 | job_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &job_key_end); |
1721 | 33 | tablet_belongs = "partition"; |
1722 | 33 | } else { |
1723 | | // recycle tablets in the index |
1724 | 13 | meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin); |
1725 | 13 | meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end); |
1726 | 13 | stats_tablet_key({instance_id_, table_id, index_id, 0, 0}, &stats_key_begin); |
1727 | 13 | stats_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &stats_key_end); |
1728 | 13 | job_tablet_key({instance_id_, table_id, index_id, 0, 0}, &job_key_begin); |
1729 | 13 | job_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &job_key_end); |
1730 | 13 | tablet_belongs = "index"; |
1731 | 13 | } |
1732 | | |
1733 | 46 | LOG_INFO("begin to recycle tablets of the " + tablet_belongs) |
1734 | 46 | .tag("table_id", table_id) |
1735 | 46 | .tag("index_id", index_id) |
1736 | 46 | .tag("partition_id", partition_id); |
1737 | | |
1738 | 46 | auto start_time = steady_clock::now(); |
1739 | | |
1740 | 46 | DORIS_CLOUD_DEFER { |
1741 | 46 | auto cost = duration<float>(steady_clock::now() - start_time).count(); |
1742 | 46 | LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost) |
1743 | 46 | .tag("instance_id", instance_id_) |
1744 | 46 | .tag("table_id", table_id) |
1745 | 46 | .tag("index_id", index_id) |
1746 | 46 | .tag("partition_id", partition_id) |
1747 | 46 | .tag("num_scanned", num_scanned) |
1748 | 46 | .tag("num_recycled", num_recycled); |
1749 | 46 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv Line | Count | Source | 1740 | 42 | DORIS_CLOUD_DEFER { | 1741 | 42 | auto cost = duration<float>(steady_clock::now() - start_time).count(); | 1742 | 42 | LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost) | 1743 | 42 | .tag("instance_id", instance_id_) | 1744 | 42 | .tag("table_id", table_id) | 1745 | 42 | .tag("index_id", index_id) | 1746 | 42 | .tag("partition_id", partition_id) | 1747 | 42 | .tag("num_scanned", num_scanned) | 1748 | 42 | .tag("num_recycled", num_recycled); | 1749 | 42 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv Line | Count | Source | 1740 | 4 | DORIS_CLOUD_DEFER { | 1741 | 4 | auto cost = duration<float>(steady_clock::now() - start_time).count(); | 1742 | 4 | LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost) | 1743 | 4 | .tag("instance_id", instance_id_) | 1744 | 4 | .tag("table_id", table_id) | 1745 | 4 | .tag("index_id", index_id) | 1746 | 4 | .tag("partition_id", partition_id) | 1747 | 4 | .tag("num_scanned", num_scanned) | 1748 | 4 | .tag("num_recycled", num_recycled); | 1749 | 4 | }; |
|
1750 | | |
1751 | | // The first string_view represents the tablet key which has been recycled |
1752 | | // The second bool represents whether the following fdb's tablet key deletion could be done using range move or not |
1753 | 46 | using TabletKeyPair = std::pair<std::string_view, bool>; |
1754 | 46 | SyncExecutor<TabletKeyPair> sync_executor( |
1755 | 46 | _thread_pool_group.recycle_tablet_pool, |
1756 | 46 | fmt::format("recycle tablets, tablet id {}, index id {}, partition id {}", table_id, |
1757 | 46 | index_id, partition_id), |
1758 | 4.23k | [](const TabletKeyPair& k) { return k.first.empty(); }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE Line | Count | Source | 1758 | 234 | [](const TabletKeyPair& k) { return k.first.empty(); }); |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE Line | Count | Source | 1758 | 4.00k | [](const TabletKeyPair& k) { return k.first.empty(); }); |
|
1759 | | |
1760 | | // Elements in `tablet_keys` has the same lifetime as `it` in `scan_and_recycle` |
1761 | 46 | std::vector<std::string> tablet_idx_keys; |
1762 | 46 | std::vector<std::string> restore_job_keys; |
1763 | 46 | std::vector<std::string> init_rs_keys; |
1764 | 46 | std::vector<std::string> tablet_compact_stats_keys; |
1765 | 46 | std::vector<std::string> tablet_load_stats_keys; |
1766 | 46 | std::vector<std::string> versioned_meta_tablet_keys; |
1767 | 8.24k | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { |
1768 | 8.24k | bool use_range_remove = true; |
1769 | 8.24k | ++num_scanned; |
1770 | 8.24k | doris::TabletMetaCloudPB tablet_meta_pb; |
1771 | 8.24k | if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) { |
1772 | 0 | LOG_WARNING("malformed tablet meta").tag("key", hex(k)); |
1773 | 0 | use_range_remove = false; |
1774 | 0 | return -1; |
1775 | 0 | } |
1776 | 8.24k | int64_t tablet_id = tablet_meta_pb.tablet_id(); |
1777 | | |
1778 | 8.24k | if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) { |
1779 | 4.00k | LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id(); |
1780 | 4.00k | return -1; |
1781 | 4.00k | } |
1782 | | |
1783 | 4.23k | tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id})); |
1784 | 4.23k | restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id})); |
1785 | 4.23k | if (is_multi_version) { |
1786 | 6 | tablet_compact_stats_keys.push_back( |
1787 | 6 | versioned::tablet_compact_stats_key({instance_id_, tablet_id})); |
1788 | 6 | tablet_load_stats_keys.push_back( |
1789 | 6 | versioned::tablet_load_stats_key({instance_id_, tablet_id})); |
1790 | 6 | versioned_meta_tablet_keys.push_back( |
1791 | 6 | versioned::meta_tablet_key({instance_id_, tablet_id})); |
1792 | 6 | } |
1793 | 4.23k | TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false); |
1794 | 4.23k | sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove, |
1795 | 4.23k | &metrics_context, k]() mutable -> TabletKeyPair { |
1796 | 4.23k | if (recycle_tablet(tid, metrics_context) != 0) { |
1797 | 0 | LOG_WARNING("failed to recycle tablet") |
1798 | 0 | .tag("instance_id", instance_id_) |
1799 | 0 | .tag("tablet_id", tid); |
1800 | 0 | range_move = false; |
1801 | 0 | return {std::string_view(), range_move}; |
1802 | 0 | } |
1803 | 4.23k | ++num_recycled; |
1804 | 4.23k | LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k)); |
1805 | 4.23k | return {k, range_move}; |
1806 | 4.23k | }); recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv Line | Count | Source | 1795 | 234 | &metrics_context, k]() mutable -> TabletKeyPair { | 1796 | 234 | if (recycle_tablet(tid, metrics_context) != 0) { | 1797 | 0 | LOG_WARNING("failed to recycle tablet") | 1798 | 0 | .tag("instance_id", instance_id_) | 1799 | 0 | .tag("tablet_id", tid); | 1800 | 0 | range_move = false; | 1801 | 0 | return {std::string_view(), range_move}; | 1802 | 0 | } | 1803 | 234 | ++num_recycled; | 1804 | 234 | LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k)); | 1805 | 234 | return {k, range_move}; | 1806 | 234 | }); |
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv Line | Count | Source | 1795 | 4.00k | &metrics_context, k]() mutable -> TabletKeyPair { | 1796 | 4.00k | if (recycle_tablet(tid, metrics_context) != 0) { | 1797 | 0 | LOG_WARNING("failed to recycle tablet") | 1798 | 0 | .tag("instance_id", instance_id_) | 1799 | 0 | .tag("tablet_id", tid); | 1800 | 0 | range_move = false; | 1801 | 0 | return {std::string_view(), range_move}; | 1802 | 0 | } | 1803 | 4.00k | ++num_recycled; | 1804 | 4.00k | LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k)); | 1805 | 4.00k | return {k, range_move}; | 1806 | 4.00k | }); |
|
1807 | 4.23k | return 0; |
1808 | 4.23k | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ Line | Count | Source | 1767 | 237 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { | 1768 | 237 | bool use_range_remove = true; | 1769 | 237 | ++num_scanned; | 1770 | 237 | doris::TabletMetaCloudPB tablet_meta_pb; | 1771 | 237 | if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) { | 1772 | 0 | LOG_WARNING("malformed tablet meta").tag("key", hex(k)); | 1773 | 0 | use_range_remove = false; | 1774 | 0 | return -1; | 1775 | 0 | } | 1776 | 237 | int64_t tablet_id = tablet_meta_pb.tablet_id(); | 1777 | | | 1778 | 237 | if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) { | 1779 | 0 | LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id(); | 1780 | 0 | return -1; | 1781 | 0 | } | 1782 | | | 1783 | 237 | tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id})); | 1784 | 237 | restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id})); | 1785 | 237 | if (is_multi_version) { | 1786 | 6 | tablet_compact_stats_keys.push_back( | 1787 | 6 | versioned::tablet_compact_stats_key({instance_id_, tablet_id})); | 1788 | 6 | tablet_load_stats_keys.push_back( | 1789 | 6 | versioned::tablet_load_stats_key({instance_id_, tablet_id})); | 1790 | 6 | versioned_meta_tablet_keys.push_back( | 1791 | 6 | versioned::meta_tablet_key({instance_id_, tablet_id})); | 1792 | 6 | } | 1793 | 237 | TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false); | 1794 | 234 | sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove, | 1795 | 234 | &metrics_context, k]() mutable -> TabletKeyPair { | 1796 | 234 | if (recycle_tablet(tid, metrics_context) != 0) { | 1797 | 234 | LOG_WARNING("failed to recycle tablet") | 1798 | 234 | .tag("instance_id", instance_id_) | 
1799 | 234 | .tag("tablet_id", tid); | 1800 | 234 | range_move = false; | 1801 | 234 | return {std::string_view(), range_move}; | 1802 | 234 | } | 1803 | 234 | ++num_recycled; | 1804 | 234 | LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k)); | 1805 | 234 | return {k, range_move}; | 1806 | 234 | }); | 1807 | 234 | return 0; | 1808 | 237 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ Line | Count | Source | 1767 | 8.00k | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { | 1768 | 8.00k | bool use_range_remove = true; | 1769 | 8.00k | ++num_scanned; | 1770 | 8.00k | doris::TabletMetaCloudPB tablet_meta_pb; | 1771 | 8.00k | if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) { | 1772 | 0 | LOG_WARNING("malformed tablet meta").tag("key", hex(k)); | 1773 | 0 | use_range_remove = false; | 1774 | 0 | return -1; | 1775 | 0 | } | 1776 | 8.00k | int64_t tablet_id = tablet_meta_pb.tablet_id(); | 1777 | | | 1778 | 8.00k | if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) { | 1779 | 4.00k | LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id(); | 1780 | 4.00k | return -1; | 1781 | 4.00k | } | 1782 | | | 1783 | 4.00k | tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id})); | 1784 | 4.00k | restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id})); | 1785 | 4.00k | if (is_multi_version) { | 1786 | 0 | tablet_compact_stats_keys.push_back( | 1787 | 0 | versioned::tablet_compact_stats_key({instance_id_, tablet_id})); | 1788 | 0 | tablet_load_stats_keys.push_back( | 1789 | 0 | versioned::tablet_load_stats_key({instance_id_, tablet_id})); | 1790 | 0 | versioned_meta_tablet_keys.push_back( | 1791 | 0 | versioned::meta_tablet_key({instance_id_, tablet_id})); | 1792 | 0 | } | 1793 | 4.00k | TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false); | 1794 | 4.00k | sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove, | 1795 | 4.00k | &metrics_context, k]() mutable -> TabletKeyPair { | 1796 | 4.00k | if (recycle_tablet(tid, metrics_context) != 0) { | 1797 | 4.00k | LOG_WARNING("failed to recycle tablet") | 1798 | 4.00k | 
.tag("instance_id", instance_id_) | 1799 | 4.00k | .tag("tablet_id", tid); | 1800 | 4.00k | range_move = false; | 1801 | 4.00k | return {std::string_view(), range_move}; | 1802 | 4.00k | } | 1803 | 4.00k | ++num_recycled; | 1804 | 4.00k | LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k)); | 1805 | 4.00k | return {k, range_move}; | 1806 | 4.00k | }); | 1807 | 4.00k | return 0; | 1808 | 4.00k | }; |
|
1809 | | |
1810 | | // TODO(AlexYue): Add one ut to cover use_range_remove = false |
1811 | 46 | auto loop_done = [&, this]() -> int { |
1812 | 46 | bool finished = true; |
1813 | 46 | auto tablet_keys = sync_executor.when_all(&finished); |
1814 | 46 | if (!finished) { |
1815 | 0 | LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_); |
1816 | 0 | return -1; |
1817 | 0 | } |
1818 | 46 | if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0; |
1819 | | // sort the vector using key's order |
1820 | 44 | std::sort(tablet_keys.begin(), tablet_keys.end(), |
1821 | 49.4k | [](const auto& prev, const auto& last) { return prev.first < last.first; }); recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_ Line | Count | Source | 1821 | 944 | [](const auto& prev, const auto& last) { return prev.first < last.first; }); |
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_ Line | Count | Source | 1821 | 48.4k | [](const auto& prev, const auto& last) { return prev.first < last.first; }); |
|
1822 | 44 | bool use_range_remove = true; |
1823 | 4.23k | for (auto& [_, remove] : tablet_keys) { |
1824 | 4.23k | if (!remove) { |
1825 | 0 | use_range_remove = remove; |
1826 | 0 | break; |
1827 | 0 | } |
1828 | 4.23k | } |
1829 | 44 | DORIS_CLOUD_DEFER { |
1830 | 44 | tablet_idx_keys.clear(); |
1831 | 44 | restore_job_keys.clear(); |
1832 | 44 | init_rs_keys.clear(); |
1833 | 44 | tablet_compact_stats_keys.clear(); |
1834 | 44 | tablet_load_stats_keys.clear(); |
1835 | 44 | versioned_meta_tablet_keys.clear(); |
1836 | 44 | }; recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv Line | Count | Source | 1829 | 42 | DORIS_CLOUD_DEFER { | 1830 | 42 | tablet_idx_keys.clear(); | 1831 | 42 | restore_job_keys.clear(); | 1832 | 42 | init_rs_keys.clear(); | 1833 | 42 | tablet_compact_stats_keys.clear(); | 1834 | 42 | tablet_load_stats_keys.clear(); | 1835 | 42 | versioned_meta_tablet_keys.clear(); | 1836 | 42 | }; |
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv Line | Count | Source | 1829 | 2 | DORIS_CLOUD_DEFER { | 1830 | 2 | tablet_idx_keys.clear(); | 1831 | 2 | restore_job_keys.clear(); | 1832 | 2 | init_rs_keys.clear(); | 1833 | 2 | tablet_compact_stats_keys.clear(); | 1834 | 2 | tablet_load_stats_keys.clear(); | 1835 | 2 | versioned_meta_tablet_keys.clear(); | 1836 | 2 | }; |
|
1837 | 44 | std::unique_ptr<Transaction> txn; |
1838 | 44 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
1839 | 0 | LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_; |
1840 | 0 | return -1; |
1841 | 0 | } |
1842 | 44 | std::string tablet_key_end; |
1843 | 44 | if (!tablet_keys.empty()) { |
1844 | 42 | if (use_range_remove) { |
1845 | 42 | tablet_key_end = std::string(tablet_keys.back().first) + '\x00'; |
1846 | 42 | txn->remove(tablet_keys.front().first, tablet_key_end); |
1847 | 42 | } else { |
1848 | 0 | for (auto& [k, _] : tablet_keys) { |
1849 | 0 | txn->remove(k); |
1850 | 0 | } |
1851 | 0 | } |
1852 | 42 | } |
1853 | 44 | if (is_multi_version) { |
1854 | 6 | for (auto& k : tablet_compact_stats_keys) { |
1855 | | // Remove all versions of tablet compact stats for recycled tablet |
1856 | 6 | LOG_INFO("remove versioned tablet compact stats key") |
1857 | 6 | .tag("compact_stats_key", hex(k)); |
1858 | 6 | versioned_remove_all(txn.get(), k); |
1859 | 6 | } |
1860 | 6 | for (auto& k : tablet_load_stats_keys) { |
1861 | | // Remove all versions of tablet load stats for recycled tablet |
1862 | 6 | LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k)); |
1863 | 6 | versioned_remove_all(txn.get(), k); |
1864 | 6 | } |
1865 | 6 | for (auto& k : versioned_meta_tablet_keys) { |
1866 | | // Remove all versions of meta tablet for recycled tablet |
1867 | 6 | LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k)); |
1868 | 6 | versioned_remove_all(txn.get(), k); |
1869 | 6 | } |
1870 | 5 | } |
1871 | 4.23k | for (auto& k : tablet_idx_keys) { |
1872 | 4.23k | txn->remove(k); |
1873 | 4.23k | } |
1874 | 4.23k | for (auto& k : restore_job_keys) { |
1875 | 4.23k | txn->remove(k); |
1876 | 4.23k | } |
1877 | 44 | for (auto& k : init_rs_keys) { |
1878 | 0 | txn->remove(k); |
1879 | 0 | } |
1880 | 44 | if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) { |
1881 | 0 | LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_ |
1882 | 0 | << ", err=" << err; |
1883 | 0 | return -1; |
1884 | 0 | } |
1885 | 44 | return 0; |
1886 | 44 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv Line | Count | Source | 1811 | 42 | auto loop_done = [&, this]() -> int { | 1812 | 42 | bool finished = true; | 1813 | 42 | auto tablet_keys = sync_executor.when_all(&finished); | 1814 | 42 | if (!finished) { | 1815 | 0 | LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_); | 1816 | 0 | return -1; | 1817 | 0 | } | 1818 | 42 | if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0; | 1819 | | // sort the vector using key's order | 1820 | 42 | std::sort(tablet_keys.begin(), tablet_keys.end(), | 1821 | 42 | [](const auto& prev, const auto& last) { return prev.first < last.first; }); | 1822 | 42 | bool use_range_remove = true; | 1823 | 234 | for (auto& [_, remove] : tablet_keys) { | 1824 | 234 | if (!remove) { | 1825 | 0 | use_range_remove = remove; | 1826 | 0 | break; | 1827 | 0 | } | 1828 | 234 | } | 1829 | 42 | DORIS_CLOUD_DEFER { | 1830 | 42 | tablet_idx_keys.clear(); | 1831 | 42 | restore_job_keys.clear(); | 1832 | 42 | init_rs_keys.clear(); | 1833 | 42 | tablet_compact_stats_keys.clear(); | 1834 | 42 | tablet_load_stats_keys.clear(); | 1835 | 42 | versioned_meta_tablet_keys.clear(); | 1836 | 42 | }; | 1837 | 42 | std::unique_ptr<Transaction> txn; | 1838 | 42 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { | 1839 | 0 | LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_; | 1840 | 0 | return -1; | 1841 | 0 | } | 1842 | 42 | std::string tablet_key_end; | 1843 | 42 | if (!tablet_keys.empty()) { | 1844 | 40 | if (use_range_remove) { | 1845 | 40 | tablet_key_end = std::string(tablet_keys.back().first) + '\x00'; | 1846 | 40 | txn->remove(tablet_keys.front().first, tablet_key_end); | 1847 | 40 | } else { | 1848 | 0 | for (auto& [k, _] : tablet_keys) { | 1849 | 0 | txn->remove(k); | 1850 | 0 | } | 1851 | 0 | } | 1852 | 40 | } | 1853 | 42 | if (is_multi_version) { | 1854 | 6 | for 
(auto& k : tablet_compact_stats_keys) { | 1855 | | // Remove all versions of tablet compact stats for recycled tablet | 1856 | 6 | LOG_INFO("remove versioned tablet compact stats key") | 1857 | 6 | .tag("compact_stats_key", hex(k)); | 1858 | 6 | versioned_remove_all(txn.get(), k); | 1859 | 6 | } | 1860 | 6 | for (auto& k : tablet_load_stats_keys) { | 1861 | | // Remove all versions of tablet load stats for recycled tablet | 1862 | 6 | LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k)); | 1863 | 6 | versioned_remove_all(txn.get(), k); | 1864 | 6 | } | 1865 | 6 | for (auto& k : versioned_meta_tablet_keys) { | 1866 | | // Remove all versions of meta tablet for recycled tablet | 1867 | 6 | LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k)); | 1868 | 6 | versioned_remove_all(txn.get(), k); | 1869 | 6 | } | 1870 | 5 | } | 1871 | 237 | for (auto& k : tablet_idx_keys) { | 1872 | 237 | txn->remove(k); | 1873 | 237 | } | 1874 | 237 | for (auto& k : restore_job_keys) { | 1875 | 237 | txn->remove(k); | 1876 | 237 | } | 1877 | 42 | for (auto& k : init_rs_keys) { | 1878 | 0 | txn->remove(k); | 1879 | 0 | } | 1880 | 42 | if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) { | 1881 | 0 | LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_ | 1882 | 0 | << ", err=" << err; | 1883 | 0 | return -1; | 1884 | 0 | } | 1885 | 42 | return 0; | 1886 | 42 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv Line | Count | Source | 1811 | 4 | auto loop_done = [&, this]() -> int { | 1812 | 4 | bool finished = true; | 1813 | 4 | auto tablet_keys = sync_executor.when_all(&finished); | 1814 | 4 | if (!finished) { | 1815 | 0 | LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_); | 1816 | 0 | return -1; | 1817 | 0 | } | 1818 | 4 | if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0; | 1819 | | // sort the vector using key's order | 1820 | 2 | std::sort(tablet_keys.begin(), tablet_keys.end(), | 1821 | 2 | [](const auto& prev, const auto& last) { return prev.first < last.first; }); | 1822 | 2 | bool use_range_remove = true; | 1823 | 4.00k | for (auto& [_, remove] : tablet_keys) { | 1824 | 4.00k | if (!remove) { | 1825 | 0 | use_range_remove = remove; | 1826 | 0 | break; | 1827 | 0 | } | 1828 | 4.00k | } | 1829 | 2 | DORIS_CLOUD_DEFER { | 1830 | 2 | tablet_idx_keys.clear(); | 1831 | 2 | restore_job_keys.clear(); | 1832 | 2 | init_rs_keys.clear(); | 1833 | 2 | tablet_compact_stats_keys.clear(); | 1834 | 2 | tablet_load_stats_keys.clear(); | 1835 | 2 | versioned_meta_tablet_keys.clear(); | 1836 | 2 | }; | 1837 | 2 | std::unique_ptr<Transaction> txn; | 1838 | 2 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { | 1839 | 0 | LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_; | 1840 | 0 | return -1; | 1841 | 0 | } | 1842 | 2 | std::string tablet_key_end; | 1843 | 2 | if (!tablet_keys.empty()) { | 1844 | 2 | if (use_range_remove) { | 1845 | 2 | tablet_key_end = std::string(tablet_keys.back().first) + '\x00'; | 1846 | 2 | txn->remove(tablet_keys.front().first, tablet_key_end); | 1847 | 2 | } else { | 1848 | 0 | for (auto& [k, _] : tablet_keys) { | 1849 | 0 | txn->remove(k); | 1850 | 0 | } | 1851 | 0 | } | 1852 | 2 | } | 1853 | 2 | if (is_multi_version) { | 1854 | 0 | for (auto& k : tablet_compact_stats_keys) { 
| 1855 | | // Remove all versions of tablet compact stats for recycled tablet | 1856 | 0 | LOG_INFO("remove versioned tablet compact stats key") | 1857 | 0 | .tag("compact_stats_key", hex(k)); | 1858 | 0 | versioned_remove_all(txn.get(), k); | 1859 | 0 | } | 1860 | 0 | for (auto& k : tablet_load_stats_keys) { | 1861 | | // Remove all versions of tablet load stats for recycled tablet | 1862 | 0 | LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k)); | 1863 | 0 | versioned_remove_all(txn.get(), k); | 1864 | 0 | } | 1865 | 0 | for (auto& k : versioned_meta_tablet_keys) { | 1866 | | // Remove all versions of meta tablet for recycled tablet | 1867 | 0 | LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k)); | 1868 | 0 | versioned_remove_all(txn.get(), k); | 1869 | 0 | } | 1870 | 0 | } | 1871 | 4.00k | for (auto& k : tablet_idx_keys) { | 1872 | 4.00k | txn->remove(k); | 1873 | 4.00k | } | 1874 | 4.00k | for (auto& k : restore_job_keys) { | 1875 | 4.00k | txn->remove(k); | 1876 | 4.00k | } | 1877 | 2 | for (auto& k : init_rs_keys) { | 1878 | 0 | txn->remove(k); | 1879 | 0 | } | 1880 | 2 | if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) { | 1881 | 0 | LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_ | 1882 | 0 | << ", err=" << err; | 1883 | 0 | return -1; | 1884 | 0 | } | 1885 | 2 | return 0; | 1886 | 2 | }; |
|
1887 | | |
1888 | 46 | int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(recycle_func), |
1889 | 46 | std::move(loop_done)); |
1890 | 46 | if (ret != 0) { |
1891 | 2 | LOG(WARNING) << "failed to scan_and_recycle, instance_id=" << instance_id_; |
1892 | 2 | return ret; |
1893 | 2 | } |
1894 | | |
1895 | | // directly remove tablet stats and tablet jobs of these dropped index or partition |
1896 | 44 | std::unique_ptr<Transaction> txn; |
1897 | 44 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
1898 | 0 | LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_; |
1899 | 0 | return -1; |
1900 | 0 | } |
1901 | 44 | txn->remove(stats_key_begin, stats_key_end); |
1902 | 44 | LOG(WARNING) << "remove stats kv, begin=" << hex(stats_key_begin) |
1903 | 44 | << " end=" << hex(stats_key_end); |
1904 | 44 | txn->remove(job_key_begin, job_key_end); |
1905 | 44 | LOG(WARNING) << "remove job kv, begin=" << hex(job_key_begin) << " end=" << hex(job_key_end); |
1906 | 44 | std::string schema_key_begin, schema_key_end; |
1907 | 44 | std::string schema_dict_key; |
1908 | 44 | std::string versioned_schema_key_begin, versioned_schema_key_end; |
1909 | 44 | if (partition_id <= 0) { |
1910 | | // Delete schema kv of this index |
1911 | 12 | meta_schema_key({instance_id_, index_id, 0}, &schema_key_begin); |
1912 | 12 | meta_schema_key({instance_id_, index_id + 1, 0}, &schema_key_end); |
1913 | 12 | txn->remove(schema_key_begin, schema_key_end); |
1914 | 12 | LOG(WARNING) << "remove schema kv, begin=" << hex(schema_key_begin) |
1915 | 12 | << " end=" << hex(schema_key_end); |
1916 | 12 | meta_schema_pb_dictionary_key({instance_id_, index_id}, &schema_dict_key); |
1917 | 12 | txn->remove(schema_dict_key); |
1918 | 12 | LOG(WARNING) << "remove schema dict kv, key=" << hex(schema_dict_key); |
1919 | 12 | versioned::meta_schema_key({instance_id_, index_id, 0}, &versioned_schema_key_begin); |
1920 | 12 | versioned::meta_schema_key({instance_id_, index_id + 1, 0}, &versioned_schema_key_end); |
1921 | 12 | txn->remove(versioned_schema_key_begin, versioned_schema_key_end); |
1922 | 12 | LOG(WARNING) << "remove versioned schema kv, begin=" << hex(versioned_schema_key_begin) |
1923 | 12 | << " end=" << hex(versioned_schema_key_end); |
1924 | 12 | } |
1925 | | |
1926 | 44 | TxnErrorCode err = txn->commit(); |
1927 | 44 | if (err != TxnErrorCode::TXN_OK) { |
1928 | 0 | LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_ |
1929 | 0 | << " err=" << err; |
1930 | 0 | return -1; |
1931 | 0 | } |
1932 | | |
1933 | 44 | return ret; |
1934 | 44 | } |
1935 | | |
1936 | 4.03k | int InstanceRecycler::delete_rowset_data(const RowsetMetaCloudPB& rs_meta_pb) { |
1937 | 4.03k | TEST_SYNC_POINT_RETURN_WITH_VALUE("delete_rowset_data::bypass_check", true); |
1938 | 4.03k | int64_t num_segments = rs_meta_pb.num_segments(); |
1939 | 4.03k | if (num_segments <= 0) return 0; |
1940 | | |
1941 | | // Process inverted indexes |
1942 | 4.02k | std::vector<std::pair<int64_t, std::string>> index_ids; |
1943 | | // default format as v1. |
1944 | 4.02k | InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1; |
1945 | 4.02k | bool delete_rowset_data_by_prefix = false; |
1946 | 4.02k | if (rs_meta_pb.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) { |
1947 | | // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data |
1948 | | // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix |
1949 | 0 | delete_rowset_data_by_prefix = true; |
1950 | 4.02k | } else if (rs_meta_pb.has_tablet_schema()) { |
1951 | 8.00k | for (const auto& index : rs_meta_pb.tablet_schema().index()) { |
1952 | 8.00k | if (index.has_index_type() && index.index_type() == IndexType::INVERTED) { |
1953 | 8.00k | index_ids.emplace_back(index.index_id(), index.index_suffix_name()); |
1954 | 8.00k | } |
1955 | 8.00k | } |
1956 | 4.00k | if (rs_meta_pb.tablet_schema().has_inverted_index_storage_format()) { |
1957 | 2.00k | index_format = rs_meta_pb.tablet_schema().inverted_index_storage_format(); |
1958 | 2.00k | } |
1959 | 4.00k | } else if (!rs_meta_pb.has_index_id() || !rs_meta_pb.has_schema_version()) { |
1960 | | // schema version and index id are not found, delete rowset data by prefix directly. |
1961 | 0 | delete_rowset_data_by_prefix = true; |
1962 | 28 | } else { |
1963 | | // otherwise, try to get schema kv |
1964 | 28 | InvertedIndexInfo index_info; |
1965 | 28 | int inverted_index_get_ret = inverted_index_id_cache_->get( |
1966 | 28 | rs_meta_pb.index_id(), rs_meta_pb.schema_version(), index_info); |
1967 | 28 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset", |
1968 | 28 | &inverted_index_get_ret); |
1969 | 28 | if (inverted_index_get_ret == 0) { |
1970 | 28 | index_format = index_info.first; |
1971 | 28 | index_ids = index_info.second; |
1972 | 28 | } else if (inverted_index_get_ret == 1) { |
1973 | | // 1. Schema kv not found means tablet has been recycled |
1974 | | // Maybe some tablet recycle failed by some bugs |
1975 | | // We need to delete again to double check |
1976 | | // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes, |
1977 | | // because we are uncertain about the inverted index information. |
1978 | | // If there are inverted indexes, some data might not be deleted, |
1979 | | // but this is acceptable as we have made our best effort to delete the data. |
1980 | 0 | LOG_INFO( |
1981 | 0 | "delete rowset data schema kv not found, need to delete again to double " |
1982 | 0 | "check") |
1983 | 0 | .tag("instance_id", instance_id_) |
1984 | 0 | .tag("tablet_id", rs_meta_pb.tablet_id()) |
1985 | 0 | .tag("rowset", rs_meta_pb.ShortDebugString()); |
1986 | | // Currently index_ids is guaranteed to be empty, |
1987 | | // but we clear it again here as a safeguard against future code changes |
1988 | | // that might cause index_ids to no longer be empty |
1989 | 0 | index_format = InvertedIndexStorageFormatPB::V2; |
1990 | 0 | index_ids.clear(); |
1991 | 0 | } else { |
1992 | | // failed to get schema kv, delete rowset data by prefix directly. |
1993 | 0 | delete_rowset_data_by_prefix = true; |
1994 | 0 | } |
1995 | 28 | } |
1996 | | |
1997 | 4.02k | if (delete_rowset_data_by_prefix) { |
1998 | 0 | return delete_rowset_data(rs_meta_pb.resource_id(), rs_meta_pb.tablet_id(), |
1999 | 0 | rs_meta_pb.rowset_id_v2()); |
2000 | 0 | } |
2001 | | |
2002 | 4.02k | auto it = accessor_map_.find(rs_meta_pb.resource_id()); |
2003 | 4.02k | if (it == accessor_map_.end()) { |
2004 | 0 | LOG_WARNING("instance has no such resource id") |
2005 | 0 | .tag("instance_id", instance_id_) |
2006 | 0 | .tag("resource_id", rs_meta_pb.resource_id()); |
2007 | 0 | return -1; |
2008 | 0 | } |
2009 | 4.02k | auto& accessor = it->second; |
2010 | 4.02k | int64_t tablet_id = rs_meta_pb.tablet_id(); |
2011 | 4.02k | const auto& rowset_id = rs_meta_pb.rowset_id_v2(); |
2012 | 4.02k | std::vector<std::string> file_paths; |
2013 | 24.0k | for (int64_t i = 0; i < num_segments; ++i) { |
2014 | 20.0k | file_paths.push_back(segment_path(tablet_id, rowset_id, i)); |
2015 | 20.0k | if (index_format == InvertedIndexStorageFormatPB::V1) { |
2016 | 40.0k | for (const auto& index_id : index_ids) { |
2017 | 40.0k | file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i, index_id.first, |
2018 | 40.0k | index_id.second)); |
2019 | 40.0k | } |
2020 | 20.0k | } else if (!index_ids.empty()) { |
2021 | 0 | file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i)); |
2022 | 0 | } |
2023 | 20.0k | } |
2024 | | |
2025 | | // Process delete bitmap |
2026 | 4.02k | file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id)); |
2027 | | // TODO(AlexYue): seems could do do batch |
2028 | 4.02k | return accessor->delete_files(file_paths); |
2029 | 4.02k | } |
2030 | | |
2031 | | int InstanceRecycler::delete_rowset_data( |
2032 | | const std::map<std::string, doris::RowsetMetaCloudPB>& rowsets, RowsetRecyclingState type, |
2033 | 36 | RecyclerMetricsContext& metrics_context) { |
2034 | 36 | int ret = 0; |
2035 | | // resource_id -> file_paths |
2036 | 36 | std::map<std::string, std::vector<std::string>> resource_file_paths; |
2037 | | // (resource_id, tablet_id, rowset_id) |
2038 | 36 | std::vector<std::tuple<std::string, int64_t, std::string>> rowsets_delete_by_prefix; |
2039 | 36 | bool is_formal_rowset = (type == RowsetRecyclingState::FORMAL_ROWSET); |
2040 | | |
2041 | 54.1k | for (const auto& [_, rs] : rowsets) { |
2042 | | // we have to treat tmp rowset as "orphans" that may not related to any existing tablets |
2043 | | // due to aborted schema change. |
2044 | 54.1k | if (is_formal_rowset) { |
2045 | 3.12k | std::lock_guard lock(recycled_tablets_mtx_); |
2046 | 3.12k | if (recycled_tablets_.count(rs.tablet_id())) { |
2047 | 0 | continue; // Rowset data has already been deleted |
2048 | 0 | } |
2049 | 3.12k | } |
2050 | | |
2051 | 54.1k | auto it = accessor_map_.find(rs.resource_id()); |
2052 | | // possible if the accessor is not initilized correctly |
2053 | 54.1k | if (it == accessor_map_.end()) [[unlikely]] { |
2054 | 1 | LOG_WARNING("instance has no such resource id") |
2055 | 1 | .tag("instance_id", instance_id_) |
2056 | 1 | .tag("resource_id", rs.resource_id()); |
2057 | 1 | ret = -1; |
2058 | 1 | continue; |
2059 | 1 | } |
2060 | | |
2061 | 54.1k | auto& file_paths = resource_file_paths[rs.resource_id()]; |
2062 | 54.1k | const auto& rowset_id = rs.rowset_id_v2(); |
2063 | 54.1k | int64_t tablet_id = rs.tablet_id(); |
2064 | 54.1k | int64_t num_segments = rs.num_segments(); |
2065 | 54.1k | if (num_segments <= 0) { |
2066 | 0 | metrics_context.total_recycled_num++; |
2067 | 0 | metrics_context.total_recycled_data_size += rs.total_disk_size(); |
2068 | 0 | continue; |
2069 | 0 | } |
2070 | | |
2071 | | // Process delete bitmap |
2072 | 54.1k | file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id)); |
2073 | | |
2074 | | // Process inverted indexes |
2075 | 54.1k | std::vector<std::pair<int64_t, std::string>> index_ids; |
2076 | | // default format as v1. |
2077 | 54.1k | InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1; |
2078 | 54.1k | int inverted_index_get_ret = 0; |
2079 | 54.1k | if (rs.has_tablet_schema()) { |
2080 | 53.5k | for (const auto& index : rs.tablet_schema().index()) { |
2081 | 53.5k | if (index.has_index_type() && index.index_type() == IndexType::INVERTED) { |
2082 | 53.5k | index_ids.emplace_back(index.index_id(), index.index_suffix_name()); |
2083 | 53.5k | } |
2084 | 53.5k | } |
2085 | 26.5k | if (rs.tablet_schema().has_inverted_index_storage_format()) { |
2086 | 26.5k | index_format = rs.tablet_schema().inverted_index_storage_format(); |
2087 | 26.5k | } |
2088 | 27.5k | } else { |
2089 | 27.5k | if (!rs.has_index_id() || !rs.has_schema_version()) { |
2090 | 0 | LOG(WARNING) << "rowset must have either schema or schema_version and index_id, " |
2091 | 0 | "instance_id=" |
2092 | 0 | << instance_id_ << " tablet_id=" << tablet_id |
2093 | 0 | << " rowset_id=" << rowset_id; |
2094 | 0 | ret = -1; |
2095 | 0 | continue; |
2096 | 0 | } |
2097 | 27.5k | InvertedIndexInfo index_info; |
2098 | 27.5k | inverted_index_get_ret = |
2099 | 27.5k | inverted_index_id_cache_->get(rs.index_id(), rs.schema_version(), index_info); |
2100 | 27.5k | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset", |
2101 | 27.5k | &inverted_index_get_ret); |
2102 | 27.5k | if (inverted_index_get_ret == 0) { |
2103 | 27.0k | index_format = index_info.first; |
2104 | 27.0k | index_ids = index_info.second; |
2105 | 27.0k | } else if (inverted_index_get_ret == 1) { |
2106 | | // 1. Schema kv not found means tablet has been recycled |
2107 | | // Maybe some tablet recycle failed by some bugs |
2108 | | // We need to delete again to double check |
2109 | | // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes, |
2110 | | // because we are uncertain about the inverted index information. |
2111 | | // If there are inverted indexes, some data might not be deleted, |
2112 | | // but this is acceptable as we have made our best effort to delete the data. |
2113 | 503 | LOG_INFO( |
2114 | 503 | "delete rowset data schema kv not found, need to delete again to double " |
2115 | 503 | "check") |
2116 | 503 | .tag("instance_id", instance_id_) |
2117 | 503 | .tag("tablet_id", tablet_id) |
2118 | 503 | .tag("rowset", rs.ShortDebugString()); |
2119 | | // Currently index_ids is guaranteed to be empty, |
2120 | | // but we clear it again here as a safeguard against future code changes |
2121 | | // that might cause index_ids to no longer be empty |
2122 | 503 | index_format = InvertedIndexStorageFormatPB::V2; |
2123 | 503 | index_ids.clear(); |
2124 | 18.4E | } else { |
2125 | 18.4E | LOG(WARNING) << "failed to get schema kv for rowset, instance_id=" << instance_id_ |
2126 | 18.4E | << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id; |
2127 | 18.4E | ret = -1; |
2128 | 18.4E | continue; |
2129 | 18.4E | } |
2130 | 27.5k | } |
2131 | 54.1k | if (rs.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) { |
2132 | | // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data |
2133 | | // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix |
2134 | 5 | rowsets_delete_by_prefix.emplace_back(rs.resource_id(), tablet_id, rs.rowset_id_v2()); |
2135 | 5 | continue; |
2136 | 5 | } |
2137 | 324k | for (int64_t i = 0; i < num_segments; ++i) { |
2138 | 270k | file_paths.push_back(segment_path(tablet_id, rowset_id, i)); |
2139 | 270k | if (index_format == InvertedIndexStorageFormatPB::V1) { |
2140 | 539k | for (const auto& index_id : index_ids) { |
2141 | 539k | file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i, |
2142 | 539k | index_id.first, index_id.second)); |
2143 | 539k | } |
2144 | 268k | } else if (!index_ids.empty() || inverted_index_get_ret == 1) { |
2145 | | // try to recycle inverted index v2 when get_ret == 1 |
2146 | | // we treat schema not found as if it has a v2 format inverted index |
2147 | | // to reduce chance of data leakage |
2148 | 2.50k | if (inverted_index_get_ret == 1) { |
2149 | 2.50k | LOG_INFO("delete rowset data schema kv not found, try to delete index file") |
2150 | 2.50k | .tag("instance_id", instance_id_) |
2151 | 2.50k | .tag("inverted index v2 path", |
2152 | 2.50k | inverted_index_path_v2(tablet_id, rowset_id, i)); |
2153 | 2.50k | } |
2154 | 2.50k | file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i)); |
2155 | 2.50k | } |
2156 | 270k | } |
2157 | 54.1k | } |
2158 | | |
2159 | 36 | SyncExecutor<int> concurrent_delete_executor(_thread_pool_group.s3_producer_pool, |
2160 | 36 | "delete_rowset_data", |
2161 | 38 | [](const int& ret) { return ret != 0; }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi Line | Count | Source | 2161 | 38 | [](const int& ret) { return ret != 0; }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi |
2162 | 36 | for (auto& [resource_id, file_paths] : resource_file_paths) { |
2163 | 33 | concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int { |
2164 | 33 | DCHECK(accessor_map_.count(*rid)) |
2165 | 0 | << "uninitilized accessor, instance_id=" << instance_id_ |
2166 | 0 | << " resource_id=" << resource_id << " path[0]=" << (*paths)[0]; |
2167 | 33 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id", |
2168 | 33 | &accessor_map_); |
2169 | 33 | if (!accessor_map_.contains(*rid)) { |
2170 | 0 | LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id") |
2171 | 0 | .tag("resource_id", resource_id) |
2172 | 0 | .tag("instance_id", instance_id_); |
2173 | 0 | return -1; |
2174 | 0 | } |
2175 | 33 | auto& accessor = accessor_map_[*rid]; |
2176 | 33 | int ret = accessor->delete_files(*paths); |
2177 | 33 | if (!ret) { |
2178 | | // deduplication of different files with the same rowset id |
2179 | | // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat |
2180 | | //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx |
2181 | 33 | std::set<std::string> deleted_rowset_id; |
2182 | | |
2183 | 33 | std::for_each(paths->begin(), paths->end(), |
2184 | 33 | [&metrics_context, &rowsets, &deleted_rowset_id, |
2185 | 862k | this](const std::string& path) { |
2186 | 862k | std::vector<std::string> str; |
2187 | 862k | butil::SplitString(path, '/', &str); |
2188 | 862k | std::string rowset_id; |
2189 | 862k | if (auto pos = str.back().find('_'); pos != std::string::npos) { |
2190 | 859k | rowset_id = str.back().substr(0, pos); |
2191 | 859k | } else { |
2192 | 2.94k | LOG(WARNING) << "failed to parse rowset_id, path=" << path; |
2193 | 2.94k | return; |
2194 | 2.94k | } |
2195 | 859k | auto rs_meta = rowsets.find(rowset_id); |
2196 | 859k | if (rs_meta != rowsets.end() && |
2197 | 862k | !deleted_rowset_id.contains(rowset_id)) { |
2198 | 54.1k | deleted_rowset_id.emplace(rowset_id); |
2199 | 54.1k | metrics_context.total_recycled_data_size += |
2200 | 54.1k | rs_meta->second.total_disk_size(); |
2201 | 54.1k | segment_metrics_context_.total_recycled_num += |
2202 | 54.1k | rs_meta->second.num_segments(); |
2203 | 54.1k | segment_metrics_context_.total_recycled_data_size += |
2204 | 54.1k | rs_meta->second.total_disk_size(); |
2205 | 54.1k | metrics_context.total_recycled_num++; |
2206 | 54.1k | } |
2207 | 859k | }); recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_ Line | Count | Source | 2185 | 862k | this](const std::string& path) { | 2186 | 862k | std::vector<std::string> str; | 2187 | 862k | butil::SplitString(path, '/', &str); | 2188 | 862k | std::string rowset_id; | 2189 | 862k | if (auto pos = str.back().find('_'); pos != std::string::npos) { | 2190 | 859k | rowset_id = str.back().substr(0, pos); | 2191 | 859k | } else { | 2192 | 2.94k | LOG(WARNING) << "failed to parse rowset_id, path=" << path; | 2193 | 2.94k | return; | 2194 | 2.94k | } | 2195 | 859k | auto rs_meta = rowsets.find(rowset_id); | 2196 | 859k | if (rs_meta != rowsets.end() && | 2197 | 862k | !deleted_rowset_id.contains(rowset_id)) { | 2198 | 54.1k | deleted_rowset_id.emplace(rowset_id); | 2199 | 54.1k | metrics_context.total_recycled_data_size += | 2200 | 54.1k | rs_meta->second.total_disk_size(); | 2201 | 54.1k | segment_metrics_context_.total_recycled_num += | 2202 | 54.1k | rs_meta->second.num_segments(); | 2203 | 54.1k | segment_metrics_context_.total_recycled_data_size += | 2204 | 54.1k | rs_meta->second.total_disk_size(); | 2205 | 54.1k | metrics_context.total_recycled_num++; | 2206 | 54.1k | } | 2207 | 859k | }); |
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_ |
2208 | 33 | segment_metrics_context_.report(); |
2209 | 33 | metrics_context.report(); |
2210 | 33 | } |
2211 | 33 | return ret; |
2212 | 33 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv Line | Count | Source | 2163 | 33 | concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int { | 2164 | 33 | DCHECK(accessor_map_.count(*rid)) | 2165 | 0 | << "uninitilized accessor, instance_id=" << instance_id_ | 2166 | 0 | << " resource_id=" << resource_id << " path[0]=" << (*paths)[0]; | 2167 | 33 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id", | 2168 | 33 | &accessor_map_); | 2169 | 33 | if (!accessor_map_.contains(*rid)) { | 2170 | 0 | LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id") | 2171 | 0 | .tag("resource_id", resource_id) | 2172 | 0 | .tag("instance_id", instance_id_); | 2173 | 0 | return -1; | 2174 | 0 | } | 2175 | 33 | auto& accessor = accessor_map_[*rid]; | 2176 | 33 | int ret = accessor->delete_files(*paths); | 2177 | 33 | if (!ret) { | 2178 | | // deduplication of different files with the same rowset id | 2179 | | // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat | 2180 | | //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx | 2181 | 33 | std::set<std::string> deleted_rowset_id; | 2182 | | | 2183 | 33 | std::for_each(paths->begin(), paths->end(), | 2184 | 33 | [&metrics_context, &rowsets, &deleted_rowset_id, | 2185 | 33 | this](const std::string& path) { | 2186 | 33 | std::vector<std::string> str; | 2187 | 33 | butil::SplitString(path, '/', &str); | 2188 | 33 | std::string rowset_id; | 2189 | 33 | if (auto pos = str.back().find('_'); pos != std::string::npos) { | 2190 | 33 | rowset_id = str.back().substr(0, pos); | 2191 | 33 | } else { | 2192 | 33 | LOG(WARNING) << "failed to parse rowset_id, path=" << path; | 2193 | 33 | return; | 2194 | 33 | } | 2195 | 33 | auto rs_meta = 
rowsets.find(rowset_id); | 2196 | 33 | if (rs_meta != rowsets.end() && | 2197 | 33 | !deleted_rowset_id.contains(rowset_id)) { | 2198 | 33 | deleted_rowset_id.emplace(rowset_id); | 2199 | 33 | metrics_context.total_recycled_data_size += | 2200 | 33 | rs_meta->second.total_disk_size(); | 2201 | 33 | segment_metrics_context_.total_recycled_num += | 2202 | 33 | rs_meta->second.num_segments(); | 2203 | 33 | segment_metrics_context_.total_recycled_data_size += | 2204 | 33 | rs_meta->second.total_disk_size(); | 2205 | 33 | metrics_context.total_recycled_num++; | 2206 | 33 | } | 2207 | 33 | }); | 2208 | 33 | segment_metrics_context_.report(); | 2209 | 33 | metrics_context.report(); | 2210 | 33 | } | 2211 | 33 | return ret; | 2212 | 33 | }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv |
2213 | 33 | } |
2214 | 36 | for (const auto& [resource_id, tablet_id, rowset_id] : rowsets_delete_by_prefix) { |
2215 | 5 | LOG_INFO( |
2216 | 5 | "delete rowset {} by prefix because it's in BEGIN_PARTIAL_UPDATE state, " |
2217 | 5 | "resource_id={}, tablet_id={}, instance_id={}", |
2218 | 5 | rowset_id, resource_id, tablet_id, instance_id_); |
2219 | 5 | concurrent_delete_executor.add([&]() -> int { |
2220 | 5 | int ret = delete_rowset_data(resource_id, tablet_id, rowset_id); |
2221 | 5 | if (!ret) { |
2222 | 5 | auto rs = rowsets.at(rowset_id); |
2223 | 5 | metrics_context.total_recycled_data_size += rs.total_disk_size(); |
2224 | 5 | metrics_context.total_recycled_num++; |
2225 | 5 | segment_metrics_context_.total_recycled_data_size += rs.total_disk_size(); |
2226 | 5 | segment_metrics_context_.total_recycled_num += rs.num_segments(); |
2227 | 5 | metrics_context.report(); |
2228 | 5 | segment_metrics_context_.report(); |
2229 | 5 | } |
2230 | 5 | return ret; |
2231 | 5 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv Line | Count | Source | 2219 | 5 | concurrent_delete_executor.add([&]() -> int { | 2220 | 5 | int ret = delete_rowset_data(resource_id, tablet_id, rowset_id); | 2221 | 5 | if (!ret) { | 2222 | 5 | auto rs = rowsets.at(rowset_id); | 2223 | 5 | metrics_context.total_recycled_data_size += rs.total_disk_size(); | 2224 | 5 | metrics_context.total_recycled_num++; | 2225 | 5 | segment_metrics_context_.total_recycled_data_size += rs.total_disk_size(); | 2226 | 5 | segment_metrics_context_.total_recycled_num += rs.num_segments(); | 2227 | 5 | metrics_context.report(); | 2228 | 5 | segment_metrics_context_.report(); | 2229 | 5 | } | 2230 | 5 | return ret; | 2231 | 5 | }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv |
2232 | 5 | } |
2233 | | |
2234 | 36 | bool finished = true; |
2235 | 36 | std::vector<int> rets = concurrent_delete_executor.when_all(&finished); |
2236 | 38 | for (int r : rets) { |
2237 | 38 | if (r != 0) { |
2238 | 0 | ret = -1; |
2239 | 0 | break; |
2240 | 0 | } |
2241 | 38 | } |
2242 | 36 | ret = finished ? ret : -1; |
2243 | 36 | return ret; |
2244 | 36 | } |
2245 | | |
2246 | | int InstanceRecycler::delete_rowset_data(const std::string& resource_id, int64_t tablet_id, |
2247 | 2.90k | const std::string& rowset_id) { |
2248 | 2.90k | auto it = accessor_map_.find(resource_id); |
2249 | 2.90k | if (it == accessor_map_.end()) { |
2250 | 0 | LOG_WARNING("instance has no such resource id") |
2251 | 0 | .tag("instance_id", instance_id_) |
2252 | 0 | .tag("resource_id", resource_id) |
2253 | 0 | .tag("tablet_id", tablet_id) |
2254 | 0 | .tag("rowset_id", rowset_id); |
2255 | 0 | return -1; |
2256 | 0 | } |
2257 | 2.90k | auto& accessor = it->second; |
2258 | 2.90k | return accessor->delete_prefix(rowset_path_prefix(tablet_id, rowset_id)); |
2259 | 2.90k | } |
2260 | | |
2261 | | int InstanceRecycler::scan_tablets_and_statistics(int64_t table_id, int64_t index_id, |
2262 | | RecyclerMetricsContext& metrics_context, |
2263 | 0 | int64_t partition_id, bool is_empty_tablet) { |
2264 | 0 | std::string tablet_key_begin, tablet_key_end; |
2265 | |
|
2266 | 0 | if (partition_id > 0) { |
2267 | 0 | meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin); |
2268 | 0 | meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end); |
2269 | 0 | } else { |
2270 | 0 | meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin); |
2271 | 0 | meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end); |
2272 | 0 | } |
2273 | | // for calculate the total num or bytes of recyled objects |
2274 | 0 | auto scan_and_statistics = [&, is_empty_tablet, this](std::string_view k, |
2275 | 0 | std::string_view v) -> int { |
2276 | 0 | doris::TabletMetaCloudPB tablet_meta_pb; |
2277 | 0 | if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) { |
2278 | 0 | return 0; |
2279 | 0 | } |
2280 | 0 | int64_t tablet_id = tablet_meta_pb.tablet_id(); |
2281 | |
|
2282 | 0 | if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) { |
2283 | 0 | return 0; |
2284 | 0 | } |
2285 | | |
2286 | 0 | if (!is_empty_tablet) { |
2287 | 0 | if (scan_tablet_and_statistics(tablet_id, metrics_context) != 0) { |
2288 | 0 | return 0; |
2289 | 0 | } |
2290 | 0 | tablet_metrics_context_.total_need_recycle_num++; |
2291 | 0 | } |
2292 | 0 | return 0; |
2293 | 0 | }; Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_ Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_ |
2294 | 0 | int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(scan_and_statistics)); |
2295 | 0 | metrics_context.report(true); |
2296 | 0 | tablet_metrics_context_.report(true); |
2297 | 0 | segment_metrics_context_.report(true); |
2298 | 0 | return ret; |
2299 | 0 | } |
2300 | | |
2301 | | int InstanceRecycler::scan_tablet_and_statistics(int64_t tablet_id, |
2302 | 0 | RecyclerMetricsContext& metrics_context) { |
2303 | 0 | int ret = 0; |
2304 | 0 | std::map<std::string, RowsetMetaCloudPB> rowset_meta_map; |
2305 | 0 | std::unique_ptr<Transaction> txn; |
2306 | 0 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
2307 | 0 | LOG_WARNING("failed to recycle tablet ") |
2308 | 0 | .tag("tablet id", tablet_id) |
2309 | 0 | .tag("instance_id", instance_id_) |
2310 | 0 | .tag("reason", "failed to create txn"); |
2311 | 0 | ret = -1; |
2312 | 0 | } |
2313 | 0 | GetRowsetResponse resp; |
2314 | 0 | std::string msg; |
2315 | 0 | MetaServiceCode code = MetaServiceCode::OK; |
2316 | | // get rowsets in tablet |
2317 | 0 | internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_, |
2318 | 0 | tablet_id, code, msg, &resp); |
2319 | 0 | if (code != MetaServiceCode::OK) { |
2320 | 0 | LOG_WARNING("failed to get rowsets of tablet when recycle tablet") |
2321 | 0 | .tag("tablet id", tablet_id) |
2322 | 0 | .tag("msg", msg) |
2323 | 0 | .tag("code", code) |
2324 | 0 | .tag("instance id", instance_id_); |
2325 | 0 | ret = -1; |
2326 | 0 | } |
2327 | 0 | for (const auto& rs_meta : resp.rowset_meta()) { |
2328 | | /* |
2329 | | * For compatibility, we skip the loop for [0-1] here. |
2330 | | * The purpose of this loop is to delete object files, |
2331 | | * and since [0-1] only has meta and doesn't have object files, |
2332 | | * skipping it doesn't affect system correctness. |
2333 | | * |
2334 | | * If not skipped, the check "if (!rs_meta.has_resource_id())" below |
2335 | | * would return error -1 directly, causing the recycle operation to fail. |
2336 | | * |
2337 | | * [0-1] doesn't have resource id is a bug. |
2338 | | * In the future, we will fix this problem, after that, |
2339 | | * we can remove this if statement. |
2340 | | * |
2341 | | * TODO(Yukang-Lian): remove this if statement when [0-1] has resource id in the future. |
2342 | | */ |
2343 | |
|
2344 | 0 | if (rs_meta.end_version() == 1) { |
2345 | | // Assert that [0-1] has no resource_id to make sure |
2346 | | // this if statement will not be forgetted to remove |
2347 | | // when the resource id bug is fixed |
2348 | 0 | DCHECK(!rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString(); |
2349 | 0 | continue; |
2350 | 0 | } |
2351 | 0 | if (!rs_meta.has_resource_id()) { |
2352 | 0 | LOG_WARNING("rowset meta does not have a resource id, impossible!") |
2353 | 0 | .tag("rs_meta", rs_meta.ShortDebugString()) |
2354 | 0 | .tag("instance_id", instance_id_) |
2355 | 0 | .tag("tablet_id", tablet_id); |
2356 | 0 | continue; |
2357 | 0 | } |
2358 | 0 | DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString(); |
2359 | 0 | auto it = accessor_map_.find(rs_meta.resource_id()); |
2360 | | // possible if the accessor is not initilized correctly |
2361 | 0 | if (it == accessor_map_.end()) [[unlikely]] { |
2362 | 0 | LOG_WARNING( |
2363 | 0 | "failed to find resource id when recycle tablet, skip this vault accessor " |
2364 | 0 | "recycle process") |
2365 | 0 | .tag("tablet id", tablet_id) |
2366 | 0 | .tag("instance_id", instance_id_) |
2367 | 0 | .tag("resource_id", rs_meta.resource_id()) |
2368 | 0 | .tag("rowset meta pb", rs_meta.ShortDebugString()); |
2369 | 0 | continue; |
2370 | 0 | } |
2371 | | |
2372 | 0 | metrics_context.total_need_recycle_data_size += rs_meta.total_disk_size(); |
2373 | 0 | tablet_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size(); |
2374 | 0 | segment_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size(); |
2375 | 0 | segment_metrics_context_.total_need_recycle_num += rs_meta.num_segments(); |
2376 | 0 | } |
2377 | 0 | return ret; |
2378 | 0 | } |
2379 | | |
2380 | 4.24k | int InstanceRecycler::recycle_tablet(int64_t tablet_id, RecyclerMetricsContext& metrics_context) { |
2381 | 4.24k | LOG_INFO("begin to recycle rowsets in a dropped tablet") |
2382 | 4.24k | .tag("instance_id", instance_id_) |
2383 | 4.24k | .tag("tablet_id", tablet_id); |
2384 | | |
2385 | 4.24k | if (instance_info_.has_multi_version_status() && |
2386 | 4.24k | instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED) { |
2387 | 6 | return recycle_versioned_tablet(tablet_id, metrics_context); |
2388 | 6 | } |
2389 | | |
2390 | 4.23k | int ret = 0; |
2391 | 4.23k | auto start_time = steady_clock::now(); |
2392 | | |
2393 | 4.23k | TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0); |
2394 | | |
2395 | | // collect resource ids |
2396 | 234 | std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0}); |
2397 | 234 | std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0}); |
2398 | 234 | std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""}); |
2399 | 234 | std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""}); |
2400 | 234 | std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0}); |
2401 | 234 | std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0}); |
2402 | | |
2403 | 234 | std::set<std::string> resource_ids; |
2404 | 234 | int64_t recycle_rowsets_number = 0; |
2405 | 234 | int64_t recycle_segments_number = 0; |
2406 | 234 | int64_t recycle_rowsets_data_size = 0; |
2407 | 234 | int64_t recycle_rowsets_index_size = 0; |
2408 | 234 | int64_t recycle_restore_job_rowsets_number = 0; |
2409 | 234 | int64_t recycle_restore_job_segments_number = 0; |
2410 | 234 | int64_t recycle_restore_job_rowsets_data_size = 0; |
2411 | 234 | int64_t recycle_restore_job_rowsets_index_size = 0; |
2412 | 234 | int64_t max_rowset_version = 0; |
2413 | 234 | int64_t min_rowset_creation_time = INT64_MAX; |
2414 | 234 | int64_t max_rowset_creation_time = 0; |
2415 | 234 | int64_t min_rowset_expiration_time = INT64_MAX; |
2416 | 234 | int64_t max_rowset_expiration_time = 0; |
2417 | | |
2418 | 234 | DORIS_CLOUD_DEFER { |
2419 | 234 | auto cost = duration<float>(steady_clock::now() - start_time).count(); |
2420 | 234 | LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost) |
2421 | 234 | .tag("instance_id", instance_id_) |
2422 | 234 | .tag("tablet_id", tablet_id) |
2423 | 234 | .tag("recycle rowsets number", recycle_rowsets_number) |
2424 | 234 | .tag("recycle segments number", recycle_segments_number) |
2425 | 234 | .tag("all rowsets recycle data size", recycle_rowsets_data_size) |
2426 | 234 | .tag("all rowsets recycle index size", recycle_rowsets_index_size) |
2427 | 234 | .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number) |
2428 | 234 | .tag("recycle restore job segments number", recycle_restore_job_segments_number) |
2429 | 234 | .tag("all restore job rowsets recycle data size", |
2430 | 234 | recycle_restore_job_rowsets_data_size) |
2431 | 234 | .tag("all restore job rowsets recycle index size", |
2432 | 234 | recycle_restore_job_rowsets_index_size) |
2433 | 234 | .tag("max rowset version", max_rowset_version) |
2434 | 234 | .tag("min rowset creation time", min_rowset_creation_time) |
2435 | 234 | .tag("max rowset creation time", max_rowset_creation_time) |
2436 | 234 | .tag("min rowset expiration time", min_rowset_expiration_time) |
2437 | 234 | .tag("max rowset expiration time", max_rowset_expiration_time) |
2438 | 234 | .tag("ret", ret); |
2439 | 234 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv Line | Count | Source | 2418 | 234 | DORIS_CLOUD_DEFER { | 2419 | 234 | auto cost = duration<float>(steady_clock::now() - start_time).count(); | 2420 | 234 | LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost) | 2421 | 234 | .tag("instance_id", instance_id_) | 2422 | 234 | .tag("tablet_id", tablet_id) | 2423 | 234 | .tag("recycle rowsets number", recycle_rowsets_number) | 2424 | 234 | .tag("recycle segments number", recycle_segments_number) | 2425 | 234 | .tag("all rowsets recycle data size", recycle_rowsets_data_size) | 2426 | 234 | .tag("all rowsets recycle index size", recycle_rowsets_index_size) | 2427 | 234 | .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number) | 2428 | 234 | .tag("recycle restore job segments number", recycle_restore_job_segments_number) | 2429 | 234 | .tag("all restore job rowsets recycle data size", | 2430 | 234 | recycle_restore_job_rowsets_data_size) | 2431 | 234 | .tag("all restore job rowsets recycle index size", | 2432 | 234 | recycle_restore_job_rowsets_index_size) | 2433 | 234 | .tag("max rowset version", max_rowset_version) | 2434 | 234 | .tag("min rowset creation time", min_rowset_creation_time) | 2435 | 234 | .tag("max rowset creation time", max_rowset_creation_time) | 2436 | 234 | .tag("min rowset expiration time", min_rowset_expiration_time) | 2437 | 234 | .tag("max rowset expiration time", max_rowset_expiration_time) | 2438 | 234 | .tag("ret", ret); | 2439 | 234 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv |
2440 | | |
2441 | 234 | std::unique_ptr<Transaction> txn; |
2442 | 234 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
2443 | 0 | LOG_WARNING("failed to recycle tablet ") |
2444 | 0 | .tag("tablet id", tablet_id) |
2445 | 0 | .tag("instance_id", instance_id_) |
2446 | 0 | .tag("reason", "failed to create txn"); |
2447 | 0 | ret = -1; |
2448 | 0 | } |
2449 | 234 | GetRowsetResponse resp; |
2450 | 234 | std::string msg; |
2451 | 234 | MetaServiceCode code = MetaServiceCode::OK; |
2452 | | // get rowsets in tablet |
2453 | 234 | internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_, |
2454 | 234 | tablet_id, code, msg, &resp); |
2455 | 234 | if (code != MetaServiceCode::OK) { |
2456 | 0 | LOG_WARNING("failed to get rowsets of tablet when recycle tablet") |
2457 | 0 | .tag("tablet id", tablet_id) |
2458 | 0 | .tag("msg", msg) |
2459 | 0 | .tag("code", code) |
2460 | 0 | .tag("instance id", instance_id_); |
2461 | 0 | ret = -1; |
2462 | 0 | } |
2463 | 234 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", &resp); |
2464 | | |
2465 | 2.50k | for (const auto& rs_meta : resp.rowset_meta()) { |
2466 | | // The rowset has no resource id and segments when it was generated by compaction |
2467 | | // with multiple hole rowsets or it's version is [0-1], so we can skip it. |
2468 | 2.50k | if (!rs_meta.has_resource_id() && rs_meta.num_segments() == 0) { |
2469 | 0 | LOG_INFO("rowset meta does not have a resource id and no segments, skip this rowset") |
2470 | 0 | .tag("rs_meta", rs_meta.ShortDebugString()) |
2471 | 0 | .tag("instance_id", instance_id_) |
2472 | 0 | .tag("tablet_id", tablet_id); |
2473 | 0 | recycle_rowsets_number += 1; |
2474 | 0 | continue; |
2475 | 0 | } |
2476 | 2.50k | if (!rs_meta.has_resource_id()) { |
2477 | 1 | LOG_WARNING("rowset meta does not have a resource id, impossible!") |
2478 | 1 | .tag("rs_meta", rs_meta.ShortDebugString()) |
2479 | 1 | .tag("instance_id", instance_id_) |
2480 | 1 | .tag("tablet_id", tablet_id); |
2481 | 1 | return -1; |
2482 | 1 | } |
2483 | 2.50k | DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString(); |
2484 | 2.50k | auto it = accessor_map_.find(rs_meta.resource_id()); |
2485 | | // possible if the accessor is not initilized correctly |
2486 | 2.50k | if (it == accessor_map_.end()) [[unlikely]] { |
2487 | 1 | LOG_WARNING( |
2488 | 1 | "failed to find resource id when recycle tablet, skip this vault accessor " |
2489 | 1 | "recycle process") |
2490 | 1 | .tag("tablet id", tablet_id) |
2491 | 1 | .tag("instance_id", instance_id_) |
2492 | 1 | .tag("resource_id", rs_meta.resource_id()) |
2493 | 1 | .tag("rowset meta pb", rs_meta.ShortDebugString()); |
2494 | 1 | return -1; |
2495 | 1 | } |
2496 | 2.50k | recycle_rowsets_number += 1; |
2497 | 2.50k | recycle_segments_number += rs_meta.num_segments(); |
2498 | 2.50k | recycle_rowsets_data_size += rs_meta.data_disk_size(); |
2499 | 2.50k | recycle_rowsets_index_size += rs_meta.index_disk_size(); |
2500 | 2.50k | max_rowset_version = std::max(max_rowset_version, rs_meta.end_version()); |
2501 | 2.50k | min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time()); |
2502 | 2.50k | max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time()); |
2503 | 2.50k | min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration()); |
2504 | 2.50k | max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration()); |
2505 | 2.50k | resource_ids.emplace(rs_meta.resource_id()); |
2506 | 2.50k | } |
2507 | | |
2508 | | // get restore job rowset in tablet |
2509 | 232 | std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>> restore_job_rs_metas; |
2510 | 232 | scan_restore_job_rowset(txn.get(), instance_id_, tablet_id, code, msg, &restore_job_rs_metas); |
2511 | 232 | if (code != MetaServiceCode::OK) { |
2512 | 0 | LOG_WARNING("scan restore job rowsets failed when recycle tablet") |
2513 | 0 | .tag("tablet id", tablet_id) |
2514 | 0 | .tag("msg", msg) |
2515 | 0 | .tag("code", code) |
2516 | 0 | .tag("instance id", instance_id_); |
2517 | 0 | return -1; |
2518 | 0 | } |
2519 | | |
2520 | 232 | for (auto& [_, rs_meta] : restore_job_rs_metas) { |
2521 | 0 | if (!rs_meta.has_resource_id()) { |
2522 | 0 | LOG_WARNING("rowset meta does not have a resource id, impossible!") |
2523 | 0 | .tag("rs_meta", rs_meta.ShortDebugString()) |
2524 | 0 | .tag("instance_id", instance_id_) |
2525 | 0 | .tag("tablet_id", tablet_id); |
2526 | 0 | return -1; |
2527 | 0 | } |
2528 | | |
2529 | 0 | auto it = accessor_map_.find(rs_meta.resource_id()); |
2530 | | // possible if the accessor is not initilized correctly |
2531 | 0 | if (it == accessor_map_.end()) [[unlikely]] { |
2532 | 0 | LOG_WARNING( |
2533 | 0 | "failed to find resource id when recycle tablet, skip this vault accessor " |
2534 | 0 | "recycle process") |
2535 | 0 | .tag("tablet id", tablet_id) |
2536 | 0 | .tag("instance_id", instance_id_) |
2537 | 0 | .tag("resource_id", rs_meta.resource_id()) |
2538 | 0 | .tag("rowset meta pb", rs_meta.ShortDebugString()); |
2539 | 0 | return -1; |
2540 | 0 | } |
2541 | 0 | recycle_restore_job_rowsets_number += 1; |
2542 | 0 | recycle_restore_job_segments_number += rs_meta.num_segments(); |
2543 | 0 | recycle_restore_job_rowsets_data_size += rs_meta.data_disk_size(); |
2544 | 0 | recycle_restore_job_rowsets_index_size += rs_meta.index_disk_size(); |
2545 | 0 | resource_ids.emplace(rs_meta.resource_id()); |
2546 | 0 | } |
2547 | | |
2548 | 232 | LOG_INFO("recycle tablet start to delete object") |
2549 | 232 | .tag("instance id", instance_id_) |
2550 | 232 | .tag("tablet id", tablet_id) |
2551 | 232 | .tag("recycle tablet resource ids are", |
2552 | 232 | std::accumulate(resource_ids.begin(), resource_ids.end(), std::string(), |
2553 | 232 | [](std::string rs_id, const auto& it) { |
2554 | 203 | return rs_id.empty() ? it : rs_id + ", " + it; |
2555 | 203 | })); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_ Line | Count | Source | 2553 | 203 | [](std::string rs_id, const auto& it) { | 2554 | 203 | return rs_id.empty() ? it : rs_id + ", " + it; | 2555 | 203 | })); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_ |
2556 | | |
2557 | 232 | SyncExecutor<std::pair<int, std::string>> concurrent_delete_executor( |
2558 | 232 | _thread_pool_group.s3_producer_pool, |
2559 | 232 | fmt::format("delete tablet {} s3 rowset", tablet_id), |
2560 | 232 | [](const std::pair<int, std::string>& ret) { return ret.first != 0; }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE Line | Count | Source | 2560 | 203 | [](const std::pair<int, std::string>& ret) { return ret.first != 0; }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE |
2561 | | |
2562 | | // delete all rowset data in this tablet |
2563 | | // ATTN: there may be data leak if not all accessor initilized successfully |
2564 | | // partial data deleted if the tablet is stored cross-storage vault |
2565 | | // vault id is not attached to TabletMeta... |
2566 | 232 | for (const auto& resource_id : resource_ids) { |
2567 | 203 | g_bvar_recycler_vault_recycle_task_status.put({instance_id_, resource_id, "submitted"}, 1); |
2568 | 203 | concurrent_delete_executor.add( |
2569 | 203 | [&, rs_id = resource_id, |
2570 | 203 | accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) { |
2571 | 203 | std::unique_ptr<int, std::function<void(int*)>> defer( |
2572 | 203 | (int*)0x01, [&](int*) { metrics_context.report(); }); recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11EvENKUlPiE_clES5_ Line | Count | Source | 2572 | 203 | (int*)0x01, [&](int*) { metrics_context.report(); }); |
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11EvENKUlPiE_clES5_ |
2573 | 203 | int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id)); |
2574 | 203 | if (res != 0) { |
2575 | 1 | LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id |
2576 | 1 | << " path=" << accessor_ptr->uri(); |
2577 | 1 | return std::make_pair(-1, rs_id); |
2578 | 1 | } |
2579 | 202 | return std::make_pair(0, rs_id); |
2580 | 203 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev Line | Count | Source | 2570 | 203 | accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) { | 2571 | 203 | std::unique_ptr<int, std::function<void(int*)>> defer( | 2572 | 203 | (int*)0x01, [&](int*) { metrics_context.report(); }); | 2573 | 203 | int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id)); | 2574 | 203 | if (res != 0) { | 2575 | 1 | LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id | 2576 | 1 | << " path=" << accessor_ptr->uri(); | 2577 | 1 | return std::make_pair(-1, rs_id); | 2578 | 1 | } | 2579 | 202 | return std::make_pair(0, rs_id); | 2580 | 203 | }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev |
2581 | 203 | } |
2582 | | |
2583 | 232 | bool finished = true; |
2584 | 232 | std::vector<std::pair<int, std::string>> rets = concurrent_delete_executor.when_all(&finished); |
2585 | 232 | for (auto& r : rets) { |
2586 | 203 | if (r.first != 0) { |
2587 | 1 | g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "error"}, 1); |
2588 | 1 | ret = -1; |
2589 | 1 | } |
2590 | 203 | g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "completed"}, 1); |
2591 | 203 | } |
2592 | 232 | ret = finished ? ret : -1; |
2593 | | |
2594 | 232 | if (ret != 0) { // failed recycle tablet data |
2595 | 1 | LOG_WARNING("ret!=0") |
2596 | 1 | .tag("finished", finished) |
2597 | 1 | .tag("ret", ret) |
2598 | 1 | .tag("instance_id", instance_id_) |
2599 | 1 | .tag("tablet_id", tablet_id); |
2600 | 1 | return ret; |
2601 | 1 | } |
2602 | | |
2603 | 231 | tablet_metrics_context_.total_recycled_data_size += |
2604 | 231 | recycle_rowsets_data_size + recycle_rowsets_index_size; |
2605 | 231 | tablet_metrics_context_.total_recycled_num += 1; |
2606 | 231 | segment_metrics_context_.total_recycled_num += recycle_segments_number; |
2607 | 231 | segment_metrics_context_.total_recycled_data_size += |
2608 | 231 | recycle_rowsets_data_size + recycle_rowsets_index_size; |
2609 | 231 | metrics_context.total_recycled_data_size += |
2610 | 231 | recycle_rowsets_data_size + recycle_rowsets_index_size; |
2611 | 231 | tablet_metrics_context_.report(); |
2612 | 231 | segment_metrics_context_.report(); |
2613 | 231 | metrics_context.report(); |
2614 | | |
2615 | 231 | txn.reset(); |
2616 | 231 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
2617 | 0 | LOG_WARNING("failed to recycle tablet ") |
2618 | 0 | .tag("tablet id", tablet_id) |
2619 | 0 | .tag("instance_id", instance_id_) |
2620 | 0 | .tag("reason", "failed to create txn"); |
2621 | 0 | ret = -1; |
2622 | 0 | } |
2623 | | // delete all rowset kv in this tablet |
2624 | 231 | txn->remove(rs_key0, rs_key1); |
2625 | 231 | txn->remove(recyc_rs_key0, recyc_rs_key1); |
2626 | 231 | txn->remove(restore_job_rs_key0, restore_job_rs_key1); |
2627 | | |
2628 | | // remove delete bitmap for MoW table |
2629 | 231 | std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id}); |
2630 | 231 | txn->remove(pending_key); |
2631 | 231 | std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0}); |
2632 | 231 | std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0}); |
2633 | 231 | txn->remove(delete_bitmap_start, delete_bitmap_end); |
2634 | | |
2635 | 231 | std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""}); |
2636 | 231 | std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""}); |
2637 | 231 | txn->remove(dbm_start_key, dbm_end_key); |
2638 | 231 | LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key) |
2639 | 231 | << " end=" << hex(dbm_end_key); |
2640 | | |
2641 | 231 | TxnErrorCode err = txn->commit(); |
2642 | 231 | if (err != TxnErrorCode::TXN_OK) { |
2643 | 0 | LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err; |
2644 | 0 | ret = -1; |
2645 | 0 | } |
2646 | | |
2647 | 231 | if (ret == 0) { |
2648 | | // All object files under tablet have been deleted |
2649 | 231 | std::lock_guard lock(recycled_tablets_mtx_); |
2650 | 231 | recycled_tablets_.insert(tablet_id); |
2651 | 231 | } |
2652 | | |
2653 | 231 | return ret; |
2654 | 232 | } |
2655 | | |
2656 | | int InstanceRecycler::recycle_versioned_tablet(int64_t tablet_id, |
2657 | 6 | RecyclerMetricsContext& metrics_context) { |
2658 | 6 | int ret = 0; |
2659 | 6 | auto start_time = steady_clock::now(); |
2660 | | |
2661 | 6 | TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0); |
2662 | | |
2663 | | // collect resource ids |
2664 | 6 | std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0}); |
2665 | 6 | std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0}); |
2666 | 6 | std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""}); |
2667 | 6 | std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""}); |
2668 | | |
2669 | 6 | int64_t recycle_rowsets_number = 0; |
2670 | 6 | int64_t recycle_segments_number = 0; |
2671 | 6 | int64_t recycle_rowsets_data_size = 0; |
2672 | 6 | int64_t recycle_rowsets_index_size = 0; |
2673 | 6 | int64_t max_rowset_version = 0; |
2674 | 6 | int64_t min_rowset_creation_time = INT64_MAX; |
2675 | 6 | int64_t max_rowset_creation_time = 0; |
2676 | 6 | int64_t min_rowset_expiration_time = INT64_MAX; |
2677 | 6 | int64_t max_rowset_expiration_time = 0; |
2678 | | |
2679 | 6 | DORIS_CLOUD_DEFER { |
2680 | 6 | auto cost = duration<float>(steady_clock::now() - start_time).count(); |
2681 | 6 | LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost) |
2682 | 6 | .tag("instance_id", instance_id_) |
2683 | 6 | .tag("tablet_id", tablet_id) |
2684 | 6 | .tag("recycle rowsets number", recycle_rowsets_number) |
2685 | 6 | .tag("recycle segments number", recycle_segments_number) |
2686 | 6 | .tag("all rowsets recycle data size", recycle_rowsets_data_size) |
2687 | 6 | .tag("all rowsets recycle index size", recycle_rowsets_index_size) |
2688 | 6 | .tag("max rowset version", max_rowset_version) |
2689 | 6 | .tag("min rowset creation time", min_rowset_creation_time) |
2690 | 6 | .tag("max rowset creation time", max_rowset_creation_time) |
2691 | 6 | .tag("min rowset expiration time", min_rowset_expiration_time) |
2692 | 6 | .tag("max rowset expiration time", max_rowset_expiration_time) |
2693 | 6 | .tag("ret", ret); |
2694 | 6 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv Line | Count | Source | 2679 | 6 | DORIS_CLOUD_DEFER { | 2680 | 6 | auto cost = duration<float>(steady_clock::now() - start_time).count(); | 2681 | 6 | LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost) | 2682 | 6 | .tag("instance_id", instance_id_) | 2683 | 6 | .tag("tablet_id", tablet_id) | 2684 | 6 | .tag("recycle rowsets number", recycle_rowsets_number) | 2685 | 6 | .tag("recycle segments number", recycle_segments_number) | 2686 | 6 | .tag("all rowsets recycle data size", recycle_rowsets_data_size) | 2687 | 6 | .tag("all rowsets recycle index size", recycle_rowsets_index_size) | 2688 | 6 | .tag("max rowset version", max_rowset_version) | 2689 | 6 | .tag("min rowset creation time", min_rowset_creation_time) | 2690 | 6 | .tag("max rowset creation time", max_rowset_creation_time) | 2691 | 6 | .tag("min rowset expiration time", min_rowset_expiration_time) | 2692 | 6 | .tag("max rowset expiration time", max_rowset_expiration_time) | 2693 | 6 | .tag("ret", ret); | 2694 | 6 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv |
2695 | | |
2696 | 6 | std::unique_ptr<Transaction> txn; |
2697 | 6 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
2698 | 0 | LOG_WARNING("failed to recycle tablet ") |
2699 | 0 | .tag("tablet id", tablet_id) |
2700 | 0 | .tag("instance_id", instance_id_) |
2701 | 0 | .tag("reason", "failed to create txn"); |
2702 | 0 | ret = -1; |
2703 | 0 | } |
2704 | | |
2705 | | // Read the last version of load and compact rowsets, the previous rowsets will be recycled |
2706 | | // by the related operation logs. |
2707 | 6 | std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> load_rowset_metas; |
2708 | 6 | std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> compact_rowset_metas; |
2709 | 6 | MetaReader meta_reader(instance_id_); |
2710 | 6 | TxnErrorCode err = meta_reader.get_load_rowset_metas(txn.get(), tablet_id, &load_rowset_metas); |
2711 | 6 | if (err == TxnErrorCode::TXN_OK) { |
2712 | 6 | err = meta_reader.get_compact_rowset_metas(txn.get(), tablet_id, &compact_rowset_metas); |
2713 | 6 | } |
2714 | 6 | if (err != TxnErrorCode::TXN_OK) { |
2715 | 0 | LOG_WARNING("failed to get rowsets of tablet when recycle tablet") |
2716 | 0 | .tag("tablet id", tablet_id) |
2717 | 0 | .tag("err", err) |
2718 | 0 | .tag("instance id", instance_id_); |
2719 | 0 | ret = -1; |
2720 | 0 | } |
2721 | | |
2722 | 6 | LOG_INFO("recycle versioned tablet get {} load rowsets and {} compact rowsets", |
2723 | 6 | load_rowset_metas.size(), compact_rowset_metas.size()) |
2724 | 6 | .tag("instance_id", instance_id_) |
2725 | 6 | .tag("tablet_id", tablet_id); |
2726 | | |
2727 | 6 | SyncExecutor<int> concurrent_delete_executor( |
2728 | 6 | _thread_pool_group.s3_producer_pool, |
2729 | 6 | fmt::format("delete tablet {} s3 rowset", tablet_id), |
2730 | 30 | [](const int& ret) { return ret != 0; }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi Line | Count | Source | 2730 | 30 | [](const int& ret) { return ret != 0; }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi |
2731 | | |
2732 | 30 | auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) { |
2733 | 30 | recycle_rowsets_number += 1; |
2734 | 30 | recycle_segments_number += rs_meta.num_segments(); |
2735 | 30 | recycle_rowsets_data_size += rs_meta.data_disk_size(); |
2736 | 30 | recycle_rowsets_index_size += rs_meta.index_disk_size(); |
2737 | 30 | max_rowset_version = std::max(max_rowset_version, rs_meta.end_version()); |
2738 | 30 | min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time()); |
2739 | 30 | max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time()); |
2740 | 30 | min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration()); |
2741 | 30 | max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration()); |
2742 | 30 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE Line | Count | Source | 2732 | 30 | auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) { | 2733 | 30 | recycle_rowsets_number += 1; | 2734 | 30 | recycle_segments_number += rs_meta.num_segments(); | 2735 | 30 | recycle_rowsets_data_size += rs_meta.data_disk_size(); | 2736 | 30 | recycle_rowsets_index_size += rs_meta.index_disk_size(); | 2737 | 30 | max_rowset_version = std::max(max_rowset_version, rs_meta.end_version()); | 2738 | 30 | min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time()); | 2739 | 30 | max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time()); | 2740 | 30 | min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration()); | 2741 | 30 | max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration()); | 2742 | 30 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE |
2743 | | |
2744 | 30 | for (const auto& [rs_meta, versionstamp] : load_rowset_metas) { |
2745 | 30 | update_rowset_stats(rs_meta); |
2746 | 30 | concurrent_delete_executor.add([tablet_id, versionstamp, rs_meta_pb = rs_meta, this]() { |
2747 | 30 | std::string rowset_key = versioned::meta_rowset_load_key( |
2748 | 30 | {instance_id_, tablet_id, rs_meta_pb.end_version()}); |
2749 | 30 | return recycle_rowset_meta_and_data(encode_versioned_key(rowset_key, versionstamp), |
2750 | 30 | rs_meta_pb); |
2751 | 30 | }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clEv Line | Count | Source | 2746 | 30 | concurrent_delete_executor.add([tablet_id, versionstamp, rs_meta_pb = rs_meta, this]() { | 2747 | 30 | std::string rowset_key = versioned::meta_rowset_load_key( | 2748 | 30 | {instance_id_, tablet_id, rs_meta_pb.end_version()}); | 2749 | 30 | return recycle_rowset_meta_and_data(encode_versioned_key(rowset_key, versionstamp), | 2750 | 30 | rs_meta_pb); | 2751 | 30 | }); |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clEv |
2752 | 30 | } |
2753 | | |
2754 | 6 | for (const auto& [rs_meta, versionstamp] : compact_rowset_metas) { |
2755 | 0 | update_rowset_stats(rs_meta); |
2756 | 0 | concurrent_delete_executor.add([tablet_id, versionstamp, rs_meta_pb = rs_meta, this]() { |
2757 | 0 | std::string rowset_key = versioned::meta_rowset_compact_key( |
2758 | 0 | {instance_id_, tablet_id, rs_meta_pb.end_version()}); |
2759 | 0 | return recycle_rowset_meta_and_data(encode_versioned_key(rowset_key, versionstamp), |
2760 | 0 | rs_meta_pb); |
2761 | 0 | }); Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_4clEv Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_4clEv |
2762 | 0 | } |
2763 | | |
2764 | 6 | auto handle_recycle_rowset_kv = [&](std::string_view k, std::string_view v) { |
2765 | 0 | RecycleRowsetPB recycle_rowset; |
2766 | 0 | if (!recycle_rowset.ParseFromArray(v.data(), v.size())) { |
2767 | 0 | LOG_WARNING("malformed recycle rowset").tag("key", hex(k)); |
2768 | 0 | return -1; |
2769 | 0 | } |
2770 | 0 | if (!recycle_rowset.has_type()) { // compatible with old version `RecycleRowsetPB` |
2771 | 0 | if (!recycle_rowset.has_resource_id()) [[unlikely]] { // impossible |
2772 | | // in old version, keep this key-value pair and it needs to be checked manually |
2773 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); |
2774 | 0 | return -1; |
2775 | 0 | } |
2776 | 0 | if (recycle_rowset.resource_id().empty()) [[unlikely]] { |
2777 | | // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv. |
2778 | 0 | LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key=" |
2779 | 0 | << hex(k) << " value=" << proto_to_json(recycle_rowset); |
2780 | 0 | return -1; |
2781 | 0 | } |
2782 | | // decode rowset_id |
2783 | 0 | auto k1 = k; |
2784 | 0 | k1.remove_prefix(1); |
2785 | 0 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
2786 | 0 | decode_key(&k1, &out); |
2787 | | // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB |
2788 | 0 | const auto& rowset_id = std::get<std::string>(std::get<0>(out[4])); |
2789 | 0 | LOG_INFO("delete rowset data") |
2790 | 0 | .tag("instance_id", instance_id_) |
2791 | 0 | .tag("tablet_id", tablet_id) |
2792 | 0 | .tag("rowset_id", rowset_id); |
2793 | |
|
2794 | 0 | concurrent_delete_executor.add( |
2795 | 0 | [tablet_id, resource_id = recycle_rowset.resource_id(), rowset_id, this]() { |
2796 | | // delete by prefix, the recycle rowset key will be deleted by range later. |
2797 | 0 | return delete_rowset_data(resource_id, tablet_id, rowset_id); |
2798 | 0 | }); Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_5clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_5clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv |
2799 | 0 | } else { |
2800 | 0 | concurrent_delete_executor.add( |
2801 | 0 | [k = std::string(k), recycle_rowset = std::move(recycle_rowset), this]() { |
2802 | 0 | return recycle_rowset_meta_and_data(k, recycle_rowset.rowset_meta()); |
2803 | 0 | }); Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_5clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE0_clEv Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_5clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE0_clEv |
2804 | 0 | } |
2805 | 0 | return 0; |
2806 | 0 | }; Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_5clESt17basic_string_viewIcSt11char_traitsIcEES8_ Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_5clESt17basic_string_viewIcSt11char_traitsIcEES8_ |
2807 | | |
2808 | 6 | if (scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_recycle_rowset_kv))) { |
2809 | 0 | LOG_WARNING("failed to recycle rowset kv of tablet") |
2810 | 0 | .tag("tablet id", tablet_id) |
2811 | 0 | .tag("instance_id", instance_id_) |
2812 | 0 | .tag("reason", "failed to scan and recycle RecycleRowsetPB"); |
2813 | 0 | ret = -1; |
2814 | 0 | } |
2815 | | |
2816 | 6 | bool finished = true; |
2817 | 6 | std::vector<int> rets = concurrent_delete_executor.when_all(&finished); |
2818 | 30 | for (int r : rets) { |
2819 | 30 | if (r != 0) { |
2820 | 0 | ret = -1; |
2821 | 0 | } |
2822 | 30 | } |
2823 | | |
2824 | 6 | ret = finished ? ret : -1; |
2825 | | |
2826 | 6 | if (ret != 0) { // failed recycle tablet data |
2827 | 0 | LOG_WARNING("ret!=0") |
2828 | 0 | .tag("finished", finished) |
2829 | 0 | .tag("ret", ret) |
2830 | 0 | .tag("instance_id", instance_id_) |
2831 | 0 | .tag("tablet_id", tablet_id); |
2832 | 0 | return ret; |
2833 | 0 | } |
2834 | | |
2835 | 6 | tablet_metrics_context_.total_recycled_data_size += |
2836 | 6 | recycle_rowsets_data_size + recycle_rowsets_index_size; |
2837 | 6 | tablet_metrics_context_.total_recycled_num += 1; |
2838 | 6 | segment_metrics_context_.total_recycled_num += recycle_segments_number; |
2839 | 6 | segment_metrics_context_.total_recycled_data_size += |
2840 | 6 | recycle_rowsets_data_size + recycle_rowsets_index_size; |
2841 | 6 | metrics_context.total_recycled_data_size += |
2842 | 6 | recycle_rowsets_data_size + recycle_rowsets_index_size; |
2843 | 6 | tablet_metrics_context_.report(); |
2844 | 6 | segment_metrics_context_.report(); |
2845 | 6 | metrics_context.report(); |
2846 | | |
2847 | 6 | txn.reset(); |
2848 | 6 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
2849 | 0 | LOG_WARNING("failed to recycle tablet ") |
2850 | 0 | .tag("tablet id", tablet_id) |
2851 | 0 | .tag("instance_id", instance_id_) |
2852 | 0 | .tag("reason", "failed to create txn"); |
2853 | 0 | ret = -1; |
2854 | 0 | } |
2855 | | // delete all rowset kv in this tablet |
2856 | 6 | txn->remove(rs_key0, rs_key1); |
2857 | 6 | txn->remove(recyc_rs_key0, recyc_rs_key1); |
2858 | | |
2859 | | // remove delete bitmap for MoW table |
2860 | 6 | std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id}); |
2861 | 6 | txn->remove(pending_key); |
2862 | 6 | std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0}); |
2863 | 6 | std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0}); |
2864 | 6 | txn->remove(delete_bitmap_start, delete_bitmap_end); |
2865 | | |
2866 | 6 | std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""}); |
2867 | 6 | std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""}); |
2868 | 6 | txn->remove(dbm_start_key, dbm_end_key); |
2869 | 6 | LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key) |
2870 | 6 | << " end=" << hex(dbm_end_key); |
2871 | | |
2872 | 6 | std::string versioned_idx_key = versioned::tablet_index_key({instance_id_, tablet_id}); |
2873 | 6 | std::string tablet_index_val; |
2874 | 6 | err = txn->get(versioned_idx_key, &tablet_index_val); |
2875 | 6 | if (err != TxnErrorCode::TXN_KEY_NOT_FOUND && err != TxnErrorCode::TXN_OK) { |
2876 | 0 | LOG_WARNING("failed to get tablet index kv") |
2877 | 0 | .tag("instance_id", instance_id_) |
2878 | 0 | .tag("tablet_id", tablet_id) |
2879 | 0 | .tag("err", err); |
2880 | 0 | ret = -1; |
2881 | 6 | } else if (err == TxnErrorCode::TXN_OK) { |
2882 | | // If the tablet index kv exists, we need to delete it |
2883 | 5 | TabletIndexPB tablet_index_pb; |
2884 | 5 | if (!tablet_index_pb.ParseFromString(tablet_index_val)) { |
2885 | 0 | LOG_WARNING("failed to parse tablet index pb") |
2886 | 0 | .tag("instance_id", instance_id_) |
2887 | 0 | .tag("tablet_id", tablet_id); |
2888 | 0 | ret = -1; |
2889 | 5 | } else { |
2890 | 5 | std::string versioned_inverted_idx_key = versioned::tablet_inverted_index_key( |
2891 | 5 | {instance_id_, tablet_index_pb.db_id(), tablet_index_pb.table_id(), |
2892 | 5 | tablet_index_pb.index_id(), tablet_index_pb.partition_id(), tablet_id}); |
2893 | 5 | txn->remove(versioned_inverted_idx_key); |
2894 | 5 | txn->remove(versioned_idx_key); |
2895 | 5 | } |
2896 | 5 | } |
2897 | | |
2898 | 6 | err = txn->commit(); |
2899 | 6 | if (err != TxnErrorCode::TXN_OK) { |
2900 | 0 | LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err; |
2901 | 0 | ret = -1; |
2902 | 0 | } |
2903 | | |
2904 | 6 | if (ret == 0) { |
2905 | | // All object files under tablet have been deleted |
2906 | 6 | std::lock_guard lock(recycled_tablets_mtx_); |
2907 | 6 | recycled_tablets_.insert(tablet_id); |
2908 | 6 | } |
2909 | | |
2910 | 6 | return ret; |
2911 | 6 | } |
2912 | | |
2913 | 18 | int InstanceRecycler::recycle_rowsets() { |
2914 | 18 | if (instance_info_.has_multi_version_status() && |
2915 | 18 | instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED) { |
2916 | 5 | return recycle_versioned_rowsets(); |
2917 | 5 | } |
2918 | | |
2919 | 13 | const std::string task_name = "recycle_rowsets"; |
2920 | 13 | int64_t num_scanned = 0; |
2921 | 13 | int64_t num_expired = 0; |
2922 | 13 | int64_t num_prepare = 0; |
2923 | 13 | int64_t num_compacted = 0; |
2924 | 13 | int64_t num_empty_rowset = 0; |
2925 | 13 | size_t total_rowset_key_size = 0; |
2926 | 13 | size_t total_rowset_value_size = 0; |
2927 | 13 | size_t expired_rowset_size = 0; |
2928 | 13 | std::atomic_long num_recycled = 0; |
2929 | 13 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
2930 | | |
2931 | 13 | RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""}; |
2932 | 13 | RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""}; |
2933 | 13 | std::string recyc_rs_key0; |
2934 | 13 | std::string recyc_rs_key1; |
2935 | 13 | recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0); |
2936 | 13 | recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1); |
2937 | | |
2938 | 13 | LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_); |
2939 | | |
2940 | 13 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
2941 | 13 | register_recycle_task(task_name, start_time); |
2942 | | |
2943 | 13 | DORIS_CLOUD_DEFER { |
2944 | 13 | unregister_recycle_task(task_name); |
2945 | 13 | int64_t cost = |
2946 | 13 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
2947 | 13 | metrics_context.finish_report(); |
2948 | 13 | LOG_WARNING("recycle rowsets finished, cost={}s", cost) |
2949 | 13 | .tag("instance_id", instance_id_) |
2950 | 13 | .tag("num_scanned", num_scanned) |
2951 | 13 | .tag("num_expired", num_expired) |
2952 | 13 | .tag("num_recycled", num_recycled) |
2953 | 13 | .tag("num_recycled.prepare", num_prepare) |
2954 | 13 | .tag("num_recycled.compacted", num_compacted) |
2955 | 13 | .tag("num_recycled.empty_rowset", num_empty_rowset) |
2956 | 13 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) |
2957 | 13 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) |
2958 | 13 | .tag("expired_rowset_meta_size", expired_rowset_size); |
2959 | 13 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv Line | Count | Source | 2943 | 13 | DORIS_CLOUD_DEFER { | 2944 | 13 | unregister_recycle_task(task_name); | 2945 | 13 | int64_t cost = | 2946 | 13 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 2947 | 13 | metrics_context.finish_report(); | 2948 | 13 | LOG_WARNING("recycle rowsets finished, cost={}s", cost) | 2949 | 13 | .tag("instance_id", instance_id_) | 2950 | 13 | .tag("num_scanned", num_scanned) | 2951 | 13 | .tag("num_expired", num_expired) | 2952 | 13 | .tag("num_recycled", num_recycled) | 2953 | 13 | .tag("num_recycled.prepare", num_prepare) | 2954 | 13 | .tag("num_recycled.compacted", num_compacted) | 2955 | 13 | .tag("num_recycled.empty_rowset", num_empty_rowset) | 2956 | 13 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) | 2957 | 13 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) | 2958 | 13 | .tag("expired_rowset_meta_size", expired_rowset_size); | 2959 | 13 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv |
2960 | | |
2961 | 13 | std::vector<std::string> rowset_keys; |
2962 | | // rowset_id -> rowset_meta |
2963 | | // store rowset id and meta for statistics rs size when delete |
2964 | 13 | std::map<std::string, doris::RowsetMetaCloudPB> rowsets; |
2965 | | |
2966 | | // Store keys of rowset recycled by background workers |
2967 | 13 | std::mutex async_recycled_rowset_keys_mutex; |
2968 | 13 | std::vector<std::string> async_recycled_rowset_keys; |
2969 | 13 | auto worker_pool = std::make_unique<SimpleThreadPool>( |
2970 | 13 | config::instance_recycler_worker_pool_size, "recycle_rowsets"); |
2971 | 13 | worker_pool->start(); |
2972 | | // TODO bacth delete |
2973 | 4.00k | auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) { |
2974 | 4.00k | std::string dbm_start_key = |
2975 | 4.00k | versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id}); |
2976 | 4.00k | std::string dbm_end_key = dbm_start_key; |
2977 | 4.00k | encode_int64(INT64_MAX, &dbm_end_key); |
2978 | 4.00k | auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key); |
2979 | 4.00k | if (ret != 0) { |
2980 | 0 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id=" |
2981 | 0 | << instance_id_; |
2982 | 0 | } |
2983 | 4.00k | return ret; |
2984 | 4.00k | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 2973 | 4.00k | auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) { | 2974 | 4.00k | std::string dbm_start_key = | 2975 | 4.00k | versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id}); | 2976 | 4.00k | std::string dbm_end_key = dbm_start_key; | 2977 | 4.00k | encode_int64(INT64_MAX, &dbm_end_key); | 2978 | 4.00k | auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key); | 2979 | 4.00k | if (ret != 0) { | 2980 | 0 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id=" | 2981 | 0 | << instance_id_; | 2982 | 0 | } | 2983 | 4.00k | return ret; | 2984 | 4.00k | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE |
2985 | 13 | auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id, |
2986 | 900 | int64_t tablet_id, const std::string& rowset_id) { |
2987 | | // Try to delete rowset data in background thread |
2988 | 900 | int ret = worker_pool->submit_with_timeout( |
2989 | 900 | [&, resource_id, tablet_id, rowset_id, key]() mutable { |
2990 | 799 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { |
2991 | 0 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); |
2992 | 0 | return; |
2993 | 0 | } |
2994 | 799 | std::vector<std::string> keys; |
2995 | 799 | { |
2996 | 799 | std::lock_guard lock(async_recycled_rowset_keys_mutex); |
2997 | 799 | async_recycled_rowset_keys.push_back(std::move(key)); |
2998 | 799 | if (async_recycled_rowset_keys.size() > 100) { |
2999 | 7 | keys.swap(async_recycled_rowset_keys); |
3000 | 7 | } |
3001 | 799 | } |
3002 | 799 | delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id); |
3003 | 799 | if (keys.empty()) return; |
3004 | 7 | if (txn_remove(txn_kv_.get(), keys) != 0) { |
3005 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" |
3006 | 0 | << instance_id_; |
3007 | 7 | } else { |
3008 | 7 | num_recycled.fetch_add(keys.size(), std::memory_order_relaxed); |
3009 | 7 | check_recycle_task(instance_id_, "recycle_rowsets", num_scanned, |
3010 | 7 | num_recycled, start_time); |
3011 | 7 | } |
3012 | 7 | }, recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv Line | Count | Source | 2989 | 799 | [&, resource_id, tablet_id, rowset_id, key]() mutable { | 2990 | 799 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { | 2991 | 0 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); | 2992 | 0 | return; | 2993 | 0 | } | 2994 | 799 | std::vector<std::string> keys; | 2995 | 799 | { | 2996 | 799 | std::lock_guard lock(async_recycled_rowset_keys_mutex); | 2997 | 799 | async_recycled_rowset_keys.push_back(std::move(key)); | 2998 | 799 | if (async_recycled_rowset_keys.size() > 100) { | 2999 | 7 | keys.swap(async_recycled_rowset_keys); | 3000 | 7 | } | 3001 | 799 | } | 3002 | 799 | delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id); | 3003 | 799 | if (keys.empty()) return; | 3004 | 7 | if (txn_remove(txn_kv_.get(), keys) != 0) { | 3005 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" | 3006 | 0 | << instance_id_; | 3007 | 7 | } else { | 3008 | 7 | num_recycled.fetch_add(keys.size(), std::memory_order_relaxed); | 3009 | 7 | check_recycle_task(instance_id_, "recycle_rowsets", num_scanned, | 3010 | 7 | num_recycled, start_time); | 3011 | 7 | } | 3012 | 7 | }, |
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv |
3013 | 900 | 0); |
3014 | 900 | if (ret == 0) return 0; |
3015 | | // Submit task failed, delete rowset data in current thread |
3016 | 101 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { |
3017 | 0 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); |
3018 | 0 | return -1; |
3019 | 0 | } |
3020 | 101 | if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) { |
3021 | 0 | return -1; |
3022 | 0 | } |
3023 | 101 | rowset_keys.push_back(std::move(key)); |
3024 | 101 | return 0; |
3025 | 101 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ Line | Count | Source | 2986 | 900 | int64_t tablet_id, const std::string& rowset_id) { | 2987 | | // Try to delete rowset data in background thread | 2988 | 900 | int ret = worker_pool->submit_with_timeout( | 2989 | 900 | [&, resource_id, tablet_id, rowset_id, key]() mutable { | 2990 | 900 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { | 2991 | 900 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); | 2992 | 900 | return; | 2993 | 900 | } | 2994 | 900 | std::vector<std::string> keys; | 2995 | 900 | { | 2996 | 900 | std::lock_guard lock(async_recycled_rowset_keys_mutex); | 2997 | 900 | async_recycled_rowset_keys.push_back(std::move(key)); | 2998 | 900 | if (async_recycled_rowset_keys.size() > 100) { | 2999 | 900 | keys.swap(async_recycled_rowset_keys); | 3000 | 900 | } | 3001 | 900 | } | 3002 | 900 | delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id); | 3003 | 900 | if (keys.empty()) return; | 3004 | 900 | if (txn_remove(txn_kv_.get(), keys) != 0) { | 3005 | 900 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" | 3006 | 900 | << instance_id_; | 3007 | 900 | } else { | 3008 | 900 | num_recycled.fetch_add(keys.size(), std::memory_order_relaxed); | 3009 | 900 | check_recycle_task(instance_id_, "recycle_rowsets", num_scanned, | 3010 | 900 | num_recycled, start_time); | 3011 | 900 | } | 3012 | 900 | }, | 3013 | 900 | 0); | 3014 | 900 | if (ret == 0) return 0; | 3015 | | // Submit task failed, delete rowset data in current thread | 3016 | 101 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { | 3017 | 0 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); | 3018 | 0 | return -1; | 3019 | 0 | } | 3020 | 101 | if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) { | 3021 | 0 | return -1; | 3022 | 0 | } | 3023 | 
101 | rowset_keys.push_back(std::move(key)); | 3024 | 101 | return 0; | 3025 | 101 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ |
3026 | | |
3027 | 13 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
3028 | | |
3029 | 4.00k | auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int { |
3030 | 4.00k | ++num_scanned; |
3031 | 4.00k | total_rowset_key_size += k.size(); |
3032 | 4.00k | total_rowset_value_size += v.size(); |
3033 | 4.00k | RecycleRowsetPB rowset; |
3034 | 4.00k | if (!rowset.ParseFromArray(v.data(), v.size())) { |
3035 | 0 | LOG_WARNING("malformed recycle rowset").tag("key", hex(k)); |
3036 | 0 | return -1; |
3037 | 0 | } |
3038 | | |
3039 | 4.00k | int64_t current_time = ::time(nullptr); |
3040 | 4.00k | int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts); |
3041 | | |
3042 | 4.00k | VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned |
3043 | 0 | << " num_expired=" << num_expired << " expiration=" << expiration |
3044 | 0 | << " RecycleRowsetPB=" << rowset.ShortDebugString(); |
3045 | 4.00k | if (current_time < expiration) { // not expired |
3046 | 0 | return 0; |
3047 | 0 | } |
3048 | 4.00k | ++num_expired; |
3049 | 4.00k | expired_rowset_size += v.size(); |
3050 | 4.00k | if (!rowset.has_type()) { // old version `RecycleRowsetPB` |
3051 | 250 | if (!rowset.has_resource_id()) [[unlikely]] { // impossible |
3052 | | // in old version, keep this key-value pair and it needs to be checked manually |
3053 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); |
3054 | 0 | return -1; |
3055 | 0 | } |
3056 | 250 | if (rowset.resource_id().empty()) [[unlikely]] { |
3057 | | // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv. |
3058 | 0 | LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key=" |
3059 | 0 | << hex(k) << " value=" << proto_to_json(rowset); |
3060 | 0 | rowset_keys.emplace_back(k); |
3061 | 0 | return -1; |
3062 | 0 | } |
3063 | | // decode rowset_id |
3064 | 250 | auto k1 = k; |
3065 | 250 | k1.remove_prefix(1); |
3066 | 250 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
3067 | 250 | decode_key(&k1, &out); |
3068 | | // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB |
3069 | 250 | const auto& rowset_id = std::get<std::string>(std::get<0>(out[4])); |
3070 | 250 | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ |
3071 | 250 | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id; |
3072 | 250 | if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(), |
3073 | 250 | rowset.tablet_id(), rowset_id) != 0) { |
3074 | 0 | return -1; |
3075 | 0 | } |
3076 | 250 | metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size(); |
3077 | 250 | metrics_context.total_recycled_num++; |
3078 | 250 | segment_metrics_context_.total_recycled_data_size += |
3079 | 250 | rowset.rowset_meta().total_disk_size(); |
3080 | 250 | segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments(); |
3081 | 250 | segment_metrics_context_.report(); |
3082 | 250 | metrics_context.report(); |
3083 | 250 | return 0; |
3084 | 250 | } |
3085 | | // TODO(plat1ko): check rowset not referenced |
3086 | 3.75k | auto rowset_meta = rowset.mutable_rowset_meta(); |
3087 | 3.75k | if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible |
3088 | 0 | if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) { |
3089 | 0 | LOG_INFO("recycle rowset that has empty resource id"); |
3090 | 0 | } else { |
3091 | | // other situations, keep this key-value pair and it needs to be checked manually |
3092 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); |
3093 | 0 | return -1; |
3094 | 0 | } |
3095 | 0 | } |
3096 | 3.75k | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ |
3097 | 3.75k | << " tablet_id=" << rowset_meta->tablet_id() |
3098 | 3.75k | << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=[" |
3099 | 3.75k | << rowset_meta->start_version() << '-' << rowset_meta->end_version() |
3100 | 3.75k | << "] txn_id=" << rowset_meta->txn_id() |
3101 | 3.75k | << " type=" << RecycleRowsetPB_Type_Name(rowset.type()) |
3102 | 3.75k | << " rowset_meta_size=" << v.size() |
3103 | 3.75k | << " creation_time=" << rowset_meta->creation_time(); |
3104 | 3.75k | if (rowset.type() == RecycleRowsetPB::PREPARE) { |
3105 | | // unable to calculate file path, can only be deleted by rowset id prefix |
3106 | 650 | num_prepare += 1; |
3107 | 650 | if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(), |
3108 | 650 | rowset_meta->tablet_id(), |
3109 | 650 | rowset_meta->rowset_id_v2()) != 0) { |
3110 | 0 | return -1; |
3111 | 0 | } |
3112 | 3.10k | } else { |
3113 | 3.10k | num_compacted += rowset.type() == RecycleRowsetPB::COMPACT; |
3114 | 3.10k | rowset_keys.emplace_back(k); |
3115 | 3.10k | rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta)); |
3116 | 3.10k | if (rowset_meta->num_segments() <= 0) { // Skip empty rowset |
3117 | 3.10k | ++num_empty_rowset; |
3118 | 3.10k | } |
3119 | 3.10k | } |
3120 | 3.75k | return 0; |
3121 | 3.75k | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 3029 | 4.00k | auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int { | 3030 | 4.00k | ++num_scanned; | 3031 | 4.00k | total_rowset_key_size += k.size(); | 3032 | 4.00k | total_rowset_value_size += v.size(); | 3033 | 4.00k | RecycleRowsetPB rowset; | 3034 | 4.00k | if (!rowset.ParseFromArray(v.data(), v.size())) { | 3035 | 0 | LOG_WARNING("malformed recycle rowset").tag("key", hex(k)); | 3036 | 0 | return -1; | 3037 | 0 | } | 3038 | | | 3039 | 4.00k | int64_t current_time = ::time(nullptr); | 3040 | 4.00k | int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts); | 3041 | | | 3042 | 4.00k | VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned | 3043 | 0 | << " num_expired=" << num_expired << " expiration=" << expiration | 3044 | 0 | << " RecycleRowsetPB=" << rowset.ShortDebugString(); | 3045 | 4.00k | if (current_time < expiration) { // not expired | 3046 | 0 | return 0; | 3047 | 0 | } | 3048 | 4.00k | ++num_expired; | 3049 | 4.00k | expired_rowset_size += v.size(); | 3050 | 4.00k | if (!rowset.has_type()) { // old version `RecycleRowsetPB` | 3051 | 250 | if (!rowset.has_resource_id()) [[unlikely]] { // impossible | 3052 | | // in old version, keep this key-value pair and it needs to be checked manually | 3053 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); | 3054 | 0 | return -1; | 3055 | 0 | } | 3056 | 250 | if (rowset.resource_id().empty()) [[unlikely]] { | 3057 | | // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv. 
| 3058 | 0 | LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key=" | 3059 | 0 | << hex(k) << " value=" << proto_to_json(rowset); | 3060 | 0 | rowset_keys.emplace_back(k); | 3061 | 0 | return -1; | 3062 | 0 | } | 3063 | | // decode rowset_id | 3064 | 250 | auto k1 = k; | 3065 | 250 | k1.remove_prefix(1); | 3066 | 250 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 3067 | 250 | decode_key(&k1, &out); | 3068 | | // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB | 3069 | 250 | const auto& rowset_id = std::get<std::string>(std::get<0>(out[4])); | 3070 | 250 | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ | 3071 | 250 | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id; | 3072 | 250 | if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(), | 3073 | 250 | rowset.tablet_id(), rowset_id) != 0) { | 3074 | 0 | return -1; | 3075 | 0 | } | 3076 | 250 | metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size(); | 3077 | 250 | metrics_context.total_recycled_num++; | 3078 | 250 | segment_metrics_context_.total_recycled_data_size += | 3079 | 250 | rowset.rowset_meta().total_disk_size(); | 3080 | 250 | segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments(); | 3081 | 250 | segment_metrics_context_.report(); | 3082 | 250 | metrics_context.report(); | 3083 | 250 | return 0; | 3084 | 250 | } | 3085 | | // TODO(plat1ko): check rowset not referenced | 3086 | 3.75k | auto rowset_meta = rowset.mutable_rowset_meta(); | 3087 | 3.75k | if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible | 3088 | 0 | if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) { | 3089 | 0 | LOG_INFO("recycle rowset that has empty resource id"); | 3090 | 0 | } else { | 3091 | | // other situations, keep this key-value pair and it needs to be checked manually | 3092 | 0 | 
LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); | 3093 | 0 | return -1; | 3094 | 0 | } | 3095 | 0 | } | 3096 | 3.75k | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ | 3097 | 3.75k | << " tablet_id=" << rowset_meta->tablet_id() | 3098 | 3.75k | << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=[" | 3099 | 3.75k | << rowset_meta->start_version() << '-' << rowset_meta->end_version() | 3100 | 3.75k | << "] txn_id=" << rowset_meta->txn_id() | 3101 | 3.75k | << " type=" << RecycleRowsetPB_Type_Name(rowset.type()) | 3102 | 3.75k | << " rowset_meta_size=" << v.size() | 3103 | 3.75k | << " creation_time=" << rowset_meta->creation_time(); | 3104 | 3.75k | if (rowset.type() == RecycleRowsetPB::PREPARE) { | 3105 | | // unable to calculate file path, can only be deleted by rowset id prefix | 3106 | 650 | num_prepare += 1; | 3107 | 650 | if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(), | 3108 | 650 | rowset_meta->tablet_id(), | 3109 | 650 | rowset_meta->rowset_id_v2()) != 0) { | 3110 | 0 | return -1; | 3111 | 0 | } | 3112 | 3.10k | } else { | 3113 | 3.10k | num_compacted += rowset.type() == RecycleRowsetPB::COMPACT; | 3114 | 3.10k | rowset_keys.emplace_back(k); | 3115 | 3.10k | rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta)); | 3116 | 3.10k | if (rowset_meta->num_segments() <= 0) { // Skip empty rowset | 3117 | 3.10k | ++num_empty_rowset; | 3118 | 3.10k | } | 3119 | 3.10k | } | 3120 | 3.75k | return 0; | 3121 | 3.75k | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
3122 | | |
3123 | 21 | auto loop_done = [&]() -> int { |
3124 | 21 | std::vector<std::string> rowset_keys_to_delete; |
3125 | | // rowset_id -> rowset_meta |
3126 | | // store rowset id and meta for statistics rs size when delete |
3127 | 21 | std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete; |
3128 | 21 | rowset_keys_to_delete.swap(rowset_keys); |
3129 | 21 | rowsets_to_delete.swap(rowsets); |
3130 | 21 | worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete), |
3131 | 21 | rowsets_to_delete = std::move(rowsets_to_delete)]() { |
3132 | 21 | if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET, |
3133 | 21 | metrics_context) != 0) { |
3134 | 0 | LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_; |
3135 | 0 | return; |
3136 | 0 | } |
3137 | 3.10k | for (const auto& [_, rs] : rowsets_to_delete) { |
3138 | 3.10k | if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { |
3139 | 0 | return; |
3140 | 0 | } |
3141 | 3.10k | } |
3142 | 21 | if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) { |
3143 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_; |
3144 | 0 | return; |
3145 | 0 | } |
3146 | 21 | num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed); |
3147 | 21 | }); recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv Line | Count | Source | 3131 | 21 | rowsets_to_delete = std::move(rowsets_to_delete)]() { | 3132 | 21 | if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET, | 3133 | 21 | metrics_context) != 0) { | 3134 | 0 | LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_; | 3135 | 0 | return; | 3136 | 0 | } | 3137 | 3.10k | for (const auto& [_, rs] : rowsets_to_delete) { | 3138 | 3.10k | if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 3139 | 0 | return; | 3140 | 0 | } | 3141 | 3.10k | } | 3142 | 21 | if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) { | 3143 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_; | 3144 | 0 | return; | 3145 | 0 | } | 3146 | 21 | num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed); | 3147 | 21 | }); |
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv |
3148 | 21 | return 0; |
3149 | 21 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv Line | Count | Source | 3123 | 21 | auto loop_done = [&]() -> int { | 3124 | 21 | std::vector<std::string> rowset_keys_to_delete; | 3125 | | // rowset_id -> rowset_meta | 3126 | | // store rowset id and meta for statistics rs size when delete | 3127 | 21 | std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete; | 3128 | 21 | rowset_keys_to_delete.swap(rowset_keys); | 3129 | 21 | rowsets_to_delete.swap(rowsets); | 3130 | 21 | worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete), | 3131 | 21 | rowsets_to_delete = std::move(rowsets_to_delete)]() { | 3132 | 21 | if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET, | 3133 | 21 | metrics_context) != 0) { | 3134 | 21 | LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_; | 3135 | 21 | return; | 3136 | 21 | } | 3137 | 21 | for (const auto& [_, rs] : rowsets_to_delete) { | 3138 | 21 | if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 3139 | 21 | return; | 3140 | 21 | } | 3141 | 21 | } | 3142 | 21 | if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) { | 3143 | 21 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_; | 3144 | 21 | return; | 3145 | 21 | } | 3146 | 21 | num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed); | 3147 | 21 | }); | 3148 | 21 | return 0; | 3149 | 21 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv |
3150 | | |
3151 | 13 | if (config::enable_recycler_stats_metrics) { |
3152 | 0 | scan_and_statistics_rowsets(); |
3153 | 0 | } |
3154 | | // recycle_func and loop_done for scan and recycle |
3155 | 13 | int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv), |
3156 | 13 | std::move(loop_done)); |
3157 | | |
3158 | 13 | worker_pool->stop(); |
3159 | | |
3160 | 13 | if (!async_recycled_rowset_keys.empty()) { |
3161 | 2 | if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) { |
3162 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_; |
3163 | 0 | return -1; |
3164 | 2 | } else { |
3165 | 2 | num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed); |
3166 | 2 | } |
3167 | 2 | } |
3168 | 13 | return ret; |
3169 | 13 | } |
3170 | | |
3171 | 13 | int InstanceRecycler::recycle_restore_jobs() { |
3172 | 13 | const std::string task_name = "recycle_restore_jobs"; |
3173 | 13 | int64_t num_scanned = 0; |
3174 | 13 | int64_t num_expired = 0; |
3175 | 13 | int64_t num_recycled = 0; |
3176 | 13 | int64_t num_aborted = 0; |
3177 | | |
3178 | 13 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
3179 | | |
3180 | 13 | JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0}; |
3181 | 13 | JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX}; |
3182 | 13 | std::string restore_job_key0; |
3183 | 13 | std::string restore_job_key1; |
3184 | 13 | job_restore_tablet_key(restore_job_key_info0, &restore_job_key0); |
3185 | 13 | job_restore_tablet_key(restore_job_key_info1, &restore_job_key1); |
3186 | | |
3187 | 13 | LOG_INFO("begin to recycle restore jobs").tag("instance_id", instance_id_); |
3188 | | |
3189 | 13 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
3190 | 13 | register_recycle_task(task_name, start_time); |
3191 | | |
3192 | 13 | DORIS_CLOUD_DEFER { |
3193 | 13 | unregister_recycle_task(task_name); |
3194 | 13 | int64_t cost = |
3195 | 13 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
3196 | 13 | metrics_context.finish_report(); |
3197 | | |
3198 | 13 | LOG_INFO("recycle restore jobs finished, cost={}s", cost) |
3199 | 13 | .tag("instance_id", instance_id_) |
3200 | 13 | .tag("num_scanned", num_scanned) |
3201 | 13 | .tag("num_expired", num_expired) |
3202 | 13 | .tag("num_recycled", num_recycled) |
3203 | 13 | .tag("num_aborted", num_aborted); |
3204 | 13 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv Line | Count | Source | 3192 | 13 | DORIS_CLOUD_DEFER { | 3193 | 13 | unregister_recycle_task(task_name); | 3194 | 13 | int64_t cost = | 3195 | 13 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 3196 | 13 | metrics_context.finish_report(); | 3197 | | | 3198 | 13 | LOG_INFO("recycle restore jobs finished, cost={}s", cost) | 3199 | 13 | .tag("instance_id", instance_id_) | 3200 | 13 | .tag("num_scanned", num_scanned) | 3201 | 13 | .tag("num_expired", num_expired) | 3202 | 13 | .tag("num_recycled", num_recycled) | 3203 | 13 | .tag("num_aborted", num_aborted); | 3204 | 13 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv |
3205 | | |
3206 | 13 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
3207 | | |
3208 | 13 | std::vector<std::string_view> restore_job_keys; |
3209 | 41 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { |
3210 | 41 | ++num_scanned; |
3211 | 41 | RestoreJobCloudPB restore_job_pb; |
3212 | 41 | if (!restore_job_pb.ParseFromArray(v.data(), v.size())) { |
3213 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); |
3214 | 0 | return -1; |
3215 | 0 | } |
3216 | 41 | int64_t expiration = |
3217 | 41 | calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts); |
3218 | 41 | VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned |
3219 | 0 | << " num_expired=" << num_expired << " expiration time=" << expiration |
3220 | 0 | << " job expiration=" << restore_job_pb.expired_at_s() |
3221 | 0 | << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s() |
3222 | 0 | << " state=" << restore_job_pb.state(); |
3223 | 41 | int64_t current_time = ::time(nullptr); |
3224 | 41 | if (current_time < expiration) { // not expired |
3225 | 0 | return 0; |
3226 | 0 | } |
3227 | 41 | ++num_expired; |
3228 | | |
3229 | 41 | int64_t tablet_id = restore_job_pb.tablet_id(); |
3230 | 41 | LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_ |
3231 | 41 | << " restore_job_pb=" << restore_job_pb.DebugString(); |
3232 | | |
3233 | 41 | std::unique_ptr<Transaction> txn; |
3234 | 41 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
3235 | 41 | if (err != TxnErrorCode::TXN_OK) { |
3236 | 0 | LOG_WARNING("failed to recycle restore job") |
3237 | 0 | .tag("err", err) |
3238 | 0 | .tag("tablet id", tablet_id) |
3239 | 0 | .tag("instance_id", instance_id_) |
3240 | 0 | .tag("reason", "failed to create txn"); |
3241 | 0 | return -1; |
3242 | 0 | } |
3243 | | |
3244 | 41 | std::string val; |
3245 | 41 | err = txn->get(k, &val); |
3246 | 41 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it |
3247 | 0 | LOG_INFO("restore job {} has been recycled", tablet_id); |
3248 | 0 | return 0; |
3249 | 0 | } |
3250 | 41 | if (err != TxnErrorCode::TXN_OK) { |
3251 | 0 | LOG_WARNING("failed to get kv"); |
3252 | 0 | return -1; |
3253 | 0 | } |
3254 | 41 | restore_job_pb.Clear(); |
3255 | 41 | if (!restore_job_pb.ParseFromString(val)) { |
3256 | 0 | LOG_WARNING("malformed recycle restore job value").tag("key", hex(k)); |
3257 | 0 | return -1; |
3258 | 0 | } |
3259 | | |
3260 | | // PREPARED or COMMITTED, change state to DROPPED and return |
3261 | 41 | if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED || |
3262 | 41 | restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) { |
3263 | 0 | restore_job_pb.set_state(RestoreJobCloudPB::DROPPED); |
3264 | 0 | restore_job_pb.set_need_recycle_data(true); |
3265 | 0 | txn->put(k, restore_job_pb.SerializeAsString()); |
3266 | 0 | err = txn->commit(); |
3267 | 0 | if (err != TxnErrorCode::TXN_OK) { |
3268 | 0 | LOG_WARNING("failed to commit txn: {}", err); |
3269 | 0 | return -1; |
3270 | 0 | } |
3271 | 0 | num_aborted++; |
3272 | 0 | return 0; |
3273 | 0 | } |
3274 | | |
3275 | | // Change state to RECYCLING |
3276 | 41 | if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) { |
3277 | 21 | restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING); |
3278 | 21 | txn->put(k, restore_job_pb.SerializeAsString()); |
3279 | 21 | err = txn->commit(); |
3280 | 21 | if (err != TxnErrorCode::TXN_OK) { |
3281 | 0 | LOG_WARNING("failed to commit txn: {}", err); |
3282 | 0 | return -1; |
3283 | 0 | } |
3284 | 21 | return 0; |
3285 | 21 | } |
3286 | | |
3287 | 20 | std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0}); |
3288 | 20 | std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0}); |
3289 | | |
3290 | | // Recycle all data associated with the restore job. |
3291 | | // This includes rowsets, segments, and related resources. |
3292 | 20 | bool need_recycle_data = restore_job_pb.need_recycle_data(); |
3293 | 20 | if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) { |
3294 | 0 | LOG_WARNING("failed to recycle tablet") |
3295 | 0 | .tag("tablet_id", tablet_id) |
3296 | 0 | .tag("instance_id", instance_id_); |
3297 | 0 | return -1; |
3298 | 0 | } |
3299 | | |
3300 | | // delete all restore job rowset kv |
3301 | 20 | txn->remove(restore_job_rs_key0, restore_job_rs_key1); |
3302 | | |
3303 | 20 | err = txn->commit(); |
3304 | 20 | if (err != TxnErrorCode::TXN_OK) { |
3305 | 0 | LOG_WARNING("failed to recycle tablet restore job rowset kv") |
3306 | 0 | .tag("err", err) |
3307 | 0 | .tag("tablet id", tablet_id) |
3308 | 0 | .tag("instance_id", instance_id_) |
3309 | 0 | .tag("reason", "failed to commit txn"); |
3310 | 0 | return -1; |
3311 | 0 | } |
3312 | | |
3313 | 20 | metrics_context.total_recycled_num = ++num_recycled; |
3314 | 20 | metrics_context.report(); |
3315 | 20 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); |
3316 | 20 | restore_job_keys.push_back(k); |
3317 | | |
3318 | 20 | LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k) |
3319 | 20 | << " tablet_id=" << tablet_id; |
3320 | 20 | return 0; |
3321 | 20 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 3209 | 41 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { | 3210 | 41 | ++num_scanned; | 3211 | 41 | RestoreJobCloudPB restore_job_pb; | 3212 | 41 | if (!restore_job_pb.ParseFromArray(v.data(), v.size())) { | 3213 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); | 3214 | 0 | return -1; | 3215 | 0 | } | 3216 | 41 | int64_t expiration = | 3217 | 41 | calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts); | 3218 | 41 | VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned | 3219 | 0 | << " num_expired=" << num_expired << " expiration time=" << expiration | 3220 | 0 | << " job expiration=" << restore_job_pb.expired_at_s() | 3221 | 0 | << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s() | 3222 | 0 | << " state=" << restore_job_pb.state(); | 3223 | 41 | int64_t current_time = ::time(nullptr); | 3224 | 41 | if (current_time < expiration) { // not expired | 3225 | 0 | return 0; | 3226 | 0 | } | 3227 | 41 | ++num_expired; | 3228 | | | 3229 | 41 | int64_t tablet_id = restore_job_pb.tablet_id(); | 3230 | 41 | LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_ | 3231 | 41 | << " restore_job_pb=" << restore_job_pb.DebugString(); | 3232 | | | 3233 | 41 | std::unique_ptr<Transaction> txn; | 3234 | 41 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 3235 | 41 | if (err != TxnErrorCode::TXN_OK) { | 3236 | 0 | LOG_WARNING("failed to recycle restore job") | 3237 | 0 | .tag("err", err) | 3238 | 0 | .tag("tablet id", tablet_id) | 3239 | 0 | .tag("instance_id", instance_id_) | 3240 | 0 | .tag("reason", "failed to create txn"); | 3241 | 0 | return -1; | 3242 | 0 | } | 3243 | | | 3244 | 41 | std::string val; | 3245 | 41 | err = txn->get(k, 
&val); | 3246 | 41 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it | 3247 | 0 | LOG_INFO("restore job {} has been recycled", tablet_id); | 3248 | 0 | return 0; | 3249 | 0 | } | 3250 | 41 | if (err != TxnErrorCode::TXN_OK) { | 3251 | 0 | LOG_WARNING("failed to get kv"); | 3252 | 0 | return -1; | 3253 | 0 | } | 3254 | 41 | restore_job_pb.Clear(); | 3255 | 41 | if (!restore_job_pb.ParseFromString(val)) { | 3256 | 0 | LOG_WARNING("malformed recycle restore job value").tag("key", hex(k)); | 3257 | 0 | return -1; | 3258 | 0 | } | 3259 | | | 3260 | | // PREPARED or COMMITTED, change state to DROPPED and return | 3261 | 41 | if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED || | 3262 | 41 | restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) { | 3263 | 0 | restore_job_pb.set_state(RestoreJobCloudPB::DROPPED); | 3264 | 0 | restore_job_pb.set_need_recycle_data(true); | 3265 | 0 | txn->put(k, restore_job_pb.SerializeAsString()); | 3266 | 0 | err = txn->commit(); | 3267 | 0 | if (err != TxnErrorCode::TXN_OK) { | 3268 | 0 | LOG_WARNING("failed to commit txn: {}", err); | 3269 | 0 | return -1; | 3270 | 0 | } | 3271 | 0 | num_aborted++; | 3272 | 0 | return 0; | 3273 | 0 | } | 3274 | | | 3275 | | // Change state to RECYCLING | 3276 | 41 | if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) { | 3277 | 21 | restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING); | 3278 | 21 | txn->put(k, restore_job_pb.SerializeAsString()); | 3279 | 21 | err = txn->commit(); | 3280 | 21 | if (err != TxnErrorCode::TXN_OK) { | 3281 | 0 | LOG_WARNING("failed to commit txn: {}", err); | 3282 | 0 | return -1; | 3283 | 0 | } | 3284 | 21 | return 0; | 3285 | 21 | } | 3286 | | | 3287 | 20 | std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0}); | 3288 | 20 | std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0}); | 3289 | | | 3290 | | // Recycle all data associated with the restore job. 
| 3291 | | // This includes rowsets, segments, and related resources. | 3292 | 20 | bool need_recycle_data = restore_job_pb.need_recycle_data(); | 3293 | 20 | if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) { | 3294 | 0 | LOG_WARNING("failed to recycle tablet") | 3295 | 0 | .tag("tablet_id", tablet_id) | 3296 | 0 | .tag("instance_id", instance_id_); | 3297 | 0 | return -1; | 3298 | 0 | } | 3299 | | | 3300 | | // delete all restore job rowset kv | 3301 | 20 | txn->remove(restore_job_rs_key0, restore_job_rs_key1); | 3302 | | | 3303 | 20 | err = txn->commit(); | 3304 | 20 | if (err != TxnErrorCode::TXN_OK) { | 3305 | 0 | LOG_WARNING("failed to recycle tablet restore job rowset kv") | 3306 | 0 | .tag("err", err) | 3307 | 0 | .tag("tablet id", tablet_id) | 3308 | 0 | .tag("instance_id", instance_id_) | 3309 | 0 | .tag("reason", "failed to commit txn"); | 3310 | 0 | return -1; | 3311 | 0 | } | 3312 | | | 3313 | 20 | metrics_context.total_recycled_num = ++num_recycled; | 3314 | 20 | metrics_context.report(); | 3315 | 20 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); | 3316 | 20 | restore_job_keys.push_back(k); | 3317 | | | 3318 | 20 | LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k) | 3319 | 20 | << " tablet_id=" << tablet_id; | 3320 | 20 | return 0; | 3321 | 20 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
3322 | | |
3323 | 13 | auto loop_done = [&restore_job_keys, this]() -> int { |
3324 | 3 | if (restore_job_keys.empty()) return 0; |
3325 | 1 | DORIS_CLOUD_DEFER { |
3326 | 1 | restore_job_keys.clear(); |
3327 | 1 | }; recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv Line | Count | Source | 3325 | 1 | DORIS_CLOUD_DEFER { | 3326 | 1 | restore_job_keys.clear(); | 3327 | 1 | }; |
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv |
3328 | | |
3329 | 1 | std::unique_ptr<Transaction> txn; |
3330 | 1 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
3331 | 1 | if (err != TxnErrorCode::TXN_OK) { |
3332 | 0 | LOG_WARNING("failed to recycle restore job") |
3333 | 0 | .tag("err", err) |
3334 | 0 | .tag("instance_id", instance_id_) |
3335 | 0 | .tag("reason", "failed to create txn"); |
3336 | 0 | return -1; |
3337 | 0 | } |
3338 | 20 | for (auto& k : restore_job_keys) { |
3339 | 20 | txn->remove(k); |
3340 | 20 | } |
3341 | 1 | err = txn->commit(); |
3342 | 1 | if (err != TxnErrorCode::TXN_OK) { |
3343 | 0 | LOG_WARNING("failed to recycle restore job") |
3344 | 0 | .tag("err", err) |
3345 | 0 | .tag("instance_id", instance_id_) |
3346 | 0 | .tag("reason", "failed to commit txn"); |
3347 | 0 | return -1; |
3348 | 0 | } |
3349 | 1 | return 0; |
3350 | 1 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv Line | Count | Source | 3323 | 3 | auto loop_done = [&restore_job_keys, this]() -> int { | 3324 | 3 | if (restore_job_keys.empty()) return 0; | 3325 | 1 | DORIS_CLOUD_DEFER { | 3326 | 1 | restore_job_keys.clear(); | 3327 | 1 | }; | 3328 | | | 3329 | 1 | std::unique_ptr<Transaction> txn; | 3330 | 1 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 3331 | 1 | if (err != TxnErrorCode::TXN_OK) { | 3332 | 0 | LOG_WARNING("failed to recycle restore job") | 3333 | 0 | .tag("err", err) | 3334 | 0 | .tag("instance_id", instance_id_) | 3335 | 0 | .tag("reason", "failed to create txn"); | 3336 | 0 | return -1; | 3337 | 0 | } | 3338 | 20 | for (auto& k : restore_job_keys) { | 3339 | 20 | txn->remove(k); | 3340 | 20 | } | 3341 | 1 | err = txn->commit(); | 3342 | 1 | if (err != TxnErrorCode::TXN_OK) { | 3343 | 0 | LOG_WARNING("failed to recycle restore job") | 3344 | 0 | .tag("err", err) | 3345 | 0 | .tag("instance_id", instance_id_) | 3346 | 0 | .tag("reason", "failed to commit txn"); | 3347 | 0 | return -1; | 3348 | 0 | } | 3349 | 1 | return 0; | 3350 | 1 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv |
3351 | | |
3352 | 13 | if (config::enable_recycler_stats_metrics) { |
3353 | 0 | scan_and_statistics_restore_jobs(); |
3354 | 0 | } |
3355 | | |
3356 | 13 | return scan_and_recycle(restore_job_key0, restore_job_key1, std::move(recycle_func), |
3357 | 13 | std::move(loop_done)); |
3358 | 13 | } |
3359 | | |
3360 | 5 | int InstanceRecycler::recycle_versioned_rowsets() { |
3361 | 5 | const std::string task_name = "recycle_rowsets"; |
3362 | 5 | int64_t num_scanned = 0; |
3363 | 5 | int64_t num_expired = 0; |
3364 | 5 | int64_t num_prepare = 0; |
3365 | 5 | int64_t num_compacted = 0; |
3366 | 5 | int64_t num_empty_rowset = 0; |
3367 | 5 | size_t total_rowset_key_size = 0; |
3368 | 5 | size_t total_rowset_value_size = 0; |
3369 | 5 | size_t expired_rowset_size = 0; |
3370 | 5 | std::atomic_long num_recycled = 0; |
3371 | 5 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
3372 | | |
3373 | 5 | RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""}; |
3374 | 5 | RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""}; |
3375 | 5 | std::string recyc_rs_key0; |
3376 | 5 | std::string recyc_rs_key1; |
3377 | 5 | recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0); |
3378 | 5 | recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1); |
3379 | | |
3380 | 5 | LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_); |
3381 | | |
3382 | 5 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
3383 | 5 | register_recycle_task(task_name, start_time); |
3384 | | |
3385 | 5 | DORIS_CLOUD_DEFER { |
3386 | 5 | unregister_recycle_task(task_name); |
3387 | 5 | int64_t cost = |
3388 | 5 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
3389 | 5 | metrics_context.finish_report(); |
3390 | 5 | LOG_WARNING("recycle rowsets finished, cost={}s", cost) |
3391 | 5 | .tag("instance_id", instance_id_) |
3392 | 5 | .tag("num_scanned", num_scanned) |
3393 | 5 | .tag("num_expired", num_expired) |
3394 | 5 | .tag("num_recycled", num_recycled) |
3395 | 5 | .tag("num_recycled.prepare", num_prepare) |
3396 | 5 | .tag("num_recycled.compacted", num_compacted) |
3397 | 5 | .tag("num_recycled.empty_rowset", num_empty_rowset) |
3398 | 5 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) |
3399 | 5 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) |
3400 | 5 | .tag("expired_rowset_meta_size", expired_rowset_size); |
3401 | 5 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv Line | Count | Source | 3385 | 5 | DORIS_CLOUD_DEFER { | 3386 | 5 | unregister_recycle_task(task_name); | 3387 | 5 | int64_t cost = | 3388 | 5 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 3389 | 5 | metrics_context.finish_report(); | 3390 | 5 | LOG_WARNING("recycle rowsets finished, cost={}s", cost) | 3391 | 5 | .tag("instance_id", instance_id_) | 3392 | 5 | .tag("num_scanned", num_scanned) | 3393 | 5 | .tag("num_expired", num_expired) | 3394 | 5 | .tag("num_recycled", num_recycled) | 3395 | 5 | .tag("num_recycled.prepare", num_prepare) | 3396 | 5 | .tag("num_recycled.compacted", num_compacted) | 3397 | 5 | .tag("num_recycled.empty_rowset", num_empty_rowset) | 3398 | 5 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) | 3399 | 5 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) | 3400 | 5 | .tag("expired_rowset_meta_size", expired_rowset_size); | 3401 | 5 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv |
3402 | | |
3403 | 5 | std::vector<std::string> orphan_rowset_keys; |
3404 | | |
3405 | | // Store keys of rowset recycled by background workers |
3406 | 5 | std::mutex async_recycled_rowset_keys_mutex; |
3407 | 5 | std::vector<std::string> async_recycled_rowset_keys; |
3408 | 5 | auto worker_pool = std::make_unique<SimpleThreadPool>( |
3409 | 5 | config::instance_recycler_worker_pool_size, "recycle_rowsets"); |
3410 | 5 | worker_pool->start(); |
3411 | 5 | auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id, |
3412 | 5 | int64_t tablet_id, const std::string& rowset_id) { |
3413 | | // Try to delete rowset data in background thread |
3414 | 0 | int ret = worker_pool->submit_with_timeout( |
3415 | 0 | [&, resource_id, tablet_id, rowset_id, key]() mutable { |
3416 | 0 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { |
3417 | 0 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); |
3418 | 0 | return; |
3419 | 0 | } |
3420 | | // The async recycled rowsets are staled format or has not been used, |
3421 | | // so we don't need to check the rowset ref count key. |
3422 | 0 | std::vector<std::string> keys; |
3423 | 0 | { |
3424 | 0 | std::lock_guard lock(async_recycled_rowset_keys_mutex); |
3425 | 0 | async_recycled_rowset_keys.push_back(std::move(key)); |
3426 | 0 | if (async_recycled_rowset_keys.size() > 100) { |
3427 | 0 | keys.swap(async_recycled_rowset_keys); |
3428 | 0 | } |
3429 | 0 | } |
3430 | 0 | if (keys.empty()) return; |
3431 | 0 | if (txn_remove(txn_kv_.get(), keys) != 0) { |
3432 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" |
3433 | 0 | << instance_id_; |
3434 | 0 | } else { |
3435 | 0 | num_recycled.fetch_add(keys.size(), std::memory_order_relaxed); |
3436 | 0 | check_recycle_task(instance_id_, "recycle_rowsets", num_scanned, |
3437 | 0 | num_recycled, start_time); |
3438 | 0 | } |
3439 | 0 | }, Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv |
3440 | 0 | 0); |
3441 | 0 | if (ret == 0) return 0; |
3442 | | // Submit task failed, delete rowset data in current thread |
3443 | 0 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { |
3444 | 0 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); |
3445 | 0 | return -1; |
3446 | 0 | } |
3447 | 0 | orphan_rowset_keys.push_back(std::move(key)); |
3448 | 0 | return 0; |
3449 | 0 | }; Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ |
3450 | | |
3451 | 5 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
3452 | | |
3453 | 13 | auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int { |
3454 | 13 | ++num_scanned; |
3455 | 13 | total_rowset_key_size += k.size(); |
3456 | 13 | total_rowset_value_size += v.size(); |
3457 | 13 | RecycleRowsetPB rowset; |
3458 | 13 | if (!rowset.ParseFromArray(v.data(), v.size())) { |
3459 | 0 | LOG_WARNING("malformed recycle rowset").tag("key", hex(k)); |
3460 | 0 | return -1; |
3461 | 0 | } |
3462 | | |
3463 | 13 | int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts); |
3464 | | |
3465 | 13 | VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned |
3466 | 0 | << " num_expired=" << num_expired << " expiration=" << final_expiration |
3467 | 0 | << " RecycleRowsetPB=" << rowset.ShortDebugString(); |
3468 | 13 | int64_t current_time = ::time(nullptr); |
3469 | 13 | if (current_time < final_expiration) { // not expired |
3470 | 0 | return 0; |
3471 | 0 | } |
3472 | 13 | ++num_expired; |
3473 | 13 | expired_rowset_size += v.size(); |
3474 | 13 | if (!rowset.has_type()) { // old version `RecycleRowsetPB` |
3475 | 0 | if (!rowset.has_resource_id()) [[unlikely]] { // impossible |
3476 | | // in old version, keep this key-value pair and it needs to be checked manually |
3477 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); |
3478 | 0 | return -1; |
3479 | 0 | } |
3480 | 0 | if (rowset.resource_id().empty()) [[unlikely]] { |
3481 | | // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv. |
3482 | 0 | LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key=" |
3483 | 0 | << hex(k) << " value=" << proto_to_json(rowset); |
3484 | 0 | orphan_rowset_keys.emplace_back(k); |
3485 | 0 | return -1; |
3486 | 0 | } |
3487 | | // decode rowset_id |
3488 | 0 | auto k1 = k; |
3489 | 0 | k1.remove_prefix(1); |
3490 | 0 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
3491 | 0 | decode_key(&k1, &out); |
3492 | | // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB |
3493 | 0 | const auto& rowset_id = std::get<std::string>(std::get<0>(out[4])); |
3494 | 0 | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ |
3495 | 0 | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id; |
3496 | 0 | if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(), |
3497 | 0 | rowset.tablet_id(), rowset_id) != 0) { |
3498 | 0 | return -1; |
3499 | 0 | } |
3500 | 0 | return 0; |
3501 | 0 | } |
3502 | | // TODO(plat1ko): check rowset not referenced |
3503 | 13 | auto rowset_meta = rowset.mutable_rowset_meta(); |
3504 | 13 | if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible |
3505 | 0 | if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) { |
3506 | 0 | LOG_INFO("recycle rowset that has empty resource id"); |
3507 | 0 | } else { |
3508 | | // other situations, keep this key-value pair and it needs to be checked manually |
3509 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); |
3510 | 0 | return -1; |
3511 | 0 | } |
3512 | 0 | } |
3513 | 13 | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ |
3514 | 13 | << " tablet_id=" << rowset_meta->tablet_id() |
3515 | 13 | << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=[" |
3516 | 13 | << rowset_meta->start_version() << '-' << rowset_meta->end_version() |
3517 | 13 | << "] txn_id=" << rowset_meta->txn_id() |
3518 | 13 | << " type=" << RecycleRowsetPB_Type_Name(rowset.type()) |
3519 | 13 | << " rowset_meta_size=" << v.size() |
3520 | 13 | << " creation_time=" << rowset_meta->creation_time(); |
3521 | 13 | if (rowset.type() == RecycleRowsetPB::PREPARE) { |
3522 | | // unable to calculate file path, can only be deleted by rowset id prefix |
3523 | 0 | num_prepare += 1; |
3524 | 0 | if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(), |
3525 | 0 | rowset_meta->tablet_id(), |
3526 | 0 | rowset_meta->rowset_id_v2()) != 0) { |
3527 | 0 | return -1; |
3528 | 0 | } |
3529 | 13 | } else { |
3530 | 13 | bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT; |
3531 | 13 | worker_pool->submit( |
3532 | 13 | [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() { |
3533 | 13 | if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) { |
3534 | 0 | return; |
3535 | 0 | } |
3536 | 13 | num_compacted += is_compacted; |
3537 | 13 | num_recycled.fetch_add(1, std::memory_order_relaxed); |
3538 | 13 | if (rowset_meta.num_segments() == 0) { |
3539 | 0 | ++num_empty_rowset; |
3540 | 0 | } |
3541 | 13 | }); recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv Line | Count | Source | 3532 | 13 | [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() { | 3533 | 13 | if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) { | 3534 | 0 | return; | 3535 | 0 | } | 3536 | 13 | num_compacted += is_compacted; | 3537 | 13 | num_recycled.fetch_add(1, std::memory_order_relaxed); | 3538 | 13 | if (rowset_meta.num_segments() == 0) { | 3539 | 0 | ++num_empty_rowset; | 3540 | 0 | } | 3541 | 13 | }); |
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv |
3542 | 13 | } |
3543 | 13 | return 0; |
3544 | 13 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 3453 | 13 | auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int { | 3454 | 13 | ++num_scanned; | 3455 | 13 | total_rowset_key_size += k.size(); | 3456 | 13 | total_rowset_value_size += v.size(); | 3457 | 13 | RecycleRowsetPB rowset; | 3458 | 13 | if (!rowset.ParseFromArray(v.data(), v.size())) { | 3459 | 0 | LOG_WARNING("malformed recycle rowset").tag("key", hex(k)); | 3460 | 0 | return -1; | 3461 | 0 | } | 3462 | | | 3463 | 13 | int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts); | 3464 | | | 3465 | 13 | VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned | 3466 | 0 | << " num_expired=" << num_expired << " expiration=" << final_expiration | 3467 | 0 | << " RecycleRowsetPB=" << rowset.ShortDebugString(); | 3468 | 13 | int64_t current_time = ::time(nullptr); | 3469 | 13 | if (current_time < final_expiration) { // not expired | 3470 | 0 | return 0; | 3471 | 0 | } | 3472 | 13 | ++num_expired; | 3473 | 13 | expired_rowset_size += v.size(); | 3474 | 13 | if (!rowset.has_type()) { // old version `RecycleRowsetPB` | 3475 | 0 | if (!rowset.has_resource_id()) [[unlikely]] { // impossible | 3476 | | // in old version, keep this key-value pair and it needs to be checked manually | 3477 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); | 3478 | 0 | return -1; | 3479 | 0 | } | 3480 | 0 | if (rowset.resource_id().empty()) [[unlikely]] { | 3481 | | // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv. 
| 3482 | 0 | LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key=" | 3483 | 0 | << hex(k) << " value=" << proto_to_json(rowset); | 3484 | 0 | orphan_rowset_keys.emplace_back(k); | 3485 | 0 | return -1; | 3486 | 0 | } | 3487 | | // decode rowset_id | 3488 | 0 | auto k1 = k; | 3489 | 0 | k1.remove_prefix(1); | 3490 | 0 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 3491 | 0 | decode_key(&k1, &out); | 3492 | | // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB | 3493 | 0 | const auto& rowset_id = std::get<std::string>(std::get<0>(out[4])); | 3494 | 0 | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ | 3495 | 0 | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id; | 3496 | 0 | if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(), | 3497 | 0 | rowset.tablet_id(), rowset_id) != 0) { | 3498 | 0 | return -1; | 3499 | 0 | } | 3500 | 0 | return 0; | 3501 | 0 | } | 3502 | | // TODO(plat1ko): check rowset not referenced | 3503 | 13 | auto rowset_meta = rowset.mutable_rowset_meta(); | 3504 | 13 | if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible | 3505 | 0 | if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) { | 3506 | 0 | LOG_INFO("recycle rowset that has empty resource id"); | 3507 | 0 | } else { | 3508 | | // other situations, keep this key-value pair and it needs to be checked manually | 3509 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); | 3510 | 0 | return -1; | 3511 | 0 | } | 3512 | 0 | } | 3513 | 13 | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ | 3514 | 13 | << " tablet_id=" << rowset_meta->tablet_id() | 3515 | 13 | << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=[" | 3516 | 13 | << rowset_meta->start_version() << '-' << rowset_meta->end_version() | 3517 | 13 | << "] txn_id=" << rowset_meta->txn_id() | 3518 | 13 | << " 
type=" << RecycleRowsetPB_Type_Name(rowset.type()) | 3519 | 13 | << " rowset_meta_size=" << v.size() | 3520 | 13 | << " creation_time=" << rowset_meta->creation_time(); | 3521 | 13 | if (rowset.type() == RecycleRowsetPB::PREPARE) { | 3522 | | // unable to calculate file path, can only be deleted by rowset id prefix | 3523 | 0 | num_prepare += 1; | 3524 | 0 | if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(), | 3525 | 0 | rowset_meta->tablet_id(), | 3526 | 0 | rowset_meta->rowset_id_v2()) != 0) { | 3527 | 0 | return -1; | 3528 | 0 | } | 3529 | 13 | } else { | 3530 | 13 | bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT; | 3531 | 13 | worker_pool->submit( | 3532 | 13 | [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() { | 3533 | 13 | if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) { | 3534 | 13 | return; | 3535 | 13 | } | 3536 | 13 | num_compacted += is_compacted; | 3537 | 13 | num_recycled.fetch_add(1, std::memory_order_relaxed); | 3538 | 13 | if (rowset_meta.num_segments() == 0) { | 3539 | 13 | ++num_empty_rowset; | 3540 | 13 | } | 3541 | 13 | }); | 3542 | 13 | } | 3543 | 13 | return 0; | 3544 | 13 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
3545 | | |
3546 | 5 | if (config::enable_recycler_stats_metrics) { |
3547 | 0 | scan_and_statistics_rowsets(); |
3548 | 0 | } |
3549 | | |
3550 | 5 | auto loop_done = [&]() -> int { |
3551 | 4 | if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) { |
3552 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_; |
3553 | 0 | } |
3554 | 4 | orphan_rowset_keys.clear(); |
3555 | 4 | return 0; |
3556 | 4 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv Line | Count | Source | 3550 | 4 | auto loop_done = [&]() -> int { | 3551 | 4 | if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) { | 3552 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_; | 3553 | 0 | } | 3554 | 4 | orphan_rowset_keys.clear(); | 3555 | 4 | return 0; | 3556 | 4 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv |
3557 | | |
3558 | | // recycle_func and loop_done for scan and recycle |
3559 | 5 | int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv), |
3560 | 5 | std::move(loop_done)); |
3561 | | |
3562 | 5 | worker_pool->stop(); |
3563 | | |
3564 | 5 | if (!async_recycled_rowset_keys.empty()) { |
3565 | 0 | if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) { |
3566 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_; |
3567 | 0 | return -1; |
3568 | 0 | } else { |
3569 | 0 | num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed); |
3570 | 0 | } |
3571 | 0 | } |
3572 | 5 | return ret; |
3573 | 5 | } |
3574 | | |
3575 | | int InstanceRecycler::recycle_rowset_meta_and_data(std::string_view recycle_rowset_key, |
3576 | 43 | const RowsetMetaCloudPB& rowset_meta) { |
3577 | 43 | constexpr int MAX_RETRY = 10; |
3578 | 43 | int64_t tablet_id = rowset_meta.tablet_id(); |
3579 | 43 | const std::string& rowset_id = rowset_meta.rowset_id_v2(); |
3580 | 43 | std::string_view reference_instance_id = instance_id_; |
3581 | 43 | if (rowset_meta.has_reference_instance_id()) { |
3582 | 5 | reference_instance_id = rowset_meta.reference_instance_id(); |
3583 | 5 | } |
3584 | | |
3585 | 43 | AnnotateTag tablet_id_tag("tablet_id", tablet_id); |
3586 | 43 | AnnotateTag rowset_id_tag("rowset_id", rowset_id); |
3587 | 43 | AnnotateTag rowset_key_tag("recycle_rowset_key", hex(recycle_rowset_key)); |
3588 | 43 | AnnotateTag instance_id_tag("instance_id", instance_id_); |
3589 | 43 | AnnotateTag ref_instance_id_tag("ref_instance_id", reference_instance_id); |
3590 | 47 | for (int i = 0; i < MAX_RETRY; ++i) { |
3591 | 47 | std::unique_ptr<Transaction> txn; |
3592 | 47 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
3593 | 47 | if (err != TxnErrorCode::TXN_OK) { |
3594 | 0 | LOG_WARNING("failed to create txn").tag("err", err); |
3595 | 0 | return -1; |
3596 | 0 | } |
3597 | | |
3598 | 47 | std::string rowset_ref_count_key = |
3599 | 47 | versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id}); |
3600 | 47 | int64_t ref_count = 0; |
3601 | 47 | { |
3602 | 47 | std::string value; |
3603 | 47 | TxnErrorCode err = txn->get(rowset_ref_count_key, &value); |
3604 | 47 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { |
3605 | | // This is the old version rowset, we could recycle it directly. |
3606 | 6 | ref_count = 1; |
3607 | 41 | } else if (err != TxnErrorCode::TXN_OK) { |
3608 | 0 | LOG_WARNING("failed to get rowset ref count key").tag("err", err); |
3609 | 0 | return -1; |
3610 | 41 | } else if (!txn->decode_atomic_int(value, &ref_count)) { |
3611 | 0 | LOG_WARNING("failed to decode rowset data ref count").tag("value", hex(value)); |
3612 | 0 | return -1; |
3613 | 0 | } |
3614 | 47 | } |
3615 | | |
3616 | 47 | if (ref_count == 1) { |
3617 | | // It would not be added since it is recycling. |
3618 | 34 | if (delete_rowset_data(rowset_meta) != 0) { |
3619 | 0 | LOG_WARNING("failed to delete rowset data"); |
3620 | 0 | return -1; |
3621 | 0 | } |
3622 | | |
3623 | | // Reset the transaction to avoid timeout. |
3624 | 34 | err = txn_kv_->create_txn(&txn); |
3625 | 34 | if (err != TxnErrorCode::TXN_OK) { |
3626 | 0 | LOG_WARNING("failed to create txn").tag("err", err); |
3627 | 0 | return -1; |
3628 | 0 | } |
3629 | 34 | txn->remove(rowset_ref_count_key); |
3630 | 34 | LOG_INFO("delete rowset data ref count key") |
3631 | 34 | .tag("txn_id", rowset_meta.txn_id()) |
3632 | 34 | .tag("ref_count_key", hex(rowset_ref_count_key)); |
3633 | 34 | } else { |
3634 | | // Decrease the rowset ref count. |
3635 | | // |
3636 | | // The read conflict range will protect the rowset ref count key, if any conflict happens, |
3637 | | // we will retry and check whether the rowset ref count is 1 and the data need to be deleted. |
3638 | 13 | txn->atomic_add(rowset_ref_count_key, -1); |
3639 | 13 | LOG_INFO("decrease rowset data ref count") |
3640 | 13 | .tag("txn_id", rowset_meta.txn_id()) |
3641 | 13 | .tag("ref_count", ref_count - 1) |
3642 | 13 | .tag("ref_count_key", hex(rowset_ref_count_key)); |
3643 | 13 | } |
3644 | | |
3645 | 47 | txn->remove(recycle_rowset_key); |
3646 | 47 | err = txn->commit(); |
3647 | 47 | if (err == TxnErrorCode::TXN_CONFLICT) { // unlikely |
3648 | | // The rowset ref count key has been changed, we need to retry. |
3649 | 4 | VLOG_DEBUG << "decrease rowset ref count but txn conflict, retry" |
3650 | 0 | << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id |
3651 | 0 | << ", ref_count=" << ref_count << ", retry=" << i; |
3652 | 4 | std::this_thread::sleep_for(std::chrono::milliseconds(500)); |
3653 | 4 | continue; |
3654 | 43 | } else if (err != TxnErrorCode::TXN_OK) { |
3655 | 0 | LOG_WARNING("failed to recycle rowset meta and data").tag("err", err); |
3656 | 0 | return -1; |
3657 | 0 | } |
3658 | 43 | LOG_INFO("recycle rowset meta and data success"); |
3659 | 43 | return 0; |
3660 | 47 | } |
3661 | 0 | LOG_WARNING("failed to recycle rowset meta and data after retry") |
3662 | 0 | .tag("tablet_id", tablet_id) |
3663 | 0 | .tag("rowset_id", rowset_id) |
3664 | 0 | .tag("retry", MAX_RETRY); |
3665 | 0 | return -1; |
3666 | 43 | } |
3667 | | |
3668 | 18 | int InstanceRecycler::recycle_tmp_rowsets() { |
3669 | 18 | const std::string task_name = "recycle_tmp_rowsets"; |
3670 | 18 | int64_t num_scanned = 0; |
3671 | 18 | int64_t num_expired = 0; |
3672 | 18 | std::atomic_long num_recycled = 0; |
3673 | 18 | size_t expired_rowset_size = 0; |
3674 | 18 | size_t total_rowset_key_size = 0; |
3675 | 18 | size_t total_rowset_value_size = 0; |
3676 | 18 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
3677 | | |
3678 | 18 | MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0}; |
3679 | 18 | MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0}; |
3680 | 18 | std::string tmp_rs_key0; |
3681 | 18 | std::string tmp_rs_key1; |
3682 | 18 | meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0); |
3683 | 18 | meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1); |
3684 | | |
3685 | 18 | LOG_WARNING("begin to recycle tmp rowsets").tag("instance_id", instance_id_); |
3686 | | |
3687 | 18 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
3688 | 18 | register_recycle_task(task_name, start_time); |
3689 | | |
3690 | 18 | DORIS_CLOUD_DEFER { |
3691 | 18 | unregister_recycle_task(task_name); |
3692 | 18 | int64_t cost = |
3693 | 18 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
3694 | 18 | metrics_context.finish_report(); |
3695 | 18 | LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost) |
3696 | 18 | .tag("instance_id", instance_id_) |
3697 | 18 | .tag("num_scanned", num_scanned) |
3698 | 18 | .tag("num_expired", num_expired) |
3699 | 18 | .tag("num_recycled", num_recycled) |
3700 | 18 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) |
3701 | 18 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) |
3702 | 18 | .tag("expired_rowset_meta_size_recycled", expired_rowset_size); |
3703 | 18 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv Line | Count | Source | 3690 | 14 | DORIS_CLOUD_DEFER { | 3691 | 14 | unregister_recycle_task(task_name); | 3692 | 14 | int64_t cost = | 3693 | 14 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 3694 | 14 | metrics_context.finish_report(); | 3695 | 14 | LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost) | 3696 | 14 | .tag("instance_id", instance_id_) | 3697 | 14 | .tag("num_scanned", num_scanned) | 3698 | 14 | .tag("num_expired", num_expired) | 3699 | 14 | .tag("num_recycled", num_recycled) | 3700 | 14 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) | 3701 | 14 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) | 3702 | 14 | .tag("expired_rowset_meta_size_recycled", expired_rowset_size); | 3703 | 14 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv Line | Count | Source | 3690 | 4 | DORIS_CLOUD_DEFER { | 3691 | 4 | unregister_recycle_task(task_name); | 3692 | 4 | int64_t cost = | 3693 | 4 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 3694 | 4 | metrics_context.finish_report(); | 3695 | 4 | LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost) | 3696 | 4 | .tag("instance_id", instance_id_) | 3697 | 4 | .tag("num_scanned", num_scanned) | 3698 | 4 | .tag("num_expired", num_expired) | 3699 | 4 | .tag("num_recycled", num_recycled) | 3700 | 4 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) | 3701 | 4 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) | 3702 | 4 | .tag("expired_rowset_meta_size_recycled", expired_rowset_size); | 3703 | 4 | }; |
|
3704 | | |
3705 | | // Elements in `tmp_rowset_keys` has the same lifetime as `it` |
3706 | | |
3707 | 18 | std::vector<std::string> tmp_rowset_keys; |
3708 | 18 | std::vector<std::string> tmp_rowset_ref_count_keys; |
3709 | | |
3710 | | // rowset_id -> rowset_meta |
3711 | | // store tmp_rowset id and meta for statistics rs size when delete |
3712 | 18 | std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets; |
3713 | 18 | auto worker_pool = std::make_unique<SimpleThreadPool>( |
3714 | 18 | config::instance_recycler_worker_pool_size, "recycle_tmp_rowsets"); |
3715 | 18 | worker_pool->start(); |
3716 | | |
3717 | 18 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
3718 | | |
3719 | 18 | auto handle_rowset_kv = [&num_scanned, &num_expired, &tmp_rowset_keys, &tmp_rowsets, |
3720 | 18 | &expired_rowset_size, &total_rowset_key_size, &total_rowset_value_size, |
3721 | 18 | &earlest_ts, &tmp_rowset_ref_count_keys, |
3722 | 57.0k | this](std::string_view k, std::string_view v) -> int { |
3723 | 57.0k | ++num_scanned; |
3724 | 57.0k | total_rowset_key_size += k.size(); |
3725 | 57.0k | total_rowset_value_size += v.size(); |
3726 | 57.0k | doris::RowsetMetaCloudPB rowset; |
3727 | 57.0k | if (!rowset.ParseFromArray(v.data(), v.size())) { |
3728 | 0 | LOG_WARNING("malformed rowset meta").tag("key", hex(k)); |
3729 | 0 | return -1; |
3730 | 0 | } |
3731 | 57.0k | int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts); |
3732 | 57.0k | VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned |
3733 | 0 | << " num_expired=" << num_expired << " expiration=" << expiration |
3734 | 0 | << " txn_expiration=" << rowset.txn_expiration() |
3735 | 0 | << " rowset_creation_time=" << rowset.creation_time(); |
3736 | 57.0k | int64_t current_time = ::time(nullptr); |
3737 | 57.0k | if (current_time < expiration) { // not expired |
3738 | 0 | return 0; |
3739 | 0 | } |
3740 | | |
3741 | 57.0k | DCHECK_GT(rowset.txn_id(), 0) |
3742 | 0 | << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString(); |
3743 | 57.0k | if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) { |
3744 | 2.00k | LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id=" |
3745 | 2.00k | << instance_id_ << " tablet_id=" << rowset.tablet_id() |
3746 | 2.00k | << " rowset_id=" << rowset.rowset_id_v2() << " version=[" |
3747 | 2.00k | << rowset.start_version() << '-' << rowset.end_version() |
3748 | 2.00k | << "] txn_id=" << rowset.txn_id() |
3749 | 2.00k | << " creation_time=" << rowset.creation_time() << " expiration=" << expiration |
3750 | 2.00k | << " txn_expiration=" << rowset.txn_expiration(); |
3751 | 2.00k | return 0; |
3752 | 2.00k | } |
3753 | | |
3754 | 55.0k | ++num_expired; |
3755 | 55.0k | expired_rowset_size += v.size(); |
3756 | 55.0k | if (!rowset.has_resource_id()) { |
3757 | 4.00k | if (rowset.num_segments() > 0) [[unlikely]] { // impossible |
3758 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", k); |
3759 | 0 | return -1; |
3760 | 0 | } |
3761 | | // might be a delete pred rowset |
3762 | 4.00k | tmp_rowset_keys.emplace_back(k); |
3763 | 4.00k | return 0; |
3764 | 4.00k | } |
3765 | | // TODO(plat1ko): check rowset not referenced |
3766 | 51.0k | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ |
3767 | 51.0k | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2() |
3768 | 51.0k | << " version=[" << rowset.start_version() << '-' << rowset.end_version() |
3769 | 51.0k | << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size() |
3770 | 51.0k | << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned |
3771 | 51.0k | << " num_expired=" << num_expired; |
3772 | | |
3773 | 51.0k | tmp_rowset_keys.emplace_back(k.data(), k.size()); |
3774 | | // Remove the rowset ref count key directly since it has not been used. |
3775 | 51.0k | std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key( |
3776 | 51.0k | {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()}); |
3777 | 51.0k | LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_ |
3778 | 51.0k | << "key=" << hex(rowset_ref_count_key); |
3779 | 51.0k | tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key); |
3780 | | |
3781 | 51.0k | tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset)); |
3782 | 51.0k | return 0; |
3783 | 55.0k | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 3722 | 51.0k | this](std::string_view k, std::string_view v) -> int { | 3723 | 51.0k | ++num_scanned; | 3724 | 51.0k | total_rowset_key_size += k.size(); | 3725 | 51.0k | total_rowset_value_size += v.size(); | 3726 | 51.0k | doris::RowsetMetaCloudPB rowset; | 3727 | 51.0k | if (!rowset.ParseFromArray(v.data(), v.size())) { | 3728 | 0 | LOG_WARNING("malformed rowset meta").tag("key", hex(k)); | 3729 | 0 | return -1; | 3730 | 0 | } | 3731 | 51.0k | int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts); | 3732 | 51.0k | VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned | 3733 | 0 | << " num_expired=" << num_expired << " expiration=" << expiration | 3734 | 0 | << " txn_expiration=" << rowset.txn_expiration() | 3735 | 0 | << " rowset_creation_time=" << rowset.creation_time(); | 3736 | 51.0k | int64_t current_time = ::time(nullptr); | 3737 | 51.0k | if (current_time < expiration) { // not expired | 3738 | 0 | return 0; | 3739 | 0 | } | 3740 | | | 3741 | 51.0k | DCHECK_GT(rowset.txn_id(), 0) | 3742 | 0 | << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString(); | 3743 | 51.0k | if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) { | 3744 | 0 | LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id=" | 3745 | 0 | << instance_id_ << " tablet_id=" << rowset.tablet_id() | 3746 | 0 | << " rowset_id=" << rowset.rowset_id_v2() << " version=[" | 3747 | 0 | << rowset.start_version() << '-' << rowset.end_version() | 3748 | 0 | << "] txn_id=" << rowset.txn_id() | 3749 | 0 | << " creation_time=" << rowset.creation_time() << " expiration=" << expiration | 3750 | 0 | << " txn_expiration=" << rowset.txn_expiration(); | 3751 | 0 | return 0; | 3752 | 0 | } | 3753 | | | 3754 | 51.0k | ++num_expired; | 
3755 | 51.0k | expired_rowset_size += v.size(); | 3756 | 51.0k | if (!rowset.has_resource_id()) { | 3757 | 0 | if (rowset.num_segments() > 0) [[unlikely]] { // impossible | 3758 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", k); | 3759 | 0 | return -1; | 3760 | 0 | } | 3761 | | // might be a delete pred rowset | 3762 | 0 | tmp_rowset_keys.emplace_back(k); | 3763 | 0 | return 0; | 3764 | 0 | } | 3765 | | // TODO(plat1ko): check rowset not referenced | 3766 | 51.0k | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ | 3767 | 51.0k | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2() | 3768 | 51.0k | << " version=[" << rowset.start_version() << '-' << rowset.end_version() | 3769 | 51.0k | << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size() | 3770 | 51.0k | << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned | 3771 | 51.0k | << " num_expired=" << num_expired; | 3772 | | | 3773 | 51.0k | tmp_rowset_keys.emplace_back(k.data(), k.size()); | 3774 | | // Remove the rowset ref count key directly since it has not been used. | 3775 | 51.0k | std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key( | 3776 | 51.0k | {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()}); | 3777 | 51.0k | LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_ | 3778 | 51.0k | << "key=" << hex(rowset_ref_count_key); | 3779 | 51.0k | tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key); | 3780 | | | 3781 | 51.0k | tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset)); | 3782 | 51.0k | return 0; | 3783 | 51.0k | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 3722 | 6.00k | this](std::string_view k, std::string_view v) -> int { | 3723 | 6.00k | ++num_scanned; | 3724 | 6.00k | total_rowset_key_size += k.size(); | 3725 | 6.00k | total_rowset_value_size += v.size(); | 3726 | 6.00k | doris::RowsetMetaCloudPB rowset; | 3727 | 6.00k | if (!rowset.ParseFromArray(v.data(), v.size())) { | 3728 | 0 | LOG_WARNING("malformed rowset meta").tag("key", hex(k)); | 3729 | 0 | return -1; | 3730 | 0 | } | 3731 | 6.00k | int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts); | 3732 | 6.00k | VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned | 3733 | 0 | << " num_expired=" << num_expired << " expiration=" << expiration | 3734 | 0 | << " txn_expiration=" << rowset.txn_expiration() | 3735 | 0 | << " rowset_creation_time=" << rowset.creation_time(); | 3736 | 6.00k | int64_t current_time = ::time(nullptr); | 3737 | 6.00k | if (current_time < expiration) { // not expired | 3738 | 0 | return 0; | 3739 | 0 | } | 3740 | | | 3741 | 6.00k | DCHECK_GT(rowset.txn_id(), 0) | 3742 | 0 | << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString(); | 3743 | 6.00k | if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) { | 3744 | 2.00k | LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id=" | 3745 | 2.00k | << instance_id_ << " tablet_id=" << rowset.tablet_id() | 3746 | 2.00k | << " rowset_id=" << rowset.rowset_id_v2() << " version=[" | 3747 | 2.00k | << rowset.start_version() << '-' << rowset.end_version() | 3748 | 2.00k | << "] txn_id=" << rowset.txn_id() | 3749 | 2.00k | << " creation_time=" << rowset.creation_time() << " expiration=" << expiration | 3750 | 2.00k | << " txn_expiration=" << rowset.txn_expiration(); | 3751 | 2.00k | return 0; | 3752 | 2.00k | } | 3753 | | | 3754 | 4.00k | 
++num_expired; | 3755 | 4.00k | expired_rowset_size += v.size(); | 3756 | 4.00k | if (!rowset.has_resource_id()) { | 3757 | 4.00k | if (rowset.num_segments() > 0) [[unlikely]] { // impossible | 3758 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", k); | 3759 | 0 | return -1; | 3760 | 0 | } | 3761 | | // might be a delete pred rowset | 3762 | 4.00k | tmp_rowset_keys.emplace_back(k); | 3763 | 4.00k | return 0; | 3764 | 4.00k | } | 3765 | | // TODO(plat1ko): check rowset not referenced | 3766 | 0 | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ | 3767 | 0 | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2() | 3768 | 0 | << " version=[" << rowset.start_version() << '-' << rowset.end_version() | 3769 | 0 | << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size() | 3770 | 0 | << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned | 3771 | 0 | << " num_expired=" << num_expired; | 3772 | |
| 3773 | 0 | tmp_rowset_keys.emplace_back(k.data(), k.size()); | 3774 | | // Remove the rowset ref count key directly since it has not been used. | 3775 | 0 | std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key( | 3776 | 0 | {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()}); | 3777 | 0 | LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_ | 3778 | 0 | << "key=" << hex(rowset_ref_count_key); | 3779 | 0 | tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key); | 3780 | |
| 3781 | 0 | tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset)); | 3782 | 0 | return 0; | 3783 | 4.00k | }; |
|
3784 | | |
3785 | | // TODO bacth delete |
3786 | 51.0k | auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) { |
3787 | 51.0k | std::string dbm_start_key = |
3788 | 51.0k | versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id}); |
3789 | 51.0k | std::string dbm_end_key = dbm_start_key; |
3790 | 51.0k | encode_int64(INT64_MAX, &dbm_end_key); |
3791 | 51.0k | auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key); |
3792 | 51.0k | if (ret != 0) { |
3793 | 0 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id=" |
3794 | 0 | << instance_id_ << ", tablet_id=" << tablet_id |
3795 | 0 | << ", rowset_id=" << rowset_id; |
3796 | 0 | } |
3797 | 51.0k | return ret; |
3798 | 51.0k | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 3786 | 51.0k | auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) { | 3787 | 51.0k | std::string dbm_start_key = | 3788 | 51.0k | versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id}); | 3789 | 51.0k | std::string dbm_end_key = dbm_start_key; | 3790 | 51.0k | encode_int64(INT64_MAX, &dbm_end_key); | 3791 | 51.0k | auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key); | 3792 | 51.0k | if (ret != 0) { | 3793 | 0 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id=" | 3794 | 0 | << instance_id_ << ", tablet_id=" << tablet_id | 3795 | 0 | << ", rowset_id=" << rowset_id; | 3796 | 0 | } | 3797 | 51.0k | return ret; | 3798 | 51.0k | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE |
3799 | | |
3800 | 51.0k | auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) { |
3801 | 51.0k | auto delete_bitmap_start = |
3802 | 51.0k | meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0}); |
3803 | 51.0k | auto delete_bitmap_end = |
3804 | 51.0k | meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX}); |
3805 | 51.0k | auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end); |
3806 | 51.0k | if (ret != 0) { |
3807 | 0 | LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_ |
3808 | 0 | << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id; |
3809 | 0 | } |
3810 | 51.0k | return ret; |
3811 | 51.0k | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 3800 | 51.0k | auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) { | 3801 | 51.0k | auto delete_bitmap_start = | 3802 | 51.0k | meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0}); | 3803 | 51.0k | auto delete_bitmap_end = | 3804 | 51.0k | meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX}); | 3805 | 51.0k | auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end); | 3806 | 51.0k | if (ret != 0) { | 3807 | 0 | LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_ | 3808 | 0 | << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id; | 3809 | 0 | } | 3810 | 51.0k | return ret; | 3811 | 51.0k | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE |
3812 | | |
3813 | 18 | auto loop_done = [&]() -> int { |
3814 | 10 | DORIS_CLOUD_DEFER { |
3815 | 10 | tmp_rowset_keys.clear(); |
3816 | 10 | tmp_rowsets.clear(); |
3817 | 10 | tmp_rowset_ref_count_keys.clear(); |
3818 | 10 | }; recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv Line | Count | Source | 3814 | 7 | DORIS_CLOUD_DEFER { | 3815 | 7 | tmp_rowset_keys.clear(); | 3816 | 7 | tmp_rowsets.clear(); | 3817 | 7 | tmp_rowset_ref_count_keys.clear(); | 3818 | 7 | }; |
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv Line | Count | Source | 3814 | 3 | DORIS_CLOUD_DEFER { | 3815 | 3 | tmp_rowset_keys.clear(); | 3816 | 3 | tmp_rowsets.clear(); | 3817 | 3 | tmp_rowset_ref_count_keys.clear(); | 3818 | 3 | }; |
|
3819 | 10 | worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys, |
3820 | 10 | tmp_rowsets_to_delete = tmp_rowsets, |
3821 | 10 | tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() { |
3822 | 10 | if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET, |
3823 | 10 | metrics_context) != 0) { |
3824 | 0 | LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_; |
3825 | 0 | return; |
3826 | 0 | } |
3827 | 51.0k | for (const auto& [_, rs] : tmp_rowsets_to_delete) { |
3828 | 51.0k | if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { |
3829 | 0 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs=" |
3830 | 0 | << rs.ShortDebugString(); |
3831 | 0 | return; |
3832 | 0 | } |
3833 | 51.0k | if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { |
3834 | 0 | LOG(WARNING) << "failed to delete delete bitmap kv, rs=" |
3835 | 0 | << rs.ShortDebugString(); |
3836 | 0 | return; |
3837 | 0 | } |
3838 | 51.0k | } |
3839 | 10 | if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) { |
3840 | 0 | LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_; |
3841 | 0 | return; |
3842 | 0 | } |
3843 | 10 | if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) { |
3844 | 0 | LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_; |
3845 | 0 | return; |
3846 | 0 | } |
3847 | 10 | num_recycled += tmp_rowset_keys.size(); |
3848 | 10 | return; |
3849 | 10 | }); recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE0_clEv Line | Count | Source | 3821 | 7 | tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() { | 3822 | 7 | if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET, | 3823 | 7 | metrics_context) != 0) { | 3824 | 0 | LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_; | 3825 | 0 | return; | 3826 | 0 | } | 3827 | 51.0k | for (const auto& [_, rs] : tmp_rowsets_to_delete) { | 3828 | 51.0k | if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 3829 | 0 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs=" | 3830 | 0 | << rs.ShortDebugString(); | 3831 | 0 | return; | 3832 | 0 | } | 3833 | 51.0k | if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 3834 | 0 | LOG(WARNING) << "failed to delete delete bitmap kv, rs=" | 3835 | 0 | << rs.ShortDebugString(); | 3836 | 0 | return; | 3837 | 0 | } | 3838 | 51.0k | } | 3839 | 7 | if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) { | 3840 | 0 | LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_; | 3841 | 0 | return; | 3842 | 0 | } | 3843 | 7 | if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) { | 3844 | 0 | LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_; | 3845 | 0 | return; | 3846 | 0 | } | 3847 | 7 | num_recycled += tmp_rowset_keys.size(); | 3848 | 7 | return; | 3849 | 7 | }); |
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE0_clEv Line | Count | Source | 3821 | 3 | tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() { | 3822 | 3 | if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET, | 3823 | 3 | metrics_context) != 0) { | 3824 | 0 | LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_; | 3825 | 0 | return; | 3826 | 0 | } | 3827 | 3 | for (const auto& [_, rs] : tmp_rowsets_to_delete) { | 3828 | 0 | if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 3829 | 0 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs=" | 3830 | 0 | << rs.ShortDebugString(); | 3831 | 0 | return; | 3832 | 0 | } | 3833 | 0 | if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 3834 | 0 | LOG(WARNING) << "failed to delete delete bitmap kv, rs=" | 3835 | 0 | << rs.ShortDebugString(); | 3836 | 0 | return; | 3837 | 0 | } | 3838 | 0 | } | 3839 | 3 | if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) { | 3840 | 0 | LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_; | 3841 | 0 | return; | 3842 | 0 | } | 3843 | 3 | if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) { | 3844 | 0 | LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_; | 3845 | 0 | return; | 3846 | 0 | } | 3847 | 3 | num_recycled += tmp_rowset_keys.size(); | 3848 | 3 | return; | 3849 | 3 | }); |
|
3850 | 10 | return 0; |
3851 | 10 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv Line | Count | Source | 3813 | 7 | auto loop_done = [&]() -> int { | 3814 | 7 | DORIS_CLOUD_DEFER { | 3815 | 7 | tmp_rowset_keys.clear(); | 3816 | 7 | tmp_rowsets.clear(); | 3817 | 7 | tmp_rowset_ref_count_keys.clear(); | 3818 | 7 | }; | 3819 | 7 | worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys, | 3820 | 7 | tmp_rowsets_to_delete = tmp_rowsets, | 3821 | 7 | tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() { | 3822 | 7 | if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET, | 3823 | 7 | metrics_context) != 0) { | 3824 | 7 | LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_; | 3825 | 7 | return; | 3826 | 7 | } | 3827 | 7 | for (const auto& [_, rs] : tmp_rowsets_to_delete) { | 3828 | 7 | if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 3829 | 7 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs=" | 3830 | 7 | << rs.ShortDebugString(); | 3831 | 7 | return; | 3832 | 7 | } | 3833 | 7 | if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 3834 | 7 | LOG(WARNING) << "failed to delete delete bitmap kv, rs=" | 3835 | 7 | << rs.ShortDebugString(); | 3836 | 7 | return; | 3837 | 7 | } | 3838 | 7 | } | 3839 | 7 | if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) { | 3840 | 7 | LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_; | 3841 | 7 | return; | 3842 | 7 | } | 3843 | 7 | if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) { | 3844 | 7 | LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_; | 3845 | 7 | return; | 3846 | 7 | } | 3847 | 7 | num_recycled += tmp_rowset_keys.size(); | 3848 | 7 | return; | 3849 | 7 | }); | 3850 | 7 | return 0; | 3851 | 7 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv Line | Count | Source | 3813 | 3 | auto loop_done = [&]() -> int { | 3814 | 3 | DORIS_CLOUD_DEFER { | 3815 | 3 | tmp_rowset_keys.clear(); | 3816 | 3 | tmp_rowsets.clear(); | 3817 | 3 | tmp_rowset_ref_count_keys.clear(); | 3818 | 3 | }; | 3819 | 3 | worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys, | 3820 | 3 | tmp_rowsets_to_delete = tmp_rowsets, | 3821 | 3 | tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() { | 3822 | 3 | if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET, | 3823 | 3 | metrics_context) != 0) { | 3824 | 3 | LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_; | 3825 | 3 | return; | 3826 | 3 | } | 3827 | 3 | for (const auto& [_, rs] : tmp_rowsets_to_delete) { | 3828 | 3 | if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 3829 | 3 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs=" | 3830 | 3 | << rs.ShortDebugString(); | 3831 | 3 | return; | 3832 | 3 | } | 3833 | 3 | if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 3834 | 3 | LOG(WARNING) << "failed to delete delete bitmap kv, rs=" | 3835 | 3 | << rs.ShortDebugString(); | 3836 | 3 | return; | 3837 | 3 | } | 3838 | 3 | } | 3839 | 3 | if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) { | 3840 | 3 | LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_; | 3841 | 3 | return; | 3842 | 3 | } | 3843 | 3 | if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) { | 3844 | 3 | LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_; | 3845 | 3 | return; | 3846 | 3 | } | 3847 | 3 | num_recycled += tmp_rowset_keys.size(); | 3848 | 3 | return; | 3849 | 3 | }); | 3850 | 3 | return 0; | 3851 | 3 | }; |
|
3852 | | |
3853 | 18 | if (config::enable_recycler_stats_metrics) { |
3854 | 0 | scan_and_statistics_tmp_rowsets(); |
3855 | 0 | } |
3856 | | // recycle_func and loop_done for scan and recycle |
3857 | 18 | int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_rowset_kv), |
3858 | 18 | std::move(loop_done)); |
3859 | | |
3860 | 18 | worker_pool->stop(); |
3861 | 18 | return ret; |
3862 | 18 | } |
3863 | | |
3864 | | int InstanceRecycler::scan_and_recycle( |
3865 | | std::string begin, std::string_view end, |
3866 | | std::function<int(std::string_view k, std::string_view v)> recycle_func, |
3867 | 235 | std::function<int()> loop_done) { |
3868 | 235 | LOG(INFO) << "begin scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) << ")"; |
3869 | 235 | int ret = 0; |
3870 | 235 | int64_t cnt = 0; |
3871 | 235 | int get_range_retried = 0; |
3872 | 235 | std::string err; |
3873 | 235 | DORIS_CLOUD_DEFER_COPY(begin, end) { |
3874 | 235 | LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) |
3875 | 235 | << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried |
3876 | 235 | << " ret=" << ret << " err=" << err; |
3877 | 235 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv Line | Count | Source | 3873 | 216 | DORIS_CLOUD_DEFER_COPY(begin, end) { | 3874 | 216 | LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) | 3875 | 216 | << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried | 3876 | 216 | << " ret=" << ret << " err=" << err; | 3877 | 216 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv Line | Count | Source | 3873 | 19 | DORIS_CLOUD_DEFER_COPY(begin, end) { | 3874 | 19 | LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) | 3875 | 19 | << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried | 3876 | 19 | << " ret=" << ret << " err=" << err; | 3877 | 19 | }; |
|
3878 | | |
3879 | 235 | std::unique_ptr<RangeGetIterator> it; |
3880 | 263 | do { |
3881 | 263 | if (get_range_retried > 1000) { |
3882 | 0 | err = "txn_get exceeds max retry, may not scan all keys"; |
3883 | 0 | ret = -1; |
3884 | 0 | return -1; |
3885 | 0 | } |
3886 | 263 | int get_ret = txn_get(txn_kv_.get(), begin, end, it); |
3887 | 263 | if (get_ret != 0) { // txn kv may complain "Request for future version" |
3888 | 0 | LOG(WARNING) << "failed to get kv, range=[" << hex(begin) << "," << hex(end) |
3889 | 0 | << ") num_scanned=" << cnt << " txn_get_ret=" << get_ret |
3890 | 0 | << " get_range_retried=" << get_range_retried; |
3891 | 0 | ++get_range_retried; |
3892 | 0 | std::this_thread::sleep_for(std::chrono::milliseconds(500)); |
3893 | 0 | continue; // try again |
3894 | 0 | } |
3895 | 263 | if (!it->has_next()) { |
3896 | 116 | LOG(INFO) << "no keys in the given range=[" << hex(begin) << "," << hex(end) << ")"; |
3897 | 116 | break; // scan finished |
3898 | 116 | } |
3899 | 99.6k | while (it->has_next()) { |
3900 | 99.4k | ++cnt; |
3901 | | // recycle corresponding resources |
3902 | 99.4k | auto [k, v] = it->next(); |
3903 | 99.4k | if (!it->has_next()) { |
3904 | 147 | begin = k; |
3905 | 147 | VLOG_DEBUG << "iterator has no more kvs. key=" << hex(k); |
3906 | 147 | } |
3907 | | // if we want to continue scanning, the recycle_func should not return non-zero |
3908 | 99.4k | if (recycle_func(k, v) != 0) { |
3909 | 4.00k | err = "recycle_func error"; |
3910 | 4.00k | ret = -1; |
3911 | 4.00k | } |
3912 | 99.4k | } |
3913 | 147 | begin.push_back('\x00'); // Update to next smallest key for iteration |
3914 | | // if we want to continue scanning, the recycle_func should not return non-zero |
3915 | 147 | if (loop_done && loop_done() != 0) { |
3916 | 3 | err = "loop_done error"; |
3917 | 3 | ret = -1; |
3918 | 3 | } |
3919 | 147 | } while (it->more() && !stopped()); |
3920 | 235 | return ret; |
3921 | 235 | } |
3922 | | |
3923 | 20 | int InstanceRecycler::abort_timeout_txn() { |
3924 | 20 | const std::string task_name = "abort_timeout_txn"; |
3925 | 20 | int64_t num_scanned = 0; |
3926 | 20 | int64_t num_timeout = 0; |
3927 | 20 | int64_t num_abort = 0; |
3928 | 20 | int64_t num_advance = 0; |
3929 | 20 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
3930 | | |
3931 | 20 | TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0}; |
3932 | 20 | TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX}; |
3933 | 20 | std::string begin_txn_running_key; |
3934 | 20 | std::string end_txn_running_key; |
3935 | 20 | txn_running_key(txn_running_key_info0, &begin_txn_running_key); |
3936 | 20 | txn_running_key(txn_running_key_info1, &end_txn_running_key); |
3937 | | |
3938 | 20 | LOG_WARNING("begin to abort timeout txn").tag("instance_id", instance_id_); |
3939 | | |
3940 | 20 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
3941 | 20 | register_recycle_task(task_name, start_time); |
3942 | | |
3943 | 20 | DORIS_CLOUD_DEFER { |
3944 | 20 | unregister_recycle_task(task_name); |
3945 | 20 | int64_t cost = |
3946 | 20 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
3947 | 20 | metrics_context.finish_report(); |
3948 | 20 | LOG_WARNING("end to abort timeout txn, cost={}s", cost) |
3949 | 20 | .tag("instance_id", instance_id_) |
3950 | 20 | .tag("num_scanned", num_scanned) |
3951 | 20 | .tag("num_timeout", num_timeout) |
3952 | 20 | .tag("num_abort", num_abort) |
3953 | 20 | .tag("num_advance", num_advance); |
3954 | 20 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv Line | Count | Source | 3943 | 16 | DORIS_CLOUD_DEFER { | 3944 | 16 | unregister_recycle_task(task_name); | 3945 | 16 | int64_t cost = | 3946 | 16 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 3947 | 16 | metrics_context.finish_report(); | 3948 | 16 | LOG_WARNING("end to abort timeout txn, cost={}s", cost) | 3949 | 16 | .tag("instance_id", instance_id_) | 3950 | 16 | .tag("num_scanned", num_scanned) | 3951 | 16 | .tag("num_timeout", num_timeout) | 3952 | 16 | .tag("num_abort", num_abort) | 3953 | 16 | .tag("num_advance", num_advance); | 3954 | 16 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv Line | Count | Source | 3943 | 4 | DORIS_CLOUD_DEFER { | 3944 | 4 | unregister_recycle_task(task_name); | 3945 | 4 | int64_t cost = | 3946 | 4 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 3947 | 4 | metrics_context.finish_report(); | 3948 | 4 | LOG_WARNING("end to abort timeout txn, cost={}s", cost) | 3949 | 4 | .tag("instance_id", instance_id_) | 3950 | 4 | .tag("num_scanned", num_scanned) | 3951 | 4 | .tag("num_timeout", num_timeout) | 3952 | 4 | .tag("num_abort", num_abort) | 3953 | 4 | .tag("num_advance", num_advance); | 3954 | 4 | }; |
|
3955 | | |
3956 | 20 | int64_t current_time = |
3957 | 20 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
3958 | | |
3959 | 20 | auto handle_txn_running_kv = [&num_scanned, &num_timeout, &num_abort, &num_advance, |
3960 | 20 | ¤t_time, &metrics_context, |
3961 | 20 | this](std::string_view k, std::string_view v) -> int { |
3962 | 10 | ++num_scanned; |
3963 | | |
3964 | 10 | std::unique_ptr<Transaction> txn; |
3965 | 10 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
3966 | 10 | if (err != TxnErrorCode::TXN_OK) { |
3967 | 0 | LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k)); |
3968 | 0 | return -1; |
3969 | 0 | } |
3970 | 10 | std::string_view k1 = k; |
3971 | | //TxnRunningKeyInfo 0:instance_id 1:db_id 2:txn_id |
3972 | 10 | k1.remove_prefix(1); // Remove key space |
3973 | 10 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
3974 | 10 | if (decode_key(&k1, &out) != 0) { |
3975 | 0 | LOG_ERROR("failed to decode key").tag("key", hex(k)); |
3976 | 0 | return -1; |
3977 | 0 | } |
3978 | 10 | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); |
3979 | 10 | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); |
3980 | 10 | VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id; |
3981 | | // Update txn_info |
3982 | 10 | std::string txn_inf_key, txn_inf_val; |
3983 | 10 | txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key); |
3984 | 10 | err = txn->get(txn_inf_key, &txn_inf_val); |
3985 | 10 | if (err != TxnErrorCode::TXN_OK) { |
3986 | 0 | LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key)); |
3987 | 0 | return -1; |
3988 | 0 | } |
3989 | 10 | TxnInfoPB txn_info; |
3990 | 10 | if (!txn_info.ParseFromString(txn_inf_val)) { |
3991 | 0 | LOG_WARNING("failed to parse txn info").tag("key", hex(k)); |
3992 | 0 | return -1; |
3993 | 0 | } |
3994 | | |
3995 | 10 | if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) { |
3996 | 4 | txn.reset(); |
3997 | 4 | TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info); |
3998 | 4 | std::shared_ptr<TxnLazyCommitTask> task = |
3999 | 4 | txn_lazy_committer_->submit(instance_id_, txn_info.txn_id()); |
4000 | 4 | std::pair<MetaServiceCode, std::string> ret = task->wait(); |
4001 | 4 | if (ret.first != MetaServiceCode::OK) { |
4002 | 0 | LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first |
4003 | 0 | << "msg=" << ret.second; |
4004 | 0 | return -1; |
4005 | 0 | } |
4006 | 4 | ++num_advance; |
4007 | 4 | return 0; |
4008 | 6 | } else { |
4009 | 6 | TxnRunningPB txn_running_pb; |
4010 | 6 | if (!txn_running_pb.ParseFromArray(v.data(), v.size())) { |
4011 | 0 | LOG_WARNING("malformed txn_running_pb").tag("key", hex(k)); |
4012 | 0 | return -1; |
4013 | 0 | } |
4014 | 6 | if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) { |
4015 | 4 | return 0; |
4016 | 4 | } |
4017 | 2 | ++num_timeout; |
4018 | | |
4019 | 2 | DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE); |
4020 | 2 | txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED); |
4021 | 2 | txn_info.set_finish_time(current_time); |
4022 | 2 | txn_info.set_reason("timeout"); |
4023 | 2 | VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString(); |
4024 | 2 | txn_inf_val.clear(); |
4025 | 2 | if (!txn_info.SerializeToString(&txn_inf_val)) { |
4026 | 0 | LOG_WARNING("failed to serialize txn info").tag("key", hex(k)); |
4027 | 0 | return -1; |
4028 | 0 | } |
4029 | 2 | txn->put(txn_inf_key, txn_inf_val); |
4030 | 2 | VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key); |
4031 | | // Put recycle txn key |
4032 | 2 | std::string recyc_txn_key, recyc_txn_val; |
4033 | 2 | recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key); |
4034 | 2 | RecycleTxnPB recycle_txn_pb; |
4035 | 2 | recycle_txn_pb.set_creation_time(current_time); |
4036 | 2 | recycle_txn_pb.set_label(txn_info.label()); |
4037 | 2 | if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) { |
4038 | 0 | LOG_WARNING("failed to serialize txn recycle info") |
4039 | 0 | .tag("key", hex(k)) |
4040 | 0 | .tag("db_id", db_id) |
4041 | 0 | .tag("txn_id", txn_id); |
4042 | 0 | return -1; |
4043 | 0 | } |
4044 | 2 | txn->put(recyc_txn_key, recyc_txn_val); |
4045 | | // Remove txn running key |
4046 | 2 | txn->remove(k); |
4047 | 2 | err = txn->commit(); |
4048 | 2 | if (err != TxnErrorCode::TXN_OK) { |
4049 | 0 | LOG_WARNING("failed to commit txn err={}", err) |
4050 | 0 | .tag("key", hex(k)) |
4051 | 0 | .tag("db_id", db_id) |
4052 | 0 | .tag("txn_id", txn_id); |
4053 | 0 | return -1; |
4054 | 0 | } |
4055 | 2 | metrics_context.total_recycled_num = ++num_abort; |
4056 | 2 | metrics_context.report(); |
4057 | 2 | } |
4058 | | |
4059 | 2 | return 0; |
4060 | 10 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 3961 | 6 | this](std::string_view k, std::string_view v) -> int { | 3962 | 6 | ++num_scanned; | 3963 | | | 3964 | 6 | std::unique_ptr<Transaction> txn; | 3965 | 6 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 3966 | 6 | if (err != TxnErrorCode::TXN_OK) { | 3967 | 0 | LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k)); | 3968 | 0 | return -1; | 3969 | 0 | } | 3970 | 6 | std::string_view k1 = k; | 3971 | | //TxnRunningKeyInfo 0:instance_id 1:db_id 2:txn_id | 3972 | 6 | k1.remove_prefix(1); // Remove key space | 3973 | 6 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 3974 | 6 | if (decode_key(&k1, &out) != 0) { | 3975 | 0 | LOG_ERROR("failed to decode key").tag("key", hex(k)); | 3976 | 0 | return -1; | 3977 | 0 | } | 3978 | 6 | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); | 3979 | 6 | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); | 3980 | 6 | VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id; | 3981 | | // Update txn_info | 3982 | 6 | std::string txn_inf_key, txn_inf_val; | 3983 | 6 | txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key); | 3984 | 6 | err = txn->get(txn_inf_key, &txn_inf_val); | 3985 | 6 | if (err != TxnErrorCode::TXN_OK) { | 3986 | 0 | LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key)); | 3987 | 0 | return -1; | 3988 | 0 | } | 3989 | 6 | TxnInfoPB txn_info; | 3990 | 6 | if (!txn_info.ParseFromString(txn_inf_val)) { | 3991 | 0 | LOG_WARNING("failed to parse txn info").tag("key", hex(k)); | 3992 | 0 | return -1; | 3993 | 0 | } | 3994 | | | 3995 | 6 | if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) { | 3996 | 0 | txn.reset(); | 3997 | 0 | TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info); | 3998 | 0 
| std::shared_ptr<TxnLazyCommitTask> task = | 3999 | 0 | txn_lazy_committer_->submit(instance_id_, txn_info.txn_id()); | 4000 | 0 | std::pair<MetaServiceCode, std::string> ret = task->wait(); | 4001 | 0 | if (ret.first != MetaServiceCode::OK) { | 4002 | 0 | LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first | 4003 | 0 | << "msg=" << ret.second; | 4004 | 0 | return -1; | 4005 | 0 | } | 4006 | 0 | ++num_advance; | 4007 | 0 | return 0; | 4008 | 6 | } else { | 4009 | 6 | TxnRunningPB txn_running_pb; | 4010 | 6 | if (!txn_running_pb.ParseFromArray(v.data(), v.size())) { | 4011 | 0 | LOG_WARNING("malformed txn_running_pb").tag("key", hex(k)); | 4012 | 0 | return -1; | 4013 | 0 | } | 4014 | 6 | if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) { | 4015 | 4 | return 0; | 4016 | 4 | } | 4017 | 2 | ++num_timeout; | 4018 | | | 4019 | 2 | DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE); | 4020 | 2 | txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED); | 4021 | 2 | txn_info.set_finish_time(current_time); | 4022 | 2 | txn_info.set_reason("timeout"); | 4023 | 2 | VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString(); | 4024 | 2 | txn_inf_val.clear(); | 4025 | 2 | if (!txn_info.SerializeToString(&txn_inf_val)) { | 4026 | 0 | LOG_WARNING("failed to serialize txn info").tag("key", hex(k)); | 4027 | 0 | return -1; | 4028 | 0 | } | 4029 | 2 | txn->put(txn_inf_key, txn_inf_val); | 4030 | 2 | VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key); | 4031 | | // Put recycle txn key | 4032 | 2 | std::string recyc_txn_key, recyc_txn_val; | 4033 | 2 | recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key); | 4034 | 2 | RecycleTxnPB recycle_txn_pb; | 4035 | 2 | recycle_txn_pb.set_creation_time(current_time); | 4036 | 2 | recycle_txn_pb.set_label(txn_info.label()); | 4037 | 2 | if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) { | 4038 | 0 | LOG_WARNING("failed to serialize txn recycle 
info") | 4039 | 0 | .tag("key", hex(k)) | 4040 | 0 | .tag("db_id", db_id) | 4041 | 0 | .tag("txn_id", txn_id); | 4042 | 0 | return -1; | 4043 | 0 | } | 4044 | 2 | txn->put(recyc_txn_key, recyc_txn_val); | 4045 | | // Remove txn running key | 4046 | 2 | txn->remove(k); | 4047 | 2 | err = txn->commit(); | 4048 | 2 | if (err != TxnErrorCode::TXN_OK) { | 4049 | 0 | LOG_WARNING("failed to commit txn err={}", err) | 4050 | 0 | .tag("key", hex(k)) | 4051 | 0 | .tag("db_id", db_id) | 4052 | 0 | .tag("txn_id", txn_id); | 4053 | 0 | return -1; | 4054 | 0 | } | 4055 | 2 | metrics_context.total_recycled_num = ++num_abort; | 4056 | 2 | metrics_context.report(); | 4057 | 2 | } | 4058 | | | 4059 | 2 | return 0; | 4060 | 6 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 3961 | 4 | this](std::string_view k, std::string_view v) -> int { | 3962 | 4 | ++num_scanned; | 3963 | | | 3964 | 4 | std::unique_ptr<Transaction> txn; | 3965 | 4 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 3966 | 4 | if (err != TxnErrorCode::TXN_OK) { | 3967 | 0 | LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k)); | 3968 | 0 | return -1; | 3969 | 0 | } | 3970 | 4 | std::string_view k1 = k; | 3971 | | //TxnRunningKeyInfo 0:instance_id 1:db_id 2:txn_id | 3972 | 4 | k1.remove_prefix(1); // Remove key space | 3973 | 4 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 3974 | 4 | if (decode_key(&k1, &out) != 0) { | 3975 | 0 | LOG_ERROR("failed to decode key").tag("key", hex(k)); | 3976 | 0 | return -1; | 3977 | 0 | } | 3978 | 4 | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); | 3979 | 4 | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); | 3980 | 4 | VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id; | 3981 | | // Update txn_info | 3982 | 4 | std::string txn_inf_key, txn_inf_val; | 3983 | 4 | txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key); | 3984 | 4 | err = txn->get(txn_inf_key, &txn_inf_val); | 3985 | 4 | if (err != TxnErrorCode::TXN_OK) { | 3986 | 0 | LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key)); | 3987 | 0 | return -1; | 3988 | 0 | } | 3989 | 4 | TxnInfoPB txn_info; | 3990 | 4 | if (!txn_info.ParseFromString(txn_inf_val)) { | 3991 | 0 | LOG_WARNING("failed to parse txn info").tag("key", hex(k)); | 3992 | 0 | return -1; | 3993 | 0 | } | 3994 | | | 3995 | 4 | if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) { | 3996 | 4 | txn.reset(); | 3997 | 4 | TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info); | 3998 | 4 | 
std::shared_ptr<TxnLazyCommitTask> task = | 3999 | 4 | txn_lazy_committer_->submit(instance_id_, txn_info.txn_id()); | 4000 | 4 | std::pair<MetaServiceCode, std::string> ret = task->wait(); | 4001 | 4 | if (ret.first != MetaServiceCode::OK) { | 4002 | 0 | LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first | 4003 | 0 | << "msg=" << ret.second; | 4004 | 0 | return -1; | 4005 | 0 | } | 4006 | 4 | ++num_advance; | 4007 | 4 | return 0; | 4008 | 4 | } else { | 4009 | 0 | TxnRunningPB txn_running_pb; | 4010 | 0 | if (!txn_running_pb.ParseFromArray(v.data(), v.size())) { | 4011 | 0 | LOG_WARNING("malformed txn_running_pb").tag("key", hex(k)); | 4012 | 0 | return -1; | 4013 | 0 | } | 4014 | 0 | if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) { | 4015 | 0 | return 0; | 4016 | 0 | } | 4017 | 0 | ++num_timeout; | 4018 | |
| 4019 | 0 | DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE); | 4020 | 0 | txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED); | 4021 | 0 | txn_info.set_finish_time(current_time); | 4022 | 0 | txn_info.set_reason("timeout"); | 4023 | 0 | VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString(); | 4024 | 0 | txn_inf_val.clear(); | 4025 | 0 | if (!txn_info.SerializeToString(&txn_inf_val)) { | 4026 | 0 | LOG_WARNING("failed to serialize txn info").tag("key", hex(k)); | 4027 | 0 | return -1; | 4028 | 0 | } | 4029 | 0 | txn->put(txn_inf_key, txn_inf_val); | 4030 | 0 | VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key); | 4031 | | // Put recycle txn key | 4032 | 0 | std::string recyc_txn_key, recyc_txn_val; | 4033 | 0 | recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key); | 4034 | 0 | RecycleTxnPB recycle_txn_pb; | 4035 | 0 | recycle_txn_pb.set_creation_time(current_time); | 4036 | 0 | recycle_txn_pb.set_label(txn_info.label()); | 4037 | 0 | if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) { | 4038 | 0 | LOG_WARNING("failed to serialize txn recycle info") | 4039 | 0 | .tag("key", hex(k)) | 4040 | 0 | .tag("db_id", db_id) | 4041 | 0 | .tag("txn_id", txn_id); | 4042 | 0 | return -1; | 4043 | 0 | } | 4044 | 0 | txn->put(recyc_txn_key, recyc_txn_val); | 4045 | | // Remove txn running key | 4046 | 0 | txn->remove(k); | 4047 | 0 | err = txn->commit(); | 4048 | 0 | if (err != TxnErrorCode::TXN_OK) { | 4049 | 0 | LOG_WARNING("failed to commit txn err={}", err) | 4050 | 0 | .tag("key", hex(k)) | 4051 | 0 | .tag("db_id", db_id) | 4052 | 0 | .tag("txn_id", txn_id); | 4053 | 0 | return -1; | 4054 | 0 | } | 4055 | 0 | metrics_context.total_recycled_num = ++num_abort; | 4056 | 0 | metrics_context.report(); | 4057 | 0 | } | 4058 | | | 4059 | 0 | return 0; | 4060 | 4 | }; |
|
4061 | | |
4062 | 20 | if (config::enable_recycler_stats_metrics) { |
4063 | 0 | scan_and_statistics_abort_timeout_txn(); |
4064 | 0 | } |
4065 | | // recycle_func and loop_done for scan and recycle |
4066 | 20 | return scan_and_recycle(begin_txn_running_key, end_txn_running_key, |
4067 | 20 | std::move(handle_txn_running_kv)); |
4068 | 20 | } |
4069 | | |
4070 | 21 | int InstanceRecycler::recycle_expired_txn_label() { |
4071 | 21 | const std::string task_name = "recycle_expired_txn_label"; |
4072 | 21 | int64_t num_scanned = 0; |
4073 | 21 | int64_t num_expired = 0; |
4074 | 21 | int64_t num_recycled = 0; |
4075 | 21 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
4076 | 21 | int ret = 0; |
4077 | | |
4078 | 21 | RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0}; |
4079 | 21 | RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX}; |
4080 | 21 | std::string begin_recycle_txn_key; |
4081 | 21 | std::string end_recycle_txn_key; |
4082 | 21 | recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key); |
4083 | 21 | recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key); |
4084 | 21 | std::vector<std::string> recycle_txn_info_keys; |
4085 | | |
4086 | 21 | LOG_WARNING("begin to recycle expired txn").tag("instance_id", instance_id_); |
4087 | | |
4088 | 21 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
4089 | 21 | register_recycle_task(task_name, start_time); |
4090 | 21 | DORIS_CLOUD_DEFER { |
4091 | 21 | unregister_recycle_task(task_name); |
4092 | 21 | int64_t cost = |
4093 | 21 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
4094 | 21 | metrics_context.finish_report(); |
4095 | 21 | LOG_WARNING("end to recycle expired txn, cost={}s", cost) |
4096 | 21 | .tag("instance_id", instance_id_) |
4097 | 21 | .tag("num_scanned", num_scanned) |
4098 | 21 | .tag("num_expired", num_expired) |
4099 | 21 | .tag("num_recycled", num_recycled); |
4100 | 21 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv Line | Count | Source | 4090 | 18 | DORIS_CLOUD_DEFER { | 4091 | 18 | unregister_recycle_task(task_name); | 4092 | 18 | int64_t cost = | 4093 | 18 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 4094 | 18 | metrics_context.finish_report(); | 4095 | 18 | LOG_WARNING("end to recycle expired txn, cost={}s", cost) | 4096 | 18 | .tag("instance_id", instance_id_) | 4097 | 18 | .tag("num_scanned", num_scanned) | 4098 | 18 | .tag("num_expired", num_expired) | 4099 | 18 | .tag("num_recycled", num_recycled); | 4100 | 18 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv Line | Count | Source | 4090 | 3 | DORIS_CLOUD_DEFER { | 4091 | 3 | unregister_recycle_task(task_name); | 4092 | 3 | int64_t cost = | 4093 | 3 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 4094 | 3 | metrics_context.finish_report(); | 4095 | 3 | LOG_WARNING("end to recycle expired txn, cost={}s", cost) | 4096 | 3 | .tag("instance_id", instance_id_) | 4097 | 3 | .tag("num_scanned", num_scanned) | 4098 | 3 | .tag("num_expired", num_expired) | 4099 | 3 | .tag("num_recycled", num_recycled); | 4100 | 3 | }; |
|
4101 | | |
4102 | 21 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
4103 | | |
4104 | 21 | SyncExecutor<int> concurrent_delete_executor( |
4105 | 21 | _thread_pool_group.s3_producer_pool, |
4106 | 21 | fmt::format("recycle expired txn label, instance id {}", instance_id_), |
4107 | 23.0k | [](const int& ret) { return ret != 0; }); recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi Line | Count | Source | 4107 | 23.0k | [](const int& ret) { return ret != 0; }); |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi Line | Count | Source | 4107 | 3 | [](const int& ret) { return ret != 0; }); |
|
4108 | | |
4109 | 21 | int64_t current_time_ms = |
4110 | 21 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
4111 | | |
4112 | 30.0k | auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int { |
4113 | 30.0k | ++num_scanned; |
4114 | 30.0k | RecycleTxnPB recycle_txn_pb; |
4115 | 30.0k | if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) { |
4116 | 0 | LOG_WARNING("malformed txn_running_pb").tag("key", hex(k)); |
4117 | 0 | return -1; |
4118 | 0 | } |
4119 | 30.0k | if ((config::force_immediate_recycle) || |
4120 | 30.0k | (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) || |
4121 | 30.0k | (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <= |
4122 | 30.0k | current_time_ms)) { |
4123 | 23.0k | VLOG_DEBUG << "found recycle txn, key=" << hex(k); |
4124 | 23.0k | num_expired++; |
4125 | 23.0k | recycle_txn_info_keys.emplace_back(k); |
4126 | 23.0k | } |
4127 | 30.0k | return 0; |
4128 | 30.0k | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 4112 | 30.0k | auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int { | 4113 | 30.0k | ++num_scanned; | 4114 | 30.0k | RecycleTxnPB recycle_txn_pb; | 4115 | 30.0k | if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) { | 4116 | 0 | LOG_WARNING("malformed txn_running_pb").tag("key", hex(k)); | 4117 | 0 | return -1; | 4118 | 0 | } | 4119 | 30.0k | if ((config::force_immediate_recycle) || | 4120 | 30.0k | (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) || | 4121 | 30.0k | (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <= | 4122 | 30.0k | current_time_ms)) { | 4123 | 23.0k | VLOG_DEBUG << "found recycle txn, key=" << hex(k); | 4124 | 23.0k | num_expired++; | 4125 | 23.0k | recycle_txn_info_keys.emplace_back(k); | 4126 | 23.0k | } | 4127 | 30.0k | return 0; | 4128 | 30.0k | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 4112 | 3 | auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int { | 4113 | 3 | ++num_scanned; | 4114 | 3 | RecycleTxnPB recycle_txn_pb; | 4115 | 3 | if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) { | 4116 | 0 | LOG_WARNING("malformed txn_running_pb").tag("key", hex(k)); | 4117 | 0 | return -1; | 4118 | 0 | } | 4119 | 3 | if ((config::force_immediate_recycle) || | 4120 | 3 | (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) || | 4121 | 3 | (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <= | 4122 | 3 | current_time_ms)) { | 4123 | 3 | VLOG_DEBUG << "found recycle txn, key=" << hex(k); | 4124 | 3 | num_expired++; | 4125 | 3 | recycle_txn_info_keys.emplace_back(k); | 4126 | 3 | } | 4127 | 3 | return 0; | 4128 | 3 | }; |
|
4129 | | |
4130 | | // int 0 for success, 1 for conflict, -1 for error |
4131 | 23.0k | auto delete_recycle_txn_kv = [&](const std::string& k) -> int { |
4132 | 23.0k | std::string_view k1 = k; |
4133 | | //RecycleTxnKeyInfo 0:instance_id 1:db_id 2:txn_id |
4134 | 23.0k | k1.remove_prefix(1); // Remove key space |
4135 | 23.0k | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
4136 | 23.0k | int ret = decode_key(&k1, &out); |
4137 | 23.0k | if (ret != 0) { |
4138 | 0 | LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k)); |
4139 | 0 | return -1; |
4140 | 0 | } |
4141 | 23.0k | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); |
4142 | 23.0k | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); |
4143 | 23.0k | VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id; |
4144 | 23.0k | std::unique_ptr<Transaction> txn; |
4145 | 23.0k | TxnErrorCode err = txn_kv_->create_txn(&txn); |
4146 | 23.0k | if (err != TxnErrorCode::TXN_OK) { |
4147 | 0 | LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k)); |
4148 | 0 | return -1; |
4149 | 0 | } |
4150 | | // Remove txn index kv |
4151 | 23.0k | auto index_key = txn_index_key({instance_id_, txn_id}); |
4152 | 23.0k | txn->remove(index_key); |
4153 | | // Remove txn info kv |
4154 | 23.0k | std::string info_key, info_val; |
4155 | 23.0k | txn_info_key({instance_id_, db_id, txn_id}, &info_key); |
4156 | 23.0k | err = txn->get(info_key, &info_val); |
4157 | 23.0k | if (err != TxnErrorCode::TXN_OK) { |
4158 | 0 | LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key)); |
4159 | 0 | return -1; |
4160 | 0 | } |
4161 | 23.0k | TxnInfoPB txn_info; |
4162 | 23.0k | if (!txn_info.ParseFromString(info_val)) { |
4163 | 0 | LOG_WARNING("failed to parse txn info").tag("key", hex(info_key)); |
4164 | 0 | return -1; |
4165 | 0 | } |
4166 | 23.0k | txn->remove(info_key); |
4167 | | // Remove sub txn index kvs |
4168 | 23.0k | std::vector<std::string> sub_txn_index_keys; |
4169 | 23.0k | for (auto sub_txn_id : txn_info.sub_txn_ids()) { |
4170 | 22.9k | auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id}); |
4171 | 22.9k | sub_txn_index_keys.push_back(sub_txn_index_key); |
4172 | 22.9k | } |
4173 | 23.0k | for (auto& sub_txn_index_key : sub_txn_index_keys) { |
4174 | 22.9k | txn->remove(sub_txn_index_key); |
4175 | 22.9k | } |
4176 | | // Update txn label |
4177 | 23.0k | std::string label_key, label_val; |
4178 | 23.0k | txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key); |
4179 | 23.0k | err = txn->get(label_key, &label_val); |
4180 | 23.0k | if (err != TxnErrorCode::TXN_OK) { |
4181 | 0 | LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key |
4182 | 0 | << " err=" << err; |
4183 | 0 | return -1; |
4184 | 0 | } |
4185 | 23.0k | TxnLabelPB txn_label; |
4186 | 23.0k | if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) { |
4187 | 0 | LOG_WARNING("failed to parse txn label").tag("key", hex(label_key)); |
4188 | 0 | return -1; |
4189 | 0 | } |
4190 | 23.0k | auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id); |
4191 | 23.0k | if (it != txn_label.txn_ids().end()) { |
4192 | 23.0k | txn_label.mutable_txn_ids()->erase(it); |
4193 | 23.0k | } |
4194 | 23.0k | if (txn_label.txn_ids().empty()) { |
4195 | 23.0k | txn->remove(label_key); |
4196 | 23.0k | TEST_SYNC_POINT_CALLBACK( |
4197 | 23.0k | "InstanceRecycler::recycle_expired_txn_label.remove_label_before"); |
4198 | 23.0k | } else { |
4199 | 74 | if (!txn_label.SerializeToString(&label_val)) { |
4200 | 0 | LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key); |
4201 | 0 | return -1; |
4202 | 0 | } |
4203 | 74 | TEST_SYNC_POINT_CALLBACK( |
4204 | 74 | "InstanceRecycler::recycle_expired_txn_label.update_label_before"); |
4205 | 74 | txn->atomic_set_ver_value(label_key, label_val); |
4206 | 74 | TEST_SYNC_POINT_CALLBACK( |
4207 | 74 | "InstanceRecycler::recycle_expired_txn_label.update_label_after"); |
4208 | 74 | } |
4209 | | // Remove recycle txn kv |
4210 | 23.0k | txn->remove(k); |
4211 | 23.0k | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit"); |
4212 | 23.0k | err = txn->commit(); |
4213 | 23.0k | if (err != TxnErrorCode::TXN_OK) { |
4214 | 62 | if (err == TxnErrorCode::TXN_CONFLICT) { |
4215 | 62 | TEST_SYNC_POINT_CALLBACK( |
4216 | 62 | "InstanceRecycler::recycle_expired_txn_label.txn_conflict"); |
4217 | | // log the txn_id and label |
4218 | 62 | LOG(WARNING) << "txn conflict, txn_id=" << txn_id |
4219 | 62 | << " txn_label_pb=" << txn_label.ShortDebugString() |
4220 | 62 | << " txn_label=" << txn_info.label(); |
4221 | 62 | return 1; |
4222 | 62 | } |
4223 | 0 | LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k); |
4224 | 0 | return -1; |
4225 | 62 | } |
4226 | 23.0k | metrics_context.total_recycled_num = ++num_recycled; |
4227 | 23.0k | metrics_context.report(); |
4228 | | |
4229 | 23.0k | LOG(INFO) << "recycle expired txn, key=" << hex(k); |
4230 | 23.0k | return 0; |
4231 | 23.0k | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 4131 | 23.0k | auto delete_recycle_txn_kv = [&](const std::string& k) -> int { | 4132 | 23.0k | std::string_view k1 = k; | 4133 | | //RecycleTxnKeyInfo 0:instance_id 1:db_id 2:txn_id | 4134 | 23.0k | k1.remove_prefix(1); // Remove key space | 4135 | 23.0k | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 4136 | 23.0k | int ret = decode_key(&k1, &out); | 4137 | 23.0k | if (ret != 0) { | 4138 | 0 | LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k)); | 4139 | 0 | return -1; | 4140 | 0 | } | 4141 | 23.0k | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); | 4142 | 23.0k | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); | 4143 | 23.0k | VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id; | 4144 | 23.0k | std::unique_ptr<Transaction> txn; | 4145 | 23.0k | TxnErrorCode err = txn_kv_->create_txn(&txn); | 4146 | 23.0k | if (err != TxnErrorCode::TXN_OK) { | 4147 | 0 | LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k)); | 4148 | 0 | return -1; | 4149 | 0 | } | 4150 | | // Remove txn index kv | 4151 | 23.0k | auto index_key = txn_index_key({instance_id_, txn_id}); | 4152 | 23.0k | txn->remove(index_key); | 4153 | | // Remove txn info kv | 4154 | 23.0k | std::string info_key, info_val; | 4155 | 23.0k | txn_info_key({instance_id_, db_id, txn_id}, &info_key); | 4156 | 23.0k | err = txn->get(info_key, &info_val); | 4157 | 23.0k | if (err != TxnErrorCode::TXN_OK) { | 4158 | 0 | LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key)); | 4159 | 0 | return -1; | 4160 | 0 | } | 4161 | 23.0k | TxnInfoPB txn_info; | 4162 | 23.0k | if (!txn_info.ParseFromString(info_val)) { | 4163 | 0 | LOG_WARNING("failed to parse txn info").tag("key", hex(info_key)); | 4164 | 0 | return 
-1; | 4165 | 0 | } | 4166 | 23.0k | txn->remove(info_key); | 4167 | | // Remove sub txn index kvs | 4168 | 23.0k | std::vector<std::string> sub_txn_index_keys; | 4169 | 23.0k | for (auto sub_txn_id : txn_info.sub_txn_ids()) { | 4170 | 22.9k | auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id}); | 4171 | 22.9k | sub_txn_index_keys.push_back(sub_txn_index_key); | 4172 | 22.9k | } | 4173 | 23.0k | for (auto& sub_txn_index_key : sub_txn_index_keys) { | 4174 | 22.9k | txn->remove(sub_txn_index_key); | 4175 | 22.9k | } | 4176 | | // Update txn label | 4177 | 23.0k | std::string label_key, label_val; | 4178 | 23.0k | txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key); | 4179 | 23.0k | err = txn->get(label_key, &label_val); | 4180 | 23.0k | if (err != TxnErrorCode::TXN_OK) { | 4181 | 0 | LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key | 4182 | 0 | << " err=" << err; | 4183 | 0 | return -1; | 4184 | 0 | } | 4185 | 23.0k | TxnLabelPB txn_label; | 4186 | 23.0k | if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) { | 4187 | 0 | LOG_WARNING("failed to parse txn label").tag("key", hex(label_key)); | 4188 | 0 | return -1; | 4189 | 0 | } | 4190 | 23.0k | auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id); | 4191 | 23.0k | if (it != txn_label.txn_ids().end()) { | 4192 | 23.0k | txn_label.mutable_txn_ids()->erase(it); | 4193 | 23.0k | } | 4194 | 23.0k | if (txn_label.txn_ids().empty()) { | 4195 | 23.0k | txn->remove(label_key); | 4196 | 23.0k | TEST_SYNC_POINT_CALLBACK( | 4197 | 23.0k | "InstanceRecycler::recycle_expired_txn_label.remove_label_before"); | 4198 | 23.0k | } else { | 4199 | 74 | if (!txn_label.SerializeToString(&label_val)) { | 4200 | 0 | LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key); | 4201 | 0 | return -1; | 4202 | 0 | } | 4203 | 74 | TEST_SYNC_POINT_CALLBACK( | 4204 | 74 | 
"InstanceRecycler::recycle_expired_txn_label.update_label_before"); | 4205 | 74 | txn->atomic_set_ver_value(label_key, label_val); | 4206 | 74 | TEST_SYNC_POINT_CALLBACK( | 4207 | 74 | "InstanceRecycler::recycle_expired_txn_label.update_label_after"); | 4208 | 74 | } | 4209 | | // Remove recycle txn kv | 4210 | 23.0k | txn->remove(k); | 4211 | 23.0k | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit"); | 4212 | 23.0k | err = txn->commit(); | 4213 | 23.0k | if (err != TxnErrorCode::TXN_OK) { | 4214 | 62 | if (err == TxnErrorCode::TXN_CONFLICT) { | 4215 | 62 | TEST_SYNC_POINT_CALLBACK( | 4216 | 62 | "InstanceRecycler::recycle_expired_txn_label.txn_conflict"); | 4217 | | // log the txn_id and label | 4218 | 62 | LOG(WARNING) << "txn conflict, txn_id=" << txn_id | 4219 | 62 | << " txn_label_pb=" << txn_label.ShortDebugString() | 4220 | 62 | << " txn_label=" << txn_info.label(); | 4221 | 62 | return 1; | 4222 | 62 | } | 4223 | 0 | LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k); | 4224 | 0 | return -1; | 4225 | 62 | } | 4226 | 23.0k | metrics_context.total_recycled_num = ++num_recycled; | 4227 | 23.0k | metrics_context.report(); | 4228 | | | 4229 | 23.0k | LOG(INFO) << "recycle expired txn, key=" << hex(k); | 4230 | 23.0k | return 0; | 4231 | 23.0k | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 4131 | 3 | auto delete_recycle_txn_kv = [&](const std::string& k) -> int { | 4132 | 3 | std::string_view k1 = k; | 4133 | | //RecycleTxnKeyInfo 0:instance_id 1:db_id 2:txn_id | 4134 | 3 | k1.remove_prefix(1); // Remove key space | 4135 | 3 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 4136 | 3 | int ret = decode_key(&k1, &out); | 4137 | 3 | if (ret != 0) { | 4138 | 0 | LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k)); | 4139 | 0 | return -1; | 4140 | 0 | } | 4141 | 3 | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); | 4142 | 3 | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); | 4143 | 3 | VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id; | 4144 | 3 | std::unique_ptr<Transaction> txn; | 4145 | 3 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 4146 | 3 | if (err != TxnErrorCode::TXN_OK) { | 4147 | 0 | LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k)); | 4148 | 0 | return -1; | 4149 | 0 | } | 4150 | | // Remove txn index kv | 4151 | 3 | auto index_key = txn_index_key({instance_id_, txn_id}); | 4152 | 3 | txn->remove(index_key); | 4153 | | // Remove txn info kv | 4154 | 3 | std::string info_key, info_val; | 4155 | 3 | txn_info_key({instance_id_, db_id, txn_id}, &info_key); | 4156 | 3 | err = txn->get(info_key, &info_val); | 4157 | 3 | if (err != TxnErrorCode::TXN_OK) { | 4158 | 0 | LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key)); | 4159 | 0 | return -1; | 4160 | 0 | } | 4161 | 3 | TxnInfoPB txn_info; | 4162 | 3 | if (!txn_info.ParseFromString(info_val)) { | 4163 | 0 | LOG_WARNING("failed to parse txn info").tag("key", hex(info_key)); | 4164 | 0 | return -1; | 4165 | 0 | } | 4166 | 3 | txn->remove(info_key); | 4167 | | // Remove sub txn index kvs | 4168 | 3 
| std::vector<std::string> sub_txn_index_keys; | 4169 | 3 | for (auto sub_txn_id : txn_info.sub_txn_ids()) { | 4170 | 0 | auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id}); | 4171 | 0 | sub_txn_index_keys.push_back(sub_txn_index_key); | 4172 | 0 | } | 4173 | 3 | for (auto& sub_txn_index_key : sub_txn_index_keys) { | 4174 | 0 | txn->remove(sub_txn_index_key); | 4175 | 0 | } | 4176 | | // Update txn label | 4177 | 3 | std::string label_key, label_val; | 4178 | 3 | txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key); | 4179 | 3 | err = txn->get(label_key, &label_val); | 4180 | 3 | if (err != TxnErrorCode::TXN_OK) { | 4181 | 0 | LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key | 4182 | 0 | << " err=" << err; | 4183 | 0 | return -1; | 4184 | 0 | } | 4185 | 3 | TxnLabelPB txn_label; | 4186 | 3 | if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) { | 4187 | 0 | LOG_WARNING("failed to parse txn label").tag("key", hex(label_key)); | 4188 | 0 | return -1; | 4189 | 0 | } | 4190 | 3 | auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id); | 4191 | 3 | if (it != txn_label.txn_ids().end()) { | 4192 | 3 | txn_label.mutable_txn_ids()->erase(it); | 4193 | 3 | } | 4194 | 3 | if (txn_label.txn_ids().empty()) { | 4195 | 3 | txn->remove(label_key); | 4196 | 3 | TEST_SYNC_POINT_CALLBACK( | 4197 | 3 | "InstanceRecycler::recycle_expired_txn_label.remove_label_before"); | 4198 | 3 | } else { | 4199 | 0 | if (!txn_label.SerializeToString(&label_val)) { | 4200 | 0 | LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key); | 4201 | 0 | return -1; | 4202 | 0 | } | 4203 | 0 | TEST_SYNC_POINT_CALLBACK( | 4204 | 0 | "InstanceRecycler::recycle_expired_txn_label.update_label_before"); | 4205 | 0 | txn->atomic_set_ver_value(label_key, label_val); | 4206 | 0 | TEST_SYNC_POINT_CALLBACK( | 4207 | 0 | 
"InstanceRecycler::recycle_expired_txn_label.update_label_after"); | 4208 | 0 | } | 4209 | | // Remove recycle txn kv | 4210 | 3 | txn->remove(k); | 4211 | 3 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit"); | 4212 | 3 | err = txn->commit(); | 4213 | 3 | if (err != TxnErrorCode::TXN_OK) { | 4214 | 0 | if (err == TxnErrorCode::TXN_CONFLICT) { | 4215 | 0 | TEST_SYNC_POINT_CALLBACK( | 4216 | 0 | "InstanceRecycler::recycle_expired_txn_label.txn_conflict"); | 4217 | | // log the txn_id and label | 4218 | 0 | LOG(WARNING) << "txn conflict, txn_id=" << txn_id | 4219 | 0 | << " txn_label_pb=" << txn_label.ShortDebugString() | 4220 | 0 | << " txn_label=" << txn_info.label(); | 4221 | 0 | return 1; | 4222 | 0 | } | 4223 | 0 | LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k); | 4224 | 0 | return -1; | 4225 | 0 | } | 4226 | 3 | metrics_context.total_recycled_num = ++num_recycled; | 4227 | 3 | metrics_context.report(); | 4228 | | | 4229 | 3 | LOG(INFO) << "recycle expired txn, key=" << hex(k); | 4230 | 3 | return 0; | 4231 | 3 | }; |
|
4232 | | |
4233 | 21 | auto loop_done = [&]() -> int { |
4234 | 12 | DORIS_CLOUD_DEFER { |
4235 | 12 | recycle_txn_info_keys.clear(); |
4236 | 12 | }; recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv Line | Count | Source | 4234 | 9 | DORIS_CLOUD_DEFER { | 4235 | 9 | recycle_txn_info_keys.clear(); | 4236 | 9 | }; |
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv Line | Count | Source | 4234 | 3 | DORIS_CLOUD_DEFER { | 4235 | 3 | recycle_txn_info_keys.clear(); | 4236 | 3 | }; |
|
4237 | 12 | TEST_SYNC_POINT_CALLBACK( |
4238 | 12 | "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys", |
4239 | 12 | &recycle_txn_info_keys); |
4240 | 23.0k | for (const auto& k : recycle_txn_info_keys) { |
4241 | 23.0k | concurrent_delete_executor.add([&]() { |
4242 | 23.0k | int ret = delete_recycle_txn_kv(k); |
4243 | 23.0k | if (ret == 1) { |
4244 | 18 | constexpr int MAX_RETRY = 10; |
4245 | 54 | for (size_t i = 1; i <= MAX_RETRY; ++i) { |
4246 | 54 | LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k); |
4247 | 54 | ret = delete_recycle_txn_kv(k); |
4248 | | // clang-format off |
4249 | 54 | TEST_SYNC_POINT_CALLBACK( |
4250 | 54 | "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret); |
4251 | | // clang-format off |
4252 | 54 | if (ret != 1) { |
4253 | 18 | break; |
4254 | 18 | } |
4255 | | // random sleep 0-100 ms to retry |
4256 | 36 | std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100)); |
4257 | 36 | } |
4258 | 18 | } |
4259 | 23.0k | if (ret != 0) { |
4260 | 9 | LOG_WARNING("failed to delete recycle txn kv") |
4261 | 9 | .tag("instance id", instance_id_) |
4262 | 9 | .tag("key", hex(k)); |
4263 | 9 | return -1; |
4264 | 9 | } |
4265 | 23.0k | return 0; |
4266 | 23.0k | }); recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv Line | Count | Source | 4241 | 23.0k | concurrent_delete_executor.add([&]() { | 4242 | 23.0k | int ret = delete_recycle_txn_kv(k); | 4243 | 23.0k | if (ret == 1) { | 4244 | 18 | constexpr int MAX_RETRY = 10; | 4245 | 54 | for (size_t i = 1; i <= MAX_RETRY; ++i) { | 4246 | 54 | LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k); | 4247 | 54 | ret = delete_recycle_txn_kv(k); | 4248 | | // clang-format off | 4249 | 54 | TEST_SYNC_POINT_CALLBACK( | 4250 | 54 | "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret); | 4251 | | // clang-format off | 4252 | 54 | if (ret != 1) { | 4253 | 18 | break; | 4254 | 18 | } | 4255 | | // random sleep 0-100 ms to retry | 4256 | 36 | std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100)); | 4257 | 36 | } | 4258 | 18 | } | 4259 | 23.0k | if (ret != 0) { | 4260 | 9 | LOG_WARNING("failed to delete recycle txn kv") | 4261 | 9 | .tag("instance id", instance_id_) | 4262 | 9 | .tag("key", hex(k)); | 4263 | 9 | return -1; | 4264 | 9 | } | 4265 | 23.0k | return 0; | 4266 | 23.0k | }); |
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv Line | Count | Source | 4241 | 3 | concurrent_delete_executor.add([&]() { | 4242 | 3 | int ret = delete_recycle_txn_kv(k); | 4243 | 3 | if (ret == 1) { | 4244 | 0 | constexpr int MAX_RETRY = 10; | 4245 | 0 | for (size_t i = 1; i <= MAX_RETRY; ++i) { | 4246 | 0 | LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k); | 4247 | 0 | ret = delete_recycle_txn_kv(k); | 4248 | | // clang-format off | 4249 | 0 | TEST_SYNC_POINT_CALLBACK( | 4250 | 0 | "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret); | 4251 | | // clang-format off | 4252 | 0 | if (ret != 1) { | 4253 | 0 | break; | 4254 | 0 | } | 4255 | | // random sleep 0-100 ms to retry | 4256 | 0 | std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100)); | 4257 | 0 | } | 4258 | 0 | } | 4259 | 3 | if (ret != 0) { | 4260 | 0 | LOG_WARNING("failed to delete recycle txn kv") | 4261 | 0 | .tag("instance id", instance_id_) | 4262 | 0 | .tag("key", hex(k)); | 4263 | 0 | return -1; | 4264 | 0 | } | 4265 | 3 | return 0; | 4266 | 3 | }); |
|
4267 | 23.0k | } |
4268 | 12 | bool finished = true; |
4269 | 12 | std::vector<int> rets = concurrent_delete_executor.when_all(&finished); |
4270 | 23.0k | for (int r : rets) { |
4271 | 23.0k | if (r != 0) { |
4272 | 9 | ret = -1; |
4273 | 9 | } |
4274 | 23.0k | } |
4275 | | |
4276 | 12 | ret = finished ? ret : -1; |
4277 | | |
4278 | 12 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret); |
4279 | | |
4280 | 12 | if (ret != 0) { |
4281 | 3 | LOG_WARNING("recycle txn kv ret!=0") |
4282 | 3 | .tag("finished", finished) |
4283 | 3 | .tag("ret", ret) |
4284 | 3 | .tag("instance_id", instance_id_); |
4285 | 3 | return ret; |
4286 | 3 | } |
4287 | 9 | return ret; |
4288 | 12 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv Line | Count | Source | 4233 | 9 | auto loop_done = [&]() -> int { | 4234 | 9 | DORIS_CLOUD_DEFER { | 4235 | 9 | recycle_txn_info_keys.clear(); | 4236 | 9 | }; | 4237 | 9 | TEST_SYNC_POINT_CALLBACK( | 4238 | 9 | "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys", | 4239 | 9 | &recycle_txn_info_keys); | 4240 | 23.0k | for (const auto& k : recycle_txn_info_keys) { | 4241 | 23.0k | concurrent_delete_executor.add([&]() { | 4242 | 23.0k | int ret = delete_recycle_txn_kv(k); | 4243 | 23.0k | if (ret == 1) { | 4244 | 23.0k | constexpr int MAX_RETRY = 10; | 4245 | 23.0k | for (size_t i = 1; i <= MAX_RETRY; ++i) { | 4246 | 23.0k | LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k); | 4247 | 23.0k | ret = delete_recycle_txn_kv(k); | 4248 | | // clang-format off | 4249 | 23.0k | TEST_SYNC_POINT_CALLBACK( | 4250 | 23.0k | "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret); | 4251 | | // clang-format off | 4252 | 23.0k | if (ret != 1) { | 4253 | 23.0k | break; | 4254 | 23.0k | } | 4255 | | // random sleep 0-100 ms to retry | 4256 | 23.0k | std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100)); | 4257 | 23.0k | } | 4258 | 23.0k | } | 4259 | 23.0k | if (ret != 0) { | 4260 | 23.0k | LOG_WARNING("failed to delete recycle txn kv") | 4261 | 23.0k | .tag("instance id", instance_id_) | 4262 | 23.0k | .tag("key", hex(k)); | 4263 | 23.0k | return -1; | 4264 | 23.0k | } | 4265 | 23.0k | return 0; | 4266 | 23.0k | }); | 4267 | 23.0k | } | 4268 | 9 | bool finished = true; | 4269 | 9 | std::vector<int> rets = concurrent_delete_executor.when_all(&finished); | 4270 | 23.0k | for (int r : rets) { | 4271 | 23.0k | if (r != 0) { | 4272 | 9 | ret = -1; | 4273 | 9 | } | 4274 | 23.0k | } | 4275 | | | 4276 | 9 | ret = finished ? 
ret : -1; | 4277 | | | 4278 | 9 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret); | 4279 | | | 4280 | 9 | if (ret != 0) { | 4281 | 3 | LOG_WARNING("recycle txn kv ret!=0") | 4282 | 3 | .tag("finished", finished) | 4283 | 3 | .tag("ret", ret) | 4284 | 3 | .tag("instance_id", instance_id_); | 4285 | 3 | return ret; | 4286 | 3 | } | 4287 | 6 | return ret; | 4288 | 9 | }; |
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv Line | Count | Source | 4233 | 3 | auto loop_done = [&]() -> int { | 4234 | 3 | DORIS_CLOUD_DEFER { | 4235 | 3 | recycle_txn_info_keys.clear(); | 4236 | 3 | }; | 4237 | 3 | TEST_SYNC_POINT_CALLBACK( | 4238 | 3 | "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys", | 4239 | 3 | &recycle_txn_info_keys); | 4240 | 3 | for (const auto& k : recycle_txn_info_keys) { | 4241 | 3 | concurrent_delete_executor.add([&]() { | 4242 | 3 | int ret = delete_recycle_txn_kv(k); | 4243 | 3 | if (ret == 1) { | 4244 | 3 | constexpr int MAX_RETRY = 10; | 4245 | 3 | for (size_t i = 1; i <= MAX_RETRY; ++i) { | 4246 | 3 | LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k); | 4247 | 3 | ret = delete_recycle_txn_kv(k); | 4248 | | // clang-format off | 4249 | 3 | TEST_SYNC_POINT_CALLBACK( | 4250 | 3 | "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret); | 4251 | | // clang-format off | 4252 | 3 | if (ret != 1) { | 4253 | 3 | break; | 4254 | 3 | } | 4255 | | // random sleep 0-100 ms to retry | 4256 | 3 | std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100)); | 4257 | 3 | } | 4258 | 3 | } | 4259 | 3 | if (ret != 0) { | 4260 | 3 | LOG_WARNING("failed to delete recycle txn kv") | 4261 | 3 | .tag("instance id", instance_id_) | 4262 | 3 | .tag("key", hex(k)); | 4263 | 3 | return -1; | 4264 | 3 | } | 4265 | 3 | return 0; | 4266 | 3 | }); | 4267 | 3 | } | 4268 | 3 | bool finished = true; | 4269 | 3 | std::vector<int> rets = concurrent_delete_executor.when_all(&finished); | 4270 | 3 | for (int r : rets) { | 4271 | 3 | if (r != 0) { | 4272 | 0 | ret = -1; | 4273 | 0 | } | 4274 | 3 | } | 4275 | | | 4276 | 3 | ret = finished ? 
ret : -1; | 4277 | | | 4278 | 3 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret); | 4279 | | | 4280 | 3 | if (ret != 0) { | 4281 | 0 | LOG_WARNING("recycle txn kv ret!=0") | 4282 | 0 | .tag("finished", finished) | 4283 | 0 | .tag("ret", ret) | 4284 | 0 | .tag("instance_id", instance_id_); | 4285 | 0 | return ret; | 4286 | 0 | } | 4287 | 3 | return ret; | 4288 | 3 | }; |
|
4289 | | |
4290 | 21 | if (config::enable_recycler_stats_metrics) { |
4291 | 0 | scan_and_statistics_expired_txn_label(); |
4292 | 0 | } |
4293 | | // recycle_func and loop_done for scan and recycle |
4294 | 21 | return scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key, |
4295 | 21 | std::move(handle_recycle_txn_kv), std::move(loop_done)); |
4296 | 21 | } |
4297 | | |
// Identifiers of one copy job, carried alongside the job so that log lines can be traced back
// to the instance / stage / table / copy statement they belong to.
struct CopyJobIdTuple {
    std::string instance_id;
    std::string stage_id;
    // Zero-initialized so that a default-constructed tuple never exposes an indeterminate value.
    long table_id = 0;
    std::string copy_id;
    std::string stage_path;
};
4305 | | struct BatchObjStoreAccessor { |
4306 | | BatchObjStoreAccessor(std::shared_ptr<StorageVaultAccessor> accessor, uint64_t& batch_count, |
4307 | | TxnKv* txn_kv) |
4308 | 3 | : accessor_(std::move(accessor)), batch_count_(batch_count), txn_kv_(txn_kv) {}; |
4309 | 3 | ~BatchObjStoreAccessor() { |
4310 | 3 | if (!paths_.empty()) { |
4311 | 3 | consume(); |
4312 | 3 | } |
4313 | 3 | } |
4314 | | |
4315 | | /** |
4316 | | * To implicitely do batch work and submit the batch delete task to s3 |
4317 | | * The s3 delete opreations would be done in batches, and then delete CopyJobPB key one by one |
4318 | | * |
4319 | | * @param copy_job The protubuf struct consists of the copy job files. |
4320 | | * @param key The copy job's key on fdb, the key is originally occupied by fdb range iterator, to make sure |
4321 | | * it would last until we finish the delete task, here we need pass one string value |
4322 | | * @param cope_job_id_tuple One tuple {log_trace instance_id, stage_id, table_id, query_id, stage_path} to print log |
4323 | | */ |
4324 | 5 | void add(CopyJobPB copy_job, std::string key, const CopyJobIdTuple cope_job_id_tuple) { |
4325 | 5 | auto& [instance_id, stage_id, table_id, copy_id, path] = cope_job_id_tuple; |
4326 | 5 | auto& file_keys = copy_file_keys_[key]; |
4327 | 5 | file_keys.log_trace = |
4328 | 5 | fmt::format("instance_id={}, stage_id={}, table_id={}, query_id={}, path={}", |
4329 | 5 | instance_id, stage_id, table_id, copy_id, path); |
4330 | 5 | std::string_view log_trace = file_keys.log_trace; |
4331 | 2.03k | for (const auto& file : copy_job.object_files()) { |
4332 | 2.03k | auto relative_path = file.relative_path(); |
4333 | 2.03k | paths_.push_back(relative_path); |
4334 | 2.03k | file_keys.keys.push_back(copy_file_key( |
4335 | 2.03k | {instance_id, stage_id, table_id, file.relative_path(), file.etag()})); |
4336 | 2.03k | LOG_INFO(log_trace) |
4337 | 2.03k | .tag("relative_path", relative_path) |
4338 | 2.03k | .tag("batch_count", batch_count_); |
4339 | 2.03k | } |
4340 | 5 | LOG_INFO(log_trace) |
4341 | 5 | .tag("objects_num", copy_job.object_files().size()) |
4342 | 5 | .tag("batch_count", batch_count_); |
4343 | | // TODO(AlexYue): If the size is 1001, it would be one delete with 1000 objects and one delete request with only one object(**ATTN**: DOESN'T |
4344 | | // recommend using delete objects when objects num is less than 10) |
4345 | 5 | if (paths_.size() < 1000) { |
4346 | 3 | return; |
4347 | 3 | } |
4348 | 2 | consume(); |
4349 | 2 | } |
4350 | | |
4351 | | private: |
4352 | 5 | void consume() { |
4353 | 5 | DORIS_CLOUD_DEFER { |
4354 | 5 | paths_.clear(); |
4355 | 5 | copy_file_keys_.clear(); |
4356 | 5 | batch_count_++; |
4357 | | |
4358 | 5 | LOG_WARNING("begin to delete {} internal stage objects in batch {}", paths_.size(), |
4359 | 5 | batch_count_); |
4360 | 5 | }; |
4361 | | |
4362 | 5 | StopWatch sw; |
4363 | | // TODO(yuejing): 在accessor的delete_objets的实现里可以考虑如果_paths数量不超过10个的话,就直接发10个delete objection operation而不是发post |
4364 | 5 | if (0 != accessor_->delete_files(paths_)) { |
4365 | 2 | LOG_WARNING("failed to delete {} internal stage objects in batch {} and it takes {} us", |
4366 | 2 | paths_.size(), batch_count_, sw.elapsed_us()); |
4367 | 2 | return; |
4368 | 2 | } |
4369 | 3 | LOG_WARNING("succeed to delete {} internal stage objects in batch {} and it takes {} us", |
4370 | 3 | paths_.size(), batch_count_, sw.elapsed_us()); |
4371 | | // delete fdb's keys |
4372 | 3 | for (auto& file_keys : copy_file_keys_) { |
4373 | 3 | auto& [log_trace, keys] = file_keys.second; |
4374 | 3 | std::unique_ptr<Transaction> txn; |
4375 | 3 | if (txn_kv_->create_txn(&txn) != cloud::TxnErrorCode::TXN_OK) { |
4376 | 0 | LOG(WARNING) << "failed to create txn"; |
4377 | 0 | continue; |
4378 | 0 | } |
4379 | | // FIXME: We have already limited the file num and file meta size when selecting file in FE. |
4380 | | // And if too many copy files, begin_copy failed commit too. So here the copy file keys are |
4381 | | // limited, should not cause the txn commit failed. |
4382 | 1.02k | for (const auto& key : keys) { |
4383 | 1.02k | txn->remove(key); |
4384 | 1.02k | LOG_INFO("remove copy_file_key={}, {}", hex(key), log_trace); |
4385 | 1.02k | } |
4386 | 3 | txn->remove(file_keys.first); |
4387 | 3 | if (auto ret = txn->commit(); ret != cloud::TxnErrorCode::TXN_OK) { |
4388 | 0 | LOG(WARNING) << "failed to commit txn ret is " << ret; |
4389 | 0 | continue; |
4390 | 0 | } |
4391 | 3 | } |
4392 | 3 | } |
4393 | | std::shared_ptr<StorageVaultAccessor> accessor_; |
4394 | | // the path of the s3 files to be deleted |
4395 | | std::vector<std::string> paths_; |
4396 | | struct CopyFiles { |
4397 | | std::string log_trace; |
4398 | | std::vector<std::string> keys; |
4399 | | }; |
4400 | | // pair<std::string, std::vector<std::string>> |
4401 | | // first: instance_id_ stage_id table_id query_id |
4402 | | // second: keys to be deleted |
4403 | | // <fdb key, <{instance_id_ stage_id table_id query_id}, file keys to be deleted>> |
4404 | | std::unordered_map<std::string, CopyFiles> copy_file_keys_; |
4405 | | // used to distinguish different batch tasks, the task log consists of thread ID and batch number |
4406 | | // which can together uniquely identifies different tasks for tracing log |
4407 | | uint64_t& batch_count_; |
4408 | | TxnKv* txn_kv_; |
4409 | | }; |
4410 | | |
4411 | 13 | int InstanceRecycler::recycle_copy_jobs() { |
4412 | 13 | int64_t num_scanned = 0; |
4413 | 13 | int64_t num_finished = 0; |
4414 | 13 | int64_t num_expired = 0; |
4415 | 13 | int64_t num_recycled = 0; |
4416 | | // Used for INTERNAL stage's copy jobs to tag each batch for log trace |
4417 | 13 | uint64_t batch_count = 0; |
4418 | 13 | const std::string task_name = "recycle_copy_jobs"; |
4419 | 13 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
4420 | | |
4421 | 13 | LOG_WARNING("begin to recycle copy jobs").tag("instance_id", instance_id_); |
4422 | | |
4423 | 13 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
4424 | 13 | register_recycle_task(task_name, start_time); |
4425 | | |
4426 | 13 | DORIS_CLOUD_DEFER { |
4427 | 13 | unregister_recycle_task(task_name); |
4428 | 13 | int64_t cost = |
4429 | 13 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
4430 | 13 | metrics_context.finish_report(); |
4431 | 13 | LOG_WARNING("recycle copy jobs finished, cost={}s", cost) |
4432 | 13 | .tag("instance_id", instance_id_) |
4433 | 13 | .tag("num_scanned", num_scanned) |
4434 | 13 | .tag("num_finished", num_finished) |
4435 | 13 | .tag("num_expired", num_expired) |
4436 | 13 | .tag("num_recycled", num_recycled); |
4437 | 13 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv Line | Count | Source | 4426 | 13 | DORIS_CLOUD_DEFER { | 4427 | 13 | unregister_recycle_task(task_name); | 4428 | 13 | int64_t cost = | 4429 | 13 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 4430 | 13 | metrics_context.finish_report(); | 4431 | 13 | LOG_WARNING("recycle copy jobs finished, cost={}s", cost) | 4432 | 13 | .tag("instance_id", instance_id_) | 4433 | 13 | .tag("num_scanned", num_scanned) | 4434 | 13 | .tag("num_finished", num_finished) | 4435 | 13 | .tag("num_expired", num_expired) | 4436 | 13 | .tag("num_recycled", num_recycled); | 4437 | 13 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv |
4438 | | |
4439 | 13 | CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0}; |
4440 | 13 | CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0}; |
4441 | 13 | std::string key0; |
4442 | 13 | std::string key1; |
4443 | 13 | copy_job_key(key_info0, &key0); |
4444 | 13 | copy_job_key(key_info1, &key1); |
4445 | 13 | std::unordered_map<std::string, std::shared_ptr<BatchObjStoreAccessor>> stage_accessor_map; |
4446 | 13 | auto recycle_func = [&start_time, &num_scanned, &num_finished, &num_expired, &num_recycled, |
4447 | 13 | &batch_count, &stage_accessor_map, &task_name, &metrics_context, |
4448 | 16 | this](std::string_view k, std::string_view v) -> int { |
4449 | 16 | ++num_scanned; |
4450 | 16 | CopyJobPB copy_job; |
4451 | 16 | if (!copy_job.ParseFromArray(v.data(), v.size())) { |
4452 | 0 | LOG_WARNING("malformed copy job").tag("key", hex(k)); |
4453 | 0 | return -1; |
4454 | 0 | } |
4455 | | |
4456 | | // decode copy job key |
4457 | 16 | auto k1 = k; |
4458 | 16 | k1.remove_prefix(1); |
4459 | 16 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
4460 | 16 | decode_key(&k1, &out); |
4461 | | // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id} |
4462 | | // -> CopyJobPB |
4463 | 16 | const auto& stage_id = std::get<std::string>(std::get<0>(out[3])); |
4464 | 16 | const auto& table_id = std::get<int64_t>(std::get<0>(out[4])); |
4465 | 16 | const auto& copy_id = std::get<std::string>(std::get<0>(out[5])); |
4466 | | |
4467 | 16 | bool check_storage = true; |
4468 | 16 | if (copy_job.job_status() == CopyJobPB::FINISH) { |
4469 | 12 | ++num_finished; |
4470 | | |
4471 | 12 | if (copy_job.stage_type() == StagePB::INTERNAL) { |
4472 | 7 | auto it = stage_accessor_map.find(stage_id); |
4473 | 7 | std::shared_ptr<BatchObjStoreAccessor> accessor; |
4474 | 7 | std::string_view path; |
4475 | 7 | if (it != stage_accessor_map.end()) { |
4476 | 2 | accessor = it->second; |
4477 | 5 | } else { |
4478 | 5 | std::shared_ptr<StorageVaultAccessor> inner_accessor; |
4479 | 5 | auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(), |
4480 | 5 | &inner_accessor); |
4481 | 5 | if (ret < 0) { // error |
4482 | 0 | LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret); |
4483 | 0 | return -1; |
4484 | 5 | } else if (ret == 0) { |
4485 | 3 | path = inner_accessor->uri(); |
4486 | 3 | accessor = std::make_shared<BatchObjStoreAccessor>( |
4487 | 3 | inner_accessor, batch_count, txn_kv_.get()); |
4488 | 3 | stage_accessor_map.emplace(stage_id, accessor); |
4489 | 3 | } else { // stage not found, skip check storage |
4490 | 2 | check_storage = false; |
4491 | 2 | } |
4492 | 5 | } |
4493 | 7 | if (check_storage) { |
4494 | | // TODO delete objects with key and etag is not supported |
4495 | 5 | accessor->add(std::move(copy_job), std::string(k), |
4496 | 5 | {instance_id_, stage_id, table_id, copy_id, std::string(path)}); |
4497 | 5 | return 0; |
4498 | 5 | } |
4499 | 7 | } else if (copy_job.stage_type() == StagePB::EXTERNAL) { |
4500 | 5 | int64_t current_time = |
4501 | 5 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
4502 | 5 | if (copy_job.finish_time_ms() > 0) { |
4503 | 2 | if (!config::force_immediate_recycle && |
4504 | 2 | current_time < copy_job.finish_time_ms() + |
4505 | 2 | config::copy_job_max_retention_second * 1000) { |
4506 | 1 | return 0; |
4507 | 1 | } |
4508 | 3 | } else { |
4509 | | // For compatibility, copy job does not contain finish time before 2.2.2, use start time |
4510 | 3 | if (!config::force_immediate_recycle && |
4511 | 3 | current_time < copy_job.start_time_ms() + |
4512 | 3 | config::copy_job_max_retention_second * 1000) { |
4513 | 1 | return 0; |
4514 | 1 | } |
4515 | 3 | } |
4516 | 5 | } |
4517 | 12 | } else if (copy_job.job_status() == CopyJobPB::LOADING) { |
4518 | 4 | int64_t current_time = |
4519 | 4 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
4520 | | // if copy job is timeout: delete all copy file kvs and copy job kv |
4521 | 4 | if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) { |
4522 | 2 | return 0; |
4523 | 2 | } |
4524 | 2 | ++num_expired; |
4525 | 2 | } |
4526 | | |
4527 | | // delete all copy files |
4528 | 7 | std::vector<std::string> copy_file_keys; |
4529 | 70 | for (auto& file : copy_job.object_files()) { |
4530 | 70 | copy_file_keys.push_back(copy_file_key( |
4531 | 70 | {instance_id_, stage_id, table_id, file.relative_path(), file.etag()})); |
4532 | 70 | } |
4533 | 7 | std::unique_ptr<Transaction> txn; |
4534 | 7 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
4535 | 0 | LOG(WARNING) << "failed to create txn"; |
4536 | 0 | return -1; |
4537 | 0 | } |
4538 | | // FIXME: We have already limited the file num and file meta size when selecting file in FE. |
4539 | | // And if too many copy files, begin_copy failed commit too. So here the copy file keys are |
4540 | | // limited, should not cause the txn commit failed. |
4541 | 70 | for (const auto& key : copy_file_keys) { |
4542 | 70 | txn->remove(key); |
4543 | 70 | LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_ |
4544 | 70 | << ", stage_id=" << stage_id << ", table_id=" << table_id |
4545 | 70 | << ", query_id=" << copy_id; |
4546 | 70 | } |
4547 | 7 | txn->remove(k); |
4548 | 7 | TxnErrorCode err = txn->commit(); |
4549 | 7 | if (err != TxnErrorCode::TXN_OK) { |
4550 | 0 | LOG(WARNING) << "failed to commit txn, err=" << err; |
4551 | 0 | return -1; |
4552 | 0 | } |
4553 | | |
4554 | 7 | metrics_context.total_recycled_num = ++num_recycled; |
4555 | 7 | metrics_context.report(); |
4556 | 7 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); |
4557 | 7 | return 0; |
4558 | 7 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 4448 | 16 | this](std::string_view k, std::string_view v) -> int { | 4449 | 16 | ++num_scanned; | 4450 | 16 | CopyJobPB copy_job; | 4451 | 16 | if (!copy_job.ParseFromArray(v.data(), v.size())) { | 4452 | 0 | LOG_WARNING("malformed copy job").tag("key", hex(k)); | 4453 | 0 | return -1; | 4454 | 0 | } | 4455 | | | 4456 | | // decode copy job key | 4457 | 16 | auto k1 = k; | 4458 | 16 | k1.remove_prefix(1); | 4459 | 16 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 4460 | 16 | decode_key(&k1, &out); | 4461 | | // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id} | 4462 | | // -> CopyJobPB | 4463 | 16 | const auto& stage_id = std::get<std::string>(std::get<0>(out[3])); | 4464 | 16 | const auto& table_id = std::get<int64_t>(std::get<0>(out[4])); | 4465 | 16 | const auto& copy_id = std::get<std::string>(std::get<0>(out[5])); | 4466 | | | 4467 | 16 | bool check_storage = true; | 4468 | 16 | if (copy_job.job_status() == CopyJobPB::FINISH) { | 4469 | 12 | ++num_finished; | 4470 | | | 4471 | 12 | if (copy_job.stage_type() == StagePB::INTERNAL) { | 4472 | 7 | auto it = stage_accessor_map.find(stage_id); | 4473 | 7 | std::shared_ptr<BatchObjStoreAccessor> accessor; | 4474 | 7 | std::string_view path; | 4475 | 7 | if (it != stage_accessor_map.end()) { | 4476 | 2 | accessor = it->second; | 4477 | 5 | } else { | 4478 | 5 | std::shared_ptr<StorageVaultAccessor> inner_accessor; | 4479 | 5 | auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(), | 4480 | 5 | &inner_accessor); | 4481 | 5 | if (ret < 0) { // error | 4482 | 0 | LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret); | 4483 | 0 | return -1; | 4484 | 5 | } else if (ret == 0) { | 4485 | 3 | path = inner_accessor->uri(); | 4486 | 3 | accessor = 
std::make_shared<BatchObjStoreAccessor>( | 4487 | 3 | inner_accessor, batch_count, txn_kv_.get()); | 4488 | 3 | stage_accessor_map.emplace(stage_id, accessor); | 4489 | 3 | } else { // stage not found, skip check storage | 4490 | 2 | check_storage = false; | 4491 | 2 | } | 4492 | 5 | } | 4493 | 7 | if (check_storage) { | 4494 | | // TODO delete objects with key and etag is not supported | 4495 | 5 | accessor->add(std::move(copy_job), std::string(k), | 4496 | 5 | {instance_id_, stage_id, table_id, copy_id, std::string(path)}); | 4497 | 5 | return 0; | 4498 | 5 | } | 4499 | 7 | } else if (copy_job.stage_type() == StagePB::EXTERNAL) { | 4500 | 5 | int64_t current_time = | 4501 | 5 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); | 4502 | 5 | if (copy_job.finish_time_ms() > 0) { | 4503 | 2 | if (!config::force_immediate_recycle && | 4504 | 2 | current_time < copy_job.finish_time_ms() + | 4505 | 2 | config::copy_job_max_retention_second * 1000) { | 4506 | 1 | return 0; | 4507 | 1 | } | 4508 | 3 | } else { | 4509 | | // For compatibility, copy job does not contain finish time before 2.2.2, use start time | 4510 | 3 | if (!config::force_immediate_recycle && | 4511 | 3 | current_time < copy_job.start_time_ms() + | 4512 | 3 | config::copy_job_max_retention_second * 1000) { | 4513 | 1 | return 0; | 4514 | 1 | } | 4515 | 3 | } | 4516 | 5 | } | 4517 | 12 | } else if (copy_job.job_status() == CopyJobPB::LOADING) { | 4518 | 4 | int64_t current_time = | 4519 | 4 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); | 4520 | | // if copy job is timeout: delete all copy file kvs and copy job kv | 4521 | 4 | if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) { | 4522 | 2 | return 0; | 4523 | 2 | } | 4524 | 2 | ++num_expired; | 4525 | 2 | } | 4526 | | | 4527 | | // delete all copy files | 4528 | 7 | std::vector<std::string> copy_file_keys; | 4529 | 70 | for (auto& file : copy_job.object_files()) { 
| 4530 | 70 | copy_file_keys.push_back(copy_file_key( | 4531 | 70 | {instance_id_, stage_id, table_id, file.relative_path(), file.etag()})); | 4532 | 70 | } | 4533 | 7 | std::unique_ptr<Transaction> txn; | 4534 | 7 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { | 4535 | 0 | LOG(WARNING) << "failed to create txn"; | 4536 | 0 | return -1; | 4537 | 0 | } | 4538 | | // FIXME: We have already limited the file num and file meta size when selecting file in FE. | 4539 | | // And if too many copy files, begin_copy failed commit too. So here the copy file keys are | 4540 | | // limited, should not cause the txn commit failed. | 4541 | 70 | for (const auto& key : copy_file_keys) { | 4542 | 70 | txn->remove(key); | 4543 | 70 | LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_ | 4544 | 70 | << ", stage_id=" << stage_id << ", table_id=" << table_id | 4545 | 70 | << ", query_id=" << copy_id; | 4546 | 70 | } | 4547 | 7 | txn->remove(k); | 4548 | 7 | TxnErrorCode err = txn->commit(); | 4549 | 7 | if (err != TxnErrorCode::TXN_OK) { | 4550 | 0 | LOG(WARNING) << "failed to commit txn, err=" << err; | 4551 | 0 | return -1; | 4552 | 0 | } | 4553 | | | 4554 | 7 | metrics_context.total_recycled_num = ++num_recycled; | 4555 | 7 | metrics_context.report(); | 4556 | 7 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); | 4557 | 7 | return 0; | 4558 | 7 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
4559 | | |
4560 | 13 | if (config::enable_recycler_stats_metrics) { |
4561 | 0 | scan_and_statistics_copy_jobs(); |
4562 | 0 | } |
4563 | | // recycle_func and loop_done for scan and recycle |
4564 | 13 | return scan_and_recycle(key0, key1, std::move(recycle_func)); |
4565 | 13 | } |
4566 | | |
4567 | | int InstanceRecycler::init_copy_job_accessor(const std::string& stage_id, |
4568 | | const StagePB::StageType& stage_type, |
4569 | 5 | std::shared_ptr<StorageVaultAccessor>* accessor) { |
4570 | 5 | #ifdef UNIT_TEST |
4571 | | // In unit test, external use the same accessor as the internal stage |
4572 | 5 | auto it = accessor_map_.find(stage_id); |
4573 | 5 | if (it != accessor_map_.end()) { |
4574 | 3 | *accessor = it->second; |
4575 | 3 | } else { |
4576 | 2 | std::cout << "UT can not find accessor with stage_id: " << stage_id << std::endl; |
4577 | 2 | return 1; |
4578 | 2 | } |
4579 | | #else |
4580 | | // init s3 accessor and add to accessor map |
4581 | | auto stage_it = |
4582 | | std::find_if(instance_info_.stages().begin(), instance_info_.stages().end(), |
4583 | | [&stage_id](auto&& stage) { return stage.stage_id() == stage_id; }); |
4584 | | |
4585 | | if (stage_it == instance_info_.stages().end()) { |
4586 | | LOG(INFO) << "Recycle nonexisted stage copy jobs. instance_id=" << instance_id_ |
4587 | | << ", stage_id=" << stage_id << ", stage_type=" << stage_type; |
4588 | | return 1; |
4589 | | } |
4590 | | |
4591 | | const auto& object_store_info = stage_it->obj_info(); |
4592 | | auto stage_access_type = stage_it->has_access_type() ? stage_it->access_type() : StagePB::AKSK; |
4593 | | |
4594 | | S3Conf s3_conf; |
4595 | | if (stage_type == StagePB::EXTERNAL) { |
4596 | | if (stage_access_type == StagePB::AKSK) { |
4597 | | auto conf = S3Conf::from_obj_store_info(object_store_info); |
4598 | | if (!conf) { |
4599 | | return -1; |
4600 | | } |
4601 | | |
4602 | | s3_conf = std::move(*conf); |
4603 | | } else if (stage_access_type == StagePB::BUCKET_ACL) { |
4604 | | auto conf = S3Conf::from_obj_store_info(object_store_info, true /* skip_aksk */); |
4605 | | if (!conf) { |
4606 | | return -1; |
4607 | | } |
4608 | | |
4609 | | s3_conf = std::move(*conf); |
4610 | | if (instance_info_.ram_user().has_encryption_info()) { |
4611 | | AkSkPair plain_ak_sk_pair; |
4612 | | int ret = decrypt_ak_sk_helper( |
4613 | | instance_info_.ram_user().ak(), instance_info_.ram_user().sk(), |
4614 | | instance_info_.ram_user().encryption_info(), &plain_ak_sk_pair); |
4615 | | if (ret != 0) { |
4616 | | LOG(WARNING) << "fail to decrypt ak sk. instance_id: " << instance_id_ |
4617 | | << " ram_user: " << proto_to_json(instance_info_.ram_user()); |
4618 | | return -1; |
4619 | | } |
4620 | | s3_conf.ak = std::move(plain_ak_sk_pair.first); |
4621 | | s3_conf.sk = std::move(plain_ak_sk_pair.second); |
4622 | | } else { |
4623 | | s3_conf.ak = instance_info_.ram_user().ak(); |
4624 | | s3_conf.sk = instance_info_.ram_user().sk(); |
4625 | | } |
4626 | | } else { |
4627 | | LOG(INFO) << "Unsupported stage access type=" << stage_access_type |
4628 | | << ", instance_id=" << instance_id_ << ", stage_id=" << stage_id; |
4629 | | return -1; |
4630 | | } |
4631 | | } else if (stage_type == StagePB::INTERNAL) { |
4632 | | int idx = stoi(object_store_info.id()); |
4633 | | if (idx > instance_info_.obj_info().size() || idx < 1) { |
4634 | | LOG(WARNING) << "invalid idx: " << idx; |
4635 | | return -1; |
4636 | | } |
4637 | | |
4638 | | const auto& old_obj = instance_info_.obj_info()[idx - 1]; |
4639 | | auto conf = S3Conf::from_obj_store_info(old_obj); |
4640 | | if (!conf) { |
4641 | | return -1; |
4642 | | } |
4643 | | |
4644 | | s3_conf = std::move(*conf); |
4645 | | s3_conf.prefix = object_store_info.prefix(); |
4646 | | } else { |
4647 | | LOG(WARNING) << "unknown stage type " << stage_type; |
4648 | | return -1; |
4649 | | } |
4650 | | |
4651 | | std::shared_ptr<S3Accessor> s3_accessor; |
4652 | | int ret = S3Accessor::create(std::move(s3_conf), &s3_accessor); |
4653 | | if (ret != 0) { |
4654 | | LOG(WARNING) << "failed to init s3 accessor ret=" << ret; |
4655 | | return -1; |
4656 | | } |
4657 | | |
4658 | | *accessor = std::move(s3_accessor); |
4659 | | #endif |
4660 | 3 | return 0; |
4661 | 5 | } |
4662 | | |
4663 | 11 | int InstanceRecycler::recycle_stage() { |
4664 | 11 | int64_t num_scanned = 0; |
4665 | 11 | int64_t num_recycled = 0; |
4666 | 11 | const std::string task_name = "recycle_stage"; |
4667 | 11 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
4668 | | |
4669 | 11 | LOG_WARNING("begin to recycle stage").tag("instance_id", instance_id_); |
4670 | | |
4671 | 11 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
4672 | 11 | register_recycle_task(task_name, start_time); |
4673 | | |
4674 | 11 | DORIS_CLOUD_DEFER { |
4675 | 11 | unregister_recycle_task(task_name); |
4676 | 11 | int64_t cost = |
4677 | 11 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
4678 | 11 | metrics_context.finish_report(); |
4679 | 11 | LOG_WARNING("recycle stage, cost={}s", cost) |
4680 | 11 | .tag("instance_id", instance_id_) |
4681 | 11 | .tag("num_scanned", num_scanned) |
4682 | 11 | .tag("num_recycled", num_recycled); |
4683 | 11 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv Line | Count | Source | 4674 | 11 | DORIS_CLOUD_DEFER { | 4675 | 11 | unregister_recycle_task(task_name); | 4676 | 11 | int64_t cost = | 4677 | 11 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 4678 | 11 | metrics_context.finish_report(); | 4679 | 11 | LOG_WARNING("recycle stage, cost={}s", cost) | 4680 | 11 | .tag("instance_id", instance_id_) | 4681 | 11 | .tag("num_scanned", num_scanned) | 4682 | 11 | .tag("num_recycled", num_recycled); | 4683 | 11 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv |
4684 | | |
4685 | 11 | RecycleStageKeyInfo key_info0 {instance_id_, ""}; |
4686 | 11 | RecycleStageKeyInfo key_info1 {instance_id_, "\xff"}; |
4687 | 11 | std::string key0 = recycle_stage_key(key_info0); |
4688 | 11 | std::string key1 = recycle_stage_key(key_info1); |
4689 | | |
4690 | 11 | std::vector<std::string_view> stage_keys; |
4691 | 11 | auto recycle_func = [&start_time, &num_scanned, &num_recycled, &stage_keys, &metrics_context, |
4692 | 11 | this](std::string_view k, std::string_view v) -> int { |
4693 | 1 | ++num_scanned; |
4694 | 1 | RecycleStagePB recycle_stage; |
4695 | 1 | if (!recycle_stage.ParseFromArray(v.data(), v.size())) { |
4696 | 0 | LOG_WARNING("malformed recycle stage").tag("key", hex(k)); |
4697 | 0 | return -1; |
4698 | 0 | } |
4699 | | |
4700 | 1 | int idx = stoi(recycle_stage.stage().obj_info().id()); |
4701 | 1 | if (idx > instance_info_.obj_info().size() || idx < 1) { |
4702 | 0 | LOG(WARNING) << "invalid idx: " << idx; |
4703 | 0 | return -1; |
4704 | 0 | } |
4705 | | |
4706 | 1 | std::shared_ptr<StorageVaultAccessor> accessor; |
4707 | 1 | int ret = SYNC_POINT_HOOK_RETURN_VALUE( |
4708 | 1 | [&] { |
4709 | 1 | auto& old_obj = instance_info_.obj_info()[idx - 1]; |
4710 | 1 | auto s3_conf = S3Conf::from_obj_store_info(old_obj); |
4711 | 1 | if (!s3_conf) { |
4712 | 1 | return -1; |
4713 | 1 | } |
4714 | | |
4715 | 1 | s3_conf->prefix = recycle_stage.stage().obj_info().prefix(); |
4716 | 1 | std::shared_ptr<S3Accessor> s3_accessor; |
4717 | 1 | int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor); |
4718 | 1 | if (ret != 0) { |
4719 | 1 | return -1; |
4720 | 1 | } |
4721 | | |
4722 | 1 | accessor = std::move(s3_accessor); |
4723 | 1 | return 0; |
4724 | 1 | }(), |
4725 | 1 | "recycle_stage:get_accessor", &accessor); |
4726 | | |
4727 | 1 | if (ret != 0) { |
4728 | 0 | LOG(WARNING) << "failed to init accessor ret=" << ret; |
4729 | 0 | return ret; |
4730 | 0 | } |
4731 | | |
4732 | 1 | LOG_WARNING("begin to delete objects of dropped internal stage") |
4733 | 1 | .tag("instance_id", instance_id_) |
4734 | 1 | .tag("stage_id", recycle_stage.stage().stage_id()) |
4735 | 1 | .tag("user_name", recycle_stage.stage().mysql_user_name()[0]) |
4736 | 1 | .tag("user_id", recycle_stage.stage().mysql_user_id()[0]) |
4737 | 1 | .tag("obj_info_id", idx) |
4738 | 1 | .tag("prefix", recycle_stage.stage().obj_info().prefix()); |
4739 | 1 | ret = accessor->delete_all(); |
4740 | 1 | if (ret != 0) { |
4741 | 0 | LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id=" |
4742 | 0 | << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id() |
4743 | 0 | << ", prefix=" << recycle_stage.stage().obj_info().prefix() |
4744 | 0 | << ", ret=" << ret; |
4745 | 0 | return -1; |
4746 | 0 | } |
4747 | 1 | metrics_context.total_recycled_num = ++num_recycled; |
4748 | 1 | metrics_context.report(); |
4749 | 1 | check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time); |
4750 | 1 | stage_keys.push_back(k); |
4751 | 1 | return 0; |
4752 | 1 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 4692 | 1 | this](std::string_view k, std::string_view v) -> int { | 4693 | 1 | ++num_scanned; | 4694 | 1 | RecycleStagePB recycle_stage; | 4695 | 1 | if (!recycle_stage.ParseFromArray(v.data(), v.size())) { | 4696 | 0 | LOG_WARNING("malformed recycle stage").tag("key", hex(k)); | 4697 | 0 | return -1; | 4698 | 0 | } | 4699 | | | 4700 | 1 | int idx = stoi(recycle_stage.stage().obj_info().id()); | 4701 | 1 | if (idx > instance_info_.obj_info().size() || idx < 1) { | 4702 | 0 | LOG(WARNING) << "invalid idx: " << idx; | 4703 | 0 | return -1; | 4704 | 0 | } | 4705 | | | 4706 | 1 | std::shared_ptr<StorageVaultAccessor> accessor; | 4707 | 1 | int ret = SYNC_POINT_HOOK_RETURN_VALUE( | 4708 | 1 | [&] { | 4709 | 1 | auto& old_obj = instance_info_.obj_info()[idx - 1]; | 4710 | 1 | auto s3_conf = S3Conf::from_obj_store_info(old_obj); | 4711 | 1 | if (!s3_conf) { | 4712 | 1 | return -1; | 4713 | 1 | } | 4714 | | | 4715 | 1 | s3_conf->prefix = recycle_stage.stage().obj_info().prefix(); | 4716 | 1 | std::shared_ptr<S3Accessor> s3_accessor; | 4717 | 1 | int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor); | 4718 | 1 | if (ret != 0) { | 4719 | 1 | return -1; | 4720 | 1 | } | 4721 | | | 4722 | 1 | accessor = std::move(s3_accessor); | 4723 | 1 | return 0; | 4724 | 1 | }(), | 4725 | 1 | "recycle_stage:get_accessor", &accessor); | 4726 | | | 4727 | 1 | if (ret != 0) { | 4728 | 0 | LOG(WARNING) << "failed to init accessor ret=" << ret; | 4729 | 0 | return ret; | 4730 | 0 | } | 4731 | | | 4732 | 1 | LOG_WARNING("begin to delete objects of dropped internal stage") | 4733 | 1 | .tag("instance_id", instance_id_) | 4734 | 1 | .tag("stage_id", recycle_stage.stage().stage_id()) | 4735 | 1 | .tag("user_name", recycle_stage.stage().mysql_user_name()[0]) | 4736 | 1 | .tag("user_id", 
recycle_stage.stage().mysql_user_id()[0]) | 4737 | 1 | .tag("obj_info_id", idx) | 4738 | 1 | .tag("prefix", recycle_stage.stage().obj_info().prefix()); | 4739 | 1 | ret = accessor->delete_all(); | 4740 | 1 | if (ret != 0) { | 4741 | 0 | LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id=" | 4742 | 0 | << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id() | 4743 | 0 | << ", prefix=" << recycle_stage.stage().obj_info().prefix() | 4744 | 0 | << ", ret=" << ret; | 4745 | 0 | return -1; | 4746 | 0 | } | 4747 | 1 | metrics_context.total_recycled_num = ++num_recycled; | 4748 | 1 | metrics_context.report(); | 4749 | 1 | check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time); | 4750 | 1 | stage_keys.push_back(k); | 4751 | 1 | return 0; | 4752 | 1 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
4753 | | |
4754 | 11 | auto loop_done = [&stage_keys, this]() -> int { |
4755 | 1 | if (stage_keys.empty()) return 0; |
4756 | 1 | DORIS_CLOUD_DEFER { |
4757 | 1 | stage_keys.clear(); |
4758 | 1 | }; recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv Line | Count | Source | 4756 | 1 | DORIS_CLOUD_DEFER { | 4757 | 1 | stage_keys.clear(); | 4758 | 1 | }; |
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv |
4759 | 1 | if (0 != txn_remove(txn_kv_.get(), stage_keys)) { |
4760 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_; |
4761 | 0 | return -1; |
4762 | 0 | } |
4763 | 1 | return 0; |
4764 | 1 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv Line | Count | Source | 4754 | 1 | auto loop_done = [&stage_keys, this]() -> int { | 4755 | 1 | if (stage_keys.empty()) return 0; | 4756 | 1 | DORIS_CLOUD_DEFER { | 4757 | 1 | stage_keys.clear(); | 4758 | 1 | }; | 4759 | 1 | if (0 != txn_remove(txn_kv_.get(), stage_keys)) { | 4760 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_; | 4761 | 0 | return -1; | 4762 | 0 | } | 4763 | 1 | return 0; | 4764 | 1 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv |
4765 | 11 | if (config::enable_recycler_stats_metrics) { |
4766 | 0 | scan_and_statistics_stage(); |
4767 | 0 | } |
4768 | | // recycle_func and loop_done for scan and recycle |
4769 | 11 | return scan_and_recycle(key0, key1, std::move(recycle_func), std::move(loop_done)); |
4770 | 11 | } |
4771 | | |
4772 | 10 | int InstanceRecycler::recycle_expired_stage_objects() { |
4773 | 10 | LOG_WARNING("begin to recycle expired stage objects").tag("instance_id", instance_id_); |
4774 | | |
4775 | 10 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
4776 | 10 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects"); |
4777 | | |
4778 | 10 | DORIS_CLOUD_DEFER { |
4779 | 10 | int64_t cost = |
4780 | 10 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
4781 | 10 | metrics_context.finish_report(); |
4782 | 10 | LOG_WARNING("recycle expired stage objects, cost={}s", cost) |
4783 | 10 | .tag("instance_id", instance_id_); |
4784 | 10 | }; recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv Line | Count | Source | 4778 | 10 | DORIS_CLOUD_DEFER { | 4779 | 10 | int64_t cost = | 4780 | 10 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 4781 | 10 | metrics_context.finish_report(); | 4782 | 10 | LOG_WARNING("recycle expired stage objects, cost={}s", cost) | 4783 | 10 | .tag("instance_id", instance_id_); | 4784 | 10 | }; |
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv |
4785 | | |
4786 | 10 | int ret = 0; |
4787 | | |
4788 | 10 | if (config::enable_recycler_stats_metrics) { |
4789 | 0 | scan_and_statistics_expired_stage_objects(); |
4790 | 0 | } |
4791 | | |
4792 | 10 | for (const auto& stage : instance_info_.stages()) { |
4793 | 0 | std::stringstream ss; |
4794 | 0 | ss << "instance_id=" << instance_id_ << ", stage_id=" << stage.stage_id() << ", user_name=" |
4795 | 0 | << (stage.mysql_user_name().empty() ? "null" : stage.mysql_user_name().at(0)) |
4796 | 0 | << ", user_id=" << (stage.mysql_user_id().empty() ? "null" : stage.mysql_user_id().at(0)) |
4797 | 0 | << ", prefix=" << stage.obj_info().prefix(); |
4798 | |
|
4799 | 0 | if (stopped()) { |
4800 | 0 | break; |
4801 | 0 | } |
4802 | 0 | if (stage.type() == StagePB::EXTERNAL) { |
4803 | 0 | continue; |
4804 | 0 | } |
4805 | 0 | int idx = stoi(stage.obj_info().id()); |
4806 | 0 | if (idx > instance_info_.obj_info().size() || idx < 1) { |
4807 | 0 | LOG(WARNING) << "invalid idx: " << idx << ", id: " << stage.obj_info().id(); |
4808 | 0 | continue; |
4809 | 0 | } |
4810 | | |
4811 | 0 | const auto& old_obj = instance_info_.obj_info()[idx - 1]; |
4812 | 0 | auto s3_conf = S3Conf::from_obj_store_info(old_obj); |
4813 | 0 | if (!s3_conf) { |
4814 | 0 | LOG(WARNING) << "failed to init s3_conf with obj_info=" << old_obj.ShortDebugString(); |
4815 | 0 | continue; |
4816 | 0 | } |
4817 | | |
4818 | 0 | s3_conf->prefix = stage.obj_info().prefix(); |
4819 | 0 | std::shared_ptr<S3Accessor> accessor; |
4820 | 0 | int ret1 = S3Accessor::create(*s3_conf, &accessor); |
4821 | 0 | if (ret1 != 0) { |
4822 | 0 | LOG(WARNING) << "failed to init s3 accessor ret=" << ret1 << " " << ss.str(); |
4823 | 0 | ret = -1; |
4824 | 0 | continue; |
4825 | 0 | } |
4826 | | |
4827 | 0 | if (s3_conf->prefix.find("/stage/") == std::string::npos) { |
4828 | 0 | LOG(WARNING) << "try to delete illegal prefix, which is catastrophic, " << ss.str(); |
4829 | 0 | ret = -1; |
4830 | 0 | continue; |
4831 | 0 | } |
4832 | | |
4833 | 0 | LOG(INFO) << "recycle expired stage objects, " << ss.str(); |
4834 | 0 | int64_t expiration_time = |
4835 | 0 | duration_cast<seconds>(system_clock::now().time_since_epoch()).count() - |
4836 | 0 | config::internal_stage_objects_expire_time_second; |
4837 | 0 | if (config::force_immediate_recycle) { |
4838 | 0 | expiration_time = INT64_MAX; |
4839 | 0 | } |
4840 | 0 | ret1 = accessor->delete_all(expiration_time); |
4841 | 0 | if (ret1 != 0) { |
4842 | 0 | LOG(WARNING) << "failed to recycle expired stage objects, ret=" << ret1 << " " |
4843 | 0 | << ss.str(); |
4844 | 0 | ret = -1; |
4845 | 0 | continue; |
4846 | 0 | } |
4847 | 0 | metrics_context.total_recycled_num++; |
4848 | 0 | metrics_context.report(); |
4849 | 0 | } |
4850 | 10 | return ret; |
4851 | 10 | } |
4852 | | |
4853 | 146 | void InstanceRecycler::register_recycle_task(const std::string& task_name, int64_t start_time) { |
4854 | 146 | std::lock_guard lock(recycle_tasks_mutex); |
4855 | 146 | running_recycle_tasks[task_name] = start_time; |
4856 | 146 | } |
4857 | | |
4858 | 146 | void InstanceRecycler::unregister_recycle_task(const std::string& task_name) { |
4859 | 146 | std::lock_guard lock(recycle_tasks_mutex); |
4860 | 146 | DCHECK(running_recycle_tasks[task_name] > 0); |
4861 | 146 | running_recycle_tasks.erase(task_name); |
4862 | 146 | } |
4863 | | |
4864 | 21 | bool InstanceRecycler::check_recycle_tasks() { |
4865 | 21 | std::map<std::string, int64_t> tmp_running_recycle_tasks; |
4866 | 21 | { |
4867 | 21 | std::lock_guard lock(recycle_tasks_mutex); |
4868 | 21 | tmp_running_recycle_tasks = running_recycle_tasks; |
4869 | 21 | } |
4870 | | |
4871 | 21 | bool found = false; |
4872 | 21 | int64_t now = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
4873 | 21 | for (auto& [task_name, start_time] : tmp_running_recycle_tasks) { |
4874 | 20 | int64_t cost = now - start_time; |
4875 | 20 | if (cost > config::recycle_task_threshold_seconds) [[unlikely]] { |
4876 | 20 | LOG_INFO("recycle task cost too much time cost={}s", cost) |
4877 | 20 | .tag("instance_id", instance_id_) |
4878 | 20 | .tag("task", task_name); |
4879 | 20 | found = true; |
4880 | 20 | } |
4881 | 20 | } |
4882 | | |
4883 | 21 | return found; |
4884 | 21 | } |
4885 | | |
4886 | | // Scan and statistics indexes that need to be recycled |
4887 | 0 | int InstanceRecycler::scan_and_statistics_indexes() { |
4888 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_indexes"); |
4889 | |
|
4890 | 0 | RecycleIndexKeyInfo index_key_info0 {instance_id_, 0}; |
4891 | 0 | RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX}; |
4892 | 0 | std::string index_key0; |
4893 | 0 | std::string index_key1; |
4894 | 0 | recycle_index_key(index_key_info0, &index_key0); |
4895 | 0 | recycle_index_key(index_key_info1, &index_key1); |
4896 | 0 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
4897 | |
|
4898 | 0 | auto handle_index_kv = [&, this](std::string_view k, std::string_view v) -> int { |
4899 | 0 | RecycleIndexPB index_pb; |
4900 | 0 | if (!index_pb.ParseFromArray(v.data(), v.size())) { |
4901 | 0 | return 0; |
4902 | 0 | } |
4903 | 0 | int64_t current_time = ::time(nullptr); |
4904 | 0 | if (current_time < |
4905 | 0 | calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired |
4906 | 0 | return 0; |
4907 | 0 | } |
4908 | | // decode index_id |
4909 | 0 | auto k1 = k; |
4910 | 0 | k1.remove_prefix(1); |
4911 | 0 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
4912 | 0 | decode_key(&k1, &out); |
4913 | | // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB |
4914 | 0 | auto index_id = std::get<int64_t>(std::get<0>(out[3])); |
4915 | 0 | std::unique_ptr<Transaction> txn; |
4916 | 0 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
4917 | 0 | if (err != TxnErrorCode::TXN_OK) { |
4918 | 0 | return 0; |
4919 | 0 | } |
4920 | 0 | std::string val; |
4921 | 0 | err = txn->get(k, &val); |
4922 | 0 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { |
4923 | 0 | return 0; |
4924 | 0 | } |
4925 | 0 | if (err != TxnErrorCode::TXN_OK) { |
4926 | 0 | return 0; |
4927 | 0 | } |
4928 | 0 | index_pb.Clear(); |
4929 | 0 | if (!index_pb.ParseFromString(val)) { |
4930 | 0 | return 0; |
4931 | 0 | } |
4932 | 0 | if (scan_tablets_and_statistics(index_pb.table_id(), index_id, metrics_context) != 0) { |
4933 | 0 | return 0; |
4934 | 0 | } |
4935 | 0 | metrics_context.total_need_recycle_num++; |
4936 | 0 | return 0; |
4937 | 0 | }; Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
4938 | |
|
4939 | 0 | int ret = scan_and_recycle(index_key0, index_key1, std::move(handle_index_kv)); |
4940 | 0 | metrics_context.report(true); |
4941 | 0 | segment_metrics_context_.report(true); |
4942 | 0 | tablet_metrics_context_.report(true); |
4943 | 0 | return ret; |
4944 | 0 | } |
4945 | | |
4946 | | // Scan and statistics partitions that need to be recycled |
4947 | 0 | int InstanceRecycler::scan_and_statistics_partitions() { |
4948 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_partitions"); |
4949 | |
|
4950 | 0 | RecyclePartKeyInfo part_key_info0 {instance_id_, 0}; |
4951 | 0 | RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX}; |
4952 | 0 | std::string part_key0; |
4953 | 0 | std::string part_key1; |
4954 | 0 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
4955 | |
|
4956 | 0 | recycle_partition_key(part_key_info0, &part_key0); |
4957 | 0 | recycle_partition_key(part_key_info1, &part_key1); |
4958 | 0 | auto handle_partition_kv = [&, this](std::string_view k, std::string_view v) -> int { |
4959 | 0 | RecyclePartitionPB part_pb; |
4960 | 0 | if (!part_pb.ParseFromArray(v.data(), v.size())) { |
4961 | 0 | return 0; |
4962 | 0 | } |
4963 | 0 | int64_t current_time = ::time(nullptr); |
4964 | 0 | if (current_time < |
4965 | 0 | calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired |
4966 | 0 | return 0; |
4967 | 0 | } |
4968 | | // decode partition_id |
4969 | 0 | auto k1 = k; |
4970 | 0 | k1.remove_prefix(1); |
4971 | 0 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
4972 | 0 | decode_key(&k1, &out); |
4973 | | // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB |
4974 | 0 | auto partition_id = std::get<int64_t>(std::get<0>(out[3])); |
4975 | | // Change state to RECYCLING |
4976 | 0 | std::unique_ptr<Transaction> txn; |
4977 | 0 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
4978 | 0 | if (err != TxnErrorCode::TXN_OK) { |
4979 | 0 | return 0; |
4980 | 0 | } |
4981 | 0 | std::string val; |
4982 | 0 | err = txn->get(k, &val); |
4983 | 0 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { |
4984 | 0 | return 0; |
4985 | 0 | } |
4986 | 0 | if (err != TxnErrorCode::TXN_OK) { |
4987 | 0 | return 0; |
4988 | 0 | } |
4989 | 0 | part_pb.Clear(); |
4990 | 0 | if (!part_pb.ParseFromString(val)) { |
4991 | 0 | return 0; |
4992 | 0 | } |
4993 | | // Partitions with PREPARED state MUST have no data |
4994 | 0 | bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED; |
4995 | 0 | int ret = 0; |
4996 | 0 | for (int64_t index_id : part_pb.index_id()) { |
4997 | 0 | if (scan_tablets_and_statistics(part_pb.table_id(), index_id, metrics_context, |
4998 | 0 | partition_id, is_empty_tablet) != 0) { |
4999 | 0 | ret = 0; |
5000 | 0 | } |
5001 | 0 | } |
5002 | 0 | metrics_context.total_need_recycle_num++; |
5003 | 0 | return ret; |
5004 | 0 | }; Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
5005 | |
|
5006 | 0 | int ret = scan_and_recycle(part_key0, part_key1, std::move(handle_partition_kv)); |
5007 | 0 | metrics_context.report(true); |
5008 | 0 | segment_metrics_context_.report(true); |
5009 | 0 | tablet_metrics_context_.report(true); |
5010 | 0 | return ret; |
5011 | 0 | } |
5012 | | |
5013 | | // Scan and statistics rowsets that need to be recycled |
5014 | 0 | int InstanceRecycler::scan_and_statistics_rowsets() { |
5015 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_rowsets"); |
5016 | 0 | RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""}; |
5017 | 0 | RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""}; |
5018 | 0 | std::string recyc_rs_key0; |
5019 | 0 | std::string recyc_rs_key1; |
5020 | 0 | recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0); |
5021 | 0 | recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1); |
5022 | 0 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
5023 | |
|
5024 | 0 | auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int { |
5025 | 0 | RecycleRowsetPB rowset; |
5026 | 0 | if (!rowset.ParseFromArray(v.data(), v.size())) { |
5027 | 0 | return 0; |
5028 | 0 | } |
5029 | 0 | int64_t current_time = ::time(nullptr); |
5030 | 0 | if (current_time < |
5031 | 0 | calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts)) { // not expired |
5032 | 0 | return 0; |
5033 | 0 | } |
5034 | 0 | if (!rowset.has_type()) { |
5035 | 0 | if (!rowset.has_resource_id()) [[unlikely]] { |
5036 | 0 | return 0; |
5037 | 0 | } |
5038 | 0 | if (rowset.resource_id().empty()) [[unlikely]] { |
5039 | 0 | return 0; |
5040 | 0 | } |
5041 | 0 | metrics_context.total_need_recycle_num++; |
5042 | 0 | metrics_context.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size(); |
5043 | 0 | segment_metrics_context_.total_need_recycle_num += rowset.rowset_meta().num_segments(); |
5044 | 0 | segment_metrics_context_.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size(); |
5045 | 0 | return 0; |
5046 | 0 | } |
5047 | 0 | auto* rowset_meta = rowset.mutable_rowset_meta(); |
5048 | 0 | if (!rowset_meta->has_resource_id()) [[unlikely]] { |
5049 | 0 | if (rowset.type() == RecycleRowsetPB::PREPARE || rowset_meta->num_segments() != 0) { |
5050 | 0 | return 0; |
5051 | 0 | } |
5052 | 0 | } |
5053 | 0 | metrics_context.total_need_recycle_num++; |
5054 | 0 | metrics_context.total_need_recycle_data_size += rowset_meta->total_disk_size(); |
5055 | 0 | segment_metrics_context_.total_need_recycle_num += rowset_meta->num_segments(); |
5056 | 0 | segment_metrics_context_.total_need_recycle_data_size += rowset_meta->total_disk_size(); |
5057 | 0 | return 0; |
5058 | 0 | }; Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
5059 | 0 | int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv)); |
5060 | 0 | metrics_context.report(true); |
5061 | 0 | segment_metrics_context_.report(true); |
5062 | 0 | return ret; |
5063 | 0 | } |
5064 | | |
5065 | | // Scan and statistics tmp_rowsets that need to be recycled |
5066 | 0 | int InstanceRecycler::scan_and_statistics_tmp_rowsets() { |
5067 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_tmp_rowsets"); |
5068 | 0 | MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0}; |
5069 | 0 | MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0}; |
5070 | 0 | std::string tmp_rs_key0; |
5071 | 0 | std::string tmp_rs_key1; |
5072 | 0 | meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0); |
5073 | 0 | meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1); |
5074 | |
|
5075 | 0 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
5076 | |
|
5077 | 0 | auto handle_tmp_rowsets_kv = [&, this](std::string_view k, std::string_view v) -> int { |
5078 | 0 | doris::RowsetMetaCloudPB rowset; |
5079 | 0 | if (!rowset.ParseFromArray(v.data(), v.size())) { |
5080 | 0 | return 0; |
5081 | 0 | } |
5082 | 0 | int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts); |
5083 | 0 | int64_t current_time = ::time(nullptr); |
5084 | 0 | if (current_time < expiration) { |
5085 | 0 | return 0; |
5086 | 0 | } |
5087 | | |
5088 | 0 | DCHECK_GT(rowset.txn_id(), 0) |
5089 | 0 | << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString(); |
5090 | 0 | if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) { |
5091 | 0 | return 0; |
5092 | 0 | } |
5093 | | |
5094 | 0 | if (!rowset.has_resource_id()) { |
5095 | 0 | if (rowset.num_segments() > 0) [[unlikely]] { // impossible |
5096 | 0 | return 0; |
5097 | 0 | } |
5098 | 0 | return 0; |
5099 | 0 | } |
5100 | | |
5101 | 0 | metrics_context.total_need_recycle_num++; |
5102 | 0 | metrics_context.total_need_recycle_data_size += rowset.total_disk_size(); |
5103 | 0 | segment_metrics_context_.total_need_recycle_data_size += rowset.total_disk_size(); |
5104 | 0 | segment_metrics_context_.total_need_recycle_num += rowset.num_segments(); |
5105 | 0 | return 0; |
5106 | 0 | }; Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
5107 | 0 | int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_tmp_rowsets_kv)); |
5108 | 0 | metrics_context.report(true); |
5109 | 0 | segment_metrics_context_.report(true); |
5110 | 0 | return ret; |
5111 | 0 | } |
5112 | | |
5113 | | // Scan and statistics abort_timeout_txn that need to be recycled |
5114 | 0 | int InstanceRecycler::scan_and_statistics_abort_timeout_txn() { |
5115 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "abort_timeout_txn"); |
5116 | |
|
5117 | 0 | TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0}; |
5118 | 0 | TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX}; |
5119 | 0 | std::string begin_txn_running_key; |
5120 | 0 | std::string end_txn_running_key; |
5121 | 0 | txn_running_key(txn_running_key_info0, &begin_txn_running_key); |
5122 | 0 | txn_running_key(txn_running_key_info1, &end_txn_running_key); |
5123 | |
|
5124 | 0 | int64_t current_time = |
5125 | 0 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
5126 | |
|
5127 | 0 | auto handle_abort_timeout_txn_kv = [&metrics_context, ¤t_time, this]( |
5128 | 0 | std::string_view k, std::string_view v) -> int { |
5129 | 0 | std::unique_ptr<Transaction> txn; |
5130 | 0 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
5131 | 0 | if (err != TxnErrorCode::TXN_OK) { |
5132 | 0 | return 0; |
5133 | 0 | } |
5134 | 0 | std::string_view k1 = k; |
5135 | 0 | k1.remove_prefix(1); |
5136 | 0 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
5137 | 0 | if (decode_key(&k1, &out) != 0) { |
5138 | 0 | return 0; |
5139 | 0 | } |
5140 | 0 | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); |
5141 | 0 | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); |
5142 | | // Update txn_info |
5143 | 0 | std::string txn_inf_key, txn_inf_val; |
5144 | 0 | txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key); |
5145 | 0 | err = txn->get(txn_inf_key, &txn_inf_val); |
5146 | 0 | if (err != TxnErrorCode::TXN_OK) { |
5147 | 0 | return 0; |
5148 | 0 | } |
5149 | 0 | TxnInfoPB txn_info; |
5150 | 0 | if (!txn_info.ParseFromString(txn_inf_val)) { |
5151 | 0 | return 0; |
5152 | 0 | } |
5153 | | |
5154 | 0 | if (TxnStatusPB::TXN_STATUS_COMMITTED != txn_info.status()) { |
5155 | 0 | TxnRunningPB txn_running_pb; |
5156 | 0 | if (!txn_running_pb.ParseFromArray(v.data(), v.size())) { |
5157 | 0 | return 0; |
5158 | 0 | } |
5159 | 0 | if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) { |
5160 | 0 | return 0; |
5161 | 0 | } |
5162 | 0 | metrics_context.total_need_recycle_num++; |
5163 | 0 | } |
5164 | 0 | return 0; |
5165 | 0 | }; Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
5166 | |
|
5167 | 0 | int ret = scan_and_recycle(begin_txn_running_key, end_txn_running_key, std::move(handle_abort_timeout_txn_kv)); |
5168 | 0 | metrics_context.report(true); |
5169 | 0 | return ret; |
5170 | 0 | } |
5171 | | |
5172 | | // Scan and statistics expired_txn_label that need to be recycled |
5173 | 0 | int InstanceRecycler::scan_and_statistics_expired_txn_label() { |
5174 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_txn_label"); |
5175 | |
|
5176 | 0 | RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0}; |
5177 | 0 | RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX}; |
5178 | 0 | std::string begin_recycle_txn_key; |
5179 | 0 | std::string end_recycle_txn_key; |
5180 | 0 | recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key); |
5181 | 0 | recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key); |
5182 | 0 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
5183 | 0 | int64_t current_time_ms = |
5184 | 0 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
5185 | | |
5186 | | // for calculate the total num or bytes of recyled objects |
5187 | 0 | auto handle_expired_txn_label_kv = [&, this](std::string_view k, std::string_view v) -> int { |
5188 | 0 | RecycleTxnPB recycle_txn_pb; |
5189 | 0 | if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) { |
5190 | 0 | return 0; |
5191 | 0 | } |
5192 | 0 | if ((config::force_immediate_recycle) || |
5193 | 0 | (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) || |
5194 | 0 | (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <= |
5195 | 0 | current_time_ms)) { |
5196 | 0 | metrics_context.total_need_recycle_num++; |
5197 | 0 | } |
5198 | 0 | return 0; |
5199 | 0 | }; Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
5200 | |
|
5201 | 0 | int ret = scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key, std::move(handle_expired_txn_label_kv)); |
5202 | 0 | metrics_context.report(true); |
5203 | 0 | return ret; |
5204 | 0 | } |
5205 | | |
5206 | | // Scan and statistics copy_jobs that need to be recycled |
5207 | 0 | int InstanceRecycler::scan_and_statistics_copy_jobs() { |
5208 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_copy_jobs"); |
5209 | 0 | CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0}; |
5210 | 0 | CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0}; |
5211 | 0 | std::string key0; |
5212 | 0 | std::string key1; |
5213 | 0 | copy_job_key(key_info0, &key0); |
5214 | 0 | copy_job_key(key_info1, &key1); |
5215 | | |
5216 | | // for calculate the total num or bytes of recyled objects |
5217 | 0 | auto scan_and_statistics = [&metrics_context](std::string_view k, std::string_view v) -> int { |
5218 | 0 | CopyJobPB copy_job; |
5219 | 0 | if (!copy_job.ParseFromArray(v.data(), v.size())) { |
5220 | 0 | LOG_WARNING("malformed copy job").tag("key", hex(k)); |
5221 | 0 | return 0; |
5222 | 0 | } |
5223 | | |
5224 | 0 | if (copy_job.job_status() == CopyJobPB::FINISH) { |
5225 | 0 | if (copy_job.stage_type() == StagePB::EXTERNAL) { |
5226 | 0 | int64_t current_time = |
5227 | 0 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
5228 | 0 | if (copy_job.finish_time_ms() > 0) { |
5229 | 0 | if (!config::force_immediate_recycle && |
5230 | 0 | current_time < copy_job.finish_time_ms() + |
5231 | 0 | config::copy_job_max_retention_second * 1000) { |
5232 | 0 | return 0; |
5233 | 0 | } |
5234 | 0 | } else { |
5235 | 0 | if (!config::force_immediate_recycle && |
5236 | 0 | current_time < copy_job.start_time_ms() + |
5237 | 0 | config::copy_job_max_retention_second * 1000) { |
5238 | 0 | return 0; |
5239 | 0 | } |
5240 | 0 | } |
5241 | 0 | } |
5242 | 0 | } else if (copy_job.job_status() == CopyJobPB::LOADING) { |
5243 | 0 | int64_t current_time = |
5244 | 0 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
5245 | 0 | if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) { |
5246 | 0 | return 0; |
5247 | 0 | } |
5248 | 0 | } |
5249 | 0 | metrics_context.total_need_recycle_num++; |
5250 | 0 | return 0; |
5251 | 0 | }; Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
5252 | |
|
5253 | 0 | int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics)); |
5254 | 0 | metrics_context.report(true); |
5255 | 0 | return ret; |
5256 | 0 | } |
5257 | | |
5258 | | // Scan and statistics stage that need to be recycled |
5259 | 0 | int InstanceRecycler::scan_and_statistics_stage() { |
5260 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_stage"); |
5261 | 0 | RecycleStageKeyInfo key_info0 {instance_id_, ""}; |
5262 | 0 | RecycleStageKeyInfo key_info1 {instance_id_, "\xff"}; |
5263 | 0 | std::string key0 = recycle_stage_key(key_info0); |
5264 | 0 | std::string key1 = recycle_stage_key(key_info1); |
5265 | | |
5266 | | // for calculate the total num or bytes of recyled objects |
5267 | 0 | auto scan_and_statistics = [&metrics_context, this](std::string_view k, |
5268 | 0 | std::string_view v) -> int { |
5269 | 0 | RecycleStagePB recycle_stage; |
5270 | 0 | if (!recycle_stage.ParseFromArray(v.data(), v.size())) { |
5271 | 0 | LOG_WARNING("malformed recycle stage").tag("key", hex(k)); |
5272 | 0 | return 0; |
5273 | 0 | } |
5274 | | |
5275 | 0 | int idx = stoi(recycle_stage.stage().obj_info().id()); |
5276 | 0 | if (idx > instance_info_.obj_info().size() || idx < 1) { |
5277 | 0 | LOG(WARNING) << "invalid idx: " << idx; |
5278 | 0 | return 0; |
5279 | 0 | } |
5280 | | |
5281 | 0 | std::shared_ptr<StorageVaultAccessor> accessor; |
5282 | 0 | int ret = SYNC_POINT_HOOK_RETURN_VALUE( |
5283 | 0 | [&] { |
5284 | 0 | auto& old_obj = instance_info_.obj_info()[idx - 1]; |
5285 | 0 | auto s3_conf = S3Conf::from_obj_store_info(old_obj); |
5286 | 0 | if (!s3_conf) { |
5287 | 0 | return 0; |
5288 | 0 | } |
5289 | |
|
5290 | 0 | s3_conf->prefix = recycle_stage.stage().obj_info().prefix(); |
5291 | 0 | std::shared_ptr<S3Accessor> s3_accessor; |
5292 | 0 | int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor); |
5293 | 0 | if (ret != 0) { |
5294 | 0 | return 0; |
5295 | 0 | } |
5296 | |
|
5297 | 0 | accessor = std::move(s3_accessor); |
5298 | 0 | return 0; |
5299 | 0 | }(), |
5300 | 0 | "recycle_stage:get_accessor", &accessor); |
5301 | |
|
5302 | 0 | if (ret != 0) { |
5303 | 0 | LOG(WARNING) << "failed to init accessor ret=" << ret; |
5304 | 0 | return 0; |
5305 | 0 | } |
5306 | | |
5307 | 0 | metrics_context.total_need_recycle_num++; |
5308 | 0 | return 0; |
5309 | 0 | }; Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
5310 | |
|
5311 | 0 | int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics)); |
5312 | 0 | metrics_context.report(true); |
5313 | 0 | return ret; |
5314 | 0 | } |
5315 | | |
5316 | | // Scan and statistics expired_stage_objects that need to be recycled |
5317 | 0 | int InstanceRecycler::scan_and_statistics_expired_stage_objects() { |
5318 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects"); |
5319 | | |
5320 | | // for calculate the total num or bytes of recyled objects |
5321 | 0 | auto scan_and_statistics = [&metrics_context, this]() { |
5322 | 0 | for (const auto& stage : instance_info_.stages()) { |
5323 | 0 | if (stopped()) { |
5324 | 0 | break; |
5325 | 0 | } |
5326 | 0 | if (stage.type() == StagePB::EXTERNAL) { |
5327 | 0 | continue; |
5328 | 0 | } |
5329 | 0 | int idx = stoi(stage.obj_info().id()); |
5330 | 0 | if (idx > instance_info_.obj_info().size() || idx < 1) { |
5331 | 0 | continue; |
5332 | 0 | } |
5333 | 0 | const auto& old_obj = instance_info_.obj_info()[idx - 1]; |
5334 | 0 | auto s3_conf = S3Conf::from_obj_store_info(old_obj); |
5335 | 0 | if (!s3_conf) { |
5336 | 0 | continue; |
5337 | 0 | } |
5338 | 0 | s3_conf->prefix = stage.obj_info().prefix(); |
5339 | 0 | std::shared_ptr<S3Accessor> accessor; |
5340 | 0 | int ret1 = S3Accessor::create(*s3_conf, &accessor); |
5341 | 0 | if (ret1 != 0) { |
5342 | 0 | continue; |
5343 | 0 | } |
5344 | 0 | if (s3_conf->prefix.find("/stage/") == std::string::npos) { |
5345 | 0 | continue; |
5346 | 0 | } |
5347 | 0 | metrics_context.total_need_recycle_num++; |
5348 | 0 | } |
5349 | 0 | }; Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv |
5350 | |
|
5351 | 0 | scan_and_statistics(); |
5352 | 0 | metrics_context.report(true); |
5353 | 0 | return 0; |
5354 | 0 | } |
5355 | | |
5356 | | // Scan and statistics versions that need to be recycled |
5357 | 0 | int InstanceRecycler::scan_and_statistics_versions() { |
5358 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions"); |
5359 | 0 | auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0}); |
5360 | 0 | auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0}); |
5361 | |
|
5362 | 0 | int64_t last_scanned_table_id = 0; |
5363 | 0 | bool is_recycled = false; // Is last scanned kv recycled |
5364 | | // for calculate the total num or bytes of recyled objects |
5365 | 0 | auto scan_and_statistics = [&metrics_context, &last_scanned_table_id, &is_recycled, this]( |
5366 | 0 | std::string_view k, std::string_view) { |
5367 | 0 | auto k1 = k; |
5368 | 0 | k1.remove_prefix(1); |
5369 | | // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id} |
5370 | 0 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
5371 | 0 | decode_key(&k1, &out); |
5372 | 0 | DCHECK_EQ(out.size(), 6) << k; |
5373 | 0 | auto table_id = std::get<int64_t>(std::get<0>(out[4])); |
5374 | 0 | if (table_id == last_scanned_table_id) { // Already handle kvs of this table |
5375 | 0 | metrics_context.total_need_recycle_num += |
5376 | 0 | is_recycled; // Version kv of this table has been recycled |
5377 | 0 | return 0; |
5378 | 0 | } |
5379 | 0 | last_scanned_table_id = table_id; |
5380 | 0 | is_recycled = false; |
5381 | 0 | auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0}); |
5382 | 0 | auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0}); |
5383 | 0 | std::unique_ptr<Transaction> txn; |
5384 | 0 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
5385 | 0 | if (err != TxnErrorCode::TXN_OK) { |
5386 | 0 | return 0; |
5387 | 0 | } |
5388 | 0 | std::unique_ptr<RangeGetIterator> iter; |
5389 | 0 | err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1); |
5390 | 0 | if (err != TxnErrorCode::TXN_OK) { |
5391 | 0 | return 0; |
5392 | 0 | } |
5393 | 0 | if (iter->has_next()) { // Table is useful, should not recycle table and partition versions |
5394 | 0 | return 0; |
5395 | 0 | } |
5396 | 0 | metrics_context.total_need_recycle_num++; |
5397 | 0 | is_recycled = true; |
5398 | 0 | return 0; |
5399 | 0 | }; Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
5400 | |
|
5401 | 0 | int ret = scan_and_recycle(version_key_begin, version_key_end, std::move(scan_and_statistics)); |
5402 | 0 | metrics_context.report(true); |
5403 | 0 | return ret; |
5404 | 0 | } |
5405 | | |
5406 | | // Scan and statistics restore jobs that need to be recycled |
5407 | 0 | int InstanceRecycler::scan_and_statistics_restore_jobs() { |
5408 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_restore_jobs"); |
5409 | 0 | JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0}; |
5410 | 0 | JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX}; |
5411 | 0 | std::string restore_job_key0; |
5412 | 0 | std::string restore_job_key1; |
5413 | 0 | job_restore_tablet_key(restore_job_key_info0, &restore_job_key0); |
5414 | 0 | job_restore_tablet_key(restore_job_key_info1, &restore_job_key1); |
5415 | |
|
5416 | 0 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
5417 | | |
5418 | | // for calculate the total num or bytes of recyled objects |
5419 | 0 | auto scan_and_statistics = [&](std::string_view k, std::string_view v) -> int { |
5420 | 0 | RestoreJobCloudPB restore_job_pb; |
5421 | 0 | if (!restore_job_pb.ParseFromArray(v.data(), v.size())) { |
5422 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); |
5423 | 0 | return 0; |
5424 | 0 | } |
5425 | 0 | int64_t expiration = |
5426 | 0 | calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts); |
5427 | 0 | int64_t current_time = ::time(nullptr); |
5428 | 0 | if (current_time < expiration) { // not expired |
5429 | 0 | return 0; |
5430 | 0 | } |
5431 | 0 | metrics_context.total_need_recycle_num++; |
5432 | 0 | if(restore_job_pb.need_recycle_data()) { |
5433 | 0 | scan_tablet_and_statistics(restore_job_pb.tablet_id(), metrics_context); |
5434 | 0 | } |
5435 | 0 | return 0; |
5436 | 0 | }; Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
5437 | |
|
5438 | 0 | int ret = scan_and_recycle(restore_job_key0, restore_job_key1, std::move(scan_and_statistics)); |
5439 | 0 | metrics_context.report(true); |
5440 | 0 | return ret; |
5441 | 0 | } |
5442 | | |
5443 | | } // namespace doris::cloud |