/root/doris/cloud/src/recycler/recycler.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "recycler/recycler.h" |
19 | | |
20 | | #include <brpc/builtin_service.pb.h> |
21 | | #include <brpc/server.h> |
22 | | #include <butil/endpoint.h> |
23 | | #include <butil/strings/string_split.h> |
24 | | #include <bvar/status.h> |
25 | | #include <gen_cpp/cloud.pb.h> |
26 | | #include <gen_cpp/olap_file.pb.h> |
27 | | |
28 | | #include <algorithm> |
29 | | #include <atomic> |
30 | | #include <chrono> |
31 | | #include <cstddef> |
32 | | #include <cstdint> |
33 | | #include <cstdlib> |
34 | | #include <deque> |
35 | | #include <functional> |
36 | | #include <initializer_list> |
37 | | #include <memory> |
38 | | #include <numeric> |
39 | | #include <optional> |
40 | | #include <random> |
41 | | #include <string> |
42 | | #include <string_view> |
43 | | #include <thread> |
44 | | #include <unordered_map> |
45 | | #include <utility> |
46 | | #include <variant> |
47 | | |
48 | | #include "common/defer.h" |
49 | | #include "common/stopwatch.h" |
50 | | #include "meta-service/meta_service.h" |
51 | | #include "meta-service/meta_service_helper.h" |
52 | | #include "meta-service/meta_service_schema.h" |
53 | | #include "meta-store/blob_message.h" |
54 | | #include "meta-store/meta_reader.h" |
55 | | #include "meta-store/txn_kv.h" |
56 | | #include "meta-store/txn_kv_error.h" |
57 | | #include "meta-store/versioned_value.h" |
58 | | #include "recycler/checker.h" |
59 | | #ifdef ENABLE_HDFS_STORAGE_VAULT |
60 | | #include "recycler/hdfs_accessor.h" |
61 | | #endif |
62 | | #include "recycler/s3_accessor.h" |
63 | | #include "recycler/storage_vault_accessor.h" |
64 | | #ifdef UNIT_TEST |
65 | | #include "../test/mock_accessor.h" |
66 | | #endif |
67 | | #include "common/bvars.h" |
68 | | #include "common/config.h" |
69 | | #include "common/encryption_util.h" |
70 | | #include "common/logging.h" |
71 | | #include "common/simple_thread_pool.h" |
72 | | #include "common/util.h" |
73 | | #include "cpp/sync_point.h" |
74 | | #include "meta-store/codec.h" |
75 | | #include "meta-store/document_message.h" |
76 | | #include "meta-store/keys.h" |
77 | | #include "recycler/recycler_service.h" |
78 | | #include "recycler/sync_executor.h" |
79 | | #include "recycler/util.h" |
80 | | #include "snapshot/snapshot_manager_factory.h" |
81 | | |
82 | | namespace doris::cloud { |
83 | | |
84 | | using namespace std::chrono; |
85 | | |
86 | | namespace { |
87 | | |
88 | 0 | int64_t packed_file_retry_sleep_ms() { |
89 | 0 | const int64_t min_ms = std::max<int64_t>(0, config::packed_file_txn_retry_sleep_min_ms); |
90 | 0 | const int64_t max_ms = std::max<int64_t>(min_ms, config::packed_file_txn_retry_sleep_max_ms); |
91 | 0 | thread_local std::mt19937_64 gen(std::random_device {}()); |
92 | 0 | std::uniform_int_distribution<int64_t> dist(min_ms, max_ms); |
93 | 0 | return dist(gen); |
94 | 0 | } Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_126packed_file_retry_sleep_msEv Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_126packed_file_retry_sleep_msEv |
95 | | |
96 | 0 | void sleep_for_packed_file_retry() { |
97 | 0 | std::this_thread::sleep_for(std::chrono::milliseconds(packed_file_retry_sleep_ms())); |
98 | 0 | } Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_127sleep_for_packed_file_retryEv Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_127sleep_for_packed_file_retryEv |
99 | | |
100 | 37 | bool filter_out_instance(const std::string& instance_id) { |
101 | 37 | if (config::recycle_whitelist.empty()) { |
102 | 35 | return std::ranges::find(config::recycle_blacklist, instance_id) != |
103 | 35 | config::recycle_blacklist.end(); |
104 | 35 | } |
105 | 2 | return std::ranges::find(config::recycle_whitelist, instance_id) == |
106 | 2 | config::recycle_whitelist.end(); |
107 | 37 | } Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_119filter_out_instanceERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_119filter_out_instanceERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 100 | 37 | bool filter_out_instance(const std::string& instance_id) { | 101 | 37 | if (config::recycle_whitelist.empty()) { | 102 | 35 | return std::ranges::find(config::recycle_blacklist, instance_id) != | 103 | 35 | config::recycle_blacklist.end(); | 104 | 35 | } | 105 | 2 | return std::ranges::find(config::recycle_whitelist, instance_id) == | 106 | 2 | config::recycle_whitelist.end(); | 107 | 37 | } |
|
108 | | |
109 | | } // namespace |
110 | | |
111 | | // return 0 for success get a key, 1 for key not found, negative for error |
112 | 0 | [[maybe_unused]] static int txn_get(TxnKv* txn_kv, std::string_view key, std::string& val) { |
113 | 0 | std::unique_ptr<Transaction> txn; |
114 | 0 | TxnErrorCode err = txn_kv->create_txn(&txn); |
115 | 0 | if (err != TxnErrorCode::TXN_OK) { |
116 | 0 | return -1; |
117 | 0 | } |
118 | 0 | switch (txn->get(key, &val, true)) { |
119 | 0 | case TxnErrorCode::TXN_OK: |
120 | 0 | return 0; |
121 | 0 | case TxnErrorCode::TXN_KEY_NOT_FOUND: |
122 | 0 | return 1; |
123 | 0 | default: |
124 | 0 | return -1; |
125 | 0 | }; |
126 | 0 | } Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE |
127 | | |
128 | | // 0 for success, negative for error |
129 | | static int txn_get(TxnKv* txn_kv, std::string_view begin, std::string_view end, |
130 | 337 | std::unique_ptr<RangeGetIterator>& it) { |
131 | 337 | std::unique_ptr<Transaction> txn; |
132 | 337 | TxnErrorCode err = txn_kv->create_txn(&txn); |
133 | 337 | if (err != TxnErrorCode::TXN_OK) { |
134 | 0 | return -1; |
135 | 0 | } |
136 | 337 | switch (txn->get(begin, end, &it, true)) { |
137 | 337 | case TxnErrorCode::TXN_OK: |
138 | 337 | return 0; |
139 | 0 | case TxnErrorCode::TXN_KEY_NOT_FOUND: |
140 | 0 | return 1; |
141 | 0 | default: |
142 | 0 | return -1; |
143 | 337 | }; |
144 | 0 | } recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE Line | Count | Source | 130 | 31 | std::unique_ptr<RangeGetIterator>& it) { | 131 | 31 | std::unique_ptr<Transaction> txn; | 132 | 31 | TxnErrorCode err = txn_kv->create_txn(&txn); | 133 | 31 | if (err != TxnErrorCode::TXN_OK) { | 134 | 0 | return -1; | 135 | 0 | } | 136 | 31 | switch (txn->get(begin, end, &it, true)) { | 137 | 31 | case TxnErrorCode::TXN_OK: | 138 | 31 | return 0; | 139 | 0 | case TxnErrorCode::TXN_KEY_NOT_FOUND: | 140 | 0 | return 1; | 141 | 0 | default: | 142 | 0 | return -1; | 143 | 31 | }; | 144 | 0 | } |
recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE Line | Count | Source | 130 | 306 | std::unique_ptr<RangeGetIterator>& it) { | 131 | 306 | std::unique_ptr<Transaction> txn; | 132 | 306 | TxnErrorCode err = txn_kv->create_txn(&txn); | 133 | 306 | if (err != TxnErrorCode::TXN_OK) { | 134 | 0 | return -1; | 135 | 0 | } | 136 | 306 | switch (txn->get(begin, end, &it, true)) { | 137 | 306 | case TxnErrorCode::TXN_OK: | 138 | 306 | return 0; | 139 | 0 | case TxnErrorCode::TXN_KEY_NOT_FOUND: | 140 | 0 | return 1; | 141 | 0 | default: | 142 | 0 | return -1; | 143 | 306 | }; | 144 | 0 | } |
|
145 | | |
146 | | // return 0 for success otherwise error |
147 | 6 | static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) { |
148 | 6 | std::unique_ptr<Transaction> txn; |
149 | 6 | TxnErrorCode err = txn_kv->create_txn(&txn); |
150 | 6 | if (err != TxnErrorCode::TXN_OK) { |
151 | 0 | return -1; |
152 | 0 | } |
153 | 10 | for (auto k : keys) { |
154 | 10 | txn->remove(k); |
155 | 10 | } |
156 | 6 | switch (txn->commit()) { |
157 | 6 | case TxnErrorCode::TXN_OK: |
158 | 6 | return 0; |
159 | 0 | case TxnErrorCode::TXN_CONFLICT: |
160 | 0 | return -1; |
161 | 0 | default: |
162 | 0 | return -1; |
163 | 6 | } |
164 | 6 | } recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE Line | Count | Source | 147 | 1 | static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) { | 148 | 1 | std::unique_ptr<Transaction> txn; | 149 | 1 | TxnErrorCode err = txn_kv->create_txn(&txn); | 150 | 1 | if (err != TxnErrorCode::TXN_OK) { | 151 | 0 | return -1; | 152 | 0 | } | 153 | 1 | for (auto k : keys) { | 154 | 1 | txn->remove(k); | 155 | 1 | } | 156 | 1 | switch (txn->commit()) { | 157 | 1 | case TxnErrorCode::TXN_OK: | 158 | 1 | return 0; | 159 | 0 | case TxnErrorCode::TXN_CONFLICT: | 160 | 0 | return -1; | 161 | 0 | default: | 162 | 0 | return -1; | 163 | 1 | } | 164 | 1 | } |
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE Line | Count | Source | 147 | 5 | static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) { | 148 | 5 | std::unique_ptr<Transaction> txn; | 149 | 5 | TxnErrorCode err = txn_kv->create_txn(&txn); | 150 | 5 | if (err != TxnErrorCode::TXN_OK) { | 151 | 0 | return -1; | 152 | 0 | } | 153 | 9 | for (auto k : keys) { | 154 | 9 | txn->remove(k); | 155 | 9 | } | 156 | 5 | switch (txn->commit()) { | 157 | 5 | case TxnErrorCode::TXN_OK: | 158 | 5 | return 0; | 159 | 0 | case TxnErrorCode::TXN_CONFLICT: | 160 | 0 | return -1; | 161 | 0 | default: | 162 | 0 | return -1; | 163 | 5 | } | 164 | 5 | } |
|
165 | | |
166 | | // return 0 for success otherwise error |
167 | 139 | static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) { |
168 | 139 | std::unique_ptr<Transaction> txn; |
169 | 139 | TxnErrorCode err = txn_kv->create_txn(&txn); |
170 | 139 | if (err != TxnErrorCode::TXN_OK) { |
171 | 0 | return -1; |
172 | 0 | } |
173 | 106k | for (auto& k : keys) { |
174 | 106k | txn->remove(k); |
175 | 106k | } |
176 | 139 | switch (txn->commit()) { |
177 | 139 | case TxnErrorCode::TXN_OK: |
178 | 139 | return 0; |
179 | 0 | case TxnErrorCode::TXN_CONFLICT: |
180 | 0 | return -1; |
181 | 0 | default: |
182 | 0 | return -1; |
183 | 139 | } |
184 | 139 | } recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE Line | Count | Source | 167 | 33 | static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) { | 168 | 33 | std::unique_ptr<Transaction> txn; | 169 | 33 | TxnErrorCode err = txn_kv->create_txn(&txn); | 170 | 33 | if (err != TxnErrorCode::TXN_OK) { | 171 | 0 | return -1; | 172 | 0 | } | 173 | 33 | for (auto& k : keys) { | 174 | 16 | txn->remove(k); | 175 | 16 | } | 176 | 33 | switch (txn->commit()) { | 177 | 33 | case TxnErrorCode::TXN_OK: | 178 | 33 | return 0; | 179 | 0 | case TxnErrorCode::TXN_CONFLICT: | 180 | 0 | return -1; | 181 | 0 | default: | 182 | 0 | return -1; | 183 | 33 | } | 184 | 33 | } |
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE Line | Count | Source | 167 | 106 | static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) { | 168 | 106 | std::unique_ptr<Transaction> txn; | 169 | 106 | TxnErrorCode err = txn_kv->create_txn(&txn); | 170 | 106 | if (err != TxnErrorCode::TXN_OK) { | 171 | 0 | return -1; | 172 | 0 | } | 173 | 106k | for (auto& k : keys) { | 174 | 106k | txn->remove(k); | 175 | 106k | } | 176 | 106 | switch (txn->commit()) { | 177 | 106 | case TxnErrorCode::TXN_OK: | 178 | 106 | return 0; | 179 | 0 | case TxnErrorCode::TXN_CONFLICT: | 180 | 0 | return -1; | 181 | 0 | default: | 182 | 0 | return -1; | 183 | 106 | } | 184 | 106 | } |
|
185 | | |
186 | | // return 0 for success otherwise error |
187 | | [[maybe_unused]] static int txn_remove(TxnKv* txn_kv, std::string_view begin, |
188 | 106k | std::string_view end) { |
189 | 106k | std::unique_ptr<Transaction> txn; |
190 | 106k | TxnErrorCode err = txn_kv->create_txn(&txn); |
191 | 106k | if (err != TxnErrorCode::TXN_OK) { |
192 | 0 | return -1; |
193 | 0 | } |
194 | 106k | txn->remove(begin, end); |
195 | 106k | switch (txn->commit()) { |
196 | 106k | case TxnErrorCode::TXN_OK: |
197 | 106k | return 0; |
198 | 0 | case TxnErrorCode::TXN_CONFLICT: |
199 | 0 | return -1; |
200 | 0 | default: |
201 | 0 | return -1; |
202 | 106k | } |
203 | 106k | } recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 188 | 16 | std::string_view end) { | 189 | 16 | std::unique_ptr<Transaction> txn; | 190 | 16 | TxnErrorCode err = txn_kv->create_txn(&txn); | 191 | 16 | if (err != TxnErrorCode::TXN_OK) { | 192 | 0 | return -1; | 193 | 0 | } | 194 | 16 | txn->remove(begin, end); | 195 | 16 | switch (txn->commit()) { | 196 | 16 | case TxnErrorCode::TXN_OK: | 197 | 16 | return 0; | 198 | 0 | case TxnErrorCode::TXN_CONFLICT: | 199 | 0 | return -1; | 200 | 0 | default: | 201 | 0 | return -1; | 202 | 16 | } | 203 | 16 | } |
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 188 | 106k | std::string_view end) { | 189 | 106k | std::unique_ptr<Transaction> txn; | 190 | 106k | TxnErrorCode err = txn_kv->create_txn(&txn); | 191 | 106k | if (err != TxnErrorCode::TXN_OK) { | 192 | 0 | return -1; | 193 | 0 | } | 194 | 106k | txn->remove(begin, end); | 195 | 106k | switch (txn->commit()) { | 196 | 106k | case TxnErrorCode::TXN_OK: | 197 | 106k | return 0; | 198 | 0 | case TxnErrorCode::TXN_CONFLICT: | 199 | 0 | return -1; | 200 | 0 | default: | 201 | 0 | return -1; | 202 | 106k | } | 203 | 106k | } |
|
204 | | |
205 | | void scan_restore_job_rowset( |
206 | | Transaction* txn, const std::string& instance_id, int64_t tablet_id, MetaServiceCode& code, |
207 | | std::string& msg, |
208 | | std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>>* restore_job_rs_metas); |
209 | | |
210 | | static inline void check_recycle_task(const std::string& instance_id, const std::string& task_name, |
211 | | int64_t num_scanned, int64_t num_recycled, |
212 | 47 | int64_t start_time) { |
213 | 47 | if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] { |
214 | 0 | int64_t cost = |
215 | 0 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
216 | 0 | if (cost > config::recycle_task_threshold_seconds) { |
217 | 0 | LOG_WARNING("recycle task cost too much time cost={}s", cost) |
218 | 0 | .tag("instance_id", instance_id) |
219 | 0 | .tag("task", task_name) |
220 | 0 | .tag("num_scanned", num_scanned) |
221 | 0 | .tag("num_recycled", num_recycled); |
222 | 0 | } |
223 | 0 | } |
224 | 47 | return; |
225 | 47 | } recycler.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll Line | Count | Source | 212 | 2 | int64_t start_time) { | 213 | 2 | if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] { | 214 | 0 | int64_t cost = | 215 | 0 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 216 | 0 | if (cost > config::recycle_task_threshold_seconds) { | 217 | 0 | LOG_WARNING("recycle task cost too much time cost={}s", cost) | 218 | 0 | .tag("instance_id", instance_id) | 219 | 0 | .tag("task", task_name) | 220 | 0 | .tag("num_scanned", num_scanned) | 221 | 0 | .tag("num_recycled", num_recycled); | 222 | 0 | } | 223 | 0 | } | 224 | 2 | return; | 225 | 2 | } |
recycler_test.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll Line | Count | Source | 212 | 45 | int64_t start_time) { | 213 | 45 | if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] { | 214 | 0 | int64_t cost = | 215 | 0 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 216 | 0 | if (cost > config::recycle_task_threshold_seconds) { | 217 | 0 | LOG_WARNING("recycle task cost too much time cost={}s", cost) | 218 | 0 | .tag("instance_id", instance_id) | 219 | 0 | .tag("task", task_name) | 220 | 0 | .tag("num_scanned", num_scanned) | 221 | 0 | .tag("num_recycled", num_recycled); | 222 | 0 | } | 223 | 0 | } | 224 | 45 | return; | 225 | 45 | } |
|
226 | | |
227 | 6 | Recycler::Recycler(std::shared_ptr<TxnKv> txn_kv) : txn_kv_(std::move(txn_kv)) { |
228 | 6 | ip_port_ = std::string(butil::my_ip_cstr()) + ":" + std::to_string(config::brpc_listen_port); |
229 | | |
230 | 6 | auto s3_producer_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism, |
231 | 6 | "s3_producer_pool"); |
232 | 6 | s3_producer_pool->start(); |
233 | 6 | auto recycle_tablet_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism, |
234 | 6 | "recycle_tablet_pool"); |
235 | 6 | recycle_tablet_pool->start(); |
236 | 6 | auto group_recycle_function_pool = std::make_shared<SimpleThreadPool>( |
237 | 6 | config::recycle_pool_parallelism, "group_recycle_function_pool"); |
238 | 6 | group_recycle_function_pool->start(); |
239 | 6 | _thread_pool_group = |
240 | 6 | RecyclerThreadPoolGroup(std::move(s3_producer_pool), std::move(recycle_tablet_pool), |
241 | 6 | std::move(group_recycle_function_pool)); |
242 | | |
243 | 6 | auto resource_mgr = std::make_shared<ResourceManager>(txn_kv_); |
244 | 6 | txn_lazy_committer_ = std::make_shared<TxnLazyCommitter>(txn_kv_, std::move(resource_mgr)); |
245 | 6 | snapshot_manager_ = create_snapshot_manager(txn_kv_); |
246 | 6 | } |
247 | | |
248 | 6 | Recycler::~Recycler() { |
249 | 6 | if (!stopped()) { |
250 | 0 | stop(); |
251 | 0 | } |
252 | 6 | } |
253 | | |
254 | 5 | void Recycler::instance_scanner_callback() { |
255 | | // sleep 60 seconds before scheduling for the launch procedure to complete: |
256 | | // some bad hdfs connection may cause some log to stdout stderr |
257 | | // which may pollute .out file and affect the script to check success |
258 | 5 | std::this_thread::sleep_for( |
259 | 5 | std::chrono::seconds(config::recycler_sleep_before_scheduling_seconds)); |
260 | 1.36k | while (!stopped()) { |
261 | 1.36k | if (config::enable_recycler) { |
262 | 3 | std::vector<InstanceInfoPB> instances; |
263 | 3 | get_all_instances(txn_kv_.get(), instances); |
264 | | // TODO(plat1ko): delete job recycle kv of non-existent instances |
265 | 3 | LOG(INFO) << "Recycler get instances: " << [&instances] { |
266 | 3 | std::stringstream ss; |
267 | 30 | for (auto& i : instances) ss << ' ' << i.instance_id(); |
268 | 3 | return ss.str(); |
269 | 3 | }(); Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev Line | Count | Source | 265 | 3 | LOG(INFO) << "Recycler get instances: " << [&instances] { | 266 | 3 | std::stringstream ss; | 267 | 30 | for (auto& i : instances) ss << ' ' << i.instance_id(); | 268 | 3 | return ss.str(); | 269 | 3 | }(); |
|
270 | 3 | if (!instances.empty()) { |
271 | | // enqueue instances |
272 | 3 | std::lock_guard lock(mtx_); |
273 | 30 | for (auto& instance : instances) { |
274 | 30 | if (filter_out_instance(instance.instance_id())) continue; |
275 | 30 | auto [_, success] = pending_instance_set_.insert(instance.instance_id()); |
276 | | // skip instance already in pending queue |
277 | 30 | if (success) { |
278 | 30 | pending_instance_queue_.push_back(std::move(instance)); |
279 | 30 | } |
280 | 30 | } |
281 | 3 | pending_instance_cond_.notify_all(); |
282 | 3 | } |
283 | 1.35k | } else { |
284 | 1.35k | LOG(WARNING) << "Skip recycler since enable_recycler is false"; |
285 | 1.35k | } |
286 | 1.36k | { |
287 | 1.36k | std::unique_lock lock(mtx_); |
288 | 1.36k | notifier_.wait_for(lock, std::chrono::seconds(config::recycle_interval_seconds), |
289 | 2.72k | [&]() { return stopped(); });Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv Line | Count | Source | 289 | 2.72k | [&]() { return stopped(); }); |
|
290 | 1.36k | } |
291 | 1.36k | } |
292 | 5 | } |
293 | | |
294 | 9 | void Recycler::recycle_callback() { |
295 | 40 | while (!stopped()) { |
296 | 37 | InstanceInfoPB instance; |
297 | 37 | { |
298 | 37 | std::unique_lock lock(mtx_); |
299 | 37 | pending_instance_cond_.wait( |
300 | 49 | lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv recycler_test.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv Line | Count | Source | 300 | 49 | lock, [&]() { return !pending_instance_queue_.empty() || stopped(); }); |
|
301 | 37 | if (stopped()) { |
302 | 6 | return; |
303 | 6 | } |
304 | 31 | instance = std::move(pending_instance_queue_.front()); |
305 | 31 | pending_instance_queue_.pop_front(); |
306 | 31 | pending_instance_set_.erase(instance.instance_id()); |
307 | 31 | } |
308 | 0 | auto& instance_id = instance.instance_id(); |
309 | 31 | { |
310 | 31 | std::lock_guard lock(mtx_); |
311 | | // skip instance in recycling |
312 | 31 | if (recycling_instance_map_.count(instance_id)) continue; |
313 | 31 | } |
314 | 31 | if (!config::enable_recycler) { |
315 | 1 | LOG(WARNING) << "Skip recycle instance_id=" << instance_id |
316 | 1 | << " since enable_recycler is false"; |
317 | 1 | continue; |
318 | 1 | } |
319 | 30 | auto instance_recycler = std::make_shared<InstanceRecycler>( |
320 | 30 | txn_kv_, instance, _thread_pool_group, txn_lazy_committer_); |
321 | | |
322 | 30 | if (int r = instance_recycler->init(); r != 0) { |
323 | 0 | LOG(WARNING) << "failed to init instance recycler, instance_id=" << instance_id |
324 | 0 | << " ret=" << r; |
325 | 0 | continue; |
326 | 0 | } |
327 | 30 | std::string recycle_job_key; |
328 | 30 | job_recycle_key({instance_id}, &recycle_job_key); |
329 | 30 | int ret = prepare_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, |
330 | 30 | ip_port_, config::recycle_interval_seconds * 1000); |
331 | 30 | if (ret != 0) { // Prepare failed |
332 | 20 | LOG(WARNING) << "failed to prepare recycle_job, instance_id=" << instance_id |
333 | 20 | << " ret=" << ret; |
334 | 20 | continue; |
335 | 20 | } else { |
336 | 10 | std::lock_guard lock(mtx_); |
337 | 10 | recycling_instance_map_.emplace(instance_id, instance_recycler); |
338 | 10 | } |
339 | 10 | if (stopped()) return; |
340 | 10 | LOG_WARNING("begin to recycle instance").tag("instance_id", instance_id); |
341 | 10 | auto ctime_ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
342 | 10 | g_bvar_recycler_instance_recycle_start_ts.put({instance_id}, ctime_ms); |
343 | 10 | g_bvar_recycler_instance_recycle_task_status.put({"submitted"}, 1); |
344 | 10 | ret = instance_recycler->do_recycle(); |
345 | | // If instance recycler has been aborted, don't finish this job |
346 | | |
347 | 10 | if (!instance_recycler->stopped()) { |
348 | 10 | finish_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, ip_port_, |
349 | 10 | ret == 0, ctime_ms); |
350 | 10 | } |
351 | 10 | if (instance_recycler->stopped() || ret != 0) { |
352 | 0 | g_bvar_recycler_instance_recycle_task_status.put({"error"}, 1); |
353 | 0 | } |
354 | 10 | { |
355 | 10 | std::lock_guard lock(mtx_); |
356 | 10 | recycling_instance_map_.erase(instance_id); |
357 | 10 | } |
358 | | |
359 | 10 | auto now = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
360 | 10 | auto elpased_ms = now - ctime_ms; |
361 | 10 | g_bvar_recycler_instance_recycle_end_ts.put({instance_id}, now); |
362 | 10 | g_bvar_recycler_instance_last_round_recycle_duration.put({instance_id}, elpased_ms); |
363 | 10 | g_bvar_recycler_instance_next_ts.put({instance_id}, |
364 | 10 | now + config::recycle_interval_seconds * 1000); |
365 | 10 | g_bvar_recycler_instance_recycle_task_status.put({"completed"}, 1); |
366 | 10 | LOG(INFO) << "recycle instance done, " |
367 | 10 | << "instance_id=" << instance_id << " ret=" << ret << " ctime_ms: " << ctime_ms |
368 | 10 | << " now: " << now; |
369 | | |
370 | 10 | g_bvar_recycler_instance_recycle_last_success_ts.put({instance_id}, now); |
371 | | |
372 | 10 | LOG_WARNING("finish recycle instance") |
373 | 10 | .tag("instance_id", instance_id) |
374 | 10 | .tag("cost_ms", elpased_ms); |
375 | 10 | } |
376 | 9 | } |
377 | | |
378 | 4 | void Recycler::lease_recycle_jobs() { |
379 | 54 | while (!stopped()) { |
380 | 50 | std::vector<std::string> instances; |
381 | 50 | instances.reserve(recycling_instance_map_.size()); |
382 | 50 | { |
383 | 50 | std::lock_guard lock(mtx_); |
384 | 50 | for (auto& [id, _] : recycling_instance_map_) { |
385 | 30 | instances.push_back(id); |
386 | 30 | } |
387 | 50 | } |
388 | 50 | for (auto& i : instances) { |
389 | 30 | std::string recycle_job_key; |
390 | 30 | job_recycle_key({i}, &recycle_job_key); |
391 | 30 | int ret = lease_instance_recycle_job(txn_kv_.get(), recycle_job_key, i, ip_port_); |
392 | 30 | if (ret == 1) { |
393 | 0 | std::lock_guard lock(mtx_); |
394 | 0 | if (auto it = recycling_instance_map_.find(i); |
395 | 0 | it != recycling_instance_map_.end()) { |
396 | 0 | it->second->stop(); |
397 | 0 | } |
398 | 0 | } |
399 | 30 | } |
400 | 50 | { |
401 | 50 | std::unique_lock lock(mtx_); |
402 | 50 | notifier_.wait_for(lock, |
403 | 50 | std::chrono::milliseconds(config::recycle_job_lease_expired_ms / 3), |
404 | 100 | [&]() { return stopped(); });Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv recycler_test.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv Line | Count | Source | 404 | 100 | [&]() { return stopped(); }); |
|
405 | 50 | } |
406 | 50 | } |
407 | 4 | } |
408 | | |
409 | 4 | void Recycler::check_recycle_tasks() { |
410 | 7 | while (!stopped()) { |
411 | 3 | std::unordered_map<std::string, std::shared_ptr<InstanceRecycler>> recycling_instance_map; |
412 | 3 | { |
413 | 3 | std::lock_guard lock(mtx_); |
414 | 3 | recycling_instance_map = recycling_instance_map_; |
415 | 3 | } |
416 | 3 | for (auto& entry : recycling_instance_map) { |
417 | 0 | entry.second->check_recycle_tasks(); |
418 | 0 | } |
419 | | |
420 | 3 | std::unique_lock lock(mtx_); |
421 | 3 | notifier_.wait_for(lock, std::chrono::seconds(config::check_recycle_task_interval_seconds), |
422 | 6 | [&]() { return stopped(); });Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv recycler_test.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv Line | Count | Source | 422 | 6 | [&]() { return stopped(); }); |
|
423 | 3 | } |
424 | 4 | } |
425 | | |
426 | 4 | int Recycler::start(brpc::Server* server) { |
427 | 4 | g_bvar_recycler_task_max_concurrency.set_value(config::recycle_concurrency); |
428 | 4 | S3Environment::getInstance(); |
429 | | |
430 | 4 | if (config::enable_checker) { |
431 | 0 | checker_ = std::make_unique<Checker>(txn_kv_); |
432 | 0 | int ret = checker_->start(); |
433 | 0 | std::string msg; |
434 | 0 | if (ret != 0) { |
435 | 0 | msg = "failed to start checker"; |
436 | 0 | LOG(ERROR) << msg; |
437 | 0 | std::cerr << msg << std::endl; |
438 | 0 | return ret; |
439 | 0 | } |
440 | 0 | msg = "checker started"; |
441 | 0 | LOG(INFO) << msg; |
442 | 0 | std::cout << msg << std::endl; |
443 | 0 | } |
444 | | |
445 | 4 | if (server) { |
446 | | // Add service |
447 | 1 | auto recycler_service = |
448 | 1 | new RecyclerServiceImpl(txn_kv_, this, checker_.get(), txn_lazy_committer_); |
449 | 1 | server->AddService(recycler_service, brpc::SERVER_OWNS_SERVICE); |
450 | 1 | } |
451 | | |
452 | 4 | workers_.emplace_back([this] { instance_scanner_callback(); });Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv Line | Count | Source | 452 | 4 | workers_.emplace_back([this] { instance_scanner_callback(); }); |
|
453 | 12 | for (int i = 0; i < config::recycle_concurrency; ++i) { |
454 | 8 | workers_.emplace_back([this] { recycle_callback(); });Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv Line | Count | Source | 454 | 8 | workers_.emplace_back([this] { recycle_callback(); }); |
|
455 | 8 | } |
456 | | |
457 | 4 | workers_.emplace_back(std::mem_fn(&Recycler::lease_recycle_jobs), this); |
458 | 4 | workers_.emplace_back(std::mem_fn(&Recycler::check_recycle_tasks), this); |
459 | | |
460 | 4 | if (config::enable_snapshot_data_migrator) { |
461 | 0 | snapshot_data_migrator_ = std::make_shared<SnapshotDataMigrator>(txn_kv_); |
462 | 0 | int ret = snapshot_data_migrator_->start(); |
463 | 0 | if (ret != 0) { |
464 | 0 | LOG(ERROR) << "failed to start snapshot data migrator"; |
465 | 0 | return ret; |
466 | 0 | } |
467 | 0 | LOG(INFO) << "snapshot data migrator started"; |
468 | 0 | } |
469 | | |
470 | 4 | if (config::enable_snapshot_chain_compactor) { |
471 | 0 | snapshot_chain_compactor_ = std::make_shared<SnapshotChainCompactor>(txn_kv_); |
472 | 0 | int ret = snapshot_chain_compactor_->start(); |
473 | 0 | if (ret != 0) { |
474 | 0 | LOG(ERROR) << "failed to start snapshot chain compactor"; |
475 | 0 | return ret; |
476 | 0 | } |
477 | 0 | LOG(INFO) << "snapshot chain compactor started"; |
478 | 0 | } |
479 | | |
480 | 4 | return 0; |
481 | 4 | } |
482 | | |
483 | 4 | void Recycler::stop() { |
484 | 4 | stopped_ = true; |
485 | 4 | notifier_.notify_all(); |
486 | 4 | pending_instance_cond_.notify_all(); |
487 | 4 | { |
488 | 4 | std::lock_guard lock(mtx_); |
489 | 4 | for (auto& [_, recycler] : recycling_instance_map_) { |
490 | 0 | recycler->stop(); |
491 | 0 | } |
492 | 4 | } |
493 | 20 | for (auto& w : workers_) { |
494 | 20 | if (w.joinable()) w.join(); |
495 | 20 | } |
496 | 4 | if (checker_) { |
497 | 0 | checker_->stop(); |
498 | 0 | } |
499 | 4 | if (snapshot_data_migrator_) { |
500 | 0 | snapshot_data_migrator_->stop(); |
501 | 0 | } |
502 | 4 | if (snapshot_chain_compactor_) { |
503 | 0 | snapshot_chain_compactor_->stop(); |
504 | 0 | } |
505 | 4 | } |
506 | | |
507 | | class InstanceRecycler::InvertedIndexIdCache { |
508 | | public: |
509 | | InvertedIndexIdCache(std::string instance_id, std::shared_ptr<TxnKv> txn_kv) |
510 | 132 | : instance_id_(std::move(instance_id)), txn_kv_(std::move(txn_kv)) {} |
511 | | |
512 | | // Return 0 if success, 1 if schema kv not found, negative for error |
513 | | // For the same index_id, schema_version, res, since `get` is not completely atomic |
514 | | // one thread has not finished inserting, and another thread has not get the index_id and schema_version, |
515 | | // resulting in repeated addition and inaccuracy. |
516 | | // however, this approach can reduce the lock range and sacrifice a bit of meta repeated get to improve concurrency performance. |
517 | | // repeated addition does not affect correctness. |
518 | 28.4k | int get(int64_t index_id, int32_t schema_version, InvertedIndexInfo& res) { |
519 | 28.4k | { |
520 | 28.4k | std::lock_guard lock(mtx_); |
521 | 28.4k | if (schemas_without_inverted_index_.count({index_id, schema_version})) { |
522 | 3.75k | return 0; |
523 | 3.75k | } |
524 | 24.6k | if (auto it = inverted_index_id_map_.find({index_id, schema_version}); |
525 | 24.6k | it != inverted_index_id_map_.end()) { |
526 | 17.4k | res = it->second; |
527 | 17.4k | return 0; |
528 | 17.4k | } |
529 | 24.6k | } |
530 | | // Get schema from kv |
531 | | // TODO(plat1ko): Single flight |
532 | 7.24k | std::unique_ptr<Transaction> txn; |
533 | 7.24k | TxnErrorCode err = txn_kv_->create_txn(&txn); |
534 | 7.24k | if (err != TxnErrorCode::TXN_OK) { |
535 | 0 | LOG(WARNING) << "failed to create txn, err=" << err; |
536 | 0 | return -1; |
537 | 0 | } |
538 | 7.24k | auto schema_key = meta_schema_key({instance_id_, index_id, schema_version}); |
539 | 7.24k | ValueBuf val_buf; |
540 | 7.24k | err = cloud::blob_get(txn.get(), schema_key, &val_buf); |
541 | 7.24k | if (err != TxnErrorCode::TXN_OK) { |
542 | 504 | LOG(WARNING) << "failed to get schema, err=" << err; |
543 | 504 | return static_cast<int>(err); |
544 | 504 | } |
545 | 6.74k | doris::TabletSchemaCloudPB schema; |
546 | 6.74k | if (!parse_schema_value(val_buf, &schema)) { |
547 | 0 | LOG(WARNING) << "malformed schema value, key=" << hex(schema_key); |
548 | 0 | return -1; |
549 | 0 | } |
550 | 6.74k | if (schema.index_size() > 0) { |
551 | 4.79k | InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1; |
552 | 4.79k | if (schema.has_inverted_index_storage_format()) { |
553 | 4.79k | index_format = schema.inverted_index_storage_format(); |
554 | 4.79k | } |
555 | 4.79k | res.first = index_format; |
556 | 4.79k | res.second.reserve(schema.index_size()); |
557 | 12.3k | for (auto& i : schema.index()) { |
558 | 12.3k | if (i.has_index_type() && i.index_type() == IndexType::INVERTED) { |
559 | 12.3k | res.second.push_back(std::make_pair(i.index_id(), i.index_suffix_name())); |
560 | 12.3k | } |
561 | 12.3k | } |
562 | 4.79k | } |
563 | 6.74k | insert(index_id, schema_version, res); |
564 | 6.74k | return 0; |
565 | 6.74k | } |
566 | | |
567 | | // Empty `ids` means this schema has no inverted index |
568 | 6.74k | void insert(int64_t index_id, int32_t schema_version, const InvertedIndexInfo& index_info) { |
569 | 6.74k | if (index_info.second.empty()) { |
570 | 1.94k | TEST_SYNC_POINT("InvertedIndexIdCache::insert1"); |
571 | 1.94k | std::lock_guard lock(mtx_); |
572 | 1.94k | schemas_without_inverted_index_.emplace(index_id, schema_version); |
573 | 4.79k | } else { |
574 | 4.79k | TEST_SYNC_POINT("InvertedIndexIdCache::insert2"); |
575 | 4.79k | std::lock_guard lock(mtx_); |
576 | 4.79k | inverted_index_id_map_.try_emplace({index_id, schema_version}, index_info); |
577 | 4.79k | } |
578 | 6.74k | } |
579 | | |
580 | | private: |
581 | | std::string instance_id_; |
582 | | std::shared_ptr<TxnKv> txn_kv_; |
583 | | |
584 | | std::mutex mtx_; |
585 | | using Key = std::pair<int64_t, int32_t>; // <index_id, schema_version> |
586 | | struct HashOfKey { |
587 | 59.8k | size_t operator()(const Key& key) const { |
588 | 59.8k | size_t seed = 0; |
589 | 59.8k | seed = std::hash<int64_t> {}(key.first); |
590 | 59.8k | seed = std::hash<int32_t> {}(key.second); |
591 | 59.8k | return seed; |
592 | 59.8k | } |
593 | | }; |
594 | | // <index_id, schema_version> -> inverted_index_ids |
595 | | std::unordered_map<Key, InvertedIndexInfo, HashOfKey> inverted_index_id_map_; |
596 | | // Store <index_id, schema_version> of schema which doesn't have inverted index |
597 | | std::unordered_set<Key, HashOfKey> schemas_without_inverted_index_; |
598 | | }; |
599 | | |
// Builds the recycler for one instance.
//
// txn_kv             - kv store handle; kept as a member and also handed to the inverted
//                      index cache, the resource manager and the snapshot manager.
// instance           - instance description; copied into instance_info_.
// thread_pool_group  - thread pools this recycler will use.
// txn_lazy_committer - lazy committer; its resource manager is refreshed with `instance`
//                      below, so it is dereferenced here.
//
// NOTE(review): several initializers read the members instance_id_ / txn_kv_ (the
// `txn_kv` parameter has already been moved from). Members are initialized in class
// declaration order, not in the order written here — confirm those two are declared
// before the members that depend on them.
InstanceRecycler::InstanceRecycler(std::shared_ptr<TxnKv> txn_kv, const InstanceInfoPB& instance,
                                   RecyclerThreadPoolGroup thread_pool_group,
                                   std::shared_ptr<TxnLazyCommitter> txn_lazy_committer)
        : txn_kv_(std::move(txn_kv)),
          instance_id_(instance.instance_id()),
          instance_info_(instance),
          inverted_index_id_cache_(std::make_unique<InvertedIndexIdCache>(instance_id_, txn_kv_)),
          _thread_pool_group(std::move(thread_pool_group)),
          txn_lazy_committer_(std::move(txn_lazy_committer)),
          delete_bitmap_lock_white_list_(std::make_shared<DeleteBitmapLockWhiteList>()),
          resource_mgr_(std::make_shared<ResourceManager>(txn_kv_)) {
    delete_bitmap_lock_white_list_->init();
    resource_mgr_->init();

    snapshot_manager_ = create_snapshot_manager(txn_kv_);

    // Since the recycler's resource manager could not be notified when instance info changes,
    // we need to refresh the instance info here to ensure the resource manager has the latest info.
    txn_lazy_committer_->resource_manager()->refresh_instance(instance_id_, instance);
};

// Defaulted out of line — presumably so that the InvertedIndexIdCache member is destroyed
// in this translation unit, where the class is fully defined (TODO confirm against the header).
InstanceRecycler::~InstanceRecycler() = default;
622 | | |
623 | 116 | int InstanceRecycler::init_obj_store_accessors() { |
624 | 116 | for (const auto& obj_info : instance_info_.obj_info()) { |
625 | 76 | #ifdef UNIT_TEST |
626 | 76 | auto accessor = std::make_shared<MockAccessor>(); |
627 | | #else |
628 | | auto s3_conf = S3Conf::from_obj_store_info(obj_info); |
629 | | if (!s3_conf) { |
630 | | LOG(WARNING) << "failed to init object accessor, instance_id=" << instance_id_; |
631 | | return -1; |
632 | | } |
633 | | |
634 | | std::shared_ptr<S3Accessor> accessor; |
635 | | int ret = S3Accessor::create(std::move(*s3_conf), &accessor); |
636 | | if (ret != 0) { |
637 | | LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_ |
638 | | << " resource_id=" << obj_info.id(); |
639 | | return ret; |
640 | | } |
641 | | #endif |
642 | 76 | accessor_map_.emplace(obj_info.id(), std::move(accessor)); |
643 | 76 | } |
644 | | |
645 | 116 | return 0; |
646 | 116 | } |
647 | | |
648 | 116 | int InstanceRecycler::init_storage_vault_accessors() { |
649 | 116 | if (instance_info_.resource_ids().empty()) { |
650 | 109 | return 0; |
651 | 109 | } |
652 | | |
653 | 7 | FullRangeGetOptions opts(txn_kv_); |
654 | 7 | opts.prefetch = true; |
655 | 7 | auto it = txn_kv_->full_range_get(storage_vault_key({instance_id_, ""}), |
656 | 7 | storage_vault_key({instance_id_, "\xff"}), std::move(opts)); |
657 | | |
658 | 25 | for (auto kv = it->next(); kv.has_value(); kv = it->next()) { |
659 | 18 | auto [k, v] = *kv; |
660 | 18 | StorageVaultPB vault; |
661 | 18 | if (!vault.ParseFromArray(v.data(), v.size())) { |
662 | 0 | LOG(WARNING) << "malformed storage vault, unable to deserialize key=" << hex(k); |
663 | 0 | return -1; |
664 | 0 | } |
665 | 18 | std::string recycler_storage_vault_white_list = accumulate( |
666 | 18 | config::recycler_storage_vault_white_list.begin(), |
667 | 18 | config::recycler_storage_vault_white_list.end(), std::string(), |
668 | 24 | [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_ recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_ Line | Count | Source | 668 | 24 | [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; }); |
|
669 | 18 | LOG_INFO("config::recycler_storage_vault_white_list") |
670 | 18 | .tag("", recycler_storage_vault_white_list); |
671 | 18 | if (!config::recycler_storage_vault_white_list.empty()) { |
672 | 8 | if (auto it = std::find(config::recycler_storage_vault_white_list.begin(), |
673 | 8 | config::recycler_storage_vault_white_list.end(), vault.name()); |
674 | 8 | it == config::recycler_storage_vault_white_list.end()) { |
675 | 2 | LOG_WARNING( |
676 | 2 | "failed to init accessor for vault because this vault is not in " |
677 | 2 | "config::recycler_storage_vault_white_list. ") |
678 | 2 | .tag(" vault name:", vault.name()) |
679 | 2 | .tag(" config::recycler_storage_vault_white_list:", |
680 | 2 | recycler_storage_vault_white_list); |
681 | 2 | continue; |
682 | 2 | } |
683 | 8 | } |
684 | 16 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault", |
685 | 16 | &accessor_map_, &vault); |
686 | 16 | if (vault.has_hdfs_info()) { |
687 | 9 | #ifdef ENABLE_HDFS_STORAGE_VAULT |
688 | 9 | auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info()); |
689 | 9 | int ret = accessor->init(); |
690 | 9 | if (ret != 0) { |
691 | 4 | LOG(WARNING) << "failed to init hdfs accessor. instance_id=" << instance_id_ |
692 | 4 | << " resource_id=" << vault.id() << " name=" << vault.name() |
693 | 4 | << " hdfs_vault=" << vault.hdfs_info().ShortDebugString(); |
694 | 4 | continue; |
695 | 4 | } |
696 | 5 | LOG(INFO) << "succeed to init hdfs accessor. instance_id=" << instance_id_ |
697 | 5 | << " resource_id=" << vault.id() << " name=" << vault.name() |
698 | 5 | << " hdfs_vault=" << vault.hdfs_info().ShortDebugString(); |
699 | 5 | accessor_map_.emplace(vault.id(), std::move(accessor)); |
700 | | #else |
701 | | LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build option), " |
702 | | << "but HDFS storage vaults were detected"; |
703 | | #endif |
704 | 7 | } else if (vault.has_obj_info()) { |
705 | 7 | auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info()); |
706 | 7 | if (!s3_conf) { |
707 | 1 | LOG(WARNING) << "failed to init object accessor, invalid conf, instance_id=" |
708 | 1 | << instance_id_ << " s3_vault=" << vault.obj_info().ShortDebugString(); |
709 | 1 | continue; |
710 | 1 | } |
711 | | |
712 | 6 | std::shared_ptr<S3Accessor> accessor; |
713 | 6 | int ret = S3Accessor::create(*s3_conf, &accessor); |
714 | 6 | if (ret != 0) { |
715 | 0 | LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_ |
716 | 0 | << " resource_id=" << vault.id() << " name=" << vault.name() |
717 | 0 | << " ret=" << ret |
718 | 0 | << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString()); |
719 | 0 | continue; |
720 | 0 | } |
721 | 6 | LOG(INFO) << "succeed to init s3 accessor. instance_id=" << instance_id_ |
722 | 6 | << " resource_id=" << vault.id() << " name=" << vault.name() << " ret=" << ret |
723 | 6 | << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString()); |
724 | 6 | accessor_map_.emplace(vault.id(), std::move(accessor)); |
725 | 6 | } |
726 | 16 | } |
727 | | |
728 | 7 | if (!it->is_valid()) { |
729 | 0 | LOG_WARNING("failed to get storage vault kv"); |
730 | 0 | return -1; |
731 | 0 | } |
732 | | |
733 | 7 | if (accessor_map_.empty()) { |
734 | 1 | LOG(WARNING) << "no accessors for instance=" << instance_id_; |
735 | 1 | return -2; |
736 | 1 | } |
737 | 6 | LOG_INFO("finish init instance recycler number_accessors={} instance=", accessor_map_.size(), |
738 | 6 | instance_id_); |
739 | | |
740 | 6 | return 0; |
741 | 7 | } |
742 | | |
743 | 116 | int InstanceRecycler::init() { |
744 | 116 | int ret = init_obj_store_accessors(); |
745 | 116 | if (ret != 0) { |
746 | 0 | return ret; |
747 | 0 | } |
748 | | |
749 | 116 | return init_storage_vault_accessors(); |
750 | 116 | } |
751 | | |
// Bundles several int-returning callables into a single std::function<int()>.
//
// Invoking the returned task runs every callable, in argument order, even if an
// earlier one reports a failure. The task returns 0 when all callables returned 0,
// otherwise the non-zero value of the last callable that failed.
// The callables are copied into the returned task.
template <typename... Func>
auto task_wrapper(Func... funcs) -> std::function<int()> {
    return [funcs...]() {
        // The elements of a braced-init-list are evaluated left to right, which is what
        // fixes the execution order of the callables.
        const std::initializer_list<int> statuses {funcs()...};
        int last_failure = 0;
        for (const int status : statuses) {
            last_failure = (status != 0) ? status : last_failure;
        }
        return last_failure;
    };
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_ Line | Count | Source | 753 | 10 | auto task_wrapper(Func... funcs) -> std::function<int()> { | 754 | 10 | return [funcs...]() { | 755 | 10 | return [](std::initializer_list<int> ret_vals) { | 756 | 10 | int i = 0; | 757 | 10 | for (int ret : ret_vals) { | 758 | 10 | if (ret != 0) { | 759 | 10 | i = ret; | 760 | 10 | } | 761 | 10 | } | 762 | 10 | return i; | 763 | 10 | }({funcs()...}); | 764 | 10 | }; | 765 | 10 | } |
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_ Line | Count | Source | 753 | 10 | auto task_wrapper(Func... funcs) -> std::function<int()> { | 754 | 10 | return [funcs...]() { | 755 | 10 | return [](std::initializer_list<int> ret_vals) { | 756 | 10 | int i = 0; | 757 | 10 | for (int ret : ret_vals) { | 758 | 10 | if (ret != 0) { | 759 | 10 | i = ret; | 760 | 10 | } | 761 | 10 | } | 762 | 10 | return i; | 763 | 10 | }({funcs()...}); | 764 | 10 | }; | 765 | 10 | } |
|
766 | | |
767 | 10 | int InstanceRecycler::do_recycle() { |
768 | 10 | TEST_SYNC_POINT("InstanceRecycler.do_recycle"); |
769 | 10 | tablet_metrics_context_.reset(); |
770 | 10 | segment_metrics_context_.reset(); |
771 | 10 | DORIS_CLOUD_DEFER { |
772 | 10 | tablet_metrics_context_.finish_report(); |
773 | 10 | segment_metrics_context_.finish_report(); |
774 | 10 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv Line | Count | Source | 771 | 10 | DORIS_CLOUD_DEFER { | 772 | 10 | tablet_metrics_context_.finish_report(); | 773 | 10 | segment_metrics_context_.finish_report(); | 774 | 10 | }; |
|
775 | 10 | if (instance_info_.status() == InstanceInfoPB::DELETED) { |
776 | 0 | int res = recycle_cluster_snapshots(); |
777 | 0 | if (res != 0) { |
778 | 0 | return -1; |
779 | 0 | } |
780 | 0 | return recycle_deleted_instance(); |
781 | 10 | } else if (instance_info_.status() == InstanceInfoPB::NORMAL) { |
782 | 10 | SyncExecutor<int> sync_executor(_thread_pool_group.group_recycle_function_pool, |
783 | 10 | fmt::format("instance id {}", instance_id_), |
784 | 120 | [](int r) { return r != 0; });Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi Line | Count | Source | 784 | 120 | [](int r) { return r != 0; }); |
|
785 | 10 | sync_executor |
786 | 10 | .add(task_wrapper( |
787 | 10 | [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv Line | Count | Source | 787 | 10 | [this]() { return InstanceRecycler::recycle_cluster_snapshots(); })) |
|
788 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv Line | Count | Source | 788 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); })) |
|
789 | 10 | .add(task_wrapper( // dropped table and dropped partition need to be recycled in series |
790 | | // because they may both recycle the same set of tablets |
791 | | // recycle dropped table or indexes (mv, rollup) |
792 | 10 | [this]() -> int { return InstanceRecycler::recycle_indexes(); },Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv Line | Count | Source | 792 | 10 | [this]() -> int { return InstanceRecycler::recycle_indexes(); }, |
|
793 | | // recycle dropped partitions |
794 | 10 | [this]() -> int { return InstanceRecycler::recycle_partitions(); }))Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv Line | Count | Source | 794 | 10 | [this]() -> int { return InstanceRecycler::recycle_partitions(); })) |
|
795 | 10 | .add(task_wrapper( |
796 | 10 | [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv Line | Count | Source | 796 | 10 | [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); })) |
|
797 | 10 | .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv Line | Count | Source | 797 | 10 | .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); })) |
|
798 | 10 | .add(task_wrapper( |
799 | 10 | [this]() -> int { return InstanceRecycler::recycle_packed_files(); }))Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv Line | Count | Source | 799 | 10 | [this]() -> int { return InstanceRecycler::recycle_packed_files(); })) |
|
800 | 10 | .add(task_wrapper( |
801 | 10 | [this]() { return InstanceRecycler::abort_timeout_txn(); },Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv Line | Count | Source | 801 | 10 | [this]() { return InstanceRecycler::abort_timeout_txn(); }, |
|
802 | 10 | [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv Line | Count | Source | 802 | 10 | [this]() { return InstanceRecycler::recycle_expired_txn_label(); })) |
|
803 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv Line | Count | Source | 803 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); })) |
|
804 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv Line | Count | Source | 804 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); })) |
|
805 | 10 | .add(task_wrapper( |
806 | 10 | [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv Line | Count | Source | 806 | 10 | [this]() { return InstanceRecycler::recycle_expired_stage_objects(); })) |
|
807 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv Line | Count | Source | 807 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); })) |
|
808 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_15clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_15clEv Line | Count | Source | 808 | 10 | .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); })); |
|
809 | 10 | bool finished = true; |
810 | 10 | std::vector<int> rets = sync_executor.when_all(&finished); |
811 | 120 | for (int ret : rets) { |
812 | 120 | if (ret != 0) { |
813 | 0 | return ret; |
814 | 0 | } |
815 | 120 | } |
816 | 10 | return finished ? 0 : -1; |
817 | 10 | } else { |
818 | 0 | LOG(WARNING) << "invalid instance status: " << instance_info_.status() |
819 | 0 | << " instance_id=" << instance_id_; |
820 | 0 | return -1; |
821 | 0 | } |
822 | 10 | } |
823 | | |
824 | | /** |
825 | | * 1. delete all remote data |
826 | | * 2. delete all kv |
827 | | * 3. remove instance kv |
828 | | */ |
829 | 5 | int InstanceRecycler::recycle_deleted_instance() { |
830 | 5 | LOG_WARNING("begin to recycle deleted instance").tag("instance_id", instance_id_); |
831 | | |
832 | 5 | int ret = 0; |
833 | 5 | auto start_time = steady_clock::now(); |
834 | | |
835 | 5 | DORIS_CLOUD_DEFER { |
836 | 5 | auto cost = duration<float>(steady_clock::now() - start_time).count(); |
837 | 5 | LOG(WARNING) << (ret == 0 ? "successfully" : "failed to") |
838 | 5 | << " recycle deleted instance, cost=" << cost |
839 | 5 | << "s, instance_id=" << instance_id_; |
840 | 5 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv Line | Count | Source | 835 | 5 | DORIS_CLOUD_DEFER { | 836 | 5 | auto cost = duration<float>(steady_clock::now() - start_time).count(); | 837 | 5 | LOG(WARNING) << (ret == 0 ? "successfully" : "failed to") | 838 | 5 | << " recycle deleted instance, cost=" << cost | 839 | 5 | << "s, instance_id=" << instance_id_; | 840 | 5 | }; |
|
841 | | |
842 | | // Step 1: Recycle tmp rowsets (contains ref count but txn is not committed) |
843 | 5 | auto recycle_tmp_rowsets_with_mark_delete_enabled = [&]() -> int { |
844 | 5 | int res = recycle_tmp_rowsets(); |
845 | 5 | if (res == 0 && config::enable_mark_delete_rowset_before_recycle) { |
846 | | // If mark_delete_rowset_before_recycle is enabled, we will mark delete rowsets before recycling them, |
847 | | // so we need to recycle tmp rowsets again to make sure all rowsets in recycle space are marked for |
848 | | // deletion, otherwise we may meet some corner cases that some rowsets are not marked for deletion |
849 | | // and cannot be recycled. |
850 | 5 | res = recycle_tmp_rowsets(); |
851 | 5 | } |
852 | 5 | return res; |
853 | 5 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_1clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_1clEv Line | Count | Source | 843 | 5 | auto recycle_tmp_rowsets_with_mark_delete_enabled = [&]() -> int { | 844 | 5 | int res = recycle_tmp_rowsets(); | 845 | 5 | if (res == 0 && config::enable_mark_delete_rowset_before_recycle) { | 846 | | // If mark_delete_rowset_before_recycle is enabled, we will mark delete rowsets before recycling them, | 847 | | // so we need to recycle tmp rowsets again to make sure all rowsets in recycle space are marked for | 848 | | // deletion, otherwise we may meet some corner cases that some rowsets are not marked for deletion | 849 | | // and cannot be recycled. | 850 | 5 | res = recycle_tmp_rowsets(); | 851 | 5 | } | 852 | 5 | return res; | 853 | 5 | }; |
|
854 | 5 | if (recycle_tmp_rowsets_with_mark_delete_enabled() != 0) { |
855 | 0 | LOG_WARNING("failed to recycle tmp rowsets").tag("instance_id", instance_id_); |
856 | 0 | ret = -1; |
857 | 0 | return -1; |
858 | 0 | } |
859 | | |
860 | | // Step 2: Recycle versioned rowsets in recycle space (already marked for deletion) |
861 | 5 | if (recycle_versioned_rowsets() != 0) { |
862 | 0 | LOG_WARNING("failed to recycle versioned rowsets").tag("instance_id", instance_id_); |
863 | 0 | ret = -1; |
864 | 0 | return -1; |
865 | 0 | } |
866 | | |
867 | | // Step 3: Recycle operation logs (can recycle logs not referenced by snapshots) |
868 | 5 | if (recycle_operation_logs() != 0) { |
869 | 0 | LOG_WARNING("failed to recycle operation logs").tag("instance_id", instance_id_); |
870 | 0 | ret = -1; |
871 | 0 | return -1; |
872 | 0 | } |
873 | | |
874 | | // Step 4: Check if there are still cluster snapshots |
875 | 5 | bool has_snapshots = false; |
876 | 5 | if (has_cluster_snapshots(&has_snapshots) != 0) { |
877 | 0 | LOG(WARNING) << "check instance cluster snapshots failed, instance_id=" << instance_id_; |
878 | 0 | ret = -1; |
879 | 0 | return -1; |
880 | 5 | } else if (has_snapshots) { |
881 | 1 | LOG(INFO) << "instance has cluster snapshots, skip recycling, instance_id=" << instance_id_; |
882 | 1 | return 0; |
883 | 1 | } |
884 | | |
885 | 4 | bool snapshot_enabled = instance_info().has_snapshot_switch_status() && |
886 | 4 | instance_info().snapshot_switch_status() != |
887 | 1 | SnapshotSwitchStatus::SNAPSHOT_SWITCH_DISABLED; |
888 | 4 | if (snapshot_enabled) { |
889 | 1 | bool has_unrecycled_rowsets = false; |
890 | 1 | if (recycle_ref_rowsets(&has_unrecycled_rowsets) != 0) { |
891 | 0 | LOG_WARNING("failed to recycle ref rowsets").tag("instance_id", instance_id_); |
892 | 0 | ret = -1; |
893 | 0 | return -1; |
894 | 1 | } else if (has_unrecycled_rowsets) { |
895 | 0 | LOG_INFO("instance has referenced rowsets, skip recycling") |
896 | 0 | .tag("instance_id", instance_id_); |
897 | 0 | return ret; |
898 | 0 | } |
899 | 3 | } else { // delete all remote data if snapshot is disabled |
900 | 3 | for (auto& [_, accessor] : accessor_map_) { |
901 | 3 | if (stopped()) { |
902 | 0 | return ret; |
903 | 0 | } |
904 | | |
905 | 3 | LOG(INFO) << "begin to delete all objects in " << accessor->uri(); |
906 | 3 | int del_ret = accessor->delete_all(); |
907 | 3 | if (del_ret == 0) { |
908 | 3 | LOG(INFO) << "successfully delete all objects in " << accessor->uri(); |
909 | 3 | } else if (del_ret != 1) { // no need to log, because S3Accessor has logged this error |
910 | | // If `del_ret == 1`, it can be considered that the object data has been recycled by cloud platform, |
911 | | // so the recycling has been successful. |
912 | 0 | ret = -1; |
913 | 0 | } |
914 | 3 | } |
915 | | |
916 | 3 | if (ret != 0) { |
917 | 0 | LOG(WARNING) << "failed to delete all data of deleted instance=" << instance_id_; |
918 | 0 | return ret; |
919 | 0 | } |
920 | 3 | } |
921 | | |
922 | | // Check successor instance, if exists, skip deleting kv because successor instance may still need the data in kv |
923 | 4 | if (instance_info_.has_successor_instance_id() && |
924 | 4 | !instance_info_.successor_instance_id().empty()) { |
925 | 0 | std::string key = instance_key(instance_info_.successor_instance_id()); |
926 | 0 | std::unique_ptr<Transaction> txn; |
927 | 0 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
928 | 0 | if (err != TxnErrorCode::TXN_OK) { |
929 | 0 | LOG(WARNING) << "failed to create txn, instance_id=" << instance_id_ |
930 | 0 | << " successor_instance_id=" << instance_info_.successor_instance_id() |
931 | 0 | << " err=" << err; |
932 | 0 | ret = -1; |
933 | 0 | return -1; |
934 | 0 | } |
935 | | |
936 | 0 | std::string value; |
937 | 0 | err = txn->get(key, &value); |
938 | 0 | if (err == TxnErrorCode::TXN_OK) { |
939 | 0 | LOG(INFO) << "instance successor instance is still exist, skip deleting kv," |
940 | 0 | << " instance_id=" << instance_id_ |
941 | 0 | << " successor_instance_id=" << instance_info_.successor_instance_id(); |
942 | 0 | return 0; |
943 | 0 | } else if (err != TxnErrorCode::TXN_KEY_NOT_FOUND) { |
944 | 0 | LOG(WARNING) << "failed to get successor instance, instance_id=" << instance_id_ |
945 | 0 | << " successor_instance_id=" << instance_info_.successor_instance_id() |
946 | 0 | << " err=" << err; |
947 | 0 | ret = -1; |
948 | 0 | return -1; |
949 | 0 | } |
950 | 0 | } |
951 | | |
952 | | // delete all kv |
953 | 4 | std::unique_ptr<Transaction> txn; |
954 | 4 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
955 | 4 | if (err != TxnErrorCode::TXN_OK) { |
956 | 0 | LOG(WARNING) << "failed to create txn"; |
957 | 0 | ret = -1; |
958 | 0 | return -1; |
959 | 0 | } |
960 | 4 | LOG(INFO) << "begin to delete all kv, instance_id=" << instance_id_; |
961 | | // delete kv before deleting objects to prevent the checker from misjudging data loss |
962 | 4 | std::string start_txn_key = txn_key_prefix(instance_id_); |
963 | 4 | std::string end_txn_key = txn_key_prefix(instance_id_ + '\x00'); |
964 | 4 | txn->remove(start_txn_key, end_txn_key); |
965 | 4 | std::string start_version_key = version_key_prefix(instance_id_); |
966 | 4 | std::string end_version_key = version_key_prefix(instance_id_ + '\x00'); |
967 | 4 | txn->remove(start_version_key, end_version_key); |
968 | 4 | std::string start_meta_key = meta_key_prefix(instance_id_); |
969 | 4 | std::string end_meta_key = meta_key_prefix(instance_id_ + '\x00'); |
970 | 4 | txn->remove(start_meta_key, end_meta_key); |
971 | 4 | std::string start_recycle_key = recycle_key_prefix(instance_id_); |
972 | 4 | std::string end_recycle_key = recycle_key_prefix(instance_id_ + '\x00'); |
973 | 4 | txn->remove(start_recycle_key, end_recycle_key); |
974 | 4 | std::string start_stats_tablet_key = stats_tablet_key({instance_id_, 0, 0, 0, 0}); |
975 | 4 | std::string end_stats_tablet_key = stats_tablet_key({instance_id_, INT64_MAX, 0, 0, 0}); |
976 | 4 | txn->remove(start_stats_tablet_key, end_stats_tablet_key); |
977 | 4 | std::string start_copy_key = copy_key_prefix(instance_id_); |
978 | 4 | std::string end_copy_key = copy_key_prefix(instance_id_ + '\x00'); |
979 | 4 | txn->remove(start_copy_key, end_copy_key); |
980 | | // should not remove job key range, because we need to reserve job recycle kv |
981 | | // 0:instance_id 1:table_id 2:index_id 3:part_id 4:tablet_id |
982 | 4 | std::string start_job_tablet_key = job_tablet_key({instance_id_, 0, 0, 0, 0}); |
983 | 4 | std::string end_job_tablet_key = job_tablet_key({instance_id_, INT64_MAX, 0, 0, 0}); |
984 | 4 | txn->remove(start_job_tablet_key, end_job_tablet_key); |
985 | 4 | StorageVaultKeyInfo key_info0 {instance_id_, ""}; |
986 | 4 | StorageVaultKeyInfo key_info1 {instance_id_, "\xff"}; |
987 | 4 | std::string start_vault_key = storage_vault_key(key_info0); |
988 | 4 | std::string end_vault_key = storage_vault_key(key_info1); |
989 | 4 | txn->remove(start_vault_key, end_vault_key); |
990 | 4 | std::string versioned_version_key_start = versioned::version_key_prefix(instance_id_); |
991 | 4 | std::string versioned_version_key_end = versioned::version_key_prefix(instance_id_ + '\x00'); |
992 | 4 | txn->remove(versioned_version_key_start, versioned_version_key_end); |
993 | 4 | std::string versioned_index_key_start = versioned::index_key_prefix(instance_id_); |
994 | 4 | std::string versioned_index_key_end = versioned::index_key_prefix(instance_id_ + '\x00'); |
995 | 4 | txn->remove(versioned_index_key_start, versioned_index_key_end); |
996 | 4 | std::string versioned_stats_tablet_key_start = versioned::stats_key_prefix(instance_id_); |
997 | 4 | std::string versioned_stats_tablet_key_end = versioned::stats_key_prefix(instance_id_ + '\x00'); |
998 | 4 | txn->remove(versioned_stats_tablet_key_start, versioned_stats_tablet_key_end); |
999 | 4 | std::string versioned_meta_key_start = versioned::meta_key_prefix(instance_id_); |
1000 | 4 | std::string versioned_meta_key_end = versioned::meta_key_prefix(instance_id_ + '\x00'); |
1001 | 4 | txn->remove(versioned_meta_key_start, versioned_meta_key_end); |
1002 | 4 | std::string versioned_data_key_start = versioned::data_key_prefix(instance_id_); |
1003 | 4 | std::string versioned_data_key_end = versioned::data_key_prefix(instance_id_ + '\x00'); |
1004 | 4 | txn->remove(versioned_data_key_start, versioned_data_key_end); |
1005 | 4 | std::string versioned_log_key_start = versioned::log_key_prefix(instance_id_); |
1006 | 4 | std::string versioned_log_key_end = versioned::log_key_prefix(instance_id_ + '\x00'); |
1007 | 4 | txn->remove(versioned_log_key_start, versioned_log_key_end); |
1008 | 4 | err = txn->commit(); |
1009 | 4 | if (err != TxnErrorCode::TXN_OK) { |
1010 | 0 | LOG(WARNING) << "failed to delete all kv, instance_id=" << instance_id_ << ", err=" << err; |
1011 | 0 | ret = -1; |
1012 | 0 | } |
1013 | | |
1014 | 4 | if (ret == 0) { |
1015 | | // remove instance kv |
1016 | | // ATTN: MUST ensure that cloud platform won't regenerate the same instance id |
1017 | 4 | err = txn_kv_->create_txn(&txn); |
1018 | 4 | if (err != TxnErrorCode::TXN_OK) { |
1019 | 0 | LOG(WARNING) << "failed to create txn"; |
1020 | 0 | ret = -1; |
1021 | 0 | return ret; |
1022 | 0 | } |
1023 | 4 | std::string key; |
1024 | 4 | instance_key({instance_id_}, &key); |
1025 | 4 | txn->atomic_add(system_meta_service_instance_update_key(), 1); |
1026 | 4 | txn->remove(key); |
1027 | 4 | err = txn->commit(); |
1028 | 4 | if (err != TxnErrorCode::TXN_OK) { |
1029 | 0 | LOG(WARNING) << "failed to delete instance kv, instance_id=" << instance_id_ |
1030 | 0 | << " err=" << err; |
1031 | 0 | ret = -1; |
1032 | 0 | } |
1033 | 4 | } |
1034 | 4 | return ret; |
1035 | 4 | } |
1036 | | |
1037 | | int InstanceRecycler::check_rowset_exists(int64_t tablet_id, const std::string& rowset_id, |
1038 | 9 | bool* exists, PackedFileRecycleStats* stats) { |
1039 | 9 | if (exists == nullptr) { |
1040 | 0 | return -1; |
1041 | 0 | } |
1042 | 9 | *exists = false; |
1043 | | |
1044 | 9 | std::string begin = meta_rowset_key({instance_id_, tablet_id, 0}); |
1045 | 9 | std::string end = meta_rowset_key({instance_id_, tablet_id + 1, 0}); |
1046 | 9 | std::string scan_begin = begin; |
1047 | | |
1048 | 9 | while (true) { |
1049 | 9 | std::unique_ptr<RangeGetIterator> it_range; |
1050 | 9 | int get_ret = txn_get(txn_kv_.get(), scan_begin, end, it_range); |
1051 | 9 | if (get_ret < 0) { |
1052 | 0 | LOG_WARNING("failed to scan rowset metas when recycling packed file") |
1053 | 0 | .tag("instance_id", instance_id_) |
1054 | 0 | .tag("tablet_id", tablet_id) |
1055 | 0 | .tag("ret", get_ret); |
1056 | 0 | return -1; |
1057 | 0 | } |
1058 | 9 | if (get_ret == 1 || it_range == nullptr || !it_range->has_next()) { |
1059 | 6 | return 0; |
1060 | 6 | } |
1061 | | |
1062 | 3 | std::string last_key; |
1063 | 3 | while (it_range->has_next()) { |
1064 | 3 | auto [k, v] = it_range->next(); |
1065 | 3 | last_key.assign(k.data(), k.size()); |
1066 | 3 | doris::RowsetMetaCloudPB rowset_meta; |
1067 | 3 | if (!rowset_meta.ParseFromArray(v.data(), v.size())) { |
1068 | 0 | LOG_WARNING("malformed rowset meta when checking packed file rowset existence") |
1069 | 0 | .tag("instance_id", instance_id_) |
1070 | 0 | .tag("tablet_id", tablet_id) |
1071 | 0 | .tag("key", hex(k)); |
1072 | 0 | continue; |
1073 | 0 | } |
1074 | 3 | if (stats) { |
1075 | 3 | ++stats->rowset_scan_count; |
1076 | 3 | } |
1077 | 3 | if (rowset_meta.rowset_id_v2() == rowset_id) { |
1078 | 3 | *exists = true; |
1079 | 3 | return 0; |
1080 | 3 | } |
1081 | 3 | } |
1082 | | |
1083 | 0 | if (!it_range->more()) { |
1084 | 0 | return 0; |
1085 | 0 | } |
1086 | | |
1087 | | // Continue scanning from the next key to keep each transaction short. |
1088 | 0 | scan_begin = std::move(last_key); |
1089 | 0 | scan_begin.push_back('\x00'); |
1090 | 0 | } |
1091 | 9 | } |
1092 | | |
1093 | | int InstanceRecycler::check_recycle_and_tmp_rowset_exists(int64_t tablet_id, |
1094 | | const std::string& rowset_id, |
1095 | | int64_t txn_id, bool* recycle_exists, |
1096 | 11 | bool* tmp_exists) { |
1097 | 11 | if (recycle_exists == nullptr || tmp_exists == nullptr) { |
1098 | 0 | return -1; |
1099 | 0 | } |
1100 | 11 | *recycle_exists = false; |
1101 | 11 | *tmp_exists = false; |
1102 | | |
1103 | 11 | if (txn_id <= 0) { |
1104 | 0 | LOG_WARNING("invalid txn id when checking recycle/tmp rowset existence") |
1105 | 0 | .tag("instance_id", instance_id_) |
1106 | 0 | .tag("tablet_id", tablet_id) |
1107 | 0 | .tag("rowset_id", rowset_id) |
1108 | 0 | .tag("txn_id", txn_id); |
1109 | 0 | return -1; |
1110 | 0 | } |
1111 | | |
1112 | 11 | std::unique_ptr<Transaction> txn; |
1113 | 11 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
1114 | 11 | if (err != TxnErrorCode::TXN_OK) { |
1115 | 0 | LOG_WARNING("failed to create txn when checking recycle/tmp rowset existence") |
1116 | 0 | .tag("instance_id", instance_id_) |
1117 | 0 | .tag("tablet_id", tablet_id) |
1118 | 0 | .tag("rowset_id", rowset_id) |
1119 | 0 | .tag("txn_id", txn_id) |
1120 | 0 | .tag("err", err); |
1121 | 0 | return -1; |
1122 | 0 | } |
1123 | | |
1124 | 11 | std::string recycle_key = recycle_rowset_key({instance_id_, tablet_id, rowset_id}); |
1125 | 11 | auto ret = key_exists(txn.get(), recycle_key, true); |
1126 | 11 | if (ret == TxnErrorCode::TXN_OK) { |
1127 | 1 | *recycle_exists = true; |
1128 | 10 | } else if (ret != TxnErrorCode::TXN_KEY_NOT_FOUND) { |
1129 | 0 | LOG_WARNING("failed to check recycle rowset existence") |
1130 | 0 | .tag("instance_id", instance_id_) |
1131 | 0 | .tag("tablet_id", tablet_id) |
1132 | 0 | .tag("rowset_id", rowset_id) |
1133 | 0 | .tag("key", hex(recycle_key)) |
1134 | 0 | .tag("err", ret); |
1135 | 0 | return -1; |
1136 | 0 | } |
1137 | | |
1138 | 11 | std::string tmp_key = meta_rowset_tmp_key({instance_id_, txn_id, tablet_id}); |
1139 | 11 | ret = key_exists(txn.get(), tmp_key, true); |
1140 | 11 | if (ret == TxnErrorCode::TXN_OK) { |
1141 | 1 | *tmp_exists = true; |
1142 | 10 | } else if (ret != TxnErrorCode::TXN_KEY_NOT_FOUND) { |
1143 | 0 | LOG_WARNING("failed to check tmp rowset existence") |
1144 | 0 | .tag("instance_id", instance_id_) |
1145 | 0 | .tag("tablet_id", tablet_id) |
1146 | 0 | .tag("txn_id", txn_id) |
1147 | 0 | .tag("key", hex(tmp_key)) |
1148 | 0 | .tag("err", ret); |
1149 | 0 | return -1; |
1150 | 0 | } |
1151 | | |
1152 | 11 | return 0; |
1153 | 11 | } |
1154 | | |
1155 | | std::pair<std::string, std::shared_ptr<StorageVaultAccessor>> |
1156 | 8 | InstanceRecycler::resolve_packed_file_accessor(const std::string& hint) { |
1157 | 8 | if (!hint.empty()) { |
1158 | 8 | if (auto it = accessor_map_.find(hint); it != accessor_map_.end()) { |
1159 | 8 | return {hint, it->second}; |
1160 | 8 | } |
1161 | 8 | } |
1162 | | |
1163 | 0 | return {"", nullptr}; |
1164 | 8 | } |
1165 | | |
1166 | | int InstanceRecycler::correct_packed_file_info(cloud::PackedFileInfoPB* packed_info, bool* changed, |
1167 | | const std::string& packed_file_path, |
1168 | 3 | PackedFileRecycleStats* stats) { |
1169 | 3 | bool local_changed = false; |
1170 | 3 | int64_t left_num = 0; |
1171 | 3 | int64_t left_bytes = 0; |
1172 | 3 | bool all_small_files_confirmed = true; |
1173 | 3 | LOG(INFO) << "begin to correct file: " << packed_file_path; |
1174 | | |
1175 | 14 | auto log_small_file_status = [&](const cloud::PackedSlicePB& file, bool confirmed_this_round) { |
1176 | 14 | int64_t tablet_id = file.has_tablet_id() ? file.tablet_id() : int64_t {-1}; |
1177 | 14 | std::string rowset_id = file.has_rowset_id() ? file.rowset_id() : std::string {}; |
1178 | 14 | int64_t txn_id = file.has_txn_id() ? file.txn_id() : int64_t {0}; |
1179 | 14 | LOG_INFO("packed slice correction status") |
1180 | 14 | .tag("instance_id", instance_id_) |
1181 | 14 | .tag("packed_file_path", packed_file_path) |
1182 | 14 | .tag("small_file_path", file.path()) |
1183 | 14 | .tag("tablet_id", tablet_id) |
1184 | 14 | .tag("rowset_id", rowset_id) |
1185 | 14 | .tag("txn_id", txn_id) |
1186 | 14 | .tag("size", file.size()) |
1187 | 14 | .tag("deleted", file.deleted()) |
1188 | 14 | .tag("corrected", file.corrected()) |
1189 | 14 | .tag("confirmed_this_round", confirmed_this_round); |
1190 | 14 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24correct_packed_file_infoEPNS0_16PackedFileInfoPBEPbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS1_22PackedFileRecycleStatsEENK3$_0clERKNS0_13PackedSlicePBEb recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24correct_packed_file_infoEPNS0_16PackedFileInfoPBEPbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS1_22PackedFileRecycleStatsEENK3$_0clERKNS0_13PackedSlicePBEb Line | Count | Source | 1175 | 14 | auto log_small_file_status = [&](const cloud::PackedSlicePB& file, bool confirmed_this_round) { | 1176 | 14 | int64_t tablet_id = file.has_tablet_id() ? file.tablet_id() : int64_t {-1}; | 1177 | 14 | std::string rowset_id = file.has_rowset_id() ? file.rowset_id() : std::string {}; | 1178 | 14 | int64_t txn_id = file.has_txn_id() ? file.txn_id() : int64_t {0}; | 1179 | 14 | LOG_INFO("packed slice correction status") | 1180 | 14 | .tag("instance_id", instance_id_) | 1181 | 14 | .tag("packed_file_path", packed_file_path) | 1182 | 14 | .tag("small_file_path", file.path()) | 1183 | 14 | .tag("tablet_id", tablet_id) | 1184 | 14 | .tag("rowset_id", rowset_id) | 1185 | 14 | .tag("txn_id", txn_id) | 1186 | 14 | .tag("size", file.size()) | 1187 | 14 | .tag("deleted", file.deleted()) | 1188 | 14 | .tag("corrected", file.corrected()) | 1189 | 14 | .tag("confirmed_this_round", confirmed_this_round); | 1190 | 14 | }; |
|
1191 | | |
1192 | 17 | for (int i = 0; i < packed_info->slices_size(); ++i) { |
1193 | 14 | auto* small_file = packed_info->mutable_slices(i); |
1194 | 14 | if (small_file->deleted()) { |
1195 | 3 | log_small_file_status(*small_file, small_file->corrected()); |
1196 | 3 | continue; |
1197 | 3 | } |
1198 | | |
1199 | 11 | if (small_file->corrected()) { |
1200 | 0 | left_num++; |
1201 | 0 | left_bytes += small_file->size(); |
1202 | 0 | log_small_file_status(*small_file, true); |
1203 | 0 | continue; |
1204 | 0 | } |
1205 | | |
1206 | 11 | if (!small_file->has_tablet_id() || !small_file->has_rowset_id()) { |
1207 | 0 | LOG_WARNING("packed file small file missing identifiers during correction") |
1208 | 0 | .tag("instance_id", instance_id_) |
1209 | 0 | .tag("small_file_path", small_file->path()) |
1210 | 0 | .tag("index", i); |
1211 | 0 | return -1; |
1212 | 0 | } |
1213 | | |
1214 | 11 | int64_t tablet_id = small_file->tablet_id(); |
1215 | 11 | const std::string& rowset_id = small_file->rowset_id(); |
1216 | 11 | if (!small_file->has_txn_id() || small_file->txn_id() <= 0) { |
1217 | 0 | LOG_WARNING("packed file small file missing valid txn id during correction") |
1218 | 0 | .tag("instance_id", instance_id_) |
1219 | 0 | .tag("small_file_path", small_file->path()) |
1220 | 0 | .tag("index", i) |
1221 | 0 | .tag("tablet_id", tablet_id) |
1222 | 0 | .tag("rowset_id", rowset_id) |
1223 | 0 | .tag("has_txn_id", small_file->has_txn_id()) |
1224 | 0 | .tag("txn_id", small_file->has_txn_id() ? small_file->txn_id() : 0); |
1225 | 0 | return -1; |
1226 | 0 | } |
1227 | 11 | int64_t txn_id = small_file->txn_id(); |
1228 | 11 | bool recycle_exists = false; |
1229 | 11 | bool tmp_exists = false; |
1230 | 11 | if (check_recycle_and_tmp_rowset_exists(tablet_id, rowset_id, txn_id, &recycle_exists, |
1231 | 11 | &tmp_exists) != 0) { |
1232 | 0 | return -1; |
1233 | 0 | } |
1234 | | |
1235 | 11 | bool small_file_confirmed = false; |
1236 | 11 | if (tmp_exists) { |
1237 | 1 | left_num++; |
1238 | 1 | left_bytes += small_file->size(); |
1239 | 1 | small_file_confirmed = true; |
1240 | 10 | } else if (recycle_exists) { |
1241 | 1 | left_num++; |
1242 | 1 | left_bytes += small_file->size(); |
1243 | | // keep small_file_confirmed=false so the packed file remains uncorrected |
1244 | 9 | } else { |
1245 | 9 | bool rowset_exists = false; |
1246 | 9 | if (check_rowset_exists(tablet_id, rowset_id, &rowset_exists, stats) != 0) { |
1247 | 0 | return -1; |
1248 | 0 | } |
1249 | | |
1250 | 9 | if (!rowset_exists) { |
1251 | 6 | if (!small_file->deleted()) { |
1252 | 6 | small_file->set_deleted(true); |
1253 | 6 | local_changed = true; |
1254 | 6 | } |
1255 | 6 | if (!small_file->corrected()) { |
1256 | 6 | small_file->set_corrected(true); |
1257 | 6 | local_changed = true; |
1258 | 6 | } |
1259 | 6 | small_file_confirmed = true; |
1260 | 6 | } else { |
1261 | 3 | left_num++; |
1262 | 3 | left_bytes += small_file->size(); |
1263 | 3 | small_file_confirmed = true; |
1264 | 3 | } |
1265 | 9 | } |
1266 | | |
1267 | 11 | if (!small_file_confirmed) { |
1268 | 1 | all_small_files_confirmed = false; |
1269 | 1 | } |
1270 | | |
1271 | 11 | if (small_file->corrected() != small_file_confirmed) { |
1272 | 4 | small_file->set_corrected(small_file_confirmed); |
1273 | 4 | local_changed = true; |
1274 | 4 | } |
1275 | | |
1276 | 11 | log_small_file_status(*small_file, small_file_confirmed); |
1277 | 11 | } |
1278 | | |
1279 | 3 | if (packed_info->remaining_slice_bytes() != left_bytes) { |
1280 | 3 | packed_info->set_remaining_slice_bytes(left_bytes); |
1281 | 3 | local_changed = true; |
1282 | 3 | } |
1283 | 3 | if (packed_info->ref_cnt() != left_num) { |
1284 | 3 | auto old_ref_cnt = packed_info->ref_cnt(); |
1285 | 3 | packed_info->set_ref_cnt(left_num); |
1286 | 3 | LOG_INFO("corrected packed file ref count") |
1287 | 3 | .tag("instance_id", instance_id_) |
1288 | 3 | .tag("resource_id", packed_info->resource_id()) |
1289 | 3 | .tag("packed_file_path", packed_file_path) |
1290 | 3 | .tag("old_ref_cnt", old_ref_cnt) |
1291 | 3 | .tag("new_ref_cnt", left_num); |
1292 | 3 | local_changed = true; |
1293 | 3 | } |
1294 | 3 | if (packed_info->corrected() != all_small_files_confirmed) { |
1295 | 2 | packed_info->set_corrected(all_small_files_confirmed); |
1296 | 2 | local_changed = true; |
1297 | 2 | } |
1298 | 3 | if (left_num == 0 && packed_info->state() != cloud::PackedFileInfoPB::RECYCLING) { |
1299 | 1 | packed_info->set_state(cloud::PackedFileInfoPB::RECYCLING); |
1300 | 1 | local_changed = true; |
1301 | 1 | } |
1302 | | |
1303 | 3 | if (changed != nullptr) { |
1304 | 3 | *changed = local_changed; |
1305 | 3 | } |
1306 | 3 | return 0; |
1307 | 3 | } |
1308 | | |
1309 | | int InstanceRecycler::process_single_packed_file(const std::string& packed_key, |
1310 | | const std::string& packed_file_path, |
1311 | 4 | PackedFileRecycleStats* stats) { |
1312 | 4 | const int max_retry_times = std::max(1, config::packed_file_txn_retry_times); |
1313 | 4 | bool correction_ok = false; |
1314 | 4 | cloud::PackedFileInfoPB packed_info; |
1315 | | |
1316 | 4 | for (int attempt = 1; attempt <= max_retry_times; ++attempt) { |
1317 | 4 | if (stopped()) { |
1318 | 0 | LOG_WARNING("recycler stopped before processing packed file") |
1319 | 0 | .tag("instance_id", instance_id_) |
1320 | 0 | .tag("packed_file_path", packed_file_path) |
1321 | 0 | .tag("attempt", attempt); |
1322 | 0 | return -1; |
1323 | 0 | } |
1324 | | |
1325 | 4 | std::unique_ptr<Transaction> txn; |
1326 | 4 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
1327 | 4 | if (err != TxnErrorCode::TXN_OK) { |
1328 | 0 | LOG_WARNING("failed to create txn when processing packed file") |
1329 | 0 | .tag("instance_id", instance_id_) |
1330 | 0 | .tag("packed_file_path", packed_file_path) |
1331 | 0 | .tag("attempt", attempt) |
1332 | 0 | .tag("err", err); |
1333 | 0 | return -1; |
1334 | 0 | } |
1335 | | |
1336 | 4 | std::string packed_val; |
1337 | 4 | err = txn->get(packed_key, &packed_val); |
1338 | 4 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { |
1339 | 0 | return 0; |
1340 | 0 | } |
1341 | 4 | if (err != TxnErrorCode::TXN_OK) { |
1342 | 0 | LOG_WARNING("failed to get packed file kv") |
1343 | 0 | .tag("instance_id", instance_id_) |
1344 | 0 | .tag("packed_file_path", packed_file_path) |
1345 | 0 | .tag("attempt", attempt) |
1346 | 0 | .tag("err", err); |
1347 | 0 | return -1; |
1348 | 0 | } |
1349 | | |
1350 | 4 | if (!packed_info.ParseFromString(packed_val)) { |
1351 | 0 | LOG_WARNING("failed to parse packed file info") |
1352 | 0 | .tag("instance_id", instance_id_) |
1353 | 0 | .tag("packed_file_path", packed_file_path) |
1354 | 0 | .tag("attempt", attempt); |
1355 | 0 | return -1; |
1356 | 0 | } |
1357 | | |
1358 | 4 | int64_t now_sec = ::time(nullptr); |
1359 | 4 | bool corrected = packed_info.corrected(); |
1360 | 4 | bool due = config::force_immediate_recycle || |
1361 | 4 | now_sec - packed_info.created_at_sec() >= |
1362 | 4 | config::packed_file_correction_delay_seconds; |
1363 | | |
1364 | 4 | if (!corrected && due) { |
1365 | 3 | bool changed = false; |
1366 | 3 | if (correct_packed_file_info(&packed_info, &changed, packed_file_path, stats) != 0) { |
1367 | 0 | LOG_WARNING("correct_packed_file_info failed") |
1368 | 0 | .tag("instance_id", instance_id_) |
1369 | 0 | .tag("packed_file_path", packed_file_path) |
1370 | 0 | .tag("attempt", attempt); |
1371 | 0 | return -1; |
1372 | 0 | } |
1373 | 3 | if (changed) { |
1374 | 3 | std::string updated; |
1375 | 3 | if (!packed_info.SerializeToString(&updated)) { |
1376 | 0 | LOG_WARNING("failed to serialize packed file info after correction") |
1377 | 0 | .tag("instance_id", instance_id_) |
1378 | 0 | .tag("packed_file_path", packed_file_path) |
1379 | 0 | .tag("attempt", attempt); |
1380 | 0 | return -1; |
1381 | 0 | } |
1382 | 3 | txn->put(packed_key, updated); |
1383 | 3 | err = txn->commit(); |
1384 | 3 | if (err == TxnErrorCode::TXN_OK) { |
1385 | 3 | if (stats) { |
1386 | 3 | ++stats->num_corrected; |
1387 | 3 | } |
1388 | 3 | } else { |
1389 | 0 | if (err == TxnErrorCode::TXN_CONFLICT && attempt < max_retry_times) { |
1390 | 0 | LOG_WARNING( |
1391 | 0 | "failed to commit correction for packed file due to conflict, " |
1392 | 0 | "retrying") |
1393 | 0 | .tag("instance_id", instance_id_) |
1394 | 0 | .tag("packed_file_path", packed_file_path) |
1395 | 0 | .tag("attempt", attempt); |
1396 | 0 | sleep_for_packed_file_retry(); |
1397 | 0 | packed_info.Clear(); |
1398 | 0 | continue; |
1399 | 0 | } |
1400 | 0 | LOG_WARNING("failed to commit correction for packed file") |
1401 | 0 | .tag("instance_id", instance_id_) |
1402 | 0 | .tag("packed_file_path", packed_file_path) |
1403 | 0 | .tag("attempt", attempt) |
1404 | 0 | .tag("err", err); |
1405 | 0 | return -1; |
1406 | 0 | } |
1407 | 3 | } |
1408 | 3 | } |
1409 | | |
1410 | 4 | correction_ok = true; |
1411 | 4 | break; |
1412 | 4 | } |
1413 | | |
1414 | 4 | if (!correction_ok) { |
1415 | 0 | return -1; |
1416 | 0 | } |
1417 | | |
1418 | 4 | if (!(packed_info.state() == cloud::PackedFileInfoPB::RECYCLING && |
1419 | 4 | packed_info.ref_cnt() == 0)) { |
1420 | 3 | return 0; |
1421 | 3 | } |
1422 | | |
1423 | 1 | if (!packed_info.has_resource_id() || packed_info.resource_id().empty()) { |
1424 | 0 | LOG_WARNING("packed file missing resource id when recycling") |
1425 | 0 | .tag("instance_id", instance_id_) |
1426 | 0 | .tag("packed_file_path", packed_file_path); |
1427 | 0 | return -1; |
1428 | 0 | } |
1429 | 1 | auto [resource_id, accessor] = resolve_packed_file_accessor(packed_info.resource_id()); |
1430 | 1 | if (!accessor) { |
1431 | 0 | LOG_WARNING("no accessor available to delete packed file") |
1432 | 0 | .tag("instance_id", instance_id_) |
1433 | 0 | .tag("packed_file_path", packed_file_path) |
1434 | 0 | .tag("resource_id", packed_info.resource_id()); |
1435 | 0 | return -1; |
1436 | 0 | } |
1437 | 1 | int del_ret = accessor->delete_file(packed_file_path); |
1438 | 1 | if (del_ret != 0 && del_ret != 1) { |
1439 | 0 | LOG_WARNING("failed to delete packed file") |
1440 | 0 | .tag("instance_id", instance_id_) |
1441 | 0 | .tag("packed_file_path", packed_file_path) |
1442 | 0 | .tag("resource_id", resource_id) |
1443 | 0 | .tag("ret", del_ret); |
1444 | 0 | return -1; |
1445 | 0 | } |
1446 | 1 | if (del_ret == 1) { |
1447 | 0 | LOG_INFO("packed file already removed") |
1448 | 0 | .tag("instance_id", instance_id_) |
1449 | 0 | .tag("packed_file_path", packed_file_path) |
1450 | 0 | .tag("resource_id", resource_id); |
1451 | 1 | } else { |
1452 | 1 | LOG_INFO("deleted packed file") |
1453 | 1 | .tag("instance_id", instance_id_) |
1454 | 1 | .tag("packed_file_path", packed_file_path) |
1455 | 1 | .tag("resource_id", resource_id); |
1456 | 1 | } |
1457 | | |
1458 | 1 | for (int del_attempt = 1; del_attempt <= max_retry_times; ++del_attempt) { |
1459 | 1 | std::unique_ptr<Transaction> del_txn; |
1460 | 1 | TxnErrorCode err = txn_kv_->create_txn(&del_txn); |
1461 | 1 | if (err != TxnErrorCode::TXN_OK) { |
1462 | 0 | LOG_WARNING("failed to create txn when removing packed file kv") |
1463 | 0 | .tag("instance_id", instance_id_) |
1464 | 0 | .tag("packed_file_path", packed_file_path) |
1465 | 0 | .tag("del_attempt", del_attempt) |
1466 | 0 | .tag("err", err); |
1467 | 0 | return -1; |
1468 | 0 | } |
1469 | | |
1470 | 1 | std::string latest_val; |
1471 | 1 | err = del_txn->get(packed_key, &latest_val); |
1472 | 1 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { |
1473 | 0 | return 0; |
1474 | 0 | } |
1475 | 1 | if (err != TxnErrorCode::TXN_OK) { |
1476 | 0 | LOG_WARNING("failed to re-read packed file kv before removal") |
1477 | 0 | .tag("instance_id", instance_id_) |
1478 | 0 | .tag("packed_file_path", packed_file_path) |
1479 | 0 | .tag("del_attempt", del_attempt) |
1480 | 0 | .tag("err", err); |
1481 | 0 | return -1; |
1482 | 0 | } |
1483 | | |
1484 | 1 | cloud::PackedFileInfoPB latest_info; |
1485 | 1 | if (!latest_info.ParseFromString(latest_val)) { |
1486 | 0 | LOG_WARNING("failed to parse packed file info before removal") |
1487 | 0 | .tag("instance_id", instance_id_) |
1488 | 0 | .tag("packed_file_path", packed_file_path) |
1489 | 0 | .tag("del_attempt", del_attempt); |
1490 | 0 | return -1; |
1491 | 0 | } |
1492 | | |
1493 | 1 | if (!(latest_info.state() == cloud::PackedFileInfoPB::RECYCLING && |
1494 | 1 | latest_info.ref_cnt() == 0)) { |
1495 | 0 | LOG_INFO("packed file state changed before removal, skip deleting kv") |
1496 | 0 | .tag("instance_id", instance_id_) |
1497 | 0 | .tag("packed_file_path", packed_file_path) |
1498 | 0 | .tag("del_attempt", del_attempt); |
1499 | 0 | return 0; |
1500 | 0 | } |
1501 | | |
1502 | 1 | del_txn->remove(packed_key); |
1503 | 1 | err = del_txn->commit(); |
1504 | 1 | if (err == TxnErrorCode::TXN_OK) { |
1505 | 1 | if (stats) { |
1506 | 1 | ++stats->num_deleted; |
1507 | 1 | stats->bytes_deleted += static_cast<int64_t>(packed_key.size()) + |
1508 | 1 | static_cast<int64_t>(latest_val.size()); |
1509 | 1 | if (del_ret == 0 || del_ret == 1) { |
1510 | 1 | ++stats->num_object_deleted; |
1511 | 1 | int64_t object_size = latest_info.total_slice_bytes(); |
1512 | 1 | if (object_size <= 0) { |
1513 | 0 | object_size = packed_info.total_slice_bytes(); |
1514 | 0 | } |
1515 | 1 | stats->bytes_object_deleted += object_size; |
1516 | 1 | } |
1517 | 1 | } |
1518 | 1 | LOG_INFO("removed packed file metadata") |
1519 | 1 | .tag("instance_id", instance_id_) |
1520 | 1 | .tag("packed_file_path", packed_file_path); |
1521 | 1 | return 0; |
1522 | 1 | } |
1523 | 0 | if (err == TxnErrorCode::TXN_CONFLICT) { |
1524 | 0 | if (del_attempt >= max_retry_times) { |
1525 | 0 | LOG_WARNING("failed to remove packed file kv due to conflict after max retry") |
1526 | 0 | .tag("instance_id", instance_id_) |
1527 | 0 | .tag("packed_file_path", packed_file_path) |
1528 | 0 | .tag("del_attempt", del_attempt); |
1529 | 0 | return -1; |
1530 | 0 | } |
1531 | 0 | LOG_WARNING("failed to remove packed file kv due to conflict, retrying") |
1532 | 0 | .tag("instance_id", instance_id_) |
1533 | 0 | .tag("packed_file_path", packed_file_path) |
1534 | 0 | .tag("del_attempt", del_attempt); |
1535 | 0 | sleep_for_packed_file_retry(); |
1536 | 0 | continue; |
1537 | 0 | } |
1538 | 0 | LOG_WARNING("failed to remove packed file kv") |
1539 | 0 | .tag("instance_id", instance_id_) |
1540 | 0 | .tag("packed_file_path", packed_file_path) |
1541 | 0 | .tag("del_attempt", del_attempt) |
1542 | 0 | .tag("err", err); |
1543 | 0 | return -1; |
1544 | 0 | } |
1545 | | |
1546 | 0 | return -1; |
1547 | 1 | } |
1548 | | |
1549 | | int InstanceRecycler::handle_packed_file_kv(std::string_view key, std::string_view /*value*/, |
1550 | 4 | PackedFileRecycleStats* stats, int* ret) { |
1551 | 4 | if (stats) { |
1552 | 4 | ++stats->num_scanned; |
1553 | 4 | } |
1554 | 4 | std::string packed_file_path; |
1555 | 4 | if (!decode_packed_file_key(key, &packed_file_path)) { |
1556 | 0 | LOG_WARNING("failed to decode packed file key") |
1557 | 0 | .tag("instance_id", instance_id_) |
1558 | 0 | .tag("key", hex(key)); |
1559 | 0 | if (stats) { |
1560 | 0 | ++stats->num_failed; |
1561 | 0 | } |
1562 | 0 | if (ret) { |
1563 | 0 | *ret = -1; |
1564 | 0 | } |
1565 | 0 | return 0; |
1566 | 0 | } |
1567 | | |
1568 | 4 | std::string packed_key(key); |
1569 | 4 | int process_ret = process_single_packed_file(packed_key, packed_file_path, stats); |
1570 | 4 | if (process_ret != 0) { |
1571 | 0 | if (stats) { |
1572 | 0 | ++stats->num_failed; |
1573 | 0 | } |
1574 | 0 | if (ret) { |
1575 | 0 | *ret = -1; |
1576 | 0 | } |
1577 | 0 | } |
1578 | 4 | return 0; |
1579 | 4 | } |
1580 | | |
1581 | | int64_t calculate_rowset_expired_time(const std::string& instance_id_, const RecycleRowsetPB& rs, |
1582 | 9.77k | int64_t* earlest_ts /* rowset earliest expiration ts */) { |
1583 | 9.77k | if (config::force_immediate_recycle) { |
1584 | 15 | return 0L; |
1585 | 15 | } |
1586 | | // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time |
1587 | 9.75k | int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time(); |
1588 | 9.75k | int64_t retention_seconds = config::retention_seconds; |
1589 | 9.75k | if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) { |
1590 | 7.80k | retention_seconds = std::min(config::compacted_rowset_retention_seconds, retention_seconds); |
1591 | 7.80k | } |
1592 | 9.75k | int64_t final_expiration = expiration + retention_seconds; |
1593 | 9.75k | if (*earlest_ts > final_expiration) { |
1594 | 7 | *earlest_ts = final_expiration; |
1595 | 7 | g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, *earlest_ts); |
1596 | 7 | } |
1597 | 9.75k | return final_expiration; |
1598 | 9.77k | } |
1599 | | |
1600 | | int64_t calculate_partition_expired_time( |
1601 | | const std::string& instance_id_, const RecyclePartitionPB& partition_meta_pb, |
1602 | 9 | int64_t* earlest_ts /* partition earliest expiration ts */) { |
1603 | 9 | if (config::force_immediate_recycle) { |
1604 | 3 | return 0L; |
1605 | 3 | } |
1606 | 6 | int64_t expiration = partition_meta_pb.expiration() > 0 ? partition_meta_pb.expiration() |
1607 | 6 | : partition_meta_pb.creation_time(); |
1608 | 6 | int64_t retention_seconds = config::retention_seconds; |
1609 | 6 | if (partition_meta_pb.state() == RecyclePartitionPB::DROPPED) { |
1610 | 6 | retention_seconds = |
1611 | 6 | std::min(config::dropped_partition_retention_seconds, retention_seconds); |
1612 | 6 | } |
1613 | 6 | int64_t final_expiration = expiration + retention_seconds; |
1614 | 6 | if (*earlest_ts > final_expiration) { |
1615 | 2 | *earlest_ts = final_expiration; |
1616 | 2 | g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, *earlest_ts); |
1617 | 2 | } |
1618 | 6 | return final_expiration; |
1619 | 9 | } |
1620 | | |
1621 | | int64_t calculate_index_expired_time(const std::string& instance_id_, |
1622 | | const RecycleIndexPB& index_meta_pb, |
1623 | 10 | int64_t* earlest_ts /* index earliest expiration ts */) { |
1624 | 10 | if (config::force_immediate_recycle) { |
1625 | 4 | return 0L; |
1626 | 4 | } |
1627 | 6 | int64_t expiration = index_meta_pb.expiration() > 0 ? index_meta_pb.expiration() |
1628 | 6 | : index_meta_pb.creation_time(); |
1629 | 6 | int64_t retention_seconds = config::retention_seconds; |
1630 | 6 | if (index_meta_pb.state() == RecycleIndexPB::DROPPED) { |
1631 | 6 | retention_seconds = std::min(config::dropped_index_retention_seconds, retention_seconds); |
1632 | 6 | } |
1633 | 6 | int64_t final_expiration = expiration + retention_seconds; |
1634 | 6 | if (*earlest_ts > final_expiration) { |
1635 | 2 | *earlest_ts = final_expiration; |
1636 | 2 | g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, *earlest_ts); |
1637 | 2 | } |
1638 | 6 | return final_expiration; |
1639 | 10 | } |
1640 | | |
1641 | | int64_t calculate_tmp_rowset_expired_time( |
1642 | | const std::string& instance_id_, const doris::RowsetMetaCloudPB& tmp_rowset_meta_pb, |
1643 | 106k | int64_t* earlest_ts /* tmp_rowset earliest expiration ts */) { |
1644 | | // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment) |
1645 | | // when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading |
1646 | | // duration or timeout always < `retention_time` in practice. |
1647 | 106k | int64_t expiration = tmp_rowset_meta_pb.txn_expiration() > 0 |
1648 | 106k | ? tmp_rowset_meta_pb.txn_expiration() |
1649 | 106k | : tmp_rowset_meta_pb.creation_time(); |
1650 | 106k | expiration = config::force_immediate_recycle ? 0 : expiration; |
1651 | 106k | int64_t final_expiration = expiration + config::retention_seconds; |
1652 | 106k | if (*earlest_ts > final_expiration) { |
1653 | 24 | *earlest_ts = final_expiration; |
1654 | 24 | g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, *earlest_ts); |
1655 | 24 | } |
1656 | 106k | return final_expiration; |
1657 | 106k | } |
1658 | | |
1659 | | int64_t calculate_txn_expired_time(const std::string& instance_id_, const RecycleTxnPB& txn_meta_pb, |
1660 | 30.0k | int64_t* earlest_ts /* txn earliest expiration ts */) { |
1661 | 30.0k | int64_t final_expiration = txn_meta_pb.creation_time() + config::label_keep_max_second * 1000L; |
1662 | 30.0k | if (*earlest_ts > final_expiration / 1000) { |
1663 | 8 | *earlest_ts = final_expiration / 1000; |
1664 | 8 | g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, *earlest_ts); |
1665 | 8 | } |
1666 | 30.0k | return final_expiration; |
1667 | 30.0k | } |
1668 | | |
1669 | | int64_t calculate_restore_job_expired_time( |
1670 | | const std::string& instance_id_, const RestoreJobCloudPB& restore_job, |
1671 | 41 | int64_t* earlest_ts /* restore job earliest expiration ts */) { |
1672 | 41 | if (config::force_immediate_recycle || restore_job.state() == RestoreJobCloudPB::DROPPED || |
1673 | 41 | restore_job.state() == RestoreJobCloudPB::COMPLETED || |
1674 | 41 | restore_job.state() == RestoreJobCloudPB::RECYCLING) { |
1675 | | // final state, recycle immediately |
1676 | 41 | return 0L; |
1677 | 41 | } |
1678 | | // not final state, wait much longer than the FE's timeout(1 day) |
1679 | 0 | int64_t last_modified_s = |
1680 | 0 | restore_job.has_mtime_s() ? restore_job.mtime_s() : restore_job.ctime_s(); |
1681 | 0 | int64_t expiration = restore_job.expired_at_s() > 0 |
1682 | 0 | ? last_modified_s + restore_job.expired_at_s() |
1683 | 0 | : last_modified_s; |
1684 | 0 | int64_t final_expiration = expiration + config::retention_seconds; |
1685 | 0 | if (*earlest_ts > final_expiration) { |
1686 | 0 | *earlest_ts = final_expiration; |
1687 | 0 | g_bvar_recycler_recycle_restore_job_earlest_ts.put(instance_id_, *earlest_ts); |
1688 | 0 | } |
1689 | 0 | return final_expiration; |
1690 | 41 | } |
1691 | | |
1692 | 2 | int InstanceRecycler::abort_txn_for_related_rowset(int64_t txn_id) { |
1693 | 2 | AbortTxnRequest req; |
1694 | 2 | TxnInfoPB txn_info; |
1695 | 2 | MetaServiceCode code = MetaServiceCode::OK; |
1696 | 2 | std::string msg; |
1697 | 2 | std::stringstream ss; |
1698 | 2 | std::unique_ptr<Transaction> txn; |
1699 | 2 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
1700 | 2 | if (err != TxnErrorCode::TXN_OK) { |
1701 | 0 | LOG_WARNING("failed to create txn").tag("err", err); |
1702 | 0 | return -1; |
1703 | 0 | } |
1704 | | |
1705 | | // get txn index |
1706 | 2 | TxnIndexPB txn_idx_pb; |
1707 | 2 | auto index_key = txn_index_key({instance_id_, txn_id}); |
1708 | 2 | std::string index_val; |
1709 | 2 | err = txn->get(index_key, &index_val); |
1710 | 2 | if (err != TxnErrorCode::TXN_OK) { |
1711 | 0 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { |
1712 | | // maybe recycled |
1713 | 0 | LOG_INFO("txn index not found, txn_id={} instance_id={}", txn_id, instance_id_) |
1714 | 0 | .tag("key", hex(index_key)) |
1715 | 0 | .tag("txn_id", txn_id); |
1716 | 0 | return 0; |
1717 | 0 | } |
1718 | 0 | LOG_WARNING("failed to get txn index") |
1719 | 0 | .tag("err", err) |
1720 | 0 | .tag("key", hex(index_key)) |
1721 | 0 | .tag("txn_id", txn_id); |
1722 | 0 | return -1; |
1723 | 0 | } |
1724 | 2 | if (!txn_idx_pb.ParseFromString(index_val)) { |
1725 | 0 | LOG_WARNING("failed to parse txn index") |
1726 | 0 | .tag("err", err) |
1727 | 0 | .tag("key", hex(index_key)) |
1728 | 0 | .tag("txn_id", txn_id); |
1729 | 0 | return -1; |
1730 | 0 | } |
1731 | | |
1732 | 2 | auto info_key = txn_info_key({instance_id_, txn_idx_pb.tablet_index().db_id(), txn_id}); |
1733 | 2 | std::string info_val; |
1734 | 2 | err = txn->get(info_key, &info_val); |
1735 | 2 | if (err != TxnErrorCode::TXN_OK) { |
1736 | 0 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { |
1737 | | // maybe recycled |
1738 | 0 | LOG_INFO("txn info not found, txn_id={} instance_id={}", txn_id, instance_id_) |
1739 | 0 | .tag("key", hex(info_key)) |
1740 | 0 | .tag("txn_id", txn_id); |
1741 | 0 | return 0; |
1742 | 0 | } |
1743 | 0 | LOG_WARNING("failed to get txn info") |
1744 | 0 | .tag("err", err) |
1745 | 0 | .tag("key", hex(info_key)) |
1746 | 0 | .tag("txn_id", txn_id); |
1747 | 0 | return -1; |
1748 | 0 | } |
1749 | 2 | if (!txn_info.ParseFromString(info_val)) { |
1750 | 0 | LOG_WARNING("failed to parse txn info") |
1751 | 0 | .tag("err", err) |
1752 | 0 | .tag("key", hex(info_key)) |
1753 | 0 | .tag("txn_id", txn_id); |
1754 | 0 | return -1; |
1755 | 0 | } |
1756 | | |
1757 | 2 | if (txn_info.status() != TxnStatusPB::TXN_STATUS_PREPARED) { |
1758 | 0 | LOG_INFO("txn is not prepared status, txn_id={} status={}", txn_id, txn_info.status()) |
1759 | 0 | .tag("key", hex(info_key)) |
1760 | 0 | .tag("txn_id", txn_id); |
1761 | 0 | return 0; |
1762 | 0 | } |
1763 | | |
1764 | 2 | req.set_txn_id(txn_id); |
1765 | | |
1766 | 2 | LOG(INFO) << "begin abort txn for related rowset, txn_id=" << txn_id |
1767 | 2 | << " instance_id=" << instance_id_ << " txn_info=" << txn_info.ShortDebugString(); |
1768 | | |
1769 | 2 | _abort_txn(instance_id_, &req, txn.get(), txn_info, ss, code, msg); |
1770 | 2 | err = txn->commit(); |
1771 | 2 | if (err != TxnErrorCode::TXN_OK) { |
1772 | 0 | code = cast_as<ErrCategory::COMMIT>(err); |
1773 | 0 | ss << "failed to commit kv txn, txn_id=" << txn_info.txn_id() << " err=" << err; |
1774 | 0 | msg = ss.str(); |
1775 | 0 | return -1; |
1776 | 0 | } |
1777 | | |
1778 | 2 | LOG(INFO) << "finish abort txn for related rowset, txn_id=" << txn_id |
1779 | 2 | << " instance_id=" << instance_id_ << " txn_info=" << txn_info.ShortDebugString() |
1780 | 2 | << " code=" << code << " msg=" << msg; |
1781 | | |
1782 | 2 | return 0; |
1783 | 2 | } |
1784 | | |
1785 | 4 | int InstanceRecycler::abort_job_for_related_rowset(const RowsetMetaCloudPB& rowset_meta) { |
1786 | 4 | FinishTabletJobRequest req; |
1787 | 4 | FinishTabletJobResponse res; |
1788 | 4 | req.set_action(FinishTabletJobRequest::ABORT); |
1789 | 4 | MetaServiceCode code = MetaServiceCode::OK; |
1790 | 4 | std::string msg; |
1791 | 4 | std::stringstream ss; |
1792 | | |
1793 | 4 | TabletIndexPB tablet_idx; |
1794 | 4 | int ret = get_tablet_idx(txn_kv_.get(), instance_id_, rowset_meta.tablet_id(), tablet_idx); |
1795 | 4 | if (ret == 1) { |
1796 | | // tablet maybe recycled, directly return 0 |
1797 | 1 | return 0; |
1798 | 3 | } else if (ret != 0) { |
1799 | 0 | LOG(WARNING) << "failed to get tablet index, tablet_id=" << rowset_meta.tablet_id() |
1800 | 0 | << " instance_id=" << instance_id_ << " ret=" << ret; |
1801 | 0 | return ret; |
1802 | 0 | } |
1803 | | |
1804 | 3 | std::unique_ptr<Transaction> txn; |
1805 | 3 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
1806 | 3 | if (err != TxnErrorCode::TXN_OK) { |
1807 | 0 | LOG(WARNING) << "failed to create txn, instance_id=" << instance_id_ << " err=" << err; |
1808 | 0 | return -1; |
1809 | 0 | } |
1810 | | |
1811 | 3 | std::string job_key = |
1812 | 3 | job_tablet_key({instance_id_, tablet_idx.table_id(), tablet_idx.index_id(), |
1813 | 3 | tablet_idx.partition_id(), tablet_idx.tablet_id()}); |
1814 | 3 | std::string job_val; |
1815 | 3 | err = txn->get(job_key, &job_val); |
1816 | 3 | if (err != TxnErrorCode::TXN_OK) { |
1817 | 0 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { |
1818 | 0 | LOG(INFO) << "job not exists, instance_id=" << instance_id_ |
1819 | 0 | << " tablet_id=" << tablet_idx.tablet_id() << " key=" << hex(job_key); |
1820 | 0 | return 0; |
1821 | 0 | } |
1822 | 0 | LOG(WARNING) << "failed to get job, instance_id=" << instance_id_ |
1823 | 0 | << " tablet_id=" << tablet_idx.tablet_id() << " err=" << err |
1824 | 0 | << " key=" << hex(job_key); |
1825 | 0 | return -1; |
1826 | 0 | } |
1827 | | |
1828 | 3 | TabletJobInfoPB job_pb; |
1829 | 3 | if (!job_pb.ParseFromString(job_val)) { |
1830 | 0 | LOG(WARNING) << "failed to parse job, instance_id=" << instance_id_ |
1831 | 0 | << " tablet_id=" << tablet_idx.tablet_id() << " key=" << hex(job_key); |
1832 | 0 | return -1; |
1833 | 0 | } |
1834 | | |
1835 | 3 | std::string job_id {}; |
1836 | 3 | if (!job_pb.compaction().empty()) { |
1837 | 2 | for (const auto& c : job_pb.compaction()) { |
1838 | 2 | if (c.id() == rowset_meta.job_id()) { |
1839 | 2 | job_id = c.id(); |
1840 | 2 | break; |
1841 | 2 | } |
1842 | 2 | } |
1843 | 2 | } else if (job_pb.has_schema_change()) { |
1844 | 1 | job_id = job_pb.schema_change().id(); |
1845 | 1 | } |
1846 | | |
1847 | 3 | if (!job_id.empty() && rowset_meta.job_id() == job_id) { |
1848 | 3 | LOG(INFO) << "begin to abort job for related rowset, job_id=" << rowset_meta.job_id() |
1849 | 3 | << " instance_id=" << instance_id_ << " tablet_id=" << tablet_idx.tablet_id(); |
1850 | 3 | req.mutable_job()->CopyFrom(job_pb); |
1851 | 3 | req.set_action(FinishTabletJobRequest::ABORT); |
1852 | 3 | _finish_tablet_job(&req, &res, instance_id_, txn, txn_kv_.get(), |
1853 | 3 | delete_bitmap_lock_white_list_.get(), resource_mgr_.get(), code, msg, |
1854 | 3 | ss); |
1855 | 3 | if (code != MetaServiceCode::OK) { |
1856 | 0 | LOG(WARNING) << "failed to abort job, instance_id=" << instance_id_ |
1857 | 0 | << " tablet_id=" << tablet_idx.tablet_id() << " code=" << code |
1858 | 0 | << " msg=" << msg; |
1859 | 0 | return -1; |
1860 | 0 | } |
1861 | 3 | LOG(INFO) << "finish abort job for related rowset, job_id=" << rowset_meta.job_id() |
1862 | 3 | << " instance_id=" << instance_id_ << " tablet_id=" << tablet_idx.tablet_id() |
1863 | 3 | << " code=" << code << " msg=" << msg; |
1864 | 3 | } else { |
1865 | | // clang-format off |
1866 | 0 | LOG(INFO) << "there is no job for related rowset, directly recycle rowset data" |
1867 | 0 | << ", instance_id=" << instance_id_ |
1868 | 0 | << ", tablet_id=" << tablet_idx.tablet_id() |
1869 | 0 | << ", job_id=" << job_id |
1870 | 0 | << ", rowset_id=" << rowset_meta.rowset_id_v2(); |
1871 | | // clang-format on |
1872 | 0 | } |
1873 | | |
1874 | 3 | return 0; |
1875 | 3 | } |
1876 | | |
1877 | | template <typename T> |
1878 | 55.7k | RowsetMetaCloudPB* mutable_rowset_meta(T& rowset_meta_pb) { |
1879 | 55.7k | if constexpr (std::is_same_v<T, RecycleRowsetPB>) { |
1880 | 51.9k | return rowset_meta_pb.mutable_rowset_meta(); |
1881 | 51.9k | } else { |
1882 | 51.9k | return &rowset_meta_pb; |
1883 | 51.9k | } |
1884 | 55.7k | } _ZN5doris5cloud19mutable_rowset_metaINS0_15RecycleRowsetPBEEEPNS_17RowsetMetaCloudPBERT_ Line | Count | Source | 1878 | 3.75k | RowsetMetaCloudPB* mutable_rowset_meta(T& rowset_meta_pb) { | 1879 | 3.75k | if constexpr (std::is_same_v<T, RecycleRowsetPB>) { | 1880 | 3.75k | return rowset_meta_pb.mutable_rowset_meta(); | 1881 | 3.75k | } else { | 1882 | 3.75k | return &rowset_meta_pb; | 1883 | 3.75k | } | 1884 | 3.75k | } |
_ZN5doris5cloud19mutable_rowset_metaINS_17RowsetMetaCloudPBEEEPS2_RT_ Line | Count | Source | 1878 | 51.9k | RowsetMetaCloudPB* mutable_rowset_meta(T& rowset_meta_pb) { | 1879 | 51.9k | if constexpr (std::is_same_v<T, RecycleRowsetPB>) { | 1880 | 51.9k | return rowset_meta_pb.mutable_rowset_meta(); | 1881 | 51.9k | } else { | 1882 | 51.9k | return &rowset_meta_pb; | 1883 | 51.9k | } | 1884 | 51.9k | } |
|
1885 | | |
1886 | | template <typename T> |
1887 | 223k | const RowsetMetaCloudPB& rowset_meta(const T& rowset_meta_pb) { |
1888 | 223k | if constexpr (std::is_same_v<T, RecycleRowsetPB>) { |
1889 | 212k | return rowset_meta_pb.rowset_meta(); |
1890 | 212k | } else { |
1891 | 212k | return rowset_meta_pb; |
1892 | 212k | } |
1893 | 223k | } _ZN5doris5cloud11rowset_metaINS0_15RecycleRowsetPBEEERKNS_17RowsetMetaCloudPBERKT_ Line | Count | Source | 1887 | 11.9k | const RowsetMetaCloudPB& rowset_meta(const T& rowset_meta_pb) { | 1888 | 11.9k | if constexpr (std::is_same_v<T, RecycleRowsetPB>) { | 1889 | 11.9k | return rowset_meta_pb.rowset_meta(); | 1890 | 11.9k | } else { | 1891 | 11.9k | return rowset_meta_pb; | 1892 | 11.9k | } | 1893 | 11.9k | } |
_ZN5doris5cloud11rowset_metaINS_17RowsetMetaCloudPBEEERKS2_RKT_ Line | Count | Source | 1887 | 212k | const RowsetMetaCloudPB& rowset_meta(const T& rowset_meta_pb) { | 1888 | 212k | if constexpr (std::is_same_v<T, RecycleRowsetPB>) { | 1889 | 212k | return rowset_meta_pb.rowset_meta(); | 1890 | 212k | } else { | 1891 | 212k | return rowset_meta_pb; | 1892 | 212k | } | 1893 | 212k | } |
|
1894 | | |
// Describes an abort (of a load txn or a tablet job) recorded for a rowset
// that is being recycled.
struct DeferredRecycleAbortTask {
    // Which kind of producer has to be aborted.
    enum class Type : uint8_t {
        TXN, // a load transaction, identified by `txn_id`
        JOB, // a tablet job, identified by `job_id`
    };

    Type type {Type::TXN};
    int64_t txn_id {0};        // set for Type::TXN
    int64_t tablet_id {0};     // tablet the rowset belongs to
    int64_t start_version {0}; // rowset start version
    int64_t end_version {0};   // rowset end version
    std::string rowset_id;     // set for Type::JOB
    std::string job_id;        // set for Type::JOB
};
1909 | | |
1910 | | struct DeferredRecyclePrepareDeleteTask { |
1911 | | std::string key; |
1912 | | std::string resource_id; |
1913 | | std::string rowset_id; |
1914 | | int64_t tablet_id = 0; |
1915 | | }; |
1916 | | |
1917 | | template <typename T> |
1918 | 57.7k | std::optional<DeferredRecycleAbortTask> make_deferred_abort_task(const T& rowset_meta_pb) { |
1919 | 57.7k | if constexpr (std::is_same_v<T, RecycleRowsetPB>) { |
1920 | 3.75k | if (rowset_meta_pb.type() != RecycleRowsetPB::PREPARE) { |
1921 | 3.10k | return std::nullopt; |
1922 | 3.10k | } |
1923 | 3.75k | } |
1924 | | |
1925 | 654 | const auto& rs_meta = rowset_meta(rowset_meta_pb); |
1926 | 654 | DeferredRecycleAbortTask task; |
1927 | 654 | task.tablet_id = rs_meta.tablet_id(); |
1928 | 654 | task.start_version = rs_meta.start_version(); |
1929 | 654 | task.end_version = rs_meta.end_version(); |
1930 | 54.6k | if (rs_meta.has_load_id()) { |
1931 | 4 | task.type = DeferredRecycleAbortTask::Type::TXN; |
1932 | 4 | task.txn_id = rs_meta.txn_id(); |
1933 | 4 | return task; |
1934 | 4 | } |
1935 | 54.6k | if (rs_meta.has_job_id()) { |
1936 | 6 | task.type = DeferredRecycleAbortTask::Type::JOB; |
1937 | 6 | task.rowset_id = rs_meta.rowset_id_v2(); |
1938 | 6 | task.job_id = rs_meta.job_id(); |
1939 | 6 | return task; |
1940 | 6 | } |
1941 | 54.6k | return std::nullopt; |
1942 | 54.6k | } _ZN5doris5cloud24make_deferred_abort_taskINS0_15RecycleRowsetPBEEESt8optionalINS0_24DeferredRecycleAbortTaskEERKT_ Line | Count | Source | 1918 | 3.75k | std::optional<DeferredRecycleAbortTask> make_deferred_abort_task(const T& rowset_meta_pb) { | 1919 | 3.75k | if constexpr (std::is_same_v<T, RecycleRowsetPB>) { | 1920 | 3.75k | if (rowset_meta_pb.type() != RecycleRowsetPB::PREPARE) { | 1921 | 3.10k | return std::nullopt; | 1922 | 3.10k | } | 1923 | 3.75k | } | 1924 | | | 1925 | 654 | const auto& rs_meta = rowset_meta(rowset_meta_pb); | 1926 | 654 | DeferredRecycleAbortTask task; | 1927 | 654 | task.tablet_id = rs_meta.tablet_id(); | 1928 | 654 | task.start_version = rs_meta.start_version(); | 1929 | 654 | task.end_version = rs_meta.end_version(); | 1930 | 654 | if (rs_meta.has_load_id()) { | 1931 | 2 | task.type = DeferredRecycleAbortTask::Type::TXN; | 1932 | 2 | task.txn_id = rs_meta.txn_id(); | 1933 | 2 | return task; | 1934 | 2 | } | 1935 | 652 | if (rs_meta.has_job_id()) { | 1936 | 2 | task.type = DeferredRecycleAbortTask::Type::JOB; | 1937 | 2 | task.rowset_id = rs_meta.rowset_id_v2(); | 1938 | 2 | task.job_id = rs_meta.job_id(); | 1939 | 2 | return task; | 1940 | 2 | } | 1941 | 650 | return std::nullopt; | 1942 | 652 | } |
_ZN5doris5cloud24make_deferred_abort_taskINS_17RowsetMetaCloudPBEEESt8optionalINS0_24DeferredRecycleAbortTaskEERKT_ Line | Count | Source | 1918 | 54.0k | std::optional<DeferredRecycleAbortTask> make_deferred_abort_task(const T& rowset_meta_pb) { | 1919 | 54.0k | if constexpr (std::is_same_v<T, RecycleRowsetPB>) { | 1920 | 54.0k | if (rowset_meta_pb.type() != RecycleRowsetPB::PREPARE) { | 1921 | 54.0k | return std::nullopt; | 1922 | 54.0k | } | 1923 | 54.0k | } | 1924 | | | 1925 | 54.0k | const auto& rs_meta = rowset_meta(rowset_meta_pb); | 1926 | 54.0k | DeferredRecycleAbortTask task; | 1927 | 54.0k | task.tablet_id = rs_meta.tablet_id(); | 1928 | 54.0k | task.start_version = rs_meta.start_version(); | 1929 | 54.0k | task.end_version = rs_meta.end_version(); | 1930 | 54.0k | if (rs_meta.has_load_id()) { | 1931 | 2 | task.type = DeferredRecycleAbortTask::Type::TXN; | 1932 | 2 | task.txn_id = rs_meta.txn_id(); | 1933 | 2 | return task; | 1934 | 2 | } | 1935 | 54.0k | if (rs_meta.has_job_id()) { | 1936 | 4 | task.type = DeferredRecycleAbortTask::Type::JOB; | 1937 | 4 | task.rowset_id = rs_meta.rowset_id_v2(); | 1938 | 4 | task.job_id = rs_meta.job_id(); | 1939 | 4 | return task; | 1940 | 4 | } | 1941 | 54.0k | return std::nullopt; | 1942 | 54.0k | } |
|
1943 | | |
1944 | | template <typename T> |
1945 | 169k | bool need_mark_rowset_as_recycled(const T& rowset_meta_pb) { |
1946 | 169k | const auto& rs_meta = rowset_meta(rowset_meta_pb); |
1947 | 169k | return !rs_meta.has_is_recycled() || !rs_meta.is_recycled(); |
1948 | 169k | } _ZN5doris5cloud28need_mark_rowset_as_recycledINS0_15RecycleRowsetPBEEEbRKT_ Line | Count | Source | 1945 | 11.2k | bool need_mark_rowset_as_recycled(const T& rowset_meta_pb) { | 1946 | 11.2k | const auto& rs_meta = rowset_meta(rowset_meta_pb); | 1947 | 11.2k | return !rs_meta.has_is_recycled() || !rs_meta.is_recycled(); | 1948 | 11.2k | } |
_ZN5doris5cloud28need_mark_rowset_as_recycledINS_17RowsetMetaCloudPBEEEbRKT_ Line | Count | Source | 1945 | 158k | bool need_mark_rowset_as_recycled(const T& rowset_meta_pb) { | 1946 | 158k | const auto& rs_meta = rowset_meta(rowset_meta_pb); | 1947 | 158k | return !rs_meta.has_is_recycled() || !rs_meta.is_recycled(); | 1948 | 158k | } |
|
1949 | | |
1950 | | template <typename T> |
1951 | | int batch_mark_rowsets_as_recycled(TxnKv* txn_kv, const std::string& instance_id, |
1952 | 42 | const std::vector<std::string>& keys) { |
1953 | 42 | std::unique_ptr<Transaction> txn; |
1954 | 42 | TxnErrorCode err = txn_kv->create_txn(&txn); |
1955 | 42 | if (err != TxnErrorCode::TXN_OK) { |
1956 | 0 | LOG(WARNING) << "failed to create txn, instance_id=" << instance_id; |
1957 | 0 | return -1; |
1958 | 0 | } |
1959 | 42 | std::vector<std::optional<std::string>> values; |
1960 | 42 | err = txn->batch_get(&values, keys); |
1961 | 42 | if (err != TxnErrorCode::TXN_OK) { |
1962 | 0 | LOG(WARNING) << "failed to batch get rowset meta, instance_id=" << instance_id << ' ' |
1963 | 0 | << "keys size=" << keys.size() << ' ' << "err=" << err; |
1964 | 0 | return -1; |
1965 | 0 | } |
1966 | 42 | size_t total_keys = keys.size(); |
1967 | 55.8k | for (size_t i = 0; i < total_keys; i++) { |
1968 | 55.7k | if (!values[i].has_value()) { |
1969 | | // has already been removed by commit_rowset |
1970 | 0 | continue; |
1971 | 0 | } |
1972 | 55.7k | auto key = keys[i]; |
1973 | 55.7k | auto val = values[i].value(); |
1974 | 55.7k | T rowset_meta_pb; |
1975 | 55.7k | if (!rowset_meta_pb.ParseFromString(val)) { |
1976 | 0 | LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id |
1977 | 0 | << " key=" << hex(key); |
1978 | 0 | return -1; |
1979 | 0 | } |
1980 | 55.7k | if (!need_mark_rowset_as_recycled(rowset_meta_pb)) { |
1981 | 0 | continue; |
1982 | 0 | } |
1983 | 55.7k | mutable_rowset_meta(rowset_meta_pb)->set_is_recycled(true); |
1984 | 55.7k | val.clear(); |
1985 | 55.7k | rowset_meta_pb.SerializeToString(&val); |
1986 | 55.7k | txn->put(key, val); |
1987 | 55.7k | } |
1988 | 42 | err = txn->commit(); |
1989 | 42 | if (err != TxnErrorCode::TXN_OK) { |
1990 | 0 | LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id; |
1991 | 0 | return -1; |
1992 | 0 | } |
1993 | | |
1994 | 42 | return 0; |
1995 | 42 | } _ZN5doris5cloud30batch_mark_rowsets_as_recycledINS0_15RecycleRowsetPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKSt6vectorISA_SaISA_EE Line | Count | Source | 1952 | 26 | const std::vector<std::string>& keys) { | 1953 | 26 | std::unique_ptr<Transaction> txn; | 1954 | 26 | TxnErrorCode err = txn_kv->create_txn(&txn); | 1955 | 26 | if (err != TxnErrorCode::TXN_OK) { | 1956 | 0 | LOG(WARNING) << "failed to create txn, instance_id=" << instance_id; | 1957 | 0 | return -1; | 1958 | 0 | } | 1959 | 26 | std::vector<std::optional<std::string>> values; | 1960 | 26 | err = txn->batch_get(&values, keys); | 1961 | 26 | if (err != TxnErrorCode::TXN_OK) { | 1962 | 0 | LOG(WARNING) << "failed to batch get rowset meta, instance_id=" << instance_id << ' ' | 1963 | 0 | << "keys size=" << keys.size() << ' ' << "err=" << err; | 1964 | 0 | return -1; | 1965 | 0 | } | 1966 | 26 | size_t total_keys = keys.size(); | 1967 | 3.78k | for (size_t i = 0; i < total_keys; i++) { | 1968 | 3.75k | if (!values[i].has_value()) { | 1969 | | // has already been removed by commit_rowset | 1970 | 0 | continue; | 1971 | 0 | } | 1972 | 3.75k | auto key = keys[i]; | 1973 | 3.75k | auto val = values[i].value(); | 1974 | 3.75k | T rowset_meta_pb; | 1975 | 3.75k | if (!rowset_meta_pb.ParseFromString(val)) { | 1976 | 0 | LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id | 1977 | 0 | << " key=" << hex(key); | 1978 | 0 | return -1; | 1979 | 0 | } | 1980 | 3.75k | if (!need_mark_rowset_as_recycled(rowset_meta_pb)) { | 1981 | 0 | continue; | 1982 | 0 | } | 1983 | 3.75k | mutable_rowset_meta(rowset_meta_pb)->set_is_recycled(true); | 1984 | 3.75k | val.clear(); | 1985 | 3.75k | rowset_meta_pb.SerializeToString(&val); | 1986 | 3.75k | txn->put(key, val); | 1987 | 3.75k | } | 1988 | 26 | err = txn->commit(); | 1989 | 26 | if (err != TxnErrorCode::TXN_OK) { | 1990 | 0 | LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id; | 1991 | 0 | 
return -1; | 1992 | 0 | } | 1993 | | | 1994 | 26 | return 0; | 1995 | 26 | } |
_ZN5doris5cloud30batch_mark_rowsets_as_recycledINS_17RowsetMetaCloudPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKSt6vectorISA_SaISA_EE Line | Count | Source | 1952 | 16 | const std::vector<std::string>& keys) { | 1953 | 16 | std::unique_ptr<Transaction> txn; | 1954 | 16 | TxnErrorCode err = txn_kv->create_txn(&txn); | 1955 | 16 | if (err != TxnErrorCode::TXN_OK) { | 1956 | 0 | LOG(WARNING) << "failed to create txn, instance_id=" << instance_id; | 1957 | 0 | return -1; | 1958 | 0 | } | 1959 | 16 | std::vector<std::optional<std::string>> values; | 1960 | 16 | err = txn->batch_get(&values, keys); | 1961 | 16 | if (err != TxnErrorCode::TXN_OK) { | 1962 | 0 | LOG(WARNING) << "failed to batch get rowset meta, instance_id=" << instance_id << ' ' | 1963 | 0 | << "keys size=" << keys.size() << ' ' << "err=" << err; | 1964 | 0 | return -1; | 1965 | 0 | } | 1966 | 16 | size_t total_keys = keys.size(); | 1967 | 52.0k | for (size_t i = 0; i < total_keys; i++) { | 1968 | 52.0k | if (!values[i].has_value()) { | 1969 | | // has already been removed by commit_rowset | 1970 | 0 | continue; | 1971 | 0 | } | 1972 | 52.0k | auto key = keys[i]; | 1973 | 52.0k | auto val = values[i].value(); | 1974 | 52.0k | T rowset_meta_pb; | 1975 | 52.0k | if (!rowset_meta_pb.ParseFromString(val)) { | 1976 | 0 | LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id | 1977 | 0 | << " key=" << hex(key); | 1978 | 0 | return -1; | 1979 | 0 | } | 1980 | 52.0k | if (!need_mark_rowset_as_recycled(rowset_meta_pb)) { | 1981 | 0 | continue; | 1982 | 0 | } | 1983 | 52.0k | mutable_rowset_meta(rowset_meta_pb)->set_is_recycled(true); | 1984 | 52.0k | val.clear(); | 1985 | 52.0k | rowset_meta_pb.SerializeToString(&val); | 1986 | 52.0k | txn->put(key, val); | 1987 | 52.0k | } | 1988 | 16 | err = txn->commit(); | 1989 | 16 | if (err != TxnErrorCode::TXN_OK) { | 1990 | 0 | LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id; | 1991 | 0 | return -1; | 
1992 | 0 | } | 1993 | | | 1994 | 16 | return 0; | 1995 | 16 | } |
|
1996 | | |
1997 | | template <typename T> |
1998 | | int collect_deferred_abort_tasks(TxnKv* txn_kv, const std::string& instance_id, |
1999 | | const std::vector<std::string>& keys, |
2000 | | std::vector<DeferredRecycleAbortTask>* abort_tasks, |
2001 | 5 | bool skip_base_version) { |
2002 | 5 | constexpr size_t kAbortCheckBatchSize = 256; |
2003 | 10 | for (size_t offset = 0; offset < keys.size(); offset += kAbortCheckBatchSize) { |
2004 | 5 | size_t limit = std::min(keys.size(), offset + kAbortCheckBatchSize); |
2005 | 5 | std::unique_ptr<Transaction> txn; |
2006 | 5 | TxnErrorCode err = txn_kv->create_txn(&txn); |
2007 | 5 | if (err != TxnErrorCode::TXN_OK) { |
2008 | 0 | LOG(WARNING) << "failed to create txn, instance_id=" << instance_id; |
2009 | 0 | return -1; |
2010 | 0 | } |
2011 | 10 | for (size_t idx = offset; idx < limit; ++idx) { |
2012 | 5 | const std::string& key = keys[idx]; |
2013 | 5 | std::string val; |
2014 | 5 | err = txn->get(key, &val); |
2015 | 5 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { |
2016 | | // has already been removed |
2017 | 0 | continue; |
2018 | 0 | } |
2019 | 5 | if (err != TxnErrorCode::TXN_OK) { |
2020 | 0 | LOG(WARNING) << "failed to get rowset meta, instance_id=" << instance_id |
2021 | 0 | << " key=" << hex(key); |
2022 | 0 | return -1; |
2023 | 0 | } |
2024 | 5 | T rowset_meta_pb; |
2025 | 5 | if (!rowset_meta_pb.ParseFromString(val)) { |
2026 | 0 | LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id |
2027 | 0 | << " key=" << hex(key); |
2028 | 0 | return -1; |
2029 | 0 | } |
2030 | 5 | if (skip_base_version && rowset_meta(rowset_meta_pb).end_version() == 1) { |
2031 | 0 | continue; |
2032 | 0 | } |
2033 | 5 | if (auto abort_task = make_deferred_abort_task(rowset_meta_pb); |
2034 | 5 | abort_task.has_value()) { |
2035 | 5 | abort_tasks->emplace_back(std::move(*abort_task)); |
2036 | 5 | } |
2037 | 5 | } |
2038 | 5 | } |
2039 | 5 | return 0; |
2040 | 5 | } _ZN5doris5cloud28collect_deferred_abort_tasksINS0_15RecycleRowsetPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKSt6vectorISA_SaISA_EEPSD_INS0_24DeferredRecycleAbortTaskESaISI_EEb Line | Count | Source | 2001 | 2 | bool skip_base_version) { | 2002 | 2 | constexpr size_t kAbortCheckBatchSize = 256; | 2003 | 4 | for (size_t offset = 0; offset < keys.size(); offset += kAbortCheckBatchSize) { | 2004 | 2 | size_t limit = std::min(keys.size(), offset + kAbortCheckBatchSize); | 2005 | 2 | std::unique_ptr<Transaction> txn; | 2006 | 2 | TxnErrorCode err = txn_kv->create_txn(&txn); | 2007 | 2 | if (err != TxnErrorCode::TXN_OK) { | 2008 | 0 | LOG(WARNING) << "failed to create txn, instance_id=" << instance_id; | 2009 | 0 | return -1; | 2010 | 0 | } | 2011 | 4 | for (size_t idx = offset; idx < limit; ++idx) { | 2012 | 2 | const std::string& key = keys[idx]; | 2013 | 2 | std::string val; | 2014 | 2 | err = txn->get(key, &val); | 2015 | 2 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { | 2016 | | // has already been removed | 2017 | 0 | continue; | 2018 | 0 | } | 2019 | 2 | if (err != TxnErrorCode::TXN_OK) { | 2020 | 0 | LOG(WARNING) << "failed to get rowset meta, instance_id=" << instance_id | 2021 | 0 | << " key=" << hex(key); | 2022 | 0 | return -1; | 2023 | 0 | } | 2024 | 2 | T rowset_meta_pb; | 2025 | 2 | if (!rowset_meta_pb.ParseFromString(val)) { | 2026 | 0 | LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id | 2027 | 0 | << " key=" << hex(key); | 2028 | 0 | return -1; | 2029 | 0 | } | 2030 | 2 | if (skip_base_version && rowset_meta(rowset_meta_pb).end_version() == 1) { | 2031 | 0 | continue; | 2032 | 0 | } | 2033 | 2 | if (auto abort_task = make_deferred_abort_task(rowset_meta_pb); | 2034 | 2 | abort_task.has_value()) { | 2035 | 2 | abort_tasks->emplace_back(std::move(*abort_task)); | 2036 | 2 | } | 2037 | 2 | } | 2038 | 2 | } | 2039 | 2 | return 0; | 2040 | 2 | } |
_ZN5doris5cloud28collect_deferred_abort_tasksINS_17RowsetMetaCloudPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKSt6vectorISA_SaISA_EEPSD_INS0_24DeferredRecycleAbortTaskESaISI_EEb Line | Count | Source | 2001 | 3 | bool skip_base_version) { | 2002 | 3 | constexpr size_t kAbortCheckBatchSize = 256; | 2003 | 6 | for (size_t offset = 0; offset < keys.size(); offset += kAbortCheckBatchSize) { | 2004 | 3 | size_t limit = std::min(keys.size(), offset + kAbortCheckBatchSize); | 2005 | 3 | std::unique_ptr<Transaction> txn; | 2006 | 3 | TxnErrorCode err = txn_kv->create_txn(&txn); | 2007 | 3 | if (err != TxnErrorCode::TXN_OK) { | 2008 | 0 | LOG(WARNING) << "failed to create txn, instance_id=" << instance_id; | 2009 | 0 | return -1; | 2010 | 0 | } | 2011 | 6 | for (size_t idx = offset; idx < limit; ++idx) { | 2012 | 3 | const std::string& key = keys[idx]; | 2013 | 3 | std::string val; | 2014 | 3 | err = txn->get(key, &val); | 2015 | 3 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { | 2016 | | // has already been removed | 2017 | 0 | continue; | 2018 | 0 | } | 2019 | 3 | if (err != TxnErrorCode::TXN_OK) { | 2020 | 0 | LOG(WARNING) << "failed to get rowset meta, instance_id=" << instance_id | 2021 | 0 | << " key=" << hex(key); | 2022 | 0 | return -1; | 2023 | 0 | } | 2024 | 3 | T rowset_meta_pb; | 2025 | 3 | if (!rowset_meta_pb.ParseFromString(val)) { | 2026 | 0 | LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id | 2027 | 0 | << " key=" << hex(key); | 2028 | 0 | return -1; | 2029 | 0 | } | 2030 | 3 | if (skip_base_version && rowset_meta(rowset_meta_pb).end_version() == 1) { | 2031 | 0 | continue; | 2032 | 0 | } | 2033 | 3 | if (auto abort_task = make_deferred_abort_task(rowset_meta_pb); | 2034 | 3 | abort_task.has_value()) { | 2035 | 3 | abort_tasks->emplace_back(std::move(*abort_task)); | 2036 | 3 | } | 2037 | 3 | } | 2038 | 3 | } | 2039 | 3 | return 0; | 2040 | 3 | } |
|
2041 | | |
2042 | | template <typename T> |
2043 | | int InstanceRecycler::batch_abort_txn_or_job_for_recycle(const std::vector<std::string>& keys, |
2044 | 5 | bool skip_base_version) { |
2045 | 5 | std::vector<DeferredRecycleAbortTask> abort_tasks; |
2046 | 5 | if (collect_deferred_abort_tasks<T>(txn_kv_.get(), instance_id_, keys, &abort_tasks, |
2047 | 5 | skip_base_version) != 0) { |
2048 | 0 | LOG(WARNING) << "failed to collect rowset abort tasks, instance_id=" << instance_id_; |
2049 | 0 | return -1; |
2050 | 0 | } |
2051 | 5 | for (const auto& abort_task : abort_tasks) { |
2052 | 5 | LOG(INFO) << "begin to abort txn or job for related rowset, instance_id=" << instance_id_ |
2053 | 5 | << " tablet_id=" << abort_task.tablet_id << " version=[" |
2054 | 5 | << abort_task.start_version << '-' << abort_task.end_version << "]"; |
2055 | 5 | int abort_ret = 0; |
2056 | 5 | if (abort_task.type == DeferredRecycleAbortTask::Type::TXN) { |
2057 | 2 | abort_ret = abort_txn_for_related_rowset(abort_task.txn_id); |
2058 | 3 | } else { |
2059 | 3 | RowsetMetaCloudPB rowset_meta; |
2060 | 3 | rowset_meta.set_tablet_id(abort_task.tablet_id); |
2061 | 3 | rowset_meta.set_rowset_id_v2(abort_task.rowset_id); |
2062 | 3 | rowset_meta.set_job_id(abort_task.job_id); |
2063 | 3 | abort_ret = abort_job_for_related_rowset(rowset_meta); |
2064 | 3 | } |
2065 | 5 | if (abort_ret != 0) { |
2066 | 0 | LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id=" |
2067 | 0 | << instance_id_ << " tablet_id=" << abort_task.tablet_id << " version=[" |
2068 | 0 | << abort_task.start_version << '-' << abort_task.end_version << "]"; |
2069 | 0 | return abort_ret; |
2070 | 0 | } |
2071 | 5 | } |
2072 | 5 | return 0; |
2073 | 5 | } _ZN5doris5cloud16InstanceRecycler34batch_abort_txn_or_job_for_recycleINS0_15RecycleRowsetPBEEEiRKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaISA_EEb Line | Count | Source | 2044 | 2 | bool skip_base_version) { | 2045 | 2 | std::vector<DeferredRecycleAbortTask> abort_tasks; | 2046 | 2 | if (collect_deferred_abort_tasks<T>(txn_kv_.get(), instance_id_, keys, &abort_tasks, | 2047 | 2 | skip_base_version) != 0) { | 2048 | 0 | LOG(WARNING) << "failed to collect rowset abort tasks, instance_id=" << instance_id_; | 2049 | 0 | return -1; | 2050 | 0 | } | 2051 | 2 | for (const auto& abort_task : abort_tasks) { | 2052 | 2 | LOG(INFO) << "begin to abort txn or job for related rowset, instance_id=" << instance_id_ | 2053 | 2 | << " tablet_id=" << abort_task.tablet_id << " version=[" | 2054 | 2 | << abort_task.start_version << '-' << abort_task.end_version << "]"; | 2055 | 2 | int abort_ret = 0; | 2056 | 2 | if (abort_task.type == DeferredRecycleAbortTask::Type::TXN) { | 2057 | 1 | abort_ret = abort_txn_for_related_rowset(abort_task.txn_id); | 2058 | 1 | } else { | 2059 | 1 | RowsetMetaCloudPB rowset_meta; | 2060 | 1 | rowset_meta.set_tablet_id(abort_task.tablet_id); | 2061 | 1 | rowset_meta.set_rowset_id_v2(abort_task.rowset_id); | 2062 | 1 | rowset_meta.set_job_id(abort_task.job_id); | 2063 | 1 | abort_ret = abort_job_for_related_rowset(rowset_meta); | 2064 | 1 | } | 2065 | 2 | if (abort_ret != 0) { | 2066 | 0 | LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id=" | 2067 | 0 | << instance_id_ << " tablet_id=" << abort_task.tablet_id << " version=[" | 2068 | 0 | << abort_task.start_version << '-' << abort_task.end_version << "]"; | 2069 | 0 | return abort_ret; | 2070 | 0 | } | 2071 | 2 | } | 2072 | 2 | return 0; | 2073 | 2 | } |
_ZN5doris5cloud16InstanceRecycler34batch_abort_txn_or_job_for_recycleINS_17RowsetMetaCloudPBEEEiRKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaISA_EEb Line | Count | Source | 2044 | 3 | bool skip_base_version) { | 2045 | 3 | std::vector<DeferredRecycleAbortTask> abort_tasks; | 2046 | 3 | if (collect_deferred_abort_tasks<T>(txn_kv_.get(), instance_id_, keys, &abort_tasks, | 2047 | 3 | skip_base_version) != 0) { | 2048 | 0 | LOG(WARNING) << "failed to collect rowset abort tasks, instance_id=" << instance_id_; | 2049 | 0 | return -1; | 2050 | 0 | } | 2051 | 3 | for (const auto& abort_task : abort_tasks) { | 2052 | 3 | LOG(INFO) << "begin to abort txn or job for related rowset, instance_id=" << instance_id_ | 2053 | 3 | << " tablet_id=" << abort_task.tablet_id << " version=[" | 2054 | 3 | << abort_task.start_version << '-' << abort_task.end_version << "]"; | 2055 | 3 | int abort_ret = 0; | 2056 | 3 | if (abort_task.type == DeferredRecycleAbortTask::Type::TXN) { | 2057 | 1 | abort_ret = abort_txn_for_related_rowset(abort_task.txn_id); | 2058 | 2 | } else { | 2059 | 2 | RowsetMetaCloudPB rowset_meta; | 2060 | 2 | rowset_meta.set_tablet_id(abort_task.tablet_id); | 2061 | 2 | rowset_meta.set_rowset_id_v2(abort_task.rowset_id); | 2062 | 2 | rowset_meta.set_job_id(abort_task.job_id); | 2063 | 2 | abort_ret = abort_job_for_related_rowset(rowset_meta); | 2064 | 2 | } | 2065 | 3 | if (abort_ret != 0) { | 2066 | 0 | LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id=" | 2067 | 0 | << instance_id_ << " tablet_id=" << abort_task.tablet_id << " version=[" | 2068 | 0 | << abort_task.start_version << '-' << abort_task.end_version << "]"; | 2069 | 0 | return abort_ret; | 2070 | 0 | } | 2071 | 3 | } | 2072 | 3 | return 0; | 2073 | 3 | } |
|
2074 | | |
2075 | | int collect_prepare_delete_tasks(TxnKv* txn_kv, const std::string& instance_id, |
2076 | | const std::vector<std::string>& keys, |
2077 | 23 | std::vector<DeferredRecyclePrepareDeleteTask>* delete_tasks) { |
2078 | 23 | constexpr size_t kPrepareCheckBatchSize = 256; |
2079 | 46 | for (size_t offset = 0; offset < keys.size(); offset += kPrepareCheckBatchSize) { |
2080 | 23 | size_t limit = std::min(keys.size(), offset + kPrepareCheckBatchSize); |
2081 | 23 | std::unique_ptr<Transaction> txn; |
2082 | 23 | TxnErrorCode err = txn_kv->create_txn(&txn); |
2083 | 23 | if (err != TxnErrorCode::TXN_OK) { |
2084 | 0 | LOG(WARNING) << "failed to create txn, instance_id=" << instance_id; |
2085 | 0 | return -1; |
2086 | 0 | } |
2087 | 675 | for (size_t idx = offset; idx < limit; ++idx) { |
2088 | 652 | const std::string& key = keys[idx]; |
2089 | 652 | std::string val; |
2090 | 652 | err = txn->get(key, &val); |
2091 | 652 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { |
2092 | | // has already been removed |
2093 | 0 | continue; |
2094 | 0 | } |
2095 | 652 | if (err != TxnErrorCode::TXN_OK) { |
2096 | 0 | LOG(WARNING) << "failed to get recycle rowset, instance_id=" << instance_id |
2097 | 0 | << " key=" << hex(key); |
2098 | 0 | return -1; |
2099 | 0 | } |
2100 | 652 | RecycleRowsetPB rowset; |
2101 | 652 | if (!rowset.ParseFromString(val)) { |
2102 | 0 | LOG(WARNING) << "failed to parse recycle rowset, instance_id=" << instance_id |
2103 | 0 | << " key=" << hex(key); |
2104 | 0 | return -1; |
2105 | 0 | } |
2106 | 652 | if (rowset.type() != RecycleRowsetPB::PREPARE) { |
2107 | 0 | continue; |
2108 | 0 | } |
2109 | 652 | const auto& rs_meta = rowset.rowset_meta(); |
2110 | 652 | delete_tasks->push_back( |
2111 | 652 | {key, rs_meta.resource_id(), rs_meta.rowset_id_v2(), rs_meta.tablet_id()}); |
2112 | 652 | } |
2113 | 23 | } |
2114 | 23 | return 0; |
2115 | 23 | } |
2116 | | |
2117 | 1 | int InstanceRecycler::recycle_ref_rowsets(bool* has_unrecycled_rowsets) { |
2118 | 1 | const std::string task_name = "recycle_ref_rowsets"; |
2119 | 1 | *has_unrecycled_rowsets = false; |
2120 | | |
2121 | 1 | std::string data_rowset_ref_count_key_start = |
2122 | 1 | versioned::data_rowset_ref_count_key({instance_id_, 0, ""}); |
2123 | 1 | std::string data_rowset_ref_count_key_end = |
2124 | 1 | versioned::data_rowset_ref_count_key({instance_id_, INT64_MAX, ""}); |
2125 | | |
2126 | 1 | LOG_WARNING("begin to recycle ref rowsets").tag("instance_id", instance_id_); |
2127 | | |
2128 | 1 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
2129 | 1 | register_recycle_task(task_name, start_time); |
2130 | | |
2131 | 1 | DORIS_CLOUD_DEFER { |
2132 | 1 | unregister_recycle_task(task_name); |
2133 | 1 | int64_t cost = |
2134 | 1 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
2135 | 1 | LOG_WARNING("recycle ref rowsets finished, cost={}s", cost) |
2136 | 1 | .tag("instance_id", instance_id_); |
2137 | 1 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_0clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_0clEv Line | Count | Source | 2131 | 1 | DORIS_CLOUD_DEFER { | 2132 | 1 | unregister_recycle_task(task_name); | 2133 | 1 | int64_t cost = | 2134 | 1 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 2135 | 1 | LOG_WARNING("recycle ref rowsets finished, cost={}s", cost) | 2136 | 1 | .tag("instance_id", instance_id_); | 2137 | 1 | }; |
|
2138 | | |
2139 | | // Phase 1: Scan to collect all tablet_ids that have rowset ref counts |
2140 | 1 | std::set<int64_t> tablets_with_refs; |
2141 | 1 | int64_t num_scanned = 0; |
2142 | | |
2143 | 1 | auto scan_func = [&](std::string_view k, std::string_view v) -> int { |
2144 | 0 | ++num_scanned; |
2145 | 0 | int64_t tablet_id; |
2146 | 0 | std::string rowset_id; |
2147 | 0 | std::string_view key(k); |
2148 | 0 | if (!versioned::decode_data_rowset_ref_count_key(&key, &tablet_id, &rowset_id)) { |
2149 | 0 | LOG_WARNING("failed to decode data rowset ref count key").tag("key", hex(k)); |
2150 | 0 | return 0; // Continue scanning |
2151 | 0 | } |
2152 | | |
2153 | 0 | tablets_with_refs.insert(tablet_id); |
2154 | 0 | return 0; |
2155 | 0 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES7_ Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES7_ |
2156 | | |
2157 | 1 | if (scan_and_recycle(data_rowset_ref_count_key_start, data_rowset_ref_count_key_end, |
2158 | 1 | std::move(scan_func)) != 0) { |
2159 | 0 | LOG_WARNING("failed to scan data rowset ref count keys"); |
2160 | 0 | return -1; |
2161 | 0 | } |
2162 | | |
2163 | 1 | LOG_INFO("collected {} tablets with rowset refs, scanned {} ref count keys", |
2164 | 1 | tablets_with_refs.size(), num_scanned) |
2165 | 1 | .tag("instance_id", instance_id_); |
2166 | | |
2167 | | // Phase 2: Recycle each tablet |
2168 | 1 | int64_t num_recycled_tablets = 0; |
2169 | 1 | for (int64_t tablet_id : tablets_with_refs) { |
2170 | 0 | if (stopped()) { |
2171 | 0 | LOG_INFO("recycler stopped, skip remaining tablets") |
2172 | 0 | .tag("instance_id", instance_id_) |
2173 | 0 | .tag("tablets_processed", num_recycled_tablets) |
2174 | 0 | .tag("tablets_remaining", tablets_with_refs.size() - num_recycled_tablets); |
2175 | 0 | break; |
2176 | 0 | } |
2177 | | |
2178 | 0 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
2179 | 0 | if (recycle_versioned_tablet(tablet_id, metrics_context) != 0) { |
2180 | 0 | LOG_WARNING("failed to recycle tablet") |
2181 | 0 | .tag("instance_id", instance_id_) |
2182 | 0 | .tag("tablet_id", tablet_id); |
2183 | 0 | return -1; |
2184 | 0 | } |
2185 | 0 | ++num_recycled_tablets; |
2186 | 0 | } |
2187 | | |
2188 | 1 | LOG_INFO("recycled {} tablets", num_recycled_tablets) |
2189 | 1 | .tag("instance_id", instance_id_) |
2190 | 1 | .tag("total_tablets", tablets_with_refs.size()); |
2191 | | |
2192 | | // Phase 3: Scan again to check if any ref count keys still exist |
2193 | 1 | std::unique_ptr<Transaction> txn; |
2194 | 1 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
2195 | 1 | if (err != TxnErrorCode::TXN_OK) { |
2196 | 0 | LOG_WARNING("failed to create txn for final check") |
2197 | 0 | .tag("instance_id", instance_id_) |
2198 | 0 | .tag("err", err); |
2199 | 0 | return -1; |
2200 | 0 | } |
2201 | | |
2202 | 1 | std::unique_ptr<RangeGetIterator> iter; |
2203 | 1 | err = txn->get(data_rowset_ref_count_key_start, data_rowset_ref_count_key_end, &iter, true); |
2204 | 1 | if (err != TxnErrorCode::TXN_OK) { |
2205 | 0 | LOG_WARNING("failed to create range iterator for final check") |
2206 | 0 | .tag("instance_id", instance_id_) |
2207 | 0 | .tag("err", err); |
2208 | 0 | return -1; |
2209 | 0 | } |
2210 | | |
2211 | 1 | *has_unrecycled_rowsets = iter->has_next(); |
2212 | 1 | if (*has_unrecycled_rowsets) { |
2213 | 0 | LOG_INFO("still has unrecycled rowsets after recycle_ref_rowsets") |
2214 | 0 | .tag("instance_id", instance_id_); |
2215 | 0 | } |
2216 | | |
2217 | 1 | return 0; |
2218 | 1 | } |
2219 | | |
2220 | 17 | int InstanceRecycler::recycle_indexes() { |
2221 | 17 | const std::string task_name = "recycle_indexes"; |
2222 | 17 | int64_t num_scanned = 0; |
2223 | 17 | int64_t num_expired = 0; |
2224 | 17 | int64_t num_recycled = 0; |
2225 | 17 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
2226 | | |
2227 | 17 | RecycleIndexKeyInfo index_key_info0 {instance_id_, 0}; |
2228 | 17 | RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX}; |
2229 | 17 | std::string index_key0; |
2230 | 17 | std::string index_key1; |
2231 | 17 | recycle_index_key(index_key_info0, &index_key0); |
2232 | 17 | recycle_index_key(index_key_info1, &index_key1); |
2233 | | |
2234 | 17 | LOG_WARNING("begin to recycle indexes").tag("instance_id", instance_id_); |
2235 | | |
2236 | 17 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
2237 | 17 | register_recycle_task(task_name, start_time); |
2238 | | |
2239 | 17 | DORIS_CLOUD_DEFER { |
2240 | 17 | unregister_recycle_task(task_name); |
2241 | 17 | int64_t cost = |
2242 | 17 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
2243 | 17 | metrics_context.finish_report(); |
2244 | 17 | LOG_WARNING("recycle indexes finished, cost={}s", cost) |
2245 | 17 | .tag("instance_id", instance_id_) |
2246 | 17 | .tag("num_scanned", num_scanned) |
2247 | 17 | .tag("num_expired", num_expired) |
2248 | 17 | .tag("num_recycled", num_recycled); |
2249 | 17 | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv Line | Count | Source | 2239 | 2 | DORIS_CLOUD_DEFER { | 2240 | 2 | unregister_recycle_task(task_name); | 2241 | 2 | int64_t cost = | 2242 | 2 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 2243 | 2 | metrics_context.finish_report(); | 2244 | 2 | LOG_WARNING("recycle indexes finished, cost={}s", cost) | 2245 | 2 | .tag("instance_id", instance_id_) | 2246 | 2 | .tag("num_scanned", num_scanned) | 2247 | 2 | .tag("num_expired", num_expired) | 2248 | 2 | .tag("num_recycled", num_recycled); | 2249 | 2 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv Line | Count | Source | 2239 | 15 | DORIS_CLOUD_DEFER { | 2240 | 15 | unregister_recycle_task(task_name); | 2241 | 15 | int64_t cost = | 2242 | 15 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 2243 | 15 | metrics_context.finish_report(); | 2244 | 15 | LOG_WARNING("recycle indexes finished, cost={}s", cost) | 2245 | 15 | .tag("instance_id", instance_id_) | 2246 | 15 | .tag("num_scanned", num_scanned) | 2247 | 15 | .tag("num_expired", num_expired) | 2248 | 15 | .tag("num_recycled", num_recycled); | 2249 | 15 | }; |
|
2250 | | |
2251 | 17 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
2252 | | |
2253 | | // Elements in `index_keys` has the same lifetime as `it` in `scan_and_recycle` |
2254 | 17 | std::vector<std::string_view> index_keys; |
2255 | 17 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { |
2256 | 10 | ++num_scanned; |
2257 | 10 | RecycleIndexPB index_pb; |
2258 | 10 | if (!index_pb.ParseFromArray(v.data(), v.size())) { |
2259 | 0 | LOG_WARNING("malformed recycle index value").tag("key", hex(k)); |
2260 | 0 | return -1; |
2261 | 0 | } |
2262 | 10 | int64_t current_time = ::time(nullptr); |
2263 | 10 | if (current_time < |
2264 | 10 | calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired |
2265 | 0 | return 0; |
2266 | 0 | } |
2267 | 10 | ++num_expired; |
2268 | | // decode index_id |
2269 | 10 | auto k1 = k; |
2270 | 10 | k1.remove_prefix(1); |
2271 | 10 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
2272 | 10 | decode_key(&k1, &out); |
2273 | | // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB |
2274 | 10 | auto index_id = std::get<int64_t>(std::get<0>(out[3])); |
2275 | 10 | LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_ |
2276 | 10 | << " table_id=" << index_pb.table_id() << " index_id=" << index_id |
2277 | 10 | << " state=" << RecycleIndexPB::State_Name(index_pb.state()); |
2278 | | // Change state to RECYCLING |
2279 | 10 | std::unique_ptr<Transaction> txn; |
2280 | 10 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
2281 | 10 | if (err != TxnErrorCode::TXN_OK) { |
2282 | 0 | LOG_WARNING("failed to create txn").tag("err", err); |
2283 | 0 | return -1; |
2284 | 0 | } |
2285 | 10 | std::string val; |
2286 | 10 | err = txn->get(k, &val); |
2287 | 10 | if (err == |
2288 | 10 | TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it |
2289 | 0 | LOG_INFO("index {} has been recycled or committed", index_id); |
2290 | 0 | return 0; |
2291 | 0 | } |
2292 | 10 | if (err != TxnErrorCode::TXN_OK) { |
2293 | 0 | LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err); |
2294 | 0 | return -1; |
2295 | 0 | } |
2296 | 10 | index_pb.Clear(); |
2297 | 10 | if (!index_pb.ParseFromString(val)) { |
2298 | 0 | LOG_WARNING("malformed recycle index value").tag("key", hex(k)); |
2299 | 0 | return -1; |
2300 | 0 | } |
2301 | 10 | if (index_pb.state() != RecycleIndexPB::RECYCLING) { |
2302 | 9 | index_pb.set_state(RecycleIndexPB::RECYCLING); |
2303 | 9 | txn->put(k, index_pb.SerializeAsString()); |
2304 | 9 | err = txn->commit(); |
2305 | 9 | if (err != TxnErrorCode::TXN_OK) { |
2306 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); |
2307 | 0 | return -1; |
2308 | 0 | } |
2309 | 9 | } |
2310 | 10 | if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) { |
2311 | 1 | LOG_WARNING("failed to recycle tablets under index") |
2312 | 1 | .tag("table_id", index_pb.table_id()) |
2313 | 1 | .tag("instance_id", instance_id_) |
2314 | 1 | .tag("index_id", index_id); |
2315 | 1 | return -1; |
2316 | 1 | } |
2317 | | |
2318 | 9 | if (index_pb.has_db_id()) { |
2319 | | // Recycle the versioned keys |
2320 | 3 | std::unique_ptr<Transaction> txn; |
2321 | 3 | err = txn_kv_->create_txn(&txn); |
2322 | 3 | if (err != TxnErrorCode::TXN_OK) { |
2323 | 0 | LOG_WARNING("failed to create txn").tag("err", err); |
2324 | 0 | return -1; |
2325 | 0 | } |
2326 | 3 | std::string meta_key = versioned::meta_index_key({instance_id_, index_id}); |
2327 | 3 | std::string index_key = versioned::index_index_key({instance_id_, index_id}); |
2328 | 3 | std::string index_inverted_key = versioned::index_inverted_key( |
2329 | 3 | {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id}); |
2330 | 3 | versioned_remove_all(txn.get(), meta_key); |
2331 | 3 | txn->remove(index_key); |
2332 | 3 | txn->remove(index_inverted_key); |
2333 | 3 | err = txn->commit(); |
2334 | 3 | if (err != TxnErrorCode::TXN_OK) { |
2335 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); |
2336 | 0 | return -1; |
2337 | 0 | } |
2338 | 3 | } |
2339 | | |
2340 | 9 | metrics_context.total_recycled_num = ++num_recycled; |
2341 | 9 | metrics_context.report(); |
2342 | 9 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); |
2343 | 9 | index_keys.push_back(k); |
2344 | 9 | return 0; |
2345 | 9 | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 2255 | 2 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { | 2256 | 2 | ++num_scanned; | 2257 | 2 | RecycleIndexPB index_pb; | 2258 | 2 | if (!index_pb.ParseFromArray(v.data(), v.size())) { | 2259 | 0 | LOG_WARNING("malformed recycle index value").tag("key", hex(k)); | 2260 | 0 | return -1; | 2261 | 0 | } | 2262 | 2 | int64_t current_time = ::time(nullptr); | 2263 | 2 | if (current_time < | 2264 | 2 | calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired | 2265 | 0 | return 0; | 2266 | 0 | } | 2267 | 2 | ++num_expired; | 2268 | | // decode index_id | 2269 | 2 | auto k1 = k; | 2270 | 2 | k1.remove_prefix(1); | 2271 | 2 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 2272 | 2 | decode_key(&k1, &out); | 2273 | | // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB | 2274 | 2 | auto index_id = std::get<int64_t>(std::get<0>(out[3])); | 2275 | 2 | LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_ | 2276 | 2 | << " table_id=" << index_pb.table_id() << " index_id=" << index_id | 2277 | 2 | << " state=" << RecycleIndexPB::State_Name(index_pb.state()); | 2278 | | // Change state to RECYCLING | 2279 | 2 | std::unique_ptr<Transaction> txn; | 2280 | 2 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 2281 | 2 | if (err != TxnErrorCode::TXN_OK) { | 2282 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 2283 | 0 | return -1; | 2284 | 0 | } | 2285 | 2 | std::string val; | 2286 | 2 | err = txn->get(k, &val); | 2287 | 2 | if (err == | 2288 | 2 | TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it | 2289 | 0 | LOG_INFO("index {} has been recycled or committed", index_id); | 2290 | 0 | return 0; | 2291 | 0 | } | 2292 | 2 | if (err != TxnErrorCode::TXN_OK) { | 
2293 | 0 | LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err); | 2294 | 0 | return -1; | 2295 | 0 | } | 2296 | 2 | index_pb.Clear(); | 2297 | 2 | if (!index_pb.ParseFromString(val)) { | 2298 | 0 | LOG_WARNING("malformed recycle index value").tag("key", hex(k)); | 2299 | 0 | return -1; | 2300 | 0 | } | 2301 | 2 | if (index_pb.state() != RecycleIndexPB::RECYCLING) { | 2302 | 1 | index_pb.set_state(RecycleIndexPB::RECYCLING); | 2303 | 1 | txn->put(k, index_pb.SerializeAsString()); | 2304 | 1 | err = txn->commit(); | 2305 | 1 | if (err != TxnErrorCode::TXN_OK) { | 2306 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); | 2307 | 0 | return -1; | 2308 | 0 | } | 2309 | 1 | } | 2310 | 2 | if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) { | 2311 | 1 | LOG_WARNING("failed to recycle tablets under index") | 2312 | 1 | .tag("table_id", index_pb.table_id()) | 2313 | 1 | .tag("instance_id", instance_id_) | 2314 | 1 | .tag("index_id", index_id); | 2315 | 1 | return -1; | 2316 | 1 | } | 2317 | | | 2318 | 1 | if (index_pb.has_db_id()) { | 2319 | | // Recycle the versioned keys | 2320 | 1 | std::unique_ptr<Transaction> txn; | 2321 | 1 | err = txn_kv_->create_txn(&txn); | 2322 | 1 | if (err != TxnErrorCode::TXN_OK) { | 2323 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 2324 | 0 | return -1; | 2325 | 0 | } | 2326 | 1 | std::string meta_key = versioned::meta_index_key({instance_id_, index_id}); | 2327 | 1 | std::string index_key = versioned::index_index_key({instance_id_, index_id}); | 2328 | 1 | std::string index_inverted_key = versioned::index_inverted_key( | 2329 | 1 | {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id}); | 2330 | 1 | versioned_remove_all(txn.get(), meta_key); | 2331 | 1 | txn->remove(index_key); | 2332 | 1 | txn->remove(index_inverted_key); | 2333 | 1 | err = txn->commit(); | 2334 | 1 | if (err != TxnErrorCode::TXN_OK) { | 2335 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); | 
2336 | 0 | return -1; | 2337 | 0 | } | 2338 | 1 | } | 2339 | | | 2340 | 1 | metrics_context.total_recycled_num = ++num_recycled; | 2341 | 1 | metrics_context.report(); | 2342 | 1 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); | 2343 | 1 | index_keys.push_back(k); | 2344 | 1 | return 0; | 2345 | 1 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 2255 | 8 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { | 2256 | 8 | ++num_scanned; | 2257 | 8 | RecycleIndexPB index_pb; | 2258 | 8 | if (!index_pb.ParseFromArray(v.data(), v.size())) { | 2259 | 0 | LOG_WARNING("malformed recycle index value").tag("key", hex(k)); | 2260 | 0 | return -1; | 2261 | 0 | } | 2262 | 8 | int64_t current_time = ::time(nullptr); | 2263 | 8 | if (current_time < | 2264 | 8 | calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired | 2265 | 0 | return 0; | 2266 | 0 | } | 2267 | 8 | ++num_expired; | 2268 | | // decode index_id | 2269 | 8 | auto k1 = k; | 2270 | 8 | k1.remove_prefix(1); | 2271 | 8 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 2272 | 8 | decode_key(&k1, &out); | 2273 | | // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB | 2274 | 8 | auto index_id = std::get<int64_t>(std::get<0>(out[3])); | 2275 | 8 | LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_ | 2276 | 8 | << " table_id=" << index_pb.table_id() << " index_id=" << index_id | 2277 | 8 | << " state=" << RecycleIndexPB::State_Name(index_pb.state()); | 2278 | | // Change state to RECYCLING | 2279 | 8 | std::unique_ptr<Transaction> txn; | 2280 | 8 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 2281 | 8 | if (err != TxnErrorCode::TXN_OK) { | 2282 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 2283 | 0 | return -1; | 2284 | 0 | } | 2285 | 8 | std::string val; | 2286 | 8 | err = txn->get(k, &val); | 2287 | 8 | if (err == | 2288 | 8 | TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it | 2289 | 0 | LOG_INFO("index {} has been recycled or committed", index_id); | 2290 | 0 | return 0; | 2291 | 0 | } | 2292 | 8 | if (err != TxnErrorCode::TXN_OK) { | 2293 | 0 
| LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err); | 2294 | 0 | return -1; | 2295 | 0 | } | 2296 | 8 | index_pb.Clear(); | 2297 | 8 | if (!index_pb.ParseFromString(val)) { | 2298 | 0 | LOG_WARNING("malformed recycle index value").tag("key", hex(k)); | 2299 | 0 | return -1; | 2300 | 0 | } | 2301 | 8 | if (index_pb.state() != RecycleIndexPB::RECYCLING) { | 2302 | 8 | index_pb.set_state(RecycleIndexPB::RECYCLING); | 2303 | 8 | txn->put(k, index_pb.SerializeAsString()); | 2304 | 8 | err = txn->commit(); | 2305 | 8 | if (err != TxnErrorCode::TXN_OK) { | 2306 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); | 2307 | 0 | return -1; | 2308 | 0 | } | 2309 | 8 | } | 2310 | 8 | if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) { | 2311 | 0 | LOG_WARNING("failed to recycle tablets under index") | 2312 | 0 | .tag("table_id", index_pb.table_id()) | 2313 | 0 | .tag("instance_id", instance_id_) | 2314 | 0 | .tag("index_id", index_id); | 2315 | 0 | return -1; | 2316 | 0 | } | 2317 | | | 2318 | 8 | if (index_pb.has_db_id()) { | 2319 | | // Recycle the versioned keys | 2320 | 2 | std::unique_ptr<Transaction> txn; | 2321 | 2 | err = txn_kv_->create_txn(&txn); | 2322 | 2 | if (err != TxnErrorCode::TXN_OK) { | 2323 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 2324 | 0 | return -1; | 2325 | 0 | } | 2326 | 2 | std::string meta_key = versioned::meta_index_key({instance_id_, index_id}); | 2327 | 2 | std::string index_key = versioned::index_index_key({instance_id_, index_id}); | 2328 | 2 | std::string index_inverted_key = versioned::index_inverted_key( | 2329 | 2 | {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id}); | 2330 | 2 | versioned_remove_all(txn.get(), meta_key); | 2331 | 2 | txn->remove(index_key); | 2332 | 2 | txn->remove(index_inverted_key); | 2333 | 2 | err = txn->commit(); | 2334 | 2 | if (err != TxnErrorCode::TXN_OK) { | 2335 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); | 2336 | 0 | 
return -1; | 2337 | 0 | } | 2338 | 2 | } | 2339 | | | 2340 | 8 | metrics_context.total_recycled_num = ++num_recycled; | 2341 | 8 | metrics_context.report(); | 2342 | 8 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); | 2343 | 8 | index_keys.push_back(k); | 2344 | 8 | return 0; | 2345 | 8 | }; |
|
2346 | | |
2347 | 17 | auto loop_done = [&index_keys, this]() -> int { |
2348 | 6 | if (index_keys.empty()) return 0; |
2349 | 5 | DORIS_CLOUD_DEFER { |
2350 | 5 | index_keys.clear(); |
2351 | 5 | }; recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv Line | Count | Source | 2349 | 1 | DORIS_CLOUD_DEFER { | 2350 | 1 | index_keys.clear(); | 2351 | 1 | }; |
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv Line | Count | Source | 2349 | 4 | DORIS_CLOUD_DEFER { | 2350 | 4 | index_keys.clear(); | 2351 | 4 | }; |
|
2352 | 5 | if (0 != txn_remove(txn_kv_.get(), index_keys)) { |
2353 | 0 | LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_; |
2354 | 0 | return -1; |
2355 | 0 | } |
2356 | 5 | return 0; |
2357 | 5 | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv Line | Count | Source | 2347 | 2 | auto loop_done = [&index_keys, this]() -> int { | 2348 | 2 | if (index_keys.empty()) return 0; | 2349 | 1 | DORIS_CLOUD_DEFER { | 2350 | 1 | index_keys.clear(); | 2351 | 1 | }; | 2352 | 1 | if (0 != txn_remove(txn_kv_.get(), index_keys)) { | 2353 | 0 | LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_; | 2354 | 0 | return -1; | 2355 | 0 | } | 2356 | 1 | return 0; | 2357 | 1 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv Line | Count | Source | 2347 | 4 | auto loop_done = [&index_keys, this]() -> int { | 2348 | 4 | if (index_keys.empty()) return 0; | 2349 | 4 | DORIS_CLOUD_DEFER { | 2350 | 4 | index_keys.clear(); | 2351 | 4 | }; | 2352 | 4 | if (0 != txn_remove(txn_kv_.get(), index_keys)) { | 2353 | 0 | LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_; | 2354 | 0 | return -1; | 2355 | 0 | } | 2356 | 4 | return 0; | 2357 | 4 | }; |
|
2358 | | |
2359 | 17 | if (config::enable_recycler_stats_metrics) { |
2360 | 0 | scan_and_statistics_indexes(); |
2361 | 0 | } |
2362 | | // recycle_func and loop_done for scan and recycle |
2363 | 17 | return scan_and_recycle(index_key0, index_key1, std::move(recycle_func), std::move(loop_done)); |
2364 | 17 | } |
2365 | | |
2366 | | bool check_lazy_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string instance_id, |
2367 | 8.24k | int64_t tablet_id) { |
2368 | 8.24k | TEST_SYNC_POINT_RETURN_WITH_VALUE("check_lazy_txn_finished::bypass_check", true); |
2369 | | |
2370 | 8.24k | std::unique_ptr<Transaction> txn; |
2371 | 8.24k | TxnErrorCode err = txn_kv->create_txn(&txn); |
2372 | 8.24k | if (err != TxnErrorCode::TXN_OK) { |
2373 | 0 | LOG(WARNING) << "failed to create txn, instance_id=" << instance_id |
2374 | 0 | << " tablet_id=" << tablet_id << " err=" << err; |
2375 | 0 | return false; |
2376 | 0 | } |
2377 | | |
2378 | 8.24k | std::string tablet_idx_key = meta_tablet_idx_key({instance_id, tablet_id}); |
2379 | 8.24k | std::string tablet_idx_val; |
2380 | 8.24k | err = txn->get(tablet_idx_key, &tablet_idx_val); |
2381 | 8.24k | if (TxnErrorCode::TXN_OK != err) { |
2382 | 0 | LOG(WARNING) << "failed to get tablet index, instance_id=" << instance_id |
2383 | 0 | << " tablet_id=" << tablet_id << " err=" << err |
2384 | 0 | << " key=" << hex(tablet_idx_key); |
2385 | 0 | return false; |
2386 | 0 | } |
2387 | | |
2388 | 8.24k | TabletIndexPB tablet_idx_pb; |
2389 | 8.24k | if (!tablet_idx_pb.ParseFromString(tablet_idx_val)) { |
2390 | 0 | LOG(WARNING) << "failed to parse tablet_idx_pb, instance_id=" << instance_id |
2391 | 0 | << " tablet_id=" << tablet_id; |
2392 | 0 | return false; |
2393 | 0 | } |
2394 | | |
2395 | 8.24k | if (!tablet_idx_pb.has_db_id()) { |
2396 | | // In the previous version, the db_id was not set in the index_pb. |
2397 | | // If updating to the version which enable txn lazy commit, the db_id will be set. |
2398 | 0 | LOG(INFO) << "txn index has no db_id, tablet_id=" << tablet_id |
2399 | 0 | << " instance_id=" << instance_id |
2400 | 0 | << " tablet_idx_pb=" << tablet_idx_pb.ShortDebugString(); |
2401 | 0 | return true; |
2402 | 0 | } |
2403 | | |
2404 | 8.24k | std::string ver_val; |
2405 | 8.24k | std::string ver_key = |
2406 | 8.24k | partition_version_key({instance_id, tablet_idx_pb.db_id(), tablet_idx_pb.table_id(), |
2407 | 8.24k | tablet_idx_pb.partition_id()}); |
2408 | 8.24k | err = txn->get(ver_key, &ver_val); |
2409 | | |
2410 | 8.24k | if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) { |
2411 | 204 | LOG(INFO) << "" |
2412 | 204 | "partition version not found, instance_id=" |
2413 | 204 | << instance_id << " db_id=" << tablet_idx_pb.db_id() |
2414 | 204 | << " table_id=" << tablet_idx_pb.table_id() |
2415 | 204 | << " partition_id=" << tablet_idx_pb.partition_id() << " tablet_id=" << tablet_id |
2416 | 204 | << " key=" << hex(ver_key); |
2417 | 204 | return true; |
2418 | 204 | } |
2419 | | |
2420 | 8.03k | if (TxnErrorCode::TXN_OK != err) { |
2421 | 0 | LOG(WARNING) << "failed to get partition version, instance_id=" << instance_id |
2422 | 0 | << " db_id=" << tablet_idx_pb.db_id() |
2423 | 0 | << " table_id=" << tablet_idx_pb.table_id() |
2424 | 0 | << " partition_id=" << tablet_idx_pb.partition_id() |
2425 | 0 | << " tablet_id=" << tablet_id << " key=" << hex(ver_key) << " err=" << err; |
2426 | 0 | return false; |
2427 | 0 | } |
2428 | | |
2429 | 8.03k | VersionPB version_pb; |
2430 | 8.03k | if (!version_pb.ParseFromString(ver_val)) { |
2431 | 0 | LOG(WARNING) << "failed to parse version_pb, instance_id=" << instance_id |
2432 | 0 | << " db_id=" << tablet_idx_pb.db_id() |
2433 | 0 | << " table_id=" << tablet_idx_pb.table_id() |
2434 | 0 | << " partition_id=" << tablet_idx_pb.partition_id() |
2435 | 0 | << " tablet_id=" << tablet_id << " key=" << hex(ver_key); |
2436 | 0 | return false; |
2437 | 0 | } |
2438 | | |
2439 | 8.03k | if (version_pb.pending_txn_ids_size() > 0) { |
2440 | 4.00k | TEST_SYNC_POINT_CALLBACK("check_lazy_txn_finished::txn_not_finished"); |
2441 | 4.00k | DCHECK(version_pb.pending_txn_ids_size() == 1); |
2442 | 4.00k | LOG(WARNING) << "lazy txn not finished, instance_id=" << instance_id |
2443 | 4.00k | << " db_id=" << tablet_idx_pb.db_id() |
2444 | 4.00k | << " table_id=" << tablet_idx_pb.table_id() |
2445 | 4.00k | << " partition_id=" << tablet_idx_pb.partition_id() |
2446 | 4.00k | << " tablet_id=" << tablet_id << " txn_id=" << version_pb.pending_txn_ids(0) |
2447 | 4.00k | << " key=" << hex(ver_key); |
2448 | 4.00k | return false; |
2449 | 4.00k | } |
2450 | 4.03k | return true; |
2451 | 8.03k | } |
2452 | | |
2453 | 15 | int InstanceRecycler::recycle_partitions() { |
2454 | 15 | const std::string task_name = "recycle_partitions"; |
2455 | 15 | int64_t num_scanned = 0; |
2456 | 15 | int64_t num_expired = 0; |
2457 | 15 | int64_t num_recycled = 0; |
2458 | 15 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
2459 | | |
2460 | 15 | RecyclePartKeyInfo part_key_info0 {instance_id_, 0}; |
2461 | 15 | RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX}; |
2462 | 15 | std::string part_key0; |
2463 | 15 | std::string part_key1; |
2464 | 15 | recycle_partition_key(part_key_info0, &part_key0); |
2465 | 15 | recycle_partition_key(part_key_info1, &part_key1); |
2466 | | |
2467 | 15 | LOG_WARNING("begin to recycle partitions").tag("instance_id", instance_id_); |
2468 | | |
2469 | 15 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
2470 | 15 | register_recycle_task(task_name, start_time); |
2471 | | |
2472 | 15 | DORIS_CLOUD_DEFER { |
2473 | 15 | unregister_recycle_task(task_name); |
2474 | 15 | int64_t cost = |
2475 | 15 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
2476 | 15 | metrics_context.finish_report(); |
2477 | 15 | LOG_WARNING("recycle partitions finished, cost={}s", cost) |
2478 | 15 | .tag("instance_id", instance_id_) |
2479 | 15 | .tag("num_scanned", num_scanned) |
2480 | 15 | .tag("num_expired", num_expired) |
2481 | 15 | .tag("num_recycled", num_recycled); |
2482 | 15 | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv Line | Count | Source | 2472 | 2 | DORIS_CLOUD_DEFER { | 2473 | 2 | unregister_recycle_task(task_name); | 2474 | 2 | int64_t cost = | 2475 | 2 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 2476 | 2 | metrics_context.finish_report(); | 2477 | 2 | LOG_WARNING("recycle partitions finished, cost={}s", cost) | 2478 | 2 | .tag("instance_id", instance_id_) | 2479 | 2 | .tag("num_scanned", num_scanned) | 2480 | 2 | .tag("num_expired", num_expired) | 2481 | 2 | .tag("num_recycled", num_recycled); | 2482 | 2 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv Line | Count | Source | 2472 | 13 | DORIS_CLOUD_DEFER { | 2473 | 13 | unregister_recycle_task(task_name); | 2474 | 13 | int64_t cost = | 2475 | 13 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 2476 | 13 | metrics_context.finish_report(); | 2477 | 13 | LOG_WARNING("recycle partitions finished, cost={}s", cost) | 2478 | 13 | .tag("instance_id", instance_id_) | 2479 | 13 | .tag("num_scanned", num_scanned) | 2480 | 13 | .tag("num_expired", num_expired) | 2481 | 13 | .tag("num_recycled", num_recycled); | 2482 | 13 | }; |
|
2483 | | |
2484 | 15 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
2485 | | |
2486 | | // Elements in `partition_keys` has the same lifetime as `it` in `scan_and_recycle` |
2487 | 15 | std::vector<std::string_view> partition_keys; |
2488 | 15 | std::vector<std::string> partition_version_keys; |
2489 | 15 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { |
2490 | 9 | ++num_scanned; |
2491 | 9 | RecyclePartitionPB part_pb; |
2492 | 9 | if (!part_pb.ParseFromArray(v.data(), v.size())) { |
2493 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); |
2494 | 0 | return -1; |
2495 | 0 | } |
2496 | 9 | int64_t current_time = ::time(nullptr); |
2497 | 9 | if (current_time < calculate_partition_expired_time(instance_id_, part_pb, |
2498 | 9 | &earlest_ts)) { // not expired |
2499 | 0 | return 0; |
2500 | 0 | } |
2501 | 9 | ++num_expired; |
2502 | | // decode partition_id |
2503 | 9 | auto k1 = k; |
2504 | 9 | k1.remove_prefix(1); |
2505 | 9 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
2506 | 9 | decode_key(&k1, &out); |
2507 | | // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB |
2508 | 9 | auto partition_id = std::get<int64_t>(std::get<0>(out[3])); |
2509 | 9 | LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_ |
2510 | 9 | << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id |
2511 | 9 | << " state=" << RecyclePartitionPB::State_Name(part_pb.state()); |
2512 | | // Change state to RECYCLING |
2513 | 9 | std::unique_ptr<Transaction> txn; |
2514 | 9 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
2515 | 9 | if (err != TxnErrorCode::TXN_OK) { |
2516 | 0 | LOG_WARNING("failed to create txn").tag("err", err); |
2517 | 0 | return -1; |
2518 | 0 | } |
2519 | 9 | std::string val; |
2520 | 9 | err = txn->get(k, &val); |
2521 | 9 | if (err == |
2522 | 9 | TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it |
2523 | 0 | LOG_INFO("partition {} has been recycled or committed", partition_id); |
2524 | 0 | return 0; |
2525 | 0 | } |
2526 | 9 | if (err != TxnErrorCode::TXN_OK) { |
2527 | 0 | LOG_WARNING("failed to get kv"); |
2528 | 0 | return -1; |
2529 | 0 | } |
2530 | 9 | part_pb.Clear(); |
2531 | 9 | if (!part_pb.ParseFromString(val)) { |
2532 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); |
2533 | 0 | return -1; |
2534 | 0 | } |
2535 | | // Partitions with PREPARED state MUST have no data |
2536 | 9 | if (part_pb.state() != RecyclePartitionPB::RECYCLING) { |
2537 | 8 | part_pb.set_state(RecyclePartitionPB::RECYCLING); |
2538 | 8 | txn->put(k, part_pb.SerializeAsString()); |
2539 | 8 | err = txn->commit(); |
2540 | 8 | if (err != TxnErrorCode::TXN_OK) { |
2541 | 0 | LOG_WARNING("failed to commit txn: {}", err); |
2542 | 0 | return -1; |
2543 | 0 | } |
2544 | 8 | } |
2545 | | |
2546 | 9 | int ret = 0; |
2547 | 33 | for (int64_t index_id : part_pb.index_id()) { |
2548 | 33 | if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) { |
2549 | 1 | LOG_WARNING("failed to recycle tablets under partition") |
2550 | 1 | .tag("table_id", part_pb.table_id()) |
2551 | 1 | .tag("instance_id", instance_id_) |
2552 | 1 | .tag("index_id", index_id) |
2553 | 1 | .tag("partition_id", partition_id); |
2554 | 1 | ret = -1; |
2555 | 1 | } |
2556 | 33 | } |
2557 | 9 | if (ret == 0 && part_pb.has_db_id()) { |
2558 | | // Recycle the versioned keys |
2559 | 8 | std::unique_ptr<Transaction> txn; |
2560 | 8 | err = txn_kv_->create_txn(&txn); |
2561 | 8 | if (err != TxnErrorCode::TXN_OK) { |
2562 | 0 | LOG_WARNING("failed to create txn").tag("err", err); |
2563 | 0 | return -1; |
2564 | 0 | } |
2565 | 8 | std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id}); |
2566 | 8 | std::string index_key = versioned::partition_index_key({instance_id_, partition_id}); |
2567 | 8 | std::string inverted_index_key = versioned::partition_inverted_index_key( |
2568 | 8 | {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}); |
2569 | 8 | std::string partition_version_key = |
2570 | 8 | versioned::partition_version_key({instance_id_, partition_id}); |
2571 | 8 | versioned_remove_all(txn.get(), meta_key); |
2572 | 8 | txn->remove(index_key); |
2573 | 8 | txn->remove(inverted_index_key); |
2574 | 8 | versioned_remove_all(txn.get(), partition_version_key); |
2575 | 8 | err = txn->commit(); |
2576 | 8 | if (err != TxnErrorCode::TXN_OK) { |
2577 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); |
2578 | 0 | return -1; |
2579 | 0 | } |
2580 | 8 | } |
2581 | | |
2582 | 9 | if (ret == 0) { |
2583 | 8 | ++num_recycled; |
2584 | 8 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); |
2585 | 8 | partition_keys.push_back(k); |
2586 | 8 | if (part_pb.db_id() > 0) { |
2587 | 8 | partition_version_keys.push_back(partition_version_key( |
2588 | 8 | {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id})); |
2589 | 8 | } |
2590 | 8 | metrics_context.total_recycled_num = num_recycled; |
2591 | 8 | metrics_context.report(); |
2592 | 8 | } |
2593 | 9 | return ret; |
2594 | 9 | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 2489 | 2 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { | 2490 | 2 | ++num_scanned; | 2491 | 2 | RecyclePartitionPB part_pb; | 2492 | 2 | if (!part_pb.ParseFromArray(v.data(), v.size())) { | 2493 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); | 2494 | 0 | return -1; | 2495 | 0 | } | 2496 | 2 | int64_t current_time = ::time(nullptr); | 2497 | 2 | if (current_time < calculate_partition_expired_time(instance_id_, part_pb, | 2498 | 2 | &earlest_ts)) { // not expired | 2499 | 0 | return 0; | 2500 | 0 | } | 2501 | 2 | ++num_expired; | 2502 | | // decode partition_id | 2503 | 2 | auto k1 = k; | 2504 | 2 | k1.remove_prefix(1); | 2505 | 2 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 2506 | 2 | decode_key(&k1, &out); | 2507 | | // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB | 2508 | 2 | auto partition_id = std::get<int64_t>(std::get<0>(out[3])); | 2509 | 2 | LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_ | 2510 | 2 | << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id | 2511 | 2 | << " state=" << RecyclePartitionPB::State_Name(part_pb.state()); | 2512 | | // Change state to RECYCLING | 2513 | 2 | std::unique_ptr<Transaction> txn; | 2514 | 2 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 2515 | 2 | if (err != TxnErrorCode::TXN_OK) { | 2516 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 2517 | 0 | return -1; | 2518 | 0 | } | 2519 | 2 | std::string val; | 2520 | 2 | err = txn->get(k, &val); | 2521 | 2 | if (err == | 2522 | 2 | TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it | 2523 | 0 | LOG_INFO("partition {} has been recycled or committed", partition_id); | 2524 | 0 | return 0; | 2525 | 0 
| } | 2526 | 2 | if (err != TxnErrorCode::TXN_OK) { | 2527 | 0 | LOG_WARNING("failed to get kv"); | 2528 | 0 | return -1; | 2529 | 0 | } | 2530 | 2 | part_pb.Clear(); | 2531 | 2 | if (!part_pb.ParseFromString(val)) { | 2532 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); | 2533 | 0 | return -1; | 2534 | 0 | } | 2535 | | // Partitions with PREPARED state MUST have no data | 2536 | 2 | if (part_pb.state() != RecyclePartitionPB::RECYCLING) { | 2537 | 1 | part_pb.set_state(RecyclePartitionPB::RECYCLING); | 2538 | 1 | txn->put(k, part_pb.SerializeAsString()); | 2539 | 1 | err = txn->commit(); | 2540 | 1 | if (err != TxnErrorCode::TXN_OK) { | 2541 | 0 | LOG_WARNING("failed to commit txn: {}", err); | 2542 | 0 | return -1; | 2543 | 0 | } | 2544 | 1 | } | 2545 | | | 2546 | 2 | int ret = 0; | 2547 | 2 | for (int64_t index_id : part_pb.index_id()) { | 2548 | 2 | if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) { | 2549 | 1 | LOG_WARNING("failed to recycle tablets under partition") | 2550 | 1 | .tag("table_id", part_pb.table_id()) | 2551 | 1 | .tag("instance_id", instance_id_) | 2552 | 1 | .tag("index_id", index_id) | 2553 | 1 | .tag("partition_id", partition_id); | 2554 | 1 | ret = -1; | 2555 | 1 | } | 2556 | 2 | } | 2557 | 2 | if (ret == 0 && part_pb.has_db_id()) { | 2558 | | // Recycle the versioned keys | 2559 | 1 | std::unique_ptr<Transaction> txn; | 2560 | 1 | err = txn_kv_->create_txn(&txn); | 2561 | 1 | if (err != TxnErrorCode::TXN_OK) { | 2562 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 2563 | 0 | return -1; | 2564 | 0 | } | 2565 | 1 | std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id}); | 2566 | 1 | std::string index_key = versioned::partition_index_key({instance_id_, partition_id}); | 2567 | 1 | std::string inverted_index_key = versioned::partition_inverted_index_key( | 2568 | 1 | {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}); | 2569 
| 1 | std::string partition_version_key = | 2570 | 1 | versioned::partition_version_key({instance_id_, partition_id}); | 2571 | 1 | versioned_remove_all(txn.get(), meta_key); | 2572 | 1 | txn->remove(index_key); | 2573 | 1 | txn->remove(inverted_index_key); | 2574 | 1 | versioned_remove_all(txn.get(), partition_version_key); | 2575 | 1 | err = txn->commit(); | 2576 | 1 | if (err != TxnErrorCode::TXN_OK) { | 2577 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); | 2578 | 0 | return -1; | 2579 | 0 | } | 2580 | 1 | } | 2581 | | | 2582 | 2 | if (ret == 0) { | 2583 | 1 | ++num_recycled; | 2584 | 1 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); | 2585 | 1 | partition_keys.push_back(k); | 2586 | 1 | if (part_pb.db_id() > 0) { | 2587 | 1 | partition_version_keys.push_back(partition_version_key( | 2588 | 1 | {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id})); | 2589 | 1 | } | 2590 | 1 | metrics_context.total_recycled_num = num_recycled; | 2591 | 1 | metrics_context.report(); | 2592 | 1 | } | 2593 | 2 | return ret; | 2594 | 2 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 2489 | 7 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { | 2490 | 7 | ++num_scanned; | 2491 | 7 | RecyclePartitionPB part_pb; | 2492 | 7 | if (!part_pb.ParseFromArray(v.data(), v.size())) { | 2493 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); | 2494 | 0 | return -1; | 2495 | 0 | } | 2496 | 7 | int64_t current_time = ::time(nullptr); | 2497 | 7 | if (current_time < calculate_partition_expired_time(instance_id_, part_pb, | 2498 | 7 | &earlest_ts)) { // not expired | 2499 | 0 | return 0; | 2500 | 0 | } | 2501 | 7 | ++num_expired; | 2502 | | // decode partition_id | 2503 | 7 | auto k1 = k; | 2504 | 7 | k1.remove_prefix(1); | 2505 | 7 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 2506 | 7 | decode_key(&k1, &out); | 2507 | | // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB | 2508 | 7 | auto partition_id = std::get<int64_t>(std::get<0>(out[3])); | 2509 | 7 | LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_ | 2510 | 7 | << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id | 2511 | 7 | << " state=" << RecyclePartitionPB::State_Name(part_pb.state()); | 2512 | | // Change state to RECYCLING | 2513 | 7 | std::unique_ptr<Transaction> txn; | 2514 | 7 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 2515 | 7 | if (err != TxnErrorCode::TXN_OK) { | 2516 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 2517 | 0 | return -1; | 2518 | 0 | } | 2519 | 7 | std::string val; | 2520 | 7 | err = txn->get(k, &val); | 2521 | 7 | if (err == | 2522 | 7 | TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it | 2523 | 0 | LOG_INFO("partition {} has been recycled or committed", partition_id); | 2524 | 0 | return 0; | 2525 | 0 | } | 
2526 | 7 | if (err != TxnErrorCode::TXN_OK) { | 2527 | 0 | LOG_WARNING("failed to get kv"); | 2528 | 0 | return -1; | 2529 | 0 | } | 2530 | 7 | part_pb.Clear(); | 2531 | 7 | if (!part_pb.ParseFromString(val)) { | 2532 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); | 2533 | 0 | return -1; | 2534 | 0 | } | 2535 | | // Partitions with PREPARED state MUST have no data | 2536 | 7 | if (part_pb.state() != RecyclePartitionPB::RECYCLING) { | 2537 | 7 | part_pb.set_state(RecyclePartitionPB::RECYCLING); | 2538 | 7 | txn->put(k, part_pb.SerializeAsString()); | 2539 | 7 | err = txn->commit(); | 2540 | 7 | if (err != TxnErrorCode::TXN_OK) { | 2541 | 0 | LOG_WARNING("failed to commit txn: {}", err); | 2542 | 0 | return -1; | 2543 | 0 | } | 2544 | 7 | } | 2545 | | | 2546 | 7 | int ret = 0; | 2547 | 31 | for (int64_t index_id : part_pb.index_id()) { | 2548 | 31 | if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) { | 2549 | 0 | LOG_WARNING("failed to recycle tablets under partition") | 2550 | 0 | .tag("table_id", part_pb.table_id()) | 2551 | 0 | .tag("instance_id", instance_id_) | 2552 | 0 | .tag("index_id", index_id) | 2553 | 0 | .tag("partition_id", partition_id); | 2554 | 0 | ret = -1; | 2555 | 0 | } | 2556 | 31 | } | 2557 | 7 | if (ret == 0 && part_pb.has_db_id()) { | 2558 | | // Recycle the versioned keys | 2559 | 7 | std::unique_ptr<Transaction> txn; | 2560 | 7 | err = txn_kv_->create_txn(&txn); | 2561 | 7 | if (err != TxnErrorCode::TXN_OK) { | 2562 | 0 | LOG_WARNING("failed to create txn").tag("err", err); | 2563 | 0 | return -1; | 2564 | 0 | } | 2565 | 7 | std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id}); | 2566 | 7 | std::string index_key = versioned::partition_index_key({instance_id_, partition_id}); | 2567 | 7 | std::string inverted_index_key = versioned::partition_inverted_index_key( | 2568 | 7 | {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}); | 2569 | 
7 | std::string partition_version_key = | 2570 | 7 | versioned::partition_version_key({instance_id_, partition_id}); | 2571 | 7 | versioned_remove_all(txn.get(), meta_key); | 2572 | 7 | txn->remove(index_key); | 2573 | 7 | txn->remove(inverted_index_key); | 2574 | 7 | versioned_remove_all(txn.get(), partition_version_key); | 2575 | 7 | err = txn->commit(); | 2576 | 7 | if (err != TxnErrorCode::TXN_OK) { | 2577 | 0 | LOG_WARNING("failed to commit txn").tag("err", err); | 2578 | 0 | return -1; | 2579 | 0 | } | 2580 | 7 | } | 2581 | | | 2582 | 7 | if (ret == 0) { | 2583 | 7 | ++num_recycled; | 2584 | 7 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); | 2585 | 7 | partition_keys.push_back(k); | 2586 | 7 | if (part_pb.db_id() > 0) { | 2587 | 7 | partition_version_keys.push_back(partition_version_key( | 2588 | 7 | {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id})); | 2589 | 7 | } | 2590 | 7 | metrics_context.total_recycled_num = num_recycled; | 2591 | 7 | metrics_context.report(); | 2592 | 7 | } | 2593 | 7 | return ret; | 2594 | 7 | }; |
|
2595 | | |
2596 | 15 | auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int { |
2597 | 5 | if (partition_keys.empty()) return 0; |
2598 | 4 | DORIS_CLOUD_DEFER { |
2599 | 4 | partition_keys.clear(); |
2600 | 4 | partition_version_keys.clear(); |
2601 | 4 | }; recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv Line | Count | Source | 2598 | 1 | DORIS_CLOUD_DEFER { | 2599 | 1 | partition_keys.clear(); | 2600 | 1 | partition_version_keys.clear(); | 2601 | 1 | }; |
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv Line | Count | Source | 2598 | 3 | DORIS_CLOUD_DEFER { | 2599 | 3 | partition_keys.clear(); | 2600 | 3 | partition_version_keys.clear(); | 2601 | 3 | }; |
|
2602 | 4 | std::unique_ptr<Transaction> txn; |
2603 | 4 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
2604 | 4 | if (err != TxnErrorCode::TXN_OK) { |
2605 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_; |
2606 | 0 | return -1; |
2607 | 0 | } |
2608 | 8 | for (auto& k : partition_keys) { |
2609 | 8 | txn->remove(k); |
2610 | 8 | } |
2611 | 8 | for (auto& k : partition_version_keys) { |
2612 | 8 | txn->remove(k); |
2613 | 8 | } |
2614 | 4 | err = txn->commit(); |
2615 | 4 | if (err != TxnErrorCode::TXN_OK) { |
2616 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_ |
2617 | 0 | << " err=" << err; |
2618 | 0 | return -1; |
2619 | 0 | } |
2620 | 4 | return 0; |
2621 | 4 | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv Line | Count | Source | 2596 | 2 | auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int { | 2597 | 2 | if (partition_keys.empty()) return 0; | 2598 | 1 | DORIS_CLOUD_DEFER { | 2599 | 1 | partition_keys.clear(); | 2600 | 1 | partition_version_keys.clear(); | 2601 | 1 | }; | 2602 | 1 | std::unique_ptr<Transaction> txn; | 2603 | 1 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 2604 | 1 | if (err != TxnErrorCode::TXN_OK) { | 2605 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_; | 2606 | 0 | return -1; | 2607 | 0 | } | 2608 | 1 | for (auto& k : partition_keys) { | 2609 | 1 | txn->remove(k); | 2610 | 1 | } | 2611 | 1 | for (auto& k : partition_version_keys) { | 2612 | 1 | txn->remove(k); | 2613 | 1 | } | 2614 | 1 | err = txn->commit(); | 2615 | 1 | if (err != TxnErrorCode::TXN_OK) { | 2616 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_ | 2617 | 0 | << " err=" << err; | 2618 | 0 | return -1; | 2619 | 0 | } | 2620 | 1 | return 0; | 2621 | 1 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv Line | Count | Source | 2596 | 3 | auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int { | 2597 | 3 | if (partition_keys.empty()) return 0; | 2598 | 3 | DORIS_CLOUD_DEFER { | 2599 | 3 | partition_keys.clear(); | 2600 | 3 | partition_version_keys.clear(); | 2601 | 3 | }; | 2602 | 3 | std::unique_ptr<Transaction> txn; | 2603 | 3 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 2604 | 3 | if (err != TxnErrorCode::TXN_OK) { | 2605 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_; | 2606 | 0 | return -1; | 2607 | 0 | } | 2608 | 7 | for (auto& k : partition_keys) { | 2609 | 7 | txn->remove(k); | 2610 | 7 | } | 2611 | 7 | for (auto& k : partition_version_keys) { | 2612 | 7 | txn->remove(k); | 2613 | 7 | } | 2614 | 3 | err = txn->commit(); | 2615 | 3 | if (err != TxnErrorCode::TXN_OK) { | 2616 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_ | 2617 | 0 | << " err=" << err; | 2618 | 0 | return -1; | 2619 | 0 | } | 2620 | 3 | return 0; | 2621 | 3 | }; |
|
2622 | | |
2623 | 15 | if (config::enable_recycler_stats_metrics) { |
2624 | 0 | scan_and_statistics_partitions(); |
2625 | 0 | } |
2626 | | // recycle_func and loop_done for scan and recycle |
2627 | 15 | return scan_and_recycle(part_key0, part_key1, std::move(recycle_func), std::move(loop_done)); |
2628 | 15 | } |
2629 | | |
2630 | 14 | int InstanceRecycler::recycle_versions() { |
2631 | 14 | if (should_recycle_versioned_keys()) { |
2632 | 2 | return recycle_orphan_partitions(); |
2633 | 2 | } |
2634 | | |
2635 | 12 | int64_t num_scanned = 0; |
2636 | 12 | int64_t num_recycled = 0; |
2637 | 12 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions"); |
2638 | | |
2639 | 12 | LOG_WARNING("begin to recycle table and partition versions").tag("instance_id", instance_id_); |
2640 | | |
2641 | 12 | auto start_time = steady_clock::now(); |
2642 | | |
2643 | 12 | DORIS_CLOUD_DEFER { |
2644 | 12 | auto cost = duration<float>(steady_clock::now() - start_time).count(); |
2645 | 12 | metrics_context.finish_report(); |
2646 | 12 | LOG_WARNING("recycle table and partition versions finished, cost={}s", cost) |
2647 | 12 | .tag("instance_id", instance_id_) |
2648 | 12 | .tag("num_scanned", num_scanned) |
2649 | 12 | .tag("num_recycled", num_recycled); |
2650 | 12 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv Line | Count | Source | 2643 | 12 | DORIS_CLOUD_DEFER { | 2644 | 12 | auto cost = duration<float>(steady_clock::now() - start_time).count(); | 2645 | 12 | metrics_context.finish_report(); | 2646 | 12 | LOG_WARNING("recycle table and partition versions finished, cost={}s", cost) | 2647 | 12 | .tag("instance_id", instance_id_) | 2648 | 12 | .tag("num_scanned", num_scanned) | 2649 | 12 | .tag("num_recycled", num_recycled); | 2650 | 12 | }; |
|
2651 | | |
2652 | 12 | auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0}); |
2653 | 12 | auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0}); |
2654 | 12 | int64_t last_scanned_table_id = 0; |
2655 | 12 | bool is_recycled = false; // Is last scanned kv recycled |
2656 | 12 | auto recycle_func = [&num_scanned, &num_recycled, &last_scanned_table_id, &is_recycled, |
2657 | 12 | &metrics_context, this](std::string_view k, std::string_view) { |
2658 | 2 | ++num_scanned; |
2659 | 2 | auto k1 = k; |
2660 | 2 | k1.remove_prefix(1); |
2661 | | // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id} |
2662 | 2 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
2663 | 2 | decode_key(&k1, &out); |
2664 | 2 | DCHECK_EQ(out.size(), 6) << k; |
2665 | 2 | auto table_id = std::get<int64_t>(std::get<0>(out[4])); |
2666 | 2 | if (table_id == last_scanned_table_id) { // Already handle kvs of this table |
2667 | 0 | num_recycled += is_recycled; // Version kv of this table has been recycled |
2668 | 0 | return 0; |
2669 | 0 | } |
2670 | 2 | last_scanned_table_id = table_id; |
2671 | 2 | is_recycled = false; |
2672 | 2 | auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0}); |
2673 | 2 | auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0}); |
2674 | 2 | std::unique_ptr<Transaction> txn; |
2675 | 2 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
2676 | 2 | if (err != TxnErrorCode::TXN_OK) { |
2677 | 0 | return -1; |
2678 | 0 | } |
2679 | 2 | std::unique_ptr<RangeGetIterator> iter; |
2680 | 2 | err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1); |
2681 | 2 | if (err != TxnErrorCode::TXN_OK) { |
2682 | 0 | return -1; |
2683 | 0 | } |
2684 | 2 | if (iter->has_next()) { // Table is useful, should not recycle table and partition versions |
2685 | 1 | return 0; |
2686 | 1 | } |
2687 | 1 | auto db_id = std::get<int64_t>(std::get<0>(out[3])); |
2688 | | // 1. Remove all partition version kvs of this table |
2689 | 1 | auto partition_version_key_begin = |
2690 | 1 | partition_version_key({instance_id_, db_id, table_id, 0}); |
2691 | 1 | auto partition_version_key_end = |
2692 | 1 | partition_version_key({instance_id_, db_id, table_id, INT64_MAX}); |
2693 | 1 | txn->remove(partition_version_key_begin, partition_version_key_end); |
2694 | 1 | LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin) |
2695 | 1 | << " end=" << hex(partition_version_key_end) << " db_id=" << db_id |
2696 | 1 | << " table_id=" << table_id; |
2697 | | // 2. Remove the table version kv of this table |
2698 | 1 | auto tbl_version_key = table_version_key({instance_id_, db_id, table_id}); |
2699 | 1 | txn->remove(tbl_version_key); |
2700 | 1 | LOG(WARNING) << "remove table version kv " << hex(tbl_version_key); |
2701 | | // 3. Remove mow delete bitmap update lock and tablet job lock |
2702 | 1 | std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1}); |
2703 | 1 | txn->remove(lock_key); |
2704 | 1 | LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key); |
2705 | 1 | std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0}); |
2706 | 1 | std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX}); |
2707 | 1 | txn->remove(tablet_job_key_begin, tablet_job_key_end); |
2708 | 1 | LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin) |
2709 | 1 | << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id |
2710 | 1 | << " table_id=" << table_id; |
2711 | 1 | err = txn->commit(); |
2712 | 1 | if (err != TxnErrorCode::TXN_OK) { |
2713 | 0 | return -1; |
2714 | 0 | } |
2715 | 1 | metrics_context.total_recycled_num = ++num_recycled; |
2716 | 1 | metrics_context.report(); |
2717 | 1 | is_recycled = true; |
2718 | 1 | return 0; |
2719 | 1 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 2657 | 2 | &metrics_context, this](std::string_view k, std::string_view) { | 2658 | 2 | ++num_scanned; | 2659 | 2 | auto k1 = k; | 2660 | 2 | k1.remove_prefix(1); | 2661 | | // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id} | 2662 | 2 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 2663 | 2 | decode_key(&k1, &out); | 2664 | 2 | DCHECK_EQ(out.size(), 6) << k; | 2665 | 2 | auto table_id = std::get<int64_t>(std::get<0>(out[4])); | 2666 | 2 | if (table_id == last_scanned_table_id) { // Already handle kvs of this table | 2667 | 0 | num_recycled += is_recycled; // Version kv of this table has been recycled | 2668 | 0 | return 0; | 2669 | 0 | } | 2670 | 2 | last_scanned_table_id = table_id; | 2671 | 2 | is_recycled = false; | 2672 | 2 | auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0}); | 2673 | 2 | auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0}); | 2674 | 2 | std::unique_ptr<Transaction> txn; | 2675 | 2 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 2676 | 2 | if (err != TxnErrorCode::TXN_OK) { | 2677 | 0 | return -1; | 2678 | 0 | } | 2679 | 2 | std::unique_ptr<RangeGetIterator> iter; | 2680 | 2 | err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1); | 2681 | 2 | if (err != TxnErrorCode::TXN_OK) { | 2682 | 0 | return -1; | 2683 | 0 | } | 2684 | 2 | if (iter->has_next()) { // Table is useful, should not recycle table and partition versions | 2685 | 1 | return 0; | 2686 | 1 | } | 2687 | 1 | auto db_id = std::get<int64_t>(std::get<0>(out[3])); | 2688 | | // 1. 
Remove all partition version kvs of this table | 2689 | 1 | auto partition_version_key_begin = | 2690 | 1 | partition_version_key({instance_id_, db_id, table_id, 0}); | 2691 | 1 | auto partition_version_key_end = | 2692 | 1 | partition_version_key({instance_id_, db_id, table_id, INT64_MAX}); | 2693 | 1 | txn->remove(partition_version_key_begin, partition_version_key_end); | 2694 | 1 | LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin) | 2695 | 1 | << " end=" << hex(partition_version_key_end) << " db_id=" << db_id | 2696 | 1 | << " table_id=" << table_id; | 2697 | | // 2. Remove the table version kv of this table | 2698 | 1 | auto tbl_version_key = table_version_key({instance_id_, db_id, table_id}); | 2699 | 1 | txn->remove(tbl_version_key); | 2700 | 1 | LOG(WARNING) << "remove table version kv " << hex(tbl_version_key); | 2701 | | // 3. Remove mow delete bitmap update lock and tablet job lock | 2702 | 1 | std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1}); | 2703 | 1 | txn->remove(lock_key); | 2704 | 1 | LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key); | 2705 | 1 | std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0}); | 2706 | 1 | std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX}); | 2707 | 1 | txn->remove(tablet_job_key_begin, tablet_job_key_end); | 2708 | 1 | LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin) | 2709 | 1 | << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id | 2710 | 1 | << " table_id=" << table_id; | 2711 | 1 | err = txn->commit(); | 2712 | 1 | if (err != TxnErrorCode::TXN_OK) { | 2713 | 0 | return -1; | 2714 | 0 | } | 2715 | 1 | metrics_context.total_recycled_num = ++num_recycled; | 2716 | 1 | metrics_context.report(); | 2717 | 1 | is_recycled = true; | 2718 | 1 | return 0; | 2719 | 1 | }; |
|
2720 | | |
2721 | 12 | if (config::enable_recycler_stats_metrics) { |
2722 | 0 | scan_and_statistics_versions(); |
2723 | 0 | } |
2724 | | // recycle_func and loop_done for scan and recycle |
2725 | 12 | return scan_and_recycle(version_key_begin, version_key_end, std::move(recycle_func)); |
2726 | 14 | } |
2727 | | |
2728 | 3 | int InstanceRecycler::recycle_orphan_partitions() { |
2729 | 3 | int64_t num_scanned = 0; |
2730 | 3 | int64_t num_recycled = 0; |
2731 | 3 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_orphan_partitions"); |
2732 | | |
2733 | 3 | LOG_WARNING("begin to recycle orphan table and partition versions") |
2734 | 3 | .tag("instance_id", instance_id_); |
2735 | | |
2736 | 3 | auto start_time = steady_clock::now(); |
2737 | | |
2738 | 3 | DORIS_CLOUD_DEFER { |
2739 | 3 | auto cost = duration<float>(steady_clock::now() - start_time).count(); |
2740 | 3 | metrics_context.finish_report(); |
2741 | 3 | LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost) |
2742 | 3 | .tag("instance_id", instance_id_) |
2743 | 3 | .tag("num_scanned", num_scanned) |
2744 | 3 | .tag("num_recycled", num_recycled); |
2745 | 3 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv Line | Count | Source | 2738 | 3 | DORIS_CLOUD_DEFER { | 2739 | 3 | auto cost = duration<float>(steady_clock::now() - start_time).count(); | 2740 | 3 | metrics_context.finish_report(); | 2741 | 3 | LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost) | 2742 | 3 | .tag("instance_id", instance_id_) | 2743 | 3 | .tag("num_scanned", num_scanned) | 2744 | 3 | .tag("num_recycled", num_recycled); | 2745 | 3 | }; |
|
2746 | | |
2747 | 3 | bool is_empty_table = false; // whether the table has no indexes |
2748 | 3 | bool is_table_kvs_recycled = false; // whether the table related kvs have been recycled |
2749 | 3 | int64_t current_table_id = 0; // current scanning table id |
2750 | 3 | auto recycle_func = [&num_scanned, &num_recycled, &metrics_context, &is_empty_table, |
2751 | 3 | &current_table_id, &is_table_kvs_recycled, |
2752 | 3 | this](std::string_view k, std::string_view) { |
2753 | 2 | ++num_scanned; |
2754 | | |
2755 | 2 | std::string_view k1(k); |
2756 | 2 | int64_t db_id, table_id, partition_id; |
2757 | 2 | if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id, |
2758 | 2 | &partition_id)) { |
2759 | 0 | LOG(WARNING) << "malformed partition inverted index key " << hex(k); |
2760 | 0 | return -1; |
2761 | 2 | } else if (table_id != current_table_id) { |
2762 | 2 | current_table_id = table_id; |
2763 | 2 | is_table_kvs_recycled = false; |
2764 | 2 | MetaReader meta_reader(instance_id_, txn_kv_.get()); |
2765 | 2 | TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table); |
2766 | 2 | if (err != TxnErrorCode::TXN_OK) { |
2767 | 0 | LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id |
2768 | 0 | << " table_id=" << table_id << " err=" << err; |
2769 | 0 | return -1; |
2770 | 0 | } |
2771 | 2 | } |
2772 | | |
2773 | 2 | if (!is_empty_table) { |
2774 | | // table is not empty, skip recycle |
2775 | 1 | return 0; |
2776 | 1 | } |
2777 | | |
2778 | 1 | std::unique_ptr<Transaction> txn; |
2779 | 1 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
2780 | 1 | if (err != TxnErrorCode::TXN_OK) { |
2781 | 0 | return -1; |
2782 | 0 | } |
2783 | | |
2784 | | // 1. Remove all partition related kvs |
2785 | 1 | std::string partition_meta_key = |
2786 | 1 | versioned::meta_partition_key({instance_id_, partition_id}); |
2787 | 1 | std::string partition_index_key = |
2788 | 1 | versioned::partition_index_key({instance_id_, partition_id}); |
2789 | 1 | std::string partition_inverted_key = versioned::partition_inverted_index_key( |
2790 | 1 | {instance_id_, db_id, table_id, partition_id}); |
2791 | 1 | std::string partition_version_key = |
2792 | 1 | versioned::partition_version_key({instance_id_, partition_id}); |
2793 | 1 | txn->remove(partition_index_key); |
2794 | 1 | txn->remove(partition_inverted_key); |
2795 | 1 | versioned_remove_all(txn.get(), partition_meta_key); |
2796 | 1 | versioned_remove_all(txn.get(), partition_version_key); |
2797 | 1 | LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id |
2798 | 1 | << " table_id=" << table_id << " db_id=" << db_id |
2799 | 1 | << " partition_meta_key=" << hex(partition_meta_key) |
2800 | 1 | << " partition_version_key=" << hex(partition_version_key); |
2801 | | |
2802 | 1 | if (!is_table_kvs_recycled) { |
2803 | 1 | is_table_kvs_recycled = true; |
2804 | | |
2805 | | // 2. Remove the table version kv of this table |
2806 | 1 | std::string table_version_key = versioned::table_version_key({instance_id_, table_id}); |
2807 | 1 | versioned_remove_all(txn.get(), table_version_key); |
2808 | 1 | LOG(WARNING) << "remove table version kv " << hex(table_version_key); |
2809 | | // 3. Remove mow delete bitmap update lock and tablet job lock |
2810 | 1 | std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1}); |
2811 | 1 | txn->remove(lock_key); |
2812 | 1 | LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key); |
2813 | 1 | std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0}); |
2814 | 1 | std::string tablet_job_key_end = |
2815 | 1 | mow_tablet_job_key({instance_id_, table_id, INT64_MAX}); |
2816 | 1 | txn->remove(tablet_job_key_begin, tablet_job_key_end); |
2817 | 1 | LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin) |
2818 | 1 | << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id |
2819 | 1 | << " table_id=" << table_id; |
2820 | 1 | } |
2821 | | |
2822 | 1 | err = txn->commit(); |
2823 | 1 | if (err != TxnErrorCode::TXN_OK) { |
2824 | 0 | return -1; |
2825 | 0 | } |
2826 | 1 | metrics_context.total_recycled_num = ++num_recycled; |
2827 | 1 | metrics_context.report(); |
2828 | 1 | return 0; |
2829 | 1 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 2752 | 2 | this](std::string_view k, std::string_view) { | 2753 | 2 | ++num_scanned; | 2754 | | | 2755 | 2 | std::string_view k1(k); | 2756 | 2 | int64_t db_id, table_id, partition_id; | 2757 | 2 | if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id, | 2758 | 2 | &partition_id)) { | 2759 | 0 | LOG(WARNING) << "malformed partition inverted index key " << hex(k); | 2760 | 0 | return -1; | 2761 | 2 | } else if (table_id != current_table_id) { | 2762 | 2 | current_table_id = table_id; | 2763 | 2 | is_table_kvs_recycled = false; | 2764 | 2 | MetaReader meta_reader(instance_id_, txn_kv_.get()); | 2765 | 2 | TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table); | 2766 | 2 | if (err != TxnErrorCode::TXN_OK) { | 2767 | 0 | LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id | 2768 | 0 | << " table_id=" << table_id << " err=" << err; | 2769 | 0 | return -1; | 2770 | 0 | } | 2771 | 2 | } | 2772 | | | 2773 | 2 | if (!is_empty_table) { | 2774 | | // table is not empty, skip recycle | 2775 | 1 | return 0; | 2776 | 1 | } | 2777 | | | 2778 | 1 | std::unique_ptr<Transaction> txn; | 2779 | 1 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 2780 | 1 | if (err != TxnErrorCode::TXN_OK) { | 2781 | 0 | return -1; | 2782 | 0 | } | 2783 | | | 2784 | | // 1. 
Remove all partition related kvs | 2785 | 1 | std::string partition_meta_key = | 2786 | 1 | versioned::meta_partition_key({instance_id_, partition_id}); | 2787 | 1 | std::string partition_index_key = | 2788 | 1 | versioned::partition_index_key({instance_id_, partition_id}); | 2789 | 1 | std::string partition_inverted_key = versioned::partition_inverted_index_key( | 2790 | 1 | {instance_id_, db_id, table_id, partition_id}); | 2791 | 1 | std::string partition_version_key = | 2792 | 1 | versioned::partition_version_key({instance_id_, partition_id}); | 2793 | 1 | txn->remove(partition_index_key); | 2794 | 1 | txn->remove(partition_inverted_key); | 2795 | 1 | versioned_remove_all(txn.get(), partition_meta_key); | 2796 | 1 | versioned_remove_all(txn.get(), partition_version_key); | 2797 | 1 | LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id | 2798 | 1 | << " table_id=" << table_id << " db_id=" << db_id | 2799 | 1 | << " partition_meta_key=" << hex(partition_meta_key) | 2800 | 1 | << " partition_version_key=" << hex(partition_version_key); | 2801 | | | 2802 | 1 | if (!is_table_kvs_recycled) { | 2803 | 1 | is_table_kvs_recycled = true; | 2804 | | | 2805 | | // 2. Remove the table version kv of this table | 2806 | 1 | std::string table_version_key = versioned::table_version_key({instance_id_, table_id}); | 2807 | 1 | versioned_remove_all(txn.get(), table_version_key); | 2808 | 1 | LOG(WARNING) << "remove table version kv " << hex(table_version_key); | 2809 | | // 3. 
Remove mow delete bitmap update lock and tablet job lock | 2810 | 1 | std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1}); | 2811 | 1 | txn->remove(lock_key); | 2812 | 1 | LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key); | 2813 | 1 | std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0}); | 2814 | 1 | std::string tablet_job_key_end = | 2815 | 1 | mow_tablet_job_key({instance_id_, table_id, INT64_MAX}); | 2816 | 1 | txn->remove(tablet_job_key_begin, tablet_job_key_end); | 2817 | 1 | LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin) | 2818 | 1 | << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id | 2819 | 1 | << " table_id=" << table_id; | 2820 | 1 | } | 2821 | | | 2822 | 1 | err = txn->commit(); | 2823 | 1 | if (err != TxnErrorCode::TXN_OK) { | 2824 | 0 | return -1; | 2825 | 0 | } | 2826 | 1 | metrics_context.total_recycled_num = ++num_recycled; | 2827 | 1 | metrics_context.report(); | 2828 | 1 | return 0; | 2829 | 1 | }; |
|
2830 | | |
2831 | | // recycle_func and loop_done for scan and recycle |
2832 | 3 | return scan_and_recycle( |
2833 | 3 | versioned::partition_inverted_index_key({instance_id_, 0, 0, 0}), |
2834 | 3 | versioned::partition_inverted_index_key({instance_id_, INT64_MAX, 0, 0}), |
2835 | 3 | std::move(recycle_func)); |
2836 | 3 | } |
2837 | | |
2838 | | int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id, |
2839 | | RecyclerMetricsContext& metrics_context, |
2840 | 49 | int64_t partition_id) { |
2841 | 49 | bool is_multi_version = |
2842 | 49 | instance_info_.has_multi_version_status() && |
2843 | 49 | instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED; |
2844 | 49 | int64_t num_scanned = 0; |
2845 | 49 | std::atomic_long num_recycled = 0; |
2846 | | |
2847 | 49 | std::string tablet_key_begin, tablet_key_end; |
2848 | 49 | std::string stats_key_begin, stats_key_end; |
2849 | 49 | std::string job_key_begin, job_key_end; |
2850 | | |
2851 | 49 | std::string tablet_belongs; |
2852 | 49 | if (partition_id > 0) { |
2853 | | // recycle tablets in a partition belonging to the index |
2854 | 33 | meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin); |
2855 | 33 | meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end); |
2856 | 33 | stats_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &stats_key_begin); |
2857 | 33 | stats_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &stats_key_end); |
2858 | 33 | job_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &job_key_begin); |
2859 | 33 | job_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &job_key_end); |
2860 | 33 | tablet_belongs = "partition"; |
2861 | 33 | } else { |
2862 | | // recycle tablets in the index |
2863 | 16 | meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin); |
2864 | 16 | meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end); |
2865 | 16 | stats_tablet_key({instance_id_, table_id, index_id, 0, 0}, &stats_key_begin); |
2866 | 16 | stats_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &stats_key_end); |
2867 | 16 | job_tablet_key({instance_id_, table_id, index_id, 0, 0}, &job_key_begin); |
2868 | 16 | job_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &job_key_end); |
2869 | 16 | tablet_belongs = "index"; |
2870 | 16 | } |
2871 | | |
2872 | 49 | LOG_INFO("begin to recycle tablets of the " + tablet_belongs) |
2873 | 49 | .tag("table_id", table_id) |
2874 | 49 | .tag("index_id", index_id) |
2875 | 49 | .tag("partition_id", partition_id); |
2876 | | |
2877 | 49 | auto start_time = steady_clock::now(); |
2878 | | |
2879 | 49 | DORIS_CLOUD_DEFER { |
2880 | 49 | auto cost = duration<float>(steady_clock::now() - start_time).count(); |
2881 | 49 | LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost) |
2882 | 49 | .tag("instance_id", instance_id_) |
2883 | 49 | .tag("table_id", table_id) |
2884 | 49 | .tag("index_id", index_id) |
2885 | 49 | .tag("partition_id", partition_id) |
2886 | 49 | .tag("num_scanned", num_scanned) |
2887 | 49 | .tag("num_recycled", num_recycled); |
2888 | 49 | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv Line | Count | Source | 2879 | 4 | DORIS_CLOUD_DEFER { | 2880 | 4 | auto cost = duration<float>(steady_clock::now() - start_time).count(); | 2881 | 4 | LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost) | 2882 | 4 | .tag("instance_id", instance_id_) | 2883 | 4 | .tag("table_id", table_id) | 2884 | 4 | .tag("index_id", index_id) | 2885 | 4 | .tag("partition_id", partition_id) | 2886 | 4 | .tag("num_scanned", num_scanned) | 2887 | 4 | .tag("num_recycled", num_recycled); | 2888 | 4 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv Line | Count | Source | 2879 | 45 | DORIS_CLOUD_DEFER { | 2880 | 45 | auto cost = duration<float>(steady_clock::now() - start_time).count(); | 2881 | 45 | LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost) | 2882 | 45 | .tag("instance_id", instance_id_) | 2883 | 45 | .tag("table_id", table_id) | 2884 | 45 | .tag("index_id", index_id) | 2885 | 45 | .tag("partition_id", partition_id) | 2886 | 45 | .tag("num_scanned", num_scanned) | 2887 | 45 | .tag("num_recycled", num_recycled); | 2888 | 45 | }; |
|
2889 | | |
2890 | | // The first string_view represents the tablet key which has been recycled |
2891 | | // The second bool represents whether the following fdb's tablet key deletion could be done using range move or not |
2892 | 49 | using TabletKeyPair = std::pair<std::string_view, bool>; |
2893 | 49 | SyncExecutor<TabletKeyPair> sync_executor( |
2894 | 49 | _thread_pool_group.recycle_tablet_pool, |
2895 | 49 | fmt::format("recycle tablets, tablet id {}, index id {}, partition id {}", table_id, |
2896 | 49 | index_id, partition_id), |
2897 | 4.23k | [](const TabletKeyPair& k) { return k.first.empty(); });recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE Line | Count | Source | 2897 | 4.00k | [](const TabletKeyPair& k) { return k.first.empty(); }); |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE Line | Count | Source | 2897 | 237 | [](const TabletKeyPair& k) { return k.first.empty(); }); |
|
2898 | | |
2899 | | // Elements in `tablet_keys` has the same lifetime as `it` in `scan_and_recycle` |
2900 | 49 | std::vector<std::string> tablet_idx_keys; |
2901 | 49 | std::vector<std::string> restore_job_keys; |
2902 | 49 | std::vector<std::string> init_rs_keys; |
2903 | 49 | std::vector<std::string> tablet_compact_stats_keys; |
2904 | 49 | std::vector<std::string> tablet_load_stats_keys; |
2905 | 49 | std::vector<std::string> versioned_meta_tablet_keys; |
2906 | 8.24k | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { |
2907 | 8.24k | bool use_range_remove = true; |
2908 | 8.24k | ++num_scanned; |
2909 | 8.24k | doris::TabletMetaCloudPB tablet_meta_pb; |
2910 | 8.24k | if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) { |
2911 | 0 | LOG_WARNING("malformed tablet meta").tag("key", hex(k)); |
2912 | 0 | use_range_remove = false; |
2913 | 0 | return -1; |
2914 | 0 | } |
2915 | 8.24k | int64_t tablet_id = tablet_meta_pb.tablet_id(); |
2916 | | |
2917 | 8.24k | if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) { |
2918 | 4.00k | LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id(); |
2919 | 4.00k | return -1; |
2920 | 4.00k | } |
2921 | | |
2922 | 4.24k | tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id})); |
2923 | 4.24k | restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id})); |
2924 | 4.24k | if (is_multi_version) { |
2925 | | // The tablet index/inverted index are recycled in recycle_versioned_tablet. |
2926 | 6 | tablet_compact_stats_keys.push_back( |
2927 | 6 | versioned::tablet_compact_stats_key({instance_id_, tablet_id})); |
2928 | 6 | tablet_load_stats_keys.push_back( |
2929 | 6 | versioned::tablet_load_stats_key({instance_id_, tablet_id})); |
2930 | 6 | versioned_meta_tablet_keys.push_back( |
2931 | 6 | versioned::meta_tablet_key({instance_id_, tablet_id})); |
2932 | 6 | } |
2933 | 4.24k | TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false); |
2934 | 4.23k | sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove, |
2935 | 4.23k | &metrics_context, k]() mutable -> TabletKeyPair { |
2936 | 4.23k | if (recycle_tablet(tid, metrics_context) != 0) { |
2937 | 1 | LOG_WARNING("failed to recycle tablet") |
2938 | 1 | .tag("instance_id", instance_id_) |
2939 | 1 | .tag("tablet_id", tid); |
2940 | 1 | range_move = false; |
2941 | 1 | return {std::string_view(), range_move}; |
2942 | 1 | } |
2943 | 4.23k | ++num_recycled; |
2944 | 4.23k | LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k)); |
2945 | 4.23k | return {k, range_move}; |
2946 | 4.23k | }); recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv Line | Count | Source | 2935 | 4.00k | &metrics_context, k]() mutable -> TabletKeyPair { | 2936 | 4.00k | if (recycle_tablet(tid, metrics_context) != 0) { | 2937 | 0 | LOG_WARNING("failed to recycle tablet") | 2938 | 0 | .tag("instance_id", instance_id_) | 2939 | 0 | .tag("tablet_id", tid); | 2940 | 0 | range_move = false; | 2941 | 0 | return {std::string_view(), range_move}; | 2942 | 0 | } | 2943 | 4.00k | ++num_recycled; | 2944 | 4.00k | LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k)); | 2945 | 4.00k | return {k, range_move}; | 2946 | 4.00k | }); |
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv Line | Count | Source | 2935 | 237 | &metrics_context, k]() mutable -> TabletKeyPair { | 2936 | 237 | if (recycle_tablet(tid, metrics_context) != 0) { | 2937 | 1 | LOG_WARNING("failed to recycle tablet") | 2938 | 1 | .tag("instance_id", instance_id_) | 2939 | 1 | .tag("tablet_id", tid); | 2940 | 1 | range_move = false; | 2941 | 1 | return {std::string_view(), range_move}; | 2942 | 1 | } | 2943 | 236 | ++num_recycled; | 2944 | 236 | LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k)); | 2945 | 236 | return {k, range_move}; | 2946 | 237 | }); |
|
2947 | 4.23k | return 0; |
2948 | 4.24k | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ Line | Count | Source | 2906 | 8.00k | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { | 2907 | 8.00k | bool use_range_remove = true; | 2908 | 8.00k | ++num_scanned; | 2909 | 8.00k | doris::TabletMetaCloudPB tablet_meta_pb; | 2910 | 8.00k | if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) { | 2911 | 0 | LOG_WARNING("malformed tablet meta").tag("key", hex(k)); | 2912 | 0 | use_range_remove = false; | 2913 | 0 | return -1; | 2914 | 0 | } | 2915 | 8.00k | int64_t tablet_id = tablet_meta_pb.tablet_id(); | 2916 | | | 2917 | 8.00k | if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) { | 2918 | 4.00k | LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id(); | 2919 | 4.00k | return -1; | 2920 | 4.00k | } | 2921 | | | 2922 | 4.00k | tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id})); | 2923 | 4.00k | restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id})); | 2924 | 4.00k | if (is_multi_version) { | 2925 | | // The tablet index/inverted index are recycled in recycle_versioned_tablet. 
| 2926 | 0 | tablet_compact_stats_keys.push_back( | 2927 | 0 | versioned::tablet_compact_stats_key({instance_id_, tablet_id})); | 2928 | 0 | tablet_load_stats_keys.push_back( | 2929 | 0 | versioned::tablet_load_stats_key({instance_id_, tablet_id})); | 2930 | 0 | versioned_meta_tablet_keys.push_back( | 2931 | 0 | versioned::meta_tablet_key({instance_id_, tablet_id})); | 2932 | 0 | } | 2933 | 4.00k | TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false); | 2934 | 4.00k | sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove, | 2935 | 4.00k | &metrics_context, k]() mutable -> TabletKeyPair { | 2936 | 4.00k | if (recycle_tablet(tid, metrics_context) != 0) { | 2937 | 4.00k | LOG_WARNING("failed to recycle tablet") | 2938 | 4.00k | .tag("instance_id", instance_id_) | 2939 | 4.00k | .tag("tablet_id", tid); | 2940 | 4.00k | range_move = false; | 2941 | 4.00k | return {std::string_view(), range_move}; | 2942 | 4.00k | } | 2943 | 4.00k | ++num_recycled; | 2944 | 4.00k | LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k)); | 2945 | 4.00k | return {k, range_move}; | 2946 | 4.00k | }); | 2947 | 4.00k | return 0; | 2948 | 4.00k | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ Line | Count | Source | 2906 | 240 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { | 2907 | 240 | bool use_range_remove = true; | 2908 | 240 | ++num_scanned; | 2909 | 240 | doris::TabletMetaCloudPB tablet_meta_pb; | 2910 | 240 | if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) { | 2911 | 0 | LOG_WARNING("malformed tablet meta").tag("key", hex(k)); | 2912 | 0 | use_range_remove = false; | 2913 | 0 | return -1; | 2914 | 0 | } | 2915 | 240 | int64_t tablet_id = tablet_meta_pb.tablet_id(); | 2916 | | | 2917 | 240 | if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) { | 2918 | 0 | LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id(); | 2919 | 0 | return -1; | 2920 | 0 | } | 2921 | | | 2922 | 240 | tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id})); | 2923 | 240 | restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id})); | 2924 | 240 | if (is_multi_version) { | 2925 | | // The tablet index/inverted index are recycled in recycle_versioned_tablet. 
| 2926 | 6 | tablet_compact_stats_keys.push_back( | 2927 | 6 | versioned::tablet_compact_stats_key({instance_id_, tablet_id})); | 2928 | 6 | tablet_load_stats_keys.push_back( | 2929 | 6 | versioned::tablet_load_stats_key({instance_id_, tablet_id})); | 2930 | 6 | versioned_meta_tablet_keys.push_back( | 2931 | 6 | versioned::meta_tablet_key({instance_id_, tablet_id})); | 2932 | 6 | } | 2933 | 240 | TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false); | 2934 | 237 | sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove, | 2935 | 237 | &metrics_context, k]() mutable -> TabletKeyPair { | 2936 | 237 | if (recycle_tablet(tid, metrics_context) != 0) { | 2937 | 237 | LOG_WARNING("failed to recycle tablet") | 2938 | 237 | .tag("instance_id", instance_id_) | 2939 | 237 | .tag("tablet_id", tid); | 2940 | 237 | range_move = false; | 2941 | 237 | return {std::string_view(), range_move}; | 2942 | 237 | } | 2943 | 237 | ++num_recycled; | 2944 | 237 | LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k)); | 2945 | 237 | return {k, range_move}; | 2946 | 237 | }); | 2947 | 237 | return 0; | 2948 | 240 | }; |
|
2949 | | |
2950 | | // TODO(AlexYue): Add one ut to cover use_range_remove = false |
2951 | 49 | auto loop_done = [&, this]() -> int { |
2952 | 49 | bool finished = true; |
2953 | 49 | auto tablet_keys = sync_executor.when_all(&finished); |
2954 | 49 | if (!finished) { |
2955 | 1 | LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_); |
2956 | 1 | return -1; |
2957 | 1 | } |
2958 | 48 | if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0; |
2959 | 46 | if (!tablet_keys.empty() && |
2960 | 46 | std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbEEEDaS7_ Line | Count | Source | 2960 | 2 | std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) { |
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbEEEDaS7_ Line | Count | Source | 2960 | 42 | std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) { |
|
2961 | 0 | return -1; |
2962 | 0 | } |
2963 | | // sort the vector using key's order |
2964 | 46 | std::sort(tablet_keys.begin(), tablet_keys.end(), |
2965 | 49.4k | [](const auto& prev, const auto& last) { return prev.first < last.first; });recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_ Line | Count | Source | 2965 | 48.4k | [](const auto& prev, const auto& last) { return prev.first < last.first; }); |
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_ Line | Count | Source | 2965 | 944 | [](const auto& prev, const auto& last) { return prev.first < last.first; }); |
|
2966 | 46 | bool use_range_remove = true; |
2967 | 4.23k | for (auto& [_, remove] : tablet_keys) { |
2968 | 4.23k | if (!remove) { |
2969 | 0 | use_range_remove = remove; |
2970 | 0 | break; |
2971 | 0 | } |
2972 | 4.23k | } |
2973 | 46 | DORIS_CLOUD_DEFER { |
2974 | 46 | tablet_idx_keys.clear(); |
2975 | 46 | restore_job_keys.clear(); |
2976 | 46 | init_rs_keys.clear(); |
2977 | 46 | tablet_compact_stats_keys.clear(); |
2978 | 46 | tablet_load_stats_keys.clear(); |
2979 | 46 | versioned_meta_tablet_keys.clear(); |
2980 | 46 | }; recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv Line | Count | Source | 2973 | 2 | DORIS_CLOUD_DEFER { | 2974 | 2 | tablet_idx_keys.clear(); | 2975 | 2 | restore_job_keys.clear(); | 2976 | 2 | init_rs_keys.clear(); | 2977 | 2 | tablet_compact_stats_keys.clear(); | 2978 | 2 | tablet_load_stats_keys.clear(); | 2979 | 2 | versioned_meta_tablet_keys.clear(); | 2980 | 2 | }; |
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv Line | Count | Source | 2973 | 44 | DORIS_CLOUD_DEFER { | 2974 | 44 | tablet_idx_keys.clear(); | 2975 | 44 | restore_job_keys.clear(); | 2976 | 44 | init_rs_keys.clear(); | 2977 | 44 | tablet_compact_stats_keys.clear(); | 2978 | 44 | tablet_load_stats_keys.clear(); | 2979 | 44 | versioned_meta_tablet_keys.clear(); | 2980 | 44 | }; |
|
2981 | 46 | std::unique_ptr<Transaction> txn; |
2982 | 46 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
2983 | 0 | LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_; |
2984 | 0 | return -1; |
2985 | 0 | } |
2986 | 46 | std::string tablet_key_end; |
2987 | 46 | if (!tablet_keys.empty()) { |
2988 | 44 | if (use_range_remove) { |
2989 | 44 | tablet_key_end = std::string(tablet_keys.back().first) + '\x00'; |
2990 | 44 | txn->remove(tablet_keys.front().first, tablet_key_end); |
2991 | 44 | } else { |
2992 | 0 | for (auto& [k, _] : tablet_keys) { |
2993 | 0 | txn->remove(k); |
2994 | 0 | } |
2995 | 0 | } |
2996 | 44 | } |
2997 | 46 | if (is_multi_version) { |
2998 | 6 | for (auto& k : tablet_compact_stats_keys) { |
2999 | | // Remove all versions of tablet compact stats for recycled tablet |
3000 | 6 | LOG_INFO("remove versioned tablet compact stats key") |
3001 | 6 | .tag("compact_stats_key", hex(k)); |
3002 | 6 | versioned_remove_all(txn.get(), k); |
3003 | 6 | } |
3004 | 6 | for (auto& k : tablet_load_stats_keys) { |
3005 | | // Remove all versions of tablet load stats for recycled tablet |
3006 | 6 | LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k)); |
3007 | 6 | versioned_remove_all(txn.get(), k); |
3008 | 6 | } |
3009 | 6 | for (auto& k : versioned_meta_tablet_keys) { |
3010 | | // Remove all versions of meta tablet for recycled tablet |
3011 | 6 | LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k)); |
3012 | 6 | versioned_remove_all(txn.get(), k); |
3013 | 6 | } |
3014 | 5 | } |
3015 | 4.24k | for (auto& k : tablet_idx_keys) { |
3016 | 4.24k | txn->remove(k); |
3017 | 4.24k | } |
3018 | 4.24k | for (auto& k : restore_job_keys) { |
3019 | 4.24k | txn->remove(k); |
3020 | 4.24k | } |
3021 | 46 | for (auto& k : init_rs_keys) { |
3022 | 0 | txn->remove(k); |
3023 | 0 | } |
3024 | 46 | if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) { |
3025 | 0 | LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_ |
3026 | 0 | << ", err=" << err; |
3027 | 0 | return -1; |
3028 | 0 | } |
3029 | 46 | return 0; |
3030 | 46 | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv Line | Count | Source | 2951 | 4 | auto loop_done = [&, this]() -> int { | 2952 | 4 | bool finished = true; | 2953 | 4 | auto tablet_keys = sync_executor.when_all(&finished); | 2954 | 4 | if (!finished) { | 2955 | 0 | LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_); | 2956 | 0 | return -1; | 2957 | 0 | } | 2958 | 4 | if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0; | 2959 | 2 | if (!tablet_keys.empty() && | 2960 | 2 | std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) { | 2961 | 0 | return -1; | 2962 | 0 | } | 2963 | | // sort the vector using key's order | 2964 | 2 | std::sort(tablet_keys.begin(), tablet_keys.end(), | 2965 | 2 | [](const auto& prev, const auto& last) { return prev.first < last.first; }); | 2966 | 2 | bool use_range_remove = true; | 2967 | 4.00k | for (auto& [_, remove] : tablet_keys) { | 2968 | 4.00k | if (!remove) { | 2969 | 0 | use_range_remove = remove; | 2970 | 0 | break; | 2971 | 0 | } | 2972 | 4.00k | } | 2973 | 2 | DORIS_CLOUD_DEFER { | 2974 | 2 | tablet_idx_keys.clear(); | 2975 | 2 | restore_job_keys.clear(); | 2976 | 2 | init_rs_keys.clear(); | 2977 | 2 | tablet_compact_stats_keys.clear(); | 2978 | 2 | tablet_load_stats_keys.clear(); | 2979 | 2 | versioned_meta_tablet_keys.clear(); | 2980 | 2 | }; | 2981 | 2 | std::unique_ptr<Transaction> txn; | 2982 | 2 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { | 2983 | 0 | LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_; | 2984 | 0 | return -1; | 2985 | 0 | } | 2986 | 2 | std::string tablet_key_end; | 2987 | 2 | if (!tablet_keys.empty()) { | 2988 | 2 | if (use_range_remove) { | 2989 | 2 | tablet_key_end = std::string(tablet_keys.back().first) + '\x00'; | 2990 | 2 | txn->remove(tablet_keys.front().first, tablet_key_end); | 2991 | 2 | } else { | 2992 | 0 | for (auto& 
[k, _] : tablet_keys) { | 2993 | 0 | txn->remove(k); | 2994 | 0 | } | 2995 | 0 | } | 2996 | 2 | } | 2997 | 2 | if (is_multi_version) { | 2998 | 0 | for (auto& k : tablet_compact_stats_keys) { | 2999 | | // Remove all versions of tablet compact stats for recycled tablet | 3000 | 0 | LOG_INFO("remove versioned tablet compact stats key") | 3001 | 0 | .tag("compact_stats_key", hex(k)); | 3002 | 0 | versioned_remove_all(txn.get(), k); | 3003 | 0 | } | 3004 | 0 | for (auto& k : tablet_load_stats_keys) { | 3005 | | // Remove all versions of tablet load stats for recycled tablet | 3006 | 0 | LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k)); | 3007 | 0 | versioned_remove_all(txn.get(), k); | 3008 | 0 | } | 3009 | 0 | for (auto& k : versioned_meta_tablet_keys) { | 3010 | | // Remove all versions of meta tablet for recycled tablet | 3011 | 0 | LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k)); | 3012 | 0 | versioned_remove_all(txn.get(), k); | 3013 | 0 | } | 3014 | 0 | } | 3015 | 4.00k | for (auto& k : tablet_idx_keys) { | 3016 | 4.00k | txn->remove(k); | 3017 | 4.00k | } | 3018 | 4.00k | for (auto& k : restore_job_keys) { | 3019 | 4.00k | txn->remove(k); | 3020 | 4.00k | } | 3021 | 2 | for (auto& k : init_rs_keys) { | 3022 | 0 | txn->remove(k); | 3023 | 0 | } | 3024 | 2 | if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) { | 3025 | 0 | LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_ | 3026 | 0 | << ", err=" << err; | 3027 | 0 | return -1; | 3028 | 0 | } | 3029 | 2 | return 0; | 3030 | 2 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv Line | Count | Source | 2951 | 45 | auto loop_done = [&, this]() -> int { | 2952 | 45 | bool finished = true; | 2953 | 45 | auto tablet_keys = sync_executor.when_all(&finished); | 2954 | 45 | if (!finished) { | 2955 | 1 | LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_); | 2956 | 1 | return -1; | 2957 | 1 | } | 2958 | 44 | if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0; | 2959 | 44 | if (!tablet_keys.empty() && | 2960 | 44 | std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) { | 2961 | 0 | return -1; | 2962 | 0 | } | 2963 | | // sort the vector using key's order | 2964 | 44 | std::sort(tablet_keys.begin(), tablet_keys.end(), | 2965 | 44 | [](const auto& prev, const auto& last) { return prev.first < last.first; }); | 2966 | 44 | bool use_range_remove = true; | 2967 | 236 | for (auto& [_, remove] : tablet_keys) { | 2968 | 236 | if (!remove) { | 2969 | 0 | use_range_remove = remove; | 2970 | 0 | break; | 2971 | 0 | } | 2972 | 236 | } | 2973 | 44 | DORIS_CLOUD_DEFER { | 2974 | 44 | tablet_idx_keys.clear(); | 2975 | 44 | restore_job_keys.clear(); | 2976 | 44 | init_rs_keys.clear(); | 2977 | 44 | tablet_compact_stats_keys.clear(); | 2978 | 44 | tablet_load_stats_keys.clear(); | 2979 | 44 | versioned_meta_tablet_keys.clear(); | 2980 | 44 | }; | 2981 | 44 | std::unique_ptr<Transaction> txn; | 2982 | 44 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { | 2983 | 0 | LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_; | 2984 | 0 | return -1; | 2985 | 0 | } | 2986 | 44 | std::string tablet_key_end; | 2987 | 44 | if (!tablet_keys.empty()) { | 2988 | 42 | if (use_range_remove) { | 2989 | 42 | tablet_key_end = std::string(tablet_keys.back().first) + '\x00'; | 2990 | 42 | txn->remove(tablet_keys.front().first, tablet_key_end); | 2991 | 42 | } else { | 2992 | 0 | 
for (auto& [k, _] : tablet_keys) { | 2993 | 0 | txn->remove(k); | 2994 | 0 | } | 2995 | 0 | } | 2996 | 42 | } | 2997 | 44 | if (is_multi_version) { | 2998 | 6 | for (auto& k : tablet_compact_stats_keys) { | 2999 | | // Remove all versions of tablet compact stats for recycled tablet | 3000 | 6 | LOG_INFO("remove versioned tablet compact stats key") | 3001 | 6 | .tag("compact_stats_key", hex(k)); | 3002 | 6 | versioned_remove_all(txn.get(), k); | 3003 | 6 | } | 3004 | 6 | for (auto& k : tablet_load_stats_keys) { | 3005 | | // Remove all versions of tablet load stats for recycled tablet | 3006 | 6 | LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k)); | 3007 | 6 | versioned_remove_all(txn.get(), k); | 3008 | 6 | } | 3009 | 6 | for (auto& k : versioned_meta_tablet_keys) { | 3010 | | // Remove all versions of meta tablet for recycled tablet | 3011 | 6 | LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k)); | 3012 | 6 | versioned_remove_all(txn.get(), k); | 3013 | 6 | } | 3014 | 5 | } | 3015 | 239 | for (auto& k : tablet_idx_keys) { | 3016 | 239 | txn->remove(k); | 3017 | 239 | } | 3018 | 239 | for (auto& k : restore_job_keys) { | 3019 | 239 | txn->remove(k); | 3020 | 239 | } | 3021 | 44 | for (auto& k : init_rs_keys) { | 3022 | 0 | txn->remove(k); | 3023 | 0 | } | 3024 | 44 | if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) { | 3025 | 0 | LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_ | 3026 | 0 | << ", err=" << err; | 3027 | 0 | return -1; | 3028 | 0 | } | 3029 | 44 | return 0; | 3030 | 44 | }; |
|
3031 | | |
3032 | 49 | int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(recycle_func), |
3033 | 49 | std::move(loop_done)); |
3034 | 49 | if (ret != 0) { |
3035 | 3 | LOG(WARNING) << "failed to scan_and_recycle, instance_id=" << instance_id_; |
3036 | 3 | return ret; |
3037 | 3 | } |
3038 | | |
3039 | | // directly remove tablet stats and tablet jobs of these dropped index or partition |
3040 | 46 | std::unique_ptr<Transaction> txn; |
3041 | 46 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
3042 | 0 | LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_; |
3043 | 0 | return -1; |
3044 | 0 | } |
3045 | 46 | txn->remove(stats_key_begin, stats_key_end); |
3046 | 46 | LOG(WARNING) << "remove stats kv, begin=" << hex(stats_key_begin) |
3047 | 46 | << " end=" << hex(stats_key_end); |
3048 | 46 | txn->remove(job_key_begin, job_key_end); |
3049 | 46 | LOG(WARNING) << "remove job kv, begin=" << hex(job_key_begin) << " end=" << hex(job_key_end); |
3050 | 46 | std::string schema_key_begin, schema_key_end; |
3051 | 46 | std::string schema_dict_key; |
3052 | 46 | std::string versioned_schema_key_begin, versioned_schema_key_end; |
3053 | 46 | if (partition_id <= 0) { |
3054 | | // Delete schema kv of this index |
3055 | 14 | meta_schema_key({instance_id_, index_id, 0}, &schema_key_begin); |
3056 | 14 | meta_schema_key({instance_id_, index_id + 1, 0}, &schema_key_end); |
3057 | 14 | txn->remove(schema_key_begin, schema_key_end); |
3058 | 14 | LOG(WARNING) << "remove schema kv, begin=" << hex(schema_key_begin) |
3059 | 14 | << " end=" << hex(schema_key_end); |
3060 | 14 | meta_schema_pb_dictionary_key({instance_id_, index_id}, &schema_dict_key); |
3061 | 14 | txn->remove(schema_dict_key); |
3062 | 14 | LOG(WARNING) << "remove schema dict kv, key=" << hex(schema_dict_key); |
3063 | 14 | versioned::meta_schema_key({instance_id_, index_id, 0}, &versioned_schema_key_begin); |
3064 | 14 | versioned::meta_schema_key({instance_id_, index_id + 1, 0}, &versioned_schema_key_end); |
3065 | 14 | txn->remove(versioned_schema_key_begin, versioned_schema_key_end); |
3066 | 14 | LOG(WARNING) << "remove versioned schema kv, begin=" << hex(versioned_schema_key_begin) |
3067 | 14 | << " end=" << hex(versioned_schema_key_end); |
3068 | 14 | } |
3069 | | |
3070 | 46 | TxnErrorCode err = txn->commit(); |
3071 | 46 | if (err != TxnErrorCode::TXN_OK) { |
3072 | 0 | LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_ |
3073 | 0 | << " err=" << err; |
3074 | 0 | return -1; |
3075 | 0 | } |
3076 | | |
3077 | 46 | return ret; |
3078 | 46 | } |
3079 | | |
3080 | 5.61k | int InstanceRecycler::delete_rowset_data(const RowsetMetaCloudPB& rs_meta_pb) { |
3081 | 5.61k | TEST_SYNC_POINT_RETURN_WITH_VALUE("delete_rowset_data::bypass_check", true); |
3082 | 5.61k | int64_t num_segments = rs_meta_pb.num_segments(); |
3083 | 5.61k | if (num_segments <= 0) return 0; |
3084 | | |
3085 | 5.61k | std::vector<std::string> file_paths; |
3086 | 5.61k | if (decrement_packed_file_ref_counts(rs_meta_pb) != 0) { |
3087 | 0 | return -1; |
3088 | 0 | } |
3089 | | |
3090 | | // Process inverted indexes |
3091 | 5.61k | std::vector<std::pair<int64_t, std::string>> index_ids; |
3092 | | // default format as v1. |
3093 | 5.61k | InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1; |
3094 | 5.61k | bool delete_rowset_data_by_prefix = false; |
3095 | 5.61k | if (rs_meta_pb.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) { |
3096 | | // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data |
3097 | | // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix |
3098 | 0 | delete_rowset_data_by_prefix = true; |
3099 | 5.61k | } else if (rs_meta_pb.has_tablet_schema()) { |
3100 | 10.0k | for (const auto& index : rs_meta_pb.tablet_schema().index()) { |
3101 | 10.0k | if (index.has_index_type() && index.index_type() == IndexType::INVERTED) { |
3102 | 10.0k | index_ids.emplace_back(index.index_id(), index.index_suffix_name()); |
3103 | 10.0k | } |
3104 | 10.0k | } |
3105 | 4.80k | if (rs_meta_pb.tablet_schema().has_inverted_index_storage_format()) { |
3106 | 2.00k | index_format = rs_meta_pb.tablet_schema().inverted_index_storage_format(); |
3107 | 2.00k | } |
3108 | 4.80k | } else if (!rs_meta_pb.has_index_id() || !rs_meta_pb.has_schema_version()) { |
3109 | | // schema version and index id are not found, delete rowset data by prefix directly. |
3110 | 0 | delete_rowset_data_by_prefix = true; |
3111 | 809 | } else { |
3112 | | // otherwise, try to get schema kv |
3113 | 809 | InvertedIndexInfo index_info; |
3114 | 809 | int inverted_index_get_ret = inverted_index_id_cache_->get( |
3115 | 809 | rs_meta_pb.index_id(), rs_meta_pb.schema_version(), index_info); |
3116 | 809 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset", |
3117 | 809 | &inverted_index_get_ret); |
3118 | 809 | if (inverted_index_get_ret == 0) { |
3119 | 809 | index_format = index_info.first; |
3120 | 809 | index_ids = index_info.second; |
3121 | 809 | } else if (inverted_index_get_ret == 1) { |
3122 | | // 1. Schema kv not found means tablet has been recycled |
3123 | | // Maybe some tablet recycle failed by some bugs |
3124 | | // We need to delete again to double check |
3125 | | // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes, |
3126 | | // because we are uncertain about the inverted index information. |
3127 | | // If there are inverted indexes, some data might not be deleted, |
3128 | | // but this is acceptable as we have made our best effort to delete the data. |
3129 | 0 | LOG_INFO( |
3130 | 0 | "delete rowset data schema kv not found, need to delete again to double " |
3131 | 0 | "check") |
3132 | 0 | .tag("instance_id", instance_id_) |
3133 | 0 | .tag("tablet_id", rs_meta_pb.tablet_id()) |
3134 | 0 | .tag("rowset", rs_meta_pb.ShortDebugString()); |
3135 | | // Currently index_ids is guaranteed to be empty, |
3136 | | // but we clear it again here as a safeguard against future code changes |
3137 | | // that might cause index_ids to no longer be empty |
3138 | 0 | index_format = InvertedIndexStorageFormatPB::V2; |
3139 | 0 | index_ids.clear(); |
3140 | 0 | } else { |
3141 | | // failed to get schema kv, delete rowset data by prefix directly. |
3142 | 0 | delete_rowset_data_by_prefix = true; |
3143 | 0 | } |
3144 | 809 | } |
3145 | | |
3146 | 5.61k | if (delete_rowset_data_by_prefix) { |
3147 | 0 | return delete_rowset_data(rs_meta_pb.resource_id(), rs_meta_pb.tablet_id(), |
3148 | 0 | rs_meta_pb.rowset_id_v2()); |
3149 | 0 | } |
3150 | | |
3151 | 5.61k | auto it = accessor_map_.find(rs_meta_pb.resource_id()); |
3152 | 5.61k | if (it == accessor_map_.end()) { |
3153 | 1.59k | LOG_WARNING("instance has no such resource id") |
3154 | 1.59k | .tag("instance_id", instance_id_) |
3155 | 1.59k | .tag("resource_id", rs_meta_pb.resource_id()); |
3156 | 1.59k | return -1; |
3157 | 1.59k | } |
3158 | 4.01k | auto& accessor = it->second; |
3159 | | |
3160 | 4.01k | int64_t tablet_id = rs_meta_pb.tablet_id(); |
3161 | 4.01k | const auto& rowset_id = rs_meta_pb.rowset_id_v2(); |
3162 | 24.0k | for (int64_t i = 0; i < num_segments; ++i) { |
3163 | 20.0k | file_paths.push_back(segment_path(tablet_id, rowset_id, i)); |
3164 | 20.0k | if (index_format == InvertedIndexStorageFormatPB::V1) { |
3165 | 40.0k | for (const auto& index_id : index_ids) { |
3166 | 40.0k | file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i, index_id.first, |
3167 | 40.0k | index_id.second)); |
3168 | 40.0k | } |
3169 | 20.0k | } else if (!index_ids.empty()) { |
3170 | 0 | file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i)); |
3171 | 0 | } |
3172 | 20.0k | } |
3173 | | |
3174 | | // Process delete bitmap - check where it's stored. |
3175 | 4.01k | DeleteBitmapStorageType delete_bitmap_storage_type = DeleteBitmapStorageType::NOT_FOUND; |
3176 | 4.01k | if (decrement_delete_bitmap_packed_file_ref_counts(tablet_id, rowset_id, |
3177 | 4.01k | &delete_bitmap_storage_type) != 0) { |
3178 | 0 | LOG_WARNING("failed to decrement delete bitmap packed file ref count") |
3179 | 0 | .tag("instance_id", instance_id_) |
3180 | 0 | .tag("tablet_id", tablet_id) |
3181 | 0 | .tag("rowset_id", rowset_id); |
3182 | 0 | return -1; |
3183 | 0 | } |
3184 | 4.01k | if (delete_bitmap_storage_type == DeleteBitmapStorageType::STANDALONE_FILE) { |
3185 | 2.00k | file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id)); |
3186 | 2.00k | } |
3187 | | // TODO(AlexYue): seems this could be done in batch |
3188 | 4.01k | return accessor->delete_files(file_paths); |
3189 | 4.01k | } |
3190 | | |
3191 | 62.3k | int InstanceRecycler::decrement_packed_file_ref_counts(const doris::RowsetMetaCloudPB& rs_meta_pb) { |
3192 | 62.3k | LOG_INFO("begin process_packed_file_location_index") |
3193 | 62.3k | .tag("instance_id", instance_id_) |
3194 | 62.3k | .tag("tablet_id", rs_meta_pb.tablet_id()) |
3195 | 62.3k | .tag("rowset_id", rs_meta_pb.rowset_id_v2()) |
3196 | 62.3k | .tag("index_map_size", rs_meta_pb.packed_slice_locations_size()); |
3197 | 62.3k | const auto& index_map = rs_meta_pb.packed_slice_locations(); |
3198 | 62.3k | if (index_map.empty()) { |
3199 | 62.3k | LOG_INFO("skip merge file update: empty merge_file_segment_index") |
3200 | 62.3k | .tag("instance_id", instance_id_) |
3201 | 62.3k | .tag("tablet_id", rs_meta_pb.tablet_id()) |
3202 | 62.3k | .tag("rowset_id", rs_meta_pb.rowset_id_v2()); |
3203 | 62.3k | return 0; |
3204 | 62.3k | } |
3205 | | |
3206 | 10 | struct PackedSmallFileInfo { |
3207 | 10 | std::string small_file_path; |
3208 | 10 | }; |
3209 | 10 | std::unordered_map<std::string, std::vector<PackedSmallFileInfo>> packed_file_updates; |
3210 | 10 | packed_file_updates.reserve(index_map.size()); |
3211 | 27 | for (const auto& [small_path, index_pb] : index_map) { |
3212 | 27 | if (!index_pb.has_packed_file_path() || index_pb.packed_file_path().empty()) { |
3213 | 0 | continue; |
3214 | 0 | } |
3215 | 27 | packed_file_updates[index_pb.packed_file_path()].push_back( |
3216 | 27 | PackedSmallFileInfo {small_path}); |
3217 | 27 | } |
3218 | 10 | if (packed_file_updates.empty()) { |
3219 | 0 | LOG_INFO("skip packed file update: no valid merge_file_path in merge_file_segment_index") |
3220 | 0 | .tag("instance_id", instance_id_) |
3221 | 0 | .tag("tablet_id", rs_meta_pb.tablet_id()) |
3222 | 0 | .tag("rowset_id", rs_meta_pb.rowset_id_v2()) |
3223 | 0 | .tag("index_map_size", index_map.size()); |
3224 | 0 | return 0; |
3225 | 0 | } |
3226 | | |
3227 | 10 | const int max_retry_times = std::max(1, config::decrement_packed_file_ref_counts_retry_times); |
3228 | 10 | int ret = 0; |
3229 | 24 | for (auto& [packed_file_path, small_files] : packed_file_updates) { |
3230 | 24 | if (small_files.empty()) { |
3231 | 0 | continue; |
3232 | 0 | } |
3233 | | |
3234 | 24 | bool success = false; |
3235 | 24 | for (int attempt = 1; attempt <= max_retry_times; ++attempt) { |
3236 | 24 | std::unique_ptr<Transaction> txn; |
3237 | 24 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
3238 | 24 | if (err != TxnErrorCode::TXN_OK) { |
3239 | 0 | LOG_WARNING("failed to create txn when updating packed file ref count") |
3240 | 0 | .tag("instance_id", instance_id_) |
3241 | 0 | .tag("packed_file_path", packed_file_path) |
3242 | 0 | .tag("rowset_id", rs_meta_pb.rowset_id_v2()) |
3243 | 0 | .tag("tablet_id", rs_meta_pb.tablet_id()) |
3244 | 0 | .tag("err", err); |
3245 | 0 | ret = -1; |
3246 | 0 | break; |
3247 | 0 | } |
3248 | | |
3249 | 24 | std::string packed_key = packed_file_key({instance_id_, packed_file_path}); |
3250 | 24 | std::string packed_val; |
3251 | 24 | err = txn->get(packed_key, &packed_val); |
3252 | 24 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { |
3253 | 0 | LOG_WARNING("packed file info not found when recycling rowset") |
3254 | 0 | .tag("instance_id", instance_id_) |
3255 | 0 | .tag("packed_file_path", packed_file_path) |
3256 | 0 | .tag("rowset_id", rs_meta_pb.rowset_id_v2()) |
3257 | 0 | .tag("tablet_id", rs_meta_pb.tablet_id()) |
3258 | 0 | .tag("key", hex(packed_key)) |
3259 | 0 | .tag("tablet id", rs_meta_pb.tablet_id()); |
3260 | | // Skip this packed file entry and continue with others |
3261 | 0 | success = true; |
3262 | 0 | break; |
3263 | 0 | } |
3264 | 24 | if (err != TxnErrorCode::TXN_OK) { |
3265 | 0 | LOG_WARNING("failed to get packed file info when recycling rowset") |
3266 | 0 | .tag("instance_id", instance_id_) |
3267 | 0 | .tag("packed_file_path", packed_file_path) |
3268 | 0 | .tag("rowset_id", rs_meta_pb.rowset_id_v2()) |
3269 | 0 | .tag("tablet_id", rs_meta_pb.tablet_id()) |
3270 | 0 | .tag("err", err); |
3271 | 0 | ret = -1; |
3272 | 0 | break; |
3273 | 0 | } |
3274 | | |
3275 | 24 | cloud::PackedFileInfoPB packed_info; |
3276 | 24 | if (!packed_info.ParseFromString(packed_val)) { |
3277 | 0 | LOG_WARNING("failed to parse packed file info when recycling rowset") |
3278 | 0 | .tag("instance_id", instance_id_) |
3279 | 0 | .tag("packed_file_path", packed_file_path) |
3280 | 0 | .tag("rowset_id", rs_meta_pb.rowset_id_v2()) |
3281 | 0 | .tag("tablet_id", rs_meta_pb.tablet_id()); |
3282 | 0 | ret = -1; |
3283 | 0 | break; |
3284 | 0 | } |
3285 | | |
3286 | 24 | LOG_INFO("packed file update check") |
3287 | 24 | .tag("instance_id", instance_id_) |
3288 | 24 | .tag("rowset_id", rs_meta_pb.rowset_id_v2()) |
3289 | 24 | .tag("tablet_id", rs_meta_pb.tablet_id()) |
3290 | 24 | .tag("merged_file_path", packed_file_path) |
3291 | 24 | .tag("requested_small_files", small_files.size()) |
3292 | 24 | .tag("merge_entries", packed_info.slices_size()); |
3293 | | |
3294 | 24 | auto* small_file_entries = packed_info.mutable_slices(); |
3295 | 24 | int64_t changed_files = 0; |
3296 | 24 | int64_t missing_entries = 0; |
3297 | 24 | int64_t already_deleted = 0; |
3298 | 27 | for (const auto& small_file_info : small_files) { |
3299 | 27 | bool found = false; |
3300 | 87 | for (auto& small_file_entry : *small_file_entries) { |
3301 | 87 | if (small_file_entry.path() == small_file_info.small_file_path) { |
3302 | 27 | if (!small_file_entry.deleted()) { |
3303 | 27 | small_file_entry.set_deleted(true); |
3304 | 27 | if (!small_file_entry.corrected()) { |
3305 | 27 | small_file_entry.set_corrected(true); |
3306 | 27 | } |
3307 | 27 | ++changed_files; |
3308 | 27 | } else { |
3309 | 0 | ++already_deleted; |
3310 | 0 | } |
3311 | 27 | found = true; |
3312 | 27 | break; |
3313 | 27 | } |
3314 | 87 | } |
3315 | 27 | if (!found) { |
3316 | 0 | ++missing_entries; |
3317 | 0 | LOG_WARNING("packed file info missing small file entry") |
3318 | 0 | .tag("instance_id", instance_id_) |
3319 | 0 | .tag("packed_file_path", packed_file_path) |
3320 | 0 | .tag("small_file_path", small_file_info.small_file_path) |
3321 | 0 | .tag("rowset_id", rs_meta_pb.rowset_id_v2()) |
3322 | 0 | .tag("tablet_id", rs_meta_pb.tablet_id()); |
3323 | 0 | } |
3324 | 27 | } |
3325 | | |
3326 | 24 | if (changed_files == 0) { |
3327 | 0 | LOG_INFO("skip merge file update: no merge entries changed") |
3328 | 0 | .tag("instance_id", instance_id_) |
3329 | 0 | .tag("rowset_id", rs_meta_pb.rowset_id_v2()) |
3330 | 0 | .tag("tablet_id", rs_meta_pb.tablet_id()) |
3331 | 0 | .tag("merged_file_path", packed_file_path) |
3332 | 0 | .tag("missing_entries", missing_entries) |
3333 | 0 | .tag("already_deleted", already_deleted) |
3334 | 0 | .tag("requested_small_files", small_files.size()) |
3335 | 0 | .tag("merge_entries", packed_info.slices_size()); |
3336 | 0 | success = true; |
3337 | 0 | break; |
3338 | 0 | } |
3339 | | |
3340 | | // Calculate remaining files |
3341 | 24 | int64_t left_file_count = 0; |
3342 | 24 | int64_t left_file_bytes = 0; |
3343 | 141 | for (const auto& small_file_entry : packed_info.slices()) { |
3344 | 141 | if (!small_file_entry.deleted()) { |
3345 | 57 | ++left_file_count; |
3346 | 57 | left_file_bytes += small_file_entry.size(); |
3347 | 57 | } |
3348 | 141 | } |
3349 | 24 | packed_info.set_remaining_slice_bytes(left_file_bytes); |
3350 | 24 | packed_info.set_ref_cnt(left_file_count); |
3351 | 24 | LOG_INFO("updated packed file reference info") |
3352 | 24 | .tag("instance_id", instance_id_) |
3353 | 24 | .tag("rowset_id", rs_meta_pb.rowset_id_v2()) |
3354 | 24 | .tag("tablet_id", rs_meta_pb.tablet_id()) |
3355 | 24 | .tag("packed_file_path", packed_file_path) |
3356 | 24 | .tag("ref_cnt", left_file_count) |
3357 | 24 | .tag("left_file_bytes", left_file_bytes); |
3358 | | |
3359 | 24 | if (left_file_count == 0) { |
3360 | 7 | packed_info.set_state(cloud::PackedFileInfoPB::RECYCLING); |
3361 | 7 | } |
3362 | | |
3363 | 24 | std::string updated_val; |
3364 | 24 | if (!packed_info.SerializeToString(&updated_val)) { |
3365 | 0 | LOG_WARNING("failed to serialize packed file info when recycling rowset") |
3366 | 0 | .tag("instance_id", instance_id_) |
3367 | 0 | .tag("packed_file_path", packed_file_path) |
3368 | 0 | .tag("rowset_id", rs_meta_pb.rowset_id_v2()) |
3369 | 0 | .tag("tablet_id", rs_meta_pb.tablet_id()); |
3370 | 0 | ret = -1; |
3371 | 0 | break; |
3372 | 0 | } |
3373 | | |
3374 | 24 | txn->put(packed_key, updated_val); |
3375 | 24 | err = txn->commit(); |
3376 | 24 | if (err == TxnErrorCode::TXN_OK) { |
3377 | 24 | success = true; |
3378 | 24 | if (left_file_count == 0) { |
3379 | 7 | LOG_INFO("packed file ready to delete, deleting immediately") |
3380 | 7 | .tag("instance_id", instance_id_) |
3381 | 7 | .tag("packed_file_path", packed_file_path); |
3382 | 7 | if (delete_packed_file_and_kv(packed_file_path, packed_key, packed_info) != 0) { |
3383 | 0 | ret = -1; |
3384 | 0 | } |
3385 | 7 | } |
3386 | 24 | break; |
3387 | 24 | } |
3388 | 0 | if (err == TxnErrorCode::TXN_CONFLICT) { |
3389 | 0 | if (attempt >= max_retry_times) { |
3390 | 0 | LOG_WARNING("packed file info update conflict after max retry") |
3391 | 0 | .tag("instance_id", instance_id_) |
3392 | 0 | .tag("packed_file_path", packed_file_path) |
3393 | 0 | .tag("rowset_id", rs_meta_pb.rowset_id_v2()) |
3394 | 0 | .tag("tablet_id", rs_meta_pb.tablet_id()) |
3395 | 0 | .tag("changed_files", changed_files) |
3396 | 0 | .tag("attempt", attempt); |
3397 | 0 | ret = -1; |
3398 | 0 | break; |
3399 | 0 | } |
3400 | 0 | LOG_WARNING("packed file info update conflict, retrying") |
3401 | 0 | .tag("instance_id", instance_id_) |
3402 | 0 | .tag("packed_file_path", packed_file_path) |
3403 | 0 | .tag("rowset_id", rs_meta_pb.rowset_id_v2()) |
3404 | 0 | .tag("tablet_id", rs_meta_pb.tablet_id()) |
3405 | 0 | .tag("changed_files", changed_files) |
3406 | 0 | .tag("attempt", attempt); |
3407 | 0 | sleep_for_packed_file_retry(); |
3408 | 0 | continue; |
3409 | 0 | } |
3410 | | |
3411 | 0 | LOG_WARNING("failed to commit packed file info update") |
3412 | 0 | .tag("instance_id", instance_id_) |
3413 | 0 | .tag("packed_file_path", packed_file_path) |
3414 | 0 | .tag("rowset_id", rs_meta_pb.rowset_id_v2()) |
3415 | 0 | .tag("tablet_id", rs_meta_pb.tablet_id()) |
3416 | 0 | .tag("err", err) |
3417 | 0 | .tag("changed_files", changed_files); |
3418 | 0 | ret = -1; |
3419 | 0 | break; |
3420 | 0 | } |
3421 | | |
3422 | 24 | if (!success) { |
3423 | 0 | ret = -1; |
3424 | 0 | } |
3425 | 24 | } |
3426 | | |
3427 | 10 | return ret; |
3428 | 10 | } |
3429 | | |
3430 | | int InstanceRecycler::decrement_delete_bitmap_packed_file_ref_counts( |
3431 | | int64_t tablet_id, const std::string& rowset_id, |
3432 | 58.2k | DeleteBitmapStorageType* out_storage_type) { |
3433 | 58.2k | if (out_storage_type) { |
3434 | 58.2k | *out_storage_type = DeleteBitmapStorageType::NOT_FOUND; |
3435 | 58.2k | } |
3436 | | |
3437 | | // Get delete bitmap storage info from FDB |
3438 | 58.2k | std::string dbm_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id}); |
3439 | 58.2k | std::unique_ptr<Transaction> txn; |
3440 | 58.2k | TxnErrorCode err = txn_kv_->create_txn(&txn); |
3441 | 58.2k | if (err != TxnErrorCode::TXN_OK) { |
3442 | 0 | LOG_WARNING("failed to create txn when getting delete bitmap storage") |
3443 | 0 | .tag("instance_id", instance_id_) |
3444 | 0 | .tag("tablet_id", tablet_id) |
3445 | 0 | .tag("rowset_id", rowset_id) |
3446 | 0 | .tag("err", err); |
3447 | 0 | return -1; |
3448 | 0 | } |
3449 | | |
3450 | 58.2k | std::string dbm_val; |
3451 | 58.2k | err = txn->get(dbm_key, &dbm_val); |
3452 | 58.2k | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { |
3453 | | // No delete bitmap for this rowset, nothing to do |
3454 | 4.63k | LOG_INFO("delete bitmap not found, skip packed file ref count decrement") |
3455 | 4.63k | .tag("instance_id", instance_id_) |
3456 | 4.63k | .tag("tablet_id", tablet_id) |
3457 | 4.63k | .tag("rowset_id", rowset_id); |
3458 | 4.63k | return 0; |
3459 | 4.63k | } |
3460 | 53.5k | if (err != TxnErrorCode::TXN_OK) { |
3461 | 0 | LOG_WARNING("failed to get delete bitmap storage") |
3462 | 0 | .tag("instance_id", instance_id_) |
3463 | 0 | .tag("tablet_id", tablet_id) |
3464 | 0 | .tag("rowset_id", rowset_id) |
3465 | 0 | .tag("err", err); |
3466 | 0 | return -1; |
3467 | 0 | } |
3468 | | |
3469 | 53.5k | DeleteBitmapStoragePB storage; |
3470 | 53.5k | if (!storage.ParseFromString(dbm_val)) { |
3471 | 0 | LOG_WARNING("failed to parse delete bitmap storage") |
3472 | 0 | .tag("instance_id", instance_id_) |
3473 | 0 | .tag("tablet_id", tablet_id) |
3474 | 0 | .tag("rowset_id", rowset_id); |
3475 | 0 | return -1; |
3476 | 0 | } |
3477 | | |
3478 | 53.5k | if (storage.store_in_fdb()) { |
3479 | 0 | if (out_storage_type) { |
3480 | 0 | *out_storage_type = DeleteBitmapStorageType::IN_FDB; |
3481 | 0 | } |
3482 | 0 | return 0; |
3483 | 0 | } |
3484 | | |
3485 | | // Check if delete bitmap is stored in standalone file. |
3486 | 53.5k | if (!storage.has_packed_slice_location() || |
3487 | 53.5k | storage.packed_slice_location().packed_file_path().empty()) { |
3488 | 53.5k | if (out_storage_type) { |
3489 | 53.5k | *out_storage_type = DeleteBitmapStorageType::STANDALONE_FILE; |
3490 | 53.5k | } |
3491 | 53.5k | return 0; |
3492 | 53.5k | } |
3493 | | |
3494 | 18.4E | if (out_storage_type) { |
3495 | 0 | *out_storage_type = DeleteBitmapStorageType::PACKED_FILE; |
3496 | 0 | } |
3497 | | |
3498 | 18.4E | const auto& packed_loc = storage.packed_slice_location(); |
3499 | 18.4E | const std::string& packed_file_path = packed_loc.packed_file_path(); |
3500 | | |
3501 | 18.4E | LOG_INFO("decrementing delete bitmap packed file ref count") |
3502 | 18.4E | .tag("instance_id", instance_id_) |
3503 | 18.4E | .tag("tablet_id", tablet_id) |
3504 | 18.4E | .tag("rowset_id", rowset_id) |
3505 | 18.4E | .tag("packed_file_path", packed_file_path); |
3506 | | |
3507 | 18.4E | const int max_retry_times = std::max(1, config::decrement_packed_file_ref_counts_retry_times); |
3508 | 18.4E | for (int attempt = 1; attempt <= max_retry_times; ++attempt) { |
3509 | 0 | std::unique_ptr<Transaction> update_txn; |
3510 | 0 | err = txn_kv_->create_txn(&update_txn); |
3511 | 0 | if (err != TxnErrorCode::TXN_OK) { |
3512 | 0 | LOG_WARNING("failed to create txn for delete bitmap packed file update") |
3513 | 0 | .tag("instance_id", instance_id_) |
3514 | 0 | .tag("tablet_id", tablet_id) |
3515 | 0 | .tag("rowset_id", rowset_id) |
3516 | 0 | .tag("err", err); |
3517 | 0 | return -1; |
3518 | 0 | } |
3519 | | |
3520 | 0 | std::string packed_key = packed_file_key({instance_id_, packed_file_path}); |
3521 | 0 | std::string packed_val; |
3522 | 0 | err = update_txn->get(packed_key, &packed_val); |
3523 | 0 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { |
3524 | 0 | LOG_WARNING("packed file info not found for delete bitmap") |
3525 | 0 | .tag("instance_id", instance_id_) |
3526 | 0 | .tag("tablet_id", tablet_id) |
3527 | 0 | .tag("rowset_id", rowset_id) |
3528 | 0 | .tag("packed_file_path", packed_file_path); |
3529 | 0 | return 0; |
3530 | 0 | } |
3531 | 0 | if (err != TxnErrorCode::TXN_OK) { |
3532 | 0 | LOG_WARNING("failed to get packed file info for delete bitmap") |
3533 | 0 | .tag("instance_id", instance_id_) |
3534 | 0 | .tag("tablet_id", tablet_id) |
3535 | 0 | .tag("rowset_id", rowset_id) |
3536 | 0 | .tag("packed_file_path", packed_file_path) |
3537 | 0 | .tag("err", err); |
3538 | 0 | return -1; |
3539 | 0 | } |
3540 | | |
3541 | 0 | cloud::PackedFileInfoPB packed_info; |
3542 | 0 | if (!packed_info.ParseFromString(packed_val)) { |
3543 | 0 | LOG_WARNING("failed to parse packed file info for delete bitmap") |
3544 | 0 | .tag("instance_id", instance_id_) |
3545 | 0 | .tag("tablet_id", tablet_id) |
3546 | 0 | .tag("rowset_id", rowset_id) |
3547 | 0 | .tag("packed_file_path", packed_file_path); |
3548 | 0 | return -1; |
3549 | 0 | } |
3550 | | |
3551 | | // Find and mark the small file entry as deleted |
3552 | | // Use tablet_id and rowset_id to match entry instead of path, |
3553 | | // because path format may vary with path_version (with or without shard prefix) |
3554 | 0 | auto* entries = packed_info.mutable_slices(); |
3555 | 0 | bool found = false; |
3556 | 0 | bool already_deleted = false; |
3557 | 0 | for (auto& entry : *entries) { |
3558 | 0 | if (entry.tablet_id() == tablet_id && entry.rowset_id() == rowset_id) { |
3559 | 0 | if (!entry.deleted()) { |
3560 | 0 | entry.set_deleted(true); |
3561 | 0 | if (!entry.corrected()) { |
3562 | 0 | entry.set_corrected(true); |
3563 | 0 | } |
3564 | 0 | } else { |
3565 | 0 | already_deleted = true; |
3566 | 0 | } |
3567 | 0 | found = true; |
3568 | 0 | break; |
3569 | 0 | } |
3570 | 0 | } |
3571 | |
|
3572 | 0 | if (!found) { |
3573 | 0 | LOG_WARNING("delete bitmap entry not found in packed file") |
3574 | 0 | .tag("instance_id", instance_id_) |
3575 | 0 | .tag("tablet_id", tablet_id) |
3576 | 0 | .tag("rowset_id", rowset_id) |
3577 | 0 | .tag("packed_file_path", packed_file_path); |
3578 | 0 | return 0; |
3579 | 0 | } |
3580 | | |
3581 | 0 | if (already_deleted) { |
3582 | 0 | LOG_INFO("delete bitmap entry already deleted in packed file") |
3583 | 0 | .tag("instance_id", instance_id_) |
3584 | 0 | .tag("tablet_id", tablet_id) |
3585 | 0 | .tag("rowset_id", rowset_id) |
3586 | 0 | .tag("packed_file_path", packed_file_path); |
3587 | 0 | return 0; |
3588 | 0 | } |
3589 | | |
3590 | | // Calculate remaining files |
3591 | 0 | int64_t left_file_count = 0; |
3592 | 0 | int64_t left_file_bytes = 0; |
3593 | 0 | for (const auto& entry : packed_info.slices()) { |
3594 | 0 | if (!entry.deleted()) { |
3595 | 0 | ++left_file_count; |
3596 | 0 | left_file_bytes += entry.size(); |
3597 | 0 | } |
3598 | 0 | } |
3599 | 0 | packed_info.set_remaining_slice_bytes(left_file_bytes); |
3600 | 0 | packed_info.set_ref_cnt(left_file_count); |
3601 | |
|
3602 | 0 | if (left_file_count == 0) { |
3603 | 0 | packed_info.set_state(cloud::PackedFileInfoPB::RECYCLING); |
3604 | 0 | } |
3605 | |
|
3606 | 0 | std::string updated_val; |
3607 | 0 | if (!packed_info.SerializeToString(&updated_val)) { |
3608 | 0 | LOG_WARNING("failed to serialize packed file info for delete bitmap") |
3609 | 0 | .tag("instance_id", instance_id_) |
3610 | 0 | .tag("tablet_id", tablet_id) |
3611 | 0 | .tag("rowset_id", rowset_id) |
3612 | 0 | .tag("packed_file_path", packed_file_path); |
3613 | 0 | return -1; |
3614 | 0 | } |
3615 | | |
3616 | 0 | update_txn->put(packed_key, updated_val); |
3617 | 0 | err = update_txn->commit(); |
3618 | 0 | if (err == TxnErrorCode::TXN_OK) { |
3619 | 0 | LOG_INFO("delete bitmap packed file ref count decremented") |
3620 | 0 | .tag("instance_id", instance_id_) |
3621 | 0 | .tag("tablet_id", tablet_id) |
3622 | 0 | .tag("rowset_id", rowset_id) |
3623 | 0 | .tag("packed_file_path", packed_file_path) |
3624 | 0 | .tag("left_file_count", left_file_count); |
3625 | 0 | if (left_file_count == 0) { |
3626 | 0 | if (delete_packed_file_and_kv(packed_file_path, packed_key, packed_info) != 0) { |
3627 | 0 | return -1; |
3628 | 0 | } |
3629 | 0 | } |
3630 | 0 | return 0; |
3631 | 0 | } |
3632 | 0 | if (err == TxnErrorCode::TXN_CONFLICT) { |
3633 | 0 | if (attempt >= max_retry_times) { |
3634 | 0 | LOG_WARNING("delete bitmap packed file update conflict after max retry") |
3635 | 0 | .tag("instance_id", instance_id_) |
3636 | 0 | .tag("tablet_id", tablet_id) |
3637 | 0 | .tag("rowset_id", rowset_id) |
3638 | 0 | .tag("packed_file_path", packed_file_path) |
3639 | 0 | .tag("attempt", attempt); |
3640 | 0 | return -1; |
3641 | 0 | } |
3642 | 0 | sleep_for_packed_file_retry(); |
3643 | 0 | continue; |
3644 | 0 | } |
3645 | | |
3646 | 0 | LOG_WARNING("failed to commit delete bitmap packed file update") |
3647 | 0 | .tag("instance_id", instance_id_) |
3648 | 0 | .tag("tablet_id", tablet_id) |
3649 | 0 | .tag("rowset_id", rowset_id) |
3650 | 0 | .tag("packed_file_path", packed_file_path) |
3651 | 0 | .tag("err", err); |
3652 | 0 | return -1; |
3653 | 0 | } |
3654 | | |
3655 | 18.4E | return -1; |
3656 | 18.4E | } |
3657 | | |
3658 | | int InstanceRecycler::delete_packed_file_and_kv(const std::string& packed_file_path, |
3659 | | const std::string& packed_key, |
3660 | 7 | const cloud::PackedFileInfoPB& packed_info) { |
3661 | 7 | if (!packed_info.has_resource_id() || packed_info.resource_id().empty()) { |
3662 | 0 | LOG_WARNING("packed file missing resource id when recycling") |
3663 | 0 | .tag("instance_id", instance_id_) |
3664 | 0 | .tag("packed_file_path", packed_file_path); |
3665 | 0 | return -1; |
3666 | 0 | } |
3667 | | |
3668 | 7 | auto [resource_id, accessor] = resolve_packed_file_accessor(packed_info.resource_id()); |
3669 | 7 | if (!accessor) { |
3670 | 0 | LOG_WARNING("no accessor available to delete packed file") |
3671 | 0 | .tag("instance_id", instance_id_) |
3672 | 0 | .tag("packed_file_path", packed_file_path) |
3673 | 0 | .tag("resource_id", packed_info.resource_id()); |
3674 | 0 | return -1; |
3675 | 0 | } |
3676 | | |
3677 | 7 | int del_ret = accessor->delete_file(packed_file_path); |
3678 | 7 | if (del_ret != 0 && del_ret != 1) { |
3679 | 0 | LOG_WARNING("failed to delete packed file") |
3680 | 0 | .tag("instance_id", instance_id_) |
3681 | 0 | .tag("packed_file_path", packed_file_path) |
3682 | 0 | .tag("resource_id", resource_id) |
3683 | 0 | .tag("ret", del_ret); |
3684 | 0 | return -1; |
3685 | 0 | } |
3686 | 7 | if (del_ret == 1) { |
3687 | 0 | LOG_INFO("packed file already removed") |
3688 | 0 | .tag("instance_id", instance_id_) |
3689 | 0 | .tag("packed_file_path", packed_file_path) |
3690 | 0 | .tag("resource_id", resource_id); |
3691 | 7 | } else { |
3692 | 7 | LOG_INFO("deleted packed file") |
3693 | 7 | .tag("instance_id", instance_id_) |
3694 | 7 | .tag("packed_file_path", packed_file_path) |
3695 | 7 | .tag("resource_id", resource_id); |
3696 | 7 | } |
3697 | | |
3698 | 7 | const int max_retry_times = std::max(1, config::packed_file_txn_retry_times); |
3699 | 7 | for (int attempt = 1; attempt <= max_retry_times; ++attempt) { |
3700 | 7 | std::unique_ptr<Transaction> del_txn; |
3701 | 7 | TxnErrorCode err = txn_kv_->create_txn(&del_txn); |
3702 | 7 | if (err != TxnErrorCode::TXN_OK) { |
3703 | 0 | LOG_WARNING("failed to create txn when removing packed file kv") |
3704 | 0 | .tag("instance_id", instance_id_) |
3705 | 0 | .tag("packed_file_path", packed_file_path) |
3706 | 0 | .tag("attempt", attempt) |
3707 | 0 | .tag("err", err); |
3708 | 0 | return -1; |
3709 | 0 | } |
3710 | | |
3711 | 7 | std::string latest_val; |
3712 | 7 | err = del_txn->get(packed_key, &latest_val); |
3713 | 7 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { |
3714 | 0 | return 0; |
3715 | 0 | } |
3716 | 7 | if (err != TxnErrorCode::TXN_OK) { |
3717 | 0 | LOG_WARNING("failed to re-read packed file kv before removal") |
3718 | 0 | .tag("instance_id", instance_id_) |
3719 | 0 | .tag("packed_file_path", packed_file_path) |
3720 | 0 | .tag("attempt", attempt) |
3721 | 0 | .tag("err", err); |
3722 | 0 | return -1; |
3723 | 0 | } |
3724 | | |
3725 | 7 | cloud::PackedFileInfoPB latest_info; |
3726 | 7 | if (!latest_info.ParseFromString(latest_val)) { |
3727 | 0 | LOG_WARNING("failed to parse packed file info before removal") |
3728 | 0 | .tag("instance_id", instance_id_) |
3729 | 0 | .tag("packed_file_path", packed_file_path) |
3730 | 0 | .tag("attempt", attempt); |
3731 | 0 | return -1; |
3732 | 0 | } |
3733 | | |
3734 | 7 | if (!(latest_info.state() == cloud::PackedFileInfoPB::RECYCLING && |
3735 | 7 | latest_info.ref_cnt() == 0)) { |
3736 | 0 | LOG_INFO("packed file state changed before removal, skip deleting kv") |
3737 | 0 | .tag("instance_id", instance_id_) |
3738 | 0 | .tag("packed_file_path", packed_file_path) |
3739 | 0 | .tag("attempt", attempt); |
3740 | 0 | return 0; |
3741 | 0 | } |
3742 | | |
3743 | 7 | del_txn->remove(packed_key); |
3744 | 7 | err = del_txn->commit(); |
3745 | 7 | if (err == TxnErrorCode::TXN_OK) { |
3746 | 7 | LOG_INFO("removed packed file metadata") |
3747 | 7 | .tag("instance_id", instance_id_) |
3748 | 7 | .tag("packed_file_path", packed_file_path); |
3749 | 7 | return 0; |
3750 | 7 | } |
3751 | 0 | if (err == TxnErrorCode::TXN_CONFLICT) { |
3752 | 0 | if (attempt >= max_retry_times) { |
3753 | 0 | LOG_WARNING("failed to remove packed file kv due to conflict after max retry") |
3754 | 0 | .tag("instance_id", instance_id_) |
3755 | 0 | .tag("packed_file_path", packed_file_path) |
3756 | 0 | .tag("attempt", attempt); |
3757 | 0 | return -1; |
3758 | 0 | } |
3759 | 0 | LOG_WARNING("failed to remove packed file kv due to conflict, retrying") |
3760 | 0 | .tag("instance_id", instance_id_) |
3761 | 0 | .tag("packed_file_path", packed_file_path) |
3762 | 0 | .tag("attempt", attempt); |
3763 | 0 | sleep_for_packed_file_retry(); |
3764 | 0 | continue; |
3765 | 0 | } |
3766 | 0 | LOG_WARNING("failed to remove packed file kv") |
3767 | 0 | .tag("instance_id", instance_id_) |
3768 | 0 | .tag("packed_file_path", packed_file_path) |
3769 | 0 | .tag("attempt", attempt) |
3770 | 0 | .tag("err", err); |
3771 | 0 | return -1; |
3772 | 0 | } |
3773 | 0 | return -1; |
3774 | 7 | } |
3775 | | |
3776 | | int InstanceRecycler::delete_rowset_data( |
3777 | | const std::map<std::string, doris::RowsetMetaCloudPB>& rowsets, RowsetRecyclingState type, |
3778 | 98 | RecyclerMetricsContext& metrics_context) { |
3779 | 98 | int ret = 0; |
3780 | | // resource_id -> file_paths |
3781 | 98 | std::map<std::string, std::vector<std::string>> resource_file_paths; |
3782 | | // (resource_id, tablet_id, rowset_id) |
3783 | 98 | std::vector<std::tuple<std::string, int64_t, std::string>> rowsets_delete_by_prefix; |
3784 | 98 | bool is_formal_rowset = (type == RowsetRecyclingState::FORMAL_ROWSET); |
3785 | | |
3786 | 57.1k | for (const auto& [_, rs] : rowsets) { |
3787 | | // we have to treat tmp rowset as "orphans" that may not related to any existing tablets |
3788 | | // due to aborted schema change. |
3789 | 57.1k | if (is_formal_rowset) { |
3790 | 3.15k | std::lock_guard lock(recycled_tablets_mtx_); |
3791 | 3.15k | if (recycled_tablets_.count(rs.tablet_id()) && rs.packed_slice_locations_size() == 0) { |
3792 | | // Tablet has been recycled and this rowset has no packed slices, so file data |
3793 | | // should already be gone; skip to avoid redundant deletes. Rowsets with packed |
3794 | | // slice info must still run to decrement packed file ref counts. |
3795 | 0 | continue; |
3796 | 0 | } |
3797 | 3.15k | } |
3798 | | |
3799 | 57.1k | int64_t num_segments = rs.num_segments(); |
3800 | | // Check num_segments before accessor lookup, because empty rowsets |
3801 | | // (e.g. base compaction output of empty rowsets) may have no resource_id |
3802 | | // set. Skipping them early avoids a spurious "no such resource id" error |
3803 | | // that marks the entire batch as failed and prevents txn_remove from |
3804 | | // cleaning up recycle KV keys. |
3805 | 57.1k | if (num_segments <= 0) { |
3806 | 0 | metrics_context.total_recycled_num++; |
3807 | 0 | metrics_context.total_recycled_data_size += rs.total_disk_size(); |
3808 | 0 | continue; |
3809 | 0 | } |
3810 | | |
3811 | 57.1k | auto it = accessor_map_.find(rs.resource_id()); |
3812 | | // possible if the accessor is not initialized correctly |
3813 | 57.1k | if (it == accessor_map_.end()) [[unlikely]] { |
3814 | 3.00k | LOG_WARNING("instance has no such resource id") |
3815 | 3.00k | .tag("instance_id", instance_id_) |
3816 | 3.00k | .tag("resource_id", rs.resource_id()); |
3817 | 3.00k | ret = -1; |
3818 | 3.00k | continue; |
3819 | 3.00k | } |
3820 | | |
3821 | 54.1k | auto& file_paths = resource_file_paths[rs.resource_id()]; |
3822 | 54.1k | const auto& rowset_id = rs.rowset_id_v2(); |
3823 | 54.1k | int64_t tablet_id = rs.tablet_id(); |
3824 | 54.1k | LOG_INFO("recycle rowset merge index size") |
3825 | 54.1k | .tag("instance_id", instance_id_) |
3826 | 54.1k | .tag("tablet_id", tablet_id) |
3827 | 54.1k | .tag("rowset_id", rowset_id) |
3828 | 54.1k | .tag("merge_index_size", rs.packed_slice_locations_size()); |
3829 | 54.1k | if (decrement_packed_file_ref_counts(rs) != 0) { |
3830 | 0 | ret = -1; |
3831 | 0 | continue; |
3832 | 0 | } |
3833 | | |
3834 | | // Process delete bitmap - check where it's stored. |
3835 | 54.1k | DeleteBitmapStorageType delete_bitmap_storage_type = DeleteBitmapStorageType::NOT_FOUND; |
3836 | 54.1k | if (decrement_delete_bitmap_packed_file_ref_counts(tablet_id, rowset_id, |
3837 | 54.1k | &delete_bitmap_storage_type) != 0) { |
3838 | 0 | LOG_WARNING("failed to decrement delete bitmap packed file ref count") |
3839 | 0 | .tag("instance_id", instance_id_) |
3840 | 0 | .tag("tablet_id", tablet_id) |
3841 | 0 | .tag("rowset_id", rowset_id); |
3842 | 0 | ret = -1; |
3843 | 0 | continue; |
3844 | 0 | } |
3845 | 54.1k | if (delete_bitmap_storage_type == DeleteBitmapStorageType::STANDALONE_FILE) { |
3846 | 51.5k | file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id)); |
3847 | 51.5k | } |
3848 | | |
3849 | | // Process inverted indexes |
3850 | 54.1k | std::vector<std::pair<int64_t, std::string>> index_ids; |
3851 | | // default format as v1. |
3852 | 54.1k | InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1; |
3853 | 54.1k | int inverted_index_get_ret = 0; |
3854 | 54.1k | if (rs.has_tablet_schema()) { |
3855 | 53.5k | for (const auto& index : rs.tablet_schema().index()) { |
3856 | 53.5k | if (index.has_index_type() && index.index_type() == IndexType::INVERTED) { |
3857 | 53.5k | index_ids.emplace_back(index.index_id(), index.index_suffix_name()); |
3858 | 53.5k | } |
3859 | 53.5k | } |
3860 | 26.6k | if (rs.tablet_schema().has_inverted_index_storage_format()) { |
3861 | 26.5k | index_format = rs.tablet_schema().inverted_index_storage_format(); |
3862 | 26.5k | } |
3863 | 27.5k | } else { |
3864 | 27.5k | if (!rs.has_index_id() || !rs.has_schema_version()) { |
3865 | 0 | LOG(WARNING) << "rowset must have either schema or schema_version and index_id, " |
3866 | 0 | "instance_id=" |
3867 | 0 | << instance_id_ << " tablet_id=" << tablet_id |
3868 | 0 | << " rowset_id=" << rowset_id; |
3869 | 0 | ret = -1; |
3870 | 0 | continue; |
3871 | 0 | } |
3872 | 27.5k | InvertedIndexInfo index_info; |
3873 | 27.5k | inverted_index_get_ret = |
3874 | 27.5k | inverted_index_id_cache_->get(rs.index_id(), rs.schema_version(), index_info); |
3875 | 27.5k | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset", |
3876 | 27.5k | &inverted_index_get_ret); |
3877 | 27.5k | if (inverted_index_get_ret == 0) { |
3878 | 27.0k | index_format = index_info.first; |
3879 | 27.0k | index_ids = index_info.second; |
3880 | 27.0k | } else if (inverted_index_get_ret == 1) { |
3881 | | // 1. Schema kv not found means tablet has been recycled |
3882 | | // Maybe some tablet recycle failed by some bugs |
3883 | | // We need to delete again to double check |
3884 | | // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes, |
3885 | | // because we are uncertain about the inverted index information. |
3886 | | // If there are inverted indexes, some data might not be deleted, |
3887 | | // but this is acceptable as we have made our best effort to delete the data. |
3888 | 507 | LOG_INFO( |
3889 | 507 | "delete rowset data schema kv not found, need to delete again to " |
3890 | 507 | "double " |
3891 | 507 | "check") |
3892 | 507 | .tag("instance_id", instance_id_) |
3893 | 507 | .tag("tablet_id", tablet_id) |
3894 | 507 | .tag("rowset", rs.ShortDebugString()); |
3895 | | // Currently index_ids is guaranteed to be empty, |
3896 | | // but we clear it again here as a safeguard against future code changes |
3897 | | // that might cause index_ids to no longer be empty |
3898 | 507 | index_format = InvertedIndexStorageFormatPB::V2; |
3899 | 507 | index_ids.clear(); |
3900 | 18.4E | } else { |
3901 | 18.4E | LOG(WARNING) << "failed to get schema kv for rowset, instance_id=" << instance_id_ |
3902 | 18.4E | << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id; |
3903 | 18.4E | ret = -1; |
3904 | 18.4E | continue; |
3905 | 18.4E | } |
3906 | 27.5k | } |
3907 | 54.2k | if (rs.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) { |
3908 | | // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data |
3909 | | // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix |
3910 | 5 | rowsets_delete_by_prefix.emplace_back(rs.resource_id(), tablet_id, rs.rowset_id_v2()); |
3911 | 5 | continue; |
3912 | 5 | } |
3913 | 323k | for (int64_t i = 0; i < num_segments; ++i) { |
3914 | 268k | file_paths.push_back(segment_path(tablet_id, rowset_id, i)); |
3915 | 268k | if (index_format == InvertedIndexStorageFormatPB::V1) { |
3916 | 531k | for (const auto& index_id : index_ids) { |
3917 | 531k | file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i, |
3918 | 531k | index_id.first, index_id.second)); |
3919 | 531k | } |
3920 | 266k | } else if (!index_ids.empty() || inverted_index_get_ret == 1) { |
3921 | | // try to recycle inverted index v2 when get_ret == 1 |
3922 | | // we treat schema not found as if it has a v2 format inverted index |
3923 | | // to reduce chance of data leakage |
3924 | 2.50k | if (inverted_index_get_ret == 1) { |
3925 | 2.50k | LOG_INFO("delete rowset data schema kv not found, try to delete index file") |
3926 | 2.50k | .tag("instance_id", instance_id_) |
3927 | 2.50k | .tag("inverted index v2 path", |
3928 | 2.50k | inverted_index_path_v2(tablet_id, rowset_id, i)); |
3929 | 2.50k | } |
3930 | 2.50k | file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i)); |
3931 | 2.50k | } |
3932 | 268k | } |
3933 | 54.1k | } |
3934 | | |
3935 | 98 | SyncExecutor<int> concurrent_delete_executor(_thread_pool_group.s3_producer_pool, |
3936 | 98 | "delete_rowset_data", |
3937 | 98 | [](const int& ret) { return ret != 0; });recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi Line | Count | Source | 3937 | 5 | [](const int& ret) { return ret != 0; }); |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi Line | Count | Source | 3937 | 51 | [](const int& ret) { return ret != 0; }); |
|
3938 | 98 | for (auto& [resource_id, file_paths] : resource_file_paths) { |
3939 | 51 | concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int { |
3940 | 51 | DCHECK(accessor_map_.count(*rid)) |
3941 | 0 | << "uninitilized accessor, instance_id=" << instance_id_ |
3942 | 0 | << " resource_id=" << resource_id << " path[0]=" << (*paths)[0]; |
3943 | 51 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id", |
3944 | 51 | &accessor_map_); |
3945 | 51 | if (!accessor_map_.contains(*rid)) { |
3946 | 0 | LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id") |
3947 | 0 | .tag("resource_id", resource_id) |
3948 | 0 | .tag("instance_id", instance_id_); |
3949 | 0 | return -1; |
3950 | 0 | } |
3951 | 51 | auto& accessor = accessor_map_[*rid]; |
3952 | 51 | int ret = accessor->delete_files(*paths); |
3953 | 51 | if (!ret) { |
3954 | | // deduplication of different files with the same rowset id |
3955 | | // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat |
3956 | | //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx |
3957 | 51 | std::set<std::string> deleted_rowset_id; |
3958 | | |
3959 | 51 | std::for_each(paths->begin(), paths->end(), |
3960 | 51 | [&metrics_context, &rowsets, &deleted_rowset_id, |
3961 | 856k | this](const std::string& path) { |
3962 | 856k | std::vector<std::string> str; |
3963 | 856k | butil::SplitString(path, '/', &str); |
3964 | 856k | std::string rowset_id; |
3965 | 856k | if (auto pos = str.back().find('_'); pos != std::string::npos) { |
3966 | 852k | rowset_id = str.back().substr(0, pos); |
3967 | 852k | } else { |
3968 | 3.95k | if (path.find("packed_file/") != std::string::npos) { |
3969 | 0 | return; // packed files do not have rowset_id encoded |
3970 | 0 | } |
3971 | 3.95k | LOG(WARNING) << "failed to parse rowset_id, path=" << path; |
3972 | 3.95k | return; |
3973 | 3.95k | } |
3974 | 852k | auto rs_meta = rowsets.find(rowset_id); |
3975 | 852k | if (rs_meta != rowsets.end() && |
3976 | 858k | !deleted_rowset_id.contains(rowset_id)) { |
3977 | 54.1k | deleted_rowset_id.emplace(rowset_id); |
3978 | 54.1k | metrics_context.total_recycled_data_size += |
3979 | 54.1k | rs_meta->second.total_disk_size(); |
3980 | 54.1k | segment_metrics_context_.total_recycled_num += |
3981 | 54.1k | rs_meta->second.num_segments(); |
3982 | 54.1k | segment_metrics_context_.total_recycled_data_size += |
3983 | 54.1k | rs_meta->second.total_disk_size(); |
3984 | 54.1k | metrics_context.total_recycled_num++; |
3985 | 54.1k | } |
3986 | 852k | }); recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_ Line | Count | Source | 3961 | 7 | this](const std::string& path) { | 3962 | 7 | std::vector<std::string> str; | 3963 | 7 | butil::SplitString(path, '/', &str); | 3964 | 7 | std::string rowset_id; | 3965 | 7 | if (auto pos = str.back().find('_'); pos != std::string::npos) { | 3966 | 7 | rowset_id = str.back().substr(0, pos); | 3967 | 7 | } else { | 3968 | 0 | if (path.find("packed_file/") != std::string::npos) { | 3969 | 0 | return; // packed files do not have rowset_id encoded | 3970 | 0 | } | 3971 | 0 | LOG(WARNING) << "failed to parse rowset_id, path=" << path; | 3972 | 0 | return; | 3973 | 0 | } | 3974 | 7 | auto rs_meta = rowsets.find(rowset_id); | 3975 | 7 | if (rs_meta != rowsets.end() && | 3976 | 7 | !deleted_rowset_id.contains(rowset_id)) { | 3977 | 7 | deleted_rowset_id.emplace(rowset_id); | 3978 | 7 | metrics_context.total_recycled_data_size += | 3979 | 7 | rs_meta->second.total_disk_size(); | 3980 | 7 | segment_metrics_context_.total_recycled_num += | 3981 | 7 | rs_meta->second.num_segments(); | 3982 | 7 | segment_metrics_context_.total_recycled_data_size += | 3983 | 7 | rs_meta->second.total_disk_size(); | 3984 | 7 | metrics_context.total_recycled_num++; | 3985 | 7 | } | 3986 | 7 | }); |
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_ Line | Count | Source | 3961 | 856k | this](const std::string& path) { | 3962 | 856k | std::vector<std::string> str; | 3963 | 856k | butil::SplitString(path, '/', &str); | 3964 | 856k | std::string rowset_id; | 3965 | 856k | if (auto pos = str.back().find('_'); pos != std::string::npos) { | 3966 | 852k | rowset_id = str.back().substr(0, pos); | 3967 | 852k | } else { | 3968 | 3.95k | if (path.find("packed_file/") != std::string::npos) { | 3969 | 0 | return; // packed files do not have rowset_id encoded | 3970 | 0 | } | 3971 | 3.95k | LOG(WARNING) << "failed to parse rowset_id, path=" << path; | 3972 | 3.95k | return; | 3973 | 3.95k | } | 3974 | 852k | auto rs_meta = rowsets.find(rowset_id); | 3975 | 852k | if (rs_meta != rowsets.end() && | 3976 | 858k | !deleted_rowset_id.contains(rowset_id)) { | 3977 | 54.1k | deleted_rowset_id.emplace(rowset_id); | 3978 | 54.1k | metrics_context.total_recycled_data_size += | 3979 | 54.1k | rs_meta->second.total_disk_size(); | 3980 | 54.1k | segment_metrics_context_.total_recycled_num += | 3981 | 54.1k | rs_meta->second.num_segments(); | 3982 | 54.1k | segment_metrics_context_.total_recycled_data_size += | 3983 | 54.1k | rs_meta->second.total_disk_size(); | 3984 | 54.1k | metrics_context.total_recycled_num++; | 3985 | 54.1k | } | 3986 | 852k | }); |
|
3987 | 51 | } |
3988 | 51 | return ret; |
3989 | 51 | }); recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv Line | Count | Source | 3939 | 5 | concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int { | 3940 | 5 | DCHECK(accessor_map_.count(*rid)) | 3941 | 0 | << "uninitilized accessor, instance_id=" << instance_id_ | 3942 | 0 | << " resource_id=" << resource_id << " path[0]=" << (*paths)[0]; | 3943 | 5 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id", | 3944 | 5 | &accessor_map_); | 3945 | 5 | if (!accessor_map_.contains(*rid)) { | 3946 | 0 | LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id") | 3947 | 0 | .tag("resource_id", resource_id) | 3948 | 0 | .tag("instance_id", instance_id_); | 3949 | 0 | return -1; | 3950 | 0 | } | 3951 | 5 | auto& accessor = accessor_map_[*rid]; | 3952 | 5 | int ret = accessor->delete_files(*paths); | 3953 | 5 | if (!ret) { | 3954 | | // deduplication of different files with the same rowset id | 3955 | | // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat | 3956 | | //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx | 3957 | 5 | std::set<std::string> deleted_rowset_id; | 3958 | | | 3959 | 5 | std::for_each(paths->begin(), paths->end(), | 3960 | 5 | [&metrics_context, &rowsets, &deleted_rowset_id, | 3961 | 5 | this](const std::string& path) { | 3962 | 5 | std::vector<std::string> str; | 3963 | 5 | butil::SplitString(path, '/', &str); | 3964 | 5 | std::string rowset_id; | 3965 | 5 | if (auto pos = str.back().find('_'); pos != std::string::npos) { | 3966 | 5 | rowset_id = str.back().substr(0, pos); | 3967 | 5 | } else { | 3968 | 5 | if (path.find("packed_file/") != std::string::npos) { | 3969 | 5 | return; // packed files do not have rowset_id encoded | 3970 | 5 | } | 3971 | 5 | 
LOG(WARNING) << "failed to parse rowset_id, path=" << path; | 3972 | 5 | return; | 3973 | 5 | } | 3974 | 5 | auto rs_meta = rowsets.find(rowset_id); | 3975 | 5 | if (rs_meta != rowsets.end() && | 3976 | 5 | !deleted_rowset_id.contains(rowset_id)) { | 3977 | 5 | deleted_rowset_id.emplace(rowset_id); | 3978 | 5 | metrics_context.total_recycled_data_size += | 3979 | 5 | rs_meta->second.total_disk_size(); | 3980 | 5 | segment_metrics_context_.total_recycled_num += | 3981 | 5 | rs_meta->second.num_segments(); | 3982 | 5 | segment_metrics_context_.total_recycled_data_size += | 3983 | 5 | rs_meta->second.total_disk_size(); | 3984 | 5 | metrics_context.total_recycled_num++; | 3985 | 5 | } | 3986 | 5 | }); | 3987 | 5 | } | 3988 | 5 | return ret; | 3989 | 5 | }); |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv Line | Count | Source | 3939 | 46 | concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int { | 3940 | 46 | DCHECK(accessor_map_.count(*rid)) | 3941 | 0 | << "uninitilized accessor, instance_id=" << instance_id_ | 3942 | 0 | << " resource_id=" << resource_id << " path[0]=" << (*paths)[0]; | 3943 | 46 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id", | 3944 | 46 | &accessor_map_); | 3945 | 46 | if (!accessor_map_.contains(*rid)) { | 3946 | 0 | LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id") | 3947 | 0 | .tag("resource_id", resource_id) | 3948 | 0 | .tag("instance_id", instance_id_); | 3949 | 0 | return -1; | 3950 | 0 | } | 3951 | 46 | auto& accessor = accessor_map_[*rid]; | 3952 | 46 | int ret = accessor->delete_files(*paths); | 3953 | 46 | if (!ret) { | 3954 | | // deduplication of different files with the same rowset id | 3955 | | // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat | 3956 | | //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx | 3957 | 46 | std::set<std::string> deleted_rowset_id; | 3958 | | | 3959 | 46 | std::for_each(paths->begin(), paths->end(), | 3960 | 46 | [&metrics_context, &rowsets, &deleted_rowset_id, | 3961 | 46 | this](const std::string& path) { | 3962 | 46 | std::vector<std::string> str; | 3963 | 46 | butil::SplitString(path, '/', &str); | 3964 | 46 | std::string rowset_id; | 3965 | 46 | if (auto pos = str.back().find('_'); pos != std::string::npos) { | 3966 | 46 | rowset_id = str.back().substr(0, pos); | 3967 | 46 | } else { | 3968 | 46 | if (path.find("packed_file/") != std::string::npos) { | 3969 | 46 | return; // packed files do not have rowset_id encoded | 3970 | 46 | } | 3971 
| 46 | LOG(WARNING) << "failed to parse rowset_id, path=" << path; | 3972 | 46 | return; | 3973 | 46 | } | 3974 | 46 | auto rs_meta = rowsets.find(rowset_id); | 3975 | 46 | if (rs_meta != rowsets.end() && | 3976 | 46 | !deleted_rowset_id.contains(rowset_id)) { | 3977 | 46 | deleted_rowset_id.emplace(rowset_id); | 3978 | 46 | metrics_context.total_recycled_data_size += | 3979 | 46 | rs_meta->second.total_disk_size(); | 3980 | 46 | segment_metrics_context_.total_recycled_num += | 3981 | 46 | rs_meta->second.num_segments(); | 3982 | 46 | segment_metrics_context_.total_recycled_data_size += | 3983 | 46 | rs_meta->second.total_disk_size(); | 3984 | 46 | metrics_context.total_recycled_num++; | 3985 | 46 | } | 3986 | 46 | }); | 3987 | 46 | } | 3988 | 46 | return ret; | 3989 | 46 | }); |
|
3990 | 51 | } |
3991 | 98 | for (const auto& [resource_id, tablet_id, rowset_id] : rowsets_delete_by_prefix) { |
3992 | 5 | LOG_INFO( |
3993 | 5 | "delete rowset {} by prefix because it's in BEGIN_PARTIAL_UPDATE state, " |
3994 | 5 | "resource_id={}, tablet_id={}, instance_id={}, task_type={}", |
3995 | 5 | rowset_id, resource_id, tablet_id, instance_id_, metrics_context.operation_type); |
3996 | 5 | concurrent_delete_executor.add([&]() -> int { |
3997 | 5 | int ret = delete_rowset_data(resource_id, tablet_id, rowset_id); |
3998 | 5 | if (!ret) { |
3999 | 5 | auto rs = rowsets.at(rowset_id); |
4000 | 5 | metrics_context.total_recycled_data_size += rs.total_disk_size(); |
4001 | 5 | metrics_context.total_recycled_num++; |
4002 | 5 | segment_metrics_context_.total_recycled_data_size += rs.total_disk_size(); |
4003 | 5 | segment_metrics_context_.total_recycled_num += rs.num_segments(); |
4004 | 5 | } |
4005 | 5 | return ret; |
4006 | 5 | }); Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv Line | Count | Source | 3996 | 5 | concurrent_delete_executor.add([&]() -> int { | 3997 | 5 | int ret = delete_rowset_data(resource_id, tablet_id, rowset_id); | 3998 | 5 | if (!ret) { | 3999 | 5 | auto rs = rowsets.at(rowset_id); | 4000 | 5 | metrics_context.total_recycled_data_size += rs.total_disk_size(); | 4001 | 5 | metrics_context.total_recycled_num++; | 4002 | 5 | segment_metrics_context_.total_recycled_data_size += rs.total_disk_size(); | 4003 | 5 | segment_metrics_context_.total_recycled_num += rs.num_segments(); | 4004 | 5 | } | 4005 | 5 | return ret; | 4006 | 5 | }); |
|
4007 | 5 | } |
4008 | | |
4009 | 98 | bool finished = true; |
4010 | 98 | std::vector<int> rets = concurrent_delete_executor.when_all(&finished); |
4011 | 98 | for (int r : rets) { |
4012 | 56 | if (r != 0) { |
4013 | 0 | ret = -1; |
4014 | 0 | break; |
4015 | 0 | } |
4016 | 56 | } |
4017 | 98 | ret = finished ? ret : -1; |
4018 | 98 | return ret; |
4019 | 98 | } |
4020 | | |
4021 | | int InstanceRecycler::delete_rowset_data(const std::string& resource_id, int64_t tablet_id, |
4022 | 3.30k | const std::string& rowset_id) { |
4023 | 3.30k | auto it = accessor_map_.find(resource_id); |
4024 | 3.30k | if (it == accessor_map_.end()) { |
4025 | 400 | LOG_WARNING("instance has no such resource id") |
4026 | 400 | .tag("instance_id", instance_id_) |
4027 | 400 | .tag("resource_id", resource_id) |
4028 | 400 | .tag("tablet_id", tablet_id) |
4029 | 400 | .tag("rowset_id", rowset_id); |
4030 | 400 | return -1; |
4031 | 400 | } |
4032 | 2.90k | auto& accessor = it->second; |
4033 | 2.90k | return accessor->delete_prefix(rowset_path_prefix(tablet_id, rowset_id)); |
4034 | 3.30k | } |
4035 | | |
4036 | 4 | bool InstanceRecycler::decode_packed_file_key(std::string_view key, std::string* packed_path) { |
4037 | 4 | if (key.empty()) { |
4038 | 0 | return false; |
4039 | 0 | } |
4040 | 4 | std::string_view key_view = key; |
4041 | 4 | key_view.remove_prefix(1); // remove keyspace prefix |
4042 | 4 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> decoded; |
4043 | 4 | if (decode_key(&key_view, &decoded) != 0) { |
4044 | 0 | return false; |
4045 | 0 | } |
4046 | 4 | if (decoded.size() < 4) { |
4047 | 0 | return false; |
4048 | 0 | } |
4049 | 4 | try { |
4050 | 4 | *packed_path = std::get<std::string>(std::get<0>(decoded.back())); |
4051 | 4 | } catch (const std::bad_variant_access&) { |
4052 | 0 | return false; |
4053 | 0 | } |
4054 | 4 | return true; |
4055 | 4 | } |
4056 | | |
4057 | 14 | int InstanceRecycler::recycle_packed_files() { |
4058 | 14 | const std::string task_name = "recycle_packed_files"; |
4059 | 14 | auto start_tp = steady_clock::now(); |
4060 | 14 | int64_t start_time = duration_cast<seconds>(start_tp.time_since_epoch()).count(); |
4061 | 14 | int ret = 0; |
4062 | 14 | PackedFileRecycleStats stats; |
4063 | | |
4064 | 14 | register_recycle_task(task_name, start_time); |
4065 | 14 | DORIS_CLOUD_DEFER { |
4066 | 14 | unregister_recycle_task(task_name); |
4067 | 14 | int64_t cost = |
4068 | 14 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
4069 | 14 | int64_t cost_ms = duration_cast<milliseconds>(steady_clock::now() - start_tp).count(); |
4070 | 14 | g_bvar_recycler_packed_file_recycled_kv_num.put(instance_id_, stats.num_deleted); |
4071 | 14 | g_bvar_recycler_packed_file_recycled_kv_bytes.put(instance_id_, stats.bytes_deleted); |
4072 | 14 | g_bvar_recycler_packed_file_recycle_cost_ms.put(instance_id_, cost_ms); |
4073 | 14 | g_bvar_recycler_packed_file_scanned_kv_num.put(instance_id_, stats.num_scanned); |
4074 | 14 | g_bvar_recycler_packed_file_corrected_kv_num.put(instance_id_, stats.num_corrected); |
4075 | 14 | g_bvar_recycler_packed_file_recycled_object_num.put(instance_id_, stats.num_object_deleted); |
4076 | 14 | g_bvar_recycler_packed_file_bytes_object_deleted.put(instance_id_, |
4077 | 14 | stats.bytes_object_deleted); |
4078 | 14 | g_bvar_recycler_packed_file_rowset_scanned_num.put(instance_id_, stats.rowset_scan_count); |
4079 | 14 | LOG_INFO("recycle packed files finished, cost={}s", cost) |
4080 | 14 | .tag("instance_id", instance_id_) |
4081 | 14 | .tag("num_scanned", stats.num_scanned) |
4082 | 14 | .tag("num_corrected", stats.num_corrected) |
4083 | 14 | .tag("num_deleted", stats.num_deleted) |
4084 | 14 | .tag("num_failed", stats.num_failed) |
4085 | 14 | .tag("num_objects_deleted", stats.num_object_deleted) |
4086 | 14 | .tag("bytes_object_deleted", stats.bytes_object_deleted) |
4087 | 14 | .tag("rowset_scan_count", stats.rowset_scan_count) |
4088 | 14 | .tag("bytes_deleted", stats.bytes_deleted) |
4089 | 14 | .tag("ret", ret); |
4090 | 14 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_0clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_0clEv Line | Count | Source | 4065 | 14 | DORIS_CLOUD_DEFER { | 4066 | 14 | unregister_recycle_task(task_name); | 4067 | 14 | int64_t cost = | 4068 | 14 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 4069 | 14 | int64_t cost_ms = duration_cast<milliseconds>(steady_clock::now() - start_tp).count(); | 4070 | 14 | g_bvar_recycler_packed_file_recycled_kv_num.put(instance_id_, stats.num_deleted); | 4071 | 14 | g_bvar_recycler_packed_file_recycled_kv_bytes.put(instance_id_, stats.bytes_deleted); | 4072 | 14 | g_bvar_recycler_packed_file_recycle_cost_ms.put(instance_id_, cost_ms); | 4073 | 14 | g_bvar_recycler_packed_file_scanned_kv_num.put(instance_id_, stats.num_scanned); | 4074 | 14 | g_bvar_recycler_packed_file_corrected_kv_num.put(instance_id_, stats.num_corrected); | 4075 | 14 | g_bvar_recycler_packed_file_recycled_object_num.put(instance_id_, stats.num_object_deleted); | 4076 | 14 | g_bvar_recycler_packed_file_bytes_object_deleted.put(instance_id_, | 4077 | 14 | stats.bytes_object_deleted); | 4078 | 14 | g_bvar_recycler_packed_file_rowset_scanned_num.put(instance_id_, stats.rowset_scan_count); | 4079 | 14 | LOG_INFO("recycle packed files finished, cost={}s", cost) | 4080 | 14 | .tag("instance_id", instance_id_) | 4081 | 14 | .tag("num_scanned", stats.num_scanned) | 4082 | 14 | .tag("num_corrected", stats.num_corrected) | 4083 | 14 | .tag("num_deleted", stats.num_deleted) | 4084 | 14 | .tag("num_failed", stats.num_failed) | 4085 | 14 | .tag("num_objects_deleted", stats.num_object_deleted) | 4086 | 14 | .tag("bytes_object_deleted", stats.bytes_object_deleted) | 4087 | 14 | .tag("rowset_scan_count", stats.rowset_scan_count) | 4088 | 14 | .tag("bytes_deleted", stats.bytes_deleted) | 4089 | 14 | .tag("ret", ret); | 4090 | 14 | 
}; |
|
4091 | | |
4092 | 14 | auto recycle_func = [this, &stats, &ret](auto&& key, auto&& value) { |
4093 | 4 | return handle_packed_file_kv(std::forward<decltype(key)>(key), |
4094 | 4 | std::forward<decltype(value)>(value), &stats, &ret); |
4095 | 4 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_1clISt17basic_string_viewIcSt11char_traitsIcEES7_EEDaOT_OT0_ recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_1clISt17basic_string_viewIcSt11char_traitsIcEES7_EEDaOT_OT0_ Line | Count | Source | 4092 | 4 | auto recycle_func = [this, &stats, &ret](auto&& key, auto&& value) { | 4093 | 4 | return handle_packed_file_kv(std::forward<decltype(key)>(key), | 4094 | 4 | std::forward<decltype(value)>(value), &stats, &ret); | 4095 | 4 | }; |
|
4096 | | |
4097 | 14 | LOG_INFO("begin to recycle packed file").tag("instance_id", instance_id_); |
4098 | | |
4099 | 14 | std::string begin = packed_file_key({instance_id_, ""}); |
4100 | 14 | std::string end = packed_file_key({instance_id_, "\xff"}); |
4101 | 14 | if (scan_and_recycle(begin, end, recycle_func) != 0) { |
4102 | 0 | ret = -1; |
4103 | 0 | } |
4104 | | |
4105 | 14 | return ret; |
4106 | 14 | } |
4107 | | |
4108 | | int InstanceRecycler::scan_tablets_and_statistics(int64_t table_id, int64_t index_id, |
4109 | | RecyclerMetricsContext& metrics_context, |
4110 | 0 | int64_t partition_id, bool is_empty_tablet) { |
4111 | 0 | std::string tablet_key_begin, tablet_key_end; |
4112 | |
|
4113 | 0 | if (partition_id > 0) { |
4114 | 0 | meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin); |
4115 | 0 | meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end); |
4116 | 0 | } else { |
4117 | 0 | meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin); |
4118 | 0 | meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end); |
4119 | 0 | } |
4120 | | // for calculate the total num or bytes of recyled objects |
4121 | 0 | auto scan_and_statistics = [&, is_empty_tablet, this](std::string_view k, |
4122 | 0 | std::string_view v) -> int { |
4123 | 0 | doris::TabletMetaCloudPB tablet_meta_pb; |
4124 | 0 | if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) { |
4125 | 0 | return 0; |
4126 | 0 | } |
4127 | 0 | int64_t tablet_id = tablet_meta_pb.tablet_id(); |
4128 | |
|
4129 | 0 | if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) { |
4130 | 0 | return 0; |
4131 | 0 | } |
4132 | | |
4133 | 0 | if (!is_empty_tablet) { |
4134 | 0 | if (scan_tablet_and_statistics(tablet_id, metrics_context) != 0) { |
4135 | 0 | return 0; |
4136 | 0 | } |
4137 | 0 | tablet_metrics_context_.total_need_recycle_num++; |
4138 | 0 | } |
4139 | 0 | return 0; |
4140 | 0 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_ Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_ |
4141 | 0 | int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(scan_and_statistics)); |
4142 | 0 | metrics_context.report(true); |
4143 | 0 | tablet_metrics_context_.report(true); |
4144 | 0 | segment_metrics_context_.report(true); |
4145 | 0 | return ret; |
4146 | 0 | } |
4147 | | |
4148 | | int InstanceRecycler::scan_tablet_and_statistics(int64_t tablet_id, |
4149 | 0 | RecyclerMetricsContext& metrics_context) { |
4150 | 0 | int ret = 0; |
4151 | 0 | std::map<std::string, RowsetMetaCloudPB> rowset_meta_map; |
4152 | 0 | std::unique_ptr<Transaction> txn; |
4153 | 0 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
4154 | 0 | LOG_WARNING("failed to recycle tablet ") |
4155 | 0 | .tag("tablet id", tablet_id) |
4156 | 0 | .tag("instance_id", instance_id_) |
4157 | 0 | .tag("reason", "failed to create txn"); |
4158 | 0 | ret = -1; |
4159 | 0 | } |
4160 | 0 | GetRowsetResponse resp; |
4161 | 0 | std::string msg; |
4162 | 0 | MetaServiceCode code = MetaServiceCode::OK; |
4163 | | // get rowsets in tablet |
4164 | 0 | internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_, |
4165 | 0 | tablet_id, code, msg, &resp); |
4166 | 0 | if (code != MetaServiceCode::OK) { |
4167 | 0 | LOG_WARNING("failed to get rowsets of tablet when recycle tablet") |
4168 | 0 | .tag("tablet id", tablet_id) |
4169 | 0 | .tag("msg", msg) |
4170 | 0 | .tag("code", code) |
4171 | 0 | .tag("instance id", instance_id_); |
4172 | 0 | ret = -1; |
4173 | 0 | } |
4174 | 0 | for (const auto& rs_meta : resp.rowset_meta()) { |
4175 | | /* |
4176 | | * For compatibility, we skip the loop for [0-1] here. |
4177 | | * The purpose of this loop is to delete object files, |
4178 | | * and since [0-1] only has meta and doesn't have object files, |
4179 | | * skipping it doesn't affect system correctness. |
4180 | | * |
4181 | | * If not skipped, the check "if (!rs_meta.has_resource_id())" below |
4182 | | * would return error -1 directly, causing the recycle operation to fail. |
4183 | | * |
4184 | | * [0-1] doesn't have resource id is a bug. |
4185 | | * In the future, we will fix this problem, after that, |
4186 | | * we can remove this if statement. |
4187 | | * |
4188 | | * TODO(Yukang-Lian): remove this if statement when [0-1] has resource id in the future. |
4189 | | */ |
4190 | |
|
4191 | 0 | if (rs_meta.end_version() == 1) { |
4192 | | // Assert that [0-1] has no resource_id to make sure |
4193 | | // this if statement will not be forgetted to remove |
4194 | | // when the resource id bug is fixed |
4195 | 0 | DCHECK(!rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString(); |
4196 | 0 | continue; |
4197 | 0 | } |
4198 | 0 | if (!rs_meta.has_resource_id()) { |
4199 | 0 | LOG_WARNING("rowset meta does not have a resource id, impossible!") |
4200 | 0 | .tag("rs_meta", rs_meta.ShortDebugString()) |
4201 | 0 | .tag("instance_id", instance_id_) |
4202 | 0 | .tag("tablet_id", tablet_id); |
4203 | 0 | continue; |
4204 | 0 | } |
4205 | 0 | DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString(); |
4206 | 0 | auto it = accessor_map_.find(rs_meta.resource_id()); |
4207 | | // possible if the accessor is not initilized correctly |
4208 | 0 | if (it == accessor_map_.end()) [[unlikely]] { |
4209 | 0 | LOG_WARNING( |
4210 | 0 | "failed to find resource id when recycle tablet, skip this vault accessor " |
4211 | 0 | "recycle process") |
4212 | 0 | .tag("tablet id", tablet_id) |
4213 | 0 | .tag("instance_id", instance_id_) |
4214 | 0 | .tag("resource_id", rs_meta.resource_id()) |
4215 | 0 | .tag("rowset meta pb", rs_meta.ShortDebugString()); |
4216 | 0 | continue; |
4217 | 0 | } |
4218 | | |
4219 | 0 | metrics_context.total_need_recycle_data_size += rs_meta.total_disk_size(); |
4220 | 0 | tablet_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size(); |
4221 | 0 | segment_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size(); |
4222 | 0 | segment_metrics_context_.total_need_recycle_num += rs_meta.num_segments(); |
4223 | 0 | } |
4224 | 0 | return ret; |
4225 | 0 | } |
4226 | | |
4227 | 4.25k | int InstanceRecycler::recycle_tablet(int64_t tablet_id, RecyclerMetricsContext& metrics_context) { |
4228 | 4.25k | LOG_INFO("begin to recycle rowsets in a dropped tablet") |
4229 | 4.25k | .tag("instance_id", instance_id_) |
4230 | 4.25k | .tag("tablet_id", tablet_id); |
4231 | | |
4232 | 4.25k | if (should_recycle_versioned_keys()) { |
4233 | 11 | int ret = recycle_versioned_tablet(tablet_id, metrics_context); |
4234 | 11 | if (ret != 0) { |
4235 | 0 | return ret; |
4236 | 0 | } |
4237 | | // Continue to recycle non-versioned rowsets, if multi-version is set to DISABLED |
4238 | | // during the recycle_versioned_tablet process. |
4239 | | // |
4240 | | // .. And remove restore job rowsets of this tablet too |
4241 | 11 | } |
4242 | | |
4243 | 4.25k | int ret = 0; |
4244 | 4.25k | auto start_time = steady_clock::now(); |
4245 | | |
4246 | 4.25k | TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0); |
4247 | | |
4248 | | // collect resource ids |
4249 | 248 | std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0}); |
4250 | 248 | std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0}); |
4251 | 248 | std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""}); |
4252 | 248 | std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""}); |
4253 | 248 | std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0}); |
4254 | 248 | std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0}); |
4255 | | |
4256 | 248 | std::set<std::string> resource_ids; |
4257 | 248 | int64_t recycle_rowsets_number = 0; |
4258 | 248 | int64_t recycle_segments_number = 0; |
4259 | 248 | int64_t recycle_rowsets_data_size = 0; |
4260 | 248 | int64_t recycle_rowsets_index_size = 0; |
4261 | 248 | int64_t recycle_restore_job_rowsets_number = 0; |
4262 | 248 | int64_t recycle_restore_job_segments_number = 0; |
4263 | 248 | int64_t recycle_restore_job_rowsets_data_size = 0; |
4264 | 248 | int64_t recycle_restore_job_rowsets_index_size = 0; |
4265 | 248 | int64_t max_rowset_version = 0; |
4266 | 248 | int64_t min_rowset_creation_time = INT64_MAX; |
4267 | 248 | int64_t max_rowset_creation_time = 0; |
4268 | 248 | int64_t min_rowset_expiration_time = INT64_MAX; |
4269 | 248 | int64_t max_rowset_expiration_time = 0; |
4270 | | |
4271 | 248 | DORIS_CLOUD_DEFER { |
4272 | 248 | auto cost = duration<float>(steady_clock::now() - start_time).count(); |
4273 | 248 | LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost) |
4274 | 248 | .tag("instance_id", instance_id_) |
4275 | 248 | .tag("tablet_id", tablet_id) |
4276 | 248 | .tag("recycle rowsets number", recycle_rowsets_number) |
4277 | 248 | .tag("recycle segments number", recycle_segments_number) |
4278 | 248 | .tag("all rowsets recycle data size", recycle_rowsets_data_size) |
4279 | 248 | .tag("all rowsets recycle index size", recycle_rowsets_index_size) |
4280 | 248 | .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number) |
4281 | 248 | .tag("recycle restore job segments number", recycle_restore_job_segments_number) |
4282 | 248 | .tag("all restore job rowsets recycle data size", |
4283 | 248 | recycle_restore_job_rowsets_data_size) |
4284 | 248 | .tag("all restore job rowsets recycle index size", |
4285 | 248 | recycle_restore_job_rowsets_index_size) |
4286 | 248 | .tag("max rowset version", max_rowset_version) |
4287 | 248 | .tag("min rowset creation time", min_rowset_creation_time) |
4288 | 248 | .tag("max rowset creation time", max_rowset_creation_time) |
4289 | 248 | .tag("min rowset expiration time", min_rowset_expiration_time) |
4290 | 248 | .tag("max rowset expiration time", max_rowset_expiration_time) |
4291 | 248 | .tag("task type", metrics_context.operation_type) |
4292 | 248 | .tag("ret", ret); |
4293 | 248 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv Line | Count | Source | 4271 | 248 | DORIS_CLOUD_DEFER { | 4272 | 248 | auto cost = duration<float>(steady_clock::now() - start_time).count(); | 4273 | 248 | LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost) | 4274 | 248 | .tag("instance_id", instance_id_) | 4275 | 248 | .tag("tablet_id", tablet_id) | 4276 | 248 | .tag("recycle rowsets number", recycle_rowsets_number) | 4277 | 248 | .tag("recycle segments number", recycle_segments_number) | 4278 | 248 | .tag("all rowsets recycle data size", recycle_rowsets_data_size) | 4279 | 248 | .tag("all rowsets recycle index size", recycle_rowsets_index_size) | 4280 | 248 | .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number) | 4281 | 248 | .tag("recycle restore job segments number", recycle_restore_job_segments_number) | 4282 | 248 | .tag("all restore job rowsets recycle data size", | 4283 | 248 | recycle_restore_job_rowsets_data_size) | 4284 | 248 | .tag("all restore job rowsets recycle index size", | 4285 | 248 | recycle_restore_job_rowsets_index_size) | 4286 | 248 | .tag("max rowset version", max_rowset_version) | 4287 | 248 | .tag("min rowset creation time", min_rowset_creation_time) | 4288 | 248 | .tag("max rowset creation time", max_rowset_creation_time) | 4289 | 248 | .tag("min rowset expiration time", min_rowset_expiration_time) | 4290 | 248 | .tag("max rowset expiration time", max_rowset_expiration_time) | 4291 | 248 | .tag("task type", metrics_context.operation_type) | 4292 | 248 | .tag("ret", ret); | 4293 | 248 | }; |
|
4294 | | |
4295 | 248 | std::unique_ptr<Transaction> txn; |
4296 | 248 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
4297 | 0 | LOG_WARNING("failed to recycle tablet ") |
4298 | 0 | .tag("tablet id", tablet_id) |
4299 | 0 | .tag("instance_id", instance_id_) |
4300 | 0 | .tag("reason", "failed to create txn"); |
4301 | 0 | ret = -1; |
4302 | 0 | } |
4303 | 248 | GetRowsetResponse resp; |
4304 | 248 | std::string msg; |
4305 | 248 | MetaServiceCode code = MetaServiceCode::OK; |
4306 | | // get rowsets in tablet |
4307 | 248 | internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_, |
4308 | 248 | tablet_id, code, msg, &resp); |
4309 | 248 | if (code != MetaServiceCode::OK) { |
4310 | 0 | LOG_WARNING("failed to get rowsets of tablet when recycle tablet") |
4311 | 0 | .tag("tablet id", tablet_id) |
4312 | 0 | .tag("msg", msg) |
4313 | 0 | .tag("code", code) |
4314 | 0 | .tag("instance id", instance_id_); |
4315 | 0 | ret = -1; |
4316 | 0 | } |
4317 | 248 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", &resp); |
4318 | | |
4319 | 2.51k | for (const auto& rs_meta : resp.rowset_meta()) { |
4320 | | // The rowset has no resource id and segments when it was generated by compaction |
4321 | | // with multiple hole rowsets or it's version is [0-1], so we can skip it. |
4322 | 2.51k | if (!rs_meta.has_resource_id() && rs_meta.num_segments() == 0) { |
4323 | 0 | LOG_INFO("rowset meta does not have a resource id and no segments, skip this rowset") |
4324 | 0 | .tag("rs_meta", rs_meta.ShortDebugString()) |
4325 | 0 | .tag("instance_id", instance_id_) |
4326 | 0 | .tag("tablet_id", tablet_id); |
4327 | 0 | recycle_rowsets_number += 1; |
4328 | 0 | continue; |
4329 | 0 | } |
4330 | 2.51k | if (!rs_meta.has_resource_id()) { |
4331 | 1 | LOG_WARNING("rowset meta does not have a resource id, impossible!") |
4332 | 1 | .tag("rs_meta", rs_meta.ShortDebugString()) |
4333 | 1 | .tag("instance_id", instance_id_) |
4334 | 1 | .tag("tablet_id", tablet_id); |
4335 | 1 | return -1; |
4336 | 1 | } |
4337 | 18.4E | DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString(); |
4338 | 2.51k | auto it = accessor_map_.find(rs_meta.resource_id()); |
4339 | | // possible if the accessor is not initilized correctly |
4340 | 2.51k | if (it == accessor_map_.end()) [[unlikely]] { |
4341 | 1 | LOG_WARNING( |
4342 | 1 | "failed to find resource id when recycle tablet, skip this vault accessor " |
4343 | 1 | "recycle process") |
4344 | 1 | .tag("tablet id", tablet_id) |
4345 | 1 | .tag("instance_id", instance_id_) |
4346 | 1 | .tag("resource_id", rs_meta.resource_id()) |
4347 | 1 | .tag("rowset meta pb", rs_meta.ShortDebugString()); |
4348 | 1 | return -1; |
4349 | 1 | } |
4350 | 2.51k | if (decrement_packed_file_ref_counts(rs_meta) != 0) { |
4351 | 0 | LOG_WARNING("failed to update packed file info when recycling tablet") |
4352 | 0 | .tag("instance_id", instance_id_) |
4353 | 0 | .tag("tablet_id", tablet_id) |
4354 | 0 | .tag("rowset_id", rs_meta.rowset_id_v2()); |
4355 | 0 | return -1; |
4356 | 0 | } |
4357 | 2.51k | recycle_rowsets_number += 1; |
4358 | 2.51k | recycle_segments_number += rs_meta.num_segments(); |
4359 | 2.51k | recycle_rowsets_data_size += rs_meta.data_disk_size(); |
4360 | 2.51k | recycle_rowsets_index_size += rs_meta.index_disk_size(); |
4361 | 2.51k | max_rowset_version = std::max(max_rowset_version, rs_meta.end_version()); |
4362 | 2.51k | min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time()); |
4363 | 2.51k | max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time()); |
4364 | 2.51k | min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration()); |
4365 | 2.51k | max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration()); |
4366 | 2.51k | resource_ids.emplace(rs_meta.resource_id()); |
4367 | 2.51k | } |
4368 | | |
4369 | | // get restore job rowset in tablet |
4370 | 246 | std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>> restore_job_rs_metas; |
4371 | 246 | scan_restore_job_rowset(txn.get(), instance_id_, tablet_id, code, msg, &restore_job_rs_metas); |
4372 | 246 | if (code != MetaServiceCode::OK) { |
4373 | 0 | LOG_WARNING("scan restore job rowsets failed when recycle tablet") |
4374 | 0 | .tag("tablet id", tablet_id) |
4375 | 0 | .tag("msg", msg) |
4376 | 0 | .tag("code", code) |
4377 | 0 | .tag("instance id", instance_id_); |
4378 | 0 | return -1; |
4379 | 0 | } |
4380 | | |
4381 | 246 | for (auto& [_, rs_meta] : restore_job_rs_metas) { |
4382 | 0 | if (!rs_meta.has_resource_id()) { |
4383 | 0 | LOG_WARNING("rowset meta does not have a resource id, impossible!") |
4384 | 0 | .tag("rs_meta", rs_meta.ShortDebugString()) |
4385 | 0 | .tag("instance_id", instance_id_) |
4386 | 0 | .tag("tablet_id", tablet_id); |
4387 | 0 | return -1; |
4388 | 0 | } |
4389 | | |
4390 | 0 | auto it = accessor_map_.find(rs_meta.resource_id()); |
4391 | | // possible if the accessor is not initilized correctly |
4392 | 0 | if (it == accessor_map_.end()) [[unlikely]] { |
4393 | 0 | LOG_WARNING( |
4394 | 0 | "failed to find resource id when recycle tablet, skip this vault accessor " |
4395 | 0 | "recycle process") |
4396 | 0 | .tag("tablet id", tablet_id) |
4397 | 0 | .tag("instance_id", instance_id_) |
4398 | 0 | .tag("resource_id", rs_meta.resource_id()) |
4399 | 0 | .tag("rowset meta pb", rs_meta.ShortDebugString()); |
4400 | 0 | return -1; |
4401 | 0 | } |
4402 | 0 | if (decrement_packed_file_ref_counts(rs_meta) != 0) { |
4403 | 0 | LOG_WARNING("failed to update packed file info when recycling restore job rowset") |
4404 | 0 | .tag("instance_id", instance_id_) |
4405 | 0 | .tag("tablet_id", tablet_id) |
4406 | 0 | .tag("rowset_id", rs_meta.rowset_id_v2()); |
4407 | 0 | return -1; |
4408 | 0 | } |
4409 | 0 | recycle_restore_job_rowsets_number += 1; |
4410 | 0 | recycle_restore_job_segments_number += rs_meta.num_segments(); |
4411 | 0 | recycle_restore_job_rowsets_data_size += rs_meta.data_disk_size(); |
4412 | 0 | recycle_restore_job_rowsets_index_size += rs_meta.index_disk_size(); |
4413 | 0 | resource_ids.emplace(rs_meta.resource_id()); |
4414 | 0 | } |
4415 | | |
4416 | 246 | LOG_INFO("recycle tablet start to delete object") |
4417 | 246 | .tag("instance id", instance_id_) |
4418 | 246 | .tag("tablet id", tablet_id) |
4419 | 246 | .tag("recycle tablet resource ids are", |
4420 | 246 | std::accumulate(resource_ids.begin(), resource_ids.end(), std::string(), |
4421 | 246 | [](std::string rs_id, const auto& it) { |
4422 | 205 | return rs_id.empty() ? it : rs_id + ", " + it; |
4423 | 205 | })); Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_ recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_ Line | Count | Source | 4421 | 205 | [](std::string rs_id, const auto& it) { | 4422 | 205 | return rs_id.empty() ? it : rs_id + ", " + it; | 4423 | 205 | })); |
|
4424 | | |
4425 | 246 | SyncExecutor<std::pair<int, std::string>> concurrent_delete_executor( |
4426 | 246 | _thread_pool_group.s3_producer_pool, |
4427 | 246 | fmt::format("delete tablet {} s3 rowset", tablet_id), |
4428 | 246 | [](const std::pair<int, std::string>& ret) { return ret.first != 0; });Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE Line | Count | Source | 4428 | 206 | [](const std::pair<int, std::string>& ret) { return ret.first != 0; }); |
|
4429 | | |
4430 | | // delete all rowset data in this tablet |
4431 | | // ATTN: there may be data leak if not all accessor initilized successfully |
4432 | | // partial data deleted if the tablet is stored cross-storage vault |
4433 | | // vault id is not attached to TabletMeta... |
4434 | 246 | for (const auto& resource_id : resource_ids) { |
4435 | 206 | g_bvar_recycler_vault_recycle_task_status.put({instance_id_, resource_id, "submitted"}, 1); |
4436 | 206 | concurrent_delete_executor.add( |
4437 | 206 | [&, rs_id = resource_id, |
4438 | 206 | accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) { |
4439 | 206 | int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id)); |
4440 | 206 | if (res != 0) { |
4441 | 2 | LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id |
4442 | 2 | << " path=" << accessor_ptr->uri() |
4443 | 2 | << " task type=" << metrics_context.operation_type; |
4444 | 2 | return std::make_pair(-1, rs_id); |
4445 | 2 | } |
4446 | 204 | return std::make_pair(0, rs_id); |
4447 | 206 | }); Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev Line | Count | Source | 4438 | 206 | accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) { | 4439 | 206 | int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id)); | 4440 | 206 | if (res != 0) { | 4441 | 2 | LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id | 4442 | 2 | << " path=" << accessor_ptr->uri() | 4443 | 2 | << " task type=" << metrics_context.operation_type; | 4444 | 2 | return std::make_pair(-1, rs_id); | 4445 | 2 | } | 4446 | 204 | return std::make_pair(0, rs_id); | 4447 | 206 | }); |
|
4448 | 206 | } |
4449 | | |
4450 | 246 | bool finished = true; |
4451 | 246 | std::vector<std::pair<int, std::string>> rets = concurrent_delete_executor.when_all(&finished); |
4452 | 246 | for (auto& r : rets) { |
4453 | 206 | if (r.first != 0) { |
4454 | 2 | g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "error"}, 1); |
4455 | 2 | ret = -1; |
4456 | 2 | } |
4457 | 206 | g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "completed"}, 1); |
4458 | 206 | } |
4459 | 246 | ret = finished ? ret : -1; |
4460 | | |
4461 | 246 | if (ret != 0) { // failed recycle tablet data |
4462 | 2 | LOG_WARNING("ret!=0") |
4463 | 2 | .tag("finished", finished) |
4464 | 2 | .tag("ret", ret) |
4465 | 2 | .tag("instance_id", instance_id_) |
4466 | 2 | .tag("tablet_id", tablet_id); |
4467 | 2 | return ret; |
4468 | 2 | } |
4469 | | |
4470 | 244 | tablet_metrics_context_.total_recycled_data_size += |
4471 | 244 | recycle_rowsets_data_size + recycle_rowsets_index_size; |
4472 | 244 | tablet_metrics_context_.total_recycled_num += 1; |
4473 | 244 | segment_metrics_context_.total_recycled_num += recycle_segments_number; |
4474 | 244 | segment_metrics_context_.total_recycled_data_size += |
4475 | 244 | recycle_rowsets_data_size + recycle_rowsets_index_size; |
4476 | 244 | metrics_context.total_recycled_data_size += |
4477 | 244 | recycle_rowsets_data_size + recycle_rowsets_index_size; |
4478 | 244 | tablet_metrics_context_.report(); |
4479 | 244 | segment_metrics_context_.report(); |
4480 | 244 | metrics_context.report(); |
4481 | | |
4482 | 244 | txn.reset(); |
4483 | 244 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
4484 | 0 | LOG_WARNING("failed to recycle tablet ") |
4485 | 0 | .tag("tablet id", tablet_id) |
4486 | 0 | .tag("instance_id", instance_id_) |
4487 | 0 | .tag("reason", "failed to create txn"); |
4488 | 0 | ret = -1; |
4489 | 0 | } |
4490 | | // delete all rowset kv in this tablet |
4491 | 244 | txn->remove(rs_key0, rs_key1); |
4492 | 244 | txn->remove(recyc_rs_key0, recyc_rs_key1); |
4493 | 244 | txn->remove(restore_job_rs_key0, restore_job_rs_key1); |
4494 | | |
4495 | | // remove delete bitmap for MoW table |
4496 | 244 | std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id}); |
4497 | 244 | txn->remove(pending_key); |
4498 | 244 | std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0}); |
4499 | 244 | std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0}); |
4500 | 244 | txn->remove(delete_bitmap_start, delete_bitmap_end); |
4501 | | |
4502 | 244 | std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""}); |
4503 | 244 | std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""}); |
4504 | 244 | txn->remove(dbm_start_key, dbm_end_key); |
4505 | 244 | LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key) |
4506 | 244 | << " end=" << hex(dbm_end_key); |
4507 | | |
4508 | 244 | TxnErrorCode err = txn->commit(); |
4509 | 244 | if (err != TxnErrorCode::TXN_OK) { |
4510 | 0 | LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err; |
4511 | 0 | ret = -1; |
4512 | 0 | } |
4513 | | |
4514 | 244 | if (ret == 0) { |
4515 | | // All object files under tablet have been deleted |
4516 | 244 | std::lock_guard lock(recycled_tablets_mtx_); |
4517 | 244 | recycled_tablets_.insert(tablet_id); |
4518 | 244 | } |
4519 | | |
4520 | 244 | return ret; |
4521 | 246 | } |
4522 | | |
4523 | | int InstanceRecycler::recycle_versioned_tablet(int64_t tablet_id, |
4524 | 11 | RecyclerMetricsContext& metrics_context) { |
4525 | 11 | int ret = 0; |
4526 | 11 | auto start_time = steady_clock::now(); |
4527 | | |
4528 | 11 | TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0); |
4529 | | |
4530 | | // collect resource ids |
4531 | 11 | std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0}); |
4532 | 11 | std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0}); |
4533 | 11 | std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""}); |
4534 | 11 | std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""}); |
4535 | | |
4536 | 11 | int64_t recycle_rowsets_number = 0; |
4537 | 11 | int64_t recycle_segments_number = 0; |
4538 | 11 | int64_t recycle_rowsets_data_size = 0; |
4539 | 11 | int64_t recycle_rowsets_index_size = 0; |
4540 | 11 | int64_t max_rowset_version = 0; |
4541 | 11 | int64_t min_rowset_creation_time = INT64_MAX; |
4542 | 11 | int64_t max_rowset_creation_time = 0; |
4543 | 11 | int64_t min_rowset_expiration_time = INT64_MAX; |
4544 | 11 | int64_t max_rowset_expiration_time = 0; |
4545 | | |
4546 | 11 | DORIS_CLOUD_DEFER { |
4547 | 11 | auto cost = duration<float>(steady_clock::now() - start_time).count(); |
4548 | 11 | LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost) |
4549 | 11 | .tag("instance_id", instance_id_) |
4550 | 11 | .tag("tablet_id", tablet_id) |
4551 | 11 | .tag("recycle rowsets number", recycle_rowsets_number) |
4552 | 11 | .tag("recycle segments number", recycle_segments_number) |
4553 | 11 | .tag("all rowsets recycle data size", recycle_rowsets_data_size) |
4554 | 11 | .tag("all rowsets recycle index size", recycle_rowsets_index_size) |
4555 | 11 | .tag("max rowset version", max_rowset_version) |
4556 | 11 | .tag("min rowset creation time", min_rowset_creation_time) |
4557 | 11 | .tag("max rowset creation time", max_rowset_creation_time) |
4558 | 11 | .tag("min rowset expiration time", min_rowset_expiration_time) |
4559 | 11 | .tag("max rowset expiration time", max_rowset_expiration_time) |
4560 | 11 | .tag("ret", ret); |
4561 | 11 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv Line | Count | Source | 4546 | 11 | DORIS_CLOUD_DEFER { | 4547 | 11 | auto cost = duration<float>(steady_clock::now() - start_time).count(); | 4548 | 11 | LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost) | 4549 | 11 | .tag("instance_id", instance_id_) | 4550 | 11 | .tag("tablet_id", tablet_id) | 4551 | 11 | .tag("recycle rowsets number", recycle_rowsets_number) | 4552 | 11 | .tag("recycle segments number", recycle_segments_number) | 4553 | 11 | .tag("all rowsets recycle data size", recycle_rowsets_data_size) | 4554 | 11 | .tag("all rowsets recycle index size", recycle_rowsets_index_size) | 4555 | 11 | .tag("max rowset version", max_rowset_version) | 4556 | 11 | .tag("min rowset creation time", min_rowset_creation_time) | 4557 | 11 | .tag("max rowset creation time", max_rowset_creation_time) | 4558 | 11 | .tag("min rowset expiration time", min_rowset_expiration_time) | 4559 | 11 | .tag("max rowset expiration time", max_rowset_expiration_time) | 4560 | 11 | .tag("ret", ret); | 4561 | 11 | }; |
|
4562 | | |
4563 | 11 | std::unique_ptr<Transaction> txn; |
4564 | 11 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
4565 | 0 | LOG_WARNING("failed to recycle tablet ") |
4566 | 0 | .tag("tablet id", tablet_id) |
4567 | 0 | .tag("instance_id", instance_id_) |
4568 | 0 | .tag("reason", "failed to create txn"); |
4569 | 0 | ret = -1; |
4570 | 0 | } |
4571 | | |
4572 | | // Read the last version of load and compact rowsets, the previous rowsets will be recycled |
4573 | | // by the related operation logs. |
4574 | 11 | std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> load_rowset_metas; |
4575 | 11 | std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> compact_rowset_metas; |
4576 | 11 | MetaReader meta_reader(instance_id_); |
4577 | 11 | TxnErrorCode err = meta_reader.get_load_rowset_metas(txn.get(), tablet_id, &load_rowset_metas); |
4578 | 11 | if (err == TxnErrorCode::TXN_OK) { |
4579 | 11 | err = meta_reader.get_compact_rowset_metas(txn.get(), tablet_id, &compact_rowset_metas); |
4580 | 11 | } |
4581 | 11 | if (err != TxnErrorCode::TXN_OK) { |
4582 | 0 | LOG_WARNING("failed to get rowsets of tablet when recycle tablet") |
4583 | 0 | .tag("tablet id", tablet_id) |
4584 | 0 | .tag("err", err) |
4585 | 0 | .tag("instance id", instance_id_); |
4586 | 0 | ret = -1; |
4587 | 0 | } |
4588 | | |
4589 | 11 | LOG_INFO("recycle versioned tablet get {} load rowsets and {} compact rowsets", |
4590 | 11 | load_rowset_metas.size(), compact_rowset_metas.size()) |
4591 | 11 | .tag("instance_id", instance_id_) |
4592 | 11 | .tag("tablet_id", tablet_id); |
4593 | | |
4594 | 11 | SyncExecutor<int> concurrent_delete_executor( |
4595 | 11 | _thread_pool_group.s3_producer_pool, |
4596 | 11 | fmt::format("delete tablet {} s3 rowset", tablet_id), |
4597 | 11 | [](const int& ret) { return ret != 0; });Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi |
4598 | | |
4599 | 60 | auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) { |
4600 | 60 | recycle_rowsets_number += 1; |
4601 | 60 | recycle_segments_number += rs_meta.num_segments(); |
4602 | 60 | recycle_rowsets_data_size += rs_meta.data_disk_size(); |
4603 | 60 | recycle_rowsets_index_size += rs_meta.index_disk_size(); |
4604 | 60 | max_rowset_version = std::max(max_rowset_version, rs_meta.end_version()); |
4605 | 60 | min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time()); |
4606 | 60 | max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time()); |
4607 | 60 | min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration()); |
4608 | 60 | max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration()); |
4609 | 60 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE Line | Count | Source | 4599 | 60 | auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) { | 4600 | 60 | recycle_rowsets_number += 1; | 4601 | 60 | recycle_segments_number += rs_meta.num_segments(); | 4602 | 60 | recycle_rowsets_data_size += rs_meta.data_disk_size(); | 4603 | 60 | recycle_rowsets_index_size += rs_meta.index_disk_size(); | 4604 | 60 | max_rowset_version = std::max(max_rowset_version, rs_meta.end_version()); | 4605 | 60 | min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time()); | 4606 | 60 | max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time()); | 4607 | 60 | min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration()); | 4608 | 60 | max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration()); | 4609 | 60 | }; |
|
4610 | | |
4611 | 11 | std::vector<RowsetDeleteTask> all_tasks; |
4612 | 60 | for (const auto& [rs_meta, versionstamp] : load_rowset_metas) { |
4613 | 60 | update_rowset_stats(rs_meta); |
4614 | | // Version 0-1 rowset has no resource_id and no actual data files, |
4615 | | // but still needs ref_count key cleanup, so we add it to all_tasks. |
4616 | | // It will be filtered out in Phase 2 when building rowsets_to_delete. |
4617 | 60 | RowsetDeleteTask task; |
4618 | 60 | task.rowset_meta = rs_meta; |
4619 | 60 | task.versioned_rowset_key = |
4620 | 60 | versioned::meta_rowset_load_key({instance_id_, tablet_id, rs_meta.end_version()}); |
4621 | 60 | task.non_versioned_rowset_key = |
4622 | 60 | meta_rowset_key({instance_id_, tablet_id, rs_meta.end_version()}); |
4623 | 60 | task.versionstamp = versionstamp; |
4624 | 60 | all_tasks.push_back(std::move(task)); |
4625 | 60 | } |
4626 | | |
4627 | 11 | for (const auto& [rs_meta, versionstamp] : compact_rowset_metas) { |
4628 | 0 | update_rowset_stats(rs_meta); |
4629 | | // Version 0-1 rowset has no resource_id and no actual data files, |
4630 | | // but still needs ref_count key cleanup, so we add it to all_tasks. |
4631 | | // It will be filtered out in Phase 2 when building rowsets_to_delete. |
4632 | 0 | RowsetDeleteTask task; |
4633 | 0 | task.rowset_meta = rs_meta; |
4634 | 0 | task.versioned_rowset_key = versioned::meta_rowset_compact_key( |
4635 | 0 | {instance_id_, tablet_id, rs_meta.end_version()}); |
4636 | 0 | task.non_versioned_rowset_key = |
4637 | 0 | meta_rowset_key({instance_id_, tablet_id, rs_meta.end_version()}); |
4638 | 0 | task.versionstamp = versionstamp; |
4639 | 0 | all_tasks.push_back(std::move(task)); |
4640 | 0 | } |
4641 | | |
4642 | 11 | auto handle_recycle_rowset_kv = [&](std::string_view k, std::string_view v) { |
4643 | 0 | RecycleRowsetPB recycle_rowset; |
4644 | 0 | if (!recycle_rowset.ParseFromArray(v.data(), v.size())) { |
4645 | 0 | LOG_WARNING("malformed recycle rowset").tag("key", hex(k)); |
4646 | 0 | return -1; |
4647 | 0 | } |
4648 | 0 | if (!recycle_rowset.has_type()) { // compatible with old version `RecycleRowsetPB` |
4649 | 0 | if (!recycle_rowset.has_resource_id()) [[unlikely]] { // impossible |
4650 | | // in old version, keep this key-value pair and it needs to be checked manually |
4651 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); |
4652 | 0 | return -1; |
4653 | 0 | } |
4654 | 0 | if (recycle_rowset.resource_id().empty()) [[unlikely]] { |
4655 | | // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv. |
4656 | 0 | LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key=" |
4657 | 0 | << hex(k) << " value=" << proto_to_json(recycle_rowset); |
4658 | 0 | return -1; |
4659 | 0 | } |
4660 | | // decode rowset_id |
4661 | 0 | auto k1 = k; |
4662 | 0 | k1.remove_prefix(1); |
4663 | 0 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
4664 | 0 | decode_key(&k1, &out); |
4665 | | // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB |
4666 | 0 | const auto& rowset_id = std::get<std::string>(std::get<0>(out[4])); |
4667 | 0 | LOG_INFO("delete old-version rowset data") |
4668 | 0 | .tag("instance_id", instance_id_) |
4669 | 0 | .tag("tablet_id", tablet_id) |
4670 | 0 | .tag("rowset_id", rowset_id); |
4671 | | |
4672 | | // Old version RecycleRowsetPB lacks full rowset_meta info (num_segments, schema, etc.), |
4673 | | // so we must use prefix deletion directly instead of batch delete. |
4674 | 0 | concurrent_delete_executor.add( |
4675 | 0 | [tablet_id, resource_id = recycle_rowset.resource_id(), rowset_id, this]() { |
4676 | | // delete by prefix, the recycle rowset key will be deleted by range later. |
4677 | 0 | return delete_rowset_data(resource_id, tablet_id, rowset_id); |
4678 | 0 | }); Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv |
4679 | 0 | } else { |
4680 | 0 | const auto& rowset_meta = recycle_rowset.rowset_meta(); |
4681 | | // Version 0-1 rowset has no resource_id and no actual data files, |
4682 | | // but still needs ref_count key cleanup, so we add it to all_tasks. |
4683 | | // It will be filtered out in Phase 2 when building rowsets_to_delete. |
4684 | 0 | RowsetDeleteTask task; |
4685 | 0 | task.rowset_meta = rowset_meta; |
4686 | 0 | task.recycle_rowset_key = k; |
4687 | 0 | all_tasks.push_back(std::move(task)); |
4688 | 0 | } |
4689 | 0 | return 0; |
4690 | 0 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ |
4691 | | |
4692 | 11 | if (scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_recycle_rowset_kv))) { |
4693 | 0 | LOG_WARNING("failed to recycle rowset kv of tablet") |
4694 | 0 | .tag("tablet id", tablet_id) |
4695 | 0 | .tag("instance_id", instance_id_) |
4696 | 0 | .tag("reason", "failed to scan and recycle RecycleRowsetPB"); |
4697 | 0 | ret = -1; |
4698 | 0 | } |
4699 | | |
4700 | | // Phase 1: Classify tasks by ref_count |
4701 | 11 | std::vector<RowsetDeleteTask> batch_delete_tasks; |
4702 | 60 | for (auto& task : all_tasks) { |
4703 | 60 | int classify_ret = classify_rowset_task_by_ref_count(task, batch_delete_tasks); |
4704 | 60 | if (classify_ret < 0) { |
4705 | 0 | LOG_WARNING("failed to classify rowset task, fallback to old logic") |
4706 | 0 | .tag("instance_id", instance_id_) |
4707 | 0 | .tag("tablet_id", tablet_id) |
4708 | 0 | .tag("rowset_id", task.rowset_meta.rowset_id_v2()); |
4709 | 0 | concurrent_delete_executor.add([this, t = std::move(task)]() mutable { |
4710 | 0 | return recycle_rowset_meta_and_data(t); |
4711 | 0 | }); Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEEN3$_4clEv Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEEN3$_4clEv |
4712 | 0 | } |
4713 | 60 | } |
4714 | | |
4715 | 11 | g_bvar_recycler_batch_delete_rowset_plan_count.put(instance_id_, batch_delete_tasks.size()); |
4716 | | |
4717 | 11 | LOG_INFO("batch delete plan created") |
4718 | 11 | .tag("instance_id", instance_id_) |
4719 | 11 | .tag("tablet_id", tablet_id) |
4720 | 11 | .tag("plan_count", batch_delete_tasks.size()); |
4721 | | |
4722 | | // Phase 2: Execute batch delete using existing delete_rowset_data |
4723 | 11 | if (!batch_delete_tasks.empty()) { |
4724 | 10 | std::map<std::string, RowsetMetaCloudPB> rowsets_to_delete; |
4725 | 49 | for (const auto& task : batch_delete_tasks) { |
4726 | | // Version 0-1 rowset has no resource_id and no actual data files, skip it |
4727 | 49 | if (task.rowset_meta.resource_id().empty()) { |
4728 | 10 | LOG_INFO("skip rowset with empty resource_id in batch delete") |
4729 | 10 | .tag("instance_id", instance_id_) |
4730 | 10 | .tag("tablet_id", tablet_id) |
4731 | 10 | .tag("rowset_id", task.rowset_meta.rowset_id_v2()); |
4732 | 10 | continue; |
4733 | 10 | } |
4734 | 39 | rowsets_to_delete[task.rowset_meta.rowset_id_v2()] = task.rowset_meta; |
4735 | 39 | } |
4736 | | |
4737 | | // Only call delete_rowset_data if there are rowsets with actual data to delete |
4738 | 10 | bool delete_success = true; |
4739 | 10 | if (!rowsets_to_delete.empty()) { |
4740 | 9 | RecyclerMetricsContext batch_metrics_context(instance_id_, |
4741 | 9 | "batch_delete_versioned_tablet"); |
4742 | 9 | int delete_ret = delete_rowset_data( |
4743 | 9 | rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET, batch_metrics_context); |
4744 | 9 | if (delete_ret != 0) { |
4745 | 0 | LOG_WARNING("batch delete execution failed") |
4746 | 0 | .tag("instance_id", instance_id_) |
4747 | 0 | .tag("tablet_id", tablet_id); |
4748 | 0 | g_bvar_recycler_batch_delete_failures.put(instance_id_, 1); |
4749 | 0 | ret = -1; |
4750 | 0 | delete_success = false; |
4751 | 0 | } |
4752 | 9 | } |
4753 | | |
4754 | | // Phase 3: Only cleanup metadata if data deletion succeeded. |
4755 | | // If deletion failed, keep recycle_rowset_key so next round will retry. |
4756 | 10 | if (delete_success) { |
4757 | 10 | int cleanup_ret = cleanup_rowset_metadata(batch_delete_tasks); |
4758 | 10 | if (cleanup_ret != 0) { |
4759 | 0 | LOG_WARNING("batch delete cleanup failed") |
4760 | 0 | .tag("instance_id", instance_id_) |
4761 | 0 | .tag("tablet_id", tablet_id); |
4762 | 0 | ret = -1; |
4763 | 0 | } |
4764 | 10 | } |
4765 | 10 | } |
4766 | | |
4767 | | // Always wait for fallback tasks to complete before returning |
4768 | 11 | bool finished = true; |
4769 | 11 | std::vector<int> rets = concurrent_delete_executor.when_all(&finished); |
4770 | 11 | for (int r : rets) { |
4771 | 0 | if (r != 0) { |
4772 | 0 | ret = -1; |
4773 | 0 | } |
4774 | 0 | } |
4775 | | |
4776 | 11 | ret = finished ? ret : -1; |
4777 | | |
4778 | 11 | if (ret != 0) { // failed recycle tablet data |
4779 | 0 | LOG_WARNING("recycle versioned tablet failed") |
4780 | 0 | .tag("finished", finished) |
4781 | 0 | .tag("ret", ret) |
4782 | 0 | .tag("instance_id", instance_id_) |
4783 | 0 | .tag("tablet_id", tablet_id); |
4784 | 0 | return ret; |
4785 | 0 | } |
4786 | | |
4787 | 11 | tablet_metrics_context_.total_recycled_data_size += |
4788 | 11 | recycle_rowsets_data_size + recycle_rowsets_index_size; |
4789 | 11 | tablet_metrics_context_.total_recycled_num += 1; |
4790 | 11 | segment_metrics_context_.total_recycled_num += recycle_segments_number; |
4791 | 11 | segment_metrics_context_.total_recycled_data_size += |
4792 | 11 | recycle_rowsets_data_size + recycle_rowsets_index_size; |
4793 | 11 | metrics_context.total_recycled_data_size += |
4794 | 11 | recycle_rowsets_data_size + recycle_rowsets_index_size; |
4795 | 11 | tablet_metrics_context_.report(); |
4796 | 11 | segment_metrics_context_.report(); |
4797 | 11 | metrics_context.report(); |
4798 | | |
4799 | 11 | txn.reset(); |
4800 | 11 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
4801 | 0 | LOG_WARNING("failed to recycle tablet ") |
4802 | 0 | .tag("tablet id", tablet_id) |
4803 | 0 | .tag("instance_id", instance_id_) |
4804 | 0 | .tag("reason", "failed to create txn"); |
4805 | 0 | ret = -1; |
4806 | 0 | } |
4807 | | // delete all rowset kv in this tablet |
4808 | 11 | txn->remove(rs_key0, rs_key1); |
4809 | 11 | txn->remove(recyc_rs_key0, recyc_rs_key1); |
4810 | | |
4811 | | // remove delete bitmap for MoW table |
4812 | 11 | std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id}); |
4813 | 11 | txn->remove(pending_key); |
4814 | 11 | std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0}); |
4815 | 11 | std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0}); |
4816 | 11 | txn->remove(delete_bitmap_start, delete_bitmap_end); |
4817 | | |
4818 | 11 | std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""}); |
4819 | 11 | std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""}); |
4820 | 11 | txn->remove(dbm_start_key, dbm_end_key); |
4821 | 11 | LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key) |
4822 | 11 | << " end=" << hex(dbm_end_key); |
4823 | | |
4824 | 11 | std::string versioned_idx_key = versioned::tablet_index_key({instance_id_, tablet_id}); |
4825 | 11 | std::string tablet_index_val; |
4826 | 11 | err = txn->get(versioned_idx_key, &tablet_index_val); |
4827 | 11 | if (err != TxnErrorCode::TXN_KEY_NOT_FOUND && err != TxnErrorCode::TXN_OK) { |
4828 | 0 | LOG_WARNING("failed to get tablet index kv") |
4829 | 0 | .tag("instance_id", instance_id_) |
4830 | 0 | .tag("tablet_id", tablet_id) |
4831 | 0 | .tag("err", err); |
4832 | 0 | ret = -1; |
4833 | 11 | } else if (err == TxnErrorCode::TXN_OK) { |
4834 | | // If the tablet index kv exists, we need to delete it |
4835 | 10 | TabletIndexPB tablet_index_pb; |
4836 | 10 | if (!tablet_index_pb.ParseFromString(tablet_index_val)) { |
4837 | 0 | LOG_WARNING("failed to parse tablet index pb") |
4838 | 0 | .tag("instance_id", instance_id_) |
4839 | 0 | .tag("tablet_id", tablet_id); |
4840 | 0 | ret = -1; |
4841 | 10 | } else { |
4842 | 10 | std::string versioned_inverted_idx_key = versioned::tablet_inverted_index_key( |
4843 | 10 | {instance_id_, tablet_index_pb.db_id(), tablet_index_pb.table_id(), |
4844 | 10 | tablet_index_pb.index_id(), tablet_index_pb.partition_id(), tablet_id}); |
4845 | 10 | txn->remove(versioned_inverted_idx_key); |
4846 | 10 | txn->remove(versioned_idx_key); |
4847 | 10 | } |
4848 | 10 | } |
4849 | | |
4850 | 11 | err = txn->commit(); |
4851 | 11 | if (err != TxnErrorCode::TXN_OK) { |
4852 | 0 | LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err; |
4853 | 0 | ret = -1; |
4854 | 0 | } |
4855 | | |
4856 | 11 | if (ret == 0) { |
4857 | | // All object files under tablet have been deleted |
4858 | 11 | std::lock_guard lock(recycled_tablets_mtx_); |
4859 | 11 | recycled_tablets_.insert(tablet_id); |
4860 | 11 | } |
4861 | | |
4862 | 11 | return ret; |
4863 | 11 | } |
4864 | | |
4865 | 27 | int InstanceRecycler::recycle_rowsets() { |
4866 | 27 | if (should_recycle_versioned_keys()) { |
4867 | 5 | return recycle_versioned_rowsets(); |
4868 | 5 | } |
4869 | | |
4870 | 22 | const std::string task_name = "recycle_rowsets"; |
4871 | 22 | int64_t num_scanned = 0; |
4872 | 22 | int64_t num_expired = 0; |
4873 | 22 | int64_t num_prepare = 0; |
4874 | 22 | int64_t num_compacted = 0; |
4875 | 22 | int64_t num_empty_rowset = 0; |
4876 | 22 | size_t total_rowset_key_size = 0; |
4877 | 22 | size_t total_rowset_value_size = 0; |
4878 | 22 | size_t expired_rowset_size = 0; |
4879 | 22 | std::atomic_long num_recycled = 0; |
4880 | 22 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
4881 | | |
4882 | 22 | RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""}; |
4883 | 22 | RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""}; |
4884 | 22 | std::string recyc_rs_key0; |
4885 | 22 | std::string recyc_rs_key1; |
4886 | 22 | recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0); |
4887 | 22 | recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1); |
4888 | | |
4889 | 22 | LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_); |
4890 | | |
4891 | 22 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
4892 | 22 | register_recycle_task(task_name, start_time); |
4893 | | |
4894 | 22 | DORIS_CLOUD_DEFER { |
4895 | 22 | unregister_recycle_task(task_name); |
4896 | 22 | int64_t cost = |
4897 | 22 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
4898 | 22 | metrics_context.finish_report(); |
4899 | 22 | LOG_WARNING("recycle rowsets finished, cost={}s", cost) |
4900 | 22 | .tag("instance_id", instance_id_) |
4901 | 22 | .tag("num_scanned", num_scanned) |
4902 | 22 | .tag("num_expired", num_expired) |
4903 | 22 | .tag("num_recycled", num_recycled) |
4904 | 22 | .tag("num_recycled.prepare", num_prepare) |
4905 | 22 | .tag("num_recycled.compacted", num_compacted) |
4906 | 22 | .tag("num_recycled.empty_rowset", num_empty_rowset) |
4907 | 22 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) |
4908 | 22 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) |
4909 | 22 | .tag("expired_rowset_meta_size", expired_rowset_size); |
4910 | 22 | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv Line | Count | Source | 4894 | 7 | DORIS_CLOUD_DEFER { | 4895 | 7 | unregister_recycle_task(task_name); | 4896 | 7 | int64_t cost = | 4897 | 7 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 4898 | 7 | metrics_context.finish_report(); | 4899 | 7 | LOG_WARNING("recycle rowsets finished, cost={}s", cost) | 4900 | 7 | .tag("instance_id", instance_id_) | 4901 | 7 | .tag("num_scanned", num_scanned) | 4902 | 7 | .tag("num_expired", num_expired) | 4903 | 7 | .tag("num_recycled", num_recycled) | 4904 | 7 | .tag("num_recycled.prepare", num_prepare) | 4905 | 7 | .tag("num_recycled.compacted", num_compacted) | 4906 | 7 | .tag("num_recycled.empty_rowset", num_empty_rowset) | 4907 | 7 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) | 4908 | 7 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) | 4909 | 7 | .tag("expired_rowset_meta_size", expired_rowset_size); | 4910 | 7 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv Line | Count | Source | 4894 | 15 | DORIS_CLOUD_DEFER { | 4895 | 15 | unregister_recycle_task(task_name); | 4896 | 15 | int64_t cost = | 4897 | 15 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 4898 | 15 | metrics_context.finish_report(); | 4899 | 15 | LOG_WARNING("recycle rowsets finished, cost={}s", cost) | 4900 | 15 | .tag("instance_id", instance_id_) | 4901 | 15 | .tag("num_scanned", num_scanned) | 4902 | 15 | .tag("num_expired", num_expired) | 4903 | 15 | .tag("num_recycled", num_recycled) | 4904 | 15 | .tag("num_recycled.prepare", num_prepare) | 4905 | 15 | .tag("num_recycled.compacted", num_compacted) | 4906 | 15 | .tag("num_recycled.empty_rowset", num_empty_rowset) | 4907 | 15 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) | 4908 | 15 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) | 4909 | 15 | .tag("expired_rowset_meta_size", expired_rowset_size); | 4910 | 15 | }; |
|
4911 | | |
4912 | 22 | std::vector<std::string> rowset_keys; |
4913 | 22 | std::vector<std::string> rowset_keys_to_mark_recycled; |
4914 | 22 | std::vector<std::string> rowset_keys_to_abort; |
4915 | 22 | std::vector<std::string> prepare_rowset_keys_to_delete; |
4916 | | // rowset_id -> rowset_meta |
4917 | | // store rowset id and meta for statistics rs size when delete |
4918 | 22 | std::map<std::string, doris::RowsetMetaCloudPB> rowsets; |
4919 | | |
4920 | | // Store keys of rowset recycled by background workers |
4921 | 22 | std::mutex async_recycled_rowset_keys_mutex; |
4922 | 22 | std::vector<std::string> async_recycled_rowset_keys; |
4923 | 22 | auto worker_pool = std::make_unique<SimpleThreadPool>( |
4924 | 22 | config::instance_recycler_worker_pool_size, "recycle_rowsets"); |
4925 | 22 | worker_pool->start(); |
4926 | | // TODO bacth delete |
4927 | 4.00k | auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) { |
4928 | 4.00k | std::string dbm_start_key = |
4929 | 4.00k | versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id}); |
4930 | 4.00k | std::string dbm_end_key = dbm_start_key; |
4931 | 4.00k | encode_int64(INT64_MAX, &dbm_end_key); |
4932 | 4.00k | auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key); |
4933 | 4.00k | if (ret != 0) { |
4934 | 0 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id=" |
4935 | 0 | << instance_id_; |
4936 | 0 | } |
4937 | 4.00k | return ret; |
4938 | 4.00k | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 4927 | 2 | auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) { | 4928 | 2 | std::string dbm_start_key = | 4929 | 2 | versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id}); | 4930 | 2 | std::string dbm_end_key = dbm_start_key; | 4931 | 2 | encode_int64(INT64_MAX, &dbm_end_key); | 4932 | 2 | auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key); | 4933 | 2 | if (ret != 0) { | 4934 | 0 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id=" | 4935 | 0 | << instance_id_; | 4936 | 0 | } | 4937 | 2 | return ret; | 4938 | 2 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 4927 | 4.00k | auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) { | 4928 | 4.00k | std::string dbm_start_key = | 4929 | 4.00k | versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id}); | 4930 | 4.00k | std::string dbm_end_key = dbm_start_key; | 4931 | 4.00k | encode_int64(INT64_MAX, &dbm_end_key); | 4932 | 4.00k | auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key); | 4933 | 4.00k | if (ret != 0) { | 4934 | 0 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id=" | 4935 | 0 | << instance_id_; | 4936 | 0 | } | 4937 | 4.00k | return ret; | 4938 | 4.00k | }; |
|
4939 | 22 | auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id, |
4940 | 250 | int64_t tablet_id, const std::string& rowset_id) { |
4941 | | // Try to delete rowset data in background thread |
4942 | 250 | int ret = worker_pool->submit_with_timeout( |
4943 | 250 | [&, resource_id, tablet_id, rowset_id, key]() mutable { |
4944 | 245 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { |
4945 | 0 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); |
4946 | 0 | return; |
4947 | 0 | } |
4948 | 245 | std::vector<std::string> keys; |
4949 | 245 | { |
4950 | 245 | std::lock_guard lock(async_recycled_rowset_keys_mutex); |
4951 | 245 | async_recycled_rowset_keys.push_back(std::move(key)); |
4952 | 245 | if (async_recycled_rowset_keys.size() > 100) { |
4953 | 2 | keys.swap(async_recycled_rowset_keys); |
4954 | 2 | } |
4955 | 245 | } |
4956 | 245 | delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id); |
4957 | 245 | if (keys.empty()) return; |
4958 | 2 | if (txn_remove(txn_kv_.get(), keys) != 0) { |
4959 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" |
4960 | 0 | << instance_id_; |
4961 | 2 | } else { |
4962 | 2 | num_recycled.fetch_add(keys.size(), std::memory_order_relaxed); |
4963 | 2 | check_recycle_task(instance_id_, "recycle_rowsets", num_scanned, |
4964 | 2 | num_recycled, start_time); |
4965 | 2 | } |
4966 | 2 | }, Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv Line | Count | Source | 4943 | 245 | [&, resource_id, tablet_id, rowset_id, key]() mutable { | 4944 | 245 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { | 4945 | 0 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); | 4946 | 0 | return; | 4947 | 0 | } | 4948 | 245 | std::vector<std::string> keys; | 4949 | 245 | { | 4950 | 245 | std::lock_guard lock(async_recycled_rowset_keys_mutex); | 4951 | 245 | async_recycled_rowset_keys.push_back(std::move(key)); | 4952 | 245 | if (async_recycled_rowset_keys.size() > 100) { | 4953 | 2 | keys.swap(async_recycled_rowset_keys); | 4954 | 2 | } | 4955 | 245 | } | 4956 | 245 | delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id); | 4957 | 245 | if (keys.empty()) return; | 4958 | 2 | if (txn_remove(txn_kv_.get(), keys) != 0) { | 4959 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" | 4960 | 0 | << instance_id_; | 4961 | 2 | } else { | 4962 | 2 | num_recycled.fetch_add(keys.size(), std::memory_order_relaxed); | 4963 | 2 | check_recycle_task(instance_id_, "recycle_rowsets", num_scanned, | 4964 | 2 | num_recycled, start_time); | 4965 | 2 | } | 4966 | 2 | }, |
|
4967 | 250 | 0); |
4968 | 250 | if (ret == 0) return 0; |
4969 | | // Submit task failed, delete rowset data in current thread |
4970 | 5 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { |
4971 | 0 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); |
4972 | 0 | return -1; |
4973 | 0 | } |
4974 | 5 | if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) { |
4975 | 0 | return -1; |
4976 | 0 | } |
4977 | 5 | rowset_keys.push_back(std::move(key)); |
4978 | 5 | return 0; |
4979 | 5 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ Line | Count | Source | 4940 | 250 | int64_t tablet_id, const std::string& rowset_id) { | 4941 | | // Try to delete rowset data in background thread | 4942 | 250 | int ret = worker_pool->submit_with_timeout( | 4943 | 250 | [&, resource_id, tablet_id, rowset_id, key]() mutable { | 4944 | 250 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { | 4945 | 250 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); | 4946 | 250 | return; | 4947 | 250 | } | 4948 | 250 | std::vector<std::string> keys; | 4949 | 250 | { | 4950 | 250 | std::lock_guard lock(async_recycled_rowset_keys_mutex); | 4951 | 250 | async_recycled_rowset_keys.push_back(std::move(key)); | 4952 | 250 | if (async_recycled_rowset_keys.size() > 100) { | 4953 | 250 | keys.swap(async_recycled_rowset_keys); | 4954 | 250 | } | 4955 | 250 | } | 4956 | 250 | delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id); | 4957 | 250 | if (keys.empty()) return; | 4958 | 250 | if (txn_remove(txn_kv_.get(), keys) != 0) { | 4959 | 250 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" | 4960 | 250 | << instance_id_; | 4961 | 250 | } else { | 4962 | 250 | num_recycled.fetch_add(keys.size(), std::memory_order_relaxed); | 4963 | 250 | check_recycle_task(instance_id_, "recycle_rowsets", num_scanned, | 4964 | 250 | num_recycled, start_time); | 4965 | 250 | } | 4966 | 250 | }, | 4967 | 250 | 0); | 4968 | 250 | if (ret == 0) return 0; | 4969 | | // Submit task failed, delete rowset data in current thread | 4970 | 5 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { | 4971 | 0 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); | 4972 | 0 
| return -1; | 4973 | 0 | } | 4974 | 5 | if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) { | 4975 | 0 | return -1; | 4976 | 0 | } | 4977 | 5 | rowset_keys.push_back(std::move(key)); | 4978 | 5 | return 0; | 4979 | 5 | }; |
|
4980 | | |
4981 | 22 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
4982 | | |
4983 | 7.75k | auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int { |
4984 | 7.75k | ++num_scanned; |
4985 | 7.75k | total_rowset_key_size += k.size(); |
4986 | 7.75k | total_rowset_value_size += v.size(); |
4987 | 7.75k | RecycleRowsetPB rowset; |
4988 | 7.75k | if (!rowset.ParseFromArray(v.data(), v.size())) { |
4989 | 0 | LOG_WARNING("malformed recycle rowset").tag("key", hex(k)); |
4990 | 0 | return -1; |
4991 | 0 | } |
4992 | | |
4993 | 7.75k | int64_t current_time = ::time(nullptr); |
4994 | 7.75k | int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts); |
4995 | | |
4996 | 7.75k | VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned |
4997 | 0 | << " num_expired=" << num_expired << " expiration=" << expiration |
4998 | 0 | << " RecycleRowsetPB=" << rowset.ShortDebugString(); |
4999 | 7.75k | if (current_time < expiration) { // not expired |
5000 | 0 | return 0; |
5001 | 0 | } |
5002 | 7.75k | ++num_expired; |
5003 | 7.75k | expired_rowset_size += v.size(); |
5004 | | |
5005 | 7.75k | if (!rowset.has_type()) { // old version `RecycleRowsetPB` |
5006 | 250 | if (!rowset.has_resource_id()) [[unlikely]] { // impossible |
5007 | | // in old version, keep this key-value pair and it needs to be checked manually |
5008 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); |
5009 | 0 | return -1; |
5010 | 0 | } |
5011 | 250 | if (rowset.resource_id().empty()) [[unlikely]] { |
5012 | | // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv. |
5013 | 0 | LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key=" |
5014 | 0 | << hex(k) << " value=" << proto_to_json(rowset); |
5015 | 0 | rowset_keys.emplace_back(k); |
5016 | 0 | return -1; |
5017 | 0 | } |
5018 | | // decode rowset_id |
5019 | 250 | auto k1 = k; |
5020 | 250 | k1.remove_prefix(1); |
5021 | 250 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
5022 | 250 | decode_key(&k1, &out); |
5023 | | // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB |
5024 | 250 | const auto& rowset_id = std::get<std::string>(std::get<0>(out[4])); |
5025 | 250 | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ |
5026 | 250 | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id |
5027 | 250 | << " task_type=" << metrics_context.operation_type; |
5028 | 250 | if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(), |
5029 | 250 | rowset.tablet_id(), rowset_id) != 0) { |
5030 | 0 | return -1; |
5031 | 0 | } |
5032 | 250 | metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size(); |
5033 | 250 | metrics_context.total_recycled_num++; |
5034 | 250 | segment_metrics_context_.total_recycled_data_size += |
5035 | 250 | rowset.rowset_meta().total_disk_size(); |
5036 | 250 | segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments(); |
5037 | 250 | return 0; |
5038 | 250 | } |
5039 | | |
5040 | 7.50k | auto* rowset_meta = rowset.mutable_rowset_meta(); |
5041 | 7.50k | if (config::enable_mark_delete_rowset_before_recycle) { |
5042 | 7.50k | if (need_mark_rowset_as_recycled(rowset)) { |
5043 | 3.75k | rowset_keys_to_mark_recycled.emplace_back(k); |
5044 | 3.75k | LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv " |
5045 | 3.75k | "at next turn, instance_id=" |
5046 | 3.75k | << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() |
5047 | 3.75k | << " version=[" << rowset_meta->start_version() << '-' |
5048 | 3.75k | << rowset_meta->end_version() << "]"; |
5049 | 3.75k | return 0; |
5050 | 3.75k | } |
5051 | 7.50k | } |
5052 | | |
5053 | 3.75k | if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle && |
5054 | 3.75k | rowset_meta->end_version() != 1) { |
5055 | 3.75k | if (make_deferred_abort_task(rowset).has_value()) { |
5056 | 2 | LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, " |
5057 | 2 | "instance_id=" |
5058 | 2 | << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() |
5059 | 2 | << " version=[" << rowset_meta->start_version() << '-' |
5060 | 2 | << rowset_meta->end_version() << "]"; |
5061 | 2 | rowset_keys_to_abort.emplace_back(k); |
5062 | 2 | } |
5063 | 3.75k | } |
5064 | | |
5065 | | // TODO(plat1ko): check rowset not referenced |
5066 | 3.75k | if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible |
5067 | 0 | if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) { |
5068 | 0 | LOG_INFO("recycle rowset that has empty resource id"); |
5069 | 0 | } else { |
5070 | | // other situations, keep this key-value pair and it needs to be checked manually |
5071 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); |
5072 | 0 | return -1; |
5073 | 0 | } |
5074 | 0 | } |
5075 | 3.75k | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ |
5076 | 3.75k | << " tablet_id=" << rowset_meta->tablet_id() |
5077 | 3.75k | << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=[" |
5078 | 3.75k | << rowset_meta->start_version() << '-' << rowset_meta->end_version() |
5079 | 3.75k | << "] txn_id=" << rowset_meta->txn_id() |
5080 | 3.75k | << " type=" << RecycleRowsetPB_Type_Name(rowset.type()) |
5081 | 3.75k | << " rowset_meta_size=" << v.size() |
5082 | 3.75k | << " creation_time=" << rowset_meta->creation_time() |
5083 | 3.75k | << " task_type=" << metrics_context.operation_type; |
5084 | 3.75k | if (rowset.type() == RecycleRowsetPB::PREPARE) { |
5085 | | // unable to calculate file path, can only be deleted by rowset id prefix |
5086 | 652 | num_prepare += 1; |
5087 | 652 | prepare_rowset_keys_to_delete.emplace_back(k); |
5088 | 3.10k | } else { |
5089 | 3.10k | num_compacted += rowset.type() == RecycleRowsetPB::COMPACT; |
5090 | 3.10k | rowset_keys.emplace_back(k); |
5091 | 3.10k | rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta)); |
5092 | 3.10k | if (rowset_meta->num_segments() <= 0) { // Skip empty rowset |
5093 | 3.10k | ++num_empty_rowset; |
5094 | 3.10k | } |
5095 | 3.10k | } |
5096 | 3.75k | return 0; |
5097 | 3.75k | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 4983 | 7 | auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int { | 4984 | 7 | ++num_scanned; | 4985 | 7 | total_rowset_key_size += k.size(); | 4986 | 7 | total_rowset_value_size += v.size(); | 4987 | 7 | RecycleRowsetPB rowset; | 4988 | 7 | if (!rowset.ParseFromArray(v.data(), v.size())) { | 4989 | 0 | LOG_WARNING("malformed recycle rowset").tag("key", hex(k)); | 4990 | 0 | return -1; | 4991 | 0 | } | 4992 | | | 4993 | 7 | int64_t current_time = ::time(nullptr); | 4994 | 7 | int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts); | 4995 | | | 4996 | 7 | VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned | 4997 | 0 | << " num_expired=" << num_expired << " expiration=" << expiration | 4998 | 0 | << " RecycleRowsetPB=" << rowset.ShortDebugString(); | 4999 | 7 | if (current_time < expiration) { // not expired | 5000 | 0 | return 0; | 5001 | 0 | } | 5002 | 7 | ++num_expired; | 5003 | 7 | expired_rowset_size += v.size(); | 5004 | | | 5005 | 7 | if (!rowset.has_type()) { // old version `RecycleRowsetPB` | 5006 | 0 | if (!rowset.has_resource_id()) [[unlikely]] { // impossible | 5007 | | // in old version, keep this key-value pair and it needs to be checked manually | 5008 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); | 5009 | 0 | return -1; | 5010 | 0 | } | 5011 | 0 | if (rowset.resource_id().empty()) [[unlikely]] { | 5012 | | // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv. 
| 5013 | 0 | LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key=" | 5014 | 0 | << hex(k) << " value=" << proto_to_json(rowset); | 5015 | 0 | rowset_keys.emplace_back(k); | 5016 | 0 | return -1; | 5017 | 0 | } | 5018 | | // decode rowset_id | 5019 | 0 | auto k1 = k; | 5020 | 0 | k1.remove_prefix(1); | 5021 | 0 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 5022 | 0 | decode_key(&k1, &out); | 5023 | | // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB | 5024 | 0 | const auto& rowset_id = std::get<std::string>(std::get<0>(out[4])); | 5025 | 0 | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ | 5026 | 0 | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id | 5027 | 0 | << " task_type=" << metrics_context.operation_type; | 5028 | 0 | if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(), | 5029 | 0 | rowset.tablet_id(), rowset_id) != 0) { | 5030 | 0 | return -1; | 5031 | 0 | } | 5032 | 0 | metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size(); | 5033 | 0 | metrics_context.total_recycled_num++; | 5034 | 0 | segment_metrics_context_.total_recycled_data_size += | 5035 | 0 | rowset.rowset_meta().total_disk_size(); | 5036 | 0 | segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments(); | 5037 | 0 | return 0; | 5038 | 0 | } | 5039 | | | 5040 | 7 | auto* rowset_meta = rowset.mutable_rowset_meta(); | 5041 | 7 | if (config::enable_mark_delete_rowset_before_recycle) { | 5042 | 7 | if (need_mark_rowset_as_recycled(rowset)) { | 5043 | 5 | rowset_keys_to_mark_recycled.emplace_back(k); | 5044 | 5 | LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv " | 5045 | 5 | "at next turn, instance_id=" | 5046 | 5 | << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() | 5047 | 5 | << " version=[" << rowset_meta->start_version() << '-' | 5048 | 5 | << 
rowset_meta->end_version() << "]"; | 5049 | 5 | return 0; | 5050 | 5 | } | 5051 | 7 | } | 5052 | | | 5053 | 2 | if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle && | 5054 | 2 | rowset_meta->end_version() != 1) { | 5055 | 2 | if (make_deferred_abort_task(rowset).has_value()) { | 5056 | 2 | LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, " | 5057 | 2 | "instance_id=" | 5058 | 2 | << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() | 5059 | 2 | << " version=[" << rowset_meta->start_version() << '-' | 5060 | 2 | << rowset_meta->end_version() << "]"; | 5061 | 2 | rowset_keys_to_abort.emplace_back(k); | 5062 | 2 | } | 5063 | 2 | } | 5064 | | | 5065 | | // TODO(plat1ko): check rowset not referenced | 5066 | 2 | if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible | 5067 | 0 | if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) { | 5068 | 0 | LOG_INFO("recycle rowset that has empty resource id"); | 5069 | 0 | } else { | 5070 | | // other situations, keep this key-value pair and it needs to be checked manually | 5071 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); | 5072 | 0 | return -1; | 5073 | 0 | } | 5074 | 0 | } | 5075 | 2 | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ | 5076 | 2 | << " tablet_id=" << rowset_meta->tablet_id() | 5077 | 2 | << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=[" | 5078 | 2 | << rowset_meta->start_version() << '-' << rowset_meta->end_version() | 5079 | 2 | << "] txn_id=" << rowset_meta->txn_id() | 5080 | 2 | << " type=" << RecycleRowsetPB_Type_Name(rowset.type()) | 5081 | 2 | << " rowset_meta_size=" << v.size() | 5082 | 2 | << " creation_time=" << rowset_meta->creation_time() | 5083 | 2 | << " task_type=" << metrics_context.operation_type; | 5084 | 2 | if (rowset.type() == RecycleRowsetPB::PREPARE) { | 5085 | | // unable to calculate file path, can only be deleted by rowset id 
prefix | 5086 | 2 | num_prepare += 1; | 5087 | 2 | prepare_rowset_keys_to_delete.emplace_back(k); | 5088 | 2 | } else { | 5089 | 0 | num_compacted += rowset.type() == RecycleRowsetPB::COMPACT; | 5090 | 0 | rowset_keys.emplace_back(k); | 5091 | 0 | rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta)); | 5092 | 0 | if (rowset_meta->num_segments() <= 0) { // Skip empty rowset | 5093 | 0 | ++num_empty_rowset; | 5094 | 0 | } | 5095 | 0 | } | 5096 | 2 | return 0; | 5097 | 2 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 4983 | 7.75k | auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int { | 4984 | 7.75k | ++num_scanned; | 4985 | 7.75k | total_rowset_key_size += k.size(); | 4986 | 7.75k | total_rowset_value_size += v.size(); | 4987 | 7.75k | RecycleRowsetPB rowset; | 4988 | 7.75k | if (!rowset.ParseFromArray(v.data(), v.size())) { | 4989 | 0 | LOG_WARNING("malformed recycle rowset").tag("key", hex(k)); | 4990 | 0 | return -1; | 4991 | 0 | } | 4992 | | | 4993 | 7.75k | int64_t current_time = ::time(nullptr); | 4994 | 7.75k | int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts); | 4995 | | | 4996 | 7.75k | VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned | 4997 | 0 | << " num_expired=" << num_expired << " expiration=" << expiration | 4998 | 0 | << " RecycleRowsetPB=" << rowset.ShortDebugString(); | 4999 | 7.75k | if (current_time < expiration) { // not expired | 5000 | 0 | return 0; | 5001 | 0 | } | 5002 | 7.75k | ++num_expired; | 5003 | 7.75k | expired_rowset_size += v.size(); | 5004 | | | 5005 | 7.75k | if (!rowset.has_type()) { // old version `RecycleRowsetPB` | 5006 | 250 | if (!rowset.has_resource_id()) [[unlikely]] { // impossible | 5007 | | // in old version, keep this key-value pair and it needs to be checked manually | 5008 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); | 5009 | 0 | return -1; | 5010 | 0 | } | 5011 | 250 | if (rowset.resource_id().empty()) [[unlikely]] { | 5012 | | // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv. 
| 5013 | 0 | LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key=" | 5014 | 0 | << hex(k) << " value=" << proto_to_json(rowset); | 5015 | 0 | rowset_keys.emplace_back(k); | 5016 | 0 | return -1; | 5017 | 0 | } | 5018 | | // decode rowset_id | 5019 | 250 | auto k1 = k; | 5020 | 250 | k1.remove_prefix(1); | 5021 | 250 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 5022 | 250 | decode_key(&k1, &out); | 5023 | | // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB | 5024 | 250 | const auto& rowset_id = std::get<std::string>(std::get<0>(out[4])); | 5025 | 250 | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ | 5026 | 250 | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id | 5027 | 250 | << " task_type=" << metrics_context.operation_type; | 5028 | 250 | if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(), | 5029 | 250 | rowset.tablet_id(), rowset_id) != 0) { | 5030 | 0 | return -1; | 5031 | 0 | } | 5032 | 250 | metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size(); | 5033 | 250 | metrics_context.total_recycled_num++; | 5034 | 250 | segment_metrics_context_.total_recycled_data_size += | 5035 | 250 | rowset.rowset_meta().total_disk_size(); | 5036 | 250 | segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments(); | 5037 | 250 | return 0; | 5038 | 250 | } | 5039 | | | 5040 | 7.50k | auto* rowset_meta = rowset.mutable_rowset_meta(); | 5041 | 7.50k | if (config::enable_mark_delete_rowset_before_recycle) { | 5042 | 7.50k | if (need_mark_rowset_as_recycled(rowset)) { | 5043 | 3.75k | rowset_keys_to_mark_recycled.emplace_back(k); | 5044 | 3.75k | LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv " | 5045 | 3.75k | "at next turn, instance_id=" | 5046 | 3.75k | << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() | 5047 | 3.75k | << " version=[" << 
rowset_meta->start_version() << '-' | 5048 | 3.75k | << rowset_meta->end_version() << "]"; | 5049 | 3.75k | return 0; | 5050 | 3.75k | } | 5051 | 7.50k | } | 5052 | | | 5053 | 3.75k | if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle && | 5054 | 3.75k | rowset_meta->end_version() != 1) { | 5055 | 3.75k | if (make_deferred_abort_task(rowset).has_value()) { | 5056 | 0 | LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, " | 5057 | 0 | "instance_id=" | 5058 | 0 | << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() | 5059 | 0 | << " version=[" << rowset_meta->start_version() << '-' | 5060 | 0 | << rowset_meta->end_version() << "]"; | 5061 | 0 | rowset_keys_to_abort.emplace_back(k); | 5062 | 0 | } | 5063 | 3.75k | } | 5064 | | | 5065 | | // TODO(plat1ko): check rowset not referenced | 5066 | 3.75k | if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible | 5067 | 0 | if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) { | 5068 | 0 | LOG_INFO("recycle rowset that has empty resource id"); | 5069 | 0 | } else { | 5070 | | // other situations, keep this key-value pair and it needs to be checked manually | 5071 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); | 5072 | 0 | return -1; | 5073 | 0 | } | 5074 | 0 | } | 5075 | 3.75k | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ | 5076 | 3.75k | << " tablet_id=" << rowset_meta->tablet_id() | 5077 | 3.75k | << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=[" | 5078 | 3.75k | << rowset_meta->start_version() << '-' << rowset_meta->end_version() | 5079 | 3.75k | << "] txn_id=" << rowset_meta->txn_id() | 5080 | 3.75k | << " type=" << RecycleRowsetPB_Type_Name(rowset.type()) | 5081 | 3.75k | << " rowset_meta_size=" << v.size() | 5082 | 3.75k | << " creation_time=" << rowset_meta->creation_time() | 5083 | 3.75k | << " task_type=" << metrics_context.operation_type; | 5084 | 3.75k | 
if (rowset.type() == RecycleRowsetPB::PREPARE) { | 5085 | | // unable to calculate file path, can only be deleted by rowset id prefix | 5086 | 650 | num_prepare += 1; | 5087 | 650 | prepare_rowset_keys_to_delete.emplace_back(k); | 5088 | 3.10k | } else { | 5089 | 3.10k | num_compacted += rowset.type() == RecycleRowsetPB::COMPACT; | 5090 | 3.10k | rowset_keys.emplace_back(k); | 5091 | 3.10k | rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta)); | 5092 | 3.10k | if (rowset_meta->num_segments() <= 0) { // Skip empty rowset | 5093 | 3.10k | ++num_empty_rowset; | 5094 | 3.10k | } | 5095 | 3.10k | } | 5096 | 3.75k | return 0; | 5097 | 3.75k | }; |
|
5098 | | |
5099 | 49 | auto loop_done = [&]() -> int { |
5100 | 49 | std::vector<std::string> rowset_keys_to_delete; |
5101 | 49 | std::vector<std::string> mark_keys_to_process; |
5102 | 49 | std::vector<std::string> abort_keys_to_process; |
5103 | 49 | std::vector<std::string> prepare_keys_to_process; |
5104 | | // rowset_id -> rowset_meta |
5105 | | // store rowset id and meta for statistics rs size when delete |
5106 | 49 | std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete; |
5107 | 49 | rowset_keys_to_delete.swap(rowset_keys); |
5108 | 49 | mark_keys_to_process.swap(rowset_keys_to_mark_recycled); |
5109 | 49 | abort_keys_to_process.swap(rowset_keys_to_abort); |
5110 | 49 | prepare_keys_to_process.swap(prepare_rowset_keys_to_delete); |
5111 | 49 | rowsets_to_delete.swap(rowsets); |
5112 | 49 | worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete), |
5113 | 49 | rowsets_to_delete = std::move(rowsets_to_delete), |
5114 | 49 | prepare_keys_to_process = std::move(prepare_keys_to_process), |
5115 | 49 | mark_keys_to_process = std::move(mark_keys_to_process), |
5116 | 49 | abort_keys_to_process = std::move(abort_keys_to_process)]() mutable { |
5117 | 49 | if (!mark_keys_to_process.empty() && |
5118 | 49 | batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_, |
5119 | 26 | mark_keys_to_process) != 0) { |
5120 | 0 | LOG(WARNING) << "failed to batch mark recycle rowsets as recycled, instance_id=" |
5121 | 0 | << instance_id_; |
5122 | 0 | return; |
5123 | 0 | } |
5124 | 49 | if (!abort_keys_to_process.empty() && |
5125 | 49 | batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(abort_keys_to_process, true) != |
5126 | 2 | 0) { |
5127 | 0 | return; |
5128 | 0 | } |
5129 | 49 | std::vector<DeferredRecyclePrepareDeleteTask> prepare_delete_tasks; |
5130 | 49 | if (!prepare_keys_to_process.empty() && |
5131 | 49 | collect_prepare_delete_tasks(txn_kv_.get(), instance_id_, prepare_keys_to_process, |
5132 | 23 | &prepare_delete_tasks) != 0) { |
5133 | 0 | LOG(WARNING) << "failed to collect prepare rowset delete tasks, instance_id=" |
5134 | 0 | << instance_id_; |
5135 | 0 | return; |
5136 | 0 | } |
5137 | 49 | if (!prepare_delete_tasks.empty()) { |
5138 | 23 | std::vector<std::string> prepare_rowset_keys_to_delete; |
5139 | 23 | prepare_rowset_keys_to_delete.reserve(prepare_delete_tasks.size()); |
5140 | 652 | for (const auto& task : prepare_delete_tasks) { |
5141 | 652 | if (delete_rowset_data(task.resource_id, task.tablet_id, task.rowset_id) != 0) { |
5142 | 0 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(task.key); |
5143 | 0 | return; |
5144 | 0 | } |
5145 | 652 | if (delete_versioned_delete_bitmap_kvs(task.tablet_id, task.rowset_id) != 0) { |
5146 | 0 | return; |
5147 | 0 | } |
5148 | 652 | prepare_rowset_keys_to_delete.emplace_back(task.key); |
5149 | 652 | } |
5150 | 23 | if (txn_remove(txn_kv_.get(), prepare_rowset_keys_to_delete) != 0) { |
5151 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" |
5152 | 0 | << instance_id_; |
5153 | 0 | return; |
5154 | 0 | } |
5155 | 23 | num_recycled.fetch_add(prepare_rowset_keys_to_delete.size(), |
5156 | 23 | std::memory_order_relaxed); |
5157 | 23 | } |
5158 | 49 | if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET, |
5159 | 49 | metrics_context) != 0) { |
5160 | 0 | LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_; |
5161 | 0 | return; |
5162 | 0 | } |
5163 | 3.10k | for (const auto& [_, rs] : rowsets_to_delete) { |
5164 | 3.10k | if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { |
5165 | 0 | return; |
5166 | 0 | } |
5167 | 3.10k | } |
5168 | 49 | if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) { |
5169 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_; |
5170 | 0 | return; |
5171 | 0 | } |
5172 | 49 | num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed); |
5173 | 49 | }); recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENUlvE_clEv Line | Count | Source | 5116 | 7 | abort_keys_to_process = std::move(abort_keys_to_process)]() mutable { | 5117 | 7 | if (!mark_keys_to_process.empty() && | 5118 | 7 | batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_, | 5119 | 5 | mark_keys_to_process) != 0) { | 5120 | 0 | LOG(WARNING) << "failed to batch mark recycle rowsets as recycled, instance_id=" | 5121 | 0 | << instance_id_; | 5122 | 0 | return; | 5123 | 0 | } | 5124 | 7 | if (!abort_keys_to_process.empty() && | 5125 | 7 | batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(abort_keys_to_process, true) != | 5126 | 2 | 0) { | 5127 | 0 | return; | 5128 | 0 | } | 5129 | 7 | std::vector<DeferredRecyclePrepareDeleteTask> prepare_delete_tasks; | 5130 | 7 | if (!prepare_keys_to_process.empty() && | 5131 | 7 | collect_prepare_delete_tasks(txn_kv_.get(), instance_id_, prepare_keys_to_process, | 5132 | 2 | &prepare_delete_tasks) != 0) { | 5133 | 0 | LOG(WARNING) << "failed to collect prepare rowset delete tasks, instance_id=" | 5134 | 0 | << instance_id_; | 5135 | 0 | return; | 5136 | 0 | } | 5137 | 7 | if (!prepare_delete_tasks.empty()) { | 5138 | 2 | std::vector<std::string> prepare_rowset_keys_to_delete; | 5139 | 2 | prepare_rowset_keys_to_delete.reserve(prepare_delete_tasks.size()); | 5140 | 2 | for (const auto& task : prepare_delete_tasks) { | 5141 | 2 | if (delete_rowset_data(task.resource_id, task.tablet_id, task.rowset_id) != 0) { | 5142 | 0 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(task.key); | 5143 | 0 | return; | 5144 | 0 | } | 5145 | 2 | if (delete_versioned_delete_bitmap_kvs(task.tablet_id, task.rowset_id) != 0) { | 5146 | 0 | return; | 5147 | 0 | } | 5148 | 2 | prepare_rowset_keys_to_delete.emplace_back(task.key); | 5149 | 2 | } | 5150 | 2 | if (txn_remove(txn_kv_.get(), prepare_rowset_keys_to_delete) != 0) { | 5151 | 0 | LOG(WARNING) << "failed to 
delete recycle rowset kv, instance_id=" | 5152 | 0 | << instance_id_; | 5153 | 0 | return; | 5154 | 0 | } | 5155 | 2 | num_recycled.fetch_add(prepare_rowset_keys_to_delete.size(), | 5156 | 2 | std::memory_order_relaxed); | 5157 | 2 | } | 5158 | 7 | if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET, | 5159 | 7 | metrics_context) != 0) { | 5160 | 0 | LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_; | 5161 | 0 | return; | 5162 | 0 | } | 5163 | 7 | for (const auto& [_, rs] : rowsets_to_delete) { | 5164 | 0 | if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 5165 | 0 | return; | 5166 | 0 | } | 5167 | 0 | } | 5168 | 7 | if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) { | 5169 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_; | 5170 | 0 | return; | 5171 | 0 | } | 5172 | 7 | num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed); | 5173 | 7 | }); |
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENUlvE_clEv Line | Count | Source | 5116 | 42 | abort_keys_to_process = std::move(abort_keys_to_process)]() mutable { | 5117 | 42 | if (!mark_keys_to_process.empty() && | 5118 | 42 | batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_, | 5119 | 21 | mark_keys_to_process) != 0) { | 5120 | 0 | LOG(WARNING) << "failed to batch mark recycle rowsets as recycled, instance_id=" | 5121 | 0 | << instance_id_; | 5122 | 0 | return; | 5123 | 0 | } | 5124 | 42 | if (!abort_keys_to_process.empty() && | 5125 | 42 | batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(abort_keys_to_process, true) != | 5126 | 0 | 0) { | 5127 | 0 | return; | 5128 | 0 | } | 5129 | 42 | std::vector<DeferredRecyclePrepareDeleteTask> prepare_delete_tasks; | 5130 | 42 | if (!prepare_keys_to_process.empty() && | 5131 | 42 | collect_prepare_delete_tasks(txn_kv_.get(), instance_id_, prepare_keys_to_process, | 5132 | 21 | &prepare_delete_tasks) != 0) { | 5133 | 0 | LOG(WARNING) << "failed to collect prepare rowset delete tasks, instance_id=" | 5134 | 0 | << instance_id_; | 5135 | 0 | return; | 5136 | 0 | } | 5137 | 42 | if (!prepare_delete_tasks.empty()) { | 5138 | 21 | std::vector<std::string> prepare_rowset_keys_to_delete; | 5139 | 21 | prepare_rowset_keys_to_delete.reserve(prepare_delete_tasks.size()); | 5140 | 650 | for (const auto& task : prepare_delete_tasks) { | 5141 | 650 | if (delete_rowset_data(task.resource_id, task.tablet_id, task.rowset_id) != 0) { | 5142 | 0 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(task.key); | 5143 | 0 | return; | 5144 | 0 | } | 5145 | 650 | if (delete_versioned_delete_bitmap_kvs(task.tablet_id, task.rowset_id) != 0) { | 5146 | 0 | return; | 5147 | 0 | } | 5148 | 650 | prepare_rowset_keys_to_delete.emplace_back(task.key); | 5149 | 650 | } | 5150 | 21 | if (txn_remove(txn_kv_.get(), prepare_rowset_keys_to_delete) != 0) { | 5151 | 0 | LOG(WARNING) << 
"failed to delete recycle rowset kv, instance_id=" | 5152 | 0 | << instance_id_; | 5153 | 0 | return; | 5154 | 0 | } | 5155 | 21 | num_recycled.fetch_add(prepare_rowset_keys_to_delete.size(), | 5156 | 21 | std::memory_order_relaxed); | 5157 | 21 | } | 5158 | 42 | if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET, | 5159 | 42 | metrics_context) != 0) { | 5160 | 0 | LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_; | 5161 | 0 | return; | 5162 | 0 | } | 5163 | 3.10k | for (const auto& [_, rs] : rowsets_to_delete) { | 5164 | 3.10k | if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 5165 | 0 | return; | 5166 | 0 | } | 5167 | 3.10k | } | 5168 | 42 | if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) { | 5169 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_; | 5170 | 0 | return; | 5171 | 0 | } | 5172 | 42 | num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed); | 5173 | 42 | }); |
|
5174 | 49 | return 0; |
5175 | 49 | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv Line | Count | Source | 5099 | 7 | auto loop_done = [&]() -> int { | 5100 | 7 | std::vector<std::string> rowset_keys_to_delete; | 5101 | 7 | std::vector<std::string> mark_keys_to_process; | 5102 | 7 | std::vector<std::string> abort_keys_to_process; | 5103 | 7 | std::vector<std::string> prepare_keys_to_process; | 5104 | | // rowset_id -> rowset_meta | 5105 | | // store rowset id and meta for statistics rs size when delete | 5106 | 7 | std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete; | 5107 | 7 | rowset_keys_to_delete.swap(rowset_keys); | 5108 | 7 | mark_keys_to_process.swap(rowset_keys_to_mark_recycled); | 5109 | 7 | abort_keys_to_process.swap(rowset_keys_to_abort); | 5110 | 7 | prepare_keys_to_process.swap(prepare_rowset_keys_to_delete); | 5111 | 7 | rowsets_to_delete.swap(rowsets); | 5112 | 7 | worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete), | 5113 | 7 | rowsets_to_delete = std::move(rowsets_to_delete), | 5114 | 7 | prepare_keys_to_process = std::move(prepare_keys_to_process), | 5115 | 7 | mark_keys_to_process = std::move(mark_keys_to_process), | 5116 | 7 | abort_keys_to_process = std::move(abort_keys_to_process)]() mutable { | 5117 | 7 | if (!mark_keys_to_process.empty() && | 5118 | 7 | batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_, | 5119 | 7 | mark_keys_to_process) != 0) { | 5120 | 7 | LOG(WARNING) << "failed to batch mark recycle rowsets as recycled, instance_id=" | 5121 | 7 | << instance_id_; | 5122 | 7 | return; | 5123 | 7 | } | 5124 | 7 | if (!abort_keys_to_process.empty() && | 5125 | 7 | batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(abort_keys_to_process, true) != | 5126 | 7 | 0) { | 5127 | 7 | return; | 5128 | 7 | } | 5129 | 7 | std::vector<DeferredRecyclePrepareDeleteTask> prepare_delete_tasks; | 5130 | 7 | if (!prepare_keys_to_process.empty() && | 5131 | 7 | 
collect_prepare_delete_tasks(txn_kv_.get(), instance_id_, prepare_keys_to_process, | 5132 | 7 | &prepare_delete_tasks) != 0) { | 5133 | 7 | LOG(WARNING) << "failed to collect prepare rowset delete tasks, instance_id=" | 5134 | 7 | << instance_id_; | 5135 | 7 | return; | 5136 | 7 | } | 5137 | 7 | if (!prepare_delete_tasks.empty()) { | 5138 | 7 | std::vector<std::string> prepare_rowset_keys_to_delete; | 5139 | 7 | prepare_rowset_keys_to_delete.reserve(prepare_delete_tasks.size()); | 5140 | 7 | for (const auto& task : prepare_delete_tasks) { | 5141 | 7 | if (delete_rowset_data(task.resource_id, task.tablet_id, task.rowset_id) != 0) { | 5142 | 7 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(task.key); | 5143 | 7 | return; | 5144 | 7 | } | 5145 | 7 | if (delete_versioned_delete_bitmap_kvs(task.tablet_id, task.rowset_id) != 0) { | 5146 | 7 | return; | 5147 | 7 | } | 5148 | 7 | prepare_rowset_keys_to_delete.emplace_back(task.key); | 5149 | 7 | } | 5150 | 7 | if (txn_remove(txn_kv_.get(), prepare_rowset_keys_to_delete) != 0) { | 5151 | 7 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" | 5152 | 7 | << instance_id_; | 5153 | 7 | return; | 5154 | 7 | } | 5155 | 7 | num_recycled.fetch_add(prepare_rowset_keys_to_delete.size(), | 5156 | 7 | std::memory_order_relaxed); | 5157 | 7 | } | 5158 | 7 | if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET, | 5159 | 7 | metrics_context) != 0) { | 5160 | 7 | LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_; | 5161 | 7 | return; | 5162 | 7 | } | 5163 | 7 | for (const auto& [_, rs] : rowsets_to_delete) { | 5164 | 7 | if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 5165 | 7 | return; | 5166 | 7 | } | 5167 | 7 | } | 5168 | 7 | if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) { | 5169 | 7 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_; | 5170 | 7 | return; | 5171 | 7 | } | 
5172 | 7 | num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed); | 5173 | 7 | }); | 5174 | 7 | return 0; | 5175 | 7 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv Line | Count | Source | 5099 | 42 | auto loop_done = [&]() -> int { | 5100 | 42 | std::vector<std::string> rowset_keys_to_delete; | 5101 | 42 | std::vector<std::string> mark_keys_to_process; | 5102 | 42 | std::vector<std::string> abort_keys_to_process; | 5103 | 42 | std::vector<std::string> prepare_keys_to_process; | 5104 | | // rowset_id -> rowset_meta | 5105 | | // store rowset id and meta for statistics rs size when delete | 5106 | 42 | std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete; | 5107 | 42 | rowset_keys_to_delete.swap(rowset_keys); | 5108 | 42 | mark_keys_to_process.swap(rowset_keys_to_mark_recycled); | 5109 | 42 | abort_keys_to_process.swap(rowset_keys_to_abort); | 5110 | 42 | prepare_keys_to_process.swap(prepare_rowset_keys_to_delete); | 5111 | 42 | rowsets_to_delete.swap(rowsets); | 5112 | 42 | worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete), | 5113 | 42 | rowsets_to_delete = std::move(rowsets_to_delete), | 5114 | 42 | prepare_keys_to_process = std::move(prepare_keys_to_process), | 5115 | 42 | mark_keys_to_process = std::move(mark_keys_to_process), | 5116 | 42 | abort_keys_to_process = std::move(abort_keys_to_process)]() mutable { | 5117 | 42 | if (!mark_keys_to_process.empty() && | 5118 | 42 | batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_, | 5119 | 42 | mark_keys_to_process) != 0) { | 5120 | 42 | LOG(WARNING) << "failed to batch mark recycle rowsets as recycled, instance_id=" | 5121 | 42 | << instance_id_; | 5122 | 42 | return; | 5123 | 42 | } | 5124 | 42 | if (!abort_keys_to_process.empty() && | 5125 | 42 | batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(abort_keys_to_process, true) != | 5126 | 42 | 0) { | 5127 | 42 | return; | 5128 | 42 | } | 5129 | 42 | std::vector<DeferredRecyclePrepareDeleteTask> prepare_delete_tasks; | 5130 | 42 | if (!prepare_keys_to_process.empty() && | 5131 | 
42 | collect_prepare_delete_tasks(txn_kv_.get(), instance_id_, prepare_keys_to_process, | 5132 | 42 | &prepare_delete_tasks) != 0) { | 5133 | 42 | LOG(WARNING) << "failed to collect prepare rowset delete tasks, instance_id=" | 5134 | 42 | << instance_id_; | 5135 | 42 | return; | 5136 | 42 | } | 5137 | 42 | if (!prepare_delete_tasks.empty()) { | 5138 | 42 | std::vector<std::string> prepare_rowset_keys_to_delete; | 5139 | 42 | prepare_rowset_keys_to_delete.reserve(prepare_delete_tasks.size()); | 5140 | 42 | for (const auto& task : prepare_delete_tasks) { | 5141 | 42 | if (delete_rowset_data(task.resource_id, task.tablet_id, task.rowset_id) != 0) { | 5142 | 42 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(task.key); | 5143 | 42 | return; | 5144 | 42 | } | 5145 | 42 | if (delete_versioned_delete_bitmap_kvs(task.tablet_id, task.rowset_id) != 0) { | 5146 | 42 | return; | 5147 | 42 | } | 5148 | 42 | prepare_rowset_keys_to_delete.emplace_back(task.key); | 5149 | 42 | } | 5150 | 42 | if (txn_remove(txn_kv_.get(), prepare_rowset_keys_to_delete) != 0) { | 5151 | 42 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" | 5152 | 42 | << instance_id_; | 5153 | 42 | return; | 5154 | 42 | } | 5155 | 42 | num_recycled.fetch_add(prepare_rowset_keys_to_delete.size(), | 5156 | 42 | std::memory_order_relaxed); | 5157 | 42 | } | 5158 | 42 | if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET, | 5159 | 42 | metrics_context) != 0) { | 5160 | 42 | LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_; | 5161 | 42 | return; | 5162 | 42 | } | 5163 | 42 | for (const auto& [_, rs] : rowsets_to_delete) { | 5164 | 42 | if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 5165 | 42 | return; | 5166 | 42 | } | 5167 | 42 | } | 5168 | 42 | if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) { | 5169 | 42 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << 
instance_id_; | 5170 | 42 | return; | 5171 | 42 | } | 5172 | 42 | num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed); | 5173 | 42 | }); | 5174 | 42 | return 0; | 5175 | 42 | }; |
|
5176 | | |
5177 | 22 | if (config::enable_recycler_stats_metrics) { |
5178 | 0 | scan_and_statistics_rowsets(); |
5179 | 0 | } |
5180 | | // recycle_func and loop_done for scan and recycle |
5181 | 22 | int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv), |
5182 | 22 | std::move(loop_done)); |
5183 | | |
5184 | 22 | worker_pool->stop(); |
5185 | | |
5186 | 22 | if (!async_recycled_rowset_keys.empty()) { |
5187 | 1 | if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) { |
5188 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_; |
5189 | 0 | return -1; |
5190 | 1 | } else { |
5191 | 1 | num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed); |
5192 | 1 | } |
5193 | 1 | } |
5194 | | |
5195 | | // Report final metrics after all concurrent tasks completed |
5196 | 22 | segment_metrics_context_.report(); |
5197 | 22 | metrics_context.report(); |
5198 | | |
5199 | 22 | return ret; |
5200 | 22 | } |
5201 | | |
5202 | 13 | int InstanceRecycler::recycle_restore_jobs() { |
5203 | 13 | const std::string task_name = "recycle_restore_jobs"; |
5204 | 13 | int64_t num_scanned = 0; |
5205 | 13 | int64_t num_expired = 0; |
5206 | 13 | int64_t num_recycled = 0; |
5207 | 13 | int64_t num_aborted = 0; |
5208 | | |
5209 | 13 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
5210 | | |
5211 | 13 | JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0}; |
5212 | 13 | JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX}; |
5213 | 13 | std::string restore_job_key0; |
5214 | 13 | std::string restore_job_key1; |
5215 | 13 | job_restore_tablet_key(restore_job_key_info0, &restore_job_key0); |
5216 | 13 | job_restore_tablet_key(restore_job_key_info1, &restore_job_key1); |
5217 | | |
5218 | 13 | LOG_INFO("begin to recycle restore jobs").tag("instance_id", instance_id_); |
5219 | | |
5220 | 13 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
5221 | 13 | register_recycle_task(task_name, start_time); |
5222 | | |
5223 | 13 | DORIS_CLOUD_DEFER { |
5224 | 13 | unregister_recycle_task(task_name); |
5225 | 13 | int64_t cost = |
5226 | 13 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
5227 | 13 | metrics_context.finish_report(); |
5228 | | |
5229 | 13 | LOG_INFO("recycle restore jobs finished, cost={}s", cost) |
5230 | 13 | .tag("instance_id", instance_id_) |
5231 | 13 | .tag("num_scanned", num_scanned) |
5232 | 13 | .tag("num_expired", num_expired) |
5233 | 13 | .tag("num_recycled", num_recycled) |
5234 | 13 | .tag("num_aborted", num_aborted); |
5235 | 13 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv Line | Count | Source | 5223 | 13 | DORIS_CLOUD_DEFER { | 5224 | 13 | unregister_recycle_task(task_name); | 5225 | 13 | int64_t cost = | 5226 | 13 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 5227 | 13 | metrics_context.finish_report(); | 5228 | | | 5229 | 13 | LOG_INFO("recycle restore jobs finished, cost={}s", cost) | 5230 | 13 | .tag("instance_id", instance_id_) | 5231 | 13 | .tag("num_scanned", num_scanned) | 5232 | 13 | .tag("num_expired", num_expired) | 5233 | 13 | .tag("num_recycled", num_recycled) | 5234 | 13 | .tag("num_aborted", num_aborted); | 5235 | 13 | }; |
|
5236 | | |
5237 | 13 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
5238 | | |
5239 | 13 | std::vector<std::string_view> restore_job_keys; |
5240 | 41 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { |
5241 | 41 | ++num_scanned; |
5242 | 41 | RestoreJobCloudPB restore_job_pb; |
5243 | 41 | if (!restore_job_pb.ParseFromArray(v.data(), v.size())) { |
5244 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); |
5245 | 0 | return -1; |
5246 | 0 | } |
5247 | 41 | int64_t expiration = |
5248 | 41 | calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts); |
5249 | 41 | VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned |
5250 | 0 | << " num_expired=" << num_expired << " expiration time=" << expiration |
5251 | 0 | << " job expiration=" << restore_job_pb.expired_at_s() |
5252 | 0 | << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s() |
5253 | 0 | << " state=" << restore_job_pb.state(); |
5254 | 41 | int64_t current_time = ::time(nullptr); |
5255 | 41 | if (current_time < expiration) { // not expired |
5256 | 0 | return 0; |
5257 | 0 | } |
5258 | 41 | ++num_expired; |
5259 | | |
5260 | 41 | int64_t tablet_id = restore_job_pb.tablet_id(); |
5261 | 41 | LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_ |
5262 | 41 | << " restore_job_pb=" << restore_job_pb.DebugString(); |
5263 | | |
5264 | 41 | std::unique_ptr<Transaction> txn; |
5265 | 41 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
5266 | 41 | if (err != TxnErrorCode::TXN_OK) { |
5267 | 0 | LOG_WARNING("failed to recycle restore job") |
5268 | 0 | .tag("err", err) |
5269 | 0 | .tag("tablet id", tablet_id) |
5270 | 0 | .tag("instance_id", instance_id_) |
5271 | 0 | .tag("reason", "failed to create txn"); |
5272 | 0 | return -1; |
5273 | 0 | } |
5274 | | |
5275 | 41 | std::string val; |
5276 | 41 | err = txn->get(k, &val); |
5277 | 41 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it |
5278 | 0 | LOG_INFO("restore job {} has been recycled", tablet_id); |
5279 | 0 | return 0; |
5280 | 0 | } |
5281 | 41 | if (err != TxnErrorCode::TXN_OK) { |
5282 | 0 | LOG_WARNING("failed to get kv"); |
5283 | 0 | return -1; |
5284 | 0 | } |
5285 | 41 | restore_job_pb.Clear(); |
5286 | 41 | if (!restore_job_pb.ParseFromString(val)) { |
5287 | 0 | LOG_WARNING("malformed recycle restore job value").tag("key", hex(k)); |
5288 | 0 | return -1; |
5289 | 0 | } |
5290 | | |
5291 | | // PREPARED or COMMITTED, change state to DROPPED and return |
5292 | 41 | if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED || |
5293 | 41 | restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) { |
5294 | 0 | restore_job_pb.set_state(RestoreJobCloudPB::DROPPED); |
5295 | 0 | restore_job_pb.set_need_recycle_data(true); |
5296 | 0 | txn->put(k, restore_job_pb.SerializeAsString()); |
5297 | 0 | err = txn->commit(); |
5298 | 0 | if (err != TxnErrorCode::TXN_OK) { |
5299 | 0 | LOG_WARNING("failed to commit txn: {}", err); |
5300 | 0 | return -1; |
5301 | 0 | } |
5302 | 0 | num_aborted++; |
5303 | 0 | return 0; |
5304 | 0 | } |
5305 | | |
5306 | | // Change state to RECYCLING |
5307 | 41 | if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) { |
5308 | 21 | restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING); |
5309 | 21 | txn->put(k, restore_job_pb.SerializeAsString()); |
5310 | 21 | err = txn->commit(); |
5311 | 21 | if (err != TxnErrorCode::TXN_OK) { |
5312 | 0 | LOG_WARNING("failed to commit txn: {}", err); |
5313 | 0 | return -1; |
5314 | 0 | } |
5315 | 21 | return 0; |
5316 | 21 | } |
5317 | | |
5318 | 20 | std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0}); |
5319 | 20 | std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0}); |
5320 | | |
5321 | | // Recycle all data associated with the restore job. |
5322 | | // This includes rowsets, segments, and related resources. |
5323 | 20 | bool need_recycle_data = restore_job_pb.need_recycle_data(); |
5324 | 20 | if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) { |
5325 | 0 | LOG_WARNING("failed to recycle tablet") |
5326 | 0 | .tag("tablet_id", tablet_id) |
5327 | 0 | .tag("instance_id", instance_id_); |
5328 | 0 | return -1; |
5329 | 0 | } |
5330 | | |
5331 | | // delete all restore job rowset kv |
5332 | 20 | txn->remove(restore_job_rs_key0, restore_job_rs_key1); |
5333 | | |
5334 | 20 | err = txn->commit(); |
5335 | 20 | if (err != TxnErrorCode::TXN_OK) { |
5336 | 0 | LOG_WARNING("failed to recycle tablet restore job rowset kv") |
5337 | 0 | .tag("err", err) |
5338 | 0 | .tag("tablet id", tablet_id) |
5339 | 0 | .tag("instance_id", instance_id_) |
5340 | 0 | .tag("reason", "failed to commit txn"); |
5341 | 0 | return -1; |
5342 | 0 | } |
5343 | | |
5344 | 20 | metrics_context.total_recycled_num = ++num_recycled; |
5345 | 20 | metrics_context.report(); |
5346 | 20 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); |
5347 | 20 | restore_job_keys.push_back(k); |
5348 | | |
5349 | 20 | LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k) |
5350 | 20 | << " tablet_id=" << tablet_id; |
5351 | 20 | return 0; |
5352 | 20 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 5240 | 41 | auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int { | 5241 | 41 | ++num_scanned; | 5242 | 41 | RestoreJobCloudPB restore_job_pb; | 5243 | 41 | if (!restore_job_pb.ParseFromArray(v.data(), v.size())) { | 5244 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); | 5245 | 0 | return -1; | 5246 | 0 | } | 5247 | 41 | int64_t expiration = | 5248 | 41 | calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts); | 5249 | 41 | VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned | 5250 | 0 | << " num_expired=" << num_expired << " expiration time=" << expiration | 5251 | 0 | << " job expiration=" << restore_job_pb.expired_at_s() | 5252 | 0 | << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s() | 5253 | 0 | << " state=" << restore_job_pb.state(); | 5254 | 41 | int64_t current_time = ::time(nullptr); | 5255 | 41 | if (current_time < expiration) { // not expired | 5256 | 0 | return 0; | 5257 | 0 | } | 5258 | 41 | ++num_expired; | 5259 | | | 5260 | 41 | int64_t tablet_id = restore_job_pb.tablet_id(); | 5261 | 41 | LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_ | 5262 | 41 | << " restore_job_pb=" << restore_job_pb.DebugString(); | 5263 | | | 5264 | 41 | std::unique_ptr<Transaction> txn; | 5265 | 41 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 5266 | 41 | if (err != TxnErrorCode::TXN_OK) { | 5267 | 0 | LOG_WARNING("failed to recycle restore job") | 5268 | 0 | .tag("err", err) | 5269 | 0 | .tag("tablet id", tablet_id) | 5270 | 0 | .tag("instance_id", instance_id_) | 5271 | 0 | 
.tag("reason", "failed to create txn"); | 5272 | 0 | return -1; | 5273 | 0 | } | 5274 | | | 5275 | 41 | std::string val; | 5276 | 41 | err = txn->get(k, &val); | 5277 | 41 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it | 5278 | 0 | LOG_INFO("restore job {} has been recycled", tablet_id); | 5279 | 0 | return 0; | 5280 | 0 | } | 5281 | 41 | if (err != TxnErrorCode::TXN_OK) { | 5282 | 0 | LOG_WARNING("failed to get kv"); | 5283 | 0 | return -1; | 5284 | 0 | } | 5285 | 41 | restore_job_pb.Clear(); | 5286 | 41 | if (!restore_job_pb.ParseFromString(val)) { | 5287 | 0 | LOG_WARNING("malformed recycle restore job value").tag("key", hex(k)); | 5288 | 0 | return -1; | 5289 | 0 | } | 5290 | | | 5291 | | // PREPARED or COMMITTED, change state to DROPPED and return | 5292 | 41 | if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED || | 5293 | 41 | restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) { | 5294 | 0 | restore_job_pb.set_state(RestoreJobCloudPB::DROPPED); | 5295 | 0 | restore_job_pb.set_need_recycle_data(true); | 5296 | 0 | txn->put(k, restore_job_pb.SerializeAsString()); | 5297 | 0 | err = txn->commit(); | 5298 | 0 | if (err != TxnErrorCode::TXN_OK) { | 5299 | 0 | LOG_WARNING("failed to commit txn: {}", err); | 5300 | 0 | return -1; | 5301 | 0 | } | 5302 | 0 | num_aborted++; | 5303 | 0 | return 0; | 5304 | 0 | } | 5305 | | | 5306 | | // Change state to RECYCLING | 5307 | 41 | if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) { | 5308 | 21 | restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING); | 5309 | 21 | txn->put(k, restore_job_pb.SerializeAsString()); | 5310 | 21 | err = txn->commit(); | 5311 | 21 | if (err != TxnErrorCode::TXN_OK) { | 5312 | 0 | LOG_WARNING("failed to commit txn: {}", err); | 5313 | 0 | return -1; | 5314 | 0 | } | 5315 | 21 | return 0; | 5316 | 21 | } | 5317 | | | 5318 | 20 | std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0}); | 5319 | 20 | std::string 
restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0}); | 5320 | | | 5321 | | // Recycle all data associated with the restore job. | 5322 | | // This includes rowsets, segments, and related resources. | 5323 | 20 | bool need_recycle_data = restore_job_pb.need_recycle_data(); | 5324 | 20 | if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) { | 5325 | 0 | LOG_WARNING("failed to recycle tablet") | 5326 | 0 | .tag("tablet_id", tablet_id) | 5327 | 0 | .tag("instance_id", instance_id_); | 5328 | 0 | return -1; | 5329 | 0 | } | 5330 | | | 5331 | | // delete all restore job rowset kv | 5332 | 20 | txn->remove(restore_job_rs_key0, restore_job_rs_key1); | 5333 | | | 5334 | 20 | err = txn->commit(); | 5335 | 20 | if (err != TxnErrorCode::TXN_OK) { | 5336 | 0 | LOG_WARNING("failed to recycle tablet restore job rowset kv") | 5337 | 0 | .tag("err", err) | 5338 | 0 | .tag("tablet id", tablet_id) | 5339 | 0 | .tag("instance_id", instance_id_) | 5340 | 0 | .tag("reason", "failed to commit txn"); | 5341 | 0 | return -1; | 5342 | 0 | } | 5343 | | | 5344 | 20 | metrics_context.total_recycled_num = ++num_recycled; | 5345 | 20 | metrics_context.report(); | 5346 | 20 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); | 5347 | 20 | restore_job_keys.push_back(k); | 5348 | | | 5349 | 20 | LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k) | 5350 | 20 | << " tablet_id=" << tablet_id; | 5351 | 20 | return 0; | 5352 | 20 | }; |
|
5353 | | |
5354 | 13 | auto loop_done = [&restore_job_keys, this]() -> int { |
5355 | 3 | if (restore_job_keys.empty()) return 0; |
5356 | 1 | DORIS_CLOUD_DEFER { |
5357 | 1 | restore_job_keys.clear(); |
5358 | 1 | }; Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv Line | Count | Source | 5356 | 1 | DORIS_CLOUD_DEFER { | 5357 | 1 | restore_job_keys.clear(); | 5358 | 1 | }; |
|
5359 | | |
5360 | 1 | std::unique_ptr<Transaction> txn; |
5361 | 1 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
5362 | 1 | if (err != TxnErrorCode::TXN_OK) { |
5363 | 0 | LOG_WARNING("failed to recycle restore job") |
5364 | 0 | .tag("err", err) |
5365 | 0 | .tag("instance_id", instance_id_) |
5366 | 0 | .tag("reason", "failed to create txn"); |
5367 | 0 | return -1; |
5368 | 0 | } |
5369 | 20 | for (auto& k : restore_job_keys) { |
5370 | 20 | txn->remove(k); |
5371 | 20 | } |
5372 | 1 | err = txn->commit(); |
5373 | 1 | if (err != TxnErrorCode::TXN_OK) { |
5374 | 0 | LOG_WARNING("failed to recycle restore job") |
5375 | 0 | .tag("err", err) |
5376 | 0 | .tag("instance_id", instance_id_) |
5377 | 0 | .tag("reason", "failed to commit txn"); |
5378 | 0 | return -1; |
5379 | 0 | } |
5380 | 1 | return 0; |
5381 | 1 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv Line | Count | Source | 5354 | 3 | auto loop_done = [&restore_job_keys, this]() -> int { | 5355 | 3 | if (restore_job_keys.empty()) return 0; | 5356 | 1 | DORIS_CLOUD_DEFER { | 5357 | 1 | restore_job_keys.clear(); | 5358 | 1 | }; | 5359 | | | 5360 | 1 | std::unique_ptr<Transaction> txn; | 5361 | 1 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 5362 | 1 | if (err != TxnErrorCode::TXN_OK) { | 5363 | 0 | LOG_WARNING("failed to recycle restore job") | 5364 | 0 | .tag("err", err) | 5365 | 0 | .tag("instance_id", instance_id_) | 5366 | 0 | .tag("reason", "failed to create txn"); | 5367 | 0 | return -1; | 5368 | 0 | } | 5369 | 20 | for (auto& k : restore_job_keys) { | 5370 | 20 | txn->remove(k); | 5371 | 20 | } | 5372 | 1 | err = txn->commit(); | 5373 | 1 | if (err != TxnErrorCode::TXN_OK) { | 5374 | 0 | LOG_WARNING("failed to recycle restore job") | 5375 | 0 | .tag("err", err) | 5376 | 0 | .tag("instance_id", instance_id_) | 5377 | 0 | .tag("reason", "failed to commit txn"); | 5378 | 0 | return -1; | 5379 | 0 | } | 5380 | 1 | return 0; | 5381 | 1 | }; |
|
5382 | | |
5383 | 13 | if (config::enable_recycler_stats_metrics) { |
5384 | 0 | scan_and_statistics_restore_jobs(); |
5385 | 0 | } |
5386 | | |
5387 | 13 | return scan_and_recycle(restore_job_key0, restore_job_key1, std::move(recycle_func), |
5388 | 13 | std::move(loop_done)); |
5389 | 13 | } |
5390 | | |
5391 | 10 | int InstanceRecycler::recycle_versioned_rowsets() { |
5392 | 10 | const std::string task_name = "recycle_rowsets"; |
5393 | 10 | int64_t num_scanned = 0; |
5394 | 10 | int64_t num_expired = 0; |
5395 | 10 | int64_t num_prepare = 0; |
5396 | 10 | int64_t num_compacted = 0; |
5397 | 10 | int64_t num_empty_rowset = 0; |
5398 | 10 | size_t total_rowset_key_size = 0; |
5399 | 10 | size_t total_rowset_value_size = 0; |
5400 | 10 | size_t expired_rowset_size = 0; |
5401 | 10 | std::atomic_long num_recycled = 0; |
5402 | 10 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
5403 | | |
5404 | 10 | RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""}; |
5405 | 10 | RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""}; |
5406 | 10 | std::string recyc_rs_key0; |
5407 | 10 | std::string recyc_rs_key1; |
5408 | 10 | recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0); |
5409 | 10 | recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1); |
5410 | | |
5411 | 10 | LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_); |
5412 | | |
5413 | 10 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
5414 | 10 | register_recycle_task(task_name, start_time); |
5415 | | |
5416 | 10 | DORIS_CLOUD_DEFER { |
5417 | 10 | unregister_recycle_task(task_name); |
5418 | 10 | int64_t cost = |
5419 | 10 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
5420 | 10 | metrics_context.finish_report(); |
5421 | 10 | LOG_WARNING("recycle rowsets finished, cost={}s", cost) |
5422 | 10 | .tag("instance_id", instance_id_) |
5423 | 10 | .tag("num_scanned", num_scanned) |
5424 | 10 | .tag("num_expired", num_expired) |
5425 | 10 | .tag("num_recycled", num_recycled) |
5426 | 10 | .tag("num_recycled.prepare", num_prepare) |
5427 | 10 | .tag("num_recycled.compacted", num_compacted) |
5428 | 10 | .tag("num_recycled.empty_rowset", num_empty_rowset) |
5429 | 10 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) |
5430 | 10 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) |
5431 | 10 | .tag("expired_rowset_meta_size", expired_rowset_size); |
5432 | 10 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv Line | Count | Source | 5416 | 10 | DORIS_CLOUD_DEFER { | 5417 | 10 | unregister_recycle_task(task_name); | 5418 | 10 | int64_t cost = | 5419 | 10 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 5420 | 10 | metrics_context.finish_report(); | 5421 | 10 | LOG_WARNING("recycle rowsets finished, cost={}s", cost) | 5422 | 10 | .tag("instance_id", instance_id_) | 5423 | 10 | .tag("num_scanned", num_scanned) | 5424 | 10 | .tag("num_expired", num_expired) | 5425 | 10 | .tag("num_recycled", num_recycled) | 5426 | 10 | .tag("num_recycled.prepare", num_prepare) | 5427 | 10 | .tag("num_recycled.compacted", num_compacted) | 5428 | 10 | .tag("num_recycled.empty_rowset", num_empty_rowset) | 5429 | 10 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) | 5430 | 10 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) | 5431 | 10 | .tag("expired_rowset_meta_size", expired_rowset_size); | 5432 | 10 | }; |
|
5433 | | |
5434 | 10 | std::vector<std::string> orphan_rowset_keys; |
5435 | | |
5436 | | // Store keys of rowset recycled by background workers |
5437 | 10 | std::mutex async_recycled_rowset_keys_mutex; |
5438 | 10 | std::vector<std::string> async_recycled_rowset_keys; |
5439 | 10 | auto worker_pool = std::make_unique<SimpleThreadPool>( |
5440 | 10 | config::instance_recycler_worker_pool_size, "recycle_rowsets"); |
5441 | 10 | worker_pool->start(); |
5442 | 10 | auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id, |
5443 | 400 | int64_t tablet_id, const std::string& rowset_id) { |
5444 | | // Try to delete rowset data in background thread |
5445 | 400 | int ret = worker_pool->submit_with_timeout( |
5446 | 400 | [&, resource_id, tablet_id, rowset_id, key]() mutable { |
5447 | 400 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { |
5448 | 400 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); |
5449 | 400 | return; |
5450 | 400 | } |
5451 | | // The async recycled rowsets are staled format or has not been used, |
5452 | | // so we don't need to check the rowset ref count key. |
5453 | 0 | std::vector<std::string> keys; |
5454 | 0 | { |
5455 | 0 | std::lock_guard lock(async_recycled_rowset_keys_mutex); |
5456 | 0 | async_recycled_rowset_keys.push_back(std::move(key)); |
5457 | 0 | if (async_recycled_rowset_keys.size() > 100) { |
5458 | 0 | keys.swap(async_recycled_rowset_keys); |
5459 | 0 | } |
5460 | 0 | } |
5461 | 0 | if (keys.empty()) return; |
5462 | 0 | if (txn_remove(txn_kv_.get(), keys) != 0) { |
5463 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" |
5464 | 0 | << instance_id_; |
5465 | 0 | } else { |
5466 | 0 | num_recycled.fetch_add(keys.size(), std::memory_order_relaxed); |
5467 | 0 | check_recycle_task(instance_id_, "recycle_rowsets", num_scanned, |
5468 | 0 | num_recycled, start_time); |
5469 | 0 | } |
5470 | 0 | }, Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv Line | Count | Source | 5446 | 400 | [&, resource_id, tablet_id, rowset_id, key]() mutable { | 5447 | 400 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { | 5448 | 400 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); | 5449 | 400 | return; | 5450 | 400 | } | 5451 | | // The async recycled rowsets are staled format or has not been used, | 5452 | | // so we don't need to check the rowset ref count key. | 5453 | 0 | std::vector<std::string> keys; | 5454 | 0 | { | 5455 | 0 | std::lock_guard lock(async_recycled_rowset_keys_mutex); | 5456 | 0 | async_recycled_rowset_keys.push_back(std::move(key)); | 5457 | 0 | if (async_recycled_rowset_keys.size() > 100) { | 5458 | 0 | keys.swap(async_recycled_rowset_keys); | 5459 | 0 | } | 5460 | 0 | } | 5461 | 0 | if (keys.empty()) return; | 5462 | 0 | if (txn_remove(txn_kv_.get(), keys) != 0) { | 5463 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" | 5464 | 0 | << instance_id_; | 5465 | 0 | } else { | 5466 | 0 | num_recycled.fetch_add(keys.size(), std::memory_order_relaxed); | 5467 | 0 | check_recycle_task(instance_id_, "recycle_rowsets", num_scanned, | 5468 | 0 | num_recycled, start_time); | 5469 | 0 | } | 5470 | 0 | }, |
|
5471 | 400 | 0); |
5472 | 400 | if (ret == 0) return 0; |
5473 | | // Submit task failed, delete rowset data in current thread |
5474 | 0 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { |
5475 | 0 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); |
5476 | 0 | return -1; |
5477 | 0 | } |
5478 | 0 | orphan_rowset_keys.push_back(std::move(key)); |
5479 | 0 | return 0; |
5480 | 0 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ Line | Count | Source | 5443 | 400 | int64_t tablet_id, const std::string& rowset_id) { | 5444 | | // Try to delete rowset data in background thread | 5445 | 400 | int ret = worker_pool->submit_with_timeout( | 5446 | 400 | [&, resource_id, tablet_id, rowset_id, key]() mutable { | 5447 | 400 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) { | 5448 | 400 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); | 5449 | 400 | return; | 5450 | 400 | } | 5451 | | // The async recycled rowsets are staled format or has not been used, | 5452 | | // so we don't need to check the rowset ref count key. | 5453 | 400 | std::vector<std::string> keys; | 5454 | 400 | { | 5455 | 400 | std::lock_guard lock(async_recycled_rowset_keys_mutex); | 5456 | 400 | async_recycled_rowset_keys.push_back(std::move(key)); | 5457 | 400 | if (async_recycled_rowset_keys.size() > 100) { | 5458 | 400 | keys.swap(async_recycled_rowset_keys); | 5459 | 400 | } | 5460 | 400 | } | 5461 | 400 | if (keys.empty()) return; | 5462 | 400 | if (txn_remove(txn_kv_.get(), keys) != 0) { | 5463 | 400 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" | 5464 | 400 | << instance_id_; | 5465 | 400 | } else { | 5466 | 400 | num_recycled.fetch_add(keys.size(), std::memory_order_relaxed); | 5467 | 400 | check_recycle_task(instance_id_, "recycle_rowsets", num_scanned, | 5468 | 400 | num_recycled, start_time); | 5469 | 400 | } | 5470 | 400 | }, | 5471 | 400 | 0); | 5472 | 400 | if (ret == 0) return 0; | 5473 | | // Submit task failed, delete rowset data in current thread | 5474 | 0 | if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 
0) { | 5475 | 0 | LOG(WARNING) << "failed to delete rowset data, key=" << hex(key); | 5476 | 0 | return -1; | 5477 | 0 | } | 5478 | 0 | orphan_rowset_keys.push_back(std::move(key)); | 5479 | 0 | return 0; | 5480 | 0 | }; |
|
5481 | | |
5482 | 10 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
5483 | | |
5484 | 2.01k | auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int { |
5485 | 2.01k | ++num_scanned; |
5486 | 2.01k | total_rowset_key_size += k.size(); |
5487 | 2.01k | total_rowset_value_size += v.size(); |
5488 | 2.01k | RecycleRowsetPB rowset; |
5489 | 2.01k | if (!rowset.ParseFromArray(v.data(), v.size())) { |
5490 | 0 | LOG_WARNING("malformed recycle rowset").tag("key", hex(k)); |
5491 | 0 | return -1; |
5492 | 0 | } |
5493 | | |
5494 | 2.01k | int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts); |
5495 | | |
5496 | 2.01k | VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned |
5497 | 0 | << " num_expired=" << num_expired << " expiration=" << final_expiration |
5498 | 0 | << " RecycleRowsetPB=" << rowset.ShortDebugString(); |
5499 | 2.01k | int64_t current_time = ::time(nullptr); |
5500 | 2.01k | if (current_time < final_expiration) { // not expired |
5501 | 0 | return 0; |
5502 | 0 | } |
5503 | 2.01k | ++num_expired; |
5504 | 2.01k | expired_rowset_size += v.size(); |
5505 | 2.01k | if (!rowset.has_type()) { // old version `RecycleRowsetPB` |
5506 | 0 | if (!rowset.has_resource_id()) [[unlikely]] { // impossible |
5507 | | // in old version, keep this key-value pair and it needs to be checked manually |
5508 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); |
5509 | 0 | return -1; |
5510 | 0 | } |
5511 | 0 | if (rowset.resource_id().empty()) [[unlikely]] { |
5512 | | // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv. |
5513 | 0 | LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key=" |
5514 | 0 | << hex(k) << " value=" << proto_to_json(rowset); |
5515 | 0 | orphan_rowset_keys.emplace_back(k); |
5516 | 0 | return -1; |
5517 | 0 | } |
5518 | | // decode rowset_id |
5519 | 0 | auto k1 = k; |
5520 | 0 | k1.remove_prefix(1); |
5521 | 0 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
5522 | 0 | decode_key(&k1, &out); |
5523 | | // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB |
5524 | 0 | const auto& rowset_id = std::get<std::string>(std::get<0>(out[4])); |
5525 | 0 | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ |
5526 | 0 | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id; |
5527 | 0 | if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(), |
5528 | 0 | rowset.tablet_id(), rowset_id) != 0) { |
5529 | 0 | return -1; |
5530 | 0 | } |
5531 | 0 | return 0; |
5532 | 0 | } |
5533 | | // TODO(plat1ko): check rowset not referenced |
5534 | 2.01k | auto rowset_meta = rowset.mutable_rowset_meta(); |
5535 | 2.01k | if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible |
5536 | 0 | if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) { |
5537 | 0 | LOG_INFO("recycle rowset that has empty resource id"); |
5538 | 0 | } else { |
5539 | | // other situations, keep this key-value pair and it needs to be checked manually |
5540 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); |
5541 | 0 | return -1; |
5542 | 0 | } |
5543 | 0 | } |
5544 | 2.01k | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ |
5545 | 2.01k | << " tablet_id=" << rowset_meta->tablet_id() |
5546 | 2.01k | << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=[" |
5547 | 2.01k | << rowset_meta->start_version() << '-' << rowset_meta->end_version() |
5548 | 2.01k | << "] txn_id=" << rowset_meta->txn_id() |
5549 | 2.01k | << " type=" << RecycleRowsetPB_Type_Name(rowset.type()) |
5550 | 2.01k | << " rowset_meta_size=" << v.size() |
5551 | 2.01k | << " creation_time=" << rowset_meta->creation_time(); |
5552 | 2.01k | if (rowset.type() == RecycleRowsetPB::PREPARE) { |
5553 | | // unable to calculate file path, can only be deleted by rowset id prefix |
5554 | 400 | num_prepare += 1; |
5555 | 400 | if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(), |
5556 | 400 | rowset_meta->tablet_id(), |
5557 | 400 | rowset_meta->rowset_id_v2()) != 0) { |
5558 | 0 | return -1; |
5559 | 0 | } |
5560 | 1.61k | } else { |
5561 | 1.61k | bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT; |
5562 | 1.61k | worker_pool->submit( |
5563 | 1.61k | [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() { |
5564 | | // The load & compact rowset keys are recycled during recycling operation logs. |
5565 | 1.61k | RowsetDeleteTask task; |
5566 | 1.61k | task.rowset_meta = rowset_meta; |
5567 | 1.61k | task.recycle_rowset_key = k; |
5568 | 1.61k | if (recycle_rowset_meta_and_data(task) != 0) { |
5569 | 1.60k | return; |
5570 | 1.60k | } |
5571 | 13 | num_compacted += is_compacted; |
5572 | 13 | num_recycled.fetch_add(1, std::memory_order_relaxed); |
5573 | 13 | if (rowset_meta.num_segments() == 0) { |
5574 | 0 | ++num_empty_rowset; |
5575 | 0 | } |
5576 | 13 | }); Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv Line | Count | Source | 5563 | 1.61k | [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() { | 5564 | | // The load & compact rowset keys are recycled during recycling operation logs. | 5565 | 1.61k | RowsetDeleteTask task; | 5566 | 1.61k | task.rowset_meta = rowset_meta; | 5567 | 1.61k | task.recycle_rowset_key = k; | 5568 | 1.61k | if (recycle_rowset_meta_and_data(task) != 0) { | 5569 | 1.60k | return; | 5570 | 1.60k | } | 5571 | 13 | num_compacted += is_compacted; | 5572 | 13 | num_recycled.fetch_add(1, std::memory_order_relaxed); | 5573 | 13 | if (rowset_meta.num_segments() == 0) { | 5574 | 0 | ++num_empty_rowset; | 5575 | 0 | } | 5576 | 13 | }); |
|
5577 | 1.61k | } |
5578 | 2.01k | return 0; |
5579 | 2.01k | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 5484 | 2.01k | auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int { | 5485 | 2.01k | ++num_scanned; | 5486 | 2.01k | total_rowset_key_size += k.size(); | 5487 | 2.01k | total_rowset_value_size += v.size(); | 5488 | 2.01k | RecycleRowsetPB rowset; | 5489 | 2.01k | if (!rowset.ParseFromArray(v.data(), v.size())) { | 5490 | 0 | LOG_WARNING("malformed recycle rowset").tag("key", hex(k)); | 5491 | 0 | return -1; | 5492 | 0 | } | 5493 | | | 5494 | 2.01k | int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts); | 5495 | | | 5496 | 2.01k | VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned | 5497 | 0 | << " num_expired=" << num_expired << " expiration=" << final_expiration | 5498 | 0 | << " RecycleRowsetPB=" << rowset.ShortDebugString(); | 5499 | 2.01k | int64_t current_time = ::time(nullptr); | 5500 | 2.01k | if (current_time < final_expiration) { // not expired | 5501 | 0 | return 0; | 5502 | 0 | } | 5503 | 2.01k | ++num_expired; | 5504 | 2.01k | expired_rowset_size += v.size(); | 5505 | 2.01k | if (!rowset.has_type()) { // old version `RecycleRowsetPB` | 5506 | 0 | if (!rowset.has_resource_id()) [[unlikely]] { // impossible | 5507 | | // in old version, keep this key-value pair and it needs to be checked manually | 5508 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); | 5509 | 0 | return -1; | 5510 | 0 | } | 5511 | 0 | if (rowset.resource_id().empty()) [[unlikely]] { | 5512 | | // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv. 
| 5513 | 0 | LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key=" | 5514 | 0 | << hex(k) << " value=" << proto_to_json(rowset); | 5515 | 0 | orphan_rowset_keys.emplace_back(k); | 5516 | 0 | return -1; | 5517 | 0 | } | 5518 | | // decode rowset_id | 5519 | 0 | auto k1 = k; | 5520 | 0 | k1.remove_prefix(1); | 5521 | 0 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 5522 | 0 | decode_key(&k1, &out); | 5523 | | // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB | 5524 | 0 | const auto& rowset_id = std::get<std::string>(std::get<0>(out[4])); | 5525 | 0 | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ | 5526 | 0 | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id; | 5527 | 0 | if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(), | 5528 | 0 | rowset.tablet_id(), rowset_id) != 0) { | 5529 | 0 | return -1; | 5530 | 0 | } | 5531 | 0 | return 0; | 5532 | 0 | } | 5533 | | // TODO(plat1ko): check rowset not referenced | 5534 | 2.01k | auto rowset_meta = rowset.mutable_rowset_meta(); | 5535 | 2.01k | if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible | 5536 | 0 | if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) { | 5537 | 0 | LOG_INFO("recycle rowset that has empty resource id"); | 5538 | 0 | } else { | 5539 | | // other situations, keep this key-value pair and it needs to be checked manually | 5540 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k)); | 5541 | 0 | return -1; | 5542 | 0 | } | 5543 | 0 | } | 5544 | 2.01k | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ | 5545 | 2.01k | << " tablet_id=" << rowset_meta->tablet_id() | 5546 | 2.01k | << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=[" | 5547 | 2.01k | << rowset_meta->start_version() << '-' << rowset_meta->end_version() | 5548 | 2.01k | << "] txn_id=" << rowset_meta->txn_id() | 
5549 | 2.01k | << " type=" << RecycleRowsetPB_Type_Name(rowset.type()) | 5550 | 2.01k | << " rowset_meta_size=" << v.size() | 5551 | 2.01k | << " creation_time=" << rowset_meta->creation_time(); | 5552 | 2.01k | if (rowset.type() == RecycleRowsetPB::PREPARE) { | 5553 | | // unable to calculate file path, can only be deleted by rowset id prefix | 5554 | 400 | num_prepare += 1; | 5555 | 400 | if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(), | 5556 | 400 | rowset_meta->tablet_id(), | 5557 | 400 | rowset_meta->rowset_id_v2()) != 0) { | 5558 | 0 | return -1; | 5559 | 0 | } | 5560 | 1.61k | } else { | 5561 | 1.61k | bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT; | 5562 | 1.61k | worker_pool->submit( | 5563 | 1.61k | [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() { | 5564 | | // The load & compact rowset keys are recycled during recycling operation logs. | 5565 | 1.61k | RowsetDeleteTask task; | 5566 | 1.61k | task.rowset_meta = rowset_meta; | 5567 | 1.61k | task.recycle_rowset_key = k; | 5568 | 1.61k | if (recycle_rowset_meta_and_data(task) != 0) { | 5569 | 1.61k | return; | 5570 | 1.61k | } | 5571 | 1.61k | num_compacted += is_compacted; | 5572 | 1.61k | num_recycled.fetch_add(1, std::memory_order_relaxed); | 5573 | 1.61k | if (rowset_meta.num_segments() == 0) { | 5574 | 1.61k | ++num_empty_rowset; | 5575 | 1.61k | } | 5576 | 1.61k | }); | 5577 | 1.61k | } | 5578 | 2.01k | return 0; | 5579 | 2.01k | }; |
|
5580 | | |
5581 | 10 | if (config::enable_recycler_stats_metrics) { |
5582 | 0 | scan_and_statistics_rowsets(); |
5583 | 0 | } |
5584 | | |
5585 | 10 | auto loop_done = [&]() -> int { |
5586 | 6 | if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) { |
5587 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_; |
5588 | 0 | } |
5589 | 6 | orphan_rowset_keys.clear(); |
5590 | 6 | return 0; |
5591 | 6 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv Line | Count | Source | 5585 | 6 | auto loop_done = [&]() -> int { | 5586 | 6 | if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) { | 5587 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_; | 5588 | 0 | } | 5589 | 6 | orphan_rowset_keys.clear(); | 5590 | 6 | return 0; | 5591 | 6 | }; |
|
5592 | | |
5593 | | // recycle_func and loop_done for scan and recycle |
5594 | 10 | int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv), |
5595 | 10 | std::move(loop_done)); |
5596 | | |
5597 | 10 | worker_pool->stop(); |
5598 | | |
5599 | 10 | if (!async_recycled_rowset_keys.empty()) { |
5600 | 0 | if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) { |
5601 | 0 | LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_; |
5602 | 0 | return -1; |
5603 | 0 | } else { |
5604 | 0 | num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed); |
5605 | 0 | } |
5606 | 0 | } |
5607 | | |
5608 | | // Report final metrics after all concurrent tasks completed |
5609 | 10 | segment_metrics_context_.report(); |
5610 | 10 | metrics_context.report(); |
5611 | | |
5612 | 10 | return ret; |
5613 | 10 | } |
5614 | | |
5615 | 1.61k | int InstanceRecycler::recycle_rowset_meta_and_data(const RowsetDeleteTask& task) { |
5616 | 1.61k | constexpr int MAX_RETRY = 10; |
5617 | 1.61k | const RowsetMetaCloudPB& rowset_meta = task.rowset_meta; |
5618 | 1.61k | int64_t tablet_id = rowset_meta.tablet_id(); |
5619 | 1.61k | const std::string& rowset_id = rowset_meta.rowset_id_v2(); |
5620 | 1.61k | std::string_view reference_instance_id = instance_id_; |
5621 | 1.61k | if (rowset_meta.has_reference_instance_id()) { |
5622 | 8 | reference_instance_id = rowset_meta.reference_instance_id(); |
5623 | 8 | } |
5624 | | |
5625 | 1.61k | AnnotateTag tablet_id_tag("tablet_id", tablet_id); |
5626 | 1.61k | AnnotateTag rowset_id_tag("rowset_id", rowset_id); |
5627 | 1.61k | AnnotateTag rowset_key_tag("recycle_rowset_key", hex(task.recycle_rowset_key)); |
5628 | 1.61k | AnnotateTag instance_id_tag("instance_id", instance_id_); |
5629 | 1.61k | AnnotateTag ref_instance_id_tag("ref_instance_id", reference_instance_id); |
5630 | 1.61k | for (int i = 0; i < MAX_RETRY; ++i) { |
5631 | 1.61k | std::unique_ptr<Transaction> txn; |
5632 | 1.61k | TxnErrorCode err = txn_kv_->create_txn(&txn); |
5633 | 1.61k | if (err != TxnErrorCode::TXN_OK) { |
5634 | 0 | LOG_WARNING("failed to create txn").tag("err", err); |
5635 | 0 | return -1; |
5636 | 0 | } |
5637 | | |
5638 | 1.61k | std::string rowset_ref_count_key = |
5639 | 1.61k | versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id}); |
5640 | 1.61k | int64_t ref_count = 0; |
5641 | 1.61k | { |
5642 | 1.61k | std::string value; |
5643 | 1.61k | TxnErrorCode err = txn->get(rowset_ref_count_key, &value); |
5644 | 1.61k | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { |
5645 | | // This is the old version rowset, we could recycle it directly. |
5646 | 1.60k | ref_count = 1; |
5647 | 1.60k | } else if (err != TxnErrorCode::TXN_OK) { |
5648 | 0 | LOG_WARNING("failed to get rowset ref count key").tag("err", err); |
5649 | 0 | return -1; |
5650 | 11 | } else if (!txn->decode_atomic_int(value, &ref_count)) { |
5651 | 0 | LOG_WARNING("failed to decode rowset data ref count").tag("value", hex(value)); |
5652 | 0 | return -1; |
5653 | 0 | } |
5654 | 1.61k | } |
5655 | | |
5656 | 1.61k | if (ref_count == 1) { |
5657 | | // It would not be added since it is recycling. |
5658 | 1.61k | if (delete_rowset_data(rowset_meta) != 0) { |
5659 | 1.60k | LOG_WARNING("failed to delete rowset data"); |
5660 | 1.60k | return -1; |
5661 | 1.60k | } |
5662 | | |
5663 | | // Reset the transaction to avoid timeout. |
5664 | 10 | err = txn_kv_->create_txn(&txn); |
5665 | 10 | if (err != TxnErrorCode::TXN_OK) { |
5666 | 0 | LOG_WARNING("failed to create txn").tag("err", err); |
5667 | 0 | return -1; |
5668 | 0 | } |
5669 | 10 | txn->remove(rowset_ref_count_key); |
5670 | 10 | LOG_INFO("delete rowset data ref count key") |
5671 | 10 | .tag("txn_id", rowset_meta.txn_id()) |
5672 | 10 | .tag("ref_count_key", hex(rowset_ref_count_key)); |
5673 | | |
5674 | 10 | std::string dbm_start_key = |
5675 | 10 | meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id, 0, 0}); |
5676 | 10 | std::string dbm_end_key = meta_delete_bitmap_key( |
5677 | 10 | {reference_instance_id, tablet_id, rowset_id, |
5678 | 10 | std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()}); |
5679 | 10 | txn->remove(dbm_start_key, dbm_end_key); |
5680 | 10 | LOG_INFO("remove delete bitmap kv") |
5681 | 10 | .tag("begin", hex(dbm_start_key)) |
5682 | 10 | .tag("end", hex(dbm_end_key)); |
5683 | | |
5684 | 10 | std::string versioned_dbm_start_key = versioned::meta_delete_bitmap_key( |
5685 | 10 | {reference_instance_id, tablet_id, rowset_id}); |
5686 | 10 | std::string versioned_dbm_end_key = versioned_dbm_start_key; |
5687 | 10 | encode_int64(INT64_MAX, &versioned_dbm_end_key); |
5688 | 10 | txn->remove(versioned_dbm_start_key, versioned_dbm_end_key); |
5689 | 10 | LOG_INFO("remove versioned delete bitmap kv") |
5690 | 10 | .tag("begin", hex(versioned_dbm_start_key)) |
5691 | 10 | .tag("end", hex(versioned_dbm_end_key)); |
5692 | 10 | } else { |
5693 | | // Decrease the rowset ref count. |
5694 | | // |
5695 | | // The read conflict range will protect the rowset ref count key, if any conflict happens, |
5696 | | // we will retry and check whether the rowset ref count is 1 and the data need to be deleted. |
5697 | 3 | txn->atomic_add(rowset_ref_count_key, -1); |
5698 | 3 | LOG_INFO("decrease rowset data ref count") |
5699 | 3 | .tag("txn_id", rowset_meta.txn_id()) |
5700 | 3 | .tag("ref_count", ref_count - 1) |
5701 | 3 | .tag("ref_count_key", hex(rowset_ref_count_key)); |
5702 | 3 | } |
5703 | | |
5704 | 13 | if (!task.versioned_rowset_key.empty()) { |
5705 | 0 | versioned::document_remove<RowsetMetaCloudPB>(txn.get(), task.versioned_rowset_key, |
5706 | 0 | task.versionstamp); |
5707 | 0 | LOG_INFO("remove versioned meta rowset key").tag("key", hex(task.versioned_rowset_key)); |
5708 | 0 | } |
5709 | | |
5710 | 13 | if (!task.non_versioned_rowset_key.empty()) { |
5711 | 0 | txn->remove(task.non_versioned_rowset_key); |
5712 | 0 | LOG_INFO("remove non versioned rowset key") |
5713 | 0 | .tag("key", hex(task.non_versioned_rowset_key)); |
5714 | 0 | } |
5715 | | |
5716 | | // empty when recycle ref rowsets for deleted instance |
5717 | 13 | if (!task.recycle_rowset_key.empty()) { |
5718 | 13 | txn->remove(task.recycle_rowset_key); |
5719 | 13 | LOG_INFO("remove recycle rowset key").tag("key", hex(task.recycle_rowset_key)); |
5720 | 13 | } |
5721 | | |
5722 | 13 | err = txn->commit(); |
5723 | 13 | if (err == TxnErrorCode::TXN_CONFLICT) { // unlikely |
5724 | | // The rowset ref count key has been changed, we need to retry. |
5725 | 0 | VLOG_DEBUG << "decrease rowset ref count but txn conflict, retry" |
5726 | 0 | << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id |
5727 | 0 | << ", ref_count=" << ref_count << ", retry=" << i; |
5728 | 0 | std::this_thread::sleep_for(std::chrono::milliseconds(500)); |
5729 | 0 | continue; |
5730 | 13 | } else if (err != TxnErrorCode::TXN_OK) { |
5731 | 0 | LOG_WARNING("failed to recycle rowset meta and data").tag("err", err); |
5732 | 0 | return -1; |
5733 | 0 | } |
5734 | 13 | LOG_INFO("recycle rowset meta and data success"); |
5735 | 13 | return 0; |
5736 | 13 | } |
5737 | 0 | LOG_WARNING("failed to recycle rowset meta and data after retry") |
5738 | 0 | .tag("tablet_id", tablet_id) |
5739 | 0 | .tag("rowset_id", rowset_id) |
5740 | 0 | .tag("retry", MAX_RETRY); |
5741 | 0 | return -1; |
5742 | 1.61k | } |
5743 | | |
5744 | 39 | int InstanceRecycler::recycle_tmp_rowsets() { |
5745 | 39 | const std::string task_name = "recycle_tmp_rowsets"; |
5746 | 39 | int64_t num_scanned = 0; |
5747 | 39 | int64_t num_expired = 0; |
5748 | 39 | std::atomic_long num_recycled = 0; |
5749 | 39 | size_t expired_rowset_size = 0; |
5750 | 39 | size_t total_rowset_key_size = 0; |
5751 | 39 | size_t total_rowset_value_size = 0; |
5752 | 39 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
5753 | | |
5754 | 39 | MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0}; |
5755 | 39 | MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0}; |
5756 | 39 | std::string tmp_rs_key0; |
5757 | 39 | std::string tmp_rs_key1; |
5758 | 39 | meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0); |
5759 | 39 | meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1); |
5760 | | |
5761 | 39 | LOG_WARNING("begin to recycle tmp rowsets").tag("instance_id", instance_id_); |
5762 | | |
5763 | 39 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
5764 | 39 | register_recycle_task(task_name, start_time); |
5765 | | |
5766 | 39 | DORIS_CLOUD_DEFER { |
5767 | 39 | unregister_recycle_task(task_name); |
5768 | 39 | int64_t cost = |
5769 | 39 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
5770 | 39 | metrics_context.finish_report(); |
5771 | 39 | LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost) |
5772 | 39 | .tag("instance_id", instance_id_) |
5773 | 39 | .tag("num_scanned", num_scanned) |
5774 | 39 | .tag("num_expired", num_expired) |
5775 | 39 | .tag("num_recycled", num_recycled) |
5776 | 39 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) |
5777 | 39 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) |
5778 | 39 | .tag("expired_rowset_meta_size_recycled", expired_rowset_size); |
5779 | 39 | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv Line | Count | Source | 5766 | 12 | DORIS_CLOUD_DEFER { | 5767 | 12 | unregister_recycle_task(task_name); | 5768 | 12 | int64_t cost = | 5769 | 12 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 5770 | 12 | metrics_context.finish_report(); | 5771 | 12 | LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost) | 5772 | 12 | .tag("instance_id", instance_id_) | 5773 | 12 | .tag("num_scanned", num_scanned) | 5774 | 12 | .tag("num_expired", num_expired) | 5775 | 12 | .tag("num_recycled", num_recycled) | 5776 | 12 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) | 5777 | 12 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) | 5778 | 12 | .tag("expired_rowset_meta_size_recycled", expired_rowset_size); | 5779 | 12 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv Line | Count | Source | 5766 | 27 | DORIS_CLOUD_DEFER { | 5767 | 27 | unregister_recycle_task(task_name); | 5768 | 27 | int64_t cost = | 5769 | 27 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 5770 | 27 | metrics_context.finish_report(); | 5771 | 27 | LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost) | 5772 | 27 | .tag("instance_id", instance_id_) | 5773 | 27 | .tag("num_scanned", num_scanned) | 5774 | 27 | .tag("num_expired", num_expired) | 5775 | 27 | .tag("num_recycled", num_recycled) | 5776 | 27 | .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size) | 5777 | 27 | .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size) | 5778 | 27 | .tag("expired_rowset_meta_size_recycled", expired_rowset_size); | 5779 | 27 | }; |
|
5780 | | |
5781 | | // Elements in `tmp_rowset_keys` has the same lifetime as `it` |
5782 | | |
5783 | 39 | std::vector<std::string> tmp_rowset_keys; |
5784 | 39 | std::vector<std::string> tmp_rowset_ref_count_keys; |
5785 | 39 | std::vector<std::string> tmp_rowset_keys_to_mark_recycled; |
5786 | 39 | std::vector<std::string> tmp_rowset_keys_to_abort; |
5787 | | |
5788 | | // rowset_id -> rowset_meta |
5789 | | // store tmp_rowset id and meta for statistics rs size when delete |
5790 | 39 | std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets; |
5791 | 39 | auto worker_pool = std::make_unique<SimpleThreadPool>( |
5792 | 39 | config::instance_recycler_worker_pool_size, "recycle_tmp_rowsets"); |
5793 | 39 | worker_pool->start(); |
5794 | | |
5795 | 39 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
5796 | | |
5797 | 39 | auto handle_rowset_kv = [&num_scanned, &num_expired, &tmp_rowset_keys, &tmp_rowsets, |
5798 | 39 | &expired_rowset_size, &total_rowset_key_size, &total_rowset_value_size, |
5799 | 39 | &earlest_ts, &tmp_rowset_ref_count_keys, |
5800 | 39 | &tmp_rowset_keys_to_mark_recycled, &tmp_rowset_keys_to_abort, this, |
5801 | 106k | &metrics_context](std::string_view k, std::string_view v) -> int { |
5802 | 106k | ++num_scanned; |
5803 | 106k | total_rowset_key_size += k.size(); |
5804 | 106k | total_rowset_value_size += v.size(); |
5805 | 106k | doris::RowsetMetaCloudPB rowset; |
5806 | 106k | if (!rowset.ParseFromArray(v.data(), v.size())) { |
5807 | 0 | LOG_WARNING("malformed rowset meta").tag("key", hex(k)); |
5808 | 0 | return -1; |
5809 | 0 | } |
5810 | 106k | int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts); |
5811 | 106k | VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned |
5812 | 0 | << " num_expired=" << num_expired << " expiration=" << expiration |
5813 | 0 | << " txn_expiration=" << rowset.txn_expiration() |
5814 | 0 | << " rowset_creation_time=" << rowset.creation_time(); |
5815 | 106k | int64_t current_time = ::time(nullptr); |
5816 | 106k | if (current_time < expiration) { // not expired |
5817 | 0 | return 0; |
5818 | 0 | } |
5819 | | |
5820 | 106k | if (config::enable_mark_delete_rowset_before_recycle) { |
5821 | 106k | if (need_mark_rowset_as_recycled(rowset)) { |
5822 | 52.0k | tmp_rowset_keys_to_mark_recycled.emplace_back(k); |
5823 | 52.0k | LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv " |
5824 | 52.0k | "at next turn, instance_id=" |
5825 | 52.0k | << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=[" |
5826 | 52.0k | << rowset.start_version() << '-' << rowset.end_version() << "]"; |
5827 | 52.0k | return 0; |
5828 | 52.0k | } |
5829 | 106k | } |
5830 | | |
5831 | 54.0k | if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) { |
5832 | 54.0k | if (make_deferred_abort_task(rowset).has_value()) { |
5833 | 3 | LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, " |
5834 | 3 | "instance_id=" |
5835 | 3 | << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=[" |
5836 | 3 | << rowset.start_version() << '-' << rowset.end_version() << "]"; |
5837 | 3 | tmp_rowset_keys_to_abort.emplace_back(k); |
5838 | 3 | } |
5839 | 54.0k | } |
5840 | | |
5841 | 54.0k | ++num_expired; |
5842 | 54.0k | expired_rowset_size += v.size(); |
5843 | 54.0k | if (!rowset.has_resource_id()) { |
5844 | 0 | if (rowset.num_segments() > 0) [[unlikely]] { // impossible |
5845 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", k); |
5846 | 0 | return -1; |
5847 | 0 | } |
5848 | | // might be a delete pred rowset |
5849 | 0 | tmp_rowset_keys.emplace_back(k); |
5850 | 0 | return 0; |
5851 | 0 | } |
5852 | | // TODO(plat1ko): check rowset not referenced |
5853 | 54.0k | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ |
5854 | 54.0k | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2() |
5855 | 54.0k | << " version=[" << rowset.start_version() << '-' << rowset.end_version() |
5856 | 54.0k | << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size() |
5857 | 54.0k | << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned |
5858 | 54.0k | << " num_expired=" << num_expired |
5859 | 54.0k | << " task_type=" << metrics_context.operation_type; |
5860 | | |
5861 | 54.0k | tmp_rowset_keys.emplace_back(k.data(), k.size()); |
5862 | | // Remove the rowset ref count key directly since it has not been used. |
5863 | 54.0k | std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key( |
5864 | 54.0k | {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()}); |
5865 | 54.0k | LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_ |
5866 | 54.0k | << "key=" << hex(rowset_ref_count_key); |
5867 | 54.0k | tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key); |
5868 | | |
5869 | 54.0k | tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset)); |
5870 | 54.0k | return 0; |
5871 | 54.0k | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 5801 | 16 | &metrics_context](std::string_view k, std::string_view v) -> int { | 5802 | 16 | ++num_scanned; | 5803 | 16 | total_rowset_key_size += k.size(); | 5804 | 16 | total_rowset_value_size += v.size(); | 5805 | 16 | doris::RowsetMetaCloudPB rowset; | 5806 | 16 | if (!rowset.ParseFromArray(v.data(), v.size())) { | 5807 | 0 | LOG_WARNING("malformed rowset meta").tag("key", hex(k)); | 5808 | 0 | return -1; | 5809 | 0 | } | 5810 | 16 | int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts); | 5811 | 16 | VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned | 5812 | 0 | << " num_expired=" << num_expired << " expiration=" << expiration | 5813 | 0 | << " txn_expiration=" << rowset.txn_expiration() | 5814 | 0 | << " rowset_creation_time=" << rowset.creation_time(); | 5815 | 16 | int64_t current_time = ::time(nullptr); | 5816 | 16 | if (current_time < expiration) { // not expired | 5817 | 0 | return 0; | 5818 | 0 | } | 5819 | | | 5820 | 16 | if (config::enable_mark_delete_rowset_before_recycle) { | 5821 | 16 | if (need_mark_rowset_as_recycled(rowset)) { | 5822 | 9 | tmp_rowset_keys_to_mark_recycled.emplace_back(k); | 5823 | 9 | LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv " | 5824 | 9 | "at next turn, instance_id=" | 5825 | 9 | << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=[" | 5826 | 9 | << rowset.start_version() << '-' << rowset.end_version() << "]"; | 5827 | 9 | return 0; | 5828 | 9 | } | 5829 | 16 | } | 5830 | | | 5831 | 7 | if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) { | 5832 | 7 | if (make_deferred_abort_task(rowset).has_value()) { | 5833 | 3 | LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, " | 5834 | 3 | 
"instance_id=" | 5835 | 3 | << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=[" | 5836 | 3 | << rowset.start_version() << '-' << rowset.end_version() << "]"; | 5837 | 3 | tmp_rowset_keys_to_abort.emplace_back(k); | 5838 | 3 | } | 5839 | 7 | } | 5840 | | | 5841 | 7 | ++num_expired; | 5842 | 7 | expired_rowset_size += v.size(); | 5843 | 7 | if (!rowset.has_resource_id()) { | 5844 | 0 | if (rowset.num_segments() > 0) [[unlikely]] { // impossible | 5845 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", k); | 5846 | 0 | return -1; | 5847 | 0 | } | 5848 | | // might be a delete pred rowset | 5849 | 0 | tmp_rowset_keys.emplace_back(k); | 5850 | 0 | return 0; | 5851 | 0 | } | 5852 | | // TODO(plat1ko): check rowset not referenced | 5853 | 7 | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ | 5854 | 7 | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2() | 5855 | 7 | << " version=[" << rowset.start_version() << '-' << rowset.end_version() | 5856 | 7 | << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size() | 5857 | 7 | << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned | 5858 | 7 | << " num_expired=" << num_expired | 5859 | 7 | << " task_type=" << metrics_context.operation_type; | 5860 | | | 5861 | 7 | tmp_rowset_keys.emplace_back(k.data(), k.size()); | 5862 | | // Remove the rowset ref count key directly since it has not been used. | 5863 | 7 | std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key( | 5864 | 7 | {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()}); | 5865 | 7 | LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_ | 5866 | 7 | << "key=" << hex(rowset_ref_count_key); | 5867 | 7 | tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key); | 5868 | | | 5869 | 7 | tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset)); | 5870 | 7 | return 0; | 5871 | 7 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 5801 | 106k | &metrics_context](std::string_view k, std::string_view v) -> int { | 5802 | 106k | ++num_scanned; | 5803 | 106k | total_rowset_key_size += k.size(); | 5804 | 106k | total_rowset_value_size += v.size(); | 5805 | 106k | doris::RowsetMetaCloudPB rowset; | 5806 | 106k | if (!rowset.ParseFromArray(v.data(), v.size())) { | 5807 | 0 | LOG_WARNING("malformed rowset meta").tag("key", hex(k)); | 5808 | 0 | return -1; | 5809 | 0 | } | 5810 | 106k | int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts); | 5811 | 106k | VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned | 5812 | 0 | << " num_expired=" << num_expired << " expiration=" << expiration | 5813 | 0 | << " txn_expiration=" << rowset.txn_expiration() | 5814 | 0 | << " rowset_creation_time=" << rowset.creation_time(); | 5815 | 106k | int64_t current_time = ::time(nullptr); | 5816 | 106k | if (current_time < expiration) { // not expired | 5817 | 0 | return 0; | 5818 | 0 | } | 5819 | | | 5820 | 106k | if (config::enable_mark_delete_rowset_before_recycle) { | 5821 | 106k | if (need_mark_rowset_as_recycled(rowset)) { | 5822 | 52.0k | tmp_rowset_keys_to_mark_recycled.emplace_back(k); | 5823 | 52.0k | LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv " | 5824 | 52.0k | "at next turn, instance_id=" | 5825 | 52.0k | << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=[" | 5826 | 52.0k | << rowset.start_version() << '-' << rowset.end_version() << "]"; | 5827 | 52.0k | return 0; | 5828 | 52.0k | } | 5829 | 106k | } | 5830 | | | 5831 | 54.0k | if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) { | 5832 | 54.0k | if (make_deferred_abort_task(rowset).has_value()) { | 5833 | 0 | LOG(INFO) << "rowset queued to abort related txn 
or job after current scan batch, " | 5834 | 0 | "instance_id=" | 5835 | 0 | << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=[" | 5836 | 0 | << rowset.start_version() << '-' << rowset.end_version() << "]"; | 5837 | 0 | tmp_rowset_keys_to_abort.emplace_back(k); | 5838 | 0 | } | 5839 | 54.0k | } | 5840 | | | 5841 | 54.0k | ++num_expired; | 5842 | 54.0k | expired_rowset_size += v.size(); | 5843 | 54.0k | if (!rowset.has_resource_id()) { | 5844 | 0 | if (rowset.num_segments() > 0) [[unlikely]] { // impossible | 5845 | 0 | LOG_WARNING("rowset meta has empty resource id").tag("key", k); | 5846 | 0 | return -1; | 5847 | 0 | } | 5848 | | // might be a delete pred rowset | 5849 | 0 | tmp_rowset_keys.emplace_back(k); | 5850 | 0 | return 0; | 5851 | 0 | } | 5852 | | // TODO(plat1ko): check rowset not referenced | 5853 | 54.0k | LOG(INFO) << "delete rowset data, instance_id=" << instance_id_ | 5854 | 54.0k | << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2() | 5855 | 54.0k | << " version=[" << rowset.start_version() << '-' << rowset.end_version() | 5856 | 54.0k | << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size() | 5857 | 54.0k | << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned | 5858 | 54.0k | << " num_expired=" << num_expired | 5859 | 54.0k | << " task_type=" << metrics_context.operation_type; | 5860 | | | 5861 | 54.0k | tmp_rowset_keys.emplace_back(k.data(), k.size()); | 5862 | | // Remove the rowset ref count key directly since it has not been used. 
| 5863 | 54.0k | std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key( | 5864 | 54.0k | {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()}); | 5865 | 54.0k | LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_ | 5866 | 54.0k | << "key=" << hex(rowset_ref_count_key); | 5867 | 54.0k | tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key); | 5868 | | | 5869 | 54.0k | tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset)); | 5870 | 54.0k | return 0; | 5871 | 54.0k | }; |
|
5872 | | |
5873 | | // TODO bacth delete |
5874 | 51.0k | auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) { |
5875 | 51.0k | std::string dbm_start_key = |
5876 | 51.0k | versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id}); |
5877 | 51.0k | std::string dbm_end_key = dbm_start_key; |
5878 | 51.0k | encode_int64(INT64_MAX, &dbm_end_key); |
5879 | 51.0k | auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key); |
5880 | 51.0k | if (ret != 0) { |
5881 | 0 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id=" |
5882 | 0 | << instance_id_ << ", tablet_id=" << tablet_id |
5883 | 0 | << ", rowset_id=" << rowset_id; |
5884 | 0 | } |
5885 | 51.0k | return ret; |
5886 | 51.0k | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 5874 | 7 | auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) { | 5875 | 7 | std::string dbm_start_key = | 5876 | 7 | versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id}); | 5877 | 7 | std::string dbm_end_key = dbm_start_key; | 5878 | 7 | encode_int64(INT64_MAX, &dbm_end_key); | 5879 | 7 | auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key); | 5880 | 7 | if (ret != 0) { | 5881 | 0 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id=" | 5882 | 0 | << instance_id_ << ", tablet_id=" << tablet_id | 5883 | 0 | << ", rowset_id=" << rowset_id; | 5884 | 0 | } | 5885 | 7 | return ret; | 5886 | 7 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 5874 | 51.0k | auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) { | 5875 | 51.0k | std::string dbm_start_key = | 5876 | 51.0k | versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id}); | 5877 | 51.0k | std::string dbm_end_key = dbm_start_key; | 5878 | 51.0k | encode_int64(INT64_MAX, &dbm_end_key); | 5879 | 51.0k | auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key); | 5880 | 51.0k | if (ret != 0) { | 5881 | 0 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id=" | 5882 | 0 | << instance_id_ << ", tablet_id=" << tablet_id | 5883 | 0 | << ", rowset_id=" << rowset_id; | 5884 | 0 | } | 5885 | 51.0k | return ret; | 5886 | 51.0k | }; |
|
5887 | | |
5888 | 51.0k | auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) { |
5889 | 51.0k | auto delete_bitmap_start = |
5890 | 51.0k | meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0}); |
5891 | 51.0k | auto delete_bitmap_end = |
5892 | 51.0k | meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX}); |
5893 | 51.0k | auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end); |
5894 | 51.0k | if (ret != 0) { |
5895 | 0 | LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_ |
5896 | 0 | << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id; |
5897 | 0 | } |
5898 | 51.0k | return ret; |
5899 | 51.0k | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 5888 | 7 | auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) { | 5889 | 7 | auto delete_bitmap_start = | 5890 | 7 | meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0}); | 5891 | 7 | auto delete_bitmap_end = | 5892 | 7 | meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX}); | 5893 | 7 | auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end); | 5894 | 7 | if (ret != 0) { | 5895 | 0 | LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_ | 5896 | 0 | << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id; | 5897 | 0 | } | 5898 | 7 | return ret; | 5899 | 7 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 5888 | 51.0k | auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) { | 5889 | 51.0k | auto delete_bitmap_start = | 5890 | 51.0k | meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0}); | 5891 | 51.0k | auto delete_bitmap_end = | 5892 | 51.0k | meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX}); | 5893 | 51.0k | auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end); | 5894 | 51.0k | if (ret != 0) { | 5895 | 0 | LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_ | 5896 | 0 | << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id; | 5897 | 0 | } | 5898 | 51.0k | return ret; | 5899 | 51.0k | }; |
|
5900 | | |
5901 | 39 | auto loop_done = [&]() -> int { |
5902 | 32 | std::vector<std::string> tmp_rowset_keys_to_delete; |
5903 | 32 | std::vector<std::string> tmp_rowset_ref_count_keys_to_delete; |
5904 | 32 | std::vector<std::string> mark_keys_to_process; |
5905 | 32 | std::vector<std::string> abort_keys_to_process; |
5906 | 32 | std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets_to_delete; |
5907 | 32 | tmp_rowset_keys_to_delete.swap(tmp_rowset_keys); |
5908 | 32 | tmp_rowsets_to_delete.swap(tmp_rowsets); |
5909 | 32 | tmp_rowset_ref_count_keys_to_delete.swap(tmp_rowset_ref_count_keys); |
5910 | 32 | mark_keys_to_process.swap(tmp_rowset_keys_to_mark_recycled); |
5911 | 32 | abort_keys_to_process.swap(tmp_rowset_keys_to_abort); |
5912 | 32 | worker_pool->submit([&, tmp_rowset_keys_to_delete = std::move(tmp_rowset_keys_to_delete), |
5913 | 32 | tmp_rowsets_to_delete = std::move(tmp_rowsets_to_delete), |
5914 | 32 | tmp_rowset_ref_count_keys_to_delete = |
5915 | 32 | std::move(tmp_rowset_ref_count_keys_to_delete), |
5916 | 32 | mark_keys_to_process = std::move(mark_keys_to_process), |
5917 | 32 | abort_keys_to_process = std::move(abort_keys_to_process)]() mutable { |
5918 | 32 | if (!mark_keys_to_process.empty() && |
5919 | 32 | batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_, |
5920 | 16 | mark_keys_to_process) != 0) { |
5921 | 0 | LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id=" |
5922 | 0 | << instance_id_; |
5923 | 0 | return; |
5924 | 0 | } |
5925 | 32 | if (!abort_keys_to_process.empty() && |
5926 | 32 | batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process, |
5927 | 3 | false) != 0) { |
5928 | 0 | LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id=" |
5929 | 0 | << instance_id_; |
5930 | 0 | return; |
5931 | 0 | } |
5932 | 32 | if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET, |
5933 | 32 | metrics_context) != 0) { |
5934 | 3 | LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_; |
5935 | 3 | return; |
5936 | 3 | } |
5937 | 51.0k | for (const auto& [_, rs] : tmp_rowsets_to_delete) { |
5938 | 51.0k | if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { |
5939 | 0 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs=" |
5940 | 0 | << rs.ShortDebugString(); |
5941 | 0 | return; |
5942 | 0 | } |
5943 | 51.0k | if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { |
5944 | 0 | LOG(WARNING) << "failed to delete delete bitmap kv, rs=" |
5945 | 0 | << rs.ShortDebugString(); |
5946 | 0 | return; |
5947 | 0 | } |
5948 | 51.0k | } |
5949 | 29 | if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) { |
5950 | 0 | LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_; |
5951 | 0 | return; |
5952 | 0 | } |
5953 | 29 | if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) { |
5954 | 0 | LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_; |
5955 | 0 | return; |
5956 | 0 | } |
5957 | 29 | num_recycled += tmp_rowset_keys_to_delete.size(); |
5958 | 29 | return; |
5959 | 29 | }); recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEvENUlvE_clEv Line | Count | Source | 5917 | 12 | abort_keys_to_process = std::move(abort_keys_to_process)]() mutable { | 5918 | 12 | if (!mark_keys_to_process.empty() && | 5919 | 12 | batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_, | 5920 | 7 | mark_keys_to_process) != 0) { | 5921 | 0 | LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id=" | 5922 | 0 | << instance_id_; | 5923 | 0 | return; | 5924 | 0 | } | 5925 | 12 | if (!abort_keys_to_process.empty() && | 5926 | 12 | batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process, | 5927 | 3 | false) != 0) { | 5928 | 0 | LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id=" | 5929 | 0 | << instance_id_; | 5930 | 0 | return; | 5931 | 0 | } | 5932 | 12 | if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET, | 5933 | 12 | metrics_context) != 0) { | 5934 | 0 | LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_; | 5935 | 0 | return; | 5936 | 0 | } | 5937 | 12 | for (const auto& [_, rs] : tmp_rowsets_to_delete) { | 5938 | 7 | if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 5939 | 0 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs=" | 5940 | 0 | << rs.ShortDebugString(); | 5941 | 0 | return; | 5942 | 0 | } | 5943 | 7 | if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 5944 | 0 | LOG(WARNING) << "failed to delete delete bitmap kv, rs=" | 5945 | 0 | << rs.ShortDebugString(); | 5946 | 0 | return; | 5947 | 0 | } | 5948 | 7 | } | 5949 | 12 | if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) { | 5950 | 0 | LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_; | 5951 | 0 | return; | 5952 | 0 | } | 5953 | 12 | if (txn_remove(txn_kv_.get(), 
tmp_rowset_ref_count_keys_to_delete) != 0) { | 5954 | 0 | LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_; | 5955 | 0 | return; | 5956 | 0 | } | 5957 | 12 | num_recycled += tmp_rowset_keys_to_delete.size(); | 5958 | 12 | return; | 5959 | 12 | }); |
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEvENUlvE_clEv Line | Count | Source | 5917 | 20 | abort_keys_to_process = std::move(abort_keys_to_process)]() mutable { | 5918 | 20 | if (!mark_keys_to_process.empty() && | 5919 | 20 | batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_, | 5920 | 9 | mark_keys_to_process) != 0) { | 5921 | 0 | LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id=" | 5922 | 0 | << instance_id_; | 5923 | 0 | return; | 5924 | 0 | } | 5925 | 20 | if (!abort_keys_to_process.empty() && | 5926 | 20 | batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process, | 5927 | 0 | false) != 0) { | 5928 | 0 | LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id=" | 5929 | 0 | << instance_id_; | 5930 | 0 | return; | 5931 | 0 | } | 5932 | 20 | if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET, | 5933 | 20 | metrics_context) != 0) { | 5934 | 3 | LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_; | 5935 | 3 | return; | 5936 | 3 | } | 5937 | 51.0k | for (const auto& [_, rs] : tmp_rowsets_to_delete) { | 5938 | 51.0k | if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 5939 | 0 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs=" | 5940 | 0 | << rs.ShortDebugString(); | 5941 | 0 | return; | 5942 | 0 | } | 5943 | 51.0k | if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 5944 | 0 | LOG(WARNING) << "failed to delete delete bitmap kv, rs=" | 5945 | 0 | << rs.ShortDebugString(); | 5946 | 0 | return; | 5947 | 0 | } | 5948 | 51.0k | } | 5949 | 17 | if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) { | 5950 | 0 | LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_; | 5951 | 0 | return; | 5952 | 0 | } | 5953 | 17 | if (txn_remove(txn_kv_.get(), 
tmp_rowset_ref_count_keys_to_delete) != 0) { | 5954 | 0 | LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_; | 5955 | 0 | return; | 5956 | 0 | } | 5957 | 17 | num_recycled += tmp_rowset_keys_to_delete.size(); | 5958 | 17 | return; | 5959 | 17 | }); |
|
5960 | 32 | return 0; |
5961 | 32 | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEv Line | Count | Source | 5901 | 12 | auto loop_done = [&]() -> int { | 5902 | 12 | std::vector<std::string> tmp_rowset_keys_to_delete; | 5903 | 12 | std::vector<std::string> tmp_rowset_ref_count_keys_to_delete; | 5904 | 12 | std::vector<std::string> mark_keys_to_process; | 5905 | 12 | std::vector<std::string> abort_keys_to_process; | 5906 | 12 | std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets_to_delete; | 5907 | 12 | tmp_rowset_keys_to_delete.swap(tmp_rowset_keys); | 5908 | 12 | tmp_rowsets_to_delete.swap(tmp_rowsets); | 5909 | 12 | tmp_rowset_ref_count_keys_to_delete.swap(tmp_rowset_ref_count_keys); | 5910 | 12 | mark_keys_to_process.swap(tmp_rowset_keys_to_mark_recycled); | 5911 | 12 | abort_keys_to_process.swap(tmp_rowset_keys_to_abort); | 5912 | 12 | worker_pool->submit([&, tmp_rowset_keys_to_delete = std::move(tmp_rowset_keys_to_delete), | 5913 | 12 | tmp_rowsets_to_delete = std::move(tmp_rowsets_to_delete), | 5914 | 12 | tmp_rowset_ref_count_keys_to_delete = | 5915 | 12 | std::move(tmp_rowset_ref_count_keys_to_delete), | 5916 | 12 | mark_keys_to_process = std::move(mark_keys_to_process), | 5917 | 12 | abort_keys_to_process = std::move(abort_keys_to_process)]() mutable { | 5918 | 12 | if (!mark_keys_to_process.empty() && | 5919 | 12 | batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_, | 5920 | 12 | mark_keys_to_process) != 0) { | 5921 | 12 | LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id=" | 5922 | 12 | << instance_id_; | 5923 | 12 | return; | 5924 | 12 | } | 5925 | 12 | if (!abort_keys_to_process.empty() && | 5926 | 12 | batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process, | 5927 | 12 | false) != 0) { | 5928 | 12 | LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id=" | 5929 | 12 | << instance_id_; | 5930 | 12 | return; | 5931 | 12 | } | 5932 | 
12 | if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET, | 5933 | 12 | metrics_context) != 0) { | 5934 | 12 | LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_; | 5935 | 12 | return; | 5936 | 12 | } | 5937 | 12 | for (const auto& [_, rs] : tmp_rowsets_to_delete) { | 5938 | 12 | if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 5939 | 12 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs=" | 5940 | 12 | << rs.ShortDebugString(); | 5941 | 12 | return; | 5942 | 12 | } | 5943 | 12 | if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 5944 | 12 | LOG(WARNING) << "failed to delete delete bitmap kv, rs=" | 5945 | 12 | << rs.ShortDebugString(); | 5946 | 12 | return; | 5947 | 12 | } | 5948 | 12 | } | 5949 | 12 | if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) { | 5950 | 12 | LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_; | 5951 | 12 | return; | 5952 | 12 | } | 5953 | 12 | if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) { | 5954 | 12 | LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_; | 5955 | 12 | return; | 5956 | 12 | } | 5957 | 12 | num_recycled += tmp_rowset_keys_to_delete.size(); | 5958 | 12 | return; | 5959 | 12 | }); | 5960 | 12 | return 0; | 5961 | 12 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEv Line | Count | Source | 5901 | 20 | auto loop_done = [&]() -> int { | 5902 | 20 | std::vector<std::string> tmp_rowset_keys_to_delete; | 5903 | 20 | std::vector<std::string> tmp_rowset_ref_count_keys_to_delete; | 5904 | 20 | std::vector<std::string> mark_keys_to_process; | 5905 | 20 | std::vector<std::string> abort_keys_to_process; | 5906 | 20 | std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets_to_delete; | 5907 | 20 | tmp_rowset_keys_to_delete.swap(tmp_rowset_keys); | 5908 | 20 | tmp_rowsets_to_delete.swap(tmp_rowsets); | 5909 | 20 | tmp_rowset_ref_count_keys_to_delete.swap(tmp_rowset_ref_count_keys); | 5910 | 20 | mark_keys_to_process.swap(tmp_rowset_keys_to_mark_recycled); | 5911 | 20 | abort_keys_to_process.swap(tmp_rowset_keys_to_abort); | 5912 | 20 | worker_pool->submit([&, tmp_rowset_keys_to_delete = std::move(tmp_rowset_keys_to_delete), | 5913 | 20 | tmp_rowsets_to_delete = std::move(tmp_rowsets_to_delete), | 5914 | 20 | tmp_rowset_ref_count_keys_to_delete = | 5915 | 20 | std::move(tmp_rowset_ref_count_keys_to_delete), | 5916 | 20 | mark_keys_to_process = std::move(mark_keys_to_process), | 5917 | 20 | abort_keys_to_process = std::move(abort_keys_to_process)]() mutable { | 5918 | 20 | if (!mark_keys_to_process.empty() && | 5919 | 20 | batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_, | 5920 | 20 | mark_keys_to_process) != 0) { | 5921 | 20 | LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id=" | 5922 | 20 | << instance_id_; | 5923 | 20 | return; | 5924 | 20 | } | 5925 | 20 | if (!abort_keys_to_process.empty() && | 5926 | 20 | batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process, | 5927 | 20 | false) != 0) { | 5928 | 20 | LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id=" | 5929 | 20 | << instance_id_; | 5930 | 20 | return; | 5931 | 20 | } | 5932 | 20 | if 
(delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET, | 5933 | 20 | metrics_context) != 0) { | 5934 | 20 | LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_; | 5935 | 20 | return; | 5936 | 20 | } | 5937 | 20 | for (const auto& [_, rs] : tmp_rowsets_to_delete) { | 5938 | 20 | if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 5939 | 20 | LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs=" | 5940 | 20 | << rs.ShortDebugString(); | 5941 | 20 | return; | 5942 | 20 | } | 5943 | 20 | if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) { | 5944 | 20 | LOG(WARNING) << "failed to delete delete bitmap kv, rs=" | 5945 | 20 | << rs.ShortDebugString(); | 5946 | 20 | return; | 5947 | 20 | } | 5948 | 20 | } | 5949 | 20 | if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) { | 5950 | 20 | LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_; | 5951 | 20 | return; | 5952 | 20 | } | 5953 | 20 | if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) { | 5954 | 20 | LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_; | 5955 | 20 | return; | 5956 | 20 | } | 5957 | 20 | num_recycled += tmp_rowset_keys_to_delete.size(); | 5958 | 20 | return; | 5959 | 20 | }); | 5960 | 20 | return 0; | 5961 | 20 | }; |
|
5962 | | |
5963 | 39 | if (config::enable_recycler_stats_metrics) { |
5964 | 0 | scan_and_statistics_tmp_rowsets(); |
5965 | 0 | } |
5966 | | // recycle_func and loop_done for scan and recycle |
5967 | 39 | int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_rowset_kv), |
5968 | 39 | std::move(loop_done)); |
5969 | | |
5970 | 39 | worker_pool->stop(); |
5971 | | |
5972 | | // Report final metrics after all concurrent tasks completed |
5973 | 39 | segment_metrics_context_.report(); |
5974 | 39 | metrics_context.report(); |
5975 | | |
5976 | 39 | return ret; |
5977 | 39 | } |
5978 | | |
5979 | | int InstanceRecycler::scan_and_recycle( |
5980 | | std::string begin, std::string_view end, |
5981 | | std::function<int(std::string_view k, std::string_view v)> recycle_func, |
5982 | 268 | std::function<int()> loop_done) { |
5983 | 268 | LOG(INFO) << "begin scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) << ")"; |
5984 | 268 | int ret = 0; |
5985 | 268 | int64_t cnt = 0; |
5986 | 268 | int get_range_retried = 0; |
5987 | 268 | std::string err; |
5988 | 268 | DORIS_CLOUD_DEFER_COPY(begin, end) { |
5989 | 268 | LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) |
5990 | 268 | << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried |
5991 | 268 | << " ret=" << ret << " err=" << err; |
5992 | 268 | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv Line | Count | Source | 5988 | 31 | DORIS_CLOUD_DEFER_COPY(begin, end) { | 5989 | 31 | LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) | 5990 | 31 | << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried | 5991 | 31 | << " ret=" << ret << " err=" << err; | 5992 | 31 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv Line | Count | Source | 5988 | 237 | DORIS_CLOUD_DEFER_COPY(begin, end) { | 5989 | 237 | LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) | 5990 | 237 | << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried | 5991 | 237 | << " ret=" << ret << " err=" << err; | 5992 | 237 | }; |
|
5993 | | |
5994 | 268 | std::unique_ptr<RangeGetIterator> it; |
5995 | 449 | while (it == nullptr /* may be not init */ || (it->more() && !stopped())) { |
5996 | 321 | if (get_range_retried > 1000) { |
5997 | 0 | err = "txn_get exceeds max retry(1000), may not scan all keys"; |
5998 | 0 | ret = -3; |
5999 | 0 | return ret; |
6000 | 0 | } |
6001 | 321 | int get_ret = txn_get(txn_kv_.get(), begin, end, it); |
6002 | 321 | if (get_ret != 0) { // txn kv may complain "Request for future version" |
6003 | 0 | LOG(WARNING) << "failed to get kv, range=[" << hex(begin) << "," << hex(end) |
6004 | 0 | << ") num_scanned=" << cnt << " txn_get_ret=" << get_ret |
6005 | 0 | << " get_range_retried=" << get_range_retried; |
6006 | 0 | ++get_range_retried; |
6007 | 0 | std::this_thread::sleep_for(std::chrono::milliseconds(500)); |
6008 | 0 | continue; // try again |
6009 | 0 | } |
6010 | 321 | if (!it->has_next()) { |
6011 | 140 | LOG(INFO) << "no keys in the given range=[" << hex(begin) << "," << hex(end) << ")"; |
6012 | 140 | break; // scan finished |
6013 | 140 | } |
6014 | 154k | while (it->has_next()) { |
6015 | 154k | ++cnt; |
6016 | | // recycle corresponding resources |
6017 | 154k | auto [k, v] = it->next(); |
6018 | 154k | if (!it->has_next()) { |
6019 | 181 | begin = k; |
6020 | 181 | VLOG_DEBUG << "iterator has no more kvs. key=" << hex(k); |
6021 | 181 | } |
6022 | | // FIXME(gavin): if we want to continue scanning, the recycle_func should not return non-zero |
6023 | 154k | if (recycle_func(k, v) != 0) { |
6024 | 4.00k | err = "recycle_func error"; |
6025 | 4.00k | ret = -1; |
6026 | 4.00k | } |
6027 | 154k | } |
6028 | 181 | begin.push_back('\x00'); // Update to next smallest key for iteration |
6029 | | // FIXME(gavin): if we want to continue scanning, the loop_done should not return non-zero |
6030 | 181 | if (loop_done && loop_done() != 0) { |
6031 | 4 | err = "loop_done error"; |
6032 | 4 | ret = -1; |
6033 | 4 | } |
6034 | 181 | } |
6035 | 268 | return ret; |
6036 | 268 | } |
6037 | | |
6038 | 19 | int InstanceRecycler::abort_timeout_txn() { |
6039 | 19 | const std::string task_name = "abort_timeout_txn"; |
6040 | 19 | int64_t num_scanned = 0; |
6041 | 19 | int64_t num_timeout = 0; |
6042 | 19 | int64_t num_abort = 0; |
6043 | 19 | int64_t num_advance = 0; |
6044 | 19 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
6045 | | |
6046 | 19 | TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0}; |
6047 | 19 | TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX}; |
6048 | 19 | std::string begin_txn_running_key; |
6049 | 19 | std::string end_txn_running_key; |
6050 | 19 | txn_running_key(txn_running_key_info0, &begin_txn_running_key); |
6051 | 19 | txn_running_key(txn_running_key_info1, &end_txn_running_key); |
6052 | | |
6053 | 19 | LOG_WARNING("begin to abort timeout txn").tag("instance_id", instance_id_); |
6054 | | |
6055 | 19 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
6056 | 19 | register_recycle_task(task_name, start_time); |
6057 | | |
6058 | 19 | DORIS_CLOUD_DEFER { |
6059 | 19 | unregister_recycle_task(task_name); |
6060 | 19 | int64_t cost = |
6061 | 19 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
6062 | 19 | metrics_context.finish_report(); |
6063 | 19 | LOG_WARNING("end to abort timeout txn, cost={}s", cost) |
6064 | 19 | .tag("instance_id", instance_id_) |
6065 | 19 | .tag("num_scanned", num_scanned) |
6066 | 19 | .tag("num_timeout", num_timeout) |
6067 | 19 | .tag("num_abort", num_abort) |
6068 | 19 | .tag("num_advance", num_advance); |
6069 | 19 | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv Line | Count | Source | 6058 | 3 | DORIS_CLOUD_DEFER { | 6059 | 3 | unregister_recycle_task(task_name); | 6060 | 3 | int64_t cost = | 6061 | 3 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 6062 | 3 | metrics_context.finish_report(); | 6063 | 3 | LOG_WARNING("end to abort timeout txn, cost={}s", cost) | 6064 | 3 | .tag("instance_id", instance_id_) | 6065 | 3 | .tag("num_scanned", num_scanned) | 6066 | 3 | .tag("num_timeout", num_timeout) | 6067 | 3 | .tag("num_abort", num_abort) | 6068 | 3 | .tag("num_advance", num_advance); | 6069 | 3 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv Line | Count | Source | 6058 | 16 | DORIS_CLOUD_DEFER { | 6059 | 16 | unregister_recycle_task(task_name); | 6060 | 16 | int64_t cost = | 6061 | 16 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 6062 | 16 | metrics_context.finish_report(); | 6063 | 16 | LOG_WARNING("end to abort timeout txn, cost={}s", cost) | 6064 | 16 | .tag("instance_id", instance_id_) | 6065 | 16 | .tag("num_scanned", num_scanned) | 6066 | 16 | .tag("num_timeout", num_timeout) | 6067 | 16 | .tag("num_abort", num_abort) | 6068 | 16 | .tag("num_advance", num_advance); | 6069 | 16 | }; |
|
6070 | | |
6071 | 19 | int64_t current_time = |
6072 | 19 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
6073 | | |
6074 | 19 | auto handle_txn_running_kv = [&num_scanned, &num_timeout, &num_abort, &num_advance, |
6075 | 19 | ¤t_time, &metrics_context, |
6076 | 19 | this](std::string_view k, std::string_view v) -> int { |
6077 | 9 | ++num_scanned; |
6078 | | |
6079 | 9 | std::unique_ptr<Transaction> txn; |
6080 | 9 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
6081 | 9 | if (err != TxnErrorCode::TXN_OK) { |
6082 | 0 | LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k)); |
6083 | 0 | return -1; |
6084 | 0 | } |
6085 | 9 | std::string_view k1 = k; |
6086 | | //TxnRunningKeyInfo 0:instance_id 1:db_id 2:txn_id |
6087 | 9 | k1.remove_prefix(1); // Remove key space |
6088 | 9 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
6089 | 9 | if (decode_key(&k1, &out) != 0) { |
6090 | 0 | LOG_ERROR("failed to decode key").tag("key", hex(k)); |
6091 | 0 | return -1; |
6092 | 0 | } |
6093 | 9 | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); |
6094 | 9 | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); |
6095 | 9 | VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id; |
6096 | | // Update txn_info |
6097 | 9 | std::string txn_inf_key, txn_inf_val; |
6098 | 9 | txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key); |
6099 | 9 | err = txn->get(txn_inf_key, &txn_inf_val); |
6100 | 9 | if (err != TxnErrorCode::TXN_OK) { |
6101 | 0 | LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key)); |
6102 | 0 | return -1; |
6103 | 0 | } |
6104 | 9 | TxnInfoPB txn_info; |
6105 | 9 | if (!txn_info.ParseFromString(txn_inf_val)) { |
6106 | 0 | LOG_WARNING("failed to parse txn info").tag("key", hex(k)); |
6107 | 0 | return -1; |
6108 | 0 | } |
6109 | | |
6110 | 9 | if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) { |
6111 | 3 | txn.reset(); |
6112 | 3 | TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info); |
6113 | 3 | std::shared_ptr<TxnLazyCommitTask> task = |
6114 | 3 | txn_lazy_committer_->submit(instance_id_, txn_info.txn_id()); |
6115 | 3 | std::pair<MetaServiceCode, std::string> ret = task->wait(); |
6116 | 3 | if (ret.first != MetaServiceCode::OK) { |
6117 | 0 | LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first |
6118 | 0 | << "msg=" << ret.second; |
6119 | 0 | return -1; |
6120 | 0 | } |
6121 | 3 | ++num_advance; |
6122 | 3 | return 0; |
6123 | 6 | } else { |
6124 | 6 | TxnRunningPB txn_running_pb; |
6125 | 6 | if (!txn_running_pb.ParseFromArray(v.data(), v.size())) { |
6126 | 0 | LOG_WARNING("malformed txn_running_pb").tag("key", hex(k)); |
6127 | 0 | return -1; |
6128 | 0 | } |
6129 | 6 | if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) { |
6130 | 4 | return 0; |
6131 | 4 | } |
6132 | 2 | ++num_timeout; |
6133 | | |
6134 | 2 | DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE); |
6135 | 2 | txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED); |
6136 | 2 | txn_info.set_finish_time(current_time); |
6137 | 2 | txn_info.set_reason("timeout"); |
6138 | 2 | VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString(); |
6139 | 2 | txn_inf_val.clear(); |
6140 | 2 | if (!txn_info.SerializeToString(&txn_inf_val)) { |
6141 | 0 | LOG_WARNING("failed to serialize txn info").tag("key", hex(k)); |
6142 | 0 | return -1; |
6143 | 0 | } |
6144 | 2 | txn->put(txn_inf_key, txn_inf_val); |
6145 | 2 | VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key); |
6146 | | // Put recycle txn key |
6147 | 2 | std::string recyc_txn_key, recyc_txn_val; |
6148 | 2 | recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key); |
6149 | 2 | RecycleTxnPB recycle_txn_pb; |
6150 | 2 | recycle_txn_pb.set_creation_time(current_time); |
6151 | 2 | recycle_txn_pb.set_label(txn_info.label()); |
6152 | 2 | if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) { |
6153 | 0 | LOG_WARNING("failed to serialize txn recycle info") |
6154 | 0 | .tag("key", hex(k)) |
6155 | 0 | .tag("db_id", db_id) |
6156 | 0 | .tag("txn_id", txn_id); |
6157 | 0 | return -1; |
6158 | 0 | } |
6159 | 2 | txn->put(recyc_txn_key, recyc_txn_val); |
6160 | | // Remove txn running key |
6161 | 2 | txn->remove(k); |
6162 | 2 | err = txn->commit(); |
6163 | 2 | if (err != TxnErrorCode::TXN_OK) { |
6164 | 0 | LOG_WARNING("failed to commit txn err={}", err) |
6165 | 0 | .tag("key", hex(k)) |
6166 | 0 | .tag("db_id", db_id) |
6167 | 0 | .tag("txn_id", txn_id); |
6168 | 0 | return -1; |
6169 | 0 | } |
6170 | 2 | metrics_context.total_recycled_num = ++num_abort; |
6171 | 2 | metrics_context.report(); |
6172 | 2 | } |
6173 | | |
6174 | 2 | return 0; |
6175 | 9 | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 6076 | 3 | this](std::string_view k, std::string_view v) -> int { | 6077 | 3 | ++num_scanned; | 6078 | | | 6079 | 3 | std::unique_ptr<Transaction> txn; | 6080 | 3 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 6081 | 3 | if (err != TxnErrorCode::TXN_OK) { | 6082 | 0 | LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k)); | 6083 | 0 | return -1; | 6084 | 0 | } | 6085 | 3 | std::string_view k1 = k; | 6086 | | //TxnRunningKeyInfo 0:instance_id 1:db_id 2:txn_id | 6087 | 3 | k1.remove_prefix(1); // Remove key space | 6088 | 3 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 6089 | 3 | if (decode_key(&k1, &out) != 0) { | 6090 | 0 | LOG_ERROR("failed to decode key").tag("key", hex(k)); | 6091 | 0 | return -1; | 6092 | 0 | } | 6093 | 3 | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); | 6094 | 3 | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); | 6095 | 3 | VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id; | 6096 | | // Update txn_info | 6097 | 3 | std::string txn_inf_key, txn_inf_val; | 6098 | 3 | txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key); | 6099 | 3 | err = txn->get(txn_inf_key, &txn_inf_val); | 6100 | 3 | if (err != TxnErrorCode::TXN_OK) { | 6101 | 0 | LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key)); | 6102 | 0 | return -1; | 6103 | 0 | } | 6104 | 3 | TxnInfoPB txn_info; | 6105 | 3 | if (!txn_info.ParseFromString(txn_inf_val)) { | 6106 | 0 | LOG_WARNING("failed to parse txn info").tag("key", hex(k)); | 6107 | 0 | return -1; | 6108 | 0 | } | 6109 | | | 6110 | 3 | if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) { | 6111 | 3 | txn.reset(); | 6112 | 3 | TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info); | 6113 | 3 | 
std::shared_ptr<TxnLazyCommitTask> task = | 6114 | 3 | txn_lazy_committer_->submit(instance_id_, txn_info.txn_id()); | 6115 | 3 | std::pair<MetaServiceCode, std::string> ret = task->wait(); | 6116 | 3 | if (ret.first != MetaServiceCode::OK) { | 6117 | 0 | LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first | 6118 | 0 | << "msg=" << ret.second; | 6119 | 0 | return -1; | 6120 | 0 | } | 6121 | 3 | ++num_advance; | 6122 | 3 | return 0; | 6123 | 3 | } else { | 6124 | 0 | TxnRunningPB txn_running_pb; | 6125 | 0 | if (!txn_running_pb.ParseFromArray(v.data(), v.size())) { | 6126 | 0 | LOG_WARNING("malformed txn_running_pb").tag("key", hex(k)); | 6127 | 0 | return -1; | 6128 | 0 | } | 6129 | 0 | if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) { | 6130 | 0 | return 0; | 6131 | 0 | } | 6132 | 0 | ++num_timeout; | 6133 | |
| 6134 | 0 | DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE); | 6135 | 0 | txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED); | 6136 | 0 | txn_info.set_finish_time(current_time); | 6137 | 0 | txn_info.set_reason("timeout"); | 6138 | 0 | VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString(); | 6139 | 0 | txn_inf_val.clear(); | 6140 | 0 | if (!txn_info.SerializeToString(&txn_inf_val)) { | 6141 | 0 | LOG_WARNING("failed to serialize txn info").tag("key", hex(k)); | 6142 | 0 | return -1; | 6143 | 0 | } | 6144 | 0 | txn->put(txn_inf_key, txn_inf_val); | 6145 | 0 | VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key); | 6146 | | // Put recycle txn key | 6147 | 0 | std::string recyc_txn_key, recyc_txn_val; | 6148 | 0 | recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key); | 6149 | 0 | RecycleTxnPB recycle_txn_pb; | 6150 | 0 | recycle_txn_pb.set_creation_time(current_time); | 6151 | 0 | recycle_txn_pb.set_label(txn_info.label()); | 6152 | 0 | if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) { | 6153 | 0 | LOG_WARNING("failed to serialize txn recycle info") | 6154 | 0 | .tag("key", hex(k)) | 6155 | 0 | .tag("db_id", db_id) | 6156 | 0 | .tag("txn_id", txn_id); | 6157 | 0 | return -1; | 6158 | 0 | } | 6159 | 0 | txn->put(recyc_txn_key, recyc_txn_val); | 6160 | | // Remove txn running key | 6161 | 0 | txn->remove(k); | 6162 | 0 | err = txn->commit(); | 6163 | 0 | if (err != TxnErrorCode::TXN_OK) { | 6164 | 0 | LOG_WARNING("failed to commit txn err={}", err) | 6165 | 0 | .tag("key", hex(k)) | 6166 | 0 | .tag("db_id", db_id) | 6167 | 0 | .tag("txn_id", txn_id); | 6168 | 0 | return -1; | 6169 | 0 | } | 6170 | 0 | metrics_context.total_recycled_num = ++num_abort; | 6171 | 0 | metrics_context.report(); | 6172 | 0 | } | 6173 | | | 6174 | 0 | return 0; | 6175 | 3 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 6076 | 6 | this](std::string_view k, std::string_view v) -> int { | 6077 | 6 | ++num_scanned; | 6078 | | | 6079 | 6 | std::unique_ptr<Transaction> txn; | 6080 | 6 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 6081 | 6 | if (err != TxnErrorCode::TXN_OK) { | 6082 | 0 | LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k)); | 6083 | 0 | return -1; | 6084 | 0 | } | 6085 | 6 | std::string_view k1 = k; | 6086 | | //TxnRunningKeyInfo 0:instance_id 1:db_id 2:txn_id | 6087 | 6 | k1.remove_prefix(1); // Remove key space | 6088 | 6 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 6089 | 6 | if (decode_key(&k1, &out) != 0) { | 6090 | 0 | LOG_ERROR("failed to decode key").tag("key", hex(k)); | 6091 | 0 | return -1; | 6092 | 0 | } | 6093 | 6 | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); | 6094 | 6 | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); | 6095 | 6 | VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id; | 6096 | | // Update txn_info | 6097 | 6 | std::string txn_inf_key, txn_inf_val; | 6098 | 6 | txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key); | 6099 | 6 | err = txn->get(txn_inf_key, &txn_inf_val); | 6100 | 6 | if (err != TxnErrorCode::TXN_OK) { | 6101 | 0 | LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key)); | 6102 | 0 | return -1; | 6103 | 0 | } | 6104 | 6 | TxnInfoPB txn_info; | 6105 | 6 | if (!txn_info.ParseFromString(txn_inf_val)) { | 6106 | 0 | LOG_WARNING("failed to parse txn info").tag("key", hex(k)); | 6107 | 0 | return -1; | 6108 | 0 | } | 6109 | | | 6110 | 6 | if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) { | 6111 | 0 | txn.reset(); | 6112 | 0 | TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info); | 6113 | 0 | 
std::shared_ptr<TxnLazyCommitTask> task = | 6114 | 0 | txn_lazy_committer_->submit(instance_id_, txn_info.txn_id()); | 6115 | 0 | std::pair<MetaServiceCode, std::string> ret = task->wait(); | 6116 | 0 | if (ret.first != MetaServiceCode::OK) { | 6117 | 0 | LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first | 6118 | 0 | << "msg=" << ret.second; | 6119 | 0 | return -1; | 6120 | 0 | } | 6121 | 0 | ++num_advance; | 6122 | 0 | return 0; | 6123 | 6 | } else { | 6124 | 6 | TxnRunningPB txn_running_pb; | 6125 | 6 | if (!txn_running_pb.ParseFromArray(v.data(), v.size())) { | 6126 | 0 | LOG_WARNING("malformed txn_running_pb").tag("key", hex(k)); | 6127 | 0 | return -1; | 6128 | 0 | } | 6129 | 6 | if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) { | 6130 | 4 | return 0; | 6131 | 4 | } | 6132 | 2 | ++num_timeout; | 6133 | | | 6134 | 2 | DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE); | 6135 | 2 | txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED); | 6136 | 2 | txn_info.set_finish_time(current_time); | 6137 | 2 | txn_info.set_reason("timeout"); | 6138 | 2 | VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString(); | 6139 | 2 | txn_inf_val.clear(); | 6140 | 2 | if (!txn_info.SerializeToString(&txn_inf_val)) { | 6141 | 0 | LOG_WARNING("failed to serialize txn info").tag("key", hex(k)); | 6142 | 0 | return -1; | 6143 | 0 | } | 6144 | 2 | txn->put(txn_inf_key, txn_inf_val); | 6145 | 2 | VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key); | 6146 | | // Put recycle txn key | 6147 | 2 | std::string recyc_txn_key, recyc_txn_val; | 6148 | 2 | recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key); | 6149 | 2 | RecycleTxnPB recycle_txn_pb; | 6150 | 2 | recycle_txn_pb.set_creation_time(current_time); | 6151 | 2 | recycle_txn_pb.set_label(txn_info.label()); | 6152 | 2 | if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) { | 6153 | 0 | LOG_WARNING("failed to serialize txn recycle info") 
| 6154 | 0 | .tag("key", hex(k)) | 6155 | 0 | .tag("db_id", db_id) | 6156 | 0 | .tag("txn_id", txn_id); | 6157 | 0 | return -1; | 6158 | 0 | } | 6159 | 2 | txn->put(recyc_txn_key, recyc_txn_val); | 6160 | | // Remove txn running key | 6161 | 2 | txn->remove(k); | 6162 | 2 | err = txn->commit(); | 6163 | 2 | if (err != TxnErrorCode::TXN_OK) { | 6164 | 0 | LOG_WARNING("failed to commit txn err={}", err) | 6165 | 0 | .tag("key", hex(k)) | 6166 | 0 | .tag("db_id", db_id) | 6167 | 0 | .tag("txn_id", txn_id); | 6168 | 0 | return -1; | 6169 | 0 | } | 6170 | 2 | metrics_context.total_recycled_num = ++num_abort; | 6171 | 2 | metrics_context.report(); | 6172 | 2 | } | 6173 | | | 6174 | 2 | return 0; | 6175 | 6 | }; |
|
6176 | | |
6177 | 19 | if (config::enable_recycler_stats_metrics) { |
6178 | 0 | scan_and_statistics_abort_timeout_txn(); |
6179 | 0 | } |
6180 | | // recycle_func and loop_done for scan and recycle |
6181 | 19 | return scan_and_recycle(begin_txn_running_key, end_txn_running_key, |
6182 | 19 | std::move(handle_txn_running_kv)); |
6183 | 19 | } |
6184 | | |
6185 | 19 | int InstanceRecycler::recycle_expired_txn_label() { |
6186 | 19 | const std::string task_name = "recycle_expired_txn_label"; |
6187 | 19 | int64_t num_scanned = 0; |
6188 | 19 | int64_t num_expired = 0; |
6189 | 19 | std::atomic_long num_recycled = 0; |
6190 | 19 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
6191 | 19 | int ret = 0; |
6192 | | |
6193 | 19 | RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0}; |
6194 | 19 | RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX}; |
6195 | 19 | std::string begin_recycle_txn_key; |
6196 | 19 | std::string end_recycle_txn_key; |
6197 | 19 | recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key); |
6198 | 19 | recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key); |
6199 | 19 | std::vector<std::string> recycle_txn_info_keys; |
6200 | | |
6201 | 19 | LOG_WARNING("begin to recycle expired txn").tag("instance_id", instance_id_); |
6202 | | |
6203 | 19 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
6204 | 19 | register_recycle_task(task_name, start_time); |
6205 | 19 | DORIS_CLOUD_DEFER { |
6206 | 19 | unregister_recycle_task(task_name); |
6207 | 19 | int64_t cost = |
6208 | 19 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
6209 | 19 | metrics_context.finish_report(); |
6210 | 19 | LOG_WARNING("end to recycle expired txn, cost={}s", cost) |
6211 | 19 | .tag("instance_id", instance_id_) |
6212 | 19 | .tag("num_scanned", num_scanned) |
6213 | 19 | .tag("num_expired", num_expired) |
6214 | 19 | .tag("num_recycled", num_recycled); |
6215 | 19 | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv Line | Count | Source | 6205 | 1 | DORIS_CLOUD_DEFER { | 6206 | 1 | unregister_recycle_task(task_name); | 6207 | 1 | int64_t cost = | 6208 | 1 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 6209 | 1 | metrics_context.finish_report(); | 6210 | 1 | LOG_WARNING("end to recycle expired txn, cost={}s", cost) | 6211 | 1 | .tag("instance_id", instance_id_) | 6212 | 1 | .tag("num_scanned", num_scanned) | 6213 | 1 | .tag("num_expired", num_expired) | 6214 | 1 | .tag("num_recycled", num_recycled); | 6215 | 1 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv Line | Count | Source | 6205 | 18 | DORIS_CLOUD_DEFER { | 6206 | 18 | unregister_recycle_task(task_name); | 6207 | 18 | int64_t cost = | 6208 | 18 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 6209 | 18 | metrics_context.finish_report(); | 6210 | 18 | LOG_WARNING("end to recycle expired txn, cost={}s", cost) | 6211 | 18 | .tag("instance_id", instance_id_) | 6212 | 18 | .tag("num_scanned", num_scanned) | 6213 | 18 | .tag("num_expired", num_expired) | 6214 | 18 | .tag("num_recycled", num_recycled); | 6215 | 18 | }; |
|
6216 | | |
6217 | 19 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
6218 | | |
6219 | 19 | SyncExecutor<int> concurrent_delete_executor( |
6220 | 19 | _thread_pool_group.s3_producer_pool, |
6221 | 19 | fmt::format("recycle expired txn label, instance id {}", instance_id_), |
6222 | 23.0k | [](const int& ret) { return ret != 0; });recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi Line | Count | Source | 6222 | 1 | [](const int& ret) { return ret != 0; }); |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi Line | Count | Source | 6222 | 23.0k | [](const int& ret) { return ret != 0; }); |
|
6223 | | |
6224 | 19 | int64_t current_time_ms = |
6225 | 19 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
6226 | | |
6227 | 30.0k | auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int { |
6228 | 30.0k | ++num_scanned; |
6229 | 30.0k | RecycleTxnPB recycle_txn_pb; |
6230 | 30.0k | if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) { |
6231 | 0 | LOG_WARNING("malformed txn_running_pb").tag("key", hex(k)); |
6232 | 0 | return -1; |
6233 | 0 | } |
6234 | 30.0k | if ((config::force_immediate_recycle) || |
6235 | 30.0k | (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) || |
6236 | 30.0k | (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <= |
6237 | 30.0k | current_time_ms)) { |
6238 | 23.0k | VLOG_DEBUG << "found recycle txn, key=" << hex(k); |
6239 | 23.0k | num_expired++; |
6240 | 23.0k | recycle_txn_info_keys.emplace_back(k); |
6241 | 23.0k | } |
6242 | 30.0k | return 0; |
6243 | 30.0k | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 6227 | 1 | auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int { | 6228 | 1 | ++num_scanned; | 6229 | 1 | RecycleTxnPB recycle_txn_pb; | 6230 | 1 | if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) { | 6231 | 0 | LOG_WARNING("malformed txn_running_pb").tag("key", hex(k)); | 6232 | 0 | return -1; | 6233 | 0 | } | 6234 | 1 | if ((config::force_immediate_recycle) || | 6235 | 1 | (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) || | 6236 | 1 | (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <= | 6237 | 1 | current_time_ms)) { | 6238 | 1 | VLOG_DEBUG << "found recycle txn, key=" << hex(k); | 6239 | 1 | num_expired++; | 6240 | 1 | recycle_txn_info_keys.emplace_back(k); | 6241 | 1 | } | 6242 | 1 | return 0; | 6243 | 1 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 6227 | 30.0k | auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int { | 6228 | 30.0k | ++num_scanned; | 6229 | 30.0k | RecycleTxnPB recycle_txn_pb; | 6230 | 30.0k | if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) { | 6231 | 0 | LOG_WARNING("malformed txn_running_pb").tag("key", hex(k)); | 6232 | 0 | return -1; | 6233 | 0 | } | 6234 | 30.0k | if ((config::force_immediate_recycle) || | 6235 | 30.0k | (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) || | 6236 | 30.0k | (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <= | 6237 | 30.0k | current_time_ms)) { | 6238 | 23.0k | VLOG_DEBUG << "found recycle txn, key=" << hex(k); | 6239 | 23.0k | num_expired++; | 6240 | 23.0k | recycle_txn_info_keys.emplace_back(k); | 6241 | 23.0k | } | 6242 | 30.0k | return 0; | 6243 | 30.0k | }; |
|
6244 | | |
6245 | | // int 0 for success, 1 for conflict, -1 for error |
6246 | 23.0k | auto delete_recycle_txn_kv = [&](const std::string& k) -> int { |
6247 | 23.0k | std::string_view k1 = k; |
6248 | | //RecycleTxnKeyInfo 0:instance_id 1:db_id 2:txn_id |
6249 | 23.0k | k1.remove_prefix(1); // Remove key space |
6250 | 23.0k | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
6251 | 23.0k | int ret = decode_key(&k1, &out); |
6252 | 23.0k | if (ret != 0) { |
6253 | 0 | LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k)); |
6254 | 0 | return -1; |
6255 | 0 | } |
6256 | 23.0k | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); |
6257 | 23.0k | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); |
6258 | 23.0k | VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id; |
6259 | 23.0k | std::unique_ptr<Transaction> txn; |
6260 | 23.0k | TxnErrorCode err = txn_kv_->create_txn(&txn); |
6261 | 23.0k | if (err != TxnErrorCode::TXN_OK) { |
6262 | 0 | LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k)); |
6263 | 0 | return -1; |
6264 | 0 | } |
6265 | | // Remove txn index kv |
6266 | 23.0k | auto index_key = txn_index_key({instance_id_, txn_id}); |
6267 | 23.0k | txn->remove(index_key); |
6268 | | // Remove txn info kv |
6269 | 23.0k | std::string info_key, info_val; |
6270 | 23.0k | txn_info_key({instance_id_, db_id, txn_id}, &info_key); |
6271 | 23.0k | err = txn->get(info_key, &info_val); |
6272 | 23.0k | if (err != TxnErrorCode::TXN_OK) { |
6273 | 0 | LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key)); |
6274 | 0 | return -1; |
6275 | 0 | } |
6276 | 23.0k | TxnInfoPB txn_info; |
6277 | 23.0k | if (!txn_info.ParseFromString(info_val)) { |
6278 | 0 | LOG_WARNING("failed to parse txn info").tag("key", hex(info_key)); |
6279 | 0 | return -1; |
6280 | 0 | } |
6281 | 23.0k | txn->remove(info_key); |
6282 | | // Remove sub txn index kvs |
6283 | 23.0k | std::vector<std::string> sub_txn_index_keys; |
6284 | 23.0k | for (auto sub_txn_id : txn_info.sub_txn_ids()) { |
6285 | 22.9k | auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id}); |
6286 | 22.9k | sub_txn_index_keys.push_back(sub_txn_index_key); |
6287 | 22.9k | } |
6288 | 23.0k | for (auto& sub_txn_index_key : sub_txn_index_keys) { |
6289 | 22.9k | txn->remove(sub_txn_index_key); |
6290 | 22.9k | } |
6291 | | // Update txn label |
6292 | 23.0k | std::string label_key, label_val; |
6293 | 23.0k | txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key); |
6294 | 23.0k | err = txn->get(label_key, &label_val); |
6295 | 23.0k | if (err != TxnErrorCode::TXN_OK) { |
6296 | 0 | LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key |
6297 | 0 | << " err=" << err; |
6298 | 0 | return -1; |
6299 | 0 | } |
6300 | 23.0k | TxnLabelPB txn_label; |
6301 | 23.0k | if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) { |
6302 | 0 | LOG_WARNING("failed to parse txn label").tag("key", hex(label_key)); |
6303 | 0 | return -1; |
6304 | 0 | } |
6305 | 23.0k | auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id); |
6306 | 23.0k | if (it != txn_label.txn_ids().end()) { |
6307 | 23.0k | txn_label.mutable_txn_ids()->erase(it); |
6308 | 23.0k | } |
6309 | 23.0k | if (txn_label.txn_ids().empty()) { |
6310 | 23.0k | txn->remove(label_key); |
6311 | 23.0k | TEST_SYNC_POINT_CALLBACK( |
6312 | 23.0k | "InstanceRecycler::recycle_expired_txn_label.remove_label_before"); |
6313 | 23.0k | } else { |
6314 | 72 | if (!txn_label.SerializeToString(&label_val)) { |
6315 | 0 | LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key); |
6316 | 0 | return -1; |
6317 | 0 | } |
6318 | 72 | TEST_SYNC_POINT_CALLBACK( |
6319 | 72 | "InstanceRecycler::recycle_expired_txn_label.update_label_before"); |
6320 | 72 | txn->atomic_set_ver_value(label_key, label_val); |
6321 | 72 | TEST_SYNC_POINT_CALLBACK( |
6322 | 72 | "InstanceRecycler::recycle_expired_txn_label.update_label_after"); |
6323 | 72 | } |
6324 | | // Remove recycle txn kv |
6325 | 23.0k | txn->remove(k); |
6326 | 23.0k | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit"); |
6327 | 23.0k | err = txn->commit(); |
6328 | 23.0k | if (err != TxnErrorCode::TXN_OK) { |
6329 | 62 | if (err == TxnErrorCode::TXN_CONFLICT) { |
6330 | 62 | TEST_SYNC_POINT_CALLBACK( |
6331 | 62 | "InstanceRecycler::recycle_expired_txn_label.txn_conflict"); |
6332 | | // log the txn_id and label |
6333 | 62 | LOG(WARNING) << "txn conflict, txn_id=" << txn_id |
6334 | 62 | << " txn_label_pb=" << txn_label.ShortDebugString() |
6335 | 62 | << " txn_label=" << txn_info.label(); |
6336 | 62 | return 1; |
6337 | 62 | } |
6338 | 0 | LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k); |
6339 | 0 | return -1; |
6340 | 62 | } |
6341 | 23.0k | ++num_recycled; |
6342 | | |
6343 | 23.0k | LOG(INFO) << "recycle expired txn, key=" << hex(k); |
6344 | 23.0k | return 0; |
6345 | 23.0k | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 6246 | 1 | auto delete_recycle_txn_kv = [&](const std::string& k) -> int { | 6247 | 1 | std::string_view k1 = k; | 6248 | | //RecycleTxnKeyInfo 0:instance_id 1:db_id 2:txn_id | 6249 | 1 | k1.remove_prefix(1); // Remove key space | 6250 | 1 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 6251 | 1 | int ret = decode_key(&k1, &out); | 6252 | 1 | if (ret != 0) { | 6253 | 0 | LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k)); | 6254 | 0 | return -1; | 6255 | 0 | } | 6256 | 1 | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); | 6257 | 1 | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); | 6258 | 1 | VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id; | 6259 | 1 | std::unique_ptr<Transaction> txn; | 6260 | 1 | TxnErrorCode err = txn_kv_->create_txn(&txn); | 6261 | 1 | if (err != TxnErrorCode::TXN_OK) { | 6262 | 0 | LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k)); | 6263 | 0 | return -1; | 6264 | 0 | } | 6265 | | // Remove txn index kv | 6266 | 1 | auto index_key = txn_index_key({instance_id_, txn_id}); | 6267 | 1 | txn->remove(index_key); | 6268 | | // Remove txn info kv | 6269 | 1 | std::string info_key, info_val; | 6270 | 1 | txn_info_key({instance_id_, db_id, txn_id}, &info_key); | 6271 | 1 | err = txn->get(info_key, &info_val); | 6272 | 1 | if (err != TxnErrorCode::TXN_OK) { | 6273 | 0 | LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key)); | 6274 | 0 | return -1; | 6275 | 0 | } | 6276 | 1 | TxnInfoPB txn_info; | 6277 | 1 | if (!txn_info.ParseFromString(info_val)) { | 6278 | 0 | LOG_WARNING("failed to parse txn info").tag("key", hex(info_key)); | 6279 | 0 | return -1; | 6280 | 0 | } | 6281 | 1 | txn->remove(info_key); | 6282 | | // Remove sub txn 
index kvs | 6283 | 1 | std::vector<std::string> sub_txn_index_keys; | 6284 | 1 | for (auto sub_txn_id : txn_info.sub_txn_ids()) { | 6285 | 0 | auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id}); | 6286 | 0 | sub_txn_index_keys.push_back(sub_txn_index_key); | 6287 | 0 | } | 6288 | 1 | for (auto& sub_txn_index_key : sub_txn_index_keys) { | 6289 | 0 | txn->remove(sub_txn_index_key); | 6290 | 0 | } | 6291 | | // Update txn label | 6292 | 1 | std::string label_key, label_val; | 6293 | 1 | txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key); | 6294 | 1 | err = txn->get(label_key, &label_val); | 6295 | 1 | if (err != TxnErrorCode::TXN_OK) { | 6296 | 0 | LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key | 6297 | 0 | << " err=" << err; | 6298 | 0 | return -1; | 6299 | 0 | } | 6300 | 1 | TxnLabelPB txn_label; | 6301 | 1 | if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) { | 6302 | 0 | LOG_WARNING("failed to parse txn label").tag("key", hex(label_key)); | 6303 | 0 | return -1; | 6304 | 0 | } | 6305 | 1 | auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id); | 6306 | 1 | if (it != txn_label.txn_ids().end()) { | 6307 | 1 | txn_label.mutable_txn_ids()->erase(it); | 6308 | 1 | } | 6309 | 1 | if (txn_label.txn_ids().empty()) { | 6310 | 1 | txn->remove(label_key); | 6311 | 1 | TEST_SYNC_POINT_CALLBACK( | 6312 | 1 | "InstanceRecycler::recycle_expired_txn_label.remove_label_before"); | 6313 | 1 | } else { | 6314 | 0 | if (!txn_label.SerializeToString(&label_val)) { | 6315 | 0 | LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key); | 6316 | 0 | return -1; | 6317 | 0 | } | 6318 | 0 | TEST_SYNC_POINT_CALLBACK( | 6319 | 0 | "InstanceRecycler::recycle_expired_txn_label.update_label_before"); | 6320 | 0 | txn->atomic_set_ver_value(label_key, label_val); | 6321 | 0 | TEST_SYNC_POINT_CALLBACK( | 6322 | 0 | 
"InstanceRecycler::recycle_expired_txn_label.update_label_after"); | 6323 | 0 | } | 6324 | | // Remove recycle txn kv | 6325 | 1 | txn->remove(k); | 6326 | 1 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit"); | 6327 | 1 | err = txn->commit(); | 6328 | 1 | if (err != TxnErrorCode::TXN_OK) { | 6329 | 0 | if (err == TxnErrorCode::TXN_CONFLICT) { | 6330 | 0 | TEST_SYNC_POINT_CALLBACK( | 6331 | 0 | "InstanceRecycler::recycle_expired_txn_label.txn_conflict"); | 6332 | | // log the txn_id and label | 6333 | 0 | LOG(WARNING) << "txn conflict, txn_id=" << txn_id | 6334 | 0 | << " txn_label_pb=" << txn_label.ShortDebugString() | 6335 | 0 | << " txn_label=" << txn_info.label(); | 6336 | 0 | return 1; | 6337 | 0 | } | 6338 | 0 | LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k); | 6339 | 0 | return -1; | 6340 | 0 | } | 6341 | 1 | ++num_recycled; | 6342 | | | 6343 | 1 | LOG(INFO) << "recycle expired txn, key=" << hex(k); | 6344 | 1 | return 0; | 6345 | 1 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 6246 | 23.0k | auto delete_recycle_txn_kv = [&](const std::string& k) -> int { | 6247 | 23.0k | std::string_view k1 = k; | 6248 | | //RecycleTxnKeyInfo 0:instance_id 1:db_id 2:txn_id | 6249 | 23.0k | k1.remove_prefix(1); // Remove key space | 6250 | 23.0k | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 6251 | 23.0k | int ret = decode_key(&k1, &out); | 6252 | 23.0k | if (ret != 0) { | 6253 | 0 | LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k)); | 6254 | 0 | return -1; | 6255 | 0 | } | 6256 | 23.0k | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); | 6257 | 23.0k | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); | 6258 | 23.0k | VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id; | 6259 | 23.0k | std::unique_ptr<Transaction> txn; | 6260 | 23.0k | TxnErrorCode err = txn_kv_->create_txn(&txn); | 6261 | 23.0k | if (err != TxnErrorCode::TXN_OK) { | 6262 | 0 | LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k)); | 6263 | 0 | return -1; | 6264 | 0 | } | 6265 | | // Remove txn index kv | 6266 | 23.0k | auto index_key = txn_index_key({instance_id_, txn_id}); | 6267 | 23.0k | txn->remove(index_key); | 6268 | | // Remove txn info kv | 6269 | 23.0k | std::string info_key, info_val; | 6270 | 23.0k | txn_info_key({instance_id_, db_id, txn_id}, &info_key); | 6271 | 23.0k | err = txn->get(info_key, &info_val); | 6272 | 23.0k | if (err != TxnErrorCode::TXN_OK) { | 6273 | 0 | LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key)); | 6274 | 0 | return -1; | 6275 | 0 | } | 6276 | 23.0k | TxnInfoPB txn_info; | 6277 | 23.0k | if (!txn_info.ParseFromString(info_val)) { | 6278 | 0 | LOG_WARNING("failed to parse txn info").tag("key", hex(info_key)); | 6279 | 0 | return -1; | 6280 | 0 | } 
| 6281 | 23.0k | txn->remove(info_key); | 6282 | | // Remove sub txn index kvs | 6283 | 23.0k | std::vector<std::string> sub_txn_index_keys; | 6284 | 23.0k | for (auto sub_txn_id : txn_info.sub_txn_ids()) { | 6285 | 22.9k | auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id}); | 6286 | 22.9k | sub_txn_index_keys.push_back(sub_txn_index_key); | 6287 | 22.9k | } | 6288 | 23.0k | for (auto& sub_txn_index_key : sub_txn_index_keys) { | 6289 | 22.9k | txn->remove(sub_txn_index_key); | 6290 | 22.9k | } | 6291 | | // Update txn label | 6292 | 23.0k | std::string label_key, label_val; | 6293 | 23.0k | txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key); | 6294 | 23.0k | err = txn->get(label_key, &label_val); | 6295 | 23.0k | if (err != TxnErrorCode::TXN_OK) { | 6296 | 0 | LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key | 6297 | 0 | << " err=" << err; | 6298 | 0 | return -1; | 6299 | 0 | } | 6300 | 23.0k | TxnLabelPB txn_label; | 6301 | 23.0k | if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) { | 6302 | 0 | LOG_WARNING("failed to parse txn label").tag("key", hex(label_key)); | 6303 | 0 | return -1; | 6304 | 0 | } | 6305 | 23.0k | auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id); | 6306 | 23.0k | if (it != txn_label.txn_ids().end()) { | 6307 | 23.0k | txn_label.mutable_txn_ids()->erase(it); | 6308 | 23.0k | } | 6309 | 23.0k | if (txn_label.txn_ids().empty()) { | 6310 | 23.0k | txn->remove(label_key); | 6311 | 23.0k | TEST_SYNC_POINT_CALLBACK( | 6312 | 23.0k | "InstanceRecycler::recycle_expired_txn_label.remove_label_before"); | 6313 | 23.0k | } else { | 6314 | 72 | if (!txn_label.SerializeToString(&label_val)) { | 6315 | 0 | LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key); | 6316 | 0 | return -1; | 6317 | 0 | } | 6318 | 72 | TEST_SYNC_POINT_CALLBACK( | 6319 | 72 | 
"InstanceRecycler::recycle_expired_txn_label.update_label_before"); | 6320 | 72 | txn->atomic_set_ver_value(label_key, label_val); | 6321 | 72 | TEST_SYNC_POINT_CALLBACK( | 6322 | 72 | "InstanceRecycler::recycle_expired_txn_label.update_label_after"); | 6323 | 72 | } | 6324 | | // Remove recycle txn kv | 6325 | 23.0k | txn->remove(k); | 6326 | 23.0k | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit"); | 6327 | 23.0k | err = txn->commit(); | 6328 | 23.0k | if (err != TxnErrorCode::TXN_OK) { | 6329 | 62 | if (err == TxnErrorCode::TXN_CONFLICT) { | 6330 | 62 | TEST_SYNC_POINT_CALLBACK( | 6331 | 62 | "InstanceRecycler::recycle_expired_txn_label.txn_conflict"); | 6332 | | // log the txn_id and label | 6333 | 62 | LOG(WARNING) << "txn conflict, txn_id=" << txn_id | 6334 | 62 | << " txn_label_pb=" << txn_label.ShortDebugString() | 6335 | 62 | << " txn_label=" << txn_info.label(); | 6336 | 62 | return 1; | 6337 | 62 | } | 6338 | 0 | LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k); | 6339 | 0 | return -1; | 6340 | 62 | } | 6341 | 23.0k | ++num_recycled; | 6342 | | | 6343 | 23.0k | LOG(INFO) << "recycle expired txn, key=" << hex(k); | 6344 | 23.0k | return 0; | 6345 | 23.0k | }; |
|
6346 | | |
6347 | 19 | auto loop_done = [&]() -> int { |
6348 | 10 | DORIS_CLOUD_DEFER { |
6349 | 10 | recycle_txn_info_keys.clear(); |
6350 | 10 | }; recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv Line | Count | Source | 6348 | 1 | DORIS_CLOUD_DEFER { | 6349 | 1 | recycle_txn_info_keys.clear(); | 6350 | 1 | }; |
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv Line | Count | Source | 6348 | 9 | DORIS_CLOUD_DEFER { | 6349 | 9 | recycle_txn_info_keys.clear(); | 6350 | 9 | }; |
|
6351 | 10 | TEST_SYNC_POINT_CALLBACK( |
6352 | 10 | "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys", |
6353 | 10 | &recycle_txn_info_keys); |
6354 | 23.0k | for (const auto& k : recycle_txn_info_keys) { |
6355 | 23.0k | concurrent_delete_executor.add([&]() { |
6356 | 23.0k | int ret = delete_recycle_txn_kv(k); |
6357 | 23.0k | if (ret == 1) { |
6358 | 18 | const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times); |
6359 | 54 | for (int i = 1; i <= max_retry; ++i) { |
6360 | 54 | LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k); |
6361 | 54 | ret = delete_recycle_txn_kv(k); |
6362 | | // clang-format off |
6363 | 54 | TEST_SYNC_POINT_CALLBACK( |
6364 | 54 | "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret); |
6365 | | // clang-format off |
6366 | 54 | if (ret != 1) { |
6367 | 18 | break; |
6368 | 18 | } |
6369 | | // random sleep 0-100 ms to retry |
6370 | 36 | std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100)); |
6371 | 36 | } |
6372 | 18 | } |
6373 | 23.0k | if (ret != 0) { |
6374 | 9 | LOG_WARNING("failed to delete recycle txn kv") |
6375 | 9 | .tag("instance id", instance_id_) |
6376 | 9 | .tag("key", hex(k)); |
6377 | 9 | return -1; |
6378 | 9 | } |
6379 | 23.0k | return 0; |
6380 | 23.0k | }); recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv Line | Count | Source | 6355 | 1 | concurrent_delete_executor.add([&]() { | 6356 | 1 | int ret = delete_recycle_txn_kv(k); | 6357 | 1 | if (ret == 1) { | 6358 | 0 | const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times); | 6359 | 0 | for (int i = 1; i <= max_retry; ++i) { | 6360 | 0 | LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k); | 6361 | 0 | ret = delete_recycle_txn_kv(k); | 6362 | | // clang-format off | 6363 | 0 | TEST_SYNC_POINT_CALLBACK( | 6364 | 0 | "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret); | 6365 | | // clang-format off | 6366 | 0 | if (ret != 1) { | 6367 | 0 | break; | 6368 | 0 | } | 6369 | | // random sleep 0-100 ms to retry | 6370 | 0 | std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100)); | 6371 | 0 | } | 6372 | 0 | } | 6373 | 1 | if (ret != 0) { | 6374 | 0 | LOG_WARNING("failed to delete recycle txn kv") | 6375 | 0 | .tag("instance id", instance_id_) | 6376 | 0 | .tag("key", hex(k)); | 6377 | 0 | return -1; | 6378 | 0 | } | 6379 | 1 | return 0; | 6380 | 1 | }); |
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv Line | Count | Source | 6355 | 23.0k | concurrent_delete_executor.add([&]() { | 6356 | 23.0k | int ret = delete_recycle_txn_kv(k); | 6357 | 23.0k | if (ret == 1) { | 6358 | 18 | const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times); | 6359 | 54 | for (int i = 1; i <= max_retry; ++i) { | 6360 | 54 | LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k); | 6361 | 54 | ret = delete_recycle_txn_kv(k); | 6362 | | // clang-format off | 6363 | 54 | TEST_SYNC_POINT_CALLBACK( | 6364 | 54 | "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret); | 6365 | | // clang-format off | 6366 | 54 | if (ret != 1) { | 6367 | 18 | break; | 6368 | 18 | } | 6369 | | // random sleep 0-100 ms to retry | 6370 | 36 | std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100)); | 6371 | 36 | } | 6372 | 18 | } | 6373 | 23.0k | if (ret != 0) { | 6374 | 9 | LOG_WARNING("failed to delete recycle txn kv") | 6375 | 9 | .tag("instance id", instance_id_) | 6376 | 9 | .tag("key", hex(k)); | 6377 | 9 | return -1; | 6378 | 9 | } | 6379 | 23.0k | return 0; | 6380 | 23.0k | }); |
|
6381 | 23.0k | } |
6382 | 10 | bool finished = true; |
6383 | 10 | std::vector<int> rets = concurrent_delete_executor.when_all(&finished); |
6384 | 23.0k | for (int r : rets) { |
6385 | 23.0k | if (r != 0) { |
6386 | 9 | ret = -1; |
6387 | 9 | } |
6388 | 23.0k | } |
6389 | | |
6390 | 10 | ret = finished ? ret : -1; |
6391 | | |
6392 | | // Update metrics after all concurrent tasks completed |
6393 | 10 | metrics_context.total_recycled_num = num_recycled.load(); |
6394 | 10 | metrics_context.report(); |
6395 | | |
6396 | 10 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret); |
6397 | | |
6398 | 10 | if (ret != 0) { |
6399 | 3 | LOG_WARNING("recycle txn kv ret!=0") |
6400 | 3 | .tag("finished", finished) |
6401 | 3 | .tag("ret", ret) |
6402 | 3 | .tag("instance_id", instance_id_); |
6403 | 3 | return ret; |
6404 | 3 | } |
6405 | 7 | return ret; |
6406 | 10 | }; recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv Line | Count | Source | 6347 | 1 | auto loop_done = [&]() -> int { | 6348 | 1 | DORIS_CLOUD_DEFER { | 6349 | 1 | recycle_txn_info_keys.clear(); | 6350 | 1 | }; | 6351 | 1 | TEST_SYNC_POINT_CALLBACK( | 6352 | 1 | "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys", | 6353 | 1 | &recycle_txn_info_keys); | 6354 | 1 | for (const auto& k : recycle_txn_info_keys) { | 6355 | 1 | concurrent_delete_executor.add([&]() { | 6356 | 1 | int ret = delete_recycle_txn_kv(k); | 6357 | 1 | if (ret == 1) { | 6358 | 1 | const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times); | 6359 | 1 | for (int i = 1; i <= max_retry; ++i) { | 6360 | 1 | LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k); | 6361 | 1 | ret = delete_recycle_txn_kv(k); | 6362 | | // clang-format off | 6363 | 1 | TEST_SYNC_POINT_CALLBACK( | 6364 | 1 | "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret); | 6365 | | // clang-format off | 6366 | 1 | if (ret != 1) { | 6367 | 1 | break; | 6368 | 1 | } | 6369 | | // random sleep 0-100 ms to retry | 6370 | 1 | std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100)); | 6371 | 1 | } | 6372 | 1 | } | 6373 | 1 | if (ret != 0) { | 6374 | 1 | LOG_WARNING("failed to delete recycle txn kv") | 6375 | 1 | .tag("instance id", instance_id_) | 6376 | 1 | .tag("key", hex(k)); | 6377 | 1 | return -1; | 6378 | 1 | } | 6379 | 1 | return 0; | 6380 | 1 | }); | 6381 | 1 | } | 6382 | 1 | bool finished = true; | 6383 | 1 | std::vector<int> rets = concurrent_delete_executor.when_all(&finished); | 6384 | 1 | for (int r : rets) { | 6385 | 1 | if (r != 0) { | 6386 | 0 | ret = -1; | 6387 | 0 | } | 6388 | 1 | } | 6389 | | | 6390 | 1 | ret = finished ? 
ret : -1; | 6391 | | | 6392 | | // Update metrics after all concurrent tasks completed | 6393 | 1 | metrics_context.total_recycled_num = num_recycled.load(); | 6394 | 1 | metrics_context.report(); | 6395 | | | 6396 | 1 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret); | 6397 | | | 6398 | 1 | if (ret != 0) { | 6399 | 0 | LOG_WARNING("recycle txn kv ret!=0") | 6400 | 0 | .tag("finished", finished) | 6401 | 0 | .tag("ret", ret) | 6402 | 0 | .tag("instance_id", instance_id_); | 6403 | 0 | return ret; | 6404 | 0 | } | 6405 | 1 | return ret; | 6406 | 1 | }; |
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv Line | Count | Source | 6347 | 9 | auto loop_done = [&]() -> int { | 6348 | 9 | DORIS_CLOUD_DEFER { | 6349 | 9 | recycle_txn_info_keys.clear(); | 6350 | 9 | }; | 6351 | 9 | TEST_SYNC_POINT_CALLBACK( | 6352 | 9 | "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys", | 6353 | 9 | &recycle_txn_info_keys); | 6354 | 23.0k | for (const auto& k : recycle_txn_info_keys) { | 6355 | 23.0k | concurrent_delete_executor.add([&]() { | 6356 | 23.0k | int ret = delete_recycle_txn_kv(k); | 6357 | 23.0k | if (ret == 1) { | 6358 | 23.0k | const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times); | 6359 | 23.0k | for (int i = 1; i <= max_retry; ++i) { | 6360 | 23.0k | LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k); | 6361 | 23.0k | ret = delete_recycle_txn_kv(k); | 6362 | | // clang-format off | 6363 | 23.0k | TEST_SYNC_POINT_CALLBACK( | 6364 | 23.0k | "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret); | 6365 | | // clang-format off | 6366 | 23.0k | if (ret != 1) { | 6367 | 23.0k | break; | 6368 | 23.0k | } | 6369 | | // random sleep 0-100 ms to retry | 6370 | 23.0k | std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100)); | 6371 | 23.0k | } | 6372 | 23.0k | } | 6373 | 23.0k | if (ret != 0) { | 6374 | 23.0k | LOG_WARNING("failed to delete recycle txn kv") | 6375 | 23.0k | .tag("instance id", instance_id_) | 6376 | 23.0k | .tag("key", hex(k)); | 6377 | 23.0k | return -1; | 6378 | 23.0k | } | 6379 | 23.0k | return 0; | 6380 | 23.0k | }); | 6381 | 23.0k | } | 6382 | 9 | bool finished = true; | 6383 | 9 | std::vector<int> rets = concurrent_delete_executor.when_all(&finished); | 6384 | 23.0k | for (int r : rets) { | 6385 | 23.0k | if (r != 0) { | 6386 | 9 | ret = -1; | 6387 | 9 | } | 6388 | 23.0k | } | 6389 | | | 6390 | 9 | ret = finished ? 
ret : -1; | 6391 | | | 6392 | | // Update metrics after all concurrent tasks completed | 6393 | 9 | metrics_context.total_recycled_num = num_recycled.load(); | 6394 | 9 | metrics_context.report(); | 6395 | | | 6396 | 9 | TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret); | 6397 | | | 6398 | 9 | if (ret != 0) { | 6399 | 3 | LOG_WARNING("recycle txn kv ret!=0") | 6400 | 3 | .tag("finished", finished) | 6401 | 3 | .tag("ret", ret) | 6402 | 3 | .tag("instance_id", instance_id_); | 6403 | 3 | return ret; | 6404 | 3 | } | 6405 | 6 | return ret; | 6406 | 9 | }; |
|
6407 | | |
6408 | 19 | if (config::enable_recycler_stats_metrics) { |
6409 | 0 | scan_and_statistics_expired_txn_label(); |
6410 | 0 | } |
6411 | | // recycle_func and loop_done for scan and recycle |
6412 | 19 | return scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key, |
6413 | 19 | std::move(handle_recycle_txn_kv), std::move(loop_done)); |
6414 | 19 | } |
6415 | | |
/// Identifiers describing one copy job. BatchObjStoreAccessor::add() uses them to build the
/// log-trace text and the copy-file keys of the job.
struct CopyJobIdTuple {
    std::string instance_id;
    std::string stage_id;
    // Fixed-width to match the int64_t decoded from the copy-job key; zero-initialized so a
    // default-constructed tuple never carries an indeterminate id.
    int64_t table_id = 0;
    // Printed as "query_id" in the log trace.
    std::string copy_id;
    // Printed as "path" in the log trace.
    std::string stage_path;
};
6423 | | struct BatchObjStoreAccessor { |
6424 | | BatchObjStoreAccessor(std::shared_ptr<StorageVaultAccessor> accessor, uint64_t& batch_count, |
6425 | | TxnKv* txn_kv) |
6426 | 3 | : accessor_(std::move(accessor)), batch_count_(batch_count), txn_kv_(txn_kv) {}; |
6427 | 3 | ~BatchObjStoreAccessor() { |
6428 | 3 | if (!paths_.empty()) { |
6429 | 3 | consume(); |
6430 | 3 | } |
6431 | 3 | } |
6432 | | |
6433 | | /** |
6434 | | * To implicitely do batch work and submit the batch delete task to s3 |
6435 | | * The s3 delete opreations would be done in batches, and then delete CopyJobPB key one by one |
6436 | | * |
6437 | | * @param copy_job The protubuf struct consists of the copy job files. |
6438 | | * @param key The copy job's key on fdb, the key is originally occupied by fdb range iterator, to make sure |
6439 | | * it would last until we finish the delete task, here we need pass one string value |
6440 | | * @param cope_job_id_tuple One tuple {log_trace instance_id, stage_id, table_id, query_id, stage_path} to print log |
6441 | | */ |
6442 | 5 | void add(CopyJobPB copy_job, std::string key, const CopyJobIdTuple cope_job_id_tuple) { |
6443 | 5 | auto& [instance_id, stage_id, table_id, copy_id, path] = cope_job_id_tuple; |
6444 | 5 | auto& file_keys = copy_file_keys_[key]; |
6445 | 5 | file_keys.log_trace = |
6446 | 5 | fmt::format("instance_id={}, stage_id={}, table_id={}, query_id={}, path={}", |
6447 | 5 | instance_id, stage_id, table_id, copy_id, path); |
6448 | 5 | std::string_view log_trace = file_keys.log_trace; |
6449 | 2.03k | for (const auto& file : copy_job.object_files()) { |
6450 | 2.03k | auto relative_path = file.relative_path(); |
6451 | 2.03k | paths_.push_back(relative_path); |
6452 | 2.03k | file_keys.keys.push_back(copy_file_key( |
6453 | 2.03k | {instance_id, stage_id, table_id, file.relative_path(), file.etag()})); |
6454 | 2.03k | LOG_INFO(log_trace) |
6455 | 2.03k | .tag("relative_path", relative_path) |
6456 | 2.03k | .tag("batch_count", batch_count_); |
6457 | 2.03k | } |
6458 | 5 | LOG_INFO(log_trace) |
6459 | 5 | .tag("objects_num", copy_job.object_files().size()) |
6460 | 5 | .tag("batch_count", batch_count_); |
6461 | | // TODO(AlexYue): If the size is 1001, it would be one delete with 1000 objects and one delete request with only one object(**ATTN**: DOESN'T |
6462 | | // recommend using delete objects when objects num is less than 10) |
6463 | 5 | if (paths_.size() < 1000) { |
6464 | 3 | return; |
6465 | 3 | } |
6466 | 2 | consume(); |
6467 | 2 | } |
6468 | | |
6469 | | private: |
6470 | 5 | void consume() { |
6471 | 5 | DORIS_CLOUD_DEFER { |
6472 | 5 | paths_.clear(); |
6473 | 5 | copy_file_keys_.clear(); |
6474 | 5 | batch_count_++; |
6475 | | |
6476 | 5 | LOG_WARNING("begin to delete {} internal stage objects in batch {}", paths_.size(), |
6477 | 5 | batch_count_); |
6478 | 5 | }; |
6479 | | |
6480 | 5 | StopWatch sw; |
6481 | | // TODO(yuejing): 在accessor的delete_objets的实现里可以考虑如果_paths数量不超过10个的话,就直接发10个delete objection operation而不是发post |
6482 | 5 | if (0 != accessor_->delete_files(paths_)) { |
6483 | 2 | LOG_WARNING("failed to delete {} internal stage objects in batch {} and it takes {} us", |
6484 | 2 | paths_.size(), batch_count_, sw.elapsed_us()); |
6485 | 2 | return; |
6486 | 2 | } |
6487 | 3 | LOG_WARNING("succeed to delete {} internal stage objects in batch {} and it takes {} us", |
6488 | 3 | paths_.size(), batch_count_, sw.elapsed_us()); |
6489 | | // delete fdb's keys |
6490 | 3 | for (auto& file_keys : copy_file_keys_) { |
6491 | 3 | auto& [log_trace, keys] = file_keys.second; |
6492 | 3 | std::unique_ptr<Transaction> txn; |
6493 | 3 | if (txn_kv_->create_txn(&txn) != cloud::TxnErrorCode::TXN_OK) { |
6494 | 0 | LOG(WARNING) << "failed to create txn"; |
6495 | 0 | continue; |
6496 | 0 | } |
6497 | | // FIXME: We have already limited the file num and file meta size when selecting file in FE. |
6498 | | // And if too many copy files, begin_copy failed commit too. So here the copy file keys are |
6499 | | // limited, should not cause the txn commit failed. |
6500 | 1.02k | for (const auto& key : keys) { |
6501 | 1.02k | txn->remove(key); |
6502 | 1.02k | LOG_INFO("remove copy_file_key={}, {}", hex(key), log_trace); |
6503 | 1.02k | } |
6504 | 3 | txn->remove(file_keys.first); |
6505 | 3 | if (auto ret = txn->commit(); ret != cloud::TxnErrorCode::TXN_OK) { |
6506 | 0 | LOG(WARNING) << "failed to commit txn ret is " << ret; |
6507 | 0 | continue; |
6508 | 0 | } |
6509 | 3 | } |
6510 | 3 | } |
6511 | | std::shared_ptr<StorageVaultAccessor> accessor_; |
6512 | | // the path of the s3 files to be deleted |
6513 | | std::vector<std::string> paths_; |
6514 | | struct CopyFiles { |
6515 | | std::string log_trace; |
6516 | | std::vector<std::string> keys; |
6517 | | }; |
6518 | | // pair<std::string, std::vector<std::string>> |
6519 | | // first: instance_id_ stage_id table_id query_id |
6520 | | // second: keys to be deleted |
6521 | | // <fdb key, <{instance_id_ stage_id table_id query_id}, file keys to be deleted>> |
6522 | | std::unordered_map<std::string, CopyFiles> copy_file_keys_; |
6523 | | // used to distinguish different batch tasks, the task log consists of thread ID and batch number |
6524 | | // which can together uniquely identifies different tasks for tracing log |
6525 | | uint64_t& batch_count_; |
6526 | | TxnKv* txn_kv_; |
6527 | | }; |
6528 | | |
6529 | 13 | int InstanceRecycler::recycle_copy_jobs() { |
6530 | 13 | int64_t num_scanned = 0; |
6531 | 13 | int64_t num_finished = 0; |
6532 | 13 | int64_t num_expired = 0; |
6533 | 13 | int64_t num_recycled = 0; |
6534 | | // Used for INTERNAL stage's copy jobs to tag each batch for log trace |
6535 | 13 | uint64_t batch_count = 0; |
6536 | 13 | const std::string task_name = "recycle_copy_jobs"; |
6537 | 13 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
6538 | | |
6539 | 13 | LOG_WARNING("begin to recycle copy jobs").tag("instance_id", instance_id_); |
6540 | | |
6541 | 13 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
6542 | 13 | register_recycle_task(task_name, start_time); |
6543 | | |
6544 | 13 | DORIS_CLOUD_DEFER { |
6545 | 13 | unregister_recycle_task(task_name); |
6546 | 13 | int64_t cost = |
6547 | 13 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
6548 | 13 | metrics_context.finish_report(); |
6549 | 13 | LOG_WARNING("recycle copy jobs finished, cost={}s", cost) |
6550 | 13 | .tag("instance_id", instance_id_) |
6551 | 13 | .tag("num_scanned", num_scanned) |
6552 | 13 | .tag("num_finished", num_finished) |
6553 | 13 | .tag("num_expired", num_expired) |
6554 | 13 | .tag("num_recycled", num_recycled); |
6555 | 13 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv Line | Count | Source | 6544 | 13 | DORIS_CLOUD_DEFER { | 6545 | 13 | unregister_recycle_task(task_name); | 6546 | 13 | int64_t cost = | 6547 | 13 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 6548 | 13 | metrics_context.finish_report(); | 6549 | 13 | LOG_WARNING("recycle copy jobs finished, cost={}s", cost) | 6550 | 13 | .tag("instance_id", instance_id_) | 6551 | 13 | .tag("num_scanned", num_scanned) | 6552 | 13 | .tag("num_finished", num_finished) | 6553 | 13 | .tag("num_expired", num_expired) | 6554 | 13 | .tag("num_recycled", num_recycled); | 6555 | 13 | }; |
|
6556 | | |
6557 | 13 | CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0}; |
6558 | 13 | CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0}; |
6559 | 13 | std::string key0; |
6560 | 13 | std::string key1; |
6561 | 13 | copy_job_key(key_info0, &key0); |
6562 | 13 | copy_job_key(key_info1, &key1); |
6563 | 13 | std::unordered_map<std::string, std::shared_ptr<BatchObjStoreAccessor>> stage_accessor_map; |
6564 | 13 | auto recycle_func = [&start_time, &num_scanned, &num_finished, &num_expired, &num_recycled, |
6565 | 13 | &batch_count, &stage_accessor_map, &task_name, &metrics_context, |
6566 | 16 | this](std::string_view k, std::string_view v) -> int { |
6567 | 16 | ++num_scanned; |
6568 | 16 | CopyJobPB copy_job; |
6569 | 16 | if (!copy_job.ParseFromArray(v.data(), v.size())) { |
6570 | 0 | LOG_WARNING("malformed copy job").tag("key", hex(k)); |
6571 | 0 | return -1; |
6572 | 0 | } |
6573 | | |
6574 | | // decode copy job key |
6575 | 16 | auto k1 = k; |
6576 | 16 | k1.remove_prefix(1); |
6577 | 16 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
6578 | 16 | decode_key(&k1, &out); |
6579 | | // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id} |
6580 | | // -> CopyJobPB |
6581 | 16 | const auto& stage_id = std::get<std::string>(std::get<0>(out[3])); |
6582 | 16 | const auto& table_id = std::get<int64_t>(std::get<0>(out[4])); |
6583 | 16 | const auto& copy_id = std::get<std::string>(std::get<0>(out[5])); |
6584 | | |
6585 | 16 | bool check_storage = true; |
6586 | 16 | if (copy_job.job_status() == CopyJobPB::FINISH) { |
6587 | 12 | ++num_finished; |
6588 | | |
6589 | 12 | if (copy_job.stage_type() == StagePB::INTERNAL) { |
6590 | 7 | auto it = stage_accessor_map.find(stage_id); |
6591 | 7 | std::shared_ptr<BatchObjStoreAccessor> accessor; |
6592 | 7 | std::string_view path; |
6593 | 7 | if (it != stage_accessor_map.end()) { |
6594 | 2 | accessor = it->second; |
6595 | 5 | } else { |
6596 | 5 | std::shared_ptr<StorageVaultAccessor> inner_accessor; |
6597 | 5 | auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(), |
6598 | 5 | &inner_accessor); |
6599 | 5 | if (ret < 0) { // error |
6600 | 0 | LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret); |
6601 | 0 | return -1; |
6602 | 5 | } else if (ret == 0) { |
6603 | 3 | path = inner_accessor->uri(); |
6604 | 3 | accessor = std::make_shared<BatchObjStoreAccessor>( |
6605 | 3 | inner_accessor, batch_count, txn_kv_.get()); |
6606 | 3 | stage_accessor_map.emplace(stage_id, accessor); |
6607 | 3 | } else { // stage not found, skip check storage |
6608 | 2 | check_storage = false; |
6609 | 2 | } |
6610 | 5 | } |
6611 | 7 | if (check_storage) { |
6612 | | // TODO delete objects with key and etag is not supported |
6613 | 5 | accessor->add(std::move(copy_job), std::string(k), |
6614 | 5 | {instance_id_, stage_id, table_id, copy_id, std::string(path)}); |
6615 | 5 | return 0; |
6616 | 5 | } |
6617 | 7 | } else if (copy_job.stage_type() == StagePB::EXTERNAL) { |
6618 | 5 | int64_t current_time = |
6619 | 5 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
6620 | 5 | if (copy_job.finish_time_ms() > 0) { |
6621 | 2 | if (!config::force_immediate_recycle && |
6622 | 2 | current_time < copy_job.finish_time_ms() + |
6623 | 2 | config::copy_job_max_retention_second * 1000) { |
6624 | 1 | return 0; |
6625 | 1 | } |
6626 | 3 | } else { |
6627 | | // For compatibility, copy job does not contain finish time before 2.2.2, use start time |
6628 | 3 | if (!config::force_immediate_recycle && |
6629 | 3 | current_time < copy_job.start_time_ms() + |
6630 | 3 | config::copy_job_max_retention_second * 1000) { |
6631 | 1 | return 0; |
6632 | 1 | } |
6633 | 3 | } |
6634 | 5 | } |
6635 | 12 | } else if (copy_job.job_status() == CopyJobPB::LOADING) { |
6636 | 4 | int64_t current_time = |
6637 | 4 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
6638 | | // if copy job is timeout: delete all copy file kvs and copy job kv |
6639 | 4 | if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) { |
6640 | 2 | return 0; |
6641 | 2 | } |
6642 | 2 | ++num_expired; |
6643 | 2 | } |
6644 | | |
6645 | | // delete all copy files |
6646 | 7 | std::vector<std::string> copy_file_keys; |
6647 | 70 | for (auto& file : copy_job.object_files()) { |
6648 | 70 | copy_file_keys.push_back(copy_file_key( |
6649 | 70 | {instance_id_, stage_id, table_id, file.relative_path(), file.etag()})); |
6650 | 70 | } |
6651 | 7 | std::unique_ptr<Transaction> txn; |
6652 | 7 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { |
6653 | 0 | LOG(WARNING) << "failed to create txn"; |
6654 | 0 | return -1; |
6655 | 0 | } |
6656 | | // FIXME: We have already limited the file num and file meta size when selecting file in FE. |
6657 | | // And if too many copy files, begin_copy failed commit too. So here the copy file keys are |
6658 | | // limited, should not cause the txn commit failed. |
6659 | 70 | for (const auto& key : copy_file_keys) { |
6660 | 70 | txn->remove(key); |
6661 | 70 | LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_ |
6662 | 70 | << ", stage_id=" << stage_id << ", table_id=" << table_id |
6663 | 70 | << ", query_id=" << copy_id; |
6664 | 70 | } |
6665 | 7 | txn->remove(k); |
6666 | 7 | TxnErrorCode err = txn->commit(); |
6667 | 7 | if (err != TxnErrorCode::TXN_OK) { |
6668 | 0 | LOG(WARNING) << "failed to commit txn, err=" << err; |
6669 | 0 | return -1; |
6670 | 0 | } |
6671 | | |
6672 | 7 | metrics_context.total_recycled_num = ++num_recycled; |
6673 | 7 | metrics_context.report(); |
6674 | 7 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); |
6675 | 7 | return 0; |
6676 | 7 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 6566 | 16 | this](std::string_view k, std::string_view v) -> int { | 6567 | 16 | ++num_scanned; | 6568 | 16 | CopyJobPB copy_job; | 6569 | 16 | if (!copy_job.ParseFromArray(v.data(), v.size())) { | 6570 | 0 | LOG_WARNING("malformed copy job").tag("key", hex(k)); | 6571 | 0 | return -1; | 6572 | 0 | } | 6573 | | | 6574 | | // decode copy job key | 6575 | 16 | auto k1 = k; | 6576 | 16 | k1.remove_prefix(1); | 6577 | 16 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; | 6578 | 16 | decode_key(&k1, &out); | 6579 | | // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id} | 6580 | | // -> CopyJobPB | 6581 | 16 | const auto& stage_id = std::get<std::string>(std::get<0>(out[3])); | 6582 | 16 | const auto& table_id = std::get<int64_t>(std::get<0>(out[4])); | 6583 | 16 | const auto& copy_id = std::get<std::string>(std::get<0>(out[5])); | 6584 | | | 6585 | 16 | bool check_storage = true; | 6586 | 16 | if (copy_job.job_status() == CopyJobPB::FINISH) { | 6587 | 12 | ++num_finished; | 6588 | | | 6589 | 12 | if (copy_job.stage_type() == StagePB::INTERNAL) { | 6590 | 7 | auto it = stage_accessor_map.find(stage_id); | 6591 | 7 | std::shared_ptr<BatchObjStoreAccessor> accessor; | 6592 | 7 | std::string_view path; | 6593 | 7 | if (it != stage_accessor_map.end()) { | 6594 | 2 | accessor = it->second; | 6595 | 5 | } else { | 6596 | 5 | std::shared_ptr<StorageVaultAccessor> inner_accessor; | 6597 | 5 | auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(), | 6598 | 5 | &inner_accessor); | 6599 | 5 | if (ret < 0) { // error | 6600 | 0 | LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret); 
| 6601 | 0 | return -1; | 6602 | 5 | } else if (ret == 0) { | 6603 | 3 | path = inner_accessor->uri(); | 6604 | 3 | accessor = std::make_shared<BatchObjStoreAccessor>( | 6605 | 3 | inner_accessor, batch_count, txn_kv_.get()); | 6606 | 3 | stage_accessor_map.emplace(stage_id, accessor); | 6607 | 3 | } else { // stage not found, skip check storage | 6608 | 2 | check_storage = false; | 6609 | 2 | } | 6610 | 5 | } | 6611 | 7 | if (check_storage) { | 6612 | | // TODO delete objects with key and etag is not supported | 6613 | 5 | accessor->add(std::move(copy_job), std::string(k), | 6614 | 5 | {instance_id_, stage_id, table_id, copy_id, std::string(path)}); | 6615 | 5 | return 0; | 6616 | 5 | } | 6617 | 7 | } else if (copy_job.stage_type() == StagePB::EXTERNAL) { | 6618 | 5 | int64_t current_time = | 6619 | 5 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); | 6620 | 5 | if (copy_job.finish_time_ms() > 0) { | 6621 | 2 | if (!config::force_immediate_recycle && | 6622 | 2 | current_time < copy_job.finish_time_ms() + | 6623 | 2 | config::copy_job_max_retention_second * 1000) { | 6624 | 1 | return 0; | 6625 | 1 | } | 6626 | 3 | } else { | 6627 | | // For compatibility, copy job does not contain finish time before 2.2.2, use start time | 6628 | 3 | if (!config::force_immediate_recycle && | 6629 | 3 | current_time < copy_job.start_time_ms() + | 6630 | 3 | config::copy_job_max_retention_second * 1000) { | 6631 | 1 | return 0; | 6632 | 1 | } | 6633 | 3 | } | 6634 | 5 | } | 6635 | 12 | } else if (copy_job.job_status() == CopyJobPB::LOADING) { | 6636 | 4 | int64_t current_time = | 6637 | 4 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); | 6638 | | // if copy job is timeout: delete all copy file kvs and copy job kv | 6639 | 4 | if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) { | 6640 | 2 | return 0; | 6641 | 2 | } | 6642 | 2 | ++num_expired; | 6643 | 2 | } | 6644 | | | 6645 | | // delete 
all copy files | 6646 | 7 | std::vector<std::string> copy_file_keys; | 6647 | 70 | for (auto& file : copy_job.object_files()) { | 6648 | 70 | copy_file_keys.push_back(copy_file_key( | 6649 | 70 | {instance_id_, stage_id, table_id, file.relative_path(), file.etag()})); | 6650 | 70 | } | 6651 | 7 | std::unique_ptr<Transaction> txn; | 6652 | 7 | if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) { | 6653 | 0 | LOG(WARNING) << "failed to create txn"; | 6654 | 0 | return -1; | 6655 | 0 | } | 6656 | | // FIXME: We have already limited the file num and file meta size when selecting file in FE. | 6657 | | // And if too many copy files, begin_copy failed commit too. So here the copy file keys are | 6658 | | // limited, should not cause the txn commit failed. | 6659 | 70 | for (const auto& key : copy_file_keys) { | 6660 | 70 | txn->remove(key); | 6661 | 70 | LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_ | 6662 | 70 | << ", stage_id=" << stage_id << ", table_id=" << table_id | 6663 | 70 | << ", query_id=" << copy_id; | 6664 | 70 | } | 6665 | 7 | txn->remove(k); | 6666 | 7 | TxnErrorCode err = txn->commit(); | 6667 | 7 | if (err != TxnErrorCode::TXN_OK) { | 6668 | 0 | LOG(WARNING) << "failed to commit txn, err=" << err; | 6669 | 0 | return -1; | 6670 | 0 | } | 6671 | | | 6672 | 7 | metrics_context.total_recycled_num = ++num_recycled; | 6673 | 7 | metrics_context.report(); | 6674 | 7 | check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); | 6675 | 7 | return 0; | 6676 | 7 | }; |
|
6677 | | |
6678 | 13 | if (config::enable_recycler_stats_metrics) { |
6679 | 0 | scan_and_statistics_copy_jobs(); |
6680 | 0 | } |
6681 | | // recycle_func and loop_done for scan and recycle |
6682 | 13 | return scan_and_recycle(key0, key1, std::move(recycle_func)); |
6683 | 13 | } |
6684 | | |
6685 | | int InstanceRecycler::init_copy_job_accessor(const std::string& stage_id, |
6686 | | const StagePB::StageType& stage_type, |
6687 | 5 | std::shared_ptr<StorageVaultAccessor>* accessor) { |
6688 | 5 | #ifdef UNIT_TEST |
6689 | | // In unit test, external use the same accessor as the internal stage |
6690 | 5 | auto it = accessor_map_.find(stage_id); |
6691 | 5 | if (it != accessor_map_.end()) { |
6692 | 3 | *accessor = it->second; |
6693 | 3 | } else { |
6694 | 2 | std::cout << "UT can not find accessor with stage_id: " << stage_id << std::endl; |
6695 | 2 | return 1; |
6696 | 2 | } |
6697 | | #else |
6698 | | // init s3 accessor and add to accessor map |
6699 | | auto stage_it = |
6700 | | std::find_if(instance_info_.stages().begin(), instance_info_.stages().end(), |
6701 | | [&stage_id](auto&& stage) { return stage.stage_id() == stage_id; }); |
6702 | | |
6703 | | if (stage_it == instance_info_.stages().end()) { |
6704 | | LOG(INFO) << "Recycle nonexisted stage copy jobs. instance_id=" << instance_id_ |
6705 | | << ", stage_id=" << stage_id << ", stage_type=" << stage_type; |
6706 | | return 1; |
6707 | | } |
6708 | | |
6709 | | const auto& object_store_info = stage_it->obj_info(); |
6710 | | auto stage_access_type = stage_it->has_access_type() ? stage_it->access_type() : StagePB::AKSK; |
6711 | | |
6712 | | S3Conf s3_conf; |
6713 | | if (stage_type == StagePB::EXTERNAL) { |
6714 | | if (stage_access_type == StagePB::AKSK) { |
6715 | | auto conf = S3Conf::from_obj_store_info(object_store_info); |
6716 | | if (!conf) { |
6717 | | return -1; |
6718 | | } |
6719 | | |
6720 | | s3_conf = std::move(*conf); |
6721 | | } else if (stage_access_type == StagePB::BUCKET_ACL) { |
6722 | | auto conf = S3Conf::from_obj_store_info(object_store_info, true /* skip_aksk */); |
6723 | | if (!conf) { |
6724 | | return -1; |
6725 | | } |
6726 | | |
6727 | | s3_conf = std::move(*conf); |
6728 | | if (instance_info_.ram_user().has_encryption_info()) { |
6729 | | AkSkPair plain_ak_sk_pair; |
6730 | | int ret = decrypt_ak_sk_helper( |
6731 | | instance_info_.ram_user().ak(), instance_info_.ram_user().sk(), |
6732 | | instance_info_.ram_user().encryption_info(), &plain_ak_sk_pair); |
6733 | | if (ret != 0) { |
6734 | | LOG(WARNING) << "fail to decrypt ak sk. instance_id: " << instance_id_ |
6735 | | << " ram_user: " << proto_to_json(instance_info_.ram_user()); |
6736 | | return -1; |
6737 | | } |
6738 | | s3_conf.ak = std::move(plain_ak_sk_pair.first); |
6739 | | s3_conf.sk = std::move(plain_ak_sk_pair.second); |
6740 | | } else { |
6741 | | s3_conf.ak = instance_info_.ram_user().ak(); |
6742 | | s3_conf.sk = instance_info_.ram_user().sk(); |
6743 | | } |
6744 | | } else { |
6745 | | LOG(INFO) << "Unsupported stage access type=" << stage_access_type |
6746 | | << ", instance_id=" << instance_id_ << ", stage_id=" << stage_id; |
6747 | | return -1; |
6748 | | } |
6749 | | } else if (stage_type == StagePB::INTERNAL) { |
6750 | | int idx = stoi(object_store_info.id()); |
6751 | | if (idx > instance_info_.obj_info().size() || idx < 1) { |
6752 | | LOG(WARNING) << "invalid idx: " << idx; |
6753 | | return -1; |
6754 | | } |
6755 | | |
6756 | | const auto& old_obj = instance_info_.obj_info()[idx - 1]; |
6757 | | auto conf = S3Conf::from_obj_store_info(old_obj); |
6758 | | if (!conf) { |
6759 | | return -1; |
6760 | | } |
6761 | | |
6762 | | s3_conf = std::move(*conf); |
6763 | | s3_conf.prefix = object_store_info.prefix(); |
6764 | | } else { |
6765 | | LOG(WARNING) << "unknown stage type " << stage_type; |
6766 | | return -1; |
6767 | | } |
6768 | | |
6769 | | std::shared_ptr<S3Accessor> s3_accessor; |
6770 | | int ret = S3Accessor::create(std::move(s3_conf), &s3_accessor); |
6771 | | if (ret != 0) { |
6772 | | LOG(WARNING) << "failed to init s3 accessor ret=" << ret; |
6773 | | return -1; |
6774 | | } |
6775 | | |
6776 | | *accessor = std::move(s3_accessor); |
6777 | | #endif |
6778 | 3 | return 0; |
6779 | 5 | } |
6780 | | |
6781 | 11 | int InstanceRecycler::recycle_stage() { |
6782 | 11 | int64_t num_scanned = 0; |
6783 | 11 | int64_t num_recycled = 0; |
6784 | 11 | const std::string task_name = "recycle_stage"; |
6785 | 11 | RecyclerMetricsContext metrics_context(instance_id_, task_name); |
6786 | | |
6787 | 11 | LOG_WARNING("begin to recycle stage").tag("instance_id", instance_id_); |
6788 | | |
6789 | 11 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
6790 | 11 | register_recycle_task(task_name, start_time); |
6791 | | |
6792 | 11 | DORIS_CLOUD_DEFER { |
6793 | 11 | unregister_recycle_task(task_name); |
6794 | 11 | int64_t cost = |
6795 | 11 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
6796 | 11 | metrics_context.finish_report(); |
6797 | 11 | LOG_WARNING("recycle stage, cost={}s", cost) |
6798 | 11 | .tag("instance_id", instance_id_) |
6799 | 11 | .tag("num_scanned", num_scanned) |
6800 | 11 | .tag("num_recycled", num_recycled); |
6801 | 11 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv Line | Count | Source | 6792 | 11 | DORIS_CLOUD_DEFER { | 6793 | 11 | unregister_recycle_task(task_name); | 6794 | 11 | int64_t cost = | 6795 | 11 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 6796 | 11 | metrics_context.finish_report(); | 6797 | 11 | LOG_WARNING("recycle stage, cost={}s", cost) | 6798 | 11 | .tag("instance_id", instance_id_) | 6799 | 11 | .tag("num_scanned", num_scanned) | 6800 | 11 | .tag("num_recycled", num_recycled); | 6801 | 11 | }; |
|
6802 | | |
6803 | 11 | RecycleStageKeyInfo key_info0 {instance_id_, ""}; |
6804 | 11 | RecycleStageKeyInfo key_info1 {instance_id_, "\xff"}; |
6805 | 11 | std::string key0 = recycle_stage_key(key_info0); |
6806 | 11 | std::string key1 = recycle_stage_key(key_info1); |
6807 | | |
6808 | 11 | std::vector<std::string_view> stage_keys; |
6809 | 11 | auto recycle_func = [&start_time, &num_scanned, &num_recycled, &stage_keys, &metrics_context, |
6810 | 11 | this](std::string_view k, std::string_view v) -> int { |
6811 | 1 | ++num_scanned; |
6812 | 1 | RecycleStagePB recycle_stage; |
6813 | 1 | if (!recycle_stage.ParseFromArray(v.data(), v.size())) { |
6814 | 0 | LOG_WARNING("malformed recycle stage").tag("key", hex(k)); |
6815 | 0 | return -1; |
6816 | 0 | } |
6817 | | |
6818 | 1 | int idx = stoi(recycle_stage.stage().obj_info().id()); |
6819 | 1 | if (idx > instance_info_.obj_info().size() || idx < 1) { |
6820 | 0 | LOG(WARNING) << "invalid idx: " << idx; |
6821 | 0 | return -1; |
6822 | 0 | } |
6823 | | |
6824 | 1 | std::shared_ptr<StorageVaultAccessor> accessor; |
6825 | 1 | int ret = SYNC_POINT_HOOK_RETURN_VALUE( |
6826 | 1 | [&] { |
6827 | 1 | auto& old_obj = instance_info_.obj_info()[idx - 1]; |
6828 | 1 | auto s3_conf = S3Conf::from_obj_store_info(old_obj); |
6829 | 1 | if (!s3_conf) { |
6830 | 1 | return -1; |
6831 | 1 | } |
6832 | | |
6833 | 1 | s3_conf->prefix = recycle_stage.stage().obj_info().prefix(); |
6834 | 1 | std::shared_ptr<S3Accessor> s3_accessor; |
6835 | 1 | int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor); |
6836 | 1 | if (ret != 0) { |
6837 | 1 | return -1; |
6838 | 1 | } |
6839 | | |
6840 | 1 | accessor = std::move(s3_accessor); |
6841 | 1 | return 0; |
6842 | 1 | }(), |
6843 | 1 | "recycle_stage:get_accessor", &accessor); |
6844 | | |
6845 | 1 | if (ret != 0) { |
6846 | 0 | LOG(WARNING) << "failed to init accessor ret=" << ret; |
6847 | 0 | return ret; |
6848 | 0 | } |
6849 | | |
6850 | 1 | LOG_WARNING("begin to delete objects of dropped internal stage") |
6851 | 1 | .tag("instance_id", instance_id_) |
6852 | 1 | .tag("stage_id", recycle_stage.stage().stage_id()) |
6853 | 1 | .tag("user_name", recycle_stage.stage().mysql_user_name()[0]) |
6854 | 1 | .tag("user_id", recycle_stage.stage().mysql_user_id()[0]) |
6855 | 1 | .tag("obj_info_id", idx) |
6856 | 1 | .tag("prefix", recycle_stage.stage().obj_info().prefix()); |
6857 | 1 | ret = accessor->delete_all(); |
6858 | 1 | if (ret != 0) { |
6859 | 0 | LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id=" |
6860 | 0 | << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id() |
6861 | 0 | << ", prefix=" << recycle_stage.stage().obj_info().prefix() |
6862 | 0 | << ", ret=" << ret; |
6863 | 0 | return -1; |
6864 | 0 | } |
6865 | 1 | metrics_context.total_recycled_num = ++num_recycled; |
6866 | 1 | metrics_context.report(); |
6867 | 1 | check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time); |
6868 | 1 | stage_keys.push_back(k); |
6869 | 1 | return 0; |
6870 | 1 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_ Line | Count | Source | 6810 | 1 | this](std::string_view k, std::string_view v) -> int { | 6811 | 1 | ++num_scanned; | 6812 | 1 | RecycleStagePB recycle_stage; | 6813 | 1 | if (!recycle_stage.ParseFromArray(v.data(), v.size())) { | 6814 | 0 | LOG_WARNING("malformed recycle stage").tag("key", hex(k)); | 6815 | 0 | return -1; | 6816 | 0 | } | 6817 | | | 6818 | 1 | int idx = stoi(recycle_stage.stage().obj_info().id()); | 6819 | 1 | if (idx > instance_info_.obj_info().size() || idx < 1) { | 6820 | 0 | LOG(WARNING) << "invalid idx: " << idx; | 6821 | 0 | return -1; | 6822 | 0 | } | 6823 | | | 6824 | 1 | std::shared_ptr<StorageVaultAccessor> accessor; | 6825 | 1 | int ret = SYNC_POINT_HOOK_RETURN_VALUE( | 6826 | 1 | [&] { | 6827 | 1 | auto& old_obj = instance_info_.obj_info()[idx - 1]; | 6828 | 1 | auto s3_conf = S3Conf::from_obj_store_info(old_obj); | 6829 | 1 | if (!s3_conf) { | 6830 | 1 | return -1; | 6831 | 1 | } | 6832 | | | 6833 | 1 | s3_conf->prefix = recycle_stage.stage().obj_info().prefix(); | 6834 | 1 | std::shared_ptr<S3Accessor> s3_accessor; | 6835 | 1 | int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor); | 6836 | 1 | if (ret != 0) { | 6837 | 1 | return -1; | 6838 | 1 | } | 6839 | | | 6840 | 1 | accessor = std::move(s3_accessor); | 6841 | 1 | return 0; | 6842 | 1 | }(), | 6843 | 1 | "recycle_stage:get_accessor", &accessor); | 6844 | | | 6845 | 1 | if (ret != 0) { | 6846 | 0 | LOG(WARNING) << "failed to init accessor ret=" << ret; | 6847 | 0 | return ret; | 6848 | 0 | } | 6849 | | | 6850 | 1 | LOG_WARNING("begin to delete objects of dropped internal stage") | 6851 | 1 | .tag("instance_id", instance_id_) | 6852 | 1 | .tag("stage_id", 
recycle_stage.stage().stage_id()) | 6853 | 1 | .tag("user_name", recycle_stage.stage().mysql_user_name()[0]) | 6854 | 1 | .tag("user_id", recycle_stage.stage().mysql_user_id()[0]) | 6855 | 1 | .tag("obj_info_id", idx) | 6856 | 1 | .tag("prefix", recycle_stage.stage().obj_info().prefix()); | 6857 | 1 | ret = accessor->delete_all(); | 6858 | 1 | if (ret != 0) { | 6859 | 0 | LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id=" | 6860 | 0 | << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id() | 6861 | 0 | << ", prefix=" << recycle_stage.stage().obj_info().prefix() | 6862 | 0 | << ", ret=" << ret; | 6863 | 0 | return -1; | 6864 | 0 | } | 6865 | 1 | metrics_context.total_recycled_num = ++num_recycled; | 6866 | 1 | metrics_context.report(); | 6867 | 1 | check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time); | 6868 | 1 | stage_keys.push_back(k); | 6869 | 1 | return 0; | 6870 | 1 | }; |
|
6871 | | |
6872 | 11 | auto loop_done = [&stage_keys, this]() -> int { |
6873 | 1 | if (stage_keys.empty()) return 0; |
6874 | 1 | DORIS_CLOUD_DEFER { |
6875 | 1 | stage_keys.clear(); |
6876 | 1 | }; Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv Line | Count | Source | 6874 | 1 | DORIS_CLOUD_DEFER { | 6875 | 1 | stage_keys.clear(); | 6876 | 1 | }; |
|
6877 | 1 | if (0 != txn_remove(txn_kv_.get(), stage_keys)) { |
6878 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_; |
6879 | 0 | return -1; |
6880 | 0 | } |
6881 | 1 | return 0; |
6882 | 1 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv Line | Count | Source | 6872 | 1 | auto loop_done = [&stage_keys, this]() -> int { | 6873 | 1 | if (stage_keys.empty()) return 0; | 6874 | 1 | DORIS_CLOUD_DEFER { | 6875 | 1 | stage_keys.clear(); | 6876 | 1 | }; | 6877 | 1 | if (0 != txn_remove(txn_kv_.get(), stage_keys)) { | 6878 | 0 | LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_; | 6879 | 0 | return -1; | 6880 | 0 | } | 6881 | 1 | return 0; | 6882 | 1 | }; |
|
6883 | 11 | if (config::enable_recycler_stats_metrics) { |
6884 | 0 | scan_and_statistics_stage(); |
6885 | 0 | } |
6886 | | // recycle_func and loop_done for scan and recycle |
6887 | 11 | return scan_and_recycle(key0, key1, std::move(recycle_func), std::move(loop_done)); |
6888 | 11 | } |
6889 | | |
6890 | 10 | int InstanceRecycler::recycle_expired_stage_objects() { |
6891 | 10 | LOG_WARNING("begin to recycle expired stage objects").tag("instance_id", instance_id_); |
6892 | | |
6893 | 10 | int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
6894 | 10 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects"); |
6895 | | |
6896 | 10 | DORIS_CLOUD_DEFER { |
6897 | 10 | int64_t cost = |
6898 | 10 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; |
6899 | 10 | metrics_context.finish_report(); |
6900 | 10 | LOG_WARNING("recycle expired stage objects, cost={}s", cost) |
6901 | 10 | .tag("instance_id", instance_id_); |
6902 | 10 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv Line | Count | Source | 6896 | 10 | DORIS_CLOUD_DEFER { | 6897 | 10 | int64_t cost = | 6898 | 10 | duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; | 6899 | 10 | metrics_context.finish_report(); | 6900 | 10 | LOG_WARNING("recycle expired stage objects, cost={}s", cost) | 6901 | 10 | .tag("instance_id", instance_id_); | 6902 | 10 | }; |
|
6903 | | |
6904 | 10 | int ret = 0; |
6905 | | |
6906 | 10 | if (config::enable_recycler_stats_metrics) { |
6907 | 0 | scan_and_statistics_expired_stage_objects(); |
6908 | 0 | } |
6909 | | |
6910 | 10 | for (const auto& stage : instance_info_.stages()) { |
6911 | 0 | std::stringstream ss; |
6912 | 0 | ss << "instance_id=" << instance_id_ << ", stage_id=" << stage.stage_id() << ", user_name=" |
6913 | 0 | << (stage.mysql_user_name().empty() ? "null" : stage.mysql_user_name().at(0)) |
6914 | 0 | << ", user_id=" << (stage.mysql_user_id().empty() ? "null" : stage.mysql_user_id().at(0)) |
6915 | 0 | << ", prefix=" << stage.obj_info().prefix(); |
6916 | |
|
6917 | 0 | if (stopped()) { |
6918 | 0 | break; |
6919 | 0 | } |
6920 | 0 | if (stage.type() == StagePB::EXTERNAL) { |
6921 | 0 | continue; |
6922 | 0 | } |
6923 | 0 | int idx = stoi(stage.obj_info().id()); |
6924 | 0 | if (idx > instance_info_.obj_info().size() || idx < 1) { |
6925 | 0 | LOG(WARNING) << "invalid idx: " << idx << ", id: " << stage.obj_info().id(); |
6926 | 0 | continue; |
6927 | 0 | } |
6928 | | |
6929 | 0 | const auto& old_obj = instance_info_.obj_info()[idx - 1]; |
6930 | 0 | auto s3_conf = S3Conf::from_obj_store_info(old_obj); |
6931 | 0 | if (!s3_conf) { |
6932 | 0 | LOG(WARNING) << "failed to init s3_conf with obj_info=" << old_obj.ShortDebugString(); |
6933 | 0 | continue; |
6934 | 0 | } |
6935 | | |
6936 | 0 | s3_conf->prefix = stage.obj_info().prefix(); |
6937 | 0 | std::shared_ptr<S3Accessor> accessor; |
6938 | 0 | int ret1 = S3Accessor::create(*s3_conf, &accessor); |
6939 | 0 | if (ret1 != 0) { |
6940 | 0 | LOG(WARNING) << "failed to init s3 accessor ret=" << ret1 << " " << ss.str(); |
6941 | 0 | ret = -1; |
6942 | 0 | continue; |
6943 | 0 | } |
6944 | | |
6945 | 0 | if (s3_conf->prefix.find("/stage/") == std::string::npos) { |
6946 | 0 | LOG(WARNING) << "try to delete illegal prefix, which is catastrophic, " << ss.str(); |
6947 | 0 | ret = -1; |
6948 | 0 | continue; |
6949 | 0 | } |
6950 | | |
6951 | 0 | LOG(INFO) << "recycle expired stage objects, " << ss.str(); |
6952 | 0 | int64_t expiration_time = |
6953 | 0 | duration_cast<seconds>(system_clock::now().time_since_epoch()).count() - |
6954 | 0 | config::internal_stage_objects_expire_time_second; |
6955 | 0 | if (config::force_immediate_recycle) { |
6956 | 0 | expiration_time = INT64_MAX; |
6957 | 0 | } |
6958 | 0 | ret1 = accessor->delete_all(expiration_time); |
6959 | 0 | if (ret1 != 0) { |
6960 | 0 | LOG(WARNING) << "failed to recycle expired stage objects, ret=" << ret1 << " " |
6961 | 0 | << ss.str(); |
6962 | 0 | ret = -1; |
6963 | 0 | continue; |
6964 | 0 | } |
6965 | 0 | metrics_context.total_recycled_num++; |
6966 | 0 | metrics_context.report(); |
6967 | 0 | } |
6968 | 10 | return ret; |
6969 | 10 | } |
6970 | | |
6971 | 193 | void InstanceRecycler::register_recycle_task(const std::string& task_name, int64_t start_time) { |
6972 | 193 | std::lock_guard lock(recycle_tasks_mutex); |
6973 | 193 | running_recycle_tasks[task_name] = start_time; |
6974 | 193 | } |
6975 | | |
6976 | 193 | void InstanceRecycler::unregister_recycle_task(const std::string& task_name) { |
6977 | 193 | std::lock_guard lock(recycle_tasks_mutex); |
6978 | 193 | DCHECK(running_recycle_tasks[task_name] > 0); |
6979 | 193 | running_recycle_tasks.erase(task_name); |
6980 | 193 | } |
6981 | | |
6982 | 21 | bool InstanceRecycler::check_recycle_tasks() { |
6983 | 21 | std::map<std::string, int64_t> tmp_running_recycle_tasks; |
6984 | 21 | { |
6985 | 21 | std::lock_guard lock(recycle_tasks_mutex); |
6986 | 21 | tmp_running_recycle_tasks = running_recycle_tasks; |
6987 | 21 | } |
6988 | | |
6989 | 21 | bool found = false; |
6990 | 21 | int64_t now = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); |
6991 | 21 | for (auto& [task_name, start_time] : tmp_running_recycle_tasks) { |
6992 | 20 | int64_t cost = now - start_time; |
6993 | 20 | if (cost > config::recycle_task_threshold_seconds) [[unlikely]] { |
6994 | 20 | LOG_INFO("recycle task cost too much time cost={}s", cost) |
6995 | 20 | .tag("instance_id", instance_id_) |
6996 | 20 | .tag("task", task_name); |
6997 | 20 | found = true; |
6998 | 20 | } |
6999 | 20 | } |
7000 | | |
7001 | 21 | return found; |
7002 | 21 | } |
7003 | | |
7004 | | // Scan and statistics indexes that need to be recycled |
7005 | 0 | int InstanceRecycler::scan_and_statistics_indexes() { |
7006 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_indexes"); |
7007 | |
|
7008 | 0 | RecycleIndexKeyInfo index_key_info0 {instance_id_, 0}; |
7009 | 0 | RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX}; |
7010 | 0 | std::string index_key0; |
7011 | 0 | std::string index_key1; |
7012 | 0 | recycle_index_key(index_key_info0, &index_key0); |
7013 | 0 | recycle_index_key(index_key_info1, &index_key1); |
7014 | 0 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
7015 | |
|
7016 | 0 | auto handle_index_kv = [&, this](std::string_view k, std::string_view v) -> int { |
7017 | 0 | RecycleIndexPB index_pb; |
7018 | 0 | if (!index_pb.ParseFromArray(v.data(), v.size())) { |
7019 | 0 | return 0; |
7020 | 0 | } |
7021 | 0 | int64_t current_time = ::time(nullptr); |
7022 | 0 | if (current_time < |
7023 | 0 | calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired |
7024 | 0 | return 0; |
7025 | 0 | } |
7026 | | // decode index_id |
7027 | 0 | auto k1 = k; |
7028 | 0 | k1.remove_prefix(1); |
7029 | 0 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
7030 | 0 | decode_key(&k1, &out); |
7031 | | // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB |
7032 | 0 | auto index_id = std::get<int64_t>(std::get<0>(out[3])); |
7033 | 0 | std::unique_ptr<Transaction> txn; |
7034 | 0 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
7035 | 0 | if (err != TxnErrorCode::TXN_OK) { |
7036 | 0 | return 0; |
7037 | 0 | } |
7038 | 0 | std::string val; |
7039 | 0 | err = txn->get(k, &val); |
7040 | 0 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { |
7041 | 0 | return 0; |
7042 | 0 | } |
7043 | 0 | if (err != TxnErrorCode::TXN_OK) { |
7044 | 0 | return 0; |
7045 | 0 | } |
7046 | 0 | index_pb.Clear(); |
7047 | 0 | if (!index_pb.ParseFromString(val)) { |
7048 | 0 | return 0; |
7049 | 0 | } |
7050 | 0 | if (scan_tablets_and_statistics(index_pb.table_id(), index_id, metrics_context) != 0) { |
7051 | 0 | return 0; |
7052 | 0 | } |
7053 | 0 | metrics_context.total_need_recycle_num++; |
7054 | 0 | return 0; |
7055 | 0 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
7056 | |
|
7057 | 0 | int ret = scan_and_recycle(index_key0, index_key1, std::move(handle_index_kv)); |
7058 | 0 | metrics_context.report(true); |
7059 | 0 | segment_metrics_context_.report(true); |
7060 | 0 | tablet_metrics_context_.report(true); |
7061 | 0 | return ret; |
7062 | 0 | } |
7063 | | |
7064 | | // Scan and statistics partitions that need to be recycled |
7065 | 0 | int InstanceRecycler::scan_and_statistics_partitions() { |
7066 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_partitions"); |
7067 | |
|
7068 | 0 | RecyclePartKeyInfo part_key_info0 {instance_id_, 0}; |
7069 | 0 | RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX}; |
7070 | 0 | std::string part_key0; |
7071 | 0 | std::string part_key1; |
7072 | 0 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
7073 | |
|
7074 | 0 | recycle_partition_key(part_key_info0, &part_key0); |
7075 | 0 | recycle_partition_key(part_key_info1, &part_key1); |
7076 | 0 | auto handle_partition_kv = [&, this](std::string_view k, std::string_view v) -> int { |
7077 | 0 | RecyclePartitionPB part_pb; |
7078 | 0 | if (!part_pb.ParseFromArray(v.data(), v.size())) { |
7079 | 0 | return 0; |
7080 | 0 | } |
7081 | 0 | int64_t current_time = ::time(nullptr); |
7082 | 0 | if (current_time < |
7083 | 0 | calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired |
7084 | 0 | return 0; |
7085 | 0 | } |
7086 | | // decode partition_id |
7087 | 0 | auto k1 = k; |
7088 | 0 | k1.remove_prefix(1); |
7089 | 0 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
7090 | 0 | decode_key(&k1, &out); |
7091 | | // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB |
7092 | 0 | auto partition_id = std::get<int64_t>(std::get<0>(out[3])); |
7093 | | // Change state to RECYCLING |
7094 | 0 | std::unique_ptr<Transaction> txn; |
7095 | 0 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
7096 | 0 | if (err != TxnErrorCode::TXN_OK) { |
7097 | 0 | return 0; |
7098 | 0 | } |
7099 | 0 | std::string val; |
7100 | 0 | err = txn->get(k, &val); |
7101 | 0 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { |
7102 | 0 | return 0; |
7103 | 0 | } |
7104 | 0 | if (err != TxnErrorCode::TXN_OK) { |
7105 | 0 | return 0; |
7106 | 0 | } |
7107 | 0 | part_pb.Clear(); |
7108 | 0 | if (!part_pb.ParseFromString(val)) { |
7109 | 0 | return 0; |
7110 | 0 | } |
7111 | | // Partitions with PREPARED state MUST have no data |
7112 | 0 | bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED; |
7113 | 0 | int ret = 0; |
7114 | 0 | for (int64_t index_id : part_pb.index_id()) { |
7115 | 0 | if (scan_tablets_and_statistics(part_pb.table_id(), index_id, metrics_context, |
7116 | 0 | partition_id, is_empty_tablet) != 0) { |
7117 | 0 | ret = 0; |
7118 | 0 | } |
7119 | 0 | } |
7120 | 0 | metrics_context.total_need_recycle_num++; |
7121 | 0 | return ret; |
7122 | 0 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
7123 | |
|
7124 | 0 | int ret = scan_and_recycle(part_key0, part_key1, std::move(handle_partition_kv)); |
7125 | 0 | metrics_context.report(true); |
7126 | 0 | segment_metrics_context_.report(true); |
7127 | 0 | tablet_metrics_context_.report(true); |
7128 | 0 | return ret; |
7129 | 0 | } |
7130 | | |
7131 | | // Scan and statistics rowsets that need to be recycled |
7132 | 0 | int InstanceRecycler::scan_and_statistics_rowsets() { |
7133 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_rowsets"); |
7134 | 0 | RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""}; |
7135 | 0 | RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""}; |
7136 | 0 | std::string recyc_rs_key0; |
7137 | 0 | std::string recyc_rs_key1; |
7138 | 0 | recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0); |
7139 | 0 | recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1); |
7140 | 0 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
7141 | |
|
7142 | 0 | auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int { |
7143 | 0 | RecycleRowsetPB rowset; |
7144 | 0 | if (!rowset.ParseFromArray(v.data(), v.size())) { |
7145 | 0 | return 0; |
7146 | 0 | } |
7147 | 0 | auto* rowset_meta = rowset.mutable_rowset_meta(); |
7148 | 0 | int64_t current_time = ::time(nullptr); |
7149 | 0 | if (current_time < |
7150 | 0 | calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts)) { // not expired |
7151 | 0 | return 0; |
7152 | 0 | } |
7153 | | |
7154 | 0 | if (!rowset.has_type()) { |
7155 | 0 | if (!rowset.has_resource_id()) [[unlikely]] { |
7156 | 0 | return 0; |
7157 | 0 | } |
7158 | 0 | if (rowset.resource_id().empty()) [[unlikely]] { |
7159 | 0 | return 0; |
7160 | 0 | } |
7161 | 0 | metrics_context.total_need_recycle_num++; |
7162 | 0 | metrics_context.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size(); |
7163 | 0 | segment_metrics_context_.total_need_recycle_num += rowset.rowset_meta().num_segments(); |
7164 | 0 | segment_metrics_context_.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size(); |
7165 | 0 | return 0; |
7166 | 0 | } |
7167 | | |
7168 | 0 | if(!rowset_meta->has_is_recycled() || !rowset_meta->is_recycled()) { |
7169 | 0 | return 0; |
7170 | 0 | } |
7171 | | |
7172 | 0 | if (!rowset_meta->has_resource_id()) [[unlikely]] { |
7173 | 0 | if (rowset.type() == RecycleRowsetPB::PREPARE || rowset_meta->num_segments() != 0) { |
7174 | 0 | return 0; |
7175 | 0 | } |
7176 | 0 | } |
7177 | 0 | metrics_context.total_need_recycle_num++; |
7178 | 0 | metrics_context.total_need_recycle_data_size += rowset_meta->total_disk_size(); |
7179 | 0 | segment_metrics_context_.total_need_recycle_num += rowset_meta->num_segments(); |
7180 | 0 | segment_metrics_context_.total_need_recycle_data_size += rowset_meta->total_disk_size(); |
7181 | 0 | return 0; |
7182 | 0 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
7183 | 0 | int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv)); |
7184 | 0 | metrics_context.report(true); |
7185 | 0 | segment_metrics_context_.report(true); |
7186 | 0 | return ret; |
7187 | 0 | } |
7188 | | |
7189 | | // Scan and statistics tmp_rowsets that need to be recycled |
7190 | 0 | int InstanceRecycler::scan_and_statistics_tmp_rowsets() { |
7191 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_tmp_rowsets"); |
7192 | 0 | MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0}; |
7193 | 0 | MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0}; |
7194 | 0 | std::string tmp_rs_key0; |
7195 | 0 | std::string tmp_rs_key1; |
7196 | 0 | meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0); |
7197 | 0 | meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1); |
7198 | |
|
7199 | 0 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
7200 | |
|
7201 | 0 | auto handle_tmp_rowsets_kv = [&, this](std::string_view k, std::string_view v) -> int { |
7202 | 0 | doris::RowsetMetaCloudPB rowset; |
7203 | 0 | if (!rowset.ParseFromArray(v.data(), v.size())) { |
7204 | 0 | return 0; |
7205 | 0 | } |
7206 | 0 | int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts); |
7207 | 0 | int64_t current_time = ::time(nullptr); |
7208 | 0 | if (current_time < expiration) { |
7209 | 0 | return 0; |
7210 | 0 | } |
7211 | | |
7212 | 0 | DCHECK_GT(rowset.txn_id(), 0) |
7213 | 0 | << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString(); |
7214 | |
|
7215 | 0 | if(!rowset.has_is_recycled() || !rowset.is_recycled()) { |
7216 | 0 | return 0; |
7217 | 0 | } |
7218 | | |
7219 | 0 | if (!rowset.has_resource_id()) { |
7220 | 0 | if (rowset.num_segments() > 0) [[unlikely]] { // impossible |
7221 | 0 | return 0; |
7222 | 0 | } |
7223 | 0 | return 0; |
7224 | 0 | } |
7225 | | |
7226 | 0 | metrics_context.total_need_recycle_num++; |
7227 | 0 | metrics_context.total_need_recycle_data_size += rowset.total_disk_size(); |
7228 | 0 | segment_metrics_context_.total_need_recycle_data_size += rowset.total_disk_size(); |
7229 | 0 | segment_metrics_context_.total_need_recycle_num += rowset.num_segments(); |
7230 | 0 | return 0; |
7231 | 0 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
7232 | 0 | int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_tmp_rowsets_kv)); |
7233 | 0 | metrics_context.report(true); |
7234 | 0 | segment_metrics_context_.report(true); |
7235 | 0 | return ret; |
7236 | 0 | } |
7237 | | |
7238 | | // Scan and statistics abort_timeout_txn that need to be recycled |
7239 | 0 | int InstanceRecycler::scan_and_statistics_abort_timeout_txn() { |
7240 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "abort_timeout_txn"); |
7241 | |
|
7242 | 0 | TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0}; |
7243 | 0 | TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX}; |
7244 | 0 | std::string begin_txn_running_key; |
7245 | 0 | std::string end_txn_running_key; |
7246 | 0 | txn_running_key(txn_running_key_info0, &begin_txn_running_key); |
7247 | 0 | txn_running_key(txn_running_key_info1, &end_txn_running_key); |
7248 | |
|
7249 | 0 | int64_t current_time = |
7250 | 0 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
7251 | |
|
7252 | 0 | auto handle_abort_timeout_txn_kv = [&metrics_context, ¤t_time, this]( |
7253 | 0 | std::string_view k, std::string_view v) -> int { |
7254 | 0 | std::unique_ptr<Transaction> txn; |
7255 | 0 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
7256 | 0 | if (err != TxnErrorCode::TXN_OK) { |
7257 | 0 | return 0; |
7258 | 0 | } |
7259 | 0 | std::string_view k1 = k; |
7260 | 0 | k1.remove_prefix(1); |
7261 | 0 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
7262 | 0 | if (decode_key(&k1, &out) != 0) { |
7263 | 0 | return 0; |
7264 | 0 | } |
7265 | 0 | int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); |
7266 | 0 | int64_t txn_id = std::get<int64_t>(std::get<0>(out[4])); |
7267 | | // Update txn_info |
7268 | 0 | std::string txn_inf_key, txn_inf_val; |
7269 | 0 | txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key); |
7270 | 0 | err = txn->get(txn_inf_key, &txn_inf_val); |
7271 | 0 | if (err != TxnErrorCode::TXN_OK) { |
7272 | 0 | return 0; |
7273 | 0 | } |
7274 | 0 | TxnInfoPB txn_info; |
7275 | 0 | if (!txn_info.ParseFromString(txn_inf_val)) { |
7276 | 0 | return 0; |
7277 | 0 | } |
7278 | | |
7279 | 0 | if (TxnStatusPB::TXN_STATUS_COMMITTED != txn_info.status()) { |
7280 | 0 | TxnRunningPB txn_running_pb; |
7281 | 0 | if (!txn_running_pb.ParseFromArray(v.data(), v.size())) { |
7282 | 0 | return 0; |
7283 | 0 | } |
7284 | 0 | if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) { |
7285 | 0 | return 0; |
7286 | 0 | } |
7287 | 0 | metrics_context.total_need_recycle_num++; |
7288 | 0 | } |
7289 | 0 | return 0; |
7290 | 0 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
7291 | |
|
7292 | 0 | int ret = scan_and_recycle(begin_txn_running_key, end_txn_running_key, std::move(handle_abort_timeout_txn_kv)); |
7293 | 0 | metrics_context.report(true); |
7294 | 0 | return ret; |
7295 | 0 | } |
7296 | | |
7297 | | // Scan and statistics expired_txn_label that need to be recycled |
7298 | 0 | int InstanceRecycler::scan_and_statistics_expired_txn_label() { |
7299 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_txn_label"); |
7300 | |
|
7301 | 0 | RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0}; |
7302 | 0 | RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX}; |
7303 | 0 | std::string begin_recycle_txn_key; |
7304 | 0 | std::string end_recycle_txn_key; |
7305 | 0 | recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key); |
7306 | 0 | recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key); |
7307 | 0 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
7308 | 0 | int64_t current_time_ms = |
7309 | 0 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
7310 | | |
7311 | | // for calculate the total num or bytes of recyled objects |
7312 | 0 | auto handle_expired_txn_label_kv = [&, this](std::string_view k, std::string_view v) -> int { |
7313 | 0 | RecycleTxnPB recycle_txn_pb; |
7314 | 0 | if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) { |
7315 | 0 | return 0; |
7316 | 0 | } |
7317 | 0 | if ((config::force_immediate_recycle) || |
7318 | 0 | (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) || |
7319 | 0 | (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <= |
7320 | 0 | current_time_ms)) { |
7321 | 0 | metrics_context.total_need_recycle_num++; |
7322 | 0 | } |
7323 | 0 | return 0; |
7324 | 0 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
7325 | |
|
7326 | 0 | int ret = scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key, std::move(handle_expired_txn_label_kv)); |
7327 | 0 | metrics_context.report(true); |
7328 | 0 | return ret; |
7329 | 0 | } |
7330 | | |
7331 | | // Scan and statistics copy_jobs that need to be recycled |
7332 | 0 | int InstanceRecycler::scan_and_statistics_copy_jobs() { |
7333 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_copy_jobs"); |
7334 | 0 | CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0}; |
7335 | 0 | CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0}; |
7336 | 0 | std::string key0; |
7337 | 0 | std::string key1; |
7338 | 0 | copy_job_key(key_info0, &key0); |
7339 | 0 | copy_job_key(key_info1, &key1); |
7340 | | |
7341 | | // for calculate the total num or bytes of recyled objects |
7342 | 0 | auto scan_and_statistics = [&metrics_context](std::string_view k, std::string_view v) -> int { |
7343 | 0 | CopyJobPB copy_job; |
7344 | 0 | if (!copy_job.ParseFromArray(v.data(), v.size())) { |
7345 | 0 | LOG_WARNING("malformed copy job").tag("key", hex(k)); |
7346 | 0 | return 0; |
7347 | 0 | } |
7348 | | |
7349 | 0 | if (copy_job.job_status() == CopyJobPB::FINISH) { |
7350 | 0 | if (copy_job.stage_type() == StagePB::EXTERNAL) { |
7351 | 0 | int64_t current_time = |
7352 | 0 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
7353 | 0 | if (copy_job.finish_time_ms() > 0) { |
7354 | 0 | if (!config::force_immediate_recycle && |
7355 | 0 | current_time < copy_job.finish_time_ms() + |
7356 | 0 | config::copy_job_max_retention_second * 1000) { |
7357 | 0 | return 0; |
7358 | 0 | } |
7359 | 0 | } else { |
7360 | 0 | if (!config::force_immediate_recycle && |
7361 | 0 | current_time < copy_job.start_time_ms() + |
7362 | 0 | config::copy_job_max_retention_second * 1000) { |
7363 | 0 | return 0; |
7364 | 0 | } |
7365 | 0 | } |
7366 | 0 | } |
7367 | 0 | } else if (copy_job.job_status() == CopyJobPB::LOADING) { |
7368 | 0 | int64_t current_time = |
7369 | 0 | duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); |
7370 | 0 | if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) { |
7371 | 0 | return 0; |
7372 | 0 | } |
7373 | 0 | } |
7374 | 0 | metrics_context.total_need_recycle_num++; |
7375 | 0 | return 0; |
7376 | 0 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
7377 | |
|
7378 | 0 | int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics)); |
7379 | 0 | metrics_context.report(true); |
7380 | 0 | return ret; |
7381 | 0 | } |
7382 | | |
7383 | | // Scan and statistics stage that need to be recycled |
7384 | 0 | int InstanceRecycler::scan_and_statistics_stage() { |
7385 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_stage"); |
7386 | 0 | RecycleStageKeyInfo key_info0 {instance_id_, ""}; |
7387 | 0 | RecycleStageKeyInfo key_info1 {instance_id_, "\xff"}; |
7388 | 0 | std::string key0 = recycle_stage_key(key_info0); |
7389 | 0 | std::string key1 = recycle_stage_key(key_info1); |
7390 | | |
7391 | | // for calculate the total num or bytes of recyled objects |
7392 | 0 | auto scan_and_statistics = [&metrics_context, this](std::string_view k, |
7393 | 0 | std::string_view v) -> int { |
7394 | 0 | RecycleStagePB recycle_stage; |
7395 | 0 | if (!recycle_stage.ParseFromArray(v.data(), v.size())) { |
7396 | 0 | LOG_WARNING("malformed recycle stage").tag("key", hex(k)); |
7397 | 0 | return 0; |
7398 | 0 | } |
7399 | | |
7400 | 0 | int idx = stoi(recycle_stage.stage().obj_info().id()); |
7401 | 0 | if (idx > instance_info_.obj_info().size() || idx < 1) { |
7402 | 0 | LOG(WARNING) << "invalid idx: " << idx; |
7403 | 0 | return 0; |
7404 | 0 | } |
7405 | | |
7406 | 0 | std::shared_ptr<StorageVaultAccessor> accessor; |
7407 | 0 | int ret = SYNC_POINT_HOOK_RETURN_VALUE( |
7408 | 0 | [&] { |
7409 | 0 | auto& old_obj = instance_info_.obj_info()[idx - 1]; |
7410 | 0 | auto s3_conf = S3Conf::from_obj_store_info(old_obj); |
7411 | 0 | if (!s3_conf) { |
7412 | 0 | return 0; |
7413 | 0 | } |
7414 | |
|
7415 | 0 | s3_conf->prefix = recycle_stage.stage().obj_info().prefix(); |
7416 | 0 | std::shared_ptr<S3Accessor> s3_accessor; |
7417 | 0 | int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor); |
7418 | 0 | if (ret != 0) { |
7419 | 0 | return 0; |
7420 | 0 | } |
7421 | |
|
7422 | 0 | accessor = std::move(s3_accessor); |
7423 | 0 | return 0; |
7424 | 0 | }(), |
7425 | 0 | "recycle_stage:get_accessor", &accessor); |
7426 | |
|
7427 | 0 | if (ret != 0) { |
7428 | 0 | LOG(WARNING) << "failed to init accessor ret=" << ret; |
7429 | 0 | return 0; |
7430 | 0 | } |
7431 | | |
7432 | 0 | metrics_context.total_need_recycle_num++; |
7433 | 0 | return 0; |
7434 | 0 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
7435 | |
|
7436 | 0 | int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics)); |
7437 | 0 | metrics_context.report(true); |
7438 | 0 | return ret; |
7439 | 0 | } |
7440 | | |
7441 | | // Scan and statistics expired_stage_objects that need to be recycled |
7442 | 0 | int InstanceRecycler::scan_and_statistics_expired_stage_objects() { |
7443 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects"); |
7444 | | |
7445 | | // for calculate the total num or bytes of recyled objects |
7446 | 0 | auto scan_and_statistics = [&metrics_context, this]() { |
7447 | 0 | for (const auto& stage : instance_info_.stages()) { |
7448 | 0 | if (stopped()) { |
7449 | 0 | break; |
7450 | 0 | } |
7451 | 0 | if (stage.type() == StagePB::EXTERNAL) { |
7452 | 0 | continue; |
7453 | 0 | } |
7454 | 0 | int idx = stoi(stage.obj_info().id()); |
7455 | 0 | if (idx > instance_info_.obj_info().size() || idx < 1) { |
7456 | 0 | continue; |
7457 | 0 | } |
7458 | 0 | const auto& old_obj = instance_info_.obj_info()[idx - 1]; |
7459 | 0 | auto s3_conf = S3Conf::from_obj_store_info(old_obj); |
7460 | 0 | if (!s3_conf) { |
7461 | 0 | continue; |
7462 | 0 | } |
7463 | 0 | s3_conf->prefix = stage.obj_info().prefix(); |
7464 | 0 | std::shared_ptr<S3Accessor> accessor; |
7465 | 0 | int ret1 = S3Accessor::create(*s3_conf, &accessor); |
7466 | 0 | if (ret1 != 0) { |
7467 | 0 | continue; |
7468 | 0 | } |
7469 | 0 | if (s3_conf->prefix.find("/stage/") == std::string::npos) { |
7470 | 0 | continue; |
7471 | 0 | } |
7472 | 0 | metrics_context.total_need_recycle_num++; |
7473 | 0 | } |
7474 | 0 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv |
7475 | |
|
7476 | 0 | scan_and_statistics(); |
7477 | 0 | metrics_context.report(true); |
7478 | 0 | return 0; |
7479 | 0 | } |
7480 | | |
7481 | | // Scan and statistics versions that need to be recycled |
7482 | 0 | int InstanceRecycler::scan_and_statistics_versions() { |
7483 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions"); |
7484 | 0 | auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0}); |
7485 | 0 | auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0}); |
7486 | |
|
7487 | 0 | int64_t last_scanned_table_id = 0; |
7488 | 0 | bool is_recycled = false; // Is last scanned kv recycled |
7489 | | // for calculate the total num or bytes of recyled objects |
7490 | 0 | auto scan_and_statistics = [&metrics_context, &last_scanned_table_id, &is_recycled, this]( |
7491 | 0 | std::string_view k, std::string_view) { |
7492 | 0 | auto k1 = k; |
7493 | 0 | k1.remove_prefix(1); |
7494 | | // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id} |
7495 | 0 | std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; |
7496 | 0 | decode_key(&k1, &out); |
7497 | 0 | DCHECK_EQ(out.size(), 6) << k; |
7498 | 0 | auto table_id = std::get<int64_t>(std::get<0>(out[4])); |
7499 | 0 | if (table_id == last_scanned_table_id) { // Already handle kvs of this table |
7500 | 0 | metrics_context.total_need_recycle_num += |
7501 | 0 | is_recycled; // Version kv of this table has been recycled |
7502 | 0 | return 0; |
7503 | 0 | } |
7504 | 0 | last_scanned_table_id = table_id; |
7505 | 0 | is_recycled = false; |
7506 | 0 | auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0}); |
7507 | 0 | auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0}); |
7508 | 0 | std::unique_ptr<Transaction> txn; |
7509 | 0 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
7510 | 0 | if (err != TxnErrorCode::TXN_OK) { |
7511 | 0 | return 0; |
7512 | 0 | } |
7513 | 0 | std::unique_ptr<RangeGetIterator> iter; |
7514 | 0 | err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1); |
7515 | 0 | if (err != TxnErrorCode::TXN_OK) { |
7516 | 0 | return 0; |
7517 | 0 | } |
7518 | 0 | if (iter->has_next()) { // Table is useful, should not recycle table and partition versions |
7519 | 0 | return 0; |
7520 | 0 | } |
7521 | 0 | metrics_context.total_need_recycle_num++; |
7522 | 0 | is_recycled = true; |
7523 | 0 | return 0; |
7524 | 0 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
7525 | |
|
7526 | 0 | int ret = scan_and_recycle(version_key_begin, version_key_end, std::move(scan_and_statistics)); |
7527 | 0 | metrics_context.report(true); |
7528 | 0 | return ret; |
7529 | 0 | } |
7530 | | |
7531 | | // Scan and statistics restore jobs that need to be recycled |
7532 | 0 | int InstanceRecycler::scan_and_statistics_restore_jobs() { |
7533 | 0 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_restore_jobs"); |
7534 | 0 | JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0}; |
7535 | 0 | JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX}; |
7536 | 0 | std::string restore_job_key0; |
7537 | 0 | std::string restore_job_key1; |
7538 | 0 | job_restore_tablet_key(restore_job_key_info0, &restore_job_key0); |
7539 | 0 | job_restore_tablet_key(restore_job_key_info1, &restore_job_key1); |
7540 | |
|
7541 | 0 | int64_t earlest_ts = std::numeric_limits<int64_t>::max(); |
7542 | | |
7543 | | // for calculate the total num or bytes of recyled objects |
7544 | 0 | auto scan_and_statistics = [&](std::string_view k, std::string_view v) -> int { |
7545 | 0 | RestoreJobCloudPB restore_job_pb; |
7546 | 0 | if (!restore_job_pb.ParseFromArray(v.data(), v.size())) { |
7547 | 0 | LOG_WARNING("malformed recycle partition value").tag("key", hex(k)); |
7548 | 0 | return 0; |
7549 | 0 | } |
7550 | 0 | int64_t expiration = |
7551 | 0 | calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts); |
7552 | 0 | int64_t current_time = ::time(nullptr); |
7553 | 0 | if (current_time < expiration) { // not expired |
7554 | 0 | return 0; |
7555 | 0 | } |
7556 | 0 | metrics_context.total_need_recycle_num++; |
7557 | 0 | if(restore_job_pb.need_recycle_data()) { |
7558 | 0 | scan_tablet_and_statistics(restore_job_pb.tablet_id(), metrics_context); |
7559 | 0 | } |
7560 | 0 | return 0; |
7561 | 0 | }; Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_ |
7562 | |
|
7563 | 0 | int ret = scan_and_recycle(restore_job_key0, restore_job_key1, std::move(scan_and_statistics)); |
7564 | 0 | metrics_context.report(true); |
7565 | 0 | return ret; |
7566 | 0 | } |
7567 | | |
7568 | 3 | void InstanceRecycler::scan_and_statistics_operation_logs() { |
7569 | 3 | if (!should_recycle_versioned_keys()) { |
7570 | 0 | return; |
7571 | 0 | } |
7572 | | |
7573 | 3 | RecyclerMetricsContext metrics_context(instance_id_, "recycle_operation_logs"); |
7574 | | |
7575 | 3 | OperationLogRecycleChecker recycle_checker(instance_id_, txn_kv_.get(), instance_info_); |
7576 | 3 | if (recycle_checker.init() != 0) { |
7577 | 0 | return; |
7578 | 0 | } |
7579 | | |
7580 | 3 | std::string log_key_prefix = versioned::log_key(instance_id_); |
7581 | 3 | std::string begin_key = encode_versioned_key(log_key_prefix, Versionstamp::min()); |
7582 | 3 | std::string end_key = encode_versioned_key(log_key_prefix, Versionstamp::max()); |
7583 | | |
7584 | 3 | std::unique_ptr<BlobIterator> iter = blob_get_range(txn_kv_, begin_key, end_key); |
7585 | 8 | for (; iter->valid(); iter->next()) { |
7586 | 5 | OperationLogPB operation_log; |
7587 | 5 | if (!iter->parse_value(&operation_log)) { |
7588 | 0 | continue; |
7589 | 0 | } |
7590 | | |
7591 | 5 | std::string_view key = iter->key(); |
7592 | 5 | Versionstamp log_versionstamp; |
7593 | 5 | if (!decode_versioned_key(&key, &log_versionstamp)) { |
7594 | 0 | continue; |
7595 | 0 | } |
7596 | | |
7597 | 5 | OperationLogReferenceInfo ref_info; |
7598 | 5 | if (recycle_checker.can_recycle(log_versionstamp, operation_log.min_timestamp(), |
7599 | 5 | &ref_info)) { |
7600 | 4 | metrics_context.total_need_recycle_num++; |
7601 | 4 | metrics_context.total_need_recycle_data_size += operation_log.ByteSizeLong(); |
7602 | 4 | } |
7603 | 5 | } |
7604 | | |
7605 | 3 | metrics_context.report(true); |
7606 | 3 | } |
7607 | | |
7608 | | int InstanceRecycler::classify_rowset_task_by_ref_count( |
7609 | 60 | RowsetDeleteTask& task, std::vector<RowsetDeleteTask>& batch_delete_tasks) { |
7610 | 60 | constexpr int MAX_RETRY = 10; |
7611 | 60 | const auto& rowset_meta = task.rowset_meta; |
7612 | 60 | int64_t tablet_id = rowset_meta.tablet_id(); |
7613 | 60 | const std::string& rowset_id = rowset_meta.rowset_id_v2(); |
7614 | 60 | std::string_view reference_instance_id = instance_id_; |
7615 | 60 | if (rowset_meta.has_reference_instance_id()) { |
7616 | 5 | reference_instance_id = rowset_meta.reference_instance_id(); |
7617 | 5 | } |
7618 | | |
7619 | 61 | for (int i = 0; i < MAX_RETRY; ++i) { |
7620 | 61 | std::unique_ptr<Transaction> txn; |
7621 | 61 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
7622 | 61 | if (err != TxnErrorCode::TXN_OK) { |
7623 | 0 | LOG_WARNING("failed to create txn when classifying rowset task") |
7624 | 0 | .tag("instance_id", instance_id_) |
7625 | 0 | .tag("tablet_id", tablet_id) |
7626 | 0 | .tag("rowset_id", rowset_id) |
7627 | 0 | .tag("err", err); |
7628 | 0 | return -1; |
7629 | 0 | } |
7630 | | |
7631 | 61 | std::string rowset_ref_count_key = |
7632 | 61 | versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id}); |
7633 | 61 | task.rowset_ref_count_key = rowset_ref_count_key; |
7634 | | |
7635 | 61 | int64_t ref_count = 0; |
7636 | 61 | { |
7637 | 61 | std::string value; |
7638 | 61 | TxnErrorCode err = txn->get(rowset_ref_count_key, &value); |
7639 | 61 | if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { |
7640 | 0 | ref_count = 1; |
7641 | 61 | } else if (err != TxnErrorCode::TXN_OK) { |
7642 | 0 | LOG_WARNING("failed to get rowset ref count key when classifying") |
7643 | 0 | .tag("instance_id", instance_id_) |
7644 | 0 | .tag("tablet_id", tablet_id) |
7645 | 0 | .tag("rowset_id", rowset_id) |
7646 | 0 | .tag("err", err); |
7647 | 0 | return -1; |
7648 | 61 | } else if (!txn->decode_atomic_int(value, &ref_count)) { |
7649 | 0 | LOG_WARNING("failed to decode rowset data ref count when classifying") |
7650 | 0 | .tag("instance_id", instance_id_) |
7651 | 0 | .tag("tablet_id", tablet_id) |
7652 | 0 | .tag("rowset_id", rowset_id) |
7653 | 0 | .tag("value", hex(value)); |
7654 | 0 | return -1; |
7655 | 0 | } |
7656 | 61 | } |
7657 | | |
7658 | 61 | if (ref_count > 1) { |
7659 | | // ref_count > 1: decrement count, remove recycle keys, don't add to batch delete |
7660 | 12 | txn->atomic_add(rowset_ref_count_key, -1); |
7661 | 12 | LOG_INFO("decrease rowset data ref count in classification phase") |
7662 | 12 | .tag("instance_id", instance_id_) |
7663 | 12 | .tag("tablet_id", tablet_id) |
7664 | 12 | .tag("rowset_id", rowset_id) |
7665 | 12 | .tag("ref_count", ref_count - 1) |
7666 | 12 | .tag("ref_count_key", hex(rowset_ref_count_key)); |
7667 | | |
7668 | 12 | if (!task.recycle_rowset_key.empty()) { |
7669 | 0 | txn->remove(task.recycle_rowset_key); |
7670 | 0 | LOG_INFO("remove recycle rowset key in classification phase") |
7671 | 0 | .tag("key", hex(task.recycle_rowset_key)); |
7672 | 0 | } |
7673 | 12 | if (!task.non_versioned_rowset_key.empty()) { |
7674 | 12 | txn->remove(task.non_versioned_rowset_key); |
7675 | 12 | LOG_INFO("remove non versioned rowset key in classification phase") |
7676 | 12 | .tag("key", hex(task.non_versioned_rowset_key)); |
7677 | 12 | } |
7678 | | |
7679 | 12 | err = txn->commit(); |
7680 | 12 | if (err == TxnErrorCode::TXN_CONFLICT) { |
7681 | 1 | VLOG_DEBUG << "decrease rowset ref count but txn conflict in classification, retry" |
7682 | 0 | << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id |
7683 | 0 | << ", ref_count=" << ref_count << ", retry=" << i; |
7684 | 1 | std::this_thread::sleep_for(std::chrono::milliseconds(500)); |
7685 | 1 | continue; |
7686 | 11 | } else if (err != TxnErrorCode::TXN_OK) { |
7687 | 0 | LOG_WARNING("failed to commit txn when classifying rowset task") |
7688 | 0 | .tag("instance_id", instance_id_) |
7689 | 0 | .tag("tablet_id", tablet_id) |
7690 | 0 | .tag("rowset_id", rowset_id) |
7691 | 0 | .tag("err", err); |
7692 | 0 | return -1; |
7693 | 0 | } |
7694 | 11 | return 1; // handled, not added to batch delete |
7695 | 49 | } else { |
7696 | | // ref_count == 1: Add to batch delete plan without modifying any KV. |
7697 | | // Keep recycle_rowset_key as "pending recycle" marker until data is actually deleted. |
7698 | 49 | LOG_INFO("add rowset to batch delete plan") |
7699 | 49 | .tag("instance_id", instance_id_) |
7700 | 49 | .tag("tablet_id", tablet_id) |
7701 | 49 | .tag("rowset_id", rowset_id) |
7702 | 49 | .tag("resource_id", rowset_meta.resource_id()) |
7703 | 49 | .tag("ref_count", ref_count); |
7704 | | |
7705 | 49 | batch_delete_tasks.push_back(std::move(task)); |
7706 | 49 | return 0; // added to batch delete |
7707 | 49 | } |
7708 | 61 | } |
7709 | | |
7710 | 0 | LOG_WARNING("failed to classify rowset task after retry") |
7711 | 0 | .tag("instance_id", instance_id_) |
7712 | 0 | .tag("tablet_id", tablet_id) |
7713 | 0 | .tag("rowset_id", rowset_id) |
7714 | 0 | .tag("retry", MAX_RETRY); |
7715 | 0 | return -1; |
7716 | 60 | } |
7717 | | |
7718 | 10 | int InstanceRecycler::cleanup_rowset_metadata(const std::vector<RowsetDeleteTask>& tasks) { |
7719 | 10 | int ret = 0; |
7720 | 49 | for (const auto& task : tasks) { |
7721 | 49 | int64_t tablet_id = task.rowset_meta.tablet_id(); |
7722 | 49 | const std::string& rowset_id = task.rowset_meta.rowset_id_v2(); |
7723 | | |
7724 | | // Note: decrement_packed_file_ref_counts is already called in delete_rowset_data, |
7725 | | // so we don't need to call it again here. |
7726 | | |
7727 | | // Remove all metadata keys in one transaction |
7728 | 49 | std::unique_ptr<Transaction> txn; |
7729 | 49 | TxnErrorCode err = txn_kv_->create_txn(&txn); |
7730 | 49 | if (err != TxnErrorCode::TXN_OK) { |
7731 | 0 | LOG_WARNING("failed to create txn when cleaning up metadata") |
7732 | 0 | .tag("instance_id", instance_id_) |
7733 | 0 | .tag("tablet_id", tablet_id) |
7734 | 0 | .tag("rowset_id", rowset_id) |
7735 | 0 | .tag("err", err); |
7736 | 0 | ret = -1; |
7737 | 0 | continue; |
7738 | 0 | } |
7739 | | |
7740 | 49 | std::string_view reference_instance_id = instance_id_; |
7741 | 49 | if (task.rowset_meta.has_reference_instance_id()) { |
7742 | 5 | reference_instance_id = task.rowset_meta.reference_instance_id(); |
7743 | 5 | } |
7744 | | |
7745 | 49 | txn->remove(task.rowset_ref_count_key); |
7746 | 49 | LOG_INFO("delete rowset data ref count key in cleanup phase") |
7747 | 49 | .tag("instance_id", instance_id_) |
7748 | 49 | .tag("tablet_id", tablet_id) |
7749 | 49 | .tag("rowset_id", rowset_id) |
7750 | 49 | .tag("ref_count_key", hex(task.rowset_ref_count_key)); |
7751 | | |
7752 | 49 | std::string dbm_start_key = |
7753 | 49 | meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id, 0, 0}); |
7754 | 49 | std::string dbm_end_key = meta_delete_bitmap_key( |
7755 | 49 | {reference_instance_id, tablet_id, rowset_id, |
7756 | 49 | std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()}); |
7757 | 49 | txn->remove(dbm_start_key, dbm_end_key); |
7758 | 49 | LOG_INFO("remove delete bitmap kv in cleanup phase") |
7759 | 49 | .tag("instance_id", instance_id_) |
7760 | 49 | .tag("tablet_id", tablet_id) |
7761 | 49 | .tag("rowset_id", rowset_id) |
7762 | 49 | .tag("begin", hex(dbm_start_key)) |
7763 | 49 | .tag("end", hex(dbm_end_key)); |
7764 | | |
7765 | 49 | std::string versioned_dbm_start_key = |
7766 | 49 | versioned::meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id}); |
7767 | 49 | std::string versioned_dbm_end_key = versioned_dbm_start_key; |
7768 | 49 | encode_int64(INT64_MAX, &versioned_dbm_end_key); |
7769 | 49 | txn->remove(versioned_dbm_start_key, versioned_dbm_end_key); |
7770 | 49 | LOG_INFO("remove versioned delete bitmap kv in cleanup phase") |
7771 | 49 | .tag("instance_id", instance_id_) |
7772 | 49 | .tag("tablet_id", tablet_id) |
7773 | 49 | .tag("rowset_id", rowset_id) |
7774 | 49 | .tag("begin", hex(versioned_dbm_start_key)) |
7775 | 49 | .tag("end", hex(versioned_dbm_end_key)); |
7776 | | |
7777 | | // Remove versioned meta rowset key |
7778 | 49 | if (!task.versioned_rowset_key.empty()) { |
7779 | 49 | versioned::document_remove<RowsetMetaCloudPB>( |
7780 | 49 | txn.get(), task.versioned_rowset_key, task.versionstamp); |
7781 | 49 | LOG_INFO("remove versioned meta rowset key in cleanup phase") |
7782 | 49 | .tag("instance_id", instance_id_) |
7783 | 49 | .tag("tablet_id", tablet_id) |
7784 | 49 | .tag("rowset_id", rowset_id) |
7785 | 49 | .tag("key_prefix", hex(task.versioned_rowset_key)); |
7786 | 49 | } |
7787 | | |
7788 | 49 | if (!task.non_versioned_rowset_key.empty()) { |
7789 | 49 | txn->remove(task.non_versioned_rowset_key); |
7790 | 49 | LOG_INFO("remove non versioned rowset key in cleanup phase") |
7791 | 49 | .tag("instance_id", instance_id_) |
7792 | 49 | .tag("tablet_id", tablet_id) |
7793 | 49 | .tag("rowset_id", rowset_id) |
7794 | 49 | .tag("key", hex(task.non_versioned_rowset_key)); |
7795 | 49 | } |
7796 | | |
7797 | | // Remove recycle_rowset_key last to ensure retry safety: |
7798 | | // if cleanup fails, this key remains and triggers next round retry. |
7799 | 49 | if (!task.recycle_rowset_key.empty()) { |
7800 | 0 | txn->remove(task.recycle_rowset_key); |
7801 | 0 | LOG_INFO("remove recycle rowset key in cleanup phase") |
7802 | 0 | .tag("instance_id", instance_id_) |
7803 | 0 | .tag("tablet_id", tablet_id) |
7804 | 0 | .tag("rowset_id", rowset_id) |
7805 | 0 | .tag("key", hex(task.recycle_rowset_key)); |
7806 | 0 | } |
7807 | | |
7808 | 49 | err = txn->commit(); |
7809 | 49 | if (err != TxnErrorCode::TXN_OK) { |
7810 | | // Metadata cleanup failed. recycle_rowset_key remains, next round will retry. |
7811 | 0 | LOG_WARNING("failed to commit cleanup metadata txn, will retry next round") |
7812 | 0 | .tag("instance_id", instance_id_) |
7813 | 0 | .tag("tablet_id", tablet_id) |
7814 | 0 | .tag("rowset_id", rowset_id) |
7815 | 0 | .tag("err", err); |
7816 | 0 | ret = -1; |
7817 | 0 | continue; |
7818 | 0 | } |
7819 | | |
7820 | 49 | LOG_INFO("cleanup rowset metadata success") |
7821 | 49 | .tag("instance_id", instance_id_) |
7822 | 49 | .tag("tablet_id", tablet_id) |
7823 | 49 | .tag("rowset_id", rowset_id); |
7824 | 49 | } |
7825 | 10 | return ret; |
7826 | 10 | } |
7827 | | |
7828 | | } // namespace doris::cloud |