/root/doris/be/src/olap/delete_handler.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "olap/delete_handler.h" |
19 | | |
20 | | #include <gen_cpp/PaloInternalService_types.h> |
21 | | #include <gen_cpp/olap_file.pb.h> |
22 | | #include <thrift/protocol/TDebugProtocol.h> |
23 | | |
24 | | #include <string> |
25 | | #include <vector> |
26 | | |
27 | | #include "common/config.h" |
28 | | #include "common/logging.h" |
29 | | #include "common/status.h" |
30 | | #include "olap/block_column_predicate.h" |
31 | | #include "olap/column_predicate.h" |
32 | | #include "olap/olap_common.h" |
33 | | #include "olap/predicate_creator.h" |
34 | | #include "olap/tablet_schema.h" |
35 | | #include "olap/utils.h" |
36 | | #include "util/debug_points.h" |
37 | | |
38 | | using apache::thrift::ThriftDebugString; |
39 | | using std::vector; |
40 | | using std::string; |
41 | | |
42 | | using ::google::protobuf::RepeatedPtrField; |
43 | | |
44 | | namespace doris { |
45 | | |
46 | | // construct sub condition from TCondition |
47 | 87 | std::string construct_sub_predicate(const TCondition& condition) { |
48 | 87 | string op = condition.condition_op; |
49 | 87 | if (op == "<") { Branch (49:9): [True: 50, False: 37]
|
50 | 50 | op += "<"; |
51 | 50 | } else if (op == ">") { Branch (51:16): [True: 3, False: 34]
|
52 | 3 | op += ">"; |
53 | 3 | } |
54 | 87 | string condition_str; |
55 | 87 | if ("IS" == op) { Branch (55:9): [True: 1, False: 86]
|
56 | | // ATTN: tricky! Surround IS with spaces to make it "special" |
57 | 1 | condition_str = condition.column_name + " IS " + condition.condition_values[0]; |
58 | 86 | } else { // multi-elements IN expr has been processed with InPredicatePB |
59 | 86 | if (op == "*=") { Branch (59:13): [True: 1, False: 85]
|
60 | 1 | op = "="; |
61 | 85 | } else if (op == "!*=") { Branch (61:20): [True: 1, False: 84]
|
62 | 1 | op = "!="; |
63 | 1 | } |
64 | 86 | condition_str = condition.column_name + op + "'" + condition.condition_values[0] + "'"; |
65 | 86 | } |
66 | 87 | return condition_str; |
67 | 87 | } |
68 | | |
// Maps an operator spelling received from FE onto the spelling used by BE:
//   "<"   -> "<<"
//   ">"   -> ">>"
//   "*="  -> "="
//   "!*=" -> "!="
// Any other operator, including "IS", is returned unchanged.
std::string trans_op(const std::string& opt) {
    if (opt == "<") {
        return "<<";
    }
    if (opt == ">") {
        return ">>";
    }
    if (opt == "*=") {
        return "=";
    }
    if (opt == "!*=") {
        return "!=";
    }
    return opt;
}
86 | | |
87 | | Status DeleteHandler::generate_delete_predicate(const TabletSchema& schema, |
88 | | const std::vector<TCondition>& conditions, |
89 | 97 | DeletePredicatePB* del_pred) { |
90 | 97 | DBUG_EXECUTE_IF("DeleteHandler::generate_delete_predicate.inject_failure", {Line | Count | Source | 37 | 97 | if (UNLIKELY(config::enable_debug_points)) { \Line | Count | Source | 36 | 97 | #define UNLIKELY(expr) __builtin_expect(!!(expr), 0) Branch (36:24): [True: 50, False: 47]
|
| 38 | 50 | auto dp = DebugPoints::instance()->get_debug_point(debug_point_name); \ | 39 | 50 | if (dp) { \ Branch (39:13): [True: 0, False: 50]
| 40 | 0 | [[maybe_unused]] auto DP_NAME = debug_point_name; \ | 41 | 0 | { code; } \ | 42 | 0 | } \ | 43 | 50 | } |
|
91 | 97 | return Status::Error<false>(dp->param<int>("error_code"), |
92 | 97 | dp->param<std::string>("error_msg")); |
93 | 97 | }) |
94 | 97 | if (conditions.empty()) { Branch (94:9): [True: 1, False: 96]
|
95 | 1 | return Status::Error<ErrorCode::INVALID_ARGUMENT>( |
96 | 1 | "invalid parameters for store_cond. condition_size={}", conditions.size()); |
97 | 1 | } |
98 | | |
99 | | // Check whether the delete condition meets the requirements |
100 | 113 | for (const TCondition& condition : conditions) { Branch (100:38): [True: 113, False: 71]
|
101 | 113 | RETURN_IF_ERROR(check_condition_valid(schema, condition)); Line | Count | Source | 637 | 113 | do { \ | 638 | 113 | Status _status_ = (stmt); \ | 639 | 113 | if (UNLIKELY(!_status_.ok())) { \Line | Count | Source | 36 | 113 | #define UNLIKELY(expr) __builtin_expect(!!(expr), 0) Branch (36:24): [True: 25, False: 88]
|
| 640 | 25 | return _status_; \ | 641 | 25 | } \ | 642 | 113 | } while (false) Branch (642:14): [Folded - Ignored]
|
|
102 | 113 | } |
103 | | |
104 | | // Store delete condition |
105 | 88 | for (const TCondition& condition : conditions) { Branch (105:38): [True: 88, False: 71]
|
106 | 88 | if (condition.condition_values.size() > 1) { Branch (106:13): [True: 1, False: 87]
|
107 | 1 | InPredicatePB* in_pred = del_pred->add_in_predicates(); |
108 | 1 | if (condition.__isset.column_unique_id) { Branch (108:17): [True: 0, False: 1]
|
109 | 0 | in_pred->set_column_unique_id(condition.column_unique_id); |
110 | 0 | } |
111 | 1 | in_pred->set_column_name(condition.column_name); |
112 | 1 | bool is_not_in = condition.condition_op == "!*="; |
113 | 1 | in_pred->set_is_not_in(is_not_in); |
114 | 2 | for (const auto& condition_value : condition.condition_values) { Branch (114:46): [True: 2, False: 1]
|
115 | 2 | in_pred->add_values(condition_value); |
116 | 2 | } |
117 | | |
118 | 1 | LOG(INFO) << "store one sub-delete condition. condition name=" << in_pred->column_name() |
119 | 1 | << "condition size=" << in_pred->values().size(); |
120 | 87 | } else { |
121 | | // write sub predicate v1 for compactbility |
122 | 87 | std::string condition_str = construct_sub_predicate(condition); |
123 | 87 | VLOG_NOTICE << __PRETTY_FUNCTION__ << " condition_str: " << condition_str; Line | Count | Source | 42 | 0 | #define VLOG_NOTICE VLOG(3) |
|
124 | 87 | del_pred->add_sub_predicates(condition_str); |
125 | 87 | DeleteSubPredicatePB* sub_predicate = del_pred->add_sub_predicates_v2(); |
126 | 87 | if (condition.__isset.column_unique_id) { Branch (126:17): [True: 0, False: 87]
|
127 | | // only light schema change capable table set this field |
128 | 0 | sub_predicate->set_column_unique_id(condition.column_unique_id); |
129 | 87 | } else if (TCondition tmp; !DeleteHandler::parse_condition(condition_str, &tmp)) { Branch (129:40): [True: 0, False: 87]
|
130 | | // for non light shema change tables, check regex match for condition str |
131 | 0 | LOG(WARNING) << "failed to parse condition_str, condtion=" |
132 | 0 | << ThriftDebugString(condition); |
133 | 0 | return Status::Error<ErrorCode::INVALID_ARGUMENT>( |
134 | 0 | "failed to parse condition_str, condtion={}", ThriftDebugString(condition)); |
135 | 0 | } |
136 | | |
137 | 87 | sub_predicate->set_column_name(condition.column_name); |
138 | 87 | sub_predicate->set_op(trans_op(condition.condition_op)); |
139 | 87 | sub_predicate->set_cond_value(condition.condition_values[0]); |
140 | 87 | LOG(INFO) << "store one sub-delete condition. condition=" |
141 | 87 | << fmt::format(" {} {} {}", condition.column_name, condition.condition_op, |
142 | 87 | condition.condition_values[0]); |
143 | 87 | } |
144 | 88 | } |
145 | 71 | del_pred->set_version(-1); |
146 | | |
147 | 71 | return Status::OK(); |
148 | 71 | } |
149 | | |
150 | | Status DeleteHandler::convert_to_sub_pred_v2(DeletePredicatePB* delete_pred, |
151 | 0 | TabletSchemaSPtr schema) { |
152 | 0 | if (!delete_pred->sub_predicates().empty() && delete_pred->sub_predicates_v2().empty()) { Branch (152:9): [True: 0, False: 0]
Branch (152:51): [True: 0, False: 0]
|
153 | 0 | for (const auto& condition_str : delete_pred->sub_predicates()) { Branch (153:40): [True: 0, False: 0]
|
154 | 0 | auto* sub_pred = delete_pred->add_sub_predicates_v2(); |
155 | 0 | TCondition condition; |
156 | 0 | static_cast<void>(parse_condition(condition_str, &condition)); |
157 | 0 | const auto& column = *DORIS_TRY(schema->column(condition.column_name)); Line | Count | Source | 716 | 0 | ({ \ | 717 | 0 | auto&& res = (stmt); \ | 718 | 0 | using T = std::decay_t<decltype(res)>; \ | 719 | 0 | if (!res.has_value()) [[unlikely]] { \ Branch (719:13): [True: 0, False: 0]
| 720 | 0 | return std::forward<T>(res).error(); \ | 721 | 0 | } \ | 722 | 0 | std::forward<T>(res).value(); \ | 723 | 0 | }); |
|
158 | 0 | sub_pred->set_column_unique_id(column.unique_id()); |
159 | 0 | sub_pred->set_column_name(condition.column_name); |
160 | 0 | sub_pred->set_op(condition.condition_op); |
161 | 0 | sub_pred->set_cond_value(condition.condition_values[0]); |
162 | 0 | } |
163 | 0 | } |
164 | | |
165 | 0 | auto* in_pred_list = delete_pred->mutable_in_predicates(); |
166 | 0 | for (auto& in_pred : *in_pred_list) { Branch (166:24): [True: 0, False: 0]
|
167 | 0 | const auto& column = *DORIS_TRY(schema->column(in_pred.column_name())); Line | Count | Source | 716 | 0 | ({ \ | 717 | 0 | auto&& res = (stmt); \ | 718 | 0 | using T = std::decay_t<decltype(res)>; \ | 719 | 0 | if (!res.has_value()) [[unlikely]] { \ Branch (719:13): [True: 0, False: 0]
| 720 | 0 | return std::forward<T>(res).error(); \ | 721 | 0 | } \ | 722 | 0 | std::forward<T>(res).value(); \ | 723 | 0 | }); |
|
168 | 0 | in_pred.set_column_unique_id(column.unique_id()); |
169 | 0 | } |
170 | 0 | return Status::OK(); |
171 | 0 | } |
172 | | |
173 | | bool DeleteHandler::is_condition_value_valid(const TabletColumn& column, |
174 | | const std::string& condition_op, |
175 | 113 | const string& value_str) { |
176 | 113 | if ("IS" == condition_op && ("NULL" == value_str || "NOT NULL" == value_str)) { Branch (176:9): [True: 1, False: 112]
Branch (176:34): [True: 1, False: 0]
Branch (176:57): [True: 0, False: 0]
|
177 | 1 | return true; |
178 | 1 | } |
179 | | |
180 | 112 | FieldType field_type = column.type(); |
181 | 112 | switch (field_type) { |
182 | 11 | case FieldType::OLAP_FIELD_TYPE_TINYINT: Branch (182:5): [True: 11, False: 101]
|
183 | 11 | return valid_signed_number<int8_t>(value_str); |
184 | 13 | case FieldType::OLAP_FIELD_TYPE_SMALLINT: Branch (184:5): [True: 13, False: 99]
|
185 | 13 | return valid_signed_number<int16_t>(value_str); |
186 | 54 | case FieldType::OLAP_FIELD_TYPE_INT: Branch (186:5): [True: 54, False: 58]
|
187 | 54 | return valid_signed_number<int32_t>(value_str); |
188 | 5 | case FieldType::OLAP_FIELD_TYPE_BIGINT: Branch (188:5): [True: 5, False: 107]
|
189 | 5 | return valid_signed_number<int64_t>(value_str); |
190 | 4 | case FieldType::OLAP_FIELD_TYPE_LARGEINT: Branch (190:5): [True: 4, False: 108]
|
191 | 4 | return valid_signed_number<int128_t>(value_str); |
192 | 0 | case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT: Branch (192:5): [True: 0, False: 112]
|
193 | 0 | return valid_unsigned_number<uint8_t>(value_str); |
194 | 0 | case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: Branch (194:5): [True: 0, False: 112]
|
195 | 0 | return valid_unsigned_number<uint16_t>(value_str); |
196 | 0 | case FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT: Branch (196:5): [True: 0, False: 112]
|
197 | 0 | return valid_unsigned_number<uint32_t>(value_str); |
198 | 0 | case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT: Branch (198:5): [True: 0, False: 112]
|
199 | 0 | return valid_unsigned_number<uint64_t>(value_str); |
200 | 7 | case FieldType::OLAP_FIELD_TYPE_DECIMAL: Branch (200:5): [True: 7, False: 105]
|
201 | 7 | return valid_decimal(value_str, column.precision(), column.frac()); |
202 | 0 | case FieldType::OLAP_FIELD_TYPE_DECIMAL32: Branch (202:5): [True: 0, False: 112]
|
203 | 0 | return valid_decimal(value_str, column.precision(), column.frac()); |
204 | 0 | case FieldType::OLAP_FIELD_TYPE_DECIMAL64: Branch (204:5): [True: 0, False: 112]
|
205 | 0 | return valid_decimal(value_str, column.precision(), column.frac()); |
206 | 0 | case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: Branch (206:5): [True: 0, False: 112]
|
207 | 0 | return valid_decimal(value_str, column.precision(), column.frac()); |
208 | 0 | case FieldType::OLAP_FIELD_TYPE_DECIMAL256: Branch (208:5): [True: 0, False: 112]
|
209 | 0 | return valid_decimal(value_str, column.precision(), column.frac()); |
210 | 3 | case FieldType::OLAP_FIELD_TYPE_CHAR: Branch (210:5): [True: 3, False: 109]
|
211 | 7 | case FieldType::OLAP_FIELD_TYPE_VARCHAR: Branch (211:5): [True: 4, False: 108]
|
212 | 7 | return value_str.size() <= column.length(); |
213 | 0 | case FieldType::OLAP_FIELD_TYPE_STRING: Branch (213:5): [True: 0, False: 112]
|
214 | 0 | return value_str.size() <= config::string_type_length_soft_limit_bytes; |
215 | 5 | case FieldType::OLAP_FIELD_TYPE_DATE: Branch (215:5): [True: 5, False: 107]
|
216 | 11 | case FieldType::OLAP_FIELD_TYPE_DATETIME: Branch (216:5): [True: 6, False: 106]
|
217 | 11 | case FieldType::OLAP_FIELD_TYPE_DATEV2: Branch (217:5): [True: 0, False: 112]
|
218 | 11 | case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: Branch (218:5): [True: 0, False: 112]
|
219 | 11 | return valid_datetime(value_str, column.frac()); |
220 | 0 | case FieldType::OLAP_FIELD_TYPE_BOOL: Branch (220:5): [True: 0, False: 112]
|
221 | 0 | return valid_bool(value_str); |
222 | 0 | case FieldType::OLAP_FIELD_TYPE_IPV4: Branch (222:5): [True: 0, False: 112]
|
223 | 0 | return valid_ipv4(value_str); |
224 | 0 | case FieldType::OLAP_FIELD_TYPE_IPV6: Branch (224:5): [True: 0, False: 112]
|
225 | 0 | return valid_ipv6(value_str); |
226 | 0 | default: Branch (226:5): [True: 0, False: 112]
|
227 | 0 | LOG(WARNING) << "unknown field type. [type=" << int(field_type) << "]"; |
228 | 112 | } |
229 | 0 | return false; |
230 | 112 | } |
231 | | |
232 | 113 | Status DeleteHandler::check_condition_valid(const TabletSchema& schema, const TCondition& cond) { |
233 | | // Check whether the column exists |
234 | 113 | int32_t field_index = schema.field_index(cond.column_name); |
235 | 113 | if (field_index < 0) { Branch (235:9): [True: 1, False: 112]
|
236 | 1 | return Status::Error<ErrorCode::INVALID_ARGUMENT>("field is not existent. [field_index={}]", |
237 | 1 | field_index); |
238 | 1 | } |
239 | | |
240 | | // Delete condition should only applied on key columns or duplicate key table, and |
241 | | // the condition column type should not be float or double. |
242 | 112 | const TabletColumn& column = schema.column(field_index); |
243 | | |
244 | 112 | if (column.type() == FieldType::OLAP_FIELD_TYPE_DOUBLE || Branch (244:9): [True: 0, False: 112]
|
245 | 112 | column.type() == FieldType::OLAP_FIELD_TYPE_FLOAT) { Branch (245:9): [True: 0, False: 112]
|
246 | 0 | return Status::Error<ErrorCode::INVALID_ARGUMENT>("data type is float or double."); |
247 | 0 | } |
248 | | |
249 | | // Check operator and operands size are matched. |
250 | 112 | if ("*=" != cond.condition_op && "!*=" != cond.condition_op && Branch (250:9): [True: 110, False: 2]
Branch (250:38): [True: 109, False: 1]
|
251 | 112 | cond.condition_values.size() != 1) { Branch (251:9): [True: 0, False: 109]
|
252 | 0 | return Status::Error<ErrorCode::INVALID_ARGUMENT>("invalid condition value size. [size={}]", |
253 | 0 | cond.condition_values.size()); |
254 | 0 | } |
255 | | |
256 | | // Check each operand is valid |
257 | 113 | for (const auto& condition_value : cond.condition_values) { Branch (257:38): [True: 113, False: 88]
|
258 | 113 | if (!is_condition_value_valid(column, cond.condition_op, condition_value)) { Branch (258:13): [True: 24, False: 89]
|
259 | 24 | return Status::Error<ErrorCode::INVALID_ARGUMENT>("invalid condition value. [value={}]", |
260 | 24 | condition_value); |
261 | 24 | } |
262 | 113 | } |
263 | | |
264 | 88 | if (!cond.__isset.column_unique_id) { Branch (264:9): [True: 88, False: 0]
|
265 | 88 | LOG(WARNING) << "column=" << cond.column_name |
266 | 88 | << " in predicate does not have uid, table id=" << schema.table_id(); |
267 | | // TODO(tsy): make it fail here after FE forbidding hard-link-schema-change |
268 | 88 | return Status::OK(); |
269 | 88 | } |
270 | 0 | if (schema.field_index(cond.column_unique_id) == -1) { Branch (270:9): [True: 0, False: 0]
|
271 | 0 | const auto& err_msg = |
272 | 0 | fmt::format("column id does not exists in table={}, schema version={},", |
273 | 0 | schema.table_id(), schema.schema_version()); |
274 | 0 | return Status::Error<ErrorCode::INVALID_ARGUMENT>(err_msg); |
275 | 0 | } |
276 | 0 | if (!iequal(schema.column_by_uid(cond.column_unique_id).name(), cond.column_name)) { Branch (276:9): [True: 0, False: 0]
|
277 | 0 | const auto& err_msg = fmt::format( |
278 | 0 | "colum name={} does not belongs to column uid={}, which " |
279 | 0 | "column name={}, " |
280 | 0 | "delete_cond.column_name ={}", |
281 | 0 | cond.column_name, cond.column_unique_id, |
282 | 0 | schema.column_by_uid(cond.column_unique_id).name(), cond.column_name); |
283 | 0 | return Status::Error<ErrorCode::INVALID_ARGUMENT>(err_msg); |
284 | 0 | } |
285 | | |
286 | 0 | return Status::OK(); |
287 | 0 | } |
288 | | |
289 | 90 | Status DeleteHandler::parse_condition(const DeleteSubPredicatePB& sub_cond, TCondition* condition) { |
290 | 90 | if (!sub_cond.has_column_name() || !sub_cond.has_op() || !sub_cond.has_cond_value()) { Branch (290:9): [True: 0, False: 90]
Branch (290:40): [True: 0, False: 90]
Branch (290:62): [True: 0, False: 90]
|
291 | 0 | return Status::Error<ErrorCode::INVALID_ARGUMENT>( |
292 | 0 | "fail to parse condition. condition={} {} {}", sub_cond.column_name(), |
293 | 0 | sub_cond.op(), sub_cond.cond_value()); |
294 | 0 | } |
295 | 90 | if (sub_cond.has_column_unique_id()) { Branch (295:9): [True: 0, False: 90]
|
296 | 0 | condition->column_unique_id = sub_cond.column_unique_id(); |
297 | 0 | } |
298 | 90 | condition->column_name = sub_cond.column_name(); |
299 | 90 | condition->condition_op = sub_cond.op(); |
300 | 90 | condition->condition_values.push_back(sub_cond.cond_value()); |
301 | 90 | return Status::OK(); |
302 | 90 | } |
303 | | |
// clang-format off
// Condition string format: (column_name)(op)(value)
// e.g. condition_str="c1 = 1597751948193618247 and length(source)<1;\n;\n"
//  column_name: matches "c1"; must cover FeNameFormat.java COLUMN_NAME_REGEX
//               and stay compatible with any legacy column names
//  operator: matches "="
//  value: matches "1597751948193618247 and length(source)<1;\n;\n"
//
// The pattern captures four groups: 1 = column name, 2 = operator,
// 3 = the whole value (including surrounding single quotes, if any),
// 4 = the value between the single quotes (only when the value is quoted).
//
// For more info, see construct_sub_predicate
// FIXME(gavin): This is a tricky implementation, it should not be the final resolution, refactor it.
const char* const CONDITION_STR_PATTERN =
    // .----------------- column-name --------------------------.   .----------------------- operator ------------------------.   .------------ value ----------.
    R"(([_a-zA-Z@0-9\s/\p{L}][.a-zA-Z0-9_+-/?@#$%^&*"\s,:\p{L}]*)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?: IS ))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))";
    // '----------------- group 1 ------------------------------'   '--------------------- group 2 ---------------------------'   | '-- group 4--'              |
    //                                                              match any of: = != >> << >= <= *= " IS "                      '----------- group 3 ---------'
    //                                                                                                                            match **ANY THING** without(4)
    //                                                                                                                            or with(3) single quote
// clang-format on
// Regex compiled from CONDITION_STR_PATTERN at namespace scope.
RE2 DELETE_HANDLER_REGEX(CONDITION_STR_PATTERN);
323 | | |
324 | 189 | Status DeleteHandler::parse_condition(const std::string& condition_str, TCondition* condition) { |
325 | 189 | std::string col_name, op, value, g4; |
326 | | |
327 | 189 | bool matched = RE2::FullMatch(condition_str, DELETE_HANDLER_REGEX, &col_name, &op, &value, |
328 | 189 | &g4); // exact match |
329 | | |
330 | 189 | if (!matched) { Branch (330:9): [True: 3, False: 186]
|
331 | 3 | return Status::InvalidArgument("fail to sub condition. condition={}", condition_str); |
332 | 3 | } |
333 | | |
334 | 186 | condition->column_name = col_name; |
335 | 186 | condition->condition_op = op == " IS " ? "IS" : op; Branch (335:31): [True: 40, False: 146]
|
336 | | // match string with single quotes, a = b or a = 'b' |
337 | 186 | if (!g4.empty()) { Branch (337:9): [True: 129, False: 57]
|
338 | 129 | condition->condition_values.push_back(g4); |
339 | 129 | } else { |
340 | 57 | condition->condition_values.push_back(value); |
341 | 57 | } |
342 | 186 | VLOG_NOTICE << "parsed condition_str: col_name={" << condition->column_name << "} op={"Line | Count | Source | 42 | 0 | #define VLOG_NOTICE VLOG(3) |
|
343 | 0 | << condition->condition_op << "} val={" << condition->condition_values.back() |
344 | 0 | << "}"; |
345 | 186 | return Status::OK(); |
346 | 189 | } |
347 | | |
348 | | template <typename SubPredType> |
349 | | requires(std::is_same_v<SubPredType, DeleteSubPredicatePB> or |
350 | | std::is_same_v<SubPredType, std::string>) |
351 | | Status DeleteHandler::_parse_column_pred(TabletSchemaSPtr complete_schema, |
352 | | TabletSchemaSPtr delete_pred_related_schema, |
353 | | const RepeatedPtrField<SubPredType>& sub_pred_list, |
354 | 161 | DeleteConditions* delete_conditions) { |
355 | 172 | for (const auto& sub_predicate : sub_pred_list) { Branch (355:36): [True: 90, False: 85]
Branch (355:36): [True: 82, False: 76]
|
356 | 172 | TCondition condition; |
357 | 172 | RETURN_IF_ERROR(parse_condition(sub_predicate, &condition)); Line | Count | Source | 637 | 90 | do { \ | 638 | 90 | Status _status_ = (stmt); \ | 639 | 90 | if (UNLIKELY(!_status_.ok())) { \Line | Count | Source | 36 | 90 | #define UNLIKELY(expr) __builtin_expect(!!(expr), 0) Branch (36:24): [True: 0, False: 90]
|
| 640 | 0 | return _status_; \ | 641 | 0 | } \ | 642 | 90 | } while (false) Branch (642:14): [Folded - Ignored]
|
| RETURN_IF_ERROR(parse_condition(sub_predicate, &condition)); Line | Count | Source | 637 | 82 | do { \ | 638 | 82 | Status _status_ = (stmt); \ | 639 | 82 | if (UNLIKELY(!_status_.ok())) { \Line | Count | Source | 36 | 82 | #define UNLIKELY(expr) __builtin_expect(!!(expr), 0) Branch (36:24): [True: 0, False: 82]
|
| 640 | 0 | return _status_; \ | 641 | 0 | } \ | 642 | 82 | } while (false) Branch (642:14): [Folded - Ignored]
|
|
358 | 172 | int32_t col_unique_id = -1; |
359 | 172 | if constexpr (std::is_same_v<SubPredType, DeleteSubPredicatePB>) { Branch (359:23): [Folded - Ignored]
Branch (359:23): [Folded - Ignored]
|
360 | 90 | if (sub_predicate.has_column_unique_id()) [[likely]] { Branch (360:17): [True: 0, False: 90]
|
361 | 0 | col_unique_id = sub_predicate.column_unique_id(); |
362 | 0 | } |
363 | 90 | } |
364 | 172 | if (col_unique_id < 0) { Branch (364:13): [True: 90, False: 0]
Branch (364:13): [True: 82, False: 0]
|
365 | 172 | const auto& column = |
366 | 172 | *DORIS_TRY(delete_pred_related_schema->column(condition.column_name)); Line | Count | Source | 716 | 90 | ({ \ | 717 | 90 | auto&& res = (stmt); \ | 718 | 90 | using T = std::decay_t<decltype(res)>; \ | 719 | 90 | if (!res.has_value()) [[unlikely]] { \ Branch (719:13): [True: 0, False: 90]
| 720 | 0 | return std::forward<T>(res).error(); \ | 721 | 0 | } \ | 722 | 90 | std::forward<T>(res).value(); \ | 723 | 90 | }); |
| *DORIS_TRY(delete_pred_related_schema->column(condition.column_name)); Line | Count | Source | 716 | 82 | ({ \ | 717 | 82 | auto&& res = (stmt); \ | 718 | 82 | using T = std::decay_t<decltype(res)>; \ | 719 | 82 | if (!res.has_value()) [[unlikely]] { \ Branch (719:13): [True: 0, False: 82]
| 720 | 0 | return std::forward<T>(res).error(); \ | 721 | 0 | } \ | 722 | 82 | std::forward<T>(res).value(); \ | 723 | 82 | }); |
|
367 | 172 | col_unique_id = column.unique_id(); |
368 | 172 | } |
369 | 172 | condition.__set_column_unique_id(col_unique_id); |
370 | 172 | const auto& column = complete_schema->column_by_uid(col_unique_id); |
371 | 172 | uint32_t index = complete_schema->field_index(col_unique_id); |
372 | 172 | auto* predicate = |
373 | 172 | parse_to_predicate(column, index, condition, _predicate_arena.get(), true); |
374 | 172 | if (predicate != nullptr) { Branch (374:13): [True: 90, False: 0]
Branch (374:13): [True: 82, False: 0]
|
375 | 172 | delete_conditions->column_predicate_vec.push_back(predicate); |
376 | 172 | } |
377 | 172 | } |
378 | 161 | return Status::OK(); |
379 | 161 | } _ZN5doris13DeleteHandler18_parse_column_predINS_20DeleteSubPredicatePBEEENS_6StatusESt10shared_ptrINS_12TabletSchemaEES6_RKN6google8protobuf16RepeatedPtrFieldIT_EEPNS_16DeleteConditionsE Line | Count | Source | 354 | 85 | DeleteConditions* delete_conditions) { | 355 | 90 | for (const auto& sub_predicate : sub_pred_list) { Branch (355:36): [True: 90, False: 85]
| 356 | 90 | TCondition condition; | 357 | 90 | RETURN_IF_ERROR(parse_condition(sub_predicate, &condition)); Line | Count | Source | 637 | 90 | do { \ | 638 | 90 | Status _status_ = (stmt); \ | 639 | 90 | if (UNLIKELY(!_status_.ok())) { \Line | Count | Source | 36 | 90 | #define UNLIKELY(expr) __builtin_expect(!!(expr), 0) Branch (36:24): [True: 0, False: 90]
|
| 640 | 0 | return _status_; \ | 641 | 0 | } \ | 642 | 90 | } while (false) Branch (642:14): [Folded - Ignored]
|
| 358 | 90 | int32_t col_unique_id = -1; | 359 | 90 | if constexpr (std::is_same_v<SubPredType, DeleteSubPredicatePB>) { Branch (359:23): [Folded - Ignored]
| 360 | 90 | if (sub_predicate.has_column_unique_id()) [[likely]] { Branch (360:17): [True: 0, False: 90]
| 361 | 0 | col_unique_id = sub_predicate.column_unique_id(); | 362 | 0 | } | 363 | 90 | } | 364 | 90 | if (col_unique_id < 0) { Branch (364:13): [True: 90, False: 0]
| 365 | 90 | const auto& column = | 366 | 90 | *DORIS_TRY(delete_pred_related_schema->column(condition.column_name)); Line | Count | Source | 716 | 90 | ({ \ | 717 | 90 | auto&& res = (stmt); \ | 718 | 90 | using T = std::decay_t<decltype(res)>; \ | 719 | 90 | if (!res.has_value()) [[unlikely]] { \ Branch (719:13): [True: 0, False: 90]
| 720 | 0 | return std::forward<T>(res).error(); \ | 721 | 0 | } \ | 722 | 90 | std::forward<T>(res).value(); \ | 723 | 90 | }); |
| 367 | 90 | col_unique_id = column.unique_id(); | 368 | 90 | } | 369 | 90 | condition.__set_column_unique_id(col_unique_id); | 370 | 90 | const auto& column = complete_schema->column_by_uid(col_unique_id); | 371 | 90 | uint32_t index = complete_schema->field_index(col_unique_id); | 372 | 90 | auto* predicate = | 373 | 90 | parse_to_predicate(column, index, condition, _predicate_arena.get(), true); | 374 | 90 | if (predicate != nullptr) { Branch (374:13): [True: 90, False: 0]
| 375 | 90 | delete_conditions->column_predicate_vec.push_back(predicate); | 376 | 90 | } | 377 | 90 | } | 378 | 85 | return Status::OK(); | 379 | 85 | } |
_ZN5doris13DeleteHandler18_parse_column_predINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEENS_6StatusESt10shared_ptrINS_12TabletSchemaEESB_RKN6google8protobuf16RepeatedPtrFieldIT_EEPNS_16DeleteConditionsE Line | Count | Source | 354 | 76 | DeleteConditions* delete_conditions) { | 355 | 82 | for (const auto& sub_predicate : sub_pred_list) { Branch (355:36): [True: 82, False: 76]
| 356 | 82 | TCondition condition; | 357 | 82 | RETURN_IF_ERROR(parse_condition(sub_predicate, &condition)); Line | Count | Source | 637 | 82 | do { \ | 638 | 82 | Status _status_ = (stmt); \ | 639 | 82 | if (UNLIKELY(!_status_.ok())) { \Line | Count | Source | 36 | 82 | #define UNLIKELY(expr) __builtin_expect(!!(expr), 0) Branch (36:24): [True: 0, False: 82]
|
| 640 | 0 | return _status_; \ | 641 | 0 | } \ | 642 | 82 | } while (false) Branch (642:14): [Folded - Ignored]
|
| 358 | 82 | int32_t col_unique_id = -1; | 359 | 82 | if constexpr (std::is_same_v<SubPredType, DeleteSubPredicatePB>) { Branch (359:23): [Folded - Ignored]
| 360 | 82 | if (sub_predicate.has_column_unique_id()) [[likely]] { | 361 | 82 | col_unique_id = sub_predicate.column_unique_id(); | 362 | 82 | } | 363 | 82 | } | 364 | 82 | if (col_unique_id < 0) { Branch (364:13): [True: 82, False: 0]
| 365 | 82 | const auto& column = | 366 | 82 | *DORIS_TRY(delete_pred_related_schema->column(condition.column_name)); Line | Count | Source | 716 | 82 | ({ \ | 717 | 82 | auto&& res = (stmt); \ | 718 | 82 | using T = std::decay_t<decltype(res)>; \ | 719 | 82 | if (!res.has_value()) [[unlikely]] { \ Branch (719:13): [True: 0, False: 82]
| 720 | 0 | return std::forward<T>(res).error(); \ | 721 | 0 | } \ | 722 | 82 | std::forward<T>(res).value(); \ | 723 | 82 | }); |
| 367 | 82 | col_unique_id = column.unique_id(); | 368 | 82 | } | 369 | 82 | condition.__set_column_unique_id(col_unique_id); | 370 | 82 | const auto& column = complete_schema->column_by_uid(col_unique_id); | 371 | 82 | uint32_t index = complete_schema->field_index(col_unique_id); | 372 | 82 | auto* predicate = | 373 | 82 | parse_to_predicate(column, index, condition, _predicate_arena.get(), true); | 374 | 82 | if (predicate != nullptr) { Branch (374:13): [True: 82, False: 0]
| 375 | 82 | delete_conditions->column_predicate_vec.push_back(predicate); | 376 | 82 | } | 377 | 82 | } | 378 | 76 | return Status::OK(); | 379 | 76 | } |
|
380 | | |
// Explicit instantiations of _parse_column_pred for the two sub-predicate
// types its requires-clause admits: DeleteSubPredicatePB and std::string.
template Status DeleteHandler::_parse_column_pred<DeleteSubPredicatePB>(
        TabletSchemaSPtr complete_schema, TabletSchemaSPtr delete_pred_related_schema,
        const ::google::protobuf::RepeatedPtrField<DeleteSubPredicatePB>& sub_pred_list,
        DeleteConditions* delete_conditions);

template Status DeleteHandler::_parse_column_pred<std::string>(
        TabletSchemaSPtr complete_schema, TabletSchemaSPtr delete_pred_related_schema,
        const ::google::protobuf::RepeatedPtrField<std::string>& sub_pred_list,
        DeleteConditions* delete_conditions);
390 | | |
391 | | Status DeleteHandler::init(TabletSchemaSPtr tablet_schema, |
392 | 364 | const std::vector<RowsetMetaSharedPtr>& delete_preds, int64_t version) { |
393 | 364 | DCHECK(!_is_inited) << "reinitialize delete handler."; |
394 | 364 | DCHECK(version >= 0) << "invalid parameters. version=" << version; |
395 | 364 | _predicate_arena = std::make_unique<vectorized::Arena>(); |
396 | | |
397 | 364 | for (const auto& delete_pred : delete_preds) { Branch (397:34): [True: 161, False: 364]
|
398 | | // Skip the delete condition with large version |
399 | 161 | if (delete_pred->version().first > version) { Branch (399:13): [True: 0, False: 161]
|
400 | 0 | continue; |
401 | 0 | } |
402 | | // Need the tablet schema at the delete condition to parse the accurate column |
403 | 161 | const auto& delete_pred_related_schema = delete_pred->tablet_schema(); |
404 | 161 | const auto& delete_condition = delete_pred->delete_predicate(); |
405 | 161 | DeleteConditions temp; |
406 | 161 | temp.filter_version = delete_pred->version().first; |
407 | 161 | if (!delete_condition.sub_predicates_v2().empty()) { Branch (407:13): [True: 85, False: 76]
|
408 | 85 | RETURN_IF_ERROR(_parse_column_pred(tablet_schema, delete_pred_related_schema, Line | Count | Source | 637 | 85 | do { \ | 638 | 85 | Status _status_ = (stmt); \ | 639 | 85 | if (UNLIKELY(!_status_.ok())) { \Line | Count | Source | 36 | 85 | #define UNLIKELY(expr) __builtin_expect(!!(expr), 0) Branch (36:24): [True: 0, False: 85]
|
| 640 | 0 | return _status_; \ | 641 | 0 | } \ | 642 | 85 | } while (false) Branch (642:14): [Folded - Ignored]
|
|
409 | 85 | delete_condition.sub_predicates_v2(), &temp)); |
410 | 85 | } else { |
411 | | // make it compatible with the former versions |
412 | 76 | RETURN_IF_ERROR(_parse_column_pred(tablet_schema, delete_pred_related_schema, Line | Count | Source | 637 | 76 | do { \ | 638 | 76 | Status _status_ = (stmt); \ | 639 | 76 | if (UNLIKELY(!_status_.ok())) { \Line | Count | Source | 36 | 76 | #define UNLIKELY(expr) __builtin_expect(!!(expr), 0) Branch (36:24): [True: 0, False: 76]
|
| 640 | 0 | return _status_; \ | 641 | 0 | } \ | 642 | 76 | } while (false) Branch (642:14): [Folded - Ignored]
|
|
413 | 76 | delete_condition.sub_predicates(), &temp)); |
414 | 76 | } |
415 | 161 | for (const auto& in_predicate : delete_condition.in_predicates()) { Branch (415:39): [True: 0, False: 161]
|
416 | 0 | TCondition condition; |
417 | 0 | condition.__set_column_name(in_predicate.column_name()); |
418 | |
|
419 | 0 | int32_t col_unique_id = -1; |
420 | 0 | if (in_predicate.has_column_unique_id()) { Branch (420:17): [True: 0, False: 0]
|
421 | 0 | col_unique_id = in_predicate.column_unique_id(); |
422 | 0 | } else { |
423 | | // if upgrade from version 2.0.x, column_unique_id maybe not set |
424 | 0 | const auto& pre_column = |
425 | 0 | *DORIS_TRY(delete_pred_related_schema->column(condition.column_name)); Line | Count | Source | 716 | 0 | ({ \ | 717 | 0 | auto&& res = (stmt); \ | 718 | 0 | using T = std::decay_t<decltype(res)>; \ | 719 | 0 | if (!res.has_value()) [[unlikely]] { \ Branch (719:13): [True: 0, False: 0]
| 720 | 0 | return std::forward<T>(res).error(); \ | 721 | 0 | } \ | 722 | 0 | std::forward<T>(res).value(); \ | 723 | 0 | }); |
|
426 | 0 | col_unique_id = pre_column.unique_id(); |
427 | 0 | } |
428 | 0 | if (col_unique_id == -1) { Branch (428:17): [True: 0, False: 0]
|
429 | 0 | return Status::Error<ErrorCode::DELETE_INVALID_CONDITION>( |
430 | 0 | "cannot get column_unique_id for column {}", condition.column_name); |
431 | 0 | } |
432 | 0 | condition.__set_column_unique_id(col_unique_id); |
433 | |
|
434 | 0 | if (in_predicate.is_not_in()) { Branch (434:17): [True: 0, False: 0]
|
435 | 0 | condition.__set_condition_op("!*="); |
436 | 0 | } else { |
437 | 0 | condition.__set_condition_op("*="); |
438 | 0 | } |
439 | 0 | for (const auto& value : in_predicate.values()) { Branch (439:36): [True: 0, False: 0]
|
440 | 0 | condition.condition_values.push_back(value); |
441 | 0 | } |
442 | 0 | const auto& column = tablet_schema->column_by_uid(col_unique_id); |
443 | 0 | uint32_t index = tablet_schema->field_index(col_unique_id); |
444 | 0 | temp.column_predicate_vec.push_back( |
445 | 0 | parse_to_predicate(column, index, condition, _predicate_arena.get(), true)); |
446 | 0 | } |
447 | | |
448 | 161 | _del_conds.emplace_back(std::move(temp)); |
449 | 161 | } |
450 | | |
451 | 364 | _is_inited = true; |
452 | | |
453 | 364 | return Status::OK(); |
454 | 364 | } |
455 | | |
456 | 389 | DeleteHandler::~DeleteHandler() { |
457 | 389 | if (!_is_inited) { Branch (457:9): [True: 25, False: 364]
|
458 | 25 | return; |
459 | 25 | } |
460 | | |
461 | 364 | for (auto& cond : _del_conds) { Branch (461:21): [True: 161, False: 364]
|
462 | 172 | for (const auto* pred : cond.column_predicate_vec) { Branch (462:31): [True: 172, False: 161]
|
463 | 172 | delete pred; |
464 | 172 | } |
465 | 161 | } |
466 | | |
467 | 364 | _del_conds.clear(); |
468 | 364 | _is_inited = false; |
469 | 364 | } |
470 | | |
471 | | void DeleteHandler::get_delete_conditions_after_version( |
472 | | int64_t version, AndBlockColumnPredicate* and_block_column_predicate_ptr, |
473 | | std::unordered_map<int32_t, std::vector<const ColumnPredicate*>>* |
474 | 1.10k | del_predicates_for_zone_map) const { |
475 | 1.10k | for (const auto& del_cond : _del_conds) { Branch (475:31): [True: 555, False: 1.10k]
|
476 | 555 | if (del_cond.filter_version > version) { Branch (476:13): [True: 406, False: 149]
|
477 | | // now, only query support delete column predicate operator |
478 | 406 | if (!del_cond.column_predicate_vec.empty()) { Branch (478:17): [True: 406, False: 0]
|
479 | 406 | if (del_cond.column_predicate_vec.size() == 1) { Branch (479:21): [True: 406, False: 0]
|
480 | 406 | auto single_column_block_predicate = SingleColumnBlockPredicate::create_unique( |
481 | 406 | del_cond.column_predicate_vec[0]); |
482 | 406 | and_block_column_predicate_ptr->add_column_predicate( |
483 | 406 | std::move(single_column_block_predicate)); |
484 | 406 | if (del_predicates_for_zone_map->count( Branch (484:25): [True: 406, False: 0]
|
485 | 406 | del_cond.column_predicate_vec[0]->column_id()) < 1) { |
486 | 406 | del_predicates_for_zone_map->insert( |
487 | 406 | {del_cond.column_predicate_vec[0]->column_id(), |
488 | 406 | std::vector<const ColumnPredicate*> {}}); |
489 | 406 | } |
490 | 406 | (*del_predicates_for_zone_map)[del_cond.column_predicate_vec[0]->column_id()] |
491 | 406 | .push_back(del_cond.column_predicate_vec[0]); |
492 | 406 | } else { |
493 | 0 | auto or_column_predicate = OrBlockColumnPredicate::create_unique(); |
494 | | |
495 | | // build or_column_predicate |
496 | | // when delete from where a = 1 and b = 2, we can not use del_predicates_for_zone_map to filter zone page, |
497 | | // so here do not put predicate to del_predicates_for_zone_map, |
498 | | // refer #17145 for more details. |
499 | | // // TODO: need refactor design and code to use more version delete and more column delete to filter zone page. |
500 | 0 | std::for_each(del_cond.column_predicate_vec.cbegin(), |
501 | 0 | del_cond.column_predicate_vec.cend(), |
502 | 0 | [&or_column_predicate](const ColumnPredicate* predicate) { |
503 | 0 | or_column_predicate->add_column_predicate( |
504 | 0 | SingleColumnBlockPredicate::create_unique(predicate)); |
505 | 0 | }); |
506 | 0 | and_block_column_predicate_ptr->add_column_predicate( |
507 | 0 | std::move(or_column_predicate)); |
508 | 0 | } |
509 | 406 | } |
510 | 406 | } |
511 | 555 | } |
512 | 1.10k | } |
513 | | |
514 | | } // namespace doris |