Coverage Report

Created: 2026-04-01 19:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/delete/delete_handler.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "storage/delete/delete_handler.h"
19
20
#include <gen_cpp/PaloInternalService_types.h>
21
#include <gen_cpp/olap_file.pb.h>
22
#include <thrift/protocol/TDebugProtocol.h>
23
24
#include <string>
25
#include <vector>
26
27
#include "common/config.h"
28
#include "common/logging.h"
29
#include "common/status.h"
30
#include "core/data_type_serde/data_type_serde.h"
31
#include "storage/olap_common.h"
32
#include "storage/predicate/block_column_predicate.h"
33
#include "storage/predicate/null_predicate.h"
34
#include "storage/predicate/predicate_creator.h"
35
#include "storage/tablet/tablet_schema.h"
36
#include "storage/utils.h"
37
#include "util/debug_points.h"
38
39
using apache::thrift::ThriftDebugString;
40
using std::vector;
41
using std::string;
42
43
using ::google::protobuf::RepeatedPtrField;
44
45
namespace doris {
46
47
// Parses a string value into a Field using the serde's from_fe_string, then builds
48
// a HybridSetBase for IN/NOT_IN predicates.
49
// The type-dispatch via switch/case is still needed because build_set<PType>() and
50
// HybridSet::insert(const void*) require compile-time PrimitiveType, and Field::get<PType>()
51
// must be invoked with the correct type to extract the underlying CppType value.
52
template <PrimitiveType PType>
53
5
void insert_field_to_set(const Field& field, HybridSetBase* set) {
54
5
    if constexpr (is_string_type(PType)) {
55
        // StringSet::insert expects const StringRef*, so we must construct a StringRef
56
        // from the std::string returned by Field::get<>.
57
0
        const auto& tmp = field.get<PType>();
58
0
        StringRef ref(tmp.data(), tmp.size());
59
0
        set->insert(reinterpret_cast<const void*>(&ref));
60
5
    } else {
61
5
        auto tmp = field.get<PType>();
62
5
        set->insert(reinterpret_cast<const void*>(&tmp));
63
5
    }
64
5
}
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE3EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE4EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE5EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE6EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE7EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE8EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE9EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE11EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE12EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE25EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE26EEEvRKNS_5FieldEPNS_13HybridSetBaseE
_ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE42EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Line
Count
Source
53
5
void insert_field_to_set(const Field& field, HybridSetBase* set) {
54
    if constexpr (is_string_type(PType)) {
55
        // StringSet::insert expects const StringRef*, so we must construct a StringRef
56
        // from the std::string returned by Field::get<>.
57
        const auto& tmp = field.get<PType>();
58
        StringRef ref(tmp.data(), tmp.size());
59
        set->insert(reinterpret_cast<const void*>(&ref));
60
5
    } else {
61
5
        auto tmp = field.get<PType>();
62
5
        set->insert(reinterpret_cast<const void*>(&tmp));
63
5
    }
64
5
}
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE2EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE36EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE37EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE20EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE28EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE29EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE30EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE35EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE15EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE10EEEvRKNS_5FieldEPNS_13HybridSetBaseE
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE23EEEvRKNS_5FieldEPNS_13HybridSetBaseE
65
66
#define FROM_FE_STRING_CASE(PType)                            \
67
2
    case PType: {                                             \
68
2
        set = build_set<PType>();                             \
69
5
        for (const auto& s : str) {                           \
70
5
            Field field;                                      \
71
5
            RETURN_IF_ERROR(serde->from_fe_string(s, field)); \
72
5
            insert_field_to_set<PType>(field, set.get());     \
73
5
        }                                                     \
74
2
        return Status::OK();                                  \
75
2
    }
76
Status convert(const DataTypePtr& data_type, const std::list<std::string>& str,
77
2
               std::shared_ptr<HybridSetBase>& set) {
78
2
    auto serde = data_type->get_serde();
79
2
    switch (data_type->get_primitive_type()) {
80
0
        FROM_FE_STRING_CASE(TYPE_TINYINT);
81
0
        FROM_FE_STRING_CASE(TYPE_SMALLINT);
82
0
        FROM_FE_STRING_CASE(TYPE_INT);
83
0
        FROM_FE_STRING_CASE(TYPE_BIGINT);
84
0
        FROM_FE_STRING_CASE(TYPE_LARGEINT);
85
0
        FROM_FE_STRING_CASE(TYPE_FLOAT);
86
0
        FROM_FE_STRING_CASE(TYPE_DOUBLE);
87
0
        FROM_FE_STRING_CASE(TYPE_DATE);
88
0
        FROM_FE_STRING_CASE(TYPE_DATETIME);
89
0
        FROM_FE_STRING_CASE(TYPE_DATEV2);
90
0
        FROM_FE_STRING_CASE(TYPE_DATETIMEV2);
91
2
        FROM_FE_STRING_CASE(TYPE_TIMESTAMPTZ);
92
0
        FROM_FE_STRING_CASE(TYPE_BOOLEAN);
93
0
        FROM_FE_STRING_CASE(TYPE_IPV4);
94
0
        FROM_FE_STRING_CASE(TYPE_IPV6);
95
0
        FROM_FE_STRING_CASE(TYPE_DECIMALV2);
96
0
        FROM_FE_STRING_CASE(TYPE_DECIMAL32);
97
0
        FROM_FE_STRING_CASE(TYPE_DECIMAL64);
98
0
        FROM_FE_STRING_CASE(TYPE_DECIMAL128I);
99
0
        FROM_FE_STRING_CASE(TYPE_DECIMAL256);
100
0
        FROM_FE_STRING_CASE(TYPE_CHAR);
101
0
        FROM_FE_STRING_CASE(TYPE_VARCHAR);
102
0
        FROM_FE_STRING_CASE(TYPE_STRING);
103
0
    default:
104
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>(
105
0
                "unsupported data type in delete handler. type={}",
106
0
                type_to_string(data_type->get_primitive_type()));
107
2
    }
108
0
    return Status::OK();
109
2
}
110
#undef FROM_FE_STRING_CASE
111
112
// Parses a single condition value string into a Field and creates a comparison predicate.
113
// Uses serde->from_fe_string to do the parsing, which handles all type-specific
114
// conversions (including decimal scale, etc.).
115
// For CHAR type, the value is padded with '\0' to the declared column length, consistent
116
// with the IN list path in convert() above.
117
// For VARCHAR/STRING, the Field is created directly from the raw string.
118
Status parse_to_predicate(const uint32_t index, const std::string col_name, const DataTypePtr& type,
119
                          DeleteHandler::ConditionParseResult& res, Arena& arena,
120
181
                          std::shared_ptr<ColumnPredicate>& predicate) {
121
181
    DCHECK_EQ(res.value_str.size(), 1);
122
181
    if (res.condition_op == PredicateType::IS_NULL ||
123
181
        res.condition_op == PredicateType::IS_NOT_NULL) {
124
37
        predicate = NullPredicate::create_shared(index, col_name,
125
37
                                                 res.condition_op == PredicateType::IS_NOT_NULL,
126
37
                                                 type->get_primitive_type());
127
37
        return Status::OK();
128
37
    }
129
130
144
    Field v;
131
144
    if (type->get_primitive_type() == TYPE_CHAR) {
132
        // CHAR type: create Field and pad with '\0' to the declared column length,
133
        // consistent with IN list path (convert() above) and create_comparison_predicate.
134
0
        const auto& str = res.value_str.front();
135
0
        auto char_len = cast_set<size_t>(
136
0
                assert_cast<const DataTypeString*>(remove_nullable(type).get())->len());
137
0
        auto target = std::max(char_len, str.size());
138
0
        if (target > str.size()) {
139
0
            std::string padded(target, '\0');
140
0
            memcpy(padded.data(), str.data(), str.size());
141
0
            v = Field::create_field<TYPE_CHAR>(std::move(padded));
142
0
        } else {
143
0
            v = Field::create_field<TYPE_CHAR>(str);
144
0
        }
145
144
    } else if (is_string_type(type->get_primitive_type())) {
146
        // VARCHAR/STRING: create Field directly from the raw string, no padding needed.
147
38
        v = Field::create_field<TYPE_STRING>(res.value_str.front());
148
106
    } else {
149
106
        auto serde = type->get_serde();
150
106
        RETURN_IF_ERROR(serde->from_fe_string(res.value_str.front(), v));
151
106
    }
152
153
137
    switch (res.condition_op) {
154
48
    case PredicateType::EQ:
155
48
        predicate = create_comparison_predicate<PredicateType::EQ>(index, col_name, type, v, true);
156
48
        return Status::OK();
157
7
    case PredicateType::NE:
158
7
        predicate = create_comparison_predicate<PredicateType::NE>(index, col_name, type, v, true);
159
7
        return Status::OK();
160
2
    case PredicateType::GT:
161
2
        predicate = create_comparison_predicate<PredicateType::GT>(index, col_name, type, v, true);
162
2
        return Status::OK();
163
2
    case PredicateType::GE:
164
2
        predicate = create_comparison_predicate<PredicateType::GE>(index, col_name, type, v, true);
165
2
        return Status::OK();
166
76
    case PredicateType::LT:
167
76
        predicate = create_comparison_predicate<PredicateType::LT>(index, col_name, type, v, true);
168
76
        return Status::OK();
169
2
    case PredicateType::LE:
170
2
        predicate = create_comparison_predicate<PredicateType::LE>(index, col_name, type, v, true);
171
2
        return Status::OK();
172
0
    default:
173
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>("invalid condition operator. operator={}",
174
0
                                                          type_to_op_str(res.condition_op));
175
137
    }
176
137
}
177
178
Status parse_to_in_predicate(const uint32_t index, const std::string& col_name,
179
                             const DataTypePtr& type, DeleteHandler::ConditionParseResult& res,
180
2
                             Arena& arena, std::shared_ptr<ColumnPredicate>& predicate) {
181
2
    DCHECK_GT(res.value_str.size(), 1);
182
2
    switch (res.condition_op) {
183
1
    case PredicateType::IN_LIST: {
184
1
        std::shared_ptr<HybridSetBase> set;
185
1
        RETURN_IF_ERROR(convert(type, res.value_str, set));
186
1
        predicate =
187
1
                create_in_list_predicate<PredicateType::IN_LIST>(index, col_name, type, set, true);
188
1
        break;
189
1
    }
190
1
    case PredicateType::NOT_IN_LIST: {
191
1
        std::shared_ptr<HybridSetBase> set;
192
1
        RETURN_IF_ERROR(convert(type, res.value_str, set));
193
1
        predicate = create_in_list_predicate<PredicateType::NOT_IN_LIST>(index, col_name, type, set,
194
1
                                                                         true);
195
1
        break;
196
1
    }
197
0
    default:
198
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>("invalid condition operator. operator={}",
199
0
                                                          type_to_op_str(res.condition_op));
200
2
    }
201
2
    return Status::OK();
202
2
}
203
204
// construct sub condition from TCondition
205
101
std::string construct_sub_predicate(const TCondition& condition) {
206
101
    string op = condition.condition_op;
207
101
    if (op == "<") {
208
50
        op += "<";
209
51
    } else if (op == ">") {
210
3
        op += ">";
211
3
    }
212
101
    string condition_str;
213
101
    if ("IS" == op) {
214
        // ATTN: tricky! Surround IS with spaces to make it "special"
215
2
        condition_str = condition.column_name + " IS " + condition.condition_values[0];
216
99
    } else { // multi-elements IN expr has been processed with InPredicatePB
217
99
        if (op == "*=") {
218
2
            op = "=";
219
97
        } else if (op == "!*=") {
220
2
            op = "!=";
221
2
        }
222
99
        condition_str = condition.column_name + op + "'" + condition.condition_values[0] + "'";
223
99
    }
224
101
    return condition_str;
225
101
}
226
227
// make operators from FE adaptive to BE
228
101
std::string trans_op(const std::string& opt) {
229
101
    std::string op = string(opt);
230
101
    if (op == "<") {
231
50
        op += "<";
232
51
    } else if (op == ">") {
233
3
        op += ">";
234
3
    }
235
101
    if ("IS" != op) {
236
99
        if (op == "*=") {
237
2
            op = "=";
238
97
        } else if (op == "!*=") {
239
2
            op = "!=";
240
2
        }
241
99
    }
242
101
    return op;
243
101
}
244
245
Status DeleteHandler::generate_delete_predicate(const TabletSchema& schema,
246
                                                const std::vector<TCondition>& conditions,
247
106
                                                DeletePredicatePB* del_pred) {
248
106
    DBUG_EXECUTE_IF("DeleteHandler::generate_delete_predicate.inject_failure", {
249
106
        return Status::Error<false>(dp->param<int>("error_code"),
250
106
                                    dp->param<std::string>("error_msg"));
251
106
    })
252
106
    if (conditions.empty()) {
253
1
        return Status::Error<ErrorCode::INVALID_ARGUMENT>(
254
1
                "invalid parameters for store_cond. condition_size={}", conditions.size());
255
1
    }
256
257
    // Check whether the delete condition meets the requirements
258
135
    for (const TCondition& condition : conditions) {
259
135
        RETURN_IF_ERROR(check_condition_valid(schema, condition));
260
135
    }
261
262
    // Store delete condition
263
105
    for (const TCondition& condition : conditions) {
264
105
        if (condition.condition_values.size() > 1) {
265
4
            InPredicatePB* in_pred = del_pred->add_in_predicates();
266
4
            if (condition.__isset.column_unique_id) {
267
0
                in_pred->set_column_unique_id(condition.column_unique_id);
268
0
            }
269
4
            in_pred->set_column_name(condition.column_name);
270
4
            bool is_not_in = condition.condition_op == "!*=";
271
4
            in_pred->set_is_not_in(is_not_in);
272
10
            for (const auto& condition_value : condition.condition_values) {
273
10
                in_pred->add_values(condition_value);
274
10
            }
275
276
4
            LOG(INFO) << "store one sub-delete condition. condition name=" << in_pred->column_name()
277
4
                      << "condition size=" << in_pred->values().size();
278
101
        } else {
279
            // write sub predicate v1 for compactbility
280
101
            std::string condition_str = construct_sub_predicate(condition);
281
101
            VLOG_NOTICE << __PRETTY_FUNCTION__ << " condition_str: " << condition_str;
282
101
            del_pred->add_sub_predicates(condition_str);
283
101
            DeleteSubPredicatePB* sub_predicate = del_pred->add_sub_predicates_v2();
284
101
            if (condition.__isset.column_unique_id) {
285
                // only light schema change capable table set this field
286
0
                sub_predicate->set_column_unique_id(condition.column_unique_id);
287
101
            } else {
288
101
                try {
289
101
                    [[maybe_unused]] auto parsed_cond = parse_condition(condition_str);
290
101
                } catch (const Exception& e) {
291
0
                    return Status::Error<ErrorCode::INVALID_ARGUMENT>(
292
0
                            "failed to parse condition_str, condition={}, error={}",
293
0
                            ThriftDebugString(condition), e.to_string());
294
0
                }
295
101
            }
296
297
101
            sub_predicate->set_column_name(condition.column_name);
298
101
            sub_predicate->set_op(trans_op(condition.condition_op));
299
101
            sub_predicate->set_cond_value(condition.condition_values[0]);
300
101
            LOG(INFO) << "store one sub-delete condition. condition="
301
101
                      << fmt::format(" {} {} {}", condition.column_name, condition.condition_op,
302
101
                                     condition.condition_values[0]);
303
101
        }
304
105
    }
305
75
    del_pred->set_version(-1);
306
307
75
    return Status::OK();
308
75
}
309
310
Status DeleteHandler::convert_to_sub_pred_v2(DeletePredicatePB* delete_pred,
311
0
                                             TabletSchemaSPtr schema) {
312
0
    if (!delete_pred->sub_predicates().empty() && delete_pred->sub_predicates_v2().empty()) {
313
0
        for (const auto& condition_str : delete_pred->sub_predicates()) {
314
0
            auto* sub_pred = delete_pred->add_sub_predicates_v2();
315
0
            auto condition = parse_condition(condition_str);
316
0
            const auto& column = *DORIS_TRY(schema->column(condition.column_name));
317
0
            sub_pred->set_column_unique_id(column.unique_id());
318
0
            sub_pred->set_column_name(condition.column_name);
319
0
            sub_pred->set_op(type_to_op_str(condition.condition_op));
320
0
            sub_pred->set_cond_value(condition.value_str.front());
321
0
        }
322
0
    }
323
324
0
    auto* in_pred_list = delete_pred->mutable_in_predicates();
325
0
    for (auto& in_pred : *in_pred_list) {
326
0
        const auto& column = *DORIS_TRY(schema->column(in_pred.column_name()));
327
0
        in_pred.set_column_unique_id(column.unique_id());
328
0
    }
329
0
    return Status::OK();
330
0
}
331
332
bool DeleteHandler::is_condition_value_valid(const TabletColumn& column,
333
                                             const std::string& condition_op,
334
140
                                             const string& value_str) {
335
140
    if ("IS" == condition_op && ("NULL" == value_str || "NOT NULL" == value_str)) {
336
2
        return true;
337
2
    }
338
339
138
    FieldType field_type = column.type();
340
138
    switch (field_type) {
341
11
    case FieldType::OLAP_FIELD_TYPE_TINYINT:
342
11
        return valid_signed_number<int8_t>(value_str);
343
13
    case FieldType::OLAP_FIELD_TYPE_SMALLINT:
344
13
        return valid_signed_number<int16_t>(value_str);
345
54
    case FieldType::OLAP_FIELD_TYPE_INT:
346
54
        return valid_signed_number<int32_t>(value_str);
347
5
    case FieldType::OLAP_FIELD_TYPE_BIGINT:
348
5
        return valid_signed_number<int64_t>(value_str);
349
4
    case FieldType::OLAP_FIELD_TYPE_LARGEINT:
350
4
        return valid_signed_number<int128_t>(value_str);
351
0
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT:
352
0
        return valid_unsigned_number<uint8_t>(value_str);
353
0
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT:
354
0
        return valid_unsigned_number<uint16_t>(value_str);
355
0
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT:
356
0
        return valid_unsigned_number<uint32_t>(value_str);
357
0
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT:
358
0
        return valid_unsigned_number<uint64_t>(value_str);
359
7
    case FieldType::OLAP_FIELD_TYPE_DECIMAL:
360
7
        return valid_decimal(value_str, column.precision(), column.frac());
361
0
    case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
362
0
        return valid_decimal(value_str, column.precision(), column.frac());
363
0
    case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
364
0
        return valid_decimal(value_str, column.precision(), column.frac());
365
0
    case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
366
0
        return valid_decimal(value_str, column.precision(), column.frac());
367
0
    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
368
0
        return valid_decimal(value_str, column.precision(), column.frac());
369
3
    case FieldType::OLAP_FIELD_TYPE_CHAR:
370
7
    case FieldType::OLAP_FIELD_TYPE_VARCHAR:
371
7
        return value_str.size() <= column.length();
372
0
    case FieldType::OLAP_FIELD_TYPE_STRING:
373
0
        return value_str.size() <= config::string_type_length_soft_limit_bytes;
374
5
    case FieldType::OLAP_FIELD_TYPE_DATE:
375
11
    case FieldType::OLAP_FIELD_TYPE_DATETIME:
376
11
    case FieldType::OLAP_FIELD_TYPE_DATEV2:
377
11
    case FieldType::OLAP_FIELD_TYPE_DATETIMEV2:
378
24
    case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ:
379
24
        return valid_datetime(value_str, column.frac());
380
0
    case FieldType::OLAP_FIELD_TYPE_BOOL:
381
0
        return valid_bool(value_str);
382
5
    case FieldType::OLAP_FIELD_TYPE_IPV4:
383
5
        return valid_ipv4(value_str);
384
8
    case FieldType::OLAP_FIELD_TYPE_IPV6:
385
8
        return valid_ipv6(value_str);
386
0
    default:
387
0
        LOG(WARNING) << "unknown field type. [type=" << int(field_type) << "]";
388
138
    }
389
0
    return false;
390
138
}
391
392
135
Status DeleteHandler::check_condition_valid(const TabletSchema& schema, const TCondition& cond) {
393
    // Check whether the column exists
394
135
    int32_t field_index = schema.field_index(cond.column_name);
395
135
    if (field_index < 0) {
396
1
        return Status::Error<ErrorCode::INVALID_ARGUMENT>("field is not existent. [field_index={}]",
397
1
                                                          field_index);
398
1
    }
399
400
    // Delete condition should only applied on key columns or duplicate key table, and
401
    // the condition column type should not be float or double.
402
134
    const TabletColumn& column = schema.column(field_index);
403
404
134
    if (column.type() == FieldType::OLAP_FIELD_TYPE_DOUBLE ||
405
134
        column.type() == FieldType::OLAP_FIELD_TYPE_FLOAT) {
406
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>("data type is float or double.");
407
0
    }
408
409
    // Check operator and operands size are matched.
410
134
    if ("*=" != cond.condition_op && "!*=" != cond.condition_op &&
411
134
        cond.condition_values.size() != 1) {
412
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>("invalid condition value size. [size={}]",
413
0
                                                          cond.condition_values.size());
414
0
    }
415
416
    // Check each operand is valid
417
140
    for (const auto& condition_value : cond.condition_values) {
418
140
        if (!is_condition_value_valid(column, cond.condition_op, condition_value)) {
419
29
            return Status::Error<ErrorCode::INVALID_ARGUMENT>("invalid condition value. [value={}]",
420
29
                                                              condition_value);
421
29
        }
422
140
    }
423
424
105
    if (!cond.__isset.column_unique_id) {
425
105
        LOG(WARNING) << "column=" << cond.column_name
426
105
                     << " in predicate does not have uid, table id=" << schema.table_id();
427
        // TODO(tsy): make it fail here after FE forbidding hard-link-schema-change
428
105
        return Status::OK();
429
105
    }
430
0
    if (schema.field_index(cond.column_unique_id) == -1) {
431
0
        const auto& err_msg =
432
0
                fmt::format("column id does not exists in table={}, schema version={},",
433
0
                            schema.table_id(), schema.schema_version());
434
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>(err_msg);
435
0
    }
436
0
    if (!iequal(schema.column_by_uid(cond.column_unique_id).name(), cond.column_name)) {
437
0
        const auto& err_msg = fmt::format(
438
0
                "colum name={} does not belongs to column uid={}, which "
439
0
                "column name={}, "
440
0
                "delete_cond.column_name ={}",
441
0
                cond.column_name, cond.column_unique_id,
442
0
                schema.column_by_uid(cond.column_unique_id).name(), cond.column_name);
443
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>(err_msg);
444
0
    }
445
446
0
    return Status::OK();
447
0
}
448
449
PredicateType DeleteHandler::parse_condition_op(const std::string& op_str,
450
302
                                                const std::list<std::string>& cond_values) {
451
302
    if (trim(to_lower(op_str)) == "=") {
452
92
        return PredicateType::EQ;
453
210
    } else if (trim(to_lower(op_str)) == "!=") {
454
18
        return PredicateType::NE;
455
192
    } else if (trim(to_lower(op_str)) == ">>") {
456
9
        return PredicateType::GT;
457
183
    } else if (trim(to_lower(op_str)) == "<<") {
458
129
        return PredicateType::LT;
459
129
    } else if (trim(to_lower(op_str)) == ">=") {
460
5
        return PredicateType::GE;
461
49
    } else if (trim(to_lower(op_str)) == "<=") {
462
7
        return PredicateType::LE;
463
42
    } else if (trim(to_lower(op_str)) == "*=") {
464
0
        return cond_values.size() > 1 ? PredicateType::IN_LIST : PredicateType::EQ;
465
42
    } else if (trim(to_lower(op_str)) == "!*=") {
466
0
        return cond_values.size() > 1 ? PredicateType::NOT_IN_LIST : PredicateType::NE;
467
42
    } else if (trim(to_lower(op_str)) == "is") {
468
42
        return to_lower(cond_values.front()) == "null" ? PredicateType::IS_NULL
469
42
                                                       : PredicateType::IS_NOT_NULL;
470
42
    } else {
471
0
        throw Exception(Status::Error<ErrorCode::INVALID_ARGUMENT>(
472
0
                "invalid condition operator. operator={}", op_str));
473
0
    }
474
0
    return PredicateType::UNKNOWN;
475
302
}
476
477
DeleteHandler::ConditionParseResult DeleteHandler::parse_condition(
478
100
        const DeleteSubPredicatePB& sub_cond) {
479
100
    ConditionParseResult res;
480
100
    if (!sub_cond.has_column_name() || !sub_cond.has_op() || !sub_cond.has_cond_value()) {
481
0
        throw Exception(Status::Error<ErrorCode::INVALID_ARGUMENT>(
482
0
                "fail to parse condition. condition={} {} {}", sub_cond.column_name(),
483
0
                sub_cond.op(), sub_cond.cond_value()));
484
0
    }
485
100
    if (sub_cond.has_column_unique_id()) {
486
0
        res.col_unique_id = sub_cond.column_unique_id();
487
0
    }
488
100
    res.column_name = sub_cond.column_name();
489
100
    res.value_str.push_back(sub_cond.cond_value());
490
100
    res.condition_op = parse_condition_op(sub_cond.op(), res.value_str);
491
100
    return res;
492
100
}
493
494
// clang-format off
495
// Condition string format, the format is (column_name)(op)(value)
496
// eg: condition_str="c1 = 1597751948193618247 and length(source)<1;\n;\n"
497
// column_name: matches "c1", must include FeNameFormat.java COLUMN_NAME_REGEX
498
//              and compactible with any the lagacy
499
// operator: matches "="
500
// value: matches "1597751948193618247  and length(source)<1;\n;\n"
501
//
502
// For more info, see DeleteHandler::construct_sub_predicates
503
// FIXME(gavin): This is a tricky implementation, it should not be the final resolution, refactor it.
504
const char* const CONDITION_STR_PATTERN =
505
    // .----------------- column-name --------------------------.   .----------------------- operator ------------------------.   .------------ value ----------.
506
    R"(([_a-zA-Z@0-9\s/\p{L}][.a-zA-Z0-9_+-/?@#$%^&*"\s,:\p{L}]*)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?: IS ))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))";
507
    // '----------------- group 1 ------------------------------'   '--------------------- group 2 ---------------------------'   | '-- group 4--'              |
508
    //                                                                   match any of: = != >> << >= <= *= " IS "                 '----------- group 3 ---------'
509
    //                                                                                                                             match **ANY THING** without(4)
510
    //                                                                                                                             or with(3) single quote
511
// clang-format on
512
RE2 DELETE_HANDLER_REGEX(CONDITION_STR_PATTERN);
513
514
DeleteHandler::ConditionParseResult DeleteHandler::parse_condition(
515
182
        const std::string& condition_str) {
516
182
    ConditionParseResult res;
517
182
    std::string col_name, op, value, g4;
518
519
182
    bool matched = RE2::FullMatch(condition_str, DELETE_HANDLER_REGEX, &col_name, &op, &value,
520
182
                                  &g4); // exact match
521
522
182
    if (!matched) {
523
0
        throw Exception(
524
0
                Status::InvalidArgument("fail to sub condition. condition={}", condition_str));
525
0
    }
526
527
182
    res.column_name = col_name;
528
529
    // match string with single quotes, a = b  or a = 'b'
530
182
    if (!g4.empty()) {
531
141
        res.value_str.push_back(g4);
532
141
    } else {
533
41
        res.value_str.push_back(value);
534
41
    }
535
182
    res.condition_op = DeleteHandler::parse_condition_op(op, res.value_str);
536
182
    VLOG_NOTICE << "parsed condition_str: col_name={" << col_name << "} op={" << op << "} val={"
537
48
                << res.value_str.back() << "}";
538
182
    return res;
539
182
}
540
541
template <typename SubPredType>
542
    requires(std::is_same_v<SubPredType, DeleteSubPredicatePB> or
543
             std::is_same_v<SubPredType, std::string>)
544
Status DeleteHandler::_parse_column_pred(TabletSchemaSPtr complete_schema,
545
                                         TabletSchemaSPtr delete_pred_related_schema,
546
                                         const RepeatedPtrField<SubPredType>& sub_pred_list,
547
169
                                         DeleteConditions* delete_conditions) {
548
181
    for (const auto& sub_predicate : sub_pred_list) {
549
181
        auto condition = parse_condition(sub_predicate);
550
181
        int32_t col_unique_id = -1;
551
181
        if constexpr (std::is_same_v<SubPredType, DeleteSubPredicatePB>) {
552
100
            if (sub_predicate.has_column_unique_id()) [[likely]] {
553
0
                col_unique_id = sub_predicate.column_unique_id();
554
0
            }
555
100
        }
556
181
        if (col_unique_id < 0) {
557
181
            const auto& column =
558
181
                    *DORIS_TRY(delete_pred_related_schema->column(condition.column_name));
559
181
            col_unique_id = column.unique_id();
560
181
        }
561
181
        condition.col_unique_id = col_unique_id;
562
181
        const auto& column = complete_schema->column_by_uid(col_unique_id);
563
181
        uint32_t index = complete_schema->field_index(col_unique_id);
564
181
        std::shared_ptr<ColumnPredicate> predicate;
565
181
        RETURN_IF_ERROR(parse_to_predicate(index, column.name(), column.get_vec_type(), condition,
566
181
                                           _predicate_arena, predicate));
567
174
        if (predicate != nullptr) {
568
174
            delete_conditions->column_predicate_vec.push_back(predicate);
569
174
        }
570
174
    }
571
162
    return Status::OK();
572
169
}
_ZN5doris13DeleteHandler18_parse_column_predINS_20DeleteSubPredicatePBEQoosr3stdE9is_same_vIT_S2_Esr3stdE9is_same_vIS3_NSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEENS_6StatusESt10shared_ptrINS_12TabletSchemaEESD_RKN6google8protobuf16RepeatedPtrFieldIS3_EEPNS_16DeleteConditionsE
Line
Count
Source
547
87
                                         DeleteConditions* delete_conditions) {
548
100
    for (const auto& sub_predicate : sub_pred_list) {
549
100
        auto condition = parse_condition(sub_predicate);
550
100
        int32_t col_unique_id = -1;
551
100
        if constexpr (std::is_same_v<SubPredType, DeleteSubPredicatePB>) {
552
100
            if (sub_predicate.has_column_unique_id()) [[likely]] {
553
0
                col_unique_id = sub_predicate.column_unique_id();
554
0
            }
555
100
        }
556
100
        if (col_unique_id < 0) {
557
100
            const auto& column =
558
100
                    *DORIS_TRY(delete_pred_related_schema->column(condition.column_name));
559
100
            col_unique_id = column.unique_id();
560
100
        }
561
100
        condition.col_unique_id = col_unique_id;
562
100
        const auto& column = complete_schema->column_by_uid(col_unique_id);
563
100
        uint32_t index = complete_schema->field_index(col_unique_id);
564
100
        std::shared_ptr<ColumnPredicate> predicate;
565
100
        RETURN_IF_ERROR(parse_to_predicate(index, column.name(), column.get_vec_type(), condition,
566
100
                                           _predicate_arena, predicate));
567
100
        if (predicate != nullptr) {
568
100
            delete_conditions->column_predicate_vec.push_back(predicate);
569
100
        }
570
100
    }
571
87
    return Status::OK();
572
87
}
_ZN5doris13DeleteHandler18_parse_column_predINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEQoosr3stdE9is_same_vIT_NS_20DeleteSubPredicatePBEEsr3stdE9is_same_vIS8_S7_EEENS_6StatusESt10shared_ptrINS_12TabletSchemaEESD_RKN6google8protobuf16RepeatedPtrFieldIS8_EEPNS_16DeleteConditionsE
Line
Count
Source
547
82
                                         DeleteConditions* delete_conditions) {
548
82
    for (const auto& sub_predicate : sub_pred_list) {
549
81
        auto condition = parse_condition(sub_predicate);
550
81
        int32_t col_unique_id = -1;
551
        if constexpr (std::is_same_v<SubPredType, DeleteSubPredicatePB>) {
552
            if (sub_predicate.has_column_unique_id()) [[likely]] {
553
                col_unique_id = sub_predicate.column_unique_id();
554
            }
555
        }
556
81
        if (col_unique_id < 0) {
557
81
            const auto& column =
558
81
                    *DORIS_TRY(delete_pred_related_schema->column(condition.column_name));
559
81
            col_unique_id = column.unique_id();
560
81
        }
561
81
        condition.col_unique_id = col_unique_id;
562
81
        const auto& column = complete_schema->column_by_uid(col_unique_id);
563
81
        uint32_t index = complete_schema->field_index(col_unique_id);
564
81
        std::shared_ptr<ColumnPredicate> predicate;
565
81
        RETURN_IF_ERROR(parse_to_predicate(index, column.name(), column.get_vec_type(), condition,
566
81
                                           _predicate_arena, predicate));
567
74
        if (predicate != nullptr) {
568
74
            delete_conditions->column_predicate_vec.push_back(predicate);
569
74
        }
570
74
    }
571
75
    return Status::OK();
572
82
}
573
574
Status DeleteHandler::init(TabletSchemaSPtr tablet_schema,
575
374
                           const std::vector<RowsetMetaSharedPtr>& delete_preds, int64_t version) {
576
374
    DCHECK(!_is_inited) << "reinitialize delete handler.";
577
374
    DCHECK(version >= 0) << "invalid parameters. version=" << version;
578
579
374
    for (const auto& delete_pred : delete_preds) {
580
        // Skip the delete condition with large version
581
169
        if (delete_pred->version().first > version) {
582
0
            continue;
583
0
        }
584
        // Need the tablet schema at the delete condition to parse the accurate column
585
169
        const auto& delete_pred_related_schema = delete_pred->tablet_schema();
586
169
        const auto& delete_condition = delete_pred->delete_predicate();
587
169
        DeleteConditions temp;
588
169
        temp.filter_version = delete_pred->version().first;
589
169
        if (!delete_condition.sub_predicates_v2().empty()) {
590
87
            RETURN_IF_ERROR(_parse_column_pred(tablet_schema, delete_pred_related_schema,
591
87
                                               delete_condition.sub_predicates_v2(), &temp));
592
87
        } else {
593
            // make it compatible with the former versions
594
82
            RETURN_IF_ERROR(_parse_column_pred(tablet_schema, delete_pred_related_schema,
595
82
                                               delete_condition.sub_predicates(), &temp));
596
82
        }
597
162
        for (const auto& in_predicate : delete_condition.in_predicates()) {
598
2
            ConditionParseResult condition;
599
2
            condition.column_name = in_predicate.column_name();
600
601
2
            int32_t col_unique_id = -1;
602
2
            if (in_predicate.has_column_unique_id()) {
603
0
                col_unique_id = in_predicate.column_unique_id();
604
2
            } else {
605
                // if upgrade from version 2.0.x, column_unique_id maybe not set
606
2
                const auto& pre_column =
607
2
                        *DORIS_TRY(delete_pred_related_schema->column(condition.column_name));
608
2
                col_unique_id = pre_column.unique_id();
609
2
            }
610
2
            if (col_unique_id == -1) {
611
0
                return Status::Error<ErrorCode::DELETE_INVALID_CONDITION>(
612
0
                        "cannot get column_unique_id for column {}", condition.column_name);
613
0
            }
614
2
            condition.col_unique_id = col_unique_id;
615
616
2
            condition.condition_op =
617
2
                    in_predicate.is_not_in() ? PredicateType::NOT_IN_LIST : PredicateType::IN_LIST;
618
5
            for (const auto& value : in_predicate.values()) {
619
5
                condition.value_str.push_back(value);
620
5
            }
621
2
            const auto& column = tablet_schema->column_by_uid(col_unique_id);
622
2
            uint32_t index = tablet_schema->field_index(col_unique_id);
623
2
            std::shared_ptr<ColumnPredicate> predicate;
624
2
            RETURN_IF_ERROR(parse_to_in_predicate(index, column.name(), column.get_vec_type(),
625
2
                                                  condition, _predicate_arena, predicate));
626
2
            temp.column_predicate_vec.push_back(predicate);
627
2
        }
628
629
162
        _del_conds.emplace_back(std::move(temp));
630
162
    }
631
632
367
    _is_inited = true;
633
634
367
    return Status::OK();
635
374
}
636
637
391
DeleteHandler::~DeleteHandler() {
638
391
    if (!_is_inited) {
639
24
        return;
640
24
    }
641
642
367
    _del_conds.clear();
643
367
    _is_inited = false;
644
367
}
645
646
void DeleteHandler::get_delete_conditions_after_version(
647
        int64_t version, AndBlockColumnPredicate* and_block_column_predicate_ptr,
648
        std::unordered_map<int32_t, std::vector<std::shared_ptr<const ColumnPredicate>>>*
649
1.08k
                del_predicates_for_zone_map) const {
650
1.08k
    for (const auto& del_cond : _del_conds) {
651
555
        if (del_cond.filter_version > version) {
652
            // now, only query support delete column predicate operator
653
406
            if (!del_cond.column_predicate_vec.empty()) {
654
406
                if (del_cond.column_predicate_vec.size() == 1) {
655
406
                    auto single_column_block_predicate = SingleColumnBlockPredicate::create_unique(
656
406
                            del_cond.column_predicate_vec[0]);
657
406
                    and_block_column_predicate_ptr->add_column_predicate(
658
406
                            std::move(single_column_block_predicate));
659
406
                    if (del_predicates_for_zone_map->count(
660
406
                                del_cond.column_predicate_vec[0]->column_id()) < 1) {
661
406
                        del_predicates_for_zone_map->insert(
662
406
                                {del_cond.column_predicate_vec[0]->column_id(),
663
406
                                 std::vector<std::shared_ptr<const ColumnPredicate>> {}});
664
406
                    }
665
406
                    (*del_predicates_for_zone_map)[del_cond.column_predicate_vec[0]->column_id()]
666
406
                            .push_back(del_cond.column_predicate_vec[0]);
667
406
                } else {
668
0
                    auto or_column_predicate = OrBlockColumnPredicate::create_unique();
669
670
                    // build or_column_predicate
671
                    // when delete from where a = 1 and b = 2, we can not use del_predicates_for_zone_map to filter zone page,
672
                    // so here do not put predicate to del_predicates_for_zone_map,
673
                    // refer #17145 for more details.
674
                    // // TODO: need refactor design and code to use more version delete and more column delete to filter zone page.
675
0
                    std::for_each(del_cond.column_predicate_vec.cbegin(),
676
0
                                  del_cond.column_predicate_vec.cend(),
677
0
                                  [&or_column_predicate](
678
0
                                          const std::shared_ptr<const ColumnPredicate> predicate) {
679
0
                                      or_column_predicate->add_column_predicate(
680
0
                                              SingleColumnBlockPredicate::create_unique(predicate));
681
0
                                  });
682
0
                    and_block_column_predicate_ptr->add_column_predicate(
683
0
                            std::move(or_column_predicate));
684
0
                }
685
406
            }
686
406
        }
687
555
    }
688
1.08k
}
689
690
} // namespace doris