be/src/storage/delete/delete_handler.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "storage/delete/delete_handler.h" |
19 | | |
20 | | #include <gen_cpp/PaloInternalService_types.h> |
21 | | #include <gen_cpp/olap_file.pb.h> |
22 | | #include <thrift/protocol/TDebugProtocol.h> |
23 | | |
24 | | #include <string> |
25 | | #include <vector> |
26 | | |
27 | | #include "common/config.h" |
28 | | #include "common/logging.h" |
29 | | #include "common/status.h" |
30 | | #include "core/data_type_serde/data_type_serde.h" |
31 | | #include "storage/olap_common.h" |
32 | | #include "storage/predicate/block_column_predicate.h" |
33 | | #include "storage/predicate/null_predicate.h" |
34 | | #include "storage/predicate/predicate_creator.h" |
35 | | #include "storage/tablet/tablet_schema.h" |
36 | | #include "storage/utils.h" |
37 | | #include "util/debug_points.h" |
38 | | |
39 | | using apache::thrift::ThriftDebugString; |
40 | | using std::vector; |
41 | | using std::string; |
42 | | |
43 | | using ::google::protobuf::RepeatedPtrField; |
44 | | |
45 | | namespace doris { |
46 | | |
47 | | // Parses a string value into a Field using the serde's from_fe_string, then builds |
48 | | // a HybridSetBase for IN/NOT_IN predicates. |
49 | | // The type-dispatch via switch/case is still needed because build_set<PType>() and |
50 | | // HybridSet::insert(const void*) require compile-time PrimitiveType, and Field::get<PType>() |
51 | | // must be invoked with the correct type to extract the underlying CppType value. |
52 | | template <PrimitiveType PType> |
53 | 955 | void insert_field_to_set(const Field& field, HybridSetBase* set) { |
54 | 955 | if constexpr (is_string_type(PType)) { |
55 | | // StringSet::insert expects const StringRef*, so we must construct a StringRef |
56 | | // from the std::string returned by Field::get<>. |
57 | 201 | const auto& tmp = field.get<PType>(); |
58 | 201 | StringRef ref(tmp.data(), tmp.size()); |
59 | 201 | set->insert(reinterpret_cast<const void*>(&ref)); |
60 | 754 | } else { |
61 | 754 | auto tmp = field.get<PType>(); |
62 | 754 | set->insert(reinterpret_cast<const void*>(&tmp)); |
63 | 754 | } |
64 | 955 | } _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE3EEEvRKNS_5FieldEPNS_13HybridSetBaseE Line | Count | Source | 53 | 4 | void insert_field_to_set(const Field& field, HybridSetBase* set) { | 54 | | if constexpr (is_string_type(PType)) { | 55 | | // StringSet::insert expects const StringRef*, so we must construct a StringRef | 56 | | // from the std::string returned by Field::get<>. | 57 | | const auto& tmp = field.get<PType>(); | 58 | | StringRef ref(tmp.data(), tmp.size()); | 59 | | set->insert(reinterpret_cast<const void*>(&ref)); | 60 | 4 | } else { | 61 | 4 | auto tmp = field.get<PType>(); | 62 | 4 | set->insert(reinterpret_cast<const void*>(&tmp)); | 63 | 4 | } | 64 | 4 | } |
_ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE4EEEvRKNS_5FieldEPNS_13HybridSetBaseE Line | Count | Source | 53 | 2 | void insert_field_to_set(const Field& field, HybridSetBase* set) { | 54 | | if constexpr (is_string_type(PType)) { | 55 | | // StringSet::insert expects const StringRef*, so we must construct a StringRef | 56 | | // from the std::string returned by Field::get<>. | 57 | | const auto& tmp = field.get<PType>(); | 58 | | StringRef ref(tmp.data(), tmp.size()); | 59 | | set->insert(reinterpret_cast<const void*>(&ref)); | 60 | 2 | } else { | 61 | 2 | auto tmp = field.get<PType>(); | 62 | 2 | set->insert(reinterpret_cast<const void*>(&tmp)); | 63 | 2 | } | 64 | 2 | } |
_ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE5EEEvRKNS_5FieldEPNS_13HybridSetBaseE Line | Count | Source | 53 | 266 | void insert_field_to_set(const Field& field, HybridSetBase* set) { | 54 | | if constexpr (is_string_type(PType)) { | 55 | | // StringSet::insert expects const StringRef*, so we must construct a StringRef | 56 | | // from the std::string returned by Field::get<>. | 57 | | const auto& tmp = field.get<PType>(); | 58 | | StringRef ref(tmp.data(), tmp.size()); | 59 | | set->insert(reinterpret_cast<const void*>(&ref)); | 60 | 266 | } else { | 61 | 266 | auto tmp = field.get<PType>(); | 62 | 266 | set->insert(reinterpret_cast<const void*>(&tmp)); | 63 | 266 | } | 64 | 266 | } |
_ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE6EEEvRKNS_5FieldEPNS_13HybridSetBaseE Line | Count | Source | 53 | 17 | void insert_field_to_set(const Field& field, HybridSetBase* set) { | 54 | | if constexpr (is_string_type(PType)) { | 55 | | // StringSet::insert expects const StringRef*, so we must construct a StringRef | 56 | | // from the std::string returned by Field::get<>. | 57 | | const auto& tmp = field.get<PType>(); | 58 | | StringRef ref(tmp.data(), tmp.size()); | 59 | | set->insert(reinterpret_cast<const void*>(&ref)); | 60 | 17 | } else { | 61 | 17 | auto tmp = field.get<PType>(); | 62 | 17 | set->insert(reinterpret_cast<const void*>(&tmp)); | 63 | 17 | } | 64 | 17 | } |
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE7EEEvRKNS_5FieldEPNS_13HybridSetBaseE Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE8EEEvRKNS_5FieldEPNS_13HybridSetBaseE Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE9EEEvRKNS_5FieldEPNS_13HybridSetBaseE Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE11EEEvRKNS_5FieldEPNS_13HybridSetBaseE Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE12EEEvRKNS_5FieldEPNS_13HybridSetBaseE _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE25EEEvRKNS_5FieldEPNS_13HybridSetBaseE Line | Count | Source | 53 | 2 | void insert_field_to_set(const Field& field, HybridSetBase* set) { | 54 | | if constexpr (is_string_type(PType)) { | 55 | | // StringSet::insert expects const StringRef*, so we must construct a StringRef | 56 | | // from the std::string returned by Field::get<>. | 57 | | const auto& tmp = field.get<PType>(); | 58 | | StringRef ref(tmp.data(), tmp.size()); | 59 | | set->insert(reinterpret_cast<const void*>(&ref)); | 60 | 2 | } else { | 61 | 2 | auto tmp = field.get<PType>(); | 62 | 2 | set->insert(reinterpret_cast<const void*>(&tmp)); | 63 | 2 | } | 64 | 2 | } |
_ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE26EEEvRKNS_5FieldEPNS_13HybridSetBaseE Line | Count | Source | 53 | 2 | void insert_field_to_set(const Field& field, HybridSetBase* set) { | 54 | | if constexpr (is_string_type(PType)) { | 55 | | // StringSet::insert expects const StringRef*, so we must construct a StringRef | 56 | | // from the std::string returned by Field::get<>. | 57 | | const auto& tmp = field.get<PType>(); | 58 | | StringRef ref(tmp.data(), tmp.size()); | 59 | | set->insert(reinterpret_cast<const void*>(&ref)); | 60 | 2 | } else { | 61 | 2 | auto tmp = field.get<PType>(); | 62 | 2 | set->insert(reinterpret_cast<const void*>(&tmp)); | 63 | 2 | } | 64 | 2 | } |
_ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE42EEEvRKNS_5FieldEPNS_13HybridSetBaseE Line | Count | Source | 53 | 451 | void insert_field_to_set(const Field& field, HybridSetBase* set) { | 54 | | if constexpr (is_string_type(PType)) { | 55 | | // StringSet::insert expects const StringRef*, so we must construct a StringRef | 56 | | // from the std::string returned by Field::get<>. | 57 | | const auto& tmp = field.get<PType>(); | 58 | | StringRef ref(tmp.data(), tmp.size()); | 59 | | set->insert(reinterpret_cast<const void*>(&ref)); | 60 | 451 | } else { | 61 | 451 | auto tmp = field.get<PType>(); | 62 | 451 | set->insert(reinterpret_cast<const void*>(&tmp)); | 63 | 451 | } | 64 | 451 | } |
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE2EEEvRKNS_5FieldEPNS_13HybridSetBaseE Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE36EEEvRKNS_5FieldEPNS_13HybridSetBaseE Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE37EEEvRKNS_5FieldEPNS_13HybridSetBaseE _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE20EEEvRKNS_5FieldEPNS_13HybridSetBaseE Line | Count | Source | 53 | 8 | void insert_field_to_set(const Field& field, HybridSetBase* set) { | 54 | | if constexpr (is_string_type(PType)) { | 55 | | // StringSet::insert expects const StringRef*, so we must construct a StringRef | 56 | | // from the std::string returned by Field::get<>. | 57 | | const auto& tmp = field.get<PType>(); | 58 | | StringRef ref(tmp.data(), tmp.size()); | 59 | | set->insert(reinterpret_cast<const void*>(&ref)); | 60 | 8 | } else { | 61 | 8 | auto tmp = field.get<PType>(); | 62 | 8 | set->insert(reinterpret_cast<const void*>(&tmp)); | 63 | 8 | } | 64 | 8 | } |
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE28EEEvRKNS_5FieldEPNS_13HybridSetBaseE _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE29EEEvRKNS_5FieldEPNS_13HybridSetBaseE Line | Count | Source | 53 | 2 | void insert_field_to_set(const Field& field, HybridSetBase* set) { | 54 | | if constexpr (is_string_type(PType)) { | 55 | | // StringSet::insert expects const StringRef*, so we must construct a StringRef | 56 | | // from the std::string returned by Field::get<>. | 57 | | const auto& tmp = field.get<PType>(); | 58 | | StringRef ref(tmp.data(), tmp.size()); | 59 | | set->insert(reinterpret_cast<const void*>(&ref)); | 60 | 2 | } else { | 61 | 2 | auto tmp = field.get<PType>(); | 62 | 2 | set->insert(reinterpret_cast<const void*>(&tmp)); | 63 | 2 | } | 64 | 2 | } |
Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE30EEEvRKNS_5FieldEPNS_13HybridSetBaseE Unexecuted instantiation: _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE35EEEvRKNS_5FieldEPNS_13HybridSetBaseE _ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE15EEEvRKNS_5FieldEPNS_13HybridSetBaseE Line | Count | Source | 53 | 4 | void insert_field_to_set(const Field& field, HybridSetBase* set) { | 54 | 4 | if constexpr (is_string_type(PType)) { | 55 | | // StringSet::insert expects const StringRef*, so we must construct a StringRef | 56 | | // from the std::string returned by Field::get<>. | 57 | 4 | const auto& tmp = field.get<PType>(); | 58 | 4 | StringRef ref(tmp.data(), tmp.size()); | 59 | 4 | set->insert(reinterpret_cast<const void*>(&ref)); | 60 | | } else { | 61 | | auto tmp = field.get<PType>(); | 62 | | set->insert(reinterpret_cast<const void*>(&tmp)); | 63 | | } | 64 | 4 | } |
_ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE10EEEvRKNS_5FieldEPNS_13HybridSetBaseE Line | Count | Source | 53 | 195 | void insert_field_to_set(const Field& field, HybridSetBase* set) { | 54 | 195 | if constexpr (is_string_type(PType)) { | 55 | | // StringSet::insert expects const StringRef*, so we must construct a StringRef | 56 | | // from the std::string returned by Field::get<>. | 57 | 195 | const auto& tmp = field.get<PType>(); | 58 | 195 | StringRef ref(tmp.data(), tmp.size()); | 59 | 195 | set->insert(reinterpret_cast<const void*>(&ref)); | 60 | | } else { | 61 | | auto tmp = field.get<PType>(); | 62 | | set->insert(reinterpret_cast<const void*>(&tmp)); | 63 | | } | 64 | 195 | } |
_ZN5doris19insert_field_to_setILNS_13PrimitiveTypeE23EEEvRKNS_5FieldEPNS_13HybridSetBaseE Line | Count | Source | 53 | 2 | void insert_field_to_set(const Field& field, HybridSetBase* set) { | 54 | 2 | if constexpr (is_string_type(PType)) { | 55 | | // StringSet::insert expects const StringRef*, so we must construct a StringRef | 56 | | // from the std::string returned by Field::get<>. | 57 | 2 | const auto& tmp = field.get<PType>(); | 58 | 2 | StringRef ref(tmp.data(), tmp.size()); | 59 | 2 | set->insert(reinterpret_cast<const void*>(&ref)); | 60 | | } else { | 61 | | auto tmp = field.get<PType>(); | 62 | | set->insert(reinterpret_cast<const void*>(&tmp)); | 63 | | } | 64 | 2 | } |
|
65 | | |
66 | | #define FROM_FE_STRING_CASE(PType) \ |
67 | 302 | case PType: { \ |
68 | 302 | set = build_set<PType>(); \ |
69 | 957 | for (const auto& s : str) { \ |
70 | 957 | Field field; \ |
71 | 957 | RETURN_IF_ERROR(serde->from_fe_string(s, field)); \ |
72 | 957 | insert_field_to_set<PType>(field, set.get()); \ |
73 | 957 | } \ |
74 | 302 | return Status::OK(); \ |
75 | 302 | } |
76 | | Status convert(const DataTypePtr& data_type, const std::list<std::string>& str, |
77 | 303 | std::shared_ptr<HybridSetBase>& set) { |
78 | 303 | auto serde = data_type->get_serde(); |
79 | 303 | switch (data_type->get_primitive_type()) { |
80 | 1 | FROM_FE_STRING_CASE(TYPE_TINYINT); |
81 | 1 | FROM_FE_STRING_CASE(TYPE_SMALLINT); |
82 | 97 | FROM_FE_STRING_CASE(TYPE_INT); |
83 | 6 | FROM_FE_STRING_CASE(TYPE_BIGINT); |
84 | 0 | FROM_FE_STRING_CASE(TYPE_LARGEINT); |
85 | 0 | FROM_FE_STRING_CASE(TYPE_FLOAT); |
86 | 0 | FROM_FE_STRING_CASE(TYPE_DOUBLE); |
87 | 0 | FROM_FE_STRING_CASE(TYPE_DATE); |
88 | 0 | FROM_FE_STRING_CASE(TYPE_DATETIME); |
89 | 1 | FROM_FE_STRING_CASE(TYPE_DATEV2); |
90 | 1 | FROM_FE_STRING_CASE(TYPE_DATETIMEV2); |
91 | 129 | FROM_FE_STRING_CASE(TYPE_TIMESTAMPTZ); |
92 | 0 | FROM_FE_STRING_CASE(TYPE_BOOLEAN); |
93 | 0 | FROM_FE_STRING_CASE(TYPE_IPV4); |
94 | 0 | FROM_FE_STRING_CASE(TYPE_IPV6); |
95 | 2 | FROM_FE_STRING_CASE(TYPE_DECIMALV2); |
96 | 0 | FROM_FE_STRING_CASE(TYPE_DECIMAL32); |
97 | 1 | FROM_FE_STRING_CASE(TYPE_DECIMAL64); |
98 | 0 | FROM_FE_STRING_CASE(TYPE_DECIMAL128I); |
99 | 0 | FROM_FE_STRING_CASE(TYPE_DECIMAL256); |
100 | 2 | FROM_FE_STRING_CASE(TYPE_CHAR); |
101 | 60 | FROM_FE_STRING_CASE(TYPE_VARCHAR); |
102 | 1 | FROM_FE_STRING_CASE(TYPE_STRING); |
103 | 0 | default: |
104 | 0 | return Status::Error<ErrorCode::INVALID_ARGUMENT>( |
105 | 0 | "unsupported data type in delete handler. type={}", |
106 | 0 | type_to_string(data_type->get_primitive_type())); |
107 | 303 | } |
108 | 0 | return Status::OK(); |
109 | 303 | } |
110 | | #undef FROM_FE_STRING_CASE |
111 | | |
112 | | // Parses a single condition value string into a Field and creates a comparison predicate. |
113 | | // Uses serde->from_fe_string to do the parsing, which handles all type-specific |
114 | | // conversions (including decimal scale, etc.). |
115 | | // For CHAR type, the value is padded with '\0' to the declared column length, consistent |
116 | | // with the IN list path in convert() above. |
117 | | // For VARCHAR/STRING, the Field is created directly from the raw string. |
118 | | Status parse_to_predicate(const uint32_t index, const std::string col_name, const DataTypePtr& type, |
119 | | DeleteHandler::ConditionParseResult& res, Arena& arena, |
120 | 8.10k | std::shared_ptr<ColumnPredicate>& predicate) { |
121 | 8.10k | DCHECK_EQ(res.value_str.size(), 1); |
122 | 8.10k | if (res.condition_op == PredicateType::IS_NULL || |
123 | 8.10k | res.condition_op == PredicateType::IS_NOT_NULL) { |
124 | 554 | predicate = NullPredicate::create_shared(index, col_name, |
125 | 554 | res.condition_op == PredicateType::IS_NOT_NULL, |
126 | 554 | type->get_primitive_type()); |
127 | 554 | return Status::OK(); |
128 | 554 | } |
129 | | |
130 | 7.55k | Field v; |
131 | 7.55k | if (type->get_primitive_type() == TYPE_CHAR) { |
132 | | // CHAR type: create Field and pad with '\0' to the declared column length, |
133 | | // consistent with IN list path (convert() above) and create_comparison_predicate. |
134 | 4 | const auto& str = res.value_str.front(); |
135 | 4 | auto char_len = cast_set<size_t>( |
136 | 4 | assert_cast<const DataTypeString*>(remove_nullable(type).get())->len()); |
137 | 4 | auto target = std::max(char_len, str.size()); |
138 | 4 | if (target > str.size()) { |
139 | 0 | std::string padded(target, '\0'); |
140 | 0 | memcpy(padded.data(), str.data(), str.size()); |
141 | 0 | v = Field::create_field<TYPE_CHAR>(std::move(padded)); |
142 | 4 | } else { |
143 | 4 | v = Field::create_field<TYPE_CHAR>(str); |
144 | 4 | } |
145 | 7.54k | } else if (is_string_type(type->get_primitive_type())) { |
146 | | // VARCHAR/STRING: create Field directly from the raw string, no padding needed. |
147 | 699 | v = Field::create_field<TYPE_STRING>(res.value_str.front()); |
148 | 6.85k | } else { |
149 | 6.85k | auto serde = type->get_serde(); |
150 | 6.85k | RETURN_IF_ERROR(serde->from_fe_string(res.value_str.front(), v)); |
151 | 6.85k | } |
152 | | |
153 | 7.54k | switch (res.condition_op) { |
154 | 3.74k | case PredicateType::EQ: |
155 | 3.74k | predicate = create_comparison_predicate<PredicateType::EQ>(index, col_name, type, v, true); |
156 | 3.74k | return Status::OK(); |
157 | 948 | case PredicateType::NE: |
158 | 948 | predicate = create_comparison_predicate<PredicateType::NE>(index, col_name, type, v, true); |
159 | 948 | return Status::OK(); |
160 | 558 | case PredicateType::GT: |
161 | 558 | predicate = create_comparison_predicate<PredicateType::GT>(index, col_name, type, v, true); |
162 | 558 | return Status::OK(); |
163 | 418 | case PredicateType::GE: |
164 | 418 | predicate = create_comparison_predicate<PredicateType::GE>(index, col_name, type, v, true); |
165 | 418 | return Status::OK(); |
166 | 1.01k | case PredicateType::LT: |
167 | 1.01k | predicate = create_comparison_predicate<PredicateType::LT>(index, col_name, type, v, true); |
168 | 1.01k | return Status::OK(); |
169 | 861 | case PredicateType::LE: |
170 | 861 | predicate = create_comparison_predicate<PredicateType::LE>(index, col_name, type, v, true); |
171 | 861 | return Status::OK(); |
172 | 0 | default: |
173 | 0 | return Status::Error<ErrorCode::INVALID_ARGUMENT>("invalid condition operator. operator={}", |
174 | 0 | type_to_op_str(res.condition_op)); |
175 | 7.54k | } |
176 | 7.54k | } |
177 | | |
178 | | Status parse_to_in_predicate(const uint32_t index, const std::string& col_name, |
179 | | const DataTypePtr& type, DeleteHandler::ConditionParseResult& res, |
180 | 303 | Arena& arena, std::shared_ptr<ColumnPredicate>& predicate) { |
181 | 303 | DCHECK_GT(res.value_str.size(), 1); |
182 | 303 | switch (res.condition_op) { |
183 | 232 | case PredicateType::IN_LIST: { |
184 | 232 | std::shared_ptr<HybridSetBase> set; |
185 | 232 | RETURN_IF_ERROR(convert(type, res.value_str, set)); |
186 | 232 | predicate = |
187 | 232 | create_in_list_predicate<PredicateType::IN_LIST>(index, col_name, type, set, true); |
188 | 232 | break; |
189 | 232 | } |
190 | 71 | case PredicateType::NOT_IN_LIST: { |
191 | 71 | std::shared_ptr<HybridSetBase> set; |
192 | 71 | RETURN_IF_ERROR(convert(type, res.value_str, set)); |
193 | 71 | predicate = create_in_list_predicate<PredicateType::NOT_IN_LIST>(index, col_name, type, set, |
194 | 71 | true); |
195 | 71 | break; |
196 | 71 | } |
197 | 0 | default: |
198 | 0 | return Status::Error<ErrorCode::INVALID_ARGUMENT>("invalid condition operator. operator={}", |
199 | 0 | type_to_op_str(res.condition_op)); |
200 | 303 | } |
201 | 304 | return Status::OK(); |
202 | 303 | } |
203 | | |
204 | | // construct sub condition from TCondition |
205 | 3.21k | std::string construct_sub_predicate(const TCondition& condition) { |
206 | 3.21k | string op = condition.condition_op; |
207 | 3.21k | if (op == "<") { |
208 | 333 | op += "<"; |
209 | 2.88k | } else if (op == ">") { |
210 | 235 | op += ">"; |
211 | 235 | } |
212 | 3.21k | string condition_str; |
213 | 3.21k | if ("IS" == op) { |
214 | | // ATTN: tricky! Surround IS with spaces to make it "special" |
215 | 411 | condition_str = condition.column_name + " IS " + condition.condition_values[0]; |
216 | 2.80k | } else { // multi-elements IN expr has been processed with InPredicatePB |
217 | 2.80k | if (op == "*=") { |
218 | 2 | op = "="; |
219 | 2.80k | } else if (op == "!*=") { |
220 | 2 | op = "!="; |
221 | 2 | } |
222 | 2.80k | condition_str = condition.column_name + op + "'" + condition.condition_values[0] + "'"; |
223 | 2.80k | } |
224 | 3.21k | return condition_str; |
225 | 3.21k | } |
226 | | |
// Translates an operator string as sent by FE into the spelling used on BE:
//   "<"   -> "<<"
//   ">"   -> ">>"
//   "*="  -> "="
//   "!*=" -> "!="
// Every other operator (including "IS") is returned unchanged.
std::string trans_op(const std::string& opt) {
    if (opt == "<") {
        return "<<";
    }
    if (opt == ">") {
        return ">>";
    }
    if (opt == "*=") {
        return "=";
    }
    if (opt == "!*=") {
        return "!=";
    }
    return opt;
}
244 | | |
245 | | Status DeleteHandler::generate_delete_predicate(const TabletSchema& schema, |
246 | | const std::vector<TCondition>& conditions, |
247 | 3.33k | DeletePredicatePB* del_pred) { |
248 | 3.33k | DBUG_EXECUTE_IF("DeleteHandler::generate_delete_predicate.inject_failure", { |
249 | 3.33k | return Status::Error<false>(dp->param<int>("error_code"), |
250 | 3.33k | dp->param<std::string>("error_msg")); |
251 | 3.33k | }) |
252 | 3.33k | if (conditions.empty()) { |
253 | 1 | return Status::Error<ErrorCode::INVALID_ARGUMENT>( |
254 | 1 | "invalid parameters for store_cond. condition_size={}", conditions.size()); |
255 | 1 | } |
256 | | |
257 | | // Check whether the delete condition meets the requirements |
258 | 3.46k | for (const TCondition& condition : conditions) { |
259 | 3.46k | RETURN_IF_ERROR(check_condition_valid(schema, condition)); |
260 | 3.46k | } |
261 | | |
262 | | // Store delete condition |
263 | 3.43k | for (const TCondition& condition : conditions) { |
264 | 3.43k | if (condition.condition_values.size() > 1) { |
265 | 195 | InPredicatePB* in_pred = del_pred->add_in_predicates(); |
266 | 195 | if (condition.__isset.column_unique_id) { |
267 | 191 | in_pred->set_column_unique_id(condition.column_unique_id); |
268 | 191 | } |
269 | 195 | in_pred->set_column_name(condition.column_name); |
270 | 195 | bool is_not_in = condition.condition_op == "!*="; |
271 | 195 | in_pred->set_is_not_in(is_not_in); |
272 | 623 | for (const auto& condition_value : condition.condition_values) { |
273 | 623 | in_pred->add_values(condition_value); |
274 | 623 | } |
275 | | |
276 | 195 | LOG(INFO) << "store one sub-delete condition. condition name=" << in_pred->column_name() |
277 | 195 | << "condition size=" << in_pred->values().size(); |
278 | 3.23k | } else { |
279 | | // write sub predicate v1 for compactbility |
280 | 3.23k | std::string condition_str = construct_sub_predicate(condition); |
281 | 3.23k | VLOG_NOTICE << __PRETTY_FUNCTION__ << " condition_str: " << condition_str; |
282 | 3.23k | del_pred->add_sub_predicates(condition_str); |
283 | 3.23k | DeleteSubPredicatePB* sub_predicate = del_pred->add_sub_predicates_v2(); |
284 | 3.23k | if (condition.__isset.column_unique_id) { |
285 | | // only light schema change capable table set this field |
286 | 3.12k | sub_predicate->set_column_unique_id(condition.column_unique_id); |
287 | 3.12k | } else { |
288 | 109 | try { |
289 | 109 | [[maybe_unused]] auto parsed_cond = parse_condition(condition_str); |
290 | 109 | } catch (const Exception& e) { |
291 | 0 | return Status::Error<ErrorCode::INVALID_ARGUMENT>( |
292 | 0 | "failed to parse condition_str, condition={}, error={}", |
293 | 0 | ThriftDebugString(condition), e.to_string()); |
294 | 0 | } |
295 | 109 | } |
296 | | |
297 | 3.22k | sub_predicate->set_column_name(condition.column_name); |
298 | 3.22k | sub_predicate->set_op(trans_op(condition.condition_op)); |
299 | 3.22k | sub_predicate->set_cond_value(condition.condition_values[0]); |
300 | 3.22k | LOG(INFO) << "store one sub-delete condition. condition=" |
301 | 3.22k | << fmt::format(" {} {} {}", condition.column_name, condition.condition_op, |
302 | 3.22k | condition.condition_values[0]); |
303 | 3.22k | } |
304 | 3.43k | } |
305 | 3.29k | del_pred->set_version(-1); |
306 | | |
307 | 3.29k | return Status::OK(); |
308 | 3.30k | } |
309 | | |
310 | | Status DeleteHandler::convert_to_sub_pred_v2(DeletePredicatePB* delete_pred, |
311 | 0 | TabletSchemaSPtr schema) { |
312 | 0 | if (!delete_pred->sub_predicates().empty() && delete_pred->sub_predicates_v2().empty()) { |
313 | 0 | for (const auto& condition_str : delete_pred->sub_predicates()) { |
314 | 0 | auto* sub_pred = delete_pred->add_sub_predicates_v2(); |
315 | 0 | auto condition = parse_condition(condition_str); |
316 | 0 | const auto& column = *DORIS_TRY(schema->column(condition.column_name)); |
317 | 0 | sub_pred->set_column_unique_id(column.unique_id()); |
318 | 0 | sub_pred->set_column_name(condition.column_name); |
319 | 0 | sub_pred->set_op(type_to_op_str(condition.condition_op)); |
320 | 0 | sub_pred->set_cond_value(condition.value_str.front()); |
321 | 0 | } |
322 | 0 | } |
323 | | |
324 | 0 | auto* in_pred_list = delete_pred->mutable_in_predicates(); |
325 | 0 | for (auto& in_pred : *in_pred_list) { |
326 | 0 | const auto& column = *DORIS_TRY(schema->column(in_pred.column_name())); |
327 | 0 | in_pred.set_column_unique_id(column.unique_id()); |
328 | 0 | } |
329 | 0 | return Status::OK(); |
330 | 0 | } |
331 | | |
332 | | bool DeleteHandler::is_condition_value_valid(const TabletColumn& column, |
333 | | const std::string& condition_op, |
334 | 3.87k | const string& value_str) { |
335 | 3.87k | if ("IS" == condition_op && ("NULL" == value_str || "NOT NULL" == value_str)) { |
336 | 411 | return true; |
337 | 411 | } |
338 | | |
339 | 3.46k | FieldType field_type = column.type(); |
340 | 3.46k | switch (field_type) { |
341 | 25 | case FieldType::OLAP_FIELD_TYPE_TINYINT: |
342 | 25 | return valid_signed_number<int8_t>(value_str); |
343 | 17 | case FieldType::OLAP_FIELD_TYPE_SMALLINT: |
344 | 17 | return valid_signed_number<int16_t>(value_str); |
345 | 832 | case FieldType::OLAP_FIELD_TYPE_INT: |
346 | 832 | return valid_signed_number<int32_t>(value_str); |
347 | 59 | case FieldType::OLAP_FIELD_TYPE_BIGINT: |
348 | 59 | return valid_signed_number<int64_t>(value_str); |
349 | 161 | case FieldType::OLAP_FIELD_TYPE_LARGEINT: |
350 | 161 | return valid_signed_number<int128_t>(value_str); |
351 | 0 | case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT: |
352 | 0 | return valid_unsigned_number<uint8_t>(value_str); |
353 | 0 | case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: |
354 | 0 | return valid_unsigned_number<uint16_t>(value_str); |
355 | 0 | case FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT: |
356 | 0 | return valid_unsigned_number<uint32_t>(value_str); |
357 | 0 | case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT: |
358 | 0 | return valid_unsigned_number<uint64_t>(value_str); |
359 | 15 | case FieldType::OLAP_FIELD_TYPE_DECIMAL: |
360 | 15 | return valid_decimal(value_str, column.precision(), column.frac()); |
361 | 8 | case FieldType::OLAP_FIELD_TYPE_DECIMAL32: |
362 | 8 | return valid_decimal(value_str, column.precision(), column.frac()); |
363 | 10 | case FieldType::OLAP_FIELD_TYPE_DECIMAL64: |
364 | 10 | return valid_decimal(value_str, column.precision(), column.frac()); |
365 | 0 | case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: |
366 | 0 | return valid_decimal(value_str, column.precision(), column.frac()); |
367 | 1 | case FieldType::OLAP_FIELD_TYPE_DECIMAL256: |
368 | 1 | return valid_decimal(value_str, column.precision(), column.frac()); |
369 | 11 | case FieldType::OLAP_FIELD_TYPE_CHAR: |
370 | 232 | case FieldType::OLAP_FIELD_TYPE_VARCHAR: |
371 | 232 | return value_str.size() <= column.length(); |
372 | 44 | case FieldType::OLAP_FIELD_TYPE_STRING: |
373 | 44 | return value_str.size() <= config::string_type_length_soft_limit_bytes; |
374 | 14 | case FieldType::OLAP_FIELD_TYPE_DATE: |
375 | 29 | case FieldType::OLAP_FIELD_TYPE_DATETIME: |
376 | 62 | case FieldType::OLAP_FIELD_TYPE_DATEV2: |
377 | 97 | case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: |
378 | 2.00k | case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ: |
379 | 2.00k | return valid_datetime(value_str, column.frac()); |
380 | 5 | case FieldType::OLAP_FIELD_TYPE_BOOL: |
381 | 5 | return valid_bool(value_str); |
382 | 9 | case FieldType::OLAP_FIELD_TYPE_IPV4: |
383 | 9 | return valid_ipv4(value_str); |
384 | 36 | case FieldType::OLAP_FIELD_TYPE_IPV6: |
385 | 36 | return valid_ipv6(value_str); |
386 | 0 | default: |
387 | 0 | LOG(WARNING) << "unknown field type. [type=" << int(field_type) << "]"; |
388 | 3.46k | } |
389 | 0 | return false; |
390 | 3.46k | } |
391 | | |
392 | 3.44k | Status DeleteHandler::check_condition_valid(const TabletSchema& schema, const TCondition& cond) { |
393 | | // Check whether the column exists |
394 | 3.44k | int32_t field_index = schema.field_index(cond.column_name); |
395 | 3.44k | if (field_index < 0) { |
396 | 1 | return Status::Error<ErrorCode::INVALID_ARGUMENT>("field is not existent. [field_index={}]", |
397 | 1 | field_index); |
398 | 1 | } |
399 | | |
400 | | // Delete condition should only applied on key columns or duplicate key table, and |
401 | | // the condition column type should not be float or double. |
402 | 3.44k | const TabletColumn& column = schema.column(field_index); |
403 | | |
404 | 3.44k | if (column.type() == FieldType::OLAP_FIELD_TYPE_DOUBLE || |
405 | 3.45k | column.type() == FieldType::OLAP_FIELD_TYPE_FLOAT) { |
406 | 0 | return Status::Error<ErrorCode::INVALID_ARGUMENT>("data type is float or double."); |
407 | 0 | } |
408 | | |
409 | | // Check operator and operands size are matched. |
410 | 3.44k | if ("*=" != cond.condition_op && "!*=" != cond.condition_op && |
411 | 3.44k | cond.condition_values.size() != 1) { |
412 | 0 | return Status::Error<ErrorCode::INVALID_ARGUMENT>("invalid condition value size. [size={}]", |
413 | 0 | cond.condition_values.size()); |
414 | 0 | } |
415 | | |
416 | | // Check each operand is valid |
417 | 3.88k | for (const auto& condition_value : cond.condition_values) { |
418 | 3.88k | if (!is_condition_value_valid(column, cond.condition_op, condition_value)) { |
419 | 29 | return Status::Error<ErrorCode::INVALID_ARGUMENT>("invalid condition value. [value={}]", |
420 | 29 | condition_value); |
421 | 29 | } |
422 | 3.88k | } |
423 | | |
424 | 3.41k | if (!cond.__isset.column_unique_id) { |
425 | 105 | LOG(WARNING) << "column=" << cond.column_name |
426 | 105 | << " in predicate does not have uid, table id=" << schema.table_id(); |
427 | | // TODO(tsy): make it fail here after FE forbidding hard-link-schema-change |
428 | 105 | return Status::OK(); |
429 | 105 | } |
430 | 3.31k | if (schema.field_index(cond.column_unique_id) == -1) { |
431 | 0 | const auto& err_msg = |
432 | 0 | fmt::format("column id does not exists in table={}, schema version={},", |
433 | 0 | schema.table_id(), schema.schema_version()); |
434 | 0 | return Status::Error<ErrorCode::INVALID_ARGUMENT>(err_msg); |
435 | 0 | } |
436 | 3.31k | if (!iequal(schema.column_by_uid(cond.column_unique_id).name(), cond.column_name)) { |
437 | 0 | const auto& err_msg = fmt::format( |
438 | 0 | "colum name={} does not belongs to column uid={}, which " |
439 | 0 | "column name={}, " |
440 | 0 | "delete_cond.column_name ={}", |
441 | 0 | cond.column_name, cond.column_unique_id, |
442 | 0 | schema.column_by_uid(cond.column_unique_id).name(), cond.column_name); |
443 | 0 | return Status::Error<ErrorCode::INVALID_ARGUMENT>(err_msg); |
444 | 0 | } |
445 | | |
446 | 3.31k | return Status::OK(); |
447 | 3.31k | } |
448 | | |
449 | | PredicateType DeleteHandler::parse_condition_op(const std::string& op_str, |
450 | 8.22k | const std::list<std::string>& cond_values) { |
451 | 8.22k | if (trim(to_lower(op_str)) == "=") { |
452 | 3.81k | return PredicateType::EQ; |
453 | 4.41k | } else if (trim(to_lower(op_str)) == "!=") { |
454 | 959 | return PredicateType::NE; |
455 | 3.45k | } else if (trim(to_lower(op_str)) == ">>") { |
456 | 566 | return PredicateType::GT; |
457 | 2.89k | } else if (trim(to_lower(op_str)) == "<<") { |
458 | 1.06k | return PredicateType::LT; |
459 | 1.82k | } else if (trim(to_lower(op_str)) == ">=") { |
460 | 422 | return PredicateType::GE; |
461 | 1.40k | } else if (trim(to_lower(op_str)) == "<=") { |
462 | 867 | return PredicateType::LE; |
463 | 867 | } else if (trim(to_lower(op_str)) == "*=") { |
464 | 0 | return cond_values.size() > 1 ? PredicateType::IN_LIST : PredicateType::EQ; |
465 | 539 | } else if (trim(to_lower(op_str)) == "!*=") { |
466 | 0 | return cond_values.size() > 1 ? PredicateType::NOT_IN_LIST : PredicateType::NE; |
467 | 559 | } else if (trim(to_lower(op_str)) == "is") { |
468 | 559 | return to_lower(cond_values.front()) == "null" ? PredicateType::IS_NULL |
469 | 559 | : PredicateType::IS_NOT_NULL; |
470 | 18.4E | } else { |
471 | 18.4E | throw Exception(Status::Error<ErrorCode::INVALID_ARGUMENT>( |
472 | 18.4E | "invalid condition operator. operator={}", op_str)); |
473 | 18.4E | } |
474 | 0 | return PredicateType::UNKNOWN; |
475 | 8.22k | } |
476 | | |
477 | | DeleteHandler::ConditionParseResult DeleteHandler::parse_condition( |
478 | 8.04k | const DeleteSubPredicatePB& sub_cond) { |
479 | 8.04k | ConditionParseResult res; |
480 | 8.05k | if (!sub_cond.has_column_name() || !sub_cond.has_op() || !sub_cond.has_cond_value()) { |
481 | 0 | throw Exception(Status::Error<ErrorCode::INVALID_ARGUMENT>( |
482 | 0 | "fail to parse condition. condition={} {} {}", sub_cond.column_name(), |
483 | 0 | sub_cond.op(), sub_cond.cond_value())); |
484 | 0 | } |
485 | 8.04k | if (sub_cond.has_column_unique_id()) { |
486 | 7.95k | res.col_unique_id = sub_cond.column_unique_id(); |
487 | 7.95k | } |
488 | 8.04k | res.column_name = sub_cond.column_name(); |
489 | 8.04k | res.value_str.push_back(sub_cond.cond_value()); |
490 | 8.04k | res.condition_op = parse_condition_op(sub_cond.op(), res.value_str); |
491 | 8.04k | return res; |
492 | 8.04k | } |
493 | | |
494 | | // clang-format off |
495 | | // Condition string format, the format is (column_name)(op)(value) |
496 | | // eg: condition_str="c1 = 1597751948193618247 and length(source)<1;\n;\n" |
497 | | // column_name: matches "c1", must include FeNameFormat.java COLUMN_NAME_REGEX |
498 | | // and stay compatible with any legacy column names |
499 | | // operator: matches "=" |
500 | | // value: matches "1597751948193618247 and length(source)<1;\n;\n" |
501 | | // |
502 | | // For more info, see DeleteHandler::construct_sub_predicates |
503 | | // FIXME(gavin): This is a tricky implementation, it should not be the final resolution, refactor it. |
504 | | const char* const CONDITION_STR_PATTERN = |
505 | | // .----------------- column-name --------------------------. .----------------------- operator ------------------------. .------------ value ----------. |
506 | | R"(([_a-zA-Z@0-9\s/\p{L}][.a-zA-Z0-9_+-/?@#$%^&*"\s,:\p{L}]*)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?: IS ))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))"; |
507 | | // '----------------- group 1 ------------------------------' '--------------------- group 2 ---------------------------' | '-- group 4--' | |
508 | | // match any of: = != >> << >= <= *= " IS " '----------- group 3 ---------' |
509 | | // match **ANY THING** without(4) |
510 | | // or with(3) single quote |
511 | | // clang-format on |
512 | | RE2 DELETE_HANDLER_REGEX(CONDITION_STR_PATTERN); |
513 | | |
514 | | DeleteHandler::ConditionParseResult DeleteHandler::parse_condition( |
515 | 182 | const std::string& condition_str) { |
516 | 182 | ConditionParseResult res; |
517 | 182 | std::string col_name, op, value, g4; |
518 | | |
519 | 182 | bool matched = RE2::FullMatch(condition_str, DELETE_HANDLER_REGEX, &col_name, &op, &value, |
520 | 182 | &g4); // exact match |
521 | | |
522 | 182 | if (!matched) { |
523 | 0 | throw Exception( |
524 | 0 | Status::InvalidArgument("fail to sub condition. condition={}", condition_str)); |
525 | 0 | } |
526 | | |
527 | 182 | res.column_name = col_name; |
528 | | |
529 | | // match string with single quotes, a = b or a = 'b' |
530 | 182 | if (!g4.empty()) { |
531 | 141 | res.value_str.push_back(g4); |
532 | 141 | } else { |
533 | 41 | res.value_str.push_back(value); |
534 | 41 | } |
535 | 182 | res.condition_op = DeleteHandler::parse_condition_op(op, res.value_str); |
536 | 182 | VLOG_NOTICE << "parsed condition_str: col_name={" << col_name << "} op={" << op << "} val={" |
537 | 48 | << res.value_str.back() << "}"; |
538 | 182 | return res; |
539 | 182 | } |
540 | | |
541 | | template <typename SubPredType> |
542 | | requires(std::is_same_v<SubPredType, DeleteSubPredicatePB> or |
543 | | std::is_same_v<SubPredType, std::string>) |
544 | | Status DeleteHandler::_parse_column_pred(TabletSchemaSPtr complete_schema, |
545 | | TabletSchemaSPtr delete_pred_related_schema, |
546 | | const RepeatedPtrField<SubPredType>& sub_pred_list, |
547 | 8.12k | DeleteConditions* delete_conditions) { |
548 | 8.12k | for (const auto& sub_predicate : sub_pred_list) { |
549 | 8.12k | auto condition = parse_condition(sub_predicate); |
550 | 8.12k | int32_t col_unique_id = -1; |
551 | 8.12k | if constexpr (std::is_same_v<SubPredType, DeleteSubPredicatePB>) { |
552 | 8.04k | if (sub_predicate.has_column_unique_id()) [[likely]] { |
553 | 7.94k | col_unique_id = sub_predicate.column_unique_id(); |
554 | 7.94k | } |
555 | 8.04k | } |
556 | 8.12k | if (col_unique_id < 0) { |
557 | 181 | const auto& column = |
558 | 181 | *DORIS_TRY(delete_pred_related_schema->column(condition.column_name)); |
559 | 181 | col_unique_id = column.unique_id(); |
560 | 181 | } |
561 | 8.12k | condition.col_unique_id = col_unique_id; |
562 | 8.12k | const auto& column = complete_schema->column_by_uid(col_unique_id); |
563 | 8.12k | uint32_t index = complete_schema->field_index(col_unique_id); |
564 | 8.12k | std::shared_ptr<ColumnPredicate> predicate; |
565 | 8.12k | RETURN_IF_ERROR(parse_to_predicate(index, column.name(), column.get_vec_type(), condition, |
566 | 8.12k | _predicate_arena, predicate)); |
567 | 8.11k | if (predicate != nullptr) { |
568 | 8.09k | delete_conditions->column_predicate_vec.push_back(predicate); |
569 | 8.09k | } |
570 | 8.11k | } |
571 | 8.12k | return Status::OK(); |
572 | 8.12k | } _ZN5doris13DeleteHandler18_parse_column_predINS_20DeleteSubPredicatePBEQoosr3stdE9is_same_vIT_S2_Esr3stdE9is_same_vIS3_NSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEENS_6StatusESt10shared_ptrINS_12TabletSchemaEESD_RKN6google8protobuf16RepeatedPtrFieldIS3_EEPNS_16DeleteConditionsE Line | Count | Source | 547 | 7.80k | DeleteConditions* delete_conditions) { | 548 | 8.04k | for (const auto& sub_predicate : sub_pred_list) { | 549 | 8.04k | auto condition = parse_condition(sub_predicate); | 550 | 8.04k | int32_t col_unique_id = -1; | 551 | 8.04k | if constexpr (std::is_same_v<SubPredType, DeleteSubPredicatePB>) { | 552 | 8.04k | if (sub_predicate.has_column_unique_id()) [[likely]] { | 553 | 7.94k | col_unique_id = sub_predicate.column_unique_id(); | 554 | 7.94k | } | 555 | 8.04k | } | 556 | 8.04k | if (col_unique_id < 0) { | 557 | 100 | const auto& column = | 558 | 100 | *DORIS_TRY(delete_pred_related_schema->column(condition.column_name)); | 559 | 100 | col_unique_id = column.unique_id(); | 560 | 100 | } | 561 | 8.04k | condition.col_unique_id = col_unique_id; | 562 | 8.04k | const auto& column = complete_schema->column_by_uid(col_unique_id); | 563 | 8.04k | uint32_t index = complete_schema->field_index(col_unique_id); | 564 | 8.04k | std::shared_ptr<ColumnPredicate> predicate; | 565 | 8.04k | RETURN_IF_ERROR(parse_to_predicate(index, column.name(), column.get_vec_type(), condition, | 566 | 8.04k | _predicate_arena, predicate)); | 567 | 8.04k | if (predicate != nullptr) { | 568 | 8.02k | delete_conditions->column_predicate_vec.push_back(predicate); | 569 | 8.02k | } | 570 | 8.04k | } | 571 | 7.80k | return Status::OK(); | 572 | 7.80k | } |
_ZN5doris13DeleteHandler18_parse_column_predINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEQoosr3stdE9is_same_vIT_NS_20DeleteSubPredicatePBEEsr3stdE9is_same_vIS8_S7_EEENS_6StatusESt10shared_ptrINS_12TabletSchemaEESD_RKN6google8protobuf16RepeatedPtrFieldIS8_EEPNS_16DeleteConditionsE Line | Count | Source | 547 | 326 | DeleteConditions* delete_conditions) { | 548 | 326 | for (const auto& sub_predicate : sub_pred_list) { | 549 | 81 | auto condition = parse_condition(sub_predicate); | 550 | 81 | int32_t col_unique_id = -1; | 551 | | if constexpr (std::is_same_v<SubPredType, DeleteSubPredicatePB>) { | 552 | | if (sub_predicate.has_column_unique_id()) [[likely]] { | 553 | | col_unique_id = sub_predicate.column_unique_id(); | 554 | | } | 555 | | } | 556 | 81 | if (col_unique_id < 0) { | 557 | 81 | const auto& column = | 558 | 81 | *DORIS_TRY(delete_pred_related_schema->column(condition.column_name)); | 559 | 81 | col_unique_id = column.unique_id(); | 560 | 81 | } | 561 | 81 | condition.col_unique_id = col_unique_id; | 562 | 81 | const auto& column = complete_schema->column_by_uid(col_unique_id); | 563 | 81 | uint32_t index = complete_schema->field_index(col_unique_id); | 564 | 81 | std::shared_ptr<ColumnPredicate> predicate; | 565 | 81 | RETURN_IF_ERROR(parse_to_predicate(index, column.name(), column.get_vec_type(), condition, | 566 | 81 | _predicate_arena, predicate)); | 567 | 74 | if (predicate != nullptr) { | 568 | 74 | delete_conditions->column_predicate_vec.push_back(predicate); | 569 | 74 | } | 570 | 74 | } | 571 | 319 | return Status::OK(); | 572 | 326 | } |
|
573 | | |
574 | | Status DeleteHandler::init(TabletSchemaSPtr tablet_schema, |
575 | 1.17M | const std::vector<RowsetMetaSharedPtr>& delete_preds, int64_t version) { |
576 | 1.17M | DCHECK(!_is_inited) << "reinitialize delete handler."; |
577 | 1.17M | DCHECK(version >= 0) << "invalid parameters. version=" << version; |
578 | | |
579 | 1.17M | for (const auto& delete_pred : delete_preds) { |
580 | | // Skip the delete condition with large version |
581 | 8.14k | if (delete_pred->version().first > version) { |
582 | 0 | continue; |
583 | 0 | } |
584 | | // Need the tablet schema at the delete condition to parse the accurate column |
585 | 8.14k | const auto& delete_pred_related_schema = delete_pred->tablet_schema(); |
586 | 8.14k | const auto& delete_condition = delete_pred->delete_predicate(); |
587 | 8.14k | DeleteConditions temp; |
588 | 8.14k | temp.filter_version = delete_pred->version().first; |
589 | 8.14k | if (!delete_condition.sub_predicates_v2().empty()) { |
590 | 7.83k | RETURN_IF_ERROR(_parse_column_pred(tablet_schema, delete_pred_related_schema, |
591 | 7.83k | delete_condition.sub_predicates_v2(), &temp)); |
592 | 7.83k | } else { |
593 | | // make it compatible with the former versions |
594 | 309 | RETURN_IF_ERROR(_parse_column_pred(tablet_schema, delete_pred_related_schema, |
595 | 309 | delete_condition.sub_predicates(), &temp)); |
596 | 309 | } |
597 | 8.13k | for (const auto& in_predicate : delete_condition.in_predicates()) { |
598 | 303 | ConditionParseResult condition; |
599 | 303 | condition.column_name = in_predicate.column_name(); |
600 | | |
601 | 303 | int32_t col_unique_id = -1; |
602 | 303 | if (in_predicate.has_column_unique_id()) { |
603 | 301 | col_unique_id = in_predicate.column_unique_id(); |
604 | 301 | } else { |
605 | | // if upgrade from version 2.0.x, column_unique_id maybe not set |
606 | 2 | const auto& pre_column = |
607 | 2 | *DORIS_TRY(delete_pred_related_schema->column(condition.column_name)); |
608 | 2 | col_unique_id = pre_column.unique_id(); |
609 | 2 | } |
610 | 303 | if (col_unique_id == -1) { |
611 | 0 | return Status::Error<ErrorCode::DELETE_INVALID_CONDITION>( |
612 | 0 | "cannot get column_unique_id for column {}", condition.column_name); |
613 | 0 | } |
614 | 303 | condition.col_unique_id = col_unique_id; |
615 | | |
616 | 303 | condition.condition_op = |
617 | 303 | in_predicate.is_not_in() ? PredicateType::NOT_IN_LIST : PredicateType::IN_LIST; |
618 | 957 | for (const auto& value : in_predicate.values()) { |
619 | 957 | condition.value_str.push_back(value); |
620 | 957 | } |
621 | 303 | const auto& column = tablet_schema->column_by_uid(col_unique_id); |
622 | 303 | uint32_t index = tablet_schema->field_index(col_unique_id); |
623 | 303 | std::shared_ptr<ColumnPredicate> predicate; |
624 | 303 | RETURN_IF_ERROR(parse_to_in_predicate(index, column.name(), column.get_vec_type(), |
625 | 303 | condition, _predicate_arena, predicate)); |
626 | 303 | temp.column_predicate_vec.push_back(predicate); |
627 | 303 | } |
628 | | |
629 | 8.13k | _del_conds.emplace_back(std::move(temp)); |
630 | 8.13k | } |
631 | | |
632 | 1.17M | _is_inited = true; |
633 | | |
634 | 1.17M | return Status::OK(); |
635 | 1.17M | } |
636 | | |
637 | 1.19M | DeleteHandler::~DeleteHandler() { |
638 | 1.19M | if (!_is_inited) { |
639 | 23.0k | return; |
640 | 23.0k | } |
641 | | |
642 | 1.17M | _del_conds.clear(); |
643 | 1.17M | _is_inited = false; |
644 | 1.17M | } |
645 | | |
646 | | void DeleteHandler::get_delete_conditions_after_version( |
647 | | int64_t version, AndBlockColumnPredicate* and_block_column_predicate_ptr, |
648 | | std::unordered_map<int32_t, std::vector<std::shared_ptr<const ColumnPredicate>>>* |
649 | 3.88M | del_predicates_for_zone_map) const { |
650 | 3.88M | for (const auto& del_cond : _del_conds) { |
651 | 85.3k | if (del_cond.filter_version > version) { |
652 | | // now, only query support delete column predicate operator |
653 | 47.4k | if (!del_cond.column_predicate_vec.empty()) { |
654 | 47.4k | if (del_cond.column_predicate_vec.size() == 1) { |
655 | 46.5k | auto single_column_block_predicate = SingleColumnBlockPredicate::create_unique( |
656 | 46.5k | del_cond.column_predicate_vec[0]); |
657 | 46.5k | and_block_column_predicate_ptr->add_column_predicate( |
658 | 46.5k | std::move(single_column_block_predicate)); |
659 | 46.5k | if (del_predicates_for_zone_map->count( |
660 | 46.5k | del_cond.column_predicate_vec[0]->column_id()) < 1) { |
661 | 20.6k | del_predicates_for_zone_map->insert( |
662 | 20.6k | {del_cond.column_predicate_vec[0]->column_id(), |
663 | 20.6k | std::vector<std::shared_ptr<const ColumnPredicate>> {}}); |
664 | 20.6k | } |
665 | 46.5k | (*del_predicates_for_zone_map)[del_cond.column_predicate_vec[0]->column_id()] |
666 | 46.5k | .push_back(del_cond.column_predicate_vec[0]); |
667 | 46.5k | } else { |
668 | 931 | auto or_column_predicate = OrBlockColumnPredicate::create_unique(); |
669 | | |
670 | | // build or_column_predicate |
671 | | // when delete from where a = 1 and b = 2, we can not use del_predicates_for_zone_map to filter zone page, |
672 | | // so here do not put predicate to del_predicates_for_zone_map, |
673 | | // refer #17145 for more details. |
674 | | // // TODO: need refactor design and code to use more version delete and more column delete to filter zone page. |
675 | 931 | std::for_each(del_cond.column_predicate_vec.cbegin(), |
676 | 931 | del_cond.column_predicate_vec.cend(), |
677 | 931 | [&or_column_predicate]( |
678 | 2.06k | const std::shared_ptr<const ColumnPredicate> predicate) { |
679 | 2.06k | or_column_predicate->add_column_predicate( |
680 | 2.06k | SingleColumnBlockPredicate::create_unique(predicate)); |
681 | 2.06k | }); |
682 | 931 | and_block_column_predicate_ptr->add_column_predicate( |
683 | 931 | std::move(or_column_predicate)); |
684 | 931 | } |
685 | 47.4k | } |
686 | 47.4k | } |
687 | 85.3k | } |
688 | 3.88M | } |
689 | | |
690 | | } // namespace doris |