/root/doris/be/src/vec/functions/like.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
#include <hs/hs_common.h>
#include <hs/hs_runtime.h>
#include <re2/re2.h>
#include <stddef.h>
#include <stdint.h>

#include <algorithm>
#include <boost/iterator/iterator_facade.hpp>
#include <functional>
#include <memory>
#include <string>
#include <utility>

#include "common/status.h"
#include "runtime/define_primitive_type.h"
#include "runtime/string_search.hpp"
#include "udf/udf.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/columns/column_string.h"
#include "vec/columns/columns_number.h"
#include "vec/columns/predicate_column.h"
#include "vec/common/string_ref.h"
#include "vec/core/column_numbers.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type_number.h"
#include "vec/functions/function.h"

45 | | |
namespace doris::vectorized {
// Forward declaration only: this header refers to Block by reference, so the
// complete type is not needed here.
class Block;
} // namespace doris::vectorized
51 | | |
52 | | namespace doris::vectorized { |
53 | | |
54 | | // TODO: replace with std::string_view when `LikeSearchState.substring_pattern` can |
55 | | // construct from std::string_view. |
56 | | struct LikeSearchState { |
57 | | char escape_char; |
58 | | |
59 | | /// Holds the string the StringRef points to and is set any time StringRef is |
60 | | /// used. |
61 | | std::string search_string; |
62 | | |
63 | | std::string pattern_str; |
64 | | |
65 | | /// Used for LIKE predicates if the pattern is a constant argument, and is either a |
66 | | /// constant string or has a constant string at the beginning or end of the pattern. |
67 | | /// This will be set in order to check for that pattern in the corresponding part of |
68 | | /// the string. |
69 | | StringRef search_string_sv; |
70 | | |
71 | | /// Used for LIKE predicates if the pattern is a constant argument and has a constant |
72 | | /// string in the middle of it. This will be use in order to check for the substring |
73 | | /// in the value. |
74 | | doris::StringSearch substring_pattern; |
75 | | |
76 | | /// Used for RLIKE and REGEXP predicates if the pattern is a constant argument. |
77 | | std::unique_ptr<re2::RE2> regex; |
78 | | |
79 | | template <typename Deleter, Deleter deleter> |
80 | | struct HyperscanDeleter { |
81 | | template <typename T> |
82 | 10 | void operator()(T* ptr) const { |
83 | 10 | deleter(ptr); |
84 | 10 | } _ZNK5doris10vectorized15LikeSearchState16HyperscanDeleterIPFiP11hs_databaseEXadL_Z16hs_free_databaseEEEclIS3_EEvPT_ Line | Count | Source | 82 | 5 | void operator()(T* ptr) const { | 83 | 5 | deleter(ptr); | 84 | 5 | } |
_ZNK5doris10vectorized15LikeSearchState16HyperscanDeleterIPFiP10hs_scratchEXadL_Z15hs_free_scratchEEEclIS3_EEvPT_ Line | Count | Source | 82 | 5 | void operator()(T* ptr) const { | 83 | 5 | deleter(ptr); | 84 | 5 | } |
|
85 | | }; |
86 | | |
87 | | // hyperscan compiled pattern database and scratch space, reused for performance |
88 | | std::unique_ptr<hs_database_t, HyperscanDeleter<decltype(&hs_free_database), &hs_free_database>> |
89 | | hs_database; |
90 | | std::unique_ptr<hs_scratch_t, HyperscanDeleter<decltype(&hs_free_scratch), &hs_free_scratch>> |
91 | | hs_scratch; |
92 | | |
93 | | // hyperscan match callback |
94 | | static int hs_match_handler(unsigned int /* from */, // NOLINT |
95 | | unsigned long long /* from */, // NOLINT |
96 | | unsigned long long /* to */, // NOLINT |
97 | 3 | unsigned int /* flags */, void* ctx) { |
98 | | // set result to 1 for matched row |
99 | 3 | *((unsigned char*)ctx) = 1; |
100 | | /// return non-zero to indicate hyperscan stop after first matched |
101 | 3 | return 1; |
102 | 3 | } |
103 | | |
104 | 18 | LikeSearchState() : escape_char('\\') {} |
105 | | |
106 | | Status clone(LikeSearchState& cloned); |
107 | | |
108 | 12 | void set_search_string(const std::string& search_string_arg) { |
109 | 12 | search_string = search_string_arg; |
110 | 12 | search_string_sv = StringRef(search_string); |
111 | 12 | substring_pattern.set_pattern(&search_string_sv); |
112 | 12 | } |
113 | | }; |
114 | | |
115 | | using LikeFn = std::function<doris::Status(LikeSearchState*, const ColumnString&, const StringRef&, |
116 | | ColumnUInt8::Container&)>; |
117 | | |
118 | | using ScalarLikeFn = std::function<doris::Status(LikeSearchState*, const StringRef&, |
119 | | const StringRef&, unsigned char*)>; |
120 | | |
121 | | using VectorLikeFn = std::function<doris::Status(const ColumnString&, const ColumnString&, |
122 | | ColumnUInt8::Container&)>; |
123 | | |
124 | | struct LikeState { |
125 | | bool is_like_pattern; |
126 | | LikeSearchState search_state; |
127 | | LikeFn function; |
128 | | ScalarLikeFn scalar_function; |
129 | | }; |
130 | | |
131 | | struct VectorPatternSearchState { |
132 | | MutableColumnPtr _search_strings; |
133 | | std::string _search_string; |
134 | | VectorLikeFn _vector_function; |
135 | | bool _pattern_matched; |
136 | | |
137 | | VectorPatternSearchState(VectorLikeFn vector_function) |
138 | | : _search_strings(ColumnString::create()), |
139 | | _vector_function(vector_function), |
140 | 60 | _pattern_matched(true) {} |
141 | | |
142 | 60 | virtual ~VectorPatternSearchState() = default; |
143 | | |
144 | | virtual void like_pattern_match(const std::string& pattern_str) = 0; |
145 | | |
146 | | virtual void regexp_pattern_match(const std::string& pattern_str) = 0; |
147 | | }; |
148 | | |
149 | | using VPatternSearchStateSPtr = std::shared_ptr<VectorPatternSearchState>; |
150 | | |
151 | | class FunctionLikeBase : public IFunction { |
152 | | public: |
153 | 18 | size_t get_number_of_arguments() const override { return 2; } |
154 | | |
155 | 18 | DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override { |
156 | 18 | return std::make_shared<DataTypeUInt8>(); |
157 | 18 | } |
158 | | |
159 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
160 | | uint32_t result, size_t /*input_rows_count*/) const override; |
161 | | |
162 | | Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override; |
163 | | |
164 | | friend struct VectorAllpassSearchState; |
165 | | friend struct VectorEqualSearchState; |
166 | | friend struct VectorSubStringSearchState; |
167 | | friend struct VectorStartsWithSearchState; |
168 | | friend struct VectorEndsWithSearchState; |
169 | | |
170 | | protected: |
171 | | Status vector_const(const ColumnString& values, const StringRef* pattern_val, |
172 | | ColumnUInt8::Container& result, const LikeFn& function, |
173 | | LikeSearchState* search_state) const; |
174 | | |
175 | | Status vector_non_const(const ColumnString& values, const ColumnString& patterns, |
176 | | ColumnUInt8::Container& result, LikeState* state, |
177 | | size_t input_rows_count) const; |
178 | | |
179 | | Status execute_substring(const ColumnString::Chars& values, |
180 | | const ColumnString::Offsets& value_offsets, |
181 | | ColumnUInt8::Container& result, LikeSearchState* search_state) const; |
182 | | |
183 | | template <bool LIKE_PATTERN> |
184 | | static VPatternSearchStateSPtr pattern_type_recognition(const ColumnString& patterns); |
185 | | |
186 | | static Status constant_allpass_fn(LikeSearchState* state, const ColumnString& val, |
187 | | const StringRef& pattern, ColumnUInt8::Container& result); |
188 | | |
189 | | static Status constant_allpass_fn_scalar(LikeSearchState* state, const StringRef& val, |
190 | | const StringRef& pattern, unsigned char* result); |
191 | | |
192 | | static Status vector_allpass_fn(const ColumnString& vals, const ColumnString& search_strings, |
193 | | ColumnUInt8::Container& result); |
194 | | |
195 | | static Status constant_starts_with_fn(LikeSearchState* state, const ColumnString& val, |
196 | | const StringRef& pattern, ColumnUInt8::Container& result); |
197 | | |
198 | | static Status constant_starts_with_fn_scalar(LikeSearchState* state, const StringRef& val, |
199 | | const StringRef& pattern, unsigned char* result); |
200 | | |
201 | | static Status vector_starts_with_fn(const ColumnString& vals, |
202 | | const ColumnString& search_strings, |
203 | | ColumnUInt8::Container& result); |
204 | | |
205 | | static Status constant_ends_with_fn(LikeSearchState* state, const ColumnString& val, |
206 | | const StringRef& pattern, ColumnUInt8::Container& result); |
207 | | |
208 | | static Status constant_ends_with_fn_scalar(LikeSearchState* state, const StringRef& val, |
209 | | const StringRef& pattern, unsigned char* result); |
210 | | |
211 | | static Status vector_ends_with_fn(const ColumnString& vals, const ColumnString& search_strings, |
212 | | ColumnUInt8::Container& result); |
213 | | |
214 | | static Status constant_equals_fn(LikeSearchState* state, const ColumnString& val, |
215 | | const StringRef& pattern, ColumnUInt8::Container& result); |
216 | | |
217 | | static Status constant_equals_fn_scalar(LikeSearchState* state, const StringRef& val, |
218 | | const StringRef& pattern, unsigned char* result); |
219 | | |
220 | | static Status vector_equals_fn(const ColumnString& vals, const ColumnString& search_strings, |
221 | | ColumnUInt8::Container& result); |
222 | | |
223 | | static Status constant_substring_fn(LikeSearchState* state, const ColumnString& val, |
224 | | const StringRef& pattern, ColumnUInt8::Container& result); |
225 | | |
226 | | static Status constant_substring_fn_scalar(LikeSearchState* state, const StringRef& val, |
227 | | const StringRef& pattern, unsigned char* result); |
228 | | |
229 | | static Status vector_substring_fn(const ColumnString& vals, const ColumnString& search_strings, |
230 | | ColumnUInt8::Container& result); |
231 | | |
232 | | static Status constant_regex_fn(LikeSearchState* state, const ColumnString& val, |
233 | | const StringRef& pattern, ColumnUInt8::Container& result); |
234 | | |
235 | | static Status constant_regex_fn_scalar(LikeSearchState* state, const StringRef& val, |
236 | | const StringRef& pattern, unsigned char* result); |
237 | | |
238 | | static Status regexp_fn(LikeSearchState* state, const ColumnString& val, |
239 | | const StringRef& pattern, ColumnUInt8::Container& result); |
240 | | |
241 | | static Status regexp_fn_scalar(LikeSearchState* state, const StringRef& val, |
242 | | const StringRef& pattern, unsigned char* result); |
243 | | |
244 | | // hyperscan compile expression to database and allocate scratch space |
245 | | static Status hs_prepare(FunctionContext* context, const char* expression, |
246 | | hs_database_t** database, hs_scratch_t** scratch); |
247 | | }; |
248 | | |
249 | | class FunctionLike : public FunctionLikeBase { |
250 | | public: |
251 | | static constexpr auto name = "like"; |
252 | | |
253 | 2 | static FunctionPtr create() { return std::make_shared<FunctionLike>(); } |
254 | | |
255 | 0 | String get_name() const override { return name; } |
256 | | |
257 | | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override; |
258 | | |
259 | | static Status construct_like_const_state(FunctionContext* ctx, const StringRef& pattern, |
260 | | std::shared_ptr<LikeState>& state, |
261 | | bool try_hyperscan = true); |
262 | | |
263 | | friend struct LikeSearchState; |
264 | | friend struct VectorAllpassSearchState; |
265 | | friend struct VectorEqualSearchState; |
266 | | friend struct VectorSubStringSearchState; |
267 | | friend struct VectorStartsWithSearchState; |
268 | | friend struct VectorEndsWithSearchState; |
269 | | |
270 | | private: |
271 | | static Status like_fn(LikeSearchState* state, const ColumnString& val, const StringRef& pattern, |
272 | | ColumnUInt8::Container& result); |
273 | | |
274 | | static Status like_fn_scalar(LikeSearchState* state, const StringRef& val, |
275 | | const StringRef& pattern, unsigned char* result); |
276 | | |
277 | | static void convert_like_pattern(LikeSearchState* state, const std::string& pattern, |
278 | | std::string* re_pattern); |
279 | | |
280 | | static void remove_escape_character(std::string* search_string); |
281 | | }; |
282 | | |
283 | | class FunctionRegexp : public FunctionLikeBase { |
284 | | public: |
285 | | static constexpr auto name = "regexp"; |
286 | | static constexpr auto alias = "rlike"; |
287 | | |
288 | 18 | static FunctionPtr create() { return std::make_shared<FunctionRegexp>(); } |
289 | | |
290 | 0 | String get_name() const override { return name; } |
291 | | |
292 | | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override; |
293 | | }; |
294 | | |
295 | | } // namespace doris::vectorized |