be/src/exprs/function/function_regexp.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <glog/logging.h> |
19 | | #include <re2/re2.h> |
20 | | #include <re2/stringpiece.h> |
21 | | #include <stddef.h> |
22 | | |
23 | | #include <boost/regex.hpp> |
24 | | #include <memory> |
25 | | #include <string> |
26 | | #include <string_view> |
27 | | #include <type_traits> |
28 | | #include <utility> |
29 | | #include <vector> |
30 | | |
31 | | #include "common/status.h" |
32 | | #include "core/block/block.h" |
33 | | #include "core/block/column_numbers.h" |
34 | | #include "core/block/column_with_type_and_name.h" |
35 | | #include "core/column/column.h" |
36 | | #include "core/column/column_const.h" |
37 | | #include "core/column/column_execute_util.h" |
38 | | #include "core/column/column_nullable.h" |
39 | | #include "core/column/column_string.h" |
40 | | #include "core/column/column_vector.h" |
41 | | #include "core/data_type/data_type.h" |
42 | | #include "core/data_type/data_type_nullable.h" |
43 | | #include "core/data_type/data_type_number.h" |
44 | | #include "core/data_type/data_type_string.h" |
45 | | #include "core/string_ref.h" |
46 | | #include "core/types.h" |
47 | | #include "exec/common/stringop_substring.h" |
48 | | #include "exprs/aggregate/aggregate_function.h" |
49 | | #include "exprs/function/function.h" |
50 | | #include "exprs/function/simple_function_factory.h" |
51 | | #include "exprs/function_context.h" |
52 | | #include "exprs/string_functions.h" |
53 | | |
54 | | namespace doris { |
55 | | |
56 | | // Helper structure to hold either RE2 or Boost.Regex |
57 | | struct RegexpExtractEngine { |
58 | | std::unique_ptr<re2::RE2> re2_regex; |
59 | | std::unique_ptr<boost::regex> boost_regex; |
60 | | |
61 | 0 | bool is_boost() const { return boost_regex != nullptr; } |
62 | 46 | bool is_re2() const { return re2_regex != nullptr; } |
63 | | |
64 | | // Try to compile with RE2 first, fallback to Boost.Regex if RE2 fails |
65 | | static bool compile(const StringRef& pattern, std::string* error_str, |
66 | 26 | RegexpExtractEngine& engine, bool enable_extended_regex) { |
67 | 26 | re2::RE2::Options options; |
68 | 26 | options.set_log_errors(false); // avoid RE2 printing to stderr; we handle errors ourselves |
69 | 26 | options.set_dot_nl(true); // make '.' match '\n' by default, consistent with REGEXP/LIKE |
70 | 26 | engine.re2_regex = |
71 | 26 | std::make_unique<re2::RE2>(re2::StringPiece(pattern.data, pattern.size), options); |
72 | | |
73 | 26 | if (engine.re2_regex->ok()) { |
74 | 26 | return true; |
75 | 26 | } else if (!enable_extended_regex) { |
76 | 0 | *error_str = fmt::format( |
77 | 0 | "Invalid regex pattern: {}. Error: {}. If you need advanced regex features, " |
78 | 0 | "try setting enable_extended_regex=true", |
79 | 0 | std::string(pattern.data, pattern.size), engine.re2_regex->error()); |
80 | 0 | return false; |
81 | 0 | } |
82 | | |
83 | | // RE2 failed, try Boost.Regex for advanced features like zero-width assertions |
84 | 0 | engine.re2_regex.reset(); |
85 | 0 | try { |
86 | 0 | boost::regex::flag_type flags = boost::regex::normal; |
87 | 0 | engine.boost_regex = std::make_unique<boost::regex>(pattern.data, |
88 | 0 | pattern.data + pattern.size, flags); |
89 | 0 | return true; |
90 | 0 | } catch (const boost::regex_error& e) { |
91 | 0 | if (error_str) { |
92 | 0 | *error_str = fmt::format("Invalid regex pattern: {}. Error: {}", |
93 | 0 | std::string(pattern.data, pattern.size), e.what()); |
94 | 0 | } |
95 | 0 | return false; |
96 | 0 | } |
97 | 0 | } |
98 | | |
99 | | // Get number of capturing groups |
100 | 23 | int number_of_capturing_groups() const { |
101 | 23 | if (is_re2()) { |
102 | 23 | return re2_regex->NumberOfCapturingGroups(); |
103 | 23 | } else if (is_boost()) { |
104 | 0 | return static_cast<int>(boost_regex->mark_count()); |
105 | 0 | } |
106 | 0 | return 0; |
107 | 23 | } |
108 | | |
109 | | // Match function for extraction |
110 | 16 | bool match_and_extract(const char* data, size_t size, int index, std::string& result) const { |
111 | 16 | if (is_re2()) { |
112 | 16 | int max_matches = 1 + re2_regex->NumberOfCapturingGroups(); |
113 | 16 | if (index >= max_matches) { |
114 | 0 | return false; |
115 | 0 | } |
116 | 16 | std::vector<re2::StringPiece> matches(max_matches); |
117 | 16 | bool success = re2_regex->Match(re2::StringPiece(data, size), 0, size, |
118 | 16 | re2::RE2::UNANCHORED, matches.data(), max_matches); |
119 | 16 | if (success && index < matches.size()) { |
120 | 16 | const re2::StringPiece& match = matches[index]; |
121 | 16 | result.assign(match.data(), match.size()); |
122 | 16 | return true; |
123 | 16 | } |
124 | 0 | return false; |
125 | 16 | } else if (is_boost()) { |
126 | 0 | boost::cmatch matches; |
127 | 0 | bool success = boost::regex_search(data, data + size, matches, *boost_regex); |
128 | 0 | if (success && index < matches.size()) { |
129 | 0 | result = matches[index].str(); |
130 | 0 | return true; |
131 | 0 | } |
132 | 0 | return false; |
133 | 0 | } |
134 | 0 | return false; |
135 | 16 | } |
136 | | |
137 | | // Match all occurrences and extract the first capturing group |
138 | | void match_all_and_extract(const char* data, size_t size, |
139 | 7 | std::vector<std::string>& results) const { |
140 | 7 | if (is_re2()) { |
141 | 7 | int max_matches = 1 + re2_regex->NumberOfCapturingGroups(); |
142 | 7 | if (max_matches < 2) { |
143 | 0 | return; // No capturing groups |
144 | 0 | } |
145 | | |
146 | 7 | size_t pos = 0; |
147 | 19 | while (pos < size) { |
148 | 18 | const char* str_pos = data + pos; |
149 | 18 | size_t str_size = size - pos; |
150 | 18 | std::vector<re2::StringPiece> matches(max_matches); |
151 | 18 | bool success = re2_regex->Match(re2::StringPiece(str_pos, str_size), 0, str_size, |
152 | 18 | re2::RE2::UNANCHORED, matches.data(), max_matches); |
153 | 18 | if (!success) { |
154 | 6 | break; |
155 | 6 | } |
156 | 12 | if (matches[0].empty()) { |
157 | 0 | pos += 1; |
158 | 0 | continue; |
159 | 0 | } |
160 | | // Extract first capturing group |
161 | 12 | if (matches.size() > 1 && !matches[1].empty()) { |
162 | 12 | results.emplace_back(matches[1].data(), matches[1].size()); |
163 | 12 | } |
164 | | // Move position forward |
165 | 12 | auto offset = std::string(str_pos, str_size) |
166 | 12 | .find(std::string(matches[0].data(), matches[0].size())); |
167 | 12 | pos += offset + matches[0].size(); |
168 | 12 | } |
169 | 7 | } else if (is_boost()) { |
170 | 0 | const char* search_start = data; |
171 | 0 | const char* search_end = data + size; |
172 | 0 | boost::match_results<const char*> matches; |
173 | |
|
174 | 0 | while (boost::regex_search(search_start, search_end, matches, *boost_regex)) { |
175 | 0 | if (matches.size() > 1 && matches[1].matched) { |
176 | 0 | results.emplace_back(matches[1].str()); |
177 | 0 | } |
178 | 0 | if (matches[0].length() == 0) { |
179 | 0 | if (search_start == search_end) { |
180 | 0 | break; |
181 | 0 | } |
182 | 0 | search_start += 1; |
183 | 0 | } else { |
184 | 0 | search_start = matches[0].second; |
185 | 0 | } |
186 | 0 | } |
187 | 0 | } |
188 | 7 | } |
189 | | }; |
190 | | |
191 | | struct RegexpCountImpl { |
192 | | using StringColumnView = ColumnView<TYPE_STRING>; |
193 | | |
194 | | static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[], |
195 | 13 | size_t input_rows_count, ColumnInt32::Container& result_data) { |
196 | 13 | auto str_col = StringColumnView::create(argument_columns[0]); |
197 | 13 | auto pattern_col = StringColumnView::create(argument_columns[1]); |
198 | 31 | for (size_t i = 0; i < input_rows_count; ++i) { |
199 | 18 | DCHECK(!str_col.is_null_at(i)); |
200 | 18 | DCHECK(!pattern_col.is_null_at(i)); |
201 | 18 | result_data[i] = _execute_inner_loop(context, str_col, pattern_col, i); |
202 | 18 | } |
203 | 13 | } |
204 | | static int _execute_inner_loop(FunctionContext* context, const StringColumnView& str_col, |
205 | 18 | const StringColumnView& pattern_col, const size_t index_now) { |
206 | 18 | re2::RE2* re = reinterpret_cast<re2::RE2*>( |
207 | 18 | context->get_function_state(FunctionContext::THREAD_LOCAL)); |
208 | 18 | std::unique_ptr<re2::RE2> scoped_re; |
209 | 18 | if (re == nullptr) { |
210 | 12 | std::string error_str; |
211 | 12 | const auto pattern = pattern_col.value_at(index_now); |
212 | 12 | bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(), StringRef(), |
213 | 12 | scoped_re); |
214 | 12 | if (!st) { |
215 | 0 | context->add_warning(error_str.c_str()); |
216 | 0 | throw Exception(Status::InvalidArgument(error_str)); |
217 | 0 | return 0; |
218 | 0 | } |
219 | 12 | re = scoped_re.get(); |
220 | 12 | } |
221 | | |
222 | 18 | const auto str = str_col.value_at(index_now); |
223 | 18 | int count = 0; |
224 | 18 | size_t pos = 0; |
225 | 70 | while (pos < str.size) { |
226 | 64 | auto str_pos = str.data + pos; |
227 | 64 | auto str_size = str.size - pos; |
228 | 64 | re2::StringPiece str_sp_current = re2::StringPiece(str_pos, str_size); |
229 | 64 | re2::StringPiece match; |
230 | | |
231 | 64 | bool success = re->Match(str_sp_current, 0, str_size, re2::RE2::UNANCHORED, &match, 1); |
232 | 64 | if (!success) { |
233 | 12 | break; |
234 | 12 | } |
235 | 52 | if (match.empty()) { |
236 | 20 | pos += 1; |
237 | 20 | continue; |
238 | 20 | } |
239 | 32 | count++; |
240 | 32 | size_t match_start = match.data() - str_sp_current.data(); |
241 | 32 | pos += match_start + match.size(); |
242 | 32 | } |
243 | | |
244 | 18 | return count; |
245 | 18 | } |
246 | | }; |
247 | | |
248 | | class FunctionRegexpCount : public IFunction { |
249 | | public: |
250 | | static constexpr auto name = "regexp_count"; |
251 | | |
252 | 21 | static FunctionPtr create() { return std::make_shared<FunctionRegexpCount>(); } |
253 | | |
254 | 1 | String get_name() const override { return name; } |
255 | | |
256 | 19 | size_t get_number_of_arguments() const override { return 2; } |
257 | | |
258 | 19 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
259 | 19 | return std::make_shared<DataTypeInt32>(); |
260 | 19 | } |
261 | | |
262 | 38 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
263 | 38 | if (scope == FunctionContext::THREAD_LOCAL) { |
264 | 19 | if (context->is_col_constant(1)) { |
265 | 12 | DCHECK(!context->get_function_state(scope)); |
266 | 12 | const auto pattern_col = context->get_constant_col(1)->column_ptr; |
267 | 12 | const auto& pattern = pattern_col->get_data_at(0); |
268 | 12 | if (pattern.size == 0) { |
269 | 4 | return Status::OK(); |
270 | 4 | } |
271 | | |
272 | 8 | std::string error_str; |
273 | 8 | std::unique_ptr<re2::RE2> scoped_re; |
274 | 8 | bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(), |
275 | 8 | StringRef(), scoped_re); |
276 | 8 | if (!st) { |
277 | 0 | context->set_error(error_str.c_str()); |
278 | 0 | return Status::InvalidArgument(error_str); |
279 | 0 | } |
280 | 8 | std::shared_ptr<re2::RE2> re(scoped_re.release()); |
281 | 8 | context->set_function_state(scope, re); |
282 | 8 | } |
283 | 19 | } |
284 | 34 | return Status::OK(); |
285 | 38 | } |
286 | | |
287 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
288 | 13 | uint32_t result, size_t input_rows_count) const override { |
289 | 13 | auto result_data_column = ColumnInt32::create(input_rows_count); |
290 | 13 | auto& result_data = result_data_column->get_data(); |
291 | | |
292 | 13 | ColumnPtr argument_columns[2]; |
293 | | |
294 | 13 | argument_columns[0] = block.get_by_position(arguments[0]).column; |
295 | 13 | argument_columns[1] = block.get_by_position(arguments[1]).column; |
296 | 13 | RegexpCountImpl::execute_impl(context, argument_columns, input_rows_count, result_data); |
297 | | |
298 | 13 | block.get_by_position(result).column = std::move(result_data_column); |
299 | 13 | return Status::OK(); |
300 | 13 | } |
301 | | }; |
302 | | |
303 | | struct ThreeParamTypes { |
304 | 2 | static DataTypes get_variadic_argument_types() { |
305 | 2 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
306 | 2 | std::make_shared<DataTypeString>()}; |
307 | 2 | } |
308 | | }; |
309 | | |
310 | | struct FourParamTypes { |
311 | 2 | static DataTypes get_variadic_argument_types() { |
312 | 2 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
313 | 2 | std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; |
314 | 2 | } |
315 | | }; |
316 | | |
317 | | // template FunctionRegexpFunctionality is used for regexp_replace/regexp_replace_one |
318 | | template <typename Impl, typename ParamTypes> |
319 | | class FunctionRegexpReplace : public IFunction { |
320 | | public: |
321 | | static constexpr auto name = Impl::name; |
322 | | |
323 | 24 | static FunctionPtr create() { return std::make_shared<FunctionRegexpReplace>(); }_ZN5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE6createEv Line | Count | Source | 323 | 10 | static FunctionPtr create() { return std::make_shared<FunctionRegexpReplace>(); } |
_ZN5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE6createEv Line | Count | Source | 323 | 2 | static FunctionPtr create() { return std::make_shared<FunctionRegexpReplace>(); } |
_ZN5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE6createEv Line | Count | Source | 323 | 10 | static FunctionPtr create() { return std::make_shared<FunctionRegexpReplace>(); } |
_ZN5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE6createEv Line | Count | Source | 323 | 2 | static FunctionPtr create() { return std::make_shared<FunctionRegexpReplace>(); } |
|
324 | | |
325 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE8get_nameB5cxx11Ev |
326 | | |
327 | 0 | size_t get_number_of_arguments() const override { |
328 | 0 | return get_variadic_argument_types_impl().size(); |
329 | 0 | } Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE23get_number_of_argumentsEv |
330 | | |
331 | 20 | bool is_variadic() const override { return true; }_ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE11is_variadicEv Line | Count | Source | 331 | 9 | bool is_variadic() const override { return true; } |
_ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE11is_variadicEv Line | Count | Source | 331 | 1 | bool is_variadic() const override { return true; } |
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE11is_variadicEv Line | Count | Source | 331 | 9 | bool is_variadic() const override { return true; } |
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE11is_variadicEv Line | Count | Source | 331 | 1 | bool is_variadic() const override { return true; } |
|
332 | | |
333 | 16 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
334 | 16 | return make_nullable(std::make_shared<DataTypeString>()); |
335 | 16 | } _ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE Line | Count | Source | 333 | 8 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 334 | 8 | return make_nullable(std::make_shared<DataTypeString>()); | 335 | 8 | } |
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE _ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE Line | Count | Source | 333 | 8 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 334 | 8 | return make_nullable(std::make_shared<DataTypeString>()); | 335 | 8 | } |
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE |
336 | | |
337 | 4 | DataTypes get_variadic_argument_types_impl() const override { |
338 | 4 | return ParamTypes::get_variadic_argument_types(); |
339 | 4 | } _ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE32get_variadic_argument_types_implEv Line | Count | Source | 337 | 1 | DataTypes get_variadic_argument_types_impl() const override { | 338 | 1 | return ParamTypes::get_variadic_argument_types(); | 339 | 1 | } |
_ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE32get_variadic_argument_types_implEv Line | Count | Source | 337 | 1 | DataTypes get_variadic_argument_types_impl() const override { | 338 | 1 | return ParamTypes::get_variadic_argument_types(); | 339 | 1 | } |
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE32get_variadic_argument_types_implEv Line | Count | Source | 337 | 1 | DataTypes get_variadic_argument_types_impl() const override { | 338 | 1 | return ParamTypes::get_variadic_argument_types(); | 339 | 1 | } |
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE32get_variadic_argument_types_implEv Line | Count | Source | 337 | 1 | DataTypes get_variadic_argument_types_impl() const override { | 338 | 1 | return ParamTypes::get_variadic_argument_types(); | 339 | 1 | } |
|
340 | | |
341 | 32 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
342 | 32 | if (scope == FunctionContext::THREAD_LOCAL) { |
343 | 16 | if (context->is_col_constant(1)) { |
344 | 16 | DCHECK(!context->get_function_state(scope)); |
345 | 16 | const auto pattern_col = context->get_constant_col(1)->column_ptr; |
346 | 16 | const auto& pattern = pattern_col->get_data_at(0); |
347 | 16 | if (pattern.size == 0) { |
348 | 4 | return Status::OK(); |
349 | 4 | } |
350 | | |
351 | 12 | std::string error_str; |
352 | 12 | std::unique_ptr<re2::RE2> scoped_re; |
353 | 12 | StringRef options_value; |
354 | 12 | if constexpr (std::is_same_v<FourParamTypes, ParamTypes>) { |
355 | 0 | DCHECK_EQ(context->get_num_args(), 4); |
356 | 0 | DCHECK(context->is_col_constant(3)); |
357 | 0 | const auto options_col = context->get_constant_col(3)->column_ptr; |
358 | 0 | options_value = options_col->get_data_at(0); |
359 | 0 | } |
360 | | |
361 | 12 | bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(), |
362 | 12 | options_value, scoped_re); |
363 | 12 | if (!st) { |
364 | 0 | context->set_error(error_str.c_str()); |
365 | 0 | return Status::InvalidArgument(error_str); |
366 | 0 | } |
367 | 12 | std::shared_ptr<re2::RE2> re(scoped_re.release()); |
368 | 12 | context->set_function_state(scope, re); |
369 | 12 | } |
370 | 16 | } |
371 | 28 | return Status::OK(); |
372 | 32 | } _ZN5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE Line | Count | Source | 341 | 16 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 342 | 16 | if (scope == FunctionContext::THREAD_LOCAL) { | 343 | 8 | if (context->is_col_constant(1)) { | 344 | 8 | DCHECK(!context->get_function_state(scope)); | 345 | 8 | const auto pattern_col = context->get_constant_col(1)->column_ptr; | 346 | 8 | const auto& pattern = pattern_col->get_data_at(0); | 347 | 8 | if (pattern.size == 0) { | 348 | 2 | return Status::OK(); | 349 | 2 | } | 350 | | | 351 | 6 | std::string error_str; | 352 | 6 | std::unique_ptr<re2::RE2> scoped_re; | 353 | 6 | StringRef options_value; | 354 | | if constexpr (std::is_same_v<FourParamTypes, ParamTypes>) { | 355 | | DCHECK_EQ(context->get_num_args(), 4); | 356 | | DCHECK(context->is_col_constant(3)); | 357 | | const auto options_col = context->get_constant_col(3)->column_ptr; | 358 | | options_value = options_col->get_data_at(0); | 359 | | } | 360 | | | 361 | 6 | bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(), | 362 | 6 | options_value, scoped_re); | 363 | 6 | if (!st) { | 364 | 0 | context->set_error(error_str.c_str()); | 365 | 0 | return Status::InvalidArgument(error_str); | 366 | 0 | } | 367 | 6 | std::shared_ptr<re2::RE2> re(scoped_re.release()); | 368 | 6 | context->set_function_state(scope, re); | 369 | 6 | } | 370 | 8 | } | 371 | 14 | return Status::OK(); | 372 | 16 | } |
Unexecuted instantiation: _ZN5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE _ZN5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE Line | Count | Source | 341 | 16 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 342 | 16 | if (scope == FunctionContext::THREAD_LOCAL) { | 343 | 8 | if (context->is_col_constant(1)) { | 344 | 8 | DCHECK(!context->get_function_state(scope)); | 345 | 8 | const auto pattern_col = context->get_constant_col(1)->column_ptr; | 346 | 8 | const auto& pattern = pattern_col->get_data_at(0); | 347 | 8 | if (pattern.size == 0) { | 348 | 2 | return Status::OK(); | 349 | 2 | } | 350 | | | 351 | 6 | std::string error_str; | 352 | 6 | std::unique_ptr<re2::RE2> scoped_re; | 353 | 6 | StringRef options_value; | 354 | | if constexpr (std::is_same_v<FourParamTypes, ParamTypes>) { | 355 | | DCHECK_EQ(context->get_num_args(), 4); | 356 | | DCHECK(context->is_col_constant(3)); | 357 | | const auto options_col = context->get_constant_col(3)->column_ptr; | 358 | | options_value = options_col->get_data_at(0); | 359 | | } | 360 | | | 361 | 6 | bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(), | 362 | 6 | options_value, scoped_re); | 363 | 6 | if (!st) { | 364 | 0 | context->set_error(error_str.c_str()); | 365 | 0 | return Status::InvalidArgument(error_str); | 366 | 0 | } | 367 | 6 | std::shared_ptr<re2::RE2> re(scoped_re.release()); | 368 | 6 | context->set_function_state(scope, re); | 369 | 6 | } | 370 | 8 | } | 371 | 14 | return Status::OK(); | 372 | 16 | } |
Unexecuted instantiation: _ZN5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE |
373 | | |
374 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
375 | 12 | uint32_t result, size_t input_rows_count) const override { |
376 | 12 | size_t argument_size = arguments.size(); |
377 | | |
378 | 12 | auto result_null_map = ColumnUInt8::create(input_rows_count, 0); |
379 | 12 | auto result_data_column = ColumnString::create(); |
380 | 12 | auto& result_data = result_data_column->get_chars(); |
381 | 12 | auto& result_offset = result_data_column->get_offsets(); |
382 | 12 | result_offset.resize(input_rows_count); |
383 | | |
384 | 12 | bool col_const[3]; |
385 | 12 | ColumnPtr argument_columns[3]; |
386 | 48 | for (int i = 0; i < 3; ++i) { |
387 | 36 | col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); |
388 | 36 | } |
389 | 12 | argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>( |
390 | 0 | *block.get_by_position(arguments[0]).column) |
391 | 0 | .convert_to_full_column() |
392 | 12 | : block.get_by_position(arguments[0]).column; |
393 | | |
394 | 12 | default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments); |
395 | | |
396 | 12 | StringRef options_value; |
397 | 12 | if (col_const[1] && col_const[2]) { |
398 | 0 | Impl::execute_impl_const_args(context, argument_columns, options_value, |
399 | 0 | input_rows_count, result_data, result_offset, |
400 | 0 | result_null_map->get_data()); |
401 | 12 | } else { |
402 | | // the options have check in FE, so is always const, and get idx of 0 |
403 | 12 | if (argument_size == 4) { |
404 | 0 | options_value = block.get_by_position(arguments[3]).column->get_data_at(0); |
405 | 0 | } |
406 | 12 | Impl::execute_impl(context, argument_columns, options_value, input_rows_count, |
407 | 12 | result_data, result_offset, result_null_map->get_data()); |
408 | 12 | } |
409 | | |
410 | 12 | block.get_by_position(result).column = |
411 | 12 | ColumnNullable::create(std::move(result_data_column), std::move(result_null_map)); |
412 | 12 | return Status::OK(); |
413 | 12 | } _ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 375 | 6 | uint32_t result, size_t input_rows_count) const override { | 376 | 6 | size_t argument_size = arguments.size(); | 377 | | | 378 | 6 | auto result_null_map = ColumnUInt8::create(input_rows_count, 0); | 379 | 6 | auto result_data_column = ColumnString::create(); | 380 | 6 | auto& result_data = result_data_column->get_chars(); | 381 | 6 | auto& result_offset = result_data_column->get_offsets(); | 382 | 6 | result_offset.resize(input_rows_count); | 383 | | | 384 | 6 | bool col_const[3]; | 385 | 6 | ColumnPtr argument_columns[3]; | 386 | 24 | for (int i = 0; i < 3; ++i) { | 387 | 18 | col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); | 388 | 18 | } | 389 | 6 | argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>( | 390 | 0 | *block.get_by_position(arguments[0]).column) | 391 | 0 | .convert_to_full_column() | 392 | 6 | : block.get_by_position(arguments[0]).column; | 393 | | | 394 | 6 | default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments); | 395 | | | 396 | 6 | StringRef options_value; | 397 | 6 | if (col_const[1] && col_const[2]) { | 398 | 0 | Impl::execute_impl_const_args(context, argument_columns, options_value, | 399 | 0 | input_rows_count, result_data, result_offset, | 400 | 0 | result_null_map->get_data()); | 401 | 6 | } else { | 402 | | // the options have check in FE, so is always const, and get idx of 0 | 403 | 6 | if (argument_size == 4) { | 404 | 0 | options_value = block.get_by_position(arguments[3]).column->get_data_at(0); | 405 | 0 | } | 406 | 6 | Impl::execute_impl(context, argument_columns, options_value, input_rows_count, | 407 | 6 | result_data, result_offset, result_null_map->get_data()); | 408 | 6 | } | 409 | | | 410 | 6 | block.get_by_position(result).column = | 411 | 6 | ColumnNullable::create(std::move(result_data_column), std::move(result_null_map)); | 412 | 6 | return Status::OK(); | 413 | 6 | } |
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm _ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 375 | 6 | uint32_t result, size_t input_rows_count) const override { | 376 | 6 | size_t argument_size = arguments.size(); | 377 | | | 378 | 6 | auto result_null_map = ColumnUInt8::create(input_rows_count, 0); | 379 | 6 | auto result_data_column = ColumnString::create(); | 380 | 6 | auto& result_data = result_data_column->get_chars(); | 381 | 6 | auto& result_offset = result_data_column->get_offsets(); | 382 | 6 | result_offset.resize(input_rows_count); | 383 | | | 384 | 6 | bool col_const[3]; | 385 | 6 | ColumnPtr argument_columns[3]; | 386 | 24 | for (int i = 0; i < 3; ++i) { | 387 | 18 | col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); | 388 | 18 | } | 389 | 6 | argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>( | 390 | 0 | *block.get_by_position(arguments[0]).column) | 391 | 0 | .convert_to_full_column() | 392 | 6 | : block.get_by_position(arguments[0]).column; | 393 | | | 394 | 6 | default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments); | 395 | | | 396 | 6 | StringRef options_value; | 397 | 6 | if (col_const[1] && col_const[2]) { | 398 | 0 | Impl::execute_impl_const_args(context, argument_columns, options_value, | 399 | 0 | input_rows_count, result_data, result_offset, | 400 | 0 | result_null_map->get_data()); | 401 | 6 | } else { | 402 | | // the options have check in FE, so is always const, and get idx of 0 | 403 | 6 | if (argument_size == 4) { | 404 | 0 | options_value = block.get_by_position(arguments[3]).column->get_data_at(0); | 405 | 0 | } | 406 | 6 | Impl::execute_impl(context, argument_columns, options_value, input_rows_count, | 407 | 6 | result_data, result_offset, result_null_map->get_data()); | 408 | 6 | } | 409 | | | 410 | 6 | block.get_by_position(result).column = | 411 | 6 | ColumnNullable::create(std::move(result_data_column), std::move(result_null_map)); | 412 | 6 | return Status::OK(); | 413 | 6 | } |
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm |
414 | | }; |
415 | | |
416 | | struct RegexpReplaceImpl { |
417 | | static constexpr auto name = "regexp_replace"; |
418 | | static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[], |
419 | | const StringRef& options_value, size_t input_rows_count, |
420 | | ColumnString::Chars& result_data, ColumnString::Offsets& result_offset, |
421 | 6 | NullMap& null_map) { |
422 | 6 | const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get()); |
423 | 6 | const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get()); |
424 | 6 | const auto* replace_col = check_and_get_column<ColumnString>(argument_columns[2].get()); |
425 | | |
426 | 12 | for (size_t i = 0; i < input_rows_count; ++i) { |
427 | 6 | if (null_map[i]) { |
428 | 0 | StringOP::push_null_string(i, result_data, result_offset, null_map); |
429 | 0 | continue; |
430 | 0 | } |
431 | 6 | _execute_inner_loop<false>(context, str_col, pattern_col, replace_col, options_value, |
432 | 6 | result_data, result_offset, null_map, i); |
433 | 6 | } |
434 | 6 | } |
435 | | static void execute_impl_const_args(FunctionContext* context, ColumnPtr argument_columns[], |
436 | | const StringRef& options_value, size_t input_rows_count, |
437 | | ColumnString::Chars& result_data, |
438 | 0 | ColumnString::Offsets& result_offset, NullMap& null_map) { |
439 | 0 | const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get()); |
440 | 0 | const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get()); |
441 | 0 | const auto* replace_col = check_and_get_column<ColumnString>(argument_columns[2].get()); |
442 | |
|
443 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
444 | 0 | if (null_map[i]) { |
445 | 0 | StringOP::push_null_string(i, result_data, result_offset, null_map); |
446 | 0 | continue; |
447 | 0 | } |
448 | 0 | _execute_inner_loop<true>(context, str_col, pattern_col, replace_col, options_value, |
449 | 0 | result_data, result_offset, null_map, i); |
450 | 0 | } |
451 | 0 | } |
452 | | template <bool Const> |
453 | | static void _execute_inner_loop(FunctionContext* context, const ColumnString* str_col, |
454 | | const ColumnString* pattern_col, |
455 | | const ColumnString* replace_col, const StringRef& options_value, |
456 | | ColumnString::Chars& result_data, |
457 | | ColumnString::Offsets& result_offset, NullMap& null_map, |
458 | 6 | const size_t index_now) { |
459 | 6 | re2::RE2* re = reinterpret_cast<re2::RE2*>( |
460 | 6 | context->get_function_state(FunctionContext::THREAD_LOCAL)); |
461 | 6 | std::unique_ptr<re2::RE2> scoped_re; // destroys re if state->re is nullptr |
462 | 6 | if (re == nullptr) { |
463 | 2 | std::string error_str; |
464 | 2 | const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const)); |
465 | 2 | bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(), |
466 | 2 | options_value, scoped_re); |
467 | 2 | if (!st) { |
468 | 0 | context->add_warning(error_str.c_str()); |
469 | 0 | StringOP::push_null_string(index_now, result_data, result_offset, null_map); |
470 | 0 | return; |
471 | 0 | } |
472 | 2 | re = scoped_re.get(); |
473 | 2 | } |
474 | | |
475 | 6 | re2::StringPiece replace_str = re2::StringPiece( |
476 | 6 | replace_col->get_data_at(index_check_const(index_now, Const)).to_string_view()); |
477 | | |
478 | 6 | std::string result_str(str_col->get_data_at(index_now).to_string()); |
479 | 6 | re2::RE2::GlobalReplace(&result_str, *re, replace_str); |
480 | 6 | StringOP::push_value_string(result_str, index_now, result_data, result_offset); |
481 | 6 | } Unexecuted instantiation: _ZN5doris17RegexpReplaceImpl19_execute_inner_loopILb1EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES7_S7_RKNS_9StringRefERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSB_IjLm4096ESE_Lm16ELm15EEESG_m _ZN5doris17RegexpReplaceImpl19_execute_inner_loopILb0EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES7_S7_RKNS_9StringRefERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSB_IjLm4096ESE_Lm16ELm15EEESG_m Line | Count | Source | 458 | 6 | const size_t index_now) { | 459 | 6 | re2::RE2* re = reinterpret_cast<re2::RE2*>( | 460 | 6 | context->get_function_state(FunctionContext::THREAD_LOCAL)); | 461 | 6 | std::unique_ptr<re2::RE2> scoped_re; // destroys re if state->re is nullptr | 462 | 6 | if (re == nullptr) { | 463 | 2 | std::string error_str; | 464 | 2 | const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const)); | 465 | 2 | bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(), | 466 | 2 | options_value, scoped_re); | 467 | 2 | if (!st) { | 468 | 0 | context->add_warning(error_str.c_str()); | 469 | 0 | StringOP::push_null_string(index_now, result_data, result_offset, null_map); | 470 | 0 | return; | 471 | 0 | } | 472 | 2 | re = scoped_re.get(); | 473 | 2 | } | 474 | | | 475 | 6 | re2::StringPiece replace_str = re2::StringPiece( | 476 | 6 | replace_col->get_data_at(index_check_const(index_now, Const)).to_string_view()); | 477 | | | 478 | 6 | std::string result_str(str_col->get_data_at(index_now).to_string()); | 479 | 6 | re2::RE2::GlobalReplace(&result_str, *re, replace_str); | 480 | 6 | StringOP::push_value_string(result_str, index_now, result_data, result_offset); | 481 | 6 | } |
|
482 | | }; |
483 | | |
484 | | struct RegexpReplaceOneImpl { |
485 | | static constexpr auto name = "regexp_replace_one"; |
486 | | |
487 | | static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[], |
488 | | const StringRef& options_value, size_t input_rows_count, |
489 | | ColumnString::Chars& result_data, ColumnString::Offsets& result_offset, |
490 | 6 | NullMap& null_map) { |
491 | 6 | const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get()); |
492 | 6 | const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get()); |
493 | 6 | const auto* replace_col = check_and_get_column<ColumnString>(argument_columns[2].get()); |
494 | | // 3 args |
495 | 12 | for (size_t i = 0; i < input_rows_count; ++i) { |
496 | 6 | if (null_map[i]) { |
497 | 0 | StringOP::push_null_string(i, result_data, result_offset, null_map); |
498 | 0 | continue; |
499 | 0 | } |
500 | 6 | _execute_inner_loop<false>(context, str_col, pattern_col, replace_col, options_value, |
501 | 6 | result_data, result_offset, null_map, i); |
502 | 6 | } |
503 | 6 | } |
504 | | |
505 | | static void execute_impl_const_args(FunctionContext* context, ColumnPtr argument_columns[], |
506 | | const StringRef& options_value, size_t input_rows_count, |
507 | | ColumnString::Chars& result_data, |
508 | 0 | ColumnString::Offsets& result_offset, NullMap& null_map) { |
509 | 0 | const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get()); |
510 | 0 | const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get()); |
511 | 0 | const auto* replace_col = check_and_get_column<ColumnString>(argument_columns[2].get()); |
512 | | // 3 args |
513 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
514 | 0 | if (null_map[i]) { |
515 | 0 | StringOP::push_null_string(i, result_data, result_offset, null_map); |
516 | 0 | continue; |
517 | 0 | } |
518 | 0 | _execute_inner_loop<true>(context, str_col, pattern_col, replace_col, options_value, |
519 | 0 | result_data, result_offset, null_map, i); |
520 | 0 | } |
521 | 0 | } |
522 | | template <bool Const> |
523 | | static void _execute_inner_loop(FunctionContext* context, const ColumnString* str_col, |
524 | | const ColumnString* pattern_col, |
525 | | const ColumnString* replace_col, const StringRef& options_value, |
526 | | ColumnString::Chars& result_data, |
527 | | ColumnString::Offsets& result_offset, NullMap& null_map, |
528 | 6 | const size_t index_now) { |
529 | 6 | re2::RE2* re = reinterpret_cast<re2::RE2*>( |
530 | 6 | context->get_function_state(FunctionContext::THREAD_LOCAL)); |
531 | 6 | std::unique_ptr<re2::RE2> scoped_re; // destroys re if state->re is nullptr |
532 | 6 | if (re == nullptr) { |
533 | 2 | std::string error_str; |
534 | 2 | const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const)); |
535 | 2 | bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(), |
536 | 2 | options_value, scoped_re); |
537 | 2 | if (!st) { |
538 | 0 | context->add_warning(error_str.c_str()); |
539 | 0 | StringOP::push_null_string(index_now, result_data, result_offset, null_map); |
540 | 0 | return; |
541 | 0 | } |
542 | 2 | re = scoped_re.get(); |
543 | 2 | } |
544 | | |
545 | 6 | re2::StringPiece replace_str = re2::StringPiece( |
546 | 6 | replace_col->get_data_at(index_check_const(index_now, Const)).to_string_view()); |
547 | | |
548 | 6 | std::string result_str(str_col->get_data_at(index_now).to_string()); |
549 | 6 | re2::RE2::Replace(&result_str, *re, replace_str); |
550 | 6 | StringOP::push_value_string(result_str, index_now, result_data, result_offset); |
551 | 6 | } Unexecuted instantiation: _ZN5doris20RegexpReplaceOneImpl19_execute_inner_loopILb1EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES7_S7_RKNS_9StringRefERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSB_IjLm4096ESE_Lm16ELm15EEESG_m _ZN5doris20RegexpReplaceOneImpl19_execute_inner_loopILb0EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES7_S7_RKNS_9StringRefERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSB_IjLm4096ESE_Lm16ELm15EEESG_m Line | Count | Source | 528 | 6 | const size_t index_now) { | 529 | 6 | re2::RE2* re = reinterpret_cast<re2::RE2*>( | 530 | 6 | context->get_function_state(FunctionContext::THREAD_LOCAL)); | 531 | 6 | std::unique_ptr<re2::RE2> scoped_re; // destroys re if state->re is nullptr | 532 | 6 | if (re == nullptr) { | 533 | 2 | std::string error_str; | 534 | 2 | const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const)); | 535 | 2 | bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(), | 536 | 2 | options_value, scoped_re); | 537 | 2 | if (!st) { | 538 | 0 | context->add_warning(error_str.c_str()); | 539 | 0 | StringOP::push_null_string(index_now, result_data, result_offset, null_map); | 540 | 0 | return; | 541 | 0 | } | 542 | 2 | re = scoped_re.get(); | 543 | 2 | } | 544 | | | 545 | 6 | re2::StringPiece replace_str = re2::StringPiece( | 546 | 6 | replace_col->get_data_at(index_check_const(index_now, Const)).to_string_view()); | 547 | | | 548 | 6 | std::string result_str(str_col->get_data_at(index_now).to_string()); | 549 | 6 | re2::RE2::Replace(&result_str, *re, replace_str); | 550 | 6 | StringOP::push_value_string(result_str, index_now, result_data, result_offset); | 551 | 6 | } |
|
552 | | }; |
553 | | |
554 | | template <bool ReturnNull> |
555 | | struct RegexpExtractImpl { |
556 | | static constexpr auto name = ReturnNull ? "regexp_extract_or_null" : "regexp_extract"; |
557 | | // 3 args |
558 | | static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[], |
559 | | size_t input_rows_count, ColumnString::Chars& result_data, |
560 | 16 | ColumnString::Offsets& result_offset, NullMap& null_map) { |
561 | 16 | const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get()); |
562 | 16 | const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get()); |
563 | 16 | const auto* index_col = check_and_get_column<ColumnInt64>(argument_columns[2].get()); |
564 | 32 | for (size_t i = 0; i < input_rows_count; ++i) { |
565 | 16 | if (null_map[i]) { |
566 | 0 | StringOP::push_null_string(i, result_data, result_offset, null_map); |
567 | 0 | continue; |
568 | 0 | } |
569 | 16 | const auto& index_data = index_col->get_int(i); |
570 | 16 | if (index_data < 0) { |
571 | 0 | ReturnNull ? StringOP::push_null_string(i, result_data, result_offset, null_map) |
572 | 0 | : StringOP::push_empty_string(i, result_data, result_offset); |
573 | 0 | continue; |
574 | 0 | } |
575 | 16 | _execute_inner_loop<false>(context, str_col, pattern_col, index_data, result_data, |
576 | 16 | result_offset, null_map, i); |
577 | 16 | } |
578 | 16 | } _ZN5doris17RegexpExtractImplILb1EE12execute_implEPNS_15FunctionContextEPNS_3COWINS_7IColumnEE13immutable_ptrIS5_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_ Line | Count | Source | 560 | 8 | ColumnString::Offsets& result_offset, NullMap& null_map) { | 561 | 8 | const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get()); | 562 | 8 | const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get()); | 563 | 8 | const auto* index_col = check_and_get_column<ColumnInt64>(argument_columns[2].get()); | 564 | 16 | for (size_t i = 0; i < input_rows_count; ++i) { | 565 | 8 | if (null_map[i]) { | 566 | 0 | StringOP::push_null_string(i, result_data, result_offset, null_map); | 567 | 0 | continue; | 568 | 0 | } | 569 | 8 | const auto& index_data = index_col->get_int(i); | 570 | 8 | if (index_data < 0) { | 571 | 0 | ReturnNull ? StringOP::push_null_string(i, result_data, result_offset, null_map) | 572 | 0 | : StringOP::push_empty_string(i, result_data, result_offset); | 573 | 0 | continue; | 574 | 0 | } | 575 | 8 | _execute_inner_loop<false>(context, str_col, pattern_col, index_data, result_data, | 576 | 8 | result_offset, null_map, i); | 577 | 8 | } | 578 | 8 | } |
_ZN5doris17RegexpExtractImplILb0EE12execute_implEPNS_15FunctionContextEPNS_3COWINS_7IColumnEE13immutable_ptrIS5_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_ Line | Count | Source | 560 | 8 | ColumnString::Offsets& result_offset, NullMap& null_map) { | 561 | 8 | const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get()); | 562 | 8 | const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get()); | 563 | 8 | const auto* index_col = check_and_get_column<ColumnInt64>(argument_columns[2].get()); | 564 | 16 | for (size_t i = 0; i < input_rows_count; ++i) { | 565 | 8 | if (null_map[i]) { | 566 | 0 | StringOP::push_null_string(i, result_data, result_offset, null_map); | 567 | 0 | continue; | 568 | 0 | } | 569 | 8 | const auto& index_data = index_col->get_int(i); | 570 | 8 | if (index_data < 0) { | 571 | 0 | ReturnNull ? StringOP::push_null_string(i, result_data, result_offset, null_map) | 572 | 0 | : StringOP::push_empty_string(i, result_data, result_offset); | 573 | 0 | continue; | 574 | 0 | } | 575 | 8 | _execute_inner_loop<false>(context, str_col, pattern_col, index_data, result_data, | 576 | 8 | result_offset, null_map, i); | 577 | 8 | } | 578 | 8 | } |
|
579 | | |
580 | | static void execute_impl_const_args(FunctionContext* context, ColumnPtr argument_columns[], |
581 | | size_t input_rows_count, ColumnString::Chars& result_data, |
582 | 0 | ColumnString::Offsets& result_offset, NullMap& null_map) { |
583 | 0 | const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get()); |
584 | 0 | const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get()); |
585 | 0 | const auto* index_col = check_and_get_column<ColumnInt64>(argument_columns[2].get()); |
586 | |
|
587 | 0 | const auto& index_data = index_col->get_int(0); |
588 | 0 | if (index_data < 0) { |
589 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
590 | 0 | ReturnNull ? StringOP::push_null_string(i, result_data, result_offset, null_map) |
591 | 0 | : StringOP::push_empty_string(i, result_data, result_offset); |
592 | 0 | } |
593 | 0 | return; |
594 | 0 | } |
595 | | |
596 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
597 | 0 | if (null_map[i]) { |
598 | 0 | StringOP::push_null_string(i, result_data, result_offset, null_map); |
599 | 0 | continue; |
600 | 0 | } |
601 | | |
602 | 0 | _execute_inner_loop<true>(context, str_col, pattern_col, index_data, result_data, |
603 | 0 | result_offset, null_map, i); |
604 | 0 | } |
605 | 0 | } Unexecuted instantiation: _ZN5doris17RegexpExtractImplILb1EE23execute_impl_const_argsEPNS_15FunctionContextEPNS_3COWINS_7IColumnEE13immutable_ptrIS5_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_ Unexecuted instantiation: _ZN5doris17RegexpExtractImplILb0EE23execute_impl_const_argsEPNS_15FunctionContextEPNS_3COWINS_7IColumnEE13immutable_ptrIS5_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_ |
606 | | template <bool Const> |
607 | | static void _execute_inner_loop(FunctionContext* context, const ColumnString* str_col, |
608 | | const ColumnString* pattern_col, const Int64 index_data, |
609 | | ColumnString::Chars& result_data, |
610 | | ColumnString::Offsets& result_offset, NullMap& null_map, |
611 | 16 | const size_t index_now) { |
612 | 16 | auto* engine = reinterpret_cast<RegexpExtractEngine*>( |
613 | 16 | context->get_function_state(FunctionContext::THREAD_LOCAL)); |
614 | 16 | std::unique_ptr<RegexpExtractEngine> scoped_engine; |
615 | | |
616 | 16 | if (engine == nullptr) { |
617 | 0 | std::string error_str; |
618 | 0 | const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const)); |
619 | 0 | scoped_engine = std::make_unique<RegexpExtractEngine>(); |
620 | 0 | bool st = RegexpExtractEngine::compile(pattern, &error_str, *scoped_engine, |
621 | 0 | context->state()->enable_extended_regex()); |
622 | 0 | if (!st) { |
623 | 0 | context->add_warning(error_str.c_str()); |
624 | 0 | StringOP::push_null_string(index_now, result_data, result_offset, null_map); |
625 | 0 | return; |
626 | 0 | } |
627 | 0 | engine = scoped_engine.get(); |
628 | 0 | } |
629 | | |
630 | 16 | const auto& str = str_col->get_data_at(index_now); |
631 | | |
632 | 16 | int max_matches = 1 + engine->number_of_capturing_groups(); |
633 | 16 | if (index_data >= max_matches) { |
634 | 0 | ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map) |
635 | 0 | : StringOP::push_empty_string(index_now, result_data, result_offset); |
636 | 0 | return; |
637 | 0 | } |
638 | | |
639 | 16 | std::string match_result; |
640 | 16 | bool success = engine->match_and_extract(str.data, str.size, static_cast<int>(index_data), |
641 | 16 | match_result); |
642 | | |
643 | 16 | if (!success) { |
644 | 0 | ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map) |
645 | 0 | : StringOP::push_empty_string(index_now, result_data, result_offset); |
646 | 0 | return; |
647 | 0 | } |
648 | | |
649 | 16 | StringOP::push_value_string(std::string_view(match_result.data(), match_result.size()), |
650 | 16 | index_now, result_data, result_offset); |
651 | 16 | } Unexecuted instantiation: _ZN5doris17RegexpExtractImplILb1EE19_execute_inner_loopILb1EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES8_lRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS9_IjLm4096ESC_Lm16ELm15EEESE_m _ZN5doris17RegexpExtractImplILb1EE19_execute_inner_loopILb0EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES8_lRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS9_IjLm4096ESC_Lm16ELm15EEESE_m Line | Count | Source | 611 | 8 | const size_t index_now) { | 612 | 8 | auto* engine = reinterpret_cast<RegexpExtractEngine*>( | 613 | 8 | context->get_function_state(FunctionContext::THREAD_LOCAL)); | 614 | 8 | std::unique_ptr<RegexpExtractEngine> scoped_engine; | 615 | | | 616 | 8 | if (engine == nullptr) { | 617 | 0 | std::string error_str; | 618 | 0 | const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const)); | 619 | 0 | scoped_engine = std::make_unique<RegexpExtractEngine>(); | 620 | 0 | bool st = RegexpExtractEngine::compile(pattern, &error_str, *scoped_engine, | 621 | 0 | context->state()->enable_extended_regex()); | 622 | 0 | if (!st) { | 623 | 0 | context->add_warning(error_str.c_str()); | 624 | 0 | StringOP::push_null_string(index_now, result_data, result_offset, null_map); | 625 | 0 | return; | 626 | 0 | } | 627 | 0 | engine = scoped_engine.get(); | 628 | 0 | } | 629 | | | 630 | 8 | const auto& str = str_col->get_data_at(index_now); | 631 | | | 632 | 8 | int max_matches = 1 + engine->number_of_capturing_groups(); | 633 | 8 | if (index_data >= max_matches) { | 634 | 0 | ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map) | 635 | 0 | : StringOP::push_empty_string(index_now, result_data, result_offset); | 636 | 0 | return; | 637 | 0 | } | 638 | | | 639 | 8 | std::string match_result; | 640 | 8 | bool success = engine->match_and_extract(str.data, str.size, static_cast<int>(index_data), | 641 | 8 | match_result); | 642 | | | 643 | 8 | if (!success) { | 644 | 0 | ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map) | 645 | 0 | : StringOP::push_empty_string(index_now, result_data, result_offset); | 646 | 0 | return; | 647 | 0 | } | 648 | | | 649 | 8 | StringOP::push_value_string(std::string_view(match_result.data(), match_result.size()), | 650 | 8 | index_now, result_data, result_offset); | 651 | 8 | } |
Unexecuted instantiation: _ZN5doris17RegexpExtractImplILb0EE19_execute_inner_loopILb1EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES8_lRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS9_IjLm4096ESC_Lm16ELm15EEESE_m _ZN5doris17RegexpExtractImplILb0EE19_execute_inner_loopILb0EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES8_lRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS9_IjLm4096ESC_Lm16ELm15EEESE_m Line | Count | Source | 611 | 8 | const size_t index_now) { | 612 | 8 | auto* engine = reinterpret_cast<RegexpExtractEngine*>( | 613 | 8 | context->get_function_state(FunctionContext::THREAD_LOCAL)); | 614 | 8 | std::unique_ptr<RegexpExtractEngine> scoped_engine; | 615 | | | 616 | 8 | if (engine == nullptr) { | 617 | 0 | std::string error_str; | 618 | 0 | const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const)); | 619 | 0 | scoped_engine = std::make_unique<RegexpExtractEngine>(); | 620 | 0 | bool st = RegexpExtractEngine::compile(pattern, &error_str, *scoped_engine, | 621 | 0 | context->state()->enable_extended_regex()); | 622 | 0 | if (!st) { | 623 | 0 | context->add_warning(error_str.c_str()); | 624 | 0 | StringOP::push_null_string(index_now, result_data, result_offset, null_map); | 625 | 0 | return; | 626 | 0 | } | 627 | 0 | engine = scoped_engine.get(); | 628 | 0 | } | 629 | | | 630 | 8 | const auto& str = str_col->get_data_at(index_now); | 631 | | | 632 | 8 | int max_matches = 1 + engine->number_of_capturing_groups(); | 633 | 8 | if (index_data >= max_matches) { | 634 | 0 | ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map) | 635 | 0 | : StringOP::push_empty_string(index_now, result_data, result_offset); | 636 | 0 | return; | 637 | 0 | } | 638 | | | 639 | 8 | std::string match_result; | 640 | 8 | bool success = engine->match_and_extract(str.data, str.size, static_cast<int>(index_data), | 641 | 8 | match_result); | 642 | | | 643 | 8 | if (!success) { | 644 | 0 | ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map) | 645 | 0 | : StringOP::push_empty_string(index_now, result_data, result_offset); | 646 | 0 | return; | 647 | 0 | } | 648 | | | 649 | 8 | StringOP::push_value_string(std::string_view(match_result.data(), match_result.size()), | 650 | 8 | index_now, result_data, result_offset); | 651 | 8 | } |
|
652 | | }; |
653 | | |
654 | | struct RegexpExtractAllImpl { |
655 | | static constexpr auto name = "regexp_extract_all"; |
656 | | |
657 | 0 | size_t get_number_of_arguments() const { return 2; } |
658 | | |
659 | | static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[], |
660 | | size_t input_rows_count, ColumnString::Chars& result_data, |
661 | 0 | ColumnString::Offsets& result_offset, NullMap& null_map) { |
662 | 0 | const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get()); |
663 | 0 | const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get()); |
664 | 0 | for (int i = 0; i < input_rows_count; ++i) { |
665 | 0 | if (null_map[i]) { |
666 | 0 | StringOP::push_null_string(i, result_data, result_offset, null_map); |
667 | 0 | continue; |
668 | 0 | } |
669 | 0 | _execute_inner_loop<false>(context, str_col, pattern_col, result_data, result_offset, |
670 | 0 | null_map, i); |
671 | 0 | } |
672 | 0 | } |
673 | | |
674 | | static void execute_impl_const_args(FunctionContext* context, ColumnPtr argument_columns[], |
675 | | size_t input_rows_count, ColumnString::Chars& result_data, |
676 | 7 | ColumnString::Offsets& result_offset, NullMap& null_map) { |
677 | 7 | const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get()); |
678 | 7 | const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get()); |
679 | 14 | for (int i = 0; i < input_rows_count; ++i) { |
680 | 7 | if (null_map[i]) { |
681 | 0 | StringOP::push_null_string(i, result_data, result_offset, null_map); |
682 | 0 | continue; |
683 | 0 | } |
684 | 7 | _execute_inner_loop<true>(context, str_col, pattern_col, result_data, result_offset, |
685 | 7 | null_map, i); |
686 | 7 | } |
687 | 7 | } |
688 | | template <bool Const> |
689 | | static void _execute_inner_loop(FunctionContext* context, const ColumnString* str_col, |
690 | | const ColumnString* pattern_col, |
691 | | ColumnString::Chars& result_data, |
692 | | ColumnString::Offsets& result_offset, NullMap& null_map, |
693 | 7 | const size_t index_now) { |
694 | 7 | auto* engine = reinterpret_cast<RegexpExtractEngine*>( |
695 | 7 | context->get_function_state(FunctionContext::THREAD_LOCAL)); |
696 | 7 | std::unique_ptr<RegexpExtractEngine> scoped_engine; |
697 | | |
698 | 7 | if (engine == nullptr) { |
699 | 0 | std::string error_str; |
700 | 0 | const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const)); |
701 | 0 | scoped_engine = std::make_unique<RegexpExtractEngine>(); |
702 | 0 | bool st = RegexpExtractEngine::compile(pattern, &error_str, *scoped_engine, |
703 | 0 | context->state()->enable_extended_regex()); |
704 | 0 | if (!st) { |
705 | 0 | context->add_warning(error_str.c_str()); |
706 | 0 | StringOP::push_null_string(index_now, result_data, result_offset, null_map); |
707 | 0 | return; |
708 | 0 | } |
709 | 0 | engine = scoped_engine.get(); |
710 | 0 | } |
711 | | |
712 | 7 | if (engine->number_of_capturing_groups() == 0) { |
713 | 0 | StringOP::push_empty_string(index_now, result_data, result_offset); |
714 | 0 | return; |
715 | 0 | } |
716 | 7 | const auto& str = str_col->get_data_at(index_now); |
717 | 7 | std::vector<std::string> res_matches; |
718 | 7 | engine->match_all_and_extract(str.data, str.size, res_matches); |
719 | | |
720 | 7 | if (res_matches.empty()) { |
721 | 0 | StringOP::push_empty_string(index_now, result_data, result_offset); |
722 | 0 | return; |
723 | 0 | } |
724 | | |
725 | 7 | std::string res = "["; |
726 | 19 | for (int j = 0; j < res_matches.size(); ++j) { |
727 | 12 | res += "'" + res_matches[j] + "'"; |
728 | 12 | if (j < res_matches.size() - 1) { |
729 | 5 | res += ","; |
730 | 5 | } |
731 | 12 | } |
732 | 7 | res += "]"; |
733 | 7 | StringOP::push_value_string(std::string_view(res), index_now, result_data, result_offset); |
734 | 7 | } _ZN5doris20RegexpExtractAllImpl19_execute_inner_loopILb1EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES7_RNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEESD_m Line | Count | Source | 693 | 7 | const size_t index_now) { | 694 | 7 | auto* engine = reinterpret_cast<RegexpExtractEngine*>( | 695 | 7 | context->get_function_state(FunctionContext::THREAD_LOCAL)); | 696 | 7 | std::unique_ptr<RegexpExtractEngine> scoped_engine; | 697 | | | 698 | 7 | if (engine == nullptr) { | 699 | 0 | std::string error_str; | 700 | 0 | const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const)); | 701 | 0 | scoped_engine = std::make_unique<RegexpExtractEngine>(); | 702 | 0 | bool st = RegexpExtractEngine::compile(pattern, &error_str, *scoped_engine, | 703 | 0 | context->state()->enable_extended_regex()); | 704 | 0 | if (!st) { | 705 | 0 | context->add_warning(error_str.c_str()); | 706 | 0 | StringOP::push_null_string(index_now, result_data, result_offset, null_map); | 707 | 0 | return; | 708 | 0 | } | 709 | 0 | engine = scoped_engine.get(); | 710 | 0 | } | 711 | | | 712 | 7 | if (engine->number_of_capturing_groups() == 0) { | 713 | 0 | StringOP::push_empty_string(index_now, result_data, result_offset); | 714 | 0 | return; | 715 | 0 | } | 716 | 7 | const auto& str = str_col->get_data_at(index_now); | 717 | 7 | std::vector<std::string> res_matches; | 718 | 7 | engine->match_all_and_extract(str.data, str.size, res_matches); | 719 | | | 720 | 7 | if (res_matches.empty()) { | 721 | 0 | StringOP::push_empty_string(index_now, result_data, result_offset); | 722 | 0 | return; | 723 | 0 | } | 724 | | | 725 | 7 | std::string res = "["; | 726 | 19 | for (int j = 0; j < res_matches.size(); ++j) { | 727 | 12 | res += "'" + res_matches[j] + "'"; | 728 | 12 | if (j < res_matches.size() - 1) { | 729 | 5 | res += ","; | 730 | 5 | } | 731 | 12 | } | 732 | 7 | res += "]"; | 733 | 7 | StringOP::push_value_string(std::string_view(res), index_now, result_data, result_offset); | 734 | 7 | } |
Unexecuted instantiation: _ZN5doris20RegexpExtractAllImpl19_execute_inner_loopILb0EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES7_RNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEESD_m |
735 | | }; |
736 | | |
737 | | // template FunctionRegexpFunctionality is used for regexp_xxxx series functions, not for regexp match. |
738 | | template <typename Impl> |
739 | | class FunctionRegexpFunctionality : public IFunction { |
740 | | public: |
741 | | static constexpr auto name = Impl::name; |
742 | | |
743 | 35 | static FunctionPtr create() { return std::make_shared<FunctionRegexpFunctionality>(); }_ZN5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb1EEEE6createEv Line | Count | Source | 743 | 12 | static FunctionPtr create() { return std::make_shared<FunctionRegexpFunctionality>(); } |
_ZN5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb0EEEE6createEv Line | Count | Source | 743 | 12 | static FunctionPtr create() { return std::make_shared<FunctionRegexpFunctionality>(); } |
_ZN5doris27FunctionRegexpFunctionalityINS_20RegexpExtractAllImplEE6createEv Line | Count | Source | 743 | 11 | static FunctionPtr create() { return std::make_shared<FunctionRegexpFunctionality>(); } |
|
744 | | |
745 | 3 | String get_name() const override { return name; }_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb1EEEE8get_nameB5cxx11Ev Line | Count | Source | 745 | 1 | String get_name() const override { return name; } |
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb0EEEE8get_nameB5cxx11Ev Line | Count | Source | 745 | 1 | String get_name() const override { return name; } |
_ZNK5doris27FunctionRegexpFunctionalityINS_20RegexpExtractAllImplEE8get_nameB5cxx11Ev Line | Count | Source | 745 | 1 | String get_name() const override { return name; } |
|
746 | | |
747 | 29 | size_t get_number_of_arguments() const override { |
748 | 29 | if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) { |
749 | 9 | return 2; |
750 | 9 | } |
751 | 0 | return 3; |
752 | 29 | } _ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb1EEEE23get_number_of_argumentsEv Line | Count | Source | 747 | 10 | size_t get_number_of_arguments() const override { | 748 | | if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) { | 749 | | return 2; | 750 | | } | 751 | 10 | return 3; | 752 | 10 | } |
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb0EEEE23get_number_of_argumentsEv Line | Count | Source | 747 | 10 | size_t get_number_of_arguments() const override { | 748 | | if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) { | 749 | | return 2; | 750 | | } | 751 | 10 | return 3; | 752 | 10 | } |
_ZNK5doris27FunctionRegexpFunctionalityINS_20RegexpExtractAllImplEE23get_number_of_argumentsEv Line | Count | Source | 747 | 9 | size_t get_number_of_arguments() const override { | 748 | 9 | if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) { | 749 | 9 | return 2; | 750 | 9 | } | 751 | 0 | return 3; | 752 | 9 | } |
|
753 | | |
754 | 29 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
755 | 29 | return make_nullable(std::make_shared<DataTypeString>()); |
756 | 29 | } _ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb1EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE Line | Count | Source | 754 | 10 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 755 | 10 | return make_nullable(std::make_shared<DataTypeString>()); | 756 | 10 | } |
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb0EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE Line | Count | Source | 754 | 10 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 755 | 10 | return make_nullable(std::make_shared<DataTypeString>()); | 756 | 10 | } |
_ZNK5doris27FunctionRegexpFunctionalityINS_20RegexpExtractAllImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 754 | 9 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 755 | 9 | return make_nullable(std::make_shared<DataTypeString>()); | 756 | 9 | } |
|
757 | | |
758 | 58 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
759 | 58 | if (scope == FunctionContext::THREAD_LOCAL) { |
760 | 29 | if (context->is_col_constant(1)) { |
761 | 29 | DCHECK(!context->get_function_state(scope)); |
762 | 29 | const auto pattern_col = context->get_constant_col(1)->column_ptr; |
763 | 29 | const auto& pattern = pattern_col->get_data_at(0); |
764 | 29 | if (pattern.size == 0) { |
765 | 3 | return Status::OK(); |
766 | 3 | } |
767 | | |
768 | 26 | std::string error_str; |
769 | 26 | auto engine = std::make_shared<RegexpExtractEngine>(); |
770 | 26 | bool st = RegexpExtractEngine::compile(pattern, &error_str, *engine, |
771 | 26 | context->state()->enable_extended_regex()); |
772 | 26 | if (!st) { |
773 | 0 | context->set_error(error_str.c_str()); |
774 | 0 | return Status::InvalidArgument(error_str); |
775 | 0 | } |
776 | 26 | context->set_function_state(scope, engine); |
777 | 26 | } |
778 | 29 | } |
779 | 55 | return Status::OK(); |
780 | 58 | } _ZN5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb1EEEE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE Line | Count | Source | 758 | 20 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 759 | 20 | if (scope == FunctionContext::THREAD_LOCAL) { | 760 | 10 | if (context->is_col_constant(1)) { | 761 | 10 | DCHECK(!context->get_function_state(scope)); | 762 | 10 | const auto pattern_col = context->get_constant_col(1)->column_ptr; | 763 | 10 | const auto& pattern = pattern_col->get_data_at(0); | 764 | 10 | if (pattern.size == 0) { | 765 | 1 | return Status::OK(); | 766 | 1 | } | 767 | | | 768 | 9 | std::string error_str; | 769 | 9 | auto engine = std::make_shared<RegexpExtractEngine>(); | 770 | 9 | bool st = RegexpExtractEngine::compile(pattern, &error_str, *engine, | 771 | 9 | context->state()->enable_extended_regex()); | 772 | 9 | if (!st) { | 773 | 0 | context->set_error(error_str.c_str()); | 774 | 0 | return Status::InvalidArgument(error_str); | 775 | 0 | } | 776 | 9 | context->set_function_state(scope, engine); | 777 | 9 | } | 778 | 10 | } | 779 | 19 | return Status::OK(); | 780 | 20 | } |
_ZN5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb0EEEE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE Line | Count | Source | 758 | 20 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 759 | 20 | if (scope == FunctionContext::THREAD_LOCAL) { | 760 | 10 | if (context->is_col_constant(1)) { | 761 | 10 | DCHECK(!context->get_function_state(scope)); | 762 | 10 | const auto pattern_col = context->get_constant_col(1)->column_ptr; | 763 | 10 | const auto& pattern = pattern_col->get_data_at(0); | 764 | 10 | if (pattern.size == 0) { | 765 | 1 | return Status::OK(); | 766 | 1 | } | 767 | | | 768 | 9 | std::string error_str; | 769 | 9 | auto engine = std::make_shared<RegexpExtractEngine>(); | 770 | 9 | bool st = RegexpExtractEngine::compile(pattern, &error_str, *engine, | 771 | 9 | context->state()->enable_extended_regex()); | 772 | 9 | if (!st) { | 773 | 0 | context->set_error(error_str.c_str()); | 774 | 0 | return Status::InvalidArgument(error_str); | 775 | 0 | } | 776 | 9 | context->set_function_state(scope, engine); | 777 | 9 | } | 778 | 10 | } | 779 | 19 | return Status::OK(); | 780 | 20 | } |
_ZN5doris27FunctionRegexpFunctionalityINS_20RegexpExtractAllImplEE4openEPNS_15FunctionContextENS3_18FunctionStateScopeE Line | Count | Source | 758 | 18 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 759 | 18 | if (scope == FunctionContext::THREAD_LOCAL) { | 760 | 9 | if (context->is_col_constant(1)) { | 761 | 9 | DCHECK(!context->get_function_state(scope)); | 762 | 9 | const auto pattern_col = context->get_constant_col(1)->column_ptr; | 763 | 9 | const auto& pattern = pattern_col->get_data_at(0); | 764 | 9 | if (pattern.size == 0) { | 765 | 1 | return Status::OK(); | 766 | 1 | } | 767 | | | 768 | 8 | std::string error_str; | 769 | 8 | auto engine = std::make_shared<RegexpExtractEngine>(); | 770 | 8 | bool st = RegexpExtractEngine::compile(pattern, &error_str, *engine, | 771 | 8 | context->state()->enable_extended_regex()); | 772 | 8 | if (!st) { | 773 | 0 | context->set_error(error_str.c_str()); | 774 | 0 | return Status::InvalidArgument(error_str); | 775 | 0 | } | 776 | 8 | context->set_function_state(scope, engine); | 777 | 8 | } | 778 | 9 | } | 779 | 17 | return Status::OK(); | 780 | 18 | } |
|
781 | | |
782 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
783 | 23 | uint32_t result, size_t input_rows_count) const override { |
784 | 23 | size_t argument_size = arguments.size(); |
785 | | |
786 | 23 | auto result_null_map = ColumnUInt8::create(input_rows_count, 0); |
787 | 23 | auto result_data_column = ColumnString::create(); |
788 | 23 | auto& result_data = result_data_column->get_chars(); |
789 | 23 | auto& result_offset = result_data_column->get_offsets(); |
790 | 23 | result_offset.resize(input_rows_count); |
791 | | |
792 | 23 | bool col_const[3]; |
793 | 23 | ColumnPtr argument_columns[3]; |
794 | 85 | for (int i = 0; i < argument_size; ++i) { |
795 | 62 | col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); |
796 | 62 | } |
797 | 23 | argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>( |
798 | 0 | *block.get_by_position(arguments[0]).column) |
799 | 0 | .convert_to_full_column() |
800 | 23 | : block.get_by_position(arguments[0]).column; |
801 | 23 | if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) { |
802 | 7 | default_preprocess_parameter_columns(argument_columns, col_const, {1}, block, |
803 | 7 | arguments); |
804 | 16 | } else { |
805 | 16 | default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, |
806 | 16 | arguments); |
807 | 16 | } |
808 | | |
809 | 23 | if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) { |
810 | 7 | if (col_const[1]) { |
811 | 7 | Impl::execute_impl_const_args(context, argument_columns, input_rows_count, |
812 | 7 | result_data, result_offset, |
813 | 7 | result_null_map->get_data()); |
814 | 7 | } else { |
815 | 0 | Impl::execute_impl(context, argument_columns, input_rows_count, result_data, |
816 | 0 | result_offset, result_null_map->get_data()); |
817 | 0 | } |
818 | 16 | } else { |
819 | 16 | if (col_const[1] && col_const[2]) { |
820 | 0 | Impl::execute_impl_const_args(context, argument_columns, input_rows_count, |
821 | 0 | result_data, result_offset, |
822 | 0 | result_null_map->get_data()); |
823 | 16 | } else { |
824 | 16 | Impl::execute_impl(context, argument_columns, input_rows_count, result_data, |
825 | 16 | result_offset, result_null_map->get_data()); |
826 | 16 | } |
827 | 16 | } |
828 | | |
829 | 23 | block.get_by_position(result).column = |
830 | 23 | ColumnNullable::create(std::move(result_data_column), std::move(result_null_map)); |
831 | 23 | return Status::OK(); |
832 | 23 | } _ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb1EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 783 | 8 | uint32_t result, size_t input_rows_count) const override { | 784 | 8 | size_t argument_size = arguments.size(); | 785 | | | 786 | 8 | auto result_null_map = ColumnUInt8::create(input_rows_count, 0); | 787 | 8 | auto result_data_column = ColumnString::create(); | 788 | 8 | auto& result_data = result_data_column->get_chars(); | 789 | 8 | auto& result_offset = result_data_column->get_offsets(); | 790 | 8 | result_offset.resize(input_rows_count); | 791 | | | 792 | 8 | bool col_const[3]; | 793 | 8 | ColumnPtr argument_columns[3]; | 794 | 32 | for (int i = 0; i < argument_size; ++i) { | 795 | 24 | col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); | 796 | 24 | } | 797 | 8 | argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>( | 798 | 0 | *block.get_by_position(arguments[0]).column) | 799 | 0 | .convert_to_full_column() | 800 | 8 | : block.get_by_position(arguments[0]).column; | 801 | | if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) { | 802 | | default_preprocess_parameter_columns(argument_columns, col_const, {1}, block, | 803 | | arguments); | 804 | 8 | } else { | 805 | 8 | default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, | 806 | 8 | arguments); | 807 | 8 | } | 808 | | | 809 | | if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) { | 810 | | if (col_const[1]) { | 811 | | Impl::execute_impl_const_args(context, argument_columns, input_rows_count, | 812 | | result_data, result_offset, | 813 | | result_null_map->get_data()); | 814 | | } else { | 815 | | Impl::execute_impl(context, argument_columns, input_rows_count, result_data, | 816 | | result_offset, result_null_map->get_data()); | 817 | | } | 818 | 8 | } else { | 819 | 8 | if (col_const[1] && col_const[2]) { | 820 | 0 | Impl::execute_impl_const_args(context, argument_columns, input_rows_count, | 821 | 0 | result_data, result_offset, | 822 | 0 | result_null_map->get_data()); | 823 | 8 | } else { | 824 | 8 | Impl::execute_impl(context, argument_columns, input_rows_count, result_data, | 825 | 8 | result_offset, result_null_map->get_data()); | 826 | 8 | } | 827 | 8 | } | 828 | | | 829 | 8 | block.get_by_position(result).column = | 830 | 8 | ColumnNullable::create(std::move(result_data_column), std::move(result_null_map)); | 831 | 8 | return Status::OK(); | 832 | 8 | } |
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb0EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 783 | 8 | uint32_t result, size_t input_rows_count) const override { | 784 | 8 | size_t argument_size = arguments.size(); | 785 | | | 786 | 8 | auto result_null_map = ColumnUInt8::create(input_rows_count, 0); | 787 | 8 | auto result_data_column = ColumnString::create(); | 788 | 8 | auto& result_data = result_data_column->get_chars(); | 789 | 8 | auto& result_offset = result_data_column->get_offsets(); | 790 | 8 | result_offset.resize(input_rows_count); | 791 | | | 792 | 8 | bool col_const[3]; | 793 | 8 | ColumnPtr argument_columns[3]; | 794 | 32 | for (int i = 0; i < argument_size; ++i) { | 795 | 24 | col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); | 796 | 24 | } | 797 | 8 | argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>( | 798 | 0 | *block.get_by_position(arguments[0]).column) | 799 | 0 | .convert_to_full_column() | 800 | 8 | : block.get_by_position(arguments[0]).column; | 801 | | if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) { | 802 | | default_preprocess_parameter_columns(argument_columns, col_const, {1}, block, | 803 | | arguments); | 804 | 8 | } else { | 805 | 8 | default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, | 806 | 8 | arguments); | 807 | 8 | } | 808 | | | 809 | | if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) { | 810 | | if (col_const[1]) { | 811 | | Impl::execute_impl_const_args(context, argument_columns, input_rows_count, | 812 | | result_data, result_offset, | 813 | | result_null_map->get_data()); | 814 | | } else { | 815 | | Impl::execute_impl(context, argument_columns, input_rows_count, result_data, | 816 | | result_offset, result_null_map->get_data()); | 817 | | } | 818 | 8 | } else { | 819 | 8 | if (col_const[1] && col_const[2]) { | 820 | 0 | Impl::execute_impl_const_args(context, argument_columns, input_rows_count, | 821 | 0 | result_data, result_offset, | 822 | 0 | result_null_map->get_data()); | 823 | 8 | } else { | 824 | 8 | Impl::execute_impl(context, argument_columns, input_rows_count, result_data, | 825 | 8 | result_offset, result_null_map->get_data()); | 826 | 8 | } | 827 | 8 | } | 828 | | | 829 | 8 | block.get_by_position(result).column = | 830 | 8 | ColumnNullable::create(std::move(result_data_column), std::move(result_null_map)); | 831 | 8 | return Status::OK(); | 832 | 8 | } |
_ZNK5doris27FunctionRegexpFunctionalityINS_20RegexpExtractAllImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 783 | 7 | uint32_t result, size_t input_rows_count) const override { | 784 | 7 | size_t argument_size = arguments.size(); | 785 | | | 786 | 7 | auto result_null_map = ColumnUInt8::create(input_rows_count, 0); | 787 | 7 | auto result_data_column = ColumnString::create(); | 788 | 7 | auto& result_data = result_data_column->get_chars(); | 789 | 7 | auto& result_offset = result_data_column->get_offsets(); | 790 | 7 | result_offset.resize(input_rows_count); | 791 | | | 792 | 7 | bool col_const[3]; | 793 | 7 | ColumnPtr argument_columns[3]; | 794 | 21 | for (int i = 0; i < argument_size; ++i) { | 795 | 14 | col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); | 796 | 14 | } | 797 | 7 | argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>( | 798 | 0 | *block.get_by_position(arguments[0]).column) | 799 | 0 | .convert_to_full_column() | 800 | 7 | : block.get_by_position(arguments[0]).column; | 801 | 7 | if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) { | 802 | 7 | default_preprocess_parameter_columns(argument_columns, col_const, {1}, block, | 803 | 7 | arguments); | 804 | | } else { | 805 | | default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, | 806 | | arguments); | 807 | | } | 808 | | | 809 | 7 | if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) { | 810 | 7 | if (col_const[1]) { | 811 | 7 | Impl::execute_impl_const_args(context, argument_columns, input_rows_count, | 812 | 7 | result_data, result_offset, | 813 | 7 | result_null_map->get_data()); | 814 | 7 | } else { | 815 | 0 | Impl::execute_impl(context, argument_columns, input_rows_count, result_data, | 816 | 0 | result_offset, result_null_map->get_data()); | 817 | 0 | } | 818 | | } else { | 819 | | if (col_const[1] && col_const[2]) { | 820 | | Impl::execute_impl_const_args(context, argument_columns, input_rows_count, | 821 | | result_data, result_offset, | 822 | | result_null_map->get_data()); | 823 | | } else { | 824 | | Impl::execute_impl(context, argument_columns, input_rows_count, result_data, | 825 | | result_offset, result_null_map->get_data()); | 826 | | } | 827 | | } | 828 | | | 829 | 7 | block.get_by_position(result).column = | 830 | 7 | ColumnNullable::create(std::move(result_data_column), std::move(result_null_map)); | 831 | 7 | return Status::OK(); | 832 | 7 | } |
|
833 | | }; |
834 | | |
835 | 1 | void register_function_regexp_extract(SimpleFunctionFactory& factory) { |
836 | 1 | factory.register_function<FunctionRegexpReplace<RegexpReplaceImpl, ThreeParamTypes>>(); |
837 | 1 | factory.register_function<FunctionRegexpReplace<RegexpReplaceImpl, FourParamTypes>>(); |
838 | 1 | factory.register_function<FunctionRegexpReplace<RegexpReplaceOneImpl, ThreeParamTypes>>(); |
839 | 1 | factory.register_function<FunctionRegexpReplace<RegexpReplaceOneImpl, FourParamTypes>>(); |
840 | 1 | factory.register_function<FunctionRegexpFunctionality<RegexpExtractImpl<true>>>(); |
841 | 1 | factory.register_function<FunctionRegexpFunctionality<RegexpExtractImpl<false>>>(); |
842 | 1 | factory.register_function<FunctionRegexpFunctionality<RegexpExtractAllImpl>>(); |
843 | 1 | factory.register_function<FunctionRegexpCount>(); |
844 | 1 | } |
845 | | |
846 | | } // namespace doris |