Coverage Report

Created: 2026-03-13 05:13

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_regexp.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <glog/logging.h>
19
#include <re2/re2.h>
20
#include <re2/stringpiece.h>
21
#include <stddef.h>
22
23
#include <boost/regex.hpp>
24
#include <memory>
25
#include <string>
26
#include <string_view>
27
#include <type_traits>
28
#include <utility>
29
#include <vector>
30
31
#include "common/status.h"
32
#include "core/block/block.h"
33
#include "core/block/column_numbers.h"
34
#include "core/block/column_with_type_and_name.h"
35
#include "core/column/column.h"
36
#include "core/column/column_const.h"
37
#include "core/column/column_nullable.h"
38
#include "core/column/column_string.h"
39
#include "core/column/column_vector.h"
40
#include "core/data_type/data_type.h"
41
#include "core/data_type/data_type_nullable.h"
42
#include "core/data_type/data_type_number.h"
43
#include "core/data_type/data_type_string.h"
44
#include "core/string_ref.h"
45
#include "core/types.h"
46
#include "exec/common/stringop_substring.h"
47
#include "exprs/aggregate/aggregate_function.h"
48
#include "exprs/function/function.h"
49
#include "exprs/function/simple_function_factory.h"
50
#include "exprs/function_context.h"
51
#include "exprs/string_functions.h"
52
53
namespace doris {
54
#include "common/compile_check_begin.h"
55
56
// Helper structure to hold either RE2 or Boost.Regex
57
struct RegexpExtractEngine {
58
    std::unique_ptr<re2::RE2> re2_regex;
59
    std::unique_ptr<boost::regex> boost_regex;
60
61
18
    bool is_boost() const { return boost_regex != nullptr; }
62
313
    bool is_re2() const { return re2_regex != nullptr; }
63
64
    // Try to compile with RE2 first, fallback to Boost.Regex if RE2 fails
65
    static bool compile(const StringRef& pattern, std::string* error_str,
66
260
                        RegexpExtractEngine& engine, bool enable_extended_regex) {
67
260
        re2::RE2::Options options;
68
260
        options.set_log_errors(false); // avoid RE2 printing to stderr; we handle errors ourselves
69
260
        options.set_dot_nl(true); // make '.' match '\n' by default, consistent with REGEXP/LIKE
70
260
        engine.re2_regex =
71
260
                std::make_unique<re2::RE2>(re2::StringPiece(pattern.data, pattern.size), options);
72
73
260
        if (engine.re2_regex->ok()) {
74
239
            return true;
75
239
        } else if (!enable_extended_regex) {
76
3
            *error_str = fmt::format(
77
3
                    "Invalid regex pattern: {}. Error: {}. If you need advanced regex features, "
78
3
                    "try setting enable_extended_regex=true",
79
3
                    std::string(pattern.data, pattern.size), engine.re2_regex->error());
80
3
            return false;
81
3
        }
82
83
        // RE2 failed, try Boost.Regex for advanced features like zero-width assertions
84
18
        engine.re2_regex.reset();
85
18
        try {
86
18
            boost::regex::flag_type flags = boost::regex::normal;
87
18
            engine.boost_regex = std::make_unique<boost::regex>(pattern.data,
88
18
                                                                pattern.data + pattern.size, flags);
89
18
            return true;
90
18
        } catch (const boost::regex_error& e) {
91
0
            if (error_str) {
92
0
                *error_str = fmt::format("Invalid regex pattern: {}. Error: {}",
93
0
                                         std::string(pattern.data, pattern.size), e.what());
94
0
            }
95
0
            return false;
96
0
        }
97
18
    }
98
99
    // Get number of capturing groups
100
231
    int number_of_capturing_groups() const {
101
231
        if (is_re2()) {
102
222
            return re2_regex->NumberOfCapturingGroups();
103
222
        } else if (is_boost()) {
104
9
            return static_cast<int>(boost_regex->mark_count());
105
9
        }
106
0
        return 0;
107
231
    }
108
109
    // Match function for extraction
110
52
    bool match_and_extract(const char* data, size_t size, int index, std::string& result) const {
111
52
        if (is_re2()) {
112
47
            int max_matches = 1 + re2_regex->NumberOfCapturingGroups();
113
47
            if (index >= max_matches) {
114
0
                return false;
115
0
            }
116
47
            std::vector<re2::StringPiece> matches(max_matches);
117
47
            bool success = re2_regex->Match(re2::StringPiece(data, size), 0, size,
118
47
                                            re2::RE2::UNANCHORED, matches.data(), max_matches);
119
47
            if (success && index < matches.size()) {
120
34
                const re2::StringPiece& match = matches[index];
121
34
                result.assign(match.data(), match.size());
122
34
                return true;
123
34
            }
124
13
            return false;
125
47
        } else if (is_boost()) {
126
5
            boost::cmatch matches;
127
5
            bool success = boost::regex_search(data, data + size, matches, *boost_regex);
128
5
            if (success && index < matches.size()) {
129
5
                result = matches[index].str();
130
5
                return true;
131
5
            }
132
0
            return false;
133
5
        }
134
0
        return false;
135
52
    }
136
137
    // Match all occurrences and extract the first capturing group
138
    void match_all_and_extract(const char* data, size_t size,
139
30
                               std::vector<std::string>& results) const {
140
30
        if (is_re2()) {
141
26
            int max_matches = 1 + re2_regex->NumberOfCapturingGroups();
142
26
            if (max_matches < 2) {
143
0
                return; // No capturing groups
144
0
            }
145
146
26
            size_t pos = 0;
147
67
            while (pos < size) {
148
55
                const char* str_pos = data + pos;
149
55
                size_t str_size = size - pos;
150
55
                std::vector<re2::StringPiece> matches(max_matches);
151
55
                bool success = re2_regex->Match(re2::StringPiece(str_pos, str_size), 0, str_size,
152
55
                                                re2::RE2::UNANCHORED, matches.data(), max_matches);
153
55
                if (!success) {
154
14
                    break;
155
14
                }
156
41
                if (matches[0].empty()) {
157
11
                    pos += 1;
158
11
                    continue;
159
11
                }
160
                // Extract first capturing group
161
30
                if (matches.size() > 1 && !matches[1].empty()) {
162
30
                    results.emplace_back(matches[1].data(), matches[1].size());
163
30
                }
164
                // Move position forward
165
30
                auto offset = std::string(str_pos, str_size)
166
30
                                      .find(std::string(matches[0].data(), matches[0].size()));
167
30
                pos += offset + matches[0].size();
168
30
            }
169
26
        } else if (is_boost()) {
170
4
            const char* search_start = data;
171
4
            const char* search_end = data + size;
172
4
            boost::match_results<const char*> matches;
173
174
13
            while (boost::regex_search(search_start, search_end, matches, *boost_regex)) {
175
9
                if (matches.size() > 1 && matches[1].matched) {
176
9
                    results.emplace_back(matches[1].str());
177
9
                }
178
9
                if (matches[0].length() == 0) {
179
0
                    if (search_start == search_end) {
180
0
                        break;
181
0
                    }
182
0
                    search_start += 1;
183
9
                } else {
184
9
                    search_start = matches[0].second;
185
9
                }
186
9
            }
187
4
        }
188
30
    }
189
};
190
191
struct RegexpCountImpl {
192
    static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[],
193
15
                             size_t input_rows_count, ColumnInt32::Container& result_data) {
194
15
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
195
15
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
196
48
        for (int i = 0; i < input_rows_count; ++i) {
197
33
            result_data[i] = _execute_inner_loop(context, str_col, pattern_col, i);
198
33
        }
199
15
    }
200
    static int _execute_inner_loop(FunctionContext* context, const ColumnString* str_col,
201
33
                                   const ColumnString* pattern_col, const size_t index_now) {
202
33
        re2::RE2* re = reinterpret_cast<re2::RE2*>(
203
33
                context->get_function_state(FunctionContext::THREAD_LOCAL));
204
33
        std::unique_ptr<re2::RE2> scoped_re;
205
33
        if (re == nullptr) {
206
12
            std::string error_str;
207
12
            DCHECK(pattern_col);
208
12
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, false));
209
12
            bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(), StringRef(),
210
12
                                                     scoped_re);
211
12
            if (!st) {
212
0
                context->add_warning(error_str.c_str());
213
0
                throw Exception(Status::InvalidArgument(error_str));
214
0
                return 0;
215
0
            }
216
12
            re = scoped_re.get();
217
12
        }
218
219
33
        const auto& str = str_col->get_data_at(index_now);
220
33
        int count = 0;
221
33
        size_t pos = 0;
222
101
        while (pos < str.size) {
223
87
            auto str_pos = str.data + pos;
224
87
            auto str_size = str.size - pos;
225
87
            re2::StringPiece str_sp_current = re2::StringPiece(str_pos, str_size);
226
87
            re2::StringPiece match;
227
228
87
            bool success = re->Match(str_sp_current, 0, str_size, re2::RE2::UNANCHORED, &match, 1);
229
87
            if (!success) {
230
19
                break;
231
19
            }
232
68
            if (match.empty()) {
233
4
                pos += 1;
234
4
                continue;
235
4
            }
236
64
            count++;
237
64
            size_t match_start = match.data() - str_sp_current.data();
238
64
            pos += match_start + match.size();
239
64
        }
240
241
33
        return count;
242
33
    }
243
};
244
245
class FunctionRegexpCount : public IFunction {
246
public:
247
    static constexpr auto name = "regexp_count";
248
249
24
    static FunctionPtr create() { return std::make_shared<FunctionRegexpCount>(); }
250
251
1
    String get_name() const override { return name; }
252
253
15
    size_t get_number_of_arguments() const override { return 2; }
254
255
15
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
256
15
        return std::make_shared<DataTypeInt32>();
257
15
    }
258
259
71
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
260
71
        if (scope == FunctionContext::THREAD_LOCAL) {
261
56
            if (context->is_col_constant(1)) {
262
41
                DCHECK(!context->get_function_state(scope));
263
41
                const auto pattern_col = context->get_constant_col(1)->column_ptr;
264
41
                const auto& pattern = pattern_col->get_data_at(0);
265
41
                if (pattern.size == 0) {
266
4
                    return Status::OK();
267
4
                }
268
269
37
                std::string error_str;
270
37
                std::unique_ptr<re2::RE2> scoped_re;
271
37
                bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
272
37
                                                         StringRef(), scoped_re);
273
37
                if (!st) {
274
0
                    context->set_error(error_str.c_str());
275
0
                    return Status::InvalidArgument(error_str);
276
0
                }
277
37
                std::shared_ptr<re2::RE2> re(scoped_re.release());
278
37
                context->set_function_state(scope, re);
279
37
            }
280
56
        }
281
67
        return Status::OK();
282
71
    }
283
284
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
285
15
                        uint32_t result, size_t input_rows_count) const override {
286
15
        auto result_data_column = ColumnInt32::create(input_rows_count);
287
15
        auto& result_data = result_data_column->get_data();
288
289
15
        ColumnPtr argument_columns[2];
290
291
15
        argument_columns[0] = block.get_by_position(arguments[0]).column;
292
15
        argument_columns[1] = block.get_by_position(arguments[1]).column;
293
15
        RegexpCountImpl::execute_impl(context, argument_columns, input_rows_count, result_data);
294
295
15
        block.get_by_position(result).column = std::move(result_data_column);
296
15
        return Status::OK();
297
15
    }
298
};
299
300
struct ThreeParamTypes {
301
16
    static DataTypes get_variadic_argument_types() {
302
16
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
303
16
                std::make_shared<DataTypeString>()};
304
16
    }
305
};
306
307
struct FourParamTypes {
308
16
    static DataTypes get_variadic_argument_types() {
309
16
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
310
16
                std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
311
16
    }
312
};
313
314
// template FunctionRegexpFunctionality is used for regexp_replace/regexp_replace_one
315
template <typename Impl, typename ParamTypes>
316
class FunctionRegexpReplace : public IFunction {
317
public:
318
    static constexpr auto name = Impl::name;
319
320
94
    static FunctionPtr create() { return std::make_shared<FunctionRegexpReplace>(); }
_ZN5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE6createEv
Line
Count
Source
320
34
    static FunctionPtr create() { return std::make_shared<FunctionRegexpReplace>(); }
_ZN5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE6createEv
Line
Count
Source
320
17
    static FunctionPtr create() { return std::make_shared<FunctionRegexpReplace>(); }
_ZN5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE6createEv
Line
Count
Source
320
25
    static FunctionPtr create() { return std::make_shared<FunctionRegexpReplace>(); }
_ZN5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE6createEv
Line
Count
Source
320
18
    static FunctionPtr create() { return std::make_shared<FunctionRegexpReplace>(); }
321
322
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE8get_nameB5cxx11Ev
323
324
0
    size_t get_number_of_arguments() const override {
325
0
        return get_variadic_argument_types_impl().size();
326
0
    }
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE23get_number_of_argumentsEv
327
328
62
    bool is_variadic() const override { return true; }
_ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE11is_variadicEv
Line
Count
Source
328
26
    bool is_variadic() const override { return true; }
_ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE11is_variadicEv
Line
Count
Source
328
9
    bool is_variadic() const override { return true; }
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE11is_variadicEv
Line
Count
Source
328
17
    bool is_variadic() const override { return true; }
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE11is_variadicEv
Line
Count
Source
328
10
    bool is_variadic() const override { return true; }
329
330
58
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
331
58
        return make_nullable(std::make_shared<DataTypeString>());
332
58
    }
_ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
330
25
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
331
25
        return make_nullable(std::make_shared<DataTypeString>());
332
25
    }
_ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
330
8
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
331
8
        return make_nullable(std::make_shared<DataTypeString>());
332
8
    }
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
330
16
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
331
16
        return make_nullable(std::make_shared<DataTypeString>());
332
16
    }
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
330
9
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
331
9
        return make_nullable(std::make_shared<DataTypeString>());
332
9
    }
333
334
32
    DataTypes get_variadic_argument_types_impl() const override {
335
32
        return ParamTypes::get_variadic_argument_types();
336
32
    }
_ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE32get_variadic_argument_types_implEv
Line
Count
Source
334
8
    DataTypes get_variadic_argument_types_impl() const override {
335
8
        return ParamTypes::get_variadic_argument_types();
336
8
    }
_ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE32get_variadic_argument_types_implEv
Line
Count
Source
334
8
    DataTypes get_variadic_argument_types_impl() const override {
335
8
        return ParamTypes::get_variadic_argument_types();
336
8
    }
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE32get_variadic_argument_types_implEv
Line
Count
Source
334
8
    DataTypes get_variadic_argument_types_impl() const override {
335
8
        return ParamTypes::get_variadic_argument_types();
336
8
    }
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE32get_variadic_argument_types_implEv
Line
Count
Source
334
8
    DataTypes get_variadic_argument_types_impl() const override {
335
8
        return ParamTypes::get_variadic_argument_types();
336
8
    }
337
338
345
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
339
345
        if (scope == FunctionContext::THREAD_LOCAL) {
340
287
            if (context->is_col_constant(1)) {
341
139
                DCHECK(!context->get_function_state(scope));
342
139
                const auto pattern_col = context->get_constant_col(1)->column_ptr;
343
139
                const auto& pattern = pattern_col->get_data_at(0);
344
139
                if (pattern.size == 0) {
345
6
                    return Status::OK();
346
6
                }
347
348
133
                std::string error_str;
349
133
                std::unique_ptr<re2::RE2> scoped_re;
350
133
                StringRef options_value;
351
133
                if constexpr (std::is_same_v<FourParamTypes, ParamTypes>) {
352
80
                    DCHECK_EQ(context->get_num_args(), 4);
353
80
                    DCHECK(context->is_col_constant(3));
354
80
                    const auto options_col = context->get_constant_col(3)->column_ptr;
355
80
                    options_value = options_col->get_data_at(0);
356
80
                }
357
358
133
                bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
359
133
                                                         options_value, scoped_re);
360
133
                if (!st) {
361
0
                    context->set_error(error_str.c_str());
362
0
                    return Status::InvalidArgument(error_str);
363
0
                }
364
133
                std::shared_ptr<re2::RE2> re(scoped_re.release());
365
133
                context->set_function_state(scope, re);
366
133
            }
367
287
        }
368
339
        return Status::OK();
369
345
    }
_ZN5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
338
100
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
339
100
        if (scope == FunctionContext::THREAD_LOCAL) {
340
75
            if (context->is_col_constant(1)) {
341
43
                DCHECK(!context->get_function_state(scope));
342
43
                const auto pattern_col = context->get_constant_col(1)->column_ptr;
343
43
                const auto& pattern = pattern_col->get_data_at(0);
344
43
                if (pattern.size == 0) {
345
4
                    return Status::OK();
346
4
                }
347
348
39
                std::string error_str;
349
39
                std::unique_ptr<re2::RE2> scoped_re;
350
39
                StringRef options_value;
351
                if constexpr (std::is_same_v<FourParamTypes, ParamTypes>) {
352
                    DCHECK_EQ(context->get_num_args(), 4);
353
                    DCHECK(context->is_col_constant(3));
354
                    const auto options_col = context->get_constant_col(3)->column_ptr;
355
                    options_value = options_col->get_data_at(0);
356
                }
357
358
39
                bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
359
39
                                                         options_value, scoped_re);
360
39
                if (!st) {
361
0
                    context->set_error(error_str.c_str());
362
0
                    return Status::InvalidArgument(error_str);
363
0
                }
364
39
                std::shared_ptr<re2::RE2> re(scoped_re.release());
365
39
                context->set_function_state(scope, re);
366
39
            }
367
75
        }
368
96
        return Status::OK();
369
100
    }
_ZN5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
338
84
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
339
84
        if (scope == FunctionContext::THREAD_LOCAL) {
340
76
            if (context->is_col_constant(1)) {
341
40
                DCHECK(!context->get_function_state(scope));
342
40
                const auto pattern_col = context->get_constant_col(1)->column_ptr;
343
40
                const auto& pattern = pattern_col->get_data_at(0);
344
40
                if (pattern.size == 0) {
345
0
                    return Status::OK();
346
0
                }
347
348
40
                std::string error_str;
349
40
                std::unique_ptr<re2::RE2> scoped_re;
350
40
                StringRef options_value;
351
40
                if constexpr (std::is_same_v<FourParamTypes, ParamTypes>) {
352
40
                    DCHECK_EQ(context->get_num_args(), 4);
353
40
                    DCHECK(context->is_col_constant(3));
354
40
                    const auto options_col = context->get_constant_col(3)->column_ptr;
355
40
                    options_value = options_col->get_data_at(0);
356
40
                }
357
358
40
                bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
359
40
                                                         options_value, scoped_re);
360
40
                if (!st) {
361
0
                    context->set_error(error_str.c_str());
362
0
                    return Status::InvalidArgument(error_str);
363
0
                }
364
40
                std::shared_ptr<re2::RE2> re(scoped_re.release());
365
40
                context->set_function_state(scope, re);
366
40
            }
367
76
        }
368
84
        return Status::OK();
369
84
    }
_ZN5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
338
64
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
339
64
        if (scope == FunctionContext::THREAD_LOCAL) {
340
48
            if (context->is_col_constant(1)) {
341
16
                DCHECK(!context->get_function_state(scope));
342
16
                const auto pattern_col = context->get_constant_col(1)->column_ptr;
343
16
                const auto& pattern = pattern_col->get_data_at(0);
344
16
                if (pattern.size == 0) {
345
2
                    return Status::OK();
346
2
                }
347
348
14
                std::string error_str;
349
14
                std::unique_ptr<re2::RE2> scoped_re;
350
14
                StringRef options_value;
351
                if constexpr (std::is_same_v<FourParamTypes, ParamTypes>) {
352
                    DCHECK_EQ(context->get_num_args(), 4);
353
                    DCHECK(context->is_col_constant(3));
354
                    const auto options_col = context->get_constant_col(3)->column_ptr;
355
                    options_value = options_col->get_data_at(0);
356
                }
357
358
14
                bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
359
14
                                                         options_value, scoped_re);
360
14
                if (!st) {
361
0
                    context->set_error(error_str.c_str());
362
0
                    return Status::InvalidArgument(error_str);
363
0
                }
364
14
                std::shared_ptr<re2::RE2> re(scoped_re.release());
365
14
                context->set_function_state(scope, re);
366
14
            }
367
48
        }
368
62
        return Status::OK();
369
64
    }
_ZN5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
338
97
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
339
97
        if (scope == FunctionContext::THREAD_LOCAL) {
340
88
            if (context->is_col_constant(1)) {
341
40
                DCHECK(!context->get_function_state(scope));
342
40
                const auto pattern_col = context->get_constant_col(1)->column_ptr;
343
40
                const auto& pattern = pattern_col->get_data_at(0);
344
40
                if (pattern.size == 0) {
345
0
                    return Status::OK();
346
0
                }
347
348
40
                std::string error_str;
349
40
                std::unique_ptr<re2::RE2> scoped_re;
350
40
                StringRef options_value;
351
40
                if constexpr (std::is_same_v<FourParamTypes, ParamTypes>) {
352
40
                    DCHECK_EQ(context->get_num_args(), 4);
353
40
                    DCHECK(context->is_col_constant(3));
354
40
                    const auto options_col = context->get_constant_col(3)->column_ptr;
355
40
                    options_value = options_col->get_data_at(0);
356
40
                }
357
358
40
                bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
359
40
                                                         options_value, scoped_re);
360
40
                if (!st) {
361
0
                    context->set_error(error_str.c_str());
362
0
                    return Status::InvalidArgument(error_str);
363
0
                }
364
40
                std::shared_ptr<re2::RE2> re(scoped_re.release());
365
40
                context->set_function_state(scope, re);
366
40
            }
367
88
        }
368
97
        return Status::OK();
369
97
    }
370
371
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
372
75
                        uint32_t result, size_t input_rows_count) const override {
373
75
        size_t argument_size = arguments.size();
374
375
75
        auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
376
75
        auto result_data_column = ColumnString::create();
377
75
        auto& result_data = result_data_column->get_chars();
378
75
        auto& result_offset = result_data_column->get_offsets();
379
75
        result_offset.resize(input_rows_count);
380
381
75
        bool col_const[3];
382
75
        ColumnPtr argument_columns[3];
383
300
        for (int i = 0; i < 3; ++i) {
384
225
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
385
225
        }
386
75
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
387
6
                                                     *block.get_by_position(arguments[0]).column)
388
6
                                                     .convert_to_full_column()
389
75
                                           : block.get_by_position(arguments[0]).column;
390
391
75
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
392
393
75
        StringRef options_value;
394
75
        if (col_const[1] && col_const[2]) {
395
3
            Impl::execute_impl_const_args(context, argument_columns, options_value,
396
3
                                          input_rows_count, result_data, result_offset,
397
3
                                          result_null_map->get_data());
398
72
        } else {
399
            // the options have check in FE, so is always const, and get idx of 0
400
72
            if (argument_size == 4) {
401
15
                options_value = block.get_by_position(arguments[3]).column->get_data_at(0);
402
15
            }
403
72
            Impl::execute_impl(context, argument_columns, options_value, input_rows_count,
404
72
                               result_data, result_offset, result_null_map->get_data());
405
72
        }
406
407
75
        block.get_by_position(result).column =
408
75
                ColumnNullable::create(std::move(result_data_column), std::move(result_null_map));
409
75
        return Status::OK();
410
75
    }
_ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
372
32
                        uint32_t result, size_t input_rows_count) const override {
373
32
        size_t argument_size = arguments.size();
374
375
32
        auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
376
32
        auto result_data_column = ColumnString::create();
377
32
        auto& result_data = result_data_column->get_chars();
378
32
        auto& result_offset = result_data_column->get_offsets();
379
32
        result_offset.resize(input_rows_count);
380
381
32
        bool col_const[3];
382
32
        ColumnPtr argument_columns[3];
383
128
        for (int i = 0; i < 3; ++i) {
384
96
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
385
96
        }
386
32
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
387
0
                                                     *block.get_by_position(arguments[0]).column)
388
0
                                                     .convert_to_full_column()
389
32
                                           : block.get_by_position(arguments[0]).column;
390
391
32
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
392
393
32
        StringRef options_value;
394
32
        if (col_const[1] && col_const[2]) {
395
1
            Impl::execute_impl_const_args(context, argument_columns, options_value,
396
1
                                          input_rows_count, result_data, result_offset,
397
1
                                          result_null_map->get_data());
398
31
        } else {
399
            // the options have check in FE, so is always const, and get idx of 0
400
31
            if (argument_size == 4) {
401
0
                options_value = block.get_by_position(arguments[3]).column->get_data_at(0);
402
0
            }
403
31
            Impl::execute_impl(context, argument_columns, options_value, input_rows_count,
404
31
                               result_data, result_offset, result_null_map->get_data());
405
31
        }
406
407
32
        block.get_by_position(result).column =
408
32
                ColumnNullable::create(std::move(result_data_column), std::move(result_null_map));
409
32
        return Status::OK();
410
32
    }
_ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
372
8
                        uint32_t result, size_t input_rows_count) const override {
373
8
        size_t argument_size = arguments.size();
374
375
8
        auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
376
8
        auto result_data_column = ColumnString::create();
377
8
        auto& result_data = result_data_column->get_chars();
378
8
        auto& result_offset = result_data_column->get_offsets();
379
8
        result_offset.resize(input_rows_count);
380
381
8
        bool col_const[3];
382
8
        ColumnPtr argument_columns[3];
383
32
        for (int i = 0; i < 3; ++i) {
384
24
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
385
24
        }
386
8
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
387
3
                                                     *block.get_by_position(arguments[0]).column)
388
3
                                                     .convert_to_full_column()
389
8
                                           : block.get_by_position(arguments[0]).column;
390
391
8
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
392
393
8
        StringRef options_value;
394
8
        if (col_const[1] && col_const[2]) {
395
1
            Impl::execute_impl_const_args(context, argument_columns, options_value,
396
1
                                          input_rows_count, result_data, result_offset,
397
1
                                          result_null_map->get_data());
398
7
        } else {
399
            // the options have check in FE, so is always const, and get idx of 0
400
7
            if (argument_size == 4) {
401
7
                options_value = block.get_by_position(arguments[3]).column->get_data_at(0);
402
7
            }
403
7
            Impl::execute_impl(context, argument_columns, options_value, input_rows_count,
404
7
                               result_data, result_offset, result_null_map->get_data());
405
7
        }
406
407
8
        block.get_by_position(result).column =
408
8
                ColumnNullable::create(std::move(result_data_column), std::move(result_null_map));
409
8
        return Status::OK();
410
8
    }
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
372
26
                        uint32_t result, size_t input_rows_count) const override {
373
26
        size_t argument_size = arguments.size();
374
375
26
        auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
376
26
        auto result_data_column = ColumnString::create();
377
26
        auto& result_data = result_data_column->get_chars();
378
26
        auto& result_offset = result_data_column->get_offsets();
379
26
        result_offset.resize(input_rows_count);
380
381
26
        bool col_const[3];
382
26
        ColumnPtr argument_columns[3];
383
104
        for (int i = 0; i < 3; ++i) {
384
78
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
385
78
        }
386
26
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
387
0
                                                     *block.get_by_position(arguments[0]).column)
388
0
                                                     .convert_to_full_column()
389
26
                                           : block.get_by_position(arguments[0]).column;
390
391
26
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
392
393
26
        StringRef options_value;
394
26
        if (col_const[1] && col_const[2]) {
395
0
            Impl::execute_impl_const_args(context, argument_columns, options_value,
396
0
                                          input_rows_count, result_data, result_offset,
397
0
                                          result_null_map->get_data());
398
26
        } else {
399
            // the options have check in FE, so is always const, and get idx of 0
400
26
            if (argument_size == 4) {
401
0
                options_value = block.get_by_position(arguments[3]).column->get_data_at(0);
402
0
            }
403
26
            Impl::execute_impl(context, argument_columns, options_value, input_rows_count,
404
26
                               result_data, result_offset, result_null_map->get_data());
405
26
        }
406
407
26
        block.get_by_position(result).column =
408
26
                ColumnNullable::create(std::move(result_data_column), std::move(result_null_map));
409
26
        return Status::OK();
410
26
    }
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
372
9
                        uint32_t result, size_t input_rows_count) const override {
373
9
        size_t argument_size = arguments.size();
374
375
9
        auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
376
9
        auto result_data_column = ColumnString::create();
377
9
        auto& result_data = result_data_column->get_chars();
378
9
        auto& result_offset = result_data_column->get_offsets();
379
9
        result_offset.resize(input_rows_count);
380
381
9
        bool col_const[3];
382
9
        ColumnPtr argument_columns[3];
383
36
        for (int i = 0; i < 3; ++i) {
384
27
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
385
27
        }
386
9
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
387
3
                                                     *block.get_by_position(arguments[0]).column)
388
3
                                                     .convert_to_full_column()
389
9
                                           : block.get_by_position(arguments[0]).column;
390
391
9
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
392
393
9
        StringRef options_value;
394
9
        if (col_const[1] && col_const[2]) {
395
1
            Impl::execute_impl_const_args(context, argument_columns, options_value,
396
1
                                          input_rows_count, result_data, result_offset,
397
1
                                          result_null_map->get_data());
398
8
        } else {
399
            // the options have check in FE, so is always const, and get idx of 0
400
8
            if (argument_size == 4) {
401
8
                options_value = block.get_by_position(arguments[3]).column->get_data_at(0);
402
8
            }
403
8
            Impl::execute_impl(context, argument_columns, options_value, input_rows_count,
404
8
                               result_data, result_offset, result_null_map->get_data());
405
8
        }
406
407
9
        block.get_by_position(result).column =
408
9
                ColumnNullable::create(std::move(result_data_column), std::move(result_null_map));
409
9
        return Status::OK();
410
9
    }
411
};
412
413
struct RegexpReplaceImpl {
414
    static constexpr auto name = "regexp_replace";
415
    static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[],
416
                             const StringRef& options_value, size_t input_rows_count,
417
                             ColumnString::Chars& result_data, ColumnString::Offsets& result_offset,
418
38
                             NullMap& null_map) {
419
38
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
420
38
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
421
38
        const auto* replace_col = check_and_get_column<ColumnString>(argument_columns[2].get());
422
423
130
        for (size_t i = 0; i < input_rows_count; ++i) {
424
92
            if (null_map[i]) {
425
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
426
0
                continue;
427
0
            }
428
92
            _execute_inner_loop<false>(context, str_col, pattern_col, replace_col, options_value,
429
92
                                       result_data, result_offset, null_map, i);
430
92
        }
431
38
    }
432
    static void execute_impl_const_args(FunctionContext* context, ColumnPtr argument_columns[],
433
                                        const StringRef& options_value, size_t input_rows_count,
434
                                        ColumnString::Chars& result_data,
435
2
                                        ColumnString::Offsets& result_offset, NullMap& null_map) {
436
2
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
437
2
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
438
2
        const auto* replace_col = check_and_get_column<ColumnString>(argument_columns[2].get());
439
440
12
        for (size_t i = 0; i < input_rows_count; ++i) {
441
10
            if (null_map[i]) {
442
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
443
0
                continue;
444
0
            }
445
10
            _execute_inner_loop<true>(context, str_col, pattern_col, replace_col, options_value,
446
10
                                      result_data, result_offset, null_map, i);
447
10
        }
448
2
    }
449
    template <bool Const>
450
    static void _execute_inner_loop(FunctionContext* context, const ColumnString* str_col,
451
                                    const ColumnString* pattern_col,
452
                                    const ColumnString* replace_col, const StringRef& options_value,
453
                                    ColumnString::Chars& result_data,
454
                                    ColumnString::Offsets& result_offset, NullMap& null_map,
455
102
                                    const size_t index_now) {
456
102
        re2::RE2* re = reinterpret_cast<re2::RE2*>(
457
102
                context->get_function_state(FunctionContext::THREAD_LOCAL));
458
102
        std::unique_ptr<re2::RE2> scoped_re; // destroys re if state->re is nullptr
459
102
        if (re == nullptr) {
460
67
            std::string error_str;
461
67
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
462
67
            bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
463
67
                                                     options_value, scoped_re);
464
67
            if (!st) {
465
0
                context->add_warning(error_str.c_str());
466
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
467
0
                return;
468
0
            }
469
67
            re = scoped_re.get();
470
67
        }
471
472
102
        re2::StringPiece replace_str = re2::StringPiece(
473
102
                replace_col->get_data_at(index_check_const(index_now, Const)).to_string_view());
474
475
102
        std::string result_str(str_col->get_data_at(index_now).to_string());
476
102
        re2::RE2::GlobalReplace(&result_str, *re, replace_str);
477
102
        StringOP::push_value_string(result_str, index_now, result_data, result_offset);
478
102
    }
_ZN5doris17RegexpReplaceImpl19_execute_inner_loopILb1EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES7_S7_RKNS_9StringRefERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNSB_IjLm4096ESE_Lm16ELm15EEESG_m
Line
Count
Source
455
10
                                    const size_t index_now) {
456
10
        re2::RE2* re = reinterpret_cast<re2::RE2*>(
457
10
                context->get_function_state(FunctionContext::THREAD_LOCAL));
458
10
        std::unique_ptr<re2::RE2> scoped_re; // destroys re if state->re is nullptr
459
10
        if (re == nullptr) {
460
0
            std::string error_str;
461
0
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
462
0
            bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
463
0
                                                     options_value, scoped_re);
464
0
            if (!st) {
465
0
                context->add_warning(error_str.c_str());
466
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
467
0
                return;
468
0
            }
469
0
            re = scoped_re.get();
470
0
        }
471
472
10
        re2::StringPiece replace_str = re2::StringPiece(
473
10
                replace_col->get_data_at(index_check_const(index_now, Const)).to_string_view());
474
475
10
        std::string result_str(str_col->get_data_at(index_now).to_string());
476
10
        re2::RE2::GlobalReplace(&result_str, *re, replace_str);
477
10
        StringOP::push_value_string(result_str, index_now, result_data, result_offset);
478
10
    }
_ZN5doris17RegexpReplaceImpl19_execute_inner_loopILb0EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES7_S7_RKNS_9StringRefERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNSB_IjLm4096ESE_Lm16ELm15EEESG_m
Line
Count
Source
455
92
                                    const size_t index_now) {
456
92
        re2::RE2* re = reinterpret_cast<re2::RE2*>(
457
92
                context->get_function_state(FunctionContext::THREAD_LOCAL));
458
92
        std::unique_ptr<re2::RE2> scoped_re; // destroys re if state->re is nullptr
459
92
        if (re == nullptr) {
460
67
            std::string error_str;
461
67
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
462
67
            bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
463
67
                                                     options_value, scoped_re);
464
67
            if (!st) {
465
0
                context->add_warning(error_str.c_str());
466
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
467
0
                return;
468
0
            }
469
67
            re = scoped_re.get();
470
67
        }
471
472
92
        re2::StringPiece replace_str = re2::StringPiece(
473
92
                replace_col->get_data_at(index_check_const(index_now, Const)).to_string_view());
474
475
92
        std::string result_str(str_col->get_data_at(index_now).to_string());
476
92
        re2::RE2::GlobalReplace(&result_str, *re, replace_str);
477
92
        StringOP::push_value_string(result_str, index_now, result_data, result_offset);
478
92
    }
479
};
480
481
struct RegexpReplaceOneImpl {
482
    static constexpr auto name = "regexp_replace_one";
483
484
    static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[],
485
                             const StringRef& options_value, size_t input_rows_count,
486
                             ColumnString::Chars& result_data, ColumnString::Offsets& result_offset,
487
34
                             NullMap& null_map) {
488
34
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
489
34
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
490
34
        const auto* replace_col = check_and_get_column<ColumnString>(argument_columns[2].get());
491
        // 3 args
492
126
        for (size_t i = 0; i < input_rows_count; ++i) {
493
92
            if (null_map[i]) {
494
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
495
0
                continue;
496
0
            }
497
92
            _execute_inner_loop<false>(context, str_col, pattern_col, replace_col, options_value,
498
92
                                       result_data, result_offset, null_map, i);
499
92
        }
500
34
    }
501
502
    static void execute_impl_const_args(FunctionContext* context, ColumnPtr argument_columns[],
503
                                        const StringRef& options_value, size_t input_rows_count,
504
                                        ColumnString::Chars& result_data,
505
1
                                        ColumnString::Offsets& result_offset, NullMap& null_map) {
506
1
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
507
1
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
508
1
        const auto* replace_col = check_and_get_column<ColumnString>(argument_columns[2].get());
509
        // 3 args
510
6
        for (size_t i = 0; i < input_rows_count; ++i) {
511
5
            if (null_map[i]) {
512
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
513
0
                continue;
514
0
            }
515
5
            _execute_inner_loop<true>(context, str_col, pattern_col, replace_col, options_value,
516
5
                                      result_data, result_offset, null_map, i);
517
5
        }
518
1
    }
519
    template <bool Const>
520
    static void _execute_inner_loop(FunctionContext* context, const ColumnString* str_col,
521
                                    const ColumnString* pattern_col,
522
                                    const ColumnString* replace_col, const StringRef& options_value,
523
                                    ColumnString::Chars& result_data,
524
                                    ColumnString::Offsets& result_offset, NullMap& null_map,
525
97
                                    const size_t index_now) {
526
97
        re2::RE2* re = reinterpret_cast<re2::RE2*>(
527
97
                context->get_function_state(FunctionContext::THREAD_LOCAL));
528
97
        std::unique_ptr<re2::RE2> scoped_re; // destroys re if state->re is nullptr
529
97
        if (re == nullptr) {
530
72
            std::string error_str;
531
72
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
532
72
            bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
533
72
                                                     options_value, scoped_re);
534
72
            if (!st) {
535
0
                context->add_warning(error_str.c_str());
536
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
537
0
                return;
538
0
            }
539
72
            re = scoped_re.get();
540
72
        }
541
542
97
        re2::StringPiece replace_str = re2::StringPiece(
543
97
                replace_col->get_data_at(index_check_const(index_now, Const)).to_string_view());
544
545
97
        std::string result_str(str_col->get_data_at(index_now).to_string());
546
97
        re2::RE2::Replace(&result_str, *re, replace_str);
547
97
        StringOP::push_value_string(result_str, index_now, result_data, result_offset);
548
97
    }
_ZN5doris20RegexpReplaceOneImpl19_execute_inner_loopILb1EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES7_S7_RKNS_9StringRefERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNSB_IjLm4096ESE_Lm16ELm15EEESG_m
Line
Count
Source
525
5
                                    const size_t index_now) {
526
5
        re2::RE2* re = reinterpret_cast<re2::RE2*>(
527
5
                context->get_function_state(FunctionContext::THREAD_LOCAL));
528
5
        std::unique_ptr<re2::RE2> scoped_re; // destroys re if state->re is nullptr
529
5
        if (re == nullptr) {
530
0
            std::string error_str;
531
0
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
532
0
            bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
533
0
                                                     options_value, scoped_re);
534
0
            if (!st) {
535
0
                context->add_warning(error_str.c_str());
536
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
537
0
                return;
538
0
            }
539
0
            re = scoped_re.get();
540
0
        }
541
542
5
        re2::StringPiece replace_str = re2::StringPiece(
543
5
                replace_col->get_data_at(index_check_const(index_now, Const)).to_string_view());
544
545
5
        std::string result_str(str_col->get_data_at(index_now).to_string());
546
5
        re2::RE2::Replace(&result_str, *re, replace_str);
547
5
        StringOP::push_value_string(result_str, index_now, result_data, result_offset);
548
5
    }
_ZN5doris20RegexpReplaceOneImpl19_execute_inner_loopILb0EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES7_S7_RKNS_9StringRefERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNSB_IjLm4096ESE_Lm16ELm15EEESG_m
Line
Count
Source
525
92
                                    const size_t index_now) {
526
92
        re2::RE2* re = reinterpret_cast<re2::RE2*>(
527
92
                context->get_function_state(FunctionContext::THREAD_LOCAL));
528
92
        std::unique_ptr<re2::RE2> scoped_re; // destroys re if state->re is nullptr
529
92
        if (re == nullptr) {
530
72
            std::string error_str;
531
72
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
532
72
            bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
533
72
                                                     options_value, scoped_re);
534
72
            if (!st) {
535
0
                context->add_warning(error_str.c_str());
536
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
537
0
                return;
538
0
            }
539
72
            re = scoped_re.get();
540
72
        }
541
542
92
        re2::StringPiece replace_str = re2::StringPiece(
543
92
                replace_col->get_data_at(index_check_const(index_now, Const)).to_string_view());
544
545
92
        std::string result_str(str_col->get_data_at(index_now).to_string());
546
92
        re2::RE2::Replace(&result_str, *re, replace_str);
547
92
        StringOP::push_value_string(result_str, index_now, result_data, result_offset);
548
92
    }
549
};
550
551
template <bool ReturnNull>
552
struct RegexpExtractImpl {
553
    static constexpr auto name = ReturnNull ? "regexp_extract_or_null" : "regexp_extract";
554
    // 3 args
555
    static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[],
556
                             size_t input_rows_count, ColumnString::Chars& result_data,
557
59
                             ColumnString::Offsets& result_offset, NullMap& null_map) {
558
59
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
559
59
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
560
59
        const auto* index_col = check_and_get_column<ColumnInt64>(argument_columns[2].get());
561
188
        for (size_t i = 0; i < input_rows_count; ++i) {
562
129
            if (null_map[i]) {
563
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
564
0
                continue;
565
0
            }
566
129
            const auto& index_data = index_col->get_int(i);
567
129
            if (index_data < 0) {
568
0
                ReturnNull ? StringOP::push_null_string(i, result_data, result_offset, null_map)
569
0
                           : StringOP::push_empty_string(i, result_data, result_offset);
570
0
                continue;
571
0
            }
572
129
            _execute_inner_loop<false>(context, str_col, pattern_col, index_data, result_data,
573
129
                                       result_offset, null_map, i);
574
129
        }
575
59
    }
_ZN5doris17RegexpExtractImplILb1EE12execute_implEPNS_15FunctionContextEPNS_3COWINS_7IColumnEE13immutable_ptrIS5_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_
Line
Count
Source
557
18
                             ColumnString::Offsets& result_offset, NullMap& null_map) {
558
18
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
559
18
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
560
18
        const auto* index_col = check_and_get_column<ColumnInt64>(argument_columns[2].get());
561
36
        for (size_t i = 0; i < input_rows_count; ++i) {
562
18
            if (null_map[i]) {
563
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
564
0
                continue;
565
0
            }
566
18
            const auto& index_data = index_col->get_int(i);
567
18
            if (index_data < 0) {
568
0
                ReturnNull ? StringOP::push_null_string(i, result_data, result_offset, null_map)
569
0
                           : StringOP::push_empty_string(i, result_data, result_offset);
570
0
                continue;
571
0
            }
572
18
            _execute_inner_loop<false>(context, str_col, pattern_col, index_data, result_data,
573
18
                                       result_offset, null_map, i);
574
18
        }
575
18
    }
_ZN5doris17RegexpExtractImplILb0EE12execute_implEPNS_15FunctionContextEPNS_3COWINS_7IColumnEE13immutable_ptrIS5_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_
Line
Count
Source
557
41
                             ColumnString::Offsets& result_offset, NullMap& null_map) {
558
41
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
559
41
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
560
41
        const auto* index_col = check_and_get_column<ColumnInt64>(argument_columns[2].get());
561
152
        for (size_t i = 0; i < input_rows_count; ++i) {
562
111
            if (null_map[i]) {
563
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
564
0
                continue;
565
0
            }
566
111
            const auto& index_data = index_col->get_int(i);
567
111
            if (index_data < 0) {
568
0
                ReturnNull ? StringOP::push_null_string(i, result_data, result_offset, null_map)
569
0
                           : StringOP::push_empty_string(i, result_data, result_offset);
570
0
                continue;
571
0
            }
572
111
            _execute_inner_loop<false>(context, str_col, pattern_col, index_data, result_data,
573
111
                                       result_offset, null_map, i);
574
111
        }
575
41
    }
576
577
    static void execute_impl_const_args(FunctionContext* context, ColumnPtr argument_columns[],
578
                                        size_t input_rows_count, ColumnString::Chars& result_data,
579
1
                                        ColumnString::Offsets& result_offset, NullMap& null_map) {
580
1
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
581
1
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
582
1
        const auto* index_col = check_and_get_column<ColumnInt64>(argument_columns[2].get());
583
584
1
        const auto& index_data = index_col->get_int(0);
585
1
        if (index_data < 0) {
586
0
            for (size_t i = 0; i < input_rows_count; ++i) {
587
0
                ReturnNull ? StringOP::push_null_string(i, result_data, result_offset, null_map)
588
0
                           : StringOP::push_empty_string(i, result_data, result_offset);
589
0
            }
590
0
            return;
591
0
        }
592
593
8
        for (size_t i = 0; i < input_rows_count; ++i) {
594
7
            if (null_map[i]) {
595
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
596
0
                continue;
597
0
            }
598
599
7
            _execute_inner_loop<true>(context, str_col, pattern_col, index_data, result_data,
600
7
                                      result_offset, null_map, i);
601
7
        }
602
1
    }
Unexecuted instantiation: _ZN5doris17RegexpExtractImplILb1EE23execute_impl_const_argsEPNS_15FunctionContextEPNS_3COWINS_7IColumnEE13immutable_ptrIS5_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_
_ZN5doris17RegexpExtractImplILb0EE23execute_impl_const_argsEPNS_15FunctionContextEPNS_3COWINS_7IColumnEE13immutable_ptrIS5_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_
Line
Count
Source
579
1
                                        ColumnString::Offsets& result_offset, NullMap& null_map) {
580
1
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
581
1
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
582
1
        const auto* index_col = check_and_get_column<ColumnInt64>(argument_columns[2].get());
583
584
1
        const auto& index_data = index_col->get_int(0);
585
1
        if (index_data < 0) {
586
0
            for (size_t i = 0; i < input_rows_count; ++i) {
587
0
                ReturnNull ? StringOP::push_null_string(i, result_data, result_offset, null_map)
588
0
                           : StringOP::push_empty_string(i, result_data, result_offset);
589
0
            }
590
0
            return;
591
0
        }
592
593
8
        for (size_t i = 0; i < input_rows_count; ++i) {
594
7
            if (null_map[i]) {
595
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
596
0
                continue;
597
0
            }
598
599
7
            _execute_inner_loop<true>(context, str_col, pattern_col, index_data, result_data,
600
7
                                      result_offset, null_map, i);
601
7
        }
602
1
    }
603
    template <bool Const>
604
    static void _execute_inner_loop(FunctionContext* context, const ColumnString* str_col,
605
                                    const ColumnString* pattern_col, const Int64 index_data,
606
                                    ColumnString::Chars& result_data,
607
                                    ColumnString::Offsets& result_offset, NullMap& null_map,
608
136
                                    const size_t index_now) {
609
136
        auto* engine = reinterpret_cast<RegexpExtractEngine*>(
610
136
                context->get_function_state(FunctionContext::THREAD_LOCAL));
611
136
        std::unique_ptr<RegexpExtractEngine> scoped_engine;
612
613
136
        if (engine == nullptr) {
614
78
            std::string error_str;
615
78
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
616
78
            scoped_engine = std::make_unique<RegexpExtractEngine>();
617
78
            bool st = RegexpExtractEngine::compile(pattern, &error_str, *scoped_engine,
618
78
                                                   context->state()->enable_extended_regex());
619
78
            if (!st) {
620
0
                context->add_warning(error_str.c_str());
621
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
622
0
                return;
623
0
            }
624
78
            engine = scoped_engine.get();
625
78
        }
626
627
136
        const auto& str = str_col->get_data_at(index_now);
628
629
136
        int max_matches = 1 + engine->number_of_capturing_groups();
630
136
        if (index_data >= max_matches) {
631
84
            ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map)
632
84
                       : StringOP::push_empty_string(index_now, result_data, result_offset);
633
84
            return;
634
84
        }
635
636
52
        std::string match_result;
637
52
        bool success = engine->match_and_extract(str.data, str.size, static_cast<int>(index_data),
638
52
                                                 match_result);
639
640
52
        if (!success) {
641
13
            ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map)
642
13
                       : StringOP::push_empty_string(index_now, result_data, result_offset);
643
13
            return;
644
13
        }
645
646
39
        StringOP::push_value_string(std::string_view(match_result.data(), match_result.size()),
647
39
                                    index_now, result_data, result_offset);
648
39
    }
Unexecuted instantiation: _ZN5doris17RegexpExtractImplILb1EE19_execute_inner_loopILb1EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES8_lRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS9_IjLm4096ESC_Lm16ELm15EEESE_m
_ZN5doris17RegexpExtractImplILb1EE19_execute_inner_loopILb0EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES8_lRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS9_IjLm4096ESC_Lm16ELm15EEESE_m
Line
Count
Source
608
18
                                    const size_t index_now) {
609
18
        auto* engine = reinterpret_cast<RegexpExtractEngine*>(
610
18
                context->get_function_state(FunctionContext::THREAD_LOCAL));
611
18
        std::unique_ptr<RegexpExtractEngine> scoped_engine;
612
613
18
        if (engine == nullptr) {
614
0
            std::string error_str;
615
0
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
616
0
            scoped_engine = std::make_unique<RegexpExtractEngine>();
617
0
            bool st = RegexpExtractEngine::compile(pattern, &error_str, *scoped_engine,
618
0
                                                   context->state()->enable_extended_regex());
619
0
            if (!st) {
620
0
                context->add_warning(error_str.c_str());
621
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
622
0
                return;
623
0
            }
624
0
            engine = scoped_engine.get();
625
0
        }
626
627
18
        const auto& str = str_col->get_data_at(index_now);
628
629
18
        int max_matches = 1 + engine->number_of_capturing_groups();
630
18
        if (index_data >= max_matches) {
631
1
            ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map)
632
1
                       : StringOP::push_empty_string(index_now, result_data, result_offset);
633
1
            return;
634
1
        }
635
636
17
        std::string match_result;
637
17
        bool success = engine->match_and_extract(str.data, str.size, static_cast<int>(index_data),
638
17
                                                 match_result);
639
640
17
        if (!success) {
641
1
            ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map)
642
1
                       : StringOP::push_empty_string(index_now, result_data, result_offset);
643
1
            return;
644
1
        }
645
646
16
        StringOP::push_value_string(std::string_view(match_result.data(), match_result.size()),
647
16
                                    index_now, result_data, result_offset);
648
16
    }
_ZN5doris17RegexpExtractImplILb0EE19_execute_inner_loopILb1EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES8_lRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS9_IjLm4096ESC_Lm16ELm15EEESE_m
Line
Count
Source
608
7
                                    const size_t index_now) {
609
7
        auto* engine = reinterpret_cast<RegexpExtractEngine*>(
610
7
                context->get_function_state(FunctionContext::THREAD_LOCAL));
611
7
        std::unique_ptr<RegexpExtractEngine> scoped_engine;
612
613
7
        if (engine == nullptr) {
614
0
            std::string error_str;
615
0
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
616
0
            scoped_engine = std::make_unique<RegexpExtractEngine>();
617
0
            bool st = RegexpExtractEngine::compile(pattern, &error_str, *scoped_engine,
618
0
                                                   context->state()->enable_extended_regex());
619
0
            if (!st) {
620
0
                context->add_warning(error_str.c_str());
621
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
622
0
                return;
623
0
            }
624
0
            engine = scoped_engine.get();
625
0
        }
626
627
7
        const auto& str = str_col->get_data_at(index_now);
628
629
7
        int max_matches = 1 + engine->number_of_capturing_groups();
630
7
        if (index_data >= max_matches) {
631
0
            ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map)
632
0
                       : StringOP::push_empty_string(index_now, result_data, result_offset);
633
0
            return;
634
0
        }
635
636
7
        std::string match_result;
637
7
        bool success = engine->match_and_extract(str.data, str.size, static_cast<int>(index_data),
638
7
                                                 match_result);
639
640
7
        if (!success) {
641
7
            ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map)
642
7
                       : StringOP::push_empty_string(index_now, result_data, result_offset);
643
7
            return;
644
7
        }
645
646
0
        StringOP::push_value_string(std::string_view(match_result.data(), match_result.size()),
647
0
                                    index_now, result_data, result_offset);
648
0
    }
_ZN5doris17RegexpExtractImplILb0EE19_execute_inner_loopILb0EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES8_lRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS9_IjLm4096ESC_Lm16ELm15EEESE_m
Line
Count
Source
608
111
                                    const size_t index_now) {
609
111
        auto* engine = reinterpret_cast<RegexpExtractEngine*>(
610
111
                context->get_function_state(FunctionContext::THREAD_LOCAL));
611
111
        std::unique_ptr<RegexpExtractEngine> scoped_engine;
612
613
111
        if (engine == nullptr) {
614
78
            std::string error_str;
615
78
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
616
78
            scoped_engine = std::make_unique<RegexpExtractEngine>();
617
78
            bool st = RegexpExtractEngine::compile(pattern, &error_str, *scoped_engine,
618
78
                                                   context->state()->enable_extended_regex());
619
78
            if (!st) {
620
0
                context->add_warning(error_str.c_str());
621
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
622
0
                return;
623
0
            }
624
78
            engine = scoped_engine.get();
625
78
        }
626
627
111
        const auto& str = str_col->get_data_at(index_now);
628
629
111
        int max_matches = 1 + engine->number_of_capturing_groups();
630
111
        if (index_data >= max_matches) {
631
83
            ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map)
632
83
                       : StringOP::push_empty_string(index_now, result_data, result_offset);
633
83
            return;
634
83
        }
635
636
28
        std::string match_result;
637
28
        bool success = engine->match_and_extract(str.data, str.size, static_cast<int>(index_data),
638
28
                                                 match_result);
639
640
28
        if (!success) {
641
5
            ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map)
642
5
                       : StringOP::push_empty_string(index_now, result_data, result_offset);
643
5
            return;
644
5
        }
645
646
23
        StringOP::push_value_string(std::string_view(match_result.data(), match_result.size()),
647
23
                                    index_now, result_data, result_offset);
648
23
    }
649
};
650
651
struct RegexpExtractAllImpl {
652
    static constexpr auto name = "regexp_extract_all";
653
654
0
    size_t get_number_of_arguments() const { return 2; }
655
656
    static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[],
657
                             size_t input_rows_count, ColumnString::Chars& result_data,
658
35
                             ColumnString::Offsets& result_offset, NullMap& null_map) {
659
35
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
660
35
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
661
116
        for (int i = 0; i < input_rows_count; ++i) {
662
81
            if (null_map[i]) {
663
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
664
0
                continue;
665
0
            }
666
81
            _execute_inner_loop<false>(context, str_col, pattern_col, result_data, result_offset,
667
81
                                       null_map, i);
668
81
        }
669
35
    }
670
671
    static void execute_impl_const_args(FunctionContext* context, ColumnPtr argument_columns[],
672
                                        size_t input_rows_count, ColumnString::Chars& result_data,
673
8
                                        ColumnString::Offsets& result_offset, NullMap& null_map) {
674
8
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
675
8
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
676
22
        for (int i = 0; i < input_rows_count; ++i) {
677
14
            if (null_map[i]) {
678
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
679
0
                continue;
680
0
            }
681
14
            _execute_inner_loop<true>(context, str_col, pattern_col, result_data, result_offset,
682
14
                                      null_map, i);
683
14
        }
684
8
    }
685
    template <bool Const>
686
    static void _execute_inner_loop(FunctionContext* context, const ColumnString* str_col,
687
                                    const ColumnString* pattern_col,
688
                                    ColumnString::Chars& result_data,
689
                                    ColumnString::Offsets& result_offset, NullMap& null_map,
690
95
                                    const size_t index_now) {
691
95
        auto* engine = reinterpret_cast<RegexpExtractEngine*>(
692
95
                context->get_function_state(FunctionContext::THREAD_LOCAL));
693
95
        std::unique_ptr<RegexpExtractEngine> scoped_engine;
694
695
95
        if (engine == nullptr) {
696
64
            std::string error_str;
697
64
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
698
64
            scoped_engine = std::make_unique<RegexpExtractEngine>();
699
64
            bool st = RegexpExtractEngine::compile(pattern, &error_str, *scoped_engine,
700
64
                                                   context->state()->enable_extended_regex());
701
64
            if (!st) {
702
0
                context->add_warning(error_str.c_str());
703
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
704
0
                return;
705
0
            }
706
64
            engine = scoped_engine.get();
707
64
        }
708
709
95
        if (engine->number_of_capturing_groups() == 0) {
710
65
            StringOP::push_empty_string(index_now, result_data, result_offset);
711
65
            return;
712
65
        }
713
30
        const auto& str = str_col->get_data_at(index_now);
714
30
        std::vector<std::string> res_matches;
715
30
        engine->match_all_and_extract(str.data, str.size, res_matches);
716
717
30
        if (res_matches.empty()) {
718
10
            StringOP::push_empty_string(index_now, result_data, result_offset);
719
10
            return;
720
10
        }
721
722
20
        std::string res = "[";
723
59
        for (int j = 0; j < res_matches.size(); ++j) {
724
39
            res += "'" + res_matches[j] + "'";
725
39
            if (j < res_matches.size() - 1) {
726
19
                res += ",";
727
19
            }
728
39
        }
729
20
        res += "]";
730
20
        StringOP::push_value_string(std::string_view(res), index_now, result_data, result_offset);
731
20
    }
_ZN5doris20RegexpExtractAllImpl19_execute_inner_loopILb1EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES7_RNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEESD_m
Line
Count
Source
690
14
                                    const size_t index_now) {
691
14
        auto* engine = reinterpret_cast<RegexpExtractEngine*>(
692
14
                context->get_function_state(FunctionContext::THREAD_LOCAL));
693
14
        std::unique_ptr<RegexpExtractEngine> scoped_engine;
694
695
14
        if (engine == nullptr) {
696
0
            std::string error_str;
697
0
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
698
0
            scoped_engine = std::make_unique<RegexpExtractEngine>();
699
0
            bool st = RegexpExtractEngine::compile(pattern, &error_str, *scoped_engine,
700
0
                                                   context->state()->enable_extended_regex());
701
0
            if (!st) {
702
0
                context->add_warning(error_str.c_str());
703
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
704
0
                return;
705
0
            }
706
0
            engine = scoped_engine.get();
707
0
        }
708
709
14
        if (engine->number_of_capturing_groups() == 0) {
710
0
            StringOP::push_empty_string(index_now, result_data, result_offset);
711
0
            return;
712
0
        }
713
14
        const auto& str = str_col->get_data_at(index_now);
714
14
        std::vector<std::string> res_matches;
715
14
        engine->match_all_and_extract(str.data, str.size, res_matches);
716
717
14
        if (res_matches.empty()) {
718
7
            StringOP::push_empty_string(index_now, result_data, result_offset);
719
7
            return;
720
7
        }
721
722
7
        std::string res = "[";
723
19
        for (int j = 0; j < res_matches.size(); ++j) {
724
12
            res += "'" + res_matches[j] + "'";
725
12
            if (j < res_matches.size() - 1) {
726
5
                res += ",";
727
5
            }
728
12
        }
729
7
        res += "]";
730
7
        StringOP::push_value_string(std::string_view(res), index_now, result_data, result_offset);
731
7
    }
_ZN5doris20RegexpExtractAllImpl19_execute_inner_loopILb0EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES7_RNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEESD_m
Line
Count
Source
690
81
                                    const size_t index_now) {
691
81
        auto* engine = reinterpret_cast<RegexpExtractEngine*>(
692
81
                context->get_function_state(FunctionContext::THREAD_LOCAL));
693
81
        std::unique_ptr<RegexpExtractEngine> scoped_engine;
694
695
81
        if (engine == nullptr) {
696
64
            std::string error_str;
697
64
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
698
64
            scoped_engine = std::make_unique<RegexpExtractEngine>();
699
64
            bool st = RegexpExtractEngine::compile(pattern, &error_str, *scoped_engine,
700
64
                                                   context->state()->enable_extended_regex());
701
64
            if (!st) {
702
0
                context->add_warning(error_str.c_str());
703
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
704
0
                return;
705
0
            }
706
64
            engine = scoped_engine.get();
707
64
        }
708
709
81
        if (engine->number_of_capturing_groups() == 0) {
710
65
            StringOP::push_empty_string(index_now, result_data, result_offset);
711
65
            return;
712
65
        }
713
16
        const auto& str = str_col->get_data_at(index_now);
714
16
        std::vector<std::string> res_matches;
715
16
        engine->match_all_and_extract(str.data, str.size, res_matches);
716
717
16
        if (res_matches.empty()) {
718
3
            StringOP::push_empty_string(index_now, result_data, result_offset);
719
3
            return;
720
3
        }
721
722
13
        std::string res = "[";
723
40
        for (int j = 0; j < res_matches.size(); ++j) {
724
27
            res += "'" + res_matches[j] + "'";
725
27
            if (j < res_matches.size() - 1) {
726
14
                res += ",";
727
14
            }
728
27
        }
729
13
        res += "]";
730
13
        StringOP::push_value_string(std::string_view(res), index_now, result_data, result_offset);
731
13
    }
732
};
733
734
// template FunctionRegexpFunctionality is used for regexp_xxxx series functions, not for regexp match.
735
template <typename Impl>
736
class FunctionRegexpFunctionality : public IFunction {
737
public:
738
    static constexpr auto name = Impl::name;
739
740
115
    static FunctionPtr create() { return std::make_shared<FunctionRegexpFunctionality>(); }
_ZN5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb1EEEE6createEv
Line
Count
Source
740
30
    static FunctionPtr create() { return std::make_shared<FunctionRegexpFunctionality>(); }
_ZN5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb0EEEE6createEv
Line
Count
Source
740
42
    static FunctionPtr create() { return std::make_shared<FunctionRegexpFunctionality>(); }
_ZN5doris27FunctionRegexpFunctionalityINS_20RegexpExtractAllImplEE6createEv
Line
Count
Source
740
43
    static FunctionPtr create() { return std::make_shared<FunctionRegexpFunctionality>(); }
741
742
3
    String get_name() const override { return name; }
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb1EEEE8get_nameB5cxx11Ev
Line
Count
Source
742
1
    String get_name() const override { return name; }
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb0EEEE8get_nameB5cxx11Ev
Line
Count
Source
742
1
    String get_name() const override { return name; }
_ZNK5doris27FunctionRegexpFunctionalityINS_20RegexpExtractAllImplEE8get_nameB5cxx11Ev
Line
Count
Source
742
1
    String get_name() const override { return name; }
743
744
88
    size_t get_number_of_arguments() const override {
745
88
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
746
34
            return 2;
747
34
        }
748
0
        return 3;
749
88
    }
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb1EEEE23get_number_of_argumentsEv
Line
Count
Source
744
21
    size_t get_number_of_arguments() const override {
745
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
746
            return 2;
747
        }
748
21
        return 3;
749
21
    }
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb0EEEE23get_number_of_argumentsEv
Line
Count
Source
744
33
    size_t get_number_of_arguments() const override {
745
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
746
            return 2;
747
        }
748
33
        return 3;
749
33
    }
_ZNK5doris27FunctionRegexpFunctionalityINS_20RegexpExtractAllImplEE23get_number_of_argumentsEv
Line
Count
Source
744
34
    size_t get_number_of_arguments() const override {
745
34
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
746
34
            return 2;
747
34
        }
748
0
        return 3;
749
34
    }
750
751
88
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
752
88
        return make_nullable(std::make_shared<DataTypeString>());
753
88
    }
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb1EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
751
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
752
21
        return make_nullable(std::make_shared<DataTypeString>());
753
21
    }
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb0EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
751
33
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
752
33
        return make_nullable(std::make_shared<DataTypeString>());
753
33
    }
_ZNK5doris27FunctionRegexpFunctionalityINS_20RegexpExtractAllImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
751
34
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
752
34
        return make_nullable(std::make_shared<DataTypeString>());
753
34
    }
754
755
290
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
756
290
        if (scope == FunctionContext::THREAD_LOCAL) {
757
202
            if (context->is_col_constant(1)) {
758
121
                DCHECK(!context->get_function_state(scope));
759
121
                const auto pattern_col = context->get_constant_col(1)->column_ptr;
760
121
                const auto& pattern = pattern_col->get_data_at(0);
761
121
                if (pattern.size == 0) {
762
3
                    return Status::OK();
763
3
                }
764
765
118
                std::string error_str;
766
118
                auto engine = std::make_shared<RegexpExtractEngine>();
767
118
                bool st = RegexpExtractEngine::compile(pattern, &error_str, *engine,
768
118
                                                       context->state()->enable_extended_regex());
769
118
                if (!st) {
770
3
                    context->set_error(error_str.c_str());
771
3
                    return Status::InvalidArgument(error_str);
772
3
                }
773
115
                context->set_function_state(scope, engine);
774
115
            }
775
202
        }
776
284
        return Status::OK();
777
290
    }
_ZN5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb1EEEE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
755
52
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
756
52
        if (scope == FunctionContext::THREAD_LOCAL) {
757
31
            if (context->is_col_constant(1)) {
758
31
                DCHECK(!context->get_function_state(scope));
759
31
                const auto pattern_col = context->get_constant_col(1)->column_ptr;
760
31
                const auto& pattern = pattern_col->get_data_at(0);
761
31
                if (pattern.size == 0) {
762
1
                    return Status::OK();
763
1
                }
764
765
30
                std::string error_str;
766
30
                auto engine = std::make_shared<RegexpExtractEngine>();
767
30
                bool st = RegexpExtractEngine::compile(pattern, &error_str, *engine,
768
30
                                                       context->state()->enable_extended_regex());
769
30
                if (!st) {
770
1
                    context->set_error(error_str.c_str());
771
1
                    return Status::InvalidArgument(error_str);
772
1
                }
773
29
                context->set_function_state(scope, engine);
774
29
            }
775
31
        }
776
50
        return Status::OK();
777
52
    }
_ZN5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb0EEEE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
755
118
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
756
118
        if (scope == FunctionContext::THREAD_LOCAL) {
757
85
            if (context->is_col_constant(1)) {
758
42
                DCHECK(!context->get_function_state(scope));
759
42
                const auto pattern_col = context->get_constant_col(1)->column_ptr;
760
42
                const auto& pattern = pattern_col->get_data_at(0);
761
42
                if (pattern.size == 0) {
762
1
                    return Status::OK();
763
1
                }
764
765
41
                std::string error_str;
766
41
                auto engine = std::make_shared<RegexpExtractEngine>();
767
41
                bool st = RegexpExtractEngine::compile(pattern, &error_str, *engine,
768
41
                                                       context->state()->enable_extended_regex());
769
41
                if (!st) {
770
1
                    context->set_error(error_str.c_str());
771
1
                    return Status::InvalidArgument(error_str);
772
1
                }
773
40
                context->set_function_state(scope, engine);
774
40
            }
775
85
        }
776
116
        return Status::OK();
777
118
    }
_ZN5doris27FunctionRegexpFunctionalityINS_20RegexpExtractAllImplEE4openEPNS_15FunctionContextENS3_18FunctionStateScopeE
Line
Count
Source
755
120
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
756
120
        if (scope == FunctionContext::THREAD_LOCAL) {
757
86
            if (context->is_col_constant(1)) {
758
48
                DCHECK(!context->get_function_state(scope));
759
48
                const auto pattern_col = context->get_constant_col(1)->column_ptr;
760
48
                const auto& pattern = pattern_col->get_data_at(0);
761
48
                if (pattern.size == 0) {
762
1
                    return Status::OK();
763
1
                }
764
765
47
                std::string error_str;
766
47
                auto engine = std::make_shared<RegexpExtractEngine>();
767
47
                bool st = RegexpExtractEngine::compile(pattern, &error_str, *engine,
768
47
                                                       context->state()->enable_extended_regex());
769
47
                if (!st) {
770
1
                    context->set_error(error_str.c_str());
771
1
                    return Status::InvalidArgument(error_str);
772
1
                }
773
46
                context->set_function_state(scope, engine);
774
46
            }
775
86
        }
776
118
        return Status::OK();
777
120
    }
778
779
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
780
103
                        uint32_t result, size_t input_rows_count) const override {
781
103
        size_t argument_size = arguments.size();
782
783
103
        auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
784
103
        auto result_data_column = ColumnString::create();
785
103
        auto& result_data = result_data_column->get_chars();
786
103
        auto& result_offset = result_data_column->get_offsets();
787
103
        result_offset.resize(input_rows_count);
788
789
103
        bool col_const[3];
790
103
        ColumnPtr argument_columns[3];
791
369
        for (int i = 0; i < argument_size; ++i) {
792
266
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
793
266
        }
794
103
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
795
4
                                                     *block.get_by_position(arguments[0]).column)
796
4
                                                     .convert_to_full_column()
797
103
                                           : block.get_by_position(arguments[0]).column;
798
103
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
799
43
            default_preprocess_parameter_columns(argument_columns, col_const, {1}, block,
800
43
                                                 arguments);
801
60
        } else {
802
60
            default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block,
803
60
                                                 arguments);
804
60
        }
805
806
103
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
807
43
            if (col_const[1]) {
808
8
                Impl::execute_impl_const_args(context, argument_columns, input_rows_count,
809
8
                                              result_data, result_offset,
810
8
                                              result_null_map->get_data());
811
35
            } else {
812
35
                Impl::execute_impl(context, argument_columns, input_rows_count, result_data,
813
35
                                   result_offset, result_null_map->get_data());
814
35
            }
815
60
        } else {
816
60
            if (col_const[1] && col_const[2]) {
817
1
                Impl::execute_impl_const_args(context, argument_columns, input_rows_count,
818
1
                                              result_data, result_offset,
819
1
                                              result_null_map->get_data());
820
59
            } else {
821
59
                Impl::execute_impl(context, argument_columns, input_rows_count, result_data,
822
59
                                   result_offset, result_null_map->get_data());
823
59
            }
824
60
        }
825
826
103
        block.get_by_position(result).column =
827
103
                ColumnNullable::create(std::move(result_data_column), std::move(result_null_map));
828
103
        return Status::OK();
829
103
    }
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb1EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
780
18
                        uint32_t result, size_t input_rows_count) const override {
781
18
        size_t argument_size = arguments.size();
782
783
18
        auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
784
18
        auto result_data_column = ColumnString::create();
785
18
        auto& result_data = result_data_column->get_chars();
786
18
        auto& result_offset = result_data_column->get_offsets();
787
18
        result_offset.resize(input_rows_count);
788
789
18
        bool col_const[3];
790
18
        ColumnPtr argument_columns[3];
791
72
        for (int i = 0; i < argument_size; ++i) {
792
54
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
793
54
        }
794
18
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
795
0
                                                     *block.get_by_position(arguments[0]).column)
796
0
                                                     .convert_to_full_column()
797
18
                                           : block.get_by_position(arguments[0]).column;
798
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
799
            default_preprocess_parameter_columns(argument_columns, col_const, {1}, block,
800
                                                 arguments);
801
18
        } else {
802
18
            default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block,
803
18
                                                 arguments);
804
18
        }
805
806
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
807
            if (col_const[1]) {
808
                Impl::execute_impl_const_args(context, argument_columns, input_rows_count,
809
                                              result_data, result_offset,
810
                                              result_null_map->get_data());
811
            } else {
812
                Impl::execute_impl(context, argument_columns, input_rows_count, result_data,
813
                                   result_offset, result_null_map->get_data());
814
            }
815
18
        } else {
816
18
            if (col_const[1] && col_const[2]) {
817
0
                Impl::execute_impl_const_args(context, argument_columns, input_rows_count,
818
0
                                              result_data, result_offset,
819
0
                                              result_null_map->get_data());
820
18
            } else {
821
18
                Impl::execute_impl(context, argument_columns, input_rows_count, result_data,
822
18
                                   result_offset, result_null_map->get_data());
823
18
            }
824
18
        }
825
826
18
        block.get_by_position(result).column =
827
18
                ColumnNullable::create(std::move(result_data_column), std::move(result_null_map));
828
18
        return Status::OK();
829
18
    }
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb0EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
780
42
                        uint32_t result, size_t input_rows_count) const override {
781
42
        size_t argument_size = arguments.size();
782
783
42
        auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
784
42
        auto result_data_column = ColumnString::create();
785
42
        auto& result_data = result_data_column->get_chars();
786
42
        auto& result_offset = result_data_column->get_offsets();
787
42
        result_offset.resize(input_rows_count);
788
789
42
        bool col_const[3];
790
42
        ColumnPtr argument_columns[3];
791
168
        for (int i = 0; i < argument_size; ++i) {
792
126
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
793
126
        }
794
42
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
795
3
                                                     *block.get_by_position(arguments[0]).column)
796
3
                                                     .convert_to_full_column()
797
42
                                           : block.get_by_position(arguments[0]).column;
798
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
799
            default_preprocess_parameter_columns(argument_columns, col_const, {1}, block,
800
                                                 arguments);
801
42
        } else {
802
42
            default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block,
803
42
                                                 arguments);
804
42
        }
805
806
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
807
            if (col_const[1]) {
808
                Impl::execute_impl_const_args(context, argument_columns, input_rows_count,
809
                                              result_data, result_offset,
810
                                              result_null_map->get_data());
811
            } else {
812
                Impl::execute_impl(context, argument_columns, input_rows_count, result_data,
813
                                   result_offset, result_null_map->get_data());
814
            }
815
42
        } else {
816
42
            if (col_const[1] && col_const[2]) {
817
1
                Impl::execute_impl_const_args(context, argument_columns, input_rows_count,
818
1
                                              result_data, result_offset,
819
1
                                              result_null_map->get_data());
820
41
            } else {
821
41
                Impl::execute_impl(context, argument_columns, input_rows_count, result_data,
822
41
                                   result_offset, result_null_map->get_data());
823
41
            }
824
42
        }
825
826
42
        block.get_by_position(result).column =
827
42
                ColumnNullable::create(std::move(result_data_column), std::move(result_null_map));
828
42
        return Status::OK();
829
42
    }
_ZNK5doris27FunctionRegexpFunctionalityINS_20RegexpExtractAllImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
780
43
                        uint32_t result, size_t input_rows_count) const override {
781
43
        size_t argument_size = arguments.size();
782
783
43
        auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
784
43
        auto result_data_column = ColumnString::create();
785
43
        auto& result_data = result_data_column->get_chars();
786
43
        auto& result_offset = result_data_column->get_offsets();
787
43
        result_offset.resize(input_rows_count);
788
789
43
        bool col_const[3];
790
43
        ColumnPtr argument_columns[3];
791
129
        for (int i = 0; i < argument_size; ++i) {
792
86
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
793
86
        }
794
43
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
795
1
                                                     *block.get_by_position(arguments[0]).column)
796
1
                                                     .convert_to_full_column()
797
43
                                           : block.get_by_position(arguments[0]).column;
798
43
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
799
43
            default_preprocess_parameter_columns(argument_columns, col_const, {1}, block,
800
43
                                                 arguments);
801
        } else {
802
            default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block,
803
                                                 arguments);
804
        }
805
806
43
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
807
43
            if (col_const[1]) {
808
8
                Impl::execute_impl_const_args(context, argument_columns, input_rows_count,
809
8
                                              result_data, result_offset,
810
8
                                              result_null_map->get_data());
811
35
            } else {
812
35
                Impl::execute_impl(context, argument_columns, input_rows_count, result_data,
813
35
                                   result_offset, result_null_map->get_data());
814
35
            }
815
        } else {
816
            if (col_const[1] && col_const[2]) {
817
                Impl::execute_impl_const_args(context, argument_columns, input_rows_count,
818
                                              result_data, result_offset,
819
                                              result_null_map->get_data());
820
            } else {
821
                Impl::execute_impl(context, argument_columns, input_rows_count, result_data,
822
                                   result_offset, result_null_map->get_data());
823
            }
824
        }
825
826
43
        block.get_by_position(result).column =
827
43
                ColumnNullable::create(std::move(result_data_column), std::move(result_null_map));
828
43
        return Status::OK();
829
43
    }
830
};
831
832
8
// Registers the regexp function classes defined in this file with `factory`.
// The registration order is: the four FunctionRegexpReplace variants, the three
// FunctionRegexpFunctionality variants, then FunctionRegexpCount.
void register_function_regexp_extract(SimpleFunctionFactory& factory) {
    // FunctionRegexpReplace, for both Impls in their three- and four-parameter forms.
    using ReplaceThree = FunctionRegexpReplace<RegexpReplaceImpl, ThreeParamTypes>;
    using ReplaceFour = FunctionRegexpReplace<RegexpReplaceImpl, FourParamTypes>;
    using ReplaceOneThree = FunctionRegexpReplace<RegexpReplaceOneImpl, ThreeParamTypes>;
    using ReplaceOneFour = FunctionRegexpReplace<RegexpReplaceOneImpl, FourParamTypes>;
    factory.register_function<ReplaceThree>();
    factory.register_function<ReplaceFour>();
    factory.register_function<ReplaceOneThree>();
    factory.register_function<ReplaceOneFour>();

    // FunctionRegexpFunctionality, once per extract Impl.
    using ExtractTrue = FunctionRegexpFunctionality<RegexpExtractImpl<true>>;
    using ExtractFalse = FunctionRegexpFunctionality<RegexpExtractImpl<false>>;
    using ExtractAll = FunctionRegexpFunctionality<RegexpExtractAllImpl>;
    factory.register_function<ExtractTrue>();
    factory.register_function<ExtractFalse>();
    factory.register_function<ExtractAll>();

    factory.register_function<FunctionRegexpCount>();
}
842
843
} // namespace doris