Coverage Report

Created: 2026-04-15 12:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_regexp.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <glog/logging.h>
19
#include <re2/re2.h>
20
#include <re2/stringpiece.h>
21
#include <stddef.h>
22
23
#include <boost/regex.hpp>
24
#include <memory>
25
#include <string>
26
#include <string_view>
27
#include <type_traits>
28
#include <utility>
29
#include <vector>
30
31
#include "common/status.h"
32
#include "core/block/block.h"
33
#include "core/block/column_numbers.h"
34
#include "core/block/column_with_type_and_name.h"
35
#include "core/column/column.h"
36
#include "core/column/column_const.h"
37
#include "core/column/column_nullable.h"
38
#include "core/column/column_string.h"
39
#include "core/column/column_vector.h"
40
#include "core/data_type/data_type.h"
41
#include "core/data_type/data_type_nullable.h"
42
#include "core/data_type/data_type_number.h"
43
#include "core/data_type/data_type_string.h"
44
#include "core/string_ref.h"
45
#include "core/types.h"
46
#include "exec/common/stringop_substring.h"
47
#include "exprs/aggregate/aggregate_function.h"
48
#include "exprs/function/function.h"
49
#include "exprs/function/simple_function_factory.h"
50
#include "exprs/function_context.h"
51
#include "exprs/string_functions.h"
52
53
namespace doris {
54
55
// Helper structure to hold either RE2 or Boost.Regex
56
struct RegexpExtractEngine {
57
    std::unique_ptr<re2::RE2> re2_regex;
58
    std::unique_ptr<boost::regex> boost_regex;
59
60
18
    bool is_boost() const { return boost_regex != nullptr; }
61
313
    bool is_re2() const { return re2_regex != nullptr; }
62
63
    // Try to compile with RE2 first, fallback to Boost.Regex if RE2 fails
64
    static bool compile(const StringRef& pattern, std::string* error_str,
65
264
                        RegexpExtractEngine& engine, bool enable_extended_regex) {
66
264
        re2::RE2::Options options;
67
264
        options.set_log_errors(false); // avoid RE2 printing to stderr; we handle errors ourselves
68
264
        options.set_dot_nl(true); // make '.' match '\n' by default, consistent with REGEXP/LIKE
69
264
        engine.re2_regex =
70
264
                std::make_unique<re2::RE2>(re2::StringPiece(pattern.data, pattern.size), options);
71
72
264
        if (engine.re2_regex->ok()) {
73
243
            return true;
74
243
        } else if (!enable_extended_regex) {
75
3
            *error_str = fmt::format(
76
3
                    "Invalid regex pattern: {}. Error: {}. If you need advanced regex features, "
77
3
                    "try setting enable_extended_regex=true",
78
3
                    std::string(pattern.data, pattern.size), engine.re2_regex->error());
79
3
            return false;
80
3
        }
81
82
        // RE2 failed, try Boost.Regex for advanced features like zero-width assertions
83
18
        engine.re2_regex.reset();
84
18
        try {
85
18
            boost::regex::flag_type flags = boost::regex::normal;
86
18
            engine.boost_regex = std::make_unique<boost::regex>(pattern.data,
87
18
                                                                pattern.data + pattern.size, flags);
88
18
            return true;
89
18
        } catch (const boost::regex_error& e) {
90
0
            if (error_str) {
91
0
                *error_str = fmt::format("Invalid regex pattern: {}. Error: {}",
92
0
                                         std::string(pattern.data, pattern.size), e.what());
93
0
            }
94
0
            return false;
95
0
        }
96
18
    }
97
98
    // Get number of capturing groups
99
231
    int number_of_capturing_groups() const {
100
231
        if (is_re2()) {
101
222
            return re2_regex->NumberOfCapturingGroups();
102
222
        } else if (is_boost()) {
103
9
            return static_cast<int>(boost_regex->mark_count());
104
9
        }
105
0
        return 0;
106
231
    }
107
108
    // Match function for extraction
109
52
    bool match_and_extract(const char* data, size_t size, int index, std::string& result) const {
110
52
        if (is_re2()) {
111
47
            int max_matches = 1 + re2_regex->NumberOfCapturingGroups();
112
47
            if (index >= max_matches) {
113
0
                return false;
114
0
            }
115
47
            std::vector<re2::StringPiece> matches(max_matches);
116
47
            bool success = re2_regex->Match(re2::StringPiece(data, size), 0, size,
117
47
                                            re2::RE2::UNANCHORED, matches.data(), max_matches);
118
47
            if (success && index < matches.size()) {
119
34
                const re2::StringPiece& match = matches[index];
120
34
                result.assign(match.data(), match.size());
121
34
                return true;
122
34
            }
123
13
            return false;
124
47
        } else if (is_boost()) {
125
5
            boost::cmatch matches;
126
5
            bool success = boost::regex_search(data, data + size, matches, *boost_regex);
127
5
            if (success && index < matches.size()) {
128
5
                result = matches[index].str();
129
5
                return true;
130
5
            }
131
0
            return false;
132
5
        }
133
0
        return false;
134
52
    }
135
136
    // Match all occurrences and extract the first capturing group
137
    void match_all_and_extract(const char* data, size_t size,
138
30
                               std::vector<std::string>& results) const {
139
30
        if (is_re2()) {
140
26
            int max_matches = 1 + re2_regex->NumberOfCapturingGroups();
141
26
            if (max_matches < 2) {
142
0
                return; // No capturing groups
143
0
            }
144
145
26
            size_t pos = 0;
146
67
            while (pos < size) {
147
55
                const char* str_pos = data + pos;
148
55
                size_t str_size = size - pos;
149
55
                std::vector<re2::StringPiece> matches(max_matches);
150
55
                bool success = re2_regex->Match(re2::StringPiece(str_pos, str_size), 0, str_size,
151
55
                                                re2::RE2::UNANCHORED, matches.data(), max_matches);
152
55
                if (!success) {
153
14
                    break;
154
14
                }
155
41
                if (matches[0].empty()) {
156
11
                    pos += 1;
157
11
                    continue;
158
11
                }
159
                // Extract first capturing group
160
30
                if (matches.size() > 1 && !matches[1].empty()) {
161
30
                    results.emplace_back(matches[1].data(), matches[1].size());
162
30
                }
163
                // Move position forward
164
30
                auto offset = std::string(str_pos, str_size)
165
30
                                      .find(std::string(matches[0].data(), matches[0].size()));
166
30
                pos += offset + matches[0].size();
167
30
            }
168
26
        } else if (is_boost()) {
169
4
            const char* search_start = data;
170
4
            const char* search_end = data + size;
171
4
            boost::match_results<const char*> matches;
172
173
13
            while (boost::regex_search(search_start, search_end, matches, *boost_regex)) {
174
9
                if (matches.size() > 1 && matches[1].matched) {
175
9
                    results.emplace_back(matches[1].str());
176
9
                }
177
9
                if (matches[0].length() == 0) {
178
0
                    if (search_start == search_end) {
179
0
                        break;
180
0
                    }
181
0
                    search_start += 1;
182
9
                } else {
183
9
                    search_start = matches[0].second;
184
9
                }
185
9
            }
186
4
        }
187
30
    }
188
};
189
190
struct RegexpCountImpl {
191
    static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[],
192
15
                             size_t input_rows_count, ColumnInt32::Container& result_data) {
193
15
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
194
15
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
195
48
        for (int i = 0; i < input_rows_count; ++i) {
196
33
            result_data[i] = _execute_inner_loop(context, str_col, pattern_col, i);
197
33
        }
198
15
    }
199
    static int _execute_inner_loop(FunctionContext* context, const ColumnString* str_col,
200
33
                                   const ColumnString* pattern_col, const size_t index_now) {
201
33
        re2::RE2* re = reinterpret_cast<re2::RE2*>(
202
33
                context->get_function_state(FunctionContext::THREAD_LOCAL));
203
33
        std::unique_ptr<re2::RE2> scoped_re;
204
33
        if (re == nullptr) {
205
12
            std::string error_str;
206
12
            DCHECK(pattern_col);
207
12
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, false));
208
12
            bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(), StringRef(),
209
12
                                                     scoped_re);
210
12
            if (!st) {
211
0
                context->add_warning(error_str.c_str());
212
0
                throw Exception(Status::InvalidArgument(error_str));
213
0
                return 0;
214
0
            }
215
12
            re = scoped_re.get();
216
12
        }
217
218
33
        const auto& str = str_col->get_data_at(index_now);
219
33
        int count = 0;
220
33
        size_t pos = 0;
221
101
        while (pos < str.size) {
222
87
            auto str_pos = str.data + pos;
223
87
            auto str_size = str.size - pos;
224
87
            re2::StringPiece str_sp_current = re2::StringPiece(str_pos, str_size);
225
87
            re2::StringPiece match;
226
227
87
            bool success = re->Match(str_sp_current, 0, str_size, re2::RE2::UNANCHORED, &match, 1);
228
87
            if (!success) {
229
19
                break;
230
19
            }
231
68
            if (match.empty()) {
232
4
                pos += 1;
233
4
                continue;
234
4
            }
235
64
            count++;
236
64
            size_t match_start = match.data() - str_sp_current.data();
237
64
            pos += match_start + match.size();
238
64
        }
239
240
33
        return count;
241
33
    }
242
};
243
244
class FunctionRegexpCount : public IFunction {
245
public:
246
    static constexpr auto name = "regexp_count";
247
248
24
    static FunctionPtr create() { return std::make_shared<FunctionRegexpCount>(); }
249
250
1
    String get_name() const override { return name; }
251
252
15
    size_t get_number_of_arguments() const override { return 2; }
253
254
15
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
255
15
        return std::make_shared<DataTypeInt32>();
256
15
    }
257
258
67
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
259
67
        if (scope == FunctionContext::THREAD_LOCAL) {
260
52
            if (context->is_col_constant(1)) {
261
39
                DCHECK(!context->get_function_state(scope));
262
39
                const auto pattern_col = context->get_constant_col(1)->column_ptr;
263
39
                const auto& pattern = pattern_col->get_data_at(0);
264
39
                if (pattern.size == 0) {
265
4
                    return Status::OK();
266
4
                }
267
268
35
                std::string error_str;
269
35
                std::unique_ptr<re2::RE2> scoped_re;
270
35
                bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
271
35
                                                         StringRef(), scoped_re);
272
35
                if (!st) {
273
0
                    context->set_error(error_str.c_str());
274
0
                    return Status::InvalidArgument(error_str);
275
0
                }
276
35
                std::shared_ptr<re2::RE2> re(scoped_re.release());
277
35
                context->set_function_state(scope, re);
278
35
            }
279
52
        }
280
63
        return Status::OK();
281
67
    }
282
283
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
284
15
                        uint32_t result, size_t input_rows_count) const override {
285
15
        auto result_data_column = ColumnInt32::create(input_rows_count);
286
15
        auto& result_data = result_data_column->get_data();
287
288
15
        ColumnPtr argument_columns[2];
289
290
15
        argument_columns[0] = block.get_by_position(arguments[0]).column;
291
15
        argument_columns[1] = block.get_by_position(arguments[1]).column;
292
15
        RegexpCountImpl::execute_impl(context, argument_columns, input_rows_count, result_data);
293
294
15
        block.get_by_position(result).column = std::move(result_data_column);
295
15
        return Status::OK();
296
15
    }
297
};
298
299
struct ThreeParamTypes {
300
16
    static DataTypes get_variadic_argument_types() {
301
16
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
302
16
                std::make_shared<DataTypeString>()};
303
16
    }
304
};
305
306
struct FourParamTypes {
307
16
    static DataTypes get_variadic_argument_types() {
308
16
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
309
16
                std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
310
16
    }
311
};
312
313
// FunctionRegexpReplace is the template shared by regexp_replace / regexp_replace_one
314
template <typename Impl, typename ParamTypes>
315
class FunctionRegexpReplace : public IFunction {
316
public:
317
    static constexpr auto name = Impl::name;
318
319
92
    // Builds a new shared instance of this template instantiation.
    static FunctionPtr create() {
        return std::make_shared<FunctionRegexpReplace>();
    }
_ZN5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE6createEv
Line
Count
Source
319
33
    static FunctionPtr create() { return std::make_shared<FunctionRegexpReplace>(); }
_ZN5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE6createEv
Line
Count
Source
319
17
    static FunctionPtr create() { return std::make_shared<FunctionRegexpReplace>(); }
_ZN5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE6createEv
Line
Count
Source
319
24
    static FunctionPtr create() { return std::make_shared<FunctionRegexpReplace>(); }
_ZN5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE6createEv
Line
Count
Source
319
18
    static FunctionPtr create() { return std::make_shared<FunctionRegexpReplace>(); }
320
321
0
    // Function name, taken from Impl::name through the class-level `name` constant.
    String get_name() const override {
        return name;
    }
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE8get_nameB5cxx11Ev
322
323
0
    size_t get_number_of_arguments() const override {
324
0
        return get_variadic_argument_types_impl().size();
325
0
    }
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE23get_number_of_argumentsEv
326
327
60
    // Always reports the function as variadic.
    bool is_variadic() const override {
        return true;
    }
_ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE11is_variadicEv
Line
Count
Source
327
25
    bool is_variadic() const override { return true; }
_ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE11is_variadicEv
Line
Count
Source
327
9
    bool is_variadic() const override { return true; }
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE11is_variadicEv
Line
Count
Source
327
16
    bool is_variadic() const override { return true; }
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE11is_variadicEv
Line
Count
Source
327
10
    bool is_variadic() const override { return true; }
328
329
56
    // The result type is a nullable string, independent of `arguments`.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        auto string_type = std::make_shared<DataTypeString>();
        return make_nullable(string_type);
    }
_ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
329
24
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
330
24
        return make_nullable(std::make_shared<DataTypeString>());
331
24
    }
_ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
329
8
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
330
8
        return make_nullable(std::make_shared<DataTypeString>());
331
8
    }
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
329
15
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
330
15
        return make_nullable(std::make_shared<DataTypeString>());
331
15
    }
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
329
9
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
330
9
        return make_nullable(std::make_shared<DataTypeString>());
331
9
    }
332
333
32
    // Forwards to ParamTypes for the concrete argument type list.
    DataTypes get_variadic_argument_types_impl() const override {
        DataTypes signature = ParamTypes::get_variadic_argument_types();
        return signature;
    }
_ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE32get_variadic_argument_types_implEv
Line
Count
Source
333
8
    DataTypes get_variadic_argument_types_impl() const override {
334
8
        return ParamTypes::get_variadic_argument_types();
335
8
    }
_ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE32get_variadic_argument_types_implEv
Line
Count
Source
333
8
    DataTypes get_variadic_argument_types_impl() const override {
334
8
        return ParamTypes::get_variadic_argument_types();
335
8
    }
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE32get_variadic_argument_types_implEv
Line
Count
Source
333
8
    DataTypes get_variadic_argument_types_impl() const override {
334
8
        return ParamTypes::get_variadic_argument_types();
335
8
    }
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE32get_variadic_argument_types_implEv
Line
Count
Source
333
8
    DataTypes get_variadic_argument_types_impl() const override {
334
8
        return ParamTypes::get_variadic_argument_types();
335
8
    }
336
337
371
    // For THREAD_LOCAL scope with a constant, non-empty pattern argument: compiles the
    // pattern once (together with the constant options argument in the four-parameter
    // form) and stores it as function state. Every other case is a no-op that returns
    // OK. A pattern that does not compile yields InvalidArgument.
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
        if (scope != FunctionContext::THREAD_LOCAL) {
            return Status::OK();
        }
        if (!context->is_col_constant(1)) {
            return Status::OK();
        }

        DCHECK(!context->get_function_state(scope));
        const auto const_pattern_col = context->get_constant_col(1)->column_ptr;
        const auto& pattern_ref = const_pattern_col->get_data_at(0);
        if (pattern_ref.size == 0) {
            return Status::OK();
        }

        std::string compile_error;
        std::unique_ptr<re2::RE2> compiled;
        StringRef options_value;
        if constexpr (std::is_same_v<FourParamTypes, ParamTypes>) {
            // Only the four-parameter form carries an options argument.
            DCHECK_EQ(context->get_num_args(), 4);
            DCHECK(context->is_col_constant(3));
            const auto options_col = context->get_constant_col(3)->column_ptr;
            options_value = options_col->get_data_at(0);
        }

        const bool compiled_ok = StringFunctions::compile_regex(
                pattern_ref, &compile_error, StringRef(), options_value, compiled);
        if (!compiled_ok) {
            context->set_error(compile_error.c_str());
            return Status::InvalidArgument(compile_error);
        }

        // Hand ownership of the compiled regex over to the function state.
        std::shared_ptr<re2::RE2> shared_re(compiled.release());
        context->set_function_state(scope, shared_re);
        return Status::OK();
    }
_ZN5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
337
89
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
338
89
        if (scope == FunctionContext::THREAD_LOCAL) {
339
65
            if (context->is_col_constant(1)) {
340
41
                DCHECK(!context->get_function_state(scope));
341
41
                const auto pattern_col = context->get_constant_col(1)->column_ptr;
342
41
                const auto& pattern = pattern_col->get_data_at(0);
343
41
                if (pattern.size == 0) {
344
4
                    return Status::OK();
345
4
                }
346
347
37
                std::string error_str;
348
37
                std::unique_ptr<re2::RE2> scoped_re;
349
37
                StringRef options_value;
350
                if constexpr (std::is_same_v<FourParamTypes, ParamTypes>) {
351
                    DCHECK_EQ(context->get_num_args(), 4);
352
                    DCHECK(context->is_col_constant(3));
353
                    const auto options_col = context->get_constant_col(3)->column_ptr;
354
                    options_value = options_col->get_data_at(0);
355
                }
356
357
37
                bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
358
37
                                                         options_value, scoped_re);
359
37
                if (!st) {
360
0
                    context->set_error(error_str.c_str());
361
0
                    return Status::InvalidArgument(error_str);
362
0
                }
363
37
                std::shared_ptr<re2::RE2> re(scoped_re.release());
364
37
                context->set_function_state(scope, re);
365
37
            }
366
65
        }
367
85
        return Status::OK();
368
89
    }
_ZN5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
337
106
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
338
106
        if (scope == FunctionContext::THREAD_LOCAL) {
339
98
            if (context->is_col_constant(1)) {
340
50
                DCHECK(!context->get_function_state(scope));
341
50
                const auto pattern_col = context->get_constant_col(1)->column_ptr;
342
50
                const auto& pattern = pattern_col->get_data_at(0);
343
50
                if (pattern.size == 0) {
344
0
                    return Status::OK();
345
0
                }
346
347
50
                std::string error_str;
348
50
                std::unique_ptr<re2::RE2> scoped_re;
349
50
                StringRef options_value;
350
50
                if constexpr (std::is_same_v<FourParamTypes, ParamTypes>) {
351
50
                    DCHECK_EQ(context->get_num_args(), 4);
352
50
                    DCHECK(context->is_col_constant(3));
353
50
                    const auto options_col = context->get_constant_col(3)->column_ptr;
354
50
                    options_value = options_col->get_data_at(0);
355
50
                }
356
357
50
                bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
358
50
                                                         options_value, scoped_re);
359
50
                if (!st) {
360
0
                    context->set_error(error_str.c_str());
361
0
                    return Status::InvalidArgument(error_str);
362
0
                }
363
50
                std::shared_ptr<re2::RE2> re(scoped_re.release());
364
50
                context->set_function_state(scope, re);
365
50
            }
366
98
        }
367
106
        return Status::OK();
368
106
    }
_ZN5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
337
53
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
338
53
        if (scope == FunctionContext::THREAD_LOCAL) {
339
38
            if (context->is_col_constant(1)) {
340
14
                DCHECK(!context->get_function_state(scope));
341
14
                const auto pattern_col = context->get_constant_col(1)->column_ptr;
342
14
                const auto& pattern = pattern_col->get_data_at(0);
343
14
                if (pattern.size == 0) {
344
2
                    return Status::OK();
345
2
                }
346
347
12
                std::string error_str;
348
12
                std::unique_ptr<re2::RE2> scoped_re;
349
12
                StringRef options_value;
350
                if constexpr (std::is_same_v<FourParamTypes, ParamTypes>) {
351
                    DCHECK_EQ(context->get_num_args(), 4);
352
                    DCHECK(context->is_col_constant(3));
353
                    const auto options_col = context->get_constant_col(3)->column_ptr;
354
                    options_value = options_col->get_data_at(0);
355
                }
356
357
12
                bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
358
12
                                                         options_value, scoped_re);
359
12
                if (!st) {
360
0
                    context->set_error(error_str.c_str());
361
0
                    return Status::InvalidArgument(error_str);
362
0
                }
363
12
                std::shared_ptr<re2::RE2> re(scoped_re.release());
364
12
                context->set_function_state(scope, re);
365
12
            }
366
38
        }
367
51
        return Status::OK();
368
53
    }
_ZN5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
337
123
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
338
123
        if (scope == FunctionContext::THREAD_LOCAL) {
339
114
            if (context->is_col_constant(1)) {
340
52
                DCHECK(!context->get_function_state(scope));
341
52
                const auto pattern_col = context->get_constant_col(1)->column_ptr;
342
52
                const auto& pattern = pattern_col->get_data_at(0);
343
52
                if (pattern.size == 0) {
344
0
                    return Status::OK();
345
0
                }
346
347
52
                std::string error_str;
348
52
                std::unique_ptr<re2::RE2> scoped_re;
349
52
                StringRef options_value;
350
52
                if constexpr (std::is_same_v<FourParamTypes, ParamTypes>) {
351
52
                    DCHECK_EQ(context->get_num_args(), 4);
352
52
                    DCHECK(context->is_col_constant(3));
353
52
                    const auto options_col = context->get_constant_col(3)->column_ptr;
354
52
                    options_value = options_col->get_data_at(0);
355
52
                }
356
357
52
                bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
358
52
                                                         options_value, scoped_re);
359
52
                if (!st) {
360
0
                    context->set_error(error_str.c_str());
361
0
                    return Status::InvalidArgument(error_str);
362
0
                }
363
52
                std::shared_ptr<re2::RE2> re(scoped_re.release());
364
52
                context->set_function_state(scope, re);
365
52
            }
366
114
        }
367
123
        return Status::OK();
368
123
    }
369
370
    // Builds the nullable string result column for one block.
    //
    // The first argument is expanded to a full column when it is constant; arguments 1
    // and 2 are prepared by default_preprocess_parameter_columns. When both of them are
    // constant the work goes to Impl::execute_impl_const_args, otherwise to
    // Impl::execute_impl, which also receives the options value (row 0 of argument 3)
    // when four arguments are present.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        const size_t num_args = arguments.size();

        auto null_map_column = ColumnUInt8::create(input_rows_count, 0);
        auto string_column = ColumnString::create();
        auto& out_chars = string_column->get_chars();
        auto& out_offsets = string_column->get_offsets();
        out_offsets.resize(input_rows_count);

        bool col_const[3];
        ColumnPtr argument_columns[3];
        for (int arg = 0; arg < 3; ++arg) {
            col_const[arg] = is_column_const(*block.get_by_position(arguments[arg]).column);
        }

        if (col_const[0]) {
            const auto& const_input =
                    static_cast<const ColumnConst&>(*block.get_by_position(arguments[0]).column);
            argument_columns[0] = const_input.convert_to_full_column();
        } else {
            argument_columns[0] = block.get_by_position(arguments[0]).column;
        }

        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);

        StringRef options_value;
        const bool pattern_and_replacement_const = col_const[1] && col_const[2];
        if (!pattern_and_replacement_const) {
            // the options have check in FE, so is always const, and get idx of 0
            if (num_args == 4) {
                options_value = block.get_by_position(arguments[3]).column->get_data_at(0);
            }
            Impl::execute_impl(context, argument_columns, options_value, input_rows_count,
                               out_chars, out_offsets, null_map_column->get_data());
        } else {
            Impl::execute_impl_const_args(context, argument_columns, options_value,
                                          input_rows_count, out_chars, out_offsets,
                                          null_map_column->get_data());
        }

        block.get_by_position(result).column =
                ColumnNullable::create(std::move(string_column), std::move(null_map_column));
        return Status::OK();
    }
_ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_15ThreeParamTypesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
371
30
                        uint32_t result, size_t input_rows_count) const override {
372
30
        size_t argument_size = arguments.size();
373
374
30
        auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
375
30
        auto result_data_column = ColumnString::create();
376
30
        auto& result_data = result_data_column->get_chars();
377
30
        auto& result_offset = result_data_column->get_offsets();
378
30
        result_offset.resize(input_rows_count);
379
380
30
        bool col_const[3];
381
30
        ColumnPtr argument_columns[3];
382
120
        for (int i = 0; i < 3; ++i) {
383
90
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
384
90
        }
385
30
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
386
0
                                                     *block.get_by_position(arguments[0]).column)
387
0
                                                     .convert_to_full_column()
388
30
                                           : block.get_by_position(arguments[0]).column;
389
390
30
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
391
392
30
        StringRef options_value;
393
30
        if (col_const[1] && col_const[2]) {
394
1
            Impl::execute_impl_const_args(context, argument_columns, options_value,
395
1
                                          input_rows_count, result_data, result_offset,
396
1
                                          result_null_map->get_data());
397
29
        } else {
398
            // the options have check in FE, so is always const, and get idx of 0
399
29
            if (argument_size == 4) {
400
0
                options_value = block.get_by_position(arguments[3]).column->get_data_at(0);
401
0
            }
402
29
            Impl::execute_impl(context, argument_columns, options_value, input_rows_count,
403
29
                               result_data, result_offset, result_null_map->get_data());
404
29
        }
405
406
30
        block.get_by_position(result).column =
407
30
                ColumnNullable::create(std::move(result_data_column), std::move(result_null_map));
408
30
        return Status::OK();
409
30
    }
_ZNK5doris21FunctionRegexpReplaceINS_17RegexpReplaceImplENS_14FourParamTypesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
371
8
                        uint32_t result, size_t input_rows_count) const override {
372
8
        size_t argument_size = arguments.size();
373
374
8
        auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
375
8
        auto result_data_column = ColumnString::create();
376
8
        auto& result_data = result_data_column->get_chars();
377
8
        auto& result_offset = result_data_column->get_offsets();
378
8
        result_offset.resize(input_rows_count);
379
380
8
        bool col_const[3];
381
8
        ColumnPtr argument_columns[3];
382
32
        for (int i = 0; i < 3; ++i) {
383
24
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
384
24
        }
385
8
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
386
3
                                                     *block.get_by_position(arguments[0]).column)
387
3
                                                     .convert_to_full_column()
388
8
                                           : block.get_by_position(arguments[0]).column;
389
390
8
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
391
392
8
        StringRef options_value;
393
8
        if (col_const[1] && col_const[2]) {
394
1
            Impl::execute_impl_const_args(context, argument_columns, options_value,
395
1
                                          input_rows_count, result_data, result_offset,
396
1
                                          result_null_map->get_data());
397
7
        } else {
398
            // the options have check in FE, so is always const, and get idx of 0
399
7
            if (argument_size == 4) {
400
7
                options_value = block.get_by_position(arguments[3]).column->get_data_at(0);
401
7
            }
402
7
            Impl::execute_impl(context, argument_columns, options_value, input_rows_count,
403
7
                               result_data, result_offset, result_null_map->get_data());
404
7
        }
405
406
8
        block.get_by_position(result).column =
407
8
                ColumnNullable::create(std::move(result_data_column), std::move(result_null_map));
408
8
        return Status::OK();
409
8
    }
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_15ThreeParamTypesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
371
24
                        uint32_t result, size_t input_rows_count) const override {
372
24
        size_t argument_size = arguments.size();
373
374
24
        auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
375
24
        auto result_data_column = ColumnString::create();
376
24
        auto& result_data = result_data_column->get_chars();
377
24
        auto& result_offset = result_data_column->get_offsets();
378
24
        result_offset.resize(input_rows_count);
379
380
24
        bool col_const[3];
381
24
        ColumnPtr argument_columns[3];
382
96
        for (int i = 0; i < 3; ++i) {
383
72
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
384
72
        }
385
24
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
386
0
                                                     *block.get_by_position(arguments[0]).column)
387
0
                                                     .convert_to_full_column()
388
24
                                           : block.get_by_position(arguments[0]).column;
389
390
24
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
391
392
24
        StringRef options_value;
393
24
        if (col_const[1] && col_const[2]) {
394
0
            Impl::execute_impl_const_args(context, argument_columns, options_value,
395
0
                                          input_rows_count, result_data, result_offset,
396
0
                                          result_null_map->get_data());
397
24
        } else {
398
            // the options have check in FE, so is always const, and get idx of 0
399
24
            if (argument_size == 4) {
400
0
                options_value = block.get_by_position(arguments[3]).column->get_data_at(0);
401
0
            }
402
24
            Impl::execute_impl(context, argument_columns, options_value, input_rows_count,
403
24
                               result_data, result_offset, result_null_map->get_data());
404
24
        }
405
406
24
        block.get_by_position(result).column =
407
24
                ColumnNullable::create(std::move(result_data_column), std::move(result_null_map));
408
24
        return Status::OK();
409
24
    }
_ZNK5doris21FunctionRegexpReplaceINS_20RegexpReplaceOneImplENS_14FourParamTypesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
371
9
                        uint32_t result, size_t input_rows_count) const override {
372
9
        size_t argument_size = arguments.size();
373
374
9
        auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
375
9
        auto result_data_column = ColumnString::create();
376
9
        auto& result_data = result_data_column->get_chars();
377
9
        auto& result_offset = result_data_column->get_offsets();
378
9
        result_offset.resize(input_rows_count);
379
380
9
        bool col_const[3];
381
9
        ColumnPtr argument_columns[3];
382
36
        for (int i = 0; i < 3; ++i) {
383
27
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
384
27
        }
385
9
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
386
3
                                                     *block.get_by_position(arguments[0]).column)
387
3
                                                     .convert_to_full_column()
388
9
                                           : block.get_by_position(arguments[0]).column;
389
390
9
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
391
392
9
        StringRef options_value;
393
9
        if (col_const[1] && col_const[2]) {
394
1
            Impl::execute_impl_const_args(context, argument_columns, options_value,
395
1
                                          input_rows_count, result_data, result_offset,
396
1
                                          result_null_map->get_data());
397
8
        } else {
398
            // the options have check in FE, so is always const, and get idx of 0
399
8
            if (argument_size == 4) {
400
8
                options_value = block.get_by_position(arguments[3]).column->get_data_at(0);
401
8
            }
402
8
            Impl::execute_impl(context, argument_columns, options_value, input_rows_count,
403
8
                               result_data, result_offset, result_null_map->get_data());
404
8
        }
405
406
9
        block.get_by_position(result).column =
407
9
                ColumnNullable::create(std::move(result_data_column), std::move(result_null_map));
408
9
        return Status::OK();
409
9
    }
410
};
411
412
struct RegexpReplaceImpl {
413
    static constexpr auto name = "regexp_replace";
414
    static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[],
415
                             const StringRef& options_value, size_t input_rows_count,
416
                             ColumnString::Chars& result_data, ColumnString::Offsets& result_offset,
417
36
                             NullMap& null_map) {
418
36
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
419
36
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
420
36
        const auto* replace_col = check_and_get_column<ColumnString>(argument_columns[2].get());
421
422
126
        for (size_t i = 0; i < input_rows_count; ++i) {
423
90
            if (null_map[i]) {
424
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
425
0
                continue;
426
0
            }
427
90
            _execute_inner_loop<false>(context, str_col, pattern_col, replace_col, options_value,
428
90
                                       result_data, result_offset, null_map, i);
429
90
        }
430
36
    }
431
    static void execute_impl_const_args(FunctionContext* context, ColumnPtr argument_columns[],
432
                                        const StringRef& options_value, size_t input_rows_count,
433
                                        ColumnString::Chars& result_data,
434
2
                                        ColumnString::Offsets& result_offset, NullMap& null_map) {
435
2
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
436
2
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
437
2
        const auto* replace_col = check_and_get_column<ColumnString>(argument_columns[2].get());
438
439
12
        for (size_t i = 0; i < input_rows_count; ++i) {
440
10
            if (null_map[i]) {
441
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
442
0
                continue;
443
0
            }
444
10
            _execute_inner_loop<true>(context, str_col, pattern_col, replace_col, options_value,
445
10
                                      result_data, result_offset, null_map, i);
446
10
        }
447
2
    }
448
    template <bool Const>
449
    static void _execute_inner_loop(FunctionContext* context, const ColumnString* str_col,
450
                                    const ColumnString* pattern_col,
451
                                    const ColumnString* replace_col, const StringRef& options_value,
452
                                    ColumnString::Chars& result_data,
453
                                    ColumnString::Offsets& result_offset, NullMap& null_map,
454
100
                                    const size_t index_now) {
455
100
        re2::RE2* re = reinterpret_cast<re2::RE2*>(
456
100
                context->get_function_state(FunctionContext::THREAD_LOCAL));
457
100
        std::unique_ptr<re2::RE2> scoped_re; // destroys re if state->re is nullptr
458
100
        if (re == nullptr) {
459
67
            std::string error_str;
460
67
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
461
67
            bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
462
67
                                                     options_value, scoped_re);
463
67
            if (!st) {
464
0
                context->add_warning(error_str.c_str());
465
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
466
0
                return;
467
0
            }
468
67
            re = scoped_re.get();
469
67
        }
470
471
100
        re2::StringPiece replace_str = re2::StringPiece(
472
100
                replace_col->get_data_at(index_check_const(index_now, Const)).to_string_view());
473
474
100
        std::string result_str(str_col->get_data_at(index_now).to_string());
475
100
        re2::RE2::GlobalReplace(&result_str, *re, replace_str);
476
100
        StringOP::push_value_string(result_str, index_now, result_data, result_offset);
477
100
    }
_ZN5doris17RegexpReplaceImpl19_execute_inner_loopILb1EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES7_S7_RKNS_9StringRefERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSB_IjLm4096ESE_Lm16ELm15EEESG_m
Line
Count
Source
454
10
                                    const size_t index_now) {
455
10
        re2::RE2* re = reinterpret_cast<re2::RE2*>(
456
10
                context->get_function_state(FunctionContext::THREAD_LOCAL));
457
10
        std::unique_ptr<re2::RE2> scoped_re; // destroys re if state->re is nullptr
458
10
        if (re == nullptr) {
459
0
            std::string error_str;
460
0
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
461
0
            bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
462
0
                                                     options_value, scoped_re);
463
0
            if (!st) {
464
0
                context->add_warning(error_str.c_str());
465
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
466
0
                return;
467
0
            }
468
0
            re = scoped_re.get();
469
0
        }
470
471
10
        re2::StringPiece replace_str = re2::StringPiece(
472
10
                replace_col->get_data_at(index_check_const(index_now, Const)).to_string_view());
473
474
10
        std::string result_str(str_col->get_data_at(index_now).to_string());
475
10
        re2::RE2::GlobalReplace(&result_str, *re, replace_str);
476
10
        StringOP::push_value_string(result_str, index_now, result_data, result_offset);
477
10
    }
_ZN5doris17RegexpReplaceImpl19_execute_inner_loopILb0EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES7_S7_RKNS_9StringRefERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSB_IjLm4096ESE_Lm16ELm15EEESG_m
Line
Count
Source
454
90
                                    const size_t index_now) {
455
90
        re2::RE2* re = reinterpret_cast<re2::RE2*>(
456
90
                context->get_function_state(FunctionContext::THREAD_LOCAL));
457
90
        std::unique_ptr<re2::RE2> scoped_re; // destroys re if state->re is nullptr
458
90
        if (re == nullptr) {
459
67
            std::string error_str;
460
67
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
461
67
            bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
462
67
                                                     options_value, scoped_re);
463
67
            if (!st) {
464
0
                context->add_warning(error_str.c_str());
465
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
466
0
                return;
467
0
            }
468
67
            re = scoped_re.get();
469
67
        }
470
471
90
        re2::StringPiece replace_str = re2::StringPiece(
472
90
                replace_col->get_data_at(index_check_const(index_now, Const)).to_string_view());
473
474
90
        std::string result_str(str_col->get_data_at(index_now).to_string());
475
90
        re2::RE2::GlobalReplace(&result_str, *re, replace_str);
476
90
        StringOP::push_value_string(result_str, index_now, result_data, result_offset);
477
90
    }
478
};
479
480
struct RegexpReplaceOneImpl {
481
    static constexpr auto name = "regexp_replace_one";
482
483
    static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[],
484
                             const StringRef& options_value, size_t input_rows_count,
485
                             ColumnString::Chars& result_data, ColumnString::Offsets& result_offset,
486
32
                             NullMap& null_map) {
487
32
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
488
32
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
489
32
        const auto* replace_col = check_and_get_column<ColumnString>(argument_columns[2].get());
490
        // 3 args
491
122
        for (size_t i = 0; i < input_rows_count; ++i) {
492
90
            if (null_map[i]) {
493
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
494
0
                continue;
495
0
            }
496
90
            _execute_inner_loop<false>(context, str_col, pattern_col, replace_col, options_value,
497
90
                                       result_data, result_offset, null_map, i);
498
90
        }
499
32
    }
500
501
    static void execute_impl_const_args(FunctionContext* context, ColumnPtr argument_columns[],
502
                                        const StringRef& options_value, size_t input_rows_count,
503
                                        ColumnString::Chars& result_data,
504
1
                                        ColumnString::Offsets& result_offset, NullMap& null_map) {
505
1
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
506
1
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
507
1
        const auto* replace_col = check_and_get_column<ColumnString>(argument_columns[2].get());
508
        // 3 args
509
6
        for (size_t i = 0; i < input_rows_count; ++i) {
510
5
            if (null_map[i]) {
511
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
512
0
                continue;
513
0
            }
514
5
            _execute_inner_loop<true>(context, str_col, pattern_col, replace_col, options_value,
515
5
                                      result_data, result_offset, null_map, i);
516
5
        }
517
1
    }
518
    template <bool Const>
519
    static void _execute_inner_loop(FunctionContext* context, const ColumnString* str_col,
520
                                    const ColumnString* pattern_col,
521
                                    const ColumnString* replace_col, const StringRef& options_value,
522
                                    ColumnString::Chars& result_data,
523
                                    ColumnString::Offsets& result_offset, NullMap& null_map,
524
95
                                    const size_t index_now) {
525
95
        re2::RE2* re = reinterpret_cast<re2::RE2*>(
526
95
                context->get_function_state(FunctionContext::THREAD_LOCAL));
527
95
        std::unique_ptr<re2::RE2> scoped_re; // destroys re if state->re is nullptr
528
95
        if (re == nullptr) {
529
72
            std::string error_str;
530
72
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
531
72
            bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
532
72
                                                     options_value, scoped_re);
533
72
            if (!st) {
534
0
                context->add_warning(error_str.c_str());
535
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
536
0
                return;
537
0
            }
538
72
            re = scoped_re.get();
539
72
        }
540
541
95
        re2::StringPiece replace_str = re2::StringPiece(
542
95
                replace_col->get_data_at(index_check_const(index_now, Const)).to_string_view());
543
544
95
        std::string result_str(str_col->get_data_at(index_now).to_string());
545
95
        re2::RE2::Replace(&result_str, *re, replace_str);
546
95
        StringOP::push_value_string(result_str, index_now, result_data, result_offset);
547
95
    }
_ZN5doris20RegexpReplaceOneImpl19_execute_inner_loopILb1EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES7_S7_RKNS_9StringRefERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSB_IjLm4096ESE_Lm16ELm15EEESG_m
Line
Count
Source
524
5
                                    const size_t index_now) {
525
5
        re2::RE2* re = reinterpret_cast<re2::RE2*>(
526
5
                context->get_function_state(FunctionContext::THREAD_LOCAL));
527
5
        std::unique_ptr<re2::RE2> scoped_re; // destroys re if state->re is nullptr
528
5
        if (re == nullptr) {
529
0
            std::string error_str;
530
0
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
531
0
            bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
532
0
                                                     options_value, scoped_re);
533
0
            if (!st) {
534
0
                context->add_warning(error_str.c_str());
535
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
536
0
                return;
537
0
            }
538
0
            re = scoped_re.get();
539
0
        }
540
541
5
        re2::StringPiece replace_str = re2::StringPiece(
542
5
                replace_col->get_data_at(index_check_const(index_now, Const)).to_string_view());
543
544
5
        std::string result_str(str_col->get_data_at(index_now).to_string());
545
5
        re2::RE2::Replace(&result_str, *re, replace_str);
546
5
        StringOP::push_value_string(result_str, index_now, result_data, result_offset);
547
5
    }
_ZN5doris20RegexpReplaceOneImpl19_execute_inner_loopILb0EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES7_S7_RKNS_9StringRefERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSB_IjLm4096ESE_Lm16ELm15EEESG_m
Line
Count
Source
524
90
                                    const size_t index_now) {
525
90
        re2::RE2* re = reinterpret_cast<re2::RE2*>(
526
90
                context->get_function_state(FunctionContext::THREAD_LOCAL));
527
90
        std::unique_ptr<re2::RE2> scoped_re; // destroys re if state->re is nullptr
528
90
        if (re == nullptr) {
529
72
            std::string error_str;
530
72
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
531
72
            bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(),
532
72
                                                     options_value, scoped_re);
533
72
            if (!st) {
534
0
                context->add_warning(error_str.c_str());
535
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
536
0
                return;
537
0
            }
538
72
            re = scoped_re.get();
539
72
        }
540
541
90
        re2::StringPiece replace_str = re2::StringPiece(
542
90
                replace_col->get_data_at(index_check_const(index_now, Const)).to_string_view());
543
544
90
        std::string result_str(str_col->get_data_at(index_now).to_string());
545
90
        re2::RE2::Replace(&result_str, *re, replace_str);
546
90
        StringOP::push_value_string(result_str, index_now, result_data, result_offset);
547
90
    }
548
};
549
550
template <bool ReturnNull>
551
struct RegexpExtractImpl {
552
    static constexpr auto name = ReturnNull ? "regexp_extract_or_null" : "regexp_extract";
553
    // 3 args
554
    static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[],
555
                             size_t input_rows_count, ColumnString::Chars& result_data,
556
59
                             ColumnString::Offsets& result_offset, NullMap& null_map) {
557
59
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
558
59
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
559
59
        const auto* index_col = check_and_get_column<ColumnInt64>(argument_columns[2].get());
560
188
        for (size_t i = 0; i < input_rows_count; ++i) {
561
129
            if (null_map[i]) {
562
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
563
0
                continue;
564
0
            }
565
129
            const auto& index_data = index_col->get_int(i);
566
129
            if (index_data < 0) {
567
0
                ReturnNull ? StringOP::push_null_string(i, result_data, result_offset, null_map)
568
0
                           : StringOP::push_empty_string(i, result_data, result_offset);
569
0
                continue;
570
0
            }
571
129
            _execute_inner_loop<false>(context, str_col, pattern_col, index_data, result_data,
572
129
                                       result_offset, null_map, i);
573
129
        }
574
59
    }
_ZN5doris17RegexpExtractImplILb1EE12execute_implEPNS_15FunctionContextEPNS_3COWINS_7IColumnEE13immutable_ptrIS5_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_
Line
Count
Source
556
18
                             ColumnString::Offsets& result_offset, NullMap& null_map) {
557
18
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
558
18
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
559
18
        const auto* index_col = check_and_get_column<ColumnInt64>(argument_columns[2].get());
560
36
        for (size_t i = 0; i < input_rows_count; ++i) {
561
18
            if (null_map[i]) {
562
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
563
0
                continue;
564
0
            }
565
18
            const auto& index_data = index_col->get_int(i);
566
18
            if (index_data < 0) {
567
0
                ReturnNull ? StringOP::push_null_string(i, result_data, result_offset, null_map)
568
0
                           : StringOP::push_empty_string(i, result_data, result_offset);
569
0
                continue;
570
0
            }
571
18
            _execute_inner_loop<false>(context, str_col, pattern_col, index_data, result_data,
572
18
                                       result_offset, null_map, i);
573
18
        }
574
18
    }
_ZN5doris17RegexpExtractImplILb0EE12execute_implEPNS_15FunctionContextEPNS_3COWINS_7IColumnEE13immutable_ptrIS5_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_
Line
Count
Source
556
41
                             ColumnString::Offsets& result_offset, NullMap& null_map) {
557
41
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
558
41
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
559
41
        const auto* index_col = check_and_get_column<ColumnInt64>(argument_columns[2].get());
560
152
        for (size_t i = 0; i < input_rows_count; ++i) {
561
111
            if (null_map[i]) {
562
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
563
0
                continue;
564
0
            }
565
111
            const auto& index_data = index_col->get_int(i);
566
111
            if (index_data < 0) {
567
0
                ReturnNull ? StringOP::push_null_string(i, result_data, result_offset, null_map)
568
0
                           : StringOP::push_empty_string(i, result_data, result_offset);
569
0
                continue;
570
0
            }
571
111
            _execute_inner_loop<false>(context, str_col, pattern_col, index_data, result_data,
572
111
                                       result_offset, null_map, i);
573
111
        }
574
41
    }
575
576
    static void execute_impl_const_args(FunctionContext* context, ColumnPtr argument_columns[],
577
                                        size_t input_rows_count, ColumnString::Chars& result_data,
578
1
                                        ColumnString::Offsets& result_offset, NullMap& null_map) {
579
1
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
580
1
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
581
1
        const auto* index_col = check_and_get_column<ColumnInt64>(argument_columns[2].get());
582
583
1
        const auto& index_data = index_col->get_int(0);
584
1
        if (index_data < 0) {
585
0
            for (size_t i = 0; i < input_rows_count; ++i) {
586
0
                ReturnNull ? StringOP::push_null_string(i, result_data, result_offset, null_map)
587
0
                           : StringOP::push_empty_string(i, result_data, result_offset);
588
0
            }
589
0
            return;
590
0
        }
591
592
8
        for (size_t i = 0; i < input_rows_count; ++i) {
593
7
            if (null_map[i]) {
594
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
595
0
                continue;
596
0
            }
597
598
7
            _execute_inner_loop<true>(context, str_col, pattern_col, index_data, result_data,
599
7
                                      result_offset, null_map, i);
600
7
        }
601
1
    }
Unexecuted instantiation: _ZN5doris17RegexpExtractImplILb1EE23execute_impl_const_argsEPNS_15FunctionContextEPNS_3COWINS_7IColumnEE13immutable_ptrIS5_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_
_ZN5doris17RegexpExtractImplILb0EE23execute_impl_const_argsEPNS_15FunctionContextEPNS_3COWINS_7IColumnEE13immutable_ptrIS5_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_
Line
Count
Source
578
1
                                        ColumnString::Offsets& result_offset, NullMap& null_map) {
579
1
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
580
1
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
581
1
        const auto* index_col = check_and_get_column<ColumnInt64>(argument_columns[2].get());
582
583
1
        const auto& index_data = index_col->get_int(0);
584
1
        if (index_data < 0) {
585
0
            for (size_t i = 0; i < input_rows_count; ++i) {
586
0
                ReturnNull ? StringOP::push_null_string(i, result_data, result_offset, null_map)
587
0
                           : StringOP::push_empty_string(i, result_data, result_offset);
588
0
            }
589
0
            return;
590
0
        }
591
592
8
        for (size_t i = 0; i < input_rows_count; ++i) {
593
7
            if (null_map[i]) {
594
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
595
0
                continue;
596
0
            }
597
598
7
            _execute_inner_loop<true>(context, str_col, pattern_col, index_data, result_data,
599
7
                                      result_offset, null_map, i);
600
7
        }
601
1
    }
602
    template <bool Const>
603
    static void _execute_inner_loop(FunctionContext* context, const ColumnString* str_col,
604
                                    const ColumnString* pattern_col, const Int64 index_data,
605
                                    ColumnString::Chars& result_data,
606
                                    ColumnString::Offsets& result_offset, NullMap& null_map,
607
136
                                    const size_t index_now) {
608
136
        auto* engine = reinterpret_cast<RegexpExtractEngine*>(
609
136
                context->get_function_state(FunctionContext::THREAD_LOCAL));
610
136
        std::unique_ptr<RegexpExtractEngine> scoped_engine;
611
612
136
        if (engine == nullptr) {
613
78
            std::string error_str;
614
78
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
615
78
            scoped_engine = std::make_unique<RegexpExtractEngine>();
616
78
            bool st = RegexpExtractEngine::compile(pattern, &error_str, *scoped_engine,
617
78
                                                   context->state()->enable_extended_regex());
618
78
            if (!st) {
619
0
                context->add_warning(error_str.c_str());
620
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
621
0
                return;
622
0
            }
623
78
            engine = scoped_engine.get();
624
78
        }
625
626
136
        const auto& str = str_col->get_data_at(index_now);
627
628
136
        int max_matches = 1 + engine->number_of_capturing_groups();
629
136
        if (index_data >= max_matches) {
630
84
            ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map)
631
84
                       : StringOP::push_empty_string(index_now, result_data, result_offset);
632
84
            return;
633
84
        }
634
635
52
        std::string match_result;
636
52
        bool success = engine->match_and_extract(str.data, str.size, static_cast<int>(index_data),
637
52
                                                 match_result);
638
639
52
        if (!success) {
640
13
            ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map)
641
13
                       : StringOP::push_empty_string(index_now, result_data, result_offset);
642
13
            return;
643
13
        }
644
645
39
        StringOP::push_value_string(std::string_view(match_result.data(), match_result.size()),
646
39
                                    index_now, result_data, result_offset);
647
39
    }
Unexecuted instantiation: _ZN5doris17RegexpExtractImplILb1EE19_execute_inner_loopILb1EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES8_lRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS9_IjLm4096ESC_Lm16ELm15EEESE_m
_ZN5doris17RegexpExtractImplILb1EE19_execute_inner_loopILb0EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES8_lRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS9_IjLm4096ESC_Lm16ELm15EEESE_m
Line
Count
Source
607
18
                                    const size_t index_now) {
608
18
        auto* engine = reinterpret_cast<RegexpExtractEngine*>(
609
18
                context->get_function_state(FunctionContext::THREAD_LOCAL));
610
18
        std::unique_ptr<RegexpExtractEngine> scoped_engine;
611
612
18
        if (engine == nullptr) {
613
0
            std::string error_str;
614
0
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
615
0
            scoped_engine = std::make_unique<RegexpExtractEngine>();
616
0
            bool st = RegexpExtractEngine::compile(pattern, &error_str, *scoped_engine,
617
0
                                                   context->state()->enable_extended_regex());
618
0
            if (!st) {
619
0
                context->add_warning(error_str.c_str());
620
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
621
0
                return;
622
0
            }
623
0
            engine = scoped_engine.get();
624
0
        }
625
626
18
        const auto& str = str_col->get_data_at(index_now);
627
628
18
        int max_matches = 1 + engine->number_of_capturing_groups();
629
18
        if (index_data >= max_matches) {
630
1
            ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map)
631
1
                       : StringOP::push_empty_string(index_now, result_data, result_offset);
632
1
            return;
633
1
        }
634
635
17
        std::string match_result;
636
17
        bool success = engine->match_and_extract(str.data, str.size, static_cast<int>(index_data),
637
17
                                                 match_result);
638
639
17
        if (!success) {
640
1
            ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map)
641
1
                       : StringOP::push_empty_string(index_now, result_data, result_offset);
642
1
            return;
643
1
        }
644
645
16
        StringOP::push_value_string(std::string_view(match_result.data(), match_result.size()),
646
16
                                    index_now, result_data, result_offset);
647
16
    }
_ZN5doris17RegexpExtractImplILb0EE19_execute_inner_loopILb1EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES8_lRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS9_IjLm4096ESC_Lm16ELm15EEESE_m
Line
Count
Source
607
7
                                    const size_t index_now) {
608
7
        auto* engine = reinterpret_cast<RegexpExtractEngine*>(
609
7
                context->get_function_state(FunctionContext::THREAD_LOCAL));
610
7
        std::unique_ptr<RegexpExtractEngine> scoped_engine;
611
612
7
        if (engine == nullptr) {
613
0
            std::string error_str;
614
0
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
615
0
            scoped_engine = std::make_unique<RegexpExtractEngine>();
616
0
            bool st = RegexpExtractEngine::compile(pattern, &error_str, *scoped_engine,
617
0
                                                   context->state()->enable_extended_regex());
618
0
            if (!st) {
619
0
                context->add_warning(error_str.c_str());
620
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
621
0
                return;
622
0
            }
623
0
            engine = scoped_engine.get();
624
0
        }
625
626
7
        const auto& str = str_col->get_data_at(index_now);
627
628
7
        int max_matches = 1 + engine->number_of_capturing_groups();
629
7
        if (index_data >= max_matches) {
630
0
            ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map)
631
0
                       : StringOP::push_empty_string(index_now, result_data, result_offset);
632
0
            return;
633
0
        }
634
635
7
        std::string match_result;
636
7
        bool success = engine->match_and_extract(str.data, str.size, static_cast<int>(index_data),
637
7
                                                 match_result);
638
639
7
        if (!success) {
640
7
            ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map)
641
7
                       : StringOP::push_empty_string(index_now, result_data, result_offset);
642
7
            return;
643
7
        }
644
645
0
        StringOP::push_value_string(std::string_view(match_result.data(), match_result.size()),
646
0
                                    index_now, result_data, result_offset);
647
0
    }
_ZN5doris17RegexpExtractImplILb0EE19_execute_inner_loopILb0EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES8_lRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS9_IjLm4096ESC_Lm16ELm15EEESE_m
Line
Count
Source
607
111
                                    const size_t index_now) {
608
111
        auto* engine = reinterpret_cast<RegexpExtractEngine*>(
609
111
                context->get_function_state(FunctionContext::THREAD_LOCAL));
610
111
        std::unique_ptr<RegexpExtractEngine> scoped_engine;
611
612
111
        if (engine == nullptr) {
613
78
            std::string error_str;
614
78
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
615
78
            scoped_engine = std::make_unique<RegexpExtractEngine>();
616
78
            bool st = RegexpExtractEngine::compile(pattern, &error_str, *scoped_engine,
617
78
                                                   context->state()->enable_extended_regex());
618
78
            if (!st) {
619
0
                context->add_warning(error_str.c_str());
620
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
621
0
                return;
622
0
            }
623
78
            engine = scoped_engine.get();
624
78
        }
625
626
111
        const auto& str = str_col->get_data_at(index_now);
627
628
111
        int max_matches = 1 + engine->number_of_capturing_groups();
629
111
        if (index_data >= max_matches) {
630
83
            ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map)
631
83
                       : StringOP::push_empty_string(index_now, result_data, result_offset);
632
83
            return;
633
83
        }
634
635
28
        std::string match_result;
636
28
        bool success = engine->match_and_extract(str.data, str.size, static_cast<int>(index_data),
637
28
                                                 match_result);
638
639
28
        if (!success) {
640
5
            ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map)
641
5
                       : StringOP::push_empty_string(index_now, result_data, result_offset);
642
5
            return;
643
5
        }
644
645
23
        StringOP::push_value_string(std::string_view(match_result.data(), match_result.size()),
646
23
                                    index_now, result_data, result_offset);
647
23
    }
648
};
649
650
struct RegexpExtractAllImpl {
651
    static constexpr auto name = "regexp_extract_all";
652
653
0
    size_t get_number_of_arguments() const { return 2; }
654
655
    static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[],
656
                             size_t input_rows_count, ColumnString::Chars& result_data,
657
35
                             ColumnString::Offsets& result_offset, NullMap& null_map) {
658
35
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
659
35
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
660
116
        for (int i = 0; i < input_rows_count; ++i) {
661
81
            if (null_map[i]) {
662
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
663
0
                continue;
664
0
            }
665
81
            _execute_inner_loop<false>(context, str_col, pattern_col, result_data, result_offset,
666
81
                                       null_map, i);
667
81
        }
668
35
    }
669
670
    static void execute_impl_const_args(FunctionContext* context, ColumnPtr argument_columns[],
671
                                        size_t input_rows_count, ColumnString::Chars& result_data,
672
8
                                        ColumnString::Offsets& result_offset, NullMap& null_map) {
673
8
        const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get());
674
8
        const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get());
675
22
        for (int i = 0; i < input_rows_count; ++i) {
676
14
            if (null_map[i]) {
677
0
                StringOP::push_null_string(i, result_data, result_offset, null_map);
678
0
                continue;
679
0
            }
680
14
            _execute_inner_loop<true>(context, str_col, pattern_col, result_data, result_offset,
681
14
                                      null_map, i);
682
14
        }
683
8
    }
684
    template <bool Const>
685
    static void _execute_inner_loop(FunctionContext* context, const ColumnString* str_col,
686
                                    const ColumnString* pattern_col,
687
                                    ColumnString::Chars& result_data,
688
                                    ColumnString::Offsets& result_offset, NullMap& null_map,
689
95
                                    const size_t index_now) {
690
95
        auto* engine = reinterpret_cast<RegexpExtractEngine*>(
691
95
                context->get_function_state(FunctionContext::THREAD_LOCAL));
692
95
        std::unique_ptr<RegexpExtractEngine> scoped_engine;
693
694
95
        if (engine == nullptr) {
695
64
            std::string error_str;
696
64
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
697
64
            scoped_engine = std::make_unique<RegexpExtractEngine>();
698
64
            bool st = RegexpExtractEngine::compile(pattern, &error_str, *scoped_engine,
699
64
                                                   context->state()->enable_extended_regex());
700
64
            if (!st) {
701
0
                context->add_warning(error_str.c_str());
702
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
703
0
                return;
704
0
            }
705
64
            engine = scoped_engine.get();
706
64
        }
707
708
95
        if (engine->number_of_capturing_groups() == 0) {
709
65
            StringOP::push_empty_string(index_now, result_data, result_offset);
710
65
            return;
711
65
        }
712
30
        const auto& str = str_col->get_data_at(index_now);
713
30
        std::vector<std::string> res_matches;
714
30
        engine->match_all_and_extract(str.data, str.size, res_matches);
715
716
30
        if (res_matches.empty()) {
717
10
            StringOP::push_empty_string(index_now, result_data, result_offset);
718
10
            return;
719
10
        }
720
721
20
        std::string res = "[";
722
59
        for (int j = 0; j < res_matches.size(); ++j) {
723
39
            res += "'" + res_matches[j] + "'";
724
39
            if (j < res_matches.size() - 1) {
725
19
                res += ",";
726
19
            }
727
39
        }
728
20
        res += "]";
729
20
        StringOP::push_value_string(std::string_view(res), index_now, result_data, result_offset);
730
20
    }
_ZN5doris20RegexpExtractAllImpl19_execute_inner_loopILb1EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES7_RNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEESD_m
Line
Count
Source
689
14
                                    const size_t index_now) {
690
14
        auto* engine = reinterpret_cast<RegexpExtractEngine*>(
691
14
                context->get_function_state(FunctionContext::THREAD_LOCAL));
692
14
        std::unique_ptr<RegexpExtractEngine> scoped_engine;
693
694
14
        if (engine == nullptr) {
695
0
            std::string error_str;
696
0
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
697
0
            scoped_engine = std::make_unique<RegexpExtractEngine>();
698
0
            bool st = RegexpExtractEngine::compile(pattern, &error_str, *scoped_engine,
699
0
                                                   context->state()->enable_extended_regex());
700
0
            if (!st) {
701
0
                context->add_warning(error_str.c_str());
702
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
703
0
                return;
704
0
            }
705
0
            engine = scoped_engine.get();
706
0
        }
707
708
14
        if (engine->number_of_capturing_groups() == 0) {
709
0
            StringOP::push_empty_string(index_now, result_data, result_offset);
710
0
            return;
711
0
        }
712
14
        const auto& str = str_col->get_data_at(index_now);
713
14
        std::vector<std::string> res_matches;
714
14
        engine->match_all_and_extract(str.data, str.size, res_matches);
715
716
14
        if (res_matches.empty()) {
717
7
            StringOP::push_empty_string(index_now, result_data, result_offset);
718
7
            return;
719
7
        }
720
721
7
        std::string res = "[";
722
19
        for (int j = 0; j < res_matches.size(); ++j) {
723
12
            res += "'" + res_matches[j] + "'";
724
12
            if (j < res_matches.size() - 1) {
725
5
                res += ",";
726
5
            }
727
12
        }
728
7
        res += "]";
729
7
        StringOP::push_value_string(std::string_view(res), index_now, result_data, result_offset);
730
7
    }
_ZN5doris20RegexpExtractAllImpl19_execute_inner_loopILb0EEEvPNS_15FunctionContextEPKNS_9ColumnStrIjEES7_RNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEESD_m
Line
Count
Source
689
81
                                    const size_t index_now) {
690
81
        auto* engine = reinterpret_cast<RegexpExtractEngine*>(
691
81
                context->get_function_state(FunctionContext::THREAD_LOCAL));
692
81
        std::unique_ptr<RegexpExtractEngine> scoped_engine;
693
694
81
        if (engine == nullptr) {
695
64
            std::string error_str;
696
64
            const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const));
697
64
            scoped_engine = std::make_unique<RegexpExtractEngine>();
698
64
            bool st = RegexpExtractEngine::compile(pattern, &error_str, *scoped_engine,
699
64
                                                   context->state()->enable_extended_regex());
700
64
            if (!st) {
701
0
                context->add_warning(error_str.c_str());
702
0
                StringOP::push_null_string(index_now, result_data, result_offset, null_map);
703
0
                return;
704
0
            }
705
64
            engine = scoped_engine.get();
706
64
        }
707
708
81
        if (engine->number_of_capturing_groups() == 0) {
709
65
            StringOP::push_empty_string(index_now, result_data, result_offset);
710
65
            return;
711
65
        }
712
16
        const auto& str = str_col->get_data_at(index_now);
713
16
        std::vector<std::string> res_matches;
714
16
        engine->match_all_and_extract(str.data, str.size, res_matches);
715
716
16
        if (res_matches.empty()) {
717
3
            StringOP::push_empty_string(index_now, result_data, result_offset);
718
3
            return;
719
3
        }
720
721
13
        std::string res = "[";
722
40
        for (int j = 0; j < res_matches.size(); ++j) {
723
27
            res += "'" + res_matches[j] + "'";
724
27
            if (j < res_matches.size() - 1) {
725
14
                res += ",";
726
14
            }
727
27
        }
728
13
        res += "]";
729
13
        StringOP::push_value_string(std::string_view(res), index_now, result_data, result_offset);
730
13
    }
731
};
732
733
// template FunctionRegexpFunctionality is used for regexp_xxxx series functions, not for regexp match.
734
template <typename Impl>
735
class FunctionRegexpFunctionality : public IFunction {
736
public:
737
    static constexpr auto name = Impl::name;
738
739
117
    static FunctionPtr create() { return std::make_shared<FunctionRegexpFunctionality>(); }
_ZN5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb1EEEE6createEv
Line
Count
Source
739
30
    static FunctionPtr create() { return std::make_shared<FunctionRegexpFunctionality>(); }
_ZN5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb0EEEE6createEv
Line
Count
Source
739
43
    static FunctionPtr create() { return std::make_shared<FunctionRegexpFunctionality>(); }
_ZN5doris27FunctionRegexpFunctionalityINS_20RegexpExtractAllImplEE6createEv
Line
Count
Source
739
44
    static FunctionPtr create() { return std::make_shared<FunctionRegexpFunctionality>(); }
740
741
3
    String get_name() const override { return name; }
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb1EEEE8get_nameB5cxx11Ev
Line
Count
Source
741
1
    String get_name() const override { return name; }
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb0EEEE8get_nameB5cxx11Ev
Line
Count
Source
741
1
    String get_name() const override { return name; }
_ZNK5doris27FunctionRegexpFunctionalityINS_20RegexpExtractAllImplEE8get_nameB5cxx11Ev
Line
Count
Source
741
1
    String get_name() const override { return name; }
742
743
90
    size_t get_number_of_arguments() const override {
744
90
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
745
35
            return 2;
746
35
        }
747
0
        return 3;
748
90
    }
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb1EEEE23get_number_of_argumentsEv
Line
Count
Source
743
21
    size_t get_number_of_arguments() const override {
744
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
745
            return 2;
746
        }
747
21
        return 3;
748
21
    }
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb0EEEE23get_number_of_argumentsEv
Line
Count
Source
743
34
    size_t get_number_of_arguments() const override {
744
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
745
            return 2;
746
        }
747
34
        return 3;
748
34
    }
_ZNK5doris27FunctionRegexpFunctionalityINS_20RegexpExtractAllImplEE23get_number_of_argumentsEv
Line
Count
Source
743
35
    size_t get_number_of_arguments() const override {
744
35
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
745
35
            return 2;
746
35
        }
747
0
        return 3;
748
35
    }
749
750
90
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
751
90
        return make_nullable(std::make_shared<DataTypeString>());
752
90
    }
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb1EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
750
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
751
21
        return make_nullable(std::make_shared<DataTypeString>());
752
21
    }
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb0EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
750
34
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
751
34
        return make_nullable(std::make_shared<DataTypeString>());
752
34
    }
_ZNK5doris27FunctionRegexpFunctionalityINS_20RegexpExtractAllImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
750
35
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
751
35
        return make_nullable(std::make_shared<DataTypeString>());
752
35
    }
753
754
281
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
755
281
        if (scope == FunctionContext::THREAD_LOCAL) {
756
191
            if (context->is_col_constant(1)) {
757
125
                DCHECK(!context->get_function_state(scope));
758
125
                const auto pattern_col = context->get_constant_col(1)->column_ptr;
759
125
                const auto& pattern = pattern_col->get_data_at(0);
760
125
                if (pattern.size == 0) {
761
3
                    return Status::OK();
762
3
                }
763
764
122
                std::string error_str;
765
122
                auto engine = std::make_shared<RegexpExtractEngine>();
766
122
                bool st = RegexpExtractEngine::compile(pattern, &error_str, *engine,
767
122
                                                       context->state()->enable_extended_regex());
768
122
                if (!st) {
769
3
                    context->set_error(error_str.c_str());
770
3
                    return Status::InvalidArgument(error_str);
771
3
                }
772
119
                context->set_function_state(scope, engine);
773
119
            }
774
191
        }
775
275
        return Status::OK();
776
281
    }
_ZN5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb1EEEE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
754
52
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
755
52
        if (scope == FunctionContext::THREAD_LOCAL) {
756
31
            if (context->is_col_constant(1)) {
757
31
                DCHECK(!context->get_function_state(scope));
758
31
                const auto pattern_col = context->get_constant_col(1)->column_ptr;
759
31
                const auto& pattern = pattern_col->get_data_at(0);
760
31
                if (pattern.size == 0) {
761
1
                    return Status::OK();
762
1
                }
763
764
30
                std::string error_str;
765
30
                auto engine = std::make_shared<RegexpExtractEngine>();
766
30
                bool st = RegexpExtractEngine::compile(pattern, &error_str, *engine,
767
30
                                                       context->state()->enable_extended_regex());
768
30
                if (!st) {
769
1
                    context->set_error(error_str.c_str());
770
1
                    return Status::InvalidArgument(error_str);
771
1
                }
772
29
                context->set_function_state(scope, engine);
773
29
            }
774
31
        }
775
50
        return Status::OK();
776
52
    }
_ZN5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb0EEEE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
754
114
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
755
114
        if (scope == FunctionContext::THREAD_LOCAL) {
756
80
            if (context->is_col_constant(1)) {
757
44
                DCHECK(!context->get_function_state(scope));
758
44
                const auto pattern_col = context->get_constant_col(1)->column_ptr;
759
44
                const auto& pattern = pattern_col->get_data_at(0);
760
44
                if (pattern.size == 0) {
761
1
                    return Status::OK();
762
1
                }
763
764
43
                std::string error_str;
765
43
                auto engine = std::make_shared<RegexpExtractEngine>();
766
43
                bool st = RegexpExtractEngine::compile(pattern, &error_str, *engine,
767
43
                                                       context->state()->enable_extended_regex());
768
43
                if (!st) {
769
1
                    context->set_error(error_str.c_str());
770
1
                    return Status::InvalidArgument(error_str);
771
1
                }
772
42
                context->set_function_state(scope, engine);
773
42
            }
774
80
        }
775
112
        return Status::OK();
776
114
    }
_ZN5doris27FunctionRegexpFunctionalityINS_20RegexpExtractAllImplEE4openEPNS_15FunctionContextENS3_18FunctionStateScopeE
Line
Count
Source
754
115
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
755
115
        if (scope == FunctionContext::THREAD_LOCAL) {
756
80
            if (context->is_col_constant(1)) {
757
50
                DCHECK(!context->get_function_state(scope));
758
50
                const auto pattern_col = context->get_constant_col(1)->column_ptr;
759
50
                const auto& pattern = pattern_col->get_data_at(0);
760
50
                if (pattern.size == 0) {
761
1
                    return Status::OK();
762
1
                }
763
764
49
                std::string error_str;
765
49
                auto engine = std::make_shared<RegexpExtractEngine>();
766
49
                bool st = RegexpExtractEngine::compile(pattern, &error_str, *engine,
767
49
                                                       context->state()->enable_extended_regex());
768
49
                if (!st) {
769
1
                    context->set_error(error_str.c_str());
770
1
                    return Status::InvalidArgument(error_str);
771
1
                }
772
48
                context->set_function_state(scope, engine);
773
48
            }
774
80
        }
775
113
        return Status::OK();
776
115
    }
777
778
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
779
103
                        uint32_t result, size_t input_rows_count) const override {
780
103
        size_t argument_size = arguments.size();
781
782
103
        auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
783
103
        auto result_data_column = ColumnString::create();
784
103
        auto& result_data = result_data_column->get_chars();
785
103
        auto& result_offset = result_data_column->get_offsets();
786
103
        result_offset.resize(input_rows_count);
787
788
103
        bool col_const[3];
789
103
        ColumnPtr argument_columns[3];
790
369
        for (int i = 0; i < argument_size; ++i) {
791
266
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
792
266
        }
793
103
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
794
4
                                                     *block.get_by_position(arguments[0]).column)
795
4
                                                     .convert_to_full_column()
796
103
                                           : block.get_by_position(arguments[0]).column;
797
103
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
798
43
            default_preprocess_parameter_columns(argument_columns, col_const, {1}, block,
799
43
                                                 arguments);
800
60
        } else {
801
60
            default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block,
802
60
                                                 arguments);
803
60
        }
804
805
103
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
806
43
            if (col_const[1]) {
807
8
                Impl::execute_impl_const_args(context, argument_columns, input_rows_count,
808
8
                                              result_data, result_offset,
809
8
                                              result_null_map->get_data());
810
35
            } else {
811
35
                Impl::execute_impl(context, argument_columns, input_rows_count, result_data,
812
35
                                   result_offset, result_null_map->get_data());
813
35
            }
814
60
        } else {
815
60
            if (col_const[1] && col_const[2]) {
816
1
                Impl::execute_impl_const_args(context, argument_columns, input_rows_count,
817
1
                                              result_data, result_offset,
818
1
                                              result_null_map->get_data());
819
59
            } else {
820
59
                Impl::execute_impl(context, argument_columns, input_rows_count, result_data,
821
59
                                   result_offset, result_null_map->get_data());
822
59
            }
823
60
        }
824
825
103
        block.get_by_position(result).column =
826
103
                ColumnNullable::create(std::move(result_data_column), std::move(result_null_map));
827
103
        return Status::OK();
828
103
    }
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb1EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
779
18
                        uint32_t result, size_t input_rows_count) const override {
780
18
        size_t argument_size = arguments.size();
781
782
18
        auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
783
18
        auto result_data_column = ColumnString::create();
784
18
        auto& result_data = result_data_column->get_chars();
785
18
        auto& result_offset = result_data_column->get_offsets();
786
18
        result_offset.resize(input_rows_count);
787
788
18
        bool col_const[3];
789
18
        ColumnPtr argument_columns[3];
790
72
        for (int i = 0; i < argument_size; ++i) {
791
54
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
792
54
        }
793
18
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
794
0
                                                     *block.get_by_position(arguments[0]).column)
795
0
                                                     .convert_to_full_column()
796
18
                                           : block.get_by_position(arguments[0]).column;
797
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
798
            default_preprocess_parameter_columns(argument_columns, col_const, {1}, block,
799
                                                 arguments);
800
18
        } else {
801
18
            default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block,
802
18
                                                 arguments);
803
18
        }
804
805
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
806
            if (col_const[1]) {
807
                Impl::execute_impl_const_args(context, argument_columns, input_rows_count,
808
                                              result_data, result_offset,
809
                                              result_null_map->get_data());
810
            } else {
811
                Impl::execute_impl(context, argument_columns, input_rows_count, result_data,
812
                                   result_offset, result_null_map->get_data());
813
            }
814
18
        } else {
815
18
            if (col_const[1] && col_const[2]) {
816
0
                Impl::execute_impl_const_args(context, argument_columns, input_rows_count,
817
0
                                              result_data, result_offset,
818
0
                                              result_null_map->get_data());
819
18
            } else {
820
18
                Impl::execute_impl(context, argument_columns, input_rows_count, result_data,
821
18
                                   result_offset, result_null_map->get_data());
822
18
            }
823
18
        }
824
825
18
        block.get_by_position(result).column =
826
18
                ColumnNullable::create(std::move(result_data_column), std::move(result_null_map));
827
18
        return Status::OK();
828
18
    }
_ZNK5doris27FunctionRegexpFunctionalityINS_17RegexpExtractImplILb0EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
779
42
                        uint32_t result, size_t input_rows_count) const override {
780
42
        size_t argument_size = arguments.size();
781
782
42
        auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
783
42
        auto result_data_column = ColumnString::create();
784
42
        auto& result_data = result_data_column->get_chars();
785
42
        auto& result_offset = result_data_column->get_offsets();
786
42
        result_offset.resize(input_rows_count);
787
788
42
        bool col_const[3];
789
42
        ColumnPtr argument_columns[3];
790
168
        for (int i = 0; i < argument_size; ++i) {
791
126
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
792
126
        }
793
42
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
794
3
                                                     *block.get_by_position(arguments[0]).column)
795
3
                                                     .convert_to_full_column()
796
42
                                           : block.get_by_position(arguments[0]).column;
797
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
798
            default_preprocess_parameter_columns(argument_columns, col_const, {1}, block,
799
                                                 arguments);
800
42
        } else {
801
42
            default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block,
802
42
                                                 arguments);
803
42
        }
804
805
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
806
            if (col_const[1]) {
807
                Impl::execute_impl_const_args(context, argument_columns, input_rows_count,
808
                                              result_data, result_offset,
809
                                              result_null_map->get_data());
810
            } else {
811
                Impl::execute_impl(context, argument_columns, input_rows_count, result_data,
812
                                   result_offset, result_null_map->get_data());
813
            }
814
42
        } else {
815
42
            if (col_const[1] && col_const[2]) {
816
1
                Impl::execute_impl_const_args(context, argument_columns, input_rows_count,
817
1
                                              result_data, result_offset,
818
1
                                              result_null_map->get_data());
819
41
            } else {
820
41
                Impl::execute_impl(context, argument_columns, input_rows_count, result_data,
821
41
                                   result_offset, result_null_map->get_data());
822
41
            }
823
42
        }
824
825
42
        block.get_by_position(result).column =
826
42
                ColumnNullable::create(std::move(result_data_column), std::move(result_null_map));
827
42
        return Status::OK();
828
42
    }
_ZNK5doris27FunctionRegexpFunctionalityINS_20RegexpExtractAllImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
779
43
                        uint32_t result, size_t input_rows_count) const override {
780
43
        size_t argument_size = arguments.size();
781
782
43
        auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
783
43
        auto result_data_column = ColumnString::create();
784
43
        auto& result_data = result_data_column->get_chars();
785
43
        auto& result_offset = result_data_column->get_offsets();
786
43
        result_offset.resize(input_rows_count);
787
788
43
        bool col_const[3];
789
43
        ColumnPtr argument_columns[3];
790
129
        for (int i = 0; i < argument_size; ++i) {
791
86
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
792
86
        }
793
43
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
794
1
                                                     *block.get_by_position(arguments[0]).column)
795
1
                                                     .convert_to_full_column()
796
43
                                           : block.get_by_position(arguments[0]).column;
797
43
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
798
43
            default_preprocess_parameter_columns(argument_columns, col_const, {1}, block,
799
43
                                                 arguments);
800
        } else {
801
            default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block,
802
                                                 arguments);
803
        }
804
805
43
        if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) {
806
43
            if (col_const[1]) {
807
8
                Impl::execute_impl_const_args(context, argument_columns, input_rows_count,
808
8
                                              result_data, result_offset,
809
8
                                              result_null_map->get_data());
810
35
            } else {
811
35
                Impl::execute_impl(context, argument_columns, input_rows_count, result_data,
812
35
                                   result_offset, result_null_map->get_data());
813
35
            }
814
        } else {
815
            if (col_const[1] && col_const[2]) {
816
                Impl::execute_impl_const_args(context, argument_columns, input_rows_count,
817
                                              result_data, result_offset,
818
                                              result_null_map->get_data());
819
            } else {
820
                Impl::execute_impl(context, argument_columns, input_rows_count, result_data,
821
                                   result_offset, result_null_map->get_data());
822
            }
823
        }
824
825
43
        block.get_by_position(result).column =
826
43
                ColumnNullable::create(std::move(result_data_column), std::move(result_null_map));
827
43
        return Status::OK();
828
43
    }
829
};
830
831
8
void register_function_regexp_extract(SimpleFunctionFactory& factory) {
832
8
    factory.register_function<FunctionRegexpReplace<RegexpReplaceImpl, ThreeParamTypes>>();
833
8
    factory.register_function<FunctionRegexpReplace<RegexpReplaceImpl, FourParamTypes>>();
834
8
    factory.register_function<FunctionRegexpReplace<RegexpReplaceOneImpl, ThreeParamTypes>>();
835
8
    factory.register_function<FunctionRegexpReplace<RegexpReplaceOneImpl, FourParamTypes>>();
836
8
    factory.register_function<FunctionRegexpFunctionality<RegexpExtractImpl<true>>>();
837
8
    factory.register_function<FunctionRegexpFunctionality<RegexpExtractImpl<false>>>();
838
8
    factory.register_function<FunctionRegexpFunctionality<RegexpExtractAllImpl>>();
839
8
    factory.register_function<FunctionRegexpCount>();
840
8
}
841
842
} // namespace doris