Coverage Report

Created: 2025-06-18 21:38

/root/doris/be/src/vec/functions/like.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "like.h"
19
20
#include <fmt/format.h>
21
#include <hs/hs_compile.h>
22
#include <re2/stringpiece.h>
23
24
#include <cstddef>
25
#include <ostream>
26
#include <utility>
27
#include <vector>
28
29
#include "common/logging.h"
30
#include "vec/columns/column.h"
31
#include "vec/columns/column_const.h"
32
#include "vec/columns/column_vector.h"
33
#include "vec/common/string_ref.h"
34
#include "vec/core/block.h"
35
#include "vec/core/column_with_type_and_name.h"
36
#include "vec/functions/simple_function_factory.h"
37
38
namespace doris::vectorized {
39
// A regex to match any regex pattern is equivalent to a substring search.
40
static const RE2 SUBSTRING_RE(R"((?:\.\*)*([^\.\^\{\[\(\|\)\]\}\+\*\?\$\\]*)(?:\.\*)*)");
41
42
// A regex to match any regex pattern which is equivalent to matching a constant string
43
// at the end of the string values.
44
static const RE2 ENDS_WITH_RE(R"((?:\.\*)*([^\.\^\{\[\(\|\)\]\}\+\*\?\$\\]*)\$)");
45
46
// A regex to match any regex pattern which is equivalent to matching a constant string
47
// at the end of the string values.
48
static const RE2 STARTS_WITH_RE(R"(\^([^\.\^\{\[\(\|\)\]\}\+\*\?\$\\]*)(?:\.\*)*)");
49
50
// A regex to match any regex pattern which is equivalent to a constant string match.
51
static const RE2 EQUALS_RE(R"(\^([^\.\^\{\[\(\|\)\]\}\+\*\?\$\\]*)\$)");
52
// A regex to match .*
53
static const RE2 ALLPASS_RE(R"((\.\*)+)");
54
55
// Like patterns
56
static const re2::RE2 LIKE_SUBSTRING_RE(R"((?:%+)(((\\_)|([^%_\\]))+)(?:%+))");
57
static const re2::RE2 LIKE_ENDS_WITH_RE("(?:%+)(((\\\\_)|([^%_]))+)");
58
static const re2::RE2 LIKE_STARTS_WITH_RE(R"((((\\%)|(\\_)|([^%_\\]))+)(?:%+))");
59
static const re2::RE2 LIKE_EQUALS_RE("(((\\\\_)|([^%_]))+)");
60
static const re2::RE2 LIKE_ALLPASS_RE("%+");
61
62
struct VectorAllpassSearchState : public VectorPatternSearchState {
63
12
    VectorAllpassSearchState() : VectorPatternSearchState(FunctionLikeBase::vector_allpass_fn) {}
64
65
12
    ~VectorAllpassSearchState() override = default;
66
67
1
    void like_pattern_match(const std::string& pattern_str) override {
68
1
        if (!pattern_str.empty() && RE2::FullMatch(pattern_str, LIKE_ALLPASS_RE)) {
69
0
            _search_strings->insert_default();
70
1
        } else {
71
1
            _pattern_matched = false;
72
1
        }
73
1
    }
74
75
11
    void regexp_pattern_match(const std::string& pattern_str) override {
76
11
        if (RE2::FullMatch(pattern_str, ALLPASS_RE)) {
77
0
            _search_strings->insert_default();
78
11
        } else {
79
11
            _pattern_matched = false;
80
11
        }
81
11
    }
82
};
83
84
struct VectorEqualSearchState : public VectorPatternSearchState {
85
12
    VectorEqualSearchState() : VectorPatternSearchState(FunctionLikeBase::vector_equals_fn) {}
86
87
12
    ~VectorEqualSearchState() override = default;
88
89
1
    void like_pattern_match(const std::string& pattern_str) override {
90
1
        _search_string.clear();
91
1
        if (pattern_str.empty() || RE2::FullMatch(pattern_str, LIKE_EQUALS_RE, &_search_string)) {
92
0
            FunctionLike::remove_escape_character(&_search_string);
93
0
            _search_strings->insert_data(_search_string.c_str(), _search_string.size());
94
1
        } else {
95
1
            _pattern_matched = false;
96
1
        }
97
1
    }
98
99
11
    void regexp_pattern_match(const std::string& pattern_str) override {
100
11
        _search_string.clear();
101
11
        if (RE2::FullMatch(pattern_str, EQUALS_RE, &_search_string)) {
102
2
            _search_strings->insert_data(_search_string.c_str(), _search_string.size());
103
9
        } else {
104
9
            _pattern_matched = false;
105
9
        }
106
11
    }
107
};
108
109
struct VectorSubStringSearchState : public VectorPatternSearchState {
110
    VectorSubStringSearchState()
111
12
            : VectorPatternSearchState(FunctionLikeBase::vector_substring_fn) {}
112
113
12
    ~VectorSubStringSearchState() override = default;
114
115
3
    void like_pattern_match(const std::string& pattern_str) override {
116
3
        _search_string.clear();
117
3
        if (RE2::FullMatch(pattern_str, LIKE_SUBSTRING_RE, &_search_string)) {
118
2
            FunctionLike::remove_escape_character(&_search_string);
119
2
            _search_strings->insert_data(_search_string.c_str(), _search_string.size());
120
2
        } else {
121
1
            _pattern_matched = false;
122
1
        }
123
3
    }
124
125
11
    void regexp_pattern_match(const std::string& pattern_str) override {
126
11
        _search_string.clear();
127
11
        if (RE2::FullMatch(pattern_str, SUBSTRING_RE, &_search_string)) {
128
0
            _search_strings->insert_data(_search_string.c_str(), _search_string.size());
129
11
        } else {
130
11
            _pattern_matched = false;
131
11
        }
132
11
    }
133
};
134
135
struct VectorStartsWithSearchState : public VectorPatternSearchState {
136
    VectorStartsWithSearchState()
137
12
            : VectorPatternSearchState(FunctionLikeBase::vector_starts_with_fn) {}
138
139
12
    ~VectorStartsWithSearchState() override = default;
140
141
1
    void like_pattern_match(const std::string& pattern_str) override {
142
1
        _search_string.clear();
143
1
        if (RE2::FullMatch(pattern_str, LIKE_STARTS_WITH_RE, &_search_string)) {
144
0
            FunctionLike::remove_escape_character(&_search_string);
145
0
            _search_strings->insert_data(_search_string.c_str(), _search_string.size());
146
1
        } else {
147
1
            _pattern_matched = false;
148
1
        }
149
1
    }
150
151
11
    void regexp_pattern_match(const std::string& pattern_str) override {
152
11
        _search_string.clear();
153
11
        if (RE2::FullMatch(pattern_str, STARTS_WITH_RE, &_search_string)) {
154
2
            _search_strings->insert_data(_search_string.c_str(), _search_string.size());
155
9
        } else {
156
9
            _pattern_matched = false;
157
9
        }
158
11
    }
159
};
160
161
struct VectorEndsWithSearchState : public VectorPatternSearchState {
162
12
    VectorEndsWithSearchState() : VectorPatternSearchState(FunctionLikeBase::vector_ends_with_fn) {}
163
164
12
    ~VectorEndsWithSearchState() override = default;
165
166
1
    void like_pattern_match(const std::string& pattern_str) override {
167
1
        _search_string.clear();
168
1
        if (RE2::FullMatch(pattern_str, LIKE_ENDS_WITH_RE, &_search_string)) {
169
0
            FunctionLike::remove_escape_character(&_search_string);
170
0
            _search_strings->insert_data(_search_string.c_str(), _search_string.size());
171
1
        } else {
172
1
            _pattern_matched = false;
173
1
        }
174
1
    }
175
176
11
    void regexp_pattern_match(const std::string& pattern_str) override {
177
11
        _search_string.clear();
178
11
        if (RE2::FullMatch(pattern_str, ENDS_WITH_RE, &_search_string)) {
179
2
            _search_strings->insert_data(_search_string.c_str(), _search_string.size());
180
9
        } else {
181
9
            _pattern_matched = false;
182
9
        }
183
11
    }
184
};
185
186
0
Status LikeSearchState::clone(LikeSearchState& cloned) {
187
0
    cloned.escape_char = escape_char;
188
0
    cloned.set_search_string(search_string);
189
190
0
    std::string re_pattern;
191
0
    FunctionLike::convert_like_pattern(this, pattern_str, &re_pattern);
192
0
    if (hs_database) { // use hyperscan
193
0
        hs_database_t* database = nullptr;
194
0
        hs_scratch_t* scratch = nullptr;
195
0
        RETURN_IF_ERROR(FunctionLike::hs_prepare(nullptr, re_pattern.c_str(), &database, &scratch));
196
197
0
        cloned.hs_database.reset(database);
198
0
        cloned.hs_scratch.reset(scratch);
199
0
    } else { // fallback to re2
200
0
        cloned.hs_database.reset();
201
0
        cloned.hs_scratch.reset();
202
203
0
        RE2::Options opts;
204
0
        opts.set_never_nl(false);
205
0
        opts.set_dot_nl(true);
206
0
        cloned.regex = std::make_unique<RE2>(re_pattern, opts);
207
0
        if (!cloned.regex->ok()) {
208
0
            return Status::InternalError("Invalid regex expression: {}", re_pattern);
209
0
        }
210
0
    }
211
212
0
    return Status::OK();
213
0
}
214
215
Status FunctionLikeBase::constant_allpass_fn(LikeSearchState* state, const ColumnString& vals,
216
                                             const StringRef& pattern,
217
0
                                             ColumnUInt8::Container& result) {
218
0
    memset(result.data(), 1, vals.size());
219
0
    return Status::OK();
220
0
}
221
222
Status FunctionLikeBase::constant_allpass_fn_scalar(LikeSearchState* state, const StringRef& val,
223
                                                    const StringRef& pattern,
224
0
                                                    unsigned char* result) {
225
0
    *result = 1;
226
0
    return Status::OK();
227
0
}
228
229
Status FunctionLikeBase::vector_allpass_fn(const ColumnString& vals,
230
                                           const ColumnString& search_strings,
231
0
                                           ColumnUInt8::Container& result) {
232
0
    DCHECK(vals.size() == search_strings.size());
233
0
    DCHECK(vals.size() == result.size());
234
0
    memset(result.data(), 1, vals.size());
235
0
    return Status::OK();
236
0
}
237
238
Status FunctionLikeBase::constant_starts_with_fn(LikeSearchState* state, const ColumnString& val,
239
                                                 const StringRef& pattern,
240
0
                                                 ColumnUInt8::Container& result) {
241
0
    auto sz = val.size();
242
0
    for (size_t i = 0; i < sz; i++) {
243
0
        const auto& str_ref = val.get_data_at(i);
244
0
        result[i] = (str_ref.size >= state->search_string_sv.size) &&
245
0
                    str_ref.start_with(state->search_string_sv);
246
0
    }
247
0
    return Status::OK();
248
0
}
249
250
Status FunctionLikeBase::constant_starts_with_fn_scalar(LikeSearchState* state,
251
                                                        const StringRef& val,
252
                                                        const StringRef& pattern,
253
0
                                                        unsigned char* result) {
254
0
    *result = (val.size >= state->search_string_sv.size) &&
255
0
              (state->search_string_sv == val.substring(0, state->search_string_sv.size));
256
0
    return Status::OK();
257
0
}
258
259
Status FunctionLikeBase::vector_starts_with_fn(const ColumnString& vals,
260
                                               const ColumnString& search_strings,
261
2
                                               ColumnUInt8::Container& result) {
262
2
    DCHECK(vals.size() == search_strings.size());
263
2
    DCHECK(vals.size() == result.size());
264
2
    auto sz = vals.size();
265
4
    for (size_t i = 0; i < sz; ++i) {
266
2
        const auto& str_sv = vals.get_data_at(i);
267
2
        const auto& search_string_sv = search_strings.get_data_at(i);
268
2
        result[i] = (str_sv.size >= search_string_sv.size) && str_sv.start_with(search_string_sv);
269
2
    }
270
2
    return Status::OK();
271
2
}
272
273
Status FunctionLikeBase::constant_ends_with_fn(LikeSearchState* state, const ColumnString& val,
274
                                               const StringRef& pattern,
275
0
                                               ColumnUInt8::Container& result) {
276
0
    auto sz = val.size();
277
0
    for (size_t i = 0; i < sz; i++) {
278
0
        const auto& str_ref = val.get_data_at(i);
279
0
        result[i] = (str_ref.size >= state->search_string_sv.size) &&
280
0
                    str_ref.end_with(state->search_string_sv);
281
0
    }
282
0
    return Status::OK();
283
0
}
284
285
Status FunctionLikeBase::constant_ends_with_fn_scalar(LikeSearchState* state, const StringRef& val,
286
                                                      const StringRef& pattern,
287
0
                                                      unsigned char* result) {
288
0
    *result = (val.size >= state->search_string_sv.size) &&
289
0
              (state->search_string_sv == val.substring(val.size - state->search_string_sv.size,
290
0
                                                        state->search_string_sv.size));
291
0
    return Status::OK();
292
0
}
293
294
Status FunctionLikeBase::vector_ends_with_fn(const ColumnString& vals,
295
                                             const ColumnString& search_strings,
296
2
                                             ColumnUInt8::Container& result) {
297
2
    DCHECK(vals.size() == search_strings.size());
298
2
    DCHECK(vals.size() == result.size());
299
2
    auto sz = vals.size();
300
4
    for (size_t i = 0; i < sz; ++i) {
301
2
        const auto& str_sv = vals.get_data_at(i);
302
2
        const auto& search_string_sv = search_strings.get_data_at(i);
303
2
        result[i] = (str_sv.size >= search_string_sv.size) && str_sv.end_with(search_string_sv);
304
2
    }
305
2
    return Status::OK();
306
2
}
307
308
Status FunctionLikeBase::constant_equals_fn(LikeSearchState* state, const ColumnString& val,
309
                                            const StringRef& pattern,
310
0
                                            ColumnUInt8::Container& result) {
311
0
    auto sz = val.size();
312
0
    for (size_t i = 0; i < sz; i++) {
313
0
        result[i] = (val.get_data_at(i) == state->search_string_sv);
314
0
    }
315
0
    return Status::OK();
316
0
}
317
318
Status FunctionLikeBase::constant_equals_fn_scalar(LikeSearchState* state, const StringRef& val,
319
                                                   const StringRef& pattern,
320
0
                                                   unsigned char* result) {
321
0
    *result = (val == state->search_string_sv);
322
0
    return Status::OK();
323
0
}
324
325
Status FunctionLikeBase::vector_equals_fn(const ColumnString& vals,
326
                                          const ColumnString& search_strings,
327
2
                                          ColumnUInt8::Container& result) {
328
2
    DCHECK(vals.size() == search_strings.size());
329
2
    DCHECK(vals.size() == result.size());
330
2
    auto sz = vals.size();
331
4
    for (size_t i = 0; i < sz; ++i) {
332
2
        const auto& str_sv = vals.get_data_at(i);
333
2
        const auto& search_string_sv = search_strings.get_data_at(i);
334
2
        result[i] = str_sv == search_string_sv;
335
2
    }
336
2
    return Status::OK();
337
2
}
338
339
Status FunctionLikeBase::constant_substring_fn(LikeSearchState* state, const ColumnString& val,
340
                                               const StringRef& pattern,
341
0
                                               ColumnUInt8::Container& result) {
342
0
    auto sz = val.size();
343
0
    for (size_t i = 0; i < sz; i++) {
344
0
        if (state->search_string_sv.size == 0) {
345
0
            result[i] = true;
346
0
            continue;
347
0
        }
348
0
        result[i] = state->substring_pattern.search(val.get_data_at(i)) != -1;
349
0
    }
350
0
    return Status::OK();
351
0
}
352
353
Status FunctionLikeBase::constant_substring_fn_scalar(LikeSearchState* state, const StringRef& val,
354
                                                      const StringRef& pattern,
355
0
                                                      unsigned char* result) {
356
0
    if (state->search_string_sv.size == 0) {
357
0
        *result = true;
358
0
        return Status::OK();
359
0
    }
360
0
    *result = state->substring_pattern.search(val) != -1;
361
0
    return Status::OK();
362
0
}
363
364
Status FunctionLikeBase::vector_substring_fn(const ColumnString& vals,
365
                                             const ColumnString& search_strings,
366
0
                                             ColumnUInt8::Container& result) {
367
0
    DCHECK(vals.size() == search_strings.size());
368
0
    DCHECK(vals.size() == result.size());
369
0
    auto sz = vals.size();
370
0
    for (size_t i = 0; i < sz; ++i) {
371
0
        const auto& str_sv = vals.get_data_at(i);
372
0
        const auto& search_string_sv = search_strings.get_data_at(i);
373
0
        if (search_string_sv.size == 0) {
374
0
            result[i] = true;
375
0
            continue;
376
0
        }
377
0
        doris::StringSearch substring_search(&search_string_sv);
378
0
        result[i] = substring_search.search(str_sv) != -1;
379
0
    }
380
0
    return Status::OK();
381
0
}
382
383
Status FunctionLikeBase::constant_regex_fn_scalar(LikeSearchState* state, const StringRef& val,
384
5
                                                  const StringRef& pattern, unsigned char* result) {
385
5
    if (state->hs_database) { // use hyperscan
386
5
        auto ret = hs_scan(state->hs_database.get(), val.data, val.size, 0, state->hs_scratch.get(),
387
5
                           doris::vectorized::LikeSearchState::hs_match_handler, (void*)result);
388
5
        if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
389
0
            return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
390
0
        }
391
5
    } else { // fallback to re2
392
0
        *result = RE2::PartialMatch(re2::StringPiece(val.data, val.size), *state->regex);
393
0
    }
394
395
5
    return Status::OK();
396
5
}
397
398
Status FunctionLikeBase::regexp_fn_scalar(LikeSearchState* state, const StringRef& val,
399
30
                                          const StringRef& pattern, unsigned char* result) {
400
30
    RE2::Options opts;
401
30
    opts.set_never_nl(false);
402
30
    opts.set_dot_nl(true);
403
30
    re2::RE2 re(re2::StringPiece(pattern.data, pattern.size), opts);
404
30
    if (re.ok()) {
405
30
        *result = RE2::PartialMatch(re2::StringPiece(val.data, val.size), re);
406
30
    } else {
407
0
        return Status::RuntimeError("Invalid pattern: {}", pattern.debug_string());
408
0
    }
409
410
30
    return Status::OK();
411
30
}
412
413
Status FunctionLikeBase::constant_regex_fn(LikeSearchState* state, const ColumnString& val,
414
                                           const StringRef& pattern,
415
0
                                           ColumnUInt8::Container& result) {
416
0
    auto sz = val.size();
417
0
    if (state->hs_database) { // use hyperscan
418
0
        for (size_t i = 0; i < sz; i++) {
419
0
            const auto& str_ref = val.get_data_at(i);
420
0
            auto ret = hs_scan(state->hs_database.get(), str_ref.data, str_ref.size, 0,
421
0
                               state->hs_scratch.get(),
422
0
                               doris::vectorized::LikeSearchState::hs_match_handler,
423
0
                               (void*)(result.data() + i));
424
0
            if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
425
0
                return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
426
0
            }
427
0
        }
428
0
    } else { // fallback to re2
429
0
        for (size_t i = 0; i < sz; i++) {
430
0
            const auto& str_ref = val.get_data_at(i);
431
0
            *(result.data() + i) =
432
0
                    RE2::PartialMatch(re2::StringPiece(str_ref.data, str_ref.size), *state->regex);
433
0
        }
434
0
    }
435
436
0
    return Status::OK();
437
0
}
438
439
Status FunctionLikeBase::regexp_fn(LikeSearchState* state, const ColumnString& val,
440
0
                                   const StringRef& pattern, ColumnUInt8::Container& result) {
441
0
    std::string re_pattern(pattern.data, pattern.size);
442
443
0
    hs_database_t* database = nullptr;
444
0
    hs_scratch_t* scratch = nullptr;
445
0
    if (hs_prepare(nullptr, re_pattern.c_str(), &database, &scratch).ok()) { // use hyperscan
446
0
        auto sz = val.size();
447
0
        for (size_t i = 0; i < sz; i++) {
448
0
            const auto& str_ref = val.get_data_at(i);
449
0
            auto ret = hs_scan(database, str_ref.data, str_ref.size, 0, scratch,
450
0
                               doris::vectorized::LikeSearchState::hs_match_handler,
451
0
                               (void*)(result.data() + i));
452
0
            if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
453
0
                return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
454
0
            }
455
0
        }
456
457
0
        hs_free_scratch(scratch);
458
0
        hs_free_database(database);
459
0
    } else { // fallback to re2
460
0
        RE2::Options opts;
461
0
        opts.set_never_nl(false);
462
0
        opts.set_dot_nl(true);
463
0
        re2::RE2 re(re_pattern, opts);
464
0
        if (re.ok()) {
465
0
            auto sz = val.size();
466
0
            for (size_t i = 0; i < sz; i++) {
467
0
                const auto& str_ref = val.get_data_at(i);
468
0
                *(result.data() + i) =
469
0
                        RE2::PartialMatch(re2::StringPiece(str_ref.data, str_ref.size), re);
470
0
            }
471
0
        } else {
472
0
            return Status::RuntimeError("Invalid pattern: {}", pattern.debug_string());
473
0
        }
474
0
    }
475
476
0
    return Status::OK();
477
0
}
478
479
// hyperscan compile expression to database and allocate scratch space
480
Status FunctionLikeBase::hs_prepare(FunctionContext* context, const char* expression,
481
5
                                    hs_database_t** database, hs_scratch_t** scratch) {
482
5
    hs_compile_error_t* compile_err;
483
5
    auto res = hs_compile(expression, HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8,
484
5
                          HS_MODE_BLOCK, nullptr, database, &compile_err);
485
486
5
    if (res != HS_SUCCESS) {
487
0
        *database = nullptr;
488
0
        std::string error_message = compile_err->message;
489
0
        hs_free_compile_error(compile_err);
490
        // Do not call FunctionContext::set_error here, since we do not want to cancel the query here.
491
0
        return Status::RuntimeError<false>("hs_compile regex pattern error:" + error_message);
492
0
    }
493
5
    hs_free_compile_error(compile_err);
494
495
5
    if (hs_alloc_scratch(*database, scratch) != HS_SUCCESS) {
496
0
        hs_free_database(*database);
497
0
        *database = nullptr;
498
0
        *scratch = nullptr;
499
        // Do not call FunctionContext::set_error here, since we do not want to cancel the query here.
500
0
        return Status::RuntimeError<false>("hs_alloc_scratch allocate scratch space error");
501
0
    }
502
503
5
    return Status::OK();
504
5
}
505
506
Status FunctionLikeBase::execute_impl(FunctionContext* context, Block& block,
507
                                      const ColumnNumbers& arguments, uint32_t result,
508
16
                                      size_t input_rows_count) const {
509
16
    const auto values_col =
510
16
            block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
511
16
    const auto* values = check_and_get_column<ColumnString>(values_col.get());
512
513
16
    if (!values) {
514
0
        return Status::InternalError("Not supported input arguments types");
515
0
    }
516
    // result column
517
16
    auto res = ColumnUInt8::create();
518
16
    ColumnUInt8::Container& vec_res = res->get_data();
519
    // set default value to 0, and match functions only need to set 1/true
520
16
    vec_res.resize_fill(input_rows_count);
521
16
    auto* state = reinterpret_cast<LikeState*>(
522
16
            context->get_function_state(FunctionContext::THREAD_LOCAL));
523
    // for constant_substring_fn, use long run length search for performance
524
16
    if (constant_substring_fn ==
525
16
        *(state->function.target<doris::Status (*)(LikeSearchState* state, const ColumnString&,
526
16
                                                   const StringRef&, ColumnUInt8::Container&)>())) {
527
4
        RETURN_IF_ERROR(execute_substring(values->get_chars(), values->get_offsets(), vec_res,
528
4
                                          &state->search_state));
529
12
    } else {
530
12
        const auto pattern_col = block.get_by_position(arguments[1]).column;
531
12
        if (const auto* str_patterns = check_and_get_column<ColumnString>(pattern_col.get())) {
532
12
            RETURN_IF_ERROR(
533
12
                    vector_non_const(*values, *str_patterns, vec_res, state, input_rows_count));
534
12
        } else if (const auto* const_patterns =
535
0
                           check_and_get_column<ColumnConst>(pattern_col.get())) {
536
0
            const auto& pattern_val = const_patterns->get_data_at(0);
537
0
            RETURN_IF_ERROR(vector_const(*values, &pattern_val, vec_res, state->function,
538
0
                                         &state->search_state));
539
0
        } else {
540
0
            return Status::InternalError("Not supported input arguments types");
541
0
        }
542
12
    }
543
16
    block.replace_by_position(result, std::move(res));
544
16
    return Status::OK();
545
16
}
546
547
Status FunctionLikeBase::execute_substring(const ColumnString::Chars& values,
548
                                           const ColumnString::Offsets& value_offsets,
549
                                           ColumnUInt8::Container& result,
550
4
                                           LikeSearchState* search_state) const {
551
    // treat continuous multi string data as a long string data
552
4
    const UInt8* begin = values.data();
553
4
    const UInt8* end = begin + values.size();
554
4
    const UInt8* pos = begin;
555
556
    /// Current index in the array of strings.
557
4
    size_t i = 0;
558
4
    size_t needle_size = search_state->substring_pattern.get_pattern_length();
559
560
    /// We will search for the next occurrence in all strings at once.
561
7
    while (pos < end) {
562
        // search return matched substring start offset
563
4
        pos = (UInt8*)search_state->substring_pattern.search((char*)pos, end - pos);
564
4
        if (pos >= end) {
565
1
            break;
566
1
        }
567
568
        /// Determine which index it refers to.
569
        /// begin + value_offsets[i] is the start offset of string at i+1
570
3
        while (i < value_offsets.size() && begin + value_offsets[i] < pos) {
571
0
            ++i;
572
0
        }
573
574
        /// We check that the entry does not pass through the boundaries of strings.
575
3
        if (pos + needle_size <= begin + value_offsets[i]) {
576
3
            result[i] = 1;
577
3
        }
578
579
        // move to next string offset
580
3
        pos = begin + value_offsets[i];
581
3
        ++i;
582
3
    }
583
584
4
    return Status::OK();
585
4
}
586
587
Status FunctionLikeBase::vector_const(const ColumnString& values, const StringRef* pattern_val,
588
                                      ColumnUInt8::Container& result, const LikeFn& function,
589
0
                                      LikeSearchState* search_state) const {
590
0
    RETURN_IF_ERROR((function)(search_state, values,
591
0
                               *reinterpret_cast<const StringRef*>(pattern_val), result));
592
0
    return Status::OK();
593
0
}
594
595
template <bool LIKE_PATTERN>
596
12
VPatternSearchStateSPtr FunctionLikeBase::pattern_type_recognition(const ColumnString& patterns) {
597
12
    VPatternSearchStateSPtr allpass_state = std::make_shared<VectorAllpassSearchState>();
598
12
    VPatternSearchStateSPtr equal_state = std::make_shared<VectorEqualSearchState>();
599
12
    VPatternSearchStateSPtr substring_state = std::make_shared<VectorSubStringSearchState>();
600
12
    VPatternSearchStateSPtr starts_with_state = std::make_shared<VectorStartsWithSearchState>();
601
12
    VPatternSearchStateSPtr ends_with_state = std::make_shared<VectorEndsWithSearchState>();
602
12
    size_t size = patterns.size();
603
604
26
    for (size_t i = 0; i < size; ++i) {
605
15
        if (!allpass_state->_pattern_matched && !equal_state->_pattern_matched &&
606
15
            !substring_state->_pattern_matched && !starts_with_state->_pattern_matched &&
607
15
            !ends_with_state->_pattern_matched) {
608
1
            return nullptr;
609
1
        }
610
14
        std::string pattern_str = patterns.get_data_at(i).to_string();
611
14
        if (allpass_state->_pattern_matched) {
612
12
            if constexpr (LIKE_PATTERN) {
613
11
                allpass_state->like_pattern_match(pattern_str);
614
11
            } else {
615
11
                allpass_state->regexp_pattern_match(pattern_str);
616
11
            }
617
12
        }
618
14
        if (equal_state->_pattern_matched) {
619
12
            if constexpr (LIKE_PATTERN) {
620
11
                equal_state->like_pattern_match(pattern_str);
621
11
            } else {
622
11
                equal_state->regexp_pattern_match(pattern_str);
623
11
            }
624
12
        }
625
14
        if (substring_state->_pattern_matched) {
626
14
            if constexpr (LIKE_PATTERN) {
627
11
                substring_state->like_pattern_match(pattern_str);
628
11
            } else {
629
11
                substring_state->regexp_pattern_match(pattern_str);
630
11
            }
631
14
        }
632
14
        if (starts_with_state->_pattern_matched) {
633
12
            if constexpr (LIKE_PATTERN) {
634
11
                starts_with_state->like_pattern_match(pattern_str);
635
11
            } else {
636
11
                starts_with_state->regexp_pattern_match(pattern_str);
637
11
            }
638
12
        }
639
14
        if (ends_with_state->_pattern_matched) {
640
12
            if constexpr (LIKE_PATTERN) {
641
11
                ends_with_state->like_pattern_match(pattern_str);
642
11
            } else {
643
11
                ends_with_state->regexp_pattern_match(pattern_str);
644
11
            }
645
12
        }
646
14
    }
647
648
11
    if (allpass_state->_pattern_matched) {
649
0
        return allpass_state;
650
11
    } else if (equal_state->_pattern_matched) {
651
2
        return equal_state;
652
9
    } else if (substring_state->_pattern_matched) {
653
0
        return substring_state;
654
9
    } else if (starts_with_state->_pattern_matched) {
655
2
        return starts_with_state;
656
7
    } else if (ends_with_state->_pattern_matched) {
657
2
        return ends_with_state;
658
5
    } else {
659
5
        return nullptr;
660
5
    }
661
11
}
_ZN5doris10vectorized16FunctionLikeBase24pattern_type_recognitionILb1EEESt10shared_ptrINS0_24VectorPatternSearchStateEERKNS0_9ColumnStrIjEE
Line
Count
Source
596
1
VPatternSearchStateSPtr FunctionLikeBase::pattern_type_recognition(const ColumnString& patterns) {
597
1
    VPatternSearchStateSPtr allpass_state = std::make_shared<VectorAllpassSearchState>();
598
1
    VPatternSearchStateSPtr equal_state = std::make_shared<VectorEqualSearchState>();
599
1
    VPatternSearchStateSPtr substring_state = std::make_shared<VectorSubStringSearchState>();
600
1
    VPatternSearchStateSPtr starts_with_state = std::make_shared<VectorStartsWithSearchState>();
601
1
    VPatternSearchStateSPtr ends_with_state = std::make_shared<VectorEndsWithSearchState>();
602
1
    size_t size = patterns.size();
603
604
4
    for (size_t i = 0; i < size; ++i) {
605
4
        if (!allpass_state->_pattern_matched && !equal_state->_pattern_matched &&
606
4
            !substring_state->_pattern_matched && !starts_with_state->_pattern_matched &&
607
4
            !ends_with_state->_pattern_matched) {
608
1
            return nullptr;
609
1
        }
610
3
        std::string pattern_str = patterns.get_data_at(i).to_string();
611
3
        if (allpass_state->_pattern_matched) {
612
1
            if constexpr (LIKE_PATTERN) {
613
1
                allpass_state->like_pattern_match(pattern_str);
614
1
            } else {
615
1
                allpass_state->regexp_pattern_match(pattern_str);
616
1
            }
617
1
        }
618
3
        if (equal_state->_pattern_matched) {
619
1
            if constexpr (LIKE_PATTERN) {
620
1
                equal_state->like_pattern_match(pattern_str);
621
1
            } else {
622
1
                equal_state->regexp_pattern_match(pattern_str);
623
1
            }
624
1
        }
625
3
        if (substring_state->_pattern_matched) {
626
3
            if constexpr (LIKE_PATTERN) {
627
3
                substring_state->like_pattern_match(pattern_str);
628
3
            } else {
629
3
                substring_state->regexp_pattern_match(pattern_str);
630
3
            }
631
3
        }
632
3
        if (starts_with_state->_pattern_matched) {
633
1
            if constexpr (LIKE_PATTERN) {
634
1
                starts_with_state->like_pattern_match(pattern_str);
635
1
            } else {
636
1
                starts_with_state->regexp_pattern_match(pattern_str);
637
1
            }
638
1
        }
639
3
        if (ends_with_state->_pattern_matched) {
640
1
            if constexpr (LIKE_PATTERN) {
641
1
                ends_with_state->like_pattern_match(pattern_str);
642
1
            } else {
643
1
                ends_with_state->regexp_pattern_match(pattern_str);
644
1
            }
645
1
        }
646
3
    }
647
648
0
    if (allpass_state->_pattern_matched) {
649
0
        return allpass_state;
650
0
    } else if (equal_state->_pattern_matched) {
651
0
        return equal_state;
652
0
    } else if (substring_state->_pattern_matched) {
653
0
        return substring_state;
654
0
    } else if (starts_with_state->_pattern_matched) {
655
0
        return starts_with_state;
656
0
    } else if (ends_with_state->_pattern_matched) {
657
0
        return ends_with_state;
658
0
    } else {
659
0
        return nullptr;
660
0
    }
661
0
}
_ZN5doris10vectorized16FunctionLikeBase24pattern_type_recognitionILb0EEESt10shared_ptrINS0_24VectorPatternSearchStateEERKNS0_9ColumnStrIjEE
Line
Count
Source
596
11
VPatternSearchStateSPtr FunctionLikeBase::pattern_type_recognition(const ColumnString& patterns) {
597
11
    VPatternSearchStateSPtr allpass_state = std::make_shared<VectorAllpassSearchState>();
598
11
    VPatternSearchStateSPtr equal_state = std::make_shared<VectorEqualSearchState>();
599
11
    VPatternSearchStateSPtr substring_state = std::make_shared<VectorSubStringSearchState>();
600
11
    VPatternSearchStateSPtr starts_with_state = std::make_shared<VectorStartsWithSearchState>();
601
11
    VPatternSearchStateSPtr ends_with_state = std::make_shared<VectorEndsWithSearchState>();
602
11
    size_t size = patterns.size();
603
604
22
    for (size_t i = 0; i < size; ++i) {
605
11
        if (!allpass_state->_pattern_matched && !equal_state->_pattern_matched &&
606
11
            !substring_state->_pattern_matched && !starts_with_state->_pattern_matched &&
607
11
            !ends_with_state->_pattern_matched) {
608
0
            return nullptr;
609
0
        }
610
11
        std::string pattern_str = patterns.get_data_at(i).to_string();
611
11
        if (allpass_state->_pattern_matched) {
612
11
            if constexpr (LIKE_PATTERN) {
613
11
                allpass_state->like_pattern_match(pattern_str);
614
11
            } else {
615
11
                allpass_state->regexp_pattern_match(pattern_str);
616
11
            }
617
11
        }
618
11
        if (equal_state->_pattern_matched) {
619
11
            if constexpr (LIKE_PATTERN) {
620
11
                equal_state->like_pattern_match(pattern_str);
621
11
            } else {
622
11
                equal_state->regexp_pattern_match(pattern_str);
623
11
            }
624
11
        }
625
11
        if (substring_state->_pattern_matched) {
626
11
            if constexpr (LIKE_PATTERN) {
627
11
                substring_state->like_pattern_match(pattern_str);
628
11
            } else {
629
11
                substring_state->regexp_pattern_match(pattern_str);
630
11
            }
631
11
        }
632
11
        if (starts_with_state->_pattern_matched) {
633
11
            if constexpr (LIKE_PATTERN) {
634
11
                starts_with_state->like_pattern_match(pattern_str);
635
11
            } else {
636
11
                starts_with_state->regexp_pattern_match(pattern_str);
637
11
            }
638
11
        }
639
11
        if (ends_with_state->_pattern_matched) {
640
11
            if constexpr (LIKE_PATTERN) {
641
11
                ends_with_state->like_pattern_match(pattern_str);
642
11
            } else {
643
11
                ends_with_state->regexp_pattern_match(pattern_str);
644
11
            }
645
11
        }
646
11
    }
647
648
11
    if (allpass_state->_pattern_matched) {
649
0
        return allpass_state;
650
11
    } else if (equal_state->_pattern_matched) {
651
2
        return equal_state;
652
9
    } else if (substring_state->_pattern_matched) {
653
0
        return substring_state;
654
9
    } else if (starts_with_state->_pattern_matched) {
655
2
        return starts_with_state;
656
7
    } else if (ends_with_state->_pattern_matched) {
657
2
        return ends_with_state;
658
5
    } else {
659
5
        return nullptr;
660
5
    }
661
11
}
662
663
Status FunctionLikeBase::vector_non_const(const ColumnString& values, const ColumnString& patterns,
664
                                          ColumnUInt8::Container& result, LikeState* state,
665
12
                                          size_t input_rows_count) const {
666
12
    VPatternSearchStateSPtr vector_search_state;
667
12
    if (state->is_like_pattern) {
668
1
        vector_search_state = pattern_type_recognition<true>(patterns);
669
11
    } else {
670
11
        vector_search_state = pattern_type_recognition<false>(patterns);
671
11
    }
672
12
    if (vector_search_state == nullptr) {
673
        // pattern type recognition failed, use default case
674
41
        for (int i = 0; i < input_rows_count; ++i) {
675
35
            const auto pattern_val = patterns.get_data_at(i);
676
35
            const auto value_val = values.get_data_at(i);
677
35
            RETURN_IF_ERROR((state->scalar_function)(&state->search_state, value_val, pattern_val,
678
35
                                                     &result[i]));
679
35
        }
680
6
        return Status::OK();
681
6
    }
682
6
    const auto* search_strings =
683
6
            static_cast<const ColumnString*>(vector_search_state->_search_strings.get());
684
6
    return (vector_search_state->_vector_function)(values, *search_strings, result);
685
12
}
686
687
Status FunctionLike::like_fn(LikeSearchState* state, const ColumnString& val,
688
0
                             const StringRef& pattern, ColumnUInt8::Container& result) {
689
0
    std::string re_pattern;
690
0
    convert_like_pattern(state, std::string(pattern.data, pattern.size), &re_pattern);
691
0
    return regexp_fn(state, val, {re_pattern.c_str(), re_pattern.size()}, result);
692
0
}
693
694
Status FunctionLike::like_fn_scalar(LikeSearchState* state, const StringRef& val,
695
30
                                    const StringRef& pattern, unsigned char* result) {
696
30
    std::string re_pattern;
697
30
    convert_like_pattern(state, std::string(pattern.data, pattern.size), &re_pattern);
698
699
30
    return regexp_fn_scalar(state, StringRef(val.data, val.size),
700
30
                            {re_pattern.c_str(), re_pattern.size()}, result);
701
30
}
702
703
void FunctionLike::convert_like_pattern(LikeSearchState* state, const std::string& pattern,
704
30
                                        std::string* re_pattern) {
705
30
    re_pattern->clear();
706
707
30
    if (pattern.empty()) {
708
4
        re_pattern->append("^$");
709
4
        return;
710
4
    }
711
712
    // add ^ to pattern head to match line head
713
26
    if (!pattern.empty() && pattern[0] != '%') {
714
17
        re_pattern->append("^");
715
17
    }
716
717
26
    bool is_escaped = false;
718
    // expect % and _, all chars should keep it literal means.
719
107
    for (char i : pattern) {
720
107
        if (is_escaped) { // last is \, this should be escape
721
4
            if (i == '[' || i == ']' || i == '(' || i == ')' || i == '{' || i == '}' || i == '-' ||
722
4
                i == '*' || i == '+' || i == '\\' || i == '|' || i == '/' || i == ':' || i == '^' ||
723
4
                i == '.' || i == '$' || i == '?') {
724
0
                re_pattern->append(1, '\\');
725
4
            } else if (i != '%' && i != '_') {
726
4
                re_pattern->append(2, '\\');
727
4
            }
728
4
            re_pattern->append(1, i);
729
4
            is_escaped = false;
730
103
        } else {
731
103
            switch (i) {
732
22
            case '%':
733
22
                re_pattern->append(".*");
734
22
                break;
735
21
            case '_':
736
21
                re_pattern->append(".");
737
21
                break;
738
60
            default:
739
60
                is_escaped = i == state->escape_char;
740
60
                if (!is_escaped) {
741
                    // special for hyperscan: [, ], (, ), {, }, -, *, +, \, |, /, :, ^, ., $, ?
742
56
                    if (i == '[' || i == ']' || i == '(' || i == ')' || i == '{' || i == '}' ||
743
56
                        i == '-' || i == '*' || i == '+' || i == '\\' || i == '|' || i == '/' ||
744
56
                        i == ':' || i == '^' || i == '.' || i == '$' || i == '?') {
745
7
                        re_pattern->append(1, '\\');
746
7
                    }
747
56
                    re_pattern->append(1, i);
748
56
                }
749
60
                break;
750
103
            }
751
103
        }
752
107
    }
753
754
    // add $ to pattern tail to match line tail
755
26
    if (!pattern.empty() && re_pattern->back() != '*') {
756
13
        re_pattern->append("$");
757
13
    }
758
26
}
759
760
2
void FunctionLike::remove_escape_character(std::string* search_string) {
761
2
    std::string tmp_search_string;
762
2
    tmp_search_string.swap(*search_string);
763
2
    int len = tmp_search_string.length();
764
5
    for (int i = 0; i < len;) {
765
3
        if (tmp_search_string[i] == '\\' && i + 1 < len &&
766
3
            (tmp_search_string[i + 1] == '%' || tmp_search_string[i + 1] == '_' ||
767
0
             tmp_search_string[i + 1] == '\\')) {
768
0
            search_string->append(1, tmp_search_string[i + 1]);
769
0
            i += 2;
770
3
        } else {
771
3
            search_string->append(1, tmp_search_string[i]);
772
3
            i++;
773
3
        }
774
3
    }
775
2
}
776
777
0
bool re2_full_match(const std::string& str, const RE2& re, std::vector<std::string>& results) {
778
0
    if (!re.ok()) {
779
0
        return false;
780
0
    }
781
782
0
    std::vector<RE2::Arg> arguments;
783
0
    std::vector<RE2::Arg*> arguments_ptrs;
784
0
    std::size_t args_count = re.NumberOfCapturingGroups();
785
0
    arguments.resize(args_count);
786
0
    arguments_ptrs.resize(args_count);
787
0
    results.resize(args_count);
788
0
    for (std::size_t i = 0; i < args_count; ++i) {
789
0
        arguments[i] = &results[i];
790
0
        arguments_ptrs[i] = &arguments[i];
791
0
    }
792
793
0
    return RE2::FullMatchN(str, re, arguments_ptrs.data(), args_count);
794
0
}
795
796
0
void verbose_log_match(const std::string& str, const std::string& pattern_name, const RE2& re) {
797
0
    std::vector<std::string> results;
798
0
    VLOG_DEBUG << "arg str: " << str << ", size: " << str.size() << ", pattern " << pattern_name
799
0
               << ": " << re.pattern() << ", size: " << re.pattern().size();
800
0
    if (re2_full_match(str, re, results)) {
801
0
        for (int i = 0; i < results.size(); ++i) {
802
0
            VLOG_DEBUG << "match " << i << ": " << results[i] << ", size: " << results[i].size();
803
0
        }
804
0
    } else {
805
0
        VLOG_DEBUG << "no match";
806
0
    }
807
0
}
808
809
Status FunctionLike::construct_like_const_state(FunctionContext* context, const StringRef& pattern,
810
                                                std::shared_ptr<LikeState>& state,
811
0
                                                bool try_hyperscan) {
812
0
    std::string pattern_str = pattern.to_string();
813
0
    state->search_state.pattern_str = pattern_str;
814
0
    std::string search_string;
815
816
0
    if (!pattern_str.empty() && RE2::FullMatch(pattern_str, LIKE_ALLPASS_RE)) {
817
0
        state->search_state.set_search_string("");
818
0
        state->function = constant_allpass_fn;
819
0
        state->scalar_function = constant_allpass_fn_scalar;
820
0
    } else if (pattern_str.empty() || RE2::FullMatch(pattern_str, LIKE_EQUALS_RE, &search_string)) {
821
0
        if (VLOG_DEBUG_IS_ON) {
822
0
            verbose_log_match(pattern_str, "LIKE_EQUALS_RE", LIKE_EQUALS_RE);
823
0
            VLOG_DEBUG << "search_string : " << search_string << ", size: " << search_string.size();
824
0
        }
825
0
        remove_escape_character(&search_string);
826
0
        if (VLOG_DEBUG_IS_ON) {
827
0
            VLOG_DEBUG << "search_string escape removed: " << search_string
828
0
                       << ", size: " << search_string.size();
829
0
        }
830
0
        state->search_state.set_search_string(search_string);
831
0
        state->function = constant_equals_fn;
832
0
        state->scalar_function = constant_equals_fn_scalar;
833
0
    } else if (RE2::FullMatch(pattern_str, LIKE_STARTS_WITH_RE, &search_string)) {
834
0
        if (VLOG_DEBUG_IS_ON) {
835
0
            verbose_log_match(pattern_str, "LIKE_STARTS_WITH_RE", LIKE_STARTS_WITH_RE);
836
0
            VLOG_DEBUG << "search_string : " << search_string << ", size: " << search_string.size();
837
0
        }
838
0
        remove_escape_character(&search_string);
839
0
        if (VLOG_DEBUG_IS_ON) {
840
0
            VLOG_DEBUG << "search_string escape removed: " << search_string
841
0
                       << ", size: " << search_string.size();
842
0
        }
843
0
        state->search_state.set_search_string(search_string);
844
0
        state->function = constant_starts_with_fn;
845
0
        state->scalar_function = constant_starts_with_fn_scalar;
846
0
    } else if (RE2::FullMatch(pattern_str, LIKE_ENDS_WITH_RE, &search_string)) {
847
0
        if (VLOG_DEBUG_IS_ON) {
848
0
            verbose_log_match(pattern_str, "LIKE_ENDS_WITH_RE", LIKE_ENDS_WITH_RE);
849
0
            VLOG_DEBUG << "search_string : " << search_string << ", size: " << search_string.size();
850
0
        }
851
0
        remove_escape_character(&search_string);
852
0
        if (VLOG_DEBUG_IS_ON) {
853
0
            VLOG_DEBUG << "search_string escape removed: " << search_string
854
0
                       << ", size: " << search_string.size();
855
0
        }
856
0
        state->search_state.set_search_string(search_string);
857
0
        state->function = constant_ends_with_fn;
858
0
        state->scalar_function = constant_ends_with_fn_scalar;
859
0
    } else if (RE2::FullMatch(pattern_str, LIKE_SUBSTRING_RE, &search_string)) {
860
0
        if (VLOG_DEBUG_IS_ON) {
861
0
            verbose_log_match(pattern_str, "LIKE_SUBSTRING_RE", LIKE_SUBSTRING_RE);
862
0
            VLOG_DEBUG << "search_string : " << search_string << ", size: " << search_string.size();
863
0
        }
864
0
        remove_escape_character(&search_string);
865
0
        if (VLOG_DEBUG_IS_ON) {
866
0
            VLOG_DEBUG << "search_string escape removed: " << search_string
867
0
                       << ", size: " << search_string.size();
868
0
        }
869
0
        state->search_state.set_search_string(search_string);
870
0
        state->function = constant_substring_fn;
871
0
        state->scalar_function = constant_substring_fn_scalar;
872
0
    } else {
873
0
        std::string re_pattern;
874
0
        convert_like_pattern(&state->search_state, pattern_str, &re_pattern);
875
0
        if (VLOG_DEBUG_IS_ON) {
876
0
            VLOG_DEBUG << "hyperscan, pattern str: " << pattern_str
877
0
                       << ", size: " << pattern_str.size() << ", re pattern: " << re_pattern
878
0
                       << ", size: " << re_pattern.size();
879
0
        }
880
881
0
        hs_database_t* database = nullptr;
882
0
        hs_scratch_t* scratch = nullptr;
883
0
        if (try_hyperscan && hs_prepare(context, re_pattern.c_str(), &database, &scratch).ok()) {
884
            // use hyperscan
885
0
            state->search_state.hs_database.reset(database);
886
0
            state->search_state.hs_scratch.reset(scratch);
887
0
        } else {
888
            // fallback to re2
889
            // reset hs_database to nullptr to indicate not use hyperscan
890
0
            state->search_state.hs_database.reset();
891
0
            state->search_state.hs_scratch.reset();
892
893
0
            RE2::Options opts;
894
0
            opts.set_never_nl(false);
895
0
            opts.set_dot_nl(true);
896
0
            state->search_state.regex = std::make_unique<RE2>(re_pattern, opts);
897
0
            if (!state->search_state.regex->ok()) {
898
0
                return Status::InternalError("Invalid regex expression: {}(origin: {})", re_pattern,
899
0
                                             pattern_str);
900
0
            }
901
0
        }
902
903
0
        state->function = constant_regex_fn;
904
0
        state->scalar_function = constant_regex_fn_scalar;
905
0
    }
906
0
    return Status::OK();
907
0
}
908
909
2
Status FunctionLike::open(FunctionContext* context, FunctionContext::FunctionStateScope scope) {
910
2
    if (scope != FunctionContext::THREAD_LOCAL) {
911
1
        return Status::OK();
912
1
    }
913
1
    std::shared_ptr<LikeState> state = std::make_shared<LikeState>();
914
1
    state->is_like_pattern = true;
915
1
    state->function = like_fn;
916
1
    state->scalar_function = like_fn_scalar;
917
1
    if (context->is_col_constant(1)) {
918
0
        const auto pattern_col = context->get_constant_col(1)->column_ptr;
919
0
        const auto& pattern = pattern_col->get_data_at(0);
920
0
        RETURN_IF_ERROR(construct_like_const_state(context, pattern, state));
921
0
    }
922
1
    context->set_function_state(scope, state);
923
924
1
    return Status::OK();
925
1
}
926
927
Status FunctionRegexpLike::open(FunctionContext* context,
928
34
                                FunctionContext::FunctionStateScope scope) {
929
34
    if (scope != FunctionContext::THREAD_LOCAL) {
930
17
        return Status::OK();
931
17
    }
932
17
    std::shared_ptr<LikeState> state = std::make_shared<LikeState>();
933
17
    context->set_function_state(scope, state);
934
17
    state->is_like_pattern = false;
935
17
    state->function = regexp_fn;
936
17
    state->scalar_function = regexp_fn_scalar;
937
17
    if (context->is_col_constant(1)) {
938
17
        const auto pattern_col = context->get_constant_col(1)->column_ptr;
939
17
        const auto& pattern = pattern_col->get_data_at(0);
940
941
17
        std::string pattern_str = pattern.to_string();
942
17
        std::string search_string;
943
17
        if (RE2::FullMatch(pattern_str, ALLPASS_RE)) {
944
0
            state->search_state.set_search_string("");
945
0
            state->function = constant_allpass_fn;
946
0
            state->scalar_function = constant_allpass_fn_scalar;
947
17
        } else if (RE2::FullMatch(pattern_str, EQUALS_RE, &search_string)) {
948
2
            state->search_state.set_search_string(search_string);
949
2
            state->function = constant_equals_fn;
950
2
            state->scalar_function = constant_equals_fn_scalar;
951
15
        } else if (RE2::FullMatch(pattern_str, STARTS_WITH_RE, &search_string)) {
952
2
            state->search_state.set_search_string(search_string);
953
2
            state->function = constant_starts_with_fn;
954
2
            state->scalar_function = constant_starts_with_fn_scalar;
955
13
        } else if (RE2::FullMatch(pattern_str, ENDS_WITH_RE, &search_string)) {
956
2
            state->search_state.set_search_string(search_string);
957
2
            state->function = constant_ends_with_fn;
958
2
            state->scalar_function = constant_ends_with_fn_scalar;
959
11
        } else if (RE2::FullMatch(pattern_str, SUBSTRING_RE, &search_string)) {
960
6
            state->search_state.set_search_string(search_string);
961
6
            state->function = constant_substring_fn;
962
6
            state->scalar_function = constant_substring_fn_scalar;
963
6
        } else {
964
5
            hs_database_t* database = nullptr;
965
5
            hs_scratch_t* scratch = nullptr;
966
5
            if (hs_prepare(context, pattern_str.c_str(), &database, &scratch).ok()) {
967
                // use hyperscan
968
5
                state->search_state.hs_database.reset(database);
969
5
                state->search_state.hs_scratch.reset(scratch);
970
5
            } else {
971
                // fallback to re2
972
                // reset hs_database to nullptr to indicate not use hyperscan
973
0
                state->search_state.hs_database.reset();
974
0
                state->search_state.hs_scratch.reset();
975
0
                RE2::Options opts;
976
0
                opts.set_never_nl(false);
977
0
                opts.set_dot_nl(true);
978
0
                state->search_state.regex = std::make_unique<RE2>(pattern_str, opts);
979
0
                if (!state->search_state.regex->ok()) {
980
0
                    return Status::InternalError("Invalid regex expression: {}", pattern_str);
981
0
                }
982
0
            }
983
5
            state->function = constant_regex_fn;
984
5
            state->scalar_function = constant_regex_fn_scalar;
985
5
        }
986
17
    }
987
17
    return Status::OK();
988
17
}
989
990
1
void register_function_like(SimpleFunctionFactory& factory) {
991
1
    factory.register_function<FunctionLike>();
992
1
}
993
994
1
void register_function_regexp(SimpleFunctionFactory& factory) {
995
1
    factory.register_function<FunctionRegexpLike>();
996
1
    factory.register_alias(FunctionRegexpLike::name, FunctionRegexpLike::alias);
997
1
}
998
999
} // namespace doris::vectorized