Coverage Report

Created: 2025-11-20 13:26

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/vec/functions/like.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <hs/hs_common.h>
21
#include <hs/hs_runtime.h>
22
#include <re2/re2.h>
23
#include <stddef.h>
24
#include <stdint.h>
25
26
#include <algorithm>
27
#include <boost/iterator/iterator_facade.hpp>
28
#include <boost/regex.hpp>
29
#include <functional>
30
#include <memory>
31
#include <string>
32
33
#include "common/status.h"
34
#include "runtime/define_primitive_type.h"
35
#include "runtime/string_search.hpp"
36
#include "udf/udf.h"
37
#include "vec/aggregate_functions/aggregate_function.h"
38
#include "vec/columns/column_string.h"
39
#include "vec/columns/predicate_column.h"
40
#include "vec/common/string_ref.h"
41
#include "vec/core/column_numbers.h"
42
#include "vec/core/types.h"
43
#include "vec/data_types/data_type_number.h"
44
#include "vec/functions/function.h"
45
46
namespace doris {
47
namespace vectorized {
48
class Block;
49
} // namespace vectorized
50
} // namespace doris
51
52
namespace doris::vectorized {
53
54
8
inline std::string replace_pattern_by_escape(const StringRef& pattern, char escape_char) {
55
8
    std::string result;
56
8
    result.reserve(pattern.size);
57
59
    for (size_t i = 0; i < pattern.size; ++i) {
58
51
        if (i + 1 < pattern.size && pattern.data[i] == escape_char &&
59
51
            (pattern.data[i + 1] == escape_char || pattern.data[i + 1] == '%' ||
60
13
             pattern.data[i + 1] == '_')) {
61
            // "^^" -> "^"
62
            // "^%" -> "\%"
63
            // "^_" -> "\_"
64
10
            if ((pattern.data[i + 1] == '%' || pattern.data[i + 1] == '_')) {
65
4
                result.push_back('\\');
66
4
            }
67
10
            result.push_back(pattern.data[i + 1]);
68
10
            ++i; // skip next char
69
41
        } else if (pattern.data[i] == '\\') {
70
            // "\" -> "\\"
71
1
            result.append("\\\\");
72
40
        } else {
73
40
            result.push_back(pattern.data[i]);
74
40
        }
75
51
    }
76
8
    return result;
77
8
}
78
79
// TODO: replace with std::string_view when `LikeSearchState.substring_pattern` can
80
// construct from std::string_view.
81
struct LikeSearchState {
82
    static constexpr char escape_char = '\\';
83
84
    /// Holds the string the StringRef points to and is set any time StringRef is
85
    /// used.
86
    std::string search_string;
87
88
    std::string pattern_str;
89
90
    /// Used for LIKE predicates if the pattern is a constant argument, and is either a
91
    /// constant string or has a constant string at the beginning or end of the pattern.
92
    /// This will be set in order to check for that pattern in the corresponding part of
93
    /// the string.
94
    StringRef search_string_sv;
95
96
    /// Used for LIKE predicates if the pattern is a constant argument and has a constant
97
    /// string in the middle of it. This will be use in order to check for the substring
98
    /// in the value.
99
    doris::StringSearch substring_pattern;
100
101
    /// Used for RLIKE and REGEXP predicates if the pattern is a constant argument.
102
    std::unique_ptr<re2::RE2> regex;
103
104
    /// Used for REGEXP predicates when RE2 doesn't support the pattern (e.g., zero-width assertions like `?=`, `?!`, `?<=`, `?<!`)
105
    std::unique_ptr<boost::regex> boost_regex;
106
107
    template <typename Deleter, Deleter deleter>
108
    struct HyperscanDeleter {
109
        template <typename T>
110
264
        void operator()(T* ptr) const {
111
264
            deleter(ptr);
112
264
        }
_ZNK5doris10vectorized15LikeSearchState16HyperscanDeleterIPFiP10hs_scratchEXadL_Z15hs_free_scratchEEEclIS3_EEvPT_
Line
Count
Source
110
132
        void operator()(T* ptr) const {
111
132
            deleter(ptr);
112
132
        }
_ZNK5doris10vectorized15LikeSearchState16HyperscanDeleterIPFiP11hs_databaseEXadL_Z16hs_free_databaseEEEclIS3_EEvPT_
Line
Count
Source
110
132
        void operator()(T* ptr) const {
111
132
            deleter(ptr);
112
132
        }
113
    };
114
115
    // hyperscan compiled pattern database and scratch space, reused for performance
116
    std::unique_ptr<hs_database_t, HyperscanDeleter<decltype(&hs_free_database), &hs_free_database>>
117
            hs_database;
118
    std::unique_ptr<hs_scratch_t, HyperscanDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>
119
            hs_scratch;
120
121
    // hyperscan match callback
122
    static int hs_match_handler(unsigned int /* from */,       // NOLINT
123
                                unsigned long long /* from */, // NOLINT
124
                                unsigned long long /* to */,   // NOLINT
125
71
                                unsigned int /* flags */, void* ctx) {
126
        // set result to 1 for matched row
127
71
        *((unsigned char*)ctx) = 1;
128
        /// return non-zero to indicate hyperscan stop after first matched
129
71
        return 1;
130
71
    }
131
132
597
    LikeSearchState() = default;
133
134
    Status clone(LikeSearchState& cloned);
135
136
276
    void set_search_string(const std::string& search_string_arg) {
137
276
        search_string = search_string_arg;
138
276
        search_string_sv = StringRef(search_string);
139
276
        substring_pattern.set_pattern(&search_string_sv);
140
276
    }
141
};
142
143
/// Column-level matcher: takes a search state, a string column and a pattern,
/// and writes into a UInt8 result container.
using LikeFn = std::function<doris::Status(
        const LikeSearchState*, const ColumnString&, const StringRef&, ColumnUInt8::Container&)>;

/// Single-value matcher: takes a search state, one value and a pattern, and
/// writes through an `unsigned char*` result.
using ScalarLikeFn = std::function<doris::Status(
        const LikeSearchState*, const StringRef&, const StringRef&, unsigned char*)>;

/// Column-vs-column matcher: takes two string columns and writes into a UInt8
/// result container; no search state is passed.
using VectorLikeFn = std::function<doris::Status(
        const ColumnString&, const ColumnString&, ColumnUInt8::Container&)>;
151
152
struct LikeState {
153
    bool is_like_pattern;
154
    bool has_custom_escape = false;
155
    char escape_char = {};
156
    LikeSearchState search_state;
157
    LikeFn function;
158
    ScalarLikeFn scalar_function;
159
};
160
161
struct VectorPatternSearchState {
162
    MutableColumnPtr _search_strings;
163
    std::string _search_string;
164
    VectorLikeFn _vector_function;
165
    bool _pattern_matched;
166
167
    VectorPatternSearchState(VectorLikeFn vector_function)
168
1.53k
            : _search_strings(ColumnString::create()),
169
1.53k
              _vector_function(vector_function),
170
1.53k
              _pattern_matched(true) {}
171
172
1.53k
    virtual ~VectorPatternSearchState() = default;
173
174
    virtual void like_pattern_match(const std::string& pattern_str) = 0;
175
176
    virtual void regexp_pattern_match(const std::string& pattern_str) = 0;
177
};
178
179
using VPatternSearchStateSPtr = std::shared_ptr<VectorPatternSearchState>;
180
181
class FunctionLikeBase : public IFunction {
182
public:
183
0
    size_t get_number_of_arguments() const override { return 0; }
184
601
    bool is_variadic() const override { return true; }
185
186
599
    DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override {
187
599
        return std::make_shared<DataTypeUInt8>();
188
599
    }
189
190
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
191
                        uint32_t result, size_t /*input_rows_count*/) const override;
192
193
    friend struct VectorAllpassSearchState;
194
    friend struct VectorEqualSearchState;
195
    friend struct VectorSubStringSearchState;
196
    friend struct VectorStartsWithSearchState;
197
    friend struct VectorEndsWithSearchState;
198
199
protected:
200
    Status vector_const(const ColumnString& values, const StringRef* pattern_val,
201
                        ColumnUInt8::Container& result, const LikeFn& function,
202
                        LikeSearchState* search_state) const;
203
204
    Status vector_non_const(const ColumnString& values, const ColumnString& patterns,
205
                            ColumnUInt8::Container& result, LikeState* state,
206
                            size_t input_rows_count) const;
207
208
    Status execute_substring(const ColumnString::Chars& values,
209
                             const ColumnString::Offsets& value_offsets,
210
                             ColumnUInt8::Container& result, LikeSearchState* search_state) const;
211
212
    template <bool LIKE_PATTERN>
213
    static VPatternSearchStateSPtr pattern_type_recognition(const ColumnString& patterns);
214
215
    static Status constant_allpass_fn(const LikeSearchState* state, const ColumnString& val,
216
                                      const StringRef& pattern, ColumnUInt8::Container& result);
217
218
    static Status constant_allpass_fn_scalar(const LikeSearchState* state, const StringRef& val,
219
                                             const StringRef& pattern, unsigned char* result);
220
221
    static Status vector_allpass_fn(const ColumnString& vals, const ColumnString& search_strings,
222
                                    ColumnUInt8::Container& result);
223
224
    static Status constant_starts_with_fn(const LikeSearchState* state, const ColumnString& val,
225
                                          const StringRef& pattern, ColumnUInt8::Container& result);
226
227
    static Status constant_starts_with_fn_scalar(const LikeSearchState* state, const StringRef& val,
228
                                                 const StringRef& pattern, unsigned char* result);
229
230
    static Status vector_starts_with_fn(const ColumnString& vals,
231
                                        const ColumnString& search_strings,
232
                                        ColumnUInt8::Container& result);
233
234
    static Status constant_ends_with_fn(const LikeSearchState* state, const ColumnString& val,
235
                                        const StringRef& pattern, ColumnUInt8::Container& result);
236
237
    static Status constant_ends_with_fn_scalar(const LikeSearchState* state, const StringRef& val,
238
                                               const StringRef& pattern, unsigned char* result);
239
240
    static Status vector_ends_with_fn(const ColumnString& vals, const ColumnString& search_strings,
241
                                      ColumnUInt8::Container& result);
242
243
    static Status constant_equals_fn(const LikeSearchState* state, const ColumnString& val,
244
                                     const StringRef& pattern, ColumnUInt8::Container& result);
245
246
    static Status constant_equals_fn_scalar(const LikeSearchState* state, const StringRef& val,
247
                                            const StringRef& pattern, unsigned char* result);
248
249
    static Status vector_equals_fn(const ColumnString& vals, const ColumnString& search_strings,
250
                                   ColumnUInt8::Container& result);
251
252
    static Status constant_substring_fn(const LikeSearchState* state, const ColumnString& val,
253
                                        const StringRef& pattern, ColumnUInt8::Container& result);
254
255
    static Status constant_substring_fn_scalar(const LikeSearchState* state, const StringRef& val,
256
                                               const StringRef& pattern, unsigned char* result);
257
258
    static Status vector_substring_fn(const ColumnString& vals, const ColumnString& search_strings,
259
                                      ColumnUInt8::Container& result);
260
261
    static Status constant_regex_fn(const LikeSearchState* state, const ColumnString& val,
262
                                    const StringRef& pattern, ColumnUInt8::Container& result);
263
264
    static Status constant_regex_fn_scalar(const LikeSearchState* state, const StringRef& val,
265
                                           const StringRef& pattern, unsigned char* result);
266
267
    static Status regexp_fn(const LikeSearchState* state, const ColumnString& val,
268
                            const StringRef& pattern, ColumnUInt8::Container& result);
269
270
    static Status regexp_fn_scalar(const LikeSearchState* state, const StringRef& val,
271
                                   const StringRef& pattern, unsigned char* result);
272
273
    // hyperscan compile expression to database and allocate scratch space
274
    static Status hs_prepare(FunctionContext* context, const char* expression,
275
                             hs_database_t** database, hs_scratch_t** scratch);
276
};
277
278
class FunctionLike : public FunctionLikeBase {
279
public:
280
    static constexpr auto name = "like";
281
282
540
    static FunctionPtr create() { return std::make_shared<FunctionLike>(); }
283
284
0
    String get_name() const override { return name; }
285
286
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override;
287
288
    static Status construct_like_const_state(FunctionContext* ctx, const StringRef& pattern,
289
                                             std::shared_ptr<LikeState>& state,
290
                                             bool try_hyperscan = true);
291
292
    friend struct LikeSearchState;
293
    friend struct VectorAllpassSearchState;
294
    friend struct VectorEqualSearchState;
295
    friend struct VectorSubStringSearchState;
296
    friend struct VectorStartsWithSearchState;
297
    friend struct VectorEndsWithSearchState;
298
299
private:
300
    static Status like_fn(const LikeSearchState* state, const ColumnString& val,
301
                          const StringRef& pattern, ColumnUInt8::Container& result);
302
303
    static Status like_fn_scalar(const LikeSearchState* state, const StringRef& val,
304
                                 const StringRef& pattern, unsigned char* result);
305
306
    static void convert_like_pattern(const LikeSearchState* state, const std::string& pattern,
307
                                     std::string* re_pattern);
308
309
    static void remove_escape_character(std::string* search_string);
310
};
311
312
class FunctionRegexpLike : public FunctionLikeBase {
313
public:
314
    static constexpr auto name = "regexp";
315
    static constexpr auto alias = "rlike";
316
317
63
    static FunctionPtr create() { return std::make_shared<FunctionRegexpLike>(); }
318
319
0
    String get_name() const override { return name; }
320
321
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override;
322
};
323
324
} // namespace doris::vectorized