Coverage Report

Created: 2024-11-21 21:13

/root/doris/be/src/vec/functions/like.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <hs/hs_common.h>
21
#include <hs/hs_runtime.h>
22
#include <re2/re2.h>
23
#include <stddef.h>
24
#include <stdint.h>
25
26
#include <algorithm>
#include <boost/iterator/iterator_facade.hpp>
#include <functional>
#include <memory>
#include <string>
#include <utility>
31
32
#include "common/status.h"
33
#include "runtime/define_primitive_type.h"
34
#include "runtime/string_search.hpp"
35
#include "udf/udf.h"
36
#include "vec/aggregate_functions/aggregate_function.h"
37
#include "vec/columns/column_string.h"
38
#include "vec/columns/columns_number.h"
39
#include "vec/columns/predicate_column.h"
40
#include "vec/common/string_ref.h"
41
#include "vec/core/column_numbers.h"
42
#include "vec/core/types.h"
43
#include "vec/data_types/data_type_number.h"
44
#include "vec/functions/function.h"
45
46
// Forward declaration: this header only refers to Block by reference.
namespace doris::vectorized {
class Block;
} // namespace doris::vectorized
51
52
namespace doris::vectorized {
53
54
// TODO: replace with std::string_view when `LikeSearchState.substring_pattern` can
55
// construct from std::string_view.
56
struct LikeSearchState {
57
    char escape_char;
58
59
    /// Holds the string the StringRef points to and is set any time StringRef is
60
    /// used.
61
    std::string search_string;
62
63
    std::string pattern_str;
64
65
    /// Used for LIKE predicates if the pattern is a constant argument, and is either a
66
    /// constant string or has a constant string at the beginning or end of the pattern.
67
    /// This will be set in order to check for that pattern in the corresponding part of
68
    /// the string.
69
    StringRef search_string_sv;
70
71
    /// Used for LIKE predicates if the pattern is a constant argument and has a constant
72
    /// string in the middle of it. This will be use in order to check for the substring
73
    /// in the value.
74
    doris::StringSearch substring_pattern;
75
76
    /// Used for RLIKE and REGEXP predicates if the pattern is a constant argument.
77
    std::unique_ptr<re2::RE2> regex;
78
79
    template <typename Deleter, Deleter deleter>
80
    struct HyperscanDeleter {
81
        template <typename T>
82
10
        void operator()(T* ptr) const {
83
10
            deleter(ptr);
84
10
        }
_ZNK5doris10vectorized15LikeSearchState16HyperscanDeleterIPFiP11hs_databaseEXadL_Z16hs_free_databaseEEEclIS3_EEvPT_
Line
Count
Source
82
5
        void operator()(T* ptr) const {
83
5
            deleter(ptr);
84
5
        }
_ZNK5doris10vectorized15LikeSearchState16HyperscanDeleterIPFiP10hs_scratchEXadL_Z15hs_free_scratchEEEclIS3_EEvPT_
Line
Count
Source
82
5
        void operator()(T* ptr) const {
83
5
            deleter(ptr);
84
5
        }
85
    };
86
87
    // hyperscan compiled pattern database and scratch space, reused for performance
88
    std::unique_ptr<hs_database_t, HyperscanDeleter<decltype(&hs_free_database), &hs_free_database>>
89
            hs_database;
90
    std::unique_ptr<hs_scratch_t, HyperscanDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>
91
            hs_scratch;
92
93
    // hyperscan match callback
94
    static int hs_match_handler(unsigned int /* from */,       // NOLINT
95
                                unsigned long long /* from */, // NOLINT
96
                                unsigned long long /* to */,   // NOLINT
97
3
                                unsigned int /* flags */, void* ctx) {
98
        // set result to 1 for matched row
99
3
        *((unsigned char*)ctx) = 1;
100
        /// return non-zero to indicate hyperscan stop after first matched
101
3
        return 1;
102
3
    }
103
104
18
    LikeSearchState() : escape_char('\\') {}
105
106
    Status clone(LikeSearchState& cloned);
107
108
12
    void set_search_string(const std::string& search_string_arg) {
109
12
        search_string = search_string_arg;
110
12
        search_string_sv = StringRef(search_string);
111
12
        substring_pattern.set_pattern(&search_string_sv);
112
12
    }
113
};
114
115
/// Column-level matcher for a single pattern. Same parameter list as the
/// `constant_*_fn` members of FunctionLikeBase: (state, val, pattern, result).
using LikeFn = std::function<doris::Status(LikeSearchState*,
                                           const ColumnString&,
                                           const StringRef&,
                                           ColumnUInt8::Container&)>;

/// Single-value matcher. Same parameter list as the `*_fn_scalar` members of
/// FunctionLikeBase: (state, val, pattern, result).
using ScalarLikeFn = std::function<doris::Status(LikeSearchState*,
                                                 const StringRef&,
                                                 const StringRef&,
                                                 unsigned char*)>;

/// Column-versus-column matcher. Same parameter list as the `vector_*_fn` members of
/// FunctionLikeBase: (vals, search_strings, result).
using VectorLikeFn = std::function<doris::Status(const ColumnString&,
                                                 const ColumnString&,
                                                 ColumnUInt8::Container&)>;
123
124
struct LikeState {
125
    bool is_like_pattern;
126
    LikeSearchState search_state;
127
    LikeFn function;
128
    ScalarLikeFn scalar_function;
129
};
130
131
struct VectorPatternSearchState {
132
    MutableColumnPtr _search_strings;
133
    std::string _search_string;
134
    VectorLikeFn _vector_function;
135
    bool _pattern_matched;
136
137
    VectorPatternSearchState(VectorLikeFn vector_function)
138
            : _search_strings(ColumnString::create()),
139
              _vector_function(vector_function),
140
60
              _pattern_matched(true) {}
141
142
60
    virtual ~VectorPatternSearchState() = default;
143
144
    virtual void like_pattern_match(const std::string& pattern_str) = 0;
145
146
    virtual void regexp_pattern_match(const std::string& pattern_str) = 0;
147
};
148
149
using VPatternSearchStateSPtr = std::shared_ptr<VectorPatternSearchState>;
150
151
class FunctionLikeBase : public IFunction {
152
public:
153
18
    size_t get_number_of_arguments() const override { return 2; }
154
155
18
    DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override {
156
18
        return std::make_shared<DataTypeUInt8>();
157
18
    }
158
159
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
160
                        uint32_t result, size_t /*input_rows_count*/) const override;
161
162
    Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override;
163
164
    friend struct VectorAllpassSearchState;
165
    friend struct VectorEqualSearchState;
166
    friend struct VectorSubStringSearchState;
167
    friend struct VectorStartsWithSearchState;
168
    friend struct VectorEndsWithSearchState;
169
170
protected:
171
    Status vector_const(const ColumnString& values, const StringRef* pattern_val,
172
                        ColumnUInt8::Container& result, const LikeFn& function,
173
                        LikeSearchState* search_state) const;
174
175
    Status vector_non_const(const ColumnString& values, const ColumnString& patterns,
176
                            ColumnUInt8::Container& result, LikeState* state,
177
                            size_t input_rows_count) const;
178
179
    Status execute_substring(const ColumnString::Chars& values,
180
                             const ColumnString::Offsets& value_offsets,
181
                             ColumnUInt8::Container& result, LikeSearchState* search_state) const;
182
183
    template <bool LIKE_PATTERN>
184
    static VPatternSearchStateSPtr pattern_type_recognition(const ColumnString& patterns);
185
186
    static Status constant_allpass_fn(LikeSearchState* state, const ColumnString& val,
187
                                      const StringRef& pattern, ColumnUInt8::Container& result);
188
189
    static Status constant_allpass_fn_scalar(LikeSearchState* state, const StringRef& val,
190
                                             const StringRef& pattern, unsigned char* result);
191
192
    static Status vector_allpass_fn(const ColumnString& vals, const ColumnString& search_strings,
193
                                    ColumnUInt8::Container& result);
194
195
    static Status constant_starts_with_fn(LikeSearchState* state, const ColumnString& val,
196
                                          const StringRef& pattern, ColumnUInt8::Container& result);
197
198
    static Status constant_starts_with_fn_scalar(LikeSearchState* state, const StringRef& val,
199
                                                 const StringRef& pattern, unsigned char* result);
200
201
    static Status vector_starts_with_fn(const ColumnString& vals,
202
                                        const ColumnString& search_strings,
203
                                        ColumnUInt8::Container& result);
204
205
    static Status constant_ends_with_fn(LikeSearchState* state, const ColumnString& val,
206
                                        const StringRef& pattern, ColumnUInt8::Container& result);
207
208
    static Status constant_ends_with_fn_scalar(LikeSearchState* state, const StringRef& val,
209
                                               const StringRef& pattern, unsigned char* result);
210
211
    static Status vector_ends_with_fn(const ColumnString& vals, const ColumnString& search_strings,
212
                                      ColumnUInt8::Container& result);
213
214
    static Status constant_equals_fn(LikeSearchState* state, const ColumnString& val,
215
                                     const StringRef& pattern, ColumnUInt8::Container& result);
216
217
    static Status constant_equals_fn_scalar(LikeSearchState* state, const StringRef& val,
218
                                            const StringRef& pattern, unsigned char* result);
219
220
    static Status vector_equals_fn(const ColumnString& vals, const ColumnString& search_strings,
221
                                   ColumnUInt8::Container& result);
222
223
    static Status constant_substring_fn(LikeSearchState* state, const ColumnString& val,
224
                                        const StringRef& pattern, ColumnUInt8::Container& result);
225
226
    static Status constant_substring_fn_scalar(LikeSearchState* state, const StringRef& val,
227
                                               const StringRef& pattern, unsigned char* result);
228
229
    static Status vector_substring_fn(const ColumnString& vals, const ColumnString& search_strings,
230
                                      ColumnUInt8::Container& result);
231
232
    static Status constant_regex_fn(LikeSearchState* state, const ColumnString& val,
233
                                    const StringRef& pattern, ColumnUInt8::Container& result);
234
235
    static Status constant_regex_fn_scalar(LikeSearchState* state, const StringRef& val,
236
                                           const StringRef& pattern, unsigned char* result);
237
238
    static Status regexp_fn(LikeSearchState* state, const ColumnString& val,
239
                            const StringRef& pattern, ColumnUInt8::Container& result);
240
241
    static Status regexp_fn_scalar(LikeSearchState* state, const StringRef& val,
242
                                   const StringRef& pattern, unsigned char* result);
243
244
    // hyperscan compile expression to database and allocate scratch space
245
    static Status hs_prepare(FunctionContext* context, const char* expression,
246
                             hs_database_t** database, hs_scratch_t** scratch);
247
};
248
249
class FunctionLike : public FunctionLikeBase {
250
public:
251
    static constexpr auto name = "like";
252
253
2
    static FunctionPtr create() { return std::make_shared<FunctionLike>(); }
254
255
0
    String get_name() const override { return name; }
256
257
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override;
258
259
    static Status construct_like_const_state(FunctionContext* ctx, const StringRef& pattern,
260
                                             std::shared_ptr<LikeState>& state,
261
                                             bool try_hyperscan = true);
262
263
    friend struct LikeSearchState;
264
    friend struct VectorAllpassSearchState;
265
    friend struct VectorEqualSearchState;
266
    friend struct VectorSubStringSearchState;
267
    friend struct VectorStartsWithSearchState;
268
    friend struct VectorEndsWithSearchState;
269
270
private:
271
    static Status like_fn(LikeSearchState* state, const ColumnString& val, const StringRef& pattern,
272
                          ColumnUInt8::Container& result);
273
274
    static Status like_fn_scalar(LikeSearchState* state, const StringRef& val,
275
                                 const StringRef& pattern, unsigned char* result);
276
277
    static void convert_like_pattern(LikeSearchState* state, const std::string& pattern,
278
                                     std::string* re_pattern);
279
280
    static void remove_escape_character(std::string* search_string);
281
};
282
283
class FunctionRegexp : public FunctionLikeBase {
284
public:
285
    static constexpr auto name = "regexp";
286
    static constexpr auto alias = "rlike";
287
288
18
    static FunctionPtr create() { return std::make_shared<FunctionRegexp>(); }
289
290
0
    String get_name() const override { return name; }
291
292
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override;
293
};
294
295
} // namespace doris::vectorized