Coverage Report

Created: 2025-12-31 18:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/vec/functions/match.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <stddef.h>
21
22
#include <algorithm>
23
#include <boost/iterator/iterator_facade.hpp>
24
#include <memory>
25
#include <ostream>
26
#include <string>
27
#include <utility>
28
29
#include "common/config.h"
30
#include "common/consts.h"
31
#include "common/logging.h"
32
#include "common/status.h"
33
#include "olap/inverted_index_parser.h"
34
#include "olap/rowset/segment_v2/inverted_index/query/query_info.h"
35
#include "olap/rowset/segment_v2/inverted_index_reader.h"
36
#include "vec/aggregate_functions/aggregate_function.h"
37
#include "vec/columns/column.h"
38
#include "vec/columns/column_array.h"
39
#include "vec/core/block.h"
40
#include "vec/core/column_numbers.h"
41
#include "vec/core/column_with_type_and_name.h"
42
#include "vec/core/types.h"
43
#include "vec/data_types/data_type_number.h"
44
#include "vec/exprs/vmatch_predicate.h"
45
#include "vec/functions/function.h"
46
#include "vec/functions/simple_function_factory.h"
47
48
// Forward declaration only: in this header FunctionContext is used solely
// through pointers (FunctionContext*) in the member declarations below.
namespace doris {
49
class FunctionContext;
50
} // namespace doris
51
52
namespace doris::vectorized {
53
54
// Makes segment_v2 names (e.g. the unqualified InvertedIndexAnalyzerCtx used
// below) visible inside doris::vectorized.
// NOTE(review): this is a using-directive in a header (#pragma once above), so
// every file that includes this header also gets segment_v2 names injected
// into doris::vectorized lookups. Consider qualifying the names instead.
using namespace segment_v2;
55
56
// Names of the match function family as std::string constants. Each value is
// equal to the `name` member of the corresponding FunctionMatch* class
// declared later in this header.
// NOTE(review): these are non-inline `const std::string` objects at namespace
// scope in a header, so each including translation unit constructs its own
// copy of every string.
const std::string MATCH_ANY_FUNCTION = "match_any";
57
const std::string MATCH_ALL_FUNCTION = "match_all";
58
const std::string MATCH_PHRASE_FUNCTION = "match_phrase";
59
const std::string MATCH_PHRASE_PREFIX_FUNCTION = "match_phrase_prefix";
60
// Note the identifier says PHRASE_REGEXP but the value is "match_regexp"
// (no "phrase"), matching FunctionMatchRegexp::name.
const std::string MATCH_PHRASE_REGEXP_FUNCTION = "match_regexp";
61
const std::string MATCH_PHRASE_EDGE_FUNCTION = "match_phrase_edge";
62
63
/// Common base class of the match_* functions declared in this header.
///
/// It fixes the arity (2) and the result type (DataTypeUInt8), declares the
/// shared helpers, and leaves the actual matching to the pure virtual
/// execute_match(), which every FunctionMatch* subclass below overrides.
/// Only the three inline members have bodies here; the remaining members are
/// declarations without a body in this header.
class FunctionMatchBase : public IFunction {
64
public:
65
0
    /// Every match function takes exactly two arguments.
    size_t get_number_of_arguments() const override { return 2; }
66
67
0
    /// Generic name "match"; each subclass in this header overrides this to
    /// return its own `name` constant.
    String get_name() const override { return "match"; }
68
69
    /// Get result types by argument types. This override does not inspect
    /// `arguments`: it unconditionally returns DataTypeUInt8.
70
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
71
0
        return std::make_shared<DataTypeUInt8>();
72
0
    }
73
74
    /// IFunction execution entry point (override); declared here without a body.
    /// `arguments` and `result` are presumably column positions inside `block`
    /// — TODO confirm against IFunction.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
75
                        uint32_t result, size_t input_rows_count) const override;
76
77
    /// Per-function matching hook; pure virtual, overridden by every
    /// FunctionMatch* subclass in this header.
    ///
    /// `result` is a mutable ColumnUInt8 container, i.e. the output parameter.
    /// NOTE(review): presumably it receives one match flag per input row, and
    /// `array_offsets` is null when the matched column is not an array —
    /// neither is visible from this header; confirm in the implementation.
    virtual Status execute_match(FunctionContext* context, const std::string& column_name,
78
                                 const std::string& match_query_str, size_t input_rows_count,
79
                                 const ColumnString* string_col,
80
                                 const InvertedIndexAnalyzerCtx* analyzer_ctx,
81
                                 const ColumnArray::Offsets64* array_offsets,
82
                                 ColumnUInt8::Container& result) const = 0;
83
84
    /// Returns an InvertedIndexQueryType; going by the name it is derived from
    /// the function name (presumably get_name()) — TODO confirm.
    doris::segment_v2::InvertedIndexQueryType get_query_type_from_fn_name() const;
85
86
    /// Produces the TermInfo list for the query string `match_query_str`.
    /// How `analyzer_ctx` and `field_name` influence tokenization is defined
    /// in the implementation, not in this header.
    std::vector<TermInfo> analyse_query_str_token(const InvertedIndexAnalyzerCtx* analyzer_ctx,
87
                                                  const std::string& match_query_str,
88
                                                  const std::string& field_name) const;
89
90
    /// Produces the TermInfo list for column data at `current_block_row_idx`.
    /// `current_src_array_offset` is taken by non-const reference, so the
    /// implementation is able to update the caller's value.
    /// NOTE(review): presumably it tracks the read position within
    /// `array_offsets` across successive calls — confirm in the implementation.
    std::vector<TermInfo> analyse_data_token(const std::string& column_name,
91
                                             const InvertedIndexAnalyzerCtx* analyzer_ctx,
92
                                             const ColumnString* string_col,
93
                                             int32_t current_block_row_idx,
94
                                             const ColumnArray::Offsets64* array_offsets,
95
                                             int32_t& current_src_array_offset) const;
96
97
    /// Validation step returning a Status; what is validated for `context` and
    /// `function_name` is not visible from this header.
    Status check(FunctionContext* context, const std::string& function_name) const;
98
99
    /// Index-based evaluation path (override); writes into `bitmap_result`,
    /// the only non-const reference parameter.
    /// Note that `iterators` is taken by value, so the vector of pointers is
    /// copied on each call; the signature is fixed by the overridden
    /// declaration.
    Status evaluate_inverted_index(
100
            const ColumnsWithTypeAndName& arguments,
101
            const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
102
            std::vector<segment_v2::IndexIterator*> iterators, uint32_t num_rows,
103
            const InvertedIndexAnalyzerCtx* analyzer_ctx,
104
            segment_v2::InvertedIndexResultBitmap& bitmap_result) const override;
105
};
106
107
/// Concrete match function whose get_name() reports "match_any".
/// NOTE(review): by its name, presumably a row matches when it contains any
/// of the query terms — the semantics live in execute_match's definition,
/// which is not part of this header.
class FunctionMatchAny : public FunctionMatchBase {
108
public:
109
    /// Function name; same value as MATCH_ANY_FUNCTION.
    static constexpr auto name = "match_any";
110
2
    /// Factory: builds a new instance with std::make_shared and returns it as
    /// FunctionPtr.
    static FunctionPtr create() { return std::make_shared<FunctionMatchAny>(); }
111
112
4
    /// Reports `name` instead of the base class's generic "match".
    String get_name() const override { return name; }
113
114
    /// Override of FunctionMatchBase::execute_match; declared here without a
    /// body.
    Status execute_match(FunctionContext* context, const std::string& column_name,
115
                         const std::string& match_query_str, size_t input_rows_count,
116
                         const ColumnString* string_col,
117
                         const InvertedIndexAnalyzerCtx* analyzer_ctx,
118
                         const ColumnArray::Offsets64* array_offsets,
119
                         ColumnUInt8::Container& result) const override;
120
};
121
122
/// Concrete match function whose get_name() reports "match_all".
/// NOTE(review): by its name, presumably a row matches only when it contains
/// all query terms — the semantics live in execute_match's definition, which
/// is not part of this header.
class FunctionMatchAll : public FunctionMatchBase {
123
public:
124
    /// Function name; same value as MATCH_ALL_FUNCTION.
    static constexpr auto name = "match_all";
125
2
    /// Factory: builds a new instance with std::make_shared and returns it as
    /// FunctionPtr.
    static FunctionPtr create() { return std::make_shared<FunctionMatchAll>(); }
126
127
4
    /// Reports `name` instead of the base class's generic "match".
    String get_name() const override { return name; }
128
129
    /// Override of FunctionMatchBase::execute_match; declared here without a
    /// body.
    Status execute_match(FunctionContext* context, const std::string& column_name,
130
                         const std::string& match_query_str, size_t input_rows_count,
131
                         const ColumnString* string_col,
132
                         const InvertedIndexAnalyzerCtx* analyzer_ctx,
133
                         const ColumnArray::Offsets64* array_offsets,
134
                         ColumnUInt8::Container& result) const override;
135
};
136
137
/// Concrete match function whose get_name() reports "match_phrase".
/// NOTE(review): by its name, presumably matches the query terms as a
/// contiguous phrase — the semantics live in execute_match's definition,
/// which is not part of this header.
class FunctionMatchPhrase : public FunctionMatchBase {
138
public:
139
    /// Function name; same value as MATCH_PHRASE_FUNCTION.
    static constexpr auto name = "match_phrase";
140
2
    /// Factory: builds a new instance with std::make_shared and returns it as
    /// FunctionPtr.
    static FunctionPtr create() { return std::make_shared<FunctionMatchPhrase>(); }
141
142
5
    /// Reports `name` instead of the base class's generic "match".
    String get_name() const override { return name; }
143
144
    /// Override of FunctionMatchBase::execute_match; declared here without a
    /// body.
    Status execute_match(FunctionContext* context, const std::string& column_name,
145
                         const std::string& match_query_str, size_t input_rows_count,
146
                         const ColumnString* string_col,
147
                         const InvertedIndexAnalyzerCtx* analyzer_ctx,
148
                         const ColumnArray::Offsets64* array_offsets,
149
                         ColumnUInt8::Container& result) const override;
150
};
151
152
/// Concrete match function whose get_name() reports "match_phrase_prefix".
/// NOTE(review): by its name, presumably a phrase match whose last term is
/// treated as a prefix — the semantics live in execute_match's definition,
/// which is not part of this header.
class FunctionMatchPhrasePrefix : public FunctionMatchBase {
153
public:
154
    /// Function name; same value as MATCH_PHRASE_PREFIX_FUNCTION.
    static constexpr auto name = "match_phrase_prefix";
155
2
    /// Factory: builds a new instance with std::make_shared and returns it as
    /// FunctionPtr.
    static FunctionPtr create() { return std::make_shared<FunctionMatchPhrasePrefix>(); }
156
157
5
    /// Reports `name` instead of the base class's generic "match".
    String get_name() const override { return name; }
158
159
    /// Override of FunctionMatchBase::execute_match; declared here without a
    /// body.
    Status execute_match(FunctionContext* context, const std::string& column_name,
160
                         const std::string& match_query_str, size_t input_rows_count,
161
                         const ColumnString* string_col,
162
                         const InvertedIndexAnalyzerCtx* analyzer_ctx,
163
                         const ColumnArray::Offsets64* array_offsets,
164
                         ColumnUInt8::Container& result) const override;
165
};
166
167
/// Concrete match function whose get_name() reports "match_regexp".
/// NOTE(review): by its name, presumably treats the query string as a regular
/// expression — the semantics live in execute_match's definition, which is
/// not part of this header.
class FunctionMatchRegexp : public FunctionMatchBase {
168
public:
169
    /// Function name; same value as MATCH_PHRASE_REGEXP_FUNCTION.
    static constexpr auto name = "match_regexp";
170
2
    /// Factory: builds a new instance with std::make_shared and returns it as
    /// FunctionPtr.
    static FunctionPtr create() { return std::make_shared<FunctionMatchRegexp>(); }
171
172
4
    /// Reports `name` instead of the base class's generic "match".
    String get_name() const override { return name; }
173
174
    /// Override of FunctionMatchBase::execute_match; declared here without a
    /// body.
    Status execute_match(FunctionContext* context, const std::string& column_name,
175
                         const std::string& match_query_str, size_t input_rows_count,
176
                         const ColumnString* string_col,
177
                         const InvertedIndexAnalyzerCtx* analyzer_ctx,
178
                         const ColumnArray::Offsets64* array_offsets,
179
                         ColumnUInt8::Container& result) const override;
180
};
181
182
/// Concrete match function whose get_name() reports "match_phrase_edge".
/// NOTE(review): the exact "edge" matching semantics are not visible here;
/// they live in execute_match's definition, which is not part of this header.
class FunctionMatchPhraseEdge : public FunctionMatchBase {
183
public:
184
    /// Function name; same value as MATCH_PHRASE_EDGE_FUNCTION.
    static constexpr auto name = "match_phrase_edge";
185
2
    /// Factory: builds a new instance with std::make_shared and returns it as
    /// FunctionPtr.
    static FunctionPtr create() { return std::make_shared<FunctionMatchPhraseEdge>(); }
186
187
5
    /// Reports `name` instead of the base class's generic "match".
    String get_name() const override { return name; }
188
189
    /// Override of FunctionMatchBase::execute_match; declared here without a
    /// body.
    Status execute_match(FunctionContext* context, const std::string& column_name,
190
                         const std::string& match_query_str, size_t input_rows_count,
191
                         const ColumnString* string_col,
192
                         const InvertedIndexAnalyzerCtx* analyzer_ctx,
193
                         const ColumnArray::Offsets64* array_offsets,
194
                         ColumnUInt8::Container& result) const override;
195
};
196
197
} // namespace doris::vectorized