Coverage Report

Created: 2026-04-14 13:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/vmatch_predicate.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exprs/vmatch_predicate.h"
19
20
#include <cstdint>
21
22
#ifdef __clang__
23
#pragma clang diagnostic push
24
#pragma clang diagnostic ignored "-Wshadow-field"
25
#endif
26
27
#include <fmt/format.h>
28
#include <fmt/ranges.h> // IWYU pragma: keep
29
#include <gen_cpp/Exprs_types.h>
30
#include <glog/logging.h>
31
32
#include <memory>
33
#include <string>
34
#include <string_view>
35
#include <type_traits>
36
#include <vector>
37
38
#include "common/status.h"
39
#include "core/block/block.h"
40
#include "core/block/column_numbers.h"
41
#include "core/block/column_with_type_and_name.h"
42
#include "exprs/function/match.h"
43
#include "exprs/function/simple_function_factory.h"
44
#include "exprs/vexpr_context.h"
45
#include "exprs/vslot_ref.h"
46
#include "runtime/runtime_state.h"
47
#include "storage/index/inverted/analyzer/analyzer.h"
48
#include "storage/index/inverted/inverted_index_reader.h"
49
50
namespace doris {
51
class RowDescriptor;
52
class RuntimeState;
53
} // namespace doris
54
55
namespace doris {
56
57
using namespace doris::segment_v2;
58
59
1.67k
VMatchPredicate::VMatchPredicate(const TExprNode& node) : VExpr(node) {
60
    // Step 1: Create configuration (stack-allocated temporary, follows SRP)
61
1.67k
    InvertedIndexAnalyzerConfig config;
62
1.67k
    config.analyzer_name = node.match_predicate.analyzer_name;
63
1.67k
    config.parser_type =
64
1.67k
            get_inverted_index_parser_type_from_string(node.match_predicate.parser_type);
65
1.67k
    config.parser_mode = node.match_predicate.parser_mode;
66
1.67k
    config.char_filter_map = node.match_predicate.char_filter_map;
67
1.67k
    if (node.match_predicate.parser_lowercase) {
68
1.66k
        config.lower_case = INVERTED_INDEX_PARSER_TRUE;
69
1.66k
    } else {
70
9
        config.lower_case = INVERTED_INDEX_PARSER_FALSE;
71
9
    }
72
1.67k
    DBUG_EXECUTE_IF("inverted_index_parser.get_parser_lowercase_from_properties",
73
1.67k
                    { config.lower_case = ""; })
74
1.67k
    config.stop_words = node.match_predicate.parser_stopwords;
75
76
    // Step 2: Use config to create analyzer (factory method).
77
    // Always create analyzer based on parser_type for slow path (tables without index).
78
    // For index path, FullTextIndexReader will check analyzer_name to decide whether
79
    // to use this analyzer or fallback to index's own analyzer.
80
1.67k
    _analyzer = inverted_index::InvertedIndexAnalyzer::create_analyzer(&config);
81
82
    // Step 3: Create runtime context (only extract runtime-needed info)
83
1.67k
    _analyzer_ctx = std::make_shared<InvertedIndexAnalyzerCtx>();
84
1.67k
    _analyzer_ctx->analyzer_name = config.analyzer_name;
85
1.67k
    _analyzer_ctx->parser_type = config.parser_type;
86
1.67k
    _analyzer_ctx->char_filter_map = std::move(config.char_filter_map);
87
1.67k
    _analyzer_ctx->analyzer = _analyzer;
88
1.67k
}
89
90
1.67k
VMatchPredicate::~VMatchPredicate() = default;
91
92
Status VMatchPredicate::prepare(RuntimeState* state, const RowDescriptor& desc,
93
1.67k
                                VExprContext* context) {
94
1.67k
    RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context));
95
96
1.67k
    ColumnsWithTypeAndName argument_template;
97
1.67k
    argument_template.reserve(_children.size());
98
1.67k
    std::vector<std::string_view> child_expr_name;
99
3.33k
    for (const auto& child : _children) {
100
3.33k
        argument_template.emplace_back(nullptr, child->data_type(), child->expr_name());
101
3.33k
        child_expr_name.emplace_back(child->expr_name());
102
3.33k
    }
103
104
1.67k
    _function = SimpleFunctionFactory::instance().get_function(_fn.name.function_name,
105
1.67k
                                                               argument_template, _data_type, {});
106
1.67k
    if (_function == nullptr) {
107
0
        std::string type_str;
108
0
        for (const auto& arg : argument_template) {
109
0
            type_str = type_str + " " + arg.type->get_name();
110
0
        }
111
0
        return Status::NotSupported(
112
0
                "Function {} is not implemented, input param type is {}, "
113
0
                "and return type is {}.",
114
0
                _fn.name.function_name, type_str, _data_type->get_name());
115
0
    }
116
117
1.67k
    VExpr::register_function_context(state, context);
118
1.67k
    _expr_name = fmt::format("{}({})", _fn.name.function_name, child_expr_name);
119
1.67k
    _function_name = _fn.name.function_name;
120
1.67k
    _prepare_finished = true;
121
1.67k
    return Status::OK();
122
1.67k
}
123
124
Status VMatchPredicate::open(RuntimeState* state, VExprContext* context,
125
14.8k
                             FunctionContext::FunctionStateScope scope) {
126
14.8k
    DCHECK(_prepare_finished);
127
29.7k
    for (auto& i : _children) {
128
29.7k
        RETURN_IF_ERROR(i->open(state, context, scope));
129
29.7k
    }
130
14.8k
    RETURN_IF_ERROR(VExpr::init_function_context(state, context, scope, _function));
131
14.8k
    if (scope == FunctionContext::THREAD_LOCAL || scope == FunctionContext::FRAGMENT_LOCAL) {
132
14.8k
        context->fn_context(_fn_context_index)->set_function_state(scope, _analyzer_ctx);
133
14.8k
    }
134
14.8k
    if (scope == FunctionContext::FRAGMENT_LOCAL) {
135
1.67k
        RETURN_IF_ERROR(VExpr::get_const_col(context, nullptr));
136
1.67k
    }
137
14.8k
    _open_finished = true;
138
14.8k
    return Status::OK();
139
14.8k
}
140
141
14.8k
void VMatchPredicate::close(VExprContext* context, FunctionContext::FunctionStateScope scope) {
142
14.8k
    VExpr::close_function_context(context, scope, _function);
143
14.8k
    VExpr::close(context, scope);
144
14.8k
}
145
146
3.99k
Status VMatchPredicate::evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) {
147
3.99k
    DCHECK_EQ(get_num_children(), 2);
148
4.03k
    if (context != nullptr && context->get_index_context() != nullptr && _analyzer_ctx != nullptr) {
149
4.03k
        context->get_index_context()->set_analyzer_ctx_for_expr(this, _analyzer_ctx);
150
4.03k
    }
151
3.99k
    return _evaluate_inverted_index(context, _function, segment_num_rows);
152
3.99k
}
153
154
0
const std::string& VMatchPredicate::get_analyzer_key() const {
155
0
    return _analyzer_ctx->analyzer_name;
156
0
}
157
158
Status VMatchPredicate::execute_column(VExprContext* context, const Block* block,
159
                                       Selector* selector, size_t count,
160
1.33k
                                       ColumnPtr& result_column) const {
161
1.33k
    DCHECK(_open_finished || block == nullptr);
162
1.33k
    if (fast_execute(context, selector, count, result_column)) {
163
13
        return Status::OK();
164
13
    }
165
1.32k
    DBUG_EXECUTE_IF("VMatchPredicate.execute", {
166
1.32k
        return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
167
1.32k
                "{} not support slow path, hit debug point.", _expr_name);
168
1.32k
    });
169
1.32k
    DBUG_EXECUTE_IF("VMatchPredicate.must_in_slow_path", {
170
1.32k
        auto debug_col_name = DebugPoints::instance()->get_debug_param_or_default<std::string>(
171
1.32k
                "VMatchPredicate.must_in_slow_path", "column_name", "");
172
173
1.32k
        std::vector<std::string> column_names;
174
1.32k
        boost::split(column_names, debug_col_name, boost::algorithm::is_any_of(","));
175
176
1.32k
        auto* column_slot_ref = assert_cast<VSlotRef*>(get_child(0).get());
177
1.32k
        std::string column_name = column_slot_ref->expr_name();
178
1.32k
        auto it = std::ranges::find(column_names, column_name);
179
1.32k
        if (it == column_names.end()) {
180
1.32k
            return Status::Error<ErrorCode::INTERNAL_ERROR>(
181
1.32k
                    "column {} should in slow path while VMatchPredicate::execute.", column_name);
182
1.32k
        }
183
1.32k
    })
184
1.32k
    ColumnNumbers arguments(_children.size());
185
1.32k
    Block temp_block;
186
3.96k
    for (size_t i = 0; i < _children.size(); ++i) {
187
2.64k
        ColumnPtr arg_column;
188
2.64k
        RETURN_IF_ERROR(_children[i]->execute_column(context, block, selector, count, arg_column));
189
2.64k
        auto arg_type = _children[i]->execute_type(block);
190
2.64k
        temp_block.insert({arg_column, arg_type, _children[i]->expr_name()});
191
2.64k
        arguments[i] = static_cast<uint32_t>(i);
192
2.64k
    }
193
1.32k
    uint32_t num_columns_without_result = temp_block.columns();
194
    // prepare a column to save result
195
1.32k
    temp_block.insert({nullptr, _data_type, _expr_name});
196
197
1.32k
    RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), temp_block,
198
1.32k
                                       arguments, num_columns_without_result, temp_block.rows()));
199
1.31k
    result_column = temp_block.get_by_position(num_columns_without_result).column;
200
1.31k
    DCHECK_EQ(result_column->size(), count);
201
1.31k
    return Status::OK();
202
1.32k
}
203
204
1.46k
const std::string& VMatchPredicate::expr_name() const {
205
1.46k
    return _expr_name;
206
1.46k
}
207
208
0
const std::string& VMatchPredicate::function_name() const {
209
0
    return _function_name;
210
0
}
211
212
2
std::string VMatchPredicate::debug_string() const {
213
2
    std::stringstream out;
214
2
    out << "MatchPredicate(" << children()[0]->debug_string() << ",[";
215
2
    uint16_t num_children = get_num_children();
216
217
4
    for (uint16_t i = 1; i < num_children; ++i) {
218
2
        out << (i == 1 ? "" : " ") << children()[i]->debug_string();
219
2
    }
220
221
2
    out << "])";
222
2
    return out.str();
223
2
}
224
225
} // namespace doris