Coverage Report

Created: 2026-03-11 11:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/exprs/vmatch_predicate.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exprs/vmatch_predicate.h"
19
20
#include <cstdint>
21
22
#ifdef __clang__
23
#pragma clang diagnostic push
24
#pragma clang diagnostic ignored "-Wshadow-field"
25
#endif
26
27
#include <fmt/format.h>
28
#include <fmt/ranges.h> // IWYU pragma: keep
29
#include <gen_cpp/Exprs_types.h>
30
#include <glog/logging.h>
31
32
#include <memory>
33
#include <string>
34
#include <string_view>
35
#include <type_traits>
36
#include <vector>
37
38
#include "common/status.h"
39
#include "core/block/block.h"
40
#include "core/block/column_numbers.h"
41
#include "core/block/column_with_type_and_name.h"
42
#include "exprs/function/match.h"
43
#include "exprs/function/simple_function_factory.h"
44
#include "exprs/vexpr_context.h"
45
#include "exprs/vslot_ref.h"
46
#include "runtime/runtime_state.h"
47
#include "storage/index/inverted/analyzer/analyzer.h"
48
#include "storage/index/inverted/inverted_index_reader.h"
49
50
namespace doris {
51
class RowDescriptor;
52
class RuntimeState;
53
} // namespace doris
54
55
namespace doris {
56
#include "common/compile_check_begin.h"
57
58
using namespace doris::segment_v2;
59
60
0
// Builds a match predicate from its thrift expression node.
//
// The analyzer settings carried in `node.match_predicate` are first gathered
// into a temporary InvertedIndexAnalyzerConfig, which is used to create the
// analyzer; a shared InvertedIndexAnalyzerCtx is then filled with the subset
// of that information kept for later use (analyzer name, parser type, char
// filter map and the analyzer itself).
VMatchPredicate::VMatchPredicate(const TExprNode& node) : VExpr(node) {
    const auto& match_params = node.match_predicate;

    // Step 1: collect the analyzer configuration in a stack-local object.
    InvertedIndexAnalyzerConfig analyzer_config;
    analyzer_config.analyzer_name = match_params.analyzer_name;
    analyzer_config.parser_type =
            get_inverted_index_parser_type_from_string(match_params.parser_type);
    analyzer_config.parser_mode = match_params.parser_mode;
    analyzer_config.char_filter_map = match_params.char_filter_map;
    analyzer_config.lower_case = match_params.parser_lowercase ? INVERTED_INDEX_PARSER_TRUE
                                                               : INVERTED_INDEX_PARSER_FALSE;
    // Debug point: clears the lower_case setting when enabled.
    DBUG_EXECUTE_IF("inverted_index_parser.get_parser_lowercase_from_properties",
                    { analyzer_config.lower_case = ""; })
    analyzer_config.stop_words = match_params.parser_stopwords;

    // Step 2: create the analyzer from the configuration (factory method).
    // Always create analyzer based on parser_type for slow path (tables without index).
    // For index path, FullTextIndexReader will check analyzer_name to decide whether
    // to use this analyzer or fallback to index's own analyzer.
    _analyzer = inverted_index::InvertedIndexAnalyzer::create_analyzer(&analyzer_config);

    // Step 3: build the runtime context, keeping only the runtime-needed info.
    // The char filter map is moved out of the config, which is not used afterwards.
    auto runtime_ctx = std::make_shared<InvertedIndexAnalyzerCtx>();
    runtime_ctx->analyzer_name = analyzer_config.analyzer_name;
    runtime_ctx->parser_type = analyzer_config.parser_type;
    runtime_ctx->char_filter_map = std::move(analyzer_config.char_filter_map);
    runtime_ctx->analyzer = _analyzer;
    _analyzer_ctx = std::move(runtime_ctx);
}
90
91
0
// Defaulted destructor, defined out of line in this translation unit.
VMatchPredicate::~VMatchPredicate() = default;
92
93
// Prepares the predicate for execution.
//
// After the base-class prepare succeeds, the match function is looked up in
// SimpleFunctionFactory by `_fn.name.function_name`, using the data types and
// names of the child expressions as the argument template. When no function is
// found, Status::NotSupported is returned with the argument and return type
// names. Otherwise the function context is registered, `_expr_name` and
// `_function_name` are set, and the predicate is marked as prepared.
Status VMatchPredicate::prepare(RuntimeState* state, const RowDescriptor& desc,
                                VExprContext* context) {
    RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context));

    const auto& fn_name = _fn.name.function_name;

    // One (null column, type, name) entry per child describes the signature.
    ColumnsWithTypeAndName arg_templates;
    arg_templates.reserve(_children.size());
    std::vector<std::string_view> arg_names;
    for (const auto& child : _children) {
        arg_templates.emplace_back(nullptr, child->data_type(), child->expr_name());
        arg_names.emplace_back(child->expr_name());
    }

    _function = SimpleFunctionFactory::instance().get_function(fn_name, arg_templates,
                                                               _data_type, {});
    if (_function == nullptr) {
        // Build a space-prefixed list of argument type names for the message.
        std::string arg_type_names;
        for (const auto& arg : arg_templates) {
            arg_type_names += " ";
            arg_type_names += arg.type->get_name();
        }
        return Status::NotSupported(
                "Function {} is not implemented, input param type is {}, "
                "and return type is {}.",
                fn_name, arg_type_names, _data_type->get_name());
    }

    VExpr::register_function_context(state, context);
    _expr_name = fmt::format("{}({})", fn_name, arg_names);
    _function_name = fn_name;
    _prepare_finished = true;
    return Status::OK();
}
124
125
// Opens the predicate for the given function-state scope.
//
// Every child is opened first, then the function context is initialised. For
// the THREAD_LOCAL and FRAGMENT_LOCAL scopes the analyzer context is stored as
// the function state of this expression's function context. For
// FRAGMENT_LOCAL, VExpr::get_const_col is additionally invoked. Any failing
// step returns its error immediately.
Status VMatchPredicate::open(RuntimeState* state, VExprContext* context,
                             FunctionContext::FunctionStateScope scope) {
    DCHECK(_prepare_finished);

    for (auto& child : _children) {
        RETURN_IF_ERROR(child->open(state, context, scope));
    }
    RETURN_IF_ERROR(VExpr::init_function_context(state, context, scope, _function));

    const bool is_thread_local = (scope == FunctionContext::THREAD_LOCAL);
    const bool is_fragment_local = (scope == FunctionContext::FRAGMENT_LOCAL);

    if (is_thread_local || is_fragment_local) {
        auto* fn_ctx = context->fn_context(_fn_context_index);
        fn_ctx->set_function_state(scope, _analyzer_ctx);
    }
    if (is_fragment_local) {
        RETURN_IF_ERROR(VExpr::get_const_col(context, nullptr));
    }

    _open_finished = true;
    return Status::OK();
}
141
142
0
// Closes the predicate for the given scope: the function context is closed
// first, then the base-class close is invoked with the same arguments.
void VMatchPredicate::close(VExprContext* context, FunctionContext::FunctionStateScope scope) {
    VExpr::close_function_context(context, scope, _function);
    VExpr::close(context, scope);
}
146
147
0
// Evaluates this predicate through the inverted index.
//
// When the expression context, its index context and the analyzer context are
// all non-null, the analyzer context is registered on the index context for
// this expression before evaluation. The actual evaluation is delegated to
// _evaluate_inverted_index with the resolved match function.
Status VMatchPredicate::evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) {
    DCHECK_EQ(get_num_children(), 2);

    if (context != nullptr) {
        const auto& index_ctx = context->get_index_context();
        if (index_ctx != nullptr && _analyzer_ctx != nullptr) {
            index_ctx->set_analyzer_ctx_for_expr(this, _analyzer_ctx);
        }
    }

    return _evaluate_inverted_index(context, _function, segment_num_rows);
}
154
155
0
const std::string& VMatchPredicate::get_analyzer_key() const {
156
0
    return _analyzer_ctx->analyzer_name;
157
0
}
158
159
// Produces the result column of the match predicate.
//
// If fast_execute() reports success, OK is returned right away. Otherwise the
// slow path runs: each child expression is executed into a temporary block, a
// result slot is appended, and the match function is executed over that block.
// The column in the result slot is returned through `result_column`.
//
// Two debug points can short-circuit the slow path with an error:
//  - "VMatchPredicate.execute" always fails once the slow path is reached.
//  - "VMatchPredicate.must_in_slow_path" fails when the first child's column
//    name is not in the comma-separated "column_name" debug parameter.
Status VMatchPredicate::execute_column(VExprContext* context, const Block* block,
                                       Selector* selector, size_t count,
                                       ColumnPtr& result_column) const {
    DCHECK(_open_finished || block == nullptr);
    if (fast_execute(context, selector, count, result_column)) {
        return Status::OK();
    }

    DBUG_EXECUTE_IF("VMatchPredicate.execute", {
        return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
                "{} not support slow path, hit debug point.", _expr_name);
    });
    DBUG_EXECUTE_IF("VMatchPredicate.must_in_slow_path", {
        auto allowed_csv = DebugPoints::instance()->get_debug_param_or_default<std::string>(
                "VMatchPredicate.must_in_slow_path", "column_name", "");

        std::vector<std::string> allowed_columns;
        boost::split(allowed_columns, allowed_csv, boost::algorithm::is_any_of(","));

        auto* slot_ref = assert_cast<VSlotRef*>(get_child(0).get());
        std::string slot_name = slot_ref->expr_name();
        auto found = std::ranges::find(allowed_columns, slot_name);
        if (found == allowed_columns.end()) {
            return Status::Error<ErrorCode::INTERNAL_ERROR>(
                    "column {} should in slow path while VMatchPredicate::execute.", slot_name);
        }
    })

    // Materialise every child into a scratch block; argument i lives at position i.
    const size_t num_args = _children.size();
    ColumnNumbers arg_positions(num_args);
    Block scratch_block;
    for (size_t idx = 0; idx < num_args; ++idx) {
        const auto& child = _children[idx];
        ColumnPtr child_column;
        RETURN_IF_ERROR(child->execute_column(context, block, selector, count, child_column));
        auto child_type = child->execute_type(block);
        scratch_block.insert({child_column, child_type, child->expr_name()});
        arg_positions[idx] = static_cast<uint32_t>(idx);
    }

    // The result slot is appended right after the argument columns.
    uint32_t result_position = scratch_block.columns();
    scratch_block.insert({nullptr, _data_type, _expr_name});

    RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), scratch_block,
                                       arg_positions, result_position, scratch_block.rows()));
    result_column = scratch_block.get_by_position(result_position).column;
    DCHECK_EQ(result_column->size(), count);
    return Status::OK();
}
204
205
0
const std::string& VMatchPredicate::expr_name() const {
206
0
    return _expr_name;
207
0
}
208
209
0
const std::string& VMatchPredicate::function_name() const {
210
0
    return _function_name;
211
0
}
212
213
0
std::string VMatchPredicate::debug_string() const {
214
0
    std::stringstream out;
215
0
    out << "MatchPredicate(" << children()[0]->debug_string() << ",[";
216
0
    uint16_t num_children = get_num_children();
217
218
0
    for (uint16_t i = 1; i < num_children; ++i) {
219
0
        out << (i == 1 ? "" : " ") << children()[i]->debug_string();
220
0
    }
221
222
0
    out << "])";
223
0
    return out.str();
224
0
}
225
226
#include "common/compile_check_end.h"
227
} // namespace doris