/root/doris/be/src/exprs/vmatch_predicate.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "exprs/vmatch_predicate.h" |
19 | | |
20 | | #include <cstdint> |
21 | | |
22 | | #ifdef __clang__ |
23 | | #pragma clang diagnostic push |
24 | | #pragma clang diagnostic ignored "-Wshadow-field" |
25 | | #endif |
26 | | |
27 | | #include <fmt/format.h> |
28 | | #include <fmt/ranges.h> // IWYU pragma: keep |
29 | | #include <gen_cpp/Exprs_types.h> |
30 | | #include <glog/logging.h> |
31 | | |
32 | | #include <memory> |
33 | | #include <string> |
34 | | #include <string_view> |
35 | | #include <type_traits> |
36 | | #include <vector> |
37 | | |
38 | | #include "common/status.h" |
39 | | #include "core/block/block.h" |
40 | | #include "core/block/column_numbers.h" |
41 | | #include "core/block/column_with_type_and_name.h" |
42 | | #include "exprs/function/match.h" |
43 | | #include "exprs/function/simple_function_factory.h" |
44 | | #include "exprs/vexpr_context.h" |
45 | | #include "exprs/vslot_ref.h" |
46 | | #include "runtime/runtime_state.h" |
47 | | #include "storage/index/inverted/analyzer/analyzer.h" |
48 | | #include "storage/index/inverted/inverted_index_reader.h" |
49 | | |
50 | | namespace doris { |
51 | | class RowDescriptor; |
52 | | class RuntimeState; |
53 | | } // namespace doris |
54 | | |
55 | | namespace doris { |
56 | | #include "common/compile_check_begin.h" |
57 | | |
58 | | using namespace doris::segment_v2; |
59 | | |
60 | 0 | VMatchPredicate::VMatchPredicate(const TExprNode& node) : VExpr(node) { |
61 | | // Step 1: Create configuration (stack-allocated temporary, follows SRP) |
62 | 0 | InvertedIndexAnalyzerConfig config; |
63 | 0 | config.analyzer_name = node.match_predicate.analyzer_name; |
64 | 0 | config.parser_type = |
65 | 0 | get_inverted_index_parser_type_from_string(node.match_predicate.parser_type); |
66 | 0 | config.parser_mode = node.match_predicate.parser_mode; |
67 | 0 | config.char_filter_map = node.match_predicate.char_filter_map; |
68 | 0 | if (node.match_predicate.parser_lowercase) { |
69 | 0 | config.lower_case = INVERTED_INDEX_PARSER_TRUE; |
70 | 0 | } else { |
71 | 0 | config.lower_case = INVERTED_INDEX_PARSER_FALSE; |
72 | 0 | } |
73 | 0 | DBUG_EXECUTE_IF("inverted_index_parser.get_parser_lowercase_from_properties", |
74 | 0 | { config.lower_case = ""; }) |
75 | 0 | config.stop_words = node.match_predicate.parser_stopwords; |
76 | | |
77 | | // Step 2: Use config to create analyzer (factory method). |
78 | | // Always create analyzer based on parser_type for slow path (tables without index). |
79 | | // For index path, FullTextIndexReader will check analyzer_name to decide whether |
80 | | // to use this analyzer or fallback to index's own analyzer. |
81 | 0 | _analyzer = inverted_index::InvertedIndexAnalyzer::create_analyzer(&config); |
82 | | |
83 | | // Step 3: Create runtime context (only extract runtime-needed info) |
84 | 0 | _analyzer_ctx = std::make_shared<InvertedIndexAnalyzerCtx>(); |
85 | 0 | _analyzer_ctx->analyzer_name = config.analyzer_name; |
86 | 0 | _analyzer_ctx->parser_type = config.parser_type; |
87 | 0 | _analyzer_ctx->char_filter_map = std::move(config.char_filter_map); |
88 | 0 | _analyzer_ctx->analyzer = _analyzer; |
89 | 0 | } |
90 | | |
91 | 0 | VMatchPredicate::~VMatchPredicate() = default; |
92 | | |
93 | | Status VMatchPredicate::prepare(RuntimeState* state, const RowDescriptor& desc, |
94 | 0 | VExprContext* context) { |
95 | 0 | RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context)); |
96 | |
|
97 | 0 | ColumnsWithTypeAndName argument_template; |
98 | 0 | argument_template.reserve(_children.size()); |
99 | 0 | std::vector<std::string_view> child_expr_name; |
100 | 0 | for (const auto& child : _children) { |
101 | 0 | argument_template.emplace_back(nullptr, child->data_type(), child->expr_name()); |
102 | 0 | child_expr_name.emplace_back(child->expr_name()); |
103 | 0 | } |
104 | |
|
105 | 0 | _function = SimpleFunctionFactory::instance().get_function(_fn.name.function_name, |
106 | 0 | argument_template, _data_type, {}); |
107 | 0 | if (_function == nullptr) { |
108 | 0 | std::string type_str; |
109 | 0 | for (const auto& arg : argument_template) { |
110 | 0 | type_str = type_str + " " + arg.type->get_name(); |
111 | 0 | } |
112 | 0 | return Status::NotSupported( |
113 | 0 | "Function {} is not implemented, input param type is {}, " |
114 | 0 | "and return type is {}.", |
115 | 0 | _fn.name.function_name, type_str, _data_type->get_name()); |
116 | 0 | } |
117 | | |
118 | 0 | VExpr::register_function_context(state, context); |
119 | 0 | _expr_name = fmt::format("{}({})", _fn.name.function_name, child_expr_name); |
120 | 0 | _function_name = _fn.name.function_name; |
121 | 0 | _prepare_finished = true; |
122 | 0 | return Status::OK(); |
123 | 0 | } |
124 | | |
125 | | Status VMatchPredicate::open(RuntimeState* state, VExprContext* context, |
126 | 0 | FunctionContext::FunctionStateScope scope) { |
127 | 0 | DCHECK(_prepare_finished); |
128 | 0 | for (auto& i : _children) { |
129 | 0 | RETURN_IF_ERROR(i->open(state, context, scope)); |
130 | 0 | } |
131 | 0 | RETURN_IF_ERROR(VExpr::init_function_context(state, context, scope, _function)); |
132 | 0 | if (scope == FunctionContext::THREAD_LOCAL || scope == FunctionContext::FRAGMENT_LOCAL) { |
133 | 0 | context->fn_context(_fn_context_index)->set_function_state(scope, _analyzer_ctx); |
134 | 0 | } |
135 | 0 | if (scope == FunctionContext::FRAGMENT_LOCAL) { |
136 | 0 | RETURN_IF_ERROR(VExpr::get_const_col(context, nullptr)); |
137 | 0 | } |
138 | 0 | _open_finished = true; |
139 | 0 | return Status::OK(); |
140 | 0 | } |
141 | | |
/// Closes the predicate for the given scope.
///
/// Calls VExpr::close_function_context for `_function` first and then
/// delegates to VExpr::close; the order of the two calls is kept as written.
///
/// @param context expression context passed to both calls.
/// @param scope   function-state scope being closed.
void VMatchPredicate::close(VExprContext* context, FunctionContext::FunctionStateScope scope) {
    VExpr::close_function_context(context, scope, _function);
    VExpr::close(context, scope);
}
146 | | |
147 | 0 | Status VMatchPredicate::evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) { |
148 | 0 | DCHECK_EQ(get_num_children(), 2); |
149 | 0 | if (context != nullptr && context->get_index_context() != nullptr && _analyzer_ctx != nullptr) { |
150 | 0 | context->get_index_context()->set_analyzer_ctx_for_expr(this, _analyzer_ctx); |
151 | 0 | } |
152 | 0 | return _evaluate_inverted_index(context, _function, segment_num_rows); |
153 | 0 | } |
154 | | |
155 | 0 | const std::string& VMatchPredicate::get_analyzer_key() const { |
156 | 0 | return _analyzer_ctx->analyzer_name; |
157 | 0 | } |
158 | | |
159 | | Status VMatchPredicate::execute_column(VExprContext* context, const Block* block, |
160 | | Selector* selector, size_t count, |
161 | 0 | ColumnPtr& result_column) const { |
162 | 0 | DCHECK(_open_finished || block == nullptr); |
163 | 0 | if (fast_execute(context, selector, count, result_column)) { |
164 | 0 | return Status::OK(); |
165 | 0 | } |
166 | 0 | DBUG_EXECUTE_IF("VMatchPredicate.execute", { |
167 | 0 | return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>( |
168 | 0 | "{} not support slow path, hit debug point.", _expr_name); |
169 | 0 | }); |
170 | 0 | DBUG_EXECUTE_IF("VMatchPredicate.must_in_slow_path", { |
171 | 0 | auto debug_col_name = DebugPoints::instance()->get_debug_param_or_default<std::string>( |
172 | 0 | "VMatchPredicate.must_in_slow_path", "column_name", ""); |
173 | |
|
174 | 0 | std::vector<std::string> column_names; |
175 | 0 | boost::split(column_names, debug_col_name, boost::algorithm::is_any_of(",")); |
176 | |
|
177 | 0 | auto* column_slot_ref = assert_cast<VSlotRef*>(get_child(0).get()); |
178 | 0 | std::string column_name = column_slot_ref->expr_name(); |
179 | 0 | auto it = std::ranges::find(column_names, column_name); |
180 | 0 | if (it == column_names.end()) { |
181 | 0 | return Status::Error<ErrorCode::INTERNAL_ERROR>( |
182 | 0 | "column {} should in slow path while VMatchPredicate::execute.", column_name); |
183 | 0 | } |
184 | 0 | }) |
185 | 0 | ColumnNumbers arguments(_children.size()); |
186 | 0 | Block temp_block; |
187 | 0 | for (size_t i = 0; i < _children.size(); ++i) { |
188 | 0 | ColumnPtr arg_column; |
189 | 0 | RETURN_IF_ERROR(_children[i]->execute_column(context, block, selector, count, arg_column)); |
190 | 0 | auto arg_type = _children[i]->execute_type(block); |
191 | 0 | temp_block.insert({arg_column, arg_type, _children[i]->expr_name()}); |
192 | 0 | arguments[i] = static_cast<uint32_t>(i); |
193 | 0 | } |
194 | 0 | uint32_t num_columns_without_result = temp_block.columns(); |
195 | | // prepare a column to save result |
196 | 0 | temp_block.insert({nullptr, _data_type, _expr_name}); |
197 | |
|
198 | 0 | RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), temp_block, |
199 | 0 | arguments, num_columns_without_result, temp_block.rows())); |
200 | 0 | result_column = temp_block.get_by_position(num_columns_without_result).column; |
201 | 0 | DCHECK_EQ(result_column->size(), count); |
202 | 0 | return Status::OK(); |
203 | 0 | } |
204 | | |
205 | 0 | const std::string& VMatchPredicate::expr_name() const { |
206 | 0 | return _expr_name; |
207 | 0 | } |
208 | | |
209 | 0 | const std::string& VMatchPredicate::function_name() const { |
210 | 0 | return _function_name; |
211 | 0 | } |
212 | | |
213 | 0 | std::string VMatchPredicate::debug_string() const { |
214 | 0 | std::stringstream out; |
215 | 0 | out << "MatchPredicate(" << children()[0]->debug_string() << ",["; |
216 | 0 | uint16_t num_children = get_num_children(); |
217 | |
|
218 | 0 | for (uint16_t i = 1; i < num_children; ++i) { |
219 | 0 | out << (i == 1 ? "" : " ") << children()[i]->debug_string(); |
220 | 0 | } |
221 | |
|
222 | 0 | out << "])"; |
223 | 0 | return out.str(); |
224 | 0 | } |
225 | | |
226 | | #include "common/compile_check_end.h" |
227 | | } // namespace doris |