be/src/exprs/function/function_multi_match.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "exprs/function/function_multi_match.h" |
19 | | |
20 | | #include <glog/logging.h> |
21 | | |
22 | | #include <memory> |
23 | | #include <roaring/roaring.hh> |
24 | | #include <string> |
25 | | #include <vector> |
26 | | |
27 | | #include "core/column/column.h" |
28 | | #include "exprs/function/simple_function_factory.h" |
29 | | #include "exprs/vslot_ref.h" |
30 | | #include "io/fs/file_reader.h" |
31 | | #include "storage/index/inverted/query/phrase_prefix_query.h" |
32 | | #include "storage/segment/segment_iterator.h" |
33 | | |
34 | | namespace doris { |
35 | | |
36 | | Status FunctionMultiMatch::execute_impl(FunctionContext* /*context*/, Block& block, |
37 | | const ColumnNumbers& arguments, uint32_t result, |
38 | 0 | size_t /*input_rows_count*/) const { |
39 | 0 | return Status::RuntimeError("only inverted index queries are supported"); |
40 | 0 | } |
41 | | |
42 | 33 | InvertedIndexQueryType get_query_type(const std::string& query_type) { |
43 | 33 | if (query_type == "any") { |
44 | 8 | return InvertedIndexQueryType::MATCH_ANY_QUERY; |
45 | 25 | } else if (query_type == "all") { |
46 | 8 | return InvertedIndexQueryType::MATCH_ALL_QUERY; |
47 | 17 | } else if (query_type == "phrase") { |
48 | 9 | return InvertedIndexQueryType::MATCH_PHRASE_QUERY; |
49 | 9 | } else if (query_type == "phrase_prefix") { |
50 | 8 | return InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY; |
51 | 8 | } else { |
52 | 0 | return InvertedIndexQueryType::UNKNOWN_QUERY; |
53 | 0 | } |
54 | 33 | } |
55 | | |
56 | | Status FunctionMultiMatch::evaluate_inverted_index( |
57 | | const ColumnsWithTypeAndName& arguments, |
58 | | const std::vector<IndexFieldNameAndTypePair>& data_type_with_names, |
59 | | std::vector<segment_v2::IndexIterator*> iterators, uint32_t num_rows, |
60 | | const InvertedIndexAnalyzerCtx* analyzer_ctx, |
61 | 33 | segment_v2::InvertedIndexResultBitmap& bitmap_result) const { |
62 | 33 | DCHECK(arguments.size() == 2); |
63 | 33 | std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>(); |
64 | 33 | std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>(); |
65 | | |
66 | | // type |
67 | 33 | auto query_type_value = arguments[0].column->get_data_at(0); |
68 | 33 | auto query_type = get_query_type(query_type_value.to_string()); |
69 | 33 | if (query_type == InvertedIndexQueryType::UNKNOWN_QUERY) { |
70 | 0 | return Status::RuntimeError( |
71 | 0 | "parameter query type incorrect for function multi_match: query_type = {}", |
72 | 0 | query_type); |
73 | 0 | } |
74 | | |
75 | | // query |
76 | 33 | auto query_str_ref = arguments[1].column->get_data_at(0); |
77 | 33 | auto param_type = arguments[1].type->get_primitive_type(); |
78 | 33 | if (!is_string_type(param_type)) { |
79 | 0 | return Status::Error<ErrorCode::INDEX_INVALID_PARAMETERS>( |
80 | 0 | "arguments for multi_match must be string"); |
81 | 0 | } |
82 | | // Must convert StringRef to std::string because downstream readers |
83 | | // (e.g. FullTextIndexReader::query) reinterpret_cast query_value as std::string*. |
84 | 33 | std::string query_str(query_str_ref.data, query_str_ref.size); |
85 | | |
86 | | // search |
87 | 33 | InvertedIndexParam param; |
88 | 33 | param.query_value = &query_str; |
89 | 33 | param.query_type = query_type; |
90 | 33 | param.num_rows = num_rows; |
91 | 151 | for (size_t i = 0; i < data_type_with_names.size(); i++) { |
92 | 119 | auto column_name = data_type_with_names[i].first; |
93 | 119 | auto* iter = iterators[i]; |
94 | 119 | if (iter == nullptr) { |
95 | 1 | std::string error_msg = "Inverted index iterator is null for column '" + column_name + |
96 | 1 | "' during multi_match execution"; |
97 | 1 | return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(error_msg); |
98 | 1 | } |
99 | | |
100 | 118 | param.column_name = column_name; |
101 | 118 | param.column_type = data_type_with_names[i].second; |
102 | 118 | param.roaring = std::make_shared<roaring::Roaring>(); |
103 | 118 | param.analyzer_ctx = analyzer_ctx; |
104 | 118 | RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {¶m})); |
105 | 118 | *roaring |= *param.roaring; |
106 | 118 | } |
107 | 32 | segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap); |
108 | 32 | bitmap_result = result; |
109 | | |
110 | 32 | return Status::OK(); |
111 | 33 | } |
112 | | |
113 | 8 | void register_function_multi_match(SimpleFunctionFactory& factory) { |
114 | 8 | factory.register_function<FunctionMultiMatch>(); |
115 | 8 | } |
116 | | |
117 | | } // namespace doris |