be/src/exprs/function/function_search.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <CLucene.h> |
21 | | #include <gen_cpp/Exprs_types.h> |
22 | | |
23 | | #include <map> |
24 | | #include <memory> |
25 | | #include <string> |
26 | | #include <unordered_map> |
27 | | #include <vector> |
28 | | |
29 | | #include "core/block/block.h" |
30 | | #include "core/data_type/data_type.h" |
31 | | #include "core/data_type/data_type_number.h" |
32 | | #include "core/types.h" |
33 | | #include "exprs/function/function.h" |
34 | | #include "exprs/function/variant_inverted_index_search.h" |
35 | | #include "storage/index/index_query_context.h" |
36 | | #include "storage/index/inverted/inverted_index_cache.h" |
37 | | #include "storage/index/inverted/query_v2/boolean_query/operator_boolean_query.h" |
38 | | |
39 | | CL_NS_USE(index) |
40 | | namespace doris { |
41 | | |
// NOTE(review): this using-directive sits in a header, so every file that includes it
// gets the doris::segment_v2 names visible to unqualified lookup inside namespace doris.
// Left as is here because the declarations below may depend on it -- confirm before removing.
42 | | using namespace doris::segment_v2; |
43 | | |
// Forward declaration only: this header refers to IndexExecContext solely through a
// `const IndexExecContext*` parameter, so the full definition is not needed here.
44 | | class IndexExecContext; |
45 | | |
// FunctionSearch is the IFunction subclass whose get_name() returns "search".
// Visible from this header: it declares itself variadic, always reports a UInt8 return
// type, turns off the default NULL and constant handling, and reports that it can be
// pushed down to an index. execute_impl, the inverted-index evaluation methods and the
// query-building helpers are only declared here; their definitions are not in this header.
46 | | class FunctionSearch : public IFunction { |
47 | | public: |
// Name of the function; this is the value get_name() returns.
48 | | static constexpr auto name = "search"; |
49 | | |
// Factory: returns a newly constructed FunctionSearch created with std::make_shared.
50 | 10 | static FunctionPtr create() { return std::make_shared<FunctionSearch>(); } |
51 | | |
// Returns the static `name` member ("search").
52 | 2 | String get_name() const override { return name; } |
53 | | |
// Always true: the function declares itself variadic.
54 | 3 | bool is_variadic() const override { return true; } |
55 | | |
// Always 0. NOTE(review): presumably a placeholder because is_variadic() is true --
// confirm how IFunction treats this value for variadic functions.
56 | 1 | size_t get_number_of_arguments() const override { return 0; } |
57 | | |
58 | | // We manage nulls explicitly for index pushdown only. |
59 | 3 | bool use_default_implementation_for_nulls() const override { return false; } |
// Both constant-handling overrides below also return false.
// NOTE(review): two differently named overrides exist for this; presumably they come from
// different base interfaces -- confirm against the IFunction declaration.
60 | 4 | bool is_use_default_implementation_for_constants() const override { return false; } |
61 | | |
62 | 2 | bool use_default_implementation_for_constants() const override { return false; } |
63 | | |
// The argument types are ignored (the parameter is unnamed); the return type is always
// a freshly created DataTypeUInt8.
64 | 2 | DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override { |
65 | 2 | return std::make_shared<DataTypeUInt8>(); |
66 | 2 | } |
67 | | |
// Declaration only; the definition lives outside this header. All parameter names are
// commented out in this declaration.
68 | | Status execute_impl(FunctionContext* /*context*/, Block& /*block*/, |
69 | | const ColumnNumbers& /*arguments*/, uint32_t /*result*/, |
70 | | size_t /*input_rows_count*/) const override; |
71 | | |
// Always true: reports that this function can be pushed down to an index.
72 | 1 | bool can_push_down_to_index() const override { return true; } |
73 | | |
// Index-evaluation override (declaration only). `iterators` is taken by value and the
// analyzer context parameter is unnamed. `bitmap_result` is a non-const reference,
// presumably the output of the evaluation -- confirm in the definition.
74 | | Status evaluate_inverted_index( |
75 | | const ColumnsWithTypeAndName& arguments, |
76 | | const std::vector<IndexFieldNameAndTypePair>& data_type_with_names, |
77 | | std::vector<IndexIterator*> iterators, uint32_t num_rows, |
78 | | const InvertedIndexAnalyzerCtx* /*analyzer_ctx*/, |
79 | | InvertedIndexResultBitmap& bitmap_result) const override; |
80 | | |
// Overload taking a TSearchParam plus string-keyed maps of field types and iterators
// (the iterator map is passed by value). `enable_cache` defaults to true here.
// Declaration only.
81 | | Status evaluate_inverted_index_with_search_param( |
82 | | const TSearchParam& search_param, |
83 | | const std::unordered_map<std::string, IndexFieldNameAndTypePair>& data_type_with_names, |
84 | | std::unordered_map<std::string, IndexIterator*> iterators, uint32_t num_rows, |
85 | | InvertedIndexResultBitmap& bitmap_result, bool enable_cache = true) const; |
86 | | |
// Extended overload: same leading parameters, but `enable_cache` has no default here, and
// it additionally takes an IndexExecContext pointer, a field-name -> column-id map and an
// optional IndexQueryContext (defaults to nullptr). Declaration only.
// NOTE(review): whether index_exec_ctx may be null is not visible here -- confirm in the
// definition.
87 | | Status evaluate_inverted_index_with_search_param( |
88 | | const TSearchParam& search_param, |
89 | | const std::unordered_map<std::string, IndexFieldNameAndTypePair>& data_type_with_names, |
90 | | std::unordered_map<std::string, IndexIterator*> iterators, uint32_t num_rows, |
91 | | InvertedIndexResultBitmap& bitmap_result, bool enable_cache, |
92 | | const IndexExecContext* index_exec_ctx, |
93 | | const std::unordered_map<std::string, int>& field_name_to_column_id, |
94 | | const std::shared_ptr<IndexQueryContext>& index_query_context = nullptr) const; |
95 | | |
96 | | // Public methods for testing |
// Three-way classification of clause types; this is the return type of
// get_clause_type_category() below.
97 | | enum class ClauseTypeCategory { |
98 | | NON_TOKENIZED, // TERM, PREFIX, WILDCARD, REGEXP, RANGE, LIST - no tokenization, use EQUAL_QUERY |
99 | | TOKENIZED, // PHRASE, MATCH, ANY, ALL - need tokenization, use MATCH_ANY_QUERY |
100 | | COMPOUND // AND, OR, NOT - boolean operations |
101 | | }; |
102 | | |
// Maps a clause_type string to its ClauseTypeCategory. Declaration only.
// NOTE(review): the handling of unrecognized clause_type strings is not visible here.
103 | | ClauseTypeCategory get_clause_type_category(const std::string& clause_type) const; |
104 | | |
105 | | // Analyze query type for a specific field in the search clause |
106 | | InvertedIndexQueryType analyze_field_query_type(const std::string& field_name, |
107 | | const TSearchClause& clause) const; |
108 | | |
109 | | // Map clause_type string to InvertedIndexQueryType |
110 | | InvertedIndexQueryType clause_type_to_query_type(const std::string& clause_type) const; |
111 | | |
// Builds a query_v2 query from `clause` (declaration only). `out` and `binding_key` are
// pointer parameters, presumably outputs filled in on success -- confirm in the definition.
// `num_rows` defaults to 0. NOTE(review): judging by the name this recurses into child
// clauses; the recursion itself is not visible in this header.
112 | | Status build_query_recursive(const TSearchClause& clause, |
113 | | const std::shared_ptr<IndexQueryContext>& context, |
114 | | FieldReaderResolver& resolver, |
115 | | inverted_index::query_v2::QueryPtr* out, std::string* binding_key, |
116 | | const std::string& default_operator, int32_t minimum_should_match, |
117 | | uint32_t num_rows = 0) const; |
118 | | |
// Same parameter list as build_query_recursive (declaration only); by its name it handles
// a single leaf clause -- confirm in the definition. `num_rows` defaults to 0.
119 | | Status build_leaf_query(const TSearchClause& clause, |
120 | | const std::shared_ptr<IndexQueryContext>& context, |
121 | | FieldReaderResolver& resolver, inverted_index::query_v2::QueryPtr* out, |
122 | | std::string* binding_key, const std::string& default_operator, |
123 | | int32_t minimum_should_match, uint32_t num_rows = 0) const; |
124 | | }; |
125 | | |
126 | | } // namespace doris |