Coverage Report

Created: 2026-03-13 09:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_search.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <CLucene.h>
#include <gen_cpp/Exprs_types.h>

#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "core/block/block.h"
#include "core/data_type/data_type.h"
#include "core/data_type/data_type_number.h"
#include "core/types.h"
#include "exprs/function/function.h"
#include "storage/index/index_query_context.h"
#include "storage/index/inverted/inverted_index_cache.h"
#include "storage/index/inverted/query_v2/boolean_query/operator_boolean_query.h"
37
38
CL_NS_USE(index)
39
namespace doris {
40
41
using namespace doris::segment_v2;
42
43
class IndexExecContext;
44
45
struct FieldReaderBinding {
46
    std::string logical_field_name;
47
    std::string stored_field_name;
48
    std::wstring stored_field_wstr;
49
    DataTypePtr column_type;
50
    InvertedIndexQueryType query_type;
51
    InvertedIndexReaderPtr inverted_reader;
52
    std::shared_ptr<lucene::index::IndexReader> lucene_reader;
53
    std::map<std::string, std::string> index_properties;
54
    std::string binding_key;
55
    std::string analyzer_key;
56
};
57
58
class FieldReaderResolver {
59
public:
60
    FieldReaderResolver(
61
            const std::unordered_map<std::string, IndexFieldNameAndTypePair>& data_type_with_names,
62
            const std::unordered_map<std::string, IndexIterator*>& iterators,
63
            std::shared_ptr<IndexQueryContext> context,
64
            const std::vector<TSearchFieldBinding>& field_bindings = {})
65
1.14k
            : _data_type_with_names(data_type_with_names),
66
1.14k
              _iterators(iterators),
67
1.14k
              _context(std::move(context)),
68
1.14k
              _field_bindings(field_bindings) {
69
        // Build lookup maps for quick access
70
1.62k
        for (const auto& binding : _field_bindings) {
71
1.63k
            if (binding.__isset.is_variant_subcolumn && binding.is_variant_subcolumn) {
72
65
                _variant_subcolumn_fields.insert(binding.field_name);
73
65
            }
74
1.62k
            _field_binding_map[binding.field_name] = &binding;
75
1.62k
        }
76
1.14k
    }
77
78
    Status resolve(const std::string& field_name, InvertedIndexQueryType query_type,
79
                   FieldReaderBinding* binding);
80
81
    // Check if a field is a variant subcolumn
82
1.96k
    bool is_variant_subcolumn(const std::string& field_name) const {
83
1.96k
        return _variant_subcolumn_fields.count(field_name) > 0;
84
1.96k
    }
85
86
1.08k
    const std::vector<std::shared_ptr<lucene::index::IndexReader>>& readers() const {
87
1.08k
        return _readers;
88
1.08k
    }
89
90
    const std::unordered_map<std::string, std::shared_ptr<lucene::index::IndexReader>>&
91
1.08k
    reader_bindings() const {
92
1.08k
        return _binding_readers;
93
1.08k
    }
94
95
    const std::unordered_map<std::wstring, std::shared_ptr<lucene::index::IndexReader>>&
96
1.10k
    field_readers() const {
97
1.10k
        return _field_readers;
98
1.10k
    }
99
100
1.10k
    const std::unordered_map<std::string, FieldReaderBinding>& binding_cache() const {
101
1.10k
        return _cache;
102
1.10k
    }
103
104
1.59k
    IndexIterator* get_iterator(const std::string& field_name) const {
105
1.59k
        auto it = _iterators.find(field_name);
106
18.4E
        return (it != _iterators.end()) ? it->second : nullptr;
107
1.59k
    }
108
109
private:
110
    std::string binding_key_for(const std::string& stored_field_name,
111
1.92k
                                InvertedIndexQueryType query_type) const {
112
1.92k
        return stored_field_name + "#" + std::to_string(static_cast<int>(query_type));
113
1.92k
    }
114
115
    const std::unordered_map<std::string, IndexFieldNameAndTypePair>& _data_type_with_names;
116
    const std::unordered_map<std::string, IndexIterator*>& _iterators;
117
    std::shared_ptr<IndexQueryContext> _context;
118
    std::vector<TSearchFieldBinding> _field_bindings;
119
    std::unordered_map<std::string, const TSearchFieldBinding*> _field_binding_map;
120
    std::unordered_set<std::string> _variant_subcolumn_fields;
121
    std::unordered_map<std::string, FieldReaderBinding> _cache;
122
    std::vector<std::shared_ptr<lucene::index::IndexReader>> _readers;
123
    std::unordered_map<std::string, std::shared_ptr<lucene::index::IndexReader>> _binding_readers;
124
    std::unordered_map<std::wstring, std::shared_ptr<lucene::index::IndexReader>> _field_readers;
125
    // Keep searcher cache handles alive for the resolver's lifetime.
126
    // This pins cached IndexSearcher entries so extracted IndexReaders remain valid.
127
    std::vector<segment_v2::InvertedIndexCacheHandle> _searcher_cache_handles;
128
};
129
130
class FunctionSearch : public IFunction {
131
public:
132
    static constexpr auto name = "search";
133
134
10
    static FunctionPtr create() { return std::make_shared<FunctionSearch>(); }
135
136
2
    String get_name() const override { return name; }
137
138
3
    bool is_variadic() const override { return true; }
139
140
1
    size_t get_number_of_arguments() const override { return 0; }
141
142
    // We manage nulls explicitly for index pushdown only.
143
5
    bool use_default_implementation_for_nulls() const override { return false; }
144
6
    bool is_use_default_implementation_for_constants() const override { return false; }
145
146
4
    bool use_default_implementation_for_constants() const override { return false; }
147
148
2
    DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override {
149
2
        return std::make_shared<DataTypeUInt8>();
150
2
    }
151
152
    Status execute_impl(FunctionContext* /*context*/, Block& /*block*/,
153
                        const ColumnNumbers& /*arguments*/, uint32_t /*result*/,
154
                        size_t /*input_rows_count*/) const override;
155
156
1
    bool can_push_down_to_index() const override { return true; }
157
158
    Status evaluate_inverted_index(
159
            const ColumnsWithTypeAndName& arguments,
160
            const std::vector<IndexFieldNameAndTypePair>& data_type_with_names,
161
            std::vector<IndexIterator*> iterators, uint32_t num_rows,
162
            const InvertedIndexAnalyzerCtx* /*analyzer_ctx*/,
163
            InvertedIndexResultBitmap& bitmap_result) const override;
164
165
    Status evaluate_inverted_index_with_search_param(
166
            const TSearchParam& search_param,
167
            const std::unordered_map<std::string, IndexFieldNameAndTypePair>& data_type_with_names,
168
            std::unordered_map<std::string, IndexIterator*> iterators, uint32_t num_rows,
169
            InvertedIndexResultBitmap& bitmap_result, bool enable_cache = true) const;
170
171
    Status evaluate_inverted_index_with_search_param(
172
            const TSearchParam& search_param,
173
            const std::unordered_map<std::string, IndexFieldNameAndTypePair>& data_type_with_names,
174
            std::unordered_map<std::string, IndexIterator*> iterators, uint32_t num_rows,
175
            InvertedIndexResultBitmap& bitmap_result, bool enable_cache,
176
            const IndexExecContext* index_exec_ctx,
177
            const std::unordered_map<std::string, int>& field_name_to_column_id) const;
178
179
    Status evaluate_nested_query(
180
            const TSearchParam& search_param, const TSearchClause& nested_clause,
181
            const std::shared_ptr<IndexQueryContext>& context, FieldReaderResolver& resolver,
182
            uint32_t num_rows, const IndexExecContext* index_exec_ctx,
183
            const std::unordered_map<std::string, int>& field_name_to_column_id,
184
            std::shared_ptr<roaring::Roaring>& result_bitmap) const;
185
186
    // Public methods for testing
187
    enum class ClauseTypeCategory {
188
        NON_TOKENIZED, // TERM, PREFIX, WILDCARD, REGEXP, RANGE, LIST - no tokenization, use EQUAL_QUERY
189
        TOKENIZED,     // PHRASE, MATCH, ANY, ALL - need tokenization, use MATCH_ANY_QUERY
190
        COMPOUND       // AND, OR, NOT - boolean operations
191
    };
192
193
    ClauseTypeCategory get_clause_type_category(const std::string& clause_type) const;
194
195
    // Analyze query type for a specific field in the search clause
196
    InvertedIndexQueryType analyze_field_query_type(const std::string& field_name,
197
                                                    const TSearchClause& clause) const;
198
199
    // Map clause_type string to InvertedIndexQueryType
200
    InvertedIndexQueryType clause_type_to_query_type(const std::string& clause_type) const;
201
202
    Status build_query_recursive(const TSearchClause& clause,
203
                                 const std::shared_ptr<IndexQueryContext>& context,
204
                                 FieldReaderResolver& resolver,
205
                                 inverted_index::query_v2::QueryPtr* out, std::string* binding_key,
206
                                 const std::string& default_operator,
207
                                 int32_t minimum_should_match) const;
208
209
    Status build_leaf_query(const TSearchClause& clause,
210
                            const std::shared_ptr<IndexQueryContext>& context,
211
                            FieldReaderResolver& resolver, inverted_index::query_v2::QueryPtr* out,
212
                            std::string* binding_key, const std::string& default_operator,
213
                            int32_t minimum_should_match) const;
214
};
215
216
} // namespace doris