Coverage Report

Created: 2026-05-27 20:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/variant_inverted_index_search.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <CLucene.h>
21
#include <gen_cpp/Exprs_types.h>
22
23
#include <cstdint>
24
#include <functional>
25
#include <map>
26
#include <memory>
27
#include <roaring/roaring.hh>
28
#include <string>
29
#include <unordered_map>
30
#include <unordered_set>
31
#include <utility>
32
#include <vector>
33
34
#include "common/status.h"
35
#include "core/block/columns_with_type_and_name.h"
36
#include "core/data_type/data_type.h"
37
#include "storage/index/index_query_context.h"
38
#include "storage/index/inverted/inverted_index_cache.h"
39
#include "storage/index/inverted/inverted_index_iterator.h"
40
#include "storage/index/inverted/inverted_index_reader.h"
41
#include "storage/index/inverted/query_v2/query.h"
42
#include "storage/index/inverted/query_v2/weight.h"
43
#include "storage/olap_common.h"
44
#include "storage/segment/column_reader.h"
45
46
namespace doris::segment_v2::inverted_index::query_v2 {
47
class Query;
48
}
49
50
namespace doris::segment_v2 {
51
class NestedGroupReadProvider;
52
struct NestedGroupReader;
53
class VariantColumnReader;
54
} // namespace doris::segment_v2
55
56
namespace doris {
57
58
// NOTE(review): this using-directive sits at namespace scope inside a header, so every
// translation unit that includes this file gets doris::segment_v2 names injected into
// namespace doris. Unqualified names used further down in this header (for example
// IndexIterator) appear to rely on it; removing it would require qualifying them — confirm.
using namespace doris::segment_v2;
59
60
class FunctionSearch;
61
class IndexExecContext;
62
63
// Callback signature: (field name, pointer to a shared Query pointer) -> Status.
// FieldReaderResolver::map_leaf_query in this header invokes it as (field_name, query) and
// returns its Status unchanged. The query argument is a pointer to a shared_ptr, so an
// implementation can presumably replace *query in place — confirm against the callers.
using SearchLeafQueryMapper = std::function<Status(
64
        const std::string&, std::shared_ptr<segment_v2::inverted_index::query_v2::Query>*)>;
65
66
// State tag carried by FieldReaderBinding::state. FieldReaderBinding defaults it to
// MISSING_IN_SEGMENT and its is_bound() treats BOUND as "bound".
// Presumably MISSING_IN_SEGMENT means the field has no index in the current segment —
// the code that assigns these values is not in this header; confirm there.
enum class SearchFieldBindingState {
67
    BOUND,
68
    MISSING_IN_SEGMENT,
69
};
70
71
// Plain aggregate describing one resolved field: its names, type, query type, the reader
// handles attached to it, and a state tag. It is the output type of
// FieldReaderResolver::resolve and the value type of the resolver's binding cache.
// The meaning of the individual name/key fields is set by code outside this header.
struct FieldReaderBinding {
72
    std::string logical_field_name;
73
    std::string stored_field_name;
74
    std::wstring stored_field_wstr; // presumably the wide-string form of stored_field_name — confirm
75
    DataTypePtr column_type;
76
    // NOTE(review): unlike `state` below, this member has no default member initializer.
    // If InvertedIndexQueryType is an enum/scalar type, a default-initialized
    // FieldReaderBinding leaves it indeterminate until assigned — confirm and consider
    // giving it an explicit default.
    InvertedIndexQueryType query_type;
77
    InvertedIndexReaderPtr inverted_reader;
78
    std::shared_ptr<lucene::index::IndexReader> lucene_reader;
79
    std::map<std::string, std::string> index_properties;
80
    std::string binding_key;
81
    std::string analyzer_key;
82
    SearchFieldBindingState state = SearchFieldBindingState::MISSING_IN_SEGMENT;
83
84
12
    // True when `state` is BOUND, or when either reader handle is non-null. A binding that
    // has a reader attached therefore counts as bound even if `state` still holds its
    // MISSING_IN_SEGMENT default.
    bool is_bound() const {
85
12
        return state == SearchFieldBindingState::BOUND || inverted_reader != nullptr ||
86
12
               lucene_reader != nullptr;
87
12
    }
88
6
    // True only when inverted_reader is set and lucene_reader is not.
    // The is_bound() term cannot change the result: inverted_reader != nullptr already
    // makes is_bound() true.
    bool use_direct_index_reader() const {
89
6
        return is_bound() && inverted_reader != nullptr && lucene_reader == nullptr;
90
6
    }
91
};
92
93
// Resolves field names to FieldReaderBinding objects and exposes the readers, bindings and
// iterators accumulated along the way. The constructor and resolve() are only declared
// here; their definitions are not in this header.
//
// Lifetime: _data_type_with_names and _iterators are reference members, presumably bound to
// the constructor arguments of the same names, so those maps must outlive this object.
class FieldReaderResolver {
94
public:
95
    // field_bindings defaults to an empty vector; the class keeps its own vector
    // (_field_bindings), while the two maps are held by reference (see the members below).
    FieldReaderResolver(
96
            const std::unordered_map<std::string, IndexFieldNameAndTypePair>& data_type_with_names,
97
            const std::unordered_map<std::string, IndexIterator*>& iterators,
98
            std::shared_ptr<IndexQueryContext> context,
99
            const std::vector<TSearchFieldBinding>& field_bindings = {});
100
101
    // Declared only. Writes its result through `binding` and reports failure via Status.
    // Presumably consults and fills _cache — confirm against the definition.
    Status resolve(const std::string& field_name, InvertedIndexQueryType query_type,
102
                   FieldReaderBinding* binding);
103
104
33
    // True when field_name is present in _variant_subcolumn_fields.
    bool is_variant_subcolumn(const std::string& field_name) const {
105
33
        return _variant_subcolumn_fields.count(field_name) > 0;
106
33
    }
107
108
2
    // Read-only view of _readers.
    const std::vector<std::shared_ptr<lucene::index::IndexReader>>& readers() const {
109
2
        return _readers;
110
2
    }
111
112
    // Read-only view of _binding_readers (string key -> reader).
    const std::unordered_map<std::string, std::shared_ptr<lucene::index::IndexReader>>&
113
1
    reader_bindings() const {
114
1
        return _binding_readers;
115
1
    }
116
117
    // Read-only view of _field_readers (wide-string key -> reader).
    const std::unordered_map<std::wstring, std::shared_ptr<lucene::index::IndexReader>>&
118
1
    field_readers() const {
119
1
        return _field_readers;
120
1
    }
121
122
4
    // Read-only view of _cache (string key -> FieldReaderBinding).
    const std::unordered_map<std::string, FieldReaderBinding>& binding_cache() const {
123
4
        return _cache;
124
4
    }
125
126
2
    // Looks up field_name in _iterators; returns the mapped pointer, or nullptr when the
    // map has no entry for that name. The returned pointer is non-owning.
    IndexIterator* get_iterator(const std::string& field_name) const {
127
2
        auto it = _iterators.find(field_name);
128
2
        return (it != _iterators.end()) ? it->second : nullptr;
129
2
    }
130
131
2
    // Replaces the stored leaf-query mapper; the argument is taken by value and moved in.
    void set_leaf_query_mapper(SearchLeafQueryMapper mapper) {
132
2
        _leaf_query_mapper = std::move(mapper);
133
2
    }
134
135
    // Forwards (field_name, query) to the installed mapper and returns its Status.
    // Returns Status::OK() without calling anything when no mapper is installed, when
    // `query` is null, or when `*query` is null.
    Status map_leaf_query(
136
            const std::string& field_name,
137
5
            std::shared_ptr<segment_v2::inverted_index::query_v2::Query>* query) const {
138
5
        if (!_leaf_query_mapper || query == nullptr || *query == nullptr) {
139
3
            return Status::OK();
140
3
        }
141
2
        return _leaf_query_mapper(field_name, query);
142
5
    }
143
144
private:
145
    // Builds the key "<stored_field_name>#<query_type cast to int>".
    std::string binding_key_for(const std::string& stored_field_name,
146
31
                                InvertedIndexQueryType query_type) const {
147
31
        return stored_field_name + "#" + std::to_string(static_cast<int>(query_type));
148
31
    }
149
150
    // Reference members: not owned, must outlive this object.
    const std::unordered_map<std::string, IndexFieldNameAndTypePair>& _data_type_with_names;
151
    const std::unordered_map<std::string, IndexIterator*>& _iterators;
152
    std::shared_ptr<IndexQueryContext> _context;
153
    std::vector<TSearchFieldBinding> _field_bindings;
154
    // NOTE(review): holds raw pointers to TSearchFieldBinding. If these point into
    // _field_bindings above, an implicitly generated copy of this object would keep
    // pointers into the source object's vector — confirm against the constructor and
    // consider deleting the copy constructor.
    std::unordered_map<std::string, const TSearchFieldBinding*> _field_binding_map;
155
    std::unordered_set<std::string> _variant_subcolumn_fields;
156
    std::unordered_map<std::string, FieldReaderBinding> _cache;
157
    std::vector<std::shared_ptr<lucene::index::IndexReader>> _readers;
158
    std::unordered_map<std::string, std::shared_ptr<lucene::index::IndexReader>> _binding_readers;
159
    std::unordered_map<std::wstring, std::shared_ptr<lucene::index::IndexReader>> _field_readers;
160
    // Not read anywhere in this header; presumably keeps searcher-cache entries alive
    // while the readers above are in use — confirm against the definition of resolve().
    std::vector<segment_v2::InvertedIndexCacheHandle> _searcher_cache_handles;
161
    SearchLeafQueryMapper _leaf_query_mapper;
162
};
163
164
// NullBitmapResolver implementation that keeps a reference to a FieldReaderResolver.
// The resolver is not owned and must outlive this adapter.
class VariantSearchNullBitmapAdapter final : public inverted_index::query_v2::NullBitmapResolver {
165
public:
166
    // Stores a reference to `resolver`; nothing is copied.
    explicit VariantSearchNullBitmapAdapter(const FieldReaderResolver& resolver)
167
1
            : _resolver(resolver) {}
168
169
    // Override declared here, defined elsewhere. Presumably looks the iterator up through
    // _resolver by logical_field — confirm against the definition.
    segment_v2::IndexIterator* iterator_for(const inverted_index::query_v2::Scorer& scorer,
170
                                            const std::string& logical_field) const override;
171
172
private:
173
    const FieldReaderResolver& _resolver;
174
};
175
176
// Declaration only; the definition is not in this header.
// Takes the resolver read-only and the execution context through a mutable pointer, so any
// output goes into *exec_ctx. Presumably copies the resolver's readers/bindings into the
// context — confirm against the definition, including whether exec_ctx may be null.
void populate_variant_search_binding_context(
177
        const FieldReaderResolver& resolver,
178
        inverted_index::query_v2::QueryExecutionContext* exec_ctx);
179
180
// Declaration only; the definition is not in this header.
// Returns a QueryExecutionContext by value. null_resolver is passed as a raw, non-owning
// pointer; whether the returned context retains it (and therefore how long it must stay
// alive) is not visible here — confirm against the definition.
inverted_index::query_v2::QueryExecutionContext build_variant_search_query_execution_context(
181
        uint32_t segment_num_rows, const FieldReaderResolver& resolver,
182
        inverted_index::query_v2::NullBitmapResolver* null_resolver);
183
184
// Parameter bundle passed to map_variant_nested_leaf_query_to_active_group.
// All pointers (the chain elements, variant_reader, read_provider) are raw and non-owning;
// the two single pointers default to nullptr.
struct VariantNestedDocMapperContext {
185
    std::string root_field;
186
    // Ordering of the chain (outermost-first vs innermost-first) is not shown here — confirm.
    std::vector<const segment_v2::NestedGroupReader*> active_group_chain;
187
    const segment_v2::VariantColumnReader* variant_reader = nullptr;
188
    const segment_v2::NestedGroupReadProvider* read_provider = nullptr;
189
    segment_v2::ColumnIteratorOptions column_iter_opts;
190
};
191
192
// Declaration only; the definition is not in this header.
// `query` is a pointer to a QueryPtr, so the function can replace the caller's query in
// place; failure is reported through the returned Status. Its parameters resemble the
// SearchLeafQueryMapper callback shape, so it is presumably meant to be wrapped as one
// with `context` bound — confirm against the call sites.
Status map_variant_nested_leaf_query_to_active_group(const VariantNestedDocMapperContext& context,
193
                                                     const std::string& logical_field_name,
194
                                                     inverted_index::query_v2::QueryPtr* query);
195
196
// Declaration only; the definition is not in this header.
// Takes the child query, the reader chain and the iterator options by value and returns a
// QueryPtr. read_provider is a raw, non-owning pointer; whether the returned query keeps
// it (and so requires it to outlive the query) is not visible here — confirm.
inverted_index::query_v2::QueryPtr make_variant_nested_doc_mapping_query(
197
        inverted_index::query_v2::QueryPtr child_query,
198
        std::vector<const segment_v2::NestedGroupReader*> child_to_parent_chain,
199
        const segment_v2::NestedGroupReadProvider* read_provider,
200
        segment_v2::ColumnIteratorOptions column_iter_opts);
201
202
// Evaluator that keeps a reference to a FunctionSearch. The FunctionSearch is not owned
// and must outlive this object.
class VariantNestedSearchEvaluator {
203
public:
204
    // Stores a reference to `function_search`; nothing is copied.
    explicit VariantNestedSearchEvaluator(const FunctionSearch& function_search)
205
7
            : _function_search(function_search) {}
206
207
    // Declared only; defined elsewhere. `resolver` and `result_bitmap` are taken by
    // non-const reference, so both may be modified; `index_exec_ctx` is a raw pointer
    // whose nullability is not visible here. Presumably the matching rows are written to
    // result_bitmap — confirm against the definition.
    Status evaluate(const TSearchParam& search_param, const TSearchClause& nested_clause,
208
                    const std::shared_ptr<segment_v2::IndexQueryContext>& context,
209
                    FieldReaderResolver& resolver, uint32_t num_rows,
210
                    const IndexExecContext* index_exec_ctx,
211
                    const std::unordered_map<std::string, int>& field_name_to_column_id,
212
                    std::shared_ptr<roaring::Roaring>& result_bitmap) const;
213
214
private:
215
    const FunctionSearch& _function_search;
216
};
217
218
} // namespace doris