Coverage Report

Created: 2026-05-28 00:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/variant_inverted_index_search.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exprs/function/variant_inverted_index_search.h"
19
20
#include <CLucene/config/repl_wchar.h>
21
#include <fmt/format.h>
22
#include <glog/logging.h>
23
24
#include <limits>
25
#include <memory>
26
#include <utility>
27
28
#include "common/config.h"
29
#include "common/exception.h"
30
#include "common/logging.h"
31
#include "exprs/function/function_search.h"
32
#include "exprs/vexpr_context.h"
33
#include "runtime/runtime_state.h"
34
#include "storage/index/index_file_reader.h"
35
#include "storage/index/inverted/analyzer/analyzer.h"
36
#include "storage/index/inverted/inverted_index_compound_reader.h"
37
#include "storage/index/inverted/inverted_index_parser.h"
38
#include "storage/index/inverted/inverted_index_searcher.h"
39
#include "storage/index/inverted/query_v2/bit_set_query/bit_set_scorer.h"
40
#include "storage/index/inverted/query_v2/doc_set.h"
41
#include "storage/index/inverted/query_v2/scorer.h"
42
#include "storage/index/inverted/query_v2/term_query/term_query.h"
43
#include "storage/index/inverted/query_v2/weight.h"
44
#include "storage/index/inverted/util/string_helper.h"
45
#include "storage/segment/segment.h"
46
#include "storage/segment/variant/nested_group_path.h"
47
#include "storage/segment/variant/nested_group_provider.h"
48
#include "storage/segment/variant/variant_column_reader.h"
49
#include "storage/utils.h"
50
#include "util/debug_points.h"
51
#include "util/time.h"
52
53
namespace doris {
54
55
namespace query_v2 = segment_v2::inverted_index::query_v2;
56
57
namespace {
58
59
void add_search_binding_diagnostic(const std::shared_ptr<IndexQueryContext>& context,
60
5
                                   const std::string& diagnostic) {
61
5
    VLOG_DEBUG << diagnostic;
62
5
    if (context != nullptr && context->stats != nullptr) {
63
0
        context->stats->inverted_index_stats.add_binding_diagnostic(diagnostic);
64
0
    }
65
5
}
66
67
} // namespace
68
69
FieldReaderResolver::FieldReaderResolver(
70
        const std::unordered_map<std::string, IndexFieldNameAndTypePair>& data_type_with_names,
71
        const std::unordered_map<std::string, IndexIterator*>& iterators,
72
        std::shared_ptr<IndexQueryContext> context,
73
        const std::vector<TSearchFieldBinding>& field_bindings)
74
43
        : _data_type_with_names(data_type_with_names),
75
43
          _iterators(iterators),
76
43
          _context(std::move(context)),
77
43
          _field_bindings(field_bindings) {
78
43
    for (const auto& binding : _field_bindings) {
79
6
        if (binding.__isset.is_variant_subcolumn && binding.is_variant_subcolumn) {
80
5
            _variant_subcolumn_fields.insert(binding.field_name);
81
5
        }
82
6
        _field_binding_map[binding.field_name] = &binding;
83
6
    }
84
43
}
85
86
Status FieldReaderResolver::resolve(const std::string& field_name,
87
                                    InvertedIndexQueryType query_type,
88
33
                                    FieldReaderBinding* binding) {
89
33
    DCHECK(binding != nullptr);
90
91
33
    const bool is_variant_sub = is_variant_subcolumn(field_name);
92
93
33
    auto data_it = _data_type_with_names.find(field_name);
94
33
    if (data_it == _data_type_with_names.end()) {
95
6
        if (is_variant_sub) {
96
1
            add_search_binding_diagnostic(
97
1
                    _context,
98
1
                    fmt::format("[VariantSearchBinding] phase=field_resolve result=no_metadata "
99
1
                                "logical_field={} query_type={} reason=field_not_found",
100
1
                                field_name, query_type_to_string(query_type)));
101
1
            *binding = FieldReaderBinding();
102
1
            return Status::OK();
103
1
        }
104
5
        return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
105
5
                "field '{}' not found in inverted index metadata", field_name);
106
6
    }
107
108
27
    const auto& stored_field_name = data_it->second.first;
109
27
    const auto binding_key = binding_key_for(stored_field_name, query_type);
110
111
27
    auto cache_it = _cache.find(binding_key);
112
27
    if (cache_it != _cache.end()) {
113
4
        *binding = cache_it->second;
114
4
        return Status::OK();
115
4
    }
116
117
23
    auto iterator_it = _iterators.find(field_name);
118
23
    if (iterator_it == _iterators.end() || iterator_it->second == nullptr) {
119
18
        if (is_variant_sub) {
120
1
            add_search_binding_diagnostic(
121
1
                    _context,
122
1
                    fmt::format("[VariantSearchBinding] phase=field_resolve result=no_iterator "
123
1
                                "logical_field={} stored_field={} query_type={} "
124
1
                                "reason=iterator_not_found",
125
1
                                field_name, stored_field_name, query_type_to_string(query_type)));
126
1
            *binding = FieldReaderBinding();
127
1
            return Status::OK();
128
1
        }
129
17
        return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
130
17
                "iterator not found for field '{}'", field_name);
131
18
    }
132
133
5
    auto* inverted_iterator = dynamic_cast<InvertedIndexIterator*>(iterator_it->second);
134
5
    if (inverted_iterator == nullptr) {
135
2
        return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
136
2
                "iterator for field '{}' is not InvertedIndexIterator", field_name);
137
2
    }
138
139
3
    InvertedIndexQueryType effective_query_type = query_type;
140
3
    const auto& column_type = data_it->second.second;
141
3
    const bool is_text_field =
142
3
            column_type != nullptr && is_string_type(column_type->get_storage_field_type());
143
3
    auto fb_it = _field_binding_map.find(field_name);
144
3
    std::string analyzer_key;
145
3
    if (is_text_field && is_variant_sub && fb_it != _field_binding_map.end() &&
146
3
        fb_it->second->__isset.index_properties && !fb_it->second->index_properties.empty()) {
147
1
        analyzer_key = normalize_analyzer_key(
148
1
                build_analyzer_key_from_properties(fb_it->second->index_properties));
149
1
        if (inverted_index::InvertedIndexAnalyzer::should_analyzer(
150
1
                    fb_it->second->index_properties) &&
151
1
            (effective_query_type == InvertedIndexQueryType::EQUAL_QUERY ||
152
1
             effective_query_type == InvertedIndexQueryType::WILDCARD_QUERY)) {
153
1
            effective_query_type = InvertedIndexQueryType::MATCH_ANY_QUERY;
154
1
        }
155
1
    }
156
157
3
    Result<InvertedIndexReaderPtr> reader_result;
158
3
    if (column_type) {
159
3
        reader_result = inverted_iterator->select_best_reader(column_type, effective_query_type,
160
3
                                                              is_text_field ? analyzer_key : "");
161
3
    } else {
162
0
        reader_result = inverted_iterator->select_best_reader(is_text_field ? analyzer_key : "");
163
0
    }
164
165
3
    if (!reader_result.has_value()) {
166
1
        if (is_variant_sub) {
167
1
            add_search_binding_diagnostic(
168
1
                    _context,
169
1
                    fmt::format("[VariantSearchBinding] phase=field_resolve result=reject "
170
1
                                "logical_field={} stored_field={} query_type={} "
171
1
                                "effective_query_type={} analyzer_key={} reason={}",
172
1
                                field_name, stored_field_name, query_type_to_string(query_type),
173
1
                                query_type_to_string(effective_query_type), analyzer_key,
174
1
                                reader_result.error().to_string()));
175
1
        }
176
1
        return reader_result.error();
177
1
    }
178
179
2
    auto inverted_reader = reader_result.value();
180
2
    if (inverted_reader == nullptr) {
181
0
        if (is_variant_sub) {
182
0
            add_search_binding_diagnostic(
183
0
                    _context,
184
0
                    fmt::format("[VariantSearchBinding] phase=field_resolve result=reject "
185
0
                                "logical_field={} stored_field={} query_type={} "
186
0
                                "effective_query_type={} reason=selected_reader_null",
187
0
                                field_name, stored_field_name, query_type_to_string(query_type),
188
0
                                query_type_to_string(effective_query_type)));
189
0
        }
190
0
        return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
191
0
                "selected reader is null for field '{}'", field_name);
192
0
    }
193
194
2
    FieldReaderBinding resolved;
195
2
    resolved.logical_field_name = field_name;
196
2
    resolved.stored_field_name = stored_field_name;
197
2
    resolved.stored_field_wstr = StringHelper::to_wstring(resolved.stored_field_name);
198
2
    resolved.column_type = column_type;
199
2
    resolved.query_type = effective_query_type;
200
2
    resolved.inverted_reader = inverted_reader;
201
2
    resolved.binding_key = binding_key;
202
2
    resolved.state = SearchFieldBindingState::BOUND;
203
2
    if (fb_it != _field_binding_map.end() && fb_it->second->__isset.index_properties &&
204
2
        !fb_it->second->index_properties.empty()) {
205
1
        resolved.index_properties = fb_it->second->index_properties;
206
1
    } else {
207
1
        resolved.index_properties = inverted_reader->get_index_properties();
208
1
    }
209
2
    resolved.analyzer_key =
210
2
            normalize_analyzer_key(build_analyzer_key_from_properties(resolved.index_properties));
211
212
2
    auto index_file_reader = inverted_reader->get_index_file_reader();
213
2
    if (index_file_reader == nullptr) {
214
1
        if (is_variant_sub) {
215
1
            add_search_binding_diagnostic(
216
1
                    _context,
217
1
                    fmt::format("[VariantSearchBinding] phase=field_resolve result=reject "
218
1
                                "logical_field={} stored_field={} index_id={} suffix={} "
219
1
                                "reason=index_file_reader_null",
220
1
                                field_name, stored_field_name, inverted_reader->get_index_id(),
221
1
                                inverted_reader->get_index_meta().get_index_suffix()));
222
1
        }
223
1
        return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
224
1
                "index file reader is null for field '{}'", field_name);
225
1
    }
226
227
1
    if (inverted_reader->type() == InvertedIndexReaderType::BKD) {
228
1
        _cache.emplace(binding_key, resolved);
229
1
        if (is_variant_sub) {
230
1
            bool index_file_exists = false;
231
1
            auto probe_status = index_file_reader->index_file_exist(
232
1
                    &inverted_reader->get_index_meta(), &index_file_exists);
233
1
            add_search_binding_diagnostic(
234
1
                    _context,
235
1
                    fmt::format("[VariantSearchBinding] phase=field_resolve result=selected_direct "
236
1
                                "logical_field={} stored_field={} query_type={} "
237
1
                                "effective_query_type={} index_id={} suffix={} reader_type={} "
238
1
                                "index_file_exists={} probe_status={} index_file={}",
239
1
                                field_name, stored_field_name, query_type_to_string(query_type),
240
1
                                query_type_to_string(effective_query_type),
241
1
                                inverted_reader->get_index_id(),
242
1
                                inverted_reader->get_index_meta().get_index_suffix(),
243
1
                                reader_type_to_string(inverted_reader->type()), index_file_exists,
244
1
                                probe_status.ok() ? "OK" : probe_status.to_string(),
245
1
                                index_file_reader->get_index_file_path(
246
1
                                        &inverted_reader->get_index_meta())));
247
1
        }
248
1
        *binding = resolved;
249
1
        return Status::OK();
250
1
    }
251
252
0
    auto index_file_key =
253
0
            index_file_reader->get_index_file_cache_key(&inverted_reader->get_index_meta());
254
0
    InvertedIndexSearcherCache::CacheKey searcher_cache_key(index_file_key);
255
0
    InvertedIndexCacheHandle searcher_cache_handle;
256
257
0
    bool searcher_cache_enabled =
258
0
            _context->runtime_state != nullptr &&
259
0
            _context->runtime_state->query_options().enable_inverted_index_searcher_cache;
260
261
0
    bool cache_hit = false;
262
0
    if (searcher_cache_enabled) {
263
0
        int64_t lookup_dummy = 0;
264
0
        SCOPED_RAW_TIMER(_context->stats ? &_context->stats->inverted_index_lookup_timer
265
0
                                         : &lookup_dummy);
266
0
        cache_hit = InvertedIndexSearcherCache::instance()->lookup(searcher_cache_key,
267
0
                                                                   &searcher_cache_handle);
268
0
    }
269
270
0
    std::shared_ptr<lucene::index::IndexReader> reader_holder;
271
0
    if (cache_hit) {
272
0
        if (_context->stats) {
273
0
            _context->stats->inverted_index_searcher_cache_hit++;
274
0
        }
275
0
        auto searcher_variant = searcher_cache_handle.get_index_searcher();
276
0
        auto* searcher_ptr = std::get_if<FulltextIndexSearcherPtr>(&searcher_variant);
277
0
        if (searcher_ptr != nullptr && *searcher_ptr != nullptr) {
278
0
            reader_holder = std::shared_ptr<lucene::index::IndexReader>(
279
0
                    (*searcher_ptr)->getReader(), [](lucene::index::IndexReader*) {});
280
0
        }
281
0
    }
282
283
0
    if (!reader_holder) {
284
0
        if (_context->stats) {
285
0
            _context->stats->inverted_index_searcher_cache_miss++;
286
0
        }
287
0
        int64_t dummy_timer = 0;
288
0
        SCOPED_RAW_TIMER(_context->stats ? &_context->stats->inverted_index_searcher_open_timer
289
0
                                         : &dummy_timer);
290
0
        RETURN_IF_ERROR(
291
0
                index_file_reader->init(config::inverted_index_read_buffer_size, _context->io_ctx));
292
0
        auto directory = DORIS_TRY(
293
0
                index_file_reader->open(&inverted_reader->get_index_meta(), _context->io_ctx));
294
295
0
        auto index_searcher_builder = DORIS_TRY(
296
0
                IndexSearcherBuilder::create_index_searcher_builder(inverted_reader->type()));
297
0
        auto searcher_result =
298
0
                DORIS_TRY(index_searcher_builder->get_index_searcher(directory.get()));
299
0
        auto reader_size = index_searcher_builder->get_reader_size();
300
301
0
        auto* stream = static_cast<DorisCompoundReader*>(directory.get())->getDorisIndexInput();
302
0
        DBUG_EXECUTE_IF(
303
0
                "FieldReaderResolver.resolve.io_ctx", ({
304
0
                    const auto* cur_io_ctx = (const io::IOContext*)stream->getIoContext();
305
0
                    if (cur_io_ctx->file_cache_stats) {
306
0
                        if (cur_io_ctx->file_cache_stats != &_context->stats->file_cache_stats) {
307
0
                            LOG(FATAL) << "search: io_ctx file_cache_stats mismatch: "
308
0
                                       << cur_io_ctx->file_cache_stats << " vs "
309
0
                                       << &_context->stats->file_cache_stats;
310
0
                        }
311
0
                    }
312
0
                }));
313
0
        stream->setIoContext(nullptr);
314
0
        stream->setIndexFile(false);
315
316
0
        auto* cache_value = new InvertedIndexSearcherCache::CacheValue(std::move(searcher_result),
317
0
                                                                       reader_size, UnixMillis());
318
0
        InvertedIndexSearcherCache::instance()->insert(searcher_cache_key, cache_value,
319
0
                                                       &searcher_cache_handle);
320
321
0
        auto new_variant = searcher_cache_handle.get_index_searcher();
322
0
        auto* new_ptr = std::get_if<FulltextIndexSearcherPtr>(&new_variant);
323
0
        if (new_ptr != nullptr && *new_ptr != nullptr) {
324
0
            reader_holder = std::shared_ptr<lucene::index::IndexReader>(
325
0
                    (*new_ptr)->getReader(), [](lucene::index::IndexReader*) {});
326
0
        }
327
328
0
        if (!reader_holder) {
329
0
            return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
330
0
                    "failed to build IndexSearcher for field '{}'", field_name);
331
0
        }
332
0
    }
333
334
0
    _searcher_cache_handles.push_back(std::move(searcher_cache_handle));
335
336
0
    resolved.lucene_reader = reader_holder;
337
0
    _binding_readers[binding_key] = reader_holder;
338
0
    _field_readers[resolved.stored_field_wstr] = reader_holder;
339
0
    _readers.emplace_back(reader_holder);
340
0
    _cache.emplace(binding_key, resolved);
341
0
    if (is_variant_sub) {
342
0
        bool index_file_exists = false;
343
0
        auto probe_status = index_file_reader->index_file_exist(&inverted_reader->get_index_meta(),
344
0
                                                                &index_file_exists);
345
0
        add_search_binding_diagnostic(
346
0
                _context,
347
0
                fmt::format(
348
0
                        "[VariantSearchBinding] phase=field_resolve result=selected "
349
0
                        "logical_field={} stored_field={} query_type={} effective_query_type={} "
350
0
                        "index_id={} suffix={} reader_type={} analyzer_key={} "
351
0
                        "field_pattern={} index_file_exists={} probe_status={} "
352
0
                        "searcher_cache={} index_file={}",
353
0
                        field_name, stored_field_name, query_type_to_string(query_type),
354
0
                        query_type_to_string(effective_query_type), inverted_reader->get_index_id(),
355
0
                        inverted_reader->get_index_meta().get_index_suffix(),
356
0
                        reader_type_to_string(inverted_reader->type()), resolved.analyzer_key,
357
0
                        inverted_reader->get_index_meta().field_pattern(), index_file_exists,
358
0
                        probe_status.ok() ? "OK" : probe_status.to_string(),
359
0
                        cache_hit ? "hit" : "miss",
360
0
                        index_file_reader->get_index_file_path(
361
0
                                &inverted_reader->get_index_meta())));
362
0
    }
363
0
    *binding = resolved;
364
0
    return Status::OK();
365
0
}
366
367
segment_v2::IndexIterator* VariantSearchNullBitmapAdapter::iterator_for(
368
0
        const query_v2::Scorer& /*scorer*/, const std::string& logical_field) const {
369
0
    if (logical_field.empty()) {
370
0
        return nullptr;
371
0
    }
372
0
    return _resolver.get_iterator(logical_field);
373
0
}
374
375
void populate_variant_search_binding_context(const FieldReaderResolver& resolver,
376
1
                                             query_v2::QueryExecutionContext* exec_ctx) {
377
1
    DCHECK(exec_ctx != nullptr);
378
1
    exec_ctx->readers = resolver.readers();
379
1
    exec_ctx->reader_bindings = resolver.reader_bindings();
380
1
    exec_ctx->field_reader_bindings = resolver.field_readers();
381
1
    for (const auto& [binding_key, binding] : resolver.binding_cache()) {
382
0
        if (binding_key.empty()) {
383
0
            continue;
384
0
        }
385
0
        query_v2::FieldBindingContext binding_ctx;
386
0
        binding_ctx.logical_field_name = binding.logical_field_name;
387
0
        binding_ctx.stored_field_name = binding.stored_field_name;
388
0
        binding_ctx.stored_field_wstr = binding.stored_field_wstr;
389
0
        exec_ctx->binding_fields.emplace(binding_key, std::move(binding_ctx));
390
0
    }
391
1
}
392
393
query_v2::QueryExecutionContext build_variant_search_query_execution_context(
394
        uint32_t segment_num_rows, const FieldReaderResolver& resolver,
395
1
        query_v2::NullBitmapResolver* null_resolver) {
396
1
    query_v2::QueryExecutionContext exec_ctx;
397
1
    exec_ctx.segment_num_rows = segment_num_rows;
398
1
    populate_variant_search_binding_context(resolver, &exec_ctx);
399
1
    exec_ctx.null_resolver = null_resolver;
400
1
    return exec_ctx;
401
1
}
402
403
namespace {
404
405
class VariantNestedDocMappingWeight final : public query_v2::Weight {
406
public:
407
    VariantNestedDocMappingWeight(
408
            query_v2::WeightPtr child_weight,
409
            std::vector<const segment_v2::NestedGroupReader*> child_to_parent_chain,
410
            const segment_v2::NestedGroupReadProvider* read_provider,
411
            segment_v2::ColumnIteratorOptions column_iter_opts)
412
7
            : _child_weight(std::move(child_weight)),
413
7
              _child_to_parent_chain(std::move(child_to_parent_chain)),
414
7
              _read_provider(read_provider),
415
7
              _column_iter_opts(std::move(column_iter_opts)) {}
416
417
    query_v2::ScorerPtr scorer(const query_v2::QueryExecutionContext& context,
418
7
                               const std::string& binding_key) override {
419
7
        if (_child_weight == nullptr || _read_provider == nullptr ||
420
7
            _child_to_parent_chain.empty()) {
421
1
            return std::make_shared<query_v2::EmptyScorer>();
422
1
        }
423
424
6
        auto child_scorer = _child_weight->scorer(context, binding_key);
425
6
        if (child_scorer == nullptr) {
426
1
            return std::make_shared<query_v2::EmptyScorer>();
427
1
        }
428
429
5
        roaring::Roaring child_true;
430
5
        uint32_t doc = child_scorer->doc();
431
9
        while (doc != query_v2::TERMINATED) {
432
4
            child_true.add(doc);
433
4
            doc = child_scorer->advance();
434
4
        }
435
436
5
        auto mapped_true = std::make_shared<roaring::Roaring>();
437
5
        if (!child_true.isEmpty()) {
438
3
            auto status = _read_provider->map_elements_to_parent_ords(
439
3
                    _child_to_parent_chain, _column_iter_opts, child_true, mapped_true.get());
440
3
            if (!status.ok()) {
441
1
                throw Exception(ErrorCode::INTERNAL_ERROR,
442
1
                                "failed to map nested search true bitmap: {}", status.to_string());
443
1
            }
444
3
        }
445
446
4
        std::shared_ptr<roaring::Roaring> mapped_null;
447
4
        if (child_scorer->has_null_bitmap(context.null_resolver)) {
448
3
            const auto* child_null = child_scorer->get_null_bitmap(context.null_resolver);
449
3
            if (child_null != nullptr && !child_null->isEmpty()) {
450
3
                mapped_null = std::make_shared<roaring::Roaring>();
451
3
                auto status = _read_provider->map_elements_to_parent_ords(
452
3
                        _child_to_parent_chain, _column_iter_opts, *child_null, mapped_null.get());
453
3
                if (!status.ok()) {
454
1
                    throw Exception(ErrorCode::INTERNAL_ERROR,
455
1
                                    "failed to map nested search null bitmap: {}",
456
1
                                    status.to_string());
457
1
                }
458
2
                *mapped_null -= *mapped_true;
459
2
                if (mapped_null->isEmpty()) {
460
1
                    mapped_null.reset();
461
1
                }
462
2
            }
463
3
        }
464
465
3
        if (mapped_true->isEmpty() && (mapped_null == nullptr || mapped_null->isEmpty())) {
466
1
            return std::make_shared<query_v2::EmptyScorer>();
467
1
        }
468
2
        return std::make_shared<query_v2::BitSetScorer>(std::move(mapped_true),
469
2
                                                        std::move(mapped_null));
470
3
    }
471
472
private:
473
    query_v2::WeightPtr _child_weight;
474
    std::vector<const segment_v2::NestedGroupReader*> _child_to_parent_chain;
475
    const segment_v2::NestedGroupReadProvider* _read_provider;
476
    segment_v2::ColumnIteratorOptions _column_iter_opts;
477
};
478
479
class VariantNestedDocMappingQuery final : public query_v2::Query {
480
public:
481
    VariantNestedDocMappingQuery(
482
            query_v2::QueryPtr child_query,
483
            std::vector<const segment_v2::NestedGroupReader*> child_to_parent_chain,
484
            const segment_v2::NestedGroupReadProvider* read_provider,
485
            segment_v2::ColumnIteratorOptions column_iter_opts)
486
7
            : _child_query(std::move(child_query)),
487
7
              _child_to_parent_chain(std::move(child_to_parent_chain)),
488
7
              _read_provider(read_provider),
489
7
              _column_iter_opts(std::move(column_iter_opts)) {}
490
491
7
    query_v2::WeightPtr weight(bool enable_scoring) override {
492
7
        if (_child_query == nullptr) {
493
0
            return nullptr;
494
0
        }
495
7
        return std::make_shared<VariantNestedDocMappingWeight>(_child_query->weight(enable_scoring),
496
7
                                                               _child_to_parent_chain,
497
7
                                                               _read_provider, _column_iter_opts);
498
7
    }
499
500
private:
501
    query_v2::QueryPtr _child_query;
502
    std::vector<const segment_v2::NestedGroupReader*> _child_to_parent_chain;
503
    const segment_v2::NestedGroupReadProvider* _read_provider;
504
    segment_v2::ColumnIteratorOptions _column_iter_opts;
505
};
506
507
2
bool starts_with_root_field(const std::string& logical_field_name, const std::string& root_field) {
508
2
    if (logical_field_name == root_field) {
509
1
        return true;
510
1
    }
511
1
    return logical_field_name.size() > root_field.size() &&
512
1
           logical_field_name.compare(0, root_field.size(), root_field) == 0 &&
513
1
           logical_field_name[root_field.size()] == '.';
514
2
}
515
516
} // namespace
517
518
query_v2::QueryPtr make_variant_nested_doc_mapping_query(
519
        query_v2::QueryPtr child_query,
520
        std::vector<const segment_v2::NestedGroupReader*> child_to_parent_chain,
521
        const segment_v2::NestedGroupReadProvider* read_provider,
522
8
        segment_v2::ColumnIteratorOptions column_iter_opts) {
523
8
    if (child_to_parent_chain.empty()) {
524
1
        return child_query;
525
1
    }
526
7
    return std::make_shared<VariantNestedDocMappingQuery>(
527
7
            std::move(child_query), std::move(child_to_parent_chain), read_provider,
528
7
            std::move(column_iter_opts));
529
8
}
530
531
Status map_variant_nested_leaf_query_to_active_group(const VariantNestedDocMapperContext& context,
532
                                                     const std::string& logical_field_name,
533
3
                                                     query_v2::QueryPtr* query) {
534
3
    if (query == nullptr || *query == nullptr || context.variant_reader == nullptr ||
535
3
        context.read_provider == nullptr || context.active_group_chain.empty() ||
536
3
        context.root_field.empty()) {
537
1
        return Status::OK();
538
1
    }
539
2
    if (!starts_with_root_field(logical_field_name, context.root_field)) {
540
1
        return Status::OK();
541
1
    }
542
543
1
    std::string relative_path;
544
1
    if (logical_field_name.size() > context.root_field.size()) {
545
0
        relative_path = logical_field_name.substr(context.root_field.size() + 1);
546
0
    }
547
1
    if (relative_path.empty()) {
548
1
        return Status::OK();
549
1
    }
550
551
0
    auto [found, leaf_group_chain, _] =
552
0
            context.variant_reader->collect_nested_group_chain(relative_path);
553
0
    if (!found) {
554
0
        return Status::OK();
555
0
    }
556
0
    if (leaf_group_chain.size() < context.active_group_chain.size()) {
557
0
        return Status::InvalidArgument(
558
0
                "nested search leaf field '{}' is outside active nested path", logical_field_name);
559
0
    }
560
0
    for (size_t i = 0; i < context.active_group_chain.size(); ++i) {
561
0
        if (leaf_group_chain[i] != context.active_group_chain[i]) {
562
0
            return Status::InvalidArgument(
563
0
                    "nested search leaf field '{}' is outside active nested path",
564
0
                    logical_field_name);
565
0
        }
566
0
    }
567
0
    if (leaf_group_chain.size() == context.active_group_chain.size()) {
568
0
        return Status::OK();
569
0
    }
570
571
0
    std::vector<const segment_v2::NestedGroupReader*> child_to_parent_chain(
572
0
            leaf_group_chain.begin() + context.active_group_chain.size(), leaf_group_chain.end());
573
0
    *query = make_variant_nested_doc_mapping_query(std::move(*query),
574
0
                                                   std::move(child_to_parent_chain),
575
0
                                                   context.read_provider, context.column_iter_opts);
576
0
    return Status::OK();
577
0
}
578
579
Status VariantNestedSearchEvaluator::evaluate(
580
        const TSearchParam& search_param, const TSearchClause& nested_clause,
581
        const std::shared_ptr<segment_v2::IndexQueryContext>& context,
582
        FieldReaderResolver& resolver, uint32_t num_rows, const IndexExecContext* index_exec_ctx,
583
        const std::unordered_map<std::string, int>& field_name_to_column_id,
584
7
        std::shared_ptr<roaring::Roaring>& result_bitmap) const {
585
7
    (void)num_rows;
586
7
    (void)field_name_to_column_id;
587
7
    if (!(nested_clause.__isset.nested_path)) {
588
2
        return Status::InvalidArgument("NESTED clause missing nested_path");
589
2
    }
590
5
    if (!(nested_clause.__isset.children) || nested_clause.children.empty()) {
591
2
        return Status::InvalidArgument("NESTED clause missing inner query");
592
2
    }
593
3
    if (result_bitmap == nullptr) {
594
2
        result_bitmap = std::make_shared<roaring::Roaring>();
595
2
    } else {
596
1
        *result_bitmap = roaring::Roaring();
597
1
    }
598
599
3
    std::string root_field = nested_clause.nested_path;
600
3
    auto dot_pos = nested_clause.nested_path.find('.');
601
3
    if (dot_pos != std::string::npos) {
602
1
        root_field = nested_clause.nested_path.substr(0, dot_pos);
603
1
    }
604
3
    if (index_exec_ctx == nullptr || index_exec_ctx->segment() == nullptr) {
605
3
        return Status::InvalidArgument("NESTED query requires IndexExecContext with valid segment");
606
3
    }
607
0
    auto* segment = index_exec_ctx->segment();
608
0
    const int32_t ordinal = segment->tablet_schema()->field_index(root_field);
609
0
    if (ordinal < 0) {
610
0
        return Status::InvalidArgument("Column '{}' not found in tablet schema for nested query",
611
0
                                       root_field);
612
0
    }
613
0
    const ColumnId column_id = static_cast<ColumnId>(ordinal);
614
615
0
    std::shared_ptr<segment_v2::ColumnReader> column_reader;
616
0
    RETURN_IF_ERROR(segment->get_column_reader(segment->tablet_schema()->column(column_id),
617
0
                                               &column_reader,
618
0
                                               index_exec_ctx->column_iter_opts().stats));
619
0
    auto* variant_reader = dynamic_cast<segment_v2::VariantColumnReader*>(column_reader.get());
620
0
    if (variant_reader == nullptr) {
621
0
        return Status::InvalidArgument("Column '{}' is not VARIANT for nested query", root_field);
622
0
    }
623
624
0
    std::string array_path;
625
0
    if (dot_pos == std::string::npos) {
626
0
        array_path = std::string(segment_v2::kRootNestedGroupPath);
627
0
    } else {
628
0
        array_path = nested_clause.nested_path.substr(dot_pos + 1);
629
0
    }
630
631
0
    auto [found, group_chain, _] = variant_reader->collect_nested_group_chain(array_path);
632
0
    if (!found || group_chain.empty()) {
633
0
        return Status::OK();
634
0
    }
635
636
0
    auto read_provider = segment_v2::create_nested_group_read_provider();
637
0
    if (!read_provider || !read_provider->should_enable_nested_group_read_path()) {
638
0
        return Status::NotSupported(
639
0
                "NestedGroup search is an enterprise capability, not available in this build");
640
0
    }
641
642
0
    auto& leaf_group = group_chain.back();
643
0
    uint64_t total_elements = 0;
644
0
    RETURN_IF_ERROR(read_provider->get_total_elements(index_exec_ctx->column_iter_opts(),
645
0
                                                      leaf_group, &total_elements));
646
0
    if (total_elements == 0) {
647
0
        return Status::OK();
648
0
    }
649
0
    if (total_elements > std::numeric_limits<uint32_t>::max()) {
650
0
        return Status::InvalidArgument("nested element_count exceeds uint32_t max");
651
0
    }
652
653
0
    std::string default_operator = "or";
654
0
    if (search_param.__isset.default_operator && !search_param.default_operator.empty()) {
655
0
        default_operator = search_param.default_operator;
656
0
    }
657
0
    int32_t minimum_should_match = -1;
658
0
    if (search_param.__isset.minimum_should_match) {
659
0
        minimum_should_match = search_param.minimum_should_match;
660
0
    }
661
662
0
    query_v2::QueryPtr inner_query;
663
0
    std::string inner_binding_key;
664
0
    VariantNestedDocMapperContext mapper_context;
665
0
    mapper_context.root_field = root_field;
666
0
    mapper_context.active_group_chain = group_chain;
667
0
    mapper_context.variant_reader = variant_reader;
668
0
    mapper_context.read_provider = read_provider.get();
669
0
    mapper_context.column_iter_opts = index_exec_ctx->column_iter_opts();
670
0
    resolver.set_leaf_query_mapper(
671
0
            [mapper_context](const std::string& logical_field_name, query_v2::QueryPtr* query) {
672
0
                return map_variant_nested_leaf_query_to_active_group(mapper_context,
673
0
                                                                     logical_field_name, query);
674
0
            });
675
0
    struct ScopedLeafMapperReset {
676
0
        FieldReaderResolver& resolver;
677
0
        ~ScopedLeafMapperReset() { resolver.set_leaf_query_mapper(nullptr); }
678
0
    } mapper_reset {resolver};
679
0
    RETURN_IF_ERROR(_function_search.build_query_recursive(
680
0
            nested_clause.children[0], context, resolver, &inner_query, &inner_binding_key,
681
0
            default_operator, minimum_should_match, static_cast<uint32_t>(total_elements)));
682
0
    if (inner_query == nullptr) {
683
0
        return Status::OK();
684
0
    }
685
686
0
    VariantSearchNullBitmapAdapter null_resolver(resolver);
687
0
    query_v2::QueryExecutionContext exec_ctx = build_variant_search_query_execution_context(
688
0
            static_cast<uint32_t>(total_elements), resolver, &null_resolver);
689
690
0
    auto weight = inner_query->weight(false);
691
0
    if (!weight) {
692
0
        return Status::OK();
693
0
    }
694
0
    auto scorer = weight->scorer(exec_ctx, inner_binding_key);
695
0
    if (!scorer) {
696
0
        return Status::OK();
697
0
    }
698
699
0
    roaring::Roaring element_bitmap;
700
0
    uint32_t doc = scorer->doc();
701
0
    while (doc != query_v2::TERMINATED) {
702
0
        element_bitmap.add(doc);
703
0
        doc = scorer->advance();
704
0
    }
705
706
0
    if (scorer->has_null_bitmap(exec_ctx.null_resolver)) {
707
0
        const auto* bitmap = scorer->get_null_bitmap(exec_ctx.null_resolver);
708
0
        if (bitmap != nullptr && !bitmap->isEmpty()) {
709
0
            element_bitmap -= *bitmap;
710
0
        }
711
0
    }
712
713
0
    roaring::Roaring parent_bitmap;
714
0
    RETURN_IF_ERROR(read_provider->map_elements_to_parent_ords(
715
0
            group_chain, index_exec_ctx->column_iter_opts(), element_bitmap, &parent_bitmap));
716
0
    *result_bitmap = std::move(parent_bitmap);
717
0
    return Status::OK();
718
0
}
719
720
} // namespace doris