Coverage Report

Created: 2026-03-12 17:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_multi_match.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exprs/function/function_multi_match.h"
19
20
#include <glog/logging.h>
21
22
#include <memory>
23
#include <roaring/roaring.hh>
24
#include <string>
25
#include <vector>
26
27
#include "core/column/column.h"
28
#include "exprs/function/simple_function_factory.h"
29
#include "exprs/vslot_ref.h"
30
#include "io/fs/file_reader.h"
31
#include "storage/index/inverted/query/phrase_prefix_query.h"
32
#include "storage/segment/segment_iterator.h"
33
34
namespace doris {
35
36
// Row-wise execution entry point. This implementation performs no evaluation:
// it ignores every argument and unconditionally returns a RuntimeError stating
// that only inverted index queries are supported.
Status FunctionMultiMatch::execute_impl(FunctionContext* /*context*/, Block& /*block*/,
                                        const ColumnNumbers& /*arguments*/,
                                        uint32_t /*result*/,
                                        size_t /*input_rows_count*/) const {
    return Status::RuntimeError("only inverted index queries are supported");
}
41
42
33
// Translates the textual query mode of multi_match into the corresponding
// InvertedIndexQueryType. The comparison is an exact, case-sensitive string
// match; any value other than "any", "all", "phrase" or "phrase_prefix" maps
// to InvertedIndexQueryType::UNKNOWN_QUERY.
InvertedIndexQueryType get_query_type(const std::string& query_type) {
    if (query_type == "any") {
        return InvertedIndexQueryType::MATCH_ANY_QUERY;
    }
    if (query_type == "all") {
        return InvertedIndexQueryType::MATCH_ALL_QUERY;
    }
    if (query_type == "phrase") {
        return InvertedIndexQueryType::MATCH_PHRASE_QUERY;
    }
    if (query_type == "phrase_prefix") {
        return InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY;
    }
    // Nothing matched: let the caller decide how to report the bad mode.
    return InvertedIndexQueryType::UNKNOWN_QUERY;
}
55
56
// Runs one full-text query against the inverted index of every listed column
// and stores the union (bitwise OR) of the per-column hits in bitmap_result.
//
// arguments            arguments[0] carries the query mode ("any", "all",
//                      "phrase" or "phrase_prefix") and arguments[1] the query
//                      text; both are read from row 0 of their column.
// data_type_with_names (name, type) pair of every column to search.
// iterators            index iterator for each entry of data_type_with_names,
//                      matched by position.
// num_rows             forwarded to each index read through InvertedIndexParam.
// analyzer_ctx         forwarded unchanged to each index read.
// bitmap_result        overwritten on success with the merged hit bitmap and a
//                      null bitmap that this function never populates.
//
// Returns OK on success. Returns an error when the query mode is not
// recognised, when arguments[1] is not a string type, when fewer iterators
// than columns are supplied, when an iterator is null, or when an index read
// fails (that status is propagated as-is).
Status FunctionMultiMatch::evaluate_inverted_index(
        const ColumnsWithTypeAndName& arguments,
        const std::vector<IndexFieldNameAndTypePair>& data_type_with_names,
        std::vector<segment_v2::IndexIterator*> iterators, uint32_t num_rows,
        const InvertedIndexAnalyzerCtx* analyzer_ctx,
        segment_v2::InvertedIndexResultBitmap& bitmap_result) const {
    DCHECK(arguments.size() == 2);
    std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
    std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();

    // type
    const std::string query_type_name = arguments[0].column->get_data_at(0).to_string();
    const auto query_type = get_query_type(query_type_name);
    if (query_type == InvertedIndexQueryType::UNKNOWN_QUERY) {
        // Report the text the caller supplied: on this path the parsed enum is
        // always UNKNOWN_QUERY, so printing it would carry no information.
        return Status::RuntimeError(
                "parameter query type incorrect for function multi_match: query_type = {}",
                query_type_name);
    }

    // query
    if (!is_string_type(arguments[1].type->get_primitive_type())) {
        return Status::Error<ErrorCode::INDEX_INVALID_PARAMETERS>(
                "arguments for multi_match must be string");
    }
    const auto query_str_ref = arguments[1].column->get_data_at(0);
    // Must convert StringRef to std::string because downstream readers
    // (e.g. FullTextIndexReader::query) reinterpret_cast query_value as std::string*.
    std::string query_str(query_str_ref.data, query_str_ref.size);

    // The loop below indexes `iterators` by column position, so reject a
    // shorter iterator list up front instead of reading past its end.
    const size_t column_count = data_type_with_names.size();
    if (iterators.size() < column_count) {
        std::string error_msg = "multi_match expects one inverted index iterator per column, got " +
                                std::to_string(iterators.size()) + " iterators for " +
                                std::to_string(column_count) + " columns";
        return Status::Error<ErrorCode::INDEX_INVALID_PARAMETERS>(error_msg);
    }

    // search
    InvertedIndexParam param;
    param.query_value = &query_str;
    param.query_type = query_type;
    param.num_rows = num_rows;
    param.analyzer_ctx = analyzer_ctx; // same for every column, so set once
    for (size_t i = 0; i < column_count; i++) {
        // Bind by reference: the name is only read here and copied into param.
        const auto& column_name = data_type_with_names[i].first;
        auto* iter = iterators[i];
        if (iter == nullptr) {
            std::string error_msg = "Inverted index iterator is null for column '" + column_name +
                                    "' during multi_match execution";
            return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(error_msg);
        }

        param.column_name = column_name;
        param.column_type = data_type_with_names[i].second;
        // Fresh bitmap per column so each read starts from an empty set.
        param.roaring = std::make_shared<roaring::Roaring>();
        RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {&param}));
        *roaring |= *param.roaring;
    }

    // Assign from a temporary rather than a named local so that a move
    // assignment can be selected if InvertedIndexResultBitmap provides one.
    bitmap_result = segment_v2::InvertedIndexResultBitmap(roaring, null_bitmap);

    return Status::OK();
}
112
113
8
// Registration hook: adds FunctionMultiMatch to the supplied function factory.
void register_function_multi_match(SimpleFunctionFactory& factory) {
    factory.register_function<FunctionMultiMatch>();
}
116
117
} // namespace doris