Coverage Report

Created: 2026-02-27 02:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/vec/exprs/vsearch.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "vec/exprs/vsearch.h"
19
20
#include <memory>
21
#include <roaring/roaring.hh>
22
23
#include "common/logging.h"
24
#include "common/status.h"
25
#include "glog/logging.h"
26
#include "olap/rowset/segment_v2/inverted_index_reader.h"
27
#include "olap/rowset/segment_v2/segment.h"
28
#include "runtime/runtime_state.h"
29
#include "vec/columns/column_const.h"
30
#include "vec/exprs/vexpr_context.h"
31
#include "vec/exprs/vliteral.h"
32
#include "vec/exprs/vslot_ref.h"
33
#include "vec/functions/function_search.h"
34
35
namespace doris::vectorized {
36
using namespace segment_v2;
37
38
namespace {
39
40
struct SearchInputBundle {
41
    std::unordered_map<std::string, IndexIterator*> iterators;
42
    std::unordered_map<std::string, vectorized::IndexFieldNameAndTypePair> field_types;
43
    std::unordered_map<std::string, int> field_name_to_column_id;
44
    std::vector<int> column_ids;
45
    vectorized::ColumnsWithTypeAndName literal_args;
46
};
47
48
// Walks the children of `expr` and fills `bundle` with the inputs required to
// evaluate the search against inverted indexes.
//
// Child handling:
//  - Slot ref: the logical field name is taken from the field binding at the same
//    position (falling back to the slot's column name when there is no binding).
//    The field -> column id mapping is always recorded. When the slot has an index
//    iterator, the iterator, its storage name/type and the column id are collected.
//    When it has none and the binding marks a variant subcolumn with a parent
//    field, the parent column's iterator is used instead and the index field name
//    is synthesized as "<parent storage name>.<subcolumn path>".
//  - Literal: appended to bundle->literal_args.
//  - "element_at" bound to a variant subcolumn: skipped (consumes one binding).
//  - Anything else: rejected.
//
// @param expr    search expression whose children and search param are inspected
// @param context expression context providing the inverted index context
// @param bundle  output; must not be null
// @return OK on success; InternalError when no index context is available or an
//         iterator exists without a storage name/type entry; InvalidArgument for
//         an unsupported child node.
Status collect_search_inputs(const VSearchExpr& expr, VExprContext* context,
                             SearchInputBundle* bundle) {
    DCHECK(bundle != nullptr);

    auto index_context = context->get_index_context();
    if (index_context == nullptr) {
        LOG(WARNING) << "collect_search_inputs: No inverted index context available";
        return Status::InternalError("No inverted index context available");
    }

    // Get field bindings for variant subcolumn support
    const auto& search_param = expr.get_search_param();
    const auto& field_bindings = search_param.field_bindings;

    std::unordered_map<std::string, ColumnId> parent_to_base_column_id;
    std::unordered_map<std::string, std::string> parent_to_storage_field_prefix;

    // Resolve and cache the base (parent) column id for a variant field binding.
    // This avoids repeated schema lookups when multiple subcolumns share the same parent column.
    // Returns false when the id cannot be resolved; *column_id is only written on success.
    auto resolve_parent_column_id = [&](const std::string& parent_field, ColumnId* column_id) {
        // Guard against invalid inputs: variant bindings may miss parent_field, and callers must
        // provide a valid output pointer to receive the resolved id.
        if (parent_field.empty() || column_id == nullptr) {
            return false;
        }
        if (auto it = parent_to_base_column_id.find(parent_field);
            it != parent_to_base_column_id.end()) {
            *column_id = it->second;
            return true;
        }
        // index_context itself was verified non-null at function entry and is never reassigned,
        // so only the segment needs checking here.
        if (index_context->segment() == nullptr) {
            return false;
        }
        const int32_t ordinal =
                index_context->segment()->tablet_schema()->field_index(parent_field);
        if (ordinal < 0) {
            return false;
        }
        ColumnId resolved_id = static_cast<ColumnId>(ordinal);
        parent_to_base_column_id.emplace(parent_field, resolved_id);
        if (auto* storage_name_type = index_context->get_storage_name_and_type_by_id(resolved_id);
            storage_name_type != nullptr) {
            parent_to_storage_field_prefix[parent_field] = storage_name_type->first;
        }
        *column_id = resolved_id;
        return true;
    };

    // Position in field_bindings of the next field-like child. Unsigned so that the
    // bounds checks against field_bindings.size() do not mix signed and unsigned values.
    // Literal children do not consume a binding and therefore do not advance it.
    size_t child_index = 0;
    for (const auto& child : expr.children()) {
        if (child->is_slot_ref()) {
            auto* column_slot_ref = assert_cast<VSlotRef*>(child.get());
            int column_id = column_slot_ref->column_id();

            // Determine the field_name from field_bindings (for variant subcolumns)
            // field_bindings and children should have the same order
            std::string field_name;
            const TSearchFieldBinding* binding = nullptr;
            if (child_index < field_bindings.size()) {
                // Use field_name from binding (may include "parent.subcolumn" for variant)
                binding = &field_bindings[child_index];
                field_name = binding->field_name;
            } else {
                // Fallback to column_name if binding not found
                field_name = column_slot_ref->column_name();
            }

            bundle->field_name_to_column_id[field_name] = column_id;

            auto* iterator = index_context->get_inverted_index_iterator_by_column_id(column_id);
            const auto* storage_name_type =
                    index_context->get_storage_name_and_type_by_column_id(column_id);
            bool field_added = false;
            // For variant subcolumns, slot_ref might not map to a real indexed column in the scan schema.
            // Fall back to the parent variant column's iterator and synthesize lucene field name.
            if (iterator == nullptr && binding != nullptr &&
                binding->__isset.is_variant_subcolumn && binding->is_variant_subcolumn &&
                binding->__isset.parent_field_name && !binding->parent_field_name.empty()) {
                ColumnId base_column_id = 0;
                if (resolve_parent_column_id(binding->parent_field_name, &base_column_id)) {
                    iterator = index_context->get_inverted_index_iterator_by_id(base_column_id);
                    const auto* base_storage_name_type =
                            index_context->get_storage_name_and_type_by_id(base_column_id);
                    if (iterator != nullptr && base_storage_name_type != nullptr) {
                        // Prefer the prefix cached for this parent; otherwise cache the one
                        // just looked up.
                        std::string prefix = base_storage_name_type->first;
                        if (auto pit =
                                    parent_to_storage_field_prefix.find(binding->parent_field_name);
                            pit != parent_to_storage_field_prefix.end() && !pit->second.empty()) {
                            prefix = pit->second;
                        } else {
                            parent_to_storage_field_prefix[binding->parent_field_name] = prefix;
                        }

                        std::string sub_path;
                        if (binding->__isset.subcolumn_path) {
                            sub_path = binding->subcolumn_path;
                        }
                        if (sub_path.empty()) {
                            // Fallback: strip "parent." prefix from logical field name
                            std::string pfx = binding->parent_field_name + ".";
                            if (field_name.starts_with(pfx)) {
                                sub_path = field_name.substr(pfx.size());
                            }
                        }
                        if (!sub_path.empty()) {
                            bundle->iterators[field_name] = iterator;
                            // No data type is recorded for a synthesized variant field.
                            bundle->field_types[field_name] =
                                    std::make_pair(prefix + "." + sub_path, nullptr);
                            int base_column_index =
                                    index_context->column_index_by_id(base_column_id);
                            if (base_column_index >= 0) {
                                bundle->column_ids.emplace_back(base_column_index);
                            }
                            field_added = true;
                        }
                    }
                }
            }

            // Only collect fields that have iterators (materialized columns with indexes)
            if (!field_added && iterator != nullptr) {
                if (storage_name_type == nullptr) {
                    return Status::InternalError("storage_name_type not found for column {} in {}",
                                                 column_id, expr.expr_name());
                }

                bundle->iterators.emplace(field_name, iterator);
                bundle->field_types.emplace(field_name, *storage_name_type);
                bundle->column_ids.emplace_back(column_id);
            }

            child_index++;
        } else if (child->is_literal()) {
            auto* literal = assert_cast<VLiteral*>(child.get());
            bundle->literal_args.emplace_back(literal->get_column_ptr(), literal->get_data_type(),
                                              literal->expr_name());
        } else {
            // Check if this is ElementAt expression (for variant subcolumn access)
            if (child->expr_name() == "element_at" && child_index < field_bindings.size() &&
                field_bindings[child_index].__isset.is_variant_subcolumn &&
                field_bindings[child_index].is_variant_subcolumn) {
                // Variant subcolumn not materialized - skip, will create empty BitSetQuery in function_search
                child_index++;
                continue;
            }

            // Not a supported child type
            return Status::InvalidArgument("Unsupported child node type: {}", child->expr_name());
        }
    }

    return Status::OK();
}
201
202
} // namespace
203
204
78
// Builds the expression from its thrift node. When the node carries a search
// param, it is copied and the original DSL string is cached alongside it;
// otherwise both members keep their default values.
VSearchExpr::VSearchExpr(const TExprNode& node) : VExpr(node) {
    if (!node.__isset.search_param) {
        return;
    }
    _search_param = node.search_param;
    _original_dsl = _search_param.original_dsl;
}
210
211
// Runs the base-class preparation, then picks up the inverted index query cache
// setting from the query options when that option is explicitly set.
// Returns the base-class error, if any; OK otherwise.
Status VSearchExpr::prepare(RuntimeState* state, const RowDescriptor& row_desc,
                            VExprContext* context) {
    RETURN_IF_ERROR(VExpr::prepare(state, row_desc, context));
    if (const auto& options = state->query_options();
        options.__isset.enable_inverted_index_query_cache) {
        _enable_cache = options.enable_inverted_index_query_cache;
    }
    return Status::OK();
}
220
221
4
// Returns the fixed display name of this expression type. The string has static
// storage duration, so the returned reference stays valid for the program's lifetime.
const std::string& VSearchExpr::expr_name() const {
    static const std::string search_expr_name = "VSearchExpr";
    return search_expr_name;
}
225
226
// Produces the result column via fast_execute() only; there is no other
// evaluation path. Returns InternalError when fast_execute() reports that it
// did not produce a result.
Status VSearchExpr::execute_column(VExprContext* context, const Block* block, Selector* selector,
                                   size_t count, ColumnPtr& result_column) const {
    const bool produced = fast_execute(context, selector, count, result_column);
    if (!produced) {
        return Status::InternalError("SearchExpr should not be executed without inverted index");
    }
    return Status::OK();
}
234
235
23
// Evaluates the search DSL against the inverted indexes reachable through
// `context` and stores the resulting bitmap on the index context for this expr.
//
// Outcomes:
//  - empty DSL                         -> InvalidArgument
//  - no index context                  -> warning logged, OK, nothing stored
//  - input collection fails            -> that error is returned
//  - no iterators and root not NESTED  -> an empty result bitmap is stored, OK
//  - search function fails             -> warning logged, its status is returned
//  - otherwise                         -> result stored and every collected column
//                                         id is marked true in the index status
Status VSearchExpr::evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) {
    if (_search_param.original_dsl.empty()) {
        return Status::InvalidArgument("search DSL is empty");
    }

    auto index_context = context->get_index_context();
    if (index_context == nullptr) {
        LOG(WARNING) << "VSearchExpr: No inverted index context available";
        return Status::OK();
    }

    SearchInputBundle inputs;
    RETURN_IF_ERROR(collect_search_inputs(*this, context, &inputs));

    VLOG_DEBUG << "VSearchExpr: bundle.iterators.size()=" << inputs.iterators.size();

    // A NESTED root clause is still handed to the search function even when no
    // iterator was collected; every other clause type short-circuits to "no rows".
    const bool root_is_nested = _search_param.root.clause_type == "NESTED";
    if (!root_is_nested && inputs.iterators.empty()) {
        LOG(WARNING) << "VSearchExpr: No indexed columns available for evaluation, DSL: "
                     << _original_dsl;
        auto no_rows = InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(),
                                                 std::make_shared<roaring::Roaring>());
        index_context->set_index_result_for_expr(this, std::move(no_rows));
        return Status::OK();
    }

    auto search_function = std::make_shared<FunctionSearch>();
    auto search_result = InvertedIndexResultBitmap();
    if (auto eval_status = search_function->evaluate_inverted_index_with_search_param(
                _search_param, inputs.field_types, inputs.iterators, segment_num_rows,
                search_result, _enable_cache, index_context.get(),
                inputs.field_name_to_column_id);
        !eval_status.ok()) {
        LOG(WARNING) << "VSearchExpr: Function evaluation failed: " << eval_status.to_string();
        return eval_status;
    }

    index_context->set_index_result_for_expr(this, search_result);
    for (int indexed_column_id : inputs.column_ids) {
        index_context->set_true_for_index_status(this, indexed_column_id);
    }

    return Status::OK();
}
279
280
} // namespace doris::vectorized