Coverage Report

Created: 2026-03-14 20:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/runtime/index_policy/index_policy_mgr.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "runtime/index_policy/index_policy_mgr.h"
19
20
#include <boost/algorithm/string.hpp>
21
#include <boost/algorithm/string/split.hpp>
22
#include <boost/algorithm/string/trim.hpp>
23
#include <unordered_set>
24
#include <utility>
25
26
namespace doris {
27
28
// Names of the normalizers that can be built without a user-defined policy.
// get_policy_by_name() consults this set only when no registered policy
// matches; is_builtin_normalizer() does an exact-match lookup against it, so
// the entries are kept in lower case.
const std::unordered_set<std::string> IndexPolicyMgr::BUILTIN_NORMALIZERS = {"lowercase"};
29
30
41
std::string IndexPolicyMgr::normalize_name(const std::string& name) {
31
41
    std::string result = name;
32
41
    boost::algorithm::trim(result);
33
41
    boost::algorithm::to_lower(result);
34
41
    return result;
35
41
}
36
37
void IndexPolicyMgr::apply_policy_changes(const std::vector<TIndexPolicy>& policys_to_update,
38
11
                                          const std::vector<int64_t>& policys_to_delete) {
39
11
    LOG(INFO) << "Starting policy changes - "
40
11
              << "Updates: " << policys_to_update.size() << " policies, "
41
11
              << "Deletions: " << policys_to_delete.size() << " policies";
42
43
11
    std::unique_lock lock(_mutex);
44
11
    int32_t success_deletes = 0;
45
11
    int32_t success_updates = 0;
46
47
11
    for (auto id : policys_to_delete) {
48
2
        if (auto it = _policys.find(id); it != _policys.end()) {
49
2
            LOG(INFO) << "Deleting policy - "
50
2
                      << "ID: " << id << ", "
51
2
                      << "Name: " << it->second.name;
52
53
            // Use normalized name for deletion
54
2
            _name_to_id.erase(normalize_name(it->second.name));
55
2
            _policys.erase(it);
56
2
            success_deletes++;
57
2
        } else {
58
0
            LOG(WARNING) << "Delete failed - Policy ID not found: " << id;
59
0
        }
60
2
    }
61
62
23
    for (const auto& policy : policys_to_update) {
63
23
        if (_policys.contains(policy.id)) {
64
1
            LOG(ERROR) << "Reject update - Duplicate policy ID: " << policy.id
65
1
                       << " | Existing name: " << _policys[policy.id].name
66
1
                       << " | New name: " << policy.name;
67
1
            continue;
68
1
        }
69
70
        // Use normalized name for case-insensitive lookup
71
22
        std::string normalized_name = normalize_name(policy.name);
72
22
        if (_name_to_id.contains(normalized_name)) {
73
1
            LOG(ERROR) << "Reject update - Duplicate policy name: " << policy.name
74
1
                       << " | Existing ID: " << _name_to_id[normalized_name]
75
1
                       << " | New ID: " << policy.id;
76
1
            continue;
77
1
        }
78
79
21
        _policys.emplace(policy.id, policy);
80
        // Store with normalized key for case-insensitive lookup
81
21
        _name_to_id.emplace(normalized_name, policy.id);
82
21
        success_updates++;
83
84
21
        LOG(INFO) << "Successfully applied policy - "
85
21
                  << "ID: " << policy.id << ", "
86
21
                  << "Name: " << policy.name << ", "
87
21
                  << "Type: " << policy.type;
88
21
    }
89
90
11
    LOG(INFO) << "Policy changes completed - "
91
11
              << "Deleted: " << success_deletes << "/" << policys_to_delete.size() << ", "
92
11
              << "Updated: " << success_updates << "/" << policys_to_update.size() << ", "
93
11
              << "Total policies: " << _policys.size();
94
11
}
95
96
5
// Returns a copy of the whole policy table. The copy is made while the shared
// lock is held, so the caller can keep using it after the lock is released
// without racing against apply_policy_changes().
Policys IndexPolicyMgr::get_index_policys() {
    std::shared_lock guard(_mutex);
    Policys snapshot = _policys;
    return snapshot;
}
100
101
// NOTE: This function holds a shared_lock while calling build_analyzer_from_policy/
102
// build_normalizer_from_policy, which also access _name_to_id and _policys.
103
// This is safe because std::shared_mutex allows the same thread to hold multiple
104
// shared_locks (read locks are reentrant). The lock is held throughout to ensure
105
// consistency when resolving nested policy references (e.g., tokenizer policies).
106
7
AnalyzerPtr IndexPolicyMgr::get_policy_by_name(const std::string& name) {
107
7
    std::shared_lock lock(_mutex);
108
109
    // Use normalized name for case-insensitive lookup
110
7
    std::string normalized_name = normalize_name(name);
111
7
    auto name_it = _name_to_id.find(normalized_name);
112
7
    if (name_it == _name_to_id.end()) {
113
1
        if (is_builtin_normalizer(normalized_name)) {
114
0
            return build_builtin_normalizer(name);
115
0
        }
116
1
        throw Exception(ErrorCode::INVALID_ARGUMENT, "Policy not found with name: " + name);
117
1
    }
118
119
6
    auto policy_it = _policys.find(name_it->second);
120
6
    if (policy_it == _policys.end()) {
121
0
        throw Exception(ErrorCode::INVALID_ARGUMENT, "Policy not found with id: " + name);
122
0
    }
123
124
6
    const auto& index_policy = policy_it->second;
125
6
    if (index_policy.type == TIndexPolicyType::ANALYZER) {
126
6
        return build_analyzer_from_policy(index_policy);
127
6
    } else if (index_policy.type == TIndexPolicyType::NORMALIZER) {
128
0
        return build_normalizer_from_policy(index_policy);
129
0
    }
130
131
0
    throw Exception(ErrorCode::INVALID_ARGUMENT, "Policy not found with type: " + name);
132
6
}
133
134
6
// Builds a custom analyzer from an ANALYZER policy.
//
// The policy must carry a non-empty PROP_TOKENIZER property. Its value is
// either the name of another registered policy (in which case that policy's
// PROP_TYPE selects the tokenizer and its remaining properties become the
// tokenizer settings) or, if no such policy exists, a tokenizer type used
// directly with empty settings. PROP_CHAR_FILTER / PROP_TOKEN_FILTER are
// resolved through process_filter_configs().
//
// Throws Exception(INVALID_ARGUMENT) when the tokenizer property is missing or
// empty, or when the referenced tokenizer policy cannot be resolved to a type.
//
// This function takes no lock; it reads _name_to_id and _policys and relies on
// the caller (get_policy_by_name) holding _mutex. Because that lock is only a
// shared lock, the maps are accessed exclusively through find(): operator[]
// is a non-const operation that may insert, which must not happen while other
// readers can be active.
AnalyzerPtr IndexPolicyMgr::build_analyzer_from_policy(const TIndexPolicy& index_policy_analyzer) {
    segment_v2::inverted_index::CustomAnalyzerConfig::Builder builder;

    auto tokenizer_it = index_policy_analyzer.properties.find(PROP_TOKENIZER);
    if (tokenizer_it == index_policy_analyzer.properties.end() || tokenizer_it->second.empty()) {
        throw Exception(
                ErrorCode::INVALID_ARGUMENT,
                "Invalid tokenizer configuration in policy: analyzer must have a tokenizer");
    }

    const auto& tokenizer_name = tokenizer_it->second;
    // Use normalized name for case-insensitive lookup
    const std::string normalized_tokenizer_name = normalize_name(tokenizer_name);
    if (auto id_it = _name_to_id.find(normalized_tokenizer_name); id_it != _name_to_id.end()) {
        // Nested tokenizer policy.
        auto policy_it = _policys.find(id_it->second);
        if (policy_it == _policys.end()) {
            // Name index points at a missing policy: report it the same way as
            // a tokenizer policy without a type instead of inserting a blank one.
            throw Exception(ErrorCode::INVALID_ARGUMENT,
                            "Invalid tokenizer configuration in policy: " + tokenizer_name);
        }

        const auto& tokenizer_policy = policy_it->second;
        auto type_it = tokenizer_policy.properties.find(PROP_TYPE);
        if (type_it == tokenizer_policy.properties.end()) {
            throw Exception(ErrorCode::INVALID_ARGUMENT,
                            "Invalid tokenizer configuration in policy: " + tokenizer_name);
        }

        // Every property except the type itself is forwarded as a setting.
        segment_v2::inverted_index::Settings settings;
        for (const auto& prop : tokenizer_policy.properties) {
            if (prop.first != PROP_TYPE) {
                settings.set(prop.first, prop.second);
            }
        }
        builder.with_tokenizer_config(type_it->second, settings);
    } else {
        // Not a registered policy: treat the value as a tokenizer type.
        builder.with_tokenizer_config(tokenizer_name, {});
    }

    process_filter_configs(index_policy_analyzer, PROP_CHAR_FILTER, "char filter",
                           [&builder](const std::string& name,
                                      const segment_v2::inverted_index::Settings& settings) {
                               builder.add_char_filter_config(name, settings);
                           });

    process_filter_configs(index_policy_analyzer, PROP_TOKEN_FILTER, "token filter",
                           [&builder](const std::string& name,
                                      const segment_v2::inverted_index::Settings& settings) {
                               builder.add_token_filter_config(name, settings);
                           });

    auto custom_analyzer_config = builder.build();
    return segment_v2::inverted_index::CustomAnalyzer::build_custom_analyzer(
            custom_analyzer_config);
}
182
183
// Builds a custom normalizer from a NORMALIZER policy. Only the
// PROP_CHAR_FILTER and PROP_TOKEN_FILTER properties are consulted; each is
// expanded by process_filter_configs() and fed into the normalizer builder.
AnalyzerPtr IndexPolicyMgr::build_normalizer_from_policy(
        const TIndexPolicy& index_policy_normalizer) {
    namespace inverted_index = segment_v2::inverted_index;

    inverted_index::CustomNormalizerConfig::Builder builder;

    // Sinks that route each resolved filter into the matching builder slot.
    auto add_char_filter = [&builder](const std::string& filter_type,
                                      const inverted_index::Settings& filter_settings) {
        builder.add_char_filter_config(filter_type, filter_settings);
    };
    auto add_token_filter = [&builder](const std::string& filter_type,
                                       const inverted_index::Settings& filter_settings) {
        builder.add_token_filter_config(filter_type, filter_settings);
    };

    // Char filters are registered before token filters.
    process_filter_configs(index_policy_normalizer, PROP_CHAR_FILTER, "char filter",
                           add_char_filter);
    process_filter_configs(index_policy_normalizer, PROP_TOKEN_FILTER, "token filter",
                           add_token_filter);

    auto normalizer_config = builder.build();
    return inverted_index::CustomNormalizer::build_custom_normalizer(normalizer_config);
}
203
204
void IndexPolicyMgr::process_filter_configs(
205
        const TIndexPolicy& index_policy_analyzer, const std::string& prop_name,
206
        const std::string& error_prefix,
207
        std::function<void(const std::string&, const segment_v2::inverted_index::Settings&)>
208
10
                add_config_func) {
209
10
    auto filter_it = index_policy_analyzer.properties.find(prop_name);
210
10
    if (filter_it == index_policy_analyzer.properties.end()) {
211
6
        return;
212
6
    }
213
214
4
    std::vector<std::string> filter_strs;
215
4
    boost::split(filter_strs, filter_it->second, boost::is_any_of(","));
216
217
6
    for (auto& filter_name : filter_strs) {
218
6
        boost::trim(filter_name);
219
6
        if (filter_name.empty()) {
220
1
            continue;
221
1
        }
222
223
        // Use normalized name for case-insensitive lookup
224
5
        std::string normalized_filter_name = normalize_name(filter_name);
225
5
        if (_name_to_id.contains(normalized_filter_name)) {
226
            // Nested filter policy
227
4
            const auto& filter_policy = _policys[_name_to_id[normalized_filter_name]];
228
4
            auto type_it = filter_policy.properties.find(PROP_TYPE);
229
4
            if (type_it == filter_policy.properties.end()) {
230
0
                throw Exception(
231
0
                        ErrorCode::INVALID_ARGUMENT,
232
0
                        "Invalid " + error_prefix + " configuration in policy: " + filter_name);
233
0
            }
234
235
4
            segment_v2::inverted_index::Settings settings;
236
4
            for (const auto& prop : filter_policy.properties) {
237
4
                if (prop.first != PROP_TYPE) {
238
0
                    settings.set(prop.first, prop.second);
239
0
                }
240
4
            }
241
4
            add_config_func(type_it->second, settings);
242
4
        } else {
243
            // Simple filter
244
1
            add_config_func(filter_name, {});
245
1
        }
246
5
    }
247
4
}
248
249
1
bool IndexPolicyMgr::is_builtin_normalizer(const std::string& name) {
250
1
    return BUILTIN_NORMALIZERS.contains(name);
251
1
}
252
253
0
// Constructs a builtin normalizer by name. The only name handled is
// "lowercase" (exact match), which yields a normalizer made of a single
// "lowercase" token filter with default settings.
//
// Throws Exception(INVALID_ARGUMENT) for any other name.
AnalyzerPtr IndexPolicyMgr::build_builtin_normalizer(const std::string& name) {
    using namespace segment_v2::inverted_index;

    // Reject anything that is not a known builtin up front.
    if (name != "lowercase") {
        throw Exception(ErrorCode::INVALID_ARGUMENT, "Unknown builtin normalizer: " + name);
    }

    CustomNormalizerConfig::Builder lowercase_builder;
    lowercase_builder.add_token_filter_config("lowercase", Settings {});
    auto lowercase_config = lowercase_builder.build();
    return CustomNormalizer::build_custom_normalizer(lowercase_config);
}
265
266
} // namespace doris