be/src/storage/index/inverted/setting.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <unicode/utf8.h> |
21 | | |
22 | | #include <boost/algorithm/string.hpp> |
23 | | #include <boost/algorithm/string/split.hpp> |
24 | | #include <boost/algorithm/string/trim.hpp> |
25 | | #include <boost/regex.hpp> |
26 | | #include <unordered_map> |
27 | | #include <utility> |
28 | | |
29 | | #include "common/exception.h" |
30 | | |
31 | | namespace doris::segment_v2::inverted_index { |
32 | | |
33 | | class Settings { |
34 | | public: |
35 | 1.64k | Settings() = default; |
36 | | Settings(std::unordered_map<std::string, std::string> args) : _args(std::move(args)) {} |
37 | 2.41k | Settings(const Settings&) = default; |
38 | 1.15k | Settings(Settings&&) = default; |
39 | 5.38k | ~Settings() = default; |
40 | | |
41 | 919 | void set(const std::string& key, const std::string& value) { |
42 | 919 | _args.insert_or_assign(key, value); |
43 | 919 | } |
44 | | |
45 | 38 | bool empty() const { return _args.empty(); } |
46 | | |
47 | 5.51k | bool get_bool(const std::string& key, bool default_value) const { |
48 | 5.51k | auto it = _args.find(key); |
49 | 5.51k | if (it != _args.end()) { |
50 | 2.36k | std::string value = it->second; |
51 | 2.36k | std::transform(value.begin(), value.end(), value.begin(), |
52 | 10.6k | [](unsigned char c) { return std::tolower(c); }); |
53 | 2.36k | if (value == "true" || value == "1") { |
54 | 1.19k | return true; |
55 | 1.19k | } else if (value == "false" || value == "0") { |
56 | 1.16k | return false; |
57 | 1.16k | } |
58 | 2.36k | } |
59 | 3.15k | return default_value; |
60 | 5.51k | } |
61 | | |
62 | 1.93k | int32_t get_int(const std::string& key, int32_t default_value) const { |
63 | 1.93k | auto it = _args.find(key); |
64 | 1.93k | if (it != _args.end()) { |
65 | 154 | try { |
66 | 154 | size_t pos; |
67 | 154 | int32_t num = std::stoi(it->second, &pos); |
68 | 154 | if (pos == it->second.size()) { |
69 | 152 | return num; |
70 | 152 | } |
71 | 154 | } catch (...) { |
72 | 1 | throw Exception(ErrorCode::INVALID_ARGUMENT, |
73 | 1 | "stoi failed (invalid argument or out of range): " + it->second); |
74 | 1 | } |
75 | 154 | } |
76 | 1.78k | return default_value; |
77 | 1.93k | } |
78 | | |
79 | 555 | std::string get_string(const std::string& key, const std::string& default_value = "") const { |
80 | 555 | auto it = _args.find(key); |
81 | 555 | if (it != _args.end()) { |
82 | 87 | return it->second; |
83 | 87 | } |
84 | 468 | return default_value; |
85 | 555 | } |
86 | | |
87 | 107 | std::vector<std::string> get_entry_list(const std::string& key) const { |
88 | 107 | static const boost::regex sep(R"((?<=\])\s*,\s*(?=\[))"); |
89 | 107 | std::vector<std::string> lists; |
90 | 107 | auto it = _args.find(key); |
91 | 107 | if (it != _args.end()) { |
92 | 50 | std::string trimmed_input = boost::algorithm::trim_copy(it->second); |
93 | 50 | if (trimmed_input.empty()) { |
94 | 1 | return lists; |
95 | 1 | } |
96 | | |
97 | 55 | auto validate_single = [&](const std::string& item, const std::string& prefix) { |
98 | 55 | if (item.size() < 2 || item.front() != '[' || item.back() != ']') { |
99 | 1 | throw Exception(ErrorCode::INVALID_ARGUMENT, |
100 | 1 | prefix + key + " must be enclosed in []"); |
101 | 1 | } |
102 | 54 | int depth = 0; |
103 | 418 | for (size_t i = 0; i + 1 < item.size(); ++i) { |
104 | 364 | char c = item[i]; |
105 | 364 | if (c == '[') { |
106 | 54 | ++depth; |
107 | 310 | } else if (c == ']') { |
108 | 0 | --depth; |
109 | 0 | if (depth == 0) { |
110 | 0 | throw Exception(ErrorCode::INVALID_ARGUMENT, |
111 | 0 | prefix + key + " must be enclosed in []"); |
112 | 0 | } |
113 | 0 | } |
114 | 364 | } |
115 | 54 | }; |
116 | | |
117 | 49 | if (boost::regex_search(trimmed_input, sep)) { |
118 | 21 | boost::sregex_token_iterator regex_it(trimmed_input.begin(), trimmed_input.end(), |
119 | 21 | sep, -1); |
120 | 21 | boost::sregex_token_iterator end; |
121 | 76 | for (; regex_it != end; ++regex_it) { |
122 | 55 | std::string item = boost::algorithm::trim_copy(regex_it->str()); |
123 | 55 | validate_single(item, "Each item in "); |
124 | 55 | std::string content = item.substr(1, item.size() - 2); |
125 | 55 | if (!content.empty()) { |
126 | 53 | lists.emplace_back(content); |
127 | 53 | } |
128 | 55 | } |
129 | 28 | } else { |
130 | 28 | if (trimmed_input.size() < 2 || trimmed_input.front() != '[' || |
131 | 28 | trimmed_input.back() != ']') { |
132 | 2 | throw Exception(ErrorCode::INVALID_ARGUMENT, |
133 | 2 | "Item in " + key + " must be enclosed in []"); |
134 | 2 | } |
135 | 26 | std::string content = trimmed_input.substr(1, trimmed_input.size() - 2); |
136 | 26 | if (!content.empty()) { |
137 | 25 | lists.emplace_back(content); |
138 | 25 | } |
139 | 26 | } |
140 | 49 | } |
141 | 104 | return lists; |
142 | 107 | } |
143 | | |
144 | 88 | std::unordered_set<std::string> get_word_set(const std::string& key) const { |
145 | 88 | std::unordered_set<std::string> sets; |
146 | 88 | auto it = _args.find(key); |
147 | 88 | if (it != _args.end()) { |
148 | 27 | std::vector<std::string> lists; |
149 | 27 | boost::split(lists, it->second, boost::is_any_of(",")); |
150 | 37 | for (auto& str : lists) { |
151 | 37 | boost::trim(str); |
152 | 37 | if (!str.empty()) { |
153 | 34 | sets.insert(str); |
154 | 34 | } |
155 | 37 | } |
156 | 27 | } |
157 | 88 | return sets; |
158 | 88 | } |
159 | | |
160 | | std::string to_string() { |
161 | | std::string result; |
162 | | for (const auto& [key, value] : _args) { |
163 | | if (!result.empty()) { |
164 | | result += ", "; |
165 | | } |
166 | | result += key + "=" + value; |
167 | | } |
168 | | return result; |
169 | | } |
170 | | |
171 | | private: |
172 | | std::unordered_map<std::string, std::string> _args; |
173 | | }; |
174 | | |
175 | | } // namespace doris::segment_v2::inverted_index |