/root/doris/be/src/olap/options.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "olap/options.h" |
19 | | |
20 | | #include <ctype.h> |
21 | | #include <rapidjson/document.h> |
22 | | #include <rapidjson/encodings.h> |
23 | | #include <rapidjson/rapidjson.h> |
24 | | #include <stdlib.h> |
25 | | |
26 | | #include <algorithm> |
27 | | #include <memory> |
28 | | #include <ostream> |
29 | | |
30 | | #include "common/config.h" |
31 | | #include "common/logging.h" |
32 | | #include "common/status.h" |
33 | | #include "gutil/strings/split.h" |
34 | | #include "gutil/strings/strip.h" |
35 | | #include "io/cache/file_cache_common.h" |
36 | | #include "io/fs/local_file_system.h" |
37 | | #include "olap/olap_define.h" |
38 | | #include "olap/utils.h" |
39 | | #include "util/path_util.h" |
40 | | #include "util/string_util.h" |
41 | | |
42 | | namespace doris { |
43 | | using namespace ErrorCode; |
44 | | |
45 | | using std::string; |
46 | | using std::vector; |
47 | | |
// Upper-cased keywords recognised in a `storage_root_path` entry (property names and medium
// values). They are compared against upper-cased user input in parse_root_path().
static const std::string CAPACITY_UC = "CAPACITY";
static const std::string MEDIUM_UC = "MEDIUM";
static const std::string SSD_UC = "SSD";
static const std::string HDD_UC = "HDD";
static const std::string REMOTE_CACHE_UC = "REMOTE_CACHE";

// JSON keys (and the accepted values of the "storage" key) of one file cache path entry,
// as read by parse_conf_cache_paths().
static const std::string CACHE_PATH = "path";
static const std::string CACHE_TOTAL_SIZE = "total_size";
static const std::string CACHE_QUERY_LIMIT_SIZE = "query_limit";
static const std::string CACHE_NORMAL_PERCENT = "normal_percent";
static const std::string CACHE_DISPOSABLE_PERCENT = "disposable_percent";
static const std::string CACHE_INDEX_PERCENT = "index_percent";
static const std::string CACHE_TTL_PERCENT = "ttl_percent";
static const std::string CACHE_STORAGE = "storage";
static const std::string CACHE_STORAGE_DISK = "disk";
static const std::string CACHE_STORAGE_MEMORY = "memory";
64 | | |
65 | | // TODO: should be a general util method |
66 | | // static std::string to_upper(const std::string& str) { |
67 | | // std::string out = str; |
68 | | // std::transform(out.begin(), out.end(), out.begin(), [](auto c) { return std::toupper(c); }); |
69 | | // return out; |
70 | | // } |
71 | | |
72 | | // Currently, both of three following formats are supported(see be.conf), remote cache is the |
73 | | // local cache path for remote storage. |
74 | | // format 1: /home/disk1/palo.HDD,50 |
75 | | // format 2: /home/disk1/palo,medium:ssd,capacity:50 |
76 | | // remote cache format: /home/disk/palo/cache,medium:remote_cache,capacity:50 |
77 | 16 | Status parse_root_path(const string& root_path, StorePath* path) { |
78 | 16 | std::vector<string> tmp_vec = strings::Split(root_path, ",", strings::SkipWhitespace()); |
79 | | |
80 | | // parse root path name |
81 | 16 | StripWhiteSpace(&tmp_vec[0]); |
82 | 16 | tmp_vec[0].erase(tmp_vec[0].find_last_not_of('/') + 1); |
83 | 16 | if (tmp_vec[0].empty() || tmp_vec[0][0] != '/') { |
84 | 0 | return Status::Error<INVALID_ARGUMENT>("invalid store path. path={}", tmp_vec[0]); |
85 | 0 | } |
86 | | |
87 | 16 | string canonicalized_path; |
88 | 16 | RETURN_IF_ERROR(io::global_local_filesystem()->canonicalize(tmp_vec[0], &canonicalized_path)); |
89 | 16 | path->path = tmp_vec[0]; |
90 | | |
91 | | // parse root path capacity and storage medium |
92 | 16 | string capacity_str; |
93 | 16 | string medium_str = HDD_UC; |
94 | | |
95 | 16 | string extension = path_util::file_extension(canonicalized_path); |
96 | 16 | if (!extension.empty()) { |
97 | 5 | medium_str = to_upper(extension.substr(1)); |
98 | 5 | } |
99 | | |
100 | 30 | for (int i = 1; i < tmp_vec.size(); i++) { |
101 | | // <property>:<value> or <value> |
102 | 14 | string property; |
103 | 14 | string value; |
104 | 14 | std::pair<string, string> pair = |
105 | 14 | strings::Split(tmp_vec[i], strings::delimiter::Limit(":", 1)); |
106 | 14 | if (pair.second.empty()) { |
107 | | // format_1: <value> only supports setting capacity |
108 | 4 | property = CAPACITY_UC; |
109 | 4 | value = tmp_vec[i]; |
110 | 10 | } else { |
111 | | // format_2 |
112 | 10 | property = to_upper(pair.first); |
113 | 10 | value = pair.second; |
114 | 10 | } |
115 | | |
116 | 14 | StripWhiteSpace(&property); |
117 | 14 | StripWhiteSpace(&value); |
118 | 14 | if (property == CAPACITY_UC) { |
119 | 8 | capacity_str = value; |
120 | 8 | } else if (property == MEDIUM_UC) { |
121 | | // property 'medium' has a higher priority than the extension of |
122 | | // path, so it can override medium_str |
123 | 6 | medium_str = to_upper(value); |
124 | 6 | } else { |
125 | 0 | return Status::Error<INVALID_ARGUMENT>("invalid property of store path, {}", |
126 | 0 | tmp_vec[i]); |
127 | 0 | } |
128 | 14 | } |
129 | | |
130 | 16 | path->capacity_bytes = -1; |
131 | 16 | if (!capacity_str.empty()) { |
132 | 6 | if (!valid_signed_number<int64_t>(capacity_str) || |
133 | 6 | strtol(capacity_str.c_str(), nullptr, 10) < 0) { |
134 | 0 | LOG(WARNING) << "invalid capacity of store path, capacity=" << capacity_str; |
135 | 0 | return Status::Error<INVALID_ARGUMENT>("invalid capacity of store path, capacity={}", |
136 | 0 | capacity_str); |
137 | 0 | } |
138 | 6 | path->capacity_bytes = strtol(capacity_str.c_str(), nullptr, 10) * GB_EXCHANGE_BYTE; |
139 | 6 | } |
140 | | |
141 | 16 | path->storage_medium = TStorageMedium::HDD; |
142 | 16 | if (!medium_str.empty()) { |
143 | 16 | if (medium_str == SSD_UC) { |
144 | 7 | path->storage_medium = TStorageMedium::SSD; |
145 | 9 | } else if (medium_str == HDD_UC) { |
146 | 9 | path->storage_medium = TStorageMedium::HDD; |
147 | 9 | } else if (medium_str == REMOTE_CACHE_UC) { |
148 | 0 | path->storage_medium = TStorageMedium::REMOTE_CACHE; |
149 | 0 | } else { |
150 | 0 | return Status::Error<INVALID_ARGUMENT>("invalid storage medium. medium={}", medium_str); |
151 | 0 | } |
152 | 16 | } |
153 | | |
154 | 16 | return Status::OK(); |
155 | 16 | } |
156 | | |
157 | 5 | Status parse_conf_store_paths(const string& config_path, std::vector<StorePath>* paths) { |
158 | 5 | std::vector<string> path_vec = strings::Split(config_path, ";", strings::SkipWhitespace()); |
159 | 5 | if (path_vec.empty()) { |
160 | | // means compute node |
161 | 0 | return Status::OK(); |
162 | 0 | } |
163 | 5 | if (path_vec.back().empty()) { |
164 | | // deal with the case that user add `;` to the tail |
165 | 0 | path_vec.pop_back(); |
166 | 0 | } |
167 | | |
168 | 5 | std::set<std::string> real_paths; |
169 | 8 | for (auto& item : path_vec) { |
170 | 8 | StorePath path; |
171 | 8 | auto res = parse_root_path(item, &path); |
172 | 8 | if (res.ok()) { |
173 | 8 | auto success = real_paths.emplace(path.path).second; |
174 | 8 | if (success) { |
175 | 7 | paths->emplace_back(std::move(path)); |
176 | 7 | } else { |
177 | 1 | LOG(WARNING) << "a duplicated path is found " << path.path; |
178 | 1 | return Status::Error<INVALID_ARGUMENT>("a duplicated path is found, path={}", |
179 | 1 | path.path); |
180 | 1 | } |
181 | 8 | } else { |
182 | 0 | LOG(WARNING) << "failed to parse store path " << item << ", res=" << res; |
183 | 0 | } |
184 | 8 | } |
185 | 4 | if ((path_vec.size() != paths->size() && !config::ignore_broken_disk)) { |
186 | 0 | return Status::Error<INVALID_ARGUMENT>("fail to parse storage_root_path config. value={}", |
187 | 0 | config_path); |
188 | 0 | } |
189 | 4 | return Status::OK(); |
190 | 4 | } |
191 | | |
192 | 3 | void parse_conf_broken_store_paths(const string& config_path, std::set<std::string>* paths) { |
193 | 3 | std::vector<string> path_vec = strings::Split(config_path, ";", strings::SkipWhitespace()); |
194 | 3 | if (path_vec.empty()) { |
195 | 0 | return; |
196 | 0 | } |
197 | 3 | if (path_vec.back().empty()) { |
198 | | // deal with the case that user add `;` to the tail |
199 | 0 | path_vec.pop_back(); |
200 | 0 | } |
201 | 5 | for (auto& item : path_vec) { |
202 | 5 | paths->emplace(item); |
203 | 5 | } |
204 | 3 | return; |
205 | 3 | } |
206 | | |
207 | | /** format: |
208 | | * [ |
209 | | * {"path": "storage1", "total_size":53687091200,"query_limit": "10737418240"}, |
210 | | * {"path": "storage2", "total_size":53687091200}, |
211 | | * {"path": "storage3", "total_size":53687091200, "ttl_percent":50, "normal_percent":40, "disposable_percent":5, "index_percent":5} |
212 | | * {"path": "xxx", "total_size":53687091200, "storage": "memory"} |
213 | | * ] |
214 | | */ |
215 | 6 | Status parse_conf_cache_paths(const std::string& config_path, std::vector<CachePath>& paths) { |
216 | 6 | using namespace rapidjson; |
217 | 6 | Document document; |
218 | 6 | document.Parse(config_path.c_str()); |
219 | 6 | DCHECK(document.IsArray()) << config_path << " " << document.GetType(); |
220 | 6 | for (auto& config : document.GetArray()) { |
221 | 6 | auto map = config.GetObject(); |
222 | 6 | DCHECK(map.HasMember(CACHE_PATH.c_str())); |
223 | 6 | std::string path = map.FindMember(CACHE_PATH.c_str())->value.GetString(); |
224 | 6 | std::string storage = CACHE_STORAGE_DISK; // disk storage by default |
225 | 6 | if (map.HasMember(CACHE_STORAGE.c_str())) { |
226 | 1 | storage = map.FindMember(CACHE_STORAGE.c_str())->value.GetString(); |
227 | 1 | if (storage != CACHE_STORAGE_DISK && storage != CACHE_STORAGE_MEMORY) [[unlikely]] { |
228 | 0 | return Status::InvalidArgument("invalid file cache storage type: " + storage); |
229 | 0 | } |
230 | 1 | if (storage == CACHE_STORAGE_MEMORY) { |
231 | | // set path to "memory" for memory storage |
232 | | // so that we can track it by path (use _path_to_cache map) |
233 | 1 | path = CACHE_STORAGE_MEMORY; |
234 | 1 | } |
235 | 1 | } |
236 | 6 | int64_t total_size = 0, query_limit_bytes = 0; |
237 | 6 | if (map.HasMember(CACHE_TOTAL_SIZE.c_str())) { |
238 | 6 | auto& value = map.FindMember(CACHE_TOTAL_SIZE.c_str())->value; |
239 | 6 | if (value.IsInt64()) { |
240 | 5 | total_size = value.GetInt64(); |
241 | 5 | } else { |
242 | 1 | total_size = 0; |
243 | 1 | } |
244 | 6 | } |
245 | 6 | if (config::enable_file_cache_query_limit) { |
246 | 6 | if (map.HasMember(CACHE_QUERY_LIMIT_SIZE.c_str())) { |
247 | 3 | auto& value = map.FindMember(CACHE_QUERY_LIMIT_SIZE.c_str())->value; |
248 | 3 | if (value.IsInt64()) { |
249 | 3 | query_limit_bytes = value.GetInt64(); |
250 | 3 | } else { |
251 | 0 | query_limit_bytes = 0; |
252 | 0 | } |
253 | 3 | } |
254 | 6 | } |
255 | 6 | if (total_size < 0 || (config::enable_file_cache_query_limit && query_limit_bytes < 0)) { |
256 | 2 | return Status::InvalidArgument("total_size or query_limit should not less than zero"); |
257 | 2 | } |
258 | | |
259 | | // percent |
260 | 4 | auto get_percent_value = [&](const std::string& key, size_t& percent) { |
261 | 0 | auto& value = map.FindMember(key.c_str())->value; |
262 | 0 | if (value.IsUint()) { |
263 | 0 | percent = value.GetUint(); |
264 | 0 | } else { |
265 | 0 | return Status::InvalidArgument("percent should be uint"); |
266 | 0 | } |
267 | 0 | return Status::OK(); |
268 | 0 | }; |
269 | | |
270 | 4 | size_t normal_percent = io::DEFAULT_NORMAL_PERCENT; |
271 | 4 | size_t disposable_percent = io::DEFAULT_DISPOSABLE_PERCENT; |
272 | 4 | size_t index_percent = io::DEFAULT_INDEX_PERCENT; |
273 | 4 | size_t ttl_percent = io::DEFAULT_TTL_PERCENT; |
274 | 4 | bool has_normal_percent = map.HasMember(CACHE_NORMAL_PERCENT.c_str()); |
275 | 4 | bool has_disposable_percent = map.HasMember(CACHE_DISPOSABLE_PERCENT.c_str()); |
276 | 4 | bool has_index_percent = map.HasMember(CACHE_INDEX_PERCENT.c_str()); |
277 | 4 | bool has_ttl_percent = map.HasMember(CACHE_TTL_PERCENT.c_str()); |
278 | 4 | if (has_normal_percent && has_disposable_percent && has_index_percent && has_ttl_percent) { |
279 | 0 | RETURN_IF_ERROR(get_percent_value(CACHE_NORMAL_PERCENT, normal_percent)); |
280 | 0 | RETURN_IF_ERROR(get_percent_value(CACHE_DISPOSABLE_PERCENT, disposable_percent)); |
281 | 0 | RETURN_IF_ERROR(get_percent_value(CACHE_INDEX_PERCENT, index_percent)); |
282 | 0 | RETURN_IF_ERROR(get_percent_value(CACHE_TTL_PERCENT, ttl_percent)); |
283 | 4 | } else if (has_normal_percent || has_disposable_percent || has_index_percent || |
284 | 4 | has_ttl_percent) { |
285 | 0 | return Status::InvalidArgument( |
286 | 0 | "cache percent (ttl_percent, index_percent, normal_percent, " |
287 | 0 | "disposable_percent) must either be all set or all unset. " |
288 | 0 | "when all unset, use default: ttl_percent=50, index_percent=5, " |
289 | 0 | "normal_percent=40, disposable_percent=5."); |
290 | 0 | } |
291 | 4 | if ((normal_percent + disposable_percent + index_percent + ttl_percent) != 100) { |
292 | 0 | return Status::InvalidArgument("The sum of cache percent config must equal 100."); |
293 | 0 | } |
294 | | |
295 | 4 | paths.emplace_back(std::move(path), total_size, query_limit_bytes, normal_percent, |
296 | 4 | disposable_percent, index_percent, ttl_percent, storage); |
297 | 4 | } |
298 | 4 | if (paths.empty()) { |
299 | 1 | return Status::InvalidArgument("fail to parse storage_root_path config. value={}", |
300 | 1 | config_path); |
301 | 1 | } |
302 | 3 | return Status::OK(); |
303 | 4 | } |
304 | | |
305 | 2 | io::FileCacheSettings CachePath::init_settings() const { |
306 | 2 | return io::get_file_cache_settings(total_bytes, query_limit_bytes, normal_percent, |
307 | 2 | disposable_percent, index_percent, ttl_percent, storage); |
308 | 2 | } |
309 | | |
310 | | } // end namespace doris |