Coverage Report

Created: 2024-11-21 18:14

/root/doris/be/src/olap/options.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "olap/options.h"
19
20
#include <ctype.h>
21
#include <rapidjson/document.h>
22
#include <rapidjson/encodings.h>
23
#include <rapidjson/rapidjson.h>
24
#include <stdlib.h>
25
26
#include <algorithm>
27
#include <memory>
28
#include <ostream>
29
30
#include "common/config.h"
31
#include "common/logging.h"
32
#include "common/status.h"
33
#include "gutil/strings/split.h"
34
#include "gutil/strings/strip.h"
35
#include "io/cache/file_cache_common.h"
36
#include "io/fs/local_file_system.h"
37
#include "olap/olap_define.h"
38
#include "olap/utils.h"
39
#include "util/path_util.h"
40
#include "util/string_util.h"
41
42
namespace doris {
43
using namespace ErrorCode;
44
45
using std::string;
46
using std::vector;
47
48
// Upper-cased property names and medium values recognised in a store path entry.
// They are only ever read, so they are declared const to prevent accidental mutation.
static const std::string CAPACITY_UC = "CAPACITY";
static const std::string MEDIUM_UC = "MEDIUM";
static const std::string SSD_UC = "SSD";
static const std::string HDD_UC = "HDD";
static const std::string REMOTE_CACHE_UC = "REMOTE_CACHE";

// JSON member names (and the accepted "storage" values) of a file cache path entry.
static const std::string CACHE_PATH = "path";
static const std::string CACHE_TOTAL_SIZE = "total_size";
static const std::string CACHE_QUERY_LIMIT_SIZE = "query_limit";
static const std::string CACHE_NORMAL_PERCENT = "normal_percent";
static const std::string CACHE_DISPOSABLE_PERCENT = "disposable_percent";
static const std::string CACHE_INDEX_PERCENT = "index_percent";
static const std::string CACHE_TTL_PERCENT = "ttl_percent";
static const std::string CACHE_STORAGE = "storage";
static const std::string CACHE_STORAGE_DISK = "disk";
static const std::string CACHE_STORAGE_MEMORY = "memory";
64
65
// TODO: should be a general util method
66
// static std::string to_upper(const std::string& str) {
67
//     std::string out = str;
68
//     std::transform(out.begin(), out.end(), out.begin(), [](auto c) { return std::toupper(c); });
69
//     return out;
70
// }
71
72
// Currently, all three of the following formats are supported (see be.conf); remote cache is the
73
// local cache path for remote storage.
74
//   format 1:   /home/disk1/palo.HDD,50
75
//   format 2:   /home/disk1/palo,medium:ssd,capacity:50
76
//   remote cache format:  /home/disk/palo/cache,medium:remote_cache,capacity:50
77
16
// Parses a single store path entry into `path`.
//
// `root_path` is a comma separated list: the first element is the directory, the remaining
// elements are either a bare capacity value or `<property>:<value>` pairs where the property
// is `capacity` or `medium` (case-insensitive).
//
// On success fills `path->path`, `path->capacity_bytes` (-1 when no capacity is given) and
// `path->storage_medium` (HDD unless the path extension or a `medium` property says otherwise).
// Returns INVALID_ARGUMENT for an empty/relative path, an unknown property, a malformed,
// negative or overflowing capacity, or an unknown medium; errors from canonicalize() are
// propagated unchanged.
Status parse_root_path(const string& root_path, StorePath* path) {
    std::vector<string> tmp_vec = strings::Split(root_path, ",", strings::SkipWhitespace());
    // If splitting yields no tokens (e.g. an empty entry), indexing tmp_vec[0] below would be
    // out of bounds, so reject the entry up front.
    if (tmp_vec.empty()) {
        return Status::Error<INVALID_ARGUMENT>("invalid store path. path={}", root_path);
    }

    // parse root path name
    string& dir = tmp_vec[0];
    StripWhiteSpace(&dir);
    // Drop trailing '/'. A path consisting only of '/' becomes empty and is rejected below.
    dir.erase(dir.find_last_not_of('/') + 1);
    if (dir.empty() || dir[0] != '/') {
        return Status::Error<INVALID_ARGUMENT>("invalid store path. path={}", dir);
    }

    string canonicalized_path;
    RETURN_IF_ERROR(io::global_local_filesystem()->canonicalize(dir, &canonicalized_path));
    // The configured spelling is what gets stored; the canonical form is only used to
    // inspect the file extension below.
    path->path = dir;

    // parse root path capacity and storage medium
    string capacity_str;
    string medium_str = HDD_UC;

    // format 1: the medium is encoded as the path extension, e.g. "/home/disk1/palo.HDD"
    string extension = path_util::file_extension(canonicalized_path);
    if (!extension.empty()) {
        medium_str = to_upper(extension.substr(1));
    }

    for (size_t i = 1; i < tmp_vec.size(); ++i) {
        // <property>:<value> or <value>
        string property;
        string value;
        std::pair<string, string> pair =
                strings::Split(tmp_vec[i], strings::delimiter::Limit(":", 1));
        if (pair.second.empty()) {
            // format_1: <value> only supports setting capacity
            property = CAPACITY_UC;
            value = tmp_vec[i];
        } else {
            // format_2
            property = to_upper(pair.first);
            value = pair.second;
        }

        StripWhiteSpace(&property);
        StripWhiteSpace(&value);
        if (property == CAPACITY_UC) {
            capacity_str = value;
        } else if (property == MEDIUM_UC) {
            // property 'medium' has a higher priority than the extension of
            // path, so it can override medium_str
            medium_str = to_upper(value);
        } else {
            return Status::Error<INVALID_ARGUMENT>("invalid property of store path, {}",
                                                   tmp_vec[i]);
        }
    }

    path->capacity_bytes = -1;
    if (!capacity_str.empty()) {
        // The value is validated as int64_t, so parse it with strtoll (long long) rather than
        // strtol (long), and parse it only once.
        const bool well_formed = valid_signed_number<int64_t>(capacity_str);
        const int64_t capacity_gb =
                well_formed ? strtoll(capacity_str.c_str(), nullptr, 10) : int64_t {-1};
        int64_t capacity_bytes = 0;
        // The GB -> bytes conversion must not overflow; signed overflow is undefined behaviour.
        if (capacity_gb < 0 ||
            __builtin_mul_overflow(capacity_gb, GB_EXCHANGE_BYTE, &capacity_bytes)) {
            LOG(WARNING) << "invalid capacity of store path, capacity=" << capacity_str;
            return Status::Error<INVALID_ARGUMENT>("invalid capacity of store path, capacity={}",
                                                   capacity_str);
        }
        path->capacity_bytes = capacity_bytes;
    }

    path->storage_medium = TStorageMedium::HDD;
    if (!medium_str.empty()) {
        if (medium_str == SSD_UC) {
            path->storage_medium = TStorageMedium::SSD;
        } else if (medium_str == HDD_UC) {
            path->storage_medium = TStorageMedium::HDD;
        } else if (medium_str == REMOTE_CACHE_UC) {
            path->storage_medium = TStorageMedium::REMOTE_CACHE;
        } else {
            return Status::Error<INVALID_ARGUMENT>("invalid storage medium. medium={}", medium_str);
        }
    }

    return Status::OK();
}
156
157
5
// Parses a ';' separated list of store path entries and appends every successfully parsed
// entry to `paths`.
//
// Returns OK without touching `paths` when the config contains no entries (compute node).
// Returns INVALID_ARGUMENT when two entries resolve to the same `StorePath::path`, or when
// at least one entry fails to parse and config::ignore_broken_disk is false. Entries that
// fail to parse are logged and skipped.
Status parse_conf_store_paths(const string& config_path, std::vector<StorePath>* paths) {
    std::vector<string> path_vec = strings::Split(config_path, ";", strings::SkipWhitespace());
    if (path_vec.empty()) {
        // means compute node
        return Status::OK();
    }
    if (path_vec.back().empty()) {
        // deal with the case that user add `;` to the tail
        path_vec.pop_back();
    }

    std::set<std::string> real_paths;
    // Count what THIS call parsed. Comparing against paths->size() would give a wrong answer
    // whenever the caller hands in a vector that already contains entries.
    size_t parsed_count = 0;
    for (const auto& item : path_vec) {
        StorePath path;
        auto res = parse_root_path(item, &path);
        if (!res.ok()) {
            LOG(WARNING) << "failed to parse store path " << item << ", res=" << res;
            continue;
        }
        if (!real_paths.emplace(path.path).second) {
            LOG(WARNING) << "a duplicated path is found " << path.path;
            return Status::Error<INVALID_ARGUMENT>("a duplicated path is found, path={}",
                                                   path.path);
        }
        paths->emplace_back(std::move(path));
        ++parsed_count;
    }
    if (parsed_count != path_vec.size() && !config::ignore_broken_disk) {
        return Status::Error<INVALID_ARGUMENT>("fail to parse storage_root_path config. value={}",
                                               config_path);
    }
    return Status::OK();
}
191
192
3
void parse_conf_broken_store_paths(const string& config_path, std::set<std::string>* paths) {
193
3
    std::vector<string> path_vec = strings::Split(config_path, ";", strings::SkipWhitespace());
194
3
    if (path_vec.empty()) {
195
0
        return;
196
0
    }
197
3
    if (path_vec.back().empty()) {
198
        // deal with the case that user add `;` to the tail
199
0
        path_vec.pop_back();
200
0
    }
201
5
    for (auto& item : path_vec) {
202
5
        paths->emplace(item);
203
5
    }
204
3
    return;
205
3
}
206
207
/** format:   
208
 *  [
209
 *    {"path": "storage1", "total_size":53687091200,"query_limit": "10737418240"},
210
 *    {"path": "storage2", "total_size":53687091200},
211
 *    {"path": "storage3", "total_size":53687091200, "ttl_percent":50, "normal_percent":40, "disposable_percent":5, "index_percent":5},
212
 *    {"path": "xxx", "total_size":53687091200, "storage": "memory"}
213
 *  ]
214
 */
215
6
// Parses the JSON file cache configuration in `config_path` and appends one CachePath per
// array element to `paths`.
//
// Each element must be an object with a string "path". Optional members:
//   - "storage": "disk" (default) or "memory"; for "memory" the path is replaced by "memory".
//   - "total_size": int64; any other JSON type is treated as 0.
//   - "query_limit": int64, only read when config::enable_file_cache_query_limit is set;
//     any other JSON type is treated as 0.
//   - "normal_percent", "disposable_percent", "index_percent", "ttl_percent": unsigned ints
//     that must be given all together (or not at all) and must sum to 100.
//
// Returns InvalidArgument for malformed JSON, a non-array document, a malformed element,
// negative sizes, inconsistent percents, or when `paths` is still empty at the end.
Status parse_conf_cache_paths(const std::string& config_path, std::vector<CachePath>& paths) {
    using namespace rapidjson;
    Document document;
    document.Parse(config_path.c_str());
    // The config is user supplied; DCHECK alone would let malformed input reach GetArray()
    // unchecked in release builds.
    if (document.HasParseError() || !document.IsArray()) {
        return Status::InvalidArgument("file cache config must be a json array. value={}",
                                       config_path);
    }
    for (auto& config : document.GetArray()) {
        if (!config.IsObject()) {
            return Status::InvalidArgument(
                    "each file cache config entry must be a json object. value={}", config_path);
        }
        auto map = config.GetObject();

        auto path_it = map.FindMember(CACHE_PATH.c_str());
        if (path_it == map.MemberEnd() || !path_it->value.IsString()) {
            return Status::InvalidArgument(
                    "file cache config entry must contain a string 'path'. value={}", config_path);
        }
        std::string path = path_it->value.GetString();

        std::string storage = CACHE_STORAGE_DISK; // disk storage by default
        if (auto storage_it = map.FindMember(CACHE_STORAGE.c_str());
            storage_it != map.MemberEnd()) {
            if (!storage_it->value.IsString()) [[unlikely]] {
                return Status::InvalidArgument("file cache storage type must be a string");
            }
            storage = storage_it->value.GetString();
            if (storage != CACHE_STORAGE_DISK && storage != CACHE_STORAGE_MEMORY) [[unlikely]] {
                return Status::InvalidArgument("invalid file cache storage type: " + storage);
            }
            if (storage == CACHE_STORAGE_MEMORY) {
                // set path to "memory" for memory storage
                // so that we can track it by path (use _path_to_cache map)
                path = CACHE_STORAGE_MEMORY;
            }
        }

        // Reads an optional int64 member; a missing member or a non-int64 value yields 0.
        auto int64_or_zero = [&](const std::string& key) -> int64_t {
            auto it = map.FindMember(key.c_str());
            if (it == map.MemberEnd() || !it->value.IsInt64()) {
                return 0;
            }
            return it->value.GetInt64();
        };

        int64_t total_size = int64_or_zero(CACHE_TOTAL_SIZE);
        int64_t query_limit_bytes = 0;
        if (config::enable_file_cache_query_limit) {
            query_limit_bytes = int64_or_zero(CACHE_QUERY_LIMIT_SIZE);
        }
        if (total_size < 0 || (config::enable_file_cache_query_limit && query_limit_bytes < 0)) {
            return Status::InvalidArgument("total_size or query_limit should not less than zero");
        }

        // percent
        // Only called for keys already known to be present (see the has_* flags below).
        auto get_percent_value = [&](const std::string& key, size_t& percent) {
            auto& value = map.FindMember(key.c_str())->value;
            if (value.IsUint()) {
                percent = value.GetUint();
            } else {
                return Status::InvalidArgument("percent should be uint");
            }
            return Status::OK();
        };

        size_t normal_percent = io::DEFAULT_NORMAL_PERCENT;
        size_t disposable_percent = io::DEFAULT_DISPOSABLE_PERCENT;
        size_t index_percent = io::DEFAULT_INDEX_PERCENT;
        size_t ttl_percent = io::DEFAULT_TTL_PERCENT;
        bool has_normal_percent = map.HasMember(CACHE_NORMAL_PERCENT.c_str());
        bool has_disposable_percent = map.HasMember(CACHE_DISPOSABLE_PERCENT.c_str());
        bool has_index_percent = map.HasMember(CACHE_INDEX_PERCENT.c_str());
        bool has_ttl_percent = map.HasMember(CACHE_TTL_PERCENT.c_str());
        if (has_normal_percent && has_disposable_percent && has_index_percent && has_ttl_percent) {
            RETURN_IF_ERROR(get_percent_value(CACHE_NORMAL_PERCENT, normal_percent));
            RETURN_IF_ERROR(get_percent_value(CACHE_DISPOSABLE_PERCENT, disposable_percent));
            RETURN_IF_ERROR(get_percent_value(CACHE_INDEX_PERCENT, index_percent));
            RETURN_IF_ERROR(get_percent_value(CACHE_TTL_PERCENT, ttl_percent));
        } else if (has_normal_percent || has_disposable_percent || has_index_percent ||
                   has_ttl_percent) {
            return Status::InvalidArgument(
                    "cache percent (ttl_percent, index_percent, normal_percent, "
                    "disposable_percent) must either be all set or all unset. "
                    "when all unset, use default: ttl_percent=50, index_percent=5, "
                    "normal_percent=40, disposable_percent=5.");
        }
        if ((normal_percent + disposable_percent + index_percent + ttl_percent) != 100) {
            return Status::InvalidArgument("The sum of cache percent config must equal 100.");
        }

        paths.emplace_back(std::move(path), total_size, query_limit_bytes, normal_percent,
                           disposable_percent, index_percent, ttl_percent, storage);
    }
    if (paths.empty()) {
        // NOTE(review): the message names storage_root_path although this function parses the
        // file cache config; kept as-is in case callers or tests match on it -- confirm.
        return Status::InvalidArgument("fail to parse storage_root_path config. value={}",
                                       config_path);
    }
    return Status::OK();
}
304
305
2
// Builds the file cache settings for this cache path by forwarding its size limits,
// per-queue percentages and storage type to io::get_file_cache_settings().
io::FileCacheSettings CachePath::init_settings() const {
    io::FileCacheSettings settings =
            io::get_file_cache_settings(total_bytes, query_limit_bytes, normal_percent,
                                        disposable_percent, index_percent, ttl_percent, storage);
    return settings;
}
309
310
} // end namespace doris