/root/doris/be/src/util/cgroup_util.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "util/cgroup_util.h" |
19 | | |
20 | | #include <algorithm> |
21 | | #include <boost/algorithm/string.hpp> |
22 | | #include <fstream> |
23 | | #include <utility> |
24 | | #include <vector> |
25 | | |
26 | | #include "gutil/stringprintf.h" |
27 | | #include "gutil/strings/escaping.h" |
28 | | #include "gutil/strings/split.h" |
29 | | #include "gutil/strings/substitute.h" |
30 | | #include "io/fs/local_file_system.h" |
31 | | #include "util/error_util.h" |
32 | | #include "util/string_parser.hpp" |
33 | | |
34 | | using strings::CUnescape; |
35 | | using strings::Split; |
36 | | using strings::SkipWhitespace; |
37 | | using std::pair; |
38 | | |
39 | | namespace doris { |
40 | | |
41 | 14 | bool CGroupUtil::cgroupsv1_enable() { |
42 | 14 | bool exists = true; |
43 | 14 | Status st = io::global_local_filesystem()->exists("/proc/cgroups", &exists); |
44 | 14 | return st.ok() && exists; |
45 | 14 | } |
46 | | |
47 | 16 | bool CGroupUtil::cgroupsv2_enable() { |
48 | 16 | #if defined(OS_LINUX) |
49 | | // This file exists iff the host has cgroups v2 enabled. |
50 | 16 | auto controllers_file = default_cgroups_mount / "cgroup.controllers"; |
51 | 16 | bool exists = true; |
52 | 16 | Status st = io::global_local_filesystem()->exists(controllers_file, &exists); |
53 | 16 | return st.ok() && exists; |
54 | | #else |
55 | | return false; |
56 | | #endif |
57 | 16 | } |
58 | | |
59 | 11 | Status CGroupUtil::find_global_cgroupv1(const string& subsystem, string* path) { |
60 | 11 | std::ifstream proc_cgroups("/proc/self/cgroup", std::ios::in); |
61 | 11 | string line; |
62 | 66 | while (true) { |
63 | 66 | if (proc_cgroups.fail()) { |
64 | 0 | return Status::CgroupError("Error reading /proc/self/cgroup: {}", get_str_err_msg()); |
65 | 66 | } else if (proc_cgroups.peek() == std::ifstream::traits_type::eof()) { |
66 | 0 | return Status::CgroupError("Could not find subsystem {} in /proc/self/cgroup", |
67 | 0 | subsystem); |
68 | 0 | } |
69 | | // The line format looks like this: |
70 | | // 4:memory:/user.slice |
71 | | // 9:cpu,cpuacct:/user.slice |
72 | | // so field size will be 3 |
73 | 66 | getline(proc_cgroups, line); |
74 | 66 | if (!proc_cgroups.good()) { |
75 | 0 | continue; |
76 | 0 | } |
77 | 66 | std::vector<string> fields = Split(line, ":"); |
78 | | // ":" in the path does not appear to be escaped - bail in the unusual case that |
79 | | // we get too many tokens. |
80 | 66 | if (fields.size() != 3) { |
81 | 0 | return Status::InvalidArgument( |
82 | 0 | "Could not parse line from /proc/self/cgroup - had {} > 3 tokens: '{}'", |
83 | 0 | fields.size(), line); |
84 | 0 | } |
85 | 66 | std::vector<string> subsystems = Split(fields[1], ","); |
86 | 66 | auto it = std::find(subsystems.begin(), subsystems.end(), subsystem); |
87 | 66 | if (it != subsystems.end()) { |
88 | 11 | *path = std::move(fields[2]); |
89 | 11 | return Status::OK(); |
90 | 11 | } |
91 | 66 | } |
92 | 11 | } |
93 | | |
94 | 22 | static Status unescape_path(const string& escaped, string* unescaped) { |
95 | 22 | string err; |
96 | 22 | if (!CUnescape(escaped, unescaped, &err)) { |
97 | 0 | return Status::InvalidArgument("Could not unescape path '{}': {}", escaped, err); |
98 | 0 | } |
99 | 22 | return Status::OK(); |
100 | 22 | } |
101 | | |
102 | 11 | Status CGroupUtil::find_cgroupv1_mounts(const string& subsystem, pair<string, string>* result) { |
103 | 11 | std::ifstream mountinfo("/proc/self/mountinfo", std::ios::in); |
104 | 11 | string line; |
105 | 143 | while (true) { |
106 | 143 | if (mountinfo.fail() || mountinfo.bad()) { |
107 | 0 | return Status::CgroupError("Error reading /proc/self/mountinfo: {}", get_str_err_msg()); |
108 | 143 | } else if (mountinfo.eof()) { |
109 | 0 | return Status::CgroupError("Could not find subsystem {} in /proc/self/mountinfo", |
110 | 0 | subsystem); |
111 | 0 | } |
112 | | // The relevant lines look like below (see proc manpage for full documentation). The |
113 | | // first example is running outside of a container, the second example is running |
114 | | // inside a docker container. Field 3 is the path relative to the root CGroup on |
115 | | // the host and Field 4 is the mount point from this process's point of view. |
116 | | // 34 29 0:28 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:15 - |
117 | | // cgroup cgroup rw,memory |
118 | | // 275 271 0:28 /docker/f23eee6f88c2ba99fcce /sys/fs/cgroup/memory |
119 | | // ro,nosuid,nodev,noexec,relatime master:15 - cgroup cgroup rw,memory |
120 | 143 | getline(mountinfo, line); |
121 | 143 | if (!mountinfo.good()) { |
122 | 0 | continue; |
123 | 0 | } |
124 | 143 | std::vector<string> fields = Split(line, " ", SkipWhitespace()); |
125 | 143 | if (fields.size() < 7) { |
126 | 0 | return Status::InvalidArgument( |
127 | 0 | "Could not parse line from /proc/self/mountinfo - had {} > 7 tokens: '{}'", |
128 | 0 | fields.size(), line); |
129 | 0 | } |
130 | 143 | if (fields[fields.size() - 3] != "cgroup") { |
131 | 66 | continue; |
132 | 66 | } |
133 | | // This is a cgroup mount. Check if it's the mount we're looking for. |
134 | 77 | std::vector<string> cgroup_opts = Split(fields[fields.size() - 1], ",", SkipWhitespace()); |
135 | 77 | auto it = std::find(cgroup_opts.begin(), cgroup_opts.end(), subsystem); |
136 | 77 | if (it == cgroup_opts.end()) { |
137 | 66 | continue; |
138 | 66 | } |
139 | | // This is the right mount. |
140 | 11 | string mount_path, system_path; |
141 | 11 | RETURN_IF_ERROR(unescape_path(fields[4], &mount_path)); |
142 | 11 | RETURN_IF_ERROR(unescape_path(fields[3], &system_path)); |
143 | | // Strip trailing "/" so that both returned paths match in whether they have a |
144 | | // trailing "/". |
145 | 11 | if (system_path[system_path.size() - 1] == '/') { |
146 | 0 | system_path.pop_back(); |
147 | 0 | } |
148 | 11 | *result = {mount_path, system_path}; |
149 | 11 | return Status::OK(); |
150 | 11 | } |
151 | 11 | } |
152 | | |
153 | 11 | Status CGroupUtil::find_abs_cgroupv1_path(const string& subsystem, string* path) { |
154 | 11 | if (!cgroupsv1_enable()) { |
155 | 0 | return Status::InvalidArgument("cgroup is not enabled!"); |
156 | 0 | } |
157 | 11 | RETURN_IF_ERROR(find_global_cgroupv1(subsystem, path)); |
158 | 11 | pair<string, string> paths; |
159 | 11 | RETURN_IF_ERROR(find_cgroupv1_mounts(subsystem, &paths)); |
160 | 11 | const string& mount_path = paths.first; |
161 | 11 | const string& system_path = paths.second; |
162 | 11 | if (path->compare(0, system_path.size(), system_path) != 0) { |
163 | 0 | return Status::InvalidArgument("Expected CGroup path '{}' to start with '{}'", *path, |
164 | 0 | system_path); |
165 | 0 | } |
166 | 11 | path->replace(0, system_path.size(), mount_path); |
167 | 11 | return Status::OK(); |
168 | 11 | } |
169 | | |
170 | 0 | std::string CGroupUtil::cgroupv2_of_process() { |
171 | 0 | #if defined(OS_LINUX) |
172 | 0 | if (!cgroupsv2_enable()) { |
173 | 0 | return ""; |
174 | 0 | } |
175 | | // All PIDs assigned to a cgroup are in /sys/fs/cgroups/{cgroup_name}/cgroup.procs |
176 | | // A simpler way to get the membership is: |
177 | 0 | std::ifstream cgroup_name_file("/proc/self/cgroup"); |
178 | 0 | if (!cgroup_name_file.is_open()) { |
179 | 0 | return ""; |
180 | 0 | } |
181 | | // With cgroups v2, there will be a *single* line with prefix "0::/" |
182 | | // (see https://docs.kernel.org/admin-guide/cgroup-v2.html) |
183 | 0 | std::string cgroup; |
184 | 0 | std::getline(cgroup_name_file, cgroup); |
185 | 0 | static const std::string v2_prefix = "0::/"; |
186 | 0 | if (!cgroup.starts_with(v2_prefix)) { |
187 | 0 | return ""; |
188 | 0 | } |
189 | 0 | cgroup = cgroup.substr(v2_prefix.length()); |
190 | 0 | return cgroup; |
191 | | #else |
192 | | return ""; |
193 | | #endif |
194 | 0 | } |
195 | | |
196 | 0 | std::optional<std::string> CGroupUtil::get_cgroupsv2_path(const std::string& subsystem) { |
197 | 0 | #if defined(OS_LINUX) |
198 | 0 | if (!CGroupUtil::cgroupsv2_enable()) { |
199 | 0 | return {}; |
200 | 0 | } |
201 | | |
202 | 0 | std::string cgroup = CGroupUtil::cgroupv2_of_process(); |
203 | 0 | auto current_cgroup = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup); |
204 | | |
205 | | // Return the bottom-most nested current memory file. If there is no such file at the current |
206 | | // level, try again at the parent level as memory settings are inherited. |
207 | 0 | while (current_cgroup != default_cgroups_mount.parent_path()) { |
208 | 0 | if (std::filesystem::exists(current_cgroup / subsystem)) { |
209 | 0 | return {current_cgroup}; |
210 | 0 | } |
211 | 0 | current_cgroup = current_cgroup.parent_path(); |
212 | 0 | } |
213 | 0 | return {}; |
214 | | #else |
215 | | return {}; |
216 | | #endif |
217 | 0 | } |
218 | | |
219 | | Status CGroupUtil::read_int_line_from_cgroup_file(const std::filesystem::path& file_path, |
220 | 5 | int64_t* val) { |
221 | 5 | std::ifstream file_stream(file_path, std::ios::in); |
222 | 5 | if (!file_stream.is_open()) { |
223 | 1 | return Status::CgroupError("Error open {}", file_path.string()); |
224 | 1 | } |
225 | | |
226 | 4 | string line; |
227 | 4 | getline(file_stream, line); |
228 | 4 | if (file_stream.fail() || file_stream.bad()) { |
229 | 0 | return Status::CgroupError("Error reading {}: {}", file_path.string(), get_str_err_msg()); |
230 | 0 | } |
231 | 4 | StringParser::ParseResult pr; |
232 | | // Parse into an int64_t If it overflows, returning the max value of int64_t is ok because that |
233 | | // is effectively unlimited. |
234 | 4 | *val = StringParser::string_to_int<int64_t>(line.c_str(), line.size(), &pr); |
235 | 4 | if ((pr != StringParser::PARSE_SUCCESS && pr != StringParser::PARSE_OVERFLOW)) { |
236 | 0 | return Status::InvalidArgument("Failed to parse {} as int64: '{}'", file_path.string(), |
237 | 0 | line); |
238 | 0 | } |
239 | 4 | return Status::OK(); |
240 | 4 | } |
241 | | |
242 | | void CGroupUtil::read_int_metric_from_cgroup_file( |
243 | | const std::filesystem::path& file_path, |
244 | 5 | std::unordered_map<std::string, int64_t>& metrics_map) { |
245 | 5 | std::ifstream cgroup_file(file_path, std::ios::in); |
246 | 5 | std::string line; |
247 | 144 | while (cgroup_file.good() && !cgroup_file.eof()) { |
248 | 139 | getline(cgroup_file, line); |
249 | 139 | std::vector<std::string> fields = strings::Split(line, " ", strings::SkipWhitespace()); |
250 | 139 | if (fields.size() < 2) { |
251 | 4 | continue; |
252 | 4 | } |
253 | 135 | std::string key = fields[0].substr(0, fields[0].size()); |
254 | | |
255 | 135 | StringParser::ParseResult result; |
256 | 135 | auto value = |
257 | 135 | StringParser::string_to_int<int64_t>(fields[1].data(), fields[1].size(), &result); |
258 | | |
259 | 135 | if (result == StringParser::PARSE_SUCCESS) { |
260 | 135 | if (fields.size() == 2) { |
261 | 135 | metrics_map[key] = value; |
262 | 135 | } else if (fields[2] == "kB") { |
263 | 0 | metrics_map[key] = value * 1024L; |
264 | 0 | } |
265 | 135 | } |
266 | 135 | } |
267 | 5 | if (cgroup_file.is_open()) { |
268 | 4 | cgroup_file.close(); |
269 | 4 | } |
270 | 5 | } |
271 | | |
272 | | Status CGroupUtil::read_string_line_from_cgroup_file(const std::filesystem::path& file_path, |
273 | 6 | std::string* line_ptr) { |
274 | 6 | std::ifstream file_stream(file_path, std::ios::in); |
275 | 6 | if (!file_stream.is_open()) { |
276 | 0 | return Status::CgroupError("Error open {}", file_path.string()); |
277 | 0 | } |
278 | 6 | string line; |
279 | 6 | getline(file_stream, line); |
280 | 6 | if (file_stream.fail() || file_stream.bad()) { |
281 | 0 | return Status::CgroupError("Error reading {}: {}", file_path.string(), get_str_err_msg()); |
282 | 0 | } |
283 | 6 | *line_ptr = line; |
284 | 6 | return Status::OK(); |
285 | 6 | } |
286 | | |
287 | 8 | Status CGroupUtil::parse_cpuset_line(std::string cpuset_line, int* cpu_count_ptr) { |
288 | 8 | if (cpuset_line.empty()) { |
289 | 0 | return Status::CgroupError("cpuset line is empty"); |
290 | 0 | } |
291 | 8 | std::vector<string> ranges; |
292 | 8 | boost::split(ranges, cpuset_line, boost::is_any_of(",")); |
293 | 8 | int cpu_count = 0; |
294 | | |
295 | 15 | for (const std::string& range : ranges) { |
296 | 15 | std::vector<std::string> cpu_values; |
297 | 15 | boost::split(cpu_values, range, boost::is_any_of("-")); |
298 | | |
299 | 15 | if (cpu_values.size() == 2) { |
300 | 10 | int start = std::stoi(cpu_values[0]); |
301 | 10 | int end = std::stoi(cpu_values[1]); |
302 | 10 | cpu_count += (end - start) + 1; |
303 | 10 | } else { |
304 | 5 | cpu_count++; |
305 | 5 | } |
306 | 15 | } |
307 | 8 | *cpu_count_ptr = cpu_count; |
308 | 8 | return Status::OK(); |
309 | 8 | } |
310 | | |
311 | 2 | int CGroupUtil::get_cgroup_limited_cpu_number(int physical_cores) { |
312 | 2 | if (physical_cores <= 0) { |
313 | 0 | return physical_cores; |
314 | 0 | } |
315 | 2 | int ret = physical_cores; |
316 | 2 | #if defined(OS_LINUX) |
317 | | // For cgroup v2 |
318 | | // Child cgroup's cpu.max may bigger than parent group's cpu.max, |
319 | | // so it should look up from current cgroup to top group. |
320 | | // For cpuset, child cgroup's cpuset.cpus could not bigger thant parent's cpuset.cpus. |
321 | 2 | if (CGroupUtil::cgroupsv2_enable()) { |
322 | 0 | std::string cgroupv2_process_path = CGroupUtil::cgroupv2_of_process(); |
323 | 0 | if (cgroupv2_process_path.empty()) { |
324 | 0 | return ret; |
325 | 0 | } |
326 | 0 | std::filesystem::path current_cgroup_path = (default_cgroups_mount / cgroupv2_process_path); |
327 | 0 | ret = get_cgroup_v2_cpu_quota_number(current_cgroup_path, default_cgroups_mount, ret); |
328 | |
|
329 | 0 | current_cgroup_path = (default_cgroups_mount / cgroupv2_process_path); |
330 | 0 | ret = get_cgroup_v2_cpuset_number(current_cgroup_path, default_cgroups_mount, ret); |
331 | 2 | } else if (CGroupUtil::cgroupsv1_enable()) { |
332 | | // cpu quota, should find first not empty config from current path to top. |
333 | | // because if a process attach to current cgroup, its cpu quota may not be set. |
334 | 2 | std::string cpu_quota_path = ""; |
335 | 2 | Status cpu_quota_ret = CGroupUtil::find_abs_cgroupv1_path("cpu", &cpu_quota_path); |
336 | 2 | if (cpu_quota_ret.ok() && !cpu_quota_path.empty()) { |
337 | 2 | std::filesystem::path current_cgroup_path = cpu_quota_path; |
338 | 2 | ret = get_cgroup_v1_cpu_quota_number(current_cgroup_path, default_cgroups_mount, ret); |
339 | 2 | } |
340 | | |
341 | | //cpuset |
342 | | // just lookup current process cgroup path is enough |
343 | | // because if a process attach to current cgroup, its cpuset.cpus must be set. |
344 | 2 | std::string cpuset_path = ""; |
345 | 2 | Status cpuset_ret = CGroupUtil::find_abs_cgroupv1_path("cpuset", &cpuset_path); |
346 | 2 | if (cpuset_ret.ok() && !cpuset_path.empty()) { |
347 | 2 | std::filesystem::path current_path = cpuset_path; |
348 | 2 | ret = get_cgroup_v1_cpuset_number(current_path, ret); |
349 | 2 | } |
350 | 2 | } |
351 | 2 | #endif |
352 | 2 | return ret; |
353 | 2 | } |
354 | | |
355 | | int CGroupUtil::get_cgroup_v2_cpu_quota_number(std::filesystem::path& current_path, |
356 | | const std::filesystem::path& default_cg_mout_path, |
357 | 4 | int cpu_num) { |
358 | 4 | int ret = cpu_num; |
359 | 12 | while (current_path != default_cg_mout_path.parent_path()) { |
360 | 8 | std::ifstream cpu_max_file(current_path / "cpu.max"); |
361 | 8 | if (cpu_max_file.is_open()) { |
362 | 8 | std::string cpu_limit_str; |
363 | 8 | double cpu_period; |
364 | 8 | cpu_max_file >> cpu_limit_str >> cpu_period; |
365 | 8 | if (cpu_limit_str != "max" && cpu_period != 0) { |
366 | 5 | double cpu_limit = std::stod(cpu_limit_str); |
367 | 5 | ret = std::min(static_cast<int>(std::ceil(cpu_limit / cpu_period)), ret); |
368 | 5 | } |
369 | 8 | } |
370 | 8 | current_path = current_path.parent_path(); |
371 | 8 | } |
372 | 4 | return ret; |
373 | 4 | } |
374 | | |
375 | | int CGroupUtil::get_cgroup_v2_cpuset_number(std::filesystem::path& current_path, |
376 | | const std::filesystem::path& default_cg_mout_path, |
377 | 2 | int cpu_num) { |
378 | 2 | int ret = cpu_num; |
379 | 3 | while (current_path != default_cg_mout_path.parent_path()) { |
380 | 3 | std::ifstream cpuset_cpus_file(current_path / "cpuset.cpus.effective"); |
381 | 3 | current_path = current_path.parent_path(); |
382 | 3 | if (cpuset_cpus_file.is_open()) { |
383 | 3 | std::string cpuset_line; |
384 | 3 | cpuset_cpus_file >> cpuset_line; |
385 | 3 | if (cpuset_line.empty()) { |
386 | 1 | continue; |
387 | 1 | } |
388 | 2 | int cpus_count = 0; |
389 | 2 | static_cast<void>(CGroupUtil::parse_cpuset_line(cpuset_line, &cpus_count)); |
390 | 2 | ret = std::min(cpus_count, ret); |
391 | 2 | break; |
392 | 3 | } |
393 | 3 | } |
394 | 2 | return ret; |
395 | 2 | } |
396 | | |
397 | | int CGroupUtil::get_cgroup_v1_cpu_quota_number(std::filesystem::path& current_path, |
398 | | const std::filesystem::path& default_cg_mout_path, |
399 | 5 | int cpu_num) { |
400 | 5 | int ret = cpu_num; |
401 | 12 | while (current_path != default_cg_mout_path.parent_path()) { |
402 | 9 | std::ifstream cpu_quota_file(current_path / "cpu.cfs_quota_us"); |
403 | 9 | std::ifstream cpu_period_file(current_path / "cpu.cfs_period_us"); |
404 | 9 | if (cpu_quota_file.is_open() && cpu_period_file.is_open()) { |
405 | 7 | double cpu_quota_value; |
406 | 7 | double cpu_period_value; |
407 | 7 | cpu_quota_file >> cpu_quota_value; |
408 | 7 | cpu_period_file >> cpu_period_value; |
409 | 7 | if (cpu_quota_value > 0 && cpu_period_value > 0) { |
410 | 2 | ret = std::min(ret, |
411 | 2 | static_cast<int>(std::ceil(cpu_quota_value / cpu_period_value))); |
412 | 2 | break; |
413 | 2 | } |
414 | 7 | } |
415 | 7 | current_path = current_path.parent_path(); |
416 | 7 | } |
417 | 5 | return ret; |
418 | 5 | } |
419 | | |
420 | 3 | int CGroupUtil::get_cgroup_v1_cpuset_number(std::filesystem::path& current_path, int cpu_num) { |
421 | 3 | int ret = cpu_num; |
422 | 3 | std::string cpuset_line = ""; |
423 | 3 | Status cpuset_ret = CGroupUtil::read_string_line_from_cgroup_file( |
424 | 3 | (current_path / "cpuset.cpus"), &cpuset_line); |
425 | 3 | if (cpuset_ret.ok() && !cpuset_line.empty()) { |
426 | 3 | int cpuset_count = 0; |
427 | 3 | static_cast<void>(CGroupUtil::parse_cpuset_line(cpuset_line, &cpuset_count)); |
428 | 3 | if (cpuset_count > 0) { |
429 | 3 | ret = std::min(ret, cpuset_count); |
430 | 3 | } |
431 | 3 | } |
432 | 3 | return ret; |
433 | 3 | } |
434 | | |
435 | | } // namespace doris |