/root/doris/be/src/util/cgroup_util.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "util/cgroup_util.h" |
19 | | |
20 | | #include <algorithm> |
21 | | #include <fstream> |
22 | | #include <utility> |
23 | | #include <vector> |
24 | | |
25 | | #include "gutil/stringprintf.h" |
26 | | #include "gutil/strings/escaping.h" |
27 | | #include "gutil/strings/split.h" |
28 | | #include "gutil/strings/substitute.h" |
29 | | #include "io/fs/local_file_system.h" |
30 | | #include "util/error_util.h" |
31 | | #include "util/string_parser.hpp" |
32 | | |
33 | | using strings::CUnescape; |
34 | | using strings::Split; |
35 | | using strings::SkipWhitespace; |
36 | | using std::pair; |
37 | | |
38 | | namespace doris { |
39 | | |
40 | 14 | bool CGroupUtil::cgroupsv1_enable() { |
41 | 14 | bool exists = true; |
42 | 14 | Status st = io::global_local_filesystem()->exists("/proc/cgroups", &exists); |
43 | 14 | return st.ok() && exists; |
44 | 14 | } |
45 | | |
46 | 16 | bool CGroupUtil::cgroupsv2_enable() { |
47 | 16 | #if defined(OS_LINUX) |
48 | | // This file exists iff the host has cgroups v2 enabled. |
49 | 16 | auto controllers_file = default_cgroups_mount / "cgroup.controllers"; |
50 | 16 | bool exists = true; |
51 | 16 | Status st = io::global_local_filesystem()->exists(controllers_file, &exists); |
52 | 16 | return st.ok() && exists; |
53 | | #else |
54 | | return false; |
55 | | #endif |
56 | 16 | } |
57 | | |
58 | 11 | Status CGroupUtil::find_global_cgroupv1(const string& subsystem, string* path) { |
59 | 11 | std::ifstream proc_cgroups("/proc/self/cgroup", std::ios::in); |
60 | 11 | string line; |
61 | 38 | while (true) { |
62 | 38 | if (proc_cgroups.fail()) { |
63 | 0 | return Status::CgroupError("Error reading /proc/self/cgroup: {}", get_str_err_msg()); |
64 | 38 | } else if (proc_cgroups.peek() == std::ifstream::traits_type::eof()) { |
65 | 0 | return Status::CgroupError("Could not find subsystem {} in /proc/self/cgroup", |
66 | 0 | subsystem); |
67 | 0 | } |
68 | | // The line format looks like this: |
69 | | // 4:memory:/user.slice |
70 | | // 9:cpu,cpuacct:/user.slice |
71 | | // so field size will be 3 |
72 | 38 | getline(proc_cgroups, line); |
73 | 38 | if (!proc_cgroups.good()) { |
74 | 0 | continue; |
75 | 0 | } |
76 | 38 | std::vector<string> fields = Split(line, ":"); |
77 | | // ":" in the path does not appear to be escaped - bail in the unusual case that |
78 | | // we get too many tokens. |
79 | 38 | if (fields.size() != 3) { |
80 | 0 | return Status::InvalidArgument( |
81 | 0 | "Could not parse line from /proc/self/cgroup - had {} > 3 tokens: '{}'", |
82 | 0 | fields.size(), line); |
83 | 0 | } |
84 | 38 | std::vector<string> subsystems = Split(fields[1], ","); |
85 | 38 | auto it = std::find(subsystems.begin(), subsystems.end(), subsystem); |
86 | 38 | if (it != subsystems.end()) { |
87 | 11 | *path = std::move(fields[2]); |
88 | 11 | return Status::OK(); |
89 | 11 | } |
90 | 38 | } |
91 | 11 | } |
92 | | |
93 | 22 | static Status unescape_path(const string& escaped, string* unescaped) { |
94 | 22 | string err; |
95 | 22 | if (!CUnescape(escaped, unescaped, &err)) { |
96 | 0 | return Status::InvalidArgument("Could not unescape path '{}': {}", escaped, err); |
97 | 0 | } |
98 | 22 | return Status::OK(); |
99 | 22 | } |
100 | | |
101 | 11 | Status CGroupUtil::find_cgroupv1_mounts(const string& subsystem, pair<string, string>* result) { |
102 | 11 | std::ifstream mountinfo("/proc/self/mountinfo", std::ios::in); |
103 | 11 | string line; |
104 | 171 | while (true) { |
105 | 171 | if (mountinfo.fail() || mountinfo.bad()) { |
106 | 0 | return Status::CgroupError("Error reading /proc/self/mountinfo: {}", get_str_err_msg()); |
107 | 171 | } else if (mountinfo.eof()) { |
108 | 0 | return Status::CgroupError("Could not find subsystem {} in /proc/self/mountinfo", |
109 | 0 | subsystem); |
110 | 0 | } |
111 | | // The relevant lines look like below (see proc manpage for full documentation). The |
112 | | // first example is running outside of a container, the second example is running |
113 | | // inside a docker container. Field 3 is the path relative to the root CGroup on |
114 | | // the host and Field 4 is the mount point from this process's point of view. |
115 | | // 34 29 0:28 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:15 - |
116 | | // cgroup cgroup rw,memory |
117 | | // 275 271 0:28 /docker/f23eee6f88c2ba99fcce /sys/fs/cgroup/memory |
118 | | // ro,nosuid,nodev,noexec,relatime master:15 - cgroup cgroup rw,memory |
119 | 171 | getline(mountinfo, line); |
120 | 171 | if (!mountinfo.good()) { |
121 | 0 | continue; |
122 | 0 | } |
123 | 171 | std::vector<string> fields = Split(line, " ", SkipWhitespace()); |
124 | 171 | if (fields.size() < 7) { |
125 | 0 | return Status::InvalidArgument( |
126 | 0 | "Could not parse line from /proc/self/mountinfo - had {} > 7 tokens: '{}'", |
127 | 0 | fields.size(), line); |
128 | 0 | } |
129 | 171 | if (fields[fields.size() - 3] != "cgroup") { |
130 | 66 | continue; |
131 | 66 | } |
132 | | // This is a cgroup mount. Check if it's the mount we're looking for. |
133 | 105 | std::vector<string> cgroup_opts = Split(fields[fields.size() - 1], ",", SkipWhitespace()); |
134 | 105 | auto it = std::find(cgroup_opts.begin(), cgroup_opts.end(), subsystem); |
135 | 105 | if (it == cgroup_opts.end()) { |
136 | 94 | continue; |
137 | 94 | } |
138 | | // This is the right mount. |
139 | 11 | string mount_path, system_path; |
140 | 11 | RETURN_IF_ERROR(unescape_path(fields[4], &mount_path)); |
141 | 11 | RETURN_IF_ERROR(unescape_path(fields[3], &system_path)); |
142 | | // Strip trailing "/" so that both returned paths match in whether they have a |
143 | | // trailing "/". |
144 | 11 | if (system_path[system_path.size() - 1] == '/') { |
145 | 0 | system_path.pop_back(); |
146 | 0 | } |
147 | 11 | *result = {mount_path, system_path}; |
148 | 11 | return Status::OK(); |
149 | 11 | } |
150 | 11 | } |
151 | | |
152 | 11 | Status CGroupUtil::find_abs_cgroupv1_path(const string& subsystem, string* path) { |
153 | 11 | if (!cgroupsv1_enable()) { |
154 | 0 | return Status::InvalidArgument("cgroup is not enabled!"); |
155 | 0 | } |
156 | 11 | RETURN_IF_ERROR(find_global_cgroupv1(subsystem, path)); |
157 | 11 | pair<string, string> paths; |
158 | 11 | RETURN_IF_ERROR(find_cgroupv1_mounts(subsystem, &paths)); |
159 | 11 | const string& mount_path = paths.first; |
160 | 11 | const string& system_path = paths.second; |
161 | 11 | if (path->compare(0, system_path.size(), system_path) != 0) { |
162 | 0 | return Status::InvalidArgument("Expected CGroup path '{}' to start with '{}'", *path, |
163 | 0 | system_path); |
164 | 0 | } |
165 | 11 | path->replace(0, system_path.size(), mount_path); |
166 | 11 | return Status::OK(); |
167 | 11 | } |
168 | | |
169 | 0 | std::string CGroupUtil::cgroupv2_of_process() { |
170 | 0 | #if defined(OS_LINUX) |
171 | 0 | if (!cgroupsv2_enable()) { |
172 | 0 | return ""; |
173 | 0 | } |
174 | | // All PIDs assigned to a cgroup are in /sys/fs/cgroups/{cgroup_name}/cgroup.procs |
175 | | // A simpler way to get the membership is: |
176 | 0 | std::ifstream cgroup_name_file("/proc/self/cgroup"); |
177 | 0 | if (!cgroup_name_file.is_open()) { |
178 | 0 | return ""; |
179 | 0 | } |
180 | | // With cgroups v2, there will be a *single* line with prefix "0::/" |
181 | | // (see https://docs.kernel.org/admin-guide/cgroup-v2.html) |
182 | 0 | std::string cgroup; |
183 | 0 | std::getline(cgroup_name_file, cgroup); |
184 | 0 | static const std::string v2_prefix = "0::/"; |
185 | 0 | if (!cgroup.starts_with(v2_prefix)) { |
186 | 0 | return ""; |
187 | 0 | } |
188 | 0 | cgroup = cgroup.substr(v2_prefix.length()); |
189 | 0 | return cgroup; |
190 | | #else |
191 | | return ""; |
192 | | #endif |
193 | 0 | } |
194 | | |
195 | 0 | std::optional<std::string> CGroupUtil::get_cgroupsv2_path(const std::string& subsystem) { |
196 | 0 | #if defined(OS_LINUX) |
197 | 0 | if (!CGroupUtil::cgroupsv2_enable()) { |
198 | 0 | return {}; |
199 | 0 | } |
200 | | |
201 | 0 | std::string cgroup = CGroupUtil::cgroupv2_of_process(); |
202 | 0 | auto current_cgroup = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup); |
203 | | |
204 | | // Return the bottom-most nested current memory file. If there is no such file at the current |
205 | | // level, try again at the parent level as memory settings are inherited. |
206 | 0 | while (current_cgroup != default_cgroups_mount.parent_path()) { |
207 | 0 | if (std::filesystem::exists(current_cgroup / subsystem)) { |
208 | 0 | return {current_cgroup}; |
209 | 0 | } |
210 | 0 | current_cgroup = current_cgroup.parent_path(); |
211 | 0 | } |
212 | 0 | return {}; |
213 | | #else |
214 | | return {}; |
215 | | #endif |
216 | 0 | } |
217 | | |
218 | | Status CGroupUtil::read_int_line_from_cgroup_file(const std::filesystem::path& file_path, |
219 | 5 | int64_t* val) { |
220 | 5 | std::ifstream file_stream(file_path, std::ios::in); |
221 | 5 | if (!file_stream.is_open()) { |
222 | 1 | return Status::CgroupError("Error open {}", file_path.string()); |
223 | 1 | } |
224 | | |
225 | 4 | string line; |
226 | 4 | getline(file_stream, line); |
227 | 4 | if (file_stream.fail() || file_stream.bad()) { |
228 | 0 | return Status::CgroupError("Error reading {}: {}", file_path.string(), get_str_err_msg()); |
229 | 0 | } |
230 | 4 | StringParser::ParseResult pr; |
231 | | // Parse into an int64_t If it overflows, returning the max value of int64_t is ok because that |
232 | | // is effectively unlimited. |
233 | 4 | *val = StringParser::string_to_int<int64_t>(line.c_str(), line.size(), &pr); |
234 | 4 | if ((pr != StringParser::PARSE_SUCCESS && pr != StringParser::PARSE_OVERFLOW)) { |
235 | 0 | return Status::InvalidArgument("Failed to parse {} as int64: '{}'", file_path.string(), |
236 | 0 | line); |
237 | 0 | } |
238 | 4 | return Status::OK(); |
239 | 4 | } |
240 | | |
241 | | void CGroupUtil::read_int_metric_from_cgroup_file( |
242 | | const std::filesystem::path& file_path, |
243 | 5 | std::unordered_map<std::string, int64_t>& metrics_map) { |
244 | 5 | std::ifstream cgroup_file(file_path, std::ios::in); |
245 | 5 | std::string line; |
246 | 144 | while (cgroup_file.good() && !cgroup_file.eof()) { |
247 | 139 | getline(cgroup_file, line); |
248 | 139 | std::vector<std::string> fields = strings::Split(line, " ", strings::SkipWhitespace()); |
249 | 139 | if (fields.size() < 2) { |
250 | 4 | continue; |
251 | 4 | } |
252 | 135 | std::string key = fields[0].substr(0, fields[0].size()); |
253 | | |
254 | 135 | StringParser::ParseResult result; |
255 | 135 | auto value = |
256 | 135 | StringParser::string_to_int<int64_t>(fields[1].data(), fields[1].size(), &result); |
257 | | |
258 | 135 | if (result == StringParser::PARSE_SUCCESS) { |
259 | 135 | if (fields.size() == 2) { |
260 | 135 | metrics_map[key] = value; |
261 | 135 | } else if (fields[2] == "kB") { |
262 | 0 | metrics_map[key] = value * 1024L; |
263 | 0 | } |
264 | 135 | } |
265 | 135 | } |
266 | 5 | if (cgroup_file.is_open()) { |
267 | 4 | cgroup_file.close(); |
268 | 4 | } |
269 | 5 | } |
270 | | |
271 | | Status CGroupUtil::read_string_line_from_cgroup_file(const std::filesystem::path& file_path, |
272 | 6 | std::string* line_ptr) { |
273 | 6 | std::ifstream file_stream(file_path, std::ios::in); |
274 | 6 | if (!file_stream.is_open()) { |
275 | 0 | return Status::CgroupError("Error open {}", file_path.string()); |
276 | 0 | } |
277 | 6 | string line; |
278 | 6 | getline(file_stream, line); |
279 | 6 | if (file_stream.fail() || file_stream.bad()) { |
280 | 0 | return Status::CgroupError("Error reading {}: {}", file_path.string(), get_str_err_msg()); |
281 | 0 | } |
282 | 6 | *line_ptr = line; |
283 | 6 | return Status::OK(); |
284 | 6 | } |
285 | | |
286 | 8 | Status CGroupUtil::parse_cpuset_line(std::string cpuset_line, int* cpu_count_ptr) { |
287 | 8 | if (cpuset_line.empty()) { |
288 | 0 | return Status::CgroupError("cpuset line is empty"); |
289 | 0 | } |
290 | 8 | std::vector<string> ranges; |
291 | 8 | boost::split(ranges, cpuset_line, boost::is_any_of(",")); |
292 | 8 | int cpu_count = 0; |
293 | | |
294 | 15 | for (const std::string& range : ranges) { |
295 | 15 | std::vector<std::string> cpu_values; |
296 | 15 | boost::split(cpu_values, range, boost::is_any_of("-")); |
297 | | |
298 | 15 | if (cpu_values.size() == 2) { |
299 | 10 | int start = std::stoi(cpu_values[0]); |
300 | 10 | int end = std::stoi(cpu_values[1]); |
301 | 10 | cpu_count += (end - start) + 1; |
302 | 10 | } else { |
303 | 5 | cpu_count++; |
304 | 5 | } |
305 | 15 | } |
306 | 8 | *cpu_count_ptr = cpu_count; |
307 | 8 | return Status::OK(); |
308 | 8 | } |
309 | | |
310 | 2 | int CGroupUtil::get_cgroup_limited_cpu_number(int physical_cores) { |
311 | 2 | if (physical_cores <= 0) { |
312 | 0 | return physical_cores; |
313 | 0 | } |
314 | 2 | int ret = physical_cores; |
315 | 2 | #if defined(OS_LINUX) |
316 | | // For cgroup v2 |
317 | | // Child cgroup's cpu.max may bigger than parent group's cpu.max, |
318 | | // so it should look up from current cgroup to top group. |
319 | | // For cpuset, child cgroup's cpuset.cpus could not bigger thant parent's cpuset.cpus. |
320 | 2 | if (CGroupUtil::cgroupsv2_enable()) { |
321 | 0 | std::string cgroupv2_process_path = CGroupUtil::cgroupv2_of_process(); |
322 | 0 | if (cgroupv2_process_path.empty()) { |
323 | 0 | return ret; |
324 | 0 | } |
325 | 0 | std::filesystem::path current_cgroup_path = (default_cgroups_mount / cgroupv2_process_path); |
326 | 0 | ret = get_cgroup_v2_cpu_quota_number(current_cgroup_path, default_cgroups_mount, ret); |
327 | |
|
328 | 0 | current_cgroup_path = (default_cgroups_mount / cgroupv2_process_path); |
329 | 0 | ret = get_cgroup_v2_cpuset_number(current_cgroup_path, default_cgroups_mount, ret); |
330 | 2 | } else if (CGroupUtil::cgroupsv1_enable()) { |
331 | | // cpu quota, should find first not empty config from current path to top. |
332 | | // because if a process attach to current cgroup, its cpu quota may not be set. |
333 | 2 | std::string cpu_quota_path = ""; |
334 | 2 | Status cpu_quota_ret = CGroupUtil::find_abs_cgroupv1_path("cpu", &cpu_quota_path); |
335 | 2 | if (cpu_quota_ret.ok() && !cpu_quota_path.empty()) { |
336 | 2 | std::filesystem::path current_cgroup_path = cpu_quota_path; |
337 | 2 | ret = get_cgroup_v1_cpu_quota_number(current_cgroup_path, default_cgroups_mount, ret); |
338 | 2 | } |
339 | | |
340 | | //cpuset |
341 | | // just lookup current process cgroup path is enough |
342 | | // because if a process attach to current cgroup, its cpuset.cpus must be set. |
343 | 2 | std::string cpuset_path = ""; |
344 | 2 | Status cpuset_ret = CGroupUtil::find_abs_cgroupv1_path("cpuset", &cpuset_path); |
345 | 2 | if (cpuset_ret.ok() && !cpuset_path.empty()) { |
346 | 2 | std::filesystem::path current_path = cpuset_path; |
347 | 2 | ret = get_cgroup_v1_cpuset_number(current_path, ret); |
348 | 2 | } |
349 | 2 | } |
350 | 2 | #endif |
351 | 2 | return ret; |
352 | 2 | } |
353 | | |
354 | | int CGroupUtil::get_cgroup_v2_cpu_quota_number(std::filesystem::path& current_path, |
355 | | const std::filesystem::path& default_cg_mout_path, |
356 | 4 | int cpu_num) { |
357 | 4 | int ret = cpu_num; |
358 | 12 | while (current_path != default_cg_mout_path.parent_path()) { |
359 | 8 | std::ifstream cpu_max_file(current_path / "cpu.max"); |
360 | 8 | if (cpu_max_file.is_open()) { |
361 | 8 | std::string cpu_limit_str; |
362 | 8 | double cpu_period; |
363 | 8 | cpu_max_file >> cpu_limit_str >> cpu_period; |
364 | 8 | if (cpu_limit_str != "max" && cpu_period != 0) { |
365 | 5 | double cpu_limit = std::stod(cpu_limit_str); |
366 | 5 | ret = std::min(static_cast<int>(std::ceil(cpu_limit / cpu_period)), ret); |
367 | 5 | } |
368 | 8 | } |
369 | 8 | current_path = current_path.parent_path(); |
370 | 8 | } |
371 | 4 | return ret; |
372 | 4 | } |
373 | | |
374 | | int CGroupUtil::get_cgroup_v2_cpuset_number(std::filesystem::path& current_path, |
375 | | const std::filesystem::path& default_cg_mout_path, |
376 | 2 | int cpu_num) { |
377 | 2 | int ret = cpu_num; |
378 | 3 | while (current_path != default_cg_mout_path.parent_path()) { |
379 | 3 | std::ifstream cpuset_cpus_file(current_path / "cpuset.cpus.effective"); |
380 | 3 | current_path = current_path.parent_path(); |
381 | 3 | if (cpuset_cpus_file.is_open()) { |
382 | 3 | std::string cpuset_line; |
383 | 3 | cpuset_cpus_file >> cpuset_line; |
384 | 3 | if (cpuset_line.empty()) { |
385 | 1 | continue; |
386 | 1 | } |
387 | 2 | int cpus_count = 0; |
388 | 2 | static_cast<void>(CGroupUtil::parse_cpuset_line(cpuset_line, &cpus_count)); |
389 | 2 | ret = std::min(cpus_count, ret); |
390 | 2 | break; |
391 | 3 | } |
392 | 3 | } |
393 | 2 | return ret; |
394 | 2 | } |
395 | | |
396 | | int CGroupUtil::get_cgroup_v1_cpu_quota_number(std::filesystem::path& current_path, |
397 | | const std::filesystem::path& default_cg_mout_path, |
398 | 5 | int cpu_num) { |
399 | 5 | int ret = cpu_num; |
400 | 12 | while (current_path != default_cg_mout_path.parent_path()) { |
401 | 9 | std::ifstream cpu_quota_file(current_path / "cpu.cfs_quota_us"); |
402 | 9 | std::ifstream cpu_period_file(current_path / "cpu.cfs_period_us"); |
403 | 9 | if (cpu_quota_file.is_open() && cpu_period_file.is_open()) { |
404 | 7 | double cpu_quota_value; |
405 | 7 | double cpu_period_value; |
406 | 7 | cpu_quota_file >> cpu_quota_value; |
407 | 7 | cpu_period_file >> cpu_period_value; |
408 | 7 | if (cpu_quota_value > 0 && cpu_period_value > 0) { |
409 | 2 | ret = std::min(ret, |
410 | 2 | static_cast<int>(std::ceil(cpu_quota_value / cpu_period_value))); |
411 | 2 | break; |
412 | 2 | } |
413 | 7 | } |
414 | 7 | current_path = current_path.parent_path(); |
415 | 7 | } |
416 | 5 | return ret; |
417 | 5 | } |
418 | | |
419 | 3 | int CGroupUtil::get_cgroup_v1_cpuset_number(std::filesystem::path& current_path, int cpu_num) { |
420 | 3 | int ret = cpu_num; |
421 | 3 | std::string cpuset_line = ""; |
422 | 3 | Status cpuset_ret = CGroupUtil::read_string_line_from_cgroup_file( |
423 | 3 | (current_path / "cpuset.cpus"), &cpuset_line); |
424 | 3 | if (cpuset_ret.ok() && !cpuset_line.empty()) { |
425 | 3 | int cpuset_count = 0; |
426 | 3 | static_cast<void>(CGroupUtil::parse_cpuset_line(cpuset_line, &cpuset_count)); |
427 | 3 | if (cpuset_count > 0) { |
428 | 3 | ret = std::min(ret, cpuset_count); |
429 | 3 | } |
430 | 3 | } |
431 | 3 | return ret; |
432 | 3 | } |
433 | | |
434 | | } // namespace doris |