/root/doris/be/src/util/cgroup_util.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "util/cgroup_util.h" |
19 | | |
20 | | #include <absl/strings/escaping.h> |
21 | | #include <absl/strings/str_split.h> |
22 | | |
23 | | #include <algorithm> |
24 | | #include <fstream> |
25 | | #include <utility> |
26 | | #include <vector> |
27 | | |
28 | | #include "gutil/stringprintf.h" |
29 | | #include "io/fs/local_file_system.h" |
30 | | #include "util/error_util.h" |
31 | | #include "util/string_parser.hpp" |
32 | | |
33 | | using std::pair; |
34 | | |
35 | | namespace doris { |
36 | | |
37 | 14 | bool CGroupUtil::cgroupsv1_enable() { |
38 | 14 | bool exists = true; |
39 | 14 | Status st = io::global_local_filesystem()->exists("/proc/cgroups", &exists); |
40 | 14 | return st.ok() && exists; |
41 | 14 | } |
42 | | |
43 | 16 | bool CGroupUtil::cgroupsv2_enable() { |
44 | 16 | #if defined(OS_LINUX) |
45 | | // This file exists iff the host has cgroups v2 enabled. |
46 | 16 | auto controllers_file = default_cgroups_mount / "cgroup.controllers"; |
47 | 16 | bool exists = true; |
48 | 16 | Status st = io::global_local_filesystem()->exists(controllers_file, &exists); |
49 | 16 | return st.ok() && exists; |
50 | | #else |
51 | | return false; |
52 | | #endif |
53 | 16 | } |
54 | | |
55 | 11 | Status CGroupUtil::find_global_cgroupv1(const string& subsystem, string* path) { |
56 | 11 | std::ifstream proc_cgroups("/proc/self/cgroup", std::ios::in); |
57 | 11 | string line; |
58 | 99 | while (true) { |
59 | 99 | if (proc_cgroups.fail()) { |
60 | 0 | return Status::CgroupError("Error reading /proc/self/cgroup: {}", get_str_err_msg()); |
61 | 99 | } else if (proc_cgroups.peek() == std::ifstream::traits_type::eof()) { |
62 | 0 | return Status::CgroupError("Could not find subsystem {} in /proc/self/cgroup", |
63 | 0 | subsystem); |
64 | 0 | } |
65 | | // The line format looks like this: |
66 | | // 4:memory:/user.slice |
67 | | // 9:cpu,cpuacct:/user.slice |
68 | | // so field size will be 3 |
69 | 99 | getline(proc_cgroups, line); |
70 | 99 | if (!proc_cgroups.good()) { |
71 | 0 | continue; |
72 | 0 | } |
73 | 99 | std::vector<string> fields = absl::StrSplit(line, ":"); |
74 | | // ":" in the path does not appear to be escaped - bail in the unusual case that |
75 | | // we get too many tokens. |
76 | 99 | if (fields.size() != 3) { |
77 | 0 | return Status::InvalidArgument( |
78 | 0 | "Could not parse line from /proc/self/cgroup - had {} > 3 tokens: '{}'", |
79 | 0 | fields.size(), line); |
80 | 0 | } |
81 | 99 | std::vector<string> subsystems = absl::StrSplit(fields[1], ","); |
82 | 99 | auto it = std::find(subsystems.begin(), subsystems.end(), subsystem); |
83 | 99 | if (it != subsystems.end()) { |
84 | 11 | *path = std::move(fields[2]); |
85 | 11 | return Status::OK(); |
86 | 11 | } |
87 | 99 | } |
88 | 11 | } |
89 | | |
90 | 22 | static Status unescape_path(const string& escaped, string* unescaped) { |
91 | 22 | string err; |
92 | 22 | if (!absl::CUnescape(escaped, unescaped, &err)) { |
93 | 0 | return Status::InvalidArgument("Could not unescape path '{}': {}", escaped, err); |
94 | 0 | } |
95 | 22 | return Status::OK(); |
96 | 22 | } |
97 | | |
98 | 11 | Status CGroupUtil::find_cgroupv1_mounts(const string& subsystem, pair<string, string>* result) { |
99 | 11 | std::ifstream mountinfo("/proc/self/mountinfo", std::ios::in); |
100 | 11 | string line; |
101 | 110 | while (true) { |
102 | 110 | if (mountinfo.fail() || mountinfo.bad()) { |
103 | 0 | return Status::CgroupError("Error reading /proc/self/mountinfo: {}", get_str_err_msg()); |
104 | 110 | } else if (mountinfo.eof()) { |
105 | 0 | return Status::CgroupError("Could not find subsystem {} in /proc/self/mountinfo", |
106 | 0 | subsystem); |
107 | 0 | } |
108 | | // The relevant lines look like below (see proc manpage for full documentation). The |
109 | | // first example is running outside of a container, the second example is running |
110 | | // inside a docker container. Field 3 is the path relative to the root CGroup on |
111 | | // the host and Field 4 is the mount point from this process's point of view. |
112 | | // 34 29 0:28 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:15 - |
113 | | // cgroup cgroup rw,memory |
114 | | // 275 271 0:28 /docker/f23eee6f88c2ba99fcce /sys/fs/cgroup/memory |
115 | | // ro,nosuid,nodev,noexec,relatime master:15 - cgroup cgroup rw,memory |
116 | 110 | getline(mountinfo, line); |
117 | 110 | if (!mountinfo.good()) { |
118 | 0 | continue; |
119 | 0 | } |
120 | 110 | std::vector<string> fields = absl::StrSplit(line, " ", absl::SkipWhitespace()); |
121 | 110 | if (fields.size() < 7) { |
122 | 0 | return Status::InvalidArgument( |
123 | 0 | "Could not parse line from /proc/self/mountinfo - had {} > 7 tokens: '{}'", |
124 | 0 | fields.size(), line); |
125 | 0 | } |
126 | 110 | if (fields[fields.size() - 3] != "cgroup") { |
127 | 66 | continue; |
128 | 66 | } |
129 | | // This is a cgroup mount. Check if it's the mount we're looking for. |
130 | 44 | std::vector<string> cgroup_opts = |
131 | 44 | absl::StrSplit(fields[fields.size() - 1], ",", absl::SkipWhitespace()); |
132 | 44 | auto it = std::find(cgroup_opts.begin(), cgroup_opts.end(), subsystem); |
133 | 44 | if (it == cgroup_opts.end()) { |
134 | 33 | continue; |
135 | 33 | } |
136 | | // This is the right mount. |
137 | 11 | string mount_path, system_path; |
138 | 11 | RETURN_IF_ERROR(unescape_path(fields[4], &mount_path)); |
139 | 11 | RETURN_IF_ERROR(unescape_path(fields[3], &system_path)); |
140 | | // Strip trailing "/" so that both returned paths match in whether they have a |
141 | | // trailing "/". |
142 | 11 | if (system_path[system_path.size() - 1] == '/') { |
143 | 0 | system_path.pop_back(); |
144 | 0 | } |
145 | 11 | *result = {mount_path, system_path}; |
146 | 11 | return Status::OK(); |
147 | 11 | } |
148 | 11 | } |
149 | | |
150 | 11 | Status CGroupUtil::find_abs_cgroupv1_path(const string& subsystem, string* path) { |
151 | 11 | if (!cgroupsv1_enable()) { |
152 | 0 | return Status::InvalidArgument("cgroup is not enabled!"); |
153 | 0 | } |
154 | 11 | RETURN_IF_ERROR(find_global_cgroupv1(subsystem, path)); |
155 | 11 | pair<string, string> paths; |
156 | 11 | RETURN_IF_ERROR(find_cgroupv1_mounts(subsystem, &paths)); |
157 | 11 | const string& mount_path = paths.first; |
158 | 11 | const string& system_path = paths.second; |
159 | 11 | if (path->compare(0, system_path.size(), system_path) != 0) { |
160 | 0 | return Status::InvalidArgument("Expected CGroup path '{}' to start with '{}'", *path, |
161 | 0 | system_path); |
162 | 0 | } |
163 | 11 | path->replace(0, system_path.size(), mount_path); |
164 | 11 | return Status::OK(); |
165 | 11 | } |
166 | | |
167 | 0 | std::string CGroupUtil::cgroupv2_of_process() { |
168 | 0 | #if defined(OS_LINUX) |
169 | 0 | if (!cgroupsv2_enable()) { |
170 | 0 | return ""; |
171 | 0 | } |
172 | | // All PIDs assigned to a cgroup are in /sys/fs/cgroups/{cgroup_name}/cgroup.procs |
173 | | // A simpler way to get the membership is: |
174 | 0 | std::ifstream cgroup_name_file("/proc/self/cgroup"); |
175 | 0 | if (!cgroup_name_file.is_open()) { |
176 | 0 | return ""; |
177 | 0 | } |
178 | | // With cgroups v2, there will be a *single* line with prefix "0::/" |
179 | | // (see https://docs.kernel.org/admin-guide/cgroup-v2.html) |
180 | 0 | std::string cgroup; |
181 | 0 | std::getline(cgroup_name_file, cgroup); |
182 | 0 | static const std::string v2_prefix = "0::/"; |
183 | 0 | if (!cgroup.starts_with(v2_prefix)) { |
184 | 0 | return ""; |
185 | 0 | } |
186 | 0 | cgroup = cgroup.substr(v2_prefix.length()); |
187 | 0 | return cgroup; |
188 | | #else |
189 | | return ""; |
190 | | #endif |
191 | 0 | } |
192 | | |
193 | 0 | std::optional<std::string> CGroupUtil::get_cgroupsv2_path(const std::string& subsystem) { |
194 | 0 | #if defined(OS_LINUX) |
195 | 0 | if (!CGroupUtil::cgroupsv2_enable()) { |
196 | 0 | return {}; |
197 | 0 | } |
198 | | |
199 | 0 | std::string cgroup = CGroupUtil::cgroupv2_of_process(); |
200 | 0 | auto current_cgroup = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup); |
201 | | |
202 | | // Return the bottom-most nested current memory file. If there is no such file at the current |
203 | | // level, try again at the parent level as memory settings are inherited. |
204 | 0 | while (current_cgroup != default_cgroups_mount.parent_path()) { |
205 | 0 | if (std::filesystem::exists(current_cgroup / subsystem)) { |
206 | 0 | return {current_cgroup}; |
207 | 0 | } |
208 | 0 | current_cgroup = current_cgroup.parent_path(); |
209 | 0 | } |
210 | 0 | return {}; |
211 | | #else |
212 | | return {}; |
213 | | #endif |
214 | 0 | } |
215 | | |
216 | | Status CGroupUtil::read_int_line_from_cgroup_file(const std::filesystem::path& file_path, |
217 | 5 | int64_t* val) { |
218 | 5 | std::ifstream file_stream(file_path, std::ios::in); |
219 | 5 | if (!file_stream.is_open()) { |
220 | 1 | return Status::CgroupError("Error open {}", file_path.string()); |
221 | 1 | } |
222 | | |
223 | 4 | string line; |
224 | 4 | getline(file_stream, line); |
225 | 4 | if (file_stream.fail() || file_stream.bad()) { |
226 | 0 | return Status::CgroupError("Error reading {}: {}", file_path.string(), get_str_err_msg()); |
227 | 0 | } |
228 | 4 | StringParser::ParseResult pr; |
229 | | // Parse into an int64_t If it overflows, returning the max value of int64_t is ok because that |
230 | | // is effectively unlimited. |
231 | 4 | *val = StringParser::string_to_int<int64_t>(line.c_str(), line.size(), &pr); |
232 | 4 | if ((pr != StringParser::PARSE_SUCCESS && pr != StringParser::PARSE_OVERFLOW)) { |
233 | 0 | return Status::InvalidArgument("Failed to parse {} as int64: '{}'", file_path.string(), |
234 | 0 | line); |
235 | 0 | } |
236 | 4 | return Status::OK(); |
237 | 4 | } |
238 | | |
239 | | void CGroupUtil::read_int_metric_from_cgroup_file( |
240 | | const std::filesystem::path& file_path, |
241 | 5 | std::unordered_map<std::string, int64_t>& metrics_map) { |
242 | 5 | std::ifstream cgroup_file(file_path, std::ios::in); |
243 | 5 | std::string line; |
244 | 144 | while (cgroup_file.good() && !cgroup_file.eof()) { |
245 | 139 | getline(cgroup_file, line); |
246 | 139 | std::vector<std::string> fields = absl::StrSplit(line, " ", absl::SkipWhitespace()); |
247 | 139 | if (fields.size() < 2) { |
248 | 4 | continue; |
249 | 4 | } |
250 | 135 | std::string key = fields[0].substr(0, fields[0].size()); |
251 | | |
252 | 135 | StringParser::ParseResult result; |
253 | 135 | auto value = |
254 | 135 | StringParser::string_to_int<int64_t>(fields[1].data(), fields[1].size(), &result); |
255 | | |
256 | 135 | if (result == StringParser::PARSE_SUCCESS) { |
257 | 135 | if (fields.size() == 2) { |
258 | 135 | metrics_map[key] = value; |
259 | 135 | } else if (fields[2] == "kB") { |
260 | 0 | metrics_map[key] = value * 1024L; |
261 | 0 | } |
262 | 135 | } |
263 | 135 | } |
264 | 5 | if (cgroup_file.is_open()) { |
265 | 4 | cgroup_file.close(); |
266 | 4 | } |
267 | 5 | } |
268 | | |
269 | | Status CGroupUtil::read_string_line_from_cgroup_file(const std::filesystem::path& file_path, |
270 | 6 | std::string* line_ptr) { |
271 | 6 | std::ifstream file_stream(file_path, std::ios::in); |
272 | 6 | if (!file_stream.is_open()) { |
273 | 0 | return Status::CgroupError("Error open {}", file_path.string()); |
274 | 0 | } |
275 | 6 | string line; |
276 | 6 | getline(file_stream, line); |
277 | 6 | if (file_stream.fail() || file_stream.bad()) { |
278 | 0 | return Status::CgroupError("Error reading {}: {}", file_path.string(), get_str_err_msg()); |
279 | 0 | } |
280 | 6 | *line_ptr = line; |
281 | 6 | return Status::OK(); |
282 | 6 | } |
283 | | |
284 | 8 | Status CGroupUtil::parse_cpuset_line(std::string cpuset_line, int* cpu_count_ptr) { |
285 | 8 | if (cpuset_line.empty()) { |
286 | 0 | return Status::CgroupError("cpuset line is empty"); |
287 | 0 | } |
288 | 8 | std::vector<string> ranges; |
289 | 8 | boost::split(ranges, cpuset_line, boost::is_any_of(",")); |
290 | 8 | int cpu_count = 0; |
291 | | |
292 | 15 | for (const std::string& range : ranges) { |
293 | 15 | std::vector<std::string> cpu_values; |
294 | 15 | boost::split(cpu_values, range, boost::is_any_of("-")); |
295 | | |
296 | 15 | if (cpu_values.size() == 2) { |
297 | 10 | int start = std::stoi(cpu_values[0]); |
298 | 10 | int end = std::stoi(cpu_values[1]); |
299 | 10 | cpu_count += (end - start) + 1; |
300 | 10 | } else { |
301 | 5 | cpu_count++; |
302 | 5 | } |
303 | 15 | } |
304 | 8 | *cpu_count_ptr = cpu_count; |
305 | 8 | return Status::OK(); |
306 | 8 | } |
307 | | |
308 | 2 | int CGroupUtil::get_cgroup_limited_cpu_number(int physical_cores) { |
309 | 2 | if (physical_cores <= 0) { |
310 | 0 | return physical_cores; |
311 | 0 | } |
312 | 2 | int ret = physical_cores; |
313 | 2 | #if defined(OS_LINUX) |
314 | | // For cgroup v2 |
315 | | // Child cgroup's cpu.max may bigger than parent group's cpu.max, |
316 | | // so it should look up from current cgroup to top group. |
317 | | // For cpuset, child cgroup's cpuset.cpus could not bigger thant parent's cpuset.cpus. |
318 | 2 | if (CGroupUtil::cgroupsv2_enable()) { |
319 | 0 | std::string cgroupv2_process_path = CGroupUtil::cgroupv2_of_process(); |
320 | 0 | if (cgroupv2_process_path.empty()) { |
321 | 0 | return ret; |
322 | 0 | } |
323 | 0 | std::filesystem::path current_cgroup_path = (default_cgroups_mount / cgroupv2_process_path); |
324 | 0 | ret = get_cgroup_v2_cpu_quota_number(current_cgroup_path, default_cgroups_mount, ret); |
325 | |
|
326 | 0 | current_cgroup_path = (default_cgroups_mount / cgroupv2_process_path); |
327 | 0 | ret = get_cgroup_v2_cpuset_number(current_cgroup_path, default_cgroups_mount, ret); |
328 | 2 | } else if (CGroupUtil::cgroupsv1_enable()) { |
329 | | // cpu quota, should find first not empty config from current path to top. |
330 | | // because if a process attach to current cgroup, its cpu quota may not be set. |
331 | 2 | std::string cpu_quota_path = ""; |
332 | 2 | Status cpu_quota_ret = CGroupUtil::find_abs_cgroupv1_path("cpu", &cpu_quota_path); |
333 | 2 | if (cpu_quota_ret.ok() && !cpu_quota_path.empty()) { |
334 | 2 | std::filesystem::path current_cgroup_path = cpu_quota_path; |
335 | 2 | ret = get_cgroup_v1_cpu_quota_number(current_cgroup_path, default_cgroups_mount, ret); |
336 | 2 | } |
337 | | |
338 | | //cpuset |
339 | | // just lookup current process cgroup path is enough |
340 | | // because if a process attach to current cgroup, its cpuset.cpus must be set. |
341 | 2 | std::string cpuset_path = ""; |
342 | 2 | Status cpuset_ret = CGroupUtil::find_abs_cgroupv1_path("cpuset", &cpuset_path); |
343 | 2 | if (cpuset_ret.ok() && !cpuset_path.empty()) { |
344 | 2 | std::filesystem::path current_path = cpuset_path; |
345 | 2 | ret = get_cgroup_v1_cpuset_number(current_path, ret); |
346 | 2 | } |
347 | 2 | } |
348 | 2 | #endif |
349 | 2 | return ret; |
350 | 2 | } |
351 | | |
352 | | int CGroupUtil::get_cgroup_v2_cpu_quota_number(std::filesystem::path& current_path, |
353 | | const std::filesystem::path& default_cg_mout_path, |
354 | 4 | int cpu_num) { |
355 | 4 | int ret = cpu_num; |
356 | 12 | while (current_path != default_cg_mout_path.parent_path()) { |
357 | 8 | std::ifstream cpu_max_file(current_path / "cpu.max"); |
358 | 8 | if (cpu_max_file.is_open()) { |
359 | 8 | std::string cpu_limit_str; |
360 | 8 | double cpu_period; |
361 | 8 | cpu_max_file >> cpu_limit_str >> cpu_period; |
362 | 8 | if (cpu_limit_str != "max" && cpu_period != 0) { |
363 | 5 | double cpu_limit = std::stod(cpu_limit_str); |
364 | 5 | ret = std::min(static_cast<int>(std::ceil(cpu_limit / cpu_period)), ret); |
365 | 5 | } |
366 | 8 | } |
367 | 8 | current_path = current_path.parent_path(); |
368 | 8 | } |
369 | 4 | return ret; |
370 | 4 | } |
371 | | |
372 | | int CGroupUtil::get_cgroup_v2_cpuset_number(std::filesystem::path& current_path, |
373 | | const std::filesystem::path& default_cg_mout_path, |
374 | 2 | int cpu_num) { |
375 | 2 | int ret = cpu_num; |
376 | 3 | while (current_path != default_cg_mout_path.parent_path()) { |
377 | 3 | std::ifstream cpuset_cpus_file(current_path / "cpuset.cpus.effective"); |
378 | 3 | current_path = current_path.parent_path(); |
379 | 3 | if (cpuset_cpus_file.is_open()) { |
380 | 3 | std::string cpuset_line; |
381 | 3 | cpuset_cpus_file >> cpuset_line; |
382 | 3 | if (cpuset_line.empty()) { |
383 | 1 | continue; |
384 | 1 | } |
385 | 2 | int cpus_count = 0; |
386 | 2 | static_cast<void>(CGroupUtil::parse_cpuset_line(cpuset_line, &cpus_count)); |
387 | 2 | ret = std::min(cpus_count, ret); |
388 | 2 | break; |
389 | 3 | } |
390 | 3 | } |
391 | 2 | return ret; |
392 | 2 | } |
393 | | |
394 | | int CGroupUtil::get_cgroup_v1_cpu_quota_number(std::filesystem::path& current_path, |
395 | | const std::filesystem::path& default_cg_mout_path, |
396 | 5 | int cpu_num) { |
397 | 5 | int ret = cpu_num; |
398 | 12 | while (current_path != default_cg_mout_path.parent_path()) { |
399 | 9 | std::ifstream cpu_quota_file(current_path / "cpu.cfs_quota_us"); |
400 | 9 | std::ifstream cpu_period_file(current_path / "cpu.cfs_period_us"); |
401 | 9 | if (cpu_quota_file.is_open() && cpu_period_file.is_open()) { |
402 | 7 | double cpu_quota_value; |
403 | 7 | double cpu_period_value; |
404 | 7 | cpu_quota_file >> cpu_quota_value; |
405 | 7 | cpu_period_file >> cpu_period_value; |
406 | 7 | if (cpu_quota_value > 0 && cpu_period_value > 0) { |
407 | 2 | ret = std::min(ret, |
408 | 2 | static_cast<int>(std::ceil(cpu_quota_value / cpu_period_value))); |
409 | 2 | break; |
410 | 2 | } |
411 | 7 | } |
412 | 7 | current_path = current_path.parent_path(); |
413 | 7 | } |
414 | 5 | return ret; |
415 | 5 | } |
416 | | |
417 | 3 | int CGroupUtil::get_cgroup_v1_cpuset_number(std::filesystem::path& current_path, int cpu_num) { |
418 | 3 | int ret = cpu_num; |
419 | 3 | std::string cpuset_line = ""; |
420 | 3 | Status cpuset_ret = CGroupUtil::read_string_line_from_cgroup_file( |
421 | 3 | (current_path / "cpuset.cpus"), &cpuset_line); |
422 | 3 | if (cpuset_ret.ok() && !cpuset_line.empty()) { |
423 | 3 | int cpuset_count = 0; |
424 | 3 | static_cast<void>(CGroupUtil::parse_cpuset_line(cpuset_line, &cpuset_count)); |
425 | 3 | if (cpuset_count > 0) { |
426 | 3 | ret = std::min(ret, cpuset_count); |
427 | 3 | } |
428 | 3 | } |
429 | 3 | return ret; |
430 | 3 | } |
431 | | |
432 | | } // namespace doris |