Coverage Report

Created: 2026-05-27 17:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/cgroup_util.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/cgroup_util.h"
19
20
#include <absl/strings/escaping.h>
21
#include <absl/strings/str_split.h>
22
23
#include <algorithm>
24
#include <boost/algorithm/string.hpp>
25
#include <fstream>
26
#include <utility>
27
#include <vector>
28
29
#include "io/fs/local_file_system.h"
30
#include "util/error_util.h"
31
#include "util/string_parser.hpp"
32
33
using std::pair;
34
35
namespace doris {
36
37
178k
bool CGroupUtil::cgroupsv1_enable() {
38
178k
    bool exists = true;
39
178k
    Status st = io::global_local_filesystem()->exists("/proc/cgroups", &exists);
40
178k
    return st.ok() && exists;
41
178k
}
42
43
357k
// Returns true only when the existence check on
// `default_cgroups_mount / "cgroup.controllers"` succeeds and reports the file
// as present; that file exists iff the host has cgroups v2 enabled.
// Always returns false when OS_LINUX is not defined.
bool CGroupUtil::cgroupsv2_enable() {
#if defined(OS_LINUX)
    bool found = true;
    const auto probe_target = default_cgroups_mount / "cgroup.controllers";
    Status probe = io::global_local_filesystem()->exists(probe_target, &found);
    if (!probe.ok()) {
        return false;
    }
    return found;
#else
    return false;
#endif
}
54
55
178k
// Looks up `subsystem` in /proc/self/cgroup and stores the cgroup path of the
// first matching line in `*path`.
//
// Each line is expected to look like:
//   4:memory:/user.slice
//   9:cpu,cpuacct:/user.slice
// i.e. exactly three ':'-separated fields, the second being a comma-separated
// controller list.
//
// Returns:
//   - OK when a line listing `subsystem` was found (`*path` is set).
//   - CgroupError when the file cannot be read or no line lists `subsystem`.
//   - InvalidArgument when a line does not split into exactly three fields.
Status CGroupUtil::find_global_cgroupv1(const std::string& subsystem, std::string* path) {
    std::ifstream proc_cgroups("/proc/self/cgroup", std::ios::in);
    if (proc_cgroups.fail()) {
        return Status::CgroupError("Error reading /proc/self/cgroup: {}", get_str_err_msg());
    }

    std::string line;
    // std::getline also yields a final line that has no trailing newline, so
    // no entry is dropped at end-of-file.
    while (std::getline(proc_cgroups, line)) {
        std::vector<std::string> fields = absl::StrSplit(line, ":");
        // ":" in the path does not appear to be escaped - bail in the unusual
        // case that the token count is not exactly three.
        if (fields.size() != 3) {
            return Status::InvalidArgument(
                    "Could not parse line from /proc/self/cgroup - expected 3 tokens but had "
                    "{}: '{}'",
                    fields.size(), line);
        }
        std::vector<std::string> subsystems = absl::StrSplit(fields[1], ",");
        if (std::find(subsystems.begin(), subsystems.end(), subsystem) != subsystems.end()) {
            *path = std::move(fields[2]);
            return Status::OK();
        }
    }

    // badbit means the read itself failed; otherwise we simply ran out of lines.
    if (proc_cgroups.bad()) {
        return Status::CgroupError("Error reading /proc/self/cgroup: {}", get_str_err_msg());
    }
    return Status::CgroupError("Could not find subsystem {} in /proc/self/cgroup", subsystem);
}
89
90
357k
// Decodes the escape sequences in `escaped` into `*unescaped` via
// absl::CUnescape. Returns InvalidArgument carrying absl's error text when
// decoding fails, OK otherwise.
static Status unescape_path(const std::string& escaped, std::string* unescaped) {
    std::string failure_reason;
    const bool decoded = absl::CUnescape(escaped, unescaped, &failure_reason);
    if (decoded) {
        return Status::OK();
    }
    return Status::InvalidArgument("Could not unescape path '{}': {}", escaped, failure_reason);
}
97
98
// Scans /proc/self/mountinfo for the cgroup (v1) mount whose mount options
// list `subsystem`, and stores {mount_path, system_path} in `*result`.
//
// The relevant lines look like below (see the proc manpage for the full
// format). The first example is running outside of a container, the second is
// running inside a docker container. Field 3 is the path relative to the root
// cgroup on the host and field 4 is the mount point from this process's point
// of view.
//   34 29 0:28 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:15 -
//      cgroup cgroup rw,memory
//   275 271 0:28 /docker/f23eee6f88c2ba99fcce /sys/fs/cgroup/memory
//      ro,nosuid,nodev,noexec,relatime master:15 - cgroup cgroup rw,memory
//
// Returns:
//   - OK when a matching mount was found (`*result` is set).
//   - CgroupError when the file cannot be read or no mount lists `subsystem`.
//   - InvalidArgument when a line has fewer than 7 fields or a path cannot be
//     unescaped.
Status CGroupUtil::find_cgroupv1_mounts(const std::string& subsystem,
                                        pair<std::string, std::string>* result) {
    std::ifstream mountinfo("/proc/self/mountinfo", std::ios::in);
    if (mountinfo.fail()) {
        return Status::CgroupError("Error reading /proc/self/mountinfo: {}", get_str_err_msg());
    }

    std::string line;
    while (std::getline(mountinfo, line)) {
        std::vector<std::string> fields = absl::StrSplit(line, " ", absl::SkipWhitespace());
        if (fields.size() < 7) {
            return Status::InvalidArgument(
                    "Could not parse line from /proc/self/mountinfo - had {} < 7 tokens: '{}'",
                    fields.size(), line);
        }
        // The third field from the end is the filesystem type.
        if (fields[fields.size() - 3] != "cgroup") {
            continue;
        }
        // This is a cgroup mount. Check if it's the mount we're looking for:
        // the last field carries the comma-separated controller options.
        std::vector<std::string> cgroup_opts =
                absl::StrSplit(fields[fields.size() - 1], ",", absl::SkipWhitespace());
        if (std::find(cgroup_opts.begin(), cgroup_opts.end(), subsystem) == cgroup_opts.end()) {
            continue;
        }
        // This is the right mount.
        std::string mount_path;
        std::string system_path;
        RETURN_IF_ERROR(unescape_path(fields[4], &mount_path));
        RETURN_IF_ERROR(unescape_path(fields[3], &system_path));
        // Strip trailing "/" so that both returned paths match in whether they
        // have a trailing "/". Guard against an empty string before looking at
        // the last character.
        if (!system_path.empty() && system_path.back() == '/') {
            system_path.pop_back();
        }
        *result = std::make_pair(std::move(mount_path), std::move(system_path));
        return Status::OK();
    }

    // Reaching end-of-file is the normal "not found" outcome; only badbit
    // indicates that reading itself went wrong.
    if (mountinfo.bad()) {
        return Status::CgroupError("Error reading /proc/self/mountinfo: {}", get_str_err_msg());
    }
    return Status::CgroupError("Could not find subsystem {} in /proc/self/mountinfo", subsystem);
}
150
151
178k
// Resolves the path of this process's cgroup for `subsystem` as seen through
// the cgroup v1 mount: the path reported by find_global_cgroupv1() must start
// with the mount's system path, and that prefix is swapped for the mount path.
//
// Returns InvalidArgument when cgroupsv1_enable() is false or the prefix does
// not match, and forwards any error from the two lookup helpers.
Status CGroupUtil::find_abs_cgroupv1_path(const std::string& subsystem, std::string* path) {
    if (!cgroupsv1_enable()) {
        return Status::InvalidArgument("cgroup is not enabled!");
    }
    RETURN_IF_ERROR(find_global_cgroupv1(subsystem, path));

    pair<std::string, std::string> mount_and_root;
    RETURN_IF_ERROR(find_cgroupv1_mounts(subsystem, &mount_and_root));
    const std::string& mount_path = mount_and_root.first;
    const std::string& system_path = mount_and_root.second;

    const bool has_expected_prefix = path->starts_with(system_path);
    if (!has_expected_prefix) {
        return Status::InvalidArgument("Expected CGroup path '{}' to start with '{}'", *path,
                                       system_path);
    }
    // Re-root the path: drop the host-relative prefix, prepend the mount point.
    path->replace(0, system_path.size(), mount_path);
    return Status::OK();
}
167
168
0
// Returns the cgroup v2 membership of the current process, taken from the
// first line of /proc/self/cgroup with its "0::/" prefix removed.
// With cgroups v2 there is a *single* line with prefix "0::/"
// (see https://docs.kernel.org/admin-guide/cgroup-v2.html).
// Returns "" when cgroupsv2_enable() is false, the file cannot be opened, the
// first line lacks that prefix, or OS_LINUX is not defined.
std::string CGroupUtil::cgroupv2_of_process() {
#if defined(OS_LINUX)
    if (!cgroupsv2_enable()) {
        return "";
    }
    // Reading /proc/self/cgroup is simpler than searching cgroup.procs files
    // for our PID.
    std::ifstream membership("/proc/self/cgroup");
    if (!membership.is_open()) {
        return "";
    }
    std::string first_line;
    std::getline(membership, first_line);

    static const std::string v2_marker = "0::/";
    if (first_line.starts_with(v2_marker)) {
        return first_line.substr(v2_marker.length());
    }
    return "";
#else
    return "";
#endif
}
193
194
0
// Finds the deepest cgroup v2 directory, starting at this process's cgroup and
// walking towards default_cgroups_mount, that contains a file named
// `subsystem`. If the file is missing at one level the parent is tried next,
// since settings are inherited.
// Returns an empty optional when cgroups v2 is not enabled, no level contains
// the file, or OS_LINUX is not defined.
std::optional<std::string> CGroupUtil::get_cgroupsv2_path(const std::string& subsystem) {
#if defined(OS_LINUX)
    if (!CGroupUtil::cgroupsv2_enable()) {
        return std::nullopt;
    }

    const std::string process_cgroup = CGroupUtil::cgroupv2_of_process();
    std::filesystem::path candidate = default_cgroups_mount;
    if (!process_cgroup.empty()) {
        candidate = default_cgroups_mount / process_cgroup;
    }

    // The walk covers the mount directory itself and stops one level above it.
    const std::filesystem::path stop_at = default_cgroups_mount.parent_path();
    for (; candidate != stop_at; candidate = candidate.parent_path()) {
        if (std::filesystem::exists(candidate / subsystem)) {
            return candidate.string();
        }
    }
    return std::nullopt;
#else
    return std::nullopt;
#endif
}
216
217
// Reads the first line of `file_path` and parses it as an int64 into `*val`.
// An overflowing value is accepted as well, on the grounds that the saturated
// result is effectively "unlimited".
//
// Returns CgroupError when the file cannot be opened or read, and
// InvalidArgument when the line is not a parsable integer.
Status CGroupUtil::read_int_line_from_cgroup_file(const std::filesystem::path& file_path,
                                                  int64_t* val) {
    std::ifstream input(file_path, std::ios::in);
    if (!input.is_open()) {
        return Status::CgroupError("Error open {}", file_path.string());
    }

    std::string first_line;
    std::getline(input, first_line);
    // fail() reports both failbit and badbit.
    if (input.fail()) {
        return Status::CgroupError("Error reading {}: {}", file_path.string(), get_str_err_msg());
    }

    StringParser::ParseResult outcome;
    *val = StringParser::string_to_int<int64_t>(first_line.c_str(), first_line.size(), &outcome);
    const bool usable =
            outcome == StringParser::PARSE_SUCCESS || outcome == StringParser::PARSE_OVERFLOW;
    if (!usable) {
        return Status::InvalidArgument("Failed to parse {} as int64: '{}'", file_path.string(),
                                       first_line);
    }
    return Status::OK();
}
239
240
void CGroupUtil::read_int_metric_from_cgroup_file(
241
        const std::filesystem::path& file_path,
242
176k
        std::unordered_map<std::string, int64_t>& metrics_map) {
243
176k
    std::ifstream cgroup_file(file_path, std::ios::in);
244
176k
    std::string line;
245
6.18M
    while (cgroup_file.good() && !cgroup_file.eof()) {
246
6.01M
        getline(cgroup_file, line);
247
6.01M
        std::vector<std::string> fields = absl::StrSplit(line, " ", absl::SkipWhitespace());
248
6.01M
        if (fields.size() < 2) {
249
176k
            continue;
250
176k
        }
251
5.83M
        std::string key = fields[0].substr(0, fields[0].size());
252
253
5.83M
        StringParser::ParseResult result;
254
5.83M
        auto value =
255
5.83M
                StringParser::string_to_int<int64_t>(fields[1].data(), fields[1].size(), &result);
256
257
5.83M
        if (result == StringParser::PARSE_SUCCESS) {
258
5.83M
            if (fields.size() == 2) {
259
5.83M
                metrics_map[key] = value;
260
5.83M
            } else if (fields[2] == "kB") {
261
0
                metrics_map[key] = value * 1024L;
262
0
            }
263
5.83M
        }
264
5.83M
    }
265
176k
    if (cgroup_file.is_open()) {
266
176k
        cgroup_file.close();
267
176k
    }
268
176k
}
269
270
// Reads the first line of `file_path` into `*line_ptr`.
// `*line_ptr` is only written on success. Returns CgroupError when the file
// cannot be opened or the read fails.
Status CGroupUtil::read_string_line_from_cgroup_file(const std::filesystem::path& file_path,
                                                     std::string* line_ptr) {
    std::ifstream input(file_path, std::ios::in);
    if (!input.is_open()) {
        return Status::CgroupError("Error open {}", file_path.string());
    }

    std::string first_line;
    std::getline(input, first_line);
    // fail() reports both failbit and badbit.
    if (input.fail()) {
        return Status::CgroupError("Error reading {}: {}", file_path.string(), get_str_err_msg());
    }

    *line_ptr = std::move(first_line);
    return Status::OK();
}
284
285
44
// Counts the CPUs described by a cpuset list such as "0-3,5,7-9" and stores
// the total in `*cpu_count_ptr`.
//
// The line is a comma-separated list whose entries are either a single CPU
// index ("5") or an inclusive range ("0-3"). Surrounding whitespace on the
// whole line is ignored.
//
// Returns CgroupError, leaving `*cpu_count_ptr` untouched, when the line is
// empty or any entry is not a non-negative integer / ascending range. No
// exception is thrown for malformed input.
Status CGroupUtil::parse_cpuset_line(std::string cpuset_line, int* cpu_count_ptr) {
    boost::trim(cpuset_line);
    if (cpuset_line.empty()) {
        return Status::CgroupError("cpuset line is empty");
    }

    // Parses one CPU index; anything but a non-negative integer is rejected.
    auto parse_cpu_index = [](const std::string& token, int* out) {
        StringParser::ParseResult pr;
        const int parsed = StringParser::string_to_int<int>(token.data(), token.size(), &pr);
        if (pr != StringParser::PARSE_SUCCESS || parsed < 0) {
            return false;
        }
        *out = parsed;
        return true;
    };

    std::vector<std::string> ranges;
    boost::split(ranges, cpuset_line, boost::is_any_of(","));
    int cpu_count = 0;

    for (const std::string& range : ranges) {
        std::vector<std::string> cpu_values;
        boost::split(cpu_values, range, boost::is_any_of("-"));

        int start = 0;
        int end = 0;
        bool valid = false;
        if (cpu_values.size() == 1) {
            valid = parse_cpu_index(cpu_values[0], &start);
            end = start;
        } else if (cpu_values.size() == 2) {
            // A reversed range would otherwise contribute a non-positive count.
            valid = parse_cpu_index(cpu_values[0], &start) &&
                    parse_cpu_index(cpu_values[1], &end) && end >= start;
        }
        if (!valid) {
            return Status::CgroupError("Invalid cpuset range '{}' in cpuset line '{}'", range,
                                       cpuset_line);
        }
        cpu_count += (end - start) + 1;
    }
    *cpu_count_ptr = cpu_count;
    return Status::OK();
}
308
309
38
// Returns `physical_cores` reduced by whatever CPU quota and cpuset limits the
// cgroup helpers report for this process. Non-positive input is returned
// unchanged, as is any input when OS_LINUX is not defined.
int CGroupUtil::get_cgroup_limited_cpu_number(int physical_cores) {
    if (physical_cores <= 0) {
        return physical_cores;
    }
    int limited = physical_cores;
#if defined(OS_LINUX)
    if (CGroupUtil::cgroupsv2_enable()) {
        // cgroup v2:
        // A child cgroup's cpu.max may be bigger than its parent's cpu.max, so
        // the quota lookup walks from the current cgroup up to the top group.
        // For cpuset, a child's cpuset.cpus cannot be bigger than its parent's.
        const std::string process_cgroup = CGroupUtil::cgroupv2_of_process();
        if (process_cgroup.empty()) {
            return limited;
        }
        // Each helper advances the path it is given, so both get a fresh copy.
        std::filesystem::path quota_walk = (default_cgroups_mount / process_cgroup);
        limited = get_cgroup_v2_cpu_quota_number(quota_walk, default_cgroups_mount, limited);

        std::filesystem::path cpuset_walk = (default_cgroups_mount / process_cgroup);
        limited = get_cgroup_v2_cpuset_number(cpuset_walk, default_cgroups_mount, limited);
    } else if (CGroupUtil::cgroupsv1_enable()) {
        // cpu quota: find the first non-empty config from the current path up
        // to the top, because the quota of the cgroup a process is attached to
        // may not be set.
        std::string quota_dir = "";
        Status quota_lookup = CGroupUtil::find_abs_cgroupv1_path("cpu", &quota_dir);
        if (quota_lookup.ok() && !quota_dir.empty()) {
            std::filesystem::path quota_walk = quota_dir;
            limited = get_cgroup_v1_cpu_quota_number(quota_walk, default_cgroups_mount, limited);
        }

        // cpuset: looking at the current process's cgroup path is enough,
        // because cpuset.cpus must be set for a cgroup a process is attached to.
        std::string cpuset_dir = "";
        Status cpuset_lookup = CGroupUtil::find_abs_cgroupv1_path("cpuset", &cpuset_dir);
        if (cpuset_lookup.ok() && !cpuset_dir.empty()) {
            std::filesystem::path cpuset_dir_path = cpuset_dir;
            limited = get_cgroup_v1_cpuset_number(cpuset_dir_path, limited);
        }
    }
#endif
    return limited;
}
352
353
int CGroupUtil::get_cgroup_v2_cpu_quota_number(std::filesystem::path& current_path,
354
                                               const std::filesystem::path& default_cg_mout_path,
355
4
                                               int cpu_num) {
356
4
    int ret = cpu_num;
357
12
    while (current_path != default_cg_mout_path.parent_path()) {
358
8
        std::ifstream cpu_max_file(current_path / "cpu.max");
359
8
        if (cpu_max_file.is_open()) {
360
8
            std::string cpu_limit_str;
361
8
            double cpu_period;
362
8
            cpu_max_file >> cpu_limit_str >> cpu_period;
363
8
            if (cpu_limit_str != "max" && cpu_period != 0) {
364
5
                double cpu_limit = std::stod(cpu_limit_str);
365
5
                ret = std::min(static_cast<int>(std::ceil(cpu_limit / cpu_period)), ret);
366
5
            }
367
8
        }
368
8
        current_path = current_path.parent_path();
369
8
    }
370
4
    return ret;
371
4
}
372
373
// Walks from `current_path` up to (and including) `default_cg_mout_path`
// looking for the first `cpuset.cpus.effective` file that yields a usable CPU
// list, and returns the smaller of `cpu_num` and the number of CPUs it lists.
//
// A level is skipped (and its parent tried) when the file is missing, empty,
// cannot be parsed, or parses to a non-positive count, so a bad file can never
// clamp the result to zero. Returns `cpu_num` when no level is usable.
//
// `current_path` is advanced in place: after each visited level, including the
// one that produced the result, it already points at that level's parent. The
// walk also stops at the filesystem root.
int CGroupUtil::get_cgroup_v2_cpuset_number(std::filesystem::path& current_path,
                                            const std::filesystem::path& default_cg_mout_path,
                                            int cpu_num) {
    int ret = cpu_num;
    const std::filesystem::path stop_path = default_cg_mout_path.parent_path();
    while (current_path != stop_path) {
        std::ifstream cpuset_cpus_file(current_path / "cpuset.cpus.effective");

        std::filesystem::path parent = current_path.parent_path();
        const bool at_root = (parent == current_path);
        current_path = std::move(parent);

        if (cpuset_cpus_file.is_open()) {
            std::string cpuset_line;
            cpuset_cpus_file >> cpuset_line;
            int cpus_count = 0;
            if (!cpuset_line.empty() &&
                CGroupUtil::parse_cpuset_line(cpuset_line, &cpus_count).ok() && cpus_count > 0) {
                ret = std::min(cpus_count, ret);
                break;
            }
        }
        if (at_root) {
            // The parent of the root is the root itself; stop instead of spinning.
            break;
        }
    }
    return ret;
}
394
395
// Walks from `current_path` up to (and including) `default_cg_mout_path`
// looking for the first level whose `cpu.cfs_quota_us` and `cpu.cfs_period_us`
// both hold positive numbers, and returns the smaller of `cpu_num` and
// ceil(quota / period) for that level. Returns `cpu_num` when no level has a
// positive quota and period.
//
// Levels whose files are missing, empty, or unparsable are skipped.
// `current_path` is advanced in place while walking and is left at the level
// that produced the result. The walk also stops at the filesystem root, so a
// cgroup mount that is not located under `default_cg_mout_path.parent_path()`
// cannot loop forever.
int CGroupUtil::get_cgroup_v1_cpu_quota_number(std::filesystem::path& current_path,
                                               const std::filesystem::path& default_cg_mout_path,
                                               int cpu_num) {
    int ret = cpu_num;
    const std::filesystem::path stop_path = default_cg_mout_path.parent_path();
    while (current_path != stop_path) {
        std::ifstream cpu_quota_file(current_path / "cpu.cfs_quota_us");
        std::ifstream cpu_period_file(current_path / "cpu.cfs_period_us");
        // Zero-initialised so a failed extraction can never expose garbage.
        double cpu_quota_value = 0;
        double cpu_period_value = 0;
        if (cpu_quota_file.is_open() && cpu_period_file.is_open() &&
            (cpu_quota_file >> cpu_quota_value) && (cpu_period_file >> cpu_period_value) &&
            cpu_quota_value > 0 && cpu_period_value > 0) {
            ret = std::min(ret, static_cast<int>(std::ceil(cpu_quota_value / cpu_period_value)));
            break;
        }

        std::filesystem::path parent = current_path.parent_path();
        if (parent == current_path) {
            // Reached the root (or an empty path): nothing further to walk.
            break;
        }
        current_path = std::move(parent);
    }
    return ret;
}
417
418
39
int CGroupUtil::get_cgroup_v1_cpuset_number(std::filesystem::path& current_path, int cpu_num) {
419
39
    int ret = cpu_num;
420
39
    std::string cpuset_line = "";
421
39
    Status cpuset_ret = CGroupUtil::read_string_line_from_cgroup_file(
422
39
            (current_path / "cpuset.cpus"), &cpuset_line);
423
39
    if (cpuset_ret.ok() && !cpuset_line.empty()) {
424
39
        int cpuset_count = 0;
425
39
        static_cast<void>(CGroupUtil::parse_cpuset_line(cpuset_line, &cpuset_count));
426
39
        if (cpuset_count > 0) {
427
39
            ret = std::min(ret, cpuset_count);
428
39
        }
429
39
    }
430
39
    return ret;
431
39
}
432
433
} // namespace doris