Coverage Report

Created: 2025-05-20 19:11

/root/doris/be/src/util/cgroup_util.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/cgroup_util.h"
19
20
#include <algorithm>
21
#include <boost/algorithm/string.hpp>
22
#include <fstream>
23
#include <utility>
24
#include <vector>
25
26
#include "gutil/stringprintf.h"
27
#include "gutil/strings/escaping.h"
28
#include "gutil/strings/split.h"
29
#include "gutil/strings/substitute.h"
30
#include "io/fs/local_file_system.h"
31
#include "util/error_util.h"
32
#include "util/string_parser.hpp"
33
34
using strings::CUnescape;
35
using strings::Split;
36
using strings::SkipWhitespace;
37
using std::pair;
38
39
namespace doris {
40
41
14
bool CGroupUtil::cgroupsv1_enable() {
42
14
    bool exists = true;
43
14
    Status st = io::global_local_filesystem()->exists("/proc/cgroups", &exists);
44
14
    return st.ok() && exists;
45
14
}
46
47
16
bool CGroupUtil::cgroupsv2_enable() {
48
16
#if defined(OS_LINUX)
49
    // This file exists iff the host has cgroups v2 enabled.
50
16
    auto controllers_file = default_cgroups_mount / "cgroup.controllers";
51
16
    bool exists = true;
52
16
    Status st = io::global_local_filesystem()->exists(controllers_file, &exists);
53
16
    return st.ok() && exists;
54
#else
55
    return false;
56
#endif
57
16
}
58
59
11
Status CGroupUtil::find_global_cgroupv1(const string& subsystem, string* path) {
60
11
    std::ifstream proc_cgroups("/proc/self/cgroup", std::ios::in);
61
11
    string line;
62
66
    while (true) {
63
66
        if (proc_cgroups.fail()) {
64
0
            return Status::CgroupError("Error reading /proc/self/cgroup: {}", get_str_err_msg());
65
66
        } else if (proc_cgroups.peek() == std::ifstream::traits_type::eof()) {
66
0
            return Status::CgroupError("Could not find subsystem {} in /proc/self/cgroup",
67
0
                                       subsystem);
68
0
        }
69
        // The line format looks like this:
70
        // 4:memory:/user.slice
71
        // 9:cpu,cpuacct:/user.slice
72
        // so field size will be 3
73
66
        getline(proc_cgroups, line);
74
66
        if (!proc_cgroups.good()) {
75
0
            continue;
76
0
        }
77
66
        std::vector<string> fields = Split(line, ":");
78
        // ":" in the path does not appear to be escaped - bail in the unusual case that
79
        // we get too many tokens.
80
66
        if (fields.size() != 3) {
81
0
            return Status::InvalidArgument(
82
0
                    "Could not parse line from /proc/self/cgroup - had {} > 3 tokens: '{}'",
83
0
                    fields.size(), line);
84
0
        }
85
66
        std::vector<string> subsystems = Split(fields[1], ",");
86
66
        auto it = std::find(subsystems.begin(), subsystems.end(), subsystem);
87
66
        if (it != subsystems.end()) {
88
11
            *path = std::move(fields[2]);
89
11
            return Status::OK();
90
11
        }
91
66
    }
92
11
}
93
94
22
static Status unescape_path(const string& escaped, string* unescaped) {
95
22
    string err;
96
22
    if (!CUnescape(escaped, unescaped, &err)) {
97
0
        return Status::InvalidArgument("Could not unescape path '{}': {}", escaped, err);
98
0
    }
99
22
    return Status::OK();
100
22
}
101
102
11
Status CGroupUtil::find_cgroupv1_mounts(const string& subsystem, pair<string, string>* result) {
103
11
    std::ifstream mountinfo("/proc/self/mountinfo", std::ios::in);
104
11
    string line;
105
143
    while (true) {
106
143
        if (mountinfo.fail() || mountinfo.bad()) {
107
0
            return Status::CgroupError("Error reading /proc/self/mountinfo: {}", get_str_err_msg());
108
143
        } else if (mountinfo.eof()) {
109
0
            return Status::CgroupError("Could not find subsystem {} in /proc/self/mountinfo",
110
0
                                       subsystem);
111
0
        }
112
        // The relevant lines look like below (see proc manpage for full documentation). The
113
        // first example is running outside of a container, the second example is running
114
        // inside a docker container. Field 3 is the path relative to the root CGroup on
115
        // the host and Field 4 is the mount point from this process's point of view.
116
        // 34 29 0:28 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:15 -
117
        //    cgroup cgroup rw,memory
118
        // 275 271 0:28 /docker/f23eee6f88c2ba99fcce /sys/fs/cgroup/memory
119
        //    ro,nosuid,nodev,noexec,relatime master:15 - cgroup cgroup rw,memory
120
143
        getline(mountinfo, line);
121
143
        if (!mountinfo.good()) {
122
0
            continue;
123
0
        }
124
143
        std::vector<string> fields = Split(line, " ", SkipWhitespace());
125
143
        if (fields.size() < 7) {
126
0
            return Status::InvalidArgument(
127
0
                    "Could not parse line from /proc/self/mountinfo - had {} > 7 tokens: '{}'",
128
0
                    fields.size(), line);
129
0
        }
130
143
        if (fields[fields.size() - 3] != "cgroup") {
131
66
            continue;
132
66
        }
133
        // This is a cgroup mount. Check if it's the mount we're looking for.
134
77
        std::vector<string> cgroup_opts = Split(fields[fields.size() - 1], ",", SkipWhitespace());
135
77
        auto it = std::find(cgroup_opts.begin(), cgroup_opts.end(), subsystem);
136
77
        if (it == cgroup_opts.end()) {
137
66
            continue;
138
66
        }
139
        // This is the right mount.
140
11
        string mount_path, system_path;
141
11
        RETURN_IF_ERROR(unescape_path(fields[4], &mount_path));
142
11
        RETURN_IF_ERROR(unescape_path(fields[3], &system_path));
143
        // Strip trailing "/" so that both returned paths match in whether they have a
144
        // trailing "/".
145
11
        if (system_path[system_path.size() - 1] == '/') {
146
0
            system_path.pop_back();
147
0
        }
148
11
        *result = {mount_path, system_path};
149
11
        return Status::OK();
150
11
    }
151
11
}
152
153
11
Status CGroupUtil::find_abs_cgroupv1_path(const string& subsystem, string* path) {
154
11
    if (!cgroupsv1_enable()) {
155
0
        return Status::InvalidArgument("cgroup is not enabled!");
156
0
    }
157
11
    RETURN_IF_ERROR(find_global_cgroupv1(subsystem, path));
158
11
    pair<string, string> paths;
159
11
    RETURN_IF_ERROR(find_cgroupv1_mounts(subsystem, &paths));
160
11
    const string& mount_path = paths.first;
161
11
    const string& system_path = paths.second;
162
11
    if (path->compare(0, system_path.size(), system_path) != 0) {
163
0
        return Status::InvalidArgument("Expected CGroup path '{}' to start with '{}'", *path,
164
0
                                       system_path);
165
0
    }
166
11
    path->replace(0, system_path.size(), mount_path);
167
11
    return Status::OK();
168
11
}
169
170
0
std::string CGroupUtil::cgroupv2_of_process() {
171
0
#if defined(OS_LINUX)
172
0
    if (!cgroupsv2_enable()) {
173
0
        return "";
174
0
    }
175
    // All PIDs assigned to a cgroup are in /sys/fs/cgroups/{cgroup_name}/cgroup.procs
176
    // A simpler way to get the membership is:
177
0
    std::ifstream cgroup_name_file("/proc/self/cgroup");
178
0
    if (!cgroup_name_file.is_open()) {
179
0
        return "";
180
0
    }
181
    // With cgroups v2, there will be a *single* line with prefix "0::/"
182
    // (see https://docs.kernel.org/admin-guide/cgroup-v2.html)
183
0
    std::string cgroup;
184
0
    std::getline(cgroup_name_file, cgroup);
185
0
    static const std::string v2_prefix = "0::/";
186
0
    if (!cgroup.starts_with(v2_prefix)) {
187
0
        return "";
188
0
    }
189
0
    cgroup = cgroup.substr(v2_prefix.length());
190
0
    return cgroup;
191
#else
192
    return "";
193
#endif
194
0
}
195
196
0
std::optional<std::string> CGroupUtil::get_cgroupsv2_path(const std::string& subsystem) {
197
0
#if defined(OS_LINUX)
198
0
    if (!CGroupUtil::cgroupsv2_enable()) {
199
0
        return {};
200
0
    }
201
202
0
    std::string cgroup = CGroupUtil::cgroupv2_of_process();
203
0
    auto current_cgroup = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup);
204
205
    // Return the bottom-most nested current memory file. If there is no such file at the current
206
    // level, try again at the parent level as memory settings are inherited.
207
0
    while (current_cgroup != default_cgroups_mount.parent_path()) {
208
0
        if (std::filesystem::exists(current_cgroup / subsystem)) {
209
0
            return {current_cgroup};
210
0
        }
211
0
        current_cgroup = current_cgroup.parent_path();
212
0
    }
213
0
    return {};
214
#else
215
    return {};
216
#endif
217
0
}
218
219
Status CGroupUtil::read_int_line_from_cgroup_file(const std::filesystem::path& file_path,
220
5
                                                  int64_t* val) {
221
5
    std::ifstream file_stream(file_path, std::ios::in);
222
5
    if (!file_stream.is_open()) {
223
1
        return Status::CgroupError("Error open {}", file_path.string());
224
1
    }
225
226
4
    string line;
227
4
    getline(file_stream, line);
228
4
    if (file_stream.fail() || file_stream.bad()) {
229
0
        return Status::CgroupError("Error reading {}: {}", file_path.string(), get_str_err_msg());
230
0
    }
231
4
    StringParser::ParseResult pr;
232
    // Parse into an int64_t If it overflows, returning the max value of int64_t is ok because that
233
    // is effectively unlimited.
234
4
    *val = StringParser::string_to_int<int64_t>(line.c_str(), line.size(), &pr);
235
4
    if ((pr != StringParser::PARSE_SUCCESS && pr != StringParser::PARSE_OVERFLOW)) {
236
0
        return Status::InvalidArgument("Failed to parse {} as int64: '{}'", file_path.string(),
237
0
                                       line);
238
0
    }
239
4
    return Status::OK();
240
4
}
241
242
void CGroupUtil::read_int_metric_from_cgroup_file(
243
        const std::filesystem::path& file_path,
244
5
        std::unordered_map<std::string, int64_t>& metrics_map) {
245
5
    std::ifstream cgroup_file(file_path, std::ios::in);
246
5
    std::string line;
247
144
    while (cgroup_file.good() && !cgroup_file.eof()) {
248
139
        getline(cgroup_file, line);
249
139
        std::vector<std::string> fields = strings::Split(line, " ", strings::SkipWhitespace());
250
139
        if (fields.size() < 2) {
251
4
            continue;
252
4
        }
253
135
        std::string key = fields[0].substr(0, fields[0].size());
254
255
135
        StringParser::ParseResult result;
256
135
        auto value =
257
135
                StringParser::string_to_int<int64_t>(fields[1].data(), fields[1].size(), &result);
258
259
135
        if (result == StringParser::PARSE_SUCCESS) {
260
135
            if (fields.size() == 2) {
261
135
                metrics_map[key] = value;
262
135
            } else if (fields[2] == "kB") {
263
0
                metrics_map[key] = value * 1024L;
264
0
            }
265
135
        }
266
135
    }
267
5
    if (cgroup_file.is_open()) {
268
4
        cgroup_file.close();
269
4
    }
270
5
}
271
272
Status CGroupUtil::read_string_line_from_cgroup_file(const std::filesystem::path& file_path,
273
6
                                                     std::string* line_ptr) {
274
6
    std::ifstream file_stream(file_path, std::ios::in);
275
6
    if (!file_stream.is_open()) {
276
0
        return Status::CgroupError("Error open {}", file_path.string());
277
0
    }
278
6
    string line;
279
6
    getline(file_stream, line);
280
6
    if (file_stream.fail() || file_stream.bad()) {
281
0
        return Status::CgroupError("Error reading {}: {}", file_path.string(), get_str_err_msg());
282
0
    }
283
6
    *line_ptr = line;
284
6
    return Status::OK();
285
6
}
286
287
8
Status CGroupUtil::parse_cpuset_line(std::string cpuset_line, int* cpu_count_ptr) {
288
8
    if (cpuset_line.empty()) {
289
0
        return Status::CgroupError("cpuset line is empty");
290
0
    }
291
8
    std::vector<string> ranges;
292
8
    boost::split(ranges, cpuset_line, boost::is_any_of(","));
293
8
    int cpu_count = 0;
294
295
15
    for (const std::string& range : ranges) {
296
15
        std::vector<std::string> cpu_values;
297
15
        boost::split(cpu_values, range, boost::is_any_of("-"));
298
299
15
        if (cpu_values.size() == 2) {
300
10
            int start = std::stoi(cpu_values[0]);
301
10
            int end = std::stoi(cpu_values[1]);
302
10
            cpu_count += (end - start) + 1;
303
10
        } else {
304
5
            cpu_count++;
305
5
        }
306
15
    }
307
8
    *cpu_count_ptr = cpu_count;
308
8
    return Status::OK();
309
8
}
310
311
2
int CGroupUtil::get_cgroup_limited_cpu_number(int physical_cores) {
312
2
    if (physical_cores <= 0) {
313
0
        return physical_cores;
314
0
    }
315
2
    int ret = physical_cores;
316
2
#if defined(OS_LINUX)
317
    // For cgroup v2
318
    // Child cgroup's cpu.max may bigger than parent group's cpu.max,
319
    //      so it should look up from current cgroup to top group.
320
    // For cpuset, child cgroup's cpuset.cpus could not bigger thant parent's cpuset.cpus.
321
2
    if (CGroupUtil::cgroupsv2_enable()) {
322
0
        std::string cgroupv2_process_path = CGroupUtil::cgroupv2_of_process();
323
0
        if (cgroupv2_process_path.empty()) {
324
0
            return ret;
325
0
        }
326
0
        std::filesystem::path current_cgroup_path = (default_cgroups_mount / cgroupv2_process_path);
327
0
        ret = get_cgroup_v2_cpu_quota_number(current_cgroup_path, default_cgroups_mount, ret);
328
329
0
        current_cgroup_path = (default_cgroups_mount / cgroupv2_process_path);
330
0
        ret = get_cgroup_v2_cpuset_number(current_cgroup_path, default_cgroups_mount, ret);
331
2
    } else if (CGroupUtil::cgroupsv1_enable()) {
332
        // cpu quota, should find first not empty config from current path to top.
333
        // because if a process attach to current cgroup, its cpu quota may not be set.
334
2
        std::string cpu_quota_path = "";
335
2
        Status cpu_quota_ret = CGroupUtil::find_abs_cgroupv1_path("cpu", &cpu_quota_path);
336
2
        if (cpu_quota_ret.ok() && !cpu_quota_path.empty()) {
337
2
            std::filesystem::path current_cgroup_path = cpu_quota_path;
338
2
            ret = get_cgroup_v1_cpu_quota_number(current_cgroup_path, default_cgroups_mount, ret);
339
2
        }
340
341
        //cpuset
342
        // just lookup current process cgroup path is enough
343
        // because if a process attach to current cgroup, its cpuset.cpus must be set.
344
2
        std::string cpuset_path = "";
345
2
        Status cpuset_ret = CGroupUtil::find_abs_cgroupv1_path("cpuset", &cpuset_path);
346
2
        if (cpuset_ret.ok() && !cpuset_path.empty()) {
347
2
            std::filesystem::path current_path = cpuset_path;
348
2
            ret = get_cgroup_v1_cpuset_number(current_path, ret);
349
2
        }
350
2
    }
351
2
#endif
352
2
    return ret;
353
2
}
354
355
int CGroupUtil::get_cgroup_v2_cpu_quota_number(std::filesystem::path& current_path,
356
                                               const std::filesystem::path& default_cg_mout_path,
357
4
                                               int cpu_num) {
358
4
    int ret = cpu_num;
359
12
    while (current_path != default_cg_mout_path.parent_path()) {
360
8
        std::ifstream cpu_max_file(current_path / "cpu.max");
361
8
        if (cpu_max_file.is_open()) {
362
8
            std::string cpu_limit_str;
363
8
            double cpu_period;
364
8
            cpu_max_file >> cpu_limit_str >> cpu_period;
365
8
            if (cpu_limit_str != "max" && cpu_period != 0) {
366
5
                double cpu_limit = std::stod(cpu_limit_str);
367
5
                ret = std::min(static_cast<int>(std::ceil(cpu_limit / cpu_period)), ret);
368
5
            }
369
8
        }
370
8
        current_path = current_path.parent_path();
371
8
    }
372
4
    return ret;
373
4
}
374
375
int CGroupUtil::get_cgroup_v2_cpuset_number(std::filesystem::path& current_path,
376
                                            const std::filesystem::path& default_cg_mout_path,
377
2
                                            int cpu_num) {
378
2
    int ret = cpu_num;
379
3
    while (current_path != default_cg_mout_path.parent_path()) {
380
3
        std::ifstream cpuset_cpus_file(current_path / "cpuset.cpus.effective");
381
3
        current_path = current_path.parent_path();
382
3
        if (cpuset_cpus_file.is_open()) {
383
3
            std::string cpuset_line;
384
3
            cpuset_cpus_file >> cpuset_line;
385
3
            if (cpuset_line.empty()) {
386
1
                continue;
387
1
            }
388
2
            int cpus_count = 0;
389
2
            static_cast<void>(CGroupUtil::parse_cpuset_line(cpuset_line, &cpus_count));
390
2
            ret = std::min(cpus_count, ret);
391
2
            break;
392
3
        }
393
3
    }
394
2
    return ret;
395
2
}
396
397
int CGroupUtil::get_cgroup_v1_cpu_quota_number(std::filesystem::path& current_path,
398
                                               const std::filesystem::path& default_cg_mout_path,
399
5
                                               int cpu_num) {
400
5
    int ret = cpu_num;
401
12
    while (current_path != default_cg_mout_path.parent_path()) {
402
9
        std::ifstream cpu_quota_file(current_path / "cpu.cfs_quota_us");
403
9
        std::ifstream cpu_period_file(current_path / "cpu.cfs_period_us");
404
9
        if (cpu_quota_file.is_open() && cpu_period_file.is_open()) {
405
7
            double cpu_quota_value;
406
7
            double cpu_period_value;
407
7
            cpu_quota_file >> cpu_quota_value;
408
7
            cpu_period_file >> cpu_period_value;
409
7
            if (cpu_quota_value > 0 && cpu_period_value > 0) {
410
2
                ret = std::min(ret,
411
2
                               static_cast<int>(std::ceil(cpu_quota_value / cpu_period_value)));
412
2
                break;
413
2
            }
414
7
        }
415
7
        current_path = current_path.parent_path();
416
7
    }
417
5
    return ret;
418
5
}
419
420
3
int CGroupUtil::get_cgroup_v1_cpuset_number(std::filesystem::path& current_path, int cpu_num) {
421
3
    int ret = cpu_num;
422
3
    std::string cpuset_line = "";
423
3
    Status cpuset_ret = CGroupUtil::read_string_line_from_cgroup_file(
424
3
            (current_path / "cpuset.cpus"), &cpuset_line);
425
3
    if (cpuset_ret.ok() && !cpuset_line.empty()) {
426
3
        int cpuset_count = 0;
427
3
        static_cast<void>(CGroupUtil::parse_cpuset_line(cpuset_line, &cpuset_count));
428
3
        if (cpuset_count > 0) {
429
3
            ret = std::min(ret, cpuset_count);
430
3
        }
431
3
    }
432
3
    return ret;
433
3
}
434
435
} // namespace doris