Coverage Report

Created: 2025-04-26 17:59

/root/doris/be/src/util/cgroup_util.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/cgroup_util.h"
19
20
#include <algorithm>
21
#include <fstream>
22
#include <utility>
23
#include <vector>
24
25
#include "gutil/stringprintf.h"
26
#include "gutil/strings/escaping.h"
27
#include "gutil/strings/split.h"
28
#include "io/fs/local_file_system.h"
29
#include "util/error_util.h"
30
#include "util/string_parser.hpp"
31
32
using strings::CUnescape;
33
using strings::Split;
34
using strings::SkipWhitespace;
35
using std::pair;
36
37
namespace doris {
38
39
14
bool CGroupUtil::cgroupsv1_enable() {
40
14
    bool exists = true;
41
14
    Status st = io::global_local_filesystem()->exists("/proc/cgroups", &exists);
42
14
    return st.ok() && exists;
43
14
}
44
45
16
bool CGroupUtil::cgroupsv2_enable() {
46
16
#if defined(OS_LINUX)
47
    // This file exists iff the host has cgroups v2 enabled.
48
16
    auto controllers_file = default_cgroups_mount / "cgroup.controllers";
49
16
    bool exists = true;
50
16
    Status st = io::global_local_filesystem()->exists(controllers_file, &exists);
51
16
    return st.ok() && exists;
52
#else
53
    return false;
54
#endif
55
16
}
56
57
11
Status CGroupUtil::find_global_cgroupv1(const string& subsystem, string* path) {
58
11
    std::ifstream proc_cgroups("/proc/self/cgroup", std::ios::in);
59
11
    string line;
60
29
    while (true) {
61
29
        if (proc_cgroups.fail()) {
62
0
            return Status::CgroupError("Error reading /proc/self/cgroup: {}", get_str_err_msg());
63
29
        } else if (proc_cgroups.peek() == std::ifstream::traits_type::eof()) {
64
0
            return Status::CgroupError("Could not find subsystem {} in /proc/self/cgroup",
65
0
                                       subsystem);
66
0
        }
67
        // The line format looks like this:
68
        // 4:memory:/user.slice
69
        // 9:cpu,cpuacct:/user.slice
70
        // so field size will be 3
71
29
        getline(proc_cgroups, line);
72
29
        if (!proc_cgroups.good()) {
73
0
            continue;
74
0
        }
75
29
        std::vector<string> fields = Split(line, ":");
76
        // ":" in the path does not appear to be escaped - bail in the unusual case that
77
        // we get too many tokens.
78
29
        if (fields.size() != 3) {
79
0
            return Status::InvalidArgument(
80
0
                    "Could not parse line from /proc/self/cgroup - had {} > 3 tokens: '{}'",
81
0
                    fields.size(), line);
82
0
        }
83
29
        std::vector<string> subsystems = Split(fields[1], ",");
84
29
        auto it = std::find(subsystems.begin(), subsystems.end(), subsystem);
85
29
        if (it != subsystems.end()) {
86
11
            *path = std::move(fields[2]);
87
11
            return Status::OK();
88
11
        }
89
29
    }
90
11
}
91
92
22
static Status unescape_path(const string& escaped, string* unescaped) {
93
22
    string err;
94
22
    if (!CUnescape(escaped, unescaped, &err)) {
95
0
        return Status::InvalidArgument("Could not unescape path '{}': {}", escaped, err);
96
0
    }
97
22
    return Status::OK();
98
22
}
99
100
11
Status CGroupUtil::find_cgroupv1_mounts(const string& subsystem, pair<string, string>* result) {
101
11
    std::ifstream mountinfo("/proc/self/mountinfo", std::ios::in);
102
11
    string line;
103
180
    while (true) {
104
180
        if (mountinfo.fail() || mountinfo.bad()) {
105
0
            return Status::CgroupError("Error reading /proc/self/mountinfo: {}", get_str_err_msg());
106
180
        } else if (mountinfo.eof()) {
107
0
            return Status::CgroupError("Could not find subsystem {} in /proc/self/mountinfo",
108
0
                                       subsystem);
109
0
        }
110
        // The relevant lines look like below (see proc manpage for full documentation). The
111
        // first example is running outside of a container, the second example is running
112
        // inside a docker container. Field 3 is the path relative to the root CGroup on
113
        // the host and Field 4 is the mount point from this process's point of view.
114
        // 34 29 0:28 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:15 -
115
        //    cgroup cgroup rw,memory
116
        // 275 271 0:28 /docker/f23eee6f88c2ba99fcce /sys/fs/cgroup/memory
117
        //    ro,nosuid,nodev,noexec,relatime master:15 - cgroup cgroup rw,memory
118
180
        getline(mountinfo, line);
119
180
        if (!mountinfo.good()) {
120
0
            continue;
121
0
        }
122
180
        std::vector<string> fields = Split(line, " ", SkipWhitespace());
123
180
        if (fields.size() < 7) {
124
0
            return Status::InvalidArgument(
125
0
                    "Could not parse line from /proc/self/mountinfo - had {} > 7 tokens: '{}'",
126
0
                    fields.size(), line);
127
0
        }
128
180
        if (fields[fields.size() - 3] != "cgroup") {
129
66
            continue;
130
66
        }
131
        // This is a cgroup mount. Check if it's the mount we're looking for.
132
114
        std::vector<string> cgroup_opts = Split(fields[fields.size() - 1], ",", SkipWhitespace());
133
114
        auto it = std::find(cgroup_opts.begin(), cgroup_opts.end(), subsystem);
134
114
        if (it == cgroup_opts.end()) {
135
103
            continue;
136
103
        }
137
        // This is the right mount.
138
11
        string mount_path, system_path;
139
11
        RETURN_IF_ERROR(unescape_path(fields[4], &mount_path));
140
11
        RETURN_IF_ERROR(unescape_path(fields[3], &system_path));
141
        // Strip trailing "/" so that both returned paths match in whether they have a
142
        // trailing "/".
143
11
        if (system_path[system_path.size() - 1] == '/') {
144
0
            system_path.pop_back();
145
0
        }
146
11
        *result = {mount_path, system_path};
147
11
        return Status::OK();
148
11
    }
149
11
}
150
151
11
Status CGroupUtil::find_abs_cgroupv1_path(const string& subsystem, string* path) {
152
11
    if (!cgroupsv1_enable()) {
153
0
        return Status::InvalidArgument("cgroup is not enabled!");
154
0
    }
155
11
    RETURN_IF_ERROR(find_global_cgroupv1(subsystem, path));
156
11
    pair<string, string> paths;
157
11
    RETURN_IF_ERROR(find_cgroupv1_mounts(subsystem, &paths));
158
11
    const string& mount_path = paths.first;
159
11
    const string& system_path = paths.second;
160
11
    if (path->compare(0, system_path.size(), system_path) != 0) {
161
0
        return Status::InvalidArgument("Expected CGroup path '{}' to start with '{}'", *path,
162
0
                                       system_path);
163
0
    }
164
11
    path->replace(0, system_path.size(), mount_path);
165
11
    return Status::OK();
166
11
}
167
168
0
std::string CGroupUtil::cgroupv2_of_process() {
169
0
#if defined(OS_LINUX)
170
0
    if (!cgroupsv2_enable()) {
171
0
        return "";
172
0
    }
173
    // All PIDs assigned to a cgroup are in /sys/fs/cgroups/{cgroup_name}/cgroup.procs
174
    // A simpler way to get the membership is:
175
0
    std::ifstream cgroup_name_file("/proc/self/cgroup");
176
0
    if (!cgroup_name_file.is_open()) {
177
0
        return "";
178
0
    }
179
    // With cgroups v2, there will be a *single* line with prefix "0::/"
180
    // (see https://docs.kernel.org/admin-guide/cgroup-v2.html)
181
0
    std::string cgroup;
182
0
    std::getline(cgroup_name_file, cgroup);
183
0
    static const std::string v2_prefix = "0::/";
184
0
    if (!cgroup.starts_with(v2_prefix)) {
185
0
        return "";
186
0
    }
187
0
    cgroup = cgroup.substr(v2_prefix.length());
188
0
    return cgroup;
189
#else
190
    return "";
191
#endif
192
0
}
193
194
0
std::optional<std::string> CGroupUtil::get_cgroupsv2_path(const std::string& subsystem) {
195
0
#if defined(OS_LINUX)
196
0
    if (!CGroupUtil::cgroupsv2_enable()) {
197
0
        return {};
198
0
    }
199
200
0
    std::string cgroup = CGroupUtil::cgroupv2_of_process();
201
0
    auto current_cgroup = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup);
202
203
    // Return the bottom-most nested current memory file. If there is no such file at the current
204
    // level, try again at the parent level as memory settings are inherited.
205
0
    while (current_cgroup != default_cgroups_mount.parent_path()) {
206
0
        if (std::filesystem::exists(current_cgroup / subsystem)) {
207
0
            return {current_cgroup};
208
0
        }
209
0
        current_cgroup = current_cgroup.parent_path();
210
0
    }
211
0
    return {};
212
#else
213
    return {};
214
#endif
215
0
}
216
217
Status CGroupUtil::read_int_line_from_cgroup_file(const std::filesystem::path& file_path,
218
5
                                                  int64_t* val) {
219
5
    std::ifstream file_stream(file_path, std::ios::in);
220
5
    if (!file_stream.is_open()) {
221
1
        return Status::CgroupError("Error open {}", file_path.string());
222
1
    }
223
224
4
    string line;
225
4
    getline(file_stream, line);
226
4
    if (file_stream.fail() || file_stream.bad()) {
227
0
        return Status::CgroupError("Error reading {}: {}", file_path.string(), get_str_err_msg());
228
0
    }
229
4
    StringParser::ParseResult pr;
230
    // Parse into an int64_t If it overflows, returning the max value of int64_t is ok because that
231
    // is effectively unlimited.
232
4
    *val = StringParser::string_to_int<int64_t>(line.c_str(), line.size(), &pr);
233
4
    if ((pr != StringParser::PARSE_SUCCESS && pr != StringParser::PARSE_OVERFLOW)) {
234
0
        return Status::InvalidArgument("Failed to parse {} as int64: '{}'", file_path.string(),
235
0
                                       line);
236
0
    }
237
4
    return Status::OK();
238
4
}
239
240
void CGroupUtil::read_int_metric_from_cgroup_file(
241
        const std::filesystem::path& file_path,
242
5
        std::unordered_map<std::string, int64_t>& metrics_map) {
243
5
    std::ifstream cgroup_file(file_path, std::ios::in);
244
5
    std::string line;
245
144
    while (cgroup_file.good() && !cgroup_file.eof()) {
246
139
        getline(cgroup_file, line);
247
139
        std::vector<std::string> fields = strings::Split(line, " ", strings::SkipWhitespace());
248
139
        if (fields.size() < 2) {
249
4
            continue;
250
4
        }
251
135
        std::string key = fields[0].substr(0, fields[0].size());
252
253
135
        StringParser::ParseResult result;
254
135
        auto value =
255
135
                StringParser::string_to_int<int64_t>(fields[1].data(), fields[1].size(), &result);
256
257
135
        if (result == StringParser::PARSE_SUCCESS) {
258
135
            if (fields.size() == 2) {
259
135
                metrics_map[key] = value;
260
135
            } else if (fields[2] == "kB") {
261
0
                metrics_map[key] = value * 1024L;
262
0
            }
263
135
        }
264
135
    }
265
5
    if (cgroup_file.is_open()) {
266
4
        cgroup_file.close();
267
4
    }
268
5
}
269
270
Status CGroupUtil::read_string_line_from_cgroup_file(const std::filesystem::path& file_path,
271
6
                                                     std::string* line_ptr) {
272
6
    std::ifstream file_stream(file_path, std::ios::in);
273
6
    if (!file_stream.is_open()) {
274
0
        return Status::CgroupError("Error open {}", file_path.string());
275
0
    }
276
6
    string line;
277
6
    getline(file_stream, line);
278
6
    if (file_stream.fail() || file_stream.bad()) {
279
0
        return Status::CgroupError("Error reading {}: {}", file_path.string(), get_str_err_msg());
280
0
    }
281
6
    *line_ptr = line;
282
6
    return Status::OK();
283
6
}
284
285
8
Status CGroupUtil::parse_cpuset_line(std::string cpuset_line, int* cpu_count_ptr) {
286
8
    if (cpuset_line.empty()) {
287
0
        return Status::CgroupError("cpuset line is empty");
288
0
    }
289
8
    std::vector<string> ranges;
290
8
    boost::split(ranges, cpuset_line, boost::is_any_of(","));
291
8
    int cpu_count = 0;
292
293
15
    for (const std::string& range : ranges) {
294
15
        std::vector<std::string> cpu_values;
295
15
        boost::split(cpu_values, range, boost::is_any_of("-"));
296
297
15
        if (cpu_values.size() == 2) {
298
10
            int start = std::stoi(cpu_values[0]);
299
10
            int end = std::stoi(cpu_values[1]);
300
10
            cpu_count += (end - start) + 1;
301
10
        } else {
302
5
            cpu_count++;
303
5
        }
304
15
    }
305
8
    *cpu_count_ptr = cpu_count;
306
8
    return Status::OK();
307
8
}
308
309
2
int CGroupUtil::get_cgroup_limited_cpu_number(int physical_cores) {
310
2
    if (physical_cores <= 0) {
311
0
        return physical_cores;
312
0
    }
313
2
    int ret = physical_cores;
314
2
#if defined(OS_LINUX)
315
    // For cgroup v2
316
    // Child cgroup's cpu.max may bigger than parent group's cpu.max,
317
    //      so it should look up from current cgroup to top group.
318
    // For cpuset, child cgroup's cpuset.cpus could not bigger thant parent's cpuset.cpus.
319
2
    if (CGroupUtil::cgroupsv2_enable()) {
320
0
        std::string cgroupv2_process_path = CGroupUtil::cgroupv2_of_process();
321
0
        if (cgroupv2_process_path.empty()) {
322
0
            return ret;
323
0
        }
324
0
        std::filesystem::path current_cgroup_path = (default_cgroups_mount / cgroupv2_process_path);
325
0
        ret = get_cgroup_v2_cpu_quota_number(current_cgroup_path, default_cgroups_mount, ret);
326
327
0
        current_cgroup_path = (default_cgroups_mount / cgroupv2_process_path);
328
0
        ret = get_cgroup_v2_cpuset_number(current_cgroup_path, default_cgroups_mount, ret);
329
2
    } else if (CGroupUtil::cgroupsv1_enable()) {
330
        // cpu quota, should find first not empty config from current path to top.
331
        // because if a process attach to current cgroup, its cpu quota may not be set.
332
2
        std::string cpu_quota_path = "";
333
2
        Status cpu_quota_ret = CGroupUtil::find_abs_cgroupv1_path("cpu", &cpu_quota_path);
334
2
        if (cpu_quota_ret.ok() && !cpu_quota_path.empty()) {
335
2
            std::filesystem::path current_cgroup_path = cpu_quota_path;
336
2
            ret = get_cgroup_v1_cpu_quota_number(current_cgroup_path, default_cgroups_mount, ret);
337
2
        }
338
339
        //cpuset
340
        // just lookup current process cgroup path is enough
341
        // because if a process attach to current cgroup, its cpuset.cpus must be set.
342
2
        std::string cpuset_path = "";
343
2
        Status cpuset_ret = CGroupUtil::find_abs_cgroupv1_path("cpuset", &cpuset_path);
344
2
        if (cpuset_ret.ok() && !cpuset_path.empty()) {
345
2
            std::filesystem::path current_path = cpuset_path;
346
2
            ret = get_cgroup_v1_cpuset_number(current_path, ret);
347
2
        }
348
2
    }
349
2
#endif
350
2
    return ret;
351
2
}
352
353
int CGroupUtil::get_cgroup_v2_cpu_quota_number(std::filesystem::path& current_path,
354
                                               const std::filesystem::path& default_cg_mout_path,
355
4
                                               int cpu_num) {
356
4
    int ret = cpu_num;
357
12
    while (current_path != default_cg_mout_path.parent_path()) {
358
8
        std::ifstream cpu_max_file(current_path / "cpu.max");
359
8
        if (cpu_max_file.is_open()) {
360
8
            std::string cpu_limit_str;
361
8
            double cpu_period;
362
8
            cpu_max_file >> cpu_limit_str >> cpu_period;
363
8
            if (cpu_limit_str != "max" && cpu_period != 0) {
364
5
                double cpu_limit = std::stod(cpu_limit_str);
365
5
                ret = std::min(static_cast<int>(std::ceil(cpu_limit / cpu_period)), ret);
366
5
            }
367
8
        }
368
8
        current_path = current_path.parent_path();
369
8
    }
370
4
    return ret;
371
4
}
372
373
int CGroupUtil::get_cgroup_v2_cpuset_number(std::filesystem::path& current_path,
374
                                            const std::filesystem::path& default_cg_mout_path,
375
2
                                            int cpu_num) {
376
2
    int ret = cpu_num;
377
3
    while (current_path != default_cg_mout_path.parent_path()) {
378
3
        std::ifstream cpuset_cpus_file(current_path / "cpuset.cpus.effective");
379
3
        current_path = current_path.parent_path();
380
3
        if (cpuset_cpus_file.is_open()) {
381
3
            std::string cpuset_line;
382
3
            cpuset_cpus_file >> cpuset_line;
383
3
            if (cpuset_line.empty()) {
384
1
                continue;
385
1
            }
386
2
            int cpus_count = 0;
387
2
            static_cast<void>(CGroupUtil::parse_cpuset_line(cpuset_line, &cpus_count));
388
2
            ret = std::min(cpus_count, ret);
389
2
            break;
390
3
        }
391
3
    }
392
2
    return ret;
393
2
}
394
395
int CGroupUtil::get_cgroup_v1_cpu_quota_number(std::filesystem::path& current_path,
396
                                               const std::filesystem::path& default_cg_mout_path,
397
5
                                               int cpu_num) {
398
5
    int ret = cpu_num;
399
12
    while (current_path != default_cg_mout_path.parent_path()) {
400
9
        std::ifstream cpu_quota_file(current_path / "cpu.cfs_quota_us");
401
9
        std::ifstream cpu_period_file(current_path / "cpu.cfs_period_us");
402
9
        if (cpu_quota_file.is_open() && cpu_period_file.is_open()) {
403
7
            double cpu_quota_value;
404
7
            double cpu_period_value;
405
7
            cpu_quota_file >> cpu_quota_value;
406
7
            cpu_period_file >> cpu_period_value;
407
7
            if (cpu_quota_value > 0 && cpu_period_value > 0) {
408
2
                ret = std::min(ret,
409
2
                               static_cast<int>(std::ceil(cpu_quota_value / cpu_period_value)));
410
2
                break;
411
2
            }
412
7
        }
413
7
        current_path = current_path.parent_path();
414
7
    }
415
5
    return ret;
416
5
}
417
418
3
int CGroupUtil::get_cgroup_v1_cpuset_number(std::filesystem::path& current_path, int cpu_num) {
419
3
    int ret = cpu_num;
420
3
    std::string cpuset_line = "";
421
3
    Status cpuset_ret = CGroupUtil::read_string_line_from_cgroup_file(
422
3
            (current_path / "cpuset.cpus"), &cpuset_line);
423
3
    if (cpuset_ret.ok() && !cpuset_line.empty()) {
424
3
        int cpuset_count = 0;
425
3
        static_cast<void>(CGroupUtil::parse_cpuset_line(cpuset_line, &cpuset_count));
426
3
        if (cpuset_count > 0) {
427
3
            ret = std::min(ret, cpuset_count);
428
3
        }
429
3
    }
430
3
    return ret;
431
3
}
432
433
} // namespace doris