Coverage Report

Created: 2025-05-09 19:27

/root/doris/be/src/util/cgroup_util.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/cgroup_util.h"
19
20
#include <absl/strings/escaping.h>
21
#include <absl/strings/str_split.h>
22
23
#include <algorithm>
24
#include <fstream>
25
#include <utility>
26
#include <vector>
27
28
#include "gutil/stringprintf.h"
29
#include "io/fs/local_file_system.h"
30
#include "util/error_util.h"
31
#include "util/string_parser.hpp"
32
33
using std::pair;
34
35
namespace doris {
36
37
14
bool CGroupUtil::cgroupsv1_enable() {
38
14
    bool exists = true;
39
14
    Status st = io::global_local_filesystem()->exists("/proc/cgroups", &exists);
40
14
    return st.ok() && exists;
41
14
}
42
43
16
bool CGroupUtil::cgroupsv2_enable() {
44
16
#if defined(OS_LINUX)
45
    // This file exists iff the host has cgroups v2 enabled.
46
16
    auto controllers_file = default_cgroups_mount / "cgroup.controllers";
47
16
    bool exists = true;
48
16
    Status st = io::global_local_filesystem()->exists(controllers_file, &exists);
49
16
    return st.ok() && exists;
50
#else
51
    return false;
52
#endif
53
16
}
54
55
11
Status CGroupUtil::find_global_cgroupv1(const string& subsystem, string* path) {
56
11
    std::ifstream proc_cgroups("/proc/self/cgroup", std::ios::in);
57
11
    string line;
58
99
    while (true) {
59
99
        if (proc_cgroups.fail()) {
60
0
            return Status::CgroupError("Error reading /proc/self/cgroup: {}", get_str_err_msg());
61
99
        } else if (proc_cgroups.peek() == std::ifstream::traits_type::eof()) {
62
0
            return Status::CgroupError("Could not find subsystem {} in /proc/self/cgroup",
63
0
                                       subsystem);
64
0
        }
65
        // The line format looks like this:
66
        // 4:memory:/user.slice
67
        // 9:cpu,cpuacct:/user.slice
68
        // so field size will be 3
69
99
        getline(proc_cgroups, line);
70
99
        if (!proc_cgroups.good()) {
71
0
            continue;
72
0
        }
73
99
        std::vector<string> fields = absl::StrSplit(line, ":");
74
        // ":" in the path does not appear to be escaped - bail in the unusual case that
75
        // we get too many tokens.
76
99
        if (fields.size() != 3) {
77
0
            return Status::InvalidArgument(
78
0
                    "Could not parse line from /proc/self/cgroup - had {} > 3 tokens: '{}'",
79
0
                    fields.size(), line);
80
0
        }
81
99
        std::vector<string> subsystems = absl::StrSplit(fields[1], ",");
82
99
        auto it = std::find(subsystems.begin(), subsystems.end(), subsystem);
83
99
        if (it != subsystems.end()) {
84
11
            *path = std::move(fields[2]);
85
11
            return Status::OK();
86
11
        }
87
99
    }
88
11
}
89
90
22
static Status unescape_path(const string& escaped, string* unescaped) {
91
22
    string err;
92
22
    if (!absl::CUnescape(escaped, unescaped, &err)) {
93
0
        return Status::InvalidArgument("Could not unescape path '{}': {}", escaped, err);
94
0
    }
95
22
    return Status::OK();
96
22
}
97
98
11
Status CGroupUtil::find_cgroupv1_mounts(const string& subsystem, pair<string, string>* result) {
99
11
    std::ifstream mountinfo("/proc/self/mountinfo", std::ios::in);
100
11
    string line;
101
110
    while (true) {
102
110
        if (mountinfo.fail() || mountinfo.bad()) {
103
0
            return Status::CgroupError("Error reading /proc/self/mountinfo: {}", get_str_err_msg());
104
110
        } else if (mountinfo.eof()) {
105
0
            return Status::CgroupError("Could not find subsystem {} in /proc/self/mountinfo",
106
0
                                       subsystem);
107
0
        }
108
        // The relevant lines look like below (see proc manpage for full documentation). The
109
        // first example is running outside of a container, the second example is running
110
        // inside a docker container. Field 3 is the path relative to the root CGroup on
111
        // the host and Field 4 is the mount point from this process's point of view.
112
        // 34 29 0:28 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:15 -
113
        //    cgroup cgroup rw,memory
114
        // 275 271 0:28 /docker/f23eee6f88c2ba99fcce /sys/fs/cgroup/memory
115
        //    ro,nosuid,nodev,noexec,relatime master:15 - cgroup cgroup rw,memory
116
110
        getline(mountinfo, line);
117
110
        if (!mountinfo.good()) {
118
0
            continue;
119
0
        }
120
110
        std::vector<string> fields = absl::StrSplit(line, " ", absl::SkipWhitespace());
121
110
        if (fields.size() < 7) {
122
0
            return Status::InvalidArgument(
123
0
                    "Could not parse line from /proc/self/mountinfo - had {} > 7 tokens: '{}'",
124
0
                    fields.size(), line);
125
0
        }
126
110
        if (fields[fields.size() - 3] != "cgroup") {
127
66
            continue;
128
66
        }
129
        // This is a cgroup mount. Check if it's the mount we're looking for.
130
44
        std::vector<string> cgroup_opts =
131
44
                absl::StrSplit(fields[fields.size() - 1], ",", absl::SkipWhitespace());
132
44
        auto it = std::find(cgroup_opts.begin(), cgroup_opts.end(), subsystem);
133
44
        if (it == cgroup_opts.end()) {
134
33
            continue;
135
33
        }
136
        // This is the right mount.
137
11
        string mount_path, system_path;
138
11
        RETURN_IF_ERROR(unescape_path(fields[4], &mount_path));
139
11
        RETURN_IF_ERROR(unescape_path(fields[3], &system_path));
140
        // Strip trailing "/" so that both returned paths match in whether they have a
141
        // trailing "/".
142
11
        if (system_path[system_path.size() - 1] == '/') {
143
0
            system_path.pop_back();
144
0
        }
145
11
        *result = {mount_path, system_path};
146
11
        return Status::OK();
147
11
    }
148
11
}
149
150
11
Status CGroupUtil::find_abs_cgroupv1_path(const string& subsystem, string* path) {
151
11
    if (!cgroupsv1_enable()) {
152
0
        return Status::InvalidArgument("cgroup is not enabled!");
153
0
    }
154
11
    RETURN_IF_ERROR(find_global_cgroupv1(subsystem, path));
155
11
    pair<string, string> paths;
156
11
    RETURN_IF_ERROR(find_cgroupv1_mounts(subsystem, &paths));
157
11
    const string& mount_path = paths.first;
158
11
    const string& system_path = paths.second;
159
11
    if (path->compare(0, system_path.size(), system_path) != 0) {
160
0
        return Status::InvalidArgument("Expected CGroup path '{}' to start with '{}'", *path,
161
0
                                       system_path);
162
0
    }
163
11
    path->replace(0, system_path.size(), mount_path);
164
11
    return Status::OK();
165
11
}
166
167
0
std::string CGroupUtil::cgroupv2_of_process() {
168
0
#if defined(OS_LINUX)
169
0
    if (!cgroupsv2_enable()) {
170
0
        return "";
171
0
    }
172
    // All PIDs assigned to a cgroup are in /sys/fs/cgroups/{cgroup_name}/cgroup.procs
173
    // A simpler way to get the membership is:
174
0
    std::ifstream cgroup_name_file("/proc/self/cgroup");
175
0
    if (!cgroup_name_file.is_open()) {
176
0
        return "";
177
0
    }
178
    // With cgroups v2, there will be a *single* line with prefix "0::/"
179
    // (see https://docs.kernel.org/admin-guide/cgroup-v2.html)
180
0
    std::string cgroup;
181
0
    std::getline(cgroup_name_file, cgroup);
182
0
    static const std::string v2_prefix = "0::/";
183
0
    if (!cgroup.starts_with(v2_prefix)) {
184
0
        return "";
185
0
    }
186
0
    cgroup = cgroup.substr(v2_prefix.length());
187
0
    return cgroup;
188
#else
189
    return "";
190
#endif
191
0
}
192
193
0
std::optional<std::string> CGroupUtil::get_cgroupsv2_path(const std::string& subsystem) {
194
0
#if defined(OS_LINUX)
195
0
    if (!CGroupUtil::cgroupsv2_enable()) {
196
0
        return {};
197
0
    }
198
199
0
    std::string cgroup = CGroupUtil::cgroupv2_of_process();
200
0
    auto current_cgroup = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup);
201
202
    // Return the bottom-most nested current memory file. If there is no such file at the current
203
    // level, try again at the parent level as memory settings are inherited.
204
0
    while (current_cgroup != default_cgroups_mount.parent_path()) {
205
0
        if (std::filesystem::exists(current_cgroup / subsystem)) {
206
0
            return {current_cgroup};
207
0
        }
208
0
        current_cgroup = current_cgroup.parent_path();
209
0
    }
210
0
    return {};
211
#else
212
    return {};
213
#endif
214
0
}
215
216
Status CGroupUtil::read_int_line_from_cgroup_file(const std::filesystem::path& file_path,
217
5
                                                  int64_t* val) {
218
5
    std::ifstream file_stream(file_path, std::ios::in);
219
5
    if (!file_stream.is_open()) {
220
1
        return Status::CgroupError("Error open {}", file_path.string());
221
1
    }
222
223
4
    string line;
224
4
    getline(file_stream, line);
225
4
    if (file_stream.fail() || file_stream.bad()) {
226
0
        return Status::CgroupError("Error reading {}: {}", file_path.string(), get_str_err_msg());
227
0
    }
228
4
    StringParser::ParseResult pr;
229
    // Parse into an int64_t If it overflows, returning the max value of int64_t is ok because that
230
    // is effectively unlimited.
231
4
    *val = StringParser::string_to_int<int64_t>(line.c_str(), line.size(), &pr);
232
4
    if ((pr != StringParser::PARSE_SUCCESS && pr != StringParser::PARSE_OVERFLOW)) {
233
0
        return Status::InvalidArgument("Failed to parse {} as int64: '{}'", file_path.string(),
234
0
                                       line);
235
0
    }
236
4
    return Status::OK();
237
4
}
238
239
void CGroupUtil::read_int_metric_from_cgroup_file(
240
        const std::filesystem::path& file_path,
241
5
        std::unordered_map<std::string, int64_t>& metrics_map) {
242
5
    std::ifstream cgroup_file(file_path, std::ios::in);
243
5
    std::string line;
244
144
    while (cgroup_file.good() && !cgroup_file.eof()) {
245
139
        getline(cgroup_file, line);
246
139
        std::vector<std::string> fields = absl::StrSplit(line, " ", absl::SkipWhitespace());
247
139
        if (fields.size() < 2) {
248
4
            continue;
249
4
        }
250
135
        std::string key = fields[0].substr(0, fields[0].size());
251
252
135
        StringParser::ParseResult result;
253
135
        auto value =
254
135
                StringParser::string_to_int<int64_t>(fields[1].data(), fields[1].size(), &result);
255
256
135
        if (result == StringParser::PARSE_SUCCESS) {
257
135
            if (fields.size() == 2) {
258
135
                metrics_map[key] = value;
259
135
            } else if (fields[2] == "kB") {
260
0
                metrics_map[key] = value * 1024L;
261
0
            }
262
135
        }
263
135
    }
264
5
    if (cgroup_file.is_open()) {
265
4
        cgroup_file.close();
266
4
    }
267
5
}
268
269
Status CGroupUtil::read_string_line_from_cgroup_file(const std::filesystem::path& file_path,
270
6
                                                     std::string* line_ptr) {
271
6
    std::ifstream file_stream(file_path, std::ios::in);
272
6
    if (!file_stream.is_open()) {
273
0
        return Status::CgroupError("Error open {}", file_path.string());
274
0
    }
275
6
    string line;
276
6
    getline(file_stream, line);
277
6
    if (file_stream.fail() || file_stream.bad()) {
278
0
        return Status::CgroupError("Error reading {}: {}", file_path.string(), get_str_err_msg());
279
0
    }
280
6
    *line_ptr = line;
281
6
    return Status::OK();
282
6
}
283
284
8
Status CGroupUtil::parse_cpuset_line(std::string cpuset_line, int* cpu_count_ptr) {
285
8
    if (cpuset_line.empty()) {
286
0
        return Status::CgroupError("cpuset line is empty");
287
0
    }
288
8
    std::vector<string> ranges;
289
8
    boost::split(ranges, cpuset_line, boost::is_any_of(","));
290
8
    int cpu_count = 0;
291
292
15
    for (const std::string& range : ranges) {
293
15
        std::vector<std::string> cpu_values;
294
15
        boost::split(cpu_values, range, boost::is_any_of("-"));
295
296
15
        if (cpu_values.size() == 2) {
297
10
            int start = std::stoi(cpu_values[0]);
298
10
            int end = std::stoi(cpu_values[1]);
299
10
            cpu_count += (end - start) + 1;
300
10
        } else {
301
5
            cpu_count++;
302
5
        }
303
15
    }
304
8
    *cpu_count_ptr = cpu_count;
305
8
    return Status::OK();
306
8
}
307
308
2
int CGroupUtil::get_cgroup_limited_cpu_number(int physical_cores) {
309
2
    if (physical_cores <= 0) {
310
0
        return physical_cores;
311
0
    }
312
2
    int ret = physical_cores;
313
2
#if defined(OS_LINUX)
314
    // For cgroup v2
315
    // Child cgroup's cpu.max may bigger than parent group's cpu.max,
316
    //      so it should look up from current cgroup to top group.
317
    // For cpuset, child cgroup's cpuset.cpus could not bigger thant parent's cpuset.cpus.
318
2
    if (CGroupUtil::cgroupsv2_enable()) {
319
0
        std::string cgroupv2_process_path = CGroupUtil::cgroupv2_of_process();
320
0
        if (cgroupv2_process_path.empty()) {
321
0
            return ret;
322
0
        }
323
0
        std::filesystem::path current_cgroup_path = (default_cgroups_mount / cgroupv2_process_path);
324
0
        ret = get_cgroup_v2_cpu_quota_number(current_cgroup_path, default_cgroups_mount, ret);
325
326
0
        current_cgroup_path = (default_cgroups_mount / cgroupv2_process_path);
327
0
        ret = get_cgroup_v2_cpuset_number(current_cgroup_path, default_cgroups_mount, ret);
328
2
    } else if (CGroupUtil::cgroupsv1_enable()) {
329
        // cpu quota, should find first not empty config from current path to top.
330
        // because if a process attach to current cgroup, its cpu quota may not be set.
331
2
        std::string cpu_quota_path = "";
332
2
        Status cpu_quota_ret = CGroupUtil::find_abs_cgroupv1_path("cpu", &cpu_quota_path);
333
2
        if (cpu_quota_ret.ok() && !cpu_quota_path.empty()) {
334
2
            std::filesystem::path current_cgroup_path = cpu_quota_path;
335
2
            ret = get_cgroup_v1_cpu_quota_number(current_cgroup_path, default_cgroups_mount, ret);
336
2
        }
337
338
        //cpuset
339
        // just lookup current process cgroup path is enough
340
        // because if a process attach to current cgroup, its cpuset.cpus must be set.
341
2
        std::string cpuset_path = "";
342
2
        Status cpuset_ret = CGroupUtil::find_abs_cgroupv1_path("cpuset", &cpuset_path);
343
2
        if (cpuset_ret.ok() && !cpuset_path.empty()) {
344
2
            std::filesystem::path current_path = cpuset_path;
345
2
            ret = get_cgroup_v1_cpuset_number(current_path, ret);
346
2
        }
347
2
    }
348
2
#endif
349
2
    return ret;
350
2
}
351
352
int CGroupUtil::get_cgroup_v2_cpu_quota_number(std::filesystem::path& current_path,
353
                                               const std::filesystem::path& default_cg_mout_path,
354
4
                                               int cpu_num) {
355
4
    int ret = cpu_num;
356
12
    while (current_path != default_cg_mout_path.parent_path()) {
357
8
        std::ifstream cpu_max_file(current_path / "cpu.max");
358
8
        if (cpu_max_file.is_open()) {
359
8
            std::string cpu_limit_str;
360
8
            double cpu_period;
361
8
            cpu_max_file >> cpu_limit_str >> cpu_period;
362
8
            if (cpu_limit_str != "max" && cpu_period != 0) {
363
5
                double cpu_limit = std::stod(cpu_limit_str);
364
5
                ret = std::min(static_cast<int>(std::ceil(cpu_limit / cpu_period)), ret);
365
5
            }
366
8
        }
367
8
        current_path = current_path.parent_path();
368
8
    }
369
4
    return ret;
370
4
}
371
372
int CGroupUtil::get_cgroup_v2_cpuset_number(std::filesystem::path& current_path,
373
                                            const std::filesystem::path& default_cg_mout_path,
374
2
                                            int cpu_num) {
375
2
    int ret = cpu_num;
376
3
    while (current_path != default_cg_mout_path.parent_path()) {
377
3
        std::ifstream cpuset_cpus_file(current_path / "cpuset.cpus.effective");
378
3
        current_path = current_path.parent_path();
379
3
        if (cpuset_cpus_file.is_open()) {
380
3
            std::string cpuset_line;
381
3
            cpuset_cpus_file >> cpuset_line;
382
3
            if (cpuset_line.empty()) {
383
1
                continue;
384
1
            }
385
2
            int cpus_count = 0;
386
2
            static_cast<void>(CGroupUtil::parse_cpuset_line(cpuset_line, &cpus_count));
387
2
            ret = std::min(cpus_count, ret);
388
2
            break;
389
3
        }
390
3
    }
391
2
    return ret;
392
2
}
393
394
int CGroupUtil::get_cgroup_v1_cpu_quota_number(std::filesystem::path& current_path,
395
                                               const std::filesystem::path& default_cg_mout_path,
396
5
                                               int cpu_num) {
397
5
    int ret = cpu_num;
398
12
    while (current_path != default_cg_mout_path.parent_path()) {
399
9
        std::ifstream cpu_quota_file(current_path / "cpu.cfs_quota_us");
400
9
        std::ifstream cpu_period_file(current_path / "cpu.cfs_period_us");
401
9
        if (cpu_quota_file.is_open() && cpu_period_file.is_open()) {
402
7
            double cpu_quota_value;
403
7
            double cpu_period_value;
404
7
            cpu_quota_file >> cpu_quota_value;
405
7
            cpu_period_file >> cpu_period_value;
406
7
            if (cpu_quota_value > 0 && cpu_period_value > 0) {
407
2
                ret = std::min(ret,
408
2
                               static_cast<int>(std::ceil(cpu_quota_value / cpu_period_value)));
409
2
                break;
410
2
            }
411
7
        }
412
7
        current_path = current_path.parent_path();
413
7
    }
414
5
    return ret;
415
5
}
416
417
3
int CGroupUtil::get_cgroup_v1_cpuset_number(std::filesystem::path& current_path, int cpu_num) {
418
3
    int ret = cpu_num;
419
3
    std::string cpuset_line = "";
420
3
    Status cpuset_ret = CGroupUtil::read_string_line_from_cgroup_file(
421
3
            (current_path / "cpuset.cpus"), &cpuset_line);
422
3
    if (cpuset_ret.ok() && !cpuset_line.empty()) {
423
3
        int cpuset_count = 0;
424
3
        static_cast<void>(CGroupUtil::parse_cpuset_line(cpuset_line, &cpuset_count));
425
3
        if (cpuset_count > 0) {
426
3
            ret = std::min(ret, cpuset_count);
427
3
        }
428
3
    }
429
3
    return ret;
430
3
}
431
432
} // namespace doris