Coverage Report

Created: 2025-03-10 18:45

/root/doris/be/src/util/cgroup_util.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/cgroup_util.h"
19
20
#include <algorithm>
21
#include <fstream>
22
#include <utility>
23
#include <vector>
24
25
#include "gutil/stringprintf.h"
26
#include "gutil/strings/escaping.h"
27
#include "gutil/strings/split.h"
28
#include "gutil/strings/substitute.h"
29
#include "io/fs/local_file_system.h"
30
#include "util/error_util.h"
31
#include "util/string_parser.hpp"
32
33
using strings::CUnescape;
34
using strings::Split;
35
using strings::SkipWhitespace;
36
using std::pair;
37
38
namespace doris {
39
40
14
bool CGroupUtil::cgroupsv1_enable() {
41
14
    bool exists = true;
42
14
    Status st = io::global_local_filesystem()->exists("/proc/cgroups", &exists);
43
14
    return st.ok() && exists;
44
14
}
45
46
16
bool CGroupUtil::cgroupsv2_enable() {
47
16
#if defined(OS_LINUX)
48
    // This file exists iff the host has cgroups v2 enabled.
49
16
    auto controllers_file = default_cgroups_mount / "cgroup.controllers";
50
16
    bool exists = true;
51
16
    Status st = io::global_local_filesystem()->exists(controllers_file, &exists);
52
16
    return st.ok() && exists;
53
#else
54
    return false;
55
#endif
56
16
}
57
58
11
Status CGroupUtil::find_global_cgroupv1(const string& subsystem, string* path) {
59
11
    std::ifstream proc_cgroups("/proc/self/cgroup", std::ios::in);
60
11
    string line;
61
38
    while (true) {
62
38
        if (proc_cgroups.fail()) {
63
0
            return Status::CgroupError("Error reading /proc/self/cgroup: {}", get_str_err_msg());
64
38
        } else if (proc_cgroups.peek() == std::ifstream::traits_type::eof()) {
65
0
            return Status::CgroupError("Could not find subsystem {} in /proc/self/cgroup",
66
0
                                       subsystem);
67
0
        }
68
        // The line format looks like this:
69
        // 4:memory:/user.slice
70
        // 9:cpu,cpuacct:/user.slice
71
        // so field size will be 3
72
38
        getline(proc_cgroups, line);
73
38
        if (!proc_cgroups.good()) {
74
0
            continue;
75
0
        }
76
38
        std::vector<string> fields = Split(line, ":");
77
        // ":" in the path does not appear to be escaped - bail in the unusual case that
78
        // we get too many tokens.
79
38
        if (fields.size() != 3) {
80
0
            return Status::InvalidArgument(
81
0
                    "Could not parse line from /proc/self/cgroup - had {} > 3 tokens: '{}'",
82
0
                    fields.size(), line);
83
0
        }
84
38
        std::vector<string> subsystems = Split(fields[1], ",");
85
38
        auto it = std::find(subsystems.begin(), subsystems.end(), subsystem);
86
38
        if (it != subsystems.end()) {
87
11
            *path = std::move(fields[2]);
88
11
            return Status::OK();
89
11
        }
90
38
    }
91
11
}
92
93
22
static Status unescape_path(const string& escaped, string* unescaped) {
94
22
    string err;
95
22
    if (!CUnescape(escaped, unescaped, &err)) {
96
0
        return Status::InvalidArgument("Could not unescape path '{}': {}", escaped, err);
97
0
    }
98
22
    return Status::OK();
99
22
}
100
101
11
Status CGroupUtil::find_cgroupv1_mounts(const string& subsystem, pair<string, string>* result) {
102
11
    std::ifstream mountinfo("/proc/self/mountinfo", std::ios::in);
103
11
    string line;
104
171
    while (true) {
105
171
        if (mountinfo.fail() || mountinfo.bad()) {
106
0
            return Status::CgroupError("Error reading /proc/self/mountinfo: {}", get_str_err_msg());
107
171
        } else if (mountinfo.eof()) {
108
0
            return Status::CgroupError("Could not find subsystem {} in /proc/self/mountinfo",
109
0
                                       subsystem);
110
0
        }
111
        // The relevant lines look like below (see proc manpage for full documentation). The
112
        // first example is running outside of a container, the second example is running
113
        // inside a docker container. Field 3 is the path relative to the root CGroup on
114
        // the host and Field 4 is the mount point from this process's point of view.
115
        // 34 29 0:28 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:15 -
116
        //    cgroup cgroup rw,memory
117
        // 275 271 0:28 /docker/f23eee6f88c2ba99fcce /sys/fs/cgroup/memory
118
        //    ro,nosuid,nodev,noexec,relatime master:15 - cgroup cgroup rw,memory
119
171
        getline(mountinfo, line);
120
171
        if (!mountinfo.good()) {
121
0
            continue;
122
0
        }
123
171
        std::vector<string> fields = Split(line, " ", SkipWhitespace());
124
171
        if (fields.size() < 7) {
125
0
            return Status::InvalidArgument(
126
0
                    "Could not parse line from /proc/self/mountinfo - had {} > 7 tokens: '{}'",
127
0
                    fields.size(), line);
128
0
        }
129
171
        if (fields[fields.size() - 3] != "cgroup") {
130
66
            continue;
131
66
        }
132
        // This is a cgroup mount. Check if it's the mount we're looking for.
133
105
        std::vector<string> cgroup_opts = Split(fields[fields.size() - 1], ",", SkipWhitespace());
134
105
        auto it = std::find(cgroup_opts.begin(), cgroup_opts.end(), subsystem);
135
105
        if (it == cgroup_opts.end()) {
136
94
            continue;
137
94
        }
138
        // This is the right mount.
139
11
        string mount_path, system_path;
140
11
        RETURN_IF_ERROR(unescape_path(fields[4], &mount_path));
141
11
        RETURN_IF_ERROR(unescape_path(fields[3], &system_path));
142
        // Strip trailing "/" so that both returned paths match in whether they have a
143
        // trailing "/".
144
11
        if (system_path[system_path.size() - 1] == '/') {
145
0
            system_path.pop_back();
146
0
        }
147
11
        *result = {mount_path, system_path};
148
11
        return Status::OK();
149
11
    }
150
11
}
151
152
11
Status CGroupUtil::find_abs_cgroupv1_path(const string& subsystem, string* path) {
153
11
    if (!cgroupsv1_enable()) {
154
0
        return Status::InvalidArgument("cgroup is not enabled!");
155
0
    }
156
11
    RETURN_IF_ERROR(find_global_cgroupv1(subsystem, path));
157
11
    pair<string, string> paths;
158
11
    RETURN_IF_ERROR(find_cgroupv1_mounts(subsystem, &paths));
159
11
    const string& mount_path = paths.first;
160
11
    const string& system_path = paths.second;
161
11
    if (path->compare(0, system_path.size(), system_path) != 0) {
162
0
        return Status::InvalidArgument("Expected CGroup path '{}' to start with '{}'", *path,
163
0
                                       system_path);
164
0
    }
165
11
    path->replace(0, system_path.size(), mount_path);
166
11
    return Status::OK();
167
11
}
168
169
0
std::string CGroupUtil::cgroupv2_of_process() {
170
0
#if defined(OS_LINUX)
171
0
    if (!cgroupsv2_enable()) {
172
0
        return "";
173
0
    }
174
    // All PIDs assigned to a cgroup are in /sys/fs/cgroups/{cgroup_name}/cgroup.procs
175
    // A simpler way to get the membership is:
176
0
    std::ifstream cgroup_name_file("/proc/self/cgroup");
177
0
    if (!cgroup_name_file.is_open()) {
178
0
        return "";
179
0
    }
180
    // With cgroups v2, there will be a *single* line with prefix "0::/"
181
    // (see https://docs.kernel.org/admin-guide/cgroup-v2.html)
182
0
    std::string cgroup;
183
0
    std::getline(cgroup_name_file, cgroup);
184
0
    static const std::string v2_prefix = "0::/";
185
0
    if (!cgroup.starts_with(v2_prefix)) {
186
0
        return "";
187
0
    }
188
0
    cgroup = cgroup.substr(v2_prefix.length());
189
0
    return cgroup;
190
#else
191
    return "";
192
#endif
193
0
}
194
195
0
std::optional<std::string> CGroupUtil::get_cgroupsv2_path(const std::string& subsystem) {
196
0
#if defined(OS_LINUX)
197
0
    if (!CGroupUtil::cgroupsv2_enable()) {
198
0
        return {};
199
0
    }
200
201
0
    std::string cgroup = CGroupUtil::cgroupv2_of_process();
202
0
    auto current_cgroup = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup);
203
204
    // Return the bottom-most nested current memory file. If there is no such file at the current
205
    // level, try again at the parent level as memory settings are inherited.
206
0
    while (current_cgroup != default_cgroups_mount.parent_path()) {
207
0
        if (std::filesystem::exists(current_cgroup / subsystem)) {
208
0
            return {current_cgroup};
209
0
        }
210
0
        current_cgroup = current_cgroup.parent_path();
211
0
    }
212
0
    return {};
213
#else
214
    return {};
215
#endif
216
0
}
217
218
Status CGroupUtil::read_int_line_from_cgroup_file(const std::filesystem::path& file_path,
219
5
                                                  int64_t* val) {
220
5
    std::ifstream file_stream(file_path, std::ios::in);
221
5
    if (!file_stream.is_open()) {
222
1
        return Status::CgroupError("Error open {}", file_path.string());
223
1
    }
224
225
4
    string line;
226
4
    getline(file_stream, line);
227
4
    if (file_stream.fail() || file_stream.bad()) {
228
0
        return Status::CgroupError("Error reading {}: {}", file_path.string(), get_str_err_msg());
229
0
    }
230
4
    StringParser::ParseResult pr;
231
    // Parse into an int64_t If it overflows, returning the max value of int64_t is ok because that
232
    // is effectively unlimited.
233
4
    *val = StringParser::string_to_int<int64_t>(line.c_str(), line.size(), &pr);
234
4
    if ((pr != StringParser::PARSE_SUCCESS && pr != StringParser::PARSE_OVERFLOW)) {
235
0
        return Status::InvalidArgument("Failed to parse {} as int64: '{}'", file_path.string(),
236
0
                                       line);
237
0
    }
238
4
    return Status::OK();
239
4
}
240
241
void CGroupUtil::read_int_metric_from_cgroup_file(
242
        const std::filesystem::path& file_path,
243
5
        std::unordered_map<std::string, int64_t>& metrics_map) {
244
5
    std::ifstream cgroup_file(file_path, std::ios::in);
245
5
    std::string line;
246
144
    while (cgroup_file.good() && !cgroup_file.eof()) {
247
139
        getline(cgroup_file, line);
248
139
        std::vector<std::string> fields = strings::Split(line, " ", strings::SkipWhitespace());
249
139
        if (fields.size() < 2) {
250
4
            continue;
251
4
        }
252
135
        std::string key = fields[0].substr(0, fields[0].size());
253
254
135
        StringParser::ParseResult result;
255
135
        auto value =
256
135
                StringParser::string_to_int<int64_t>(fields[1].data(), fields[1].size(), &result);
257
258
135
        if (result == StringParser::PARSE_SUCCESS) {
259
135
            if (fields.size() == 2) {
260
135
                metrics_map[key] = value;
261
135
            } else if (fields[2] == "kB") {
262
0
                metrics_map[key] = value * 1024L;
263
0
            }
264
135
        }
265
135
    }
266
5
    if (cgroup_file.is_open()) {
267
4
        cgroup_file.close();
268
4
    }
269
5
}
270
271
Status CGroupUtil::read_string_line_from_cgroup_file(const std::filesystem::path& file_path,
272
6
                                                     std::string* line_ptr) {
273
6
    std::ifstream file_stream(file_path, std::ios::in);
274
6
    if (!file_stream.is_open()) {
275
0
        return Status::CgroupError("Error open {}", file_path.string());
276
0
    }
277
6
    string line;
278
6
    getline(file_stream, line);
279
6
    if (file_stream.fail() || file_stream.bad()) {
280
0
        return Status::CgroupError("Error reading {}: {}", file_path.string(), get_str_err_msg());
281
0
    }
282
6
    *line_ptr = line;
283
6
    return Status::OK();
284
6
}
285
286
8
Status CGroupUtil::parse_cpuset_line(std::string cpuset_line, int* cpu_count_ptr) {
287
8
    if (cpuset_line.empty()) {
288
0
        return Status::CgroupError("cpuset line is empty");
289
0
    }
290
8
    std::vector<string> ranges;
291
8
    boost::split(ranges, cpuset_line, boost::is_any_of(","));
292
8
    int cpu_count = 0;
293
294
15
    for (const std::string& range : ranges) {
295
15
        std::vector<std::string> cpu_values;
296
15
        boost::split(cpu_values, range, boost::is_any_of("-"));
297
298
15
        if (cpu_values.size() == 2) {
299
10
            int start = std::stoi(cpu_values[0]);
300
10
            int end = std::stoi(cpu_values[1]);
301
10
            cpu_count += (end - start) + 1;
302
10
        } else {
303
5
            cpu_count++;
304
5
        }
305
15
    }
306
8
    *cpu_count_ptr = cpu_count;
307
8
    return Status::OK();
308
8
}
309
310
2
int CGroupUtil::get_cgroup_limited_cpu_number(int physical_cores) {
311
2
    if (physical_cores <= 0) {
312
0
        return physical_cores;
313
0
    }
314
2
    int ret = physical_cores;
315
2
#if defined(OS_LINUX)
316
    // For cgroup v2
317
    // Child cgroup's cpu.max may bigger than parent group's cpu.max,
318
    //      so it should look up from current cgroup to top group.
319
    // For cpuset, child cgroup's cpuset.cpus could not bigger thant parent's cpuset.cpus.
320
2
    if (CGroupUtil::cgroupsv2_enable()) {
321
0
        std::string cgroupv2_process_path = CGroupUtil::cgroupv2_of_process();
322
0
        if (cgroupv2_process_path.empty()) {
323
0
            return ret;
324
0
        }
325
0
        std::filesystem::path current_cgroup_path = (default_cgroups_mount / cgroupv2_process_path);
326
0
        ret = get_cgroup_v2_cpu_quota_number(current_cgroup_path, default_cgroups_mount, ret);
327
328
0
        current_cgroup_path = (default_cgroups_mount / cgroupv2_process_path);
329
0
        ret = get_cgroup_v2_cpuset_number(current_cgroup_path, default_cgroups_mount, ret);
330
2
    } else if (CGroupUtil::cgroupsv1_enable()) {
331
        // cpu quota, should find first not empty config from current path to top.
332
        // because if a process attach to current cgroup, its cpu quota may not be set.
333
2
        std::string cpu_quota_path = "";
334
2
        Status cpu_quota_ret = CGroupUtil::find_abs_cgroupv1_path("cpu", &cpu_quota_path);
335
2
        if (cpu_quota_ret.ok() && !cpu_quota_path.empty()) {
336
2
            std::filesystem::path current_cgroup_path = cpu_quota_path;
337
2
            ret = get_cgroup_v1_cpu_quota_number(current_cgroup_path, default_cgroups_mount, ret);
338
2
        }
339
340
        //cpuset
341
        // just lookup current process cgroup path is enough
342
        // because if a process attach to current cgroup, its cpuset.cpus must be set.
343
2
        std::string cpuset_path = "";
344
2
        Status cpuset_ret = CGroupUtil::find_abs_cgroupv1_path("cpuset", &cpuset_path);
345
2
        if (cpuset_ret.ok() && !cpuset_path.empty()) {
346
2
            std::filesystem::path current_path = cpuset_path;
347
2
            ret = get_cgroup_v1_cpuset_number(current_path, ret);
348
2
        }
349
2
    }
350
2
#endif
351
2
    return ret;
352
2
}
353
354
int CGroupUtil::get_cgroup_v2_cpu_quota_number(std::filesystem::path& current_path,
355
                                               const std::filesystem::path& default_cg_mout_path,
356
4
                                               int cpu_num) {
357
4
    int ret = cpu_num;
358
12
    while (current_path != default_cg_mout_path.parent_path()) {
359
8
        std::ifstream cpu_max_file(current_path / "cpu.max");
360
8
        if (cpu_max_file.is_open()) {
361
8
            std::string cpu_limit_str;
362
8
            double cpu_period;
363
8
            cpu_max_file >> cpu_limit_str >> cpu_period;
364
8
            if (cpu_limit_str != "max" && cpu_period != 0) {
365
5
                double cpu_limit = std::stod(cpu_limit_str);
366
5
                ret = std::min(static_cast<int>(std::ceil(cpu_limit / cpu_period)), ret);
367
5
            }
368
8
        }
369
8
        current_path = current_path.parent_path();
370
8
    }
371
4
    return ret;
372
4
}
373
374
int CGroupUtil::get_cgroup_v2_cpuset_number(std::filesystem::path& current_path,
375
                                            const std::filesystem::path& default_cg_mout_path,
376
2
                                            int cpu_num) {
377
2
    int ret = cpu_num;
378
3
    while (current_path != default_cg_mout_path.parent_path()) {
379
3
        std::ifstream cpuset_cpus_file(current_path / "cpuset.cpus.effective");
380
3
        current_path = current_path.parent_path();
381
3
        if (cpuset_cpus_file.is_open()) {
382
3
            std::string cpuset_line;
383
3
            cpuset_cpus_file >> cpuset_line;
384
3
            if (cpuset_line.empty()) {
385
1
                continue;
386
1
            }
387
2
            int cpus_count = 0;
388
2
            static_cast<void>(CGroupUtil::parse_cpuset_line(cpuset_line, &cpus_count));
389
2
            ret = std::min(cpus_count, ret);
390
2
            break;
391
3
        }
392
3
    }
393
2
    return ret;
394
2
}
395
396
int CGroupUtil::get_cgroup_v1_cpu_quota_number(std::filesystem::path& current_path,
397
                                               const std::filesystem::path& default_cg_mout_path,
398
5
                                               int cpu_num) {
399
5
    int ret = cpu_num;
400
12
    while (current_path != default_cg_mout_path.parent_path()) {
401
9
        std::ifstream cpu_quota_file(current_path / "cpu.cfs_quota_us");
402
9
        std::ifstream cpu_period_file(current_path / "cpu.cfs_period_us");
403
9
        if (cpu_quota_file.is_open() && cpu_period_file.is_open()) {
404
7
            double cpu_quota_value;
405
7
            double cpu_period_value;
406
7
            cpu_quota_file >> cpu_quota_value;
407
7
            cpu_period_file >> cpu_period_value;
408
7
            if (cpu_quota_value > 0 && cpu_period_value > 0) {
409
2
                ret = std::min(ret,
410
2
                               static_cast<int>(std::ceil(cpu_quota_value / cpu_period_value)));
411
2
                break;
412
2
            }
413
7
        }
414
7
        current_path = current_path.parent_path();
415
7
    }
416
5
    return ret;
417
5
}
418
419
3
int CGroupUtil::get_cgroup_v1_cpuset_number(std::filesystem::path& current_path, int cpu_num) {
420
3
    int ret = cpu_num;
421
3
    std::string cpuset_line = "";
422
3
    Status cpuset_ret = CGroupUtil::read_string_line_from_cgroup_file(
423
3
            (current_path / "cpuset.cpus"), &cpuset_line);
424
3
    if (cpuset_ret.ok() && !cpuset_line.empty()) {
425
3
        int cpuset_count = 0;
426
3
        static_cast<void>(CGroupUtil::parse_cpuset_line(cpuset_line, &cpuset_count));
427
3
        if (cpuset_count > 0) {
428
3
            ret = std::min(ret, cpuset_count);
429
3
        }
430
3
    }
431
3
    return ret;
432
3
}
433
434
} // namespace doris