Coverage Report

Created: 2024-11-20 19:28

/root/doris/be/src/util/cpu_info.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/cpu-info.cpp
19
// and modified by Doris
20
21
#include "util/cpu_info.h"
22
23
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
24
#elif defined(__GNUC__) && defined(__ARM_NEON__)
25
/* GCC-compatible compiler, targeting ARM with NEON */
26
#include <arm_neon.h>
27
#elif defined(__GNUC__) && defined(__IWMMXT__)
28
/* GCC-compatible compiler, targeting ARM with WMMX */
29
#include <mmintrin.h>
30
#elif (defined(__GNUC__) || defined(__xlC__)) && (defined(__VEC__) || defined(__ALTIVEC__))
31
/* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */
32
#include <altivec.h>
33
#elif defined(__GNUC__) && defined(__SPE__)
34
/* GCC-compatible compiler, targeting PowerPC with SPE */
35
#include <spe.h>
36
#endif
37
38
#ifndef __APPLE__
39
#include <sys/sysinfo.h>
40
#else
41
#include <sys/sysctl.h>
42
#endif
43
44
#include <gen_cpp/Metrics_types.h>
45
#include <sched.h>
46
#include <stdlib.h>
47
#include <unistd.h>
48
49
#include <algorithm>
50
#include <boost/algorithm/string/predicate.hpp>
51
#include <boost/algorithm/string/trim.hpp>
52
// IWYU pragma: no_include <bits/chrono.h>
53
#include <chrono> // IWYU pragma: keep
54
#include <filesystem>
55
#include <fstream>
56
57
#include "common/config.h"
58
#include "common/env_config.h"
59
#include "gflags/gflags.h"
60
#include "gutil/stringprintf.h"
61
#include "gutil/strings/substitute.h"
62
#include "util/pretty_printer.h"
63
64
using boost::algorithm::contains;
65
using boost::algorithm::trim;
66
namespace fs = std::filesystem;
67
using std::max;
68
69
DECLARE_bool(abort_on_config_error);
70
DEFINE_int32(num_cores, 0,
71
             "(Advanced) If > 0, it sets the number of cores available to"
72
             " Impala. Setting it to 0 means Impala will use all available cores on the machine"
73
             " according to /proc/cpuinfo.");
74
75
namespace doris {
76
// Helper function to warn if a given file does not contain an expected string as its
77
// first line. If the file cannot be opened, no error is reported.
78
void WarnIfFileNotEqual(const string& filename, const string& expected,
79
0
                        const string& warning_text) {
80
0
    std::ifstream file(filename);
81
0
    if (!file) return;
82
0
    string line;
83
0
    getline(file, line);
84
0
    if (line != expected) {
85
0
        LOG(ERROR) << "Expected " << expected << ", actual " << line << std::endl << warning_text;
86
0
    }
87
0
}
88
} // namespace doris
89
90
namespace doris {
91
92
bool CpuInfo::initialized_ = false;
93
int64_t CpuInfo::hardware_flags_ = 0;
94
int64_t CpuInfo::original_hardware_flags_;
95
int64_t CpuInfo::cycles_per_ms_;
96
int CpuInfo::num_cores_ = 1;
97
int CpuInfo::max_num_cores_ = 1;
98
std::string CpuInfo::model_name_ = "unknown";
99
int CpuInfo::max_num_numa_nodes_;
100
std::unique_ptr<int[]> CpuInfo::core_to_numa_node_;
101
std::vector<vector<int>> CpuInfo::numa_node_to_cores_;
102
std::vector<int> CpuInfo::numa_node_core_idx_;
103
104
static struct {
105
    string name;
106
    int64_t flag;
107
} flag_mappings[] = {
108
        {"ssse3", CpuInfo::SSSE3},   {"sse4_1", CpuInfo::SSE4_1}, {"sse4_2", CpuInfo::SSE4_2},
109
        {"popcnt", CpuInfo::POPCNT}, {"avx", CpuInfo::AVX},       {"avx2", CpuInfo::AVX2},
110
};
111
112
1
int cgroup_bandwidth_quota(int physical_cores) {
113
1
    namespace fs = std::filesystem;
114
1
    fs::path cpu_max = "/sys/fs/cgroup/cpu.max";
115
1
    fs::path cfs_quota = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us";
116
1
    fs::path cfs_period = "/sys/fs/cgroup/cpu/cpu.cfs_period_us";
117
118
1
    int64_t quota, period;
119
1
    char byte_buffer[1000];
120
1
    int64_t read_bytes;
121
122
1
    if (fs::exists(cpu_max)) {
123
        // cgroup v2
124
        // https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
125
0
        std::ifstream file(cpu_max);
126
0
        file.read(byte_buffer, 999);
127
0
        read_bytes = file.gcount();
128
0
        byte_buffer[read_bytes] = '\0';
129
0
        if (sscanf(byte_buffer, "%" SCNd64 " %" SCNd64 "", &quota, &period) != 2) {
130
0
            return physical_cores;
131
0
        }
132
1
    } else if (fs::exists(cfs_quota) && fs::exists(cfs_period)) {
133
        // cgroup v1
134
        // https://www.kernel.org/doc/html/latest/scheduler/sched-bwc.html#management
135
136
        // Read the quota, this indicates how many microseconds the CPU can be utilized by this cgroup per period
137
1
        std::ifstream quota_file(cfs_quota);
138
1
        quota_file.read(byte_buffer, 999);
139
1
        read_bytes = quota_file.gcount();
140
1
        byte_buffer[read_bytes] = '\0';
141
1
        if (sscanf(byte_buffer, "%" SCNd64 "", &quota) != 1) {
142
0
            return physical_cores;
143
0
        }
144
145
        // Read the time period, a cgroup can utilize the CPU up to quota microseconds every period
146
1
        std::ifstream period_file(cfs_period);
147
1
        period_file.read(byte_buffer, 999);
148
1
        read_bytes = period_file.gcount();
149
1
        byte_buffer[read_bytes] = '\0';
150
1
        if (sscanf(byte_buffer, "%" SCNd64 "", &period) != 1) {
151
0
            return physical_cores;
152
0
        }
153
1
    } else {
154
        // No cgroup quota
155
0
        return physical_cores;
156
0
    }
157
1
    if (quota > 0 && period > 0) {
158
0
        return int64_t(ceil(double(quota) / double(period)));
159
1
    } else {
160
1
        return physical_cores;
161
1
    }
162
1
}
163
164
// Helper function to parse for hardware flags.
165
// values contains a list of space-separated flags.  check to see if the flags we
166
// care about are present.
167
// Returns a bitmap of flags.
168
8
int64_t ParseCPUFlags(const string& values) {
169
8
    int64_t flags = 0;
170
48
    for (auto& flag_mapping : flag_mappings) {
171
48
        if (contains(values, flag_mapping.name)) {
172
48
            flags |= flag_mapping.flag;
173
48
        }
174
48
    }
175
8
    return flags;
176
8
}
177
178
10
void CpuInfo::init() {
179
10
    if (initialized_) return;
180
1
    string line;
181
1
    string name;
182
1
    string value;
183
184
1
    float max_mhz = 0;
185
1
    int physical_num_cores = 0;
186
187
    // maybe use std::thread::hardware_concurrency()?
188
    // Read from /proc/cpuinfo
189
1
    std::ifstream cpuinfo("/proc/cpuinfo");
190
218
    while (cpuinfo) {
191
217
        getline(cpuinfo, line);
192
217
        size_t colon = line.find(':');
193
217
        if (colon != string::npos) {
194
208
            name = line.substr(0, colon - 1);
195
208
            value = line.substr(colon + 1, string::npos);
196
208
            trim(name);
197
208
            trim(value);
198
208
            if (name == "flags") {
199
8
                hardware_flags_ |= ParseCPUFlags(value);
200
200
            } else if (name == "cpu MHz") {
201
                // Every core will report a different speed.  We'll take the max, assuming
202
                // that when impala is running, the core will not be in a lower power state.
203
                // TODO: is there a more robust way to do this, such as
204
                // Window's QueryPerformanceFrequency()
205
8
                float mhz = atof(value.c_str());
206
8
                max_mhz = max(mhz, max_mhz);
207
192
            } else if (name == "processor") {
208
8
                ++physical_num_cores;
209
184
            } else if (name == "model name") {
210
8
                model_name_ = value;
211
8
            }
212
208
        }
213
217
    }
214
215
1
    int num_cores = cgroup_bandwidth_quota(physical_num_cores);
216
1
    if (max_mhz != 0) {
217
1
        cycles_per_ms_ = int64_t(max_mhz) * 1000;
218
1
    } else {
219
0
        cycles_per_ms_ = 1000000;
220
0
    }
221
1
    original_hardware_flags_ = hardware_flags_;
222
223
1
    if (num_cores > 0) {
224
1
        num_cores_ = num_cores;
225
1
    } else {
226
0
        num_cores_ = 1;
227
0
    }
228
1
    if (config::num_cores > 0) {
229
0
        num_cores_ = config::num_cores;
230
0
    }
231
232
#ifdef __APPLE__
233
    size_t len = sizeof(max_num_cores_);
234
    sysctlbyname("hw.logicalcpu", &max_num_cores_, &len, nullptr, 0);
235
#else
236
1
    max_num_cores_ = get_nprocs_conf();
237
1
#endif
238
239
    // Print a warning if something is wrong with sched_getcpu().
240
1
#ifdef HAVE_SCHED_GETCPU
241
1
    if (sched_getcpu() == -1) {
242
0
        LOG(WARNING) << "Kernel does not support sched_getcpu(). Performance may be impacted.";
243
0
    }
244
#else
245
    LOG(WARNING) << "Built on a system without sched_getcpu() support. Performance may"
246
                 << " be impacted.";
247
#endif
248
249
1
    _init_numa();
250
1
    initialized_ = true;
251
1
}
252
253
1
void CpuInfo::_init_numa() {
254
    // Use the NUMA info in the /sys filesystem. which is part of the Linux ABI:
255
    // see https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-devices-node and
256
    // https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-system-cpu
257
    // The filesystem entries are only present if the kernel was compiled with NUMA support.
258
1
    core_to_numa_node_.reset(new int[max_num_cores_]);
259
260
1
    if (!fs::is_directory("/sys/devices/system/node")) {
261
0
        LOG(WARNING) << "/sys/devices/system/node is not present - no NUMA support";
262
        // Assume a single NUMA node.
263
0
        max_num_numa_nodes_ = 1;
264
0
        std::fill_n(core_to_numa_node_.get(), max_num_cores_, 0);
265
0
        _init_numa_node_to_cores();
266
0
        return;
267
0
    }
268
269
    // Search for node subdirectories - node0, node1, node2, etc to determine possible
270
    // NUMA nodes.
271
1
    fs::directory_iterator dir_it("/sys/devices/system/node");
272
1
    max_num_numa_nodes_ = 0;
273
9
    for (; dir_it != fs::directory_iterator(); ++dir_it) {
274
8
        const string filename = dir_it->path().filename().string();
275
8
        if (filename.find("node") == 0) ++max_num_numa_nodes_;
276
8
    }
277
1
    if (max_num_numa_nodes_ == 0) {
278
0
        LOG(WARNING) << "Could not find nodes in /sys/devices/system/node";
279
0
        max_num_numa_nodes_ = 1;
280
0
    }
281
282
    // Check which NUMA node each core belongs to based on the existence of a symlink
283
    // to the node subdirectory.
284
9
    for (int core = 0; core < max_num_cores_; ++core) {
285
8
        bool found_numa_node = false;
286
8
        for (int node = 0; node < max_num_numa_nodes_; ++node) {
287
8
            if (fs::exists(
288
8
                        strings::Substitute("/sys/devices/system/cpu/cpu$0/node$1", core, node))) {
289
8
                core_to_numa_node_[core] = node;
290
8
                found_numa_node = true;
291
8
                break;
292
8
            }
293
8
        }
294
8
        if (!found_numa_node) {
295
0
            LOG(WARNING) << "Could not determine NUMA node for core " << core
296
0
                         << " from /sys/devices/system/cpu/";
297
0
            core_to_numa_node_[core] = 0;
298
0
        }
299
8
    }
300
1
    _init_numa_node_to_cores();
301
1
}
302
303
void CpuInfo::_init_fake_numa_for_test(int max_num_numa_nodes,
304
0
                                       const std::vector<int>& core_to_numa_node) {
305
0
    DCHECK_EQ(max_num_cores_, core_to_numa_node.size());
306
0
    max_num_numa_nodes_ = max_num_numa_nodes;
307
0
    for (int i = 0; i < max_num_cores_; ++i) {
308
0
        core_to_numa_node_[i] = core_to_numa_node[i];
309
0
    }
310
0
    numa_node_to_cores_.clear();
311
0
    _init_numa_node_to_cores();
312
0
}
313
314
1
void CpuInfo::_init_numa_node_to_cores() {
315
1
    DCHECK(numa_node_to_cores_.empty());
316
1
    numa_node_to_cores_.resize(max_num_numa_nodes_);
317
1
    numa_node_core_idx_.resize(max_num_cores_);
318
9
    for (int core = 0; core < max_num_cores_; ++core) {
319
8
        std::vector<int>* cores_of_node = &numa_node_to_cores_[core_to_numa_node_[core]];
320
8
        numa_node_core_idx_[core] = cores_of_node->size();
321
8
        cores_of_node->push_back(core);
322
8
    }
323
1
}
324
325
0
void CpuInfo::verify_cpu_requirements() {
326
0
    if (!CpuInfo::is_supported(CpuInfo::SSSE3)) {
327
0
        LOG(ERROR) << "CPU does not support the Supplemental SSE3 (SSSE3) instruction set. "
328
0
                   << "This setup is generally unsupported and Impala might be unstable.";
329
0
    }
330
0
}
331
332
0
void CpuInfo::verify_performance_governor() {
333
0
    for (int cpu_id = 0; cpu_id < CpuInfo::num_cores(); ++cpu_id) {
334
0
        const string governor_file = strings::Substitute(
335
0
                "/sys/devices/system/cpu/cpu$0/cpufreq/scaling_governor", cpu_id);
336
0
        const string warning_text = strings::Substitute(
337
0
                "WARNING: CPU $0 is not using 'performance' governor. Note that changing the "
338
0
                "governor to 'performance' will reset the no_turbo setting to 0.",
339
0
                cpu_id);
340
0
        WarnIfFileNotEqual(governor_file, "performance", warning_text);
341
0
    }
342
0
}
343
344
0
void CpuInfo::verify_turbo_disabled() {
345
0
    WarnIfFileNotEqual(
346
0
            "/sys/devices/system/cpu/intel_pstate/no_turbo", "1",
347
0
            "WARNING: CPU turbo is enabled. This setting can change the clock frequency of CPU "
348
0
            "cores during the benchmark run, which can lead to inaccurate results. You can "
349
0
            "disable CPU turbo by writing a 1 to "
350
0
            "/sys/devices/system/cpu/intel_pstate/no_turbo. Note that changing the governor to "
351
0
            "'performance' will reset this to 0.");
352
0
}
353
354
0
void CpuInfo::enable_feature(long flag, bool enable) {
355
0
    DCHECK(initialized_);
356
0
    if (!enable) {
357
0
        hardware_flags_ &= ~flag;
358
0
    } else {
359
        // Can't turn something on that can't be supported
360
0
        DCHECK((original_hardware_flags_ & flag) != 0);
361
0
        hardware_flags_ |= flag;
362
0
    }
363
0
}
364
365
0
int CpuInfo::get_current_core() {
366
    // sched_getcpu() is not supported on some old kernels/glibcs (like the versions that
367
    // shipped with CentOS 5). In that case just pretend we're always running on CPU 0
368
    // so that we can build and run with degraded perf.
369
0
#ifdef HAVE_SCHED_GETCPU
370
0
    int cpu = sched_getcpu();
371
0
    if (cpu < 0) return 0;
372
0
    if (cpu >= max_num_cores_) {
373
0
        LOG_FIRST_N(WARNING, 5) << "sched_getcpu() return value " << cpu
374
0
                                << ", which is greater than get_nprocs_conf() retrun value "
375
0
                                << max_num_cores_ << ", now is " << get_nprocs_conf();
376
0
        cpu %= max_num_cores_;
377
0
    }
378
0
    return cpu;
379
#else
380
    return 0;
381
#endif
382
0
}
383
384
void CpuInfo::_get_cache_info(long cache_sizes[NUM_CACHE_LEVELS],
385
0
                              long cache_line_sizes[NUM_CACHE_LEVELS]) {
386
#ifdef __APPLE__
387
    // On Mac OS X use sysctl() to get the cache sizes
388
    size_t len = 0;
389
    sysctlbyname("hw.cachesize", nullptr, &len, nullptr, 0);
390
    uint64_t* data = static_cast<uint64_t*>(malloc(len));
391
    sysctlbyname("hw.cachesize", data, &len, nullptr, 0);
392
#ifndef __arm64__
393
    DCHECK(len / sizeof(uint64_t) >= 3);
394
    for (size_t i = 0; i < NUM_CACHE_LEVELS; ++i) {
395
        cache_sizes[i] = data[i];
396
    }
397
#else
398
    for (size_t i = 0; i < NUM_CACHE_LEVELS; ++i) {
399
        cache_sizes[i] = data[i + 1];
400
    }
401
#endif
402
    size_t linesize;
403
    size_t sizeof_linesize = sizeof(linesize);
404
    sysctlbyname("hw.cachelinesize", &linesize, &sizeof_linesize, nullptr, 0);
405
    for (size_t i = 0; i < NUM_CACHE_LEVELS; ++i) cache_line_sizes[i] = linesize;
406
#else
407
    // Call sysconf to query for the cache sizes
408
    // Note: on some systems (e.g. RHEL 5 on AWS EC2), this returns 0 instead of the
409
    // actual cache line size.
410
0
    cache_sizes[L1_CACHE] = sysconf(_SC_LEVEL1_DCACHE_SIZE);
411
0
    cache_sizes[L2_CACHE] = sysconf(_SC_LEVEL2_CACHE_SIZE);
412
0
    cache_sizes[L3_CACHE] = sysconf(_SC_LEVEL3_CACHE_SIZE);
413
414
0
    cache_line_sizes[L1_CACHE] = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
415
0
    cache_line_sizes[L2_CACHE] = sysconf(_SC_LEVEL2_CACHE_LINESIZE);
416
0
    cache_line_sizes[L3_CACHE] = sysconf(_SC_LEVEL3_CACHE_LINESIZE);
417
0
#endif
418
0
}
419
420
0
std::string CpuInfo::debug_string() {
421
0
    DCHECK(initialized_);
422
0
    std::stringstream stream;
423
0
    long cache_sizes[NUM_CACHE_LEVELS];
424
0
    long cache_line_sizes[NUM_CACHE_LEVELS];
425
0
    _get_cache_info(cache_sizes, cache_line_sizes);
426
427
0
    string L1 = strings::Substitute(
428
0
            "L1 Cache: $0 (Line: $1)",
429
0
            PrettyPrinter::print(static_cast<int64_t>(cache_sizes[L1_CACHE]), TUnit::BYTES),
430
0
            PrettyPrinter::print(static_cast<int64_t>(cache_line_sizes[L1_CACHE]), TUnit::BYTES));
431
0
    string L2 = strings::Substitute(
432
0
            "L2 Cache: $0 (Line: $1)",
433
0
            PrettyPrinter::print(static_cast<int64_t>(cache_sizes[L2_CACHE]), TUnit::BYTES),
434
0
            PrettyPrinter::print(static_cast<int64_t>(cache_line_sizes[L2_CACHE]), TUnit::BYTES));
435
0
    string L3 =
436
0
            cache_sizes[L3_CACHE]
437
0
                    ? strings::Substitute(
438
0
                              "L3 Cache: $0 (Line: $1)",
439
0
                              PrettyPrinter::print(static_cast<int64_t>(cache_sizes[L3_CACHE]),
440
0
                                                   TUnit::BYTES),
441
0
                              PrettyPrinter::print(static_cast<int64_t>(cache_line_sizes[L3_CACHE]),
442
0
                                                   TUnit::BYTES))
443
0
                    : "";
444
0
    stream << "Cpu Info:" << std::endl
445
0
           << "  Model: " << model_name_ << std::endl
446
0
           << "  Cores: " << num_cores_ << std::endl
447
0
           << "  Max Possible Cores: " << max_num_cores_ << std::endl
448
0
           << "  " << L1 << std::endl
449
0
           << "  " << L2 << std::endl
450
0
           << "  " << L3 << std::endl
451
0
           << "  Hardware Supports:" << std::endl;
452
0
    for (auto& flag_mapping : flag_mappings) {
453
0
        if (is_supported(flag_mapping.flag)) {
454
0
            stream << "    " << flag_mapping.name << std::endl;
455
0
        }
456
0
    }
457
0
    stream << "  Numa Nodes: " << max_num_numa_nodes_ << std::endl;
458
0
    stream << "  Numa Nodes of Cores:";
459
0
    for (int core = 0; core < max_num_cores_; ++core) {
460
0
        stream << " " << core << "->" << core_to_numa_node_[core] << " |";
461
0
    }
462
0
    stream << std::endl;
463
0
    return stream.str();
464
0
}
465
466
} // namespace doris