/root/doris/be/src/util/cpu_info.cpp

Source (jump to first uncovered line)
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/cpu-info.cpp
// and modified by Doris

#include "util/cpu_info.h"

#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#elif defined(__GNUC__) && defined(__ARM_NEON__)
/* GCC-compatible compiler, targeting ARM with NEON */
#include <arm_neon.h>
#elif defined(__GNUC__) && defined(__IWMMXT__)
/* GCC-compatible compiler, targeting ARM with WMMX */
#include <mmintrin.h>
#elif (defined(__GNUC__) || defined(__xlC__)) && (defined(__VEC__) || defined(__ALTIVEC__))
/* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */
#include <altivec.h>
#elif defined(__GNUC__) && defined(__SPE__)
/* GCC-compatible compiler, targeting PowerPC with SPE */
#include <spe.h>
#endif

#ifndef __APPLE__
#include <sys/sysinfo.h>
#else
#include <sys/sysctl.h>
#endif

#include <gen_cpp/Metrics_types.h>
#include <sched.h>
#include <stdlib.h>
#include <unistd.h>

#include <algorithm>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/algorithm/string/trim.hpp>
// IWYU pragma: no_include <bits/chrono.h>
#include <chrono> // IWYU pragma: keep
#include <filesystem>
#include <fstream>

#include "common/config.h"
#include "common/env_config.h"
#include "gflags/gflags.h"
#include "gutil/stringprintf.h"
#include "gutil/strings/substitute.h"
#include "util/pretty_printer.h"

using boost::algorithm::contains;
using boost::algorithm::trim;
namespace fs = std::filesystem;
using std::max;

// gflags declaration of a boolean flag; the matching definition is expected to live in
// another translation unit.
// NOTE(review): no code visible in this file reads abort_on_config_error - confirm it
// is still needed.
DECLARE_bool(abort_on_config_error);
// gflags definition of the integer flag `num_cores` (default 0).
// NOTE(review): CpuInfo::init() in this file applies config::num_cores, not this
// flag, and the help text still refers to Impala - confirm whether the flag is
// still consulted anywhere.
DEFINE_int32(num_cores, 0,
             "(Advanced) If > 0, it sets the number of cores available to"
             " Impala. Setting it to 0 means Impala will use all available cores on the machine"
             " according to /proc/cpuinfo.");

namespace doris {
// Helper function to warn if a given file does not contain an expected string as its
// first line. If the file cannot be opened, no error is reported.
void WarnIfFileNotEqual(const string& filename, const string& expected,
                        const string& warning_text) {
    std::ifstream file(filename);
    if (!file) return;
    string line;
    getline(file, line);
    if (line != expected) {
        LOG(ERROR) << "Expected " << expected << ", actual " << line << std::endl << warning_text;
    }
}
} // namespace doris

namespace doris {

// Set to true at the end of init(); init() returns immediately once it is set.
bool CpuInfo::initialized_ = false;
// Bitmap of the currently enabled hardware features: filled from the "flags" lines of
// /proc/cpuinfo in init() and adjusted by enable_feature().
int64_t CpuInfo::hardware_flags_ = 0;
// Copy of hardware_flags_ taken in init(); enable_feature() DCHECKs against it before
// switching a feature on.
int64_t CpuInfo::original_hardware_flags_;
// Derived in init() from the highest "cpu MHz" value in /proc/cpuinfo, or 1000000 when
// no such value was found.
int64_t CpuInfo::cycles_per_ms_;
// Number of cores to use: the result of cgroup_bandwidth_quota() (at least 1),
// replaced by config::num_cores when that setting is > 0.
int CpuInfo::num_cores_ = 1;
// Taken from get_nprocs_conf(), or from the "hw.logicalcpu" sysctl on Apple platforms.
int CpuInfo::max_num_cores_ = 1;
// Value of the last "model name" entry read from /proc/cpuinfo.
std::string CpuInfo::model_name_ = "unknown";
// Number of "node*" entries found under /sys/devices/system/node, or 1 when none exist.
int CpuInfo::max_num_numa_nodes_;
// Array of max_num_cores_ entries mapping a core id to its NUMA node id.
std::unique_ptr<int[]> CpuInfo::core_to_numa_node_;
// For every NUMA node id, the ids of the cores assigned to that node.
std::vector<vector<int>> CpuInfo::numa_node_to_cores_;
// For every core id, the position of that core within its node's list above.
std::vector<int> CpuInfo::numa_node_core_idx_;

// Table pairing the textual name of a CPU feature with the CpuInfo bit representing it.
// ParseCPUFlags() looks the names up in the "flags" value of /proc/cpuinfo and
// debug_string() prints the names of the supported features in table order.
static struct {
    string name;
    int64_t flag;
} flag_mappings[] = {
        {"ssse3", CpuInfo::SSSE3},   {"sse4_1", CpuInfo::SSE4_1}, {"sse4_2", CpuInfo::SSE4_2},
        {"popcnt", CpuInfo::POPCNT}, {"avx", CpuInfo::AVX},       {"avx2", CpuInfo::AVX2},
};

// Returns at most the first 999 bytes of `path` as a string. A file that cannot be
// opened or read yields an empty string, which the sscanf() calls in
// cgroup_bandwidth_quota() reject.
static std::string read_cgroup_file_head(const std::filesystem::path& path) {
    char buffer[1000];
    std::ifstream file(path);
    file.read(buffer, sizeof(buffer) - 1);
    return std::string(buffer, static_cast<size_t>(file.gcount()));
}

// Returns the number of cores usable by this process once the cgroup CPU bandwidth
// limit is applied.
//
// physical_cores: core count detected on the machine; returned unchanged whenever no
//                 usable quota is found (no cgroup files, unparsable content, or a
//                 non-positive quota/period such as the cgroup v1 "-1" = unlimited).
//
// With a valid quota the result is ceil(quota / period). When physical_cores is
// positive the result is additionally capped at physical_cores, because a quota
// larger than the machine cannot provide more parallelism than the cores that exist.
int cgroup_bandwidth_quota(int physical_cores) {
    const std::filesystem::path cpu_max = "/sys/fs/cgroup/cpu.max";
    const std::filesystem::path cfs_quota = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us";
    const std::filesystem::path cfs_period = "/sys/fs/cgroup/cpu/cpu.cfs_period_us";

    // The error_code overloads report lookup failures (e.g. permission denied) as
    // "does not exist" instead of throwing std::filesystem::filesystem_error.
    std::error_code ec;
    int64_t quota = 0;
    int64_t period = 0;

    if (std::filesystem::exists(cpu_max, ec)) {
        // cgroup v2: a single file holding "<quota> <period>"; an unlimited group
        // stores the word "max" as quota, which fails to parse and falls back below.
        // https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
        const std::string text = read_cgroup_file_head(cpu_max);
        if (sscanf(text.c_str(), "%" SCNd64 " %" SCNd64 "", &quota, &period) != 2) {
            return physical_cores;
        }
    } else if (std::filesystem::exists(cfs_quota, ec) &&
               std::filesystem::exists(cfs_period, ec)) {
        // cgroup v1
        // https://www.kernel.org/doc/html/latest/scheduler/sched-bwc.html#management

        // Quota: microseconds of CPU time the cgroup may consume per period.
        const std::string quota_text = read_cgroup_file_head(cfs_quota);
        if (sscanf(quota_text.c_str(), "%" SCNd64 "", &quota) != 1) {
            return physical_cores;
        }

        // Period: length in microseconds of the window the quota applies to.
        const std::string period_text = read_cgroup_file_head(cfs_period);
        if (sscanf(period_text.c_str(), "%" SCNd64 "", &period) != 1) {
            return physical_cores;
        }
    } else {
        // No cgroup quota
        return physical_cores;
    }

    if (quota <= 0 || period <= 0) {
        return physical_cores;
    }

    // Integer ceiling division; exact and free of floating point rounding.
    int64_t limited_cores = quota / period + (quota % period != 0 ? 1 : 0);
    if (physical_cores > 0) {
        limited_cores = std::min<int64_t>(limited_cores, physical_cores);
    }
    return static_cast<int>(limited_cores);
}

// Helper function to parse for hardware flags.
// values contains a list of space-separated flags.  check to see if the flags we
// care about are present.
// Returns a bitmap of flags.
int64_t ParseCPUFlags(const string& values) {
    int64_t flags = 0;
    for (auto& flag_mapping : flag_mappings) {
        if (contains(values, flag_mapping.name)) {
            flags |= flag_mapping.flag;
        }
    }
    return flags;
}

// Populates the static CPU description; calls made after the first successful one
// return immediately.
//
// /proc/cpuinfo is scanned for the feature flags, the highest reported clock speed, the
// number of "processor" entries and the model name. The usable core count is the
// processor count limited by cgroup_bandwidth_quota() (at least 1) and is replaced by
// config::num_cores when that setting is > 0. The configured core count of the machine
// and the NUMA layout are recorded last.
void CpuInfo::init() {
    if (initialized_) return;
    string line;
    string name;
    string value;

    double max_mhz = 0;
    int physical_num_cores = 0;

    // maybe use std::thread::hardware_concurrency()?
    // Read from /proc/cpuinfo
    std::ifstream cpuinfo("/proc/cpuinfo");
    // Loop on the result of getline() itself: testing the stream before reading would
    // run the body once more after the final read failed, and when the file does not
    // end with a newline that extra pass would see (and count) the last line twice.
    while (getline(cpuinfo, line)) {
        const size_t colon = line.find(':');
        if (colon == string::npos) {
            continue;
        }
        // Take the whole key up to the colon; trim() removes the padding in front of
        // the colon, so no character of the key is lost when there is no padding.
        name = line.substr(0, colon);
        value = line.substr(colon + 1);
        trim(name);
        trim(value);
        if (name == "flags") {
            hardware_flags_ |= ParseCPUFlags(value);
        } else if (name == "cpu MHz") {
            // Every core will report a different speed.  We'll take the max, assuming
            // that while the process is running, the core will not be in a lower power
            // state.
            // TODO: is there a more robust way to do this, such as
            // Windows' QueryPerformanceFrequency()
            const double mhz = atof(value.c_str());
            max_mhz = max(mhz, max_mhz);
        } else if (name == "processor") {
            ++physical_num_cores;
        } else if (name == "model name") {
            model_name_ = value;
        }
    }

    int num_cores = cgroup_bandwidth_quota(physical_num_cores);
    if (max_mhz != 0) {
        // Scale before converting to an integer so the fractional MHz are kept.
        cycles_per_ms_ = static_cast<int64_t>(max_mhz * 1000);
    } else {
        cycles_per_ms_ = 1000000;
    }
    original_hardware_flags_ = hardware_flags_;

    if (num_cores > 0) {
        num_cores_ = num_cores;
    } else {
        num_cores_ = 1;
    }
    // An explicit configuration value takes precedence over the detected count.
    if (config::num_cores > 0) {
        num_cores_ = config::num_cores;
    }

#ifdef __APPLE__
    size_t len = sizeof(max_num_cores_);
    sysctlbyname("hw.logicalcpu", &max_num_cores_, &len, nullptr, 0);
#else
    max_num_cores_ = get_nprocs_conf();
#endif

    // Print a warning if something is wrong with sched_getcpu().
#ifdef HAVE_SCHED_GETCPU
    if (sched_getcpu() == -1) {
        LOG(WARNING) << "Kernel does not support sched_getcpu(). Performance may be impacted.";
    }
#else
    LOG(WARNING) << "Built on a system without sched_getcpu() support. Performance may"
                 << " be impacted.";
#endif

    _init_numa();
    initialized_ = true;
}

void CpuInfo::_init_numa() {
    // Use the NUMA info in the /sys filesystem. which is part of the Linux ABI:
    // see https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-devices-node and
    // https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-system-cpu
    // The filesystem entries are only present if the kernel was compiled with NUMA support.
    core_to_numa_node_.reset(new int[max_num_cores_]);

    if (!fs::is_directory("/sys/devices/system/node")) {
        LOG(WARNING) << "/sys/devices/system/node is not present - no NUMA support";
        // Assume a single NUMA node.
        max_num_numa_nodes_ = 1;
        std::fill_n(core_to_numa_node_.get(), max_num_cores_, 0);
        _init_numa_node_to_cores();
        return;
    }

    // Search for node subdirectories - node0, node1, node2, etc to determine possible
    // NUMA nodes.
    fs::directory_iterator dir_it("/sys/devices/system/node");
    max_num_numa_nodes_ = 0;
    for (; dir_it != fs::directory_iterator(); ++dir_it) {
        const string filename = dir_it->path().filename().string();
        if (filename.find("node") == 0) ++max_num_numa_nodes_;
    }
    if (max_num_numa_nodes_ == 0) {
        LOG(WARNING) << "Could not find nodes in /sys/devices/system/node";
        max_num_numa_nodes_ = 1;
    }

    // Check which NUMA node each core belongs to based on the existence of a symlink
    // to the node subdirectory.
    for (int core = 0; core < max_num_cores_; ++core) {
        bool found_numa_node = false;
        for (int node = 0; node < max_num_numa_nodes_; ++node) {
            if (fs::exists(
                        strings::Substitute("/sys/devices/system/cpu/cpu$0/node$1", core, node))) {
                core_to_numa_node_[core] = node;
                found_numa_node = true;
                break;
            }
        }
        if (!found_numa_node) {
            LOG(WARNING) << "Could not determine NUMA node for core " << core
                         << " from /sys/devices/system/cpu/";
            core_to_numa_node_[core] = 0;
        }
    }
    _init_numa_node_to_cores();
}

// Replaces the detected NUMA layout with the supplied one and rebuilds the per-node
// core lists.
// max_num_numa_nodes: node count to install.
// core_to_numa_node:  node id for every core; must have max_num_cores_ entries
//                     (checked with DCHECK_EQ).
void CpuInfo::_init_fake_numa_for_test(int max_num_numa_nodes,
                                       const std::vector<int>& core_to_numa_node) {
    DCHECK_EQ(max_num_cores_, core_to_numa_node.size());
    max_num_numa_nodes_ = max_num_numa_nodes;
    std::copy_n(core_to_numa_node.begin(), max_num_cores_, core_to_numa_node_.get());
    // _init_numa_node_to_cores() expects to start from an empty node list.
    numa_node_to_cores_.clear();
    _init_numa_node_to_cores();
}

// Derives numa_node_to_cores_ and numa_node_core_idx_ from core_to_numa_node_.
// Must start from an empty numa_node_to_cores_ (checked with DCHECK).
void CpuInfo::_init_numa_node_to_cores() {
    DCHECK(numa_node_to_cores_.empty());
    numa_node_to_cores_.resize(max_num_numa_nodes_);
    numa_node_core_idx_.resize(max_num_cores_);
    for (int core_id = 0; core_id < max_num_cores_; ++core_id) {
        std::vector<int>& node_cores = numa_node_to_cores_[core_to_numa_node_[core_id]];
        // The core's index inside its node is the list length before it is appended.
        numa_node_core_idx_[core_id] = node_cores.size();
        node_cores.push_back(core_id);
    }
}

// Logs an error when the CPU lacks SSSE3 support. Nothing is returned or thrown; the
// check is purely informational.
void CpuInfo::verify_cpu_requirements() {
    if (CpuInfo::is_supported(CpuInfo::SSSE3)) {
        return;
    }
    // The message names Doris: the upstream text still referred to Impala, the project
    // this file was copied from.
    LOG(ERROR) << "CPU does not support the Supplemental SSE3 (SSSE3) instruction set. "
               << "This setup is generally unsupported and Doris might be unstable.";
}

void CpuInfo::verify_performance_governor() {
    for (int cpu_id = 0; cpu_id < CpuInfo::num_cores(); ++cpu_id) {
        const string governor_file = strings::Substitute(
                "/sys/devices/system/cpu/cpu$0/cpufreq/scaling_governor", cpu_id);
        const string warning_text = strings::Substitute(
                "WARNING: CPU $0 is not using 'performance' governor. Note that changing the "
                "governor to 'performance' will reset the no_turbo setting to 0.",
                cpu_id);
        WarnIfFileNotEqual(governor_file, "performance", warning_text);
    }
}

// Logs an error via WarnIfFileNotEqual() unless the first line of the intel_pstate
// no_turbo file is "1". Nothing is reported when the file cannot be opened.
void CpuInfo::verify_turbo_disabled() {
    static const char* const kNoTurboFile = "/sys/devices/system/cpu/intel_pstate/no_turbo";
    static const char* const kTurboEnabledWarning =
            "WARNING: CPU turbo is enabled. This setting can change the clock frequency of CPU "
            "cores during the benchmark run, which can lead to inaccurate results. You can "
            "disable CPU turbo by writing a 1 to "
            "/sys/devices/system/cpu/intel_pstate/no_turbo. Note that changing the governor to "
            "'performance' will reset this to 0.";
    WarnIfFileNotEqual(kNoTurboFile, "1", kTurboEnabledWarning);
}

// Switches the feature bit(s) in `flag` on or off in hardware_flags_.
// flag:   bit(s) to change.
// enable: true sets the bits, false clears them.
// Requires init() to have run (DCHECK). Enabling is only valid for bits that were
// detected during init(), which is also enforced with a DCHECK.
void CpuInfo::enable_feature(long flag, bool enable) {
    DCHECK(initialized_);
    if (enable) {
        // A feature the hardware never reported cannot be turned on.
        DCHECK((original_hardware_flags_ & flag) != 0);
        hardware_flags_ |= flag;
        return;
    }
    hardware_flags_ &= ~flag;
}

// Returns the id of the core the caller is running on, always within
// [0, max_num_cores_). Returns 0 when sched_getcpu() is unavailable or fails.
int CpuInfo::get_current_core() {
    // Some old kernels/glibcs (like the versions that shipped with CentOS 5) have no
    // sched_getcpu(). Builds without it report CPU 0 for every call so that the code
    // still builds and runs, with degraded perf.
#ifndef HAVE_SCHED_GETCPU
    return 0;
#else
    const int current = sched_getcpu();
    if (current < 0) {
        return 0;
    }
    if (current < max_num_cores_) {
        return current;
    }
    // The reported id lies outside the configured range: log it (first five times
    // only) and wrap it back into range.
    LOG_FIRST_N(WARNING, 5) << "sched_getcpu() return value " << current
                            << ", which is greater than get_nprocs_conf() retrun value "
                            << max_num_cores_ << ", now is " << get_nprocs_conf();
    return current % max_num_cores_;
#endif
}

// Queries the operating system for the size and the line size of each cache level.
// cache_sizes:      out, NUM_CACHE_LEVELS entries, cache size per level.
// cache_line_sizes: out, NUM_CACHE_LEVELS entries, cache line size per level.
// Apple platforms use sysctlbyname(); all other platforms use sysconf().
void CpuInfo::_get_cache_info(long cache_sizes[NUM_CACHE_LEVELS],
                              long cache_line_sizes[NUM_CACHE_LEVELS]) {
#ifdef __APPLE__
    // On Mac OS X use sysctl() to get the cache sizes. The first call only asks for
    // the required buffer length.
    size_t len = 0;
    sysctlbyname("hw.cachesize", nullptr, &len, nullptr, 0);
    // A vector owns the buffer, so it is released on every path (the previous
    // malloc()ed buffer was never freed). Entries start at 0 in case the query fails.
    std::vector<uint64_t> data(len / sizeof(uint64_t), 0);
    // Never advertise more bytes than the vector actually holds.
    len = data.size() * sizeof(uint64_t);
    if (!data.empty()) {
        sysctlbyname("hw.cachesize", data.data(), &len, nullptr, 0);
    }
#ifndef __arm64__
    DCHECK(len / sizeof(uint64_t) >= 3);
    const size_t first_entry = 0;
#else
    // NOTE(review): arm64 builds skip the first hw.cachesize entry - presumably it does
    // not describe a cache level there; confirm against the sysctl documentation.
    const size_t first_entry = 1;
#endif
    for (size_t i = 0; i < NUM_CACHE_LEVELS; ++i) {
        // Report 0 for a level the kernel did not return instead of reading past the
        // end of the buffer.
        const size_t entry = first_entry + i;
        cache_sizes[i] = entry < data.size() ? static_cast<long>(data[entry]) : 0;
    }
    // Initialized so that a failed query reports 0 rather than an indeterminate value.
    size_t linesize = 0;
    size_t sizeof_linesize = sizeof(linesize);
    sysctlbyname("hw.cachelinesize", &linesize, &sizeof_linesize, nullptr, 0);
    for (size_t i = 0; i < NUM_CACHE_LEVELS; ++i) cache_line_sizes[i] = linesize;
#else
    // Call sysconf to query for the cache sizes
    // Note: on some systems (e.g. RHEL 5 on AWS EC2), this returns 0 instead of the
    // actual cache line size.
    cache_sizes[L1_CACHE] = sysconf(_SC_LEVEL1_DCACHE_SIZE);
    cache_sizes[L2_CACHE] = sysconf(_SC_LEVEL2_CACHE_SIZE);
    cache_sizes[L3_CACHE] = sysconf(_SC_LEVEL3_CACHE_SIZE);

    cache_line_sizes[L1_CACHE] = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
    cache_line_sizes[L2_CACHE] = sysconf(_SC_LEVEL2_CACHE_LINESIZE);
    cache_line_sizes[L3_CACHE] = sysconf(_SC_LEVEL3_CACHE_LINESIZE);
#endif
}

std::string CpuInfo::debug_string() {
    DCHECK(initialized_);
    std::stringstream stream;
    long cache_sizes[NUM_CACHE_LEVELS];
    long cache_line_sizes[NUM_CACHE_LEVELS];
    _get_cache_info(cache_sizes, cache_line_sizes);

    string L1 = strings::Substitute(
            "L1 Cache: $0 (Line: $1)",
            PrettyPrinter::print(static_cast<int64_t>(cache_sizes[L1_CACHE]), TUnit::BYTES),
            PrettyPrinter::print(static_cast<int64_t>(cache_line_sizes[L1_CACHE]), TUnit::BYTES));
    string L2 = strings::Substitute(
            "L2 Cache: $0 (Line: $1)",
            PrettyPrinter::print(static_cast<int64_t>(cache_sizes[L2_CACHE]), TUnit::BYTES),
            PrettyPrinter::print(static_cast<int64_t>(cache_line_sizes[L2_CACHE]), TUnit::BYTES));
    string L3 =
            cache_sizes[L3_CACHE]
                    ? strings::Substitute(
                              "L3 Cache: $0 (Line: $1)",
                              PrettyPrinter::print(static_cast<int64_t>(cache_sizes[L3_CACHE]),
                                                   TUnit::BYTES),
                              PrettyPrinter::print(static_cast<int64_t>(cache_line_sizes[L3_CACHE]),
                                                   TUnit::BYTES))
                    : "";
    stream << "Cpu Info:" << std::endl
           << "  Model: " << model_name_ << std::endl
           << "  Cores: " << num_cores_ << std::endl
           << "  Max Possible Cores: " << max_num_cores_ << std::endl
           << "  " << L1 << std::endl
           << "  " << L2 << std::endl
           << "  " << L3 << std::endl
           << "  Hardware Supports:" << std::endl;
    for (auto& flag_mapping : flag_mappings) {
        if (is_supported(flag_mapping.flag)) {
            stream << "    " << flag_mapping.name << std::endl;
        }
    }
    stream << "  Numa Nodes: " << max_num_numa_nodes_ << std::endl;
    stream << "  Numa Nodes of Cores:";
    for (int core = 0; core < max_num_cores_; ++core) {
        stream << " " << core << "->" << core_to_numa_node_[core] << " |";
    }
    stream << std::endl;
    return stream.str();
}

} // namespace doris

Coverage Report

Created: 2024-11-18 11:49

Line	Count	Source (jump to first uncovered line)
1		// Licensed to the Apache Software Foundation (ASF) under one
2		// or more contributor license agreements. See the NOTICE file
3		// distributed with this work for additional information
4		// regarding copyright ownership. The ASF licenses this file
5		// to you under the Apache License, Version 2.0 (the
6		// "License"); you may not use this file except in compliance
7		// with the License. You may obtain a copy of the License at
8		//
9		// http://www.apache.org/licenses/LICENSE-2.0
10		//
11		// Unless required by applicable law or agreed to in writing,
12		// software distributed under the License is distributed on an
13		// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14		// KIND, either express or implied. See the License for the
15		// specific language governing permissions and limitations
16		// under the License.
17		// This file is copied from
18		// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/cpu-info.cpp
19		// and modified by Doris
20
21		#include "util/cpu_info.h"
22
23		#if defined(__GNUC__) && (defined(__x86_64__) \|\| defined(__i386__))
24		#elif defined(__GNUC__) && defined(__ARM_NEON__)
25		/* GCC-compatible compiler, targeting ARM with NEON */
26		#include <arm_neon.h>
27		#elif defined(__GNUC__) && defined(__IWMMXT__)
28		/* GCC-compatible compiler, targeting ARM with WMMX */
29		#include <mmintrin.h>
30		#elif (defined(__GNUC__) \|\| defined(__xlC__)) && (defined(__VEC__) \|\| defined(__ALTIVEC__))
31		/* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */
32		#include <altivec.h>
33		#elif defined(__GNUC__) && defined(__SPE__)
34		/* GCC-compatible compiler, targeting PowerPC with SPE */
35		#include <spe.h>
36		#endif
37
38		#ifndef __APPLE__
39		#include <sys/sysinfo.h>
40		#else
41		#include <sys/sysctl.h>
42		#endif
43
44		#include <gen_cpp/Metrics_types.h>
45		#include <sched.h>
46		#include <stdlib.h>
47		#include <unistd.h>
48
49		#include <algorithm>
50		#include <boost/algorithm/string/predicate.hpp>
51		#include <boost/algorithm/string/trim.hpp>
52		// IWYU pragma: no_include <bits/chrono.h>
53		#include <chrono> // IWYU pragma: keep
54		#include <filesystem>
55		#include <fstream>
56
57		#include "common/config.h"
58		#include "common/env_config.h"
59		#include "gflags/gflags.h"
60		#include "gutil/stringprintf.h"
61		#include "gutil/strings/substitute.h"
62		#include "util/pretty_printer.h"
63
64		using boost::algorithm::contains;
65		using boost::algorithm::trim;
66		namespace fs = std::filesystem;
67		using std::max;
68
69		DECLARE_bool(abort_on_config_error);
70		DEFINE_int32(num_cores, 0,
71		"(Advanced) If > 0, it sets the number of cores available to"
72		" Impala. Setting it to 0 means Impala will use all available cores on the machine"
73		" according to /proc/cpuinfo.");
74
75		namespace doris {
76		// Helper function to warn if a given file does not contain an expected string as its
77		// first line. If the file cannot be opened, no error is reported.
78		void WarnIfFileNotEqual(const string& filename, const string& expected,
79	0	const string& warning_text) {
80	0	std::ifstream file(filename);
81	0	if (!file) return;
82	0	string line;
83	0	getline(file, line);
84	0	if (line != expected) {
85	0	LOG(ERROR) << "Expected " << expected << ", actual " << line << std::endl << warning_text;
86	0	}
87	0	}
88		} // namespace doris
89
90		namespace doris {
91
92		bool CpuInfo::initialized_ = false;
93		int64_t CpuInfo::hardware_flags_ = 0;
94		int64_t CpuInfo::original_hardware_flags_;
95		int64_t CpuInfo::cycles_per_ms_;
96		int CpuInfo::num_cores_ = 1;
97		int CpuInfo::max_num_cores_ = 1;
98		std::string CpuInfo::model_name_ = "unknown";
99		int CpuInfo::max_num_numa_nodes_;
100		std::unique_ptr<int[]> CpuInfo::core_to_numa_node_;
101		std::vector<vector<int>> CpuInfo::numa_node_to_cores_;
102		std::vector<int> CpuInfo::numa_node_core_idx_;
103
104		static struct {
105		string name;
106		int64_t flag;
107		} flag_mappings[] = {
108		{"ssse3", CpuInfo::SSSE3}, {"sse4_1", CpuInfo::SSE4_1}, {"sse4_2", CpuInfo::SSE4_2},
109		{"popcnt", CpuInfo::POPCNT}, {"avx", CpuInfo::AVX}, {"avx2", CpuInfo::AVX2},
110		};
111
112	1	int cgroup_bandwidth_quota(int physical_cores) {
113	1	namespace fs = std::filesystem;
114	1	fs::path cpu_max = "/sys/fs/cgroup/cpu.max";
115	1	fs::path cfs_quota = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us";
116	1	fs::path cfs_period = "/sys/fs/cgroup/cpu/cpu.cfs_period_us";
117
118	1	int64_t quota, period;
119	1	char byte_buffer[1000];
120	1	int64_t read_bytes;
121
122	1	if (fs::exists(cpu_max)) {
123		// cgroup v2
124		// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
125	0	std::ifstream file(cpu_max);
126	0	file.read(byte_buffer, 999);
127	0	read_bytes = file.gcount();
128	0	byte_buffer[read_bytes] = '\0';
129	0	if (sscanf(byte_buffer, "%" SCNd64 " %" SCNd64 "", &quota, &period) != 2) {
130	0	return physical_cores;
131	0	}
132	1	} else if (fs::exists(cfs_quota) && fs::exists(cfs_period)) {
133		// cgroup v1
134		// https://www.kernel.org/doc/html/latest/scheduler/sched-bwc.html#management
135
136		// Read the quota, this indicates how many microseconds the CPU can be utilized by this cgroup per period
137	1	std::ifstream quota_file(cfs_quota);
138	1	quota_file.read(byte_buffer, 999);
139	1	read_bytes = quota_file.gcount();
140	1	byte_buffer[read_bytes] = '\0';
141	1	if (sscanf(byte_buffer, "%" SCNd64 "", &quota) != 1) {
142	0	return physical_cores;
143	0	}
144
145		// Read the time period, a cgroup can utilize the CPU up to quota microseconds every period
146	1	std::ifstream period_file(cfs_period);
147	1	period_file.read(byte_buffer, 999);
148	1	read_bytes = period_file.gcount();
149	1	byte_buffer[read_bytes] = '\0';
150	1	if (sscanf(byte_buffer, "%" SCNd64 "", &period) != 1) {
151	0	return physical_cores;
152	0	}
153	1	} else {
154		// No cgroup quota
155	0	return physical_cores;
156	0	}
157	1	if (quota > 0 && period > 0) {
158	0	return int64_t(ceil(double(quota) / double(period)));
159	1	} else {
160	1	return physical_cores;
161	1	}
162	1	}
163
164		// Helper function to parse for hardware flags.
165		// values contains a list of space-separated flags. check to see if the flags we
166		// care about are present.
167		// Returns a bitmap of flags.
168	8	int64_t ParseCPUFlags(const string& values) {
169	8	int64_t flags = 0;
170	48	for (auto& flag_mapping : flag_mappings) {
171	48	if (contains(values, flag_mapping.name)) {
172	48	flags \|= flag_mapping.flag;
173	48	}
174	48	}
175	8	return flags;
176	8	}
177
178	10	void CpuInfo::init() {
179	10	if (initialized_) return;
180	1	string line;
181	1	string name;
182	1	string value;
183
184	1	float max_mhz = 0;
185	1	int physical_num_cores = 0;
186
187		// maybe use std::thread::hardware_concurrency()?
188		// Read from /proc/cpuinfo
189	1	std::ifstream cpuinfo("/proc/cpuinfo");
190	218	while (cpuinfo) {
191	217	getline(cpuinfo, line);
192	217	size_t colon = line.find(':');
193	217	if (colon != string::npos) {
194	208	name = line.substr(0, colon - 1);
195	208	value = line.substr(colon + 1, string::npos);
196	208	trim(name);
197	208	trim(value);
198	208	if (name == "flags") {
199	8	hardware_flags_ \|= ParseCPUFlags(value);
200	200	} else if (name == "cpu MHz") {
201		// Every core will report a different speed. We'll take the max, assuming
202		// that when impala is running, the core will not be in a lower power state.
203		// TODO: is there a more robust way to do this, such as
204		// Window's QueryPerformanceFrequency()
205	8	float mhz = atof(value.c_str());
206	8	max_mhz = max(mhz, max_mhz);
207	192	} else if (name == "processor") {
208	8	++physical_num_cores;
209	184	} else if (name == "model name") {
210	8	model_name_ = value;
211	8	}
212	208	}
213	217	}
214
215	1	int num_cores = cgroup_bandwidth_quota(physical_num_cores);
216	1	if (max_mhz != 0) {
217	1	cycles_per_ms_ = int64_t(max_mhz) * 1000;
218	1	} else {
219	0	cycles_per_ms_ = 1000000;
220	0	}
221	1	original_hardware_flags_ = hardware_flags_;
222
223	1	if (num_cores > 0) {
224	1	num_cores_ = num_cores;
225	1	} else {
226	0	num_cores_ = 1;
227	0	}
228	1	if (config::num_cores > 0) {
229	0	num_cores_ = config::num_cores;
230	0	}
231
232		#ifdef __APPLE__
233		size_t len = sizeof(max_num_cores_);
234		sysctlbyname("hw.logicalcpu", &max_num_cores_, &len, nullptr, 0);
235		#else
236	1	max_num_cores_ = get_nprocs_conf();
237	1	#endif
238
239		// Print a warning if something is wrong with sched_getcpu().
240	1	#ifdef HAVE_SCHED_GETCPU
241	1	if (sched_getcpu() == -1) {
242	0	LOG(WARNING) << "Kernel does not support sched_getcpu(). Performance may be impacted.";
243	0	}
244		#else
245		LOG(WARNING) << "Built on a system without sched_getcpu() support. Performance may"
246		<< " be impacted.";
247		#endif
248
249	1	_init_numa();
250	1	initialized_ = true;
251	1	}
252
253	1	void CpuInfo::_init_numa() {
254		// Use the NUMA info in the /sys filesystem. which is part of the Linux ABI:
255		// see https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-devices-node and
256		// https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-system-cpu
257		// The filesystem entries are only present if the kernel was compiled with NUMA support.
258	1	core_to_numa_node_.reset(new int[max_num_cores_]);
259
260	1	if (!fs::is_directory("/sys/devices/system/node")) {
261	0	LOG(WARNING) << "/sys/devices/system/node is not present - no NUMA support";
262		// Assume a single NUMA node.
263	0	max_num_numa_nodes_ = 1;
264	0	std::fill_n(core_to_numa_node_.get(), max_num_cores_, 0);
265	0	_init_numa_node_to_cores();
266	0	return;
267	0	}
268
269		// Search for node subdirectories - node0, node1, node2, etc to determine possible
270		// NUMA nodes.
271	1	fs::directory_iterator dir_it("/sys/devices/system/node");
272	1	max_num_numa_nodes_ = 0;
273	9	for (; dir_it != fs::directory_iterator(); ++dir_it) {
274	8	const string filename = dir_it->path().filename().string();
275	8	if (filename.find("node") == 0) ++max_num_numa_nodes_;
276	8	}
277	1	if (max_num_numa_nodes_ == 0) {
278	0	LOG(WARNING) << "Could not find nodes in /sys/devices/system/node";
279	0	max_num_numa_nodes_ = 1;
280	0	}
281
282		// Check which NUMA node each core belongs to based on the existence of a symlink
283		// to the node subdirectory.
284	9	for (int core = 0; core < max_num_cores_; ++core) {
285	8	bool found_numa_node = false;
286	8	for (int node = 0; node < max_num_numa_nodes_; ++node) {
287	8	if (fs::exists(
288	8	strings::Substitute("/sys/devices/system/cpu/cpu$0/node$1", core, node))) {
289	8	core_to_numa_node_[core] = node;
290	8	found_numa_node = true;
291	8	break;
292	8	}
293	8	}
294	8	if (!found_numa_node) {
295	0	LOG(WARNING) << "Could not determine NUMA node for core " << core
296	0	<< " from /sys/devices/system/cpu/";
297	0	core_to_numa_node_[core] = 0;
298	0	}
299	8	}
300	1	_init_numa_node_to_cores();
301	1	}
302
303		void CpuInfo::_init_fake_numa_for_test(int max_num_numa_nodes,
304	0	const std::vector<int>& core_to_numa_node) {
305	0	DCHECK_EQ(max_num_cores_, core_to_numa_node.size());
306	0	max_num_numa_nodes_ = max_num_numa_nodes;
307	0	for (int i = 0; i < max_num_cores_; ++i) {
308	0	core_to_numa_node_[i] = core_to_numa_node[i];
309	0	}
310	0	numa_node_to_cores_.clear();
311	0	_init_numa_node_to_cores();
312	0	}
313
314	1	void CpuInfo::_init_numa_node_to_cores() {
315	1	DCHECK(numa_node_to_cores_.empty());
316	1	numa_node_to_cores_.resize(max_num_numa_nodes_);
317	1	numa_node_core_idx_.resize(max_num_cores_);
318	9	for (int core = 0; core < max_num_cores_; ++core) {
319	8	std::vector<int>* cores_of_node = &numa_node_to_cores_[core_to_numa_node_[core]];
320	8	numa_node_core_idx_[core] = cores_of_node->size();
321	8	cores_of_node->push_back(core);
322	8	}
323	1	}
324
325	0	void CpuInfo::verify_cpu_requirements() {
326	0	if (!CpuInfo::is_supported(CpuInfo::SSSE3)) {
327	0	LOG(ERROR) << "CPU does not support the Supplemental SSE3 (SSSE3) instruction set. "
328	0	<< "This setup is generally unsupported and Impala might be unstable.";
329	0	}
330	0	}
331
332	0	void CpuInfo::verify_performance_governor() {
333	0	for (int cpu_id = 0; cpu_id < CpuInfo::num_cores(); ++cpu_id) {
334	0	const string governor_file = strings::Substitute(
335	0	"/sys/devices/system/cpu/cpu$0/cpufreq/scaling_governor", cpu_id);
336	0	const string warning_text = strings::Substitute(
337	0	"WARNING: CPU $0 is not using 'performance' governor. Note that changing the "
338	0	"governor to 'performance' will reset the no_turbo setting to 0.",
339	0	cpu_id);
340	0	WarnIfFileNotEqual(governor_file, "performance", warning_text);
341	0	}
342	0	}
343
344	0	void CpuInfo::verify_turbo_disabled() {
345	0	WarnIfFileNotEqual(
346	0	"/sys/devices/system/cpu/intel_pstate/no_turbo", "1",
347	0	"WARNING: CPU turbo is enabled. This setting can change the clock frequency of CPU "
348	0	"cores during the benchmark run, which can lead to inaccurate results. You can "
349	0	"disable CPU turbo by writing a 1 to "
350	0	"/sys/devices/system/cpu/intel_pstate/no_turbo. Note that changing the governor to "
351	0	"'performance' will reset this to 0.");
352	0	}
353
354	0	void CpuInfo::enable_feature(long flag, bool enable) {
355	0	DCHECK(initialized_);
356	0	if (!enable) {
357	0	hardware_flags_ &= ~flag;
358	0	} else {
359		// Can't turn something on that can't be supported
360	0	DCHECK((original_hardware_flags_ & flag) != 0);
361	0	hardware_flags_ \|= flag;
362	0	}
363	0	}
364
365	0	int CpuInfo::get_current_core() {
366		// sched_getcpu() is not supported on some old kernels/glibcs (like the versions that
367		// shipped with CentOS 5). In that case just pretend we're always running on CPU 0
368		// so that we can build and run with degraded perf.
369	0	#ifdef HAVE_SCHED_GETCPU
370	0	int cpu = sched_getcpu();
371	0	if (cpu < 0) return 0;
372	0	if (cpu >= max_num_cores_) {
373	0	LOG_FIRST_N(WARNING, 5) << "sched_getcpu() return value " << cpu
374	0	<< ", which is greater than get_nprocs_conf() retrun value "
375	0	<< max_num_cores_ << ", now is " << get_nprocs_conf();
376	0	cpu %= max_num_cores_;
377	0	}
378	0	return cpu;
379		#else
380		return 0;
381		#endif
382	0	}
383
384		void CpuInfo::_get_cache_info(long cache_sizes[NUM_CACHE_LEVELS],
385	0	long cache_line_sizes[NUM_CACHE_LEVELS]) {
386		#ifdef __APPLE__
387		// On Mac OS X use sysctl() to get the cache sizes
388		size_t len = 0;
389		sysctlbyname("hw.cachesize", nullptr, &len, nullptr, 0);
390		uint64_t* data = static_cast<uint64_t*>(malloc(len));
391		sysctlbyname("hw.cachesize", data, &len, nullptr, 0);
392		#ifndef __arm64__
393		DCHECK(len / sizeof(uint64_t) >= 3);
394		for (size_t i = 0; i < NUM_CACHE_LEVELS; ++i) {
395		cache_sizes[i] = data[i];
396		}
397		#else
398		for (size_t i = 0; i < NUM_CACHE_LEVELS; ++i) {
399		cache_sizes[i] = data[i + 1];
400		}
401		#endif
402		size_t linesize;
403		size_t sizeof_linesize = sizeof(linesize);
404		sysctlbyname("hw.cachelinesize", &linesize, &sizeof_linesize, nullptr, 0);
405		for (size_t i = 0; i < NUM_CACHE_LEVELS; ++i) cache_line_sizes[i] = linesize;
406		#else
407		// Call sysconf to query for the cache sizes
408		// Note: on some systems (e.g. RHEL 5 on AWS EC2), this returns 0 instead of the
409		// actual cache line size.
410	0	cache_sizes[L1_CACHE] = sysconf(_SC_LEVEL1_DCACHE_SIZE);
411	0	cache_sizes[L2_CACHE] = sysconf(_SC_LEVEL2_CACHE_SIZE);
412	0	cache_sizes[L3_CACHE] = sysconf(_SC_LEVEL3_CACHE_SIZE);
413
414	0	cache_line_sizes[L1_CACHE] = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
415	0	cache_line_sizes[L2_CACHE] = sysconf(_SC_LEVEL2_CACHE_LINESIZE);
416	0	cache_line_sizes[L3_CACHE] = sysconf(_SC_LEVEL3_CACHE_LINESIZE);
417	0	#endif
418	0	}
419
420	0	std::string CpuInfo::debug_string() {
421	0	DCHECK(initialized_);
422	0	std::stringstream stream;
423	0	long cache_sizes[NUM_CACHE_LEVELS];
424	0	long cache_line_sizes[NUM_CACHE_LEVELS];
425	0	_get_cache_info(cache_sizes, cache_line_sizes);
426
427	0	string L1 = strings::Substitute(
428	0	"L1 Cache: $0 (Line: $1)",
429	0	PrettyPrinter::print(static_cast<int64_t>(cache_sizes[L1_CACHE]), TUnit::BYTES),
430	0	PrettyPrinter::print(static_cast<int64_t>(cache_line_sizes[L1_CACHE]), TUnit::BYTES));
431	0	string L2 = strings::Substitute(
432	0	"L2 Cache: $0 (Line: $1)",
433	0	PrettyPrinter::print(static_cast<int64_t>(cache_sizes[L2_CACHE]), TUnit::BYTES),
434	0	PrettyPrinter::print(static_cast<int64_t>(cache_line_sizes[L2_CACHE]), TUnit::BYTES));
435	0	string L3 =
436	0	cache_sizes[L3_CACHE]
437	0	? strings::Substitute(
438	0	"L3 Cache: $0 (Line: $1)",
439	0	PrettyPrinter::print(static_cast<int64_t>(cache_sizes[L3_CACHE]),
440	0	TUnit::BYTES),
441	0	PrettyPrinter::print(static_cast<int64_t>(cache_line_sizes[L3_CACHE]),
442	0	TUnit::BYTES))
443	0	: "";
444	0	stream << "Cpu Info:" << std::endl
445	0	<< " Model: " << model_name_ << std::endl
446	0	<< " Cores: " << num_cores_ << std::endl
447	0	<< " Max Possible Cores: " << max_num_cores_ << std::endl
448	0	<< " " << L1 << std::endl
449	0	<< " " << L2 << std::endl
450	0	<< " " << L3 << std::endl
451	0	<< " Hardware Supports:" << std::endl;
452	0	for (auto& flag_mapping : flag_mappings) {
453	0	if (is_supported(flag_mapping.flag)) {
454	0	stream << " " << flag_mapping.name << std::endl;
455	0	}
456	0	}
457	0	stream << " Numa Nodes: " << max_num_numa_nodes_ << std::endl;
458	0	stream << " Numa Nodes of Cores:";
459	0	for (int core = 0; core < max_num_cores_; ++core) {
460	0	stream << " " << core << "->" << core_to_numa_node_[core] << " \|";
461	0	}
462	0	stream << std::endl;
463	0	return stream.str();
464	0	}
465
466		} // namespace doris