/root/doris/be/src/gutil/cpu.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 | | // Use of this source code is governed by a BSD-style license that can be |
3 | | // found in the LICENSE file. |
4 | | |
5 | | #include "gutil/cpu.h" |
6 | | |
7 | | #include <stdlib.h> |
8 | | #include <string.h> |
9 | | #include <stdint.h> |
10 | | #include <fstream> |
11 | | #include <sstream> |
12 | | #include <string> |
13 | | #include <utility> |
14 | | |
15 | | #if defined(__x86_64__) |
16 | | #if defined(_MSC_VER) |
17 | | #include <immintrin.h> // For _xgetbv() |
18 | | #include <intrin.h> |
19 | | #endif |
20 | | #endif |
21 | | |
22 | | namespace base { |
23 | | #if defined(ARCH_CPU_X86_FAMILY) |
24 | | namespace internal { |
// Decodes the family/model fields of a CPUID leaf 1 EAX |signature|.
//
// Returns {display family, display model, extended family, extended model}.
// The extended fields are reported as 0 whenever they are not folded into the
// display values.
std::tuple<int, int, int, int> ComputeX86FamilyAndModel(const std::string& vendor, int signature) {
    const int base_family = (signature >> 8) & 0xf;
    const int base_model = (signature >> 4) & 0xf;

    // Extended Model: the Intel manual ("Intel 64 and IA-32 Architectures
    // Developer's Manual: Vol. 2A") defines it only for Base Family 06h or
    // 0Fh, the "AMD CPUID Specification" only for Base Family 0Fh. When it is
    // defined, the display model is {ExtendedModel[3:0],BaseModel[3:0]}.
    const bool intel_family_6 = base_family == 0x6 && vendor == "GenuineIntel";
    const bool model_is_extended = base_family == 0xf || intel_family_6;
    const int ext_model = model_is_extended ? (signature >> 16) & 0xf : 0;

    // Extended Family: both manuals define it only for Base Family 0Fh, in
    // which case the display family is BaseFamily[3:0] + ExtendedFamily[7:0].
    const bool family_is_extended = base_family == 0xf;
    const int ext_family = family_is_extended ? (signature >> 20) & 0xff : 0;

    return {base_family + ext_family, base_model + (ext_model << 4), ext_family, ext_model};
}
52 | | } // namespace internal |
53 | | #endif // defined(ARCH_CPU_X86_FAMILY) |
54 | | CPU::CPU() |
55 | | : signature_(0), |
56 | | type_(0), |
57 | | family_(0), |
58 | | model_(0), |
59 | | stepping_(0), |
60 | | ext_model_(0), |
61 | | ext_family_(0), |
62 | | has_mmx_(false), |
63 | | has_sse_(false), |
64 | | has_sse2_(false), |
65 | | has_sse3_(false), |
66 | | has_ssse3_(false), |
67 | | has_sse41_(false), |
68 | | has_sse42_(false), |
69 | | has_popcnt_(false), |
70 | | has_avx_(false), |
71 | | has_avx2_(false), |
72 | | has_aesni_(false), |
73 | | has_non_stop_time_stamp_counter_(false), |
74 | | is_running_in_vm_(false), |
75 | 1 | cpu_vendor_("unknown") { |
76 | 1 | Initialize(); |
77 | 1 | } |
78 | | namespace { |
79 | | #if defined(ARCH_CPU_X86_FAMILY) |
#if !defined(COMPILER_MSVC)
#if defined(__pic__) && defined(__i386__)
// Executes CPUID for leaf |info_type| (ECX is set to 0) and stores
// EAX, EBX, ECX, EDX into cpu_info[0..3].
// This variant saves EBX in EDI around the instruction and hands the EBX
// result back through EDI, so EBX itself is restored before the asm ends.
void __cpuid(int cpu_info[4], int info_type) {
    int eax_out;
    int ebx_out;
    int ecx_out;
    int edx_out;
    __asm__ volatile(
            "mov %%ebx, %%edi\n"
            "cpuid\n"
            "xchg %%edi, %%ebx\n"
            : "=a"(eax_out), "=D"(ebx_out), "=c"(ecx_out), "=d"(edx_out)
            : "a"(info_type), "c"(0));
    cpu_info[0] = eax_out;
    cpu_info[1] = ebx_out;
    cpu_info[2] = ecx_out;
    cpu_info[3] = edx_out;
}
#else
// Executes CPUID for leaf |info_type| (ECX is set to 0) and stores
// EAX, EBX, ECX, EDX into cpu_info[0..3].
void __cpuid(int cpu_info[4], int info_type) {
    int eax_out;
    int ebx_out;
    int ecx_out;
    int edx_out;
    __asm__ volatile("cpuid\n"
                     : "=a"(eax_out), "=b"(ebx_out), "=c"(ecx_out), "=d"(edx_out)
                     : "a"(info_type), "c"(0));
    cpu_info[0] = eax_out;
    cpu_info[1] = ebx_out;
    cpu_info[2] = ecx_out;
    cpu_info[3] = edx_out;
}
#endif
#endif // !defined(COMPILER_MSVC)
// Reads the Intel Extended Control Register (XCR) selected by |xcr| and
// returns its 64-bit value. Intel currently defines only XCR0, so callers
// should always pass zero.
uint64_t xgetbv(uint32_t xcr) {
#if defined(COMPILER_MSVC)
    return _xgetbv(xcr);
#else
    // The instruction delivers the register as EDX:EAX.
    uint32_t low_half, high_half;
    __asm__ volatile("xgetbv" : "=a"(low_half), "=d"(high_half) : "c"(xcr));
    const uint64_t upper_bits = static_cast<uint64_t>(high_half) << 32;
    return upper_bits | low_half;
#endif // defined(COMPILER_MSVC)
}
109 | | #endif // ARCH_CPU_X86_FAMILY |
110 | | #if defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(OS_LINUX)) |
// Returns the CPU brand string parsed from /proc/cpuinfo. The lookup runs
// once; later calls return the same heap-allocated string, which is never
// freed. The string is empty when no matching line is found.
std::string* CpuInfoBrand() {
    static std::string* brand = []() {
        // The value is stored under the key "model name" or "Processor".
        // "model name" is used in Linux 3.8 and later (3.7 and later for
        // arm64) and is shown once per CPU. "Processor" is used in earlier
        // versions and is shown only once at the top of /proc/cpuinfo
        // regardless of the number of CPUs.
        const std::string model_name_key = "model name\t: ";
        const std::string processor_key = "Processor\t: ";

        std::ifstream cpuinfo("/proc/cpuinfo");
        const std::string text((std::istreambuf_iterator<char>(cpuinfo)),
                               std::istreambuf_iterator<char>());
        std::istringstream rows(text);
        for (std::string row; std::getline(rows, row);) {
            // The first line starting with either key wins; "model name" is
            // checked before "Processor" on each line.
            for (const std::string* key : {&model_name_key, &processor_key}) {
                if (row.compare(0, key->size(), *key) == 0) {
                    return new std::string(row.substr(key->size()));
                }
            }
        }
        return new std::string();
    }();
    return brand;
}
135 | | #endif // defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(OS_LINUX)) |
136 | | |
137 | | } // namespace |
138 | 1 | void CPU::Initialize() { |
139 | 1 | #if defined(ARCH_CPU_X86_FAMILY) |
140 | 1 | int cpu_info[4] = {-1}; |
141 | | // This array is used to temporarily hold the vendor name and then the brand |
142 | | // name. Thus it has to be big enough for both use cases. There are |
143 | | // static_asserts below for each of the use cases to make sure this array is |
144 | | // big enough. |
145 | 1 | char cpu_string[sizeof(cpu_info) * 3 + 1]; |
146 | | // __cpuid with an InfoType argument of 0 returns the number of |
147 | | // valid Ids in CPUInfo[0] and the CPU identification string in |
148 | | // the other three array elements. The CPU identification string is |
149 | | // not in linear order. The code below arranges the information |
150 | | // in a human readable form. The human readable order is CPUInfo[1] | |
151 | | // CPUInfo[3] | CPUInfo[2]. CPUInfo[2] and CPUInfo[3] are swapped |
152 | | // before using memcpy() to copy these three array elements to |cpu_string|. |
153 | 1 | __cpuid(cpu_info, 0); |
154 | 1 | int num_ids = cpu_info[0]; |
155 | 1 | std::swap(cpu_info[2], cpu_info[3]); |
156 | 1 | static constexpr size_t kVendorNameSize = 3 * sizeof(cpu_info[1]); |
157 | 1 | static_assert(kVendorNameSize < sizeof(cpu_string) / sizeof(cpu_string[0]), |
158 | 1 | "cpu_string too small"); |
159 | 1 | memcpy(cpu_string, &cpu_info[1], kVendorNameSize); |
160 | 1 | cpu_string[kVendorNameSize] = '\0'; |
161 | 1 | cpu_vendor_ = cpu_string; |
162 | | // Interpret CPU feature information. |
163 | 1 | if (num_ids > 0) { |
164 | 1 | int cpu_info7[4] = {0}; |
165 | 1 | __cpuid(cpu_info, 1); |
166 | 1 | if (num_ids >= 7) { |
167 | 1 | __cpuid(cpu_info7, 7); |
168 | 1 | } |
169 | 1 | signature_ = cpu_info[0]; |
170 | 1 | stepping_ = cpu_info[0] & 0xf; |
171 | 1 | type_ = (cpu_info[0] >> 12) & 0x3; |
172 | 1 | std::tie(family_, model_, ext_family_, ext_model_) = |
173 | 1 | internal::ComputeX86FamilyAndModel(cpu_vendor_, signature_); |
174 | 1 | has_mmx_ = (cpu_info[3] & 0x00800000) != 0; |
175 | 1 | has_sse_ = (cpu_info[3] & 0x02000000) != 0; |
176 | 1 | has_sse2_ = (cpu_info[3] & 0x04000000) != 0; |
177 | 1 | has_sse3_ = (cpu_info[2] & 0x00000001) != 0; |
178 | 1 | has_ssse3_ = (cpu_info[2] & 0x00000200) != 0; |
179 | 1 | has_sse41_ = (cpu_info[2] & 0x00080000) != 0; |
180 | 1 | has_sse42_ = (cpu_info[2] & 0x00100000) != 0; |
181 | 1 | has_popcnt_ = (cpu_info[2] & 0x00800000) != 0; |
182 | | // "Hypervisor Present Bit: Bit 31 of ECX of CPUID leaf 0x1." |
183 | | // See https://lwn.net/Articles/301888/ |
184 | | // This is checking for any hypervisor. Hypervisors may choose not to |
185 | | // announce themselves. Hypervisors trap CPUID and sometimes return |
186 | | // different results to underlying hardware. |
187 | 1 | is_running_in_vm_ = (cpu_info[2] & 0x80000000) != 0; |
188 | | // AVX instructions will generate an illegal instruction exception unless |
189 | | // a) they are supported by the CPU, |
190 | | // b) XSAVE is supported by the CPU and |
191 | | // c) XSAVE is enabled by the kernel. |
192 | | // See http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled |
193 | | // |
194 | | // In addition, we have observed some crashes with the xgetbv instruction |
195 | | // even after following Intel's example code. (See crbug.com/375968.) |
196 | | // Because of that, we also test the XSAVE bit because its description in |
197 | | // the CPUID documentation suggests that it signals xgetbv support. |
198 | 1 | has_avx_ = (cpu_info[2] & 0x10000000) != 0 && (cpu_info[2] & 0x04000000) != 0 /* XSAVE */ && |
199 | 1 | (cpu_info[2] & 0x08000000) != 0 /* OSXSAVE */ && |
200 | 1 | (xgetbv(0) & 6) == 6 /* XSAVE enabled by kernel */; |
201 | 1 | has_aesni_ = (cpu_info[2] & 0x02000000) != 0; |
202 | 1 | has_avx2_ = has_avx_ && (cpu_info7[1] & 0x00000020) != 0; |
203 | 1 | } |
204 | | // Get the brand string of the cpu. |
205 | 1 | __cpuid(cpu_info, 0x80000000); |
206 | 1 | const int max_parameter = cpu_info[0]; |
207 | 1 | static constexpr int kParameterStart = 0x80000002; |
208 | 1 | static constexpr int kParameterEnd = 0x80000004; |
209 | 1 | static constexpr int kParameterSize = kParameterEnd - kParameterStart + 1; |
210 | 1 | static_assert( |
211 | 1 | kParameterSize * sizeof(cpu_info) + 1 == sizeof(cpu_string) / sizeof(cpu_string[0]), |
212 | 1 | "cpu_string has wrong size"); |
213 | 1 | if (max_parameter >= kParameterEnd) { |
214 | 1 | size_t i = 0; |
215 | 4 | for (int parameter = kParameterStart; parameter <= kParameterEnd; ++parameter) { |
216 | 3 | __cpuid(cpu_info, parameter); |
217 | 3 | memcpy(&cpu_string[i], cpu_info, sizeof(cpu_info)); |
218 | 3 | i += sizeof(cpu_info); |
219 | 3 | } |
220 | 1 | cpu_string[i] = '\0'; |
221 | 1 | cpu_brand_ = cpu_string; |
222 | 1 | } |
223 | 1 | static constexpr int kParameterContainingNonStopTimeStampCounter = 0x80000007; |
224 | 1 | if (max_parameter >= kParameterContainingNonStopTimeStampCounter) { |
225 | 1 | __cpuid(cpu_info, kParameterContainingNonStopTimeStampCounter); |
226 | 1 | has_non_stop_time_stamp_counter_ = (cpu_info[3] & (1 << 8)) != 0; |
227 | 1 | } |
228 | 1 | if (!has_non_stop_time_stamp_counter_ && is_running_in_vm_) { |
229 | 0 | int cpu_info_hv[4] = {}; |
230 | 0 | __cpuid(cpu_info_hv, 0x40000000); |
231 | 0 | if (cpu_info_hv[1] == 0x7263694D && // Micr |
232 | 0 | cpu_info_hv[2] == 0x666F736F && // osof |
233 | 0 | cpu_info_hv[3] == 0x76482074) { // t Hv |
234 | | // If CPUID says we have a variant TSC and a hypervisor has identified |
235 | | // itself and the hypervisor says it is Microsoft Hyper-V, then treat |
236 | | // TSC as invariant. |
237 | | // |
238 | | // Microsoft Hyper-V hypervisor reports variant TSC as there are some |
239 | | // scenarios (eg. VM live migration) where the TSC is variant, but for |
240 | | // our purposes we can treat it as invariant. |
241 | 0 | has_non_stop_time_stamp_counter_ = true; |
242 | 0 | } |
243 | 0 | } |
244 | | #elif defined(ARCH_CPU_ARM_FAMILY) |
245 | | #if (defined(OS_ANDROID) || defined(OS_LINUX)) |
246 | | cpu_brand_ = *CpuInfoBrand(); |
247 | | #elif defined(OS_WIN) |
248 | | // Windows makes high-resolution thread timing information available in |
249 | | // user-space. |
250 | | has_non_stop_time_stamp_counter_ = true; |
251 | | #endif |
252 | | #endif |
253 | 1 | } |
254 | 0 | CPU::IntelMicroArchitecture CPU::GetIntelMicroArchitecture() const { |
255 | 0 | if (has_avx2()) return AVX2; |
256 | 0 | if (has_avx()) return AVX; |
257 | 0 | if (has_sse42()) return SSE42; |
258 | 0 | if (has_sse41()) return SSE41; |
259 | 0 | if (has_ssse3()) return SSSE3; |
260 | 0 | if (has_sse3()) return SSE3; |
261 | 0 | if (has_sse2()) return SSE2; |
262 | 0 | if (has_sse()) return SSE; |
263 | 0 | return PENTIUM; |
264 | 0 | } |
265 | | } // namespace base |