Coverage Report

Created: 2024-11-20 15:53

/root/doris/be/src/gutil/cpu.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file.
4
5
#include "gutil/cpu.h"
6
7
#include <stdlib.h>
8
#include <string.h>
9
#include <stdint.h>
10
#include <fstream>
11
#include <sstream>
12
#include <string>
13
#include <utility>
14
15
#if defined(__x86_64__)
16
#if defined(_MSC_VER)
17
#include <immintrin.h> // For _xgetbv()
18
#include <intrin.h>
19
#endif
20
#endif
21
22
namespace base {
23
#if defined(ARCH_CPU_X86_FAMILY)
24
namespace internal {
25
1
// Decodes a CPUID leaf-1 signature into its display family and model.
//
// |vendor| is the CPU vendor string and |signature| is the raw EAX value.
// Returns {family, model, ext_family, ext_model}, where |family| and |model|
// already include the extended fields whenever those fields apply.
std::tuple<int, int, int, int> ComputeX86FamilyAndModel(const std::string& vendor, int signature) {
    const int base_family = (signature >> 8) & 0xf;
    const int base_model = (signature >> 4) & 0xf;

    const bool family_is_0f = (base_family == 0xf);
    const bool intel_family_06 = (base_family == 0x6) && (vendor == "GenuineIntel");

    // Extended Model: per the Intel manual (Vol. 2A) it is defined only when
    // the Base Family is 06h or 0Fh; per the AMD CPUID Specification only when
    // the Base Family is 0Fh. In both cases the display model is
    // {ExtendedModel[3:0],BaseModel[3:0]}.
    const int ext_model = (family_is_0f || intel_family_06) ? ((signature >> 16) & 0xf) : 0;

    // Extended Family: both manuals define it only when the Base Family is
    // 0Fh, and the display family is then BaseFamily + ExtendedFamily[7:0].
    const int ext_family = family_is_0f ? ((signature >> 20) & 0xff) : 0;

    const int display_family = base_family + ext_family;
    const int display_model = base_model + (ext_model << 4);
    return {display_family, display_model, ext_family, ext_model};
}
52
} // namespace internal
53
#endif // defined(ARCH_CPU_X86_FAMILY)
54
CPU::CPU()
55
        : signature_(0),
56
          type_(0),
57
          family_(0),
58
          model_(0),
59
          stepping_(0),
60
          ext_model_(0),
61
          ext_family_(0),
62
          has_mmx_(false),
63
          has_sse_(false),
64
          has_sse2_(false),
65
          has_sse3_(false),
66
          has_ssse3_(false),
67
          has_sse41_(false),
68
          has_sse42_(false),
69
          has_popcnt_(false),
70
          has_avx_(false),
71
          has_avx2_(false),
72
          has_aesni_(false),
73
          has_non_stop_time_stamp_counter_(false),
74
          is_running_in_vm_(false),
75
1
          cpu_vendor_("unknown") {
76
1
    Initialize();
77
1
}
78
namespace {
79
#if defined(ARCH_CPU_X86_FAMILY)
80
#if !defined(COMPILER_MSVC)
81
#if defined(__pic__) && defined(__i386__)
82
// Executes CPUID with EAX = |info_type| and ECX = 0 and stores the resulting
// EAX, EBX, ECX and EDX values in cpu_info[0..3] respectively.
//
// This variant never names EBX as an output: EBX is saved in EDI before the
// instruction, and the final xchg both restores EBX and leaves CPUID's EBX
// result in EDI, which is the register bound to cpu_info[1].
// NOTE(review): presumably needed because EBX is reserved in i386 PIC builds
// (this variant is selected by __pic__ && __i386__) - confirm.
void __cpuid(int cpu_info[4], int info_type) {
83
    __asm__ volatile(
84
            "mov %%ebx, %%edi\n"
85
            "cpuid\n"
86
            "xchg %%edi, %%ebx\n"
87
            : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
88
            : "a"(info_type), "c"(0));
89
}
90
#else
91
8
// Executes CPUID with EAX = |info_type| and ECX = 0 and stores the resulting
// EAX, EBX, ECX and EDX values in cpu_info[0..3] respectively.
void __cpuid(int cpu_info[4], int info_type) {
92
8
    __asm__ volatile("cpuid\n"
93
8
                     : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
94
8
                     : "a"(info_type), "c"(0));
95
8
}
96
#endif
97
#endif // !defined(COMPILER_MSVC)
98
// xgetbv returns the value of an Intel Extended Control Register (XCR).
99
// Currently only XCR0 is defined by Intel so |xcr| should always be zero.
100
1
uint64_t xgetbv(uint32_t xcr) {
101
#if defined(COMPILER_MSVC)
102
    // MSVC build: delegate to the compiler-provided _xgetbv().
    return _xgetbv(xcr);
103
#else
104
1
    // Other compilers: issue the instruction directly. |xcr| is passed in ECX
    // and the result comes back as two 32-bit halves, EDX (high) and EAX (low).
    uint32_t eax, edx;
105
1
    __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
106
1
    // Recombine the two halves into the full 64-bit register value.
    return (static_cast<uint64_t>(edx) << 32) | eax;
107
1
#endif // defined(COMPILER_MSVC)
108
1
}
109
#endif // ARCH_CPU_X86_FAMILY
110
#if defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(OS_LINUX))
111
// Returns the CPU brand text found in /proc/cpuinfo. The string is computed
// on the first call, allocated with new and not freed here; later calls
// return the same pointer. If no matching line is found (or the file cannot
// be read) the returned string is empty.
std::string* CpuInfoBrand() {
    static std::string* brand = []() {
        // The value is stored under the key "model name" or "Processor".
        // "model name" is used in Linux 3.8 and later (3.7 and later for
        // arm64) and is shown once per CPU. "Processor" is used in earlier
        // versions and is shown only once at the top of /proc/cpuinfo
        // regardless of the number of CPUs. Each line is tested against the
        // prefixes in this order.
        const std::string prefixes[] = {"model name\t: ", "Processor\t: "};

        std::ifstream cpuinfo("/proc/cpuinfo");
        const std::string text((std::istreambuf_iterator<char>(cpuinfo)),
                               std::istreambuf_iterator<char>());

        std::istringstream lines(text);
        for (std::string entry; std::getline(lines, entry);) {
            for (const std::string& prefix : prefixes) {
                if (entry.compare(0, prefix.size(), prefix) == 0) {
                    // The brand is everything after the matched prefix.
                    return new std::string(entry.substr(prefix.size()));
                }
            }
        }
        return new std::string();
    }();
    return brand;
}
135
#endif  // defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(OS_LINUX))
136
137
} // namespace
138
1
// Detects the host processor and fills in this object's fields.
//
// On x86 it uses CPUID to read the vendor string, the signature (stepping,
// type, family, model), the feature flags, the hypervisor bit, the brand
// string and the non-stop time stamp counter flag. On ARM it either reads the
// brand via CpuInfoBrand() (Android/Linux) or only sets
// has_non_stop_time_stamp_counter_ (Windows). On any other configuration the
// fields keep the values they had on entry.
void CPU::Initialize() {
#if defined(ARCH_CPU_X86_FAMILY)
    int cpu_info[4] = {-1};
    // This array is used to temporarily hold the vendor name and then the brand
    // name. Thus it has to be big enough for both use cases. There are
    // static_asserts below for each of the use cases to make sure this array is
    // big enough.
    char cpu_string[sizeof(cpu_info) * 3 + 1];

    // __cpuid with an InfoType argument of 0 returns the number of
    // valid Ids in CPUInfo[0] and the CPU identification string in
    // the other three array elements. The CPU identification string is
    // not in linear order. The code below arranges the information
    // in a human readable form. The human readable order is CPUInfo[1] |
    // CPUInfo[3] | CPUInfo[2]. CPUInfo[2] and CPUInfo[3] are swapped
    // before using memcpy() to copy these three array elements to |cpu_string|.
    __cpuid(cpu_info, 0);
    int num_ids = cpu_info[0];
    std::swap(cpu_info[2], cpu_info[3]);
    static constexpr size_t kVendorNameSize = 3 * sizeof(cpu_info[1]);
    static_assert(kVendorNameSize < sizeof(cpu_string) / sizeof(cpu_string[0]),
                  "cpu_string too small");
    memcpy(cpu_string, &cpu_info[1], kVendorNameSize);
    cpu_string[kVendorNameSize] = '\0';
    cpu_vendor_ = cpu_string;

    // Interpret CPU feature information.
    if (num_ids > 0) {
        // Leaf 7 is only queried when the CPU reports it; otherwise its
        // registers are treated as all zero.
        int cpu_info7[4] = {0};
        __cpuid(cpu_info, 1);
        if (num_ids >= 7) {
            __cpuid(cpu_info7, 7);
        }
        signature_ = cpu_info[0];
        stepping_ = cpu_info[0] & 0xf;
        type_ = (cpu_info[0] >> 12) & 0x3;
        std::tie(family_, model_, ext_family_, ext_model_) =
                internal::ComputeX86FamilyAndModel(cpu_vendor_, signature_);
        has_mmx_ = (cpu_info[3] & 0x00800000) != 0;
        has_sse_ = (cpu_info[3] & 0x02000000) != 0;
        has_sse2_ = (cpu_info[3] & 0x04000000) != 0;
        has_sse3_ = (cpu_info[2] & 0x00000001) != 0;
        has_ssse3_ = (cpu_info[2] & 0x00000200) != 0;
        has_sse41_ = (cpu_info[2] & 0x00080000) != 0;
        has_sse42_ = (cpu_info[2] & 0x00100000) != 0;
        has_popcnt_ = (cpu_info[2] & 0x00800000) != 0;

        // "Hypervisor Present Bit: Bit 31 of ECX of CPUID leaf 0x1."
        // See https://lwn.net/Articles/301888/
        // This is checking for any hypervisor. Hypervisors may choose not to
        // announce themselves. Hypervisors trap CPUID and sometimes return
        // different results to underlying hardware.
        is_running_in_vm_ = (cpu_info[2] & 0x80000000) != 0;

        // AVX instructions will generate an illegal instruction exception unless
        //   a) they are supported by the CPU,
        //   b) XSAVE is supported by the CPU and
        //   c) XSAVE is enabled by the kernel.
        // See http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
        //
        // In addition, we have observed some crashes with the xgetbv instruction
        // even after following Intel's example code. (See crbug.com/375968.)
        // Because of that, we also test the XSAVE bit because its description in
        // the CPUID documentation suggests that it signals xgetbv support.
        has_avx_ = (cpu_info[2] & 0x10000000) != 0 && (cpu_info[2] & 0x04000000) != 0 /* XSAVE */ &&
                   (cpu_info[2] & 0x08000000) != 0 /* OSXSAVE */ &&
                   (xgetbv(0) & 6) == 6 /* XSAVE enabled by kernel */;
        has_aesni_ = (cpu_info[2] & 0x02000000) != 0;
        has_avx2_ = has_avx_ && (cpu_info7[1] & 0x00000020) != 0;
    }

    // Get the brand string of the cpu.
    //
    // Extended leaf numbers (0x80000000 and up) have the top bit set, so they
    // are kept as uint32_t and only cast to int at the __cpuid call. Comparing
    // them as int would make every constant negative, and a CPU that returns a
    // small (or zero) EAX for leaf 0x80000000 would then wrongly pass the
    // ">= leaf" range checks below.
    __cpuid(cpu_info, static_cast<int>(0x80000000));
    const uint32_t max_parameter = static_cast<uint32_t>(cpu_info[0]);

    static constexpr uint32_t kParameterStart = 0x80000002;
    static constexpr uint32_t kParameterEnd = 0x80000004;
    static constexpr uint32_t kParameterSize = kParameterEnd - kParameterStart + 1;
    static_assert(
            kParameterSize * sizeof(cpu_info) + 1 == sizeof(cpu_string) / sizeof(cpu_string[0]),
            "cpu_string has wrong size");

    if (max_parameter >= kParameterEnd) {
        // Each leaf in [kParameterStart, kParameterEnd] contributes
        // sizeof(cpu_info) bytes of the brand string, concatenated in order.
        size_t i = 0;
        for (uint32_t parameter = kParameterStart; parameter <= kParameterEnd; ++parameter) {
            __cpuid(cpu_info, static_cast<int>(parameter));
            memcpy(&cpu_string[i], cpu_info, sizeof(cpu_info));
            i += sizeof(cpu_info);
        }
        cpu_string[i] = '\0';
        cpu_brand_ = cpu_string;
    }

    static constexpr uint32_t kParameterContainingNonStopTimeStampCounter = 0x80000007;
    if (max_parameter >= kParameterContainingNonStopTimeStampCounter) {
        __cpuid(cpu_info, static_cast<int>(kParameterContainingNonStopTimeStampCounter));
        has_non_stop_time_stamp_counter_ = (cpu_info[3] & (1 << 8)) != 0;
    }

    if (!has_non_stop_time_stamp_counter_ && is_running_in_vm_) {
        int cpu_info_hv[4] = {};
        __cpuid(cpu_info_hv, 0x40000000);
        if (cpu_info_hv[1] == 0x7263694D && // Micr
            cpu_info_hv[2] == 0x666F736F && // osof
            cpu_info_hv[3] == 0x76482074) { // t Hv
            // If CPUID says we have a variant TSC and a hypervisor has identified
            // itself and the hypervisor says it is Microsoft Hyper-V, then treat
            // TSC as invariant.
            //
            // Microsoft Hyper-V hypervisor reports variant TSC as there are some
            // scenarios (eg. VM live migration) where the TSC is variant, but for
            // our purposes we can treat it as invariant.
            has_non_stop_time_stamp_counter_ = true;
        }
    }
#elif defined(ARCH_CPU_ARM_FAMILY)
#if (defined(OS_ANDROID) || defined(OS_LINUX))
    cpu_brand_ = *CpuInfoBrand();
#elif defined(OS_WIN)
    // Windows makes high-resolution thread timing information available in
    // user-space.
    has_non_stop_time_stamp_counter_ = true;
#endif
#endif
}
254
0
// Returns the IntelMicroArchitecture value for the first feature accessor
// that reports true, checking in this order: AVX2, AVX, SSE4.2, SSE4.1,
// SSSE3, SSE3, SSE2, SSE. Returns PENTIUM when none of them does.
CPU::IntelMicroArchitecture CPU::GetIntelMicroArchitecture() const {
    // The conditional chain evaluates left to right and stops at the first
    // accessor that returns true.
    return has_avx2()    ? AVX2
           : has_avx()   ? AVX
           : has_sse42() ? SSE42
           : has_sse41() ? SSE41
           : has_ssse3() ? SSSE3
           : has_sse3()  ? SSE3
           : has_sse2()  ? SSE2
           : has_sse()   ? SSE
                         : PENTIUM;
}
265
} // namespace base