Coverage Report

Created: 2024-11-22 00:22

/root/doris/be/src/gutil/atomicops-internals-x86.h
Line| Count|Source
   1|      |// Copyright 2003 Google Inc.
   2|      |//
   3|      |// Licensed to the Apache Software Foundation (ASF) under one
   4|      |// or more contributor license agreements.  See the NOTICE file
   5|      |// distributed with this work for additional information
   6|      |// regarding copyright ownership.  The ASF licenses this file
   7|      |// to you under the Apache License, Version 2.0 (the
   8|      |// "License"); you may not use this file except in compliance
   9|      |// with the License.  You may obtain a copy of the License at
  10|      |//
  11|      |//   http://www.apache.org/licenses/LICENSE-2.0
  12|      |//
  13|      |// Unless required by applicable law or agreed to in writing,
  14|      |// software distributed under the License is distributed on an
  15|      |// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  16|      |// KIND, either express or implied.  See the License for the
  17|      |// specific language governing permissions and limitations
  18|      |// under the License.
  19|      |//
  20|      |// All Rights Reserved.
  21|      |//
  22|      |//
  23|      |// Implementation of atomic operations for x86.  This file should not
  24|      |// be included directly.  Clients should instead include
  25|      |// "base/atomicops.h".
  26|      |
  27|      |#pragma once
  28|      |
  29|      |#include "common/logging.h"
  30|      |#include <stdint.h>
  31|      |#include <ostream>
  32|      |
  33|      |#define BASE_HAS_ATOMIC64 1 // Use only in tests and base/atomic*
  34|      |
  35|      |// NOTE(user): x86 does not need to define AtomicWordCastType, because it
  36|      |// already matches Atomic32 or Atomic64, depending on the platform.
  37|      |
  38|      |// This struct is not part of the public API of this module; clients may not
  39|      |// use it.
  40|      |// Features of this x86.  Values may not be correct before InitGoogle() is run,
  41|      |// but are set conservatively.
  42|      |// Modify AtomicOps_x86CPUFeatureStruct to GutilAtomicOps_x86CPUFeatureStruct for brpc
  43|      |struct GutilAtomicOps_x86CPUFeatureStruct {
  44|      |    bool has_sse2;       // Processor has SSE2.
  45|      |    bool has_cmpxchg16b; // Processor supports cmpxchg16b instruction.
  46|      |};
  47|      |extern struct GutilAtomicOps_x86CPUFeatureStruct GutilAtomicOps_Internalx86CPUFeatures;
  48|      |
  49| 9.37k|#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")
  50|      |
  51|      |// AtomicOps initialisation for open source use.
  52|      |void AtomicOps_x86CPUFeaturesInit();
  53|      |
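The two feature flags above are filled in by AtomicOps_x86CPUFeaturesInit() before the 32-bit fallbacks later in this file consult them. The real initializer lives in the accompanying .cc file (not shown in this report); a minimal sketch of the idea, assuming GCC/Clang's <cpuid.h> and the CPUID.1 feature bits (SSE2 is EDX bit 26, CMPXCHG16B is ECX bit 13); the helper name is made up:

    #include <cpuid.h>

    // Illustrative only -- not the implementation from the .cc file.
    inline GutilAtomicOps_x86CPUFeatureStruct DetectX86Features() {
        GutilAtomicOps_x86CPUFeatureStruct features = {false, false};
        unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
        if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
            features.has_sse2 = (edx & (1u << 26)) != 0;       // CPUID.1:EDX[26] = SSE2
            features.has_cmpxchg16b = (ecx & (1u << 13)) != 0; // CPUID.1:ECX[13] = CMPXCHG16B
        }
        return features;
    }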
  54|      |typedef int32_t Atomic32;
  55|      |typedef int64_t Atomic64;
  56|      |
  57|      |namespace base {
  58|      |namespace subtle {
  59|      |
  60|      |typedef int32_t Atomic32;
  61|      |typedef int64_t Atomic64;
  62|      |
  63|      |// These atomic primitives don't work atomically, and can cause really nasty
  64|      |// hard-to-track-down bugs if the pointer isn't naturally aligned. Check alignment
  65|      |// in debug mode.
  66|      |template <class T>
  67| 21.1k|void CheckNaturalAlignment(const T* ptr) {
  68| 21.1k|    DCHECK_EQ(0, reinterpret_cast<const uintptr_t>(ptr) & (sizeof(T) - 1))
  69|     0|            << "unaligned pointer not allowed for atomics";
  70| 21.1k|}

Instantiation _ZN4base6subtle21CheckNaturalAlignmentIViEEvPKT_
(CheckNaturalAlignment<volatile int>):
  67| 17.7k|void CheckNaturalAlignment(const T* ptr) {
  68| 17.7k|    DCHECK_EQ(0, reinterpret_cast<const uintptr_t>(ptr) & (sizeof(T) - 1))
  69|     0|            << "unaligned pointer not allowed for atomics";
  70| 17.7k|}

Instantiation _ZN4base6subtle21CheckNaturalAlignmentIVlEEvPKT_
(CheckNaturalAlignment<volatile long>):
  67| 3.47k|void CheckNaturalAlignment(const T* ptr) {
  68| 3.47k|    DCHECK_EQ(0, reinterpret_cast<const uintptr_t>(ptr) & (sizeof(T) - 1))
  69|     0|            << "unaligned pointer not allowed for atomics";
  70| 3.47k|}
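In other words, every primitive below starts by asserting natural alignment in DCHECK-enabled builds; a pointer whose address is not a multiple of sizeof(T) aborts there rather than silently losing atomicity. A small sketch of what the check catches (the buffer and function name are illustrative):

    #include <cstdint>

    inline void AlignmentExample() {
        alignas(8) char buf[16] = {};
        // 4-byte aligned -- CheckNaturalAlignment() is satisfied.
        auto* ok = reinterpret_cast<volatile base::subtle::Atomic32*>(buf);
        base::subtle::NoBarrier_Store(ok, 1);
        // Offsetting by one byte breaks natural alignment; in a debug build the
        // DCHECK above would fire inside the next call, so it stays commented out.
        // auto* bad = reinterpret_cast<volatile base::subtle::Atomic32*>(buf + 1);
        // base::subtle::NoBarrier_Store(bad, 1);
    }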
  71|      |
  72|      |// 32-bit low-level operations on any platform.
  73|      |
  74|      |inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, Atomic32 old_value,
  75|     0|                                         Atomic32 new_value) {
  76|     0|    CheckNaturalAlignment(ptr);
  77|     0|    Atomic32 prev;
  78|     0|    __asm__ __volatile__("lock; cmpxchgl %1,%2"
  79|     0|                         : "=a"(prev)
  80|     0|                         : "q"(new_value), "m"(*ptr), "0"(old_value)
  81|     0|                         : "memory");
  82|     0|    return prev;
  83|     0|}
  84|      |
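NoBarrier_CompareAndSwap() is the building block for arbitrary read-modify-write operations: read the current value, compute the desired one, and retry if another thread got there first. A sketch of that pattern, here computing an atomic maximum (the helper name is made up and not part of this header):

    inline void AtomicStoreMax(volatile base::subtle::Atomic32* ptr,
                               base::subtle::Atomic32 candidate) {
        base::subtle::Atomic32 observed = base::subtle::NoBarrier_Load(ptr);
        while (candidate > observed) {
            base::subtle::Atomic32 prev =
                    base::subtle::NoBarrier_CompareAndSwap(ptr, observed, candidate);
            if (prev == observed) break; // we installed candidate
            observed = prev;             // lost the race; retry against the newer value
        }
    }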
  85|     0|inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value) {
  86|     0|    CheckNaturalAlignment(ptr);
  87|     0|    __asm__ __volatile__("xchgl %1,%0" // The lock prefix is implicit for xchg.
  88|     0|                         : "=r"(new_value)
  89|     0|                         : "m"(*ptr), "0"(new_value)
  90|     0|                         : "memory");
  91|     0|    return new_value; // Now it's the previous value.
  92|     0|}
  93|      |
  94|     0|inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value) {
  95|     0|    CheckNaturalAlignment(ptr);
  96|     0|    Atomic32 old_val = NoBarrier_AtomicExchange(ptr, new_value);
  97|     0|    return old_val;
  98|     0|}
  99|      |
 100|     0|inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value) {
 101|     0|    return NoBarrier_AtomicExchange(ptr, new_value);
 102|     0|}
 103|      |
 104| 5.91k|inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment) {
 105| 5.91k|    CheckNaturalAlignment(ptr);
 106| 5.91k|    Atomic32 temp = increment;
 107| 5.91k|    __asm__ __volatile__("lock; xaddl %0,%1" : "+r"(temp), "+m"(*ptr) : : "memory");
 108|      |    // temp now holds the old value of *ptr
 109| 5.91k|    return temp + increment;
 110| 5.91k|}
 111|      |
 112| 5.89k|inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment) {
 113| 5.89k|    CheckNaturalAlignment(ptr);
 114| 5.89k|    Atomic32 temp = increment;
 115| 5.89k|    __asm__ __volatile__("lock; xaddl %0,%1" : "+r"(temp), "+m"(*ptr) : : "memory");
 116|      |    // temp now holds the old value of *ptr
 117| 5.89k|    return temp + increment;
 118| 5.89k|}
 119|      |
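Barrier_AtomicIncrement() is the flavor a reference count wants: the full barrier on the decrement guarantees that all writes to the object happen before the thread that drops the last reference destroys it. A sketch of that idiom (names are illustrative, not part of this header):

    template <class T>
    inline void UnrefAndMaybeDelete(T* obj, volatile base::subtle::Atomic32* refcount) {
        if (base::subtle::Barrier_AtomicIncrement(refcount, -1) == 0) {
            delete obj; // last reference gone; prior writes are visible here
        }
    }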
 120|      |// On x86, the NoBarrier_CompareAndSwap() uses a locked instruction and so also
 121|      |// provides both acquire and release barriers.
 122|      |inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, Atomic32 old_value,
 123|     0|                                       Atomic32 new_value) {
 124|     0|    return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
 125|     0|}
 126|      |
 127|      |inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, Atomic32 old_value,
 128|     0|                                       Atomic32 new_value) {
 129|     0|    return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
 130|     0|}
 131|      |
 132|      |inline Atomic32 Barrier_CompareAndSwap(volatile Atomic32* ptr, Atomic32 old_value,
 133|     0|                                       Atomic32 new_value) {
 134|     0|    return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
 135|     0|}
 136|      |
 137|     0|inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
 138|     0|    CheckNaturalAlignment(ptr);
 139|     0|    *ptr = value;
 140|     0|}
 141|      |
 142|      |// Issue the x86 "pause" instruction, which tells the CPU that we
 143|      |// are in a spinlock wait loop and should allow other hyperthreads
 144|      |// to run, not speculate memory access, etc.
 145|     0|inline void PauseCPU() {
 146|     0|    __asm__ __volatile__("pause" : : : "memory");
 147|     0|}
 148|      |
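PauseCPU() is meant for exactly the kind of spin-wait loop the comment above describes. A toy test-and-set lock built from the primitives in this header (illustrative sketch, not part of the file):

    struct ToySpinLock {
        volatile base::subtle::Atomic32 locked = 0;

        void Lock() {
            // Acquire semantics on the successful CAS order the critical
            // section after lock acquisition.
            while (base::subtle::Acquire_CompareAndSwap(&locked, 0, 1) != 0) {
                base::subtle::PauseCPU(); // yield pipeline resources to the sibling hyperthread
            }
        }
        void Unlock() {
            base::subtle::Release_Store(&locked, 0); // publish the critical section's writes
        }
    };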
 149|      |#if defined(__x86_64__)
 150|      |
 151|      |// 64-bit implementations of memory barrier can be simpler, because the
 152|      |// "mfence" instruction is guaranteed to exist.
 153|     0|inline void MemoryBarrier() {
 154|     0|    __asm__ __volatile__("mfence" : : : "memory");
 155|     0|}
 156|      |
 157|     0|inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
 158|     0|    CheckNaturalAlignment(ptr);
 159|     0|    *ptr = value;
 160|     0|    MemoryBarrier();
 161|     0|}
 162|      |
 163|      |#else
 164|      |
 165|      |inline void MemoryBarrier() {
 166|      |    if (GutilAtomicOps_Internalx86CPUFeatures.has_sse2) {
 167|      |        __asm__ __volatile__("mfence" : : : "memory");
 168|      |    } else { // mfence is faster but not present on PIII
 169|      |        Atomic32 x = 0;
 170|      |        Acquire_AtomicExchange(&x, 0);
 171|      |    }
 172|      |}
 173|      |
 174|      |inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
 175|      |    if (GutilAtomicOps_Internalx86CPUFeatures.has_sse2) {
 176|      |        CheckNaturalAlignment(ptr);
 177|      |        *ptr = value;
 178|      |        __asm__ __volatile__("mfence" : : : "memory");
 179|      |    } else {
 180|      |        Acquire_AtomicExchange(ptr, value);
 181|      |    }
 182|      |}
 183|      |#endif
 184|      |
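The full MemoryBarrier() above is what code reaches for when it needs store-load ordering, which the cheaper acquire/release operations below do not provide on x86. A Dekker-style sketch (flag and function names are illustrative):

    volatile base::subtle::Atomic32 g_flag_a = 0;
    volatile base::subtle::Atomic32 g_flag_b = 0;

    // Thread A: announce intent, then check the peer. Without the full barrier
    // the store could be reordered after the load and both threads could enter.
    inline bool ThreadAMayEnter() {
        base::subtle::NoBarrier_Store(&g_flag_a, 1);
        base::subtle::MemoryBarrier();
        return base::subtle::Acquire_Load(&g_flag_b) == 0;
    }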
 185|     0|inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
 186|     0|    CheckNaturalAlignment(ptr);
 187|     0|    ATOMICOPS_COMPILER_BARRIER();
 188|     0|    *ptr = value; // An x86 store acts as a release barrier.
 189|     0|                  // See comments in Atomic64 version of Release_Store(), below.
 190|     0|}
 191|      |
 192|     0|inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
 193|     0|    CheckNaturalAlignment(ptr);
 194|     0|    return *ptr;
 195|     0|}
 196|      |
 197| 5.89k|inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
 198| 5.89k|    CheckNaturalAlignment(ptr);
 199| 5.89k|    Atomic32 value = *ptr; // An x86 load acts as an acquire barrier.
 200|      |    // See comments in Atomic64 version of Release_Store(), below.
 201| 5.89k|    ATOMICOPS_COMPILER_BARRIER();
 202| 5.89k|    return value;
 203| 5.89k|}
 204|      |
 205|     0|inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
 206|     0|    CheckNaturalAlignment(ptr);
 207|     0|    MemoryBarrier();
 208|     0|    return *ptr;
 209|     0|}
 210|      |
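Release_Store() and Acquire_Load() are designed to be used as a pair: the writer publishes data and then sets a flag with release semantics, and a reader that observes the flag with acquire semantics is guaranteed to see the data. A sketch (variable names are illustrative):

    volatile base::subtle::Atomic32 g_payload = 0;
    volatile base::subtle::Atomic32 g_ready = 0;

    inline void Publish() {
        base::subtle::NoBarrier_Store(&g_payload, 42); // plain data write
        base::subtle::Release_Store(&g_ready, 1);      // flag last: payload ordered before it
    }

    inline int Consume() {
        if (base::subtle::Acquire_Load(&g_ready) == 1) { // flag first: payload visible after it
            return base::subtle::NoBarrier_Load(&g_payload);
        }
        return -1; // not published yet
    }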
 211|      |#if defined(__x86_64__)
 212|      |
 213|      |// 64-bit low-level operations on 64-bit platform.
 214|      |
 215|      |inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, Atomic64 old_value,
 216|     0|                                         Atomic64 new_value) {
 217|     0|    Atomic64 prev;
 218|     0|    CheckNaturalAlignment(ptr);
 219|     0|    __asm__ __volatile__("lock; cmpxchgq %1,%2"
 220|     0|                         : "=a"(prev)
 221|     0|                         : "q"(new_value), "m"(*ptr), "0"(old_value)
 222|     0|                         : "memory");
 223|     0|    return prev;
 224|     0|}
 225|      |
 226|     0|inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value) {
 227|     0|    CheckNaturalAlignment(ptr);
 228|     0|    __asm__ __volatile__("xchgq %1,%0" // The lock prefix is implicit for xchg.
 229|     0|                         : "=r"(new_value)
 230|     0|                         : "m"(*ptr), "0"(new_value)
 231|     0|                         : "memory");
 232|     0|    return new_value; // Now it's the previous value.
 233|     0|}
 234|      |
 235|     0|inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value) {
 236|     0|    Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_value);
 237|     0|    return old_val;
 238|     0|}
 239|      |
 240|     0|inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value) {
 241|     0|    return NoBarrier_AtomicExchange(ptr, new_value);
 242|     0|}
 243|      |
 244|     0|inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment) {
 245|     0|    Atomic64 temp = increment;
 246|     0|    CheckNaturalAlignment(ptr);
 247|     0|    __asm__ __volatile__("lock; xaddq %0,%1" : "+r"(temp), "+m"(*ptr) : : "memory");
 248|      |    // temp now contains the previous value of *ptr
 249|     0|    return temp + increment;
 250|     0|}
 251|      |
 252|     0|inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment) {
 253|     0|    Atomic64 temp = increment;
 254|     0|    CheckNaturalAlignment(ptr);
 255|     0|    __asm__ __volatile__("lock; xaddq %0,%1" : "+r"(temp), "+m"(*ptr) : : "memory");
 256|      |    // temp now contains the previous value of *ptr
 257|     0|    return temp + increment;
 258|     0|}
 259|      |
 260|     0|inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
 261|     0|    CheckNaturalAlignment(ptr);
 262|     0|    *ptr = value;
 263|     0|}
 264|      |
 265|     0|inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
 266|     0|    CheckNaturalAlignment(ptr);
 267|     0|    *ptr = value;
 268|     0|    MemoryBarrier();
 269|     0|}
 270|      |
 271| 2.43k|inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
 272| 2.43k|    ATOMICOPS_COMPILER_BARRIER();
 273| 2.43k|    CheckNaturalAlignment(ptr);
 274| 2.43k|    *ptr = value; // An x86 store acts as a release barrier
 275|      |                  // for current AMD/Intel chips as of Jan 2008.
 276|      |                  // See also Acquire_Load(), below.
 277|      |
 278|      |    // When new chips come out, check:
 279|      |    //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
 280|      |    //  System Programming Guide, Chapter 7: Multiple-processor management,
 281|      |    //  Section 7.2, Memory Ordering.
 282|      |    // Last seen at:
 283|      |    //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
 284|      |    //
 285|      |    // x86 stores/loads fail to act as barriers for a few instructions (clflush
 286|      |    // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
 287|      |    // not generated by the compiler, and are rare.  Users of these instructions
 288|      |    // need to know about cache behaviour in any case since all of these involve
 289|      |    // either flushing cache lines or non-temporal cache hints.
 290| 2.43k|}
 291|      |
 292|     0|inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
 293|     0|    CheckNaturalAlignment(ptr);
 294|     0|    return *ptr;
 295|     0|}
 296|      |
 297| 1.03k|inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
 298| 1.03k|    CheckNaturalAlignment(ptr);
 299| 1.03k|    Atomic64 value = *ptr; // An x86 load acts as an acquire barrier,
 300|      |                           // for current AMD/Intel chips as of Jan 2008.
 301|      |                           // See also Release_Store(), above.
 302| 1.03k|    ATOMICOPS_COMPILER_BARRIER();
 303| 1.03k|    return value;
 304| 1.03k|}
 305|      |
 306|     0|inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
 307|     0|    CheckNaturalAlignment(ptr);
 308|     0|    MemoryBarrier();
 309|     0|    return *ptr;
 310|     0|}
 311|      |
 312|      |#else // defined(__x86_64__)
 313|      |
 314|      |// 64-bit low-level operations on 32-bit platform.
 315|      |
 316|      |#if !((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
 317|      |// For compilers older than gcc 4.1, we use inline asm.
 318|      |//
 319|      |// Potential pitfalls:
 320|      |//
 321|      |// 1. %ebx points to Global offset table (GOT) with -fPIC.
 322|      |//    We need to preserve this register.
 323|      |// 2. When explicit registers are used in inline asm, the
 324|      |//    compiler may not be aware of it and might try to reuse
 325|      |//    the same register for another argument which has constraints
 326|      |//    that allow it ("r" for example).
 327|      |
 328|      |inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr, Atomic64 old_value,
 329|      |                                            Atomic64 new_value) {
 330|      |    CheckNaturalAlignment(ptr);
 331|      |    Atomic64 prev;
 332|      |    __asm__ __volatile__(
 333|      |            "push %%ebx\n\t"
 334|      |            "movl (%3), %%ebx\n\t"     // Move 64-bit new_value into
 335|      |            "movl 4(%3), %%ecx\n\t"    // ecx:ebx
 336|      |            "lock; cmpxchg8b (%1)\n\t" // If edx:eax (old_value) same
 337|      |            "pop %%ebx\n\t"
 338|      |            : "=A"(prev)      // as contents of ptr:
 339|      |            : "D"(ptr),       //   ecx:ebx => ptr
 340|      |              "0"(old_value), // else:
 341|      |              "S"(&new_value) //   old *ptr => edx:eax
 342|      |            : "memory", "%ecx");
 343|      |    return prev;
 344|      |}
 345|      |#endif // Compiler < gcc-4.1
 346|      |
 347|      |inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, Atomic64 old_val,
 348|      |                                         Atomic64 new_val) {
 349|      |    CheckNaturalAlignment(ptr);
 350|      |    return __sync_val_compare_and_swap(ptr, old_val, new_val);
 351|      |}
 352|      |
 353|      |inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_val) {
 354|      |    Atomic64 old_val;
 355|      |    CheckNaturalAlignment(ptr);
 356|      |
 357|      |    do {
 358|      |        old_val = *ptr;
 359|      |    } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val);
 360|      |
 361|      |    return old_val;
 362|      |}
 363|      |
 364|      |inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_val) {
 365|      |    CheckNaturalAlignment(ptr);
 366|      |    Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_val);
 367|      |    return old_val;
 368|      |}
 369|      |
 370|      |inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_val) {
 371|      |    return NoBarrier_AtomicExchange(ptr, new_val);
 372|      |}
 373|      |
 374|      |inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment) {
 375|      |    CheckNaturalAlignment(ptr);
 376|      |    Atomic64 old_val, new_val;
 377|      |
 378|      |    do {
 379|      |        old_val = *ptr;
 380|      |        new_val = old_val + increment;
 381|      |    } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val);
 382|      |
 383|      |    return old_val + increment;
 384|      |}
 385|      |
 386|      |inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment) {
 387|      |    CheckNaturalAlignment(ptr);
 388|      |    Atomic64 new_val = NoBarrier_AtomicIncrement(ptr, increment);
 389|      |    return new_val;
 390|      |}
 391|      |
 392|      |inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
 393|      |    CheckNaturalAlignment(ptr);
 394|      |    __asm__ __volatile__(
 395|      |            "movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic
 396|      |            "movq %%mm0, %0\n\t" // moves (ptr could be read-only)
 397|      |            "emms\n\t"           // Empty mmx state/Reset FP regs
 398|      |            : "=m"(*ptr)
 399|      |            : "m"(value)
 400|      |            : // mark the FP stack and mmx registers as clobbered
 401|      |            "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)", "mm0", "mm1",
 402|      |            "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
 403|      |}
 404|      |
 405|      |inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
 406|      |    NoBarrier_Store(ptr, value);
 407|      |    MemoryBarrier();
 408|      |}
 409|      |
 410|      |inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
 411|      |    ATOMICOPS_COMPILER_BARRIER();
 412|      |    NoBarrier_Store(ptr, value);
 413|      |}
 414|      |
 415|      |inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
 416|      |    CheckNaturalAlignment(ptr);
 417|      |    Atomic64 value;
 418|      |    __asm__ __volatile__(
 419|      |            "movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic
 420|      |            "movq %%mm0, %0\n\t" // moves (ptr could be read-only)
 421|      |            "emms\n\t"           // Empty mmx state/Reset FP regs
 422|      |            : "=m"(value)
 423|      |            : "m"(*ptr)
 424|      |            : // mark the FP stack and mmx registers as clobbered
 425|      |            "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)", "mm0", "mm1",
 426|      |            "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
 427|      |    return value;
 428|      |}
 429|      |
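The MMX moves above exist because on a 32-bit target a plain 64-bit assignment may be compiled as two 32-bit moves, so a concurrent reader can observe half an update. Routing the access through NoBarrier_Store()/NoBarrier_Load() keeps it a single 64-bit transfer. A sketch of the hazard (names are illustrative):

    volatile base::subtle::Atomic64 g_counter64 = 0;

    inline void Writer(base::subtle::Atomic64 v) {
        // g_counter64 = v;                             // may tear into two 32-bit stores
        base::subtle::NoBarrier_Store(&g_counter64, v); // one atomic 64-bit store
    }

    inline base::subtle::Atomic64 Reader() {
        return base::subtle::NoBarrier_Load(&g_counter64); // never sees a half-written value
    }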
 430|      |inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
 431|      |    CheckNaturalAlignment(ptr);
 432|      |    Atomic64 value = NoBarrier_Load(ptr);
 433|      |    ATOMICOPS_COMPILER_BARRIER();
 434|      |    return value;
 435|      |}
 436|      |
 437|      |inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
 438|      |    MemoryBarrier();
 439|      |    return NoBarrier_Load(ptr);
 440|      |}
 441|      |
 442|      |#endif // defined(__x86_64__)
 443|      |
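Whichever branch above is compiled, the 64-bit surface is the same, so callers use it uniformly. Two typical uses, a unique-id counter and a published timestamp (names are illustrative, not part of this header):

    volatile base::subtle::Atomic64 g_next_id = 0;
    volatile base::subtle::Atomic64 g_last_update_ns = 0;

    inline base::subtle::Atomic64 NextId() {
        // Only uniqueness is needed, so the NoBarrier flavor is sufficient.
        return base::subtle::NoBarrier_AtomicIncrement(&g_next_id, 1);
    }

    inline void PublishUpdateTime(base::subtle::Atomic64 now_ns) {
        base::subtle::Release_Store(&g_last_update_ns, now_ns); // readers pair with Acquire_Load
    }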
 444|      |inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, Atomic64 old_value,
 445|     0|                                       Atomic64 new_value) {
 446|     0|    return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
 447|     0|}
 448|      |
 449|      |inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, Atomic64 old_value,
 450|     0|                                       Atomic64 new_value) {
 451|     0|    return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
 452|     0|}
 453|      |
 454|      |inline Atomic64 Barrier_CompareAndSwap(volatile Atomic64* ptr, Atomic64 old_value,
 455|     0|                                       Atomic64 new_value) {
 456|     0|    return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
 457|     0|}
 458|      |
 459|      |} // namespace subtle
 460|      |} // namespace base
 461|      |
 462|      |#undef ATOMICOPS_COMPILER_BARRIER