Coverage Report

Created: 2025-06-26 14:35

/root/doris/be/src/gutil/endian.h
 Line| Count|Source
    1|      |// Copyright 2005 Google Inc.
    2|      |//
    3|      |// Licensed to the Apache Software Foundation (ASF) under one
    4|      |// or more contributor license agreements.  See the NOTICE file
    5|      |// distributed with this work for additional information
    6|      |// regarding copyright ownership.  The ASF licenses this file
    7|      |// to you under the Apache License, Version 2.0 (the
    8|      |// "License"); you may not use this file except in compliance
    9|      |// with the License.  You may obtain a copy of the License at
   10|      |//
   11|      |//   http://www.apache.org/licenses/LICENSE-2.0
   12|      |//
   13|      |// Unless required by applicable law or agreed to in writing,
   14|      |// software distributed under the License is distributed on an
   15|      |// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
   16|      |// KIND, either express or implied.  See the License for the
   17|      |// specific language governing permissions and limitations
   18|      |// under the License.
   19|      |//
   20|      |// ---
   21|      |//
   22|      |//
   23|      |// Utility functions that depend on bytesex. We define htonll and ntohll,
   24|      |// as well as "Google" versions of all the standards: ghtonl, ghtons, and
   25|      |// so on. These functions do exactly the same as their standard variants,
   26|      |// but don't require including the dangerous netinet/in.h.
   27|      |//
   28|      |// Buffer routines will copy to and from buffers without causing
   29|      |// a bus error when the architecture requires different byte alignments
   30|      |
   31|      |#pragma once
   32|      |
   33|      |#include <assert.h>
   34|      |
   35|      |#include "vec/core/wide_integer.h"
   36|      |
   37|      |// Portable handling of unaligned loads, stores, and copies.
   38|      |// On some platforms, like ARM, the copy functions can be more efficient
   39|      |// than a load and a store.
   40|      |
   41|      |#if defined(__i386) || defined(ARCH_ATHLON) || defined(__x86_64__) || defined(_ARCH_PPC)
   42|      |
   43|      |// x86 and x86-64 can perform unaligned loads/stores directly;
   44|      |// modern PowerPC hardware can also do unaligned integer loads and stores;
   45|      |// but note: the FPU still sends unaligned loads and stores to a trap handler!
   46|      |
   47|      |#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16_t*>(_p))
   48|  256k|#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t*>(_p))
   49| 5.38k|#define UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t*>(_p))
   50|      |
   51|      |#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16_t*>(_p) = (_val))
   52|     0|#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32_t*>(_p) = (_val))
   53|      |#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64_t*>(_p) = (_val))
   54|      |
   55|      |#elif defined(__arm__) && !defined(__ARM_ARCH_5__) && !defined(__ARM_ARCH_5T__) &&               \
   56|      |        !defined(__ARM_ARCH_5TE__) && !defined(__ARM_ARCH_5TEJ__) && !defined(__ARM_ARCH_6__) && \
   57|      |        !defined(__ARM_ARCH_6J__) && !defined(__ARM_ARCH_6K__) && !defined(__ARM_ARCH_6Z__) &&   \
   58|      |        !defined(__ARM_ARCH_6ZK__) && !defined(__ARM_ARCH_6T2__)
   59|      |
   60|      |// ARMv7 and newer support native unaligned accesses, but only of 16-bit
   61|      |// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
   62|      |// do an unaligned read and rotate the words around a bit, or do the reads very
   63|      |// slowly (trip through kernel mode). There's no simple #define that says just
   64|      |// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6
   65|      |// sub-architectures. Newer gcc (>= 4.6) sets an __ARM_FEATURE_ALIGNED #define,
   66|      |// so in time, maybe we can move on to that.
   67|      |//
   68|      |// This is a mess, but there's not much we can do about it.
   69|      |
   70|      |#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16_t*>(_p))
   71|      |#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t*>(_p))
   72|      |
   73|      |#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16_t*>(_p) = (_val))
   74|      |#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32_t*>(_p) = (_val))
   75|      |
   76|      |// TODO(user): NEON supports unaligned 64-bit loads and stores.
   77|      |// See if that would be more efficient on platforms supporting it,
   78|      |// at least for copies.
   79|      |
   80|      |inline uint64_t UNALIGNED_LOAD64(const void* p) {
   81|      |    uint64_t t;
   82|      |    memcpy(&t, p, sizeof t);
   83|      |    return t;
   84|      |}
   85|      |
   86|      |inline void UNALIGNED_STORE64(void* p, uint64_t v) {
   87|      |    memcpy(p, &v, sizeof v);
   88|      |}
   89|      |
   90|      |#else
   91|      |
   92|      |#define NEED_ALIGNED_LOADS
   93|      |
   94|      |// These functions are provided for architectures that don't support
   95|      |// unaligned loads and stores.
   96|      |
   97|      |inline uint16_t UNALIGNED_LOAD16(const void* p) {
   98|      |    uint16_t t;
   99|      |    memcpy(&t, p, sizeof t);
  100|      |    return t;
  101|      |}
  102|      |
  103|      |inline uint32_t UNALIGNED_LOAD32(const void* p) {
  104|      |    uint32_t t;
  105|      |    memcpy(&t, p, sizeof t);
  106|      |    return t;
  107|      |}
  108|      |
  109|      |inline uint64_t UNALIGNED_LOAD64(const void* p) {
  110|      |    uint64_t t;
  111|      |    memcpy(&t, p, sizeof t);
  112|      |    return t;
  113|      |}
  114|      |
  115|      |inline void UNALIGNED_STORE16(void* p, uint16_t v) {
  116|      |    memcpy(p, &v, sizeof v);
  117|      |}
  118|      |
  119|      |inline void UNALIGNED_STORE32(void* p, uint32_t v) {
  120|      |    memcpy(p, &v, sizeof v);
  121|      |}
  122|      |
  123|      |inline void UNALIGNED_STORE64(void* p, uint64_t v) {
  124|      |    memcpy(p, &v, sizeof v);
  125|      |}
  126|      |
  127|      |#endif
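
As a quick illustration of the unaligned helpers above (a standalone sketch, not part of the coverage listing; it assumes the header is reachable as "gutil/endian.h" in the Doris BE include path): the x86 branch reads straight through a casted pointer, while the fallback branches use the memcpy idiom, which compilers lower to the same plain load where the target allows it.

// Hypothetical usage sketch for the UNALIGNED_* helpers defined above.
#include <cstdint>
#include <cstdio>
#include <cstring>

#include "gutil/endian.h" // assumed include path within the Doris BE source tree

int main() {
    unsigned char buf[16] = {0};
    // Offset 3 is deliberately misaligned for a 4-byte access.
    UNALIGNED_STORE32(buf + 3, 0x11223344u);
    uint32_t v = UNALIGNED_LOAD32(buf + 3); // reads the value back without a bus error
    // Portable equivalent that the fallback implementations rely on:
    uint32_t w;
    memcpy(&w, buf + 3, sizeof w);
    printf("%08x %08x\n", v, w); // prints the same word twice
    return 0;
}
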
  128|      |
  129| 62.8k|inline uint64_t gbswap_64(uint64_t host_int) {
  130| 62.8k|#if defined(__GNUC__) && defined(__x86_64__) && !defined(__APPLE__)
  131|      |    // Adapted from /usr/include/byteswap.h.  Not available on Mac.
  132| 62.8k|    if (__builtin_constant_p(host_int)) {
  133|     0|        return __bswap_constant_64(host_int);
  134| 62.8k|    } else {
  135| 62.8k|        uint64_t result;
  136| 62.8k|        __asm__("bswap %0" : "=r"(result) : "0"(host_int));
  137| 62.8k|        return result;
  138| 62.8k|    }
  139|      |#elif defined(bswap_64)
  140|      |    return bswap_64(host_int);
  141|      |#else
  142|      |    return static_cast<uint64_t>(bswap_32(static_cast<uint32_t>(host_int >> 32))) |
  143|      |           (static_cast<uint64_t>(bswap_32(static_cast<uint32_t>(host_int))) << 32);
  144|      |#endif // bswap_64
  145| 62.8k|}
  146|      |
  147|   214|inline unsigned __int128 gbswap_128(unsigned __int128 host_int) {
  148|   214|    return static_cast<unsigned __int128>(bswap_64(static_cast<uint64_t>(host_int >> 64))) |
  149|   214|           (static_cast<unsigned __int128>(bswap_64(static_cast<uint64_t>(host_int))) << 64);
  150|   214|}
  151|      |
  152|     1|inline wide::UInt256 gbswap_256(wide::UInt256 host_int) {
  153|     1|    wide::UInt256 result {gbswap_64(host_int.items[3]), gbswap_64(host_int.items[2]),
  154|     1|                          gbswap_64(host_int.items[1]), gbswap_64(host_int.items[0])};
  155|     1|    return result;
  156|     1|}
  157|      |
  158|      |// Swap bytes of a 24-bit value.
  159|   217|inline uint32_t bswap_24(uint32_t x) {
  160|   217|    return ((x & 0x0000ffULL) << 16) | ((x & 0x00ff00ULL)) | ((x & 0xff0000ULL) >> 16);
  161|   217|}
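
For concreteness, here is what the swap helpers do to specific values (a standalone sketch; it assumes a glibc <byteswap.h> and the "gutil/endian.h" include path):

// Worked values for the byte-swap helpers above.
#include <byteswap.h> // assumed glibc environment; gbswap_64's fallback paths use bswap_32/bswap_64
#include <cassert>
#include <cstdint>

#include "gutil/endian.h" // assumed include path

int main() {
    // gbswap_64 reverses all eight bytes of a 64-bit value.
    assert(gbswap_64(0x0102030405060708ULL) == 0x0807060504030201ULL);
    // bswap_24 swaps only the low three bytes: 0x123456 -> 0x563412.
    assert(bswap_24(0x123456u) == 0x563412u);
    return 0;
}
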
  162|      |
  163|      |#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  164|      |
  165|      |// Definitions for ntohl etc. that don't require us to include
  166|      |// netinet/in.h. We wrap bswap_32 and bswap_16 in functions rather
  167|      |// than just #defining them because in debug mode, gcc doesn't
  168|      |// correctly handle the (rather involved) definitions of bswap_32.
  169|      |// gcc guarantees that inline functions are as fast as macros, so
  170|      |// this isn't a performance hit.
  171|     0|inline uint16_t ghtons(uint16_t x) {
  172|     0|    return bswap_16(x);
  173|     0|}
  174|     0|inline uint32_t ghtonl(uint32_t x) {
  175|     0|    return bswap_32(x);
  176|     0|}
  177|     0|inline uint64_t ghtonll(uint64_t x) {
  178|     0|    return gbswap_64(x);
  179|     0|}
  180|      |
  181|      |#else
  182|      |
  183|      |// These definitions are simpler on big-endian machines
  184|      |// These are functions instead of macros to avoid self-assignment warnings
  185|      |// on calls such as "i = ghtonl(i);".  This also provides type checking.
  186|      |inline uint16_t ghtons(uint16_t x) {
  187|      |    return x;
  188|      |}
  189|      |inline uint32_t ghtonl(uint32_t x) {
  190|      |    return x;
  191|      |}
  192|      |inline uint64_t ghtonll(uint64_t x) {
  193|      |    return x;
  194|      |}
  195|      |
  196|      |#endif // bytesex
  197|      |
  198|      |// ntoh* and hton* are the same thing for any size and bytesex,
  199|      |// since the function is an involution, i.e., its own inverse.
  200|      |#if !defined(__APPLE__)
  201|      |// This one is safe to take as it's an extension
  202|      |#define htonll(x) ghtonll(x)
  203|      |#define ntohll(x) htonll(x)
  204|      |#endif
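
A short sketch of the g-prefixed conversions in use (standalone, with the same assumed include paths as above); applying a conversion twice returns the original value, which is why ntohll can simply be defined in terms of htonll:

// Host-to-network conversion without netinet/in.h.
#include <byteswap.h> // assumed glibc environment
#include <cassert>
#include <cstdint>

#include "gutil/endian.h" // assumed include path

int main() {
    uint32_t host32 = 0x0A0B0C0D;
    uint64_t host64 = 0x0102030405060708ULL;
    uint32_t net32 = ghtonl(host32);  // byte-swapped on little-endian hosts, identity on big-endian
    uint64_t net64 = ghtonll(host64); // htonll/ntohll above expand to this function
    // The conversions are involutions: applying them twice restores the input.
    assert(ghtonl(net32) == host32);
    assert(ghtonll(net64) == host64);
    return 0;
}
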
  205|      |
  206|      |// Utilities to convert numbers between the current host's native byte
  207|      |// order and little-endian byte order
  208|      |//
  209|      |// Load/Store methods are alignment safe
  210|      |class LittleEndian {
  211|      |public:
  212|      |    // Conversion functions.
  213|      |#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  214|      |
  215|     0|    static uint16_t FromHost16(uint16_t x) { return x; }
  216|     0|    static uint16_t ToHost16(uint16_t x) { return x; }
  217|      |
  218|     0|    static uint32_t FromHost32(uint32_t x) { return x; }
  219|  256k|    static uint32_t ToHost32(uint32_t x) { return x; }
  220|      |
  221|     0|    static uint64_t FromHost64(uint64_t x) { return x; }
  222| 4.18k|    static uint64_t ToHost64(uint64_t x) { return x; }
  223|      |
  224|     0|    static unsigned __int128 FromHost128(unsigned __int128 x) { return x; }
  225|     0|    static unsigned __int128 ToHost128(unsigned __int128 x) { return x; }
  226|      |
  227|     0|    static wide::UInt256 FromHost256(wide::UInt256 x) { return x; }
  228|     0|    static wide::UInt256 ToHost256(wide::UInt256 x) { return x; }
  229|      |
  230|     0|    static bool IsLittleEndian() { return true; }
  231|      |
  232|      |#else
  233|      |
  234|      |    static uint16_t FromHost16(uint16_t x) { return bswap_16(x); }
  235|      |    static uint16_t ToHost16(uint16_t x) { return bswap_16(x); }
  236|      |
  237|      |    static uint32_t FromHost32(uint32_t x) { return bswap_32(x); }
  238|      |    static uint32_t ToHost32(uint32_t x) { return bswap_32(x); }
  239|      |
  240|      |    static uint64_t FromHost64(uint64_t x) { return gbswap_64(x); }
  241|      |    static uint64_t ToHost64(uint64_t x) { return gbswap_64(x); }
  242|      |
  243|      |    static unsigned __int128 FromHost128(unsigned __int128 x) { return gbswap_128(x); }
  244|      |    static unsigned __int128 ToHost128(unsigned __int128 x) { return gbswap_128(x); }
  245|      |
  246|      |    static wide::UInt256 FromHost256(wide::UInt256 x) { return gbswap_256(x); }
  247|      |    static wide::UInt256 ToHost256(wide::UInt256 x) { return gbswap_256(x); }
  248|      |
  249|      |    static bool IsLittleEndian() { return false; }
  250|      |
  251|      |#endif /* ENDIAN */
  252|      |
  253|      |    // Functions to do unaligned loads and stores in little-endian order.
  254|     0|    static uint16_t Load16(const void* p) { return ToHost16(UNALIGNED_LOAD16(p)); }
  255|      |
  256|     0|    static void Store16(void* p, uint16_t v) { UNALIGNED_STORE16(p, FromHost16(v)); }
  257|      |
  258|  256k|    static uint32_t Load32(const void* p) { return ToHost32(UNALIGNED_LOAD32(p)); }
  259|      |
  260|     0|    static void Store32(void* p, uint32_t v) { UNALIGNED_STORE32(p, FromHost32(v)); }
  261|      |
  262| 4.18k|    static uint64_t Load64(const void* p) { return ToHost64(UNALIGNED_LOAD64(p)); }
  263|      |
  264|      |    // Build a uint64_t from 1-8 bytes.
  265|      |    // 8 * len least significant bits are loaded from the memory with
  266|      |    // LittleEndian order. The 64 - 8 * len most significant bits are
  267|      |    // set all to 0.
  268|      |    // In latex-friendly words, this function returns:
  269|      |    //     $\sum_{i=0}^{len-1} p[i] 256^{i}$, where p[i] is unsigned.
  270|      |    //
  271|      |    // This function is equivalent with:
  272|      |    // uint64_t val = 0;
  273|      |    // memcpy(&val, p, len);
  274|      |    // return ToHost64(val);
  275|      |    // TODO(user): write a small benchmark and benchmark the speed
  276|      |    // of a memcpy based approach.
  277|      |    //
  278|      |    // For speed reasons this function does not work for len == 0.
  279|      |    // The caller needs to guarantee that 1 <= len <= 8.
  280|     0|    static uint64_t Load64VariableLength(const void* const p, int len) {
  281|     0|        assert(len >= 1 && len <= 8);
  282|     0|        const char* const buf = static_cast<const char*>(p);
  283|     0|        uint64_t val = 0;
  284|     0|        --len;
  285|     0|        do {
  286|     0|            val = (val << 8) | buf[len];
  287|     0|            // (--len >= 0) is about 10 % faster than (len--) in some benchmarks.
  288|     0|        } while (--len >= 0);
  289|     0|        // No ToHost64(...) needed. The bytes are accessed in little-endian manner
  290|     0|        // on every architecture.
  291|     0|        return val;
  292|     0|    }
  293|      |
  294|     0|    static void Store64(void* p, uint64_t v) { UNALIGNED_STORE64(p, FromHost64(v)); }
  295|      |};
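
A standalone usage sketch for LittleEndian (same assumed include paths), including the value Load64VariableLength produces for a 3-byte input, matching the $\sum_{i=0}^{len-1} p[i] 256^{i}$ formula in the comment:

// LittleEndian: alignment-safe, fixed little-endian byte order regardless of host.
#include <byteswap.h> // assumed glibc environment
#include <cassert>
#include <cstdint>

#include "gutil/endian.h" // assumed include path

int main() {
    unsigned char buf[8];
    // Store32 writes the least significant byte first on any host.
    LittleEndian::Store32(buf, 0x11223344u);
    assert(buf[0] == 0x44 && buf[3] == 0x11);
    assert(LittleEndian::Load32(buf) == 0x11223344u);

    // Load64VariableLength({0x01, 0x02, 0x03}, 3) = 0x01*256^0 + 0x02*256^1 + 0x03*256^2 = 0x030201.
    const unsigned char bytes[3] = {0x01, 0x02, 0x03};
    assert(LittleEndian::Load64VariableLength(bytes, 3) == 0x030201u);
    return 0;
}
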
  296|      |
  297|      |// Utilities to convert numbers between the current host's native byte
  298|      |// order and big-endian byte order (same as network byte order)
  299|      |//
  300|      |// Load/Store methods are alignment safe
  301|      |class BigEndian {
  302|      |public:
  303|      |#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  304|      |
  305|   249|    static uint16_t FromHost16(uint16_t x) { return bswap_16(x); }
  306|     1|    static uint16_t ToHost16(uint16_t x) { return bswap_16(x); }
  307|      |
  308|   217|    static uint32_t FromHost24(uint32_t x) { return bswap_24(x); }
  309|     0|    static uint32_t ToHost24(uint32_t x) { return bswap_24(x); }
  310|      |
  311|  978k|    static uint32_t FromHost32(uint32_t x) { return bswap_32(x); }
  312|     1|    static uint32_t ToHost32(uint32_t x) { return bswap_32(x); }
  313|      |
  314| 61.6k|    static uint64_t FromHost64(uint64_t x) { return gbswap_64(x); }
  315| 1.19k|    static uint64_t ToHost64(uint64_t x) { return gbswap_64(x); }
  316|      |
  317|   213|    static unsigned __int128 FromHost128(unsigned __int128 x) { return gbswap_128(x); }
  318|     1|    static unsigned __int128 ToHost128(unsigned __int128 x) { return gbswap_128(x); }
  319|      |
  320|     0|    static wide::UInt256 FromHost256(wide::UInt256 x) { return gbswap_256(x); }
  321|     1|    static wide::UInt256 ToHost256(wide::UInt256 x) { return gbswap_256(x); }
  322|      |
  323|     0|    static bool IsLittleEndian() { return true; }
  324|      |
  325|      |#else
  326|      |
  327|      |    static uint16_t FromHost16(uint16_t x) { return x; }
  328|      |    static uint16_t ToHost16(uint16_t x) { return x; }
  329|      |
  330|      |    static uint32_t FromHost24(uint32_t x) { return x; }
  331|      |    static uint32_t ToHost24(uint32_t x) { return x; }
  332|      |
  333|      |    static uint32_t FromHost32(uint32_t x) { return x; }
  334|      |    static uint32_t ToHost32(uint32_t x) { return x; }
  335|      |
  336|      |    static uint64_t FromHost64(uint64_t x) { return x; }
  337|      |    static uint64_t ToHost64(uint64_t x) { return x; }
  338|      |
  339|      |    static wide::UInt256 FromHost256(wide::UInt256 x) { return x; }
  340|      |    static wide::UInt256 ToHost256(wide::UInt256 x) { return x; }
  341|      |
  342|      |    static bool IsLittleEndian() { return false; }
  343|      |
  344|      |#endif /* ENDIAN */
  345|      |    // Functions to do unaligned loads and stores in big-endian order.
  346|     0|    static uint16_t Load16(const void* p) { return ToHost16(UNALIGNED_LOAD16(p)); }
  347|      |
  348|     0|    static void Store16(void* p, uint16_t v) { UNALIGNED_STORE16(p, FromHost16(v)); }
  349|      |
  350|     0|    static uint32_t Load32(const void* p) { return ToHost32(UNALIGNED_LOAD32(p)); }
  351|      |
  352|     0|    static void Store32(void* p, uint32_t v) { UNALIGNED_STORE32(p, FromHost32(v)); }
  353|      |
  354|     0|    static uint64_t Load64(const void* p) { return ToHost64(UNALIGNED_LOAD64(p)); }
  355|      |
  356|      |    // Build a uint64_t from 1-8 bytes.
  357|      |    // 8 * len least significant bits are loaded from the memory with
  358|      |    // BigEndian order. The 64 - 8 * len most significant bits are
  359|      |    // set all to 0.
  360|      |    // In latex-friendly words, this function returns:
  361|      |    //     $\sum_{i=0}^{len-1} p[i] 256^{i}$, where p[i] is unsigned.
  362|      |    //
  363|      |    // This function is equivalent with:
  364|      |    // uint64_t val = 0;
  365|      |    // memcpy(&val, p, len);
  366|      |    // return ToHost64(val);
  367|      |    // TODO(user): write a small benchmark and benchmark the speed
  368|      |    // of a memcpy based approach.
  369|      |    //
  370|      |    // For speed reasons this function does not work for len == 0.
  371|      |    // The caller needs to guarantee that 1 <= len <= 8.
  372|     0|    static uint64_t Load64VariableLength(const void* const p, int len) {
  373|     0|        assert(len >= 1 && len <= 8);
  374|     0|        uint64_t val = Load64(p);
  375|     0|        uint64_t mask = 0;
  376|     0|        --len;
  377|     0|        do {
  378|     0|            mask = (mask << 8) | 0xff;
  379|     0|            // (--len >= 0) is about 10 % faster than (len--) in some benchmarks.
  380|     0|        } while (--len >= 0);
  381|     0|        return val & mask;
  382|     0|    }
  383|      |
  384|     0|    static void Store64(void* p, uint64_t v) { UNALIGNED_STORE64(p, FromHost64(v)); }
  385|      |}; // BigEndian
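
The same kind of sketch for BigEndian (standalone, same assumed include paths): the most significant byte always lands first in memory, which is what makes it usable as network byte order.

// BigEndian: alignment-safe, fixed big-endian byte order regardless of host.
#include <byteswap.h> // assumed glibc environment
#include <cassert>
#include <cstdint>

#include "gutil/endian.h" // assumed include path

int main() {
    unsigned char buf[8];
    // Store32 writes the most significant byte first on any host.
    BigEndian::Store32(buf, 0x11223344u);
    assert(buf[0] == 0x11 && buf[3] == 0x44);
    assert(BigEndian::Load32(buf) == 0x11223344u);

    // FromHost64/ToHost64 are inverses: swap on little-endian hosts, identity on big-endian ones.
    uint64_t x = 0x0102030405060708ULL;
    assert(BigEndian::ToHost64(BigEndian::FromHost64(x)) == x);
    return 0;
}
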
  386|      |
  387|      |// Network byte order is big-endian
  388|      |typedef BigEndian NetworkByteOrder;
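
Since NetworkByteOrder is just an alias for BigEndian, it can be used directly when emitting wire formats. A minimal sketch (the frame() helper below is hypothetical, not part of Doris; same assumed include paths):

// Prepending a 4-byte big-endian length prefix to a payload.
#include <byteswap.h> // assumed glibc environment
#include <cstdint>
#include <string>

#include "gutil/endian.h" // assumed include path

std::string frame(const std::string& payload) {
    unsigned char prefix[4];
    NetworkByteOrder::Store32(prefix, static_cast<uint32_t>(payload.size()));
    return std::string(reinterpret_cast<char*>(prefix), sizeof prefix) + payload;
}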