/root/doris/be/src/gutil/endian.h
Line | Count | Source |
1 | | // Copyright 2005 Google Inc. |
2 | | // |
3 | | // Licensed to the Apache Software Foundation (ASF) under one |
4 | | // or more contributor license agreements. See the NOTICE file |
5 | | // distributed with this work for additional information |
6 | | // regarding copyright ownership. The ASF licenses this file |
7 | | // to you under the Apache License, Version 2.0 (the |
8 | | // "License"); you may not use this file except in compliance |
9 | | // with the License. You may obtain a copy of the License at |
10 | | // |
11 | | // http://www.apache.org/licenses/LICENSE-2.0 |
12 | | // |
13 | | // Unless required by applicable law or agreed to in writing, |
14 | | // software distributed under the License is distributed on an |
15 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
16 | | // KIND, either express or implied. See the License for the |
17 | | // specific language governing permissions and limitations |
18 | | // under the License. |
19 | | // |
20 | | // --- |
21 | | // |
22 | | // |
23 | | // Utility functions that depend on bytesex. We define htonll and ntohll, |
24 | | // as well as "Google" versions of all the standards: ghtonl, ghtons, and |
25 | | // so on. These functions do exactly the same as their standard variants, |
26 | | // but don't require including the dangerous netinet/in.h. |
27 | | // |
28 | | // Buffer routines will copy to and from buffers without causing |
29 | | // a bus error when the architecture requires different byte alignments |
30 | | |
31 | | #pragma once |
32 | | |
33 | | #include <assert.h> |
 | | #include <stdint.h> |
 | | #include <string.h> |
34 | | |
35 | | #include "vec/core/wide_integer.h" |
36 | | |
37 | | // Portable handling of unaligned loads, stores, and copies. |
38 | | // On some platforms, like ARM, the copy functions can be more efficient |
39 | | // than a load and a store. |
40 | | |
41 | | #if defined(__i386) || defined(ARCH_ATHLON) || defined(__x86_64__) || defined(_ARCH_PPC) |
42 | | |
43 | | // x86 and x86-64 can perform unaligned loads/stores directly; |
44 | | // modern PowerPC hardware can also do unaligned integer loads and stores; |
45 | | // but note: the FPU still sends unaligned loads and stores to a trap handler! |
46 | | |
47 | | #define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16_t*>(_p)) |
48 | 256k | #define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t*>(_p)) |
49 | 5.38k | #define UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t*>(_p)) |
50 | | |
51 | | #define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16_t*>(_p) = (_val)) |
52 | 0 | #define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32_t*>(_p) = (_val)) |
53 | | #define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64_t*>(_p) = (_val)) |
54 | | |
55 | | #elif defined(__arm__) && !defined(__ARM_ARCH_5__) && !defined(__ARM_ARCH_5T__) && \ |
56 | | !defined(__ARM_ARCH_5TE__) && !defined(__ARM_ARCH_5TEJ__) && !defined(__ARM_ARCH_6__) && \ |
57 | | !defined(__ARM_ARCH_6J__) && !defined(__ARM_ARCH_6K__) && !defined(__ARM_ARCH_6Z__) && \ |
58 | | !defined(__ARM_ARCH_6ZK__) && !defined(__ARM_ARCH_6T2__) |
59 | | |
60 | | // ARMv7 and newer support native unaligned accesses, but only of 16-bit |
61 | | // and 32-bit values (not 64-bit); older versions either raise a fatal signal, |
62 | | // do an unaligned read and rotate the words around a bit, or do the reads very |
63 | | // slowly (trip through kernel mode). There's no simple #define that says just |
64 | | // "ARMv7 or higher", so we have to filter away all ARMv5 and ARMv6 |
65 | | // sub-architectures. Newer gcc (>= 4.6) sets an __ARM_FEATURE_ALIGNED #define, |
66 | | // so in time, maybe we can move on to that. |
67 | | // |
68 | | // This is a mess, but there's not much we can do about it. |
69 | | |
70 | | #define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16_t*>(_p)) |
71 | | #define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t*>(_p)) |
72 | | |
73 | | #define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16_t*>(_p) = (_val)) |
74 | | #define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32_t*>(_p) = (_val)) |
75 | | |
76 | | // TODO(user): NEON supports unaligned 64-bit loads and stores. |
77 | | // See if that would be more efficient on platforms supporting it, |
78 | | // at least for copies. |
79 | | |
80 | | inline uint64_t UNALIGNED_LOAD64(const void* p) { |
81 | | uint64_t t; |
82 | | memcpy(&t, p, sizeof t); |
83 | | return t; |
84 | | } |
85 | | |
86 | | inline void UNALIGNED_STORE64(void* p, uint64_t v) { |
87 | | memcpy(p, &v, sizeof v); |
88 | | } |
89 | | |
90 | | #else |
91 | | |
92 | | #define NEED_ALIGNED_LOADS |
93 | | |
94 | | // These functions are provided for architectures that don't support |
95 | | // unaligned loads and stores. |
96 | | |
97 | | inline uint16_t UNALIGNED_LOAD16(const void* p) { |
98 | | uint16_t t; |
99 | | memcpy(&t, p, sizeof t); |
100 | | return t; |
101 | | } |
102 | | |
103 | | inline uint32_t UNALIGNED_LOAD32(const void* p) { |
104 | | uint32_t t; |
105 | | memcpy(&t, p, sizeof t); |
106 | | return t; |
107 | | } |
108 | | |
109 | | inline uint64_t UNALIGNED_LOAD64(const void* p) { |
110 | | uint64_t t; |
111 | | memcpy(&t, p, sizeof t); |
112 | | return t; |
113 | | } |
114 | | |
115 | | inline void UNALIGNED_STORE16(void* p, uint16_t v) { |
116 | | memcpy(p, &v, sizeof v); |
117 | | } |
118 | | |
119 | | inline void UNALIGNED_STORE32(void* p, uint32_t v) { |
120 | | memcpy(p, &v, sizeof v); |
121 | | } |
122 | | |
123 | | inline void UNALIGNED_STORE64(void* p, uint64_t v) { |
124 | | memcpy(p, &v, sizeof v); |
125 | | } |
126 | | |
127 | | #endif |
128 | | |
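 | | // Usage sketch (illustrative; buf is a hypothetical caller-owned buffer): |
 | | // whichever branch above was selected, the UNALIGNED_* helpers behave like |
 | | // ordinary loads/stores on a possibly misaligned pointer. |
 | | // |
 | | //   char buf[16]; |
 | | //   UNALIGNED_STORE32(buf + 1, 0x12345678u);  // misaligned store |
 | | //   uint32_t v = UNALIGNED_LOAD32(buf + 1);   // v == 0x12345678 |
 | | //   UNALIGNED_STORE64(buf + 3, 0x1122334455667788ULL); |
 | | //   uint64_t w = UNALIGNED_LOAD64(buf + 3);   // w == 0x1122334455667788 |
 | |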
129 | 62.8k | inline uint64_t gbswap_64(uint64_t host_int) { |
130 | 62.8k | #if defined(__GNUC__) && defined(__x86_64__) && !defined(__APPLE__) |
131 | | // Adapted from /usr/include/byteswap.h. Not available on Mac. |
132 | 62.8k | if (__builtin_constant_p(host_int)) { |
133 | 0 | return __bswap_constant_64(host_int); |
134 | 62.8k | } else { |
135 | 62.8k | uint64_t result; |
136 | 62.8k | __asm__("bswap %0" : "=r"(result) : "0"(host_int)); |
137 | 62.8k | return result; |
138 | 62.8k | } |
139 | | #elif defined(bswap_64) |
140 | | return bswap_64(host_int); |
141 | | #else |
142 | | return static_cast<uint64_t>(bswap_32(static_cast<uint32_t>(host_int >> 32))) | |
143 | | (static_cast<uint64_t>(bswap_32(static_cast<uint32_t>(host_int))) << 32); |
144 | | #endif // bswap_64 |
145 | 62.8k | } |
146 | | |
147 | 214 | inline unsigned __int128 gbswap_128(unsigned __int128 host_int) { |
148 | 214 | return static_cast<unsigned __int128>(bswap_64(static_cast<uint64_t>(host_int >> 64))) | |
149 | 214 | (static_cast<unsigned __int128>(bswap_64(static_cast<uint64_t>(host_int))) << 64); |
150 | 214 | } |
151 | | |
152 | 1 | inline wide::UInt256 gbswap_256(wide::UInt256 host_int) { |
153 | 1 | wide::UInt256 result {gbswap_64(host_int.items[3]), gbswap_64(host_int.items[2]), |
154 | 1 | gbswap_64(host_int.items[1]), gbswap_64(host_int.items[0])}; |
155 | 1 | return result; |
156 | 1 | } |
157 | | |
158 | | // Swap bytes of a 24-bit value. |
159 | 217 | inline uint32_t bswap_24(uint32_t x) { |
160 | 217 | return ((x & 0x0000ffULL) << 16) | ((x & 0x00ff00ULL)) | ((x & 0xff0000ULL) >> 16); |
161 | 217 | } |
162 | | |
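 | | // Usage sketch (illustrative): the gbswap_* and bswap_24 helpers reverse |
 | | // byte order unconditionally, independent of the host's endianness, e.g.: |
 | | // |
 | | //   gbswap_64(0x0102030405060708ULL) == 0x0807060504030201ULL |
 | | //   bswap_24(0x00123456u)            == 0x00563412u |
 | |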
163 | | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ |
164 | | |
165 | | // Definitions for ntohl etc. that don't require us to include |
166 | | // netinet/in.h. We wrap bswap_32 and bswap_16 in functions rather |
167 | | // than just #defining them because in debug mode, gcc doesn't |
168 | | // correctly handle the (rather involved) definitions of bswap_32. |
169 | | // gcc guarantees that inline functions are as fast as macros, so |
170 | | // this isn't a performance hit. |
171 | 0 | inline uint16_t ghtons(uint16_t x) { |
172 | 0 | return bswap_16(x); |
173 | 0 | } |
174 | 0 | inline uint32_t ghtonl(uint32_t x) { |
175 | 0 | return bswap_32(x); |
176 | 0 | } |
177 | 0 | inline uint64_t ghtonll(uint64_t x) { |
178 | 0 | return gbswap_64(x); |
179 | 0 | } |
180 | | |
181 | | #else |
182 | | |
183 | | // These definitions are simpler on big-endian machines |
184 | | // These are functions instead of macros to avoid self-assignment warnings |
185 | | // on calls such as "i = ghtonl(i);". This also provides type checking. |
186 | | inline uint16_t ghtons(uint16_t x) { |
187 | | return x; |
188 | | } |
189 | | inline uint32_t ghtonl(uint32_t x) { |
190 | | return x; |
191 | | } |
192 | | inline uint64_t ghtonll(uint64_t x) { |
193 | | return x; |
194 | | } |
195 | | |
196 | | #endif // bytesex |
197 | | |
198 | | // ntoh* and hton* are the same thing for any size and bytesex, |
199 | | // since the function is an involution, i.e., its own inverse. |
200 | | #if !defined(__APPLE__) |
201 | | // This one is safe to take as it's an extension |
202 | | #define htonll(x) ghtonll(x) |
203 | | #define ntohll(x) htonll(x) |
204 | | #endif |
205 | | |
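 | | // Usage sketch (illustrative): on a little-endian host, |
 | | // ghtonl(0x0A0B0C0Du) == 0x0D0C0B0Au, and applying the same call again |
 | | // restores the original value, since byte swapping is an involution: |
 | | // |
 | | //   uint64_t wire = ghtonll(0x0102030405060708ULL); // network order |
 | | //   uint64_t host = ghtonll(wire);                  // back to host order |
 | |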
206 | | // Utilities to convert numbers between the current host's native byte |
207 | | // order and little-endian byte order |
208 | | // |
209 | | // Load/Store methods are alignment safe |
210 | | class LittleEndian { |
211 | | public: |
212 | | // Conversion functions. |
213 | | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ |
214 | | |
215 | 0 | static uint16_t FromHost16(uint16_t x) { return x; } |
216 | 0 | static uint16_t ToHost16(uint16_t x) { return x; } |
217 | | |
218 | 0 | static uint32_t FromHost32(uint32_t x) { return x; } |
219 | 256k | static uint32_t ToHost32(uint32_t x) { return x; } |
220 | | |
221 | 0 | static uint64_t FromHost64(uint64_t x) { return x; } |
222 | 4.18k | static uint64_t ToHost64(uint64_t x) { return x; } |
223 | | |
224 | 0 | static unsigned __int128 FromHost128(unsigned __int128 x) { return x; } |
225 | 0 | static unsigned __int128 ToHost128(unsigned __int128 x) { return x; } |
226 | | |
227 | 0 | static wide::UInt256 FromHost256(wide::UInt256 x) { return x; } |
228 | 0 | static wide::UInt256 ToHost256(wide::UInt256 x) { return x; } |
229 | | |
230 | 0 | static bool IsLittleEndian() { return true; } |
231 | | |
232 | | #else |
233 | | |
234 | | static uint16_t FromHost16(uint16_t x) { return bswap_16(x); } |
235 | | static uint16_t ToHost16(uint16_t x) { return bswap_16(x); } |
236 | | |
237 | | static uint32_t FromHost32(uint32_t x) { return bswap_32(x); } |
238 | | static uint32_t ToHost32(uint32_t x) { return bswap_32(x); } |
239 | | |
240 | | static uint64_t FromHost64(uint64_t x) { return gbswap_64(x); } |
241 | | static uint64_t ToHost64(uint64_t x) { return gbswap_64(x); } |
242 | | |
243 | | static unsigned __int128 FromHost128(unsigned __int128 x) { return gbswap_128(x); } |
244 | | static unsigned __int128 ToHost128(unsigned __int128 x) { return gbswap_128(x); } |
245 | | |
246 | | static wide::UInt256 FromHost256(wide::UInt256 x) { return gbswap_256(x); } |
247 | | static wide::UInt256 ToHost256(wide::UInt256 x) { return gbswap_256(x); } |
248 | | |
249 | | static bool IsLittleEndian() { return false; } |
250 | | |
251 | | #endif /* ENDIAN */ |
252 | | |
253 | | // Functions to do unaligned loads and stores in little-endian order. |
254 | 0 | static uint16_t Load16(const void* p) { return ToHost16(UNALIGNED_LOAD16(p)); } |
255 | | |
256 | 0 | static void Store16(void* p, uint16_t v) { UNALIGNED_STORE16(p, FromHost16(v)); } |
257 | | |
258 | 256k | static uint32_t Load32(const void* p) { return ToHost32(UNALIGNED_LOAD32(p)); } |
259 | | |
260 | 0 | static void Store32(void* p, uint32_t v) { UNALIGNED_STORE32(p, FromHost32(v)); } |
261 | | |
262 | 4.18k | static uint64_t Load64(const void* p) { return ToHost64(UNALIGNED_LOAD64(p)); } |
263 | | |
264 | | // Build a uint64_t from 1-8 bytes. |
265 | | // 8 * len least significant bits are loaded from the memory with |
266 | | // LittleEndian order. The 64 - 8 * len most significant bits are |
267 | | // set all to 0. |
268 | | // In latex-friendly words, this function returns: |
269 | | // $\sum_{i=0}^{len-1} p[i] 256^{i}$, where p[i] is unsigned. |
270 | | // |
271 | | // This function is equivalent to: |
272 | | // uint64_t val = 0; |
273 | | // memcpy(&val, p, len); |
274 | | // return ToHost64(val); |
275 | | // TODO(user): write a small benchmark and benchmark the speed |
276 | | // of a memcpy based approach. |
277 | | // |
278 | | // For speed reasons this function does not work for len == 0. |
279 | | // The caller needs to guarantee that 1 <= len <= 8. |
280 | 0 | static uint64_t Load64VariableLength(const void* const p, int len) { |
281 | 0 | assert(len >= 1 && len <= 8); |
282 | 0 | const char* const buf = static_cast<const char*>(p); |
283 | 0 | uint64_t val = 0; |
284 | 0 | --len; |
285 | 0 | do { |
 | | // Cast through uint8_t so that bytes >= 0x80 are not sign-extended |
 | | // when char is a signed type. |
286 | 0 | val = (val << 8) | static_cast<uint8_t>(buf[len]); |
287 | 0 | // (--len >= 0) is about 10 % faster than (len--) in some benchmarks. |
288 | 0 | } while (--len >= 0); |
289 | 0 | // No ToHost64(...) needed. The bytes are accessed in little-endian manner |
290 | 0 | // on every architecture. |
291 | 0 | return val; |
292 | 0 | } |
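 | |
 | | // Example (illustrative): with p pointing at bytes {0x01, 0x02, 0x03}, |
 | | // Load64VariableLength(p, 3) returns 0x030201: byte p[0] ends up as the |
 | | // least significant byte of the result. |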
293 | | |
294 | 0 | static void Store64(void* p, uint64_t v) { UNALIGNED_STORE64(p, FromHost64(v)); } |
295 | | }; |
296 | | |
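 | | // Usage sketch (illustrative): decoding a fixed-layout little-endian |
 | | // record from a byte buffer, e.g. a 4-byte length followed by an 8-byte |
 | | // id (the names here are hypothetical, not part of this header): |
 | | // |
 | | //   const char* rec = ...;                        // >= 12 readable bytes |
 | | //   uint32_t length = LittleEndian::Load32(rec); |
 | | //   uint64_t id     = LittleEndian::Load64(rec + 4); |
 | |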
297 | | // Utilities to convert numbers between the current host's native byte |
298 | | // order and big-endian byte order (same as network byte order) |
299 | | // |
300 | | // Load/Store methods are alignment safe |
301 | | class BigEndian { |
302 | | public: |
303 | | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ |
304 | | |
305 | 249 | static uint16_t FromHost16(uint16_t x) { return bswap_16(x); } |
306 | 1 | static uint16_t ToHost16(uint16_t x) { return bswap_16(x); } |
307 | | |
308 | 217 | static uint32_t FromHost24(uint32_t x) { return bswap_24(x); } |
309 | 0 | static uint32_t ToHost24(uint32_t x) { return bswap_24(x); } |
310 | | |
311 | 978k | static uint32_t FromHost32(uint32_t x) { return bswap_32(x); } |
312 | 1 | static uint32_t ToHost32(uint32_t x) { return bswap_32(x); } |
313 | | |
314 | 61.6k | static uint64_t FromHost64(uint64_t x) { return gbswap_64(x); } |
315 | 1.19k | static uint64_t ToHost64(uint64_t x) { return gbswap_64(x); } |
316 | | |
317 | 213 | static unsigned __int128 FromHost128(unsigned __int128 x) { return gbswap_128(x); } |
318 | 1 | static unsigned __int128 ToHost128(unsigned __int128 x) { return gbswap_128(x); } |
319 | | |
320 | 0 | static wide::UInt256 FromHost256(wide::UInt256 x) { return gbswap_256(x); } |
321 | 1 | static wide::UInt256 ToHost256(wide::UInt256 x) { return gbswap_256(x); } |
322 | | |
323 | 0 | static bool IsLittleEndian() { return true; } |
324 | | |
325 | | #else |
326 | | |
327 | | static uint16_t FromHost16(uint16_t x) { return x; } |
328 | | static uint16_t ToHost16(uint16_t x) { return x; } |
329 | | |
330 | | static uint32_t FromHost24(uint32_t x) { return x; } |
331 | | static uint32_t ToHost24(uint32_t x) { return x; } |
332 | | |
333 | | static uint32_t FromHost32(uint32_t x) { return x; } |
334 | | static uint32_t ToHost32(uint32_t x) { return x; } |
335 | | |
336 | | static uint64_t FromHost64(uint64_t x) { return x; } |
337 | | static uint64_t ToHost64(uint64_t x) { return x; } |
338 | | |
 | | static unsigned __int128 FromHost128(unsigned __int128 x) { return x; } |
 | | static unsigned __int128 ToHost128(unsigned __int128 x) { return x; } |
 | |
339 | | static wide::UInt256 FromHost256(wide::UInt256 x) { return x; } |
340 | | static wide::UInt256 ToHost256(wide::UInt256 x) { return x; } |
341 | | |
342 | | static bool IsLittleEndian() { return false; } |
343 | | |
344 | | #endif /* ENDIAN */ |
345 | | // Functions to do unaligned loads and stores in big-endian order. |
346 | 0 | static uint16_t Load16(const void* p) { return ToHost16(UNALIGNED_LOAD16(p)); } |
347 | | |
348 | 0 | static void Store16(void* p, uint16_t v) { UNALIGNED_STORE16(p, FromHost16(v)); } |
349 | | |
350 | 0 | static uint32_t Load32(const void* p) { return ToHost32(UNALIGNED_LOAD32(p)); } |
351 | | |
352 | 0 | static void Store32(void* p, uint32_t v) { UNALIGNED_STORE32(p, FromHost32(v)); } |
353 | | |
354 | 0 | static uint64_t Load64(const void* p) { return ToHost64(UNALIGNED_LOAD64(p)); } |
355 | | |
356 | | // Build a uint64_t from the last len bytes of the 64-bit big-endian |
357 | | // word at p. The 8 * len least significant bits of the result hold |
358 | | // those bytes in big-endian order; the 64 - 8 * len most significant |
359 | | // bits are all set to 0. In latex-friendly words, this function |
360 | | // returns: |
361 | | //   $\sum_{i=0}^{len-1} p[7-i] 256^{i}$, where p[i] is unsigned. |
362 | | // |
363 | | // Note: unlike LittleEndian::Load64VariableLength, this always reads a |
364 | | // full (possibly unaligned) 64-bit word starting at p, so the caller |
365 | | // must guarantee that at least 8 bytes are readable at p; the bytes |
366 | | // that contribute to the result are p[8 - len] .. p[7], not |
367 | | // p[0] .. p[len - 1]. |
368 | | // |
369 | | // For speed reasons this function does not work for len == 0. |
370 | | // The caller needs to guarantee that 1 <= len <= 8. |
372 | 0 | static uint64_t Load64VariableLength(const void* const p, int len) { |
373 | 0 | assert(len >= 1 && len <= 8); |
374 | 0 | uint64_t val = Load64(p); |
375 | 0 | uint64_t mask = 0; |
376 | 0 | --len; |
377 | 0 | do { |
378 | 0 | mask = (mask << 8) | 0xff; |
379 | 0 | // (--len >= 0) is about 10 % faster than (len--) in some benchmarks. |
380 | 0 | } while (--len >= 0); |
381 | 0 | return val & mask; |
382 | 0 | } |
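 | |
 | | // Example (illustrative): with p pointing at 8 readable bytes |
 | | // {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, |
 | | // Load64VariableLength(p, 3) returns 0x060708, i.e. the last 3 bytes of |
 | | // the 8-byte window in big-endian order. |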
383 | | |
384 | 0 | static void Store64(void* p, uint64_t v) { UNALIGNED_STORE64(p, FromHost64(v)); } |
385 | | }; // BigEndian |
386 | | |
387 | | // Network byte order is big-endian |
388 | | typedef BigEndian NetworkByteOrder; |
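 | |
 | | // Usage sketch (illustrative; buf is a hypothetical caller-owned buffer): |
 | | // writing a 32-bit value in network (big-endian) byte order into a wire |
 | | // buffer, and reading it back: |
 | | // |
 | | //   char buf[4]; |
 | | //   NetworkByteOrder::Store32(buf, 0x0A0B0C0Du); // buf = {0x0A, 0x0B, 0x0C, 0x0D} |
 | | //   uint32_t v = NetworkByteOrder::Load32(buf);  // v == 0x0A0B0C0D |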