/root/doris/be/src/gutil/bits.h

Source (jump to first uncovered line)
// Copyright 2002 and onwards Google Inc.
//
// A collection of useful (static) bit-twiddling functions.

#pragma once


#include "gutil/integral_types.h"
// IWYU pragma: no_include <butil/macros.h>
#include "gutil/macros.h" // IWYU pragma: keep

class Bits {
public:
    // Return the number of one bits in the given integer.
    static int CountOnesInByte(unsigned char n);

    static int CountOnes(uint32 n) {
        n -= ((n >> 1) & 0x55555555);
        n = ((n >> 2) & 0x33333333) + (n & 0x33333333);
        return (((n + (n >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
    }

    // Count bits using sideways addition [WWG'57]. See Knuth TAOCP v4 7.1.3(59)
    static inline int CountOnes64(uint64 n) {
#if defined(__x86_64__)
        n -= (n >> 1) & 0x5555555555555555ULL;
        n = ((n >> 2) & 0x3333333333333333ULL) + (n & 0x3333333333333333ULL);
        return (((n + (n >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56;
#else
        return CountOnes(n >> 32) + CountOnes(n & 0xffffffff);
#endif
    }

    // Count bits using popcnt instruction (available on argo machines).
    // Doesn't check if the instruction exists.
    // Please use TestCPUFeature(POPCNT) from base/cpuid/cpuid.h before using this.
    static inline int CountOnes64withPopcount(uint64 n) {
#if defined(__x86_64__) && defined __GNUC__
        int64 count = 0;
        asm("popcnt %1,%0" : "=r"(count) : "rm"(n) : "cc");
        return count;
#else
        return CountOnes64(n);
#endif
    }

    // Reverse the bits in the given integer.
    static uint8 ReverseBits8(uint8 n);
    static uint32 ReverseBits32(uint32 n);
    static uint64 ReverseBits64(uint64 n);

    // Return the number of one bits in the byte sequence.
    static int Count(const void* m, int num_bytes);

    // Return the number of different bits in the given byte sequences.
    // (i.e., the Hamming distance)
    static int Difference(const void* m1, const void* m2, int num_bytes);

    // Return the number of different bits in the given byte sequences,
    // up to a maximum.  Values larger than the maximum may be returned
    // (because multiple bits are checked at a time), but the function
    // may exit early if the cap is exceeded.
    static int CappedDifference(const void* m1, const void* m2, int num_bytes, int cap);

    // Return floor(log2(n)) for positive integer n.  Returns -1 iff n == 0.
    static int Log2Floor(uint32 n);
    static int Log2Floor64(uint64 n);

    // Potentially faster version of Log2Floor() that returns an
    // undefined value if n == 0
    static int Log2FloorNonZero(uint32 n);
    static int Log2FloorNonZero64(uint64 n);

    // Return ceiling(log2(n)) for positive integer n.  Returns -1 iff n == 0.
    static int Log2Ceiling(uint32 n);
    static int Log2Ceiling64(uint64 n);

    // Return the first set least / most significant bit, 0-indexed.  Returns an
    // undefined value if n == 0.  FindLSBSetNonZero() is similar to ffs() except
    // that it's 0-indexed, while FindMSBSetNonZero() is the same as
    // Log2FloorNonZero().
    static int FindLSBSetNonZero(uint32 n);
    static int FindLSBSetNonZero64(uint64 n);
    static int FindMSBSetNonZero(uint32 n) { return Log2FloorNonZero(n); }
    static int FindMSBSetNonZero64(uint64 n) { return Log2FloorNonZero64(n); }

    // Portable implementations
    static int Log2Floor_Portable(uint32 n);
    static int Log2FloorNonZero_Portable(uint32 n);
    static int FindLSBSetNonZero_Portable(uint32 n);
    static int Log2Floor64_Portable(uint64 n);
    static int Log2FloorNonZero64_Portable(uint64 n);
    static int FindLSBSetNonZero64_Portable(uint64 n);

    // Viewing bytes as a stream of unsigned bytes, does that stream
    // contain any byte equal to c?
    template <class T>
    static bool BytesContainByte(T bytes, uint8 c);

    // Viewing bytes as a stream of unsigned bytes, does that stream
    // contain any byte b < c?
    template <class T>
    static bool BytesContainByteLessThan(T bytes, uint8 c);

    // Viewing bytes as a stream of unsigned bytes, are all elements of that
    // stream in [lo, hi]?
    template <class T>
    static bool BytesAllInRange(T bytes, uint8 lo, uint8 hi);

private:
    static const char num_bits[];
    static const unsigned char bit_reverse_table[];
    DISALLOW_COPY_AND_ASSIGN(Bits);
};

// A utility class for some handy bit patterns.  The names l and h
// were chosen to match Knuth Volume 4: l is 0x010101... and h is 0x808080...;
// half_ones is ones in the lower half only.  We assume sizeof(T) is 1 or even.
template <class T>
struct BitPattern {
    static const T half_ones = (static_cast<T>(1) << (sizeof(T) * 4)) - 1;
    static const T l = (sizeof(T) == 1) ? 1 : (half_ones / 0xff * (half_ones + 2));
    static const T h = ~(l * 0x7f);
};

// ------------------------------------------------------------------------
// Implementation details follow
// ------------------------------------------------------------------------

// use GNU builtins where available
#if defined(__GNUC__) && ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
inline int Bits::Log2Floor(uint32 n) {
    return n == 0 ? -1 : 31 ^ __builtin_clz(n);
}

inline int Bits::Log2FloorNonZero(uint32 n) {
    return 31 ^ __builtin_clz(n);
}

inline int Bits::FindLSBSetNonZero(uint32 n) {
    return __builtin_ctz(n);
}

inline int Bits::Log2Floor64(uint64 n) {
    return n == 0 ? -1 : 63 ^ __builtin_clzll(n);
}

inline int Bits::Log2FloorNonZero64(uint64 n) {
    return 63 ^ __builtin_clzll(n);
}

inline int Bits::FindLSBSetNonZero64(uint64 n) {
    return __builtin_ctzll(n);
}
#elif defined(_MSC_VER)
#include "gutil/bits-internal-windows.h"
#else
#include "gutil/bits-internal-unknown.h"
#endif

inline int Bits::CountOnesInByte(unsigned char n) {
    return num_bits[n];
}

inline uint8 Bits::ReverseBits8(unsigned char n) {
    n = ((n >> 1) & 0x55) | ((n & 0x55) << 1);
    n = ((n >> 2) & 0x33) | ((n & 0x33) << 2);
    return ((n >> 4) & 0x0f) | ((n & 0x0f) << 4);
}

inline uint32 Bits::ReverseBits32(uint32 n) {
    n = ((n >> 1) & 0x55555555) | ((n & 0x55555555) << 1);
    n = ((n >> 2) & 0x33333333) | ((n & 0x33333333) << 2);
    n = ((n >> 4) & 0x0F0F0F0F) | ((n & 0x0F0F0F0F) << 4);
    n = ((n >> 8) & 0x00FF00FF) | ((n & 0x00FF00FF) << 8);
    return (n >> 16) | (n << 16);
}

inline uint64 Bits::ReverseBits64(uint64 n) {
#if defined(__x86_64__)
    n = ((n >> 1) & 0x5555555555555555ULL) | ((n & 0x5555555555555555ULL) << 1);
    n = ((n >> 2) & 0x3333333333333333ULL) | ((n & 0x3333333333333333ULL) << 2);
    n = ((n >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((n & 0x0F0F0F0F0F0F0F0FULL) << 4);
    n = ((n >> 8) & 0x00FF00FF00FF00FFULL) | ((n & 0x00FF00FF00FF00FFULL) << 8);
    n = ((n >> 16) & 0x0000FFFF0000FFFFULL) | ((n & 0x0000FFFF0000FFFFULL) << 16);
    return (n >> 32) | (n << 32);
#else
    return ReverseBits32(n >> 32) | (static_cast<uint64>(ReverseBits32(n & 0xffffffff)) << 32);
#endif
}

inline int Bits::Log2FloorNonZero_Portable(uint32 n) {
    // Just use the common routine
    return Log2Floor(n);
}

// Log2Floor64() is defined in terms of Log2Floor32(), Log2FloorNonZero32()
inline int Bits::Log2Floor64_Portable(uint64 n) {
    const uint32 topbits = static_cast<uint32>(n >> 32);
    if (topbits == 0) {
        // Top bits are zero, so scan in bottom bits
        return Log2Floor(static_cast<uint32>(n));
    } else {
        return 32 + Log2FloorNonZero(topbits);
    }
}

// Log2FloorNonZero64() is defined in terms of Log2FloorNonZero32()
inline int Bits::Log2FloorNonZero64_Portable(uint64 n) {
    const uint32 topbits = static_cast<uint32>(n >> 32);
    if (topbits == 0) {
        // Top bits are zero, so scan in bottom bits
        return Log2FloorNonZero(static_cast<uint32>(n));
    } else {
        return 32 + Log2FloorNonZero(topbits);
    }
}

// FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero()
inline int Bits::FindLSBSetNonZero64_Portable(uint64 n) {
    const uint32 bottombits = static_cast<uint32>(n);
    if (bottombits == 0) {
        // Bottom bits are zero, so scan in top bits
        return 32 + FindLSBSetNonZero(static_cast<uint32>(n >> 32));
    } else {
        return FindLSBSetNonZero(bottombits);
    }
}

template <class T>
 bool Bits::BytesContainByteLessThan(T bytes, uint8 c) {
    T l = BitPattern<T>::l;
    T h = BitPattern<T>::h;
    // The c <= 0x80 code is straight out of Knuth Volume 4.
    // Usually c will be manifestly constant.
    return c <= 0x80 ? ((h & (bytes - l * c) & ~bytes) != 0)
                     : ((((bytes - l * c) | (bytes ^ h)) & h) != 0);
}

template <class T>
 bool Bits::BytesContainByte(T bytes, uint8 c) {
    // Usually c will be manifestly constant.
    return Bits::BytesContainByteLessThan<T>(bytes ^ (c * BitPattern<T>::l), 1);
}

template <class T>
 bool Bits::BytesAllInRange(T bytes, uint8 lo, uint8 hi) {
    T l = BitPattern<T>::l;
    T h = BitPattern<T>::h;
    // In the common case, lo and hi are manifest constants.
    if (lo > hi) {
        return false;
    }
    if (hi - lo < 128) {
        T x = bytes - l * lo;
        T y = bytes + l * (127 - hi);
        return ((x | y) & h) == 0;
    }
    return !Bits::BytesContainByteLessThan(bytes + (255 - hi) * l, lo + (255 - hi));
}

Coverage Report

Created: 2024-11-20 21:05

Line	Count	Source (jump to first uncovered line)
1		// Copyright 2002 and onwards Google Inc.
2		//
3		// A collection of useful (static) bit-twiddling functions.
4
5		#pragma once
6
7
8		#include "gutil/integral_types.h"
9		// IWYU pragma: no_include <butil/macros.h>
10		#include "gutil/macros.h" // IWYU pragma: keep
11
12		class Bits {
13		public:
14		// Return the number of one bits in the given integer.
15		static int CountOnesInByte(unsigned char n);
16
17	0	static int CountOnes(uint32 n) {
18	0	n -= ((n >> 1) & 0x55555555);
19	0	n = ((n >> 2) & 0x33333333) + (n & 0x33333333);
20	0	return (((n + (n >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
21	0	}
22
23		// Count bits using sideways addition [WWG'57]. See Knuth TAOCP v4 7.1.3(59)
24	0	static inline int CountOnes64(uint64 n) {
25	0	#if defined(__x86_64__)
26	0	n -= (n >> 1) & 0x5555555555555555ULL;
27	0	n = ((n >> 2) & 0x3333333333333333ULL) + (n & 0x3333333333333333ULL);
28	0	return (((n + (n >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56;
29	0	#else
30	0	return CountOnes(n >> 32) + CountOnes(n & 0xffffffff);
31	0	#endif
32	0	}
33
34		// Count bits using popcnt instruction (available on argo machines).
35		// Doesn't check if the instruction exists.
36		// Please use TestCPUFeature(POPCNT) from base/cpuid/cpuid.h before using this.
37	0	static inline int CountOnes64withPopcount(uint64 n) {
38	0	#if defined(__x86_64__) && defined __GNUC__
39	0	int64 count = 0;
40	0	asm("popcnt %1,%0" : "=r"(count) : "rm"(n) : "cc");
41	0	return count;
42	0	#else
43	0	return CountOnes64(n);
44	0	#endif
45	0	}
46
47		// Reverse the bits in the given integer.
48		static uint8 ReverseBits8(uint8 n);
49		static uint32 ReverseBits32(uint32 n);
50		static uint64 ReverseBits64(uint64 n);
51
52		// Return the number of one bits in the byte sequence.
53		static int Count(const void* m, int num_bytes);
54
55		// Return the number of different bits in the given byte sequences.
56		// (i.e., the Hamming distance)
57		static int Difference(const void* m1, const void* m2, int num_bytes);
58
59		// Return the number of different bits in the given byte sequences,
60		// up to a maximum. Values larger than the maximum may be returned
61		// (because multiple bits are checked at a time), but the function
62		// may exit early if the cap is exceeded.
63		static int CappedDifference(const void* m1, const void* m2, int num_bytes, int cap);
64
65		// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
66		static int Log2Floor(uint32 n);
67		static int Log2Floor64(uint64 n);
68
69		// Potentially faster version of Log2Floor() that returns an
70		// undefined value if n == 0
71		static int Log2FloorNonZero(uint32 n);
72		static int Log2FloorNonZero64(uint64 n);
73
74		// Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0.
75		static int Log2Ceiling(uint32 n);
76		static int Log2Ceiling64(uint64 n);
77
78		// Return the first set least / most significant bit, 0-indexed. Returns an
79		// undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except
80		// that it's 0-indexed, while FindMSBSetNonZero() is the same as
81		// Log2FloorNonZero().
82		static int FindLSBSetNonZero(uint32 n);
83		static int FindLSBSetNonZero64(uint64 n);
84	0	static int FindMSBSetNonZero(uint32 n) { return Log2FloorNonZero(n); }
85	0	static int FindMSBSetNonZero64(uint64 n) { return Log2FloorNonZero64(n); }
86
87		// Portable implementations
88		static int Log2Floor_Portable(uint32 n);
89		static int Log2FloorNonZero_Portable(uint32 n);
90		static int FindLSBSetNonZero_Portable(uint32 n);
91		static int Log2Floor64_Portable(uint64 n);
92		static int Log2FloorNonZero64_Portable(uint64 n);
93		static int FindLSBSetNonZero64_Portable(uint64 n);
94
95		// Viewing bytes as a stream of unsigned bytes, does that stream
96		// contain any byte equal to c?
97		template <class T>
98		static bool BytesContainByte(T bytes, uint8 c);
99
100		// Viewing bytes as a stream of unsigned bytes, does that stream
101		// contain any byte b < c?
102		template <class T>
103		static bool BytesContainByteLessThan(T bytes, uint8 c);
104
105		// Viewing bytes as a stream of unsigned bytes, are all elements of that
106		// stream in [lo, hi]?
107		template <class T>
108		static bool BytesAllInRange(T bytes, uint8 lo, uint8 hi);
109
110		private:
111		static const char num_bits[];
112		static const unsigned char bit_reverse_table[];
113		DISALLOW_COPY_AND_ASSIGN(Bits);
114		};
115
116		// A utility class for some handy bit patterns. The names l and h
117		// were chosen to match Knuth Volume 4: l is 0x010101... and h is 0x808080...;
118		// half_ones is ones in the lower half only. We assume sizeof(T) is 1 or even.
119		template <class T>
120		struct BitPattern {
121		static const T half_ones = (static_cast<T>(1) << (sizeof(T) * 4)) - 1;
122		static const T l = (sizeof(T) == 1) ? 1 : (half_ones / 0xff * (half_ones + 2));
123		static const T h = ~(l * 0x7f);
124		};
125
126		// ------------------------------------------------------------------------
127		// Implementation details follow
128		// ------------------------------------------------------------------------
129
130		// use GNU builtins where available
131		#if defined(__GNUC__) && ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) \|\| __GNUC__ >= 4)
132	0	inline int Bits::Log2Floor(uint32 n) {
133	0	return n == 0 ? -1 : 31 ^ __builtin_clz(n);
134	0	}
135
136	0	inline int Bits::Log2FloorNonZero(uint32 n) {
137	0	return 31 ^ __builtin_clz(n);
138	0	}
139
140	324	inline int Bits::FindLSBSetNonZero(uint32 n) {
141	324	return __builtin_ctz(n);
142	324	}
143
144	0	inline int Bits::Log2Floor64(uint64 n) {
145	0	return n == 0 ? -1 : 63 ^ __builtin_clzll(n);
146	0	}
147
148	0	inline int Bits::Log2FloorNonZero64(uint64 n) {
149	0	return 63 ^ __builtin_clzll(n);
150	0	}
151
152	0	inline int Bits::FindLSBSetNonZero64(uint64 n) {
153	0	return __builtin_ctzll(n);
154	0	}
155		#elif defined(_MSC_VER)
156		#include "gutil/bits-internal-windows.h"
157		#else
158		#include "gutil/bits-internal-unknown.h"
159		#endif
160
161	0	inline int Bits::CountOnesInByte(unsigned char n) {
162	0	return num_bits[n];
163	0	}
164
165	0	inline uint8 Bits::ReverseBits8(unsigned char n) {
166	0	n = ((n >> 1) & 0x55) \| ((n & 0x55) << 1);
167	0	n = ((n >> 2) & 0x33) \| ((n & 0x33) << 2);
168	0	return ((n >> 4) & 0x0f) \| ((n & 0x0f) << 4);
169	0	}
170
171	0	inline uint32 Bits::ReverseBits32(uint32 n) {
172	0	n = ((n >> 1) & 0x55555555) \| ((n & 0x55555555) << 1);
173	0	n = ((n >> 2) & 0x33333333) \| ((n & 0x33333333) << 2);
174	0	n = ((n >> 4) & 0x0F0F0F0F) \| ((n & 0x0F0F0F0F) << 4);
175	0	n = ((n >> 8) & 0x00FF00FF) \| ((n & 0x00FF00FF) << 8);
176	0	return (n >> 16) \| (n << 16);
177	0	}
178
179	0	inline uint64 Bits::ReverseBits64(uint64 n) {
180	0	#if defined(__x86_64__)
181	0	n = ((n >> 1) & 0x5555555555555555ULL) \| ((n & 0x5555555555555555ULL) << 1);
182	0	n = ((n >> 2) & 0x3333333333333333ULL) \| ((n & 0x3333333333333333ULL) << 2);
183	0	n = ((n >> 4) & 0x0F0F0F0F0F0F0F0FULL) \| ((n & 0x0F0F0F0F0F0F0F0FULL) << 4);
184	0	n = ((n >> 8) & 0x00FF00FF00FF00FFULL) \| ((n & 0x00FF00FF00FF00FFULL) << 8);
185	0	n = ((n >> 16) & 0x0000FFFF0000FFFFULL) \| ((n & 0x0000FFFF0000FFFFULL) << 16);
186	0	return (n >> 32) \| (n << 32);
187	0	#else
188	0	return ReverseBits32(n >> 32) \| (static_cast<uint64>(ReverseBits32(n & 0xffffffff)) << 32);
189	0	#endif
190	0	}
191
192	0	inline int Bits::Log2FloorNonZero_Portable(uint32 n) {
193	0	// Just use the common routine
194	0	return Log2Floor(n);
195	0	}
196
197		// Log2Floor64() is defined in terms of Log2Floor32(), Log2FloorNonZero32()
198	0	inline int Bits::Log2Floor64_Portable(uint64 n) {
199	0	const uint32 topbits = static_cast<uint32>(n >> 32);
200	0	if (topbits == 0) {
201	0	// Top bits are zero, so scan in bottom bits
202	0	return Log2Floor(static_cast<uint32>(n));
203	0	} else {
204	0	return 32 + Log2FloorNonZero(topbits);
205	0	}
206	0	}
207
208		// Log2FloorNonZero64() is defined in terms of Log2FloorNonZero32()
209	0	inline int Bits::Log2FloorNonZero64_Portable(uint64 n) {
210	0	const uint32 topbits = static_cast<uint32>(n >> 32);
211	0	if (topbits == 0) {
212	0	// Top bits are zero, so scan in bottom bits
213	0	return Log2FloorNonZero(static_cast<uint32>(n));
214	0	} else {
215	0	return 32 + Log2FloorNonZero(topbits);
216	0	}
217	0	}
218
219		// FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero()
220	0	inline int Bits::FindLSBSetNonZero64_Portable(uint64 n) {
221	0	const uint32 bottombits = static_cast<uint32>(n);
222	0	if (bottombits == 0) {
223	0	// Bottom bits are zero, so scan in top bits
224	0	return 32 + FindLSBSetNonZero(static_cast<uint32>(n >> 32));
225	0	} else {
226	0	return FindLSBSetNonZero(bottombits);
227	0	}
228	0	}
229
230		template <class T>
231		bool Bits::BytesContainByteLessThan(T bytes, uint8 c) {
232		T l = BitPattern<T>::l;
233		T h = BitPattern<T>::h;
234		// The c <= 0x80 code is straight out of Knuth Volume 4.
235		// Usually c will be manifestly constant.
236		return c <= 0x80 ? ((h & (bytes - l * c) & ~bytes) != 0)
237		: ((((bytes - l * c) \| (bytes ^ h)) & h) != 0);
238		}
239
240		template <class T>
241		bool Bits::BytesContainByte(T bytes, uint8 c) {
242		// Usually c will be manifestly constant.
243		return Bits::BytesContainByteLessThan<T>(bytes ^ (c * BitPattern<T>::l), 1);
244		}
245
246		template <class T>
247		bool Bits::BytesAllInRange(T bytes, uint8 lo, uint8 hi) {
248		T l = BitPattern<T>::l;
249		T h = BitPattern<T>::h;
250		// In the common case, lo and hi are manifest constants.
251		if (lo > hi) {
252		return false;
253		}
254		if (hi - lo < 128) {
255		T x = bytes - l * lo;
256		T y = bytes + l * (127 - hi);
257		return ((x \| y) & h) == 0;
258		}
259		return !Bits::BytesContainByteLessThan(bytes + (255 - hi) * l, lo + (255 - hi));
260		}