/root/doris/be/src/util/bit_util.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/bit-util.h |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <type_traits> |
24 | | |
25 | | #include "vec/core/extended_types.h" |
26 | | #ifndef __APPLE__ |
27 | | #include <endian.h> |
28 | | #endif |
29 | | |
30 | | #include "common/compiler_util.h" // IWYU pragma: keep |
31 | | #include "gutil/endian.h" |
32 | | #include "util/cpu_info.h" |
33 | | #include "util/sse_util.hpp" |
34 | | |
35 | | namespace doris { |
36 | | |
37 | | // Utility class to do standard bit tricks |
38 | | // TODO: is this in boost or something else like that? |
39 | | class BitUtil { |
40 | | public: |
41 | | // Returns the ceil of value/divisor |
42 | 7 | static inline int64_t ceil(int64_t value, int64_t divisor) { |
43 | 7 | return value / divisor + (value % divisor != 0); |
44 | 7 | } |
45 | | |
46 | | // Returns 'value' rounded up to the nearest multiple of 'factor' |
47 | 36 | static inline int64_t round_up(int64_t value, int64_t factor) { |
48 | 36 | return (value + (factor - 1)) / factor * factor; |
49 | 36 | } |
50 | | |
51 | | // Returns the smallest power of two that contains v. Taken from |
52 | | // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 |
53 | | // TODO: Pick a better name, as it is not clear what happens when the input is |
54 | | // already a power of two. |
55 | 108 | static inline int64_t next_power_of_two(int64_t v) { |
56 | 108 | --v; |
57 | 108 | v |= v >> 1; |
58 | 108 | v |= v >> 2; |
59 | 108 | v |= v >> 4; |
60 | 108 | v |= v >> 8; |
61 | 108 | v |= v >> 16; |
62 | 108 | v |= v >> 32; |
63 | 108 | ++v; |
64 | 108 | return v; |
65 | 108 | } |
66 | | |
67 | | // Non hw accelerated pop count. |
68 | | // TODO: we don't use this in any perf sensitive code paths currently. There |
69 | | // might be a much faster way to implement this. |
70 | 0 | static inline int popcount_no_hw(uint64_t x) { |
71 | 0 | int count = 0; |
72 | 0 |
|
73 | 0 | for (; x != 0; ++count) { |
74 | 0 | x &= x - 1; |
75 | 0 | } |
76 | 0 |
|
77 | 0 | return count; |
78 | 0 | } |
79 | | |
80 | | // Returns the number of set bits in x |
81 | 0 | static inline int popcount(uint64_t x) { |
82 | 0 | if (LIKELY(CpuInfo::is_supported(CpuInfo::POPCNT))) { |
83 | 0 | return __builtin_popcountl(x); |
84 | 0 | } else { |
85 | 0 | return popcount_no_hw(x); |
86 | 0 | } |
87 | 0 | } |
88 | | |
89 | | // Returns the 'num_bits' least-significant bits of 'v'. |
90 | 0 | static inline uint64_t trailing_bits(uint64_t v, int num_bits) { |
91 | 0 | if (UNLIKELY(num_bits == 0)) { |
92 | 0 | return 0; |
93 | 0 | } |
94 | 0 |
|
95 | 0 | if (UNLIKELY(num_bits >= 64)) { |
96 | 0 | return v; |
97 | 0 | } |
98 | 0 |
|
99 | 0 | int n = 64 - num_bits; |
100 | 0 | return (v << n) >> n; |
101 | 0 | } |
102 | | |
103 | | template <typename T> |
104 | 1 | static std::string IntToByteBuffer(T input) { |
105 | 1 | std::string buffer; |
106 | 1 | T value = input; |
107 | 2 | for (int i = 0; i < sizeof(value); ++i) { |
108 | | // Applies a mask for a byte range on the input. |
109 | 2 | signed char value_to_save = value & 0XFF; |
110 | 2 | buffer.push_back(value_to_save); |
111 | | // Remove the just processed part from the input so that we can exit early if there |
112 | | // is nothing left to process. |
113 | 2 | value >>= 8; |
114 | 2 | if (value == 0 && value_to_save >= 0) { |
115 | 1 | break; |
116 | 1 | } |
117 | 1 | if (value == -1 && value_to_save < 0) { |
118 | 0 | break; |
119 | 0 | } |
120 | 1 | } |
121 | 1 | std::reverse(buffer.begin(), buffer.end()); |
122 | 1 | return buffer; |
123 | 1 | } _ZN5doris7BitUtil15IntToByteBufferIiEENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEET_ Line | Count | Source | 104 | 1 | static std::string IntToByteBuffer(T input) { | 105 | 1 | std::string buffer; | 106 | 1 | T value = input; | 107 | 2 | for (int i = 0; i < sizeof(value); ++i) { | 108 | | // Applies a mask for a byte range on the input. | 109 | 2 | signed char value_to_save = value & 0XFF; | 110 | 2 | buffer.push_back(value_to_save); | 111 | | // Remove the just processed part from the input so that we can exit early if there | 112 | | // is nothing left to process. | 113 | 2 | value >>= 8; | 114 | 2 | if (value == 0 && value_to_save >= 0) { | 115 | 1 | break; | 116 | 1 | } | 117 | 1 | if (value == -1 && value_to_save < 0) { | 118 | 0 | break; | 119 | 0 | } | 120 | 1 | } | 121 | 1 | std::reverse(buffer.begin(), buffer.end()); | 122 | 1 | return buffer; | 123 | 1 | } |
Unexecuted instantiation: _ZN5doris7BitUtil15IntToByteBufferInEENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEET_ Unexecuted instantiation: _ZN5doris7BitUtil15IntToByteBufferIlEENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEET_ Unexecuted instantiation: _ZN5doris7BitUtil15IntToByteBufferIN4wide7integerILm256EiEEEENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEET_ |
124 | | |
125 | | // Returns ceil(log2(x)). |
126 | | // TODO: this could be faster if we use __builtin_clz. Fix this if this ever shows up |
127 | | // in a hot path. |
128 | 117 | static inline int log2(uint64_t x) { |
129 | 117 | DCHECK_GT(x, 0); |
130 | | |
131 | 117 | if (x == 1) { |
132 | 0 | return 0; |
133 | 0 | } |
134 | | |
135 | | // Compute result = ceil(log2(x)) |
136 | | // = floor(log2(x - 1)) + 1, for x > 1 |
137 | | // by finding the position of the most significant bit (1-indexed) of x - 1 |
138 | | // (floor(log2(n)) = MSB(n) (0-indexed)) |
139 | 117 | --x; |
140 | 117 | int result = 1; |
141 | | |
142 | 124 | while (x >>= 1) { |
143 | 7 | ++result; |
144 | 7 | } |
145 | | |
146 | 117 | return result; |
147 | 117 | } |
148 | | |
149 | | // Returns the rounded up to 64 multiple. Used for conversions of bits to i64. |
150 | 0 | static inline uint32_t round_up_numi64(uint32_t bits) { return (bits + 63) >> 6; } |
151 | | |
152 | | // Returns the rounded up to 32 multiple. Used for conversions of bits to i32. |
153 | 0 | constexpr static inline uint32_t round_up_numi32(uint32_t bits) { return (bits + 31) >> 5; } |
154 | | |
155 | | /// Returns the smallest power of two that contains v. If v is a power of two, v is |
156 | | /// returned. Taken from |
157 | | /// http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 |
158 | 8 | static inline int64_t RoundUpToPowerOfTwo(int64_t v) { |
159 | 8 | --v; |
160 | 8 | v |= v >> 1; |
161 | 8 | v |= v >> 2; |
162 | 8 | v |= v >> 4; |
163 | 8 | v |= v >> 8; |
164 | 8 | v |= v >> 16; |
165 | 8 | v |= v >> 32; |
166 | 8 | ++v; |
167 | 8 | return v; |
168 | 8 | } |
169 | | |
170 | | // Wrap the gutil/ version for convenience. |
171 | 0 | static inline int Log2FloorNonZero64(uint64_t n) { return 63 ^ __builtin_clzll(n); } |
172 | | |
173 | | // Wrap the gutil/ version for convenience. |
174 | 0 | static inline int Log2Floor64(uint64_t n) { return n == 0 ? -1 : 63 ^ __builtin_clzll(n); } |
175 | | |
176 | 0 | static inline int Log2Ceiling64(uint64_t n) { |
177 | 0 | int floor = Log2Floor64(n); |
178 | 0 | // Check if zero or a power of two. This pattern is recognised by gcc and optimised |
179 | 0 | // into branch-free code. |
180 | 0 | if (0 == (n & (n - 1))) { |
181 | 0 | return floor; |
182 | 0 | } else { |
183 | 0 | return floor + 1; |
184 | 0 | } |
185 | 0 | } |
186 | | |
187 | 0 | static inline int Log2CeilingNonZero64(uint64_t n) { |
188 | 0 | int floor = Log2FloorNonZero64(n); |
189 | 0 | // Check if zero or a power of two. This pattern is recognised by gcc and optimised |
190 | 0 | // into branch-free code. |
191 | 0 | if (0 == (n & (n - 1))) { |
192 | 0 | return floor; |
193 | 0 | } else { |
194 | 0 | return floor + 1; |
195 | 0 | } |
196 | 0 | } |
197 | | |
198 | | // Returns the rounded up to 64 multiple. Used for conversions of bits to i64. |
199 | 0 | static inline uint32_t round_up_numi_64(uint32_t bits) { return (bits + 63) >> 6; } |
200 | | |
201 | 0 | constexpr static inline int64_t Ceil(int64_t value, int64_t divisor) { |
202 | 0 | return value / divisor + (value % divisor != 0); |
203 | 0 | } |
204 | | |
205 | 0 | constexpr static inline bool IsPowerOf2(int64_t value) { return (value & (value - 1)) == 0; } |
206 | | |
207 | 0 | constexpr static inline int64_t RoundDown(int64_t value, int64_t factor) { |
208 | 0 | return (value / factor) * factor; |
209 | 0 | } |
210 | | |
211 | | /// Specialized round up and down functions for frequently used factors, |
212 | | /// like 8 (bits->bytes), 32 (bits->i32), and 64 (bits->i64) |
213 | | /// Returns the rounded up number of bytes that fit the number of bits. |
214 | 71 | constexpr static inline uint32_t RoundUpNumBytes(uint32_t bits) { return (bits + 7) >> 3; } |
215 | | |
216 | | template <typename T> |
217 | 28 | static inline T RoundDownToPowerOf2(T value, T factor) { |
218 | 28 | static_assert(std::is_integral<T>::value, "T must be an integral type"); |
219 | 28 | DCHECK((factor > 0) && ((factor & (factor - 1)) == 0)); |
220 | 28 | return value & ~(factor - 1); |
221 | 28 | } |
222 | | |
223 | | // Returns the ceil of value/divisor |
224 | 206k | static inline int Ceil(int value, int divisor) { |
225 | 206k | return value / divisor + (value % divisor != 0); |
226 | 206k | } |
227 | | |
228 | | // Returns the 'num_bits' least-significant bits of 'v'. |
229 | 630k | static inline uint64_t TrailingBits(uint64_t v, int num_bits) { |
230 | 630k | if (num_bits == 0) [[unlikely]] { |
231 | 24.6k | return 0; |
232 | 24.6k | } |
233 | 605k | if (num_bits >= 64) [[unlikely]] { |
234 | 200k | return v; |
235 | 200k | } |
236 | 405k | int n = 64 - num_bits; |
237 | 405k | return (v << n) >> n; |
238 | 605k | } |
239 | | |
240 | 200k | static inline uint64_t ShiftLeftZeroOnOverflow(uint64_t v, int num_bits) { |
241 | 200k | if (num_bits >= 64) [[unlikely]] { |
242 | 6.42k | return 0; |
243 | 6.42k | } |
244 | 194k | return v << num_bits; |
245 | 200k | } |
246 | | |
247 | 200k | static inline uint64_t ShiftRightZeroOnOverflow(uint64_t v, int num_bits) { |
248 | 200k | if (num_bits >= 64) [[unlikely]] { |
249 | 6.42k | return 0; |
250 | 6.42k | } |
251 | 194k | return v >> num_bits; |
252 | 200k | } |
253 | | }; |
254 | | |
255 | | } // namespace doris |