/root/doris/be/src/util/bit_util.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/bit-util.h |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <type_traits> |
24 | | |
25 | | #include "vec/core/wide_integer.h" |
26 | | #ifndef __APPLE__ |
27 | | #include <endian.h> |
28 | | #endif |
29 | | |
30 | | #include "common/compiler_util.h" // IWYU pragma: keep |
31 | | #include "gutil/bits.h" |
32 | | #include "gutil/endian.h" |
33 | | #include "util/cpu_info.h" |
34 | | #include "util/sse_util.hpp" |
35 | | |
36 | | namespace doris { |
37 | | |
38 | | // Utility class to do standard bit tricks |
39 | | // TODO: is this in boost or something else like that? |
40 | | class BitUtil { |
41 | | public: |
42 | | // Returns the ceil of value/divisor |
43 | 7 | static inline int64_t ceil(int64_t value, int64_t divisor) { |
44 | 7 | return value / divisor + (value % divisor != 0); |
45 | 7 | } |
46 | | |
47 | | // Returns 'value' rounded up to the nearest multiple of 'factor' |
48 | 4 | static inline int64_t round_up(int64_t value, int64_t factor) { |
49 | 4 | return (value + (factor - 1)) / factor * factor; |
50 | 4 | } |
51 | | |
52 | | // Returns the smallest power of two that contains v. Taken from |
53 | | // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 |
54 | | // TODO: Pick a better name, as it is not clear what happens when the input is |
55 | | // already a power of two. |
56 | 39 | static inline int64_t next_power_of_two(int64_t v) { |
57 | 39 | --v; |
58 | 39 | v |= v >> 1; |
59 | 39 | v |= v >> 2; |
60 | 39 | v |= v >> 4; |
61 | 39 | v |= v >> 8; |
62 | 39 | v |= v >> 16; |
63 | 39 | v |= v >> 32; |
64 | 39 | ++v; |
65 | 39 | return v; |
66 | 39 | } |
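A minimal usage sketch for next_power_of_two, which also answers the TODO above: an input that is already a power of two is returned unchanged (the --v before the OR cascade and the ++v after it are what make that work).

    #include "util/bit_util.h"

    void next_power_of_two_examples() {
        int64_t a = doris::BitUtil::next_power_of_two(5); // a == 8
        int64_t b = doris::BitUtil::next_power_of_two(8); // b == 8: a power of two maps to itself
        int64_t c = doris::BitUtil::next_power_of_two(9); // c == 16
    }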
67 | | |
68 | | // Non hw accelerated pop count. |
69 | | // TODO: we don't use this in any perf sensitive code paths currently. There |
70 | | // might be a much faster way to implement this. |
71 | 4 | static inline int popcount_no_hw(uint64_t x) { |
72 | 4 | int count = 0; |
73 | | |
74 | 22 | for (; x != 0; ++count) { |
75 | 18 | x &= x - 1; |
76 | 18 | } |
77 | | |
78 | 4 | return count; |
79 | 4 | } |
80 | | |
81 | | // Returns the number of set bits in x |
82 | 4 | static inline int popcount(uint64_t x) { |
83 | 4 | if (LIKELY(CpuInfo::is_supported(CpuInfo::POPCNT))) { |
84 | 4 | return __builtin_popcountl(x); |
85 | 4 | } else { |
86 | 0 | return popcount_no_hw(x); |
87 | 0 | } |
88 | 4 | } |
89 | | |
90 | | // Returns the 'num_bits' least-significant bits of 'v'. |
91 | 0 | static inline uint64_t trailing_bits(uint64_t v, int num_bits) { |
92 | 0 | if (UNLIKELY(num_bits == 0)) { |
93 | 0 | return 0; |
94 | 0 | } |
95 | 0 |
96 | 0 | if (UNLIKELY(num_bits >= 64)) { |
97 | 0 | return v; |
98 | 0 | } |
99 | 0 |
100 | 0 | int n = 64 - num_bits; |
101 | 0 | return (v << n) >> n; |
102 | 0 | } |
103 | | |
104 | | template <typename T> |
105 | 1 | static std::string IntToByteBuffer(T input) { |
106 | 1 | std::string buffer; |
107 | 1 | T value = input; |
108 | 2 | for (int i = 0; i < sizeof(value); ++i) { |
109 | | // Applies a mask for a byte range on the input. |
110 | 2 | char value_to_save = value & 0XFF; |
111 | 2 | buffer.push_back(value_to_save); |
112 | | // Remove the just processed part from the input so that we can exit early if there |
113 | | // is nothing left to process. |
114 | 2 | value >>= 8; |
115 | 2 | if (value == 0 && value_to_save >= 0) { |
116 | 1 | break; |
117 | 1 | } |
118 | 1 | if (value == -1 && value_to_save < 0) { |
119 | 0 | break; |
120 | 0 | } |
121 | 1 | } |
122 | 1 | std::reverse(buffer.begin(), buffer.end()); |
123 | 1 | return buffer; |
124 | 1 | } |
125 | | |
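A minimal sketch of what IntToByteBuffer produces (assuming a signed char, as on x86-64 Linux): a big-endian two's-complement byte string with redundant leading bytes dropped, which is what the two break conditions above test for.

    #include <string>
    #include "util/bit_util.h"

    void int_to_byte_buffer_examples() {
        std::string a = doris::BitUtil::IntToByteBuffer<int32_t>(0x1234);
        // a is two bytes, 0x12 0x34: leading zero bytes were trimmed before the reverse.
        std::string b = doris::BitUtil::IntToByteBuffer<int32_t>(-2);
        // b is one byte, 0xFE: leading 0xFF sign-extension bytes were trimmed.
    }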
126 | | // Returns ceil(log2(x)). |
127 | | // TODO: this could be faster if we use __builtin_clz. Fix this if this ever shows up |
128 | | // in a hot path. |
129 | 39 | static inline int log2(uint64_t x) { |
130 | 39 | DCHECK_GT(x, 0); |
131 | | |
132 | 39 | if (x == 1) { |
133 | 0 | return 0; |
134 | 0 | } |
135 | | |
136 | | // Compute result = ceil(log2(x)) |
137 | | // = floor(log2(x - 1)) + 1, for x > 1 |
138 | | // by finding the position of the most significant bit (1-indexed) of x - 1 |
139 | | // (floor(log2(n)) = MSB(n) (0-indexed)) |
140 | 39 | --x; |
141 | 39 | int result = 1; |
142 | | |
143 | 39 | while (x >>= 1) { |
144 | 0 | ++result; |
145 | 0 | } |
146 | | |
147 | 39 | return result; |
148 | 39 | } |
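A quick hand check of the identity in the comment above, ceil(log2(x)) = floor(log2(x - 1)) + 1 for x > 1:

    #include "util/bit_util.h"

    void log2_examples() {
        int a = doris::BitUtil::log2(1); // a == 0, via the early return
        int b = doris::BitUtil::log2(8); // b == 3: exact power of two
        int c = doris::BitUtil::log2(9); // c == 4: rounded up, since 2^3 < 9 <= 2^4
    }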
149 | | |
150 | | // Swaps the byte order (i.e. endianness) |
151 | 0 | static inline int64_t byte_swap(int64_t value) { return __builtin_bswap64(value); } |
152 | 0 | static inline uint64_t byte_swap(uint64_t value) { |
153 | 0 | return static_cast<uint64_t>(__builtin_bswap64(value)); |
154 | 0 | } |
155 | 0 | static inline int32_t byte_swap(int32_t value) { return __builtin_bswap32(value); } |
156 | 0 | static inline uint32_t byte_swap(uint32_t value) { |
157 | 0 | return static_cast<uint32_t>(__builtin_bswap32(value)); |
158 | 0 | } |
159 | 0 | static inline int16_t byte_swap(int16_t value) { |
160 | 0 | return (((value >> 8) & 0xff) | ((value & 0xff) << 8)); |
161 | 0 | } |
162 | 0 | static inline uint16_t byte_swap(uint16_t value) { |
163 | 0 | return static_cast<uint16_t>(byte_swap(static_cast<int16_t>(value))); |
164 | 0 | } |
165 | | |
166 | | // Write the swapped bytes into dst. len must be 1, 2, 4 or 8. |
167 | 0 | static inline void byte_swap(void* dst, void* src, int len) { |
168 | 0 | switch (len) { |
169 | 0 | case 1: |
170 | 0 | *reinterpret_cast<int8_t*>(dst) = *reinterpret_cast<int8_t*>(src); |
171 | 0 | break; |
172 | 0 |
173 | 0 | case 2: |
174 | 0 | *reinterpret_cast<int16_t*>(dst) = byte_swap(*reinterpret_cast<int16_t*>(src)); |
175 | 0 | break; |
176 | 0 |
177 | 0 | case 4: |
178 | 0 | *reinterpret_cast<int32_t*>(dst) = byte_swap(*reinterpret_cast<int32_t*>(src)); |
179 | 0 | break; |
180 | 0 |
181 | 0 | case 8: |
182 | 0 | *reinterpret_cast<int64_t*>(dst) = byte_swap(*reinterpret_cast<int64_t*>(src)); |
183 | 0 | break; |
184 | 0 |
185 | 0 | default: |
186 | 0 | DCHECK(false); |
187 | 0 | } |
188 | 0 | } |
189 | | |
190 | | // Returns ceil(bits / 64), i.e. the number of int64 words needed to hold 'bits' bits. |
191 | 0 | static inline uint32_t round_up_numi64(uint32_t bits) { return (bits + 63) >> 6; } |
192 | | |
193 | | // Returns ceil(bits / 32), i.e. the number of int32 words needed to hold 'bits' bits. |
194 | 0 | constexpr static inline uint32_t round_up_numi32(uint32_t bits) { return (bits + 31) >> 5; } |
195 | | |
196 | | #if __BYTE_ORDER == __LITTLE_ENDIAN |
197 | | // Converts to big endian format (if not already in big endian). |
198 | 0 | static inline int64_t big_endian(int64_t value) { return byte_swap(value); } |
199 | 0 | static inline uint64_t big_endian(uint64_t value) { return byte_swap(value); } |
200 | 0 | static inline int32_t big_endian(int32_t value) { return byte_swap(value); } |
201 | 0 | static inline uint32_t big_endian(uint32_t value) { return byte_swap(value); } |
202 | 0 | static inline int16_t big_endian(int16_t value) { return byte_swap(value); } |
203 | 0 | static inline uint16_t big_endian(uint16_t value) { return byte_swap(value); } |
204 | | #else |
205 | | static inline int64_t big_endian(int64_t val) { return val; } |
206 | | static inline uint64_t big_endian(uint64_t val) { return val; } |
207 | | static inline int32_t big_endian(int32_t val) { return val; } |
208 | | static inline uint32_t big_endian(uint32_t val) { return val; } |
209 | | static inline int16_t big_endian(int16_t val) { return val; } |
210 | | static inline uint16_t big_endian(uint16_t val) { return val; } |
211 | | #endif |
212 | | |
213 | | template <typename T> |
214 | 5 | static T big_endian_to_host(T value) { |
215 | 5 | if constexpr (std::is_same_v<T, wide::Int256>) { |
216 | 5 | return BigEndian::ToHost256(value); |
217 | 5 | } else if constexpr (std::is_same_v<T, wide::UInt256>) { |
218 | 4 | return BigEndian::ToHost256(value); |
219 | 4 | } else if constexpr (std::is_same_v<T, __int128>) { |
220 | 4 | return BigEndian::ToHost128(value); |
221 | 4 | } else if constexpr (std::is_same_v<T, unsigned __int128>) { |
222 | 3 | return BigEndian::ToHost128(value); |
223 | 3 | } else if constexpr (std::is_same_v<T, int64_t>) { |
224 | 3 | return BigEndian::ToHost64(value); |
225 | 3 | } else if constexpr (std::is_same_v<T, uint64_t>) { |
226 | 2 | return BigEndian::ToHost64(value); |
227 | 2 | } else if constexpr (std::is_same_v<T, int32_t>) { |
228 | 2 | return BigEndian::ToHost32(value); |
229 | 2 | } else if constexpr (std::is_same_v<T, uint32_t>) { |
230 | 1 | return BigEndian::ToHost32(value); |
231 | 1 | } else if constexpr (std::is_same_v<T, int16_t>) { |
232 | 1 | return BigEndian::ToHost16(value); |
233 | 1 | } else if constexpr (std::is_same_v<T, uint16_t>) { |
234 | 1 | return BigEndian::ToHost16(value); |
235 | 1 | } else if constexpr (std::is_same_v<T, int8_t>) { |
236 | 5 | return value; |
237 | 5 | } else if constexpr (std::is_same_v<T, uint8_t>) { |
238 | 5 | return value; |
239 | 5 | } else { |
240 | 5 | __builtin_unreachable(); |
241 | 5 | LOG(FATAL) << "__builtin_unreachable"; |
242 | 5 | return value; |
243 | 5 | } |
244 | 5 | } |
245 | | |
246 | | /// Returns the smallest power of two that contains v. If v is a power of two, v is |
247 | | /// returned. Taken from |
248 | | /// http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 |
249 | 8 | static inline int64_t RoundUpToPowerOfTwo(int64_t v) { |
250 | 8 | --v; |
251 | 8 | v |= v >> 1; |
252 | 8 | v |= v >> 2; |
253 | 8 | v |= v >> 4; |
254 | 8 | v |= v >> 8; |
255 | 8 | v |= v >> 16; |
256 | 8 | v |= v >> 32; |
257 | 8 | ++v; |
258 | 8 | return v; |
259 | 8 | } |
260 | | |
261 | | // Wrap the gutil/ version for convenience. |
262 | 0 | static inline int Log2FloorNonZero64(uint64_t n) { return Bits::Log2FloorNonZero64(n); } |
263 | | |
264 | | // Wrap the gutil/ version for convenience. |
265 | 0 | static inline int Log2Floor64(uint64_t n) { return Bits::Log2Floor64(n); } |
266 | | |
267 | 0 | static inline int Log2Ceiling64(uint64_t n) { |
268 | 0 | int floor = Log2Floor64(n); |
269 | 0 | // Check if zero or a power of two. This pattern is recognised by gcc and optimised |
270 | 0 | // into branch-free code. |
271 | 0 | if (0 == (n & (n - 1))) { |
272 | 0 | return floor; |
273 | 0 | } else { |
274 | 0 | return floor + 1; |
275 | 0 | } |
276 | 0 | } |
277 | | |
278 | 0 | static inline int Log2CeilingNonZero64(uint64_t n) { |
279 | 0 | int floor = Log2FloorNonZero64(n); |
280 | 0 | // Check if zero or a power of two. This pattern is recognised by gcc and optimised |
281 | 0 | // into branch-free code. |
282 | 0 | if (0 == (n & (n - 1))) { |
283 | 0 | return floor; |
284 | 0 | } else { |
285 | 0 | return floor + 1; |
286 | 0 | } |
287 | 0 | } |
288 | | |
289 | | // Returns ceil(bits / 64), i.e. the number of int64 words needed to hold 'bits' bits. |
290 | 12 | static inline uint32_t round_up_numi_64(uint32_t bits) { return (bits + 63) >> 6; } |
291 | | |
292 | 0 | constexpr static inline int64_t Ceil(int64_t value, int64_t divisor) { |
293 | 0 | return value / divisor + (value % divisor != 0); |
294 | 0 | } |
295 | | |
296 | 0 | constexpr static inline bool IsPowerOf2(int64_t value) { return (value & (value - 1)) == 0; } |
297 | | |
298 | 0 | constexpr static inline int64_t RoundDown(int64_t value, int64_t factor) { |
299 | 0 | return (value / factor) * factor; |
300 | 0 | } |
301 | | |
302 | | /// Specialized round up and down functions for frequently used factors, |
303 | | /// like 8 (bits->bytes), 32 (bits->i32), and 64 (bits->i64) |
304 | | /// Returns the number of bytes needed to hold 'bits' bits, i.e. ceil(bits / 8). |
305 | 6 | constexpr static inline uint32_t RoundUpNumBytes(uint32_t bits) { return (bits + 7) >> 3; } |
306 | | |
307 | | /// Non hw accelerated pop count. |
308 | | /// TODO: we don't use this in any perf sensitive code paths currently. There |
309 | | /// might be a much faster way to implement this. |
310 | 0 | static inline int PopcountNoHw(uint64_t x) { |
311 | 0 | int count = 0; |
312 | 0 | for (; x != 0; ++count) x &= x - 1; |
313 | 0 | return count; |
314 | 0 | } |
315 | | |
316 | | /// Returns the number of set bits in x |
317 | 0 | static inline int Popcount(uint64_t x) { |
318 | 0 | //if (LIKELY(CpuInfo::is_supported(CpuInfo::POPCNT))) { |
319 | 0 | // return POPCNT_popcnt_u64(x); |
320 | 0 | //} else { |
321 | 0 | return PopcountNoHw(x); |
322 | 0 | // } |
323 | 0 | } |
324 | | |
325 | | // Compute correct population count for various-width signed integers |
326 | | template <typename T> |
327 | | static inline int PopcountSigned(T v) { |
328 | | // Converting to same-width unsigned then extending preserves the bit pattern. |
329 | | return BitUtil::Popcount(static_cast<typename std::make_unsigned<T>::type>(v)); |
330 | | } |
331 | | |
332 | | /// Logical right shift for signed integer types |
333 | | /// This is needed because the C >> operator does arithmetic right shift |
334 | | /// Negative shift amounts lead to undefined behavior |
335 | | template <typename T> |
336 | | constexpr static T ShiftRightLogical(T v, int shift) { |
337 | | // Conversion to unsigned ensures most significant bits always filled with 0's |
338 | | return static_cast<typename std::make_unsigned<T>::type>(v) >> shift; |
339 | | } |
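A small sketch contrasting ShiftRightLogical with the built-in arithmetic shift on a negative value:

    #include <cstdint>
    #include "util/bit_util.h"

    void shift_right_logical_example() {
        int8_t v = -128;                                    // bit pattern 0x80
        int8_t a = v >> 1;                                  // arithmetic shift: -64 (0xC0), sign bit copied
        int8_t b = doris::BitUtil::ShiftRightLogical(v, 1); // logical shift: 64 (0x40), zero-filled
    }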
340 | | |
341 | | /// Get a specific bit of a numeric type |
342 | | template <typename T> |
343 | | static inline int8_t GetBit(T v, int bitpos) { |
344 | | T masked = v & (static_cast<T>(0x1) << bitpos); |
345 | | return static_cast<int8_t>(ShiftRightLogical(masked, bitpos)); |
346 | | } |
347 | | |
348 | | /// Set a specific bit to 1 |
349 | | /// Behavior when bitpos is negative is undefined |
350 | | template <typename T> |
351 | | constexpr static T SetBit(T v, int bitpos) { |
352 | | return v | (static_cast<T>(0x1) << bitpos); |
353 | | } |
354 | | |
355 | | /// Set a specific bit to 0 |
356 | | /// Behavior when bitpos is negative is undefined |
357 | | template <typename T> |
358 | | constexpr static T UnsetBit(T v, int bitpos) { |
359 | | return v & ~(static_cast<T>(0x1) << bitpos); |
360 | | } |
361 | | |
362 | | /// Returns 'value' rounded up to the nearest multiple of 'factor' when factor is |
363 | | /// a power of two |
364 | 0 | static inline int64_t RoundUpToPowerOf2(int64_t value, int64_t factor) { |
365 | 0 | DCHECK((factor > 0) && ((factor & (factor - 1)) == 0)); |
366 | 0 | return (value + (factor - 1)) & ~(factor - 1); |
367 | 0 | } |
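A quick hand check of the mask trick above; it is only valid when 'factor' is a power of two, which is what the DCHECK guards:

    #include "util/bit_util.h"

    void round_up_to_power_of_2_examples() {
        int64_t a = doris::BitUtil::RoundUpToPowerOf2(17, 8); // (17 + 7) & ~7 == 24
        int64_t b = doris::BitUtil::RoundUpToPowerOf2(16, 8); // already a multiple of 8, stays 16
    }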
368 | | |
369 | | // Speeds up computation for SIMD code paths. |
370 | 0 | static inline size_t RoundUpToPowerOf2Int32(size_t value, size_t factor) { |
371 | 0 | DCHECK((factor > 0) && ((factor & (factor - 1)) == 0)); |
372 | 0 | return (value + (factor - 1)) & ~(factor - 1); |
373 | 0 | } |
374 | | |
375 | 0 | static inline int64_t RoundDownToPowerOf2(int64_t value, int64_t factor) { |
376 | 0 | DCHECK((factor > 0) && ((factor & (factor - 1)) == 0)); |
377 | 0 | return value & ~(factor - 1); |
378 | 0 | } |
379 | | |
380 | | // Returns the ceil of value/divisor |
381 | 157k | static inline int Ceil(int value, int divisor) { |
382 | 157k | return value / divisor + (value % divisor != 0); |
383 | 157k | } |
384 | | |
385 | | // Returns the 'num_bits' least-significant bits of 'v'. |
386 | 630k | static inline uint64_t TrailingBits(uint64_t v, int num_bits) { |
387 | 630k | if (PREDICT_FALSE(num_bits == 0)) return 0; |
388 | 605k | if (PREDICT_FALSE(num_bits >= 64)) return v; |
389 | 404k | int n = 64 - num_bits; |
390 | 404k | return (v << n) >> n; |
391 | 605k | } |
392 | | |
393 | 200k | static inline uint64_t ShiftLeftZeroOnOverflow(uint64_t v, int num_bits) { |
394 | 200k | if (PREDICT_FALSE(num_bits >= 64)) return 0; |
395 | 194k | return v << num_bits; |
396 | 200k | } |
397 | | |
398 | 200k | static inline uint64_t ShiftRightZeroOnOverflow(uint64_t v, int num_bits) { |
399 | 200k | if (PREDICT_FALSE(num_bits >= 64)) return 0; |
400 | 194k | return v >> num_bits; |
401 | 200k | } |
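A brief sketch of the overflow-safe helpers above; unlike a raw C++ shift by 64 or more bits (which is undefined behavior), they clamp to 0 or to the full value:

    #include <cstdint>
    #include "util/bit_util.h"

    void overflow_safe_shift_examples() {
        uint64_t a = doris::BitUtil::TrailingBits(0xFF, 4);               // 0x0F: keep the low 4 bits
        uint64_t b = doris::BitUtil::ShiftLeftZeroOnOverflow(1, 64);      // 0 instead of undefined behavior
        uint64_t c = doris::BitUtil::ShiftRightZeroOnOverflow(~0ULL, 70); // 0 instead of undefined behavior
    }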
402 | | |
403 | 0 | static void ByteSwapScalar(void* dest, const void* source, int len) { |
404 | 0 | uint8_t* dst = reinterpret_cast<uint8_t*>(dest); |
405 | 0 | const uint8_t* src = reinterpret_cast<const uint8_t*>(source); |
406 | 0 | switch (len) { |
407 | 0 | case 1: |
408 | 0 | *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src); |
409 | 0 | return; |
410 | 0 | case 2: |
411 | 0 | *reinterpret_cast<uint16_t*>(dst) = |
412 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint16_t*>(src)); |
413 | 0 | return; |
414 | 0 | case 3: |
415 | 0 | *reinterpret_cast<uint16_t*>(dst + 1) = |
416 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint16_t*>(src)); |
417 | 0 | *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 2); |
418 | 0 | return; |
419 | 0 | case 4: |
420 | 0 | *reinterpret_cast<uint32_t*>(dst) = |
421 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint32_t*>(src)); |
422 | 0 | return; |
423 | 0 | case 5: |
424 | 0 | *reinterpret_cast<uint32_t*>(dst + 1) = |
425 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint32_t*>(src)); |
426 | 0 | *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 4); |
427 | 0 | return; |
428 | 0 | case 6: |
429 | 0 | *reinterpret_cast<uint32_t*>(dst + 2) = |
430 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint32_t*>(src)); |
431 | 0 | *reinterpret_cast<uint16_t*>(dst) = |
432 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint16_t*>(src + 4)); |
433 | 0 | return; |
434 | 0 | case 7: |
435 | 0 | *reinterpret_cast<uint32_t*>(dst + 3) = |
436 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint32_t*>(src)); |
437 | 0 | *reinterpret_cast<uint16_t*>(dst + 1) = |
438 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint16_t*>(src + 4)); |
439 | 0 | *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 6); |
440 | 0 | return; |
441 | 0 | case 8: |
442 | 0 | *reinterpret_cast<uint64_t*>(dst) = |
443 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src)); |
444 | 0 | return; |
445 | 0 | case 9: |
446 | 0 | *reinterpret_cast<uint64_t*>(dst + 1) = |
447 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src)); |
448 | 0 | *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 8); |
449 | 0 | return; |
450 | 0 | case 10: |
451 | 0 | *reinterpret_cast<uint64_t*>(dst + 2) = |
452 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src)); |
453 | 0 | *reinterpret_cast<uint16_t*>(dst) = |
454 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint16_t*>(src + 8)); |
455 | 0 | return; |
456 | 0 | case 11: |
457 | 0 | *reinterpret_cast<uint64_t*>(dst + 3) = |
458 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src)); |
459 | 0 | *reinterpret_cast<uint16_t*>(dst + 1) = |
460 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint16_t*>(src + 8)); |
461 | 0 | *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 10); |
462 | 0 | return; |
463 | 0 | case 12: |
464 | 0 | *reinterpret_cast<uint64_t*>(dst + 4) = |
465 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src)); |
466 | 0 | *reinterpret_cast<uint32_t*>(dst) = |
467 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint32_t*>(src + 8)); |
468 | 0 | return; |
469 | 0 | case 13: |
470 | 0 | *reinterpret_cast<uint64_t*>(dst + 5) = |
471 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src)); |
472 | 0 | *reinterpret_cast<uint32_t*>(dst + 1) = |
473 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint32_t*>(src + 8)); |
474 | 0 | *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 12); |
475 | 0 | return; |
476 | 0 | case 14: |
477 | 0 | *reinterpret_cast<uint64_t*>(dst + 6) = |
478 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src)); |
479 | 0 | *reinterpret_cast<uint32_t*>(dst + 2) = |
480 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint32_t*>(src + 8)); |
481 | 0 | *reinterpret_cast<uint16_t*>(dst) = |
482 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint16_t*>(src + 12)); |
483 | 0 | return; |
484 | 0 | case 15: |
485 | 0 | *reinterpret_cast<uint64_t*>(dst + 7) = |
486 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src)); |
487 | 0 | *reinterpret_cast<uint32_t*>(dst + 3) = |
488 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint32_t*>(src + 8)); |
489 | 0 | *reinterpret_cast<uint16_t*>(dst + 1) = |
490 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint16_t*>(src + 12)); |
491 | 0 | *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 14); |
492 | 0 | return; |
493 | 0 | case 16: |
494 | 0 | *reinterpret_cast<uint64_t*>(dst + 8) = |
495 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src)); |
496 | 0 | *reinterpret_cast<uint64_t*>(dst) = |
497 | 0 | BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src + 8)); |
498 | 0 | return; |
499 | 0 | default: |
500 | 0 | // Revert to slow loop-based swap. |
501 | 0 | ByteSwapScalarLoop(source, len, dest); |
502 | 0 | return; |
503 | 0 | } |
504 | 0 | } |
505 | | |
506 | 0 | static void ByteSwapScalarLoop(const void* src, int len, void* dst) { |
507 | 0 | //TODO: improve the performance of following code further using BSWAP intrinsic |
508 | 0 | uint8_t* d = reinterpret_cast<uint8_t*>(dst); |
509 | 0 | const uint8_t* s = reinterpret_cast<const uint8_t*>(src); |
510 | 0 | for (int i = 0; i < len; ++i) d[i] = s[len - i - 1]; |
511 | 0 | } |
512 | | }; |
513 | | |
514 | | } // namespace doris |