Coverage Report

Created: 2024-11-18 12:21

/root/doris/be/src/util/bit_util.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/bit-util.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <type_traits>
24
25
#include "vec/core/wide_integer.h"
26
#ifndef __APPLE__
27
#include <endian.h>
28
#endif
29
30
#include "common/compiler_util.h" // IWYU pragma: keep
31
#include "gutil/bits.h"
32
#include "gutil/endian.h"
33
#include "util/cpu_info.h"
34
#include "util/sse_util.hpp"
35
36
namespace doris {
37
38
// Utility class to do standard bit tricks
39
// TODO: is this in boost or something else like that?
40
class BitUtil {
41
public:
42
    // Returns the ceil of value/divisor
43
7
    static inline int64_t ceil(int64_t value, int64_t divisor) {
44
7
        return value / divisor + (value % divisor != 0);
45
7
    }
46
47
    // Returns 'value' rounded up to the nearest multiple of 'factor'
48
4
    static inline int64_t round_up(int64_t value, int64_t factor) {
49
4
        return (value + (factor - 1)) / factor * factor;
50
4
    }
51
52
    // Returns the smallest power of two that contains v. Taken from
53
    // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
54
    // TODO: Pick a better name, as it is not clear what happens when the input is
55
    // already a power of two.
56
39
    static inline int64_t next_power_of_two(int64_t v) {
57
39
        --v;
58
39
        v |= v >> 1;
59
39
        v |= v >> 2;
60
39
        v |= v >> 4;
61
39
        v |= v >> 8;
62
39
        v |= v >> 16;
63
39
        v |= v >> 32;
64
39
        ++v;
65
39
        return v;
66
39
    }
67
68
    // Non hw accelerated pop count.
69
    // TODO: we don't use this in any perf sensitive code paths currently.  There
70
    // might be a much faster way to implement this.
71
4
    static inline int popcount_no_hw(uint64_t x) {
72
4
        int count = 0;
73
74
22
        for (; x != 0; ++count) {
75
18
            x &= x - 1;
76
18
        }
77
78
4
        return count;
79
4
    }
80
81
    // Returns the number of set bits in x
82
4
    static inline int popcount(uint64_t x) {
83
4
        if (LIKELY(CpuInfo::is_supported(CpuInfo::POPCNT))) {
84
4
            return __builtin_popcountl(x);
85
4
        } else {
86
0
            return popcount_no_hw(x);
87
0
        }
88
4
    }
89
90
    // Returns the 'num_bits' least-significant bits of 'v'.
91
0
    static inline uint64_t trailing_bits(uint64_t v, int num_bits) {
92
0
        if (UNLIKELY(num_bits == 0)) {
93
0
            return 0;
94
0
        }
95
0
96
0
        if (UNLIKELY(num_bits >= 64)) {
97
0
            return v;
98
0
        }
99
0
100
0
        int n = 64 - num_bits;
101
0
        return (v << n) >> n;
102
0
    }
103
104
    template <typename T>
105
1
    static std::string IntToByteBuffer(T input) {
106
1
        std::string buffer;
107
1
        T value = input;
108
2
        for (int i = 0; i < sizeof(value); ++i) {
109
            // Applies a mask for a byte range on the input.
110
2
            char value_to_save = value & 0XFF;
111
2
            buffer.push_back(value_to_save);
112
            // Remove the just processed part from the input so that we can exit early if there
113
            // is nothing left to process.
114
2
            value >>= 8;
115
2
            if (value == 0 && value_to_save >= 0) {
116
1
                break;
117
1
            }
118
1
            if (value == -1 && value_to_save < 0) {
119
0
                break;
120
0
            }
121
1
        }
122
1
        std::reverse(buffer.begin(), buffer.end());
123
1
        return buffer;
124
1
    }
_ZN5doris7BitUtil15IntToByteBufferIiEENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEET_
Line
Count
Source
105
1
    static std::string IntToByteBuffer(T input) {
106
1
        std::string buffer;
107
1
        T value = input;
108
2
        for (int i = 0; i < sizeof(value); ++i) {
109
            // Applies a mask for a byte range on the input.
110
2
            char value_to_save = value & 0XFF;
111
2
            buffer.push_back(value_to_save);
112
            // Remove the just processed part from the input so that we can exit early if there
113
            // is nothing left to process.
114
2
            value >>= 8;
115
2
            if (value == 0 && value_to_save >= 0) {
116
1
                break;
117
1
            }
118
1
            if (value == -1 && value_to_save < 0) {
119
0
                break;
120
0
            }
121
1
        }
122
1
        std::reverse(buffer.begin(), buffer.end());
123
1
        return buffer;
124
1
    }
Unexecuted instantiation: _ZN5doris7BitUtil15IntToByteBufferInEENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEET_
Unexecuted instantiation: _ZN5doris7BitUtil15IntToByteBufferIlEENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEET_
Unexecuted instantiation: _ZN5doris7BitUtil15IntToByteBufferIN4wide7integerILm256EiEEEENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEET_
125
126
    // Returns ceil(log2(x)).
127
    // TODO: this could be faster if we use __builtin_clz.  Fix this if this ever shows up
128
    // in a hot path.
129
39
    static inline int log2(uint64_t x) {
130
39
        DCHECK_GT(x, 0);
131
132
39
        if (x == 1) {
133
0
            return 0;
134
0
        }
135
136
        // Compute result = ceil(log2(x))
137
        //                = floor(log2(x - 1)) + 1, for x > 1
138
        // by finding the position of the most significant bit (1-indexed) of x - 1
139
        // (floor(log2(n)) = MSB(n) (0-indexed))
140
39
        --x;
141
39
        int result = 1;
142
143
39
        while (x >>= 1) {
144
0
            ++result;
145
0
        }
146
147
39
        return result;
148
39
    }
149
150
    // Swaps the byte order (i.e. endianess)
151
0
    static inline int64_t byte_swap(int64_t value) { return __builtin_bswap64(value); }
152
0
    static inline uint64_t byte_swap(uint64_t value) {
153
0
        return static_cast<uint64_t>(__builtin_bswap64(value));
154
0
    }
155
0
    static inline int32_t byte_swap(int32_t value) { return __builtin_bswap32(value); }
156
0
    static inline uint32_t byte_swap(uint32_t value) {
157
0
        return static_cast<uint32_t>(__builtin_bswap32(value));
158
0
    }
159
0
    static inline int16_t byte_swap(int16_t value) {
160
0
        return (((value >> 8) & 0xff) | ((value & 0xff) << 8));
161
0
    }
162
0
    static inline uint16_t byte_swap(uint16_t value) {
163
0
        return static_cast<uint16_t>(byte_swap(static_cast<int16_t>(value)));
164
0
    }
165
166
    // Write the swapped bytes into dst. len must be 1, 2, 4 or 8.
167
0
    static inline void byte_swap(void* dst, void* src, int len) {
168
0
        switch (len) {
169
0
        case 1:
170
0
            *reinterpret_cast<int8_t*>(dst) = *reinterpret_cast<int8_t*>(src);
171
0
            break;
172
0
173
0
        case 2:
174
0
            *reinterpret_cast<int16_t*>(dst) = byte_swap(*reinterpret_cast<int16_t*>(src));
175
0
            break;
176
0
177
0
        case 4:
178
0
            *reinterpret_cast<int32_t*>(dst) = byte_swap(*reinterpret_cast<int32_t*>(src));
179
0
            break;
180
0
181
0
        case 8:
182
0
            *reinterpret_cast<int64_t*>(dst) = byte_swap(*reinterpret_cast<int64_t*>(src));
183
0
            break;
184
0
185
0
        default:
186
0
            DCHECK(false);
187
0
        }
188
0
    }
189
190
    // Returns the rounded up to 64 multiple. Used for conversions of bits to i64.
191
0
    static inline uint32_t round_up_numi64(uint32_t bits) { return (bits + 63) >> 6; }
192
193
    // Returns the rounded up to 32 multiple. Used for conversions of bits to i32.
194
0
    constexpr static inline uint32_t round_up_numi32(uint32_t bits) { return (bits + 31) >> 5; }
195
196
#if __BYTE_ORDER == __LITTLE_ENDIAN
197
    // Converts to big endian format (if not already in big endian).
198
0
    static inline int64_t big_endian(int64_t value) { return byte_swap(value); }
199
0
    static inline uint64_t big_endian(uint64_t value) { return byte_swap(value); }
200
0
    static inline int32_t big_endian(int32_t value) { return byte_swap(value); }
201
0
    static inline uint32_t big_endian(uint32_t value) { return byte_swap(value); }
202
0
    static inline int16_t big_endian(int16_t value) { return byte_swap(value); }
203
0
    static inline uint16_t big_endian(uint16_t value) { return byte_swap(value); }
204
#else
205
    static inline int64_t big_endian(int64_t val) { return val; }
206
    static inline uint64_t big_endian(uint64_t val) { return val; }
207
    static inline int32_t big_endian(int32_t val) { return val; }
208
    static inline uint32_t big_endian(uint32_t val) { return val; }
209
    static inline int16_t big_endian(int16_t val) { return val; }
210
    static inline uint16_t big_endian(uint16_t val) { return val; }
211
#endif
212
213
    template <typename T>
214
5
    static T big_endian_to_host(T value) {
215
5
        if constexpr (std::is_same_v<T, wide::Int256>) {
216
5
            return BigEndian::ToHost256(value);
217
5
        } else if constexpr (std::is_same_v<T, wide::UInt256>) {
218
4
            return BigEndian::ToHost256(value);
219
4
        } else if constexpr (std::is_same_v<T, __int128>) {
220
4
            return BigEndian::ToHost128(value);
221
4
        } else if constexpr (std::is_same_v<T, unsigned __int128>) {
222
3
            return BigEndian::ToHost128(value);
223
3
        } else if constexpr (std::is_same_v<T, int64_t>) {
224
3
            return BigEndian::ToHost64(value);
225
3
        } else if constexpr (std::is_same_v<T, uint64_t>) {
226
2
            return BigEndian::ToHost64(value);
227
2
        } else if constexpr (std::is_same_v<T, int32_t>) {
228
2
            return BigEndian::ToHost32(value);
229
2
        } else if constexpr (std::is_same_v<T, uint32_t>) {
230
1
            return BigEndian::ToHost32(value);
231
1
        } else if constexpr (std::is_same_v<T, int16_t>) {
232
1
            return BigEndian::ToHost16(value);
233
1
        } else if constexpr (std::is_same_v<T, uint16_t>) {
234
1
            return BigEndian::ToHost16(value);
235
1
        } else if constexpr (std::is_same_v<T, int8_t>) {
236
5
            return value;
237
5
        } else if constexpr (std::is_same_v<T, uint8_t>) {
238
5
            return value;
239
5
        } else {
240
5
            __builtin_unreachable();
241
5
            LOG(FATAL) << "__builtin_unreachable";
242
5
            return value;
243
5
        }
244
5
    }
_ZN5doris7BitUtil18big_endian_to_hostItEET_S2_
Line
Count
Source
214
1
    static T big_endian_to_host(T value) {
215
1
        if constexpr (std::is_same_v<T, wide::Int256>) {
216
1
            return BigEndian::ToHost256(value);
217
1
        } else if constexpr (std::is_same_v<T, wide::UInt256>) {
218
1
            return BigEndian::ToHost256(value);
219
1
        } else if constexpr (std::is_same_v<T, __int128>) {
220
1
            return BigEndian::ToHost128(value);
221
1
        } else if constexpr (std::is_same_v<T, unsigned __int128>) {
222
1
            return BigEndian::ToHost128(value);
223
1
        } else if constexpr (std::is_same_v<T, int64_t>) {
224
1
            return BigEndian::ToHost64(value);
225
1
        } else if constexpr (std::is_same_v<T, uint64_t>) {
226
1
            return BigEndian::ToHost64(value);
227
1
        } else if constexpr (std::is_same_v<T, int32_t>) {
228
1
            return BigEndian::ToHost32(value);
229
1
        } else if constexpr (std::is_same_v<T, uint32_t>) {
230
1
            return BigEndian::ToHost32(value);
231
1
        } else if constexpr (std::is_same_v<T, int16_t>) {
232
1
            return BigEndian::ToHost16(value);
233
1
        } else if constexpr (std::is_same_v<T, uint16_t>) {
234
1
            return BigEndian::ToHost16(value);
235
1
        } else if constexpr (std::is_same_v<T, int8_t>) {
236
1
            return value;
237
1
        } else if constexpr (std::is_same_v<T, uint8_t>) {
238
1
            return value;
239
1
        } else {
240
1
            __builtin_unreachable();
241
1
            LOG(FATAL) << "__builtin_unreachable";
242
1
            return value;
243
1
        }
244
1
    }
_ZN5doris7BitUtil18big_endian_to_hostIjEET_S2_
Line
Count
Source
214
1
    static T big_endian_to_host(T value) {
215
1
        if constexpr (std::is_same_v<T, wide::Int256>) {
216
1
            return BigEndian::ToHost256(value);
217
1
        } else if constexpr (std::is_same_v<T, wide::UInt256>) {
218
1
            return BigEndian::ToHost256(value);
219
1
        } else if constexpr (std::is_same_v<T, __int128>) {
220
1
            return BigEndian::ToHost128(value);
221
1
        } else if constexpr (std::is_same_v<T, unsigned __int128>) {
222
1
            return BigEndian::ToHost128(value);
223
1
        } else if constexpr (std::is_same_v<T, int64_t>) {
224
1
            return BigEndian::ToHost64(value);
225
1
        } else if constexpr (std::is_same_v<T, uint64_t>) {
226
1
            return BigEndian::ToHost64(value);
227
1
        } else if constexpr (std::is_same_v<T, int32_t>) {
228
1
            return BigEndian::ToHost32(value);
229
1
        } else if constexpr (std::is_same_v<T, uint32_t>) {
230
1
            return BigEndian::ToHost32(value);
231
1
        } else if constexpr (std::is_same_v<T, int16_t>) {
232
1
            return BigEndian::ToHost16(value);
233
1
        } else if constexpr (std::is_same_v<T, uint16_t>) {
234
1
            return BigEndian::ToHost16(value);
235
1
        } else if constexpr (std::is_same_v<T, int8_t>) {
236
1
            return value;
237
1
        } else if constexpr (std::is_same_v<T, uint8_t>) {
238
1
            return value;
239
1
        } else {
240
1
            __builtin_unreachable();
241
1
            LOG(FATAL) << "__builtin_unreachable";
242
1
            return value;
243
1
        }
244
1
    }
_ZN5doris7BitUtil18big_endian_to_hostImEET_S2_
Line
Count
Source
214
1
    static T big_endian_to_host(T value) {
215
1
        if constexpr (std::is_same_v<T, wide::Int256>) {
216
1
            return BigEndian::ToHost256(value);
217
1
        } else if constexpr (std::is_same_v<T, wide::UInt256>) {
218
1
            return BigEndian::ToHost256(value);
219
1
        } else if constexpr (std::is_same_v<T, __int128>) {
220
1
            return BigEndian::ToHost128(value);
221
1
        } else if constexpr (std::is_same_v<T, unsigned __int128>) {
222
1
            return BigEndian::ToHost128(value);
223
1
        } else if constexpr (std::is_same_v<T, int64_t>) {
224
1
            return BigEndian::ToHost64(value);
225
1
        } else if constexpr (std::is_same_v<T, uint64_t>) {
226
1
            return BigEndian::ToHost64(value);
227
1
        } else if constexpr (std::is_same_v<T, int32_t>) {
228
1
            return BigEndian::ToHost32(value);
229
1
        } else if constexpr (std::is_same_v<T, uint32_t>) {
230
1
            return BigEndian::ToHost32(value);
231
1
        } else if constexpr (std::is_same_v<T, int16_t>) {
232
1
            return BigEndian::ToHost16(value);
233
1
        } else if constexpr (std::is_same_v<T, uint16_t>) {
234
1
            return BigEndian::ToHost16(value);
235
1
        } else if constexpr (std::is_same_v<T, int8_t>) {
236
1
            return value;
237
1
        } else if constexpr (std::is_same_v<T, uint8_t>) {
238
1
            return value;
239
1
        } else {
240
1
            __builtin_unreachable();
241
1
            LOG(FATAL) << "__builtin_unreachable";
242
1
            return value;
243
1
        }
244
1
    }
_ZN5doris7BitUtil18big_endian_to_hostIoEET_S2_
Line
Count
Source
214
1
    static T big_endian_to_host(T value) {
215
1
        if constexpr (std::is_same_v<T, wide::Int256>) {
216
1
            return BigEndian::ToHost256(value);
217
1
        } else if constexpr (std::is_same_v<T, wide::UInt256>) {
218
1
            return BigEndian::ToHost256(value);
219
1
        } else if constexpr (std::is_same_v<T, __int128>) {
220
1
            return BigEndian::ToHost128(value);
221
1
        } else if constexpr (std::is_same_v<T, unsigned __int128>) {
222
1
            return BigEndian::ToHost128(value);
223
1
        } else if constexpr (std::is_same_v<T, int64_t>) {
224
1
            return BigEndian::ToHost64(value);
225
1
        } else if constexpr (std::is_same_v<T, uint64_t>) {
226
1
            return BigEndian::ToHost64(value);
227
1
        } else if constexpr (std::is_same_v<T, int32_t>) {
228
1
            return BigEndian::ToHost32(value);
229
1
        } else if constexpr (std::is_same_v<T, uint32_t>) {
230
1
            return BigEndian::ToHost32(value);
231
1
        } else if constexpr (std::is_same_v<T, int16_t>) {
232
1
            return BigEndian::ToHost16(value);
233
1
        } else if constexpr (std::is_same_v<T, uint16_t>) {
234
1
            return BigEndian::ToHost16(value);
235
1
        } else if constexpr (std::is_same_v<T, int8_t>) {
236
1
            return value;
237
1
        } else if constexpr (std::is_same_v<T, uint8_t>) {
238
1
            return value;
239
1
        } else {
240
1
            __builtin_unreachable();
241
1
            LOG(FATAL) << "__builtin_unreachable";
242
1
            return value;
243
1
        }
244
1
    }
_ZN5doris7BitUtil18big_endian_to_hostIN4wide7integerILm256EjEEEET_S5_
Line
Count
Source
214
1
    static T big_endian_to_host(T value) {
215
1
        if constexpr (std::is_same_v<T, wide::Int256>) {
216
1
            return BigEndian::ToHost256(value);
217
1
        } else if constexpr (std::is_same_v<T, wide::UInt256>) {
218
1
            return BigEndian::ToHost256(value);
219
1
        } else if constexpr (std::is_same_v<T, __int128>) {
220
1
            return BigEndian::ToHost128(value);
221
1
        } else if constexpr (std::is_same_v<T, unsigned __int128>) {
222
1
            return BigEndian::ToHost128(value);
223
1
        } else if constexpr (std::is_same_v<T, int64_t>) {
224
1
            return BigEndian::ToHost64(value);
225
1
        } else if constexpr (std::is_same_v<T, uint64_t>) {
226
1
            return BigEndian::ToHost64(value);
227
1
        } else if constexpr (std::is_same_v<T, int32_t>) {
228
1
            return BigEndian::ToHost32(value);
229
1
        } else if constexpr (std::is_same_v<T, uint32_t>) {
230
1
            return BigEndian::ToHost32(value);
231
1
        } else if constexpr (std::is_same_v<T, int16_t>) {
232
1
            return BigEndian::ToHost16(value);
233
1
        } else if constexpr (std::is_same_v<T, uint16_t>) {
234
1
            return BigEndian::ToHost16(value);
235
1
        } else if constexpr (std::is_same_v<T, int8_t>) {
236
1
            return value;
237
1
        } else if constexpr (std::is_same_v<T, uint8_t>) {
238
1
            return value;
239
1
        } else {
240
1
            __builtin_unreachable();
241
1
            LOG(FATAL) << "__builtin_unreachable";
242
1
            return value;
243
1
        }
244
1
    }
Unexecuted instantiation: _ZN5doris7BitUtil18big_endian_to_hostIlEET_S2_
Unexecuted instantiation: _ZN5doris7BitUtil18big_endian_to_hostInEET_S2_
Unexecuted instantiation: _ZN5doris7BitUtil18big_endian_to_hostIN4wide7integerILm256EiEEEET_S5_
Unexecuted instantiation: _ZN5doris7BitUtil18big_endian_to_hostIiEET_S2_
245
246
    /// Returns the smallest power of two that contains v. If v is a power of two, v is
247
    /// returned. Taken from
248
    /// http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
249
8
    static inline int64_t RoundUpToPowerOfTwo(int64_t v) {
250
8
        --v;
251
8
        v |= v >> 1;
252
8
        v |= v >> 2;
253
8
        v |= v >> 4;
254
8
        v |= v >> 8;
255
8
        v |= v >> 16;
256
8
        v |= v >> 32;
257
8
        ++v;
258
8
        return v;
259
8
    }
260
261
    // Wrap the gutil/ version for convenience.
262
0
    static inline int Log2FloorNonZero64(uint64_t n) { return Bits::Log2FloorNonZero64(n); }
263
264
    // Wrap the gutil/ version for convenience.
265
0
    static inline int Log2Floor64(uint64_t n) { return Bits::Log2Floor64(n); }
266
267
0
    static inline int Log2Ceiling64(uint64_t n) {
268
0
        int floor = Log2Floor64(n);
269
0
        // Check if zero or a power of two. This pattern is recognised by gcc and optimised
270
0
        // into branch-free code.
271
0
        if (0 == (n & (n - 1))) {
272
0
            return floor;
273
0
        } else {
274
0
            return floor + 1;
275
0
        }
276
0
    }
277
278
0
    static inline int Log2CeilingNonZero64(uint64_t n) {
279
0
        int floor = Log2FloorNonZero64(n);
280
0
        // Check if zero or a power of two. This pattern is recognised by gcc and optimised
281
0
        // into branch-free code.
282
0
        if (0 == (n & (n - 1))) {
283
0
            return floor;
284
0
        } else {
285
0
            return floor + 1;
286
0
        }
287
0
    }
288
289
    // Returns the rounded up to 64 multiple. Used for conversions of bits to i64.
290
12
    static inline uint32_t round_up_numi_64(uint32_t bits) { return (bits + 63) >> 6; }
291
292
0
    constexpr static inline int64_t Ceil(int64_t value, int64_t divisor) {
293
0
        return value / divisor + (value % divisor != 0);
294
0
    }
295
296
0
    constexpr static inline bool IsPowerOf2(int64_t value) { return (value & (value - 1)) == 0; }
297
298
0
    constexpr static inline int64_t RoundDown(int64_t value, int64_t factor) {
299
0
        return (value / factor) * factor;
300
0
    }
301
302
    /// Specialized round up and down functions for frequently used factors,
303
    /// like 8 (bits->bytes), 32 (bits->i32), and 64 (bits->i64)
304
    /// Returns the rounded up number of bytes that fit the number of bits.
305
6
    constexpr static inline uint32_t RoundUpNumBytes(uint32_t bits) { return (bits + 7) >> 3; }
306
307
    /// Non hw accelerated pop count.
308
    /// TODO: we don't use this in any perf sensitive code paths currently.  There
309
    /// might be a much faster way to implement this.
310
0
    static inline int PopcountNoHw(uint64_t x) {
311
0
        int count = 0;
312
0
        for (; x != 0; ++count) x &= x - 1;
313
0
        return count;
314
0
    }
315
316
    /// Returns the number of set bits in x
317
0
    static inline int Popcount(uint64_t x) {
318
0
        //if (LIKELY(CpuInfo::is_supported(CpuInfo::POPCNT))) {
319
0
        //  return POPCNT_popcnt_u64(x);
320
0
        //} else {
321
0
        return PopcountNoHw(x);
322
0
        // }
323
0
    }
324
325
    // Compute correct population count for various-width signed integers
326
    template <typename T>
327
    static inline int PopcountSigned(T v) {
328
        // Converting to same-width unsigned then extending preserves the bit pattern.
329
        return BitUtil::Popcount(static_cast<typename std::make_unsigned<T>::type>(v));
330
    }
331
332
    /// Logical right shift for signed integer types
333
    /// This is needed because the C >> operator does arithmetic right shift
334
    /// Negative shift amounts lead to undefined behavior
335
    template <typename T>
336
    constexpr static T ShiftRightLogical(T v, int shift) {
337
        // Conversion to unsigned ensures most significant bits always filled with 0's
338
        return static_cast<typename std::make_unsigned<T>::type>(v) >> shift;
339
    }
340
341
    /// Get an specific bit of a numeric type
342
    template <typename T>
343
    static inline int8_t GetBit(T v, int bitpos) {
344
        T masked = v & (static_cast<T>(0x1) << bitpos);
345
        return static_cast<int8_t>(ShiftRightLogical(masked, bitpos));
346
    }
347
348
    /// Set a specific bit to 1
349
    /// Behavior when bitpos is negative is undefined
350
    template <typename T>
351
    constexpr static T SetBit(T v, int bitpos) {
352
        return v | (static_cast<T>(0x1) << bitpos);
353
    }
354
355
    /// Set a specific bit to 0
356
    /// Behavior when bitpos is negative is undefined
357
    template <typename T>
358
    constexpr static T UnsetBit(T v, int bitpos) {
359
        return v & ~(static_cast<T>(0x1) << bitpos);
360
    }
361
362
    /// Returns 'value' rounded up to the nearest multiple of 'factor' when factor is
363
    /// a power of two
364
0
    static inline int64_t RoundUpToPowerOf2(int64_t value, int64_t factor) {
365
0
        DCHECK((factor > 0) && ((factor & (factor - 1)) == 0));
366
0
        return (value + (factor - 1)) & ~(factor - 1);
367
0
    }
368
369
    // speed up function compute for SIMD
370
0
    static inline size_t RoundUpToPowerOf2Int32(size_t value, size_t factor) {
371
0
        DCHECK((factor > 0) && ((factor & (factor - 1)) == 0));
372
0
        return (value + (factor - 1)) & ~(factor - 1);
373
0
    }
374
375
0
    static inline int64_t RoundDownToPowerOf2(int64_t value, int64_t factor) {
376
0
        DCHECK((factor > 0) && ((factor & (factor - 1)) == 0));
377
0
        return value & ~(factor - 1);
378
0
    }
379
380
    // Returns the ceil of value/divisor
381
157k
    static inline int Ceil(int value, int divisor) {
382
157k
        return value / divisor + (value % divisor != 0);
383
157k
    }
384
385
    // Returns the 'num_bits' least-significant bits of 'v'.
386
630k
    static inline uint64_t TrailingBits(uint64_t v, int num_bits) {
387
630k
        if (PREDICT_FALSE(num_bits == 0)) return 0;
388
605k
        if (PREDICT_FALSE(num_bits >= 64)) return v;
389
404k
        int n = 64 - num_bits;
390
404k
        return (v << n) >> n;
391
605k
    }
392
393
200k
    static inline uint64_t ShiftLeftZeroOnOverflow(uint64_t v, int num_bits) {
394
200k
        if (PREDICT_FALSE(num_bits >= 64)) return 0;
395
194k
        return v << num_bits;
396
200k
    }
397
398
200k
    static inline uint64_t ShiftRightZeroOnOverflow(uint64_t v, int num_bits) {
399
200k
        if (PREDICT_FALSE(num_bits >= 64)) return 0;
400
194k
        return v >> num_bits;
401
200k
    }
402
403
0
    static void ByteSwapScalar(void* dest, const void* source, int len) {
404
0
        uint8_t* dst = reinterpret_cast<uint8_t*>(dest);
405
0
        const uint8_t* src = reinterpret_cast<const uint8_t*>(source);
406
0
        switch (len) {
407
0
        case 1:
408
0
            *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src);
409
0
            return;
410
0
        case 2:
411
0
            *reinterpret_cast<uint16_t*>(dst) =
412
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint16_t*>(src));
413
0
            return;
414
0
        case 3:
415
0
            *reinterpret_cast<uint16_t*>(dst + 1) =
416
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint16_t*>(src));
417
0
            *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 2);
418
0
            return;
419
0
        case 4:
420
0
            *reinterpret_cast<uint32_t*>(dst) =
421
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint32_t*>(src));
422
0
            return;
423
0
        case 5:
424
0
            *reinterpret_cast<uint32_t*>(dst + 1) =
425
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint32_t*>(src));
426
0
            *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 4);
427
0
            return;
428
0
        case 6:
429
0
            *reinterpret_cast<uint32_t*>(dst + 2) =
430
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint32_t*>(src));
431
0
            *reinterpret_cast<uint16_t*>(dst) =
432
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint16_t*>(src + 4));
433
0
            return;
434
0
        case 7:
435
0
            *reinterpret_cast<uint32_t*>(dst + 3) =
436
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint32_t*>(src));
437
0
            *reinterpret_cast<uint16_t*>(dst + 1) =
438
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint16_t*>(src + 4));
439
0
            *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 6);
440
0
            return;
441
0
        case 8:
442
0
            *reinterpret_cast<uint64_t*>(dst) =
443
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src));
444
0
            return;
445
0
        case 9:
446
0
            *reinterpret_cast<uint64_t*>(dst + 1) =
447
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src));
448
0
            *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 8);
449
0
            return;
450
0
        case 10:
451
0
            *reinterpret_cast<uint64_t*>(dst + 2) =
452
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src));
453
0
            *reinterpret_cast<uint16_t*>(dst) =
454
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint16_t*>(src + 8));
455
0
            return;
456
0
        case 11:
457
0
            *reinterpret_cast<uint64_t*>(dst + 3) =
458
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src));
459
0
            *reinterpret_cast<uint16_t*>(dst + 1) =
460
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint16_t*>(src + 8));
461
0
            *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 10);
462
0
            return;
463
0
        case 12:
464
0
            *reinterpret_cast<uint64_t*>(dst + 4) =
465
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src));
466
0
            *reinterpret_cast<uint32_t*>(dst) =
467
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint32_t*>(src + 8));
468
0
            return;
469
0
        case 13:
470
0
            *reinterpret_cast<uint64_t*>(dst + 5) =
471
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src));
472
0
            *reinterpret_cast<uint32_t*>(dst + 1) =
473
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint32_t*>(src + 8));
474
0
            *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 12);
475
0
            return;
476
0
        case 14:
477
0
            *reinterpret_cast<uint64_t*>(dst + 6) =
478
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src));
479
0
            *reinterpret_cast<uint32_t*>(dst + 2) =
480
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint32_t*>(src + 8));
481
0
            *reinterpret_cast<uint16_t*>(dst) =
482
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint16_t*>(src + 12));
483
0
            return;
484
0
        case 15:
485
0
            *reinterpret_cast<uint64_t*>(dst + 7) =
486
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src));
487
0
            *reinterpret_cast<uint32_t*>(dst + 3) =
488
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint32_t*>(src + 8));
489
0
            *reinterpret_cast<uint16_t*>(dst + 1) =
490
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint16_t*>(src + 12));
491
0
            *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 14);
492
0
            return;
493
0
        case 16:
494
0
            *reinterpret_cast<uint64_t*>(dst + 8) =
495
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src));
496
0
            *reinterpret_cast<uint64_t*>(dst) =
497
0
                    BitUtil::byte_swap(*reinterpret_cast<const uint64_t*>(src + 8));
498
0
            return;
499
0
        default:
500
0
            // Revert to slow loop-based swap.
501
0
            ByteSwapScalarLoop(source, len, dest);
502
0
            return;
503
0
        }
504
0
    }
505
506
0
    static void ByteSwapScalarLoop(const void* src, int len, void* dst) {
507
0
        //TODO: improve the performance of following code further using BSWAP intrinsic
508
0
        uint8_t* d = reinterpret_cast<uint8_t*>(dst);
509
0
        const uint8_t* s = reinterpret_cast<const uint8_t*>(src);
510
0
        for (int i = 0; i < len; ++i) d[i] = s[len - i - 1];
511
0
    }
512
};
513
514
} // namespace doris