Coverage Report

Created: 2024-11-21 14:46

/root/doris/be/src/util/bitmap.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/bitmap.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <glog/logging.h>
24
#include <stdint.h>
25
#include <string.h>
26
27
#include <algorithm>
28
#include <string>
29
#include <vector>
30
31
#include "gutil/port.h"
32
#include "gutil/strings/fastmem.h"
33
#include "util/bit_util.h"
34
35
namespace doris {
36
37
// Return the number of bytes necessary to store the given number of bits.
38
25.2k
inline size_t BitmapSize(size_t num_bits) {
39
25.2k
    return (num_bits + 7) / 8;
40
25.2k
}
41
42
// Set the given bit.
43
2.80k
inline void BitmapSet(uint8_t* bitmap, size_t idx) {
44
2.80k
    bitmap[idx >> 3] |= 1 << (idx & 7);
45
2.80k
}
46
47
// Switch the given bit to the specified value.
48
451k
inline void BitmapChange(uint8_t* bitmap, size_t idx, bool value) {
49
451k
    bitmap[idx >> 3] = (bitmap[idx >> 3] & ~(1 << (idx & 7))) | ((!!value) << (idx & 7));
50
451k
}
51
52
// Clear the given bit.
53
2
inline void BitmapClear(uint8_t* bitmap, size_t idx) {
54
2
    bitmap[idx >> 3] &= ~(1 << (idx & 7));
55
2
}
56
57
// Test/get the given bit.
58
902k
inline bool BitmapTest(const uint8_t* bitmap, size_t idx) {
59
902k
    return bitmap[idx >> 3] & (1 << (idx & 7));
60
902k
}
61
62
// Merge the two bitmaps using bitwise or. Both bitmaps should have at least
63
// n_bits valid bits.
64
0
inline void BitmapMergeOr(uint8_t* dst, const uint8_t* src, size_t n_bits) {
65
0
    size_t n_bytes = BitmapSize(n_bits);
66
0
    for (size_t i = 0; i < n_bytes; i++) {
67
0
        *dst++ |= *src++;
68
0
    }
69
0
}
70
71
// Set bits from offset to (offset + num_bits) to the specified value
72
void BitmapChangeBits(uint8_t* bitmap, size_t offset, size_t num_bits, bool value);
73
74
// Find the first bit of the specified value, starting from the specified offset.
75
bool BitmapFindFirst(const uint8_t* bitmap, size_t offset, size_t bitmap_size, bool value,
76
                     size_t* idx);
77
78
// Find the first set bit in the bitmap, at the specified offset.
79
inline bool BitmapFindFirstSet(const uint8_t* bitmap, size_t offset, size_t bitmap_size,
80
2
                               size_t* idx) {
81
2
    return BitmapFindFirst(bitmap, offset, bitmap_size, true, idx);
82
2
}
83
84
// Find the first zero bit in the bitmap, at the specified offset.
85
inline bool BitmapFindFirstZero(const uint8_t* bitmap, size_t offset, size_t bitmap_size,
86
2
                                size_t* idx) {
87
2
    return BitmapFindFirst(bitmap, offset, bitmap_size, false, idx);
88
2
}
89
90
// Returns true if the bitmap contains only ones.
91
0
inline bool BitMapIsAllSet(const uint8_t* bitmap, size_t offset, size_t bitmap_size) {
92
0
    DCHECK_LT(offset, bitmap_size);
93
0
    size_t idx;
94
0
    return !BitmapFindFirstZero(bitmap, offset, bitmap_size, &idx);
95
0
}
96
97
// Returns true if the bitmap contains only zeros.
98
0
inline bool BitmapIsAllZero(const uint8_t* bitmap, size_t offset, size_t bitmap_size) {
99
0
    DCHECK_LT(offset, bitmap_size);
100
0
    size_t idx;
101
0
    return !BitmapFindFirstSet(bitmap, offset, bitmap_size, &idx);
102
0
}
103
104
// Returns true if the two bitmaps are equal.
105
//
106
// It is assumed that both bitmaps have 'bitmap_size' number of bits.
107
0
inline bool BitmapEquals(const uint8_t* bm1, const uint8_t* bm2, size_t bitmap_size) {
108
0
    // Use memeq() to check all of the full bytes.
109
0
    size_t num_full_bytes = bitmap_size >> 3;
110
0
    if (!strings::memeq(bm1, bm2, num_full_bytes)) {
111
0
        return false;
112
0
    }
113
0
114
0
    // Check any remaining bits in one extra operation.
115
0
    size_t num_remaining_bits = bitmap_size - (num_full_bytes << 3);
116
0
    if (num_remaining_bits == 0) {
117
0
        return true;
118
0
    }
119
0
    DCHECK_LT(num_remaining_bits, 8);
120
0
    uint8_t mask = (1 << num_remaining_bits) - 1;
121
0
    return (bm1[num_full_bytes] & mask) == (bm2[num_full_bytes] & mask);
122
0
}
123
124
// This function will print the bitmap content in a format like the following:
125
// eg: 0001110000100010110011001100001100110011
126
// output:
127
//      0000: 00011100 00100010 11001100 11000011
128
//      0016: 00110011
129
std::string BitmapToString(const uint8_t* bitmap, size_t num_bits);
130
131
// Iterator which yields ranges of set and unset bits.
132
// Example usage:
133
//   bool value;
134
//   size_t size;
135
//   BitmapIterator iter(bitmap, n_bits);
136
//   while ((size = iter.Next(&value))) {
137
//      printf("bitmap block len=%lu value=%d\n", size, value);
138
//   }
139
class BitmapIterator {
140
public:
141
    BitmapIterator(const uint8_t* map, size_t num_bits)
142
451k
            : offset_(0), num_bits_(num_bits), map_(map) {}
143
144
0
    void Reset(const uint8_t* map, size_t num_bits) {
145
0
        offset_ = 0;
146
0
        num_bits_ = num_bits;
147
0
        map_ = map;
148
0
    }
149
150
0
    void Reset() {
151
0
        offset_ = 0;
152
0
        num_bits_ = 0;
153
0
        map_ = nullptr;
154
0
    }
155
156
3
    bool done() const { return (num_bits_ - offset_) == 0; }
157
158
2
    void SeekTo(size_t bit) {
159
2
        DCHECK_LE(bit, num_bits_);
160
2
        offset_ = bit;
161
2
    }
162
163
1
    size_t Next(bool* value, size_t max_run) {
164
1
        return NextWithLimit(value, std::min(num_bits_, max_run + offset_));
165
1
    }
166
167
902k
    size_t Next(bool* value) { return NextWithLimit(value, num_bits_); }
168
169
private:
170
902k
    size_t NextWithLimit(bool* value, size_t limit) {
171
902k
        size_t len = limit - offset_;
172
902k
        if (PREDICT_FALSE(len == 0)) return (0);
173
174
451k
        *value = BitmapTest(map_, offset_);
175
176
451k
        size_t index;
177
451k
        if (BitmapFindFirst(map_, offset_, limit, !(*value), &index)) {
178
5
            len = index - offset_;
179
451k
        } else {
180
451k
            index = limit;
181
451k
        }
182
183
451k
        offset_ = index;
184
451k
        return len;
185
902k
    }
186
187
private:
188
    size_t offset_;
189
    size_t num_bits_;
190
    const uint8_t* map_ = nullptr;
191
};
192
193
/// Bitmap vector utility class.
194
/// TODO: investigate perf.
195
///  - Precomputed bitmap
196
///  - Explicit Set/Unset() apis
197
///  - Bigger words
198
///  - size bitmap to Mersenne prime.
199
class Bitmap {
200
public:
201
5
    Bitmap(int64_t num_bits) {
202
5
        DCHECK_GE(num_bits, 0);
203
5
        buffer_.resize(BitUtil::round_up_numi_64(num_bits));
204
5
        num_bits_ = num_bits;
205
5
    }
206
207
    /// Resize bitmap and set all bits to zero.
208
7
    void Reset(int64_t num_bits) {
209
7
        DCHECK_GE(num_bits, 0);
210
7
        buffer_.resize(BitUtil::round_up_numi_64(num_bits));
211
7
        num_bits_ = num_bits;
212
7
        SetAllBits(false);
213
7
    }
214
215
    /// Compute memory usage of a bitmap, not including the Bitmap object itself.
216
0
    static int64_t MemUsage(int64_t num_bits) {
217
0
        DCHECK_GE(num_bits, 0);
218
0
        return BitUtil::round_up_numi_64(num_bits) * sizeof(int64_t);
219
0
    }
220
221
    /// Compute memory usage of this bitmap, not including the Bitmap object itself.
222
0
    int64_t MemUsage() const { return MemUsage(num_bits_); }
223
224
    /// Sets the bit at 'bit_index' to v.
225
0
    void Set(int64_t bit_index, bool v) {
226
0
        int64_t word_index = bit_index >> NUM_OFFSET_BITS;
227
0
        bit_index &= BIT_INDEX_MASK;
228
0
        DCHECK_LT(word_index, buffer_.size());
229
0
        if (v) {
230
0
            buffer_[word_index] |= (1LL << bit_index);
231
0
        } else {
232
0
            buffer_[word_index] &= ~(1LL << bit_index);
233
0
        }
234
0
    }
235
236
    /// Returns true if the bit at 'bit_index' is set.
237
0
    bool Get(int64_t bit_index) const {
238
0
        int64_t word_index = bit_index >> NUM_OFFSET_BITS;
239
0
        bit_index &= BIT_INDEX_MASK;
240
0
        DCHECK_LT(word_index, buffer_.size());
241
0
        return (buffer_[word_index] & (1LL << bit_index)) != 0;
242
0
    }
243
244
7
    void SetAllBits(bool b) { memset(&buffer_[0], 255 * b, buffer_.size() * sizeof(uint64_t)); }
245
246
0
    int64_t num_bits() const { return num_bits_; }
247
248
    /// If 'print_bits' prints 0/1 per bit, otherwise it prints the int64_t value.
249
    std::string DebugString(bool print_bits) const;
250
251
private:
252
    std::vector<uint64_t> buffer_;
253
    int64_t num_bits_;
254
255
    /// Used for bit shifting and masking for the word and offset calculation.
256
    static const int64_t NUM_OFFSET_BITS = 6;
257
    static const int64_t BIT_INDEX_MASK = 63;
258
};
259
260
} // namespace doris