Coverage Report

Created: 2025-04-30 15:20

/root/doris/be/src/util/bitmap_value.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <parallel_hashmap/btree.h>
21
#include <parallel_hashmap/phmap.h>
22
23
#include <algorithm>
24
#include <cstdarg>
25
#include <cstdint>
26
#include <cstdio>
27
#include <limits>
28
#include <map>
29
#include <memory>
30
#include <new>
31
#include <numeric>
32
#include <roaring/roaring.hh>
33
#include <set>
34
#include <stdexcept>
35
#include <string>
36
#include <utility>
37
38
#include "common/config.h"
39
#include "common/exception.h"
40
#include "common/logging.h"
41
#include "udf/udf.h"
42
#include "util/coding.h"
43
#include "vec/common/pod_array.h"
44
#include "vec/common/pod_array_fwd.h"
45
namespace doris {
46
47
// serialized bitmap := TypeCode(1), Payload
48
// The format of payload depends on value of TypeCode which is defined below
49
struct BitmapTypeCode {
50
    enum type {
51
        // An empty bitmap. Payload is 0 byte.
52
        // added in 0.11
53
        EMPTY = 0,
54
        // A bitmap containing only one element that is in [0, UINT32_MAX]
55
        // Payload := UInt32LittleEndian(4 byte)
56
        // added in 0.11
57
        SINGLE32 = 1,
58
        // A bitmap whose maximum element is in [0, UINT32_MAX]
59
        // Payload := the standard RoaringBitmap format described by
60
        // https://github.com/RoaringBitmap/RoaringFormatSpec/
61
        // added in 0.11
62
        BITMAP32 = 2,
63
        // A bitmap containing only one element that is in (UINT32_MAX, UINT64_MAX]
64
        // Payload := UInt64LittleEndian(8 byte)
65
        // added in 0.12
66
        SINGLE64 = 3,
67
        // A bitmap whose maximum element is in (UINT32_MAX, UINT64_MAX].
68
        //
69
        // To support 64-bits elements, all elements with the same high 32 bits are stored in a
70
        // RoaringBitmap containing only the lower 32 bits. Thus we could use
71
        // map<uint32_t, RoaringBitmap> to represent bitmap of 64-bits ints.
72
        //
73
        // Since there is no standard format for 64-bits RoaringBitmap, we define our own as below
74
        // Payload := NumRoaring(vint64), { MapKey, MapValue }^NumRoaring
75
        // - MapKey := the shared high 32 bits in UInt32LittleEndian(4 byte)
76
        // - MapValue := the standard RoaringBitmap format
77
        //
78
        // added in 0.12
79
        BITMAP64 = 4,
80
        SET = 5, // V1
81
        SET_V2 = 10,
82
        BITMAP32_V2 = 12,
83
        BITMAP64_V2 = 13,
84
        TYPE_MAX
85
    };
86
2
    Status static inline validate(int bitmap_type) {
87
2
        if (UNLIKELY(bitmap_type < type::EMPTY || bitmap_type >= type::TYPE_MAX)) {
88
2
            std::string err_msg =
89
2
                    fmt::format("BitmapTypeCode invalid, should between: {} and {} actrual is {}",
90
2
                                BitmapTypeCode::EMPTY, BitmapTypeCode::BITMAP64, bitmap_type);
91
2
            LOG(ERROR) << err_msg;
92
2
            return Status::Corruption(err_msg);
93
2
        }
94
0
        return Status::OK();
95
2
    }
96
};
97
98
namespace detail {
99
100
class Roaring64MapSetBitForwardIterator;
101
102
// Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.2.60/cpp/roaring64map.hh
103
// What we change includes
104
// - a custom serialization format is used inside read()/write()/getSizeInBytes()
105
// - added clear() and is32BitsEnough()
106
class Roaring64Map {
107
public:
108
    /**
109
     * Create an empty bitmap
110
     */
111
42.8k
    Roaring64Map() = default;
112
113
    /**
114
     * Construct a bitmap from a list of 32-bit integer values.
115
     */
116
4
    Roaring64Map(size_t n, const uint32_t* data) { addMany(n, data); }
117
118
    /**
119
     * Construct a bitmap from a list of 64-bit integer values.
120
     */
121
2
    Roaring64Map(size_t n, const uint64_t* data) { addMany(n, data); }
122
123
    /**
124
     * Construct a 64-bit map from a 32-bit one
125
     */
126
2
    explicit Roaring64Map(const roaring::Roaring& r) { emplaceOrInsert(0, r); }
127
128
18
    Roaring64Map(const Roaring64Map& r) = default;
129
130
22.1k
    Roaring64Map(Roaring64Map&& r) = default;
131
132
    /**
133
     * Assignment operator.
134
     */
135
2.06k
    Roaring64Map& operator=(const Roaring64Map& r) {
136
2.06k
        roarings = r.roarings;
137
2.06k
        return *this;
138
2.06k
    }
139
140
    /**
141
     * Add value x
142
     *
143
     */
144
6
    void add(uint32_t x) {
145
6
        roarings[0].add(x);
146
6
        roarings[0].setCopyOnWrite(copyOnWrite);
147
6
    }
148
9.46M
    void add(uint64_t x) {
149
9.46M
        roarings[highBytes(x)].add(lowBytes(x));
150
9.46M
        roarings[highBytes(x)].setCopyOnWrite(copyOnWrite);
151
9.46M
    }
152
153
    template <typename T>
154
18
    void addMany(size_t n_args, const T* vals) {
155
18
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
156
2
            auto& roaring = roarings[0];
157
2
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
158
2
            roaring.setCopyOnWrite(copyOnWrite);
159
2
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
160
0
            auto& roaring = roarings[0];
161
0
            std::vector<uint32_t> values(n_args);
162
16
            for (size_t i = 0; i != n_args; ++i) {
163
14
                values[i] = uint32_t(vals[i]);
164
14
            }
165
2
            roaring.addMany(n_args, values.data());
166
2
            roaring.setCopyOnWrite(copyOnWrite);
167
2
        } else {
168
0
            for (size_t lcv = 0; lcv < n_args; lcv++) {
169
0
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
170
0
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
171
0
            }
172
0
        }
173
18
    }
_ZN5doris6detail12Roaring64Map7addManyIjEEvmPKT_
Line
Count
Source
154
16
    void addMany(size_t n_args, const T* vals) {
155
16
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
156
16
            auto& roaring = roarings[0];
157
16
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
158
16
            roaring.setCopyOnWrite(copyOnWrite);
159
16
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
160
16
            auto& roaring = roarings[0];
161
16
            std::vector<uint32_t> values(n_args);
162
16
            for (size_t i = 0; i != n_args; ++i) {
163
16
                values[i] = uint32_t(vals[i]);
164
16
            }
165
16
            roaring.addMany(n_args, values.data());
166
16
            roaring.setCopyOnWrite(copyOnWrite);
167
16
        } else {
168
16
            for (size_t lcv = 0; lcv < n_args; lcv++) {
169
16
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
170
16
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
171
16
            }
172
16
        }
173
16
    }
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIiEEvmPKT_
_ZN5doris6detail12Roaring64Map7addManyItEEvmPKT_
Line
Count
Source
154
2
    void addMany(size_t n_args, const T* vals) {
155
2
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
156
2
            auto& roaring = roarings[0];
157
2
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
158
2
            roaring.setCopyOnWrite(copyOnWrite);
159
2
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
160
2
            auto& roaring = roarings[0];
161
2
            std::vector<uint32_t> values(n_args);
162
16
            for (size_t i = 0; i != n_args; ++i) {
163
14
                values[i] = uint32_t(vals[i]);
164
14
            }
165
2
            roaring.addMany(n_args, values.data());
166
2
            roaring.setCopyOnWrite(copyOnWrite);
167
2
        } else {
168
2
            for (size_t lcv = 0; lcv < n_args; lcv++) {
169
2
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
170
2
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
171
2
            }
172
2
        }
173
2
    }
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIaEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIsEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIlEEvmPKT_
174
175
80
    void addMany(size_t n_args, const uint64_t* vals) {
176
2.51k
        for (size_t lcv = 0; lcv < n_args; lcv++) {
177
2.43k
            roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
178
2.43k
            roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
179
2.43k
        }
180
80
    }
181
182
    /**
183
     * Remove value x
184
     *
185
     */
186
8
    void remove(uint32_t x) { roarings[0].remove(x); }
187
46
    void remove(uint64_t x) {
188
46
        auto roaring_iter = roarings.find(highBytes(x));
189
46
        if (roaring_iter != roarings.cend()) {
190
46
            roaring_iter->second.remove(lowBytes(x));
191
46
        }
192
46
    }
193
    /**
194
     * Return the largest value (if not empty)
195
     *
196
     */
197
93.0k
    uint64_t maximum() const {
198
93.0k
        for (auto roaring_iter = roarings.crbegin(); roaring_iter != roarings.crend();
199
93.0k
             ++roaring_iter) {
200
93.0k
            if (!roaring_iter->second.isEmpty()) {
201
93.0k
                return uniteBytes(roaring_iter->first, roaring_iter->second.maximum());
202
93.0k
            }
203
93.0k
        }
204
        // we put std::numeric_limits<>::max/min in parenthesis
205
        // to avoid a clash with the Windows.h header under Windows
206
10
        return (std::numeric_limits<uint64_t>::min)();
207
93.0k
    }
208
209
    /**
210
     * Return the smallest value (if not empty)
211
     *
212
     */
213
14
    uint64_t minimum() const {
214
14
        for (auto roaring_iter = roarings.cbegin(); roaring_iter != roarings.cend();
215
14
             ++roaring_iter) {
216
12
            if (!roaring_iter->second.isEmpty()) {
217
12
                return uniteBytes(roaring_iter->first, roaring_iter->second.minimum());
218
12
            }
219
12
        }
220
        // we put std::numeric_limits<>::max/min in parenthesis
221
        // to avoid a clash with the Windows.h header under Windows
222
2
        return (std::numeric_limits<uint64_t>::max)();
223
14
    }
224
225
    /**
226
     * Check if value x is present
227
     */
228
22
    bool contains(uint32_t x) const {
229
22
        return roarings.count(0) == 0 ? false : roarings.at(0).contains(x);
230
22
    }
231
18.4k
    bool contains(uint64_t x) const {
232
18.4k
        return roarings.count(highBytes(x)) == 0 ? false
233
18.4k
                                                 : roarings.at(highBytes(x)).contains(lowBytes(x));
234
18.4k
    }
235
236
    /**
237
     * Compute the intersection between the current bitmap and the provided
238
     * bitmap,
239
     * writing the result in the current bitmap. The provided bitmap is not
240
     * modified.
241
     */
242
12
    Roaring64Map& operator&=(const Roaring64Map& r) {
243
22
        for (auto& map_entry : roarings) {
244
22
            if (r.roarings.count(map_entry.first) == 1) {
245
20
                map_entry.second &= r.roarings.at(map_entry.first);
246
20
            } else {
247
2
                map_entry.second = roaring::Roaring();
248
2
            }
249
22
        }
250
12
        return *this;
251
12
    }
252
253
    /**
254
     * Compute the difference between the current bitmap and the provided
255
     * bitmap,
256
     * writing the result in the current bitmap. The provided bitmap is not
257
     * modified.
258
     */
259
2
    Roaring64Map& operator-=(const Roaring64Map& r) {
260
8
        for (auto& map_entry : roarings) {
261
8
            if (r.roarings.count(map_entry.first) == 1) {
262
8
                map_entry.second -= r.roarings.at(map_entry.first);
263
8
            }
264
8
        }
265
2
        return *this;
266
2
    }
267
268
    /**
269
     * Compute the union between the current bitmap and the provided bitmap,
270
     * writing the result in the current bitmap. The provided bitmap is not
271
     * modified.
272
     *
273
     * See also the fastunion function to aggregate many bitmaps more quickly.
274
     */
275
24
    Roaring64Map& operator|=(const Roaring64Map& r) {
276
44
        for (const auto& map_entry : r.roarings) {
277
44
            if (roarings.count(map_entry.first) == 0) {
278
20
                roarings[map_entry.first] = map_entry.second;
279
20
                roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
280
24
            } else {
281
24
                roarings[map_entry.first] |= map_entry.second;
282
24
            }
283
44
        }
284
24
        return *this;
285
24
    }
286
287
    /**
288
     * Compute the symmetric union between the current bitmap and the provided
289
     * bitmap,
290
     * writing the result in the current bitmap. The provided bitmap is not
291
     * modified.
292
     */
293
8
    Roaring64Map& operator^=(const Roaring64Map& r) {
294
14
        for (const auto& map_entry : r.roarings) {
295
14
            if (roarings.count(map_entry.first) == 0) {
296
0
                roarings[map_entry.first] = map_entry.second;
297
0
                roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
298
14
            } else {
299
14
                roarings[map_entry.first] ^= map_entry.second;
300
14
            }
301
14
        }
302
8
        return *this;
303
8
    }
304
305
    /**
306
     * Exchange the content of this bitmap with another.
307
     */
308
2
    void swap(Roaring64Map& r) { roarings.swap(r.roarings); }
309
310
    /**
311
     * Get the cardinality of the bitmap (number of elements).
312
     * Throws std::length_error in the special case where the bitmap is full
313
     * (cardinality() == 2^64). Check isFull() before calling to avoid
314
     * exception.
315
     */
316
492
    uint64_t cardinality() const {
317
492
        if (isFull()) {
318
0
            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
319
0
                                   "bitmap is full, cardinality is 2^64, "
320
0
                                   "unable to represent in a 64-bit integer");
321
0
        }
322
492
        return std::accumulate(roarings.cbegin(), roarings.cend(), (uint64_t)0,
323
492
                               [](uint64_t previous,
324
836
                                  const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
325
836
                                   return previous + map_entry.second.cardinality();
326
836
                               });
327
492
    }
328
    /**
329
     * Computes the size of the intersection between two bitmaps.
330
     *
331
     */
332
12
    uint64_t andCardinality(const Roaring64Map& r) const {
333
12
        uint64_t card = 0;
334
20
        for (const auto& map_entry : roarings) {
335
20
            if (r.roarings.count(map_entry.first) == 1) {
336
18
                card += map_entry.second.and_cardinality(r.roarings.at(map_entry.first));
337
18
            }
338
20
        }
339
12
        return card;
340
12
    }
341
342
    /**
343
     * Computes the size of the union between two bitmaps.
344
     *
345
     */
346
8
    uint64_t orCardinality(const Roaring64Map& r) const {
347
8
        uint64_t card = 0;
348
8
        for (const auto& map_entry : roarings) {
349
8
            if (r.roarings.count(map_entry.first) == 0) {
350
0
                card += map_entry.second.cardinality();
351
0
            }
352
8
        }
353
8
        for (const auto& map_entry : r.roarings) {
354
8
            if (roarings.count(map_entry.first) == 0) {
355
0
                card += map_entry.second.cardinality();
356
8
            } else {
357
8
                card += roarings.at(map_entry.first).or_cardinality(map_entry.second);
358
8
            }
359
8
        }
360
8
        return card;
361
8
    }
362
363
    /**
364
     * Computes the size of the difference (andnot) between two bitmaps.
365
     * r1.cardinality - (r1 & r2).cardinality
366
     */
367
2
    uint64_t andnotCardinality(const Roaring64Map& r) const {
368
2
        uint64_t card = 0;
369
8
        for (const auto& map_entry : roarings) {
370
8
            card += map_entry.second.cardinality();
371
8
            if (r.roarings.count(map_entry.first) == 1) {
372
8
                card -= r.roarings.at(map_entry.first).and_cardinality(map_entry.second);
373
8
            }
374
8
        }
375
2
        return card;
376
2
    }
377
378
    /**
379
     * Computes the size of the symmetric difference (andnot) between two
380
     * bitmaps.
381
     * r1.cardinality + r2.cardinality - 2 * (r1 & r2).cardinality
382
     */
383
2
    uint64_t xorCardinality(const Roaring64Map& r) const {
384
2
        uint64_t card = 0;
385
2
        for (const auto& map_entry : roarings) {
386
2
            card += map_entry.second.cardinality();
387
2
        }
388
2
        for (const auto& map_entry : r.roarings) {
389
2
            card += map_entry.second.cardinality();
390
2
            if (roarings.count(map_entry.first) == 1) {
391
2
                card -= 2 * roarings.at(map_entry.first).and_cardinality(map_entry.second);
392
2
            }
393
2
        }
394
2
        return card;
395
2
    }
396
397
    /**
398
    * Returns true if the bitmap is empty (cardinality is zero).
399
    */
400
2
    bool isEmpty() const {
401
2
        return std::all_of(roarings.cbegin(), roarings.cend(),
402
2
                           [](const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
403
0
                               return map_entry.second.isEmpty();
404
0
                           });
405
2
    }
406
407
    /**
408
    * Returns true if the bitmap is full (cardinality is max uint64_t + 1).
409
    */
410
492
    bool isFull() const {
411
        // only bother to check if map is fully saturated
412
        //
413
        // we put std::numeric_limits<>::max/min in parenthesis
414
        // to avoid a clash with the Windows.h header under Windows
415
492
        return roarings.size() == ((uint64_t)(std::numeric_limits<uint32_t>::max)()) + 1
416
492
                       ? std::all_of(roarings.cbegin(), roarings.cend(),
417
0
                                     [](const std::pair<const uint32_t, roaring::Roaring>&
418
0
                                                roaring_map_entry) {
419
                                         // roarings within map are saturated if cardinality
420
                                         // is uint32_t max + 1
421
0
                                         return roaring_map_entry.second.cardinality() ==
422
0
                                                ((uint64_t)(std::numeric_limits<uint32_t>::max)()) +
423
0
                                                        1;
424
0
                                     })
425
492
                       : false;
426
492
    }
427
428
    /**
429
     * Return true if the two bitmaps contain the same elements.
430
     */
431
28
    bool operator==(const Roaring64Map& r) const {
432
        // we cannot use operator == on the map because either side may contain
433
        // empty Roaring Bitmaps
434
28
        auto lhs_iter = roarings.cbegin();
435
28
        auto lhs_cend = roarings.cend();
436
28
        auto rhs_iter = r.roarings.cbegin();
437
28
        auto rhs_cend = r.roarings.cend();
438
54
        while (lhs_iter != lhs_cend && rhs_iter != rhs_cend) {
439
26
            auto lhs_key = lhs_iter->first;
440
26
            auto rhs_key = rhs_iter->first;
441
26
            const auto& lhs_map = lhs_iter->second;
442
26
            const auto& rhs_map = rhs_iter->second;
443
26
            if (lhs_map.isEmpty()) {
444
2
                ++lhs_iter;
445
2
                continue;
446
2
            }
447
24
            if (rhs_map.isEmpty()) {
448
4
                ++rhs_iter;
449
4
                continue;
450
4
            }
451
20
            if (!(lhs_key == rhs_key)) {
452
0
                return false;
453
0
            }
454
20
            if (!(lhs_map == rhs_map)) {
455
0
                return false;
456
0
            }
457
20
            ++lhs_iter;
458
20
            ++rhs_iter;
459
20
        }
460
32
        while (lhs_iter != lhs_cend) {
461
8
            if (!lhs_iter->second.isEmpty()) {
462
4
                return false;
463
4
            }
464
4
            ++lhs_iter;
465
4
        }
466
28
        while (rhs_iter != rhs_cend) {
467
6
            if (!rhs_iter->second.isEmpty()) {
468
2
                return false;
469
2
            }
470
4
            ++rhs_iter;
471
4
        }
472
22
        return true;
473
24
    }
474
475
    /** convert array and bitmap containers to run containers when it is more
476
     * efficient;
477
     * also convert from run containers when more space efficient.  Returns
478
     * true if the result has at least one run container.
479
     * Additional savings might be possible by calling shrinkToFit().
480
     */
481
54.6k
    bool runOptimize() {
482
54.6k
        return std::accumulate(
483
54.6k
                roarings.begin(), roarings.end(), true,
484
54.6k
                [](bool previous, std::pair<const uint32_t, roaring::Roaring>& map_entry) {
485
54.6k
                    return map_entry.second.runOptimize() && previous;
486
54.6k
                });
487
54.6k
    }
488
489
    /**
490
     * If needed, reallocate memory to shrink the memory usage. Returns
491
     * the number of bytes saved.
492
    */
493
54.6k
    size_t shrinkToFit() {
494
54.6k
        size_t savedBytes = 0;
495
54.6k
        auto iter = roarings.begin();
496
109k
        while (iter != roarings.cend()) {
497
54.6k
            if (iter->second.isEmpty()) {
498
                // empty Roarings are 84 bytes
499
2
                savedBytes += 88;
500
2
                iter = roarings.erase(iter);
501
54.6k
            } else {
502
54.6k
                savedBytes += iter->second.shrinkToFit();
503
54.6k
                iter++;
504
54.6k
            }
505
54.6k
        }
506
54.6k
        return savedBytes;
507
54.6k
    }
508
509
    /**
510
     * Iterate over the bitmap elements. The function iterator is called once
511
     * for all the values with ptr (can be nullptr) as the second parameter of each
512
     * call.
513
     *
514
     * roaring_iterator is simply a pointer to a function that returns bool
515
     * (true means that the iteration should continue while false means that it
516
     * should stop), and takes (uint32_t,void*) as inputs.
517
     */
518
17.4k
    void iterate(roaring::api::roaring_iterator64 iterator, void* ptr) const {
519
17.5k
        for (const auto& map_entry : roarings) {
520
17.5k
            bool should_continue = roaring_iterate64(&map_entry.second.roaring, iterator,
521
17.5k
                                                     uint64_t(map_entry.first) << 32, ptr);
522
17.5k
            if (!should_continue) {
523
2
                break;
524
2
            }
525
17.5k
        }
526
17.4k
    }
527
528
    /**
529
     * write a bitmap to a char buffer.
530
     * Returns how many bytes were written which should be getSizeInBytes().
531
     */
532
38.3k
    size_t write(char* buf, int serialize_version) const {
533
38.3k
        bool is_v1 = serialize_version == 1;
534
38.3k
        BitmapTypeCode::type type_bitmap32 =
535
38.3k
                is_v1 ? BitmapTypeCode::type::BITMAP32 : BitmapTypeCode::type::BITMAP32_V2;
536
38.3k
        BitmapTypeCode::type type_bitmap64 =
537
38.3k
                is_v1 ? BitmapTypeCode::type::BITMAP64 : BitmapTypeCode::type::BITMAP64_V2;
538
539
38.3k
        if (is32BitsEnough()) {
540
38.3k
            *(buf++) = type_bitmap32;
541
38.3k
            auto it = roarings.find(0);
542
38.3k
            if (it == roarings.end()) { // empty bitmap
543
4
                roaring::Roaring r;
544
4
                return r.write(buf, is_v1) + 1;
545
4
            }
546
38.3k
            return it->second.write(buf, is_v1) + 1;
547
38.3k
        }
548
549
14
        const char* orig = buf;
550
        // put type code
551
14
        *(buf++) = type_bitmap64;
552
        // push map size
553
14
        buf = (char*)encode_varint64((uint8_t*)buf, roarings.size());
554
14
        std::for_each(roarings.cbegin(), roarings.cend(),
555
32
                      [&buf, is_v1](const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
556
                          // push map key
557
32
                          encode_fixed32_le((uint8_t*)buf, map_entry.first);
558
32
                          buf += sizeof(uint32_t);
559
                          // push map value Roaring
560
32
                          buf += map_entry.second.write(buf, is_v1);
561
32
                      });
562
14
        return buf - orig;
563
38.3k
    }
564
565
    /**
566
     * read a bitmap from a serialized version.
567
     *
568
     * This function is unsafe in the sense that if you provide bad data,
569
     * many bytes could be read, possibly causing a buffer overflow. See also readSafe.
570
     */
571
22.1k
    static Roaring64Map read(const char* buf) {
572
22.1k
        Roaring64Map result;
573
574
22.1k
        bool is_v1 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP64 == *buf;
575
22.1k
        bool is_bitmap32 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP32_V2 == *buf;
576
22.1k
        bool is_bitmap64 = BitmapTypeCode::BITMAP64 == *buf || BitmapTypeCode::BITMAP64_V2 == *buf;
577
22.1k
        if (is_bitmap32) {
578
22.1k
            roaring::Roaring read = roaring::Roaring::read(buf + 1, is_v1);
579
22.1k
            result.emplaceOrInsert(0, std::move(read));
580
22.1k
            return result;
581
22.1k
        }
582
583
16
        DCHECK(is_bitmap64);
584
16
        buf++;
585
586
        // get map size (varint64 took 1~10 bytes)
587
16
        uint64_t map_size;
588
16
        buf = reinterpret_cast<const char*>(
589
16
                decode_varint64_ptr(reinterpret_cast<const uint8_t*>(buf),
590
16
                                    reinterpret_cast<const uint8_t*>(buf + 10), &map_size));
591
16
        DCHECK(buf != nullptr);
592
52
        for (uint64_t lcv = 0; lcv < map_size; lcv++) {
593
            // get map key
594
36
            uint32_t key = decode_fixed32_le(reinterpret_cast<const uint8_t*>(buf));
595
36
            buf += sizeof(uint32_t);
596
            // read map value Roaring
597
36
            roaring::Roaring read_var = roaring::Roaring::read(buf, is_v1);
598
            // forward buffer past the last Roaring Bitmap
599
36
            buf += read_var.getSizeInBytes(is_v1);
600
36
            result.emplaceOrInsert(key, std::move(read_var));
601
36
        }
602
16
        return result;
603
22.1k
    }
604
605
    /**
606
     * How many bytes are required to serialize this bitmap
607
     */
608
54.6k
    size_t getSizeInBytes(int serialize_version) const {
609
54.6k
        bool is_v1 = serialize_version == 1;
610
54.6k
        if (is32BitsEnough()) {
611
54.6k
            auto it = roarings.find(0);
612
54.6k
            if (it == roarings.end()) { // empty bitmap
613
4
                roaring::Roaring r;
614
4
                return r.getSizeInBytes(is_v1) + 1;
615
4
            }
616
54.6k
            return it->second.getSizeInBytes(is_v1) + 1;
617
54.6k
        }
618
        // start with type code, map size and size of keys for each map entry
619
26
        size_t init = 1 + varint_length(roarings.size()) + roarings.size() * sizeof(uint32_t);
620
26
        return std::accumulate(
621
26
                roarings.cbegin(), roarings.cend(), init,
622
56
                [=](size_t previous, const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
623
                    // add in bytes used by each Roaring
624
56
                    return previous + map_entry.second.getSizeInBytes(is_v1);
625
56
                });
626
54.6k
    }
627
628
    /**
629
     * remove all elements
630
     */
631
0
    void clear() { roarings.clear(); }
632
633
    /**
634
     * Return whether all elements can be represented in 32 bits
635
     */
636
93.0k
    bool is32BitsEnough() const { return maximum() <= std::numeric_limits<uint32_t>::max(); }
637
638
    /**
639
     * Computes the intersection between two bitmaps and returns new bitmap.
640
     * The current bitmap and the provided bitmap are unchanged.
641
     */
642
4
    Roaring64Map operator&(const Roaring64Map& o) const { return Roaring64Map(*this) &= o; }
643
644
    /**
645
     * Computes the difference between two bitmaps and returns new bitmap.
646
     * The current bitmap and the provided bitmap are unchanged.
647
     */
648
0
    Roaring64Map operator-(const Roaring64Map& o) const { return Roaring64Map(*this) -= o; }
649
650
    /**
651
     * Computes the union between two bitmaps and returns new bitmap.
652
     * The current bitmap and the provided bitmap are unchanged.
653
     */
654
2
    Roaring64Map operator|(const Roaring64Map& o) const { return Roaring64Map(*this) |= o; }
655
656
    /**
657
     * Computes the symmetric union between two bitmaps and returns new bitmap.
658
     * The current bitmap and the provided bitmap are unchanged.
659
     */
660
2
    Roaring64Map operator^(const Roaring64Map& o) const { return Roaring64Map(*this) ^= o; }
661
662
    /**
663
     * computes the logical or (union) between "n" bitmaps (referenced by a
664
     * pointer).
665
     */
666
10
    static Roaring64Map fastunion(size_t n, const Roaring64Map** inputs) {
667
10
        Roaring64Map ans;
668
        // not particularly fast
669
24
        for (size_t lcv = 0; lcv < n; ++lcv) {
670
14
            ans |= *(inputs[lcv]);
671
14
        }
672
10
        return ans;
673
10
    }
674
675
    friend class Roaring64MapSetBitForwardIterator;
676
    using const_iterator = Roaring64MapSetBitForwardIterator;
677
678
    /**
679
    * Returns an iterator that can be used to access the position of the
680
    * set bits. The running time complexity of a full scan is proportional to
681
    * the
682
    * number
683
    * of set bits: be aware that if you have long strings of 1s, this can be
684
    * very inefficient.
685
    *
686
    * It can be much faster to use the toArray method if you want to
687
    * retrieve the set bits.
688
    */
689
    const_iterator begin() const;
690
691
    /**
692
    * A bogus iterator that can be used together with begin()
693
    * for constructions such as for(auto i = b.begin();
694
    * i!=b.end(); ++i) {}
695
    */
696
    const_iterator end() const;
697
698
private:
699
    phmap::btree_map<uint32_t, roaring::Roaring> roarings {};
700
    bool copyOnWrite {false};
701
18.9M
    static uint32_t highBytes(const uint64_t in) { return uint32_t(in >> 32); }
702
9.48M
    static uint32_t lowBytes(const uint64_t in) { return uint32_t(in); }
703
98.9k
    static uint64_t uniteBytes(const uint32_t highBytes, const uint32_t lowBytes) {
704
98.9k
        return (uint64_t(highBytes) << 32) | uint64_t(lowBytes);
705
98.9k
    }
706
2
    void emplaceOrInsert(const uint32_t key, const roaring::Roaring& value) {
707
2
        roarings.emplace(std::make_pair(key, value));
708
2
    }
709
710
22.2k
    void emplaceOrInsert(const uint32_t key, roaring::Roaring&& value) {
711
22.2k
        roarings.emplace(key, value);
712
22.2k
    }
713
};
714
715
// Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.4.0/cpp/roaring64map.hh
716
// Used to go through the set bits. Not optimally fast, but convenient.
717
class Roaring64MapSetBitForwardIterator {
718
public:
719
    using type_of_iterator = Roaring64MapSetBitForwardIterator;
720
721
    /**
722
     * Provides the location of the set bit.
723
     */
724
5.92k
    uint64_t operator*() const {
725
5.92k
        return Roaring64Map::uniteBytes(map_iter->first, i.current_value);
726
5.92k
    }
727
728
1.95k
    type_of_iterator& operator++() { // ++i, must returned inc. value
729
1.95k
        if (i.has_value) {
730
1.95k
            roaring_advance_uint32_iterator(&i);
731
1.95k
        }
732
2.00k
        while (!i.has_value) {
733
68
            map_iter++;
734
68
            if (map_iter == map_end) return *this;
735
50
            roaring_init_iterator(&map_iter->second.roaring, &i);
736
50
        }
737
1.93k
        return *this;
738
1.95k
    }
739
740
3.61k
    type_of_iterator operator++(int) { // i++, must return orig. value
741
3.61k
        Roaring64MapSetBitForwardIterator orig(*this);
742
3.61k
        roaring_advance_uint32_iterator(&i);
743
3.62k
        while (!i.has_value) {
744
8
            map_iter++;
745
8
            if (map_iter == map_end) return orig;
746
4
            roaring_init_iterator(&map_iter->second.roaring, &i);
747
4
        }
748
3.61k
        return orig;
749
3.61k
    }
750
751
10
    bool move(const uint64_t& x) {
752
10
        map_iter = p.lower_bound(Roaring64Map::highBytes(x));
753
10
        if (map_iter != p.cend()) {
754
10
            roaring_init_iterator(&map_iter->second.roaring, &i);
755
10
            if (map_iter->first == Roaring64Map::highBytes(x)) {
756
10
                if (roaring_move_uint32_iterator_equalorlarger(&i, Roaring64Map::lowBytes(x)))
757
8
                    return true;
758
2
                map_iter++;
759
2
                if (map_iter == map_end) return false;
760
0
                roaring_init_iterator(&map_iter->second.roaring, &i);
761
0
            }
762
0
            return true;
763
10
        }
764
0
        return false;
765
10
    }
766
767
1.05k
    bool operator==(const Roaring64MapSetBitForwardIterator& o) const {
768
1.05k
        if (map_iter == map_end && o.map_iter == o.map_end) return true;
769
1.03k
        if (o.map_iter == o.map_end) return false;
770
2
        return **this == *o;
771
1.03k
    }
772
773
4.60k
    bool operator!=(const Roaring64MapSetBitForwardIterator& o) const {
774
4.60k
        if (map_iter == map_end && o.map_iter == o.map_end) return false;
775
4.59k
        if (o.map_iter == o.map_end) return true;
776
0
        return **this != *o;
777
4.59k
    }
778
10
    Roaring64MapSetBitForwardIterator& operator=(const Roaring64MapSetBitForwardIterator& r) {
779
10
        map_iter = r.map_iter;
780
10
        map_end = r.map_end;
781
10
        i = r.i;
782
10
        return *this;
783
10
    }
784
785
    Roaring64MapSetBitForwardIterator(const Roaring64MapSetBitForwardIterator& r) = default;
786
787
    Roaring64MapSetBitForwardIterator(const Roaring64Map& parent, bool exhausted = false)
788
4.59k
            : p(parent.roarings), map_end(parent.roarings.cend()) {
789
4.59k
        if (exhausted || parent.roarings.empty()) {
790
4.55k
            map_iter = parent.roarings.cend();
791
4.55k
        } else {
792
42
            map_iter = parent.roarings.cbegin();
793
42
            roaring_init_iterator(&map_iter->second.roaring, &i);
794
42
            while (!i.has_value) {
795
0
                map_iter++;
796
0
                if (map_iter == map_end) return;
797
0
                roaring_init_iterator(&map_iter->second.roaring, &i);
798
0
            }
799
42
        }
800
4.59k
    }
801
802
protected:
803
    const phmap::btree_map<uint32_t, roaring::Roaring>& p;
804
    phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_iter {};
805
    phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_end {};
806
    roaring::api::roaring_uint32_iterator_t i {};
807
};
808
809
36
inline Roaring64MapSetBitForwardIterator Roaring64Map::begin() const {
810
36
    return {*this};
811
36
}
812
813
4.53k
inline Roaring64MapSetBitForwardIterator Roaring64Map::end() const {
814
4.53k
    return {*this, true};
815
4.53k
}
816
817
} // namespace detail
818
819
// Represent the in-memory and on-disk structure of Doris's BITMAP data type.
820
// Optimize for the case where the bitmap contains 0 or 1 element which is common
821
// for streaming load scenario.
822
class BitmapValueIterator;
823
class BitmapValue {
824
public:
825
    template <typename T>
826
    using SetContainer = phmap::flat_hash_set<T>;
827
828
    // Construct an empty bitmap.
829
200k
    BitmapValue() : _sv(0), _bitmap(nullptr), _type(EMPTY), _is_shared(false) { _set.clear(); }
830
831
    // Construct a bitmap with one element.
832
    explicit BitmapValue(uint64_t value)
833
116
            : _sv(value), _bitmap(nullptr), _type(SINGLE), _is_shared(false) {}
834
835
    // Construct a bitmap from serialized data.
836
6.16k
    explicit BitmapValue(const char* src) : _is_shared(false) {
837
6.16k
        bool res = deserialize(src);
838
6.16k
        DCHECK(res);
839
6.16k
    }
840
841
41.0k
    BitmapValue(const BitmapValue& other) {
842
41.0k
        _type = other._type;
843
41.0k
        switch (other._type) {
844
360
        case EMPTY:
845
360
            break;
846
286
        case SINGLE:
847
286
            _sv = other._sv;
848
286
            break;
849
35.0k
        case BITMAP:
850
35.0k
            _bitmap = other._bitmap;
851
35.0k
            break;
852
5.35k
        case SET:
853
5.35k
            _set = other._set;
854
5.35k
            break;
855
41.0k
        }
856
857
41.0k
        if (other._type == BITMAP) {
858
35.0k
            _is_shared = true;
859
            // should also set other's state to shared, so that other bitmap value will
860
            // create a new bitmap when it wants to modify it.
861
35.0k
            const_cast<BitmapValue&>(other)._is_shared = true;
862
35.0k
        }
863
41.0k
    }
864
865
50.5k
    BitmapValue(BitmapValue&& other) noexcept {
866
50.5k
        _type = other._type;
867
50.5k
        switch (other._type) {
868
136
        case EMPTY:
869
136
            break;
870
434
        case SINGLE:
871
434
            _sv = other._sv;
872
434
            break;
873
42.1k
        case BITMAP:
874
42.1k
            _bitmap = std::move(other._bitmap);
875
42.1k
            other._bitmap = nullptr;
876
42.1k
            break;
877
7.79k
        case SET:
878
7.79k
            _set = std::move(other._set);
879
7.79k
            break;
880
50.5k
        }
881
50.5k
        _is_shared = other._is_shared;
882
50.5k
        other._type = EMPTY;
883
50.5k
        other._is_shared = false;
884
50.5k
    }
885
886
214
    BitmapValue& operator=(const BitmapValue& other) {
887
214
        if (this == &other) {
888
10
            return *this;
889
10
        }
890
204
        reset();
891
204
        _type = other._type;
892
204
        switch (other._type) {
893
40
        case EMPTY:
894
40
            break;
895
58
        case SINGLE:
896
58
            _sv = other._sv;
897
58
            break;
898
52
        case BITMAP:
899
52
            _bitmap = other._bitmap;
900
52
            break;
901
54
        case SET:
902
54
            _set = other._set;
903
54
            break;
904
204
        }
905
906
204
        if (other._type == BITMAP) {
907
52
            _is_shared = true;
908
            // should also set other's state to shared, so that other bitmap value will
909
            // create a new bitmap when it wants to modify it.
910
52
            const_cast<BitmapValue&>(other)._is_shared = true;
911
52
        }
912
204
        return *this;
913
204
    }
914
915
44
    static BitmapValue empty_bitmap() { return BitmapValue {}; }
916
917
36
    BitmapValue& operator=(BitmapValue&& other) noexcept {
918
36
        if (this == &other) {
919
0
            return *this;
920
0
        }
921
36
        reset();
922
923
36
        _type = other._type;
924
36
        switch (other._type) {
925
0
        case EMPTY:
926
0
            break;
927
24
        case SINGLE:
928
24
            _sv = other._sv;
929
24
            break;
930
8
        case BITMAP:
931
8
            _bitmap = std::move(other._bitmap);
932
8
            other._bitmap = nullptr;
933
8
            break;
934
4
        case SET:
935
4
            _set = std::move(other._set);
936
4
            break;
937
36
        }
938
36
        _is_shared = other._is_shared;
939
36
        return *this;
940
36
    }
941
942
    // Construct a bitmap from given elements.
943
118
    explicit BitmapValue(const std::vector<uint64_t>& bits) : _is_shared(false) {
944
118
        if (bits.size() == 0) {
945
0
            _type = EMPTY;
946
0
            return;
947
0
        }
948
949
118
        if (bits.size() == 1) {
950
2
            _type = SINGLE;
951
2
            _sv = bits[0];
952
2
            return;
953
2
        }
954
955
116
        if (!config::enable_set_in_bitmap_value || bits.size() > SET_TYPE_THRESHOLD) {
956
52
            _type = BITMAP;
957
52
            _prepare_bitmap_for_write();
958
52
            _bitmap->addMany(bits.size(), &bits[0]);
959
64
        } else {
960
64
            _type = SET;
961
252
            for (auto v : bits) {
962
252
                _set.insert(v);
963
252
            }
964
64
        }
965
116
    }
966
967
62
    BitmapTypeCode::type get_type_code() const {
968
62
        switch (_type) {
969
2
        case EMPTY:
970
2
            return BitmapTypeCode::EMPTY;
971
14
        case SINGLE:
972
14
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
973
8
                return BitmapTypeCode::SINGLE32;
974
8
            } else {
975
6
                return BitmapTypeCode::SINGLE64;
976
6
            }
977
18
        case SET:
978
18
            return BitmapTypeCode::SET;
979
28
        case BITMAP:
980
28
            bool is_v1 = (config::bitmap_serialize_version == 1);
981
28
            if (_bitmap->is32BitsEnough()) {
982
18
                return is_v1 ? BitmapTypeCode::type::BITMAP32 : BitmapTypeCode::type::BITMAP32_V2;
983
18
            } else {
984
10
                return is_v1 ? BitmapTypeCode::type::BITMAP64 : BitmapTypeCode::type::BITMAP64_V2;
985
10
            }
986
62
        }
987
0
        __builtin_unreachable();
988
62
    }
989
990
    template <typename T>
991
44
    void add_many(const T* values, const size_t count) {
992
44
        switch (_type) {
993
34
        case EMPTY:
994
34
            if (count == 1) {
995
0
                _sv = values[0];
996
0
                _type = SINGLE;
997
34
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
998
540
                for (size_t i = 0; i != count; ++i) {
999
518
                    _set.insert(values[i]);
1000
518
                }
1001
22
                _type = SET;
1002
22
            } else {
1003
12
                _prepare_bitmap_for_write();
1004
12
                _bitmap->addMany(count, values);
1005
12
                _type = BITMAP;
1006
12
            }
1007
34
            break;
1008
0
        case SINGLE:
1009
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1010
0
                _set.insert(_sv);
1011
0
                for (size_t i = 0; i != count; ++i) {
1012
0
                    _set.insert(values[i]);
1013
0
                }
1014
0
                _type = SET;
1015
0
                _convert_to_bitmap_if_need();
1016
0
            } else {
1017
0
                _prepare_bitmap_for_write();
1018
0
                _bitmap->add(_sv);
1019
0
                _bitmap->addMany(count, values);
1020
0
                _type = BITMAP;
1021
0
            }
1022
0
            break;
1023
2
        case BITMAP:
1024
2
            _prepare_bitmap_for_write();
1025
2
            _bitmap->addMany(count, values);
1026
2
            break;
1027
8
        case SET:
1028
648
            for (size_t i = 0; i != count; ++i) {
1029
640
                _set.insert(values[i]);
1030
640
            }
1031
8
            _convert_to_bitmap_if_need();
1032
8
            break;
1033
44
        }
1034
44
    }
_ZN5doris11BitmapValue8add_manyIiEEvPKT_m
Line
Count
Source
991
10
    void add_many(const T* values, const size_t count) {
992
10
        switch (_type) {
993
10
        case EMPTY:
994
10
            if (count == 1) {
995
0
                _sv = values[0];
996
0
                _type = SINGLE;
997
10
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
998
156
                for (size_t i = 0; i != count; ++i) {
999
146
                    _set.insert(values[i]);
1000
146
                }
1001
10
                _type = SET;
1002
10
            } else {
1003
0
                _prepare_bitmap_for_write();
1004
0
                _bitmap->addMany(count, values);
1005
0
                _type = BITMAP;
1006
0
            }
1007
10
            break;
1008
0
        case SINGLE:
1009
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1010
0
                _set.insert(_sv);
1011
0
                for (size_t i = 0; i != count; ++i) {
1012
0
                    _set.insert(values[i]);
1013
0
                }
1014
0
                _type = SET;
1015
0
                _convert_to_bitmap_if_need();
1016
0
            } else {
1017
0
                _prepare_bitmap_for_write();
1018
0
                _bitmap->add(_sv);
1019
0
                _bitmap->addMany(count, values);
1020
0
                _type = BITMAP;
1021
0
            }
1022
0
            break;
1023
0
        case BITMAP:
1024
0
            _prepare_bitmap_for_write();
1025
0
            _bitmap->addMany(count, values);
1026
0
            break;
1027
0
        case SET:
1028
0
            for (size_t i = 0; i != count; ++i) {
1029
0
                _set.insert(values[i]);
1030
0
            }
1031
0
            _convert_to_bitmap_if_need();
1032
0
            break;
1033
10
        }
1034
10
    }
_ZN5doris11BitmapValue8add_manyImEEvPKT_m
Line
Count
Source
991
34
    void add_many(const T* values, const size_t count) {
992
34
        switch (_type) {
993
24
        case EMPTY:
994
24
            if (count == 1) {
995
0
                _sv = values[0];
996
0
                _type = SINGLE;
997
24
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
998
384
                for (size_t i = 0; i != count; ++i) {
999
372
                    _set.insert(values[i]);
1000
372
                }
1001
12
                _type = SET;
1002
12
            } else {
1003
12
                _prepare_bitmap_for_write();
1004
12
                _bitmap->addMany(count, values);
1005
12
                _type = BITMAP;
1006
12
            }
1007
24
            break;
1008
0
        case SINGLE:
1009
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1010
0
                _set.insert(_sv);
1011
0
                for (size_t i = 0; i != count; ++i) {
1012
0
                    _set.insert(values[i]);
1013
0
                }
1014
0
                _type = SET;
1015
0
                _convert_to_bitmap_if_need();
1016
0
            } else {
1017
0
                _prepare_bitmap_for_write();
1018
0
                _bitmap->add(_sv);
1019
0
                _bitmap->addMany(count, values);
1020
0
                _type = BITMAP;
1021
0
            }
1022
0
            break;
1023
2
        case BITMAP:
1024
2
            _prepare_bitmap_for_write();
1025
2
            _bitmap->addMany(count, values);
1026
2
            break;
1027
8
        case SET:
1028
648
            for (size_t i = 0; i != count; ++i) {
1029
640
                _set.insert(values[i]);
1030
640
            }
1031
8
            _convert_to_bitmap_if_need();
1032
8
            break;
1033
34
        }
1034
34
    }
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIaEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIsEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIlEEvPKT_m
1035
1036
9.47M
    void add(uint64_t value) {
1037
9.47M
        switch (_type) {
1038
23.1k
        case EMPTY:
1039
23.1k
            if (!config::enable_set_in_bitmap_value) {
1040
10.6k
                _sv = value;
1041
10.6k
                _type = SINGLE;
1042
12.5k
            } else {
1043
12.5k
                _set.insert(value);
1044
12.5k
                _type = SET;
1045
12.5k
            }
1046
23.1k
            break;
1047
10.5k
        case SINGLE:
1048
            //there is no need to convert the type if two variables are equal
1049
10.5k
            if (_sv == value) {
1050
4
                break;
1051
4
            }
1052
10.5k
            if (config::enable_set_in_bitmap_value) {
1053
8
                _set.insert(_sv);
1054
8
                _set.insert(value);
1055
8
                _type = SET;
1056
10.5k
            } else {
1057
10.5k
                _prepare_bitmap_for_write();
1058
10.5k
                _bitmap->add(_sv);
1059
10.5k
                _bitmap->add(value);
1060
10.5k
                _type = BITMAP;
1061
10.5k
            }
1062
10.5k
            break;
1063
9.17M
        case BITMAP:
1064
9.17M
            _prepare_bitmap_for_write();
1065
9.17M
            _bitmap->add(value);
1066
9.17M
            break;
1067
260k
        case SET:
1068
260k
            _set.insert(value);
1069
260k
            _convert_to_bitmap_if_need();
1070
260k
            break;
1071
9.47M
        }
1072
9.47M
    }
1073
1074
16
    void remove(uint64_t value) {
1075
16
        switch (_type) {
1076
2
        case EMPTY:
1077
2
            break;
1078
2
        case SINGLE:
1079
            //there is need to convert the type if two variables are equal
1080
2
            if (_sv == value) {
1081
0
                _type = EMPTY;
1082
0
            }
1083
2
            break;
1084
8
        case BITMAP:
1085
8
            _prepare_bitmap_for_write();
1086
8
            _bitmap->remove(value);
1087
8
            _convert_to_smaller_type();
1088
8
            break;
1089
4
        case SET:
1090
4
            _set.erase(value);
1091
4
            _convert_to_smaller_type();
1092
4
            break;
1093
16
        }
1094
16
    }
1095
1096
    // Compute the union between the current bitmap and the provided bitmap.
1097
32
    BitmapValue& operator-=(const BitmapValue& rhs) {
1098
32
        switch (rhs._type) {
1099
8
        case EMPTY:
1100
8
            break;
1101
8
        case SINGLE:
1102
8
            remove(rhs._sv);
1103
8
            break;
1104
8
        case BITMAP:
1105
8
            switch (_type) {
1106
2
            case EMPTY:
1107
2
                break;
1108
2
            case SINGLE:
1109
2
                if (rhs._bitmap->contains(_sv)) {
1110
0
                    _type = EMPTY;
1111
0
                }
1112
2
                break;
1113
2
            case BITMAP:
1114
2
                _prepare_bitmap_for_write();
1115
2
                *_bitmap -= *rhs._bitmap;
1116
2
                _convert_to_smaller_type();
1117
2
                break;
1118
2
            case SET: {
1119
64
                for (auto it = _set.begin(); it != _set.end();) {
1120
62
                    if (rhs.contains(*it)) {
1121
32
                        it = _set.erase(it);
1122
32
                    } else {
1123
30
                        ++it;
1124
30
                    }
1125
62
                }
1126
2
                _convert_to_smaller_type();
1127
2
                break;
1128
0
            }
1129
8
            }
1130
8
            break;
1131
8
        case SET: {
1132
8
            switch (_type) {
1133
2
            case EMPTY:
1134
2
                break;
1135
2
            case SINGLE:
1136
2
                if (rhs._set.contains(_sv)) {
1137
0
                    _type = EMPTY;
1138
0
                }
1139
2
                break;
1140
2
            case BITMAP:
1141
2
                _prepare_bitmap_for_write();
1142
62
                for (auto v : rhs._set) {
1143
62
                    if (_bitmap->contains(v)) {
1144
0
                        _bitmap->remove(v);
1145
0
                    }
1146
62
                }
1147
2
                _convert_to_smaller_type();
1148
2
                break;
1149
2
            case SET: {
1150
64
                for (auto it = _set.begin(); it != _set.end();) {
1151
62
                    if (rhs.contains(*it)) {
1152
28
                        it = _set.erase(it);
1153
34
                    } else {
1154
34
                        ++it;
1155
34
                    }
1156
62
                }
1157
2
                _convert_to_smaller_type();
1158
2
                break;
1159
0
            }
1160
8
            }
1161
8
        }
1162
32
        }
1163
32
        return *this;
1164
32
    }
1165
1166
    // Compute the union between the current bitmap and the provided bitmap.
1167
    // Possible type transitions are:
1168
    // EMPTY  -> SINGLE
1169
    // EMPTY  -> BITMAP
1170
    // SINGLE -> BITMAP
1171
118
    BitmapValue& operator|=(const BitmapValue& rhs) {
1172
118
        switch (rhs._type) {
1173
24
        case EMPTY:
1174
24
            break;
1175
50
        case SINGLE:
1176
50
            add(rhs._sv);
1177
50
            break;
1178
16
        case BITMAP:
1179
16
            switch (_type) {
1180
4
            case EMPTY:
1181
4
                _bitmap = rhs._bitmap;
1182
4
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1183
4
                _is_shared = true;
1184
4
                _type = BITMAP;
1185
4
                break;
1186
6
            case SINGLE:
1187
6
                _bitmap = rhs._bitmap;
1188
6
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1189
6
                _is_shared = true;
1190
6
                _prepare_bitmap_for_write();
1191
6
                _bitmap->add(_sv);
1192
6
                _type = BITMAP;
1193
6
                break;
1194
4
            case BITMAP:
1195
4
                _prepare_bitmap_for_write();
1196
4
                *_bitmap |= *rhs._bitmap;
1197
4
                break;
1198
2
            case SET: {
1199
2
                _prepare_bitmap_for_write();
1200
2
                *_bitmap = *rhs._bitmap;
1201
1202
62
                for (auto v : _set) {
1203
62
                    _bitmap->add(v);
1204
62
                }
1205
2
                _type = BITMAP;
1206
2
                break;
1207
0
            }
1208
16
            }
1209
16
            break;
1210
28
        case SET:
1211
28
            switch (_type) {
1212
2
            case EMPTY:
1213
2
                _set = rhs._set;
1214
2
                _type = SET;
1215
2
                break;
1216
2
            case SINGLE: {
1217
2
                if ((rhs._set.size() + rhs._set.contains(_sv) > SET_TYPE_THRESHOLD)) {
1218
0
                    _prepare_bitmap_for_write();
1219
0
                    _bitmap->add(_sv);
1220
0
                    for (auto v : rhs._set) {
1221
0
                        _bitmap->add(v);
1222
0
                    }
1223
0
                    _type = BITMAP;
1224
2
                } else {
1225
2
                    _set = rhs._set;
1226
2
                    _set.insert(_sv);
1227
2
                    _type = SET;
1228
2
                }
1229
2
                break;
1230
0
            }
1231
2
            case BITMAP:
1232
2
                _prepare_bitmap_for_write();
1233
62
                for (auto v : rhs._set) {
1234
62
                    _bitmap->add(v);
1235
62
                }
1236
2
                break;
1237
22
            case SET: {
1238
116
                for (auto v : rhs._set) {
1239
116
                    _set.insert(v);
1240
116
                }
1241
22
                _convert_to_bitmap_if_need();
1242
22
                break;
1243
0
            }
1244
28
            }
1245
28
            break;
1246
118
        }
1247
118
        return *this;
1248
118
    }
1249
1250
42
    BitmapValue& fastunion(const std::vector<const BitmapValue*>& values) {
1251
42
        std::vector<const detail::Roaring64Map*> bitmaps;
1252
42
        std::vector<uint64_t> single_values;
1253
42
        std::vector<const SetContainer<uint64_t>*> sets;
1254
3.46k
        for (const auto* value : values) {
1255
3.46k
            switch (value->_type) {
1256
10
            case EMPTY:
1257
10
                break;
1258
10
            case SINGLE:
1259
10
                single_values.push_back(value->_sv);
1260
10
                break;
1261
10
            case BITMAP:
1262
10
                bitmaps.push_back(value->_bitmap.get());
1263
10
                break;
1264
3.43k
            case SET:
1265
3.43k
                sets.push_back(&value->_set);
1266
3.43k
                break;
1267
3.46k
            }
1268
3.46k
        }
1269
1270
42
        if (!bitmaps.empty()) {
1271
10
            _prepare_bitmap_for_write();
1272
10
            switch (_type) {
1273
4
            case EMPTY:
1274
4
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1275
4
                break;
1276
4
            case SINGLE:
1277
4
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1278
4
                _bitmap->add(_sv);
1279
4
                break;
1280
2
            case BITMAP:
1281
2
                for (const auto* bitmap : bitmaps) {
1282
2
                    *_bitmap |= *bitmap;
1283
2
                }
1284
2
                break;
1285
0
            case SET: {
1286
0
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1287
0
                for (auto v : _set) {
1288
0
                    _bitmap->add(v);
1289
0
                }
1290
0
                _set.clear();
1291
0
                break;
1292
0
            }
1293
10
            }
1294
10
            _type = BITMAP;
1295
10
        }
1296
1297
42
        if (!sets.empty()) {
1298
3.43k
            for (auto& set : sets) {
1299
3.60k
                for (auto v : *set) {
1300
3.60k
                    _set.insert(v);
1301
3.60k
                }
1302
3.43k
            }
1303
20
            switch (_type) {
1304
18
            case EMPTY:
1305
18
                _type = SET;
1306
18
                break;
1307
0
            case SINGLE: {
1308
0
                _set.insert(_sv);
1309
0
                _type = SET;
1310
0
                break;
1311
0
            }
1312
0
            case BITMAP:
1313
0
                _prepare_bitmap_for_write();
1314
0
                for (auto v : _set) {
1315
0
                    _bitmap->add(v);
1316
0
                }
1317
0
                _type = BITMAP;
1318
0
                _set.clear();
1319
0
                break;
1320
2
            case SET: {
1321
2
                break;
1322
0
            }
1323
20
            }
1324
20
            if (_type == SET) {
1325
20
                _convert_to_bitmap_if_need();
1326
20
            }
1327
20
        }
1328
1329
42
        if (_type == EMPTY && single_values.size() == 1) {
1330
2
            if (config::enable_set_in_bitmap_value) {
1331
0
                _type = SET;
1332
0
                _set.insert(single_values[0]);
1333
2
            } else {
1334
2
                _sv = single_values[0];
1335
2
                _type = SINGLE;
1336
2
            }
1337
40
        } else if (!single_values.empty()) {
1338
8
            switch (_type) {
1339
0
            case EMPTY:
1340
2
            case SINGLE:
1341
2
                if (config::enable_set_in_bitmap_value) {
1342
0
                    _set.insert(single_values.cbegin(), single_values.cend());
1343
0
                    if (_type == SINGLE) {
1344
0
                        _set.insert(_sv);
1345
0
                    }
1346
0
                    _type = SET;
1347
0
                    _convert_to_bitmap_if_need();
1348
2
                } else {
1349
2
                    _prepare_bitmap_for_write();
1350
2
                    _bitmap->addMany(single_values.size(), single_values.data());
1351
2
                    if (_type == SINGLE) {
1352
2
                        _bitmap->add(_sv);
1353
2
                    }
1354
2
                    _type = BITMAP;
1355
2
                    _convert_to_smaller_type();
1356
2
                }
1357
2
                break;
1358
6
            case BITMAP: {
1359
6
                _prepare_bitmap_for_write();
1360
6
                _bitmap->addMany(single_values.size(), single_values.data());
1361
6
                break;
1362
0
            }
1363
0
            case SET:
1364
0
                _set.insert(single_values.cbegin(), single_values.cend());
1365
0
                _convert_to_bitmap_if_need();
1366
0
                break;
1367
8
            }
1368
8
        }
1369
1370
42
        return *this;
1371
42
    }
1372
1373
    // Compute the intersection between the current bitmap and the provided bitmap.
1374
    // Possible type transitions are:
1375
    // SINGLE -> EMPTY
1376
    // BITMAP -> EMPTY
1377
    // BITMAP -> SINGLE
1378
102
    BitmapValue& operator&=(const BitmapValue& rhs) {
1379
102
        switch (rhs._type) {
1380
28
        case EMPTY:
1381
28
            reset(); // empty & any = empty
1382
28
            break;
1383
20
        case SINGLE:
1384
20
            switch (_type) {
1385
4
            case EMPTY:
1386
4
                break;
1387
6
            case SINGLE:
1388
6
                if (_sv != rhs._sv) {
1389
4
                    _type = EMPTY;
1390
4
                }
1391
6
                break;
1392
8
            case BITMAP:
1393
8
                if (!_bitmap->contains(rhs._sv)) {
1394
2
                    _type = EMPTY;
1395
6
                } else {
1396
6
                    _type = SINGLE;
1397
6
                    _sv = rhs._sv;
1398
6
                }
1399
8
                _bitmap.reset();
1400
8
                _is_shared = false;
1401
8
                break;
1402
2
            case SET:
1403
2
                if (!_set.contains(rhs._sv)) {
1404
0
                    _type = EMPTY;
1405
2
                } else {
1406
2
                    _type = SINGLE;
1407
2
                    _sv = rhs._sv;
1408
2
                }
1409
2
                _set.clear();
1410
2
                break;
1411
20
            }
1412
20
            break;
1413
20
        case BITMAP:
1414
20
            switch (_type) {
1415
4
            case EMPTY:
1416
4
                break;
1417
6
            case SINGLE:
1418
6
                if (!rhs._bitmap->contains(_sv)) {
1419
4
                    _type = EMPTY;
1420
4
                }
1421
6
                break;
1422
8
            case BITMAP:
1423
8
                _prepare_bitmap_for_write();
1424
8
                *_bitmap &= *rhs._bitmap;
1425
8
                _convert_to_smaller_type();
1426
8
                break;
1427
2
            case SET:
1428
64
                for (auto it = _set.begin(); it != _set.end();) {
1429
62
                    if (!rhs._bitmap->contains(*it)) {
1430
30
                        it = _set.erase(it);
1431
32
                    } else {
1432
32
                        ++it;
1433
32
                    }
1434
62
                }
1435
2
                _convert_to_smaller_type();
1436
2
                break;
1437
20
            }
1438
20
            break;
1439
34
        case SET:
1440
34
            switch (_type) {
1441
2
            case EMPTY:
1442
2
                break;
1443
2
            case SINGLE:
1444
2
                if (!rhs._set.contains(_sv)) {
1445
2
                    _type = EMPTY;
1446
2
                }
1447
2
                break;
1448
2
            case BITMAP:
1449
2
                _prepare_bitmap_for_write();
1450
62
                for (auto v : rhs._set) {
1451
62
                    if (_bitmap->contains(v)) {
1452
0
                        _set.insert(v);
1453
0
                    }
1454
62
                }
1455
2
                _type = SET;
1456
2
                _bitmap.reset();
1457
2
                _is_shared = false;
1458
2
                _convert_to_smaller_type();
1459
2
                break;
1460
28
            case SET:
1461
158
                for (auto it = _set.begin(); it != _set.end();) {
1462
130
                    if (!rhs._set.contains(*it)) {
1463
74
                        it = _set.erase(it);
1464
74
                    } else {
1465
56
                        ++it;
1466
56
                    }
1467
130
                }
1468
28
                _convert_to_smaller_type();
1469
28
                break;
1470
34
            }
1471
34
            break;
1472
102
        }
1473
102
        return *this;
1474
102
    }
1475
1476
    // Compute the symmetric union between the current bitmap and the provided bitmap.
1477
    // Possible type transitions are:
1478
    // SINGLE -> EMPTY
1479
    // BITMAP -> EMPTY
1480
    // BITMAP -> SINGLE
1481
58
    BitmapValue& operator^=(const BitmapValue& rhs) {
1482
58
        switch (rhs._type) {
1483
16
        case EMPTY:
1484
16
            break;
1485
8
        case SINGLE:
1486
8
            switch (_type) {
1487
2
            case EMPTY:
1488
2
                add(rhs._sv);
1489
2
                break;
1490
2
            case SINGLE:
1491
2
                if (_sv == rhs._sv) {
1492
0
                    _type = EMPTY;
1493
2
                } else {
1494
2
                    add(rhs._sv);
1495
2
                }
1496
2
                break;
1497
2
            case BITMAP:
1498
2
                if (!_bitmap->contains(rhs._sv)) {
1499
0
                    add(rhs._sv);
1500
2
                } else {
1501
2
                    _prepare_bitmap_for_write();
1502
2
                    _bitmap->remove(rhs._sv);
1503
2
                }
1504
2
                break;
1505
2
            case SET:
1506
2
                if (!_set.contains(rhs._sv)) {
1507
0
                    _set.insert(rhs._sv);
1508
2
                } else {
1509
2
                    _set.erase(rhs._sv);
1510
2
                }
1511
2
                break;
1512
8
            }
1513
8
            break;
1514
12
        case BITMAP:
1515
12
            switch (_type) {
1516
2
            case EMPTY:
1517
2
                _bitmap = rhs._bitmap;
1518
2
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1519
2
                _is_shared = true;
1520
2
                _type = BITMAP;
1521
2
                break;
1522
2
            case SINGLE:
1523
2
                _bitmap = rhs._bitmap;
1524
2
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1525
2
                _is_shared = true;
1526
2
                _type = BITMAP;
1527
2
                _prepare_bitmap_for_write();
1528
2
                if (!rhs._bitmap->contains(_sv)) {
1529
2
                    _bitmap->add(_sv);
1530
2
                } else {
1531
0
                    _bitmap->remove(_sv);
1532
0
                }
1533
2
                break;
1534
6
            case BITMAP:
1535
6
                _prepare_bitmap_for_write();
1536
6
                *_bitmap ^= *rhs._bitmap;
1537
6
                _convert_to_smaller_type();
1538
6
                break;
1539
2
            case SET:
1540
2
                _prepare_bitmap_for_write();
1541
2
                *_bitmap = *rhs._bitmap;
1542
62
                for (auto v : _set) {
1543
62
                    if (_bitmap->contains(v)) {
1544
32
                        _bitmap->remove(v);
1545
32
                    } else {
1546
30
                        _bitmap->add(v);
1547
30
                    }
1548
62
                }
1549
2
                _type = BITMAP;
1550
2
                _convert_to_smaller_type();
1551
2
                break;
1552
12
            }
1553
12
            break;
1554
22
        case SET:
1555
22
            switch (_type) {
1556
2
            case EMPTY:
1557
2
                _set = rhs._set;
1558
2
                _type = SET;
1559
2
                break;
1560
2
            case SINGLE:
1561
2
                _set = rhs._set;
1562
2
                if (!rhs._set.contains(_sv)) {
1563
2
                    _set.insert(_sv);
1564
2
                } else {
1565
0
                    _set.erase(_sv);
1566
0
                }
1567
2
                _type = SET;
1568
2
                break;
1569
2
            case BITMAP:
1570
2
                _prepare_bitmap_for_write();
1571
62
                for (auto v : rhs._set) {
1572
62
                    if (_bitmap->contains(v)) {
1573
0
                        _bitmap->remove(v);
1574
62
                    } else {
1575
62
                        _bitmap->add(v);
1576
62
                    }
1577
62
                }
1578
2
                _convert_to_smaller_type();
1579
2
                break;
1580
16
            case SET:
1581
98
                for (auto v : rhs._set) {
1582
98
                    if (_set.contains(v)) {
1583
44
                        _set.erase(v);
1584
54
                    } else {
1585
54
                        _set.insert(v);
1586
54
                    }
1587
98
                }
1588
16
                _convert_to_smaller_type();
1589
16
                break;
1590
22
            }
1591
22
            break;
1592
58
        }
1593
58
        return *this;
1594
58
    }
1595
1596
    // check if value x is present
1597
18.3k
    bool contains(uint64_t x) const {
1598
18.3k
        switch (_type) {
1599
0
        case EMPTY:
1600
0
            return false;
1601
28
        case SINGLE:
1602
28
            return _sv == x;
1603
17.8k
        case BITMAP:
1604
17.8k
            return _bitmap->contains(x);
1605
476
        case SET:
1606
476
            return _set.contains(x);
1607
18.3k
        }
1608
0
        return false;
1609
18.3k
    }
1610
1611
    // true if contains a value that belongs to the range [left, right].
1612
    bool contains_any(uint64_t left, uint64_t right) const;
1613
1614
480k
    uint64_t cardinality() const {
1615
480k
        switch (_type) {
1616
480k
        case EMPTY:
1617
480k
            return 0;
1618
72
        case SINGLE:
1619
72
            return 1;
1620
420
        case BITMAP:
1621
420
            return _bitmap->cardinality();
1622
148
        case SET:
1623
148
            return _set.size();
1624
480k
        }
1625
0
        return 0;
1626
480k
    }
1627
1628
120k
    uint64_t and_cardinality(const BitmapValue& rhs) const {
1629
120k
        switch (rhs._type) {
1630
120k
        case EMPTY:
1631
120k
            return 0;
1632
8
        case SINGLE:
1633
8
            switch (_type) {
1634
2
            case EMPTY:
1635
2
                return 0;
1636
2
            case SINGLE:
1637
2
                return _sv == rhs._sv;
1638
2
            case BITMAP:
1639
2
                return _bitmap->contains(rhs._sv);
1640
2
            case SET:
1641
2
                return _set.contains(rhs._sv);
1642
8
            }
1643
0
            break;
1644
14
        case BITMAP:
1645
14
            switch (_type) {
1646
2
            case EMPTY:
1647
2
                return 0;
1648
2
            case SINGLE:
1649
2
                return rhs._bitmap->contains(_sv);
1650
8
            case BITMAP:
1651
8
                return _bitmap->andCardinality(*rhs._bitmap);
1652
2
            case SET: {
1653
2
                uint64_t cardinality = 0;
1654
62
                for (auto v : _set) {
1655
62
                    if (rhs._bitmap->contains(v)) {
1656
32
                        ++cardinality;
1657
32
                    }
1658
62
                }
1659
2
                return cardinality;
1660
0
            }
1661
14
            }
1662
0
            break;
1663
8
        case SET:
1664
8
            switch (_type) {
1665
2
            case EMPTY:
1666
2
                return 0;
1667
2
            case SINGLE:
1668
2
                return rhs._set.contains(_sv);
1669
2
            case BITMAP: {
1670
2
                uint64_t cardinality = 0;
1671
62
                for (auto v : rhs._set) {
1672
62
                    if (_bitmap->contains(v)) {
1673
0
                        ++cardinality;
1674
0
                    }
1675
62
                }
1676
2
                return cardinality;
1677
0
            }
1678
2
            case SET: {
1679
2
                uint64_t cardinality = 0;
1680
62
                for (auto v : _set) {
1681
62
                    if (rhs._set.contains(v)) {
1682
28
                        ++cardinality;
1683
28
                    }
1684
62
                }
1685
2
                return cardinality;
1686
0
            }
1687
8
            }
1688
120k
        }
1689
0
        return 0;
1690
120k
    }
1691
1692
120k
    uint64_t or_cardinality(const BitmapValue& rhs) const {
1693
120k
        switch (rhs._type) {
1694
119k
        case EMPTY:
1695
119k
            return cardinality();
1696
0
        case SINGLE:
1697
0
            switch (_type) {
1698
0
            case EMPTY:
1699
0
                return 1;
1700
0
            case SINGLE:
1701
0
                return 1 + (_sv != rhs._sv);
1702
0
            case BITMAP:
1703
0
                return cardinality() + !_bitmap->contains(rhs._sv);
1704
0
            case SET:
1705
0
                return _set.size() + !_set.contains(rhs._sv);
1706
0
            }
1707
0
            break;
1708
6
        case BITMAP:
1709
6
            switch (_type) {
1710
0
            case EMPTY:
1711
0
                return rhs.cardinality();
1712
0
            case SINGLE:
1713
0
                return rhs.cardinality() + !rhs._bitmap->contains(_sv);
1714
6
            case BITMAP:
1715
6
                return _bitmap->orCardinality(*rhs._bitmap);
1716
0
            case SET: {
1717
0
                uint64_t cardinality = rhs._bitmap->cardinality();
1718
0
                for (auto v : _set) {
1719
0
                    if (!rhs._bitmap->contains(v)) {
1720
0
                        ++cardinality;
1721
0
                    }
1722
0
                }
1723
0
                return cardinality;
1724
0
            }
1725
6
            }
1726
0
            break;
1727
0
        case SET:
1728
0
            switch (_type) {
1729
0
            case EMPTY:
1730
0
                return rhs.cardinality();
1731
0
            case SINGLE:
1732
0
                return rhs.cardinality() + !rhs._set.contains(_sv);
1733
0
            case BITMAP: {
1734
0
                uint64_t cardinality = _bitmap->cardinality();
1735
0
                for (auto v : rhs._set) {
1736
0
                    if (!_bitmap->contains(v)) {
1737
0
                        ++cardinality;
1738
0
                    }
1739
0
                }
1740
0
                return cardinality;
1741
0
            }
1742
0
            case SET: {
1743
0
                uint64_t cardinality = _set.size();
1744
0
                for (auto v : _set) {
1745
0
                    if (!rhs._set.contains(v)) {
1746
0
                        ++cardinality;
1747
0
                    }
1748
0
                }
1749
0
                return cardinality;
1750
0
            }
1751
0
            }
1752
120k
        }
1753
0
        return 0;
1754
120k
    }
1755
1756
48
    uint64_t andnot_cardinality(const BitmapValue& rhs) const {
1757
48
        switch (rhs._type) {
1758
16
        case EMPTY:
1759
16
            return cardinality();
1760
8
        case SINGLE:
1761
8
            switch (_type) {
1762
2
            case EMPTY:
1763
2
                return 0;
1764
2
            case SINGLE:
1765
2
                return 1 - (_sv == rhs._sv);
1766
2
            case BITMAP:
1767
2
                return cardinality() - _bitmap->contains(rhs._sv);
1768
2
            case SET:
1769
2
                return cardinality() - _set.contains(rhs._sv);
1770
8
            }
1771
0
            break;
1772
8
        case BITMAP:
1773
8
            switch (_type) {
1774
2
            case EMPTY:
1775
2
                return 0;
1776
2
            case SINGLE:
1777
2
                return !rhs._bitmap->contains(_sv);
1778
2
            case BITMAP:
1779
2
                return _bitmap->andnotCardinality(*rhs._bitmap);
1780
2
            case SET: {
1781
2
                uint64_t cardinality = _set.size();
1782
62
                for (auto v : _set) {
1783
62
                    if (rhs._bitmap->contains(v)) {
1784
32
                        cardinality -= 1;
1785
32
                    }
1786
62
                }
1787
2
                return cardinality;
1788
0
            }
1789
8
            }
1790
0
            break;
1791
16
        case SET:
1792
16
            switch (_type) {
1793
2
            case EMPTY:
1794
2
                return 0;
1795
2
            case SINGLE:
1796
2
                return !rhs._set.contains(_sv);
1797
2
            case BITMAP: {
1798
2
                uint64_t cardinality = _bitmap->cardinality();
1799
62
                for (auto v : rhs._set) {
1800
62
                    if (_bitmap->contains(v)) {
1801
0
                        cardinality -= 1;
1802
0
                    }
1803
62
                }
1804
2
                return cardinality;
1805
0
            }
1806
10
            case SET: {
1807
10
                uint64_t cardinality = _set.size();
1808
66
                for (auto v : rhs._set) {
1809
66
                    if (_set.contains(v)) {
1810
32
                        cardinality -= 1;
1811
32
                    }
1812
66
                }
1813
10
                return cardinality;
1814
0
            }
1815
16
            }
1816
48
        }
1817
0
        return 0;
1818
48
    }
1819
1820
    // Return how many bytes are required to serialize this bitmap.
1821
    // See BitmapTypeCode for the serialized format.
1822
296k
    size_t getSizeInBytes() {
1823
296k
        size_t res = 0;
1824
296k
        switch (_type) {
1825
240k
        case EMPTY:
1826
240k
            res = 1;
1827
240k
            break;
1828
70
        case SINGLE:
1829
70
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1830
60
                res = 1 + sizeof(uint32_t);
1831
60
            } else {
1832
10
                res = 1 + sizeof(uint64_t);
1833
10
            }
1834
70
            break;
1835
54.6k
        case BITMAP:
1836
54.6k
            _prepare_bitmap_for_write();
1837
54.6k
            _bitmap->runOptimize();
1838
54.6k
            _bitmap->shrinkToFit();
1839
54.6k
            res = _bitmap->getSizeInBytes(config::bitmap_serialize_version);
1840
54.6k
            break;
1841
1.35k
        case SET:
1842
            /// 1 byte for type, 1 byte for count
1843
1.35k
            res = 2 + sizeof(uint64_t) * _set.size();
1844
1.35k
            break;
1845
296k
        }
1846
296k
        return res;
1847
296k
    }
1848
1849
    // Serialize the bitmap value to dst, which should be large enough.
1850
    // Client should call `getSizeInBytes` first to get the serialized size.
1851
159k
    void write_to(char* dst) const {
1852
159k
        switch (_type) {
1853
120k
        case EMPTY:
1854
120k
            *dst = BitmapTypeCode::EMPTY;
1855
120k
            break;
1856
50
        case SINGLE:
1857
50
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1858
44
                *(dst++) = BitmapTypeCode::SINGLE32;
1859
44
                encode_fixed32_le(reinterpret_cast<uint8_t*>(dst), static_cast<uint32_t>(_sv));
1860
44
            } else {
1861
6
                *(dst++) = BitmapTypeCode::SINGLE64;
1862
6
                encode_fixed64_le(reinterpret_cast<uint8_t*>(dst), _sv);
1863
6
            }
1864
50
            break;
1865
1.09k
        case SET:
1866
1.09k
            DCHECK(config::enable_set_in_bitmap_value);
1867
1.09k
            *dst = BitmapTypeCode::SET;
1868
1.09k
            ++dst;
1869
1.09k
            *dst = static_cast<uint8_t>(_set.size());
1870
1.09k
            ++dst;
1871
17.5k
            for (auto v : _set) {
1872
17.5k
                encode_fixed64_le(reinterpret_cast<uint8_t*>(dst), v);
1873
17.5k
                dst += sizeof(uint64_t);
1874
17.5k
            }
1875
1.09k
            break;
1876
38.3k
        case BITMAP:
1877
38.3k
            _bitmap->write(dst, config::bitmap_serialize_version);
1878
38.3k
            break;
1879
159k
        }
1880
159k
    }
1881
1882
    // Deserialize a bitmap value from `src`.
1883
    // Return false if `src` begins with unknown type code, true otherwise.
1884
142k
    bool deserialize(const char* src) {
1885
142k
        switch (*src) {
1886
120k
        case BitmapTypeCode::EMPTY:
1887
120k
            _type = EMPTY;
1888
120k
            break;
1889
30
        case BitmapTypeCode::SINGLE32:
1890
30
            _type = SINGLE;
1891
30
            _sv = decode_fixed32_le(reinterpret_cast<const uint8_t*>(src + 1));
1892
30
            if (config::enable_set_in_bitmap_value) {
1893
4
                _type = SET;
1894
4
                _set.insert(_sv);
1895
4
            }
1896
30
            break;
1897
6
        case BitmapTypeCode::SINGLE64:
1898
6
            _type = SINGLE;
1899
6
            _sv = decode_fixed64_le(reinterpret_cast<const uint8_t*>(src + 1));
1900
6
            if (config::enable_set_in_bitmap_value) {
1901
2
                _type = SET;
1902
2
                _set.insert(_sv);
1903
2
            }
1904
6
            break;
1905
10.2k
        case BitmapTypeCode::BITMAP32:
1906
10.2k
        case BitmapTypeCode::BITMAP64:
1907
22.1k
        case BitmapTypeCode::BITMAP32_V2:
1908
22.1k
        case BitmapTypeCode::BITMAP64_V2:
1909
22.1k
            _type = BITMAP;
1910
22.1k
            _is_shared = false;
1911
22.1k
            _bitmap = std::make_shared<detail::Roaring64Map>(detail::Roaring64Map::read(src));
1912
22.1k
            break;
1913
710
        case BitmapTypeCode::SET: {
1914
710
            _type = SET;
1915
710
            ++src;
1916
710
            uint8_t count = *src;
1917
710
            ++src;
1918
710
            CHECK(count <= SET_TYPE_THRESHOLD) << "bitmap value with incorrect set count";
1919
12.0k
            for (uint8_t i = 0; i != count; ++i, src += sizeof(uint64_t)) {
1920
11.3k
                _set.insert(decode_fixed64_le(reinterpret_cast<const uint8_t*>(src)));
1921
11.3k
            }
1922
710
            CHECK_EQ(count, _set.size()) << "bitmap value with incorrect set count";
1923
1924
710
            if (!config::enable_set_in_bitmap_value) {
1925
0
                _prepare_bitmap_for_write();
1926
0
                for (auto v : _set) {
1927
0
                    _bitmap->add(v);
1928
0
                }
1929
0
                _type = BITMAP;
1930
0
                _set.clear();
1931
0
            }
1932
710
            break;
1933
22.1k
        }
1934
2
        case BitmapTypeCode::SET_V2: {
1935
2
            uint32_t size = 0;
1936
2
            memcpy(&size, src + 1, sizeof(uint32_t));
1937
2
            src += sizeof(uint32_t) + 1;
1938
1939
2
            if (!config::enable_set_in_bitmap_value || size > SET_TYPE_THRESHOLD) {
1940
0
                _type = BITMAP;
1941
0
                _prepare_bitmap_for_write();
1942
1943
0
                for (int i = 0; i < size; ++i) {
1944
0
                    uint64_t key {};
1945
0
                    memcpy(&key, src, sizeof(uint64_t));
1946
0
                    _bitmap->add(key);
1947
0
                    src += sizeof(uint64_t);
1948
0
                }
1949
2
            } else {
1950
2
                _type = SET;
1951
2
                _set.reserve(size);
1952
1953
6
                for (int i = 0; i < size; ++i) {
1954
4
                    uint64_t key {};
1955
4
                    memcpy(&key, src, sizeof(uint64_t));
1956
4
                    _set.insert(key);
1957
4
                    src += sizeof(uint64_t);
1958
4
                }
1959
2
            }
1960
2
            break;
1961
22.1k
        }
1962
0
        default:
1963
0
            LOG(ERROR) << "BitmapTypeCode invalid, should between: " << BitmapTypeCode::EMPTY
1964
0
                       << " and " << BitmapTypeCode::BITMAP64 << " actual is "
1965
0
                       << static_cast<int>(*src);
1966
0
            return false;
1967
142k
        }
1968
142k
        return true;
1969
142k
    }
1970
1971
26
    int64_t minimum() const {
1972
26
        switch (_type) {
1973
4
        case SINGLE:
1974
4
            return _sv;
1975
8
        case BITMAP:
1976
8
            return _bitmap->minimum();
1977
10
        case SET:
1978
10
            return _min_in_set();
1979
4
        default:
1980
4
            return 0;
1981
26
        }
1982
26
    }
1983
1984
    // TODO limit string size to avoid OOM
1985
18.6k
    std::string to_string() const {
1986
18.6k
        std::stringstream ss;
1987
18.6k
        switch (_type) {
1988
32
        case EMPTY:
1989
32
            break;
1990
90
        case SINGLE:
1991
90
            ss << _sv;
1992
90
            break;
1993
17.4k
        case BITMAP: {
1994
17.4k
            struct IterCtx {
1995
17.4k
                std::stringstream* ss = nullptr;
1996
17.4k
                bool first = true;
1997
17.4k
            } iter_ctx;
1998
17.4k
            iter_ctx.ss = &ss;
1999
2000
17.4k
            _bitmap->iterate(
2001
8.47M
                    [](uint64_t value, void* c) -> bool {
2002
8.47M
                        auto ctx = reinterpret_cast<IterCtx*>(c);
2003
8.47M
                        if (ctx->first) {
2004
17.4k
                            ctx->first = false;
2005
8.45M
                        } else {
2006
8.45M
                            (*ctx->ss) << ",";
2007
8.45M
                        }
2008
8.47M
                        (*ctx->ss) << value;
2009
8.47M
                        return true;
2010
8.47M
                    },
2011
17.4k
                    &iter_ctx);
2012
17.4k
            break;
2013
0
        }
2014
1.03k
        case SET: {
2015
1.03k
            struct IterCtx {
2016
1.03k
                std::stringstream* ss = nullptr;
2017
1.03k
                bool first = true;
2018
1.03k
            } iter_ctx;
2019
1.03k
            iter_ctx.ss = &ss;
2020
2021
1.03k
            std::vector<uint64_t> values(_set.begin(), _set.end());
2022
1.03k
            std::sort(values.begin(), values.end());
2023
2024
16.5k
            for (auto v : values) {
2025
16.5k
                if (iter_ctx.first) {
2026
1.03k
                    iter_ctx.first = false;
2027
15.5k
                } else {
2028
15.5k
                    (*iter_ctx.ss) << ",";
2029
15.5k
                }
2030
16.5k
                (*iter_ctx.ss) << v;
2031
16.5k
            }
2032
1.03k
            break;
2033
0
        }
2034
18.6k
        }
2035
18.6k
        return ss.str();
2036
18.6k
    }
2037
2038
28
    int64_t maximum() const {
2039
28
        switch (_type) {
2040
4
        case SINGLE:
2041
4
            return _sv;
2042
10
        case BITMAP:
2043
10
            return _bitmap->maximum();
2044
10
        case SET:
2045
10
            return _max_in_set();
2046
4
        default:
2047
4
            return 0;
2048
28
        }
2049
28
    }
2050
2051
8
    uint64_t max(bool* empty) const {
2052
8
        return min_or_max(empty, [&]() { return maximum(); });
2053
8
    }
2054
2055
8
    uint64_t min(bool* empty) const {
2056
8
        return min_or_max(empty, [&]() { return minimum(); });
2057
8
    }
2058
2059
10
    uint64_t _min_in_set() const {
2060
10
        DCHECK_EQ(_type, SET);
2061
10
        return *std::min_element(_set.begin(), _set.end());
2062
10
    }
2063
2064
10
    uint64_t _max_in_set() const {
2065
10
        DCHECK_EQ(_type, SET);
2066
10
        return *std::max_element(_set.begin(), _set.end());
2067
10
    }
2068
2069
12
    bool empty() const { return _type == EMPTY; }
2070
2071
    /**
2072
     * Return new set with specified range (not include the range_end)
2073
     */
2074
    int64_t sub_range(const int64_t& range_start, const int64_t& range_end,
2075
14
                      BitmapValue* ret_bitmap) {
2076
14
        switch (_type) {
2077
2
        case EMPTY:
2078
2
            return 0;
2079
4
        case SINGLE: {
2080
            //only single value, so _sv must in [range_start,range_end)
2081
4
            if (range_start <= _sv && _sv < range_end) {
2082
2
                ret_bitmap->add(_sv);
2083
2
                return 1;
2084
2
            } else {
2085
2
                return 0;
2086
2
            }
2087
4
        }
2088
4
        case BITMAP: {
2089
4
            int64_t count = 0;
2090
292
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2091
290
                if (*it < range_start) {
2092
0
                    continue;
2093
0
                }
2094
290
                if (*it < range_end) {
2095
288
                    ret_bitmap->add(*it);
2096
288
                    ++count;
2097
288
                } else {
2098
2
                    break;
2099
2
                }
2100
290
            }
2101
4
            return count;
2102
4
        }
2103
4
        case SET: {
2104
4
            int64_t count = 0;
2105
4
            std::vector<uint64_t> values(_set.begin(), _set.end());
2106
4
            std::sort(values.begin(), values.end());
2107
12
            for (auto it = values.begin(); it != values.end(); ++it) {
2108
8
                if (*it < range_start || *it >= range_end) {
2109
2
                    continue;
2110
2
                }
2111
6
                ret_bitmap->add(*it);
2112
6
                ++count;
2113
6
            }
2114
4
            return count;
2115
4
        }
2116
14
        }
2117
0
        return 0;
2118
14
    }
2119
2120
    /**
2121
     * Return new set with specified start and limit
2122
     * @param range_start the start value for the range
2123
     * @param cardinality_limit the length of the subset
2124
     * @return the real count for subset, maybe less than cardinality_limit
2125
     */
2126
    int64_t sub_limit(const int64_t& range_start, const int64_t& cardinality_limit,
2127
18
                      BitmapValue* ret_bitmap) {
2128
18
        switch (_type) {
2129
2
        case EMPTY:
2130
2
            return 0;
2131
4
        case SINGLE: {
2132
            //only single value, so range_start must less than _sv
2133
4
            if (range_start > _sv) {
2134
0
                return 0;
2135
4
            } else {
2136
4
                ret_bitmap->add(_sv);
2137
4
                return 1;
2138
4
            }
2139
4
        }
2140
8
        case BITMAP: {
2141
8
            int64_t count = 0;
2142
62
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2143
60
                if (*it < range_start) {
2144
16
                    continue;
2145
16
                }
2146
44
                if (count < cardinality_limit) {
2147
38
                    ret_bitmap->add(*it);
2148
38
                    ++count;
2149
38
                } else {
2150
6
                    break;
2151
6
                }
2152
44
            }
2153
8
            return count;
2154
4
        }
2155
4
        case SET: {
2156
4
            int64_t count = 0;
2157
2158
4
            std::vector<uint64_t> values(_set.begin(), _set.end());
2159
4
            std::sort(values.begin(), values.end());
2160
12
            for (auto it = values.begin(); it != values.end(); ++it) {
2161
8
                if (*it < range_start) {
2162
0
                    continue;
2163
0
                }
2164
8
                if (count < cardinality_limit) {
2165
8
                    ret_bitmap->add(*it);
2166
8
                    ++count;
2167
8
                } else {
2168
0
                    break;
2169
0
                }
2170
8
            }
2171
4
            return count;
2172
4
        }
2173
18
        }
2174
0
        return 0;
2175
18
    }
2176
2177
    /**
2178
     * Returns the bitmap elements, starting from the offset position.
2179
     * The number of returned elements is limited by the cardinality_limit parameter.
2180
     * Analog of the substring string function, but for bitmap.
2181
     */
2182
16
    int64_t offset_limit(const int64_t& offset, const int64_t& limit, BitmapValue* ret_bitmap) {
2183
16
        switch (_type) {
2184
2
        case EMPTY:
2185
2
            return 0;
2186
4
        case SINGLE: {
2187
            //only single value, so offset must start 0
2188
4
            if (offset == 0) {
2189
2
                ret_bitmap->add(_sv);
2190
2
                return 1;
2191
2
            } else {
2192
2
                return 0;
2193
2
            }
2194
4
        }
2195
10
        default:
2196
10
            break;
2197
16
        }
2198
10
        if (_type == BITMAP) {
2199
6
            if (std::abs(offset) >= _bitmap->cardinality()) {
2200
0
                return 0;
2201
0
            }
2202
6
            int64_t abs_offset = offset;
2203
6
            if (offset < 0) {
2204
0
                abs_offset = _bitmap->cardinality() + offset;
2205
0
            }
2206
2207
6
            int64_t count = 0;
2208
6
            int64_t offset_count = 0;
2209
6
            auto it = _bitmap->begin();
2210
246
            for (; it != _bitmap->end() && offset_count < abs_offset; ++it) {
2211
240
                ++offset_count;
2212
240
            }
2213
66
            for (; it != _bitmap->end() && count < limit; ++it, ++count) {
2214
60
                ret_bitmap->add(*it);
2215
60
            }
2216
6
            return count;
2217
6
        } else {
2218
4
            if (std::abs(offset) > _set.size()) {
2219
0
                return 0;
2220
0
            }
2221
2222
4
            int64_t abs_offset = offset;
2223
4
            if (offset < 0) {
2224
0
                abs_offset = _set.size() + offset;
2225
0
            }
2226
2227
4
            std::vector<uint64_t> values(_set.begin(), _set.end());
2228
4
            std::sort(values.begin(), values.end());
2229
2230
4
            int64_t count = 0;
2231
4
            size_t index = 0;
2232
8
            for (auto v : values) {
2233
8
                if (index < abs_offset) {
2234
2
                    ++index;
2235
2
                    continue;
2236
2
                }
2237
6
                if (count == limit || index == values.size()) {
2238
0
                    break;
2239
0
                }
2240
6
                ++count;
2241
6
                ++index;
2242
6
                ret_bitmap->add(v);
2243
6
            }
2244
4
            return count;
2245
4
        }
2246
10
    }
2247
2248
    //for function bitmap_to_array
2249
6
    void to_array(vectorized::PaddedPODArray<int64_t>& data) const {
2250
6
        switch (_type) {
2251
0
        case EMPTY:
2252
0
            break;
2253
2
        case SINGLE: {
2254
2
            data.emplace_back(_sv);
2255
2
            break;
2256
0
        }
2257
2
        case BITMAP: {
2258
258
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2259
256
                data.emplace_back(*it);
2260
256
            }
2261
2
            break;
2262
0
        }
2263
2
        case SET: {
2264
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2265
2
            std::sort(values.begin(), values.end());
2266
62
            for (auto v : values) {
2267
62
                data.emplace_back(v);
2268
62
            }
2269
2
            break;
2270
0
        }
2271
6
        }
2272
6
    }
2273
2274
294
    void reset() {
2275
294
        _type = EMPTY;
2276
294
        _sv = 0;
2277
294
        _set.clear();
2278
294
        _is_shared = false;
2279
294
        _bitmap = nullptr;
2280
294
    }
2281
    // Implement an iterator for convenience
2282
    friend class BitmapValueIterator;
2283
    typedef BitmapValueIterator b_iterator;
2284
2285
    b_iterator begin() const;
2286
    b_iterator end() const;
2287
    b_iterator lower_bound(uint64_t val) const;
2288
2289
private:
2290
88
    void _convert_to_smaller_type() {
2291
88
        if (_type == BITMAP) {
2292
32
            uint64_t c = _bitmap->cardinality();
2293
32
            if (config::enable_set_in_bitmap_value && c > SET_TYPE_THRESHOLD) {
2294
14
                return;
2295
18
            } else if (c > 1) {
2296
14
                return;
2297
14
            }
2298
4
            if (c == 0) {
2299
2
                _type = EMPTY;
2300
2
            } else if (c == 1 && !config::enable_set_in_bitmap_value) {
2301
2
                _type = SINGLE;
2302
2
                _sv = _bitmap->minimum();
2303
2
            } else {
2304
0
                _type = SET;
2305
0
                for (auto v : *_bitmap) {
2306
0
                    _set.insert(v);
2307
0
                }
2308
0
            }
2309
4
            _bitmap.reset();
2310
56
        } else if (_type == SET) {
2311
56
            if (_set.size() == 1 && !config::enable_set_in_bitmap_value) {
2312
0
                _type = SINGLE;
2313
0
                _sv = *_set.begin();
2314
0
                _set.clear();
2315
0
            }
2316
56
        }
2317
88
    }
2318
2319
16
    uint64_t min_or_max(bool* empty, std::function<uint64_t()> func) const {
2320
16
        bool is_empty = false;
2321
16
        uint64_t result = 0;
2322
16
        switch (_type) {
2323
0
        case SINGLE:
2324
0
            result = _sv;
2325
0
            break;
2326
4
        case BITMAP:
2327
12
        case SET:
2328
12
            result = func();
2329
12
            break;
2330
4
        default:
2331
4
            is_empty = true;
2332
16
        }
2333
16
        if (empty) {
2334
16
            *empty = is_empty;
2335
16
        }
2336
16
        return result;
2337
16
    }
2338
2339
9.24M
    void _prepare_bitmap_for_write() {
2340
9.24M
        if (!_bitmap) {
2341
18.5k
            _bitmap = std::make_shared<detail::Roaring64Map>();
2342
18.5k
            _is_shared = false;
2343
18.5k
            return;
2344
18.5k
        }
2345
2346
9.23M
        if (!_is_shared) {
2347
            // the state is not shared, not need to check use count any more
2348
9.21M
            return;
2349
9.21M
        }
2350
2351
18.3k
        if (_bitmap.use_count() > 1) {
2352
2.05k
            auto new_one = std::make_shared<detail::Roaring64Map>();
2353
2.05k
            *new_one = *_bitmap;
2354
2.05k
            _bitmap = new_one;
2355
2.05k
        }
2356
18.3k
        _is_shared = false;
2357
18.3k
    }
2358
2359
260k
    void _convert_to_bitmap_if_need() {
2360
260k
        if (_type != SET || _set.size() <= SET_TYPE_THRESHOLD) {
2361
252k
            return;
2362
252k
        }
2363
7.95k
        _prepare_bitmap_for_write();
2364
266k
        for (auto v : _set) {
2365
266k
            _bitmap->add(v);
2366
266k
        }
2367
7.95k
        _type = BITMAP;
2368
7.95k
        _set.clear();
2369
7.95k
    }
2370
2371
    enum BitmapDataType {
2372
        EMPTY = 0,
2373
        SINGLE = 1, // single element
2374
        BITMAP = 2, // more than one elements
2375
        SET = 3     // elements count less or equal than 32
2376
    };
2377
    uint64_t _sv = 0;                              // store the single value when _type == SINGLE
2378
    std::shared_ptr<detail::Roaring64Map> _bitmap; // used when _type == BITMAP
2379
    SetContainer<uint64_t> _set;
2380
    BitmapDataType _type {EMPTY};
2381
    // Indicate whether the state is shared among multi BitmapValue object
2382
    bool _is_shared = true;
2383
    static constexpr uint64_t SET_TYPE_THRESHOLD = 32;
2384
};
2385
2386
// A simple implement of bitmap value iterator(Read only)
2387
// Usage:
2388
//  BitmapValueIterator iter = bitmap_value.begin();
2389
//  BitmapValueIterator end = bitmap_value.end();
2390
//  for (; iter != end(); ++iter) {
2391
//      uint64_t v = *iter;
2392
//      ... do something with "v" ...
2393
//  }
2394
class BitmapValueIterator {
2395
public:
2396
74
    BitmapValueIterator(const BitmapValue& bitmap, bool end = false) : _bitmap(bitmap), _end(end) {
2397
74
        switch (_bitmap._type) {
2398
16
        case BitmapValue::BitmapDataType::EMPTY:
2399
16
            _end = true;
2400
16
            break;
2401
16
        case BitmapValue::BitmapDataType::SINGLE:
2402
16
            _sv = _bitmap._sv;
2403
16
            break;
2404
26
        case BitmapValue::BitmapDataType::BITMAP:
2405
26
            _iter = new detail::Roaring64MapSetBitForwardIterator(*_bitmap._bitmap, _end);
2406
26
            break;
2407
16
        case BitmapValue::BitmapDataType::SET:
2408
16
            _set_iter = _end ? _bitmap._set.end() : _bitmap._set.begin();
2409
16
            break;
2410
0
        default:
2411
0
            CHECK(false) << _bitmap._type;
2412
74
        }
2413
74
    }
2414
2415
    BitmapValueIterator(const BitmapValueIterator& other)
2416
336
            : _bitmap(other._bitmap), _sv(other._sv), _end(other._end) {
2417
336
        _iter = other._iter ? new detail::Roaring64MapSetBitForwardIterator(*other._iter) : nullptr;
2418
336
        if (_bitmap._type == BitmapValue::BitmapDataType::SET) {
2419
66
            _set_iter = other._set_iter;
2420
66
        }
2421
336
    }
2422
2423
410
    ~BitmapValueIterator() {
2424
410
        if (_iter != nullptr) {
2425
286
            delete _iter;
2426
286
            _iter = nullptr;
2427
286
        }
2428
410
    }
2429
2430
1.29k
    uint64_t operator*() const {
2431
1.29k
        CHECK(!_end) << "should not get value of end iterator";
2432
1.29k
        switch (_bitmap._type) {
2433
8
        case BitmapValue::BitmapDataType::SINGLE:
2434
8
            return _sv;
2435
1.03k
        case BitmapValue::BitmapDataType::BITMAP:
2436
1.03k
            return *(*_iter);
2437
250
        case BitmapValue::BitmapDataType::SET: {
2438
250
            return *_set_iter;
2439
0
        }
2440
0
        default:
2441
0
            CHECK(false) << _bitmap._type;
2442
1.29k
        }
2443
0
        return 0;
2444
1.29k
    }
2445
2446
972
    BitmapValueIterator& operator++() { // ++i, must returned inc. value
2447
972
        CHECK(!_end) << "should not forward when iterator ends";
2448
972
        switch (_bitmap._type) {
2449
6
        case BitmapValue::BitmapDataType::SINGLE:
2450
6
            _end = true;
2451
6
            break;
2452
778
        case BitmapValue::BitmapDataType::BITMAP:
2453
778
            ++(*_iter);
2454
778
            break;
2455
188
        case BitmapValue::BitmapDataType::SET:
2456
188
            ++_set_iter;
2457
188
            break;
2458
0
        default:
2459
0
            CHECK(false) << _bitmap._type;
2460
972
        }
2461
972
        return *this;
2462
972
    }
2463
2464
320
    BitmapValueIterator operator++(int) { // i++, must return orig. value
2465
320
        CHECK(!_end) << "should not forward when iterator ends";
2466
320
        BitmapValueIterator orig(*this);
2467
320
        switch (_bitmap._type) {
2468
2
        case BitmapValue::BitmapDataType::SINGLE:
2469
2
            _end = true;
2470
2
            break;
2471
256
        case BitmapValue::BitmapDataType::BITMAP:
2472
256
            ++(*_iter);
2473
256
            break;
2474
62
        case BitmapValue::BitmapDataType::SET:
2475
62
            ++_set_iter;
2476
62
            break;
2477
0
        default:
2478
0
            CHECK(false) << _bitmap._type;
2479
320
        }
2480
320
        return orig;
2481
320
    }
2482
2483
1.34k
    bool operator==(const BitmapValueIterator& other) const {
2484
1.34k
        if (_end && other._end) {
2485
26
            return true;
2486
26
        }
2487
2488
1.32k
        switch (_bitmap._type) {
2489
0
        case BitmapValue::BitmapDataType::EMPTY:
2490
0
            return other._bitmap._type == BitmapValue::BitmapDataType::EMPTY;
2491
10
        case BitmapValue::BitmapDataType::SINGLE:
2492
10
            return _end == other._end && _sv == other._sv;
2493
1.04k
        case BitmapValue::BitmapDataType::BITMAP:
2494
1.04k
            return *_iter == *(other._iter);
2495
264
        case BitmapValue::BitmapDataType::SET:
2496
264
            return _set_iter == other._set_iter;
2497
0
        default:
2498
0
            CHECK(false) << _bitmap._type;
2499
1.32k
        }
2500
0
        return false;
2501
1.32k
    }
2502
2503
1.32k
    bool operator!=(const BitmapValueIterator& other) const { return !(*this == other); }
2504
2505
    /**
2506
    * Move the iterator to the first value >= `val`.
2507
    */
2508
0
    BitmapValueIterator& move(uint64_t val) {
2509
0
        switch (_bitmap._type) {
2510
0
        case BitmapValue::BitmapDataType::SINGLE:
2511
0
            if (_sv < val) {
2512
0
                _end = true;
2513
0
            }
2514
0
            break;
2515
0
        case BitmapValue::BitmapDataType::BITMAP:
2516
0
            if (!_iter->move(val)) {
2517
0
                _end = true;
2518
0
            }
2519
0
            break;
2520
0
        case BitmapValue::BitmapDataType::SET: {
2521
0
            throw Exception(Status::FatalError("BitmapValue with set do not support move"));
2522
0
        }
2523
0
        default:
2524
0
            break;
2525
0
        }
2526
0
        return *this;
2527
0
    }
2528
2529
private:
2530
    const BitmapValue& _bitmap;
2531
    detail::Roaring64MapSetBitForwardIterator* _iter = nullptr;
2532
    BitmapValue::SetContainer<uint64_t>::const_iterator _set_iter;
2533
    uint64_t _sv = 0;
2534
    bool _end = false;
2535
};
2536
2537
32
inline BitmapValueIterator BitmapValue::begin() const {
2538
32
    return {*this};
2539
32
}
2540
2541
42
inline BitmapValueIterator BitmapValue::end() const {
2542
42
    return {*this, true};
2543
42
}
2544
2545
0
inline BitmapValueIterator BitmapValue::lower_bound(uint64_t val) const {
2546
0
    return BitmapValueIterator(*this).move(val);
2547
0
}
2548
2549
14
inline bool BitmapValue::contains_any(uint64_t left, uint64_t right) const {
2550
14
    if (left > right) {
2551
0
        return false;
2552
0
    }
2553
2554
14
    if (_type == SET) {
2555
50
        for (auto v : _set) {
2556
50
            if (v >= left && v <= right) {
2557
8
                return true;
2558
8
            }
2559
50
        }
2560
6
        return false;
2561
14
    }
2562
0
    auto it = lower_bound(left);
2563
0
    return it != end() && *it <= right;
2564
14
}
2565
2566
} // namespace doris