Coverage Report

Created: 2025-04-11 13:52

/root/doris/be/src/util/bitmap_value.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <parallel_hashmap/btree.h>
21
#include <parallel_hashmap/phmap.h>
22
23
#include <algorithm>
24
#include <cstdarg>
25
#include <cstdint>
26
#include <cstdio>
27
#include <limits>
28
#include <map>
29
#include <memory>
30
#include <new>
31
#include <numeric>
32
#include <roaring/roaring.hh>
33
#include <set>
34
#include <stdexcept>
35
#include <string>
36
#include <utility>
37
38
#include "common/config.h"
39
#include "common/exception.h"
40
#include "common/logging.h"
41
#include "udf/udf.h"
42
#include "util/coding.h"
43
#include "vec/common/pod_array.h"
44
#include "vec/common/pod_array_fwd.h"
45
namespace doris {
46
47
// serialized bitmap := TypeCode(1), Payload
48
// The format of payload depends on value of TypeCode which is defined below
49
struct BitmapTypeCode {
50
    enum type {
51
        // An empty bitmap. Payload is 0 byte.
52
        // added in 0.11
53
        EMPTY = 0,
54
        // A bitmap containing only one element that is in [0, UINT32_MAX]
55
        // Payload := UInt32LittleEndian(4 byte)
56
        // added in 0.11
57
        SINGLE32 = 1,
58
        // A bitmap whose maximum element is in [0, UINT32_MAX]
59
        // Payload := the standard RoaringBitmap format described by
60
        // https://github.com/RoaringBitmap/RoaringFormatSpec/
61
        // added in 0.11
62
        BITMAP32 = 2,
63
        // A bitmap containing only one element that is in (UINT32_MAX, UINT64_MAX]
64
        // Payload := UInt64LittleEndian(8 byte)
65
        // added in 0.12
66
        SINGLE64 = 3,
67
        // A bitmap whose maximum element is in (UINT32_MAX, UINT64_MAX].
68
        //
69
        // To support 64-bits elements, all elements with the same high 32 bits are stored in a
70
        // RoaringBitmap containing only the lower 32 bits. Thus we could use
71
        // map<uint32_t, RoaringBitmap> to represent bitmap of 64-bits ints.
72
        //
73
        // Since there is no standard format for 64-bits RoaringBitmap, we define our own as below
74
        // Payload := NumRoaring(vint64), { MapKey, MapValue }^NumRoaring
75
        // - MapKey := the shared high 32 bits in UInt32LittleEndian(4 byte)
76
        // - MapValue := the standard RoaringBitmap format
77
        //
78
        // added in 0.12
79
        BITMAP64 = 4,
80
        SET = 5, // V1
81
        SET_V2 = 10,
82
        BITMAP32_V2 = 12,
83
        BITMAP64_V2 = 13,
84
        TYPE_MAX
85
    };
86
1
    Status static inline validate(int bitmap_type) {
87
1
        if (UNLIKELY(bitmap_type < type::EMPTY || bitmap_type >= type::TYPE_MAX)) {
88
1
            std::string err_msg =
89
1
                    fmt::format("BitmapTypeCode invalid, should between: {} and {} actrual is {}",
90
1
                                BitmapTypeCode::EMPTY, BitmapTypeCode::BITMAP64, bitmap_type);
91
1
            LOG(ERROR) << err_msg;
92
1
            return Status::Corruption(err_msg);
93
1
        }
94
0
        return Status::OK();
95
1
    }
96
};
97
98
namespace detail {
99
100
class Roaring64MapSetBitForwardIterator;
101
102
// Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.2.60/cpp/roaring64map.hh
103
// What we change includes
104
// - a custom serialization format is used inside read()/write()/getSizeInBytes()
105
// - added clear() and is32BitsEnough()
106
class Roaring64Map {
107
public:
108
    /**
109
     * Create an empty bitmap
110
     */
111
21.4k
    Roaring64Map() = default;
112
113
    /**
114
     * Construct a bitmap from a list of 32-bit integer values.
115
     */
116
2
    Roaring64Map(size_t n, const uint32_t* data) { addMany(n, data); }
117
118
    /**
119
     * Construct a bitmap from a list of 64-bit integer values.
120
     */
121
1
    Roaring64Map(size_t n, const uint64_t* data) { addMany(n, data); }
122
123
    /**
124
     * Construct a 64-bit map from a 32-bit one
125
     */
126
1
    explicit Roaring64Map(const roaring::Roaring& r) { emplaceOrInsert(0, r); }
127
128
9
    Roaring64Map(const Roaring64Map& r) = default;
129
130
11.0k
    Roaring64Map(Roaring64Map&& r) = default;
131
132
    /**
133
     * Assignment operator.
134
     */
135
1.03k
    Roaring64Map& operator=(const Roaring64Map& r) {
136
1.03k
        roarings = r.roarings;
137
1.03k
        return *this;
138
1.03k
    }
139
140
    /**
141
     * Add value x
142
     *
143
     */
144
3
    void add(uint32_t x) {
145
3
        roarings[0].add(x);
146
3
        roarings[0].setCopyOnWrite(copyOnWrite);
147
3
    }
148
4.73M
    void add(uint64_t x) {
149
4.73M
        roarings[highBytes(x)].add(lowBytes(x));
150
4.73M
        roarings[highBytes(x)].setCopyOnWrite(copyOnWrite);
151
4.73M
    }
152
153
    template <typename T>
154
9
    void addMany(size_t n_args, const T* vals) {
155
9
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
156
1
            auto& roaring = roarings[0];
157
1
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
158
1
            roaring.setCopyOnWrite(copyOnWrite);
159
1
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
160
0
            auto& roaring = roarings[0];
161
0
            std::vector<uint32_t> values(n_args);
162
8
            for (size_t i = 0; i != n_args; ++i) {
163
7
                values[i] = uint32_t(vals[i]);
164
7
            }
165
1
            roaring.addMany(n_args, values.data());
166
1
            roaring.setCopyOnWrite(copyOnWrite);
167
1
        } else {
168
0
            for (size_t lcv = 0; lcv < n_args; lcv++) {
169
0
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
170
0
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
171
0
            }
172
0
        }
173
9
    }
_ZN5doris6detail12Roaring64Map7addManyIjEEvmPKT_
Line
Count
Source
154
8
    void addMany(size_t n_args, const T* vals) {
155
8
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
156
8
            auto& roaring = roarings[0];
157
8
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
158
8
            roaring.setCopyOnWrite(copyOnWrite);
159
8
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
160
8
            auto& roaring = roarings[0];
161
8
            std::vector<uint32_t> values(n_args);
162
8
            for (size_t i = 0; i != n_args; ++i) {
163
8
                values[i] = uint32_t(vals[i]);
164
8
            }
165
8
            roaring.addMany(n_args, values.data());
166
8
            roaring.setCopyOnWrite(copyOnWrite);
167
8
        } else {
168
8
            for (size_t lcv = 0; lcv < n_args; lcv++) {
169
8
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
170
8
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
171
8
            }
172
8
        }
173
8
    }
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIiEEvmPKT_
_ZN5doris6detail12Roaring64Map7addManyItEEvmPKT_
Line
Count
Source
154
1
    void addMany(size_t n_args, const T* vals) {
155
1
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
156
1
            auto& roaring = roarings[0];
157
1
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
158
1
            roaring.setCopyOnWrite(copyOnWrite);
159
1
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
160
1
            auto& roaring = roarings[0];
161
1
            std::vector<uint32_t> values(n_args);
162
8
            for (size_t i = 0; i != n_args; ++i) {
163
7
                values[i] = uint32_t(vals[i]);
164
7
            }
165
1
            roaring.addMany(n_args, values.data());
166
1
            roaring.setCopyOnWrite(copyOnWrite);
167
1
        } else {
168
1
            for (size_t lcv = 0; lcv < n_args; lcv++) {
169
1
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
170
1
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
171
1
            }
172
1
        }
173
1
    }
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIaEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIsEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIlEEvmPKT_
174
175
40
    void addMany(size_t n_args, const uint64_t* vals) {
176
1.25k
        for (size_t lcv = 0; lcv < n_args; lcv++) {
177
1.21k
            roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
178
1.21k
            roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
179
1.21k
        }
180
40
    }
181
182
    /**
183
     * Remove value x
184
     *
185
     */
186
4
    void remove(uint32_t x) { roarings[0].remove(x); }
187
23
    void remove(uint64_t x) {
188
23
        auto roaring_iter = roarings.find(highBytes(x));
189
23
        if (roaring_iter != roarings.cend()) {
190
23
            roaring_iter->second.remove(lowBytes(x));
191
23
        }
192
23
    }
193
    /**
194
     * Return the largest value (if not empty)
195
     *
196
     */
197
46.5k
    uint64_t maximum() const {
198
46.5k
        for (auto roaring_iter = roarings.crbegin(); roaring_iter != roarings.crend();
199
46.5k
             ++roaring_iter) {
200
46.5k
            if (!roaring_iter->second.isEmpty()) {
201
46.5k
                return uniteBytes(roaring_iter->first, roaring_iter->second.maximum());
202
46.5k
            }
203
46.5k
        }
204
        // we put std::numeric_limits<>::max/min in parenthesis
205
        // to avoid a clash with the Windows.h header under Windows
206
5
        return (std::numeric_limits<uint64_t>::min)();
207
46.5k
    }
208
209
    /**
210
     * Return the smallest value (if not empty)
211
     *
212
     */
213
7
    uint64_t minimum() const {
214
7
        for (auto roaring_iter = roarings.cbegin(); roaring_iter != roarings.cend();
215
7
             ++roaring_iter) {
216
6
            if (!roaring_iter->second.isEmpty()) {
217
6
                return uniteBytes(roaring_iter->first, roaring_iter->second.minimum());
218
6
            }
219
6
        }
220
        // we put std::numeric_limits<>::max/min in parenthesis
221
        // to avoid a clash with the Windows.h header under Windows
222
1
        return (std::numeric_limits<uint64_t>::max)();
223
7
    }
224
225
    /**
226
     * Check if value x is present
227
     */
228
11
    bool contains(uint32_t x) const {
229
11
        return roarings.count(0) == 0 ? false : roarings.at(0).contains(x);
230
11
    }
231
9.24k
    bool contains(uint64_t x) const {
232
9.24k
        return roarings.count(highBytes(x)) == 0 ? false
233
9.24k
                                                 : roarings.at(highBytes(x)).contains(lowBytes(x));
234
9.24k
    }
235
236
    /**
237
     * Compute the intersection between the current bitmap and the provided
238
     * bitmap,
239
     * writing the result in the current bitmap. The provided bitmap is not
240
     * modified.
241
     */
242
6
    Roaring64Map& operator&=(const Roaring64Map& r) {
243
11
        for (auto& map_entry : roarings) {
244
11
            if (r.roarings.count(map_entry.first) == 1) {
245
10
                map_entry.second &= r.roarings.at(map_entry.first);
246
10
            } else {
247
1
                map_entry.second = roaring::Roaring();
248
1
            }
249
11
        }
250
6
        return *this;
251
6
    }
252
253
    /**
254
     * Compute the difference between the current bitmap and the provided
255
     * bitmap,
256
     * writing the result in the current bitmap. The provided bitmap is not
257
     * modified.
258
     */
259
1
    Roaring64Map& operator-=(const Roaring64Map& r) {
260
4
        for (auto& map_entry : roarings) {
261
4
            if (r.roarings.count(map_entry.first) == 1) {
262
4
                map_entry.second -= r.roarings.at(map_entry.first);
263
4
            }
264
4
        }
265
1
        return *this;
266
1
    }
267
268
    /**
269
     * Compute the union between the current bitmap and the provided bitmap,
270
     * writing the result in the current bitmap. The provided bitmap is not
271
     * modified.
272
     *
273
     * See also the fastunion function to aggregate many bitmaps more quickly.
274
     */
275
12
    Roaring64Map& operator|=(const Roaring64Map& r) {
276
22
        for (const auto& map_entry : r.roarings) {
277
22
            if (roarings.count(map_entry.first) == 0) {
278
10
                roarings[map_entry.first] = map_entry.second;
279
10
                roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
280
12
            } else {
281
12
                roarings[map_entry.first] |= map_entry.second;
282
12
            }
283
22
        }
284
12
        return *this;
285
12
    }
286
287
    /**
288
     * Compute the symmetric union between the current bitmap and the provided
289
     * bitmap,
290
     * writing the result in the current bitmap. The provided bitmap is not
291
     * modified.
292
     */
293
4
    Roaring64Map& operator^=(const Roaring64Map& r) {
294
7
        for (const auto& map_entry : r.roarings) {
295
7
            if (roarings.count(map_entry.first) == 0) {
296
0
                roarings[map_entry.first] = map_entry.second;
297
0
                roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
298
7
            } else {
299
7
                roarings[map_entry.first] ^= map_entry.second;
300
7
            }
301
7
        }
302
4
        return *this;
303
4
    }
304
305
    /**
306
     * Exchange the content of this bitmap with another.
307
     */
308
1
    void swap(Roaring64Map& r) { roarings.swap(r.roarings); }
309
310
    /**
311
     * Get the cardinality of the bitmap (number of elements).
312
     * Throws std::length_error in the special case where the bitmap is full
313
     * (cardinality() == 2^64). Check isFull() before calling to avoid
314
     * exception.
315
     */
316
246
    uint64_t cardinality() const {
317
246
        if (isFull()) {
318
0
            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
319
0
                                   "bitmap is full, cardinality is 2^64, "
320
0
                                   "unable to represent in a 64-bit integer");
321
0
        }
322
246
        return std::accumulate(roarings.cbegin(), roarings.cend(), (uint64_t)0,
323
246
                               [](uint64_t previous,
324
418
                                  const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
325
418
                                   return previous + map_entry.second.cardinality();
326
418
                               });
327
246
    }
328
    /**
329
     * Computes the size of the intersection between two bitmaps.
330
     *
331
     */
332
6
    uint64_t andCardinality(const Roaring64Map& r) const {
333
6
        uint64_t card = 0;
334
10
        for (const auto& map_entry : roarings) {
335
10
            if (r.roarings.count(map_entry.first) == 1) {
336
9
                card += map_entry.second.and_cardinality(r.roarings.at(map_entry.first));
337
9
            }
338
10
        }
339
6
        return card;
340
6
    }
341
342
    /**
343
     * Computes the size of the union between two bitmaps.
344
     *
345
     */
346
4
    uint64_t orCardinality(const Roaring64Map& r) const {
347
4
        uint64_t card = 0;
348
4
        for (const auto& map_entry : roarings) {
349
4
            if (r.roarings.count(map_entry.first) == 0) {
350
0
                card += map_entry.second.cardinality();
351
0
            }
352
4
        }
353
4
        for (const auto& map_entry : r.roarings) {
354
4
            if (roarings.count(map_entry.first) == 0) {
355
0
                card += map_entry.second.cardinality();
356
4
            } else {
357
4
                card += roarings.at(map_entry.first).or_cardinality(map_entry.second);
358
4
            }
359
4
        }
360
4
        return card;
361
4
    }
362
363
    /**
364
     * Computes the size of the difference (andnot) between two bitmaps.
365
     * r1.cardinality - (r1 & r2).cardinality
366
     */
367
1
    uint64_t andnotCardinality(const Roaring64Map& r) const {
368
1
        uint64_t card = 0;
369
4
        for (const auto& map_entry : roarings) {
370
4
            card += map_entry.second.cardinality();
371
4
            if (r.roarings.count(map_entry.first) == 1) {
372
4
                card -= r.roarings.at(map_entry.first).and_cardinality(map_entry.second);
373
4
            }
374
4
        }
375
1
        return card;
376
1
    }
377
378
    /**
379
     * Computes the size of the symmetric difference (andnot) between two
380
     * bitmaps.
381
     * r1.cardinality + r2.cardinality - 2 * (r1 & r2).cardinality
382
     */
383
1
    uint64_t xorCardinality(const Roaring64Map& r) const {
384
1
        uint64_t card = 0;
385
1
        for (const auto& map_entry : roarings) {
386
1
            card += map_entry.second.cardinality();
387
1
        }
388
1
        for (const auto& map_entry : r.roarings) {
389
1
            card += map_entry.second.cardinality();
390
1
            if (roarings.count(map_entry.first) == 1) {
391
1
                card -= 2 * roarings.at(map_entry.first).and_cardinality(map_entry.second);
392
1
            }
393
1
        }
394
1
        return card;
395
1
    }
396
397
    /**
398
    * Returns true if the bitmap is empty (cardinality is zero).
399
    */
400
1
    bool isEmpty() const {
401
1
        return std::all_of(roarings.cbegin(), roarings.cend(),
402
1
                           [](const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
403
0
                               return map_entry.second.isEmpty();
404
0
                           });
405
1
    }
406
407
    /**
408
    * Returns true if the bitmap is full (cardinality is max uint64_t + 1).
409
    */
410
246
    bool isFull() const {
411
        // only bother to check if map is fully saturated
412
        //
413
        // we put std::numeric_limits<>::max/min in parenthesis
414
        // to avoid a clash with the Windows.h header under Windows
415
246
        return roarings.size() == ((uint64_t)(std::numeric_limits<uint32_t>::max)()) + 1
416
246
                       ? std::all_of(roarings.cbegin(), roarings.cend(),
417
0
                                     [](const std::pair<const uint32_t, roaring::Roaring>&
418
0
                                                roaring_map_entry) {
419
                                         // roarings within map are saturated if cardinality
420
                                         // is uint32_t max + 1
421
0
                                         return roaring_map_entry.second.cardinality() ==
422
0
                                                ((uint64_t)(std::numeric_limits<uint32_t>::max)()) +
423
0
                                                        1;
424
0
                                     })
425
246
                       : false;
426
246
    }
427
428
    /**
429
     * Return true if the two bitmaps contain the same elements.
430
     */
431
14
    bool operator==(const Roaring64Map& r) const {
432
        // we cannot use operator == on the map because either side may contain
433
        // empty Roaring Bitmaps
434
14
        auto lhs_iter = roarings.cbegin();
435
14
        auto lhs_cend = roarings.cend();
436
14
        auto rhs_iter = r.roarings.cbegin();
437
14
        auto rhs_cend = r.roarings.cend();
438
27
        while (lhs_iter != lhs_cend && rhs_iter != rhs_cend) {
439
13
            auto lhs_key = lhs_iter->first;
440
13
            auto rhs_key = rhs_iter->first;
441
13
            const auto& lhs_map = lhs_iter->second;
442
13
            const auto& rhs_map = rhs_iter->second;
443
13
            if (lhs_map.isEmpty()) {
444
1
                ++lhs_iter;
445
1
                continue;
446
1
            }
447
12
            if (rhs_map.isEmpty()) {
448
2
                ++rhs_iter;
449
2
                continue;
450
2
            }
451
10
            if (!(lhs_key == rhs_key)) {
452
0
                return false;
453
0
            }
454
10
            if (!(lhs_map == rhs_map)) {
455
0
                return false;
456
0
            }
457
10
            ++lhs_iter;
458
10
            ++rhs_iter;
459
10
        }
460
16
        while (lhs_iter != lhs_cend) {
461
4
            if (!lhs_iter->second.isEmpty()) {
462
2
                return false;
463
2
            }
464
2
            ++lhs_iter;
465
2
        }
466
14
        while (rhs_iter != rhs_cend) {
467
3
            if (!rhs_iter->second.isEmpty()) {
468
1
                return false;
469
1
            }
470
2
            ++rhs_iter;
471
2
        }
472
11
        return true;
473
12
    }
474
475
    /** convert array and bitmap containers to run containers when it is more
476
     * efficient;
477
     * also convert from run containers when more space efficient.  Returns
478
     * true if the result has at least one run container.
479
     * Additional savings might be possible by calling shrinkToFit().
480
     */
481
27.3k
    bool runOptimize() {
482
27.3k
        return std::accumulate(
483
27.3k
                roarings.begin(), roarings.end(), true,
484
27.3k
                [](bool previous, std::pair<const uint32_t, roaring::Roaring>& map_entry) {
485
27.3k
                    return map_entry.second.runOptimize() && previous;
486
27.3k
                });
487
27.3k
    }
488
489
    /**
490
     * If needed, reallocate memory to shrink the memory usage. Returns
491
     * the number of bytes saved.
492
    */
493
27.3k
    size_t shrinkToFit() {
494
27.3k
        size_t savedBytes = 0;
495
27.3k
        auto iter = roarings.begin();
496
54.6k
        while (iter != roarings.cend()) {
497
27.3k
            if (iter->second.isEmpty()) {
498
                // empty Roarings are 84 bytes
499
1
                savedBytes += 88;
500
1
                iter = roarings.erase(iter);
501
27.3k
            } else {
502
27.3k
                savedBytes += iter->second.shrinkToFit();
503
27.3k
                iter++;
504
27.3k
            }
505
27.3k
        }
506
27.3k
        return savedBytes;
507
27.3k
    }
508
509
    /**
510
     * Iterate over the bitmap elements. The function iterator is called once
511
     * for all the values with ptr (can be nullptr) as the second parameter of each
512
     * call.
513
     *
514
     * roaring_iterator is simply a pointer to a function that returns bool
515
     * (true means that the iteration should continue while false means that it
516
     * should stop), and takes (uint32_t,void*) as inputs.
517
     */
518
8.73k
    void iterate(roaring::api::roaring_iterator64 iterator, void* ptr) const {
519
8.76k
        for (const auto& map_entry : roarings) {
520
8.76k
            bool should_continue = roaring_iterate64(&map_entry.second.roaring, iterator,
521
8.76k
                                                     uint64_t(map_entry.first) << 32, ptr);
522
8.76k
            if (!should_continue) {
523
1
                break;
524
1
            }
525
8.76k
        }
526
8.73k
    }
527
528
    /**
529
     * write a bitmap to a char buffer.
530
     * Returns how many bytes were written which should be getSizeInBytes().
531
     */
532
19.1k
    size_t write(char* buf, int serialize_version) const {
533
19.1k
        bool is_v1 = serialize_version == 1;
534
19.1k
        BitmapTypeCode::type type_bitmap32 =
535
19.1k
                is_v1 ? BitmapTypeCode::type::BITMAP32 : BitmapTypeCode::type::BITMAP32_V2;
536
19.1k
        BitmapTypeCode::type type_bitmap64 =
537
19.1k
                is_v1 ? BitmapTypeCode::type::BITMAP64 : BitmapTypeCode::type::BITMAP64_V2;
538
539
19.1k
        if (is32BitsEnough()) {
540
19.1k
            *(buf++) = type_bitmap32;
541
19.1k
            auto it = roarings.find(0);
542
19.1k
            if (it == roarings.end()) { // empty bitmap
543
2
                roaring::Roaring r;
544
2
                return r.write(buf, is_v1) + 1;
545
2
            }
546
19.1k
            return it->second.write(buf, is_v1) + 1;
547
19.1k
        }
548
549
7
        const char* orig = buf;
550
        // put type code
551
7
        *(buf++) = type_bitmap64;
552
        // push map size
553
7
        buf = (char*)encode_varint64((uint8_t*)buf, roarings.size());
554
7
        std::for_each(roarings.cbegin(), roarings.cend(),
555
16
                      [&buf, is_v1](const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
556
                          // push map key
557
16
                          encode_fixed32_le((uint8_t*)buf, map_entry.first);
558
16
                          buf += sizeof(uint32_t);
559
                          // push map value Roaring
560
16
                          buf += map_entry.second.write(buf, is_v1);
561
16
                      });
562
7
        return buf - orig;
563
19.1k
    }
564
565
    /**
566
     * read a bitmap from a serialized version.
567
     *
568
     * This function is unsafe in the sense that if you provide bad data,
569
     * many bytes could be read, possibly causing a buffer overflow. See also readSafe.
570
     */
571
11.0k
    static Roaring64Map read(const char* buf) {
572
11.0k
        Roaring64Map result;
573
574
11.0k
        bool is_v1 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP64 == *buf;
575
11.0k
        bool is_bitmap32 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP32_V2 == *buf;
576
11.0k
        bool is_bitmap64 = BitmapTypeCode::BITMAP64 == *buf || BitmapTypeCode::BITMAP64_V2 == *buf;
577
11.0k
        if (is_bitmap32) {
578
11.0k
            roaring::Roaring read = roaring::Roaring::read(buf + 1, is_v1);
579
11.0k
            result.emplaceOrInsert(0, std::move(read));
580
11.0k
            return result;
581
11.0k
        }
582
583
8
        DCHECK(is_bitmap64);
584
8
        buf++;
585
586
        // get map size (varint64 took 1~10 bytes)
587
8
        uint64_t map_size;
588
8
        buf = reinterpret_cast<const char*>(
589
8
                decode_varint64_ptr(reinterpret_cast<const uint8_t*>(buf),
590
8
                                    reinterpret_cast<const uint8_t*>(buf + 10), &map_size));
591
8
        DCHECK(buf != nullptr);
592
26
        for (uint64_t lcv = 0; lcv < map_size; lcv++) {
593
            // get map key
594
18
            uint32_t key = decode_fixed32_le(reinterpret_cast<const uint8_t*>(buf));
595
18
            buf += sizeof(uint32_t);
596
            // read map value Roaring
597
18
            roaring::Roaring read_var = roaring::Roaring::read(buf, is_v1);
598
            // forward buffer past the last Roaring Bitmap
599
18
            buf += read_var.getSizeInBytes(is_v1);
600
18
            result.emplaceOrInsert(key, std::move(read_var));
601
18
        }
602
8
        return result;
603
11.0k
    }
604
605
    /**
606
     * How many bytes are required to serialize this bitmap
607
     */
608
27.3k
    size_t getSizeInBytes(int serialize_version) const {
609
27.3k
        bool is_v1 = serialize_version == 1;
610
27.3k
        if (is32BitsEnough()) {
611
27.3k
            auto it = roarings.find(0);
612
27.3k
            if (it == roarings.end()) { // empty bitmap
613
2
                roaring::Roaring r;
614
2
                return r.getSizeInBytes(is_v1) + 1;
615
2
            }
616
27.3k
            return it->second.getSizeInBytes(is_v1) + 1;
617
27.3k
        }
618
        // start with type code, map size and size of keys for each map entry
619
13
        size_t init = 1 + varint_length(roarings.size()) + roarings.size() * sizeof(uint32_t);
620
13
        return std::accumulate(
621
13
                roarings.cbegin(), roarings.cend(), init,
622
28
                [=](size_t previous, const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
623
                    // add in bytes used by each Roaring
624
28
                    return previous + map_entry.second.getSizeInBytes(is_v1);
625
28
                });
626
27.3k
    }
627
628
    /**
629
     * remove all elements
630
     */
631
0
    void clear() { roarings.clear(); }
632
633
    /**
634
     * Return whether all elements can be represented in 32 bits
635
     */
636
46.5k
    bool is32BitsEnough() const { return maximum() <= std::numeric_limits<uint32_t>::max(); }
637
638
    /**
639
     * Computes the intersection between two bitmaps and returns new bitmap.
640
     * The current bitmap and the provided bitmap are unchanged.
641
     */
642
2
    Roaring64Map operator&(const Roaring64Map& o) const { return Roaring64Map(*this) &= o; }
643
644
    /**
645
     * Computes the difference between two bitmaps and returns new bitmap.
646
     * The current bitmap and the provided bitmap are unchanged.
647
     */
648
0
    Roaring64Map operator-(const Roaring64Map& o) const { return Roaring64Map(*this) -= o; }
649
650
    /**
651
     * Computes the union between two bitmaps and returns new bitmap.
652
     * The current bitmap and the provided bitmap are unchanged.
653
     */
654
1
    Roaring64Map operator|(const Roaring64Map& o) const { return Roaring64Map(*this) |= o; }
655
656
    /**
657
     * Computes the symmetric union between two bitmaps and returns new bitmap.
658
     * The current bitmap and the provided bitmap are unchanged.
659
     */
660
1
    Roaring64Map operator^(const Roaring64Map& o) const { return Roaring64Map(*this) ^= o; }
661
662
    /**
663
     * computes the logical or (union) between "n" bitmaps (referenced by a
664
     * pointer).
665
     */
666
5
    static Roaring64Map fastunion(size_t n, const Roaring64Map** inputs) {
667
5
        Roaring64Map ans;
668
        // not particularly fast
669
12
        for (size_t lcv = 0; lcv < n; ++lcv) {
670
7
            ans |= *(inputs[lcv]);
671
7
        }
672
5
        return ans;
673
5
    }
674
675
    friend class Roaring64MapSetBitForwardIterator;
676
    using const_iterator = Roaring64MapSetBitForwardIterator;
677
678
    /**
679
    * Returns an iterator that can be used to access the position of the
680
    * set bits. The running time complexity of a full scan is proportional to
681
    * the
682
    * number
683
    * of set bits: be aware that if you have long strings of 1s, this can be
684
    * very inefficient.
685
    *
686
    * It can be much faster to use the toArray method if you want to
687
    * retrieve the set bits.
688
    */
689
    const_iterator begin() const;
690
691
    /**
692
    * A bogus iterator that can be used together with begin()
693
    * for constructions such as for(auto i = b.begin();
694
    * i!=b.end(); ++i) {}
695
    */
696
    const_iterator end() const;
697
698
private:
699
    phmap::btree_map<uint32_t, roaring::Roaring> roarings {};
700
    bool copyOnWrite {false};
701
9.48M
    static uint32_t highBytes(const uint64_t in) { return uint32_t(in >> 32); }
702
4.74M
    static uint32_t lowBytes(const uint64_t in) { return uint32_t(in); }
703
49.4k
    static uint64_t uniteBytes(const uint32_t highBytes, const uint32_t lowBytes) {
704
49.4k
        return (uint64_t(highBytes) << 32) | uint64_t(lowBytes);
705
49.4k
    }
706
1
    void emplaceOrInsert(const uint32_t key, const roaring::Roaring& value) {
707
1
        roarings.emplace(std::make_pair(key, value));
708
1
    }
709
710
11.1k
    void emplaceOrInsert(const uint32_t key, roaring::Roaring&& value) {
711
11.1k
        roarings.emplace(key, value);
712
11.1k
    }
713
};
714
715
// Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.4.0/cpp/roaring64map.hh
716
// Used to go through the set bits. Not optimally fast, but convenient.
717
class Roaring64MapSetBitForwardIterator {
718
public:
719
    using type_of_iterator = Roaring64MapSetBitForwardIterator;
720
721
    /**
722
     * Provides the location of the set bit.
723
     */
724
2.96k
    uint64_t operator*() const {
725
2.96k
        return Roaring64Map::uniteBytes(map_iter->first, i.current_value);
726
2.96k
    }
727
728
975
    type_of_iterator& operator++() { // ++i, must returned inc. value
729
975
        if (i.has_value) {
730
975
            roaring_advance_uint32_iterator(&i);
731
975
        }
732
1.00k
        while (!i.has_value) {
733
34
            map_iter++;
734
34
            if (map_iter == map_end) return *this;
735
25
            roaring_init_iterator(&map_iter->second.roaring, &i);
736
25
        }
737
966
        return *this;
738
975
    }
739
740
1.80k
    type_of_iterator operator++(int) { // i++, must return orig. value
741
1.80k
        Roaring64MapSetBitForwardIterator orig(*this);
742
1.80k
        roaring_advance_uint32_iterator(&i);
743
1.81k
        while (!i.has_value) {
744
4
            map_iter++;
745
4
            if (map_iter == map_end) return orig;
746
2
            roaring_init_iterator(&map_iter->second.roaring, &i);
747
2
        }
748
1.80k
        return orig;
749
1.80k
    }
750
751
5
    bool move(const uint64_t& x) {
752
5
        map_iter = p.lower_bound(Roaring64Map::highBytes(x));
753
5
        if (map_iter != p.cend()) {
754
5
            roaring_init_iterator(&map_iter->second.roaring, &i);
755
5
            if (map_iter->first == Roaring64Map::highBytes(x)) {
756
5
                if (roaring_move_uint32_iterator_equalorlarger(&i, Roaring64Map::lowBytes(x)))
757
4
                    return true;
758
1
                map_iter++;
759
1
                if (map_iter == map_end) return false;
760
0
                roaring_init_iterator(&map_iter->second.roaring, &i);
761
0
            }
762
0
            return true;
763
5
        }
764
0
        return false;
765
5
    }
766
767
527
    bool operator==(const Roaring64MapSetBitForwardIterator& o) const {
768
527
        if (map_iter == map_end && o.map_iter == o.map_end) return true;
769
519
        if (o.map_iter == o.map_end) return false;
770
1
        return **this == *o;
771
519
    }
772
773
2.30k
    bool operator!=(const Roaring64MapSetBitForwardIterator& o) const {
774
2.30k
        if (map_iter == map_end && o.map_iter == o.map_end) return false;
775
2.29k
        if (o.map_iter == o.map_end) return true;
776
0
        return **this != *o;
777
2.29k
    }
778
5
    Roaring64MapSetBitForwardIterator& operator=(const Roaring64MapSetBitForwardIterator& r) {
779
5
        map_iter = r.map_iter;
780
5
        map_end = r.map_end;
781
5
        i = r.i;
782
5
        return *this;
783
5
    }
784
785
    Roaring64MapSetBitForwardIterator(const Roaring64MapSetBitForwardIterator& r) = default;
786
787
    Roaring64MapSetBitForwardIterator(const Roaring64Map& parent, bool exhausted = false)
788
2.29k
            : p(parent.roarings), map_end(parent.roarings.cend()) {
789
2.29k
        if (exhausted || parent.roarings.empty()) {
790
2.27k
            map_iter = parent.roarings.cend();
791
2.27k
        } else {
792
21
            map_iter = parent.roarings.cbegin();
793
21
            roaring_init_iterator(&map_iter->second.roaring, &i);
794
21
            while (!i.has_value) {
795
0
                map_iter++;
796
0
                if (map_iter == map_end) return;
797
0
                roaring_init_iterator(&map_iter->second.roaring, &i);
798
0
            }
799
21
        }
800
2.29k
    }
801
802
protected:
803
    const phmap::btree_map<uint32_t, roaring::Roaring>& p;
804
    phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_iter {};
805
    phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_end {};
806
    roaring::api::roaring_uint32_iterator_t i {};
807
};
808
809
18
inline Roaring64MapSetBitForwardIterator Roaring64Map::begin() const {
810
18
    return {*this};
811
18
}
812
813
2.26k
inline Roaring64MapSetBitForwardIterator Roaring64Map::end() const {
814
2.26k
    return {*this, true};
815
2.26k
}
816
817
} // namespace detail
818
819
// Represent the in-memory and on-disk structure of Doris's BITMAP data type.
820
// Optimize for the case where the bitmap contains 0 or 1 element which is common
821
// for streaming load scenario.
822
class BitmapValueIterator;
823
class BitmapValue {
824
public:
825
    template <typename T>
826
    using SetContainer = phmap::flat_hash_set<T>;
827
828
    // Construct an empty bitmap.
829
100k
    BitmapValue() : _sv(0), _bitmap(nullptr), _type(EMPTY), _is_shared(false) { _set.clear(); }
830
831
    // Construct a bitmap with one element.
832
    explicit BitmapValue(uint64_t value)
833
58
            : _sv(value), _bitmap(nullptr), _type(SINGLE), _is_shared(false) {}
834
835
    // Construct a bitmap from serialized data.
836
3.08k
    explicit BitmapValue(const char* src) : _is_shared(false) {
837
3.08k
        bool res = deserialize(src);
838
3.08k
        DCHECK(res);
839
3.08k
    }
840
841
20.6k
    BitmapValue(const BitmapValue& other) {
842
20.6k
        _type = other._type;
843
20.6k
        switch (other._type) {
844
186
        case EMPTY:
845
186
            break;
846
156
        case SINGLE:
847
156
            _sv = other._sv;
848
156
            break;
849
17.5k
        case BITMAP:
850
17.5k
            _bitmap = other._bitmap;
851
17.5k
            break;
852
2.68k
        case SET:
853
2.68k
            _set = other._set;
854
2.68k
            break;
855
20.6k
        }
856
857
20.6k
        if (other._type == BITMAP) {
858
17.5k
            _is_shared = true;
859
            // should also set other's state to shared, so that other bitmap value will
860
            // create a new bitmap when it wants to modify it.
861
17.5k
            const_cast<BitmapValue&>(other)._is_shared = true;
862
17.5k
        }
863
20.6k
    }
864
865
25.2k
    BitmapValue(BitmapValue&& other) noexcept {
866
25.2k
        _type = other._type;
867
25.2k
        switch (other._type) {
868
69
        case EMPTY:
869
69
            break;
870
215
        case SINGLE:
871
215
            _sv = other._sv;
872
215
            break;
873
21.0k
        case BITMAP:
874
21.0k
            _bitmap = std::move(other._bitmap);
875
21.0k
            other._bitmap = nullptr;
876
21.0k
            break;
877
3.90k
        case SET:
878
3.90k
            _set = std::move(other._set);
879
3.90k
            break;
880
25.2k
        }
881
25.2k
        _is_shared = other._is_shared;
882
25.2k
        other._type = EMPTY;
883
25.2k
        other._is_shared = false;
884
25.2k
    }
885
886
107
    BitmapValue& operator=(const BitmapValue& other) {
887
107
        if (this == &other) {
888
5
            return *this;
889
5
        }
890
102
        reset();
891
102
        _type = other._type;
892
102
        switch (other._type) {
893
20
        case EMPTY:
894
20
            break;
895
29
        case SINGLE:
896
29
            _sv = other._sv;
897
29
            break;
898
26
        case BITMAP:
899
26
            _bitmap = other._bitmap;
900
26
            break;
901
27
        case SET:
902
27
            _set = other._set;
903
27
            break;
904
102
        }
905
906
102
        if (other._type == BITMAP) {
907
26
            _is_shared = true;
908
            // should also set other's state to shared, so that other bitmap value will
909
            // create a new bitmap when it wants to modify it.
910
26
            const_cast<BitmapValue&>(other)._is_shared = true;
911
26
        }
912
102
        return *this;
913
102
    }
914
915
22
    static BitmapValue empty_bitmap() { return BitmapValue {}; }
916
917
18
    BitmapValue& operator=(BitmapValue&& other) noexcept {
918
18
        if (this == &other) {
919
0
            return *this;
920
0
        }
921
18
        reset();
922
923
18
        _type = other._type;
924
18
        switch (other._type) {
925
0
        case EMPTY:
926
0
            break;
927
12
        case SINGLE:
928
12
            _sv = other._sv;
929
12
            break;
930
4
        case BITMAP:
931
4
            _bitmap = std::move(other._bitmap);
932
4
            other._bitmap = nullptr;
933
4
            break;
934
2
        case SET:
935
2
            _set = std::move(other._set);
936
2
            break;
937
18
        }
938
18
        _is_shared = other._is_shared;
939
18
        return *this;
940
18
    }
941
942
    // Construct a bitmap from given elements.
943
59
    explicit BitmapValue(const std::vector<uint64_t>& bits) : _is_shared(false) {
944
59
        if (bits.size() == 0) {
945
0
            _type = EMPTY;
946
0
            return;
947
0
        }
948
949
59
        if (bits.size() == 1) {
950
1
            _type = SINGLE;
951
1
            _sv = bits[0];
952
1
            return;
953
1
        }
954
955
58
        if (!config::enable_set_in_bitmap_value || bits.size() > SET_TYPE_THRESHOLD) {
956
26
            _type = BITMAP;
957
26
            _prepare_bitmap_for_write();
958
26
            _bitmap->addMany(bits.size(), &bits[0]);
959
32
        } else {
960
32
            _type = SET;
961
126
            for (auto v : bits) {
962
126
                _set.insert(v);
963
126
            }
964
32
        }
965
58
    }
966
967
31
    BitmapTypeCode::type get_type_code() const {
968
31
        switch (_type) {
969
1
        case EMPTY:
970
1
            return BitmapTypeCode::EMPTY;
971
7
        case SINGLE:
972
7
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
973
4
                return BitmapTypeCode::SINGLE32;
974
4
            } else {
975
3
                return BitmapTypeCode::SINGLE64;
976
3
            }
977
9
        case SET:
978
9
            return BitmapTypeCode::SET;
979
14
        case BITMAP:
980
14
            bool is_v1 = (config::bitmap_serialize_version == 1);
981
14
            if (_bitmap->is32BitsEnough()) {
982
9
                return is_v1 ? BitmapTypeCode::type::BITMAP32 : BitmapTypeCode::type::BITMAP32_V2;
983
9
            } else {
984
5
                return is_v1 ? BitmapTypeCode::type::BITMAP64 : BitmapTypeCode::type::BITMAP64_V2;
985
5
            }
986
31
        }
987
0
        __builtin_unreachable();
988
31
    }
989
990
    template <typename T>
991
22
    void add_many(const T* values, const size_t count) {
992
22
        switch (_type) {
993
17
        case EMPTY:
994
17
            if (count == 1) {
995
0
                _sv = values[0];
996
0
                _type = SINGLE;
997
17
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
998
270
                for (size_t i = 0; i != count; ++i) {
999
259
                    _set.insert(values[i]);
1000
259
                }
1001
11
                _type = SET;
1002
11
            } else {
1003
6
                _prepare_bitmap_for_write();
1004
6
                _bitmap->addMany(count, values);
1005
6
                _type = BITMAP;
1006
6
            }
1007
17
            break;
1008
0
        case SINGLE:
1009
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1010
0
                _set.insert(_sv);
1011
0
                for (size_t i = 0; i != count; ++i) {
1012
0
                    _set.insert(values[i]);
1013
0
                }
1014
0
                _type = SET;
1015
0
                _convert_to_bitmap_if_need();
1016
0
            } else {
1017
0
                _prepare_bitmap_for_write();
1018
0
                _bitmap->add(_sv);
1019
0
                _bitmap->addMany(count, values);
1020
0
                _type = BITMAP;
1021
0
            }
1022
0
            break;
1023
1
        case BITMAP:
1024
1
            _prepare_bitmap_for_write();
1025
1
            _bitmap->addMany(count, values);
1026
1
            break;
1027
4
        case SET:
1028
324
            for (size_t i = 0; i != count; ++i) {
1029
320
                _set.insert(values[i]);
1030
320
            }
1031
4
            _convert_to_bitmap_if_need();
1032
4
            break;
1033
22
        }
1034
22
    }
_ZN5doris11BitmapValue8add_manyIiEEvPKT_m
Line
Count
Source
991
5
    void add_many(const T* values, const size_t count) {
992
5
        switch (_type) {
993
5
        case EMPTY:
994
5
            if (count == 1) {
995
0
                _sv = values[0];
996
0
                _type = SINGLE;
997
5
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
998
78
                for (size_t i = 0; i != count; ++i) {
999
73
                    _set.insert(values[i]);
1000
73
                }
1001
5
                _type = SET;
1002
5
            } else {
1003
0
                _prepare_bitmap_for_write();
1004
0
                _bitmap->addMany(count, values);
1005
0
                _type = BITMAP;
1006
0
            }
1007
5
            break;
1008
0
        case SINGLE:
1009
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1010
0
                _set.insert(_sv);
1011
0
                for (size_t i = 0; i != count; ++i) {
1012
0
                    _set.insert(values[i]);
1013
0
                }
1014
0
                _type = SET;
1015
0
                _convert_to_bitmap_if_need();
1016
0
            } else {
1017
0
                _prepare_bitmap_for_write();
1018
0
                _bitmap->add(_sv);
1019
0
                _bitmap->addMany(count, values);
1020
0
                _type = BITMAP;
1021
0
            }
1022
0
            break;
1023
0
        case BITMAP:
1024
0
            _prepare_bitmap_for_write();
1025
0
            _bitmap->addMany(count, values);
1026
0
            break;
1027
0
        case SET:
1028
0
            for (size_t i = 0; i != count; ++i) {
1029
0
                _set.insert(values[i]);
1030
0
            }
1031
0
            _convert_to_bitmap_if_need();
1032
0
            break;
1033
5
        }
1034
5
    }
_ZN5doris11BitmapValue8add_manyImEEvPKT_m
Line
Count
Source
991
17
    void add_many(const T* values, const size_t count) {
992
17
        switch (_type) {
993
12
        case EMPTY:
994
12
            if (count == 1) {
995
0
                _sv = values[0];
996
0
                _type = SINGLE;
997
12
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
998
192
                for (size_t i = 0; i != count; ++i) {
999
186
                    _set.insert(values[i]);
1000
186
                }
1001
6
                _type = SET;
1002
6
            } else {
1003
6
                _prepare_bitmap_for_write();
1004
6
                _bitmap->addMany(count, values);
1005
6
                _type = BITMAP;
1006
6
            }
1007
12
            break;
1008
0
        case SINGLE:
1009
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1010
0
                _set.insert(_sv);
1011
0
                for (size_t i = 0; i != count; ++i) {
1012
0
                    _set.insert(values[i]);
1013
0
                }
1014
0
                _type = SET;
1015
0
                _convert_to_bitmap_if_need();
1016
0
            } else {
1017
0
                _prepare_bitmap_for_write();
1018
0
                _bitmap->add(_sv);
1019
0
                _bitmap->addMany(count, values);
1020
0
                _type = BITMAP;
1021
0
            }
1022
0
            break;
1023
1
        case BITMAP:
1024
1
            _prepare_bitmap_for_write();
1025
1
            _bitmap->addMany(count, values);
1026
1
            break;
1027
4
        case SET:
1028
324
            for (size_t i = 0; i != count; ++i) {
1029
320
                _set.insert(values[i]);
1030
320
            }
1031
4
            _convert_to_bitmap_if_need();
1032
4
            break;
1033
17
        }
1034
17
    }
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIaEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIsEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIlEEvPKT_m
1035
1036
4.73M
    void add(uint64_t value) {
1037
4.73M
        switch (_type) {
1038
11.5k
        case EMPTY:
1039
11.5k
            if (!config::enable_set_in_bitmap_value) {
1040
5.32k
                _sv = value;
1041
5.32k
                _type = SINGLE;
1042
6.25k
            } else {
1043
6.25k
                _set.insert(value);
1044
6.25k
                _type = SET;
1045
6.25k
            }
1046
11.5k
            break;
1047
5.27k
        case SINGLE:
1048
            //there is no need to convert the type if two variables are equal
1049
5.27k
            if (_sv == value) {
1050
2
                break;
1051
2
            }
1052
5.27k
            if (config::enable_set_in_bitmap_value) {
1053
4
                _set.insert(_sv);
1054
4
                _set.insert(value);
1055
4
                _type = SET;
1056
5.27k
            } else {
1057
5.27k
                _prepare_bitmap_for_write();
1058
5.27k
                _bitmap->add(_sv);
1059
5.27k
                _bitmap->add(value);
1060
5.27k
                _type = BITMAP;
1061
5.27k
            }
1062
5.27k
            break;
1063
4.58M
        case BITMAP:
1064
4.58M
            _prepare_bitmap_for_write();
1065
4.58M
            _bitmap->add(value);
1066
4.58M
            break;
1067
130k
        case SET:
1068
130k
            _set.insert(value);
1069
130k
            _convert_to_bitmap_if_need();
1070
130k
            break;
1071
4.73M
        }
1072
4.73M
    }
1073
1074
8
    void remove(uint64_t value) {
1075
8
        switch (_type) {
1076
1
        case EMPTY:
1077
1
            break;
1078
1
        case SINGLE:
1079
            //there is need to convert the type if two variables are equal
1080
1
            if (_sv == value) {
1081
0
                _type = EMPTY;
1082
0
            }
1083
1
            break;
1084
4
        case BITMAP:
1085
4
            _prepare_bitmap_for_write();
1086
4
            _bitmap->remove(value);
1087
4
            _convert_to_smaller_type();
1088
4
            break;
1089
2
        case SET:
1090
2
            _set.erase(value);
1091
2
            _convert_to_smaller_type();
1092
2
            break;
1093
8
        }
1094
8
    }
1095
1096
    // Compute the union between the current bitmap and the provided bitmap.
1097
16
    BitmapValue& operator-=(const BitmapValue& rhs) {
1098
16
        switch (rhs._type) {
1099
4
        case EMPTY:
1100
4
            break;
1101
4
        case SINGLE:
1102
4
            remove(rhs._sv);
1103
4
            break;
1104
4
        case BITMAP:
1105
4
            switch (_type) {
1106
1
            case EMPTY:
1107
1
                break;
1108
1
            case SINGLE:
1109
1
                if (rhs._bitmap->contains(_sv)) {
1110
0
                    _type = EMPTY;
1111
0
                }
1112
1
                break;
1113
1
            case BITMAP:
1114
1
                _prepare_bitmap_for_write();
1115
1
                *_bitmap -= *rhs._bitmap;
1116
1
                _convert_to_smaller_type();
1117
1
                break;
1118
1
            case SET: {
1119
32
                for (auto it = _set.begin(); it != _set.end();) {
1120
31
                    if (rhs.contains(*it)) {
1121
16
                        it = _set.erase(it);
1122
16
                    } else {
1123
15
                        ++it;
1124
15
                    }
1125
31
                }
1126
1
                _convert_to_smaller_type();
1127
1
                break;
1128
0
            }
1129
4
            }
1130
4
            break;
1131
4
        case SET: {
1132
4
            switch (_type) {
1133
1
            case EMPTY:
1134
1
                break;
1135
1
            case SINGLE:
1136
1
                if (rhs._set.contains(_sv)) {
1137
0
                    _type = EMPTY;
1138
0
                }
1139
1
                break;
1140
1
            case BITMAP:
1141
1
                _prepare_bitmap_for_write();
1142
31
                for (auto v : rhs._set) {
1143
31
                    if (_bitmap->contains(v)) {
1144
0
                        _bitmap->remove(v);
1145
0
                    }
1146
31
                }
1147
1
                _convert_to_smaller_type();
1148
1
                break;
1149
1
            case SET: {
1150
32
                for (auto it = _set.begin(); it != _set.end();) {
1151
31
                    if (rhs.contains(*it)) {
1152
14
                        it = _set.erase(it);
1153
17
                    } else {
1154
17
                        ++it;
1155
17
                    }
1156
31
                }
1157
1
                _convert_to_smaller_type();
1158
1
                break;
1159
0
            }
1160
4
            }
1161
4
        }
1162
16
        }
1163
16
        return *this;
1164
16
    }
1165
1166
    // Compute the union between the current bitmap and the provided bitmap.
1167
    // Possible type transitions are:
1168
    // EMPTY  -> SINGLE
1169
    // EMPTY  -> BITMAP
1170
    // SINGLE -> BITMAP
1171
59
    BitmapValue& operator|=(const BitmapValue& rhs) {
1172
59
        switch (rhs._type) {
1173
12
        case EMPTY:
1174
12
            break;
1175
25
        case SINGLE:
1176
25
            add(rhs._sv);
1177
25
            break;
1178
8
        case BITMAP:
1179
8
            switch (_type) {
1180
2
            case EMPTY:
1181
2
                _bitmap = rhs._bitmap;
1182
2
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1183
2
                _is_shared = true;
1184
2
                _type = BITMAP;
1185
2
                break;
1186
3
            case SINGLE:
1187
3
                _bitmap = rhs._bitmap;
1188
3
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1189
3
                _is_shared = true;
1190
3
                _prepare_bitmap_for_write();
1191
3
                _bitmap->add(_sv);
1192
3
                _type = BITMAP;
1193
3
                break;
1194
2
            case BITMAP:
1195
2
                _prepare_bitmap_for_write();
1196
2
                *_bitmap |= *rhs._bitmap;
1197
2
                break;
1198
1
            case SET: {
1199
1
                _prepare_bitmap_for_write();
1200
1
                *_bitmap = *rhs._bitmap;
1201
1202
31
                for (auto v : _set) {
1203
31
                    _bitmap->add(v);
1204
31
                }
1205
1
                _type = BITMAP;
1206
1
                break;
1207
0
            }
1208
8
            }
1209
8
            break;
1210
14
        case SET:
1211
14
            switch (_type) {
1212
1
            case EMPTY:
1213
1
                _set = rhs._set;
1214
1
                _type = SET;
1215
1
                break;
1216
1
            case SINGLE: {
1217
1
                if ((rhs._set.size() + rhs._set.contains(_sv) > SET_TYPE_THRESHOLD)) {
1218
0
                    _prepare_bitmap_for_write();
1219
0
                    _bitmap->add(_sv);
1220
0
                    for (auto v : rhs._set) {
1221
0
                        _bitmap->add(v);
1222
0
                    }
1223
0
                    _type = BITMAP;
1224
1
                } else {
1225
1
                    _set = rhs._set;
1226
1
                    _set.insert(_sv);
1227
1
                    _type = SET;
1228
1
                }
1229
1
                break;
1230
0
            }
1231
1
            case BITMAP:
1232
1
                _prepare_bitmap_for_write();
1233
31
                for (auto v : rhs._set) {
1234
31
                    _bitmap->add(v);
1235
31
                }
1236
1
                break;
1237
11
            case SET: {
1238
58
                for (auto v : rhs._set) {
1239
58
                    _set.insert(v);
1240
58
                }
1241
11
                _convert_to_bitmap_if_need();
1242
11
                break;
1243
0
            }
1244
14
            }
1245
14
            break;
1246
59
        }
1247
59
        return *this;
1248
59
    }
1249
1250
21
    BitmapValue& fastunion(const std::vector<const BitmapValue*>& values) {
1251
21
        std::vector<const detail::Roaring64Map*> bitmaps;
1252
21
        std::vector<uint64_t> single_values;
1253
21
        std::vector<const SetContainer<uint64_t>*> sets;
1254
1.73k
        for (const auto* value : values) {
1255
1.73k
            switch (value->_type) {
1256
5
            case EMPTY:
1257
5
                break;
1258
5
            case SINGLE:
1259
5
                single_values.push_back(value->_sv);
1260
5
                break;
1261
5
            case BITMAP:
1262
5
                bitmaps.push_back(value->_bitmap.get());
1263
5
                break;
1264
1.71k
            case SET:
1265
1.71k
                sets.push_back(&value->_set);
1266
1.71k
                break;
1267
1.73k
            }
1268
1.73k
        }
1269
1270
21
        if (!bitmaps.empty()) {
1271
5
            _prepare_bitmap_for_write();
1272
5
            switch (_type) {
1273
2
            case EMPTY:
1274
2
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1275
2
                break;
1276
2
            case SINGLE:
1277
2
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1278
2
                _bitmap->add(_sv);
1279
2
                break;
1280
1
            case BITMAP:
1281
1
                for (const auto* bitmap : bitmaps) {
1282
1
                    *_bitmap |= *bitmap;
1283
1
                }
1284
1
                break;
1285
0
            case SET: {
1286
0
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1287
0
                for (auto v : _set) {
1288
0
                    _bitmap->add(v);
1289
0
                }
1290
0
                _set.clear();
1291
0
                break;
1292
0
            }
1293
5
            }
1294
5
            _type = BITMAP;
1295
5
        }
1296
1297
21
        if (!sets.empty()) {
1298
1.71k
            for (auto& set : sets) {
1299
1.80k
                for (auto v : *set) {
1300
1.80k
                    _set.insert(v);
1301
1.80k
                }
1302
1.71k
            }
1303
10
            switch (_type) {
1304
9
            case EMPTY:
1305
9
                _type = SET;
1306
9
                break;
1307
0
            case SINGLE: {
1308
0
                _set.insert(_sv);
1309
0
                _type = SET;
1310
0
                break;
1311
0
            }
1312
0
            case BITMAP:
1313
0
                _prepare_bitmap_for_write();
1314
0
                for (auto v : _set) {
1315
0
                    _bitmap->add(v);
1316
0
                }
1317
0
                _type = BITMAP;
1318
0
                _set.clear();
1319
0
                break;
1320
1
            case SET: {
1321
1
                break;
1322
0
            }
1323
10
            }
1324
10
            if (_type == SET) {
1325
10
                _convert_to_bitmap_if_need();
1326
10
            }
1327
10
        }
1328
1329
21
        if (_type == EMPTY && single_values.size() == 1) {
1330
1
            if (config::enable_set_in_bitmap_value) {
1331
0
                _type = SET;
1332
0
                _set.insert(single_values[0]);
1333
1
            } else {
1334
1
                _sv = single_values[0];
1335
1
                _type = SINGLE;
1336
1
            }
1337
20
        } else if (!single_values.empty()) {
1338
4
            switch (_type) {
1339
0
            case EMPTY:
1340
1
            case SINGLE:
1341
1
                if (config::enable_set_in_bitmap_value) {
1342
0
                    _set.insert(single_values.cbegin(), single_values.cend());
1343
0
                    if (_type == SINGLE) {
1344
0
                        _set.insert(_sv);
1345
0
                    }
1346
0
                    _type = SET;
1347
0
                    _convert_to_bitmap_if_need();
1348
1
                } else {
1349
1
                    _prepare_bitmap_for_write();
1350
1
                    _bitmap->addMany(single_values.size(), single_values.data());
1351
1
                    if (_type == SINGLE) {
1352
1
                        _bitmap->add(_sv);
1353
1
                    }
1354
1
                    _type = BITMAP;
1355
1
                    _convert_to_smaller_type();
1356
1
                }
1357
1
                break;
1358
3
            case BITMAP: {
1359
3
                _prepare_bitmap_for_write();
1360
3
                _bitmap->addMany(single_values.size(), single_values.data());
1361
3
                break;
1362
0
            }
1363
0
            case SET:
1364
0
                _set.insert(single_values.cbegin(), single_values.cend());
1365
0
                _convert_to_bitmap_if_need();
1366
0
                break;
1367
4
            }
1368
4
        }
1369
1370
21
        return *this;
1371
21
    }
1372
1373
    // Compute the intersection between the current bitmap and the provided bitmap.
1374
    // Possible type transitions are:
1375
    // SINGLE -> EMPTY
1376
    // BITMAP -> EMPTY
1377
    // BITMAP -> SINGLE
1378
51
    BitmapValue& operator&=(const BitmapValue& rhs) {
1379
51
        switch (rhs._type) {
1380
14
        case EMPTY:
1381
14
            reset(); // empty & any = empty
1382
14
            break;
1383
10
        case SINGLE:
1384
10
            switch (_type) {
1385
2
            case EMPTY:
1386
2
                break;
1387
3
            case SINGLE:
1388
3
                if (_sv != rhs._sv) {
1389
2
                    _type = EMPTY;
1390
2
                }
1391
3
                break;
1392
4
            case BITMAP:
1393
4
                if (!_bitmap->contains(rhs._sv)) {
1394
1
                    _type = EMPTY;
1395
3
                } else {
1396
3
                    _type = SINGLE;
1397
3
                    _sv = rhs._sv;
1398
3
                }
1399
4
                _bitmap.reset();
1400
4
                _is_shared = false;
1401
4
                break;
1402
1
            case SET:
1403
1
                if (!_set.contains(rhs._sv)) {
1404
0
                    _type = EMPTY;
1405
1
                } else {
1406
1
                    _type = SINGLE;
1407
1
                    _sv = rhs._sv;
1408
1
                }
1409
1
                _set.clear();
1410
1
                break;
1411
10
            }
1412
10
            break;
1413
10
        case BITMAP:
1414
10
            switch (_type) {
1415
2
            case EMPTY:
1416
2
                break;
1417
3
            case SINGLE:
1418
3
                if (!rhs._bitmap->contains(_sv)) {
1419
2
                    _type = EMPTY;
1420
2
                }
1421
3
                break;
1422
4
            case BITMAP:
1423
4
                _prepare_bitmap_for_write();
1424
4
                *_bitmap &= *rhs._bitmap;
1425
4
                _convert_to_smaller_type();
1426
4
                break;
1427
1
            case SET:
1428
32
                for (auto it = _set.begin(); it != _set.end();) {
1429
31
                    if (!rhs._bitmap->contains(*it)) {
1430
15
                        it = _set.erase(it);
1431
16
                    } else {
1432
16
                        ++it;
1433
16
                    }
1434
31
                }
1435
1
                _convert_to_smaller_type();
1436
1
                break;
1437
10
            }
1438
10
            break;
1439
17
        case SET:
1440
17
            switch (_type) {
1441
1
            case EMPTY:
1442
1
                break;
1443
1
            case SINGLE:
1444
1
                if (!rhs._set.contains(_sv)) {
1445
1
                    _type = EMPTY;
1446
1
                }
1447
1
                break;
1448
1
            case BITMAP:
1449
1
                _prepare_bitmap_for_write();
1450
31
                for (auto v : rhs._set) {
1451
31
                    if (_bitmap->contains(v)) {
1452
0
                        _set.insert(v);
1453
0
                    }
1454
31
                }
1455
1
                _type = SET;
1456
1
                _bitmap.reset();
1457
1
                _is_shared = false;
1458
1
                _convert_to_smaller_type();
1459
1
                break;
1460
14
            case SET:
1461
79
                for (auto it = _set.begin(); it != _set.end();) {
1462
65
                    if (!rhs._set.contains(*it)) {
1463
37
                        it = _set.erase(it);
1464
37
                    } else {
1465
28
                        ++it;
1466
28
                    }
1467
65
                }
1468
14
                _convert_to_smaller_type();
1469
14
                break;
1470
17
            }
1471
17
            break;
1472
51
        }
1473
51
        return *this;
1474
51
    }
1475
1476
    // Compute the symmetric union between the current bitmap and the provided bitmap.
1477
    // Possible type transitions are:
1478
    // SINGLE -> EMPTY
1479
    // BITMAP -> EMPTY
1480
    // BITMAP -> SINGLE
1481
29
    BitmapValue& operator^=(const BitmapValue& rhs) {
1482
29
        switch (rhs._type) {
1483
8
        case EMPTY:
1484
8
            break;
1485
4
        case SINGLE:
1486
4
            switch (_type) {
1487
1
            case EMPTY:
1488
1
                add(rhs._sv);
1489
1
                break;
1490
1
            case SINGLE:
1491
1
                if (_sv == rhs._sv) {
1492
0
                    _type = EMPTY;
1493
1
                } else {
1494
1
                    add(rhs._sv);
1495
1
                }
1496
1
                break;
1497
1
            case BITMAP:
1498
1
                if (!_bitmap->contains(rhs._sv)) {
1499
0
                    add(rhs._sv);
1500
1
                } else {
1501
1
                    _prepare_bitmap_for_write();
1502
1
                    _bitmap->remove(rhs._sv);
1503
1
                }
1504
1
                break;
1505
1
            case SET:
1506
1
                if (!_set.contains(rhs._sv)) {
1507
0
                    _set.insert(rhs._sv);
1508
1
                } else {
1509
1
                    _set.erase(rhs._sv);
1510
1
                }
1511
1
                break;
1512
4
            }
1513
4
            break;
1514
6
        case BITMAP:
1515
6
            switch (_type) {
1516
1
            case EMPTY:
1517
1
                _bitmap = rhs._bitmap;
1518
1
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1519
1
                _is_shared = true;
1520
1
                _type = BITMAP;
1521
1
                break;
1522
1
            case SINGLE:
1523
1
                _bitmap = rhs._bitmap;
1524
1
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1525
1
                _is_shared = true;
1526
1
                _type = BITMAP;
1527
1
                _prepare_bitmap_for_write();
1528
1
                if (!rhs._bitmap->contains(_sv)) {
1529
1
                    _bitmap->add(_sv);
1530
1
                } else {
1531
0
                    _bitmap->remove(_sv);
1532
0
                }
1533
1
                break;
1534
3
            case BITMAP:
1535
3
                _prepare_bitmap_for_write();
1536
3
                *_bitmap ^= *rhs._bitmap;
1537
3
                _convert_to_smaller_type();
1538
3
                break;
1539
1
            case SET:
1540
1
                _prepare_bitmap_for_write();
1541
1
                *_bitmap = *rhs._bitmap;
1542
31
                for (auto v : _set) {
1543
31
                    if (_bitmap->contains(v)) {
1544
16
                        _bitmap->remove(v);
1545
16
                    } else {
1546
15
                        _bitmap->add(v);
1547
15
                    }
1548
31
                }
1549
1
                _type = BITMAP;
1550
1
                _convert_to_smaller_type();
1551
1
                break;
1552
6
            }
1553
6
            break;
1554
11
        case SET:
1555
11
            switch (_type) {
1556
1
            case EMPTY:
1557
1
                _set = rhs._set;
1558
1
                _type = SET;
1559
1
                break;
1560
1
            case SINGLE:
1561
1
                _set = rhs._set;
1562
1
                if (!rhs._set.contains(_sv)) {
1563
1
                    _set.insert(_sv);
1564
1
                } else {
1565
0
                    _set.erase(_sv);
1566
0
                }
1567
1
                _type = SET;
1568
1
                break;
1569
1
            case BITMAP:
1570
1
                _prepare_bitmap_for_write();
1571
31
                for (auto v : rhs._set) {
1572
31
                    if (_bitmap->contains(v)) {
1573
0
                        _bitmap->remove(v);
1574
31
                    } else {
1575
31
                        _bitmap->add(v);
1576
31
                    }
1577
31
                }
1578
1
                _convert_to_smaller_type();
1579
1
                break;
1580
8
            case SET:
1581
49
                for (auto v : rhs._set) {
1582
49
                    if (_set.contains(v)) {
1583
22
                        _set.erase(v);
1584
27
                    } else {
1585
27
                        _set.insert(v);
1586
27
                    }
1587
49
                }
1588
8
                _convert_to_smaller_type();
1589
8
                break;
1590
11
            }
1591
11
            break;
1592
29
        }
1593
29
        return *this;
1594
29
    }
1595
1596
    // check if value x is present
1597
9.19k
    bool contains(uint64_t x) const {
1598
9.19k
        switch (_type) {
1599
0
        case EMPTY:
1600
0
            return false;
1601
14
        case SINGLE:
1602
14
            return _sv == x;
1603
8.93k
        case BITMAP:
1604
8.93k
            return _bitmap->contains(x);
1605
238
        case SET:
1606
238
            return _set.contains(x);
1607
9.19k
        }
1608
0
        return false;
1609
9.19k
    }
1610
1611
    // true if contains a value that belongs to the range [left, right].
1612
    bool contains_any(uint64_t left, uint64_t right) const;
1613
1614
240k
    uint64_t cardinality() const {
1615
240k
        switch (_type) {
1616
240k
        case EMPTY:
1617
240k
            return 0;
1618
36
        case SINGLE:
1619
36
            return 1;
1620
210
        case BITMAP:
1621
210
            return _bitmap->cardinality();
1622
74
        case SET:
1623
74
            return _set.size();
1624
240k
        }
1625
0
        return 0;
1626
240k
    }
1627
1628
60.0k
    uint64_t and_cardinality(const BitmapValue& rhs) const {
1629
60.0k
        switch (rhs._type) {
1630
60.0k
        case EMPTY:
1631
60.0k
            return 0;
1632
4
        case SINGLE:
1633
4
            switch (_type) {
1634
1
            case EMPTY:
1635
1
                return 0;
1636
1
            case SINGLE:
1637
1
                return _sv == rhs._sv;
1638
1
            case BITMAP:
1639
1
                return _bitmap->contains(rhs._sv);
1640
1
            case SET:
1641
1
                return _set.contains(rhs._sv);
1642
4
            }
1643
0
            break;
1644
7
        case BITMAP:
1645
7
            switch (_type) {
1646
1
            case EMPTY:
1647
1
                return 0;
1648
1
            case SINGLE:
1649
1
                return rhs._bitmap->contains(_sv);
1650
4
            case BITMAP:
1651
4
                return _bitmap->andCardinality(*rhs._bitmap);
1652
1
            case SET: {
1653
1
                uint64_t cardinality = 0;
1654
31
                for (auto v : _set) {
1655
31
                    if (rhs._bitmap->contains(v)) {
1656
16
                        ++cardinality;
1657
16
                    }
1658
31
                }
1659
1
                return cardinality;
1660
0
            }
1661
7
            }
1662
0
            break;
1663
4
        case SET:
1664
4
            switch (_type) {
1665
1
            case EMPTY:
1666
1
                return 0;
1667
1
            case SINGLE:
1668
1
                return rhs._set.contains(_sv);
1669
1
            case BITMAP: {
1670
1
                uint64_t cardinality = 0;
1671
31
                for (auto v : rhs._set) {
1672
31
                    if (_bitmap->contains(v)) {
1673
0
                        ++cardinality;
1674
0
                    }
1675
31
                }
1676
1
                return cardinality;
1677
0
            }
1678
1
            case SET: {
1679
1
                uint64_t cardinality = 0;
1680
31
                for (auto v : _set) {
1681
31
                    if (rhs._set.contains(v)) {
1682
14
                        ++cardinality;
1683
14
                    }
1684
31
                }
1685
1
                return cardinality;
1686
0
            }
1687
4
            }
1688
60.0k
        }
1689
0
        return 0;
1690
60.0k
    }
1691
1692
60.0k
    uint64_t or_cardinality(const BitmapValue& rhs) const {
1693
60.0k
        switch (rhs._type) {
1694
59.9k
        case EMPTY:
1695
59.9k
            return cardinality();
1696
0
        case SINGLE:
1697
0
            switch (_type) {
1698
0
            case EMPTY:
1699
0
                return 1;
1700
0
            case SINGLE:
1701
0
                return 1 + (_sv != rhs._sv);
1702
0
            case BITMAP:
1703
0
                return cardinality() + !_bitmap->contains(rhs._sv);
1704
0
            case SET:
1705
0
                return _set.size() + !_set.contains(rhs._sv);
1706
0
            }
1707
0
            break;
1708
3
        case BITMAP:
1709
3
            switch (_type) {
1710
0
            case EMPTY:
1711
0
                return rhs.cardinality();
1712
0
            case SINGLE:
1713
0
                return rhs.cardinality() + !rhs._bitmap->contains(_sv);
1714
3
            case BITMAP:
1715
3
                return _bitmap->orCardinality(*rhs._bitmap);
1716
0
            case SET: {
1717
0
                uint64_t cardinality = rhs._bitmap->cardinality();
1718
0
                for (auto v : _set) {
1719
0
                    if (!rhs._bitmap->contains(v)) {
1720
0
                        ++cardinality;
1721
0
                    }
1722
0
                }
1723
0
                return cardinality;
1724
0
            }
1725
3
            }
1726
0
            break;
1727
0
        case SET:
1728
0
            switch (_type) {
1729
0
            case EMPTY:
1730
0
                return rhs.cardinality();
1731
0
            case SINGLE:
1732
0
                return rhs.cardinality() + !rhs._set.contains(_sv);
1733
0
            case BITMAP: {
1734
0
                uint64_t cardinality = _bitmap->cardinality();
1735
0
                for (auto v : rhs._set) {
1736
0
                    if (!_bitmap->contains(v)) {
1737
0
                        ++cardinality;
1738
0
                    }
1739
0
                }
1740
0
                return cardinality;
1741
0
            }
1742
0
            case SET: {
1743
0
                uint64_t cardinality = _set.size();
1744
0
                for (auto v : _set) {
1745
0
                    if (!rhs._set.contains(v)) {
1746
0
                        ++cardinality;
1747
0
                    }
1748
0
                }
1749
0
                return cardinality;
1750
0
            }
1751
0
            }
1752
60.0k
        }
1753
0
        return 0;
1754
60.0k
    }
1755
1756
24
    uint64_t andnot_cardinality(const BitmapValue& rhs) const {
1757
24
        switch (rhs._type) {
1758
8
        case EMPTY:
1759
8
            return cardinality();
1760
4
        case SINGLE:
1761
4
            switch (_type) {
1762
1
            case EMPTY:
1763
1
                return 0;
1764
1
            case SINGLE:
1765
1
                return 1 - (_sv == rhs._sv);
1766
1
            case BITMAP:
1767
1
                return cardinality() - _bitmap->contains(rhs._sv);
1768
1
            case SET:
1769
1
                return cardinality() - _set.contains(rhs._sv);
1770
4
            }
1771
0
            break;
1772
4
        case BITMAP:
1773
4
            switch (_type) {
1774
1
            case EMPTY:
1775
1
                return 0;
1776
1
            case SINGLE:
1777
1
                return !rhs._bitmap->contains(_sv);
1778
1
            case BITMAP:
1779
1
                return _bitmap->andnotCardinality(*rhs._bitmap);
1780
1
            case SET: {
1781
1
                uint64_t cardinality = _set.size();
1782
31
                for (auto v : _set) {
1783
31
                    if (rhs._bitmap->contains(v)) {
1784
16
                        cardinality -= 1;
1785
16
                    }
1786
31
                }
1787
1
                return cardinality;
1788
0
            }
1789
4
            }
1790
0
            break;
1791
8
        case SET:
1792
8
            switch (_type) {
1793
1
            case EMPTY:
1794
1
                return 0;
1795
1
            case SINGLE:
1796
1
                return !rhs._set.contains(_sv);
1797
1
            case BITMAP: {
1798
1
                uint64_t cardinality = _bitmap->cardinality();
1799
31
                for (auto v : rhs._set) {
1800
31
                    if (_bitmap->contains(v)) {
1801
0
                        cardinality -= 1;
1802
0
                    }
1803
31
                }
1804
1
                return cardinality;
1805
0
            }
1806
5
            case SET: {
1807
5
                uint64_t cardinality = _set.size();
1808
33
                for (auto v : rhs._set) {
1809
33
                    if (_set.contains(v)) {
1810
16
                        cardinality -= 1;
1811
16
                    }
1812
33
                }
1813
5
                return cardinality;
1814
0
            }
1815
8
            }
1816
24
        }
1817
0
        return 0;
1818
24
    }
1819
1820
    // Return how many bytes are required to serialize this bitmap.
1821
    // See BitmapTypeCode for the serialized format.
1822
148k
    size_t getSizeInBytes() {
1823
148k
        size_t res = 0;
1824
148k
        switch (_type) {
1825
120k
        case EMPTY:
1826
120k
            res = 1;
1827
120k
            break;
1828
35
        case SINGLE:
1829
35
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1830
30
                res = 1 + sizeof(uint32_t);
1831
30
            } else {
1832
5
                res = 1 + sizeof(uint64_t);
1833
5
            }
1834
35
            break;
1835
27.3k
        case BITMAP:
1836
27.3k
            _prepare_bitmap_for_write();
1837
27.3k
            _bitmap->runOptimize();
1838
27.3k
            _bitmap->shrinkToFit();
1839
27.3k
            res = _bitmap->getSizeInBytes(config::bitmap_serialize_version);
1840
27.3k
            break;
1841
679
        case SET:
1842
            /// 1 byte for type, 1 byte for count
1843
679
            res = 2 + sizeof(uint64_t) * _set.size();
1844
679
            break;
1845
148k
        }
1846
148k
        return res;
1847
148k
    }
1848
1849
    // Serialize the bitmap value to dst, which should be large enough.
1850
    // Client should call `getSizeInBytes` first to get the serialized size.
1851
79.7k
    void write_to(char* dst) const {
1852
79.7k
        switch (_type) {
1853
60.0k
        case EMPTY:
1854
60.0k
            *dst = BitmapTypeCode::EMPTY;
1855
60.0k
            break;
1856
25
        case SINGLE:
1857
25
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1858
22
                *(dst++) = BitmapTypeCode::SINGLE32;
1859
22
                encode_fixed32_le(reinterpret_cast<uint8_t*>(dst), static_cast<uint32_t>(_sv));
1860
22
            } else {
1861
3
                *(dst++) = BitmapTypeCode::SINGLE64;
1862
3
                encode_fixed64_le(reinterpret_cast<uint8_t*>(dst), _sv);
1863
3
            }
1864
25
            break;
1865
548
        case SET:
1866
548
            DCHECK(config::enable_set_in_bitmap_value);
1867
548
            *dst = BitmapTypeCode::SET;
1868
548
            ++dst;
1869
548
            *dst = static_cast<uint8_t>(_set.size());
1870
548
            ++dst;
1871
8.76k
            for (auto v : _set) {
1872
8.76k
                encode_fixed64_le(reinterpret_cast<uint8_t*>(dst), v);
1873
8.76k
                dst += sizeof(uint64_t);
1874
8.76k
            }
1875
548
            break;
1876
19.1k
        case BITMAP:
1877
19.1k
            _bitmap->write(dst, config::bitmap_serialize_version);
1878
19.1k
            break;
1879
79.7k
        }
1880
79.7k
    }
1881
1882
    // Deserialize a bitmap value from `src`.
1883
    // Return false if `src` begins with unknown type code, true otherwise.
1884
71.4k
    bool deserialize(const char* src) {
1885
71.4k
        switch (*src) {
1886
60.0k
        case BitmapTypeCode::EMPTY:
1887
60.0k
            _type = EMPTY;
1888
60.0k
            break;
1889
15
        case BitmapTypeCode::SINGLE32:
1890
15
            _type = SINGLE;
1891
15
            _sv = decode_fixed32_le(reinterpret_cast<const uint8_t*>(src + 1));
1892
15
            if (config::enable_set_in_bitmap_value) {
1893
2
                _type = SET;
1894
2
                _set.insert(_sv);
1895
2
            }
1896
15
            break;
1897
3
        case BitmapTypeCode::SINGLE64:
1898
3
            _type = SINGLE;
1899
3
            _sv = decode_fixed64_le(reinterpret_cast<const uint8_t*>(src + 1));
1900
3
            if (config::enable_set_in_bitmap_value) {
1901
1
                _type = SET;
1902
1
                _set.insert(_sv);
1903
1
            }
1904
3
            break;
1905
5.12k
        case BitmapTypeCode::BITMAP32:
1906
5.13k
        case BitmapTypeCode::BITMAP64:
1907
11.0k
        case BitmapTypeCode::BITMAP32_V2:
1908
11.0k
        case BitmapTypeCode::BITMAP64_V2:
1909
11.0k
            _type = BITMAP;
1910
11.0k
            _is_shared = false;
1911
11.0k
            _bitmap = std::make_shared<detail::Roaring64Map>(detail::Roaring64Map::read(src));
1912
11.0k
            break;
1913
355
        case BitmapTypeCode::SET: {
1914
355
            _type = SET;
1915
355
            ++src;
1916
355
            uint8_t count = *src;
1917
355
            ++src;
1918
355
            CHECK(count <= SET_TYPE_THRESHOLD) << "bitmap value with incorrect set count";
1919
6.04k
            for (uint8_t i = 0; i != count; ++i, src += sizeof(uint64_t)) {
1920
5.68k
                _set.insert(decode_fixed64_le(reinterpret_cast<const uint8_t*>(src)));
1921
5.68k
            }
1922
355
            CHECK_EQ(count, _set.size()) << "bitmap value with incorrect set count";
1923
1924
355
            if (!config::enable_set_in_bitmap_value) {
1925
0
                _prepare_bitmap_for_write();
1926
0
                for (auto v : _set) {
1927
0
                    _bitmap->add(v);
1928
0
                }
1929
0
                _type = BITMAP;
1930
0
                _set.clear();
1931
0
            }
1932
355
            break;
1933
11.0k
        }
1934
1
        case BitmapTypeCode::SET_V2: {
1935
1
            uint32_t size = 0;
1936
1
            memcpy(&size, src + 1, sizeof(uint32_t));
1937
1
            src += sizeof(uint32_t) + 1;
1938
1939
1
            if (!config::enable_set_in_bitmap_value || size > SET_TYPE_THRESHOLD) {
1940
0
                _type = BITMAP;
1941
0
                _prepare_bitmap_for_write();
1942
1943
0
                for (int i = 0; i < size; ++i) {
1944
0
                    uint64_t key {};
1945
0
                    memcpy(&key, src, sizeof(uint64_t));
1946
0
                    _bitmap->add(key);
1947
0
                    src += sizeof(uint64_t);
1948
0
                }
1949
1
            } else {
1950
1
                _type = SET;
1951
1
                _set.reserve(size);
1952
1953
3
                for (int i = 0; i < size; ++i) {
1954
2
                    uint64_t key {};
1955
2
                    memcpy(&key, src, sizeof(uint64_t));
1956
2
                    _set.insert(key);
1957
2
                    src += sizeof(uint64_t);
1958
2
                }
1959
1
            }
1960
1
            break;
1961
11.0k
        }
1962
0
        default:
1963
0
            LOG(ERROR) << "BitmapTypeCode invalid, should between: " << BitmapTypeCode::EMPTY
1964
0
                       << " and " << BitmapTypeCode::BITMAP64 << " actual is "
1965
0
                       << static_cast<int>(*src);
1966
0
            return false;
1967
71.4k
        }
1968
71.4k
        return true;
1969
71.4k
    }
1970
1971
13
    int64_t minimum() const {
1972
13
        switch (_type) {
1973
2
        case SINGLE:
1974
2
            return _sv;
1975
4
        case BITMAP:
1976
4
            return _bitmap->minimum();
1977
5
        case SET:
1978
5
            return _min_in_set();
1979
2
        default:
1980
2
            return 0;
1981
13
        }
1982
13
    }
1983
1984
    // TODO limit string size to avoid OOM
1985
9.31k
    std::string to_string() const {
1986
9.31k
        std::stringstream ss;
1987
9.31k
        switch (_type) {
1988
16
        case EMPTY:
1989
16
            break;
1990
45
        case SINGLE:
1991
45
            ss << _sv;
1992
45
            break;
1993
8.73k
        case BITMAP: {
1994
8.73k
            struct IterCtx {
1995
8.73k
                std::stringstream* ss = nullptr;
1996
8.73k
                bool first = true;
1997
8.73k
            } iter_ctx;
1998
8.73k
            iter_ctx.ss = &ss;
1999
2000
8.73k
            _bitmap->iterate(
2001
4.23M
                    [](uint64_t value, void* c) -> bool {
2002
4.23M
                        auto ctx = reinterpret_cast<IterCtx*>(c);
2003
4.23M
                        if (ctx->first) {
2004
8.73k
                            ctx->first = false;
2005
4.22M
                        } else {
2006
4.22M
                            (*ctx->ss) << ",";
2007
4.22M
                        }
2008
4.23M
                        (*ctx->ss) << value;
2009
4.23M
                        return true;
2010
4.23M
                    },
2011
8.73k
                    &iter_ctx);
2012
8.73k
            break;
2013
0
        }
2014
519
        case SET: {
2015
519
            struct IterCtx {
2016
519
                std::stringstream* ss = nullptr;
2017
519
                bool first = true;
2018
519
            } iter_ctx;
2019
519
            iter_ctx.ss = &ss;
2020
2021
519
            std::vector<uint64_t> values(_set.begin(), _set.end());
2022
519
            std::sort(values.begin(), values.end());
2023
2024
8.27k
            for (auto v : values) {
2025
8.27k
                if (iter_ctx.first) {
2026
519
                    iter_ctx.first = false;
2027
7.75k
                } else {
2028
7.75k
                    (*iter_ctx.ss) << ",";
2029
7.75k
                }
2030
8.27k
                (*iter_ctx.ss) << v;
2031
8.27k
            }
2032
519
            break;
2033
0
        }
2034
9.31k
        }
2035
9.31k
        return ss.str();
2036
9.31k
    }
2037
2038
14
    int64_t maximum() const {
2039
14
        switch (_type) {
2040
2
        case SINGLE:
2041
2
            return _sv;
2042
5
        case BITMAP:
2043
5
            return _bitmap->maximum();
2044
5
        case SET:
2045
5
            return _max_in_set();
2046
2
        default:
2047
2
            return 0;
2048
14
        }
2049
14
    }
2050
2051
4
    uint64_t max(bool* empty) const {
2052
4
        return min_or_max(empty, [&]() { return maximum(); });
2053
4
    }
2054
2055
4
    uint64_t min(bool* empty) const {
2056
4
        return min_or_max(empty, [&]() { return minimum(); });
2057
4
    }
2058
2059
5
    uint64_t _min_in_set() const {
2060
5
        DCHECK_EQ(_type, SET);
2061
5
        return *std::min_element(_set.begin(), _set.end());
2062
5
    }
2063
2064
5
    uint64_t _max_in_set() const {
2065
5
        DCHECK_EQ(_type, SET);
2066
5
        return *std::max_element(_set.begin(), _set.end());
2067
5
    }
2068
2069
6
    bool empty() const { return _type == EMPTY; }
2070
2071
    /**
2072
     * Return new set with specified range (not include the range_end)
2073
     */
2074
    int64_t sub_range(const int64_t& range_start, const int64_t& range_end,
2075
7
                      BitmapValue* ret_bitmap) {
2076
7
        switch (_type) {
2077
1
        case EMPTY:
2078
1
            return 0;
2079
2
        case SINGLE: {
2080
            //only single value, so _sv must in [range_start,range_end)
2081
2
            if (range_start <= _sv && _sv < range_end) {
2082
1
                ret_bitmap->add(_sv);
2083
1
                return 1;
2084
1
            } else {
2085
1
                return 0;
2086
1
            }
2087
2
        }
2088
2
        case BITMAP: {
2089
2
            int64_t count = 0;
2090
146
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2091
145
                if (*it < range_start) {
2092
0
                    continue;
2093
0
                }
2094
145
                if (*it < range_end) {
2095
144
                    ret_bitmap->add(*it);
2096
144
                    ++count;
2097
144
                } else {
2098
1
                    break;
2099
1
                }
2100
145
            }
2101
2
            return count;
2102
2
        }
2103
2
        case SET: {
2104
2
            int64_t count = 0;
2105
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2106
2
            std::sort(values.begin(), values.end());
2107
6
            for (auto it = values.begin(); it != values.end(); ++it) {
2108
4
                if (*it < range_start || *it >= range_end) {
2109
1
                    continue;
2110
1
                }
2111
3
                ret_bitmap->add(*it);
2112
3
                ++count;
2113
3
            }
2114
2
            return count;
2115
2
        }
2116
7
        }
2117
0
        return 0;
2118
7
    }
2119
2120
    /**
2121
     * Return new set with specified start and limit
2122
     * @param range_start the start value for the range
2123
     * @param cardinality_limit the length of the subset
2124
     * @return the real count for subset, maybe less than cardinality_limit
2125
     */
2126
    int64_t sub_limit(const int64_t& range_start, const int64_t& cardinality_limit,
2127
9
                      BitmapValue* ret_bitmap) {
2128
9
        switch (_type) {
2129
1
        case EMPTY:
2130
1
            return 0;
2131
2
        case SINGLE: {
2132
            //only single value, so range_start must less than _sv
2133
2
            if (range_start > _sv) {
2134
0
                return 0;
2135
2
            } else {
2136
2
                ret_bitmap->add(_sv);
2137
2
                return 1;
2138
2
            }
2139
2
        }
2140
4
        case BITMAP: {
2141
4
            int64_t count = 0;
2142
31
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2143
30
                if (*it < range_start) {
2144
8
                    continue;
2145
8
                }
2146
22
                if (count < cardinality_limit) {
2147
19
                    ret_bitmap->add(*it);
2148
19
                    ++count;
2149
19
                } else {
2150
3
                    break;
2151
3
                }
2152
22
            }
2153
4
            return count;
2154
2
        }
2155
2
        case SET: {
2156
2
            int64_t count = 0;
2157
2158
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2159
2
            std::sort(values.begin(), values.end());
2160
6
            for (auto it = values.begin(); it != values.end(); ++it) {
2161
4
                if (*it < range_start) {
2162
0
                    continue;
2163
0
                }
2164
4
                if (count < cardinality_limit) {
2165
4
                    ret_bitmap->add(*it);
2166
4
                    ++count;
2167
4
                } else {
2168
0
                    break;
2169
0
                }
2170
4
            }
2171
2
            return count;
2172
2
        }
2173
9
        }
2174
0
        return 0;
2175
9
    }
2176
2177
    /**
2178
     * Returns the bitmap elements, starting from the offset position.
2179
     * The number of returned elements is limited by the cardinality_limit parameter.
2180
     * Analog of the substring string function, but for bitmap.
2181
     */
2182
8
    int64_t offset_limit(const int64_t& offset, const int64_t& limit, BitmapValue* ret_bitmap) {
2183
8
        switch (_type) {
2184
1
        case EMPTY:
2185
1
            return 0;
2186
2
        case SINGLE: {
2187
            //only single value, so offset must start 0
2188
2
            if (offset == 0) {
2189
1
                ret_bitmap->add(_sv);
2190
1
                return 1;
2191
1
            } else {
2192
1
                return 0;
2193
1
            }
2194
2
        }
2195
5
        default:
2196
5
            break;
2197
8
        }
2198
5
        if (_type == BITMAP) {
2199
3
            if (std::abs(offset) >= _bitmap->cardinality()) {
2200
0
                return 0;
2201
0
            }
2202
3
            int64_t abs_offset = offset;
2203
3
            if (offset < 0) {
2204
0
                abs_offset = _bitmap->cardinality() + offset;
2205
0
            }
2206
2207
3
            int64_t count = 0;
2208
3
            int64_t offset_count = 0;
2209
3
            auto it = _bitmap->begin();
2210
123
            for (; it != _bitmap->end() && offset_count < abs_offset; ++it) {
2211
120
                ++offset_count;
2212
120
            }
2213
33
            for (; it != _bitmap->end() && count < limit; ++it, ++count) {
2214
30
                ret_bitmap->add(*it);
2215
30
            }
2216
3
            return count;
2217
3
        } else {
2218
2
            if (std::abs(offset) > _set.size()) {
2219
0
                return 0;
2220
0
            }
2221
2222
2
            int64_t abs_offset = offset;
2223
2
            if (offset < 0) {
2224
0
                abs_offset = _set.size() + offset;
2225
0
            }
2226
2227
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2228
2
            std::sort(values.begin(), values.end());
2229
2230
2
            int64_t count = 0;
2231
2
            size_t index = 0;
2232
4
            for (auto v : values) {
2233
4
                if (index < abs_offset) {
2234
1
                    ++index;
2235
1
                    continue;
2236
1
                }
2237
3
                if (count == limit || index == values.size()) {
2238
0
                    break;
2239
0
                }
2240
3
                ++count;
2241
3
                ++index;
2242
3
                ret_bitmap->add(v);
2243
3
            }
2244
2
            return count;
2245
2
        }
2246
5
    }
2247
2248
    //for function bitmap_to_array
2249
3
    void to_array(vectorized::PaddedPODArray<int64_t>& data) const {
2250
3
        switch (_type) {
2251
0
        case EMPTY:
2252
0
            break;
2253
1
        case SINGLE: {
2254
1
            data.emplace_back(_sv);
2255
1
            break;
2256
0
        }
2257
1
        case BITMAP: {
2258
129
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2259
128
                data.emplace_back(*it);
2260
128
            }
2261
1
            break;
2262
0
        }
2263
1
        case SET: {
2264
1
            std::vector<uint64_t> values(_set.begin(), _set.end());
2265
1
            std::sort(values.begin(), values.end());
2266
31
            for (auto v : values) {
2267
31
                data.emplace_back(v);
2268
31
            }
2269
1
            break;
2270
0
        }
2271
3
        }
2272
3
    }
2273
2274
147
    void reset() {
2275
147
        _type = EMPTY;
2276
147
        _sv = 0;
2277
147
        _set.clear();
2278
147
        _is_shared = false;
2279
147
        _bitmap = nullptr;
2280
147
    }
2281
    // Implement an iterator for convenience
2282
    friend class BitmapValueIterator;
2283
    typedef BitmapValueIterator b_iterator;
2284
2285
    b_iterator begin() const;
2286
    b_iterator end() const;
2287
    b_iterator lower_bound(uint64_t val) const;
2288
2289
private:
2290
44
    void _convert_to_smaller_type() {
2291
44
        if (_type == BITMAP) {
2292
16
            uint64_t c = _bitmap->cardinality();
2293
16
            if (config::enable_set_in_bitmap_value && c > SET_TYPE_THRESHOLD) {
2294
7
                return;
2295
9
            } else if (c > 1) {
2296
7
                return;
2297
7
            }
2298
2
            if (c == 0) {
2299
1
                _type = EMPTY;
2300
1
            } else if (c == 1 && !config::enable_set_in_bitmap_value) {
2301
1
                _type = SINGLE;
2302
1
                _sv = _bitmap->minimum();
2303
1
            } else {
2304
0
                _type = SET;
2305
0
                for (auto v : *_bitmap) {
2306
0
                    _set.insert(v);
2307
0
                }
2308
0
            }
2309
2
            _bitmap.reset();
2310
28
        } else if (_type == SET) {
2311
28
            if (_set.size() == 1 && !config::enable_set_in_bitmap_value) {
2312
0
                _type = SINGLE;
2313
0
                _sv = *_set.begin();
2314
0
                _set.clear();
2315
0
            }
2316
28
        }
2317
44
    }
2318
2319
8
    uint64_t min_or_max(bool* empty, std::function<uint64_t()> func) const {
2320
8
        bool is_empty = false;
2321
8
        uint64_t result = 0;
2322
8
        switch (_type) {
2323
0
        case SINGLE:
2324
0
            result = _sv;
2325
0
            break;
2326
2
        case BITMAP:
2327
6
        case SET:
2328
6
            result = func();
2329
6
            break;
2330
2
        default:
2331
2
            is_empty = true;
2332
8
        }
2333
8
        if (empty) {
2334
8
            *empty = is_empty;
2335
8
        }
2336
8
        return result;
2337
8
    }
2338
2339
4.62M
    void _prepare_bitmap_for_write() {
2340
4.62M
        if (!_bitmap) {
2341
9.28k
            _bitmap = std::make_shared<detail::Roaring64Map>();
2342
9.28k
            _is_shared = false;
2343
9.28k
            return;
2344
9.28k
        }
2345
2346
4.61M
        if (!_is_shared) {
2347
            // the state is not shared, not need to check use count any more
2348
4.60M
            return;
2349
4.60M
        }
2350
2351
9.17k
        if (_bitmap.use_count() > 1) {
2352
1.02k
            auto new_one = std::make_shared<detail::Roaring64Map>();
2353
1.02k
            *new_one = *_bitmap;
2354
1.02k
            _bitmap = new_one;
2355
1.02k
        }
2356
9.17k
        _is_shared = false;
2357
9.17k
    }
2358
2359
130k
    void _convert_to_bitmap_if_need() {
2360
130k
        if (_type != SET || _set.size() <= SET_TYPE_THRESHOLD) {
2361
126k
            return;
2362
126k
        }
2363
3.97k
        _prepare_bitmap_for_write();
2364
133k
        for (auto v : _set) {
2365
133k
            _bitmap->add(v);
2366
133k
        }
2367
3.97k
        _type = BITMAP;
2368
3.97k
        _set.clear();
2369
3.97k
    }
2370
2371
    enum BitmapDataType {
2372
        EMPTY = 0,
2373
        SINGLE = 1, // single element
2374
        BITMAP = 2, // more than one elements
2375
        SET = 3     // elements count less or equal than 32
2376
    };
2377
    uint64_t _sv = 0;                              // store the single value when _type == SINGLE
2378
    std::shared_ptr<detail::Roaring64Map> _bitmap; // used when _type == BITMAP
2379
    SetContainer<uint64_t> _set;
2380
    BitmapDataType _type {EMPTY};
2381
    // Indicate whether the state is shared among multi BitmapValue object
2382
    bool _is_shared = true;
2383
    static constexpr uint64_t SET_TYPE_THRESHOLD = 32;
2384
};
2385
2386
// A simple implement of bitmap value iterator(Read only)
2387
// Usage:
2388
//  BitmapValueIterator iter = bitmap_value.begin();
2389
//  BitmapValueIterator end = bitmap_value.end();
2390
//  for (; iter != end(); ++iter) {
2391
//      uint64_t v = *iter;
2392
//      ... do something with "v" ...
2393
//  }
2394
class BitmapValueIterator {
2395
public:
2396
37
    BitmapValueIterator(const BitmapValue& bitmap, bool end = false) : _bitmap(bitmap), _end(end) {
2397
37
        switch (_bitmap._type) {
2398
8
        case BitmapValue::BitmapDataType::EMPTY:
2399
8
            _end = true;
2400
8
            break;
2401
8
        case BitmapValue::BitmapDataType::SINGLE:
2402
8
            _sv = _bitmap._sv;
2403
8
            break;
2404
13
        case BitmapValue::BitmapDataType::BITMAP:
2405
13
            _iter = new detail::Roaring64MapSetBitForwardIterator(*_bitmap._bitmap, _end);
2406
13
            break;
2407
8
        case BitmapValue::BitmapDataType::SET:
2408
8
            _set_iter = _end ? _bitmap._set.end() : _bitmap._set.begin();
2409
8
            break;
2410
0
        default:
2411
0
            CHECK(false) << _bitmap._type;
2412
37
        }
2413
37
    }
2414
2415
    BitmapValueIterator(const BitmapValueIterator& other)
2416
168
            : _bitmap(other._bitmap), _sv(other._sv), _end(other._end) {
2417
168
        _iter = other._iter ? new detail::Roaring64MapSetBitForwardIterator(*other._iter) : nullptr;
2418
168
        if (_bitmap._type == BitmapValue::BitmapDataType::SET) {
2419
33
            _set_iter = other._set_iter;
2420
33
        }
2421
168
    }
2422
2423
205
    ~BitmapValueIterator() {
2424
205
        if (_iter != nullptr) {
2425
143
            delete _iter;
2426
143
            _iter = nullptr;
2427
143
        }
2428
205
    }
2429
2430
646
    uint64_t operator*() const {
2431
646
        CHECK(!_end) << "should not get value of end iterator";
2432
646
        switch (_bitmap._type) {
2433
4
        case BitmapValue::BitmapDataType::SINGLE:
2434
4
            return _sv;
2435
517
        case BitmapValue::BitmapDataType::BITMAP:
2436
517
            return *(*_iter);
2437
125
        case BitmapValue::BitmapDataType::SET: {
2438
125
            return *_set_iter;
2439
0
        }
2440
0
        default:
2441
0
            CHECK(false) << _bitmap._type;
2442
646
        }
2443
0
        return 0;
2444
646
    }
2445
2446
486
    BitmapValueIterator& operator++() { // ++i, must returned inc. value
2447
486
        CHECK(!_end) << "should not forward when iterator ends";
2448
486
        switch (_bitmap._type) {
2449
3
        case BitmapValue::BitmapDataType::SINGLE:
2450
3
            _end = true;
2451
3
            break;
2452
389
        case BitmapValue::BitmapDataType::BITMAP:
2453
389
            ++(*_iter);
2454
389
            break;
2455
94
        case BitmapValue::BitmapDataType::SET:
2456
94
            ++_set_iter;
2457
94
            break;
2458
0
        default:
2459
0
            CHECK(false) << _bitmap._type;
2460
486
        }
2461
486
        return *this;
2462
486
    }
2463
2464
160
    BitmapValueIterator operator++(int) { // i++, must return orig. value
2465
160
        CHECK(!_end) << "should not forward when iterator ends";
2466
160
        BitmapValueIterator orig(*this);
2467
160
        switch (_bitmap._type) {
2468
1
        case BitmapValue::BitmapDataType::SINGLE:
2469
1
            _end = true;
2470
1
            break;
2471
128
        case BitmapValue::BitmapDataType::BITMAP:
2472
128
            ++(*_iter);
2473
128
            break;
2474
31
        case BitmapValue::BitmapDataType::SET:
2475
31
            ++_set_iter;
2476
31
            break;
2477
0
        default:
2478
0
            CHECK(false) << _bitmap._type;
2479
160
        }
2480
160
        return orig;
2481
160
    }
2482
2483
674
    bool operator==(const BitmapValueIterator& other) const {
2484
674
        if (_end && other._end) {
2485
13
            return true;
2486
13
        }
2487
2488
661
        switch (_bitmap._type) {
2489
0
        case BitmapValue::BitmapDataType::EMPTY:
2490
0
            return other._bitmap._type == BitmapValue::BitmapDataType::EMPTY;
2491
5
        case BitmapValue::BitmapDataType::SINGLE:
2492
5
            return _end == other._end && _sv == other._sv;
2493
524
        case BitmapValue::BitmapDataType::BITMAP:
2494
524
            return *_iter == *(other._iter);
2495
132
        case BitmapValue::BitmapDataType::SET:
2496
132
            return _set_iter == other._set_iter;
2497
0
        default:
2498
0
            CHECK(false) << _bitmap._type;
2499
661
        }
2500
0
        return false;
2501
661
    }
2502
2503
664
    bool operator!=(const BitmapValueIterator& other) const { return !(*this == other); }
2504
2505
    /**
2506
    * Move the iterator to the first value >= `val`.
2507
    */
2508
0
    BitmapValueIterator& move(uint64_t val) {
2509
0
        switch (_bitmap._type) {
2510
0
        case BitmapValue::BitmapDataType::SINGLE:
2511
0
            if (_sv < val) {
2512
0
                _end = true;
2513
0
            }
2514
0
            break;
2515
0
        case BitmapValue::BitmapDataType::BITMAP:
2516
0
            if (!_iter->move(val)) {
2517
0
                _end = true;
2518
0
            }
2519
0
            break;
2520
0
        case BitmapValue::BitmapDataType::SET: {
2521
0
            throw Exception(Status::FatalError("BitmapValue with set do not support move"));
2522
0
        }
2523
0
        default:
2524
0
            break;
2525
0
        }
2526
0
        return *this;
2527
0
    }
2528
2529
private:
2530
    const BitmapValue& _bitmap;
2531
    detail::Roaring64MapSetBitForwardIterator* _iter = nullptr;
2532
    BitmapValue::SetContainer<uint64_t>::const_iterator _set_iter;
2533
    uint64_t _sv = 0;
2534
    bool _end = false;
2535
};
2536
2537
16
inline BitmapValueIterator BitmapValue::begin() const {
2538
16
    return {*this};
2539
16
}
2540
2541
21
inline BitmapValueIterator BitmapValue::end() const {
2542
21
    return {*this, true};
2543
21
}
2544
2545
0
inline BitmapValueIterator BitmapValue::lower_bound(uint64_t val) const {
2546
0
    return BitmapValueIterator(*this).move(val);
2547
0
}
2548
2549
7
inline bool BitmapValue::contains_any(uint64_t left, uint64_t right) const {
2550
7
    if (left > right) {
2551
0
        return false;
2552
0
    }
2553
2554
7
    if (_type == SET) {
2555
25
        for (auto v : _set) {
2556
25
            if (v >= left && v <= right) {
2557
4
                return true;
2558
4
            }
2559
25
        }
2560
3
        return false;
2561
7
    }
2562
0
    auto it = lower_bound(left);
2563
0
    return it != end() && *it <= right;
2564
7
}
2565
2566
} // namespace doris