Coverage Report

Created: 2024-11-21 23:52

/root/doris/be/src/util/bitmap_value.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <parallel_hashmap/btree.h>
21
#include <parallel_hashmap/phmap.h>
22
23
#include <algorithm>
24
#include <cstdarg>
25
#include <cstdint>
26
#include <cstdio>
27
#include <limits>
28
#include <map>
29
#include <memory>
30
#include <new>
31
#include <numeric>
32
#include <roaring/roaring.hh>
33
#include <set>
34
#include <stdexcept>
35
#include <string>
36
#include <utility>
37
38
#include "common/config.h"
39
#include "common/exception.h"
40
#include "common/logging.h"
41
#include "gutil/integral_types.h"
42
#include "udf/udf.h"
43
#include "util/coding.h"
44
#include "vec/common/pod_array.h"
45
#include "vec/common/pod_array_fwd.h"
46
namespace doris {
47
48
// serialized bitmap := TypeCode(1), Payload
49
// The format of payload depends on value of TypeCode which is defined below
50
struct BitmapTypeCode {
51
    enum type {
52
        // An empty bitmap. Payload is 0 byte.
53
        // added in 0.11
54
        EMPTY = 0,
55
        // A bitmap containing only one element that is in [0, UINT32_MAX]
56
        // Payload := UInt32LittleEndian(4 byte)
57
        // added in 0.11
58
        SINGLE32 = 1,
59
        // A bitmap whose maximum element is in [0, UINT32_MAX]
60
        // Payload := the standard RoaringBitmap format described by
61
        // https://github.com/RoaringBitmap/RoaringFormatSpec/
62
        // added in 0.11
63
        BITMAP32 = 2,
64
        // A bitmap containing only one element that is in (UINT32_MAX, UINT64_MAX]
65
        // Payload := UInt64LittleEndian(8 byte)
66
        // added in 0.12
67
        SINGLE64 = 3,
68
        // A bitmap whose maximum element is in (UINT32_MAX, UINT64_MAX].
69
        //
70
        // To support 64-bits elements, all elements with the same high 32 bits are stored in a
71
        // RoaringBitmap containing only the lower 32 bits. Thus we could use
72
        // map<uint32_t, RoaringBitmap> to represent bitmap of 64-bits ints.
73
        //
74
        // Since there is no standard format for 64-bits RoaringBitmap, we define our own as below
75
        // Payload := NumRoaring(vint64), { MapKey, MapValue }^NumRoaring
76
        // - MapKey := the shared high 32 bits in UInt32LittleEndian(4 byte)
77
        // - MapValue := the standard RoaringBitmap format
78
        //
79
        // added in 0.12
80
        BITMAP64 = 4,
81
        SET = 5, // V1
82
        SET_V2 = 10,
83
        BITMAP32_V2 = 12,
84
        BITMAP64_V2 = 13,
85
        TYPE_MAX
86
    };
87
1
    Status static inline validate(int bitmap_type) {
88
1
        if (UNLIKELY(bitmap_type < type::EMPTY || bitmap_type >= type::TYPE_MAX)) {
89
1
            std::string err_msg =
90
1
                    fmt::format("BitmapTypeCode invalid, should between: {} and {} actrual is {}",
91
1
                                BitmapTypeCode::EMPTY, BitmapTypeCode::BITMAP64, bitmap_type);
92
1
            LOG(ERROR) << err_msg;
93
1
            return Status::Corruption(err_msg);
94
1
        }
95
0
        return Status::OK();
96
1
    }
97
};
98
99
namespace detail {
100
101
class Roaring64MapSetBitForwardIterator;
102
103
// Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.2.60/cpp/roaring64map.hh
104
// What we change includes
105
// - a custom serialization format is used inside read()/write()/getSizeInBytes()
106
// - added clear() and is32BitsEnough()
107
class Roaring64Map {
108
public:
109
    /**
110
     * Create an empty bitmap
111
     */
112
13.4k
    Roaring64Map() = default;
113
114
    /**
115
     * Construct a bitmap from a list of 32-bit integer values.
116
     */
117
2
    Roaring64Map(size_t n, const uint32_t* data) { addMany(n, data); }
118
119
    /**
120
     * Construct a bitmap from a list of 64-bit integer values.
121
     */
122
1
    Roaring64Map(size_t n, const uint64_t* data) { addMany(n, data); }
123
124
    /**
125
     * Construct a 64-bit map from a 32-bit one
126
     */
127
1
    explicit Roaring64Map(const roaring::Roaring& r) { emplaceOrInsert(0, r); }
128
129
9
    Roaring64Map(const Roaring64Map& r) = default;
130
131
6.12k
    Roaring64Map(Roaring64Map&& r) = default;
132
133
    /**
134
     * Assignment operator.
135
     */
136
1.02k
    Roaring64Map& operator=(const Roaring64Map& r) {
137
1.02k
        roarings = r.roarings;
138
1.02k
        return *this;
139
1.02k
    }
140
141
    /**
142
     * Add value x
143
     *
144
     */
145
3
    void add(uint32_t x) {
146
3
        roarings[0].add(x);
147
3
        roarings[0].setCopyOnWrite(copyOnWrite);
148
3
    }
149
3.15M
    void add(uint64_t x) {
150
3.15M
        roarings[highBytes(x)].add(lowBytes(x));
151
3.15M
        roarings[highBytes(x)].setCopyOnWrite(copyOnWrite);
152
3.15M
    }
153
154
    template <typename T>
155
9
    void addMany(size_t n_args, const T* vals) {
156
9
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
157
1
            auto& roaring = roarings[0];
158
1
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
159
1
            roaring.setCopyOnWrite(copyOnWrite);
160
1
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
161
0
            auto& roaring = roarings[0];
162
0
            std::vector<uint32_t> values(n_args);
163
8
            for (size_t i = 0; i != n_args; ++i) {
164
7
                values[i] = uint32_t(vals[i]);
165
7
            }
166
1
            roaring.addMany(n_args, values.data());
167
1
            roaring.setCopyOnWrite(copyOnWrite);
168
1
        } else {
169
0
            for (size_t lcv = 0; lcv < n_args; lcv++) {
170
0
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
171
0
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
172
0
            }
173
0
        }
174
9
    }
_ZN5doris6detail12Roaring64Map7addManyIjEEvmPKT_
Line
Count
Source
155
8
    void addMany(size_t n_args, const T* vals) {
156
8
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
157
8
            auto& roaring = roarings[0];
158
8
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
159
8
            roaring.setCopyOnWrite(copyOnWrite);
160
8
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
161
8
            auto& roaring = roarings[0];
162
8
            std::vector<uint32_t> values(n_args);
163
8
            for (size_t i = 0; i != n_args; ++i) {
164
8
                values[i] = uint32_t(vals[i]);
165
8
            }
166
8
            roaring.addMany(n_args, values.data());
167
8
            roaring.setCopyOnWrite(copyOnWrite);
168
8
        } else {
169
8
            for (size_t lcv = 0; lcv < n_args; lcv++) {
170
8
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
171
8
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
172
8
            }
173
8
        }
174
8
    }
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIiEEvmPKT_
_ZN5doris6detail12Roaring64Map7addManyItEEvmPKT_
Line
Count
Source
155
1
    void addMany(size_t n_args, const T* vals) {
156
1
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
157
1
            auto& roaring = roarings[0];
158
1
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
159
1
            roaring.setCopyOnWrite(copyOnWrite);
160
1
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
161
1
            auto& roaring = roarings[0];
162
1
            std::vector<uint32_t> values(n_args);
163
8
            for (size_t i = 0; i != n_args; ++i) {
164
7
                values[i] = uint32_t(vals[i]);
165
7
            }
166
1
            roaring.addMany(n_args, values.data());
167
1
            roaring.setCopyOnWrite(copyOnWrite);
168
1
        } else {
169
1
            for (size_t lcv = 0; lcv < n_args; lcv++) {
170
1
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
171
1
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
172
1
            }
173
1
        }
174
1
    }
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIhEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIaEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIsEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIlEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyInEEvmPKT_
175
176
40
    void addMany(size_t n_args, const uint64_t* vals) {
177
1.25k
        for (size_t lcv = 0; lcv < n_args; lcv++) {
178
1.21k
            roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
179
1.21k
            roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
180
1.21k
        }
181
40
    }
182
183
    /**
184
     * Remove value x
185
     *
186
     */
187
4
    void remove(uint32_t x) { roarings[0].remove(x); }
188
23
    void remove(uint64_t x) {
189
23
        auto roaring_iter = roarings.find(highBytes(x));
190
23
        if (roaring_iter != roarings.cend()) {
191
23
            roaring_iter->second.remove(lowBytes(x));
192
23
        }
193
23
    }
194
    /**
195
     * Return the largest value (if not empty)
196
     *
197
     */
198
28.6k
    uint64_t maximum() const {
199
28.6k
        for (auto roaring_iter = roarings.crbegin(); roaring_iter != roarings.crend();
200
28.6k
             ++roaring_iter) {
201
28.6k
            if (!roaring_iter->second.isEmpty()) {
202
28.6k
                return uniteBytes(roaring_iter->first, roaring_iter->second.maximum());
203
28.6k
            }
204
28.6k
        }
205
        // we put std::numeric_limits<>::max/min in parenthesis
206
        // to avoid a clash with the Windows.h header under Windows
207
5
        return (std::numeric_limits<uint64_t>::min)();
208
28.6k
    }
209
210
    /**
211
     * Return the smallest value (if not empty)
212
     *
213
     */
214
7
    uint64_t minimum() const {
215
7
        for (auto roaring_iter = roarings.cbegin(); roaring_iter != roarings.cend();
216
7
             ++roaring_iter) {
217
6
            if (!roaring_iter->second.isEmpty()) {
218
6
                return uniteBytes(roaring_iter->first, roaring_iter->second.minimum());
219
6
            }
220
6
        }
221
        // we put std::numeric_limits<>::max/min in parenthesis
222
        // to avoid a clash with the Windows.h header under Windows
223
1
        return (std::numeric_limits<uint64_t>::max)();
224
7
    }
225
226
    /**
227
     * Check if value x is present
228
     */
229
11
    bool contains(uint32_t x) const {
230
11
        return roarings.count(0) == 0 ? false : roarings.at(0).contains(x);
231
11
    }
232
9.24k
    bool contains(uint64_t x) const {
233
9.24k
        return roarings.count(highBytes(x)) == 0 ? false
234
9.24k
                                                 : roarings.at(highBytes(x)).contains(lowBytes(x));
235
9.24k
    }
236
237
    /**
238
     * Compute the intersection between the current bitmap and the provided
239
     * bitmap,
240
     * writing the result in the current bitmap. The provided bitmap is not
241
     * modified.
242
     */
243
5
    Roaring64Map& operator&=(const Roaring64Map& r) {
244
10
        for (auto& map_entry : roarings) {
245
10
            if (r.roarings.count(map_entry.first) == 1) {
246
9
                map_entry.second &= r.roarings.at(map_entry.first);
247
9
            } else {
248
1
                map_entry.second = roaring::Roaring();
249
1
            }
250
10
        }
251
5
        return *this;
252
5
    }
253
254
    /**
255
     * Compute the difference between the current bitmap and the provided
256
     * bitmap,
257
     * writing the result in the current bitmap. The provided bitmap is not
258
     * modified.
259
     */
260
1
    Roaring64Map& operator-=(const Roaring64Map& r) {
261
4
        for (auto& map_entry : roarings) {
262
4
            if (r.roarings.count(map_entry.first) == 1) {
263
4
                map_entry.second -= r.roarings.at(map_entry.first);
264
4
            }
265
4
        }
266
1
        return *this;
267
1
    }
268
269
    /**
270
     * Compute the union between the current bitmap and the provided bitmap,
271
     * writing the result in the current bitmap. The provided bitmap is not
272
     * modified.
273
     *
274
     * See also the fastunion function to aggregate many bitmaps more quickly.
275
     */
276
12
    Roaring64Map& operator|=(const Roaring64Map& r) {
277
22
        for (const auto& map_entry : r.roarings) {
278
22
            if (roarings.count(map_entry.first) == 0) {
279
10
                roarings[map_entry.first] = map_entry.second;
280
10
                roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
281
12
            } else {
282
12
                roarings[map_entry.first] |= map_entry.second;
283
12
            }
284
22
        }
285
12
        return *this;
286
12
    }
287
288
    /**
289
     * Compute the symmetric union between the current bitmap and the provided
290
     * bitmap,
291
     * writing the result in the current bitmap. The provided bitmap is not
292
     * modified.
293
     */
294
2
    Roaring64Map& operator^=(const Roaring64Map& r) {
295
5
        for (const auto& map_entry : r.roarings) {
296
5
            if (roarings.count(map_entry.first) == 0) {
297
0
                roarings[map_entry.first] = map_entry.second;
298
0
                roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
299
5
            } else {
300
5
                roarings[map_entry.first] ^= map_entry.second;
301
5
            }
302
5
        }
303
2
        return *this;
304
2
    }
305
306
    /**
307
     * Exchange the content of this bitmap with another.
308
     */
309
1
    void swap(Roaring64Map& r) { roarings.swap(r.roarings); }
310
311
    /**
312
     * Get the cardinality of the bitmap (number of elements).
313
     * Throws std::length_error in the special case where the bitmap is full
314
     * (cardinality() == 2^64). Check isFull() before calling to avoid
315
     * exception.
316
     */
317
235
    uint64_t cardinality() const {
318
235
        if (isFull()) {
319
0
            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
320
0
                                   "bitmap is full, cardinality is 2^64, "
321
0
                                   "unable to represent in a 64-bit integer");
322
0
        }
323
235
        return std::accumulate(roarings.cbegin(), roarings.cend(), (uint64_t)0,
324
235
                               [](uint64_t previous,
325
407
                                  const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
326
407
                                   return previous + map_entry.second.cardinality();
327
407
                               });
328
235
    }
329
    /**
330
     * Computes the size of the intersection between two bitmaps.
331
     *
332
     */
333
6
    uint64_t andCardinality(const Roaring64Map& r) const {
334
6
        uint64_t card = 0;
335
10
        for (const auto& map_entry : roarings) {
336
10
            if (r.roarings.count(map_entry.first) == 1) {
337
9
                card += map_entry.second.and_cardinality(r.roarings.at(map_entry.first));
338
9
            }
339
10
        }
340
6
        return card;
341
6
    }
342
343
    /**
344
     * Computes the size of the union between two bitmaps.
345
     *
346
     */
347
4
    uint64_t orCardinality(const Roaring64Map& r) const {
348
4
        uint64_t card = 0;
349
4
        for (const auto& map_entry : roarings) {
350
4
            if (r.roarings.count(map_entry.first) == 0) {
351
0
                card += map_entry.second.cardinality();
352
0
            }
353
4
        }
354
4
        for (const auto& map_entry : r.roarings) {
355
4
            if (roarings.count(map_entry.first) == 0) {
356
0
                card += map_entry.second.cardinality();
357
4
            } else {
358
4
                card += roarings.at(map_entry.first).or_cardinality(map_entry.second);
359
4
            }
360
4
        }
361
4
        return card;
362
4
    }
363
364
    /**
365
     * Computes the size of the difference (andnot) between two bitmaps.
366
     * r1.cardinality - (r1 & r2).cardinality
367
     */
368
1
    uint64_t andnotCardinality(const Roaring64Map& r) const {
369
1
        uint64_t card = 0;
370
4
        for (const auto& map_entry : roarings) {
371
4
            card += map_entry.second.cardinality();
372
4
            if (r.roarings.count(map_entry.first) == 1) {
373
4
                card -= r.roarings.at(map_entry.first).and_cardinality(map_entry.second);
374
4
            }
375
4
        }
376
1
        return card;
377
1
    }
378
379
    /**
380
     * Computes the size of the symmetric difference (andnot) between two
381
     * bitmaps.
382
     * r1.cardinality + r2.cardinality - 2 * (r1 & r2).cardinality
383
     */
384
1
    uint64_t xorCardinality(const Roaring64Map& r) const {
385
1
        uint64_t card = 0;
386
1
        for (const auto& map_entry : roarings) {
387
1
            card += map_entry.second.cardinality();
388
1
        }
389
1
        for (const auto& map_entry : r.roarings) {
390
1
            card += map_entry.second.cardinality();
391
1
            if (roarings.count(map_entry.first) == 1) {
392
1
                card -= 2 * roarings.at(map_entry.first).and_cardinality(map_entry.second);
393
1
            }
394
1
        }
395
1
        return card;
396
1
    }
397
398
    /**
399
    * Returns true if the bitmap is empty (cardinality is zero).
400
    */
401
1
    bool isEmpty() const {
402
1
        return std::all_of(roarings.cbegin(), roarings.cend(),
403
1
                           [](const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
404
0
                               return map_entry.second.isEmpty();
405
0
                           });
406
1
    }
407
408
    /**
409
    * Returns true if the bitmap is full (cardinality is max uint64_t + 1).
410
    */
411
235
    bool isFull() const {
412
        // only bother to check if map is fully saturated
413
        //
414
        // we put std::numeric_limits<>::max/min in parenthesis
415
        // to avoid a clash with the Windows.h header under Windows
416
235
        return roarings.size() == ((uint64_t)(std::numeric_limits<uint32_t>::max)()) + 1
417
235
                       ? std::all_of(roarings.cbegin(), roarings.cend(),
418
0
                                     [](const std::pair<const uint32_t, roaring::Roaring>&
419
0
                                                roaring_map_entry) {
420
                                         // roarings within map are saturated if cardinality
421
                                         // is uint32_t max + 1
422
0
                                         return roaring_map_entry.second.cardinality() ==
423
0
                                                ((uint64_t)(std::numeric_limits<uint32_t>::max)()) +
424
0
                                                        1;
425
0
                                     })
426
235
                       : false;
427
235
    }
428
429
    /**
430
     * Return true if the two bitmaps contain the same elements.
431
     */
432
14
    bool operator==(const Roaring64Map& r) const {
433
        // we cannot use operator == on the map because either side may contain
434
        // empty Roaring Bitmaps
435
14
        auto lhs_iter = roarings.cbegin();
436
14
        auto lhs_cend = roarings.cend();
437
14
        auto rhs_iter = r.roarings.cbegin();
438
14
        auto rhs_cend = r.roarings.cend();
439
27
        while (lhs_iter != lhs_cend && rhs_iter != rhs_cend) {
440
13
            auto lhs_key = lhs_iter->first;
441
13
            auto rhs_key = rhs_iter->first;
442
13
            const auto& lhs_map = lhs_iter->second;
443
13
            const auto& rhs_map = rhs_iter->second;
444
13
            if (lhs_map.isEmpty()) {
445
1
                ++lhs_iter;
446
1
                continue;
447
1
            }
448
12
            if (rhs_map.isEmpty()) {
449
2
                ++rhs_iter;
450
2
                continue;
451
2
            }
452
10
            if (!(lhs_key == rhs_key)) {
453
0
                return false;
454
0
            }
455
10
            if (!(lhs_map == rhs_map)) {
456
0
                return false;
457
0
            }
458
10
            ++lhs_iter;
459
10
            ++rhs_iter;
460
10
        }
461
16
        while (lhs_iter != lhs_cend) {
462
4
            if (!lhs_iter->second.isEmpty()) {
463
2
                return false;
464
2
            }
465
2
            ++lhs_iter;
466
2
        }
467
14
        while (rhs_iter != rhs_cend) {
468
3
            if (!rhs_iter->second.isEmpty()) {
469
1
                return false;
470
1
            }
471
2
            ++rhs_iter;
472
2
        }
473
11
        return true;
474
12
    }
475
476
    /** convert array and bitmap containers to run containers when it is more
477
     * efficient;
478
     * also convert from run containers when more space efficient.  Returns
479
     * true if the result has at least one run container.
480
     * Additional savings might be possible by calling shrinkToFit().
481
     */
482
17.3k
    bool runOptimize() {
483
17.3k
        return std::accumulate(
484
17.3k
                roarings.begin(), roarings.end(), true,
485
17.4k
                [](bool previous, std::pair<const uint32_t, roaring::Roaring>& map_entry) {
486
17.4k
                    return map_entry.second.runOptimize() && previous;
487
17.4k
                });
488
17.3k
    }
489
490
    /**
491
     * If needed, reallocate memory to shrink the memory usage. Returns
492
     * the number of bytes saved.
493
    */
494
17.3k
    size_t shrinkToFit() {
495
17.3k
        size_t savedBytes = 0;
496
17.3k
        auto iter = roarings.begin();
497
34.7k
        while (iter != roarings.cend()) {
498
17.4k
            if (iter->second.isEmpty()) {
499
                // empty Roarings are 84 bytes
500
1
                savedBytes += 88;
501
1
                iter = roarings.erase(iter);
502
17.4k
            } else {
503
17.4k
                savedBytes += iter->second.shrinkToFit();
504
17.4k
                iter++;
505
17.4k
            }
506
17.4k
        }
507
17.3k
        return savedBytes;
508
17.3k
    }
509
510
    /**
511
     * Iterate over the bitmap elements. The function iterator is called once
512
     * for all the values with ptr (can be nullptr) as the second parameter of each
513
     * call.
514
     *
515
     * roaring_iterator is simply a pointer to a function that returns bool
516
     * (true means that the iteration should continue while false means that it
517
     * should stop), and takes (uint32_t,void*) as inputs.
518
     */
519
2.77k
    void iterate(roaring::api::roaring_iterator64 iterator, void* ptr) const {
520
2.80k
        for (const auto& map_entry : roarings) {
521
2.80k
            bool should_continue = roaring_iterate64(&map_entry.second.roaring, iterator,
522
2.80k
                                                     uint64_t(map_entry.first) << 32, ptr);
523
2.80k
            if (!should_continue) {
524
1
                break;
525
1
            }
526
2.80k
        }
527
2.77k
    }
528
529
    /**
530
     * write a bitmap to a char buffer.
531
     * Returns how many bytes were written which should be getSizeInBytes().
532
     */
533
11.2k
    size_t write(char* buf, int serialize_version) const {
534
11.2k
        bool is_v1 = serialize_version == 1;
535
11.2k
        BitmapTypeCode::type type_bitmap32 =
536
11.2k
                is_v1 ? BitmapTypeCode::type::BITMAP32 : BitmapTypeCode::type::BITMAP32_V2;
537
11.2k
        BitmapTypeCode::type type_bitmap64 =
538
11.2k
                is_v1 ? BitmapTypeCode::type::BITMAP64 : BitmapTypeCode::type::BITMAP64_V2;
539
540
11.2k
        if (is32BitsEnough()) {
541
11.2k
            *(buf++) = type_bitmap32;
542
11.2k
            auto it = roarings.find(0);
543
11.2k
            if (it == roarings.end()) { // empty bitmap
544
2
                roaring::Roaring r;
545
2
                return r.write(buf, is_v1) + 1;
546
2
            }
547
11.2k
            return it->second.write(buf, is_v1) + 1;
548
11.2k
        }
549
550
7
        const char* orig = buf;
551
        // put type code
552
7
        *(buf++) = type_bitmap64;
553
        // push map size
554
7
        buf = (char*)encode_varint64((uint8_t*)buf, roarings.size());
555
7
        std::for_each(roarings.cbegin(), roarings.cend(),
556
16
                      [&buf, is_v1](const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
557
                          // push map key
558
16
                          encode_fixed32_le((uint8_t*)buf, map_entry.first);
559
16
                          buf += sizeof(uint32_t);
560
                          // push map value Roaring
561
16
                          buf += map_entry.second.write(buf, is_v1);
562
16
                      });
563
7
        return buf - orig;
564
11.2k
    }
565
566
    /**
567
     * read a bitmap from a serialized version.
568
     *
569
     * This function is unsafe in the sense that if you provide bad data,
570
     * many bytes could be read, possibly causing a buffer overflow. See also readSafe.
571
     */
572
6.12k
    static Roaring64Map read(const char* buf) {
573
6.12k
        Roaring64Map result;
574
575
6.12k
        bool is_v1 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP64 == *buf;
576
6.12k
        bool is_bitmap32 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP32_V2 == *buf;
577
6.12k
        bool is_bitmap64 = BitmapTypeCode::BITMAP64 == *buf || BitmapTypeCode::BITMAP64_V2 == *buf;
578
6.12k
        if (is_bitmap32) {
579
6.12k
            roaring::Roaring read = roaring::Roaring::read(buf + 1, is_v1);
580
6.12k
            result.emplaceOrInsert(0, std::move(read));
581
6.12k
            return result;
582
6.12k
        }
583
584
8
        DCHECK(is_bitmap64);
585
8
        buf++;
586
587
        // get map size (varint64 took 1~10 bytes)
588
8
        uint64_t map_size;
589
8
        buf = reinterpret_cast<const char*>(
590
8
                decode_varint64_ptr(reinterpret_cast<const uint8_t*>(buf),
591
8
                                    reinterpret_cast<const uint8_t*>(buf + 10), &map_size));
592
8
        DCHECK(buf != nullptr);
593
26
        for (uint64_t lcv = 0; lcv < map_size; lcv++) {
594
            // get map key
595
18
            uint32_t key = decode_fixed32_le(reinterpret_cast<const uint8_t*>(buf));
596
18
            buf += sizeof(uint32_t);
597
            // read map value Roaring
598
18
            roaring::Roaring read_var = roaring::Roaring::read(buf, is_v1);
599
            // forward buffer past the last Roaring Bitmap
600
18
            buf += read_var.getSizeInBytes(is_v1);
601
18
            result.emplaceOrInsert(key, std::move(read_var));
602
18
        }
603
8
        return result;
604
6.12k
    }
605
606
    /**
607
     * How many bytes are required to serialize this bitmap
608
     */
609
17.3k
    size_t getSizeInBytes(int serialize_version) const {
610
17.3k
        bool is_v1 = serialize_version == 1;
611
17.3k
        if (is32BitsEnough()) {
612
17.3k
            auto it = roarings.find(0);
613
17.3k
            if (it == roarings.end()) { // empty bitmap
614
2
                roaring::Roaring r;
615
2
                return r.getSizeInBytes(is_v1) + 1;
616
2
            }
617
17.3k
            return it->second.getSizeInBytes(is_v1) + 1;
618
17.3k
        }
619
        // start with type code, map size and size of keys for each map entry
620
13
        size_t init = 1 + varint_length(roarings.size()) + roarings.size() * sizeof(uint32_t);
621
13
        return std::accumulate(
622
13
                roarings.cbegin(), roarings.cend(), init,
623
28
                [=](size_t previous, const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
624
                    // add in bytes used by each Roaring
625
28
                    return previous + map_entry.second.getSizeInBytes(is_v1);
626
28
                });
627
17.3k
    }
628
629
    /**
630
     * remove all elements
631
     */
632
0
    void clear() { roarings.clear(); }
633
634
    /**
635
     * Return whether all elements can be represented in 32 bits
636
     */
637
28.6k
    bool is32BitsEnough() const { return maximum() <= std::numeric_limits<uint32_t>::max(); }
638
639
    /**
640
     * Computes the intersection between two bitmaps and returns new bitmap.
641
     * The current bitmap and the provided bitmap are unchanged.
642
     */
643
2
    Roaring64Map operator&(const Roaring64Map& o) const { return Roaring64Map(*this) &= o; }
644
645
    /**
646
     * Computes the difference between two bitmaps and returns new bitmap.
647
     * The current bitmap and the provided bitmap are unchanged.
648
     */
649
0
    Roaring64Map operator-(const Roaring64Map& o) const { return Roaring64Map(*this) -= o; }
650
651
    /**
652
     * Computes the union between two bitmaps and returns new bitmap.
653
     * The current bitmap and the provided bitmap are unchanged.
654
     */
655
1
    Roaring64Map operator|(const Roaring64Map& o) const { return Roaring64Map(*this) |= o; }
656
657
    /**
658
     * Computes the symmetric union between two bitmaps and returns new bitmap.
659
     * The current bitmap and the provided bitmap are unchanged.
660
     */
661
1
    Roaring64Map operator^(const Roaring64Map& o) const { return Roaring64Map(*this) ^= o; }
662
663
    /**
664
     * computes the logical or (union) between "n" bitmaps (referenced by a
665
     * pointer).
666
     */
667
5
    static Roaring64Map fastunion(size_t n, const Roaring64Map** inputs) {
668
5
        Roaring64Map ans;
669
        // not particularly fast
670
12
        for (size_t lcv = 0; lcv < n; ++lcv) {
671
7
            ans |= *(inputs[lcv]);
672
7
        }
673
5
        return ans;
674
5
    }
675
676
    friend class Roaring64MapSetBitForwardIterator;
677
    using const_iterator = Roaring64MapSetBitForwardIterator;
678
679
    /**
680
    * Returns an iterator that can be used to access the position of the
681
    * set bits. The running time complexity of a full scan is proportional to
682
    * the
683
    * number
684
    * of set bits: be aware that if you have long strings of 1s, this can be
685
    * very inefficient.
686
    *
687
    * It can be much faster to use the toArray method if you want to
688
    * retrieve the set bits.
689
    */
690
    const_iterator begin() const;
691
692
    /**
693
    * A bogus iterator that can be used together with begin()
694
    * for constructions such as for(auto i = b.begin();
695
    * i!=b.end(); ++i) {}
696
    */
697
    const_iterator end() const;
698
699
private:
700
    phmap::btree_map<uint32_t, roaring::Roaring> roarings {};
701
    bool copyOnWrite {false};
702
6.33M
    static uint32_t highBytes(const uint64_t in) { return uint32_t(in >> 32); }
703
3.16M
    static uint32_t lowBytes(const uint64_t in) { return uint32_t(in); }
704
31.6k
    static uint64_t uniteBytes(const uint32_t highBytes, const uint32_t lowBytes) {
705
31.6k
        return (uint64_t(highBytes) << 32) | uint64_t(lowBytes);
706
31.6k
    }
707
1
    void emplaceOrInsert(const uint32_t key, const roaring::Roaring& value) {
708
1
        roarings.emplace(std::make_pair(key, value));
709
1
    }
710
711
6.13k
    void emplaceOrInsert(const uint32_t key, roaring::Roaring&& value) {
712
6.13k
        roarings.emplace(key, value);
713
6.13k
    }
714
};
715
716
// Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.4.0/cpp/roaring64map.hh
717
// Used to go through the set bits. Not optimally fast, but convenient.
718
class Roaring64MapSetBitForwardIterator {
719
public:
720
    using type_of_iterator = Roaring64MapSetBitForwardIterator;
721
722
    /**
723
     * Provides the location of the set bit.
724
     */
725
2.96k
    uint64_t operator*() const {
726
2.96k
        return Roaring64Map::uniteBytes(map_iter->first, i.current_value);
727
2.96k
    }
728
729
975
    type_of_iterator& operator++() { // ++i, must returned inc. value
730
975
        if (i.has_value) {
731
975
            roaring_advance_uint32_iterator(&i);
732
975
        }
733
1.00k
        while (!i.has_value) {
734
34
            map_iter++;
735
34
            if (map_iter == map_end) return *this;
736
25
            roaring_init_iterator(&map_iter->second.roaring, &i);
737
25
        }
738
966
        return *this;
739
975
    }
740
741
1.80k
    type_of_iterator operator++(int) { // i++, must return orig. value
742
1.80k
        Roaring64MapSetBitForwardIterator orig(*this);
743
1.80k
        roaring_advance_uint32_iterator(&i);
744
1.81k
        while (!i.has_value) {
745
4
            map_iter++;
746
4
            if (map_iter == map_end) return orig;
747
2
            roaring_init_iterator(&map_iter->second.roaring, &i);
748
2
        }
749
1.80k
        return orig;
750
1.80k
    }
751
752
5
    bool move(const uint64_t& x) {
753
5
        map_iter = p.lower_bound(Roaring64Map::highBytes(x));
754
5
        if (map_iter != p.cend()) {
755
5
            roaring_init_iterator(&map_iter->second.roaring, &i);
756
5
            if (map_iter->first == Roaring64Map::highBytes(x)) {
757
5
                if (roaring_move_uint32_iterator_equalorlarger(&i, Roaring64Map::lowBytes(x)))
758
4
                    return true;
759
1
                map_iter++;
760
1
                if (map_iter == map_end) return false;
761
0
                roaring_init_iterator(&map_iter->second.roaring, &i);
762
0
            }
763
0
            return true;
764
5
        }
765
0
        return false;
766
5
    }
767
768
527
    bool operator==(const Roaring64MapSetBitForwardIterator& o) const {
769
527
        if (map_iter == map_end && o.map_iter == o.map_end) return true;
770
519
        if (o.map_iter == o.map_end) return false;
771
1
        return **this == *o;
772
519
    }
773
774
2.30k
    bool operator!=(const Roaring64MapSetBitForwardIterator& o) const {
775
2.30k
        if (map_iter == map_end && o.map_iter == o.map_end) return false;
776
2.29k
        if (o.map_iter == o.map_end) return true;
777
0
        return **this != *o;
778
2.29k
    }
779
5
    Roaring64MapSetBitForwardIterator& operator=(const Roaring64MapSetBitForwardIterator& r) {
780
5
        map_iter = r.map_iter;
781
5
        map_end = r.map_end;
782
5
        i = r.i;
783
5
        return *this;
784
5
    }
785
786
    Roaring64MapSetBitForwardIterator(const Roaring64MapSetBitForwardIterator& r) = default;
787
788
    Roaring64MapSetBitForwardIterator(const Roaring64Map& parent, bool exhausted = false)
789
2.29k
            : p(parent.roarings), map_end(parent.roarings.cend()) {
790
2.29k
        if (exhausted || parent.roarings.empty()) {
791
2.27k
            map_iter = parent.roarings.cend();
792
2.27k
        } else {
793
21
            map_iter = parent.roarings.cbegin();
794
21
            roaring_init_iterator(&map_iter->second.roaring, &i);
795
21
            while (!i.has_value) {
796
0
                map_iter++;
797
0
                if (map_iter == map_end) return;
798
0
                roaring_init_iterator(&map_iter->second.roaring, &i);
799
0
            }
800
21
        }
801
2.29k
    }
802
803
protected:
804
    const phmap::btree_map<uint32_t, roaring::Roaring>& p;
805
    phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_iter {};
806
    phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_end {};
807
    roaring::api::roaring_uint32_iterator_t i {};
808
};
809
810
18
inline Roaring64MapSetBitForwardIterator Roaring64Map::begin() const {
811
18
    return {*this};
812
18
}
813
814
2.26k
inline Roaring64MapSetBitForwardIterator Roaring64Map::end() const {
815
2.26k
    return {*this, true};
816
2.26k
}
817
818
} // namespace detail
819
820
// Represent the in-memory and on-disk structure of Doris's BITMAP data type.
821
// Optimize for the case where the bitmap contains 0 or 1 element which is common
822
// for streaming load scenario.
823
class BitmapValueIterator;
824
class BitmapValue {
825
public:
826
    template <typename T>
827
    using SetContainer = phmap::flat_hash_set<T>;
828
829
    // Construct an empty bitmap.
830
89.5k
    BitmapValue() : _sv(0), _bitmap(nullptr), _type(EMPTY), _is_shared(false) { _set.clear(); }
831
832
    // Construct a bitmap with one element.
833
    explicit BitmapValue(uint64_t value)
834
43
            : _sv(value), _bitmap(nullptr), _type(SINGLE), _is_shared(false) {}
835
836
    // Construct a bitmap from serialized data.
837
3.07k
    explicit BitmapValue(const char* src) : _is_shared(false) {
838
3.07k
        bool res = deserialize(src);
839
3.07k
        DCHECK(res);
840
3.07k
    }
841
842
10.8k
    BitmapValue(const BitmapValue& other) {
843
10.8k
        _type = other._type;
844
10.8k
        switch (other._type) {
845
36
        case EMPTY:
846
36
            break;
847
95
        case SINGLE:
848
95
            _sv = other._sv;
849
95
            break;
850
10.5k
        case BITMAP:
851
10.5k
            _bitmap = other._bitmap;
852
10.5k
            break;
853
190
        case SET:
854
190
            _set = other._set;
855
190
            break;
856
10.8k
        }
857
858
10.8k
        if (other._type == BITMAP) {
859
10.5k
            _is_shared = true;
860
            // should also set other's state to shared, so that other bitmap value will
861
            // create a new bitmap when it wants to modify it.
862
10.5k
            const_cast<BitmapValue&>(other)._is_shared = true;
863
10.5k
        }
864
10.8k
    }
865
866
12.5k
    BitmapValue(BitmapValue&& other) noexcept {
867
12.5k
        _type = other._type;
868
12.5k
        switch (other._type) {
869
19
        case EMPTY:
870
19
            break;
871
170
        case SINGLE:
872
170
            _sv = other._sv;
873
170
            break;
874
11.9k
        case BITMAP:
875
11.9k
            _bitmap = std::move(other._bitmap);
876
11.9k
            other._bitmap = nullptr;
877
11.9k
            break;
878
427
        case SET:
879
427
            _set = std::move(other._set);
880
427
            break;
881
12.5k
        }
882
12.5k
        _is_shared = other._is_shared;
883
12.5k
        other._type = EMPTY;
884
12.5k
        other._is_shared = false;
885
12.5k
    }
886
887
81
    BitmapValue& operator=(const BitmapValue& other) {
888
81
        if (this == &other) {
889
0
            return *this;
890
0
        }
891
81
        reset();
892
81
        _type = other._type;
893
81
        switch (other._type) {
894
13
        case EMPTY:
895
13
            break;
896
24
        case SINGLE:
897
24
            _sv = other._sv;
898
24
            break;
899
17
        case BITMAP:
900
17
            _bitmap = other._bitmap;
901
17
            break;
902
27
        case SET:
903
27
            _set = other._set;
904
27
            break;
905
81
        }
906
907
81
        if (other._type == BITMAP) {
908
17
            _is_shared = true;
909
            // should also set other's state to shared, so that other bitmap value will
910
            // create a new bitmap when it wants to modify it.
911
17
            const_cast<BitmapValue&>(other)._is_shared = true;
912
17
        }
913
81
        return *this;
914
81
    }
915
916
0
    static BitmapValue empty_bitmap() { return BitmapValue {}; }
917
918
18
    BitmapValue& operator=(BitmapValue&& other) noexcept {
919
18
        if (this == &other) {
920
0
            return *this;
921
0
        }
922
18
        reset();
923
924
18
        _type = other._type;
925
18
        switch (other._type) {
926
0
        case EMPTY:
927
0
            break;
928
12
        case SINGLE:
929
12
            _sv = other._sv;
930
12
            break;
931
4
        case BITMAP:
932
4
            _bitmap = std::move(other._bitmap);
933
4
            other._bitmap = nullptr;
934
4
            break;
935
2
        case SET:
936
2
            _set = std::move(other._set);
937
2
            break;
938
18
        }
939
18
        _is_shared = other._is_shared;
940
18
        return *this;
941
18
    }
942
943
    // Construct a bitmap from given elements.
944
59
    explicit BitmapValue(const std::vector<uint64_t>& bits) : _is_shared(false) {
945
59
        if (bits.size() == 0) {
946
0
            _type = EMPTY;
947
0
            return;
948
0
        }
949
950
59
        if (bits.size() == 1) {
951
1
            _type = SINGLE;
952
1
            _sv = bits[0];
953
1
            return;
954
1
        }
955
956
58
        if (!config::enable_set_in_bitmap_value || bits.size() > SET_TYPE_THRESHOLD) {
957
26
            _type = BITMAP;
958
26
            _prepare_bitmap_for_write();
959
26
            _bitmap->addMany(bits.size(), &bits[0]);
960
32
        } else {
961
32
            _type = SET;
962
126
            for (auto v : bits) {
963
126
                _set.insert(v);
964
126
            }
965
32
        }
966
58
    }
967
968
31
    BitmapTypeCode::type get_type_code() const {
969
31
        switch (_type) {
970
1
        case EMPTY:
971
1
            return BitmapTypeCode::EMPTY;
972
7
        case SINGLE:
973
7
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
974
4
                return BitmapTypeCode::SINGLE32;
975
4
            } else {
976
3
                return BitmapTypeCode::SINGLE64;
977
3
            }
978
9
        case SET:
979
9
            return BitmapTypeCode::SET;
980
14
        case BITMAP:
981
14
            bool is_v1 = (config::bitmap_serialize_version == 1);
982
14
            if (_bitmap->is32BitsEnough()) {
983
9
                return is_v1 ? BitmapTypeCode::type::BITMAP32 : BitmapTypeCode::type::BITMAP32_V2;
984
9
            } else {
985
5
                return is_v1 ? BitmapTypeCode::type::BITMAP64 : BitmapTypeCode::type::BITMAP64_V2;
986
5
            }
987
31
        }
988
0
        __builtin_unreachable();
989
31
    }
990
991
    template <typename T>
992
22
    void add_many(const T* values, const size_t count) {
993
22
        switch (_type) {
994
17
        case EMPTY:
995
17
            if (count == 1) {
996
0
                _sv = values[0];
997
0
                _type = SINGLE;
998
17
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
999
270
                for (size_t i = 0; i != count; ++i) {
1000
259
                    _set.insert(values[i]);
1001
259
                }
1002
11
                _type = SET;
1003
11
            } else {
1004
6
                _prepare_bitmap_for_write();
1005
6
                _bitmap->addMany(count, values);
1006
6
                _type = BITMAP;
1007
6
            }
1008
17
            break;
1009
0
        case SINGLE:
1010
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1011
0
                _set.insert(_sv);
1012
0
                for (size_t i = 0; i != count; ++i) {
1013
0
                    _set.insert(values[i]);
1014
0
                }
1015
0
                _type = SET;
1016
0
                _convert_to_bitmap_if_need();
1017
0
            } else {
1018
0
                _prepare_bitmap_for_write();
1019
0
                _bitmap->add(_sv);
1020
0
                _bitmap->addMany(count, values);
1021
0
                _type = BITMAP;
1022
0
            }
1023
0
            break;
1024
1
        case BITMAP:
1025
1
            _prepare_bitmap_for_write();
1026
1
            _bitmap->addMany(count, values);
1027
1
            break;
1028
4
        case SET:
1029
324
            for (size_t i = 0; i != count; ++i) {
1030
320
                _set.insert(values[i]);
1031
320
            }
1032
4
            _convert_to_bitmap_if_need();
1033
4
            break;
1034
22
        }
1035
22
    }
_ZN5doris11BitmapValue8add_manyIiEEvPKT_m
Line
Count
Source
992
5
    void add_many(const T* values, const size_t count) {
993
5
        switch (_type) {
994
5
        case EMPTY:
995
5
            if (count == 1) {
996
0
                _sv = values[0];
997
0
                _type = SINGLE;
998
5
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
999
78
                for (size_t i = 0; i != count; ++i) {
1000
73
                    _set.insert(values[i]);
1001
73
                }
1002
5
                _type = SET;
1003
5
            } else {
1004
0
                _prepare_bitmap_for_write();
1005
0
                _bitmap->addMany(count, values);
1006
0
                _type = BITMAP;
1007
0
            }
1008
5
            break;
1009
0
        case SINGLE:
1010
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1011
0
                _set.insert(_sv);
1012
0
                for (size_t i = 0; i != count; ++i) {
1013
0
                    _set.insert(values[i]);
1014
0
                }
1015
0
                _type = SET;
1016
0
                _convert_to_bitmap_if_need();
1017
0
            } else {
1018
0
                _prepare_bitmap_for_write();
1019
0
                _bitmap->add(_sv);
1020
0
                _bitmap->addMany(count, values);
1021
0
                _type = BITMAP;
1022
0
            }
1023
0
            break;
1024
0
        case BITMAP:
1025
0
            _prepare_bitmap_for_write();
1026
0
            _bitmap->addMany(count, values);
1027
0
            break;
1028
0
        case SET:
1029
0
            for (size_t i = 0; i != count; ++i) {
1030
0
                _set.insert(values[i]);
1031
0
            }
1032
0
            _convert_to_bitmap_if_need();
1033
0
            break;
1034
5
        }
1035
5
    }
_ZN5doris11BitmapValue8add_manyImEEvPKT_m
Line
Count
Source
992
17
    void add_many(const T* values, const size_t count) {
993
17
        switch (_type) {
994
12
        case EMPTY:
995
12
            if (count == 1) {
996
0
                _sv = values[0];
997
0
                _type = SINGLE;
998
12
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
999
192
                for (size_t i = 0; i != count; ++i) {
1000
186
                    _set.insert(values[i]);
1001
186
                }
1002
6
                _type = SET;
1003
6
            } else {
1004
6
                _prepare_bitmap_for_write();
1005
6
                _bitmap->addMany(count, values);
1006
6
                _type = BITMAP;
1007
6
            }
1008
12
            break;
1009
0
        case SINGLE:
1010
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1011
0
                _set.insert(_sv);
1012
0
                for (size_t i = 0; i != count; ++i) {
1013
0
                    _set.insert(values[i]);
1014
0
                }
1015
0
                _type = SET;
1016
0
                _convert_to_bitmap_if_need();
1017
0
            } else {
1018
0
                _prepare_bitmap_for_write();
1019
0
                _bitmap->add(_sv);
1020
0
                _bitmap->addMany(count, values);
1021
0
                _type = BITMAP;
1022
0
            }
1023
0
            break;
1024
1
        case BITMAP:
1025
1
            _prepare_bitmap_for_write();
1026
1
            _bitmap->addMany(count, values);
1027
1
            break;
1028
4
        case SET:
1029
324
            for (size_t i = 0; i != count; ++i) {
1030
320
                _set.insert(values[i]);
1031
320
            }
1032
4
            _convert_to_bitmap_if_need();
1033
4
            break;
1034
17
        }
1035
17
    }
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIhEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIaEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIsEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIlEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyInEEvPKT_m
1036
1037
3.15M
    void add(uint64_t value) {
1038
3.15M
        switch (_type) {
1039
6.34k
        case EMPTY:
1040
6.34k
            if (!config::enable_set_in_bitmap_value) {
1041
5.30k
                _sv = value;
1042
5.30k
                _type = SINGLE;
1043
5.30k
            } else {
1044
1.03k
                _set.insert(value);
1045
1.03k
                _type = SET;
1046
1.03k
            }
1047
6.34k
            break;
1048
5.26k
        case SINGLE:
1049
            //there is no need to convert the type if two variables are equal
1050
5.26k
            if (_sv == value) {
1051
2
                break;
1052
2
            }
1053
5.26k
            if (config::enable_set_in_bitmap_value) {
1054
4
                _set.insert(_sv);
1055
4
                _set.insert(value);
1056
4
                _type = SET;
1057
5.26k
            } else {
1058
5.26k
                _prepare_bitmap_for_write();
1059
5.26k
                _bitmap->add(_sv);
1060
5.26k
                _bitmap->add(value);
1061
5.26k
                _type = BITMAP;
1062
5.26k
            }
1063
5.26k
            break;
1064
3.11M
        case BITMAP:
1065
3.11M
            _prepare_bitmap_for_write();
1066
3.11M
            _bitmap->add(value);
1067
3.11M
            break;
1068
32.3k
        case SET:
1069
32.3k
            _set.insert(value);
1070
32.3k
            _convert_to_bitmap_if_need();
1071
32.3k
            break;
1072
3.15M
        }
1073
3.15M
    }
1074
1075
8
    void remove(uint64_t value) {
1076
8
        switch (_type) {
1077
1
        case EMPTY:
1078
1
            break;
1079
1
        case SINGLE:
1080
            //there is need to convert the type if two variables are equal
1081
1
            if (_sv == value) {
1082
0
                _type = EMPTY;
1083
0
            }
1084
1
            break;
1085
4
        case BITMAP:
1086
4
            _prepare_bitmap_for_write();
1087
4
            _bitmap->remove(value);
1088
4
            _convert_to_smaller_type();
1089
4
            break;
1090
2
        case SET:
1091
2
            _set.erase(value);
1092
2
            _convert_to_smaller_type();
1093
2
            break;
1094
8
        }
1095
8
    }
1096
1097
    // Compute the union between the current bitmap and the provided bitmap.
1098
16
    BitmapValue& operator-=(const BitmapValue& rhs) {
1099
16
        switch (rhs._type) {
1100
4
        case EMPTY:
1101
4
            break;
1102
4
        case SINGLE:
1103
4
            remove(rhs._sv);
1104
4
            break;
1105
4
        case BITMAP:
1106
4
            switch (_type) {
1107
1
            case EMPTY:
1108
1
                break;
1109
1
            case SINGLE:
1110
1
                if (rhs._bitmap->contains(_sv)) {
1111
0
                    _type = EMPTY;
1112
0
                }
1113
1
                break;
1114
1
            case BITMAP:
1115
1
                _prepare_bitmap_for_write();
1116
1
                *_bitmap -= *rhs._bitmap;
1117
1
                _convert_to_smaller_type();
1118
1
                break;
1119
1
            case SET: {
1120
32
                for (auto it = _set.begin(); it != _set.end();) {
1121
31
                    if (rhs.contains(*it)) {
1122
16
                        it = _set.erase(it);
1123
16
                    } else {
1124
15
                        ++it;
1125
15
                    }
1126
31
                }
1127
1
                _convert_to_smaller_type();
1128
1
                break;
1129
0
            }
1130
4
            }
1131
4
            break;
1132
4
        case SET: {
1133
4
            switch (_type) {
1134
1
            case EMPTY:
1135
1
                break;
1136
1
            case SINGLE:
1137
1
                if (rhs._set.contains(_sv)) {
1138
0
                    _type = EMPTY;
1139
0
                }
1140
1
                break;
1141
1
            case BITMAP:
1142
1
                _prepare_bitmap_for_write();
1143
31
                for (auto v : rhs._set) {
1144
31
                    if (_bitmap->contains(v)) {
1145
0
                        _bitmap->remove(v);
1146
0
                    }
1147
31
                }
1148
1
                _convert_to_smaller_type();
1149
1
                break;
1150
1
            case SET: {
1151
32
                for (auto it = _set.begin(); it != _set.end();) {
1152
31
                    if (rhs.contains(*it)) {
1153
14
                        it = _set.erase(it);
1154
17
                    } else {
1155
17
                        ++it;
1156
17
                    }
1157
31
                }
1158
1
                _convert_to_smaller_type();
1159
1
                break;
1160
0
            }
1161
4
            }
1162
4
        }
1163
16
        }
1164
16
        return *this;
1165
16
    }
1166
1167
    // Compute the union between the current bitmap and the provided bitmap.
1168
    // Possible type transitions are:
1169
    // EMPTY  -> SINGLE
1170
    // EMPTY  -> BITMAP
1171
    // SINGLE -> BITMAP
1172
51
    BitmapValue& operator|=(const BitmapValue& rhs) {
1173
51
        switch (rhs._type) {
1174
12
        case EMPTY:
1175
12
            break;
1176
17
        case SINGLE:
1177
17
            add(rhs._sv);
1178
17
            break;
1179
8
        case BITMAP:
1180
8
            switch (_type) {
1181
2
            case EMPTY:
1182
2
                _bitmap = rhs._bitmap;
1183
2
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1184
2
                _is_shared = true;
1185
2
                _type = BITMAP;
1186
2
                break;
1187
3
            case SINGLE:
1188
3
                _bitmap = rhs._bitmap;
1189
3
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1190
3
                _is_shared = true;
1191
3
                _prepare_bitmap_for_write();
1192
3
                _bitmap->add(_sv);
1193
3
                _type = BITMAP;
1194
3
                break;
1195
2
            case BITMAP:
1196
2
                _prepare_bitmap_for_write();
1197
2
                *_bitmap |= *rhs._bitmap;
1198
2
                break;
1199
1
            case SET: {
1200
1
                _prepare_bitmap_for_write();
1201
1
                *_bitmap = *rhs._bitmap;
1202
1203
31
                for (auto v : _set) {
1204
31
                    _bitmap->add(v);
1205
31
                }
1206
1
                _type = BITMAP;
1207
1
                break;
1208
0
            }
1209
8
            }
1210
8
            break;
1211
14
        case SET:
1212
14
            switch (_type) {
1213
1
            case EMPTY:
1214
1
                _set = rhs._set;
1215
1
                _type = SET;
1216
1
                break;
1217
1
            case SINGLE: {
1218
1
                if ((rhs._set.size() + rhs._set.contains(_sv) > SET_TYPE_THRESHOLD)) {
1219
0
                    _prepare_bitmap_for_write();
1220
0
                    _bitmap->add(_sv);
1221
0
                    for (auto v : rhs._set) {
1222
0
                        _bitmap->add(v);
1223
0
                    }
1224
0
                    _type = BITMAP;
1225
1
                } else {
1226
1
                    _set = rhs._set;
1227
1
                    _set.insert(_sv);
1228
1
                    _type = SET;
1229
1
                }
1230
1
                break;
1231
0
            }
1232
1
            case BITMAP:
1233
1
                _prepare_bitmap_for_write();
1234
31
                for (auto v : rhs._set) {
1235
31
                    _bitmap->add(v);
1236
31
                }
1237
1
                break;
1238
11
            case SET: {
1239
58
                for (auto v : rhs._set) {
1240
58
                    _set.insert(v);
1241
58
                }
1242
11
                _convert_to_bitmap_if_need();
1243
11
                break;
1244
0
            }
1245
14
            }
1246
14
            break;
1247
51
        }
1248
51
        return *this;
1249
51
    }
1250
1251
19
    BitmapValue& fastunion(const std::vector<const BitmapValue*>& values) {
1252
19
        std::vector<const detail::Roaring64Map*> bitmaps;
1253
19
        std::vector<uint64_t> single_values;
1254
19
        std::vector<const SetContainer<uint64_t>*> sets;
1255
25
        for (const auto* value : values) {
1256
25
            switch (value->_type) {
1257
5
            case EMPTY:
1258
5
                break;
1259
5
            case SINGLE:
1260
5
                single_values.push_back(value->_sv);
1261
5
                break;
1262
5
            case BITMAP:
1263
5
                bitmaps.push_back(value->_bitmap.get());
1264
5
                break;
1265
10
            case SET:
1266
10
                sets.push_back(&value->_set);
1267
10
                break;
1268
25
            }
1269
25
        }
1270
1271
19
        if (!bitmaps.empty()) {
1272
5
            _prepare_bitmap_for_write();
1273
5
            switch (_type) {
1274
2
            case EMPTY:
1275
2
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1276
2
                break;
1277
2
            case SINGLE:
1278
2
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1279
2
                _bitmap->add(_sv);
1280
2
                break;
1281
1
            case BITMAP:
1282
1
                for (const auto* bitmap : bitmaps) {
1283
1
                    *_bitmap |= *bitmap;
1284
1
                }
1285
1
                break;
1286
0
            case SET: {
1287
0
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1288
0
                for (auto v : _set) {
1289
0
                    _bitmap->add(v);
1290
0
                }
1291
0
                _set.clear();
1292
0
                break;
1293
0
            }
1294
5
            }
1295
5
            _type = BITMAP;
1296
5
        }
1297
1298
19
        if (!sets.empty()) {
1299
10
            for (auto& set : sets) {
1300
96
                for (auto v : *set) {
1301
96
                    _set.insert(v);
1302
96
                }
1303
10
            }
1304
8
            switch (_type) {
1305
7
            case EMPTY:
1306
7
                _type = SET;
1307
7
                break;
1308
0
            case SINGLE: {
1309
0
                _set.insert(_sv);
1310
0
                _type = SET;
1311
0
                break;
1312
0
            }
1313
0
            case BITMAP:
1314
0
                _prepare_bitmap_for_write();
1315
0
                for (auto v : _set) {
1316
0
                    _bitmap->add(v);
1317
0
                }
1318
0
                _type = BITMAP;
1319
0
                _set.clear();
1320
0
                break;
1321
1
            case SET: {
1322
1
                break;
1323
0
            }
1324
8
            }
1325
8
            if (_type == SET) {
1326
8
                _convert_to_bitmap_if_need();
1327
8
            }
1328
8
        }
1329
1330
19
        if (_type == EMPTY && single_values.size() == 1) {
1331
1
            if (config::enable_set_in_bitmap_value) {
1332
0
                _type = SET;
1333
0
                _set.insert(single_values[0]);
1334
1
            } else {
1335
1
                _sv = single_values[0];
1336
1
                _type = SINGLE;
1337
1
            }
1338
18
        } else if (!single_values.empty()) {
1339
4
            switch (_type) {
1340
0
            case EMPTY:
1341
1
            case SINGLE:
1342
1
                if (config::enable_set_in_bitmap_value) {
1343
0
                    _set.insert(single_values.cbegin(), single_values.cend());
1344
0
                    if (_type == SINGLE) {
1345
0
                        _set.insert(_sv);
1346
0
                    }
1347
0
                    _type = SET;
1348
0
                    _convert_to_bitmap_if_need();
1349
1
                } else {
1350
1
                    _prepare_bitmap_for_write();
1351
1
                    _bitmap->addMany(single_values.size(), single_values.data());
1352
1
                    if (_type == SINGLE) {
1353
1
                        _bitmap->add(_sv);
1354
1
                    }
1355
1
                    _type = BITMAP;
1356
1
                    _convert_to_smaller_type();
1357
1
                }
1358
1
                break;
1359
3
            case BITMAP: {
1360
3
                _prepare_bitmap_for_write();
1361
3
                _bitmap->addMany(single_values.size(), single_values.data());
1362
3
                break;
1363
0
            }
1364
0
            case SET:
1365
0
                _set.insert(single_values.cbegin(), single_values.cend());
1366
0
                _convert_to_bitmap_if_need();
1367
0
                break;
1368
4
            }
1369
4
        }
1370
1371
19
        return *this;
1372
19
    }
1373
1374
    // Compute the intersection between the current bitmap and the provided bitmap.
1375
    // Possible type transitions are:
1376
    // SINGLE -> EMPTY
1377
    // BITMAP -> EMPTY
1378
    // BITMAP -> SINGLE
1379
50
    BitmapValue& operator&=(const BitmapValue& rhs) {
1380
50
        switch (rhs._type) {
1381
14
        case EMPTY:
1382
14
            reset(); // empty & any = empty
1383
14
            break;
1384
10
        case SINGLE:
1385
10
            switch (_type) {
1386
2
            case EMPTY:
1387
2
                break;
1388
3
            case SINGLE:
1389
3
                if (_sv != rhs._sv) {
1390
2
                    _type = EMPTY;
1391
2
                }
1392
3
                break;
1393
4
            case BITMAP:
1394
4
                if (!_bitmap->contains(rhs._sv)) {
1395
1
                    _type = EMPTY;
1396
3
                } else {
1397
3
                    _type = SINGLE;
1398
3
                    _sv = rhs._sv;
1399
3
                }
1400
4
                _bitmap.reset();
1401
4
                _is_shared = false;
1402
4
                break;
1403
1
            case SET:
1404
1
                if (!_set.contains(rhs._sv)) {
1405
0
                    _type = EMPTY;
1406
1
                } else {
1407
1
                    _type = SINGLE;
1408
1
                    _sv = rhs._sv;
1409
1
                }
1410
1
                _set.clear();
1411
1
                break;
1412
10
            }
1413
10
            break;
1414
10
        case BITMAP:
1415
9
            switch (_type) {
1416
2
            case EMPTY:
1417
2
                break;
1418
3
            case SINGLE:
1419
3
                if (!rhs._bitmap->contains(_sv)) {
1420
2
                    _type = EMPTY;
1421
2
                }
1422
3
                break;
1423
3
            case BITMAP:
1424
3
                _prepare_bitmap_for_write();
1425
3
                *_bitmap &= *rhs._bitmap;
1426
3
                _convert_to_smaller_type();
1427
3
                break;
1428
1
            case SET:
1429
32
                for (auto it = _set.begin(); it != _set.end();) {
1430
31
                    if (!rhs._bitmap->contains(*it)) {
1431
15
                        it = _set.erase(it);
1432
16
                    } else {
1433
16
                        ++it;
1434
16
                    }
1435
31
                }
1436
1
                _convert_to_smaller_type();
1437
1
                break;
1438
9
            }
1439
9
            break;
1440
17
        case SET:
1441
17
            switch (_type) {
1442
1
            case EMPTY:
1443
1
                break;
1444
1
            case SINGLE:
1445
1
                if (!rhs._set.contains(_sv)) {
1446
1
                    _type = EMPTY;
1447
1
                }
1448
1
                break;
1449
1
            case BITMAP:
1450
1
                _prepare_bitmap_for_write();
1451
31
                for (auto v : rhs._set) {
1452
31
                    if (_bitmap->contains(v)) {
1453
0
                        _set.insert(v);
1454
0
                    }
1455
31
                }
1456
1
                _type = SET;
1457
1
                _bitmap.reset();
1458
1
                _is_shared = false;
1459
1
                _convert_to_smaller_type();
1460
1
                break;
1461
14
            case SET:
1462
79
                for (auto it = _set.begin(); it != _set.end();) {
1463
65
                    if (!rhs._set.contains(*it)) {
1464
37
                        it = _set.erase(it);
1465
37
                    } else {
1466
28
                        ++it;
1467
28
                    }
1468
65
                }
1469
14
                _convert_to_smaller_type();
1470
14
                break;
1471
17
            }
1472
17
            break;
1473
50
        }
1474
50
        return *this;
1475
50
    }
1476
1477
    // Compute the symmetric union between the current bitmap and the provided bitmap.
1478
    // Possible type transitions are:
1479
    // SINGLE -> EMPTY
1480
    // BITMAP -> EMPTY
1481
    // BITMAP -> SINGLE
1482
27
    BitmapValue& operator^=(const BitmapValue& rhs) {
1483
27
        switch (rhs._type) {
1484
8
        case EMPTY:
1485
8
            break;
1486
4
        case SINGLE:
1487
4
            switch (_type) {
1488
1
            case EMPTY:
1489
1
                add(rhs._sv);
1490
1
                break;
1491
1
            case SINGLE:
1492
1
                if (_sv == rhs._sv) {
1493
0
                    _type = EMPTY;
1494
1
                } else {
1495
1
                    add(rhs._sv);
1496
1
                }
1497
1
                break;
1498
1
            case BITMAP:
1499
1
                if (!_bitmap->contains(rhs._sv)) {
1500
0
                    add(rhs._sv);
1501
1
                } else {
1502
1
                    _prepare_bitmap_for_write();
1503
1
                    _bitmap->remove(rhs._sv);
1504
1
                }
1505
1
                break;
1506
1
            case SET:
1507
1
                if (!_set.contains(rhs._sv)) {
1508
0
                    _set.insert(rhs._sv);
1509
1
                } else {
1510
1
                    _set.erase(rhs._sv);
1511
1
                }
1512
1
                break;
1513
4
            }
1514
4
            break;
1515
4
        case BITMAP:
1516
4
            switch (_type) {
1517
1
            case EMPTY:
1518
1
                _bitmap = rhs._bitmap;
1519
1
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1520
1
                _is_shared = true;
1521
1
                _type = BITMAP;
1522
1
                break;
1523
1
            case SINGLE:
1524
1
                _bitmap = rhs._bitmap;
1525
1
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1526
1
                _is_shared = true;
1527
1
                _type = BITMAP;
1528
1
                _prepare_bitmap_for_write();
1529
1
                if (!rhs._bitmap->contains(_sv)) {
1530
1
                    _bitmap->add(_sv);
1531
1
                } else {
1532
0
                    _bitmap->remove(_sv);
1533
0
                }
1534
1
                break;
1535
1
            case BITMAP:
1536
1
                _prepare_bitmap_for_write();
1537
1
                *_bitmap ^= *rhs._bitmap;
1538
1
                _convert_to_smaller_type();
1539
1
                break;
1540
1
            case SET:
1541
1
                _prepare_bitmap_for_write();
1542
1
                *_bitmap = *rhs._bitmap;
1543
31
                for (auto v : _set) {
1544
31
                    if (_bitmap->contains(v)) {
1545
16
                        _bitmap->remove(v);
1546
16
                    } else {
1547
15
                        _bitmap->add(v);
1548
15
                    }
1549
31
                }
1550
1
                _type = BITMAP;
1551
1
                _convert_to_smaller_type();
1552
1
                break;
1553
4
            }
1554
4
            break;
1555
11
        case SET:
1556
11
            switch (_type) {
1557
1
            case EMPTY:
1558
1
                _set = rhs._set;
1559
1
                _type = SET;
1560
1
                break;
1561
1
            case SINGLE:
1562
1
                _set = rhs._set;
1563
1
                if (!rhs._set.contains(_sv)) {
1564
1
                    _set.insert(_sv);
1565
1
                } else {
1566
0
                    _set.erase(_sv);
1567
0
                }
1568
1
                _type = SET;
1569
1
                break;
1570
1
            case BITMAP:
1571
1
                _prepare_bitmap_for_write();
1572
31
                for (auto v : rhs._set) {
1573
31
                    if (_bitmap->contains(v)) {
1574
0
                        _bitmap->remove(v);
1575
31
                    } else {
1576
31
                        _bitmap->add(v);
1577
31
                    }
1578
31
                }
1579
1
                _convert_to_smaller_type();
1580
1
                break;
1581
8
            case SET:
1582
49
                for (auto v : rhs._set) {
1583
49
                    if (_set.contains(v)) {
1584
22
                        _set.erase(v);
1585
27
                    } else {
1586
27
                        _set.insert(v);
1587
27
                    }
1588
49
                }
1589
8
                _convert_to_smaller_type();
1590
8
                break;
1591
11
            }
1592
11
            break;
1593
27
        }
1594
27
        return *this;
1595
27
    }
1596
1597
    // check if value x is present
1598
9.19k
    bool contains(uint64_t x) const {
1599
9.19k
        switch (_type) {
1600
0
        case EMPTY:
1601
0
            return false;
1602
14
        case SINGLE:
1603
14
            return _sv == x;
1604
8.93k
        case BITMAP:
1605
8.93k
            return _bitmap->contains(x);
1606
238
        case SET:
1607
238
            return _set.contains(x);
1608
9.19k
        }
1609
0
        return false;
1610
9.19k
    }
1611
1612
    // true if contains a value that belongs to the range [left, right].
1613
    bool contains_any(uint64_t left, uint64_t right) const;
1614
1615
240k
    uint64_t cardinality() const {
1616
240k
        switch (_type) {
1617
240k
        case EMPTY:
1618
240k
            return 0;
1619
36
        case SINGLE:
1620
36
            return 1;
1621
202
        case BITMAP:
1622
202
            return _bitmap->cardinality();
1623
74
        case SET:
1624
74
            return _set.size();
1625
240k
        }
1626
0
        return 0;
1627
240k
    }
1628
1629
60.0k
    uint64_t and_cardinality(const BitmapValue& rhs) const {
1630
60.0k
        switch (rhs._type) {
1631
60.0k
        case EMPTY:
1632
60.0k
            return 0;
1633
4
        case SINGLE:
1634
4
            switch (_type) {
1635
1
            case EMPTY:
1636
1
                return 0;
1637
1
            case SINGLE:
1638
1
                return _sv == rhs._sv;
1639
1
            case BITMAP:
1640
1
                return _bitmap->contains(rhs._sv);
1641
1
            case SET:
1642
1
                return _set.contains(rhs._sv);
1643
4
            }
1644
0
            break;
1645
7
        case BITMAP:
1646
7
            switch (_type) {
1647
1
            case EMPTY:
1648
1
                return 0;
1649
1
            case SINGLE:
1650
1
                return rhs._bitmap->contains(_sv);
1651
4
            case BITMAP:
1652
4
                return _bitmap->andCardinality(*rhs._bitmap);
1653
1
            case SET: {
1654
1
                uint64_t cardinality = 0;
1655
31
                for (auto v : _set) {
1656
31
                    if (rhs._bitmap->contains(v)) {
1657
16
                        ++cardinality;
1658
16
                    }
1659
31
                }
1660
1
                return cardinality;
1661
0
            }
1662
7
            }
1663
0
            break;
1664
4
        case SET:
1665
4
            switch (_type) {
1666
1
            case EMPTY:
1667
1
                return 0;
1668
1
            case SINGLE:
1669
1
                return rhs._set.contains(_sv);
1670
1
            case BITMAP: {
1671
1
                uint64_t cardinality = 0;
1672
31
                for (auto v : rhs._set) {
1673
31
                    if (_bitmap->contains(v)) {
1674
0
                        ++cardinality;
1675
0
                    }
1676
31
                }
1677
1
                return cardinality;
1678
0
            }
1679
1
            case SET: {
1680
1
                uint64_t cardinality = 0;
1681
31
                for (auto v : _set) {
1682
31
                    if (rhs._set.contains(v)) {
1683
14
                        ++cardinality;
1684
14
                    }
1685
31
                }
1686
1
                return cardinality;
1687
0
            }
1688
4
            }
1689
60.0k
        }
1690
0
        return 0;
1691
60.0k
    }
1692
1693
60.0k
    uint64_t or_cardinality(const BitmapValue& rhs) const {
1694
60.0k
        switch (rhs._type) {
1695
59.9k
        case EMPTY:
1696
59.9k
            return cardinality();
1697
0
        case SINGLE:
1698
0
            switch (_type) {
1699
0
            case EMPTY:
1700
0
                return 1;
1701
0
            case SINGLE:
1702
0
                return 1 + (_sv != rhs._sv);
1703
0
            case BITMAP:
1704
0
                return cardinality() + !_bitmap->contains(rhs._sv);
1705
0
            case SET:
1706
0
                return _set.size() + !_set.contains(rhs._sv);
1707
0
            }
1708
0
            break;
1709
3
        case BITMAP:
1710
3
            switch (_type) {
1711
0
            case EMPTY:
1712
0
                return rhs.cardinality();
1713
0
            case SINGLE:
1714
0
                return rhs.cardinality() + !rhs._bitmap->contains(_sv);
1715
3
            case BITMAP:
1716
3
                return _bitmap->orCardinality(*rhs._bitmap);
1717
0
            case SET: {
1718
0
                uint64_t cardinality = rhs._bitmap->cardinality();
1719
0
                for (auto v : _set) {
1720
0
                    if (!rhs._bitmap->contains(v)) {
1721
0
                        ++cardinality;
1722
0
                    }
1723
0
                }
1724
0
                return cardinality;
1725
0
            }
1726
3
            }
1727
0
            break;
1728
0
        case SET:
1729
0
            switch (_type) {
1730
0
            case EMPTY:
1731
0
                return rhs.cardinality();
1732
0
            case SINGLE:
1733
0
                return rhs.cardinality() + !rhs._set.contains(_sv);
1734
0
            case BITMAP: {
1735
0
                uint64_t cardinality = _bitmap->cardinality();
1736
0
                for (auto v : rhs._set) {
1737
0
                    if (!_bitmap->contains(v)) {
1738
0
                        ++cardinality;
1739
0
                    }
1740
0
                }
1741
0
                return cardinality;
1742
0
            }
1743
0
            case SET: {
1744
0
                uint64_t cardinality = _set.size();
1745
0
                for (auto v : _set) {
1746
0
                    if (!rhs._set.contains(v)) {
1747
0
                        ++cardinality;
1748
0
                    }
1749
0
                }
1750
0
                return cardinality;
1751
0
            }
1752
0
            }
1753
60.0k
        }
1754
0
        return 0;
1755
60.0k
    }
1756
1757
24
    uint64_t andnot_cardinality(const BitmapValue& rhs) const {
1758
24
        switch (rhs._type) {
1759
8
        case EMPTY:
1760
8
            return cardinality();
1761
4
        case SINGLE:
1762
4
            switch (_type) {
1763
1
            case EMPTY:
1764
1
                return 0;
1765
1
            case SINGLE:
1766
1
                return 1 - (_sv == rhs._sv);
1767
1
            case BITMAP:
1768
1
                return cardinality() - _bitmap->contains(rhs._sv);
1769
1
            case SET:
1770
1
                return cardinality() - _set.contains(rhs._sv);
1771
4
            }
1772
0
            break;
1773
4
        case BITMAP:
1774
4
            switch (_type) {
1775
1
            case EMPTY:
1776
1
                return 0;
1777
1
            case SINGLE:
1778
1
                return !rhs._bitmap->contains(_sv);
1779
1
            case BITMAP:
1780
1
                return _bitmap->andnotCardinality(*rhs._bitmap);
1781
1
            case SET: {
1782
1
                uint64_t cardinality = _set.size();
1783
31
                for (auto v : _set) {
1784
31
                    if (rhs._bitmap->contains(v)) {
1785
16
                        cardinality -= 1;
1786
16
                    }
1787
31
                }
1788
1
                return cardinality;
1789
0
            }
1790
4
            }
1791
0
            break;
1792
8
        case SET:
1793
8
            switch (_type) {
1794
1
            case EMPTY:
1795
1
                return 0;
1796
1
            case SINGLE:
1797
1
                return !rhs._set.contains(_sv);
1798
1
            case BITMAP: {
1799
1
                uint64_t cardinality = _bitmap->cardinality();
1800
31
                for (auto v : rhs._set) {
1801
31
                    if (_bitmap->contains(v)) {
1802
0
                        cardinality -= 1;
1803
0
                    }
1804
31
                }
1805
1
                return cardinality;
1806
0
            }
1807
5
            case SET: {
1808
5
                uint64_t cardinality = _set.size();
1809
33
                for (auto v : rhs._set) {
1810
33
                    if (_set.contains(v)) {
1811
16
                        cardinality -= 1;
1812
16
                    }
1813
33
                }
1814
5
                return cardinality;
1815
0
            }
1816
8
            }
1817
24
        }
1818
0
        return 0;
1819
24
    }
1820
1821
    // Return how many bytes are required to serialize this bitmap.
1822
    // See BitmapTypeCode for the serialized format.
1823
137k
    size_t getSizeInBytes() {
1824
137k
        size_t res = 0;
1825
137k
        switch (_type) {
1826
120k
        case EMPTY:
1827
120k
            res = 1;
1828
120k
            break;
1829
27
        case SINGLE:
1830
27
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1831
22
                res = 1 + sizeof(uint32_t);
1832
22
            } else {
1833
5
                res = 1 + sizeof(uint64_t);
1834
5
            }
1835
27
            break;
1836
17.3k
        case BITMAP:
1837
17.3k
            _prepare_bitmap_for_write();
1838
17.3k
            _bitmap->runOptimize();
1839
17.3k
            _bitmap->shrinkToFit();
1840
17.3k
            res = _bitmap->getSizeInBytes(config::bitmap_serialize_version);
1841
17.3k
            break;
1842
39
        case SET:
1843
            /// 1 byte for type, 1 byte for count
1844
39
            res = 2 + sizeof(uint64_t) * _set.size();
1845
39
            break;
1846
137k
        }
1847
137k
        return res;
1848
137k
    }
1849
1850
    // Serialize the bitmap value to dst, which should be large enough.
1851
    // Client should call `getSizeInBytes` first to get the serialized size.
1852
71.2k
    void write_to(char* dst) const {
1853
71.2k
        switch (_type) {
1854
60.0k
        case EMPTY:
1855
60.0k
            *dst = BitmapTypeCode::EMPTY;
1856
60.0k
            break;
1857
17
        case SINGLE:
1858
17
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1859
14
                *(dst++) = BitmapTypeCode::SINGLE32;
1860
14
                encode_fixed32_le(reinterpret_cast<uint8_t*>(dst), static_cast<uint32_t>(_sv));
1861
14
            } else {
1862
3
                *(dst++) = BitmapTypeCode::SINGLE64;
1863
3
                encode_fixed64_le(reinterpret_cast<uint8_t*>(dst), _sv);
1864
3
            }
1865
17
            break;
1866
36
        case SET:
1867
36
            DCHECK(config::enable_set_in_bitmap_value);
1868
36
            *dst = BitmapTypeCode::SET;
1869
36
            ++dst;
1870
36
            *dst = static_cast<uint8_t>(_set.size());
1871
36
            ++dst;
1872
567
            for (auto v : _set) {
1873
567
                encode_fixed64_le(reinterpret_cast<uint8_t*>(dst), v);
1874
567
                dst += sizeof(uint64_t);
1875
567
            }
1876
36
            break;
1877
11.2k
        case BITMAP:
1878
11.2k
            _bitmap->write(dst, config::bitmap_serialize_version);
1879
11.2k
            break;
1880
71.2k
        }
1881
71.2k
    }
1882
1883
    // Deserialize a bitmap value from `src`.
1884
    // Return false if `src` begins with unknown type code, true otherwise.
1885
66.1k
    bool deserialize(const char* src) {
1886
66.1k
        switch (*src) {
1887
60.0k
        case BitmapTypeCode::EMPTY:
1888
60.0k
            _type = EMPTY;
1889
60.0k
            break;
1890
9
        case BitmapTypeCode::SINGLE32:
1891
9
            _type = SINGLE;
1892
9
            _sv = decode_fixed32_le(reinterpret_cast<const uint8_t*>(src + 1));
1893
9
            if (config::enable_set_in_bitmap_value) {
1894
2
                _type = SET;
1895
2
                _set.insert(_sv);
1896
2
            }
1897
9
            break;
1898
3
        case BitmapTypeCode::SINGLE64:
1899
3
            _type = SINGLE;
1900
3
            _sv = decode_fixed64_le(reinterpret_cast<const uint8_t*>(src + 1));
1901
3
            if (config::enable_set_in_bitmap_value) {
1902
1
                _type = SET;
1903
1
                _set.insert(_sv);
1904
1
            }
1905
3
            break;
1906
5.12k
        case BitmapTypeCode::BITMAP32:
1907
5.13k
        case BitmapTypeCode::BITMAP64:
1908
6.12k
        case BitmapTypeCode::BITMAP32_V2:
1909
6.12k
        case BitmapTypeCode::BITMAP64_V2:
1910
6.12k
            _type = BITMAP;
1911
6.12k
            _is_shared = false;
1912
6.12k
            _bitmap = std::make_shared<detail::Roaring64Map>(detail::Roaring64Map::read(src));
1913
6.12k
            break;
1914
35
        case BitmapTypeCode::SET: {
1915
35
            _type = SET;
1916
35
            ++src;
1917
35
            uint8_t count = *src;
1918
35
            ++src;
1919
35
            CHECK(count <= SET_TYPE_THRESHOLD) << "bitmap value with incorrect set count";
1920
598
            for (uint8_t i = 0; i != count; ++i, src += sizeof(uint64_t)) {
1921
563
                _set.insert(decode_fixed64_le(reinterpret_cast<const uint8_t*>(src)));
1922
563
            }
1923
35
            CHECK_EQ(count, _set.size()) << "bitmap value with incorrect set count";
1924
1925
35
            if (!config::enable_set_in_bitmap_value) {
1926
0
                _prepare_bitmap_for_write();
1927
0
                for (auto v : _set) {
1928
0
                    _bitmap->add(v);
1929
0
                }
1930
0
                _type = BITMAP;
1931
0
                _set.clear();
1932
0
            }
1933
35
            break;
1934
6.12k
        }
1935
1
        case BitmapTypeCode::SET_V2: {
1936
1
            uint32_t size = 0;
1937
1
            memcpy(&size, src + 1, sizeof(uint32_t));
1938
1
            src += sizeof(uint32_t) + 1;
1939
1940
1
            if (!config::enable_set_in_bitmap_value || size > SET_TYPE_THRESHOLD) {
1941
0
                _type = BITMAP;
1942
0
                _prepare_bitmap_for_write();
1943
1944
0
                for (int i = 0; i < size; ++i) {
1945
0
                    uint64_t key {};
1946
0
                    memcpy(&key, src, sizeof(uint64_t));
1947
0
                    _bitmap->add(key);
1948
0
                    src += sizeof(uint64_t);
1949
0
                }
1950
1
            } else {
1951
1
                _type = SET;
1952
1
                _set.reserve(size);
1953
1954
3
                for (int i = 0; i < size; ++i) {
1955
2
                    uint64_t key {};
1956
2
                    memcpy(&key, src, sizeof(uint64_t));
1957
2
                    _set.insert(key);
1958
2
                    src += sizeof(uint64_t);
1959
2
                }
1960
1
            }
1961
1
            break;
1962
6.12k
        }
1963
0
        default:
1964
0
            LOG(ERROR) << "BitmapTypeCode invalid, should between: " << BitmapTypeCode::EMPTY
1965
0
                       << " and " << BitmapTypeCode::BITMAP64 << " actual is "
1966
0
                       << static_cast<int>(*src);
1967
0
            return false;
1968
66.1k
        }
1969
66.1k
        return true;
1970
66.1k
    }
1971
1972
13
    int64_t minimum() const {
1973
13
        switch (_type) {
1974
2
        case SINGLE:
1975
2
            return _sv;
1976
4
        case BITMAP:
1977
4
            return _bitmap->minimum();
1978
5
        case SET:
1979
5
            return _min_in_set();
1980
2
        default:
1981
2
            return 0;
1982
13
        }
1983
13
    }
1984
1985
    // TODO limit string size to avoid OOM
1986
2.94k
    std::string to_string() const {
1987
2.94k
        std::stringstream ss;
1988
2.94k
        switch (_type) {
1989
8
        case EMPTY:
1990
8
            break;
1991
30
        case SINGLE:
1992
30
            ss << _sv;
1993
30
            break;
1994
2.77k
        case BITMAP: {
1995
2.77k
            struct IterCtx {
1996
2.77k
                std::stringstream* ss = nullptr;
1997
2.77k
                bool first = true;
1998
2.77k
            } iter_ctx;
1999
2.77k
            iter_ctx.ss = &ss;
2000
2001
2.77k
            _bitmap->iterate(
2002
1.08M
                    [](uint64_t value, void* c) -> bool {
2003
1.08M
                        auto ctx = reinterpret_cast<IterCtx*>(c);
2004
1.08M
                        if (ctx->first) {
2005
2.77k
                            ctx->first = false;
2006
1.08M
                        } else {
2007
1.08M
                            (*ctx->ss) << ",";
2008
1.08M
                        }
2009
1.08M
                        (*ctx->ss) << value;
2010
1.08M
                        return true;
2011
1.08M
                    },
2012
2.77k
                    &iter_ctx);
2013
2.77k
            break;
2014
0
        }
2015
135
        case SET: {
2016
135
            struct IterCtx {
2017
135
                std::stringstream* ss = nullptr;
2018
135
                bool first = true;
2019
135
            } iter_ctx;
2020
135
            iter_ctx.ss = &ss;
2021
2022
135
            std::vector<uint64_t> values(_set.begin(), _set.end());
2023
135
            std::sort(values.begin(), values.end());
2024
2025
2.12k
            for (auto v : values) {
2026
2.12k
                if (iter_ctx.first) {
2027
135
                    iter_ctx.first = false;
2028
1.99k
                } else {
2029
1.99k
                    (*iter_ctx.ss) << ",";
2030
1.99k
                }
2031
2.12k
                (*iter_ctx.ss) << v;
2032
2.12k
            }
2033
135
            break;
2034
0
        }
2035
2.94k
        }
2036
2.94k
        return ss.str();
2037
2.94k
    }
2038
2039
14
    int64_t maximum() const {
2040
14
        switch (_type) {
2041
2
        case SINGLE:
2042
2
            return _sv;
2043
5
        case BITMAP:
2044
5
            return _bitmap->maximum();
2045
5
        case SET:
2046
5
            return _max_in_set();
2047
2
        default:
2048
2
            return 0;
2049
14
        }
2050
14
    }
2051
2052
4
    uint64_t max(bool* empty) const {
2053
4
        return min_or_max(empty, [&]() { return maximum(); });
2054
4
    }
2055
2056
4
    uint64_t min(bool* empty) const {
2057
4
        return min_or_max(empty, [&]() { return minimum(); });
2058
4
    }
2059
2060
5
    uint64_t _min_in_set() const {
2061
5
        DCHECK_EQ(_type, SET);
2062
5
        return *std::min_element(_set.begin(), _set.end());
2063
5
    }
2064
2065
5
    uint64_t _max_in_set() const {
2066
5
        DCHECK_EQ(_type, SET);
2067
5
        return *std::max_element(_set.begin(), _set.end());
2068
5
    }
2069
2070
6
    bool empty() const { return _type == EMPTY; }
2071
2072
    /**
2073
     * Return new set with specified range (not include the range_end)
2074
     */
2075
    int64_t sub_range(const int64_t& range_start, const int64_t& range_end,
2076
7
                      BitmapValue* ret_bitmap) {
2077
7
        switch (_type) {
2078
1
        case EMPTY:
2079
1
            return 0;
2080
2
        case SINGLE: {
2081
            //only single value, so _sv must in [range_start,range_end)
2082
2
            if (range_start <= _sv && _sv < range_end) {
2083
1
                ret_bitmap->add(_sv);
2084
1
                return 1;
2085
1
            } else {
2086
1
                return 0;
2087
1
            }
2088
2
        }
2089
2
        case BITMAP: {
2090
2
            int64_t count = 0;
2091
146
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2092
145
                if (*it < range_start) {
2093
0
                    continue;
2094
0
                }
2095
145
                if (*it < range_end) {
2096
144
                    ret_bitmap->add(*it);
2097
144
                    ++count;
2098
144
                } else {
2099
1
                    break;
2100
1
                }
2101
145
            }
2102
2
            return count;
2103
2
        }
2104
2
        case SET: {
2105
2
            int64_t count = 0;
2106
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2107
2
            std::sort(values.begin(), values.end());
2108
6
            for (auto it = values.begin(); it != values.end(); ++it) {
2109
4
                if (*it < range_start || *it >= range_end) {
2110
1
                    continue;
2111
1
                }
2112
3
                ret_bitmap->add(*it);
2113
3
                ++count;
2114
3
            }
2115
2
            return count;
2116
2
        }
2117
7
        }
2118
0
        return 0;
2119
7
    }
2120
2121
    /**
2122
     * Return new set with specified start and limit
2123
     * @param range_start the start value for the range
2124
     * @param cardinality_limit the length of the subset
2125
     * @return the real count for subset, maybe less than cardinality_limit
2126
     */
2127
    int64_t sub_limit(const int64_t& range_start, const int64_t& cardinality_limit,
2128
9
                      BitmapValue* ret_bitmap) {
2129
9
        switch (_type) {
2130
1
        case EMPTY:
2131
1
            return 0;
2132
2
        case SINGLE: {
2133
            //only single value, so range_start must less than _sv
2134
2
            if (range_start > _sv) {
2135
0
                return 0;
2136
2
            } else {
2137
2
                ret_bitmap->add(_sv);
2138
2
                return 1;
2139
2
            }
2140
2
        }
2141
4
        case BITMAP: {
2142
4
            int64_t count = 0;
2143
31
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2144
30
                if (*it < range_start) {
2145
8
                    continue;
2146
8
                }
2147
22
                if (count < cardinality_limit) {
2148
19
                    ret_bitmap->add(*it);
2149
19
                    ++count;
2150
19
                } else {
2151
3
                    break;
2152
3
                }
2153
22
            }
2154
4
            return count;
2155
2
        }
2156
2
        case SET: {
2157
2
            int64_t count = 0;
2158
2159
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2160
2
            std::sort(values.begin(), values.end());
2161
6
            for (auto it = values.begin(); it != values.end(); ++it) {
2162
4
                if (*it < range_start) {
2163
0
                    continue;
2164
0
                }
2165
4
                if (count < cardinality_limit) {
2166
4
                    ret_bitmap->add(*it);
2167
4
                    ++count;
2168
4
                } else {
2169
0
                    break;
2170
0
                }
2171
4
            }
2172
2
            return count;
2173
2
        }
2174
9
        }
2175
0
        return 0;
2176
9
    }
2177
2178
    /**
2179
     * Returns the bitmap elements, starting from the offset position.
2180
     * The number of returned elements is limited by the cardinality_limit parameter.
2181
     * Analog of the substring string function, but for bitmap.
2182
     */
2183
8
    int64_t offset_limit(const int64_t& offset, const int64_t& limit, BitmapValue* ret_bitmap) {
2184
8
        switch (_type) {
2185
1
        case EMPTY:
2186
1
            return 0;
2187
2
        case SINGLE: {
2188
            //only single value, so offset must start 0
2189
2
            if (offset == 0) {
2190
1
                ret_bitmap->add(_sv);
2191
1
                return 1;
2192
1
            } else {
2193
1
                return 0;
2194
1
            }
2195
2
        }
2196
5
        default:
2197
5
            break;
2198
8
        }
2199
5
        if (_type == BITMAP) {
2200
3
            if (std::abs(offset) >= _bitmap->cardinality()) {
2201
0
                return 0;
2202
0
            }
2203
3
            int64_t abs_offset = offset;
2204
3
            if (offset < 0) {
2205
0
                abs_offset = _bitmap->cardinality() + offset;
2206
0
            }
2207
2208
3
            int64_t count = 0;
2209
3
            int64_t offset_count = 0;
2210
3
            auto it = _bitmap->begin();
2211
123
            for (; it != _bitmap->end() && offset_count < abs_offset; ++it) {
2212
120
                ++offset_count;
2213
120
            }
2214
33
            for (; it != _bitmap->end() && count < limit; ++it, ++count) {
2215
30
                ret_bitmap->add(*it);
2216
30
            }
2217
3
            return count;
2218
3
        } else {
2219
2
            if (std::abs(offset) > _set.size()) {
2220
0
                return 0;
2221
0
            }
2222
2223
2
            int64_t abs_offset = offset;
2224
2
            if (offset < 0) {
2225
0
                abs_offset = _set.size() + offset;
2226
0
            }
2227
2228
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2229
2
            std::sort(values.begin(), values.end());
2230
2231
2
            int64_t count = 0;
2232
2
            size_t index = 0;
2233
4
            for (auto v : values) {
2234
4
                if (index < abs_offset) {
2235
1
                    ++index;
2236
1
                    continue;
2237
1
                }
2238
3
                if (count == limit || index == values.size()) {
2239
0
                    break;
2240
0
                }
2241
3
                ++count;
2242
3
                ++index;
2243
3
                ret_bitmap->add(v);
2244
3
            }
2245
2
            return count;
2246
2
        }
2247
5
    }
2248
2249
    //for function bitmap_to_array
2250
3
    void to_array(vectorized::PaddedPODArray<int64_t>& data) const {
2251
3
        switch (_type) {
2252
0
        case EMPTY:
2253
0
            break;
2254
1
        case SINGLE: {
2255
1
            data.emplace_back(_sv);
2256
1
            break;
2257
0
        }
2258
1
        case BITMAP: {
2259
129
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2260
128
                data.emplace_back(*it);
2261
128
            }
2262
1
            break;
2263
0
        }
2264
1
        case SET: {
2265
1
            std::vector<uint64_t> values(_set.begin(), _set.end());
2266
1
            std::sort(values.begin(), values.end());
2267
31
            for (auto v : values) {
2268
31
                data.emplace_back(v);
2269
31
            }
2270
1
            break;
2271
0
        }
2272
3
        }
2273
3
    }
2274
2275
126
    void reset() {
2276
126
        _type = EMPTY;
2277
126
        _sv = 0;
2278
126
        _set.clear();
2279
126
        _is_shared = false;
2280
126
        _bitmap = nullptr;
2281
126
    }
2282
    // Implement an iterator for convenience
2283
    friend class BitmapValueIterator;
2284
    typedef BitmapValueIterator b_iterator;
2285
2286
    b_iterator begin() const;
2287
    b_iterator end() const;
2288
    b_iterator lower_bound(uint64_t val) const;
2289
2290
private:
2291
41
    void _convert_to_smaller_type() {
2292
41
        if (_type == BITMAP) {
2293
13
            uint64_t c = _bitmap->cardinality();
2294
13
            if (config::enable_set_in_bitmap_value && c > SET_TYPE_THRESHOLD) {
2295
7
                return;
2296
7
            } else if (c > 1) {
2297
4
                return;
2298
4
            }
2299
2
            if (c == 0) {
2300
1
                _type = EMPTY;
2301
1
            } else if (c == 1 && !config::enable_set_in_bitmap_value) {
2302
1
                _type = SINGLE;
2303
1
                _sv = _bitmap->minimum();
2304
1
            } else {
2305
0
                _type = SET;
2306
0
                for (auto v : *_bitmap) {
2307
0
                    _set.insert(v);
2308
0
                }
2309
0
            }
2310
2
            _bitmap.reset();
2311
28
        } else if (_type == SET) {
2312
28
            if (_set.size() == 1 && !config::enable_set_in_bitmap_value) {
2313
0
                _type = SINGLE;
2314
0
                _sv = *_set.begin();
2315
0
                _set.clear();
2316
0
            }
2317
28
        }
2318
41
    }
2319
2320
8
    uint64_t min_or_max(bool* empty, std::function<uint64_t()> func) const {
2321
8
        bool is_empty = false;
2322
8
        uint64_t result = 0;
2323
8
        switch (_type) {
2324
0
        case SINGLE:
2325
0
            result = _sv;
2326
0
            break;
2327
2
        case BITMAP:
2328
6
        case SET:
2329
6
            result = func();
2330
6
            break;
2331
2
        default:
2332
2
            is_empty = true;
2333
8
        }
2334
8
        if (empty) {
2335
8
            *empty = is_empty;
2336
8
        }
2337
8
        return result;
2338
8
    }
2339
2340
3.13M
    void _prepare_bitmap_for_write() {
2341
3.13M
        if (!_bitmap) {
2342
6.29k
            _bitmap = std::make_shared<detail::Roaring64Map>();
2343
6.29k
            _is_shared = false;
2344
6.29k
            return;
2345
6.29k
        }
2346
2347
3.13M
        if (!_is_shared) {
2348
            // the state is not shared, not need to check use count any more
2349
3.12M
            return;
2350
3.12M
        }
2351
2352
8.18k
        if (_bitmap.use_count() > 1) {
2353
1.01k
            auto new_one = std::make_shared<detail::Roaring64Map>();
2354
1.01k
            *new_one = *_bitmap;
2355
1.01k
            _bitmap = new_one;
2356
1.01k
        }
2357
8.18k
        _is_shared = false;
2358
8.18k
    }
2359
2360
32.3k
    void _convert_to_bitmap_if_need() {
2361
32.3k
        if (_type != SET || _set.size() <= SET_TYPE_THRESHOLD) {
2362
31.3k
            return;
2363
31.3k
        }
2364
999
        _prepare_bitmap_for_write();
2365
33.2k
        for (auto v : _set) {
2366
33.2k
            _bitmap->add(v);
2367
33.2k
        }
2368
999
        _type = BITMAP;
2369
999
        _set.clear();
2370
999
    }
2371
2372
    enum BitmapDataType {
2373
        EMPTY = 0,
2374
        SINGLE = 1, // single element
2375
        BITMAP = 2, // more than one elements
2376
        SET = 3     // elements count less or equal than 32
2377
    };
2378
    uint64_t _sv = 0;                              // store the single value when _type == SINGLE
2379
    std::shared_ptr<detail::Roaring64Map> _bitmap; // used when _type == BITMAP
2380
    SetContainer<uint64_t> _set;
2381
    BitmapDataType _type {EMPTY};
2382
    // Indicate whether the state is shared among multi BitmapValue object
2383
    bool _is_shared = true;
2384
    static constexpr uint64_t SET_TYPE_THRESHOLD = 32;
2385
};
2386
2387
// A simple implement of bitmap value iterator(Read only)
2388
// Usage:
2389
//  BitmapValueIterator iter = bitmap_value.begin();
2390
//  BitmapValueIterator end = bitmap_value.end();
2391
//  for (; iter != end(); ++iter) {
2392
//      uint64_t v = *iter;
2393
//      ... do something with "v" ...
2394
//  }
2395
class BitmapValueIterator {
2396
public:
2397
37
    BitmapValueIterator(const BitmapValue& bitmap, bool end = false) : _bitmap(bitmap), _end(end) {
2398
37
        switch (_bitmap._type) {
2399
8
        case BitmapValue::BitmapDataType::EMPTY:
2400
8
            _end = true;
2401
8
            break;
2402
8
        case BitmapValue::BitmapDataType::SINGLE:
2403
8
            _sv = _bitmap._sv;
2404
8
            break;
2405
13
        case BitmapValue::BitmapDataType::BITMAP:
2406
13
            _iter = new detail::Roaring64MapSetBitForwardIterator(*_bitmap._bitmap, _end);
2407
13
            break;
2408
8
        case BitmapValue::BitmapDataType::SET:
2409
8
            _set_iter = _end ? _bitmap._set.end() : _bitmap._set.begin();
2410
8
            break;
2411
0
        default:
2412
0
            CHECK(false) << _bitmap._type;
2413
37
        }
2414
37
    }
2415
2416
    BitmapValueIterator(const BitmapValueIterator& other)
2417
168
            : _bitmap(other._bitmap), _sv(other._sv), _end(other._end) {
2418
168
        _iter = other._iter ? new detail::Roaring64MapSetBitForwardIterator(*other._iter) : nullptr;
2419
168
        if (_bitmap._type == BitmapValue::BitmapDataType::SET) {
2420
33
            _set_iter = other._set_iter;
2421
33
        }
2422
168
    }
2423
2424
205
    ~BitmapValueIterator() {
2425
205
        if (_iter != nullptr) {
2426
143
            delete _iter;
2427
143
            _iter = nullptr;
2428
143
        }
2429
205
    }
2430
2431
646
    uint64_t operator*() const {
2432
646
        CHECK(!_end) << "should not get value of end iterator";
2433
646
        switch (_bitmap._type) {
2434
4
        case BitmapValue::BitmapDataType::SINGLE:
2435
4
            return _sv;
2436
517
        case BitmapValue::BitmapDataType::BITMAP:
2437
517
            return *(*_iter);
2438
125
        case BitmapValue::BitmapDataType::SET: {
2439
125
            return *_set_iter;
2440
0
        }
2441
0
        default:
2442
0
            CHECK(false) << _bitmap._type;
2443
646
        }
2444
0
        return 0;
2445
646
    }
2446
2447
486
    BitmapValueIterator& operator++() { // ++i, must returned inc. value
2448
486
        CHECK(!_end) << "should not forward when iterator ends";
2449
486
        switch (_bitmap._type) {
2450
3
        case BitmapValue::BitmapDataType::SINGLE:
2451
3
            _end = true;
2452
3
            break;
2453
389
        case BitmapValue::BitmapDataType::BITMAP:
2454
389
            ++(*_iter);
2455
389
            break;
2456
94
        case BitmapValue::BitmapDataType::SET:
2457
94
            ++_set_iter;
2458
94
            break;
2459
0
        default:
2460
0
            CHECK(false) << _bitmap._type;
2461
486
        }
2462
486
        return *this;
2463
486
    }
2464
2465
160
    BitmapValueIterator operator++(int) { // i++, must return orig. value
2466
160
        CHECK(!_end) << "should not forward when iterator ends";
2467
160
        BitmapValueIterator orig(*this);
2468
160
        switch (_bitmap._type) {
2469
1
        case BitmapValue::BitmapDataType::SINGLE:
2470
1
            _end = true;
2471
1
            break;
2472
128
        case BitmapValue::BitmapDataType::BITMAP:
2473
128
            ++(*_iter);
2474
128
            break;
2475
31
        case BitmapValue::BitmapDataType::SET:
2476
31
            ++_set_iter;
2477
31
            break;
2478
0
        default:
2479
0
            CHECK(false) << _bitmap._type;
2480
160
        }
2481
160
        return orig;
2482
160
    }
2483
2484
674
    bool operator==(const BitmapValueIterator& other) const {
2485
674
        if (_end && other._end) {
2486
13
            return true;
2487
13
        }
2488
2489
661
        switch (_bitmap._type) {
2490
0
        case BitmapValue::BitmapDataType::EMPTY:
2491
0
            return other._bitmap._type == BitmapValue::BitmapDataType::EMPTY;
2492
5
        case BitmapValue::BitmapDataType::SINGLE:
2493
5
            return _end == other._end && _sv == other._sv;
2494
524
        case BitmapValue::BitmapDataType::BITMAP:
2495
524
            return *_iter == *(other._iter);
2496
132
        case BitmapValue::BitmapDataType::SET:
2497
132
            return _set_iter == other._set_iter;
2498
0
        default:
2499
0
            CHECK(false) << _bitmap._type;
2500
661
        }
2501
0
        return false;
2502
661
    }
2503
2504
664
    bool operator!=(const BitmapValueIterator& other) const { return !(*this == other); }
2505
2506
    /**
2507
    * Move the iterator to the first value >= `val`.
2508
    */
2509
0
    BitmapValueIterator& move(uint64_t val) {
2510
0
        switch (_bitmap._type) {
2511
0
        case BitmapValue::BitmapDataType::SINGLE:
2512
0
            if (_sv < val) {
2513
0
                _end = true;
2514
0
            }
2515
0
            break;
2516
0
        case BitmapValue::BitmapDataType::BITMAP:
2517
0
            if (!_iter->move(val)) {
2518
0
                _end = true;
2519
0
            }
2520
0
            break;
2521
0
        case BitmapValue::BitmapDataType::SET: {
2522
0
            LOG(FATAL) << "BitmapValue with set do not support move";
2523
0
            break;
2524
0
        }
2525
0
        default:
2526
0
            break;
2527
0
        }
2528
0
        return *this;
2529
0
    }
2530
2531
private:
2532
    const BitmapValue& _bitmap;
2533
    detail::Roaring64MapSetBitForwardIterator* _iter = nullptr;
2534
    BitmapValue::SetContainer<uint64_t>::const_iterator _set_iter;
2535
    uint64_t _sv = 0;
2536
    bool _end = false;
2537
};
2538
2539
16
inline BitmapValueIterator BitmapValue::begin() const {
2540
16
    return {*this};
2541
16
}
2542
2543
21
inline BitmapValueIterator BitmapValue::end() const {
2544
21
    return {*this, true};
2545
21
}
2546
2547
0
inline BitmapValueIterator BitmapValue::lower_bound(uint64_t val) const {
2548
0
    return BitmapValueIterator(*this).move(val);
2549
0
}
2550
2551
7
inline bool BitmapValue::contains_any(uint64_t left, uint64_t right) const {
2552
7
    if (left > right) {
2553
0
        return false;
2554
0
    }
2555
2556
7
    if (_type == SET) {
2557
25
        for (auto v : _set) {
2558
25
            if (v >= left && v <= right) {
2559
4
                return true;
2560
4
            }
2561
25
        }
2562
3
        return false;
2563
7
    }
2564
0
    auto it = lower_bound(left);
2565
0
    return it != end() && *it <= right;
2566
7
}
2567
2568
} // namespace doris