Coverage Report

Created: 2026-01-21 02:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/util/bitmap_value.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <parallel_hashmap/btree.h>
21
#include <parallel_hashmap/phmap.h>
22
23
#include <algorithm>
24
#include <cstdarg>
25
#include <cstdint>
26
#include <cstdio>
27
#include <limits>
28
#include <map>
29
#include <memory>
30
#include <new>
31
#include <numeric>
32
#include <queue>
33
#include <roaring/roaring.hh>
34
#include <set>
35
#include <stdexcept>
36
#include <string>
37
#include <utility>
38
39
#include "common/config.h"
40
#include "common/exception.h"
41
#include "common/logging.h"
42
#include "util/coding.h"
43
#include "vec/common/pod_array.h"
44
#include "vec/common/pod_array_fwd.h"
45
namespace doris {
46
47
// serialized bitmap := TypeCode(1), Payload
48
// The format of payload depends on value of TypeCode which is defined below
49
struct BitmapTypeCode {
50
    enum type {
51
        // An empty bitmap. Payload is 0 byte.
52
        // added in 0.11
53
        EMPTY = 0,
54
        // A bitmap containing only one element that is in [0, UINT32_MAX]
55
        // Payload := UInt32LittleEndian(4 byte)
56
        // added in 0.11
57
        SINGLE32 = 1,
58
        // A bitmap whose maximum element is in [0, UINT32_MAX]
59
        // Payload := the standard RoaringBitmap format described by
60
        // https://github.com/RoaringBitmap/RoaringFormatSpec/
61
        // added in 0.11
62
        BITMAP32 = 2,
63
        // A bitmap containing only one element that is in (UINT32_MAX, UINT64_MAX]
64
        // Payload := UInt64LittleEndian(8 byte)
65
        // added in 0.12
66
        SINGLE64 = 3,
67
        // A bitmap whose maximum element is in (UINT32_MAX, UINT64_MAX].
68
        //
69
        // To support 64-bits elements, all elements with the same high 32 bits are stored in a
70
        // RoaringBitmap containing only the lower 32 bits. Thus we could use
71
        // map<uint32_t, RoaringBitmap> to represent bitmap of 64-bits ints.
72
        //
73
        // Since there is no standard format for 64-bits RoaringBitmap, we define our own as below
74
        // Payload := NumRoaring(vint64), { MapKey, MapValue }^NumRoaring
75
        // - MapKey := the shared high 32 bits in UInt32LittleEndian(4 byte)
76
        // - MapValue := the standard RoaringBitmap format
77
        //
78
        // added in 0.12
79
        BITMAP64 = 4,
80
        SET = 5, // V1
81
        SET_V2 = 10,
82
        BITMAP32_V2 = 12,
83
        BITMAP64_V2 = 13,
84
        TYPE_MAX
85
    };
86
1
    Status static inline validate(int bitmap_type) {
87
1
        if (UNLIKELY(bitmap_type < type::EMPTY || bitmap_type >= type::TYPE_MAX)) {
88
1
            std::string err_msg =
89
1
                    fmt::format("BitmapTypeCode invalid, should between: {} and {} actrual is {}",
90
1
                                BitmapTypeCode::EMPTY, BitmapTypeCode::BITMAP64, bitmap_type);
91
1
            LOG(ERROR) << err_msg;
92
1
            return Status::Corruption(err_msg);
93
1
        }
94
0
        return Status::OK();
95
1
    }
96
};
97
98
namespace detail {
99
100
class Roaring64MapSetBitForwardIterator;
101
102
// Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.2.60/cpp/roaring64map.hh
103
// What we change includes
104
// - a custom serialization format is used inside read()/write()/getSizeInBytes()
105
// - added clear() and is32BitsEnough()
106
class Roaring64Map {
107
public:
108
    /**
109
     * Create an empty bitmap
110
     */
111
18.4k
    Roaring64Map() = default;
112
113
    /**
114
     * Construct a bitmap from a list of 32-bit integer values.
115
     */
116
2
    Roaring64Map(size_t n, const uint32_t* data) { addMany(n, data); }
117
118
    /**
119
     * Construct a bitmap from a list of 64-bit integer values.
120
     */
121
1
    Roaring64Map(size_t n, const uint64_t* data) { addMany(n, data); }
122
123
    /**
124
     * Construct a 64-bit map from a 32-bit one
125
     */
126
1
    explicit Roaring64Map(const roaring::Roaring& r) { emplaceOrInsert(0, r); }
127
128
9
    Roaring64Map(const Roaring64Map& r) = default;
129
130
9.10k
    Roaring64Map(Roaring64Map&& r) = default;
131
132
    /**
133
     * Assignment operator.
134
     */
135
1.05k
    Roaring64Map& operator=(const Roaring64Map& r) {
136
1.05k
        roarings = r.roarings;
137
1.05k
        return *this;
138
1.05k
    }
139
140
    /**
141
     * Add value x
142
     *
143
     */
144
3
    void add(uint32_t x) {
145
3
        roarings[0].add(x);
146
3
        roarings[0].setCopyOnWrite(copyOnWrite);
147
3
    }
148
4.20M
    void add(uint64_t x) {
149
4.20M
        roarings[highBytes(x)].add(lowBytes(x));
150
4.20M
        roarings[highBytes(x)].setCopyOnWrite(copyOnWrite);
151
4.20M
    }
152
153
    template <typename T>
154
9
    void addMany(size_t n_args, const T* vals) {
155
9
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
156
8
            auto& roaring = roarings[0];
157
8
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
158
8
            roaring.setCopyOnWrite(copyOnWrite);
159
8
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
160
1
            auto& roaring = roarings[0];
161
1
            std::vector<uint32_t> values(n_args);
162
8
            for (size_t i = 0; i != n_args; ++i) {
163
7
                values[i] = uint32_t(vals[i]);
164
7
            }
165
1
            roaring.addMany(n_args, values.data());
166
1
            roaring.setCopyOnWrite(copyOnWrite);
167
1
        } else {
168
0
            for (size_t lcv = 0; lcv < n_args; lcv++) {
169
0
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
170
0
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
171
0
            }
172
0
        }
173
9
    }
_ZN5doris6detail12Roaring64Map7addManyIjEEvmPKT_
Line
Count
Source
154
8
    void addMany(size_t n_args, const T* vals) {
155
8
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
156
8
            auto& roaring = roarings[0];
157
8
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
158
8
            roaring.setCopyOnWrite(copyOnWrite);
159
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
160
            auto& roaring = roarings[0];
161
            std::vector<uint32_t> values(n_args);
162
            for (size_t i = 0; i != n_args; ++i) {
163
                values[i] = uint32_t(vals[i]);
164
            }
165
            roaring.addMany(n_args, values.data());
166
            roaring.setCopyOnWrite(copyOnWrite);
167
        } else {
168
            for (size_t lcv = 0; lcv < n_args; lcv++) {
169
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
170
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
171
            }
172
        }
173
8
    }
_ZN5doris6detail12Roaring64Map7addManyItEEvmPKT_
Line
Count
Source
154
1
    void addMany(size_t n_args, const T* vals) {
155
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
156
            auto& roaring = roarings[0];
157
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
158
            roaring.setCopyOnWrite(copyOnWrite);
159
1
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
160
1
            auto& roaring = roarings[0];
161
1
            std::vector<uint32_t> values(n_args);
162
8
            for (size_t i = 0; i != n_args; ++i) {
163
7
                values[i] = uint32_t(vals[i]);
164
7
            }
165
1
            roaring.addMany(n_args, values.data());
166
1
            roaring.setCopyOnWrite(copyOnWrite);
167
        } else {
168
            for (size_t lcv = 0; lcv < n_args; lcv++) {
169
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
170
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
171
            }
172
        }
173
1
    }
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIiEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIaEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIsEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIlEEvmPKT_
174
175
40
    void addMany(size_t n_args, const uint64_t* vals) {
176
1.25k
        for (size_t lcv = 0; lcv < n_args; lcv++) {
177
1.21k
            roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
178
1.21k
            roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
179
1.21k
        }
180
40
    }
181
182
    /**
183
     * Remove value x
184
     *
185
     */
186
4
    void remove(uint32_t x) { roarings[0].remove(x); }
187
23
    void remove(uint64_t x) {
188
23
        auto roaring_iter = roarings.find(highBytes(x));
189
23
        if (roaring_iter != roarings.cend()) {
190
23
            roaring_iter->second.remove(lowBytes(x));
191
23
        }
192
23
    }
193
    /**
194
     * Return the largest value (if not empty)
195
     *
196
     */
197
50.1k
    uint64_t maximum() const {
198
50.1k
        for (auto roaring_iter = roarings.crbegin(); roaring_iter != roarings.crend();
199
50.1k
             ++roaring_iter) {
200
50.1k
            if (!roaring_iter->second.isEmpty()) {
201
50.1k
                return uniteBytes(roaring_iter->first, roaring_iter->second.maximum());
202
50.1k
            }
203
50.1k
        }
204
        // we put std::numeric_limits<>::max/min in parenthesis
205
        // to avoid a clash with the Windows.h header under Windows
206
5
        return (std::numeric_limits<uint64_t>::min)();
207
50.1k
    }
208
209
    /**
210
     * Return the smallest value (if not empty)
211
     *
212
     */
213
7
    uint64_t minimum() const {
214
7
        for (auto roaring_iter = roarings.cbegin(); roaring_iter != roarings.cend();
215
7
             ++roaring_iter) {
216
6
            if (!roaring_iter->second.isEmpty()) {
217
6
                return uniteBytes(roaring_iter->first, roaring_iter->second.minimum());
218
6
            }
219
6
        }
220
        // we put std::numeric_limits<>::max/min in parenthesis
221
        // to avoid a clash with the Windows.h header under Windows
222
1
        return (std::numeric_limits<uint64_t>::max)();
223
7
    }
224
225
    /**
226
     * Check if value x is present
227
     */
228
11
    bool contains(uint32_t x) const {
229
11
        return roarings.count(0) == 0 ? false : roarings.at(0).contains(x);
230
11
    }
231
9.24k
    bool contains(uint64_t x) const {
232
9.24k
        return roarings.count(highBytes(x)) == 0 ? false
233
9.24k
                                                 : roarings.at(highBytes(x)).contains(lowBytes(x));
234
9.24k
    }
235
236
    /**
237
     * Compute the intersection between the current bitmap and the provided
238
     * bitmap,
239
     * writing the result in the current bitmap. The provided bitmap is not
240
     * modified.
241
     */
242
6
    Roaring64Map& operator&=(const Roaring64Map& r) {
243
11
        for (auto& map_entry : roarings) {
244
11
            if (r.roarings.count(map_entry.first) == 1) {
245
10
                map_entry.second &= r.roarings.at(map_entry.first);
246
10
            } else {
247
1
                map_entry.second = roaring::Roaring();
248
1
            }
249
11
        }
250
6
        return *this;
251
6
    }
252
253
    /**
254
     * Compute the difference between the current bitmap and the provided
255
     * bitmap,
256
     * writing the result in the current bitmap. The provided bitmap is not
257
     * modified.
258
     */
259
1
    Roaring64Map& operator-=(const Roaring64Map& r) {
260
4
        for (auto& map_entry : roarings) {
261
4
            if (r.roarings.count(map_entry.first) == 1) {
262
4
                map_entry.second -= r.roarings.at(map_entry.first);
263
4
            }
264
4
        }
265
1
        return *this;
266
1
    }
267
268
    /**
269
     * Compute the union between the current bitmap and the provided bitmap,
270
     * writing the result in the current bitmap. The provided bitmap is not
271
     * modified.
272
     *
273
     * See also the fastunion function to aggregate many bitmaps more quickly.
274
     */
275
5
    Roaring64Map& operator|=(const Roaring64Map& r) {
276
11
        for (const auto& map_entry : r.roarings) {
277
11
            if (roarings.count(map_entry.first) == 0) {
278
2
                roarings[map_entry.first] = map_entry.second;
279
2
                roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
280
9
            } else {
281
9
                roarings[map_entry.first] |= map_entry.second;
282
9
            }
283
11
        }
284
5
        return *this;
285
5
    }
286
287
    /**
288
     * Compute the symmetric union between the current bitmap and the provided
289
     * bitmap,
290
     * writing the result in the current bitmap. The provided bitmap is not
291
     * modified.
292
     */
293
4
    Roaring64Map& operator^=(const Roaring64Map& r) {
294
7
        for (const auto& map_entry : r.roarings) {
295
7
            if (roarings.count(map_entry.first) == 0) {
296
0
                roarings[map_entry.first] = map_entry.second;
297
0
                roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
298
7
            } else {
299
7
                roarings[map_entry.first] ^= map_entry.second;
300
7
            }
301
7
        }
302
4
        return *this;
303
4
    }
304
305
    /**
306
     * Exchange the content of this bitmap with another.
307
     */
308
1
    void swap(Roaring64Map& r) { roarings.swap(r.roarings); }
309
310
    /**
311
     * Get the cardinality of the bitmap (number of elements).
312
     * Throws std::length_error in the special case where the bitmap is full
313
     * (cardinality() == 2^64). Check isFull() before calling to avoid
314
     * exception.
315
     */
316
246
    uint64_t cardinality() const {
317
246
        if (isFull()) {
318
0
            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
319
0
                                   "bitmap is full, cardinality is 2^64, "
320
0
                                   "unable to represent in a 64-bit integer");
321
0
        }
322
246
        return std::accumulate(roarings.cbegin(), roarings.cend(), (uint64_t)0,
323
246
                               [](uint64_t previous,
324
418
                                  const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
325
418
                                   return previous + map_entry.second.cardinality();
326
418
                               });
327
246
    }
328
    /**
329
     * Computes the size of the intersection between two bitmaps.
330
     *
331
     */
332
6
    uint64_t andCardinality(const Roaring64Map& r) const {
333
6
        uint64_t card = 0;
334
10
        for (const auto& map_entry : roarings) {
335
10
            if (r.roarings.count(map_entry.first) == 1) {
336
9
                card += map_entry.second.and_cardinality(r.roarings.at(map_entry.first));
337
9
            }
338
10
        }
339
6
        return card;
340
6
    }
341
342
    /**
343
     * Computes the size of the union between two bitmaps.
344
     *
345
     */
346
4
    uint64_t orCardinality(const Roaring64Map& r) const {
347
4
        uint64_t card = 0;
348
4
        for (const auto& map_entry : roarings) {
349
4
            if (r.roarings.count(map_entry.first) == 0) {
350
0
                card += map_entry.second.cardinality();
351
0
            }
352
4
        }
353
4
        for (const auto& map_entry : r.roarings) {
354
4
            if (roarings.count(map_entry.first) == 0) {
355
0
                card += map_entry.second.cardinality();
356
4
            } else {
357
4
                card += roarings.at(map_entry.first).or_cardinality(map_entry.second);
358
4
            }
359
4
        }
360
4
        return card;
361
4
    }
362
363
    /**
364
     * Computes the size of the difference (andnot) between two bitmaps.
365
     * r1.cardinality - (r1 & r2).cardinality
366
     */
367
1
    uint64_t andnotCardinality(const Roaring64Map& r) const {
368
1
        uint64_t card = 0;
369
4
        for (const auto& map_entry : roarings) {
370
4
            card += map_entry.second.cardinality();
371
4
            if (r.roarings.count(map_entry.first) == 1) {
372
4
                card -= r.roarings.at(map_entry.first).and_cardinality(map_entry.second);
373
4
            }
374
4
        }
375
1
        return card;
376
1
    }
377
378
    /**
379
     * Computes the size of the symmetric difference (andnot) between two
380
     * bitmaps.
381
     * r1.cardinality + r2.cardinality - 2 * (r1 & r2).cardinality
382
     */
383
1
    uint64_t xorCardinality(const Roaring64Map& r) const {
384
1
        uint64_t card = 0;
385
1
        for (const auto& map_entry : roarings) {
386
1
            card += map_entry.second.cardinality();
387
1
        }
388
1
        for (const auto& map_entry : r.roarings) {
389
1
            card += map_entry.second.cardinality();
390
1
            if (roarings.count(map_entry.first) == 1) {
391
1
                card -= 2 * roarings.at(map_entry.first).and_cardinality(map_entry.second);
392
1
            }
393
1
        }
394
1
        return card;
395
1
    }
396
397
    /**
398
    * Returns true if the bitmap is empty (cardinality is zero).
399
    */
400
1
    bool isEmpty() const {
401
1
        return std::all_of(roarings.cbegin(), roarings.cend(),
402
1
                           [](const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
403
0
                               return map_entry.second.isEmpty();
404
0
                           });
405
1
    }
406
407
    /**
408
    * Returns true if the bitmap is full (cardinality is max uint64_t + 1).
409
    */
410
246
    bool isFull() const {
411
        // only bother to check if map is fully saturated
412
        //
413
        // we put std::numeric_limits<>::max/min in parenthesis
414
        // to avoid a clash with the Windows.h header under Windows
415
246
        return roarings.size() == ((uint64_t)(std::numeric_limits<uint32_t>::max)()) + 1
416
246
                       ? std::all_of(roarings.cbegin(), roarings.cend(),
417
0
                                     [](const std::pair<const uint32_t, roaring::Roaring>&
418
0
                                                roaring_map_entry) {
419
                                         // roarings within map are saturated if cardinality
420
                                         // is uint32_t max + 1
421
0
                                         return roaring_map_entry.second.cardinality() ==
422
0
                                                ((uint64_t)(std::numeric_limits<uint32_t>::max)()) +
423
0
                                                        1;
424
0
                                     })
425
246
                       : false;
426
246
    }
427
428
    /**
429
     * Return true if the two bitmaps contain the same elements.
430
     */
431
14
    bool operator==(const Roaring64Map& r) const {
432
        // we cannot use operator == on the map because either side may contain
433
        // empty Roaring Bitmaps
434
14
        auto lhs_iter = roarings.cbegin();
435
14
        auto lhs_cend = roarings.cend();
436
14
        auto rhs_iter = r.roarings.cbegin();
437
14
        auto rhs_cend = r.roarings.cend();
438
27
        while (lhs_iter != lhs_cend && rhs_iter != rhs_cend) {
439
13
            auto lhs_key = lhs_iter->first;
440
13
            auto rhs_key = rhs_iter->first;
441
13
            const auto& lhs_map = lhs_iter->second;
442
13
            const auto& rhs_map = rhs_iter->second;
443
13
            if (lhs_map.isEmpty()) {
444
1
                ++lhs_iter;
445
1
                continue;
446
1
            }
447
12
            if (rhs_map.isEmpty()) {
448
2
                ++rhs_iter;
449
2
                continue;
450
2
            }
451
10
            if (!(lhs_key == rhs_key)) {
452
0
                return false;
453
0
            }
454
10
            if (!(lhs_map == rhs_map)) {
455
0
                return false;
456
0
            }
457
10
            ++lhs_iter;
458
10
            ++rhs_iter;
459
10
        }
460
16
        while (lhs_iter != lhs_cend) {
461
4
            if (!lhs_iter->second.isEmpty()) {
462
2
                return false;
463
2
            }
464
2
            ++lhs_iter;
465
2
        }
466
14
        while (rhs_iter != rhs_cend) {
467
3
            if (!rhs_iter->second.isEmpty()) {
468
1
                return false;
469
1
            }
470
2
            ++rhs_iter;
471
2
        }
472
11
        return true;
473
12
    }
474
475
    /** convert array and bitmap containers to run containers when it is more
476
     * efficient;
477
     * also convert from run containers when more space efficient.  Returns
478
     * true if the result has at least one run container.
479
     * Additional savings might be possible by calling shrinkToFit().
480
     */
481
29.1k
    bool runOptimize() {
482
29.1k
        return std::accumulate(
483
29.1k
                roarings.begin(), roarings.end(), true,
484
29.1k
                [](bool previous, std::pair<const uint32_t, roaring::Roaring>& map_entry) {
485
29.1k
                    return map_entry.second.runOptimize() && previous;
486
29.1k
                });
487
29.1k
    }
488
489
    /**
490
     * If needed, reallocate memory to shrink the memory usage. Returns
491
     * the number of bytes saved.
492
    */
493
29.1k
    size_t shrinkToFit() {
494
29.1k
        size_t savedBytes = 0;
495
29.1k
        auto iter = roarings.begin();
496
58.2k
        while (iter != roarings.cend()) {
497
29.1k
            if (iter->second.isEmpty()) {
498
                // empty Roarings are 84 bytes
499
1
                savedBytes += 88;
500
1
                iter = roarings.erase(iter);
501
29.1k
            } else {
502
29.1k
                savedBytes += iter->second.shrinkToFit();
503
29.1k
                iter++;
504
29.1k
            }
505
29.1k
        }
506
29.1k
        return savedBytes;
507
29.1k
    }
508
509
    /**
510
     * Iterate over the bitmap elements. The function iterator is called once
511
     * for all the values with ptr (can be nullptr) as the second parameter of each
512
     * call.
513
     *
514
     * roaring_iterator is simply a pointer to a function that returns bool
515
     * (true means that the iteration should continue while false means that it
516
     * should stop), and takes (uint32_t,void*) as inputs.
517
     */
518
4.01k
    void iterate(roaring::api::roaring_iterator64 iterator, void* ptr) const {
519
4.04k
        for (const auto& map_entry : roarings) {
520
4.04k
            bool should_continue = roaring_iterate64(&map_entry.second.roaring, iterator,
521
4.04k
                                                     uint64_t(map_entry.first) << 32, ptr);
522
4.04k
            if (!should_continue) {
523
1
                break;
524
1
            }
525
4.04k
        }
526
4.01k
    }
527
528
    /**
529
     * write a bitmap to a char buffer.
530
     * Returns how many bytes were written which should be getSizeInBytes().
531
     */
532
20.9k
    size_t write(char* buf, int serialize_version) const {
533
20.9k
        bool is_v1 = serialize_version == 1;
534
20.9k
        BitmapTypeCode::type type_bitmap32 =
535
20.9k
                is_v1 ? BitmapTypeCode::type::BITMAP32 : BitmapTypeCode::type::BITMAP32_V2;
536
20.9k
        BitmapTypeCode::type type_bitmap64 =
537
20.9k
                is_v1 ? BitmapTypeCode::type::BITMAP64 : BitmapTypeCode::type::BITMAP64_V2;
538
539
20.9k
        if (is32BitsEnough()) {
540
20.9k
            *(buf++) = type_bitmap32;
541
20.9k
            auto it = roarings.find(0);
542
20.9k
            if (it == roarings.end()) { // empty bitmap
543
2
                roaring::Roaring r;
544
2
                return r.write(buf, is_v1) + 1;
545
2
            }
546
20.9k
            return it->second.write(buf, is_v1) + 1;
547
20.9k
        }
548
549
7
        const char* orig = buf;
550
        // put type code
551
7
        *(buf++) = type_bitmap64;
552
        // push map size
553
7
        buf = (char*)encode_varint64((uint8_t*)buf, roarings.size());
554
7
        std::for_each(roarings.cbegin(), roarings.cend(),
555
16
                      [&buf, is_v1](const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
556
                          // push map key
557
16
                          encode_fixed32_le((uint8_t*)buf, map_entry.first);
558
16
                          buf += sizeof(uint32_t);
559
                          // push map value Roaring
560
16
                          buf += map_entry.second.write(buf, is_v1);
561
16
                      });
562
7
        return buf - orig;
563
20.9k
    }
564
565
    /**
566
     * read a bitmap from a serialized version.
567
     *
568
     * This function is unsafe in the sense that if you provide bad data,
569
     * many bytes could be read, possibly causing a buffer overflow. See also readSafe.
570
     */
571
9.11k
    static Roaring64Map read(const char* buf) {
572
9.11k
        Roaring64Map result;
573
574
9.11k
        bool is_v1 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP64 == *buf;
575
9.11k
        bool is_bitmap32 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP32_V2 == *buf;
576
9.11k
        bool is_bitmap64 = BitmapTypeCode::BITMAP64 == *buf || BitmapTypeCode::BITMAP64_V2 == *buf;
577
9.11k
        if (is_bitmap32) {
578
9.10k
            roaring::Roaring read = roaring::Roaring::read(buf + 1, is_v1);
579
9.10k
            result.emplaceOrInsert(0, std::move(read));
580
9.10k
            return result;
581
9.10k
        }
582
583
9.11k
        DCHECK(is_bitmap64);
584
8
        buf++;
585
586
        // get map size (varint64 took 1~10 bytes)
587
8
        uint64_t map_size;
588
8
        buf = reinterpret_cast<const char*>(
589
8
                decode_varint64_ptr(reinterpret_cast<const uint8_t*>(buf),
590
8
                                    reinterpret_cast<const uint8_t*>(buf + 10), &map_size));
591
8
        DCHECK(buf != nullptr);
592
26
        for (uint64_t lcv = 0; lcv < map_size; lcv++) {
593
            // get map key
594
18
            uint32_t key = decode_fixed32_le(reinterpret_cast<const uint8_t*>(buf));
595
18
            buf += sizeof(uint32_t);
596
            // read map value Roaring
597
18
            roaring::Roaring read_var = roaring::Roaring::read(buf, is_v1);
598
            // forward buffer past the last Roaring Bitmap
599
18
            buf += read_var.getSizeInBytes(is_v1);
600
18
            result.emplaceOrInsert(key, std::move(read_var));
601
18
        }
602
8
        return result;
603
9.11k
    }
604
605
    /**
606
     * How many bytes are required to serialize this bitmap
607
     */
608
29.1k
    size_t getSizeInBytes(int serialize_version) const {
609
29.1k
        bool is_v1 = serialize_version == 1;
610
29.1k
        if (is32BitsEnough()) {
611
29.1k
            auto it = roarings.find(0);
612
29.1k
            if (it == roarings.end()) { // empty bitmap
613
2
                roaring::Roaring r;
614
2
                return r.getSizeInBytes(is_v1) + 1;
615
2
            }
616
29.1k
            return it->second.getSizeInBytes(is_v1) + 1;
617
29.1k
        }
618
        // start with type code, map size and size of keys for each map entry
619
13
        size_t init = 1 + varint_length(roarings.size()) + roarings.size() * sizeof(uint32_t);
620
13
        return std::accumulate(
621
13
                roarings.cbegin(), roarings.cend(), init,
622
28
                [=](size_t previous, const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
623
                    // add in bytes used by each Roaring
624
28
                    return previous + map_entry.second.getSizeInBytes(is_v1);
625
28
                });
626
29.1k
    }
627
628
    /**
629
     * remove all elements
630
     */
631
0
    void clear() { roarings.clear(); }
632
633
    /**
634
     * Return whether all elements can be represented in 32 bits
635
     */
636
50.1k
    bool is32BitsEnough() const { return maximum() <= std::numeric_limits<uint32_t>::max(); }
637
638
    /**
639
     * Computes the intersection between two bitmaps and returns new bitmap.
640
     * The current bitmap and the provided bitmap are unchanged.
641
     */
642
2
    Roaring64Map operator&(const Roaring64Map& o) const { return Roaring64Map(*this) &= o; }
643
644
    /**
645
     * Computes the difference between two bitmaps and returns new bitmap.
646
     * The current bitmap and the provided bitmap are unchanged.
647
     */
648
0
    Roaring64Map operator-(const Roaring64Map& o) const { return Roaring64Map(*this) -= o; }
649
650
    /**
651
     * Computes the union between two bitmaps and returns new bitmap.
652
     * The current bitmap and the provided bitmap are unchanged.
653
     */
654
1
    Roaring64Map operator|(const Roaring64Map& o) const { return Roaring64Map(*this) |= o; }
655
656
    /**
657
     * Computes the symmetric union between two bitmaps and returns new bitmap.
658
     * The current bitmap and the provided bitmap are unchanged.
659
     */
660
1
    Roaring64Map operator^(const Roaring64Map& o) const { return Roaring64Map(*this) ^= o; }
661
662
    /**
663
     * computes the logical or (union) between "n" bitmaps (referenced by a
664
     * pointer).
665
     */
666
5
    static Roaring64Map fastunion(size_t n, const Roaring64Map** inputs) {
667
5
        struct pq_entry {
668
5
            phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator iterator;
669
5
            phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator end;
670
5
        };
671
672
5
        struct pq_comp {
673
9
            bool operator()(const pq_entry& lhs, const pq_entry& rhs) const {
674
9
                auto left_key = lhs.iterator->first;
675
9
                auto right_key = rhs.iterator->first;
676
                // We compare in the opposite direction than normal because priority
677
                // queues normally order from largest to smallest, but we want
678
                // smallest to largest.
679
9
                return left_key > right_key;
680
9
            }
681
5
        };
682
683
5
        std::priority_queue<pq_entry, std::vector<pq_entry>, pq_comp> pq;
684
685
12
        for (auto i = 0; i < n; ++i) {
686
7
            const auto& roaring = inputs[i]->roarings;
687
7
            if (roaring.begin() != roaring.end()) {
688
7
                pq.push({roaring.begin(), roaring.end()});
689
7
            }
690
7
        }
691
692
5
        std::vector<const roaring::api::roaring_bitmap_t*> group_bitmaps;
693
5
        Roaring64Map result;
694
13
        while (!pq.empty()) {
695
8
            auto group_key = pq.top().iterator->first;
696
8
            group_bitmaps.clear();
697
698
19
            while (!pq.empty()) {
699
14
                auto candidate_current_iter = pq.top().iterator;
700
14
                auto candidate_end_iter = pq.top().end;
701
702
14
                auto candidate_key = candidate_current_iter->first;
703
14
                const auto& candidate_bitmap = candidate_current_iter->second;
704
705
14
                if (candidate_key != group_key) {
706
3
                    break;
707
3
                }
708
709
11
                group_bitmaps.push_back(&candidate_bitmap.roaring);
710
11
                pq.pop();
711
11
                ++candidate_current_iter;
712
11
                if (candidate_current_iter != candidate_end_iter) {
713
4
                    pq.push({candidate_current_iter, candidate_end_iter});
714
4
                }
715
11
            }
716
8
            auto* inner_result = roaring::api::roaring_bitmap_or_many(group_bitmaps.size(),
717
8
                                                                      group_bitmaps.data());
718
8
            result.roarings.insert(result.roarings.end(),
719
8
                                   std::make_pair(group_key, roaring::Roaring(inner_result)));
720
8
        }
721
722
5
        return result;
723
5
    }
724
725
    friend class Roaring64MapSetBitForwardIterator;
726
    using const_iterator = Roaring64MapSetBitForwardIterator;
727
728
    /**
729
    * Returns an iterator that can be used to access the position of the
730
    * set bits. The running time complexity of a full scan is proportional to
731
    * the
732
    * number
733
    * of set bits: be aware that if you have long strings of 1s, this can be
734
    * very inefficient.
735
    *
736
    * It can be much faster to use the toArray method if you want to
737
    * retrieve the set bits.
738
    */
739
    const_iterator begin() const;
740
741
    /**
742
    * A bogus iterator that can be used together with begin()
743
    * for constructions such as for(auto i = b.begin();
744
    * i!=b.end(); ++i) {}
745
    */
746
    const_iterator end() const;
747
748
private:
749
    phmap::btree_map<uint32_t, roaring::Roaring> roarings {};
750
    bool copyOnWrite {false};
751
8.44M
    static uint32_t highBytes(const uint64_t in) { return uint32_t(in >> 32); }
752
4.22M
    static uint32_t lowBytes(const uint64_t in) { return uint32_t(in); }
753
53.0k
    static uint64_t uniteBytes(const uint32_t highBytes, const uint32_t lowBytes) {
754
53.0k
        return (uint64_t(highBytes) << 32) | uint64_t(lowBytes);
755
53.0k
    }
756
1
    void emplaceOrInsert(const uint32_t key, const roaring::Roaring& value) {
757
1
        roarings.emplace(std::make_pair(key, value));
758
1
    }
759
760
9.12k
    void emplaceOrInsert(const uint32_t key, roaring::Roaring&& value) {
761
9.12k
        roarings.emplace(key, value);
762
9.12k
    }
763
};
764
765
// Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.4.0/cpp/roaring64map.hh
766
// Used to go through the set bits. Not optimally fast, but convenient.
767
class Roaring64MapSetBitForwardIterator {
768
public:
769
    using type_of_iterator = Roaring64MapSetBitForwardIterator;
770
771
    /**
772
     * Provides the location of the set bit.
773
     */
774
2.96k
    uint64_t operator*() const {
775
2.96k
        return Roaring64Map::uniteBytes(map_iter->first, i.current_value);
776
2.96k
    }
777
778
975
    type_of_iterator& operator++() { // ++i, must returned inc. value
779
975
        if (i.has_value) {
780
975
            roaring_advance_uint32_iterator(&i);
781
975
        }
782
1.00k
        while (!i.has_value) {
783
34
            map_iter++;
784
34
            if (map_iter == map_end) return *this;
785
25
            roaring_init_iterator(&map_iter->second.roaring, &i);
786
25
        }
787
966
        return *this;
788
975
    }
789
790
1.80k
    type_of_iterator operator++(int) { // i++, must return orig. value
791
1.80k
        Roaring64MapSetBitForwardIterator orig(*this);
792
1.80k
        roaring_advance_uint32_iterator(&i);
793
1.81k
        while (!i.has_value) {
794
4
            map_iter++;
795
4
            if (map_iter == map_end) return orig;
796
2
            roaring_init_iterator(&map_iter->second.roaring, &i);
797
2
        }
798
1.80k
        return orig;
799
1.80k
    }
800
801
5
    bool move(const uint64_t& x) {
802
5
        map_iter = p.lower_bound(Roaring64Map::highBytes(x));
803
5
        if (map_iter != p.cend()) {
804
5
            roaring_init_iterator(&map_iter->second.roaring, &i);
805
5
            if (map_iter->first == Roaring64Map::highBytes(x)) {
806
5
                if (roaring_move_uint32_iterator_equalorlarger(&i, Roaring64Map::lowBytes(x)))
807
4
                    return true;
808
1
                map_iter++;
809
1
                if (map_iter == map_end) return false;
810
0
                roaring_init_iterator(&map_iter->second.roaring, &i);
811
0
            }
812
0
            return true;
813
5
        }
814
0
        return false;
815
5
    }
816
817
527
    bool operator==(const Roaring64MapSetBitForwardIterator& o) const {
818
527
        if (map_iter == map_end && o.map_iter == o.map_end) return true;
819
519
        if (o.map_iter == o.map_end) return false;
820
1
        return **this == *o;
821
519
    }
822
823
2.30k
    bool operator!=(const Roaring64MapSetBitForwardIterator& o) const {
824
2.30k
        if (map_iter == map_end && o.map_iter == o.map_end) return false;
825
2.29k
        if (o.map_iter == o.map_end) return true;
826
0
        return **this != *o;
827
2.29k
    }
828
5
    Roaring64MapSetBitForwardIterator& operator=(const Roaring64MapSetBitForwardIterator& r) {
829
5
        map_iter = r.map_iter;
830
5
        map_end = r.map_end;
831
5
        i = r.i;
832
5
        return *this;
833
5
    }
834
835
    Roaring64MapSetBitForwardIterator(const Roaring64MapSetBitForwardIterator& r) = default;
836
837
    Roaring64MapSetBitForwardIterator(const Roaring64Map& parent, bool exhausted = false)
838
2.29k
            : p(parent.roarings), map_end(parent.roarings.cend()) {
839
2.29k
        if (exhausted || parent.roarings.empty()) {
840
2.27k
            map_iter = parent.roarings.cend();
841
2.27k
        } else {
842
21
            map_iter = parent.roarings.cbegin();
843
21
            roaring_init_iterator(&map_iter->second.roaring, &i);
844
21
            while (!i.has_value) {
845
0
                map_iter++;
846
0
                if (map_iter == map_end) return;
847
0
                roaring_init_iterator(&map_iter->second.roaring, &i);
848
0
            }
849
21
        }
850
2.29k
    }
851
852
protected:
853
    const phmap::btree_map<uint32_t, roaring::Roaring>& p;
854
    phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_iter {};
855
    phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_end {};
856
    roaring::api::roaring_uint32_iterator_t i {};
857
};
858
859
18
inline Roaring64MapSetBitForwardIterator Roaring64Map::begin() const {
860
18
    return {*this};
861
18
}
862
863
2.26k
inline Roaring64MapSetBitForwardIterator Roaring64Map::end() const {
864
2.26k
    return {*this, true};
865
2.26k
}
866
867
} // namespace detail
868
869
// Represent the in-memory and on-disk structure of Doris's BITMAP data type.
870
// Optimize for the case where the bitmap contains 0 or 1 element which is common
871
// for streaming load scenario.
872
class BitmapValueIterator;
873
class BitmapValue {
874
public:
875
    template <typename T>
876
    using SetContainer = phmap::flat_hash_set<T>;
877
878
    // Construct an empty bitmap.
879
96.9k
    BitmapValue() : _sv(0), _bitmap(nullptr), _type(EMPTY), _is_shared(false) { _set.clear(); }
880
881
    // Construct a bitmap with one element.
882
    explicit BitmapValue(uint64_t value)
883
74
            : _sv(value), _bitmap(nullptr), _type(SINGLE), _is_shared(false) {}
884
885
    // Construct a bitmap from serialized data.
886
3.08k
    explicit BitmapValue(const char* src) : _is_shared(false) {
887
3.08k
        bool res = deserialize(src);
888
3.08k
        DCHECK(res);
889
3.08k
    }
890
891
    // !FIXME: We should rethink the logic here
892
15.2k
    BitmapValue(const BitmapValue& other) {
893
15.2k
        _type = other._type;
894
15.2k
        switch (other._type) {
895
161
        case EMPTY:
896
161
            break;
897
159
        case SINGLE:
898
159
            _sv = other._sv;
899
159
            break;
900
12.5k
        case BITMAP:
901
12.5k
            _bitmap = other._bitmap;
902
12.5k
            break;
903
2.36k
        case SET:
904
2.36k
            _set = other._set;
905
2.36k
            break;
906
15.2k
        }
907
908
15.2k
        if (other._type == BITMAP) {
909
12.5k
            _is_shared = true;
910
            // should also set other's state to shared, so that other bitmap value will
911
            // create a new bitmap when it wants to modify it.
912
12.5k
            other._is_shared = true;
913
12.5k
        }
914
15.2k
    }
915
916
    // !FIXME: We should rethink the logic here
917
19.2k
    BitmapValue(BitmapValue&& other) noexcept {
918
19.2k
        _type = other._type;
919
19.2k
        switch (other._type) {
920
90
        case EMPTY:
921
90
            break;
922
247
        case SINGLE:
923
247
            _sv = other._sv;
924
247
            break;
925
15.9k
        case BITMAP:
926
15.9k
            _bitmap = std::move(other._bitmap);
927
15.9k
            other._bitmap = nullptr;
928
15.9k
            break;
929
3.04k
        case SET:
930
3.04k
            _set = std::move(other._set);
931
3.04k
            break;
932
19.2k
        }
933
19.2k
        _is_shared = other._is_shared;
934
19.2k
        other._type = EMPTY;
935
19.2k
        other._is_shared = false;
936
19.2k
    }
937
938
    // !FIXME: We should rethink the logic here
939
110
    BitmapValue& operator=(const BitmapValue& other) {
940
110
        if (this == &other) {
941
5
            return *this;
942
5
        }
943
105
        reset();
944
105
        _type = other._type;
945
105
        switch (other._type) {
946
20
        case EMPTY:
947
20
            break;
948
32
        case SINGLE:
949
32
            _sv = other._sv;
950
32
            break;
951
26
        case BITMAP:
952
26
            _bitmap = other._bitmap;
953
26
            break;
954
27
        case SET:
955
27
            _set = other._set;
956
27
            break;
957
105
        }
958
959
105
        if (other._type == BITMAP) {
960
26
            _is_shared = true;
961
            // should also set other's state to shared, so that other bitmap value will
962
            // create a new bitmap when it wants to modify it.
963
26
            other._is_shared = true;
964
26
        }
965
105
        return *this;
966
105
    }
967
968
21
    static BitmapValue empty_bitmap() { return BitmapValue {}; }
969
970
25
    BitmapValue& operator=(BitmapValue&& other) noexcept {
971
25
        if (this == &other) {
972
0
            return *this;
973
0
        }
974
25
        reset();
975
976
25
        _type = other._type;
977
25
        switch (other._type) {
978
1
        case EMPTY:
979
1
            break;
980
14
        case SINGLE:
981
14
            _sv = other._sv;
982
14
            break;
983
4
        case BITMAP:
984
4
            _bitmap = std::move(other._bitmap);
985
4
            other._bitmap = nullptr;
986
4
            break;
987
6
        case SET:
988
6
            _set = std::move(other._set);
989
6
            break;
990
25
        }
991
25
        _is_shared = other._is_shared;
992
25
        return *this;
993
25
    }
994
995
    // Construct a bitmap from given elements.
996
62
    explicit BitmapValue(const std::vector<uint64_t>& bits) : _is_shared(false) {
997
62
        if (bits.size() == 0) {
998
0
            _type = EMPTY;
999
0
            return;
1000
0
        }
1001
1002
62
        if (bits.size() == 1) {
1003
1
            _type = SINGLE;
1004
1
            _sv = bits[0];
1005
1
            return;
1006
1
        }
1007
1008
61
        if (!config::enable_set_in_bitmap_value || bits.size() > SET_TYPE_THRESHOLD) {
1009
26
            _type = BITMAP;
1010
26
            _prepare_bitmap_for_write();
1011
26
            _bitmap->addMany(bits.size(), &bits[0]);
1012
35
        } else {
1013
35
            _type = SET;
1014
137
            for (auto v : bits) {
1015
137
                _set.insert(v);
1016
137
            }
1017
35
        }
1018
61
    }
1019
1020
31
    BitmapTypeCode::type get_type_code() const {
1021
31
        switch (_type) {
1022
1
        case EMPTY:
1023
1
            return BitmapTypeCode::EMPTY;
1024
7
        case SINGLE:
1025
7
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1026
4
                return BitmapTypeCode::SINGLE32;
1027
4
            } else {
1028
3
                return BitmapTypeCode::SINGLE64;
1029
3
            }
1030
9
        case SET:
1031
9
            return BitmapTypeCode::SET;
1032
14
        case BITMAP:
1033
14
            bool is_v1 = (config::bitmap_serialize_version == 1);
1034
14
            if (_bitmap->is32BitsEnough()) {
1035
9
                return is_v1 ? BitmapTypeCode::type::BITMAP32 : BitmapTypeCode::type::BITMAP32_V2;
1036
9
            } else {
1037
5
                return is_v1 ? BitmapTypeCode::type::BITMAP64 : BitmapTypeCode::type::BITMAP64_V2;
1038
5
            }
1039
31
        }
1040
0
        __builtin_unreachable();
1041
31
    }
1042
1043
    template <typename T>
1044
20
    void add_many(const T* values, const size_t count) {
1045
20
        switch (_type) {
1046
15
        case EMPTY:
1047
15
            if (count == 1) {
1048
0
                _sv = values[0];
1049
0
                _type = SINGLE;
1050
15
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1051
240
                for (size_t i = 0; i != count; ++i) {
1052
231
                    _set.insert(values[i]);
1053
231
                }
1054
9
                _type = SET;
1055
9
            } else {
1056
6
                _prepare_bitmap_for_write();
1057
6
                _bitmap->addMany(count, values);
1058
6
                _type = BITMAP;
1059
6
            }
1060
15
            break;
1061
0
        case SINGLE:
1062
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1063
0
                _set.insert(_sv);
1064
0
                for (size_t i = 0; i != count; ++i) {
1065
0
                    _set.insert(values[i]);
1066
0
                }
1067
0
                _type = SET;
1068
0
                _convert_to_bitmap_if_need();
1069
0
            } else {
1070
0
                _prepare_bitmap_for_write();
1071
0
                _bitmap->add(_sv);
1072
0
                _bitmap->addMany(count, values);
1073
0
                _type = BITMAP;
1074
0
            }
1075
0
            break;
1076
1
        case BITMAP:
1077
1
            _prepare_bitmap_for_write();
1078
1
            _bitmap->addMany(count, values);
1079
1
            break;
1080
4
        case SET:
1081
324
            for (size_t i = 0; i != count; ++i) {
1082
320
                _set.insert(values[i]);
1083
320
            }
1084
4
            _convert_to_bitmap_if_need();
1085
4
            break;
1086
20
        }
1087
20
    }
_ZN5doris11BitmapValue8add_manyImEEvPKT_m
Line
Count
Source
1044
17
    void add_many(const T* values, const size_t count) {
1045
17
        switch (_type) {
1046
12
        case EMPTY:
1047
12
            if (count == 1) {
1048
0
                _sv = values[0];
1049
0
                _type = SINGLE;
1050
12
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1051
192
                for (size_t i = 0; i != count; ++i) {
1052
186
                    _set.insert(values[i]);
1053
186
                }
1054
6
                _type = SET;
1055
6
            } else {
1056
6
                _prepare_bitmap_for_write();
1057
6
                _bitmap->addMany(count, values);
1058
6
                _type = BITMAP;
1059
6
            }
1060
12
            break;
1061
0
        case SINGLE:
1062
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1063
0
                _set.insert(_sv);
1064
0
                for (size_t i = 0; i != count; ++i) {
1065
0
                    _set.insert(values[i]);
1066
0
                }
1067
0
                _type = SET;
1068
0
                _convert_to_bitmap_if_need();
1069
0
            } else {
1070
0
                _prepare_bitmap_for_write();
1071
0
                _bitmap->add(_sv);
1072
0
                _bitmap->addMany(count, values);
1073
0
                _type = BITMAP;
1074
0
            }
1075
0
            break;
1076
1
        case BITMAP:
1077
1
            _prepare_bitmap_for_write();
1078
1
            _bitmap->addMany(count, values);
1079
1
            break;
1080
4
        case SET:
1081
324
            for (size_t i = 0; i != count; ++i) {
1082
320
                _set.insert(values[i]);
1083
320
            }
1084
4
            _convert_to_bitmap_if_need();
1085
4
            break;
1086
17
        }
1087
17
    }
_ZN5doris11BitmapValue8add_manyIiEEvPKT_m
Line
Count
Source
1044
3
    void add_many(const T* values, const size_t count) {
1045
3
        switch (_type) {
1046
3
        case EMPTY:
1047
3
            if (count == 1) {
1048
0
                _sv = values[0];
1049
0
                _type = SINGLE;
1050
3
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1051
48
                for (size_t i = 0; i != count; ++i) {
1052
45
                    _set.insert(values[i]);
1053
45
                }
1054
3
                _type = SET;
1055
3
            } else {
1056
0
                _prepare_bitmap_for_write();
1057
0
                _bitmap->addMany(count, values);
1058
0
                _type = BITMAP;
1059
0
            }
1060
3
            break;
1061
0
        case SINGLE:
1062
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1063
0
                _set.insert(_sv);
1064
0
                for (size_t i = 0; i != count; ++i) {
1065
0
                    _set.insert(values[i]);
1066
0
                }
1067
0
                _type = SET;
1068
0
                _convert_to_bitmap_if_need();
1069
0
            } else {
1070
0
                _prepare_bitmap_for_write();
1071
0
                _bitmap->add(_sv);
1072
0
                _bitmap->addMany(count, values);
1073
0
                _type = BITMAP;
1074
0
            }
1075
0
            break;
1076
0
        case BITMAP:
1077
0
            _prepare_bitmap_for_write();
1078
0
            _bitmap->addMany(count, values);
1079
0
            break;
1080
0
        case SET:
1081
0
            for (size_t i = 0; i != count; ++i) {
1082
0
                _set.insert(values[i]);
1083
0
            }
1084
0
            _convert_to_bitmap_if_need();
1085
0
            break;
1086
3
        }
1087
3
    }
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIaEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIsEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIlEEvPKT_m
1088
1089
4.21M
    void add(uint64_t value) {
1090
4.21M
        switch (_type) {
1091
10.5k
        case EMPTY:
1092
10.5k
            if (!config::enable_set_in_bitmap_value) {
1093
5.32k
                _sv = value;
1094
5.32k
                _type = SINGLE;
1095
5.32k
            } else {
1096
5.19k
                _set.insert(value);
1097
5.19k
                _type = SET;
1098
5.19k
            }
1099
10.5k
            break;
1100
5.28k
        case SINGLE:
1101
            //there is no need to convert the type if two variables are equal
1102
5.28k
            if (_sv == value) {
1103
2
                break;
1104
2
            }
1105
5.27k
            if (config::enable_set_in_bitmap_value) {
1106
5
                _set.insert(_sv);
1107
5
                _set.insert(value);
1108
5
                _type = SET;
1109
5.27k
            } else {
1110
5.27k
                _prepare_bitmap_for_write();
1111
5.27k
                _bitmap->add(_sv);
1112
5.27k
                _bitmap->add(value);
1113
5.27k
                _type = BITMAP;
1114
5.27k
            }
1115
5.27k
            break;
1116
4.09M
        case BITMAP:
1117
4.09M
            _prepare_bitmap_for_write();
1118
4.09M
            _bitmap->add(value);
1119
4.09M
            break;
1120
97.7k
        case SET:
1121
97.7k
            _set.insert(value);
1122
97.7k
            _convert_to_bitmap_if_need();
1123
97.7k
            break;
1124
4.21M
        }
1125
4.21M
    }
1126
1127
8
    void remove(uint64_t value) {
1128
8
        switch (_type) {
1129
1
        case EMPTY:
1130
1
            break;
1131
1
        case SINGLE:
1132
            //there is need to convert the type if two variables are equal
1133
1
            if (_sv == value) {
1134
0
                _type = EMPTY;
1135
0
            }
1136
1
            break;
1137
4
        case BITMAP:
1138
4
            _prepare_bitmap_for_write();
1139
4
            _bitmap->remove(value);
1140
4
            _convert_to_smaller_type();
1141
4
            break;
1142
2
        case SET:
1143
2
            _set.erase(value);
1144
2
            _convert_to_smaller_type();
1145
2
            break;
1146
8
        }
1147
8
    }
1148
1149
    // Compute the union between the current bitmap and the provided bitmap.
1150
16
    BitmapValue& operator-=(const BitmapValue& rhs) {
1151
16
        switch (rhs._type) {
1152
4
        case EMPTY:
1153
4
            break;
1154
4
        case SINGLE:
1155
4
            remove(rhs._sv);
1156
4
            break;
1157
4
        case BITMAP:
1158
4
            switch (_type) {
1159
1
            case EMPTY:
1160
1
                break;
1161
1
            case SINGLE:
1162
1
                if (rhs._bitmap->contains(_sv)) {
1163
0
                    _type = EMPTY;
1164
0
                }
1165
1
                break;
1166
1
            case BITMAP:
1167
1
                _prepare_bitmap_for_write();
1168
1
                *_bitmap -= *rhs._bitmap;
1169
1
                _convert_to_smaller_type();
1170
1
                break;
1171
1
            case SET: {
1172
32
                for (auto it = _set.begin(); it != _set.end();) {
1173
31
                    if (rhs.contains(*it)) {
1174
16
                        it = _set.erase(it);
1175
16
                    } else {
1176
15
                        ++it;
1177
15
                    }
1178
31
                }
1179
1
                _convert_to_smaller_type();
1180
1
                break;
1181
0
            }
1182
4
            }
1183
4
            break;
1184
4
        case SET: {
1185
4
            switch (_type) {
1186
1
            case EMPTY:
1187
1
                break;
1188
1
            case SINGLE:
1189
1
                if (rhs._set.contains(_sv)) {
1190
0
                    _type = EMPTY;
1191
0
                }
1192
1
                break;
1193
1
            case BITMAP:
1194
1
                _prepare_bitmap_for_write();
1195
31
                for (auto v : rhs._set) {
1196
31
                    if (_bitmap->contains(v)) {
1197
0
                        _bitmap->remove(v);
1198
0
                    }
1199
31
                }
1200
1
                _convert_to_smaller_type();
1201
1
                break;
1202
1
            case SET: {
1203
32
                for (auto it = _set.begin(); it != _set.end();) {
1204
31
                    if (rhs.contains(*it)) {
1205
14
                        it = _set.erase(it);
1206
17
                    } else {
1207
17
                        ++it;
1208
17
                    }
1209
31
                }
1210
1
                _convert_to_smaller_type();
1211
1
                break;
1212
0
            }
1213
4
            }
1214
4
        }
1215
16
        }
1216
16
        return *this;
1217
16
    }
1218
1219
    // Compute the union between the current bitmap and the provided bitmap.
1220
    // Possible type transitions are:
1221
    // EMPTY  -> SINGLE
1222
    // EMPTY  -> BITMAP
1223
    // SINGLE -> BITMAP
1224
    // !FIXME: We should rethink the logic here
1225
64
    BitmapValue& operator|=(const BitmapValue& rhs) {
1226
64
        switch (rhs._type) {
1227
12
        case EMPTY:
1228
12
            break;
1229
30
        case SINGLE:
1230
30
            add(rhs._sv);
1231
30
            break;
1232
8
        case BITMAP:
1233
8
            switch (_type) {
1234
2
            case EMPTY:
1235
2
                _bitmap = rhs._bitmap;
1236
2
                rhs._is_shared = true;
1237
2
                _is_shared = true;
1238
2
                _type = BITMAP;
1239
2
                break;
1240
3
            case SINGLE:
1241
3
                _bitmap = rhs._bitmap;
1242
3
                rhs._is_shared = true;
1243
3
                _is_shared = true;
1244
3
                _prepare_bitmap_for_write();
1245
3
                _bitmap->add(_sv);
1246
3
                _type = BITMAP;
1247
3
                break;
1248
2
            case BITMAP:
1249
2
                _prepare_bitmap_for_write();
1250
2
                *_bitmap |= *rhs._bitmap;
1251
2
                break;
1252
1
            case SET: {
1253
1
                _prepare_bitmap_for_write();
1254
1
                *_bitmap = *rhs._bitmap;
1255
1256
31
                for (auto v : _set) {
1257
31
                    _bitmap->add(v);
1258
31
                }
1259
1
                _type = BITMAP;
1260
1
                break;
1261
0
            }
1262
8
            }
1263
8
            break;
1264
14
        case SET:
1265
14
            switch (_type) {
1266
1
            case EMPTY:
1267
1
                _set = rhs._set;
1268
1
                _type = SET;
1269
1
                break;
1270
1
            case SINGLE: {
1271
1
                if ((rhs._set.size() + rhs._set.contains(_sv) > SET_TYPE_THRESHOLD)) {
1272
0
                    _prepare_bitmap_for_write();
1273
0
                    _bitmap->add(_sv);
1274
0
                    for (auto v : rhs._set) {
1275
0
                        _bitmap->add(v);
1276
0
                    }
1277
0
                    _type = BITMAP;
1278
1
                } else {
1279
1
                    _set = rhs._set;
1280
1
                    _set.insert(_sv);
1281
1
                    _type = SET;
1282
1
                }
1283
1
                break;
1284
0
            }
1285
1
            case BITMAP:
1286
1
                _prepare_bitmap_for_write();
1287
31
                for (auto v : rhs._set) {
1288
31
                    _bitmap->add(v);
1289
31
                }
1290
1
                break;
1291
11
            case SET: {
1292
58
                for (auto v : rhs._set) {
1293
58
                    _set.insert(v);
1294
58
                }
1295
11
                _convert_to_bitmap_if_need();
1296
11
                break;
1297
0
            }
1298
14
            }
1299
14
            break;
1300
64
        }
1301
64
        return *this;
1302
64
    }
1303
1304
20
    BitmapValue& fastunion(const std::vector<const BitmapValue*>& values) {
1305
20
        std::vector<const detail::Roaring64Map*> bitmaps;
1306
20
        std::vector<uint64_t> single_values;
1307
20
        std::vector<const SetContainer<uint64_t>*> sets;
1308
1.73k
        for (const auto* value : values) {
1309
1.73k
            switch (value->_type) {
1310
5
            case EMPTY:
1311
5
                break;
1312
11
            case SINGLE:
1313
11
                single_values.push_back(value->_sv);
1314
11
                break;
1315
5
            case BITMAP:
1316
5
                bitmaps.push_back(value->_bitmap.get());
1317
5
                break;
1318
1.71k
            case SET:
1319
1.71k
                sets.push_back(&value->_set);
1320
1.71k
                break;
1321
1.73k
            }
1322
1.73k
        }
1323
1324
20
        if (!bitmaps.empty()) {
1325
5
            _prepare_bitmap_for_write();
1326
5
            switch (_type) {
1327
2
            case EMPTY:
1328
2
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1329
2
                break;
1330
2
            case SINGLE:
1331
2
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1332
2
                _bitmap->add(_sv);
1333
2
                break;
1334
1
            case BITMAP:
1335
1
                for (const auto* bitmap : bitmaps) {
1336
1
                    *_bitmap |= *bitmap;
1337
1
                }
1338
1
                break;
1339
0
            case SET: {
1340
0
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1341
0
                for (auto v : _set) {
1342
0
                    _bitmap->add(v);
1343
0
                }
1344
0
                _set.clear();
1345
0
                break;
1346
0
            }
1347
5
            }
1348
5
            _type = BITMAP;
1349
5
        }
1350
1351
20
        if (!sets.empty()) {
1352
1.71k
            for (auto& set : sets) {
1353
1.76k
                for (auto v : *set) {
1354
1.76k
                    _set.insert(v);
1355
1.76k
                }
1356
1.71k
            }
1357
6
            switch (_type) {
1358
6
            case EMPTY:
1359
6
                _type = SET;
1360
6
                break;
1361
0
            case SINGLE: {
1362
0
                _set.insert(_sv);
1363
0
                _type = SET;
1364
0
                break;
1365
0
            }
1366
0
            case BITMAP:
1367
0
                _prepare_bitmap_for_write();
1368
0
                for (auto v : _set) {
1369
0
                    _bitmap->add(v);
1370
0
                }
1371
0
                _type = BITMAP;
1372
0
                _set.clear();
1373
0
                break;
1374
0
            case SET: {
1375
0
                break;
1376
0
            }
1377
6
            }
1378
6
            if (_type == SET) {
1379
6
                _convert_to_bitmap_if_need();
1380
6
            }
1381
6
        }
1382
1383
20
        if (_type == EMPTY && single_values.size() == 1) {
1384
2
            if (config::enable_set_in_bitmap_value) {
1385
1
                _type = SET;
1386
1
                _set.insert(single_values[0]);
1387
1
            } else {
1388
1
                _sv = single_values[0];
1389
1
                _type = SINGLE;
1390
1
            }
1391
18
        } else if (!single_values.empty()) {
1392
6
            switch (_type) {
1393
2
            case EMPTY:
1394
3
            case SINGLE:
1395
3
                if (config::enable_set_in_bitmap_value) {
1396
2
                    _set.insert(single_values.cbegin(), single_values.cend());
1397
2
                    if (_type == SINGLE) {
1398
0
                        _set.insert(_sv);
1399
0
                    }
1400
2
                    _type = SET;
1401
2
                    _convert_to_bitmap_if_need();
1402
2
                } else {
1403
1
                    _prepare_bitmap_for_write();
1404
1
                    _bitmap->addMany(single_values.size(), single_values.data());
1405
1
                    if (_type == SINGLE) {
1406
1
                        _bitmap->add(_sv);
1407
1
                    }
1408
1
                    _type = BITMAP;
1409
1
                    _convert_to_smaller_type();
1410
1
                }
1411
3
                break;
1412
3
            case BITMAP: {
1413
3
                _prepare_bitmap_for_write();
1414
3
                _bitmap->addMany(single_values.size(), single_values.data());
1415
3
                break;
1416
2
            }
1417
0
            case SET:
1418
0
                _set.insert(single_values.cbegin(), single_values.cend());
1419
0
                _convert_to_bitmap_if_need();
1420
0
                break;
1421
6
            }
1422
6
        }
1423
1424
20
        return *this;
1425
20
    }
1426
1427
    // Compute the intersection between the current bitmap and the provided bitmap.
1428
    // Possible type transitions are:
1429
    // SINGLE -> EMPTY
1430
    // BITMAP -> EMPTY
1431
    // BITMAP -> SINGLE
1432
51
    BitmapValue& operator&=(const BitmapValue& rhs) {
1433
51
        switch (rhs._type) {
1434
14
        case EMPTY:
1435
14
            reset(); // empty & any = empty
1436
14
            break;
1437
10
        case SINGLE:
1438
10
            switch (_type) {
1439
2
            case EMPTY:
1440
2
                break;
1441
3
            case SINGLE:
1442
3
                if (_sv != rhs._sv) {
1443
2
                    _type = EMPTY;
1444
2
                }
1445
3
                break;
1446
4
            case BITMAP:
1447
4
                if (!_bitmap->contains(rhs._sv)) {
1448
1
                    _type = EMPTY;
1449
3
                } else {
1450
3
                    _type = SINGLE;
1451
3
                    _sv = rhs._sv;
1452
3
                }
1453
4
                _bitmap.reset();
1454
4
                _is_shared = false;
1455
4
                break;
1456
1
            case SET:
1457
1
                if (!_set.contains(rhs._sv)) {
1458
0
                    _type = EMPTY;
1459
1
                } else {
1460
1
                    _type = SINGLE;
1461
1
                    _sv = rhs._sv;
1462
1
                }
1463
1
                _set.clear();
1464
1
                break;
1465
10
            }
1466
10
            break;
1467
10
        case BITMAP:
1468
10
            switch (_type) {
1469
2
            case EMPTY:
1470
2
                break;
1471
3
            case SINGLE:
1472
3
                if (!rhs._bitmap->contains(_sv)) {
1473
2
                    _type = EMPTY;
1474
2
                }
1475
3
                break;
1476
4
            case BITMAP:
1477
4
                _prepare_bitmap_for_write();
1478
4
                *_bitmap &= *rhs._bitmap;
1479
4
                _convert_to_smaller_type();
1480
4
                break;
1481
1
            case SET:
1482
32
                for (auto it = _set.begin(); it != _set.end();) {
1483
31
                    if (!rhs._bitmap->contains(*it)) {
1484
15
                        it = _set.erase(it);
1485
16
                    } else {
1486
16
                        ++it;
1487
16
                    }
1488
31
                }
1489
1
                _convert_to_smaller_type();
1490
1
                break;
1491
10
            }
1492
10
            break;
1493
17
        case SET:
1494
17
            switch (_type) {
1495
1
            case EMPTY:
1496
1
                break;
1497
1
            case SINGLE:
1498
1
                if (!rhs._set.contains(_sv)) {
1499
1
                    _type = EMPTY;
1500
1
                }
1501
1
                break;
1502
1
            case BITMAP:
1503
1
                _prepare_bitmap_for_write();
1504
31
                for (auto v : rhs._set) {
1505
31
                    if (_bitmap->contains(v)) {
1506
0
                        _set.insert(v);
1507
0
                    }
1508
31
                }
1509
1
                _type = SET;
1510
1
                _bitmap.reset();
1511
1
                _is_shared = false;
1512
1
                _convert_to_smaller_type();
1513
1
                break;
1514
14
            case SET:
1515
79
                for (auto it = _set.begin(); it != _set.end();) {
1516
65
                    if (!rhs._set.contains(*it)) {
1517
37
                        it = _set.erase(it);
1518
37
                    } else {
1519
28
                        ++it;
1520
28
                    }
1521
65
                }
1522
14
                _convert_to_smaller_type();
1523
14
                break;
1524
17
            }
1525
17
            break;
1526
51
        }
1527
51
        return *this;
1528
51
    }
1529
1530
    // Compute the symmetric union between the current bitmap and the provided bitmap.
1531
    // Possible type transitions are:
1532
    // SINGLE -> EMPTY
1533
    // BITMAP -> EMPTY
1534
    // BITMAP -> SINGLE
1535
    // !FIXME: We should rethink the logic here
1536
29
    BitmapValue& operator^=(const BitmapValue& rhs) {
1537
29
        switch (rhs._type) {
1538
8
        case EMPTY:
1539
8
            break;
1540
4
        case SINGLE:
1541
4
            switch (_type) {
1542
1
            case EMPTY:
1543
1
                add(rhs._sv);
1544
1
                break;
1545
1
            case SINGLE:
1546
1
                if (_sv == rhs._sv) {
1547
0
                    _type = EMPTY;
1548
1
                } else {
1549
1
                    add(rhs._sv);
1550
1
                }
1551
1
                break;
1552
1
            case BITMAP:
1553
1
                if (!_bitmap->contains(rhs._sv)) {
1554
0
                    add(rhs._sv);
1555
1
                } else {
1556
1
                    _prepare_bitmap_for_write();
1557
1
                    _bitmap->remove(rhs._sv);
1558
1
                }
1559
1
                break;
1560
1
            case SET:
1561
1
                if (!_set.contains(rhs._sv)) {
1562
0
                    _set.insert(rhs._sv);
1563
1
                } else {
1564
1
                    _set.erase(rhs._sv);
1565
1
                }
1566
1
                break;
1567
4
            }
1568
4
            break;
1569
6
        case BITMAP:
1570
6
            switch (_type) {
1571
1
            case EMPTY:
1572
1
                _bitmap = rhs._bitmap;
1573
1
                rhs._is_shared = true;
1574
1
                _is_shared = true;
1575
1
                _type = BITMAP;
1576
1
                break;
1577
1
            case SINGLE:
1578
1
                _bitmap = rhs._bitmap;
1579
1
                rhs._is_shared = true;
1580
1
                _is_shared = true;
1581
1
                _type = BITMAP;
1582
1
                _prepare_bitmap_for_write();
1583
1
                if (!rhs._bitmap->contains(_sv)) {
1584
1
                    _bitmap->add(_sv);
1585
1
                } else {
1586
0
                    _bitmap->remove(_sv);
1587
0
                }
1588
1
                break;
1589
3
            case BITMAP:
1590
3
                _prepare_bitmap_for_write();
1591
3
                *_bitmap ^= *rhs._bitmap;
1592
3
                _convert_to_smaller_type();
1593
3
                break;
1594
1
            case SET:
1595
1
                _prepare_bitmap_for_write();
1596
1
                *_bitmap = *rhs._bitmap;
1597
31
                for (auto v : _set) {
1598
31
                    if (_bitmap->contains(v)) {
1599
16
                        _bitmap->remove(v);
1600
16
                    } else {
1601
15
                        _bitmap->add(v);
1602
15
                    }
1603
31
                }
1604
1
                _type = BITMAP;
1605
1
                _convert_to_smaller_type();
1606
1
                break;
1607
6
            }
1608
6
            break;
1609
11
        case SET:
1610
11
            switch (_type) {
1611
1
            case EMPTY:
1612
1
                _set = rhs._set;
1613
1
                _type = SET;
1614
1
                break;
1615
1
            case SINGLE:
1616
1
                _set = rhs._set;
1617
1
                if (!rhs._set.contains(_sv)) {
1618
1
                    _set.insert(_sv);
1619
1
                } else {
1620
0
                    _set.erase(_sv);
1621
0
                }
1622
1
                _type = SET;
1623
1
                break;
1624
1
            case BITMAP:
1625
1
                _prepare_bitmap_for_write();
1626
31
                for (auto v : rhs._set) {
1627
31
                    if (_bitmap->contains(v)) {
1628
0
                        _bitmap->remove(v);
1629
31
                    } else {
1630
31
                        _bitmap->add(v);
1631
31
                    }
1632
31
                }
1633
1
                _convert_to_smaller_type();
1634
1
                break;
1635
8
            case SET:
1636
49
                for (auto v : rhs._set) {
1637
49
                    if (_set.contains(v)) {
1638
22
                        _set.erase(v);
1639
27
                    } else {
1640
27
                        _set.insert(v);
1641
27
                    }
1642
49
                }
1643
8
                _convert_to_smaller_type();
1644
8
                break;
1645
11
            }
1646
11
            break;
1647
29
        }
1648
29
        return *this;
1649
29
    }
1650
1651
    // check if value x is present
1652
9.16k
    bool contains(uint64_t x) const {
1653
9.16k
        switch (_type) {
1654
0
        case EMPTY:
1655
0
            return false;
1656
14
        case SINGLE:
1657
14
            return _sv == x;
1658
8.93k
        case BITMAP:
1659
8.93k
            return _bitmap->contains(x);
1660
214
        case SET:
1661
214
            return _set.contains(x);
1662
9.16k
        }
1663
0
        return false;
1664
9.16k
    }
1665
1666
    // true if contains a value that belongs to the range [left, right].
1667
    bool contains_any(uint64_t left, uint64_t right) const;
1668
1669
240k
    uint64_t cardinality() const {
1670
240k
        switch (_type) {
1671
240k
        case EMPTY:
1672
240k
            return 0;
1673
36
        case SINGLE:
1674
36
            return 1;
1675
210
        case BITMAP:
1676
210
            return _bitmap->cardinality();
1677
74
        case SET:
1678
74
            return _set.size();
1679
240k
        }
1680
0
        return 0;
1681
240k
    }
1682
1683
60.0k
    uint64_t and_cardinality(const BitmapValue& rhs) const {
1684
60.0k
        switch (rhs._type) {
1685
60.0k
        case EMPTY:
1686
60.0k
            return 0;
1687
4
        case SINGLE:
1688
4
            switch (_type) {
1689
1
            case EMPTY:
1690
1
                return 0;
1691
1
            case SINGLE:
1692
1
                return _sv == rhs._sv;
1693
1
            case BITMAP:
1694
1
                return _bitmap->contains(rhs._sv);
1695
1
            case SET:
1696
1
                return _set.contains(rhs._sv);
1697
4
            }
1698
0
            break;
1699
7
        case BITMAP:
1700
7
            switch (_type) {
1701
1
            case EMPTY:
1702
1
                return 0;
1703
1
            case SINGLE:
1704
1
                return rhs._bitmap->contains(_sv);
1705
4
            case BITMAP:
1706
4
                return _bitmap->andCardinality(*rhs._bitmap);
1707
1
            case SET: {
1708
1
                uint64_t cardinality = 0;
1709
31
                for (auto v : _set) {
1710
31
                    if (rhs._bitmap->contains(v)) {
1711
16
                        ++cardinality;
1712
16
                    }
1713
31
                }
1714
1
                return cardinality;
1715
0
            }
1716
7
            }
1717
0
            break;
1718
4
        case SET:
1719
4
            switch (_type) {
1720
1
            case EMPTY:
1721
1
                return 0;
1722
1
            case SINGLE:
1723
1
                return rhs._set.contains(_sv);
1724
1
            case BITMAP: {
1725
1
                uint64_t cardinality = 0;
1726
31
                for (auto v : rhs._set) {
1727
31
                    if (_bitmap->contains(v)) {
1728
0
                        ++cardinality;
1729
0
                    }
1730
31
                }
1731
1
                return cardinality;
1732
0
            }
1733
1
            case SET: {
1734
1
                uint64_t cardinality = 0;
1735
31
                for (auto v : _set) {
1736
31
                    if (rhs._set.contains(v)) {
1737
14
                        ++cardinality;
1738
14
                    }
1739
31
                }
1740
1
                return cardinality;
1741
0
            }
1742
4
            }
1743
60.0k
        }
1744
0
        return 0;
1745
60.0k
    }
1746
1747
60.0k
    uint64_t or_cardinality(const BitmapValue& rhs) const {
1748
60.0k
        switch (rhs._type) {
1749
59.9k
        case EMPTY:
1750
59.9k
            return cardinality();
1751
0
        case SINGLE:
1752
0
            switch (_type) {
1753
0
            case EMPTY:
1754
0
                return 1;
1755
0
            case SINGLE:
1756
0
                return 1 + (_sv != rhs._sv);
1757
0
            case BITMAP:
1758
0
                return cardinality() + !_bitmap->contains(rhs._sv);
1759
0
            case SET:
1760
0
                return _set.size() + !_set.contains(rhs._sv);
1761
0
            }
1762
0
            break;
1763
3
        case BITMAP:
1764
3
            switch (_type) {
1765
0
            case EMPTY:
1766
0
                return rhs.cardinality();
1767
0
            case SINGLE:
1768
0
                return rhs.cardinality() + !rhs._bitmap->contains(_sv);
1769
3
            case BITMAP:
1770
3
                return _bitmap->orCardinality(*rhs._bitmap);
1771
0
            case SET: {
1772
0
                uint64_t cardinality = rhs._bitmap->cardinality();
1773
0
                for (auto v : _set) {
1774
0
                    if (!rhs._bitmap->contains(v)) {
1775
0
                        ++cardinality;
1776
0
                    }
1777
0
                }
1778
0
                return cardinality;
1779
0
            }
1780
3
            }
1781
0
            break;
1782
0
        case SET:
1783
0
            switch (_type) {
1784
0
            case EMPTY:
1785
0
                return rhs.cardinality();
1786
0
            case SINGLE:
1787
0
                return rhs.cardinality() + !rhs._set.contains(_sv);
1788
0
            case BITMAP: {
1789
0
                uint64_t cardinality = _bitmap->cardinality();
1790
0
                for (auto v : rhs._set) {
1791
0
                    if (!_bitmap->contains(v)) {
1792
0
                        ++cardinality;
1793
0
                    }
1794
0
                }
1795
0
                return cardinality;
1796
0
            }
1797
0
            case SET: {
1798
0
                uint64_t cardinality = _set.size();
1799
0
                for (auto v : _set) {
1800
0
                    if (!rhs._set.contains(v)) {
1801
0
                        ++cardinality;
1802
0
                    }
1803
0
                }
1804
0
                return cardinality;
1805
0
            }
1806
0
            }
1807
60.0k
        }
1808
0
        return 0;
1809
60.0k
    }
1810
1811
24
    uint64_t andnot_cardinality(const BitmapValue& rhs) const {
1812
24
        switch (rhs._type) {
1813
8
        case EMPTY:
1814
8
            return cardinality();
1815
4
        case SINGLE:
1816
4
            switch (_type) {
1817
1
            case EMPTY:
1818
1
                return 0;
1819
1
            case SINGLE:
1820
1
                return 1 - (_sv == rhs._sv);
1821
1
            case BITMAP:
1822
1
                return cardinality() - _bitmap->contains(rhs._sv);
1823
1
            case SET:
1824
1
                return cardinality() - _set.contains(rhs._sv);
1825
4
            }
1826
0
            break;
1827
4
        case BITMAP:
1828
4
            switch (_type) {
1829
1
            case EMPTY:
1830
1
                return 0;
1831
1
            case SINGLE:
1832
1
                return !rhs._bitmap->contains(_sv);
1833
1
            case BITMAP:
1834
1
                return _bitmap->andnotCardinality(*rhs._bitmap);
1835
1
            case SET: {
1836
1
                uint64_t cardinality = _set.size();
1837
31
                for (auto v : _set) {
1838
31
                    if (rhs._bitmap->contains(v)) {
1839
16
                        cardinality -= 1;
1840
16
                    }
1841
31
                }
1842
1
                return cardinality;
1843
0
            }
1844
4
            }
1845
0
            break;
1846
8
        case SET:
1847
8
            switch (_type) {
1848
1
            case EMPTY:
1849
1
                return 0;
1850
1
            case SINGLE:
1851
1
                return !rhs._set.contains(_sv);
1852
1
            case BITMAP: {
1853
1
                uint64_t cardinality = _bitmap->cardinality();
1854
31
                for (auto v : rhs._set) {
1855
31
                    if (_bitmap->contains(v)) {
1856
0
                        cardinality -= 1;
1857
0
                    }
1858
31
                }
1859
1
                return cardinality;
1860
0
            }
1861
5
            case SET: {
1862
5
                uint64_t cardinality = _set.size();
1863
33
                for (auto v : rhs._set) {
1864
33
                    if (_set.contains(v)) {
1865
16
                        cardinality -= 1;
1866
16
                    }
1867
33
                }
1868
5
                return cardinality;
1869
0
            }
1870
8
            }
1871
24
        }
1872
0
        return 0;
1873
24
    }
1874
1875
    // Return how many bytes are required to serialize this bitmap.
1876
    // See BitmapTypeCode for the serialized format.
1877
    // !FIXME: We should rethink the logic here
1878
149k
    size_t getSizeInBytes() const {
1879
149k
        size_t res = 0;
1880
149k
        switch (_type) {
1881
120k
        case EMPTY:
1882
120k
            res = 1;
1883
120k
            break;
1884
54
        case SINGLE:
1885
54
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1886
49
                res = 1 + sizeof(uint32_t);
1887
49
            } else {
1888
5
                res = 1 + sizeof(uint64_t);
1889
5
            }
1890
54
            break;
1891
29.1k
        case BITMAP:
1892
29.1k
            _prepare_bitmap_for_write();
1893
29.1k
            _bitmap->runOptimize();
1894
29.1k
            _bitmap->shrinkToFit();
1895
29.1k
            res = _bitmap->getSizeInBytes(config::bitmap_serialize_version);
1896
29.1k
            break;
1897
619
        case SET:
1898
            /// 1 byte for type, 1 byte for count
1899
619
            res = 2 + sizeof(uint64_t) * _set.size();
1900
619
            break;
1901
149k
        }
1902
149k
        return res;
1903
149k
    }
1904
1905
    // Serialize the bitmap value to dst, which should be large enough.
1906
    // Client should call `getSizeInBytes` first to get the serialized size.
1907
81.5k
    void write_to(char* dst) const {
1908
81.5k
        switch (_type) {
1909
60.0k
        case EMPTY:
1910
60.0k
            *dst = BitmapTypeCode::EMPTY;
1911
60.0k
            break;
1912
44
        case SINGLE:
1913
44
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1914
41
                *(dst++) = BitmapTypeCode::SINGLE32;
1915
41
                encode_fixed32_le(reinterpret_cast<uint8_t*>(dst), static_cast<uint32_t>(_sv));
1916
41
            } else {
1917
3
                *(dst++) = BitmapTypeCode::SINGLE64;
1918
3
                encode_fixed64_le(reinterpret_cast<uint8_t*>(dst), _sv);
1919
3
            }
1920
44
            break;
1921
488
        case SET:
1922
488
            DCHECK(config::enable_set_in_bitmap_value);
1923
488
            *dst = BitmapTypeCode::SET;
1924
488
            ++dst;
1925
488
            *dst = static_cast<uint8_t>(_set.size());
1926
488
            ++dst;
1927
7.81k
            for (auto v : _set) {
1928
7.81k
                encode_fixed64_le(reinterpret_cast<uint8_t*>(dst), v);
1929
7.81k
                dst += sizeof(uint64_t);
1930
7.81k
            }
1931
488
            break;
1932
20.9k
        case BITMAP:
1933
20.9k
            _bitmap->write(dst, config::bitmap_serialize_version);
1934
20.9k
            break;
1935
81.5k
        }
1936
81.5k
    }
1937
1938
    // Deserialize a bitmap value from `src`.
1939
    // Return false if `src` begins with unknown type code, true otherwise.
1940
69.3k
    bool deserialize(const char* src) {
1941
69.3k
        switch (*src) {
1942
60.0k
        case BitmapTypeCode::EMPTY:
1943
60.0k
            _type = EMPTY;
1944
60.0k
            break;
1945
15
        case BitmapTypeCode::SINGLE32:
1946
15
            _type = SINGLE;
1947
15
            _sv = decode_fixed32_le(reinterpret_cast<const uint8_t*>(src + 1));
1948
15
            if (config::enable_set_in_bitmap_value) {
1949
2
                _type = SET;
1950
2
                _set.insert(_sv);
1951
2
            }
1952
15
            break;
1953
3
        case BitmapTypeCode::SINGLE64:
1954
3
            _type = SINGLE;
1955
3
            _sv = decode_fixed64_le(reinterpret_cast<const uint8_t*>(src + 1));
1956
3
            if (config::enable_set_in_bitmap_value) {
1957
1
                _type = SET;
1958
1
                _set.insert(_sv);
1959
1
            }
1960
3
            break;
1961
5.13k
        case BitmapTypeCode::BITMAP32:
1962
5.13k
        case BitmapTypeCode::BITMAP64:
1963
9.10k
        case BitmapTypeCode::BITMAP32_V2:
1964
9.10k
        case BitmapTypeCode::BITMAP64_V2:
1965
9.10k
            _type = BITMAP;
1966
9.10k
            _is_shared = false;
1967
9.10k
            try {
1968
9.10k
                _bitmap = std::make_shared<detail::Roaring64Map>(detail::Roaring64Map::read(src));
1969
9.10k
            } catch (const std::runtime_error& e) {
1970
1
                LOG(ERROR) << "Decode roaring bitmap failed, " << e.what();
1971
1
                return false;
1972
1
            }
1973
9.10k
            break;
1974
227
        case BitmapTypeCode::SET: {
1975
227
            _type = SET;
1976
227
            ++src;
1977
227
            uint8_t count = *src;
1978
227
            ++src;
1979
227
            if (count > SET_TYPE_THRESHOLD) {
1980
0
                throw Exception(ErrorCode::INTERNAL_ERROR,
1981
0
                                "bitmap value with incorrect set count, count: {}", count);
1982
0
            }
1983
227
            _set.reserve(count);
1984
3.86k
            for (uint8_t i = 0; i != count; ++i, src += sizeof(uint64_t)) {
1985
3.63k
                _set.insert(decode_fixed64_le(reinterpret_cast<const uint8_t*>(src)));
1986
3.63k
            }
1987
227
            if (_set.size() != count) {
1988
0
                throw Exception(ErrorCode::INTERNAL_ERROR,
1989
0
                                "bitmap value with incorrect set count, count: {}, set size: {}",
1990
0
                                count, _set.size());
1991
0
            }
1992
1993
227
            if (!config::enable_set_in_bitmap_value) {
1994
0
                _prepare_bitmap_for_write();
1995
0
                for (auto v : _set) {
1996
0
                    _bitmap->add(v);
1997
0
                }
1998
0
                _type = BITMAP;
1999
0
                _set.clear();
2000
0
            }
2001
227
            break;
2002
227
        }
2003
1
        case BitmapTypeCode::SET_V2: {
2004
1
            uint32_t size = 0;
2005
1
            memcpy(&size, src + 1, sizeof(uint32_t));
2006
1
            src += sizeof(uint32_t) + 1;
2007
2008
1
            if (!config::enable_set_in_bitmap_value || size > SET_TYPE_THRESHOLD) {
2009
0
                _type = BITMAP;
2010
0
                _prepare_bitmap_for_write();
2011
2012
0
                for (int i = 0; i < size; ++i) {
2013
0
                    uint64_t key {};
2014
0
                    memcpy(&key, src, sizeof(uint64_t));
2015
0
                    _bitmap->add(key);
2016
0
                    src += sizeof(uint64_t);
2017
0
                }
2018
1
            } else {
2019
1
                _type = SET;
2020
1
                _set.reserve(size);
2021
2022
3
                for (int i = 0; i < size; ++i) {
2023
2
                    uint64_t key {};
2024
2
                    memcpy(&key, src, sizeof(uint64_t));
2025
2
                    _set.insert(key);
2026
2
                    src += sizeof(uint64_t);
2027
2
                }
2028
1
            }
2029
1
            break;
2030
227
        }
2031
2
        default:
2032
2
            LOG(ERROR) << "BitmapTypeCode invalid, should between: " << BitmapTypeCode::EMPTY
2033
2
                       << " and " << BitmapTypeCode::BITMAP64 << " actual is "
2034
2
                       << static_cast<int>(*src);
2035
2
            return false;
2036
69.3k
        }
2037
69.3k
        return true;
2038
69.3k
    }
2039
2040
13
    int64_t minimum() const {
2041
13
        switch (_type) {
2042
2
        case SINGLE:
2043
2
            return _sv;
2044
4
        case BITMAP:
2045
4
            return _bitmap->minimum();
2046
5
        case SET:
2047
5
            return _min_in_set();
2048
2
        default:
2049
2
            return 0;
2050
13
        }
2051
13
    }
2052
2053
    // TODO limit string size to avoid OOM
2054
4.33k
    std::string to_string() const {
2055
4.33k
        std::stringstream ss;
2056
4.33k
        switch (_type) {
2057
16
        case EMPTY:
2058
16
            break;
2059
39
        case SINGLE:
2060
39
            ss << _sv;
2061
39
            break;
2062
4.01k
        case BITMAP: {
2063
4.01k
            struct IterCtx {
2064
4.01k
                std::stringstream* ss = nullptr;
2065
4.01k
                bool first = true;
2066
4.01k
            } iter_ctx;
2067
4.01k
            iter_ctx.ss = &ss;
2068
2069
4.01k
            _bitmap->iterate(
2070
2.09M
                    [](uint64_t value, void* c) -> bool {
2071
2.09M
                        auto ctx = reinterpret_cast<IterCtx*>(c);
2072
2.09M
                        if (ctx->first) {
2073
4.01k
                            ctx->first = false;
2074
2.09M
                        } else {
2075
2.09M
                            (*ctx->ss) << ",";
2076
2.09M
                        }
2077
2.09M
                        (*ctx->ss) << value;
2078
2.09M
                        return true;
2079
2.09M
                    },
2080
4.01k
                    &iter_ctx);
2081
4.01k
            break;
2082
0
        }
2083
263
        case SET: {
2084
263
            struct IterCtx {
2085
263
                std::stringstream* ss = nullptr;
2086
263
                bool first = true;
2087
263
            } iter_ctx;
2088
263
            iter_ctx.ss = &ss;
2089
2090
263
            std::vector<uint64_t> values(_set.begin(), _set.end());
2091
263
            std::sort(values.begin(), values.end());
2092
2093
4.11k
            for (auto v : values) {
2094
4.11k
                if (iter_ctx.first) {
2095
263
                    iter_ctx.first = false;
2096
3.85k
                } else {
2097
3.85k
                    (*iter_ctx.ss) << ",";
2098
3.85k
                }
2099
4.11k
                (*iter_ctx.ss) << v;
2100
4.11k
            }
2101
263
            break;
2102
0
        }
2103
4.33k
        }
2104
4.33k
        return ss.str();
2105
4.33k
    }
2106
2107
14
    int64_t maximum() const {
2108
14
        switch (_type) {
2109
2
        case SINGLE:
2110
2
            return _sv;
2111
5
        case BITMAP:
2112
5
            return _bitmap->maximum();
2113
5
        case SET:
2114
5
            return _max_in_set();
2115
2
        default:
2116
2
            return 0;
2117
14
        }
2118
14
    }
2119
2120
4
    uint64_t max(bool* empty) const {
2121
4
        return min_or_max(empty, [&]() { return maximum(); });
2122
4
    }
2123
2124
4
    uint64_t min(bool* empty) const {
2125
4
        return min_or_max(empty, [&]() { return minimum(); });
2126
4
    }
2127
2128
5
    uint64_t _min_in_set() const {
2129
5
        DCHECK_EQ(_type, SET);
2130
5
        return *std::min_element(_set.begin(), _set.end());
2131
5
    }
2132
2133
5
    uint64_t _max_in_set() const {
2134
5
        DCHECK_EQ(_type, SET);
2135
5
        return *std::max_element(_set.begin(), _set.end());
2136
5
    }
2137
2138
6
    bool empty() const { return _type == EMPTY; }
2139
2140
    /**
2141
     * Return new set with specified range (not include the range_end)
2142
     */
2143
    int64_t sub_range(const int64_t& range_start, const int64_t& range_end,
2144
7
                      BitmapValue* ret_bitmap) const {
2145
7
        switch (_type) {
2146
1
        case EMPTY:
2147
1
            return 0;
2148
2
        case SINGLE: {
2149
            //only single value, so _sv must in [range_start,range_end)
2150
2
            if (range_start <= _sv && _sv < range_end) {
2151
1
                ret_bitmap->add(_sv);
2152
1
                return 1;
2153
1
            } else {
2154
1
                return 0;
2155
1
            }
2156
2
        }
2157
2
        case BITMAP: {
2158
2
            int64_t count = 0;
2159
146
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2160
145
                if (*it < range_start) {
2161
0
                    continue;
2162
0
                }
2163
145
                if (*it < range_end) {
2164
144
                    ret_bitmap->add(*it);
2165
144
                    ++count;
2166
144
                } else {
2167
1
                    break;
2168
1
                }
2169
145
            }
2170
2
            return count;
2171
2
        }
2172
2
        case SET: {
2173
2
            int64_t count = 0;
2174
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2175
2
            std::sort(values.begin(), values.end());
2176
6
            for (auto it = values.begin(); it != values.end(); ++it) {
2177
4
                if (*it < range_start || *it >= range_end) {
2178
1
                    continue;
2179
1
                }
2180
3
                ret_bitmap->add(*it);
2181
3
                ++count;
2182
3
            }
2183
2
            return count;
2184
2
        }
2185
7
        }
2186
0
        return 0;
2187
7
    }
2188
2189
    /**
2190
     * Return new set with specified start and limit
2191
     * @param range_start the start value for the range
2192
     * @param cardinality_limit the length of the subset
2193
     * @return the real count for subset, maybe less than cardinality_limit
2194
     */
2195
    int64_t sub_limit(const int64_t& range_start, const int64_t& cardinality_limit,
2196
9
                      BitmapValue* ret_bitmap) const {
2197
9
        switch (_type) {
2198
1
        case EMPTY:
2199
1
            return 0;
2200
2
        case SINGLE: {
2201
            //only single value, so range_start must less than _sv
2202
2
            if (range_start > _sv) {
2203
0
                return 0;
2204
2
            } else {
2205
2
                ret_bitmap->add(_sv);
2206
2
                return 1;
2207
2
            }
2208
2
        }
2209
4
        case BITMAP: {
2210
4
            int64_t count = 0;
2211
31
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2212
30
                if (*it < range_start) {
2213
8
                    continue;
2214
8
                }
2215
22
                if (count < cardinality_limit) {
2216
19
                    ret_bitmap->add(*it);
2217
19
                    ++count;
2218
19
                } else {
2219
3
                    break;
2220
3
                }
2221
22
            }
2222
4
            return count;
2223
2
        }
2224
2
        case SET: {
2225
2
            int64_t count = 0;
2226
2227
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2228
2
            std::sort(values.begin(), values.end());
2229
6
            for (auto it = values.begin(); it != values.end(); ++it) {
2230
4
                if (*it < range_start) {
2231
0
                    continue;
2232
0
                }
2233
4
                if (count < cardinality_limit) {
2234
4
                    ret_bitmap->add(*it);
2235
4
                    ++count;
2236
4
                } else {
2237
0
                    break;
2238
0
                }
2239
4
            }
2240
2
            return count;
2241
2
        }
2242
9
        }
2243
0
        return 0;
2244
9
    }
2245
2246
    /**
2247
     * Returns the bitmap elements, starting from the offset position.
2248
     * The number of returned elements is limited by the cardinality_limit parameter.
2249
     * Analog of the substring string function, but for bitmap.
2250
     */
2251
    int64_t offset_limit(const int64_t& offset, const int64_t& limit,
2252
8
                         BitmapValue* ret_bitmap) const {
2253
8
        switch (_type) {
2254
1
        case EMPTY:
2255
1
            return 0;
2256
2
        case SINGLE: {
2257
            //only single value, so offset must start 0
2258
2
            if (offset == 0) {
2259
1
                ret_bitmap->add(_sv);
2260
1
                return 1;
2261
1
            } else {
2262
1
                return 0;
2263
1
            }
2264
2
        }
2265
5
        default:
2266
5
            break;
2267
8
        }
2268
5
        if (_type == BITMAP) {
2269
3
            if (std::abs(offset) >= _bitmap->cardinality()) {
2270
0
                return 0;
2271
0
            }
2272
3
            int64_t abs_offset = offset;
2273
3
            if (offset < 0) {
2274
0
                abs_offset = _bitmap->cardinality() + offset;
2275
0
            }
2276
2277
3
            int64_t count = 0;
2278
3
            int64_t offset_count = 0;
2279
3
            auto it = _bitmap->begin();
2280
123
            for (; it != _bitmap->end() && offset_count < abs_offset; ++it) {
2281
120
                ++offset_count;
2282
120
            }
2283
33
            for (; it != _bitmap->end() && count < limit; ++it, ++count) {
2284
30
                ret_bitmap->add(*it);
2285
30
            }
2286
3
            return count;
2287
3
        } else {
2288
2
            if (std::abs(offset) > _set.size()) {
2289
0
                return 0;
2290
0
            }
2291
2292
2
            int64_t abs_offset = offset;
2293
2
            if (offset < 0) {
2294
0
                abs_offset = _set.size() + offset;
2295
0
            }
2296
2297
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2298
2
            std::sort(values.begin(), values.end());
2299
2300
2
            int64_t count = 0;
2301
2
            size_t index = 0;
2302
4
            for (auto v : values) {
2303
4
                if (index < abs_offset) {
2304
1
                    ++index;
2305
1
                    continue;
2306
1
                }
2307
3
                if (count == limit || index == values.size()) {
2308
0
                    break;
2309
0
                }
2310
3
                ++count;
2311
3
                ++index;
2312
3
                ret_bitmap->add(v);
2313
3
            }
2314
2
            return count;
2315
2
        }
2316
5
    }
2317
2318
    //for function bitmap_to_array
2319
3
    void to_array(vectorized::PaddedPODArray<int64_t>& data) const {
2320
3
        switch (_type) {
2321
0
        case EMPTY:
2322
0
            break;
2323
1
        case SINGLE: {
2324
1
            data.emplace_back(_sv);
2325
1
            break;
2326
0
        }
2327
1
        case BITMAP: {
2328
129
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2329
128
                data.emplace_back(*it);
2330
128
            }
2331
1
            break;
2332
0
        }
2333
1
        case SET: {
2334
1
            std::vector<uint64_t> values(_set.begin(), _set.end());
2335
1
            std::sort(values.begin(), values.end());
2336
31
            for (auto v : values) {
2337
31
                data.emplace_back(v);
2338
31
            }
2339
1
            break;
2340
0
        }
2341
3
        }
2342
3
    }
2343
2344
157
    void reset() {
2345
157
        _type = EMPTY;
2346
157
        _sv = 0;
2347
157
        _set.clear();
2348
157
        _is_shared = false;
2349
157
        _bitmap = nullptr;
2350
157
    }
2351
    // Implement an iterator for convenience
2352
    friend class BitmapValueIterator;
2353
    typedef BitmapValueIterator b_iterator;
2354
2355
    b_iterator begin() const;
2356
    b_iterator end() const;
2357
    b_iterator lower_bound(uint64_t val) const;
2358
2359
private:
2360
44
    void _convert_to_smaller_type() {
2361
44
        if (_type == BITMAP) {
2362
16
            uint64_t c = _bitmap->cardinality();
2363
16
            if (config::enable_set_in_bitmap_value && c > SET_TYPE_THRESHOLD) {
2364
7
                return;
2365
9
            } else if (c > 1) {
2366
7
                return;
2367
7
            }
2368
2
            if (c == 0) {
2369
1
                _type = EMPTY;
2370
1
            } else if (c == 1 && !config::enable_set_in_bitmap_value) {
2371
1
                _type = SINGLE;
2372
1
                _sv = _bitmap->minimum();
2373
1
            } else {
2374
0
                _type = SET;
2375
0
                for (auto v : *_bitmap) {
2376
0
                    _set.insert(v);
2377
0
                }
2378
0
            }
2379
2
            _bitmap.reset();
2380
28
        } else if (_type == SET) {
2381
28
            if (_set.size() == 1 && !config::enable_set_in_bitmap_value) {
2382
0
                _type = SINGLE;
2383
0
                _sv = *_set.begin();
2384
0
                _set.clear();
2385
0
            }
2386
28
        }
2387
44
    }
2388
2389
8
    uint64_t min_or_max(bool* empty, std::function<uint64_t()> func) const {
2390
8
        bool is_empty = false;
2391
8
        uint64_t result = 0;
2392
8
        switch (_type) {
2393
0
        case SINGLE:
2394
0
            result = _sv;
2395
0
            break;
2396
2
        case BITMAP:
2397
6
        case SET:
2398
6
            result = func();
2399
6
            break;
2400
2
        default:
2401
2
            is_empty = true;
2402
8
        }
2403
8
        if (empty) {
2404
8
            *empty = is_empty;
2405
8
        }
2406
8
        return result;
2407
8
    }
2408
2409
4.13M
    void _prepare_bitmap_for_write() const {
2410
4.13M
        if (!_bitmap) {
2411
8.29k
            _bitmap = std::make_shared<detail::Roaring64Map>();
2412
8.29k
            _is_shared = false;
2413
8.29k
            return;
2414
8.29k
        }
2415
2416
4.12M
        if (!_is_shared) {
2417
            // the state is not shared, not need to check use count any more
2418
4.11M
            return;
2419
4.11M
        }
2420
2421
8.34k
        if (_bitmap.use_count() > 1) {
2422
1.04k
            auto new_one = std::make_shared<detail::Roaring64Map>();
2423
1.04k
            *new_one = *_bitmap;
2424
1.04k
            _bitmap = new_one;
2425
1.04k
        }
2426
8.34k
        _is_shared = false;
2427
8.34k
    }
2428
2429
97.7k
    void _convert_to_bitmap_if_need() {
2430
97.7k
        if (_type != SET || _set.size() <= SET_TYPE_THRESHOLD) {
2431
94.7k
            return;
2432
94.7k
        }
2433
2.98k
        _prepare_bitmap_for_write();
2434
100k
        for (auto v : _set) {
2435
100k
            _bitmap->add(v);
2436
100k
        }
2437
2.98k
        _type = BITMAP;
2438
2.98k
        _set.clear();
2439
2.98k
    }
2440
2441
    enum BitmapDataType {
2442
        EMPTY = 0,
2443
        SINGLE = 1, // single element
2444
        BITMAP = 2, // more than one elements
2445
        SET = 3     // elements count less or equal than 32
2446
    };
2447
    uint64_t _sv = 0; // store the single value when _type == SINGLE
2448
    // !FIXME: We should rethink the logic about _bitmap and _is_shared
2449
    mutable std::shared_ptr<detail::Roaring64Map> _bitmap; // used when _type == BITMAP
2450
    SetContainer<uint64_t> _set;
2451
    BitmapDataType _type {EMPTY};
2452
    // Indicate whether the state is shared among multi BitmapValue object
2453
    mutable bool _is_shared = true;
2454
    static constexpr uint64_t SET_TYPE_THRESHOLD = 32;
2455
};
2456
2457
// A simple implement of bitmap value iterator(Read only)
2458
// Usage:
2459
//  BitmapValueIterator iter = bitmap_value.begin();
2460
//  BitmapValueIterator end = bitmap_value.end();
2461
//  for (; iter != end(); ++iter) {
2462
//      uint64_t v = *iter;
2463
//      ... do something with "v" ...
2464
//  }
2465
class BitmapValueIterator {
2466
public:
2467
37
    BitmapValueIterator(const BitmapValue& bitmap, bool end = false) : _bitmap(bitmap), _end(end) {
2468
37
        switch (_bitmap._type) {
2469
8
        case BitmapValue::BitmapDataType::EMPTY:
2470
8
            _end = true;
2471
8
            break;
2472
8
        case BitmapValue::BitmapDataType::SINGLE:
2473
8
            _sv = _bitmap._sv;
2474
8
            break;
2475
13
        case BitmapValue::BitmapDataType::BITMAP:
2476
13
            _iter = new detail::Roaring64MapSetBitForwardIterator(*_bitmap._bitmap, _end);
2477
13
            break;
2478
8
        case BitmapValue::BitmapDataType::SET:
2479
8
            _set_iter = _end ? _bitmap._set.end() : _bitmap._set.begin();
2480
8
            break;
2481
0
        default:
2482
0
            CHECK(false) << _bitmap._type;
2483
37
        }
2484
37
    }
2485
2486
    BitmapValueIterator(const BitmapValueIterator& other)
2487
168
            : _bitmap(other._bitmap), _sv(other._sv), _end(other._end) {
2488
168
        _iter = other._iter ? new detail::Roaring64MapSetBitForwardIterator(*other._iter) : nullptr;
2489
168
        if (_bitmap._type == BitmapValue::BitmapDataType::SET) {
2490
33
            _set_iter = other._set_iter;
2491
33
        }
2492
168
    }
2493
2494
205
    ~BitmapValueIterator() {
2495
205
        if (_iter != nullptr) {
2496
143
            delete _iter;
2497
143
            _iter = nullptr;
2498
143
        }
2499
205
    }
2500
2501
646
    uint64_t operator*() const {
2502
646
        CHECK(!_end) << "should not get value of end iterator";
2503
646
        switch (_bitmap._type) {
2504
4
        case BitmapValue::BitmapDataType::SINGLE:
2505
4
            return _sv;
2506
517
        case BitmapValue::BitmapDataType::BITMAP:
2507
517
            return *(*_iter);
2508
125
        case BitmapValue::BitmapDataType::SET: {
2509
125
            return *_set_iter;
2510
0
        }
2511
0
        default:
2512
0
            CHECK(false) << _bitmap._type;
2513
646
        }
2514
0
        return 0;
2515
646
    }
2516
2517
486
    BitmapValueIterator& operator++() { // ++i, must returned inc. value
2518
486
        CHECK(!_end) << "should not forward when iterator ends";
2519
486
        switch (_bitmap._type) {
2520
3
        case BitmapValue::BitmapDataType::SINGLE:
2521
3
            _end = true;
2522
3
            break;
2523
389
        case BitmapValue::BitmapDataType::BITMAP:
2524
389
            ++(*_iter);
2525
389
            break;
2526
94
        case BitmapValue::BitmapDataType::SET:
2527
94
            ++_set_iter;
2528
94
            break;
2529
0
        default:
2530
0
            CHECK(false) << _bitmap._type;
2531
486
        }
2532
486
        return *this;
2533
486
    }
2534
2535
160
    BitmapValueIterator operator++(int) { // i++, must return orig. value
2536
160
        CHECK(!_end) << "should not forward when iterator ends";
2537
160
        BitmapValueIterator orig(*this);
2538
160
        switch (_bitmap._type) {
2539
1
        case BitmapValue::BitmapDataType::SINGLE:
2540
1
            _end = true;
2541
1
            break;
2542
128
        case BitmapValue::BitmapDataType::BITMAP:
2543
128
            ++(*_iter);
2544
128
            break;
2545
31
        case BitmapValue::BitmapDataType::SET:
2546
31
            ++_set_iter;
2547
31
            break;
2548
0
        default:
2549
0
            CHECK(false) << _bitmap._type;
2550
160
        }
2551
160
        return orig;
2552
160
    }
2553
2554
674
    bool operator==(const BitmapValueIterator& other) const {
2555
674
        if (_end && other._end) {
2556
13
            return true;
2557
13
        }
2558
2559
661
        switch (_bitmap._type) {
2560
0
        case BitmapValue::BitmapDataType::EMPTY:
2561
0
            return other._bitmap._type == BitmapValue::BitmapDataType::EMPTY;
2562
5
        case BitmapValue::BitmapDataType::SINGLE:
2563
5
            return _end == other._end && _sv == other._sv;
2564
524
        case BitmapValue::BitmapDataType::BITMAP:
2565
524
            return *_iter == *(other._iter);
2566
132
        case BitmapValue::BitmapDataType::SET:
2567
132
            return _set_iter == other._set_iter;
2568
0
        default:
2569
0
            CHECK(false) << _bitmap._type;
2570
661
        }
2571
0
        return false;
2572
661
    }
2573
2574
664
    bool operator!=(const BitmapValueIterator& other) const { return !(*this == other); }
2575
2576
    /**
2577
    * Move the iterator to the first value >= `val`.
2578
    */
2579
0
    BitmapValueIterator& move(uint64_t val) {
2580
0
        switch (_bitmap._type) {
2581
0
        case BitmapValue::BitmapDataType::SINGLE:
2582
0
            if (_sv < val) {
2583
0
                _end = true;
2584
0
            }
2585
0
            break;
2586
0
        case BitmapValue::BitmapDataType::BITMAP:
2587
0
            if (!_iter->move(val)) {
2588
0
                _end = true;
2589
0
            }
2590
0
            break;
2591
0
        case BitmapValue::BitmapDataType::SET: {
2592
0
            throw Exception(Status::FatalError("BitmapValue with set do not support move"));
2593
0
        }
2594
0
        default:
2595
0
            break;
2596
0
        }
2597
0
        return *this;
2598
0
    }
2599
2600
private:
2601
    const BitmapValue& _bitmap;
2602
    detail::Roaring64MapSetBitForwardIterator* _iter = nullptr;
2603
    BitmapValue::SetContainer<uint64_t>::const_iterator _set_iter;
2604
    uint64_t _sv = 0;
2605
    bool _end = false;
2606
};
2607
2608
16
inline BitmapValueIterator BitmapValue::begin() const {
2609
16
    return {*this};
2610
16
}
2611
2612
21
inline BitmapValueIterator BitmapValue::end() const {
2613
21
    return {*this, true};
2614
21
}
2615
2616
0
inline BitmapValueIterator BitmapValue::lower_bound(uint64_t val) const {
2617
0
    return BitmapValueIterator(*this).move(val);
2618
0
}
2619
2620
5
inline bool BitmapValue::contains_any(uint64_t left, uint64_t right) const {
2621
5
    if (left > right) {
2622
0
        return false;
2623
0
    }
2624
2625
5
    if (_type == SET) {
2626
21
        for (auto v : _set) {
2627
21
            if (v >= left && v <= right) {
2628
3
                return true;
2629
3
            }
2630
21
        }
2631
2
        return false;
2632
5
    }
2633
0
    auto it = lower_bound(left);
2634
0
    return it != end() && *it <= right;
2635
5
}
2636
2637
} // namespace doris