Coverage Report

Created: 2025-07-31 18:26

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/util/bitmap_value.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <parallel_hashmap/btree.h>
21
#include <parallel_hashmap/phmap.h>
22
23
#include <algorithm>
24
#include <cstdarg>
25
#include <cstdint>
26
#include <cstdio>
27
#include <limits>
28
#include <map>
29
#include <memory>
30
#include <new>
31
#include <numeric>
32
#include <queue>
33
#include <roaring/roaring.hh>
34
#include <set>
35
#include <stdexcept>
36
#include <string>
37
#include <utility>
38
39
#include "common/config.h"
40
#include "common/exception.h"
41
#include "common/logging.h"
42
#include "udf/udf.h"
43
#include "util/coding.h"
44
#include "vec/common/pod_array.h"
45
#include "vec/common/pod_array_fwd.h"
46
namespace doris {
47
48
// serialized bitmap := TypeCode(1), Payload
49
// The format of payload depends on value of TypeCode which is defined below
50
struct BitmapTypeCode {
51
    enum type {
52
        // An empty bitmap. Payload is 0 byte.
53
        // added in 0.11
54
        EMPTY = 0,
55
        // A bitmap containing only one element that is in [0, UINT32_MAX]
56
        // Payload := UInt32LittleEndian(4 byte)
57
        // added in 0.11
58
        SINGLE32 = 1,
59
        // A bitmap whose maximum element is in [0, UINT32_MAX]
60
        // Payload := the standard RoaringBitmap format described by
61
        // https://github.com/RoaringBitmap/RoaringFormatSpec/
62
        // added in 0.11
63
        BITMAP32 = 2,
64
        // A bitmap containing only one element that is in (UINT32_MAX, UINT64_MAX]
65
        // Payload := UInt64LittleEndian(8 byte)
66
        // added in 0.12
67
        SINGLE64 = 3,
68
        // A bitmap whose maximum element is in (UINT32_MAX, UINT64_MAX].
69
        //
70
        // To support 64-bits elements, all elements with the same high 32 bits are stored in a
71
        // RoaringBitmap containing only the lower 32 bits. Thus we could use
72
        // map<uint32_t, RoaringBitmap> to represent bitmap of 64-bits ints.
73
        //
74
        // Since there is no standard format for 64-bits RoaringBitmap, we define our own as below
75
        // Payload := NumRoaring(vint64), { MapKey, MapValue }^NumRoaring
76
        // - MapKey := the shared high 32 bits in UInt32LittleEndian(4 byte)
77
        // - MapValue := the standard RoaringBitmap format
78
        //
79
        // added in 0.12
80
        BITMAP64 = 4,
81
        SET = 5, // V1
82
        SET_V2 = 10,
83
        BITMAP32_V2 = 12,
84
        BITMAP64_V2 = 13,
85
        TYPE_MAX
86
    };
87
1
    Status static inline validate(int bitmap_type) {
88
1
        if (UNLIKELY(bitmap_type < type::EMPTY || bitmap_type >= type::TYPE_MAX)) {
89
1
            std::string err_msg =
90
1
                    fmt::format("BitmapTypeCode invalid, should between: {} and {} actrual is {}",
91
1
                                BitmapTypeCode::EMPTY, BitmapTypeCode::BITMAP64, bitmap_type);
92
1
            LOG(ERROR) << err_msg;
93
1
            return Status::Corruption(err_msg);
94
1
        }
95
0
        return Status::OK();
96
1
    }
97
};
98
99
namespace detail {
100
101
class Roaring64MapSetBitForwardIterator;
102
103
// Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.2.60/cpp/roaring64map.hh
104
// What we change includes
105
// - a custom serialization format is used inside read()/write()/getSizeInBytes()
106
// - added clear() and is32BitsEnough()
107
class Roaring64Map {
108
public:
109
    /**
110
     * Create an empty bitmap
111
     */
112
21.4k
    Roaring64Map() = default;
113
114
    /**
115
     * Construct a bitmap from a list of 32-bit integer values.
116
     */
117
2
    Roaring64Map(size_t n, const uint32_t* data) { addMany(n, data); }
118
119
    /**
120
     * Construct a bitmap from a list of 64-bit integer values.
121
     */
122
1
    Roaring64Map(size_t n, const uint64_t* data) { addMany(n, data); }
123
124
    /**
125
     * Construct a 64-bit map from a 32-bit one
126
     */
127
1
    explicit Roaring64Map(const roaring::Roaring& r) { emplaceOrInsert(0, r); }
128
129
9
    Roaring64Map(const Roaring64Map& r) = default;
130
131
11.0k
    Roaring64Map(Roaring64Map&& r) = default;
132
133
    /**
134
     * Assignment operator.
135
     */
136
1.03k
    Roaring64Map& operator=(const Roaring64Map& r) {
137
1.03k
        roarings = r.roarings;
138
1.03k
        return *this;
139
1.03k
    }
140
141
    /**
142
     * Add value x
143
     *
144
     */
145
3
    void add(uint32_t x) {
146
3
        roarings[0].add(x);
147
3
        roarings[0].setCopyOnWrite(copyOnWrite);
148
3
    }
149
4.73M
    void add(uint64_t x) {
150
4.73M
        roarings[highBytes(x)].add(lowBytes(x));
151
4.73M
        roarings[highBytes(x)].setCopyOnWrite(copyOnWrite);
152
4.73M
    }
153
154
    template <typename T>
155
9
    void addMany(size_t n_args, const T* vals) {
156
9
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
157
8
            auto& roaring = roarings[0];
158
8
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
159
8
            roaring.setCopyOnWrite(copyOnWrite);
160
8
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
161
1
            auto& roaring = roarings[0];
162
1
            std::vector<uint32_t> values(n_args);
163
8
            for (size_t i = 0; i != n_args; ++i) {
164
7
                values[i] = uint32_t(vals[i]);
165
7
            }
166
1
            roaring.addMany(n_args, values.data());
167
1
            roaring.setCopyOnWrite(copyOnWrite);
168
1
        } else {
169
0
            for (size_t lcv = 0; lcv < n_args; lcv++) {
170
0
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
171
0
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
172
0
            }
173
0
        }
174
9
    }
_ZN5doris6detail12Roaring64Map7addManyIjEEvmPKT_
Line
Count
Source
155
8
    void addMany(size_t n_args, const T* vals) {
156
8
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
157
8
            auto& roaring = roarings[0];
158
8
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
159
8
            roaring.setCopyOnWrite(copyOnWrite);
160
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
161
            auto& roaring = roarings[0];
162
            std::vector<uint32_t> values(n_args);
163
            for (size_t i = 0; i != n_args; ++i) {
164
                values[i] = uint32_t(vals[i]);
165
            }
166
            roaring.addMany(n_args, values.data());
167
            roaring.setCopyOnWrite(copyOnWrite);
168
        } else {
169
            for (size_t lcv = 0; lcv < n_args; lcv++) {
170
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
171
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
172
            }
173
        }
174
8
    }
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIiEEvmPKT_
_ZN5doris6detail12Roaring64Map7addManyItEEvmPKT_
Line
Count
Source
155
1
    void addMany(size_t n_args, const T* vals) {
156
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
157
            auto& roaring = roarings[0];
158
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
159
            roaring.setCopyOnWrite(copyOnWrite);
160
1
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
161
1
            auto& roaring = roarings[0];
162
1
            std::vector<uint32_t> values(n_args);
163
8
            for (size_t i = 0; i != n_args; ++i) {
164
7
                values[i] = uint32_t(vals[i]);
165
7
            }
166
1
            roaring.addMany(n_args, values.data());
167
1
            roaring.setCopyOnWrite(copyOnWrite);
168
        } else {
169
            for (size_t lcv = 0; lcv < n_args; lcv++) {
170
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
171
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
172
            }
173
        }
174
1
    }
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIaEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIsEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIlEEvmPKT_
175
176
40
    void addMany(size_t n_args, const uint64_t* vals) {
177
1.25k
        for (size_t lcv = 0; lcv < n_args; lcv++) {
178
1.21k
            roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
179
1.21k
            roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
180
1.21k
        }
181
40
    }
182
183
    /**
184
     * Remove value x
185
     *
186
     */
187
4
    void remove(uint32_t x) { roarings[0].remove(x); }
188
23
    void remove(uint64_t x) {
189
23
        auto roaring_iter = roarings.find(highBytes(x));
190
23
        if (roaring_iter != roarings.cend()) {
191
23
            roaring_iter->second.remove(lowBytes(x));
192
23
        }
193
23
    }
194
    /**
195
     * Return the largest value (if not empty)
196
     *
197
     */
198
46.5k
    uint64_t maximum() const {
199
46.5k
        for (auto roaring_iter = roarings.crbegin(); roaring_iter != roarings.crend();
200
46.5k
             ++roaring_iter) {
201
46.5k
            if (!roaring_iter->second.isEmpty()) {
202
46.5k
                return uniteBytes(roaring_iter->first, roaring_iter->second.maximum());
203
46.5k
            }
204
46.5k
        }
205
        // we put std::numeric_limits<>::max/min in parenthesis
206
        // to avoid a clash with the Windows.h header under Windows
207
5
        return (std::numeric_limits<uint64_t>::min)();
208
46.5k
    }
209
210
    /**
211
     * Return the smallest value (if not empty)
212
     *
213
     */
214
7
    uint64_t minimum() const {
215
7
        for (auto roaring_iter = roarings.cbegin(); roaring_iter != roarings.cend();
216
7
             ++roaring_iter) {
217
6
            if (!roaring_iter->second.isEmpty()) {
218
6
                return uniteBytes(roaring_iter->first, roaring_iter->second.minimum());
219
6
            }
220
6
        }
221
        // we put std::numeric_limits<>::max/min in parenthesis
222
        // to avoid a clash with the Windows.h header under Windows
223
1
        return (std::numeric_limits<uint64_t>::max)();
224
7
    }
225
226
    /**
227
     * Check if value x is present
228
     */
229
11
    bool contains(uint32_t x) const {
230
11
        return roarings.count(0) == 0 ? false : roarings.at(0).contains(x);
231
11
    }
232
9.24k
    bool contains(uint64_t x) const {
233
9.24k
        return roarings.count(highBytes(x)) == 0 ? false
234
9.24k
                                                 : roarings.at(highBytes(x)).contains(lowBytes(x));
235
9.24k
    }
236
237
    /**
238
     * Compute the intersection between the current bitmap and the provided
239
     * bitmap,
240
     * writing the result in the current bitmap. The provided bitmap is not
241
     * modified.
242
     */
243
6
    Roaring64Map& operator&=(const Roaring64Map& r) {
244
11
        for (auto& map_entry : roarings) {
245
11
            if (r.roarings.count(map_entry.first) == 1) {
246
10
                map_entry.second &= r.roarings.at(map_entry.first);
247
10
            } else {
248
1
                map_entry.second = roaring::Roaring();
249
1
            }
250
11
        }
251
6
        return *this;
252
6
    }
253
254
    /**
255
     * Compute the difference between the current bitmap and the provided
256
     * bitmap,
257
     * writing the result in the current bitmap. The provided bitmap is not
258
     * modified.
259
     */
260
1
    Roaring64Map& operator-=(const Roaring64Map& r) {
261
4
        for (auto& map_entry : roarings) {
262
4
            if (r.roarings.count(map_entry.first) == 1) {
263
4
                map_entry.second -= r.roarings.at(map_entry.first);
264
4
            }
265
4
        }
266
1
        return *this;
267
1
    }
268
269
    /**
270
     * Compute the union between the current bitmap and the provided bitmap,
271
     * writing the result in the current bitmap. The provided bitmap is not
272
     * modified.
273
     *
274
     * See also the fastunion function to aggregate many bitmaps more quickly.
275
     */
276
5
    Roaring64Map& operator|=(const Roaring64Map& r) {
277
11
        for (const auto& map_entry : r.roarings) {
278
11
            if (roarings.count(map_entry.first) == 0) {
279
2
                roarings[map_entry.first] = map_entry.second;
280
2
                roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
281
9
            } else {
282
9
                roarings[map_entry.first] |= map_entry.second;
283
9
            }
284
11
        }
285
5
        return *this;
286
5
    }
287
288
    /**
289
     * Compute the symmetric union between the current bitmap and the provided
290
     * bitmap,
291
     * writing the result in the current bitmap. The provided bitmap is not
292
     * modified.
293
     */
294
4
    Roaring64Map& operator^=(const Roaring64Map& r) {
295
7
        for (const auto& map_entry : r.roarings) {
296
7
            if (roarings.count(map_entry.first) == 0) {
297
0
                roarings[map_entry.first] = map_entry.second;
298
0
                roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
299
7
            } else {
300
7
                roarings[map_entry.first] ^= map_entry.second;
301
7
            }
302
7
        }
303
4
        return *this;
304
4
    }
305
306
    /**
307
     * Exchange the content of this bitmap with another.
308
     */
309
1
    void swap(Roaring64Map& r) { roarings.swap(r.roarings); }
310
311
    /**
312
     * Get the cardinality of the bitmap (number of elements).
313
     * Throws std::length_error in the special case where the bitmap is full
314
     * (cardinality() == 2^64). Check isFull() before calling to avoid
315
     * exception.
316
     */
317
246
    uint64_t cardinality() const {
318
246
        if (isFull()) {
319
0
            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
320
0
                                   "bitmap is full, cardinality is 2^64, "
321
0
                                   "unable to represent in a 64-bit integer");
322
0
        }
323
246
        return std::accumulate(roarings.cbegin(), roarings.cend(), (uint64_t)0,
324
246
                               [](uint64_t previous,
325
418
                                  const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
326
418
                                   return previous + map_entry.second.cardinality();
327
418
                               });
328
246
    }
329
    /**
330
     * Computes the size of the intersection between two bitmaps.
331
     *
332
     */
333
6
    uint64_t andCardinality(const Roaring64Map& r) const {
334
6
        uint64_t card = 0;
335
10
        for (const auto& map_entry : roarings) {
336
10
            if (r.roarings.count(map_entry.first) == 1) {
337
9
                card += map_entry.second.and_cardinality(r.roarings.at(map_entry.first));
338
9
            }
339
10
        }
340
6
        return card;
341
6
    }
342
343
    /**
344
     * Computes the size of the union between two bitmaps.
345
     *
346
     */
347
4
    uint64_t orCardinality(const Roaring64Map& r) const {
348
4
        uint64_t card = 0;
349
4
        for (const auto& map_entry : roarings) {
350
4
            if (r.roarings.count(map_entry.first) == 0) {
351
0
                card += map_entry.second.cardinality();
352
0
            }
353
4
        }
354
4
        for (const auto& map_entry : r.roarings) {
355
4
            if (roarings.count(map_entry.first) == 0) {
356
0
                card += map_entry.second.cardinality();
357
4
            } else {
358
4
                card += roarings.at(map_entry.first).or_cardinality(map_entry.second);
359
4
            }
360
4
        }
361
4
        return card;
362
4
    }
363
364
    /**
365
     * Computes the size of the difference (andnot) between two bitmaps.
366
     * r1.cardinality - (r1 & r2).cardinality
367
     */
368
1
    uint64_t andnotCardinality(const Roaring64Map& r) const {
369
1
        uint64_t card = 0;
370
4
        for (const auto& map_entry : roarings) {
371
4
            card += map_entry.second.cardinality();
372
4
            if (r.roarings.count(map_entry.first) == 1) {
373
4
                card -= r.roarings.at(map_entry.first).and_cardinality(map_entry.second);
374
4
            }
375
4
        }
376
1
        return card;
377
1
    }
378
379
    /**
380
     * Computes the size of the symmetric difference (andnot) between two
381
     * bitmaps.
382
     * r1.cardinality + r2.cardinality - 2 * (r1 & r2).cardinality
383
     */
384
1
    uint64_t xorCardinality(const Roaring64Map& r) const {
385
1
        uint64_t card = 0;
386
1
        for (const auto& map_entry : roarings) {
387
1
            card += map_entry.second.cardinality();
388
1
        }
389
1
        for (const auto& map_entry : r.roarings) {
390
1
            card += map_entry.second.cardinality();
391
1
            if (roarings.count(map_entry.first) == 1) {
392
1
                card -= 2 * roarings.at(map_entry.first).and_cardinality(map_entry.second);
393
1
            }
394
1
        }
395
1
        return card;
396
1
    }
397
398
    /**
399
    * Returns true if the bitmap is empty (cardinality is zero).
400
    */
401
1
    bool isEmpty() const {
402
1
        return std::all_of(roarings.cbegin(), roarings.cend(),
403
1
                           [](const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
404
0
                               return map_entry.second.isEmpty();
405
0
                           });
406
1
    }
407
408
    /**
409
    * Returns true if the bitmap is full (cardinality is max uint64_t + 1).
410
    */
411
246
    bool isFull() const {
412
        // only bother to check if map is fully saturated
413
        //
414
        // we put std::numeric_limits<>::max/min in parenthesis
415
        // to avoid a clash with the Windows.h header under Windows
416
246
        return roarings.size() == ((uint64_t)(std::numeric_limits<uint32_t>::max)()) + 1
417
246
                       ? std::all_of(roarings.cbegin(), roarings.cend(),
418
0
                                     [](const std::pair<const uint32_t, roaring::Roaring>&
419
0
                                                roaring_map_entry) {
420
                                         // roarings within map are saturated if cardinality
421
                                         // is uint32_t max + 1
422
0
                                         return roaring_map_entry.second.cardinality() ==
423
0
                                                ((uint64_t)(std::numeric_limits<uint32_t>::max)()) +
424
0
                                                        1;
425
0
                                     })
426
246
                       : false;
427
246
    }
428
429
    /**
430
     * Return true if the two bitmaps contain the same elements.
431
     */
432
14
    bool operator==(const Roaring64Map& r) const {
433
        // we cannot use operator == on the map because either side may contain
434
        // empty Roaring Bitmaps
435
14
        auto lhs_iter = roarings.cbegin();
436
14
        auto lhs_cend = roarings.cend();
437
14
        auto rhs_iter = r.roarings.cbegin();
438
14
        auto rhs_cend = r.roarings.cend();
439
27
        while (lhs_iter != lhs_cend && rhs_iter != rhs_cend) {
440
13
            auto lhs_key = lhs_iter->first;
441
13
            auto rhs_key = rhs_iter->first;
442
13
            const auto& lhs_map = lhs_iter->second;
443
13
            const auto& rhs_map = rhs_iter->second;
444
13
            if (lhs_map.isEmpty()) {
445
1
                ++lhs_iter;
446
1
                continue;
447
1
            }
448
12
            if (rhs_map.isEmpty()) {
449
2
                ++rhs_iter;
450
2
                continue;
451
2
            }
452
10
            if (!(lhs_key == rhs_key)) {
453
0
                return false;
454
0
            }
455
10
            if (!(lhs_map == rhs_map)) {
456
0
                return false;
457
0
            }
458
10
            ++lhs_iter;
459
10
            ++rhs_iter;
460
10
        }
461
16
        while (lhs_iter != lhs_cend) {
462
4
            if (!lhs_iter->second.isEmpty()) {
463
2
                return false;
464
2
            }
465
2
            ++lhs_iter;
466
2
        }
467
14
        while (rhs_iter != rhs_cend) {
468
3
            if (!rhs_iter->second.isEmpty()) {
469
1
                return false;
470
1
            }
471
2
            ++rhs_iter;
472
2
        }
473
11
        return true;
474
12
    }
475
476
    /** convert array and bitmap containers to run containers when it is more
477
     * efficient;
478
     * also convert from run containers when more space efficient.  Returns
479
     * true if the result has at least one run container.
480
     * Additional savings might be possible by calling shrinkToFit().
481
     */
482
27.3k
    bool runOptimize() {
483
27.3k
        return std::accumulate(
484
27.3k
                roarings.begin(), roarings.end(), true,
485
27.3k
                [](bool previous, std::pair<const uint32_t, roaring::Roaring>& map_entry) {
486
27.3k
                    return map_entry.second.runOptimize() && previous;
487
27.3k
                });
488
27.3k
    }
489
490
    /**
491
     * If needed, reallocate memory to shrink the memory usage. Returns
492
     * the number of bytes saved.
493
    */
494
27.3k
    size_t shrinkToFit() {
495
27.3k
        size_t savedBytes = 0;
496
27.3k
        auto iter = roarings.begin();
497
54.6k
        while (iter != roarings.cend()) {
498
27.3k
            if (iter->second.isEmpty()) {
499
                // empty Roarings are 84 bytes
500
1
                savedBytes += 88;
501
1
                iter = roarings.erase(iter);
502
27.3k
            } else {
503
27.3k
                savedBytes += iter->second.shrinkToFit();
504
27.3k
                iter++;
505
27.3k
            }
506
27.3k
        }
507
27.3k
        return savedBytes;
508
27.3k
    }
509
510
    /**
511
     * Iterate over the bitmap elements. The function iterator is called once
512
     * for all the values with ptr (can be nullptr) as the second parameter of each
513
     * call.
514
     *
515
     * roaring_iterator is simply a pointer to a function that returns bool
516
     * (true means that the iteration should continue while false means that it
517
     * should stop), and takes (uint32_t,void*) as inputs.
518
     */
519
8.73k
    void iterate(roaring::api::roaring_iterator64 iterator, void* ptr) const {
520
8.76k
        for (const auto& map_entry : roarings) {
521
8.76k
            bool should_continue = roaring_iterate64(&map_entry.second.roaring, iterator,
522
8.76k
                                                     uint64_t(map_entry.first) << 32, ptr);
523
8.76k
            if (!should_continue) {
524
1
                break;
525
1
            }
526
8.76k
        }
527
8.73k
    }
528
529
    /**
530
     * write a bitmap to a char buffer.
531
     * Returns how many bytes were written which should be getSizeInBytes().
532
     */
533
19.1k
    size_t write(char* buf, int serialize_version) const {
534
19.1k
        bool is_v1 = serialize_version == 1;
535
19.1k
        BitmapTypeCode::type type_bitmap32 =
536
19.1k
                is_v1 ? BitmapTypeCode::type::BITMAP32 : BitmapTypeCode::type::BITMAP32_V2;
537
19.1k
        BitmapTypeCode::type type_bitmap64 =
538
19.1k
                is_v1 ? BitmapTypeCode::type::BITMAP64 : BitmapTypeCode::type::BITMAP64_V2;
539
540
19.1k
        if (is32BitsEnough()) {
541
19.1k
            *(buf++) = type_bitmap32;
542
19.1k
            auto it = roarings.find(0);
543
19.1k
            if (it == roarings.end()) { // empty bitmap
544
2
                roaring::Roaring r;
545
2
                return r.write(buf, is_v1) + 1;
546
2
            }
547
19.1k
            return it->second.write(buf, is_v1) + 1;
548
19.1k
        }
549
550
7
        const char* orig = buf;
551
        // put type code
552
7
        *(buf++) = type_bitmap64;
553
        // push map size
554
7
        buf = (char*)encode_varint64((uint8_t*)buf, roarings.size());
555
7
        std::for_each(roarings.cbegin(), roarings.cend(),
556
16
                      [&buf, is_v1](const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
557
                          // push map key
558
16
                          encode_fixed32_le((uint8_t*)buf, map_entry.first);
559
16
                          buf += sizeof(uint32_t);
560
                          // push map value Roaring
561
16
                          buf += map_entry.second.write(buf, is_v1);
562
16
                      });
563
7
        return buf - orig;
564
19.1k
    }
565
566
    /**
567
     * read a bitmap from a serialized version.
568
     *
569
     * This function is unsafe in the sense that if you provide bad data,
570
     * many bytes could be read, possibly causing a buffer overflow. See also readSafe.
571
     */
572
11.0k
    static Roaring64Map read(const char* buf) {
573
11.0k
        Roaring64Map result;
574
575
11.0k
        bool is_v1 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP64 == *buf;
576
11.0k
        bool is_bitmap32 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP32_V2 == *buf;
577
11.0k
        bool is_bitmap64 = BitmapTypeCode::BITMAP64 == *buf || BitmapTypeCode::BITMAP64_V2 == *buf;
578
11.0k
        if (is_bitmap32) {
579
11.0k
            roaring::Roaring read = roaring::Roaring::read(buf + 1, is_v1);
580
11.0k
            result.emplaceOrInsert(0, std::move(read));
581
11.0k
            return result;
582
11.0k
        }
583
584
11.0k
        DCHECK(is_bitmap64);
585
8
        buf++;
586
587
        // get map size (varint64 took 1~10 bytes)
588
8
        uint64_t map_size;
589
8
        buf = reinterpret_cast<const char*>(
590
8
                decode_varint64_ptr(reinterpret_cast<const uint8_t*>(buf),
591
8
                                    reinterpret_cast<const uint8_t*>(buf + 10), &map_size));
592
8
        DCHECK(buf != nullptr);
593
26
        for (uint64_t lcv = 0; lcv < map_size; lcv++) {
594
            // get map key
595
18
            uint32_t key = decode_fixed32_le(reinterpret_cast<const uint8_t*>(buf));
596
18
            buf += sizeof(uint32_t);
597
            // read map value Roaring
598
18
            roaring::Roaring read_var = roaring::Roaring::read(buf, is_v1);
599
            // forward buffer past the last Roaring Bitmap
600
18
            buf += read_var.getSizeInBytes(is_v1);
601
18
            result.emplaceOrInsert(key, std::move(read_var));
602
18
        }
603
8
        return result;
604
11.0k
    }
605
606
    /**
607
     * How many bytes are required to serialize this bitmap
608
     */
609
27.3k
    size_t getSizeInBytes(int serialize_version) const {
610
27.3k
        bool is_v1 = serialize_version == 1;
611
27.3k
        if (is32BitsEnough()) {
612
27.3k
            auto it = roarings.find(0);
613
27.3k
            if (it == roarings.end()) { // empty bitmap
614
2
                roaring::Roaring r;
615
2
                return r.getSizeInBytes(is_v1) + 1;
616
2
            }
617
27.3k
            return it->second.getSizeInBytes(is_v1) + 1;
618
27.3k
        }
619
        // start with type code, map size and size of keys for each map entry
620
13
        size_t init = 1 + varint_length(roarings.size()) + roarings.size() * sizeof(uint32_t);
621
13
        return std::accumulate(
622
13
                roarings.cbegin(), roarings.cend(), init,
623
28
                [=](size_t previous, const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
624
                    // add in bytes used by each Roaring
625
28
                    return previous + map_entry.second.getSizeInBytes(is_v1);
626
28
                });
627
27.3k
    }
628
629
    /**
630
     * remove all elements
631
     */
632
0
    void clear() { roarings.clear(); }
633
634
    /**
635
     * Return whether all elements can be represented in 32 bits
636
     */
637
46.5k
    bool is32BitsEnough() const { return maximum() <= std::numeric_limits<uint32_t>::max(); }
638
639
    /**
640
     * Computes the intersection between two bitmaps and returns new bitmap.
641
     * The current bitmap and the provided bitmap are unchanged.
642
     */
643
2
    Roaring64Map operator&(const Roaring64Map& o) const { return Roaring64Map(*this) &= o; }
644
645
    /**
646
     * Computes the difference between two bitmaps and returns new bitmap.
647
     * The current bitmap and the provided bitmap are unchanged.
648
     */
649
0
    Roaring64Map operator-(const Roaring64Map& o) const { return Roaring64Map(*this) -= o; }
650
651
    /**
652
     * Computes the union between two bitmaps and returns new bitmap.
653
     * The current bitmap and the provided bitmap are unchanged.
654
     */
655
1
    Roaring64Map operator|(const Roaring64Map& o) const { return Roaring64Map(*this) |= o; }
656
657
    /**
658
     * Computes the symmetric union between two bitmaps and returns new bitmap.
659
     * The current bitmap and the provided bitmap are unchanged.
660
     */
661
1
    Roaring64Map operator^(const Roaring64Map& o) const { return Roaring64Map(*this) ^= o; }
662
663
    /**
664
     * computes the logical or (union) between "n" bitmaps (referenced by a
665
     * pointer).
666
     */
667
5
    static Roaring64Map fastunion(size_t n, const Roaring64Map** inputs) {
668
5
        struct pq_entry {
669
5
            phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator iterator;
670
5
            phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator end;
671
5
        };
672
673
5
        struct pq_comp {
674
9
            bool operator()(const pq_entry& lhs, const pq_entry& rhs) const {
675
9
                auto left_key = lhs.iterator->first;
676
9
                auto right_key = rhs.iterator->first;
677
                // We compare in the opposite direction than normal because priority
678
                // queues normally order from largest to smallest, but we want
679
                // smallest to largest.
680
9
                return left_key > right_key;
681
9
            }
682
5
        };
683
684
5
        std::priority_queue<pq_entry, std::vector<pq_entry>, pq_comp> pq;
685
686
12
        for (auto i = 0; i < n; ++i) {
687
7
            const auto& roaring = inputs[i]->roarings;
688
7
            if (roaring.begin() != roaring.end()) {
689
7
                pq.push({roaring.begin(), roaring.end()});
690
7
            }
691
7
        }
692
693
5
        std::vector<const roaring::api::roaring_bitmap_t*> group_bitmaps;
694
5
        Roaring64Map result;
695
13
        while (!pq.empty()) {
696
8
            auto group_key = pq.top().iterator->first;
697
8
            group_bitmaps.clear();
698
699
19
            while (!pq.empty()) {
700
14
                auto candidate_current_iter = pq.top().iterator;
701
14
                auto candidate_end_iter = pq.top().end;
702
703
14
                auto candidate_key = candidate_current_iter->first;
704
14
                const auto& candidate_bitmap = candidate_current_iter->second;
705
706
14
                if (candidate_key != group_key) {
707
3
                    break;
708
3
                }
709
710
11
                group_bitmaps.push_back(&candidate_bitmap.roaring);
711
11
                pq.pop();
712
11
                ++candidate_current_iter;
713
11
                if (candidate_current_iter != candidate_end_iter) {
714
4
                    pq.push({candidate_current_iter, candidate_end_iter});
715
4
                }
716
11
            }
717
8
            auto* inner_result = roaring::api::roaring_bitmap_or_many(group_bitmaps.size(),
718
8
                                                                      group_bitmaps.data());
719
8
            result.roarings.insert(result.roarings.end(),
720
8
                                   std::make_pair(group_key, roaring::Roaring(inner_result)));
721
8
        }
722
723
5
        return result;
724
5
    }
725
726
    friend class Roaring64MapSetBitForwardIterator;
727
    using const_iterator = Roaring64MapSetBitForwardIterator;
728
729
    /**
730
    * Returns an iterator that can be used to access the position of the
731
    * set bits. The running time complexity of a full scan is proportional to
732
    * the
733
    * number
734
    * of set bits: be aware that if you have long strings of 1s, this can be
735
    * very inefficient.
736
    *
737
    * It can be much faster to use the toArray method if you want to
738
    * retrieve the set bits.
739
    */
740
    const_iterator begin() const;
741
742
    /**
743
    * A bogus iterator that can be used together with begin()
744
    * for constructions such as for(auto i = b.begin();
745
    * i!=b.end(); ++i) {}
746
    */
747
    const_iterator end() const;
748
749
private:
750
    phmap::btree_map<uint32_t, roaring::Roaring> roarings {};
751
    bool copyOnWrite {false};
752
9.48M
    static uint32_t highBytes(const uint64_t in) { return uint32_t(in >> 32); }
753
4.74M
    static uint32_t lowBytes(const uint64_t in) { return uint32_t(in); }
754
49.4k
    static uint64_t uniteBytes(const uint32_t highBytes, const uint32_t lowBytes) {
755
49.4k
        return (uint64_t(highBytes) << 32) | uint64_t(lowBytes);
756
49.4k
    }
757
1
    void emplaceOrInsert(const uint32_t key, const roaring::Roaring& value) {
758
1
        roarings.emplace(std::make_pair(key, value));
759
1
    }
760
761
11.1k
    void emplaceOrInsert(const uint32_t key, roaring::Roaring&& value) {
762
11.1k
        roarings.emplace(key, value);
763
11.1k
    }
764
};
765
766
// Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.4.0/cpp/roaring64map.hh
767
// Used to go through the set bits. Not optimally fast, but convenient.
768
class Roaring64MapSetBitForwardIterator {
769
public:
770
    using type_of_iterator = Roaring64MapSetBitForwardIterator;
771
772
    /**
773
     * Provides the location of the set bit.
774
     */
775
2.96k
    uint64_t operator*() const {
776
2.96k
        return Roaring64Map::uniteBytes(map_iter->first, i.current_value);
777
2.96k
    }
778
779
975
    type_of_iterator& operator++() { // ++i, must returned inc. value
780
975
        if (i.has_value) {
781
975
            roaring_advance_uint32_iterator(&i);
782
975
        }
783
1.00k
        while (!i.has_value) {
784
34
            map_iter++;
785
34
            if (map_iter == map_end) return *this;
786
25
            roaring_init_iterator(&map_iter->second.roaring, &i);
787
25
        }
788
966
        return *this;
789
975
    }
790
791
1.80k
    type_of_iterator operator++(int) { // i++, must return orig. value
792
1.80k
        Roaring64MapSetBitForwardIterator orig(*this);
793
1.80k
        roaring_advance_uint32_iterator(&i);
794
1.81k
        while (!i.has_value) {
795
4
            map_iter++;
796
4
            if (map_iter == map_end) return orig;
797
2
            roaring_init_iterator(&map_iter->second.roaring, &i);
798
2
        }
799
1.80k
        return orig;
800
1.80k
    }
801
802
5
    bool move(const uint64_t& x) {
803
5
        map_iter = p.lower_bound(Roaring64Map::highBytes(x));
804
5
        if (map_iter != p.cend()) {
805
5
            roaring_init_iterator(&map_iter->second.roaring, &i);
806
5
            if (map_iter->first == Roaring64Map::highBytes(x)) {
807
5
                if (roaring_move_uint32_iterator_equalorlarger(&i, Roaring64Map::lowBytes(x)))
808
4
                    return true;
809
1
                map_iter++;
810
1
                if (map_iter == map_end) return false;
811
0
                roaring_init_iterator(&map_iter->second.roaring, &i);
812
0
            }
813
0
            return true;
814
5
        }
815
0
        return false;
816
5
    }
817
818
527
    bool operator==(const Roaring64MapSetBitForwardIterator& o) const {
819
527
        if (map_iter == map_end && o.map_iter == o.map_end) return true;
820
519
        if (o.map_iter == o.map_end) return false;
821
1
        return **this == *o;
822
519
    }
823
824
2.30k
    bool operator!=(const Roaring64MapSetBitForwardIterator& o) const {
825
2.30k
        if (map_iter == map_end && o.map_iter == o.map_end) return false;
826
2.29k
        if (o.map_iter == o.map_end) return true;
827
0
        return **this != *o;
828
2.29k
    }
829
5
    Roaring64MapSetBitForwardIterator& operator=(const Roaring64MapSetBitForwardIterator& r) {
830
5
        map_iter = r.map_iter;
831
5
        map_end = r.map_end;
832
5
        i = r.i;
833
5
        return *this;
834
5
    }
835
836
    Roaring64MapSetBitForwardIterator(const Roaring64MapSetBitForwardIterator& r) = default;
837
838
    Roaring64MapSetBitForwardIterator(const Roaring64Map& parent, bool exhausted = false)
839
2.29k
            : p(parent.roarings), map_end(parent.roarings.cend()) {
840
2.29k
        if (exhausted || parent.roarings.empty()) {
841
2.27k
            map_iter = parent.roarings.cend();
842
2.27k
        } else {
843
21
            map_iter = parent.roarings.cbegin();
844
21
            roaring_init_iterator(&map_iter->second.roaring, &i);
845
21
            while (!i.has_value) {
846
0
                map_iter++;
847
0
                if (map_iter == map_end) return;
848
0
                roaring_init_iterator(&map_iter->second.roaring, &i);
849
0
            }
850
21
        }
851
2.29k
    }
852
853
protected:
854
    const phmap::btree_map<uint32_t, roaring::Roaring>& p;
855
    phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_iter {};
856
    phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_end {};
857
    roaring::api::roaring_uint32_iterator_t i {};
858
};
859
860
18
inline Roaring64MapSetBitForwardIterator Roaring64Map::begin() const {
861
18
    return {*this};
862
18
}
863
864
2.26k
inline Roaring64MapSetBitForwardIterator Roaring64Map::end() const {
865
2.26k
    return {*this, true};
866
2.26k
}
867
868
} // namespace detail
869
870
// Represent the in-memory and on-disk structure of Doris's BITMAP data type.
871
// Optimize for the case where the bitmap contains 0 or 1 element which is common
872
// for streaming load scenario.
873
class BitmapValueIterator;
874
class BitmapValue {
875
public:
876
    template <typename T>
877
    using SetContainer = phmap::flat_hash_set<T>;
878
879
    // Construct an empty bitmap.
880
100k
    BitmapValue() : _sv(0), _bitmap(nullptr), _type(EMPTY), _is_shared(false) { _set.clear(); }
881
882
    // Construct a bitmap with one element.
883
    explicit BitmapValue(uint64_t value)
884
71
            : _sv(value), _bitmap(nullptr), _type(SINGLE), _is_shared(false) {}
885
886
    // Construct a bitmap from serialized data.
887
3.08k
    explicit BitmapValue(const char* src) : _is_shared(false) {
888
3.08k
        bool res = deserialize(src);
889
3.08k
        DCHECK(res);
890
3.08k
    }
891
892
20.5k
    BitmapValue(const BitmapValue& other) {
893
20.5k
        _type = other._type;
894
20.5k
        switch (other._type) {
895
196
        case EMPTY:
896
196
            break;
897
172
        case SINGLE:
898
172
            _sv = other._sv;
899
172
            break;
900
17.5k
        case BITMAP:
901
17.5k
            _bitmap = other._bitmap;
902
17.5k
            break;
903
2.68k
        case SET:
904
2.68k
            _set = other._set;
905
2.68k
            break;
906
20.5k
        }
907
908
20.5k
        if (other._type == BITMAP) {
909
17.5k
            _is_shared = true;
910
            // should also set other's state to shared, so that other bitmap value will
911
            // create a new bitmap when it wants to modify it.
912
17.5k
            const_cast<BitmapValue&>(other)._is_shared = true;
913
17.5k
        }
914
20.5k
    }
915
916
25.2k
    BitmapValue(BitmapValue&& other) noexcept {
917
25.2k
        _type = other._type;
918
25.2k
        switch (other._type) {
919
64
        case EMPTY:
920
64
            break;
921
241
        case SINGLE:
922
241
            _sv = other._sv;
923
241
            break;
924
21.0k
        case BITMAP:
925
21.0k
            _bitmap = std::move(other._bitmap);
926
21.0k
            other._bitmap = nullptr;
927
21.0k
            break;
928
3.90k
        case SET:
929
3.90k
            _set = std::move(other._set);
930
3.90k
            break;
931
25.2k
        }
932
25.2k
        _is_shared = other._is_shared;
933
25.2k
        other._type = EMPTY;
934
25.2k
        other._is_shared = false;
935
25.2k
    }
936
937
108
    BitmapValue& operator=(const BitmapValue& other) {
938
108
        if (this == &other) {
939
5
            return *this;
940
5
        }
941
103
        reset();
942
103
        _type = other._type;
943
103
        switch (other._type) {
944
20
        case EMPTY:
945
20
            break;
946
30
        case SINGLE:
947
30
            _sv = other._sv;
948
30
            break;
949
26
        case BITMAP:
950
26
            _bitmap = other._bitmap;
951
26
            break;
952
27
        case SET:
953
27
            _set = other._set;
954
27
            break;
955
103
        }
956
957
103
        if (other._type == BITMAP) {
958
26
            _is_shared = true;
959
            // should also set other's state to shared, so that other bitmap value will
960
            // create a new bitmap when it wants to modify it.
961
26
            const_cast<BitmapValue&>(other)._is_shared = true;
962
26
        }
963
103
        return *this;
964
103
    }
965
966
22
    static BitmapValue empty_bitmap() { return BitmapValue {}; }
967
968
23
    BitmapValue& operator=(BitmapValue&& other) noexcept {
969
23
        if (this == &other) {
970
0
            return *this;
971
0
        }
972
23
        reset();
973
974
23
        _type = other._type;
975
23
        switch (other._type) {
976
1
        case EMPTY:
977
1
            break;
978
12
        case SINGLE:
979
12
            _sv = other._sv;
980
12
            break;
981
4
        case BITMAP:
982
4
            _bitmap = std::move(other._bitmap);
983
4
            other._bitmap = nullptr;
984
4
            break;
985
6
        case SET:
986
6
            _set = std::move(other._set);
987
6
            break;
988
23
        }
989
23
        _is_shared = other._is_shared;
990
23
        return *this;
991
23
    }
992
993
    // Construct a bitmap from given elements.
994
62
    explicit BitmapValue(const std::vector<uint64_t>& bits) : _is_shared(false) {
995
62
        if (bits.size() == 0) {
996
0
            _type = EMPTY;
997
0
            return;
998
0
        }
999
1000
62
        if (bits.size() == 1) {
1001
1
            _type = SINGLE;
1002
1
            _sv = bits[0];
1003
1
            return;
1004
1
        }
1005
1006
61
        if (!config::enable_set_in_bitmap_value || bits.size() > SET_TYPE_THRESHOLD) {
1007
26
            _type = BITMAP;
1008
26
            _prepare_bitmap_for_write();
1009
26
            _bitmap->addMany(bits.size(), &bits[0]);
1010
35
        } else {
1011
35
            _type = SET;
1012
137
            for (auto v : bits) {
1013
137
                _set.insert(v);
1014
137
            }
1015
35
        }
1016
61
    }
1017
1018
31
    BitmapTypeCode::type get_type_code() const {
1019
31
        switch (_type) {
1020
1
        case EMPTY:
1021
1
            return BitmapTypeCode::EMPTY;
1022
7
        case SINGLE:
1023
7
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1024
4
                return BitmapTypeCode::SINGLE32;
1025
4
            } else {
1026
3
                return BitmapTypeCode::SINGLE64;
1027
3
            }
1028
9
        case SET:
1029
9
            return BitmapTypeCode::SET;
1030
14
        case BITMAP:
1031
14
            bool is_v1 = (config::bitmap_serialize_version == 1);
1032
14
            if (_bitmap->is32BitsEnough()) {
1033
9
                return is_v1 ? BitmapTypeCode::type::BITMAP32 : BitmapTypeCode::type::BITMAP32_V2;
1034
9
            } else {
1035
5
                return is_v1 ? BitmapTypeCode::type::BITMAP64 : BitmapTypeCode::type::BITMAP64_V2;
1036
5
            }
1037
31
        }
1038
0
        __builtin_unreachable();
1039
31
    }
1040
1041
    template <typename T>
1042
22
    void add_many(const T* values, const size_t count) {
1043
22
        switch (_type) {
1044
17
        case EMPTY:
1045
17
            if (count == 1) {
1046
0
                _sv = values[0];
1047
0
                _type = SINGLE;
1048
17
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1049
270
                for (size_t i = 0; i != count; ++i) {
1050
259
                    _set.insert(values[i]);
1051
259
                }
1052
11
                _type = SET;
1053
11
            } else {
1054
6
                _prepare_bitmap_for_write();
1055
6
                _bitmap->addMany(count, values);
1056
6
                _type = BITMAP;
1057
6
            }
1058
17
            break;
1059
0
        case SINGLE:
1060
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1061
0
                _set.insert(_sv);
1062
0
                for (size_t i = 0; i != count; ++i) {
1063
0
                    _set.insert(values[i]);
1064
0
                }
1065
0
                _type = SET;
1066
0
                _convert_to_bitmap_if_need();
1067
0
            } else {
1068
0
                _prepare_bitmap_for_write();
1069
0
                _bitmap->add(_sv);
1070
0
                _bitmap->addMany(count, values);
1071
0
                _type = BITMAP;
1072
0
            }
1073
0
            break;
1074
1
        case BITMAP:
1075
1
            _prepare_bitmap_for_write();
1076
1
            _bitmap->addMany(count, values);
1077
1
            break;
1078
4
        case SET:
1079
324
            for (size_t i = 0; i != count; ++i) {
1080
320
                _set.insert(values[i]);
1081
320
            }
1082
4
            _convert_to_bitmap_if_need();
1083
4
            break;
1084
22
        }
1085
22
    }
_ZN5doris11BitmapValue8add_manyIiEEvPKT_m
Line
Count
Source
1042
5
    void add_many(const T* values, const size_t count) {
1043
5
        switch (_type) {
1044
5
        case EMPTY:
1045
5
            if (count == 1) {
1046
0
                _sv = values[0];
1047
0
                _type = SINGLE;
1048
5
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1049
78
                for (size_t i = 0; i != count; ++i) {
1050
73
                    _set.insert(values[i]);
1051
73
                }
1052
5
                _type = SET;
1053
5
            } else {
1054
0
                _prepare_bitmap_for_write();
1055
0
                _bitmap->addMany(count, values);
1056
0
                _type = BITMAP;
1057
0
            }
1058
5
            break;
1059
0
        case SINGLE:
1060
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1061
0
                _set.insert(_sv);
1062
0
                for (size_t i = 0; i != count; ++i) {
1063
0
                    _set.insert(values[i]);
1064
0
                }
1065
0
                _type = SET;
1066
0
                _convert_to_bitmap_if_need();
1067
0
            } else {
1068
0
                _prepare_bitmap_for_write();
1069
0
                _bitmap->add(_sv);
1070
0
                _bitmap->addMany(count, values);
1071
0
                _type = BITMAP;
1072
0
            }
1073
0
            break;
1074
0
        case BITMAP:
1075
0
            _prepare_bitmap_for_write();
1076
0
            _bitmap->addMany(count, values);
1077
0
            break;
1078
0
        case SET:
1079
0
            for (size_t i = 0; i != count; ++i) {
1080
0
                _set.insert(values[i]);
1081
0
            }
1082
0
            _convert_to_bitmap_if_need();
1083
0
            break;
1084
5
        }
1085
5
    }
_ZN5doris11BitmapValue8add_manyImEEvPKT_m
Line
Count
Source
1042
17
    void add_many(const T* values, const size_t count) {
1043
17
        switch (_type) {
1044
12
        case EMPTY:
1045
12
            if (count == 1) {
1046
0
                _sv = values[0];
1047
0
                _type = SINGLE;
1048
12
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1049
192
                for (size_t i = 0; i != count; ++i) {
1050
186
                    _set.insert(values[i]);
1051
186
                }
1052
6
                _type = SET;
1053
6
            } else {
1054
6
                _prepare_bitmap_for_write();
1055
6
                _bitmap->addMany(count, values);
1056
6
                _type = BITMAP;
1057
6
            }
1058
12
            break;
1059
0
        case SINGLE:
1060
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1061
0
                _set.insert(_sv);
1062
0
                for (size_t i = 0; i != count; ++i) {
1063
0
                    _set.insert(values[i]);
1064
0
                }
1065
0
                _type = SET;
1066
0
                _convert_to_bitmap_if_need();
1067
0
            } else {
1068
0
                _prepare_bitmap_for_write();
1069
0
                _bitmap->add(_sv);
1070
0
                _bitmap->addMany(count, values);
1071
0
                _type = BITMAP;
1072
0
            }
1073
0
            break;
1074
1
        case BITMAP:
1075
1
            _prepare_bitmap_for_write();
1076
1
            _bitmap->addMany(count, values);
1077
1
            break;
1078
4
        case SET:
1079
324
            for (size_t i = 0; i != count; ++i) {
1080
320
                _set.insert(values[i]);
1081
320
            }
1082
4
            _convert_to_bitmap_if_need();
1083
4
            break;
1084
17
        }
1085
17
    }
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIaEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIsEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIlEEvPKT_m
1086
1087
4.73M
    void add(uint64_t value) {
1088
4.73M
        switch (_type) {
1089
11.5k
        case EMPTY:
1090
11.5k
            if (!config::enable_set_in_bitmap_value) {
1091
5.32k
                _sv = value;
1092
5.32k
                _type = SINGLE;
1093
6.25k
            } else {
1094
6.25k
                _set.insert(value);
1095
6.25k
                _type = SET;
1096
6.25k
            }
1097
11.5k
            break;
1098
5.28k
        case SINGLE:
1099
            //there is no need to convert the type if two variables are equal
1100
5.28k
            if (_sv == value) {
1101
2
                break;
1102
2
            }
1103
5.27k
            if (config::enable_set_in_bitmap_value) {
1104
5
                _set.insert(_sv);
1105
5
                _set.insert(value);
1106
5
                _type = SET;
1107
5.27k
            } else {
1108
5.27k
                _prepare_bitmap_for_write();
1109
5.27k
                _bitmap->add(_sv);
1110
5.27k
                _bitmap->add(value);
1111
5.27k
                _type = BITMAP;
1112
5.27k
            }
1113
5.27k
            break;
1114
4.58M
        case BITMAP:
1115
4.58M
            _prepare_bitmap_for_write();
1116
4.58M
            _bitmap->add(value);
1117
4.58M
            break;
1118
130k
        case SET:
1119
130k
            _set.insert(value);
1120
130k
            _convert_to_bitmap_if_need();
1121
130k
            break;
1122
4.73M
        }
1123
4.73M
    }
1124
1125
8
    void remove(uint64_t value) {
1126
8
        switch (_type) {
1127
1
        case EMPTY:
1128
1
            break;
1129
1
        case SINGLE:
1130
            //there is need to convert the type if two variables are equal
1131
1
            if (_sv == value) {
1132
0
                _type = EMPTY;
1133
0
            }
1134
1
            break;
1135
4
        case BITMAP:
1136
4
            _prepare_bitmap_for_write();
1137
4
            _bitmap->remove(value);
1138
4
            _convert_to_smaller_type();
1139
4
            break;
1140
2
        case SET:
1141
2
            _set.erase(value);
1142
2
            _convert_to_smaller_type();
1143
2
            break;
1144
8
        }
1145
8
    }
1146
1147
    // Compute the union between the current bitmap and the provided bitmap.
1148
16
    BitmapValue& operator-=(const BitmapValue& rhs) {
1149
16
        switch (rhs._type) {
1150
4
        case EMPTY:
1151
4
            break;
1152
4
        case SINGLE:
1153
4
            remove(rhs._sv);
1154
4
            break;
1155
4
        case BITMAP:
1156
4
            switch (_type) {
1157
1
            case EMPTY:
1158
1
                break;
1159
1
            case SINGLE:
1160
1
                if (rhs._bitmap->contains(_sv)) {
1161
0
                    _type = EMPTY;
1162
0
                }
1163
1
                break;
1164
1
            case BITMAP:
1165
1
                _prepare_bitmap_for_write();
1166
1
                *_bitmap -= *rhs._bitmap;
1167
1
                _convert_to_smaller_type();
1168
1
                break;
1169
1
            case SET: {
1170
32
                for (auto it = _set.begin(); it != _set.end();) {
1171
31
                    if (rhs.contains(*it)) {
1172
16
                        it = _set.erase(it);
1173
16
                    } else {
1174
15
                        ++it;
1175
15
                    }
1176
31
                }
1177
1
                _convert_to_smaller_type();
1178
1
                break;
1179
0
            }
1180
4
            }
1181
4
            break;
1182
4
        case SET: {
1183
4
            switch (_type) {
1184
1
            case EMPTY:
1185
1
                break;
1186
1
            case SINGLE:
1187
1
                if (rhs._set.contains(_sv)) {
1188
0
                    _type = EMPTY;
1189
0
                }
1190
1
                break;
1191
1
            case BITMAP:
1192
1
                _prepare_bitmap_for_write();
1193
31
                for (auto v : rhs._set) {
1194
31
                    if (_bitmap->contains(v)) {
1195
0
                        _bitmap->remove(v);
1196
0
                    }
1197
31
                }
1198
1
                _convert_to_smaller_type();
1199
1
                break;
1200
1
            case SET: {
1201
32
                for (auto it = _set.begin(); it != _set.end();) {
1202
31
                    if (rhs.contains(*it)) {
1203
14
                        it = _set.erase(it);
1204
17
                    } else {
1205
17
                        ++it;
1206
17
                    }
1207
31
                }
1208
1
                _convert_to_smaller_type();
1209
1
                break;
1210
0
            }
1211
4
            }
1212
4
        }
1213
16
        }
1214
16
        return *this;
1215
16
    }
1216
1217
    // Compute the union between the current bitmap and the provided bitmap.
1218
    // Possible type transitions are:
1219
    // EMPTY  -> SINGLE
1220
    // EMPTY  -> BITMAP
1221
    // SINGLE -> BITMAP
1222
64
    BitmapValue& operator|=(const BitmapValue& rhs) {
1223
64
        switch (rhs._type) {
1224
12
        case EMPTY:
1225
12
            break;
1226
30
        case SINGLE:
1227
30
            add(rhs._sv);
1228
30
            break;
1229
8
        case BITMAP:
1230
8
            switch (_type) {
1231
2
            case EMPTY:
1232
2
                _bitmap = rhs._bitmap;
1233
2
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1234
2
                _is_shared = true;
1235
2
                _type = BITMAP;
1236
2
                break;
1237
3
            case SINGLE:
1238
3
                _bitmap = rhs._bitmap;
1239
3
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1240
3
                _is_shared = true;
1241
3
                _prepare_bitmap_for_write();
1242
3
                _bitmap->add(_sv);
1243
3
                _type = BITMAP;
1244
3
                break;
1245
2
            case BITMAP:
1246
2
                _prepare_bitmap_for_write();
1247
2
                *_bitmap |= *rhs._bitmap;
1248
2
                break;
1249
1
            case SET: {
1250
1
                _prepare_bitmap_for_write();
1251
1
                *_bitmap = *rhs._bitmap;
1252
1253
31
                for (auto v : _set) {
1254
31
                    _bitmap->add(v);
1255
31
                }
1256
1
                _type = BITMAP;
1257
1
                break;
1258
0
            }
1259
8
            }
1260
8
            break;
1261
14
        case SET:
1262
14
            switch (_type) {
1263
1
            case EMPTY:
1264
1
                _set = rhs._set;
1265
1
                _type = SET;
1266
1
                break;
1267
1
            case SINGLE: {
1268
1
                if ((rhs._set.size() + rhs._set.contains(_sv) > SET_TYPE_THRESHOLD)) {
1269
0
                    _prepare_bitmap_for_write();
1270
0
                    _bitmap->add(_sv);
1271
0
                    for (auto v : rhs._set) {
1272
0
                        _bitmap->add(v);
1273
0
                    }
1274
0
                    _type = BITMAP;
1275
1
                } else {
1276
1
                    _set = rhs._set;
1277
1
                    _set.insert(_sv);
1278
1
                    _type = SET;
1279
1
                }
1280
1
                break;
1281
0
            }
1282
1
            case BITMAP:
1283
1
                _prepare_bitmap_for_write();
1284
31
                for (auto v : rhs._set) {
1285
31
                    _bitmap->add(v);
1286
31
                }
1287
1
                break;
1288
11
            case SET: {
1289
58
                for (auto v : rhs._set) {
1290
58
                    _set.insert(v);
1291
58
                }
1292
11
                _convert_to_bitmap_if_need();
1293
11
                break;
1294
0
            }
1295
14
            }
1296
14
            break;
1297
64
        }
1298
64
        return *this;
1299
64
    }
1300
1301
24
    BitmapValue& fastunion(const std::vector<const BitmapValue*>& values) {
1302
24
        std::vector<const detail::Roaring64Map*> bitmaps;
1303
24
        std::vector<uint64_t> single_values;
1304
24
        std::vector<const SetContainer<uint64_t>*> sets;
1305
1.73k
        for (const auto* value : values) {
1306
1.73k
            switch (value->_type) {
1307
5
            case EMPTY:
1308
5
                break;
1309
11
            case SINGLE:
1310
11
                single_values.push_back(value->_sv);
1311
11
                break;
1312
5
            case BITMAP:
1313
5
                bitmaps.push_back(value->_bitmap.get());
1314
5
                break;
1315
1.71k
            case SET:
1316
1.71k
                sets.push_back(&value->_set);
1317
1.71k
                break;
1318
1.73k
            }
1319
1.73k
        }
1320
1321
24
        if (!bitmaps.empty()) {
1322
5
            _prepare_bitmap_for_write();
1323
5
            switch (_type) {
1324
2
            case EMPTY:
1325
2
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1326
2
                break;
1327
2
            case SINGLE:
1328
2
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1329
2
                _bitmap->add(_sv);
1330
2
                break;
1331
1
            case BITMAP:
1332
1
                for (const auto* bitmap : bitmaps) {
1333
1
                    *_bitmap |= *bitmap;
1334
1
                }
1335
1
                break;
1336
0
            case SET: {
1337
0
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1338
0
                for (auto v : _set) {
1339
0
                    _bitmap->add(v);
1340
0
                }
1341
0
                _set.clear();
1342
0
                break;
1343
0
            }
1344
5
            }
1345
5
            _type = BITMAP;
1346
5
        }
1347
1348
24
        if (!sets.empty()) {
1349
1.71k
            for (auto& set : sets) {
1350
1.80k
                for (auto v : *set) {
1351
1.80k
                    _set.insert(v);
1352
1.80k
                }
1353
1.71k
            }
1354
10
            switch (_type) {
1355
9
            case EMPTY:
1356
9
                _type = SET;
1357
9
                break;
1358
0
            case SINGLE: {
1359
0
                _set.insert(_sv);
1360
0
                _type = SET;
1361
0
                break;
1362
0
            }
1363
0
            case BITMAP:
1364
0
                _prepare_bitmap_for_write();
1365
0
                for (auto v : _set) {
1366
0
                    _bitmap->add(v);
1367
0
                }
1368
0
                _type = BITMAP;
1369
0
                _set.clear();
1370
0
                break;
1371
1
            case SET: {
1372
1
                break;
1373
0
            }
1374
10
            }
1375
10
            if (_type == SET) {
1376
10
                _convert_to_bitmap_if_need();
1377
10
            }
1378
10
        }
1379
1380
24
        if (_type == EMPTY && single_values.size() == 1) {
1381
2
            if (config::enable_set_in_bitmap_value) {
1382
1
                _type = SET;
1383
1
                _set.insert(single_values[0]);
1384
1
            } else {
1385
1
                _sv = single_values[0];
1386
1
                _type = SINGLE;
1387
1
            }
1388
22
        } else if (!single_values.empty()) {
1389
6
            switch (_type) {
1390
2
            case EMPTY:
1391
3
            case SINGLE:
1392
3
                if (config::enable_set_in_bitmap_value) {
1393
2
                    _set.insert(single_values.cbegin(), single_values.cend());
1394
2
                    if (_type == SINGLE) {
1395
0
                        _set.insert(_sv);
1396
0
                    }
1397
2
                    _type = SET;
1398
2
                    _convert_to_bitmap_if_need();
1399
2
                } else {
1400
1
                    _prepare_bitmap_for_write();
1401
1
                    _bitmap->addMany(single_values.size(), single_values.data());
1402
1
                    if (_type == SINGLE) {
1403
1
                        _bitmap->add(_sv);
1404
1
                    }
1405
1
                    _type = BITMAP;
1406
1
                    _convert_to_smaller_type();
1407
1
                }
1408
3
                break;
1409
3
            case BITMAP: {
1410
3
                _prepare_bitmap_for_write();
1411
3
                _bitmap->addMany(single_values.size(), single_values.data());
1412
3
                break;
1413
2
            }
1414
0
            case SET:
1415
0
                _set.insert(single_values.cbegin(), single_values.cend());
1416
0
                _convert_to_bitmap_if_need();
1417
0
                break;
1418
6
            }
1419
6
        }
1420
1421
24
        return *this;
1422
24
    }
1423
1424
    // Compute the intersection between the current bitmap and the provided bitmap.
1425
    // Possible type transitions are:
1426
    // SINGLE -> EMPTY
1427
    // BITMAP -> EMPTY
1428
    // BITMAP -> SINGLE
1429
51
    BitmapValue& operator&=(const BitmapValue& rhs) {
1430
51
        switch (rhs._type) {
1431
14
        case EMPTY:
1432
14
            reset(); // empty & any = empty
1433
14
            break;
1434
10
        case SINGLE:
1435
10
            switch (_type) {
1436
2
            case EMPTY:
1437
2
                break;
1438
3
            case SINGLE:
1439
3
                if (_sv != rhs._sv) {
1440
2
                    _type = EMPTY;
1441
2
                }
1442
3
                break;
1443
4
            case BITMAP:
1444
4
                if (!_bitmap->contains(rhs._sv)) {
1445
1
                    _type = EMPTY;
1446
3
                } else {
1447
3
                    _type = SINGLE;
1448
3
                    _sv = rhs._sv;
1449
3
                }
1450
4
                _bitmap.reset();
1451
4
                _is_shared = false;
1452
4
                break;
1453
1
            case SET:
1454
1
                if (!_set.contains(rhs._sv)) {
1455
0
                    _type = EMPTY;
1456
1
                } else {
1457
1
                    _type = SINGLE;
1458
1
                    _sv = rhs._sv;
1459
1
                }
1460
1
                _set.clear();
1461
1
                break;
1462
10
            }
1463
10
            break;
1464
10
        case BITMAP:
1465
10
            switch (_type) {
1466
2
            case EMPTY:
1467
2
                break;
1468
3
            case SINGLE:
1469
3
                if (!rhs._bitmap->contains(_sv)) {
1470
2
                    _type = EMPTY;
1471
2
                }
1472
3
                break;
1473
4
            case BITMAP:
1474
4
                _prepare_bitmap_for_write();
1475
4
                *_bitmap &= *rhs._bitmap;
1476
4
                _convert_to_smaller_type();
1477
4
                break;
1478
1
            case SET:
1479
32
                for (auto it = _set.begin(); it != _set.end();) {
1480
31
                    if (!rhs._bitmap->contains(*it)) {
1481
15
                        it = _set.erase(it);
1482
16
                    } else {
1483
16
                        ++it;
1484
16
                    }
1485
31
                }
1486
1
                _convert_to_smaller_type();
1487
1
                break;
1488
10
            }
1489
10
            break;
1490
17
        case SET:
1491
17
            switch (_type) {
1492
1
            case EMPTY:
1493
1
                break;
1494
1
            case SINGLE:
1495
1
                if (!rhs._set.contains(_sv)) {
1496
1
                    _type = EMPTY;
1497
1
                }
1498
1
                break;
1499
1
            case BITMAP:
1500
1
                _prepare_bitmap_for_write();
1501
31
                for (auto v : rhs._set) {
1502
31
                    if (_bitmap->contains(v)) {
1503
0
                        _set.insert(v);
1504
0
                    }
1505
31
                }
1506
1
                _type = SET;
1507
1
                _bitmap.reset();
1508
1
                _is_shared = false;
1509
1
                _convert_to_smaller_type();
1510
1
                break;
1511
14
            case SET:
1512
79
                for (auto it = _set.begin(); it != _set.end();) {
1513
65
                    if (!rhs._set.contains(*it)) {
1514
37
                        it = _set.erase(it);
1515
37
                    } else {
1516
28
                        ++it;
1517
28
                    }
1518
65
                }
1519
14
                _convert_to_smaller_type();
1520
14
                break;
1521
17
            }
1522
17
            break;
1523
51
        }
1524
51
        return *this;
1525
51
    }
1526
1527
    // Compute the symmetric union between the current bitmap and the provided bitmap.
1528
    // Possible type transitions are:
1529
    // SINGLE -> EMPTY
1530
    // BITMAP -> EMPTY
1531
    // BITMAP -> SINGLE
1532
29
    BitmapValue& operator^=(const BitmapValue& rhs) {
1533
29
        switch (rhs._type) {
1534
8
        case EMPTY:
1535
8
            break;
1536
4
        case SINGLE:
1537
4
            switch (_type) {
1538
1
            case EMPTY:
1539
1
                add(rhs._sv);
1540
1
                break;
1541
1
            case SINGLE:
1542
1
                if (_sv == rhs._sv) {
1543
0
                    _type = EMPTY;
1544
1
                } else {
1545
1
                    add(rhs._sv);
1546
1
                }
1547
1
                break;
1548
1
            case BITMAP:
1549
1
                if (!_bitmap->contains(rhs._sv)) {
1550
0
                    add(rhs._sv);
1551
1
                } else {
1552
1
                    _prepare_bitmap_for_write();
1553
1
                    _bitmap->remove(rhs._sv);
1554
1
                }
1555
1
                break;
1556
1
            case SET:
1557
1
                if (!_set.contains(rhs._sv)) {
1558
0
                    _set.insert(rhs._sv);
1559
1
                } else {
1560
1
                    _set.erase(rhs._sv);
1561
1
                }
1562
1
                break;
1563
4
            }
1564
4
            break;
1565
6
        case BITMAP:
1566
6
            switch (_type) {
1567
1
            case EMPTY:
1568
1
                _bitmap = rhs._bitmap;
1569
1
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1570
1
                _is_shared = true;
1571
1
                _type = BITMAP;
1572
1
                break;
1573
1
            case SINGLE:
1574
1
                _bitmap = rhs._bitmap;
1575
1
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1576
1
                _is_shared = true;
1577
1
                _type = BITMAP;
1578
1
                _prepare_bitmap_for_write();
1579
1
                if (!rhs._bitmap->contains(_sv)) {
1580
1
                    _bitmap->add(_sv);
1581
1
                } else {
1582
0
                    _bitmap->remove(_sv);
1583
0
                }
1584
1
                break;
1585
3
            case BITMAP:
1586
3
                _prepare_bitmap_for_write();
1587
3
                *_bitmap ^= *rhs._bitmap;
1588
3
                _convert_to_smaller_type();
1589
3
                break;
1590
1
            case SET:
1591
1
                _prepare_bitmap_for_write();
1592
1
                *_bitmap = *rhs._bitmap;
1593
31
                for (auto v : _set) {
1594
31
                    if (_bitmap->contains(v)) {
1595
16
                        _bitmap->remove(v);
1596
16
                    } else {
1597
15
                        _bitmap->add(v);
1598
15
                    }
1599
31
                }
1600
1
                _type = BITMAP;
1601
1
                _convert_to_smaller_type();
1602
1
                break;
1603
6
            }
1604
6
            break;
1605
11
        case SET:
1606
11
            switch (_type) {
1607
1
            case EMPTY:
1608
1
                _set = rhs._set;
1609
1
                _type = SET;
1610
1
                break;
1611
1
            case SINGLE:
1612
1
                _set = rhs._set;
1613
1
                if (!rhs._set.contains(_sv)) {
1614
1
                    _set.insert(_sv);
1615
1
                } else {
1616
0
                    _set.erase(_sv);
1617
0
                }
1618
1
                _type = SET;
1619
1
                break;
1620
1
            case BITMAP:
1621
1
                _prepare_bitmap_for_write();
1622
31
                for (auto v : rhs._set) {
1623
31
                    if (_bitmap->contains(v)) {
1624
0
                        _bitmap->remove(v);
1625
31
                    } else {
1626
31
                        _bitmap->add(v);
1627
31
                    }
1628
31
                }
1629
1
                _convert_to_smaller_type();
1630
1
                break;
1631
8
            case SET:
1632
49
                for (auto v : rhs._set) {
1633
49
                    if (_set.contains(v)) {
1634
22
                        _set.erase(v);
1635
27
                    } else {
1636
27
                        _set.insert(v);
1637
27
                    }
1638
49
                }
1639
8
                _convert_to_smaller_type();
1640
8
                break;
1641
11
            }
1642
11
            break;
1643
29
        }
1644
29
        return *this;
1645
29
    }
1646
1647
    // check if value x is present
1648
9.19k
    bool contains(uint64_t x) const {
1649
9.19k
        switch (_type) {
1650
0
        case EMPTY:
1651
0
            return false;
1652
14
        case SINGLE:
1653
14
            return _sv == x;
1654
8.93k
        case BITMAP:
1655
8.93k
            return _bitmap->contains(x);
1656
238
        case SET:
1657
238
            return _set.contains(x);
1658
9.19k
        }
1659
0
        return false;
1660
9.19k
    }
1661
1662
    // true if contains a value that belongs to the range [left, right].
1663
    bool contains_any(uint64_t left, uint64_t right) const;
1664
1665
240k
    uint64_t cardinality() const {
1666
240k
        switch (_type) {
1667
240k
        case EMPTY:
1668
240k
            return 0;
1669
36
        case SINGLE:
1670
36
            return 1;
1671
210
        case BITMAP:
1672
210
            return _bitmap->cardinality();
1673
74
        case SET:
1674
74
            return _set.size();
1675
240k
        }
1676
0
        return 0;
1677
240k
    }
1678
1679
60.0k
    uint64_t and_cardinality(const BitmapValue& rhs) const {
1680
60.0k
        switch (rhs._type) {
1681
60.0k
        case EMPTY:
1682
60.0k
            return 0;
1683
4
        case SINGLE:
1684
4
            switch (_type) {
1685
1
            case EMPTY:
1686
1
                return 0;
1687
1
            case SINGLE:
1688
1
                return _sv == rhs._sv;
1689
1
            case BITMAP:
1690
1
                return _bitmap->contains(rhs._sv);
1691
1
            case SET:
1692
1
                return _set.contains(rhs._sv);
1693
4
            }
1694
0
            break;
1695
7
        case BITMAP:
1696
7
            switch (_type) {
1697
1
            case EMPTY:
1698
1
                return 0;
1699
1
            case SINGLE:
1700
1
                return rhs._bitmap->contains(_sv);
1701
4
            case BITMAP:
1702
4
                return _bitmap->andCardinality(*rhs._bitmap);
1703
1
            case SET: {
1704
1
                uint64_t cardinality = 0;
1705
31
                for (auto v : _set) {
1706
31
                    if (rhs._bitmap->contains(v)) {
1707
16
                        ++cardinality;
1708
16
                    }
1709
31
                }
1710
1
                return cardinality;
1711
0
            }
1712
7
            }
1713
0
            break;
1714
4
        case SET:
1715
4
            switch (_type) {
1716
1
            case EMPTY:
1717
1
                return 0;
1718
1
            case SINGLE:
1719
1
                return rhs._set.contains(_sv);
1720
1
            case BITMAP: {
1721
1
                uint64_t cardinality = 0;
1722
31
                for (auto v : rhs._set) {
1723
31
                    if (_bitmap->contains(v)) {
1724
0
                        ++cardinality;
1725
0
                    }
1726
31
                }
1727
1
                return cardinality;
1728
0
            }
1729
1
            case SET: {
1730
1
                uint64_t cardinality = 0;
1731
31
                for (auto v : _set) {
1732
31
                    if (rhs._set.contains(v)) {
1733
14
                        ++cardinality;
1734
14
                    }
1735
31
                }
1736
1
                return cardinality;
1737
0
            }
1738
4
            }
1739
60.0k
        }
1740
0
        return 0;
1741
60.0k
    }
1742
1743
60.0k
    uint64_t or_cardinality(const BitmapValue& rhs) const {
1744
60.0k
        switch (rhs._type) {
1745
59.9k
        case EMPTY:
1746
59.9k
            return cardinality();
1747
0
        case SINGLE:
1748
0
            switch (_type) {
1749
0
            case EMPTY:
1750
0
                return 1;
1751
0
            case SINGLE:
1752
0
                return 1 + (_sv != rhs._sv);
1753
0
            case BITMAP:
1754
0
                return cardinality() + !_bitmap->contains(rhs._sv);
1755
0
            case SET:
1756
0
                return _set.size() + !_set.contains(rhs._sv);
1757
0
            }
1758
0
            break;
1759
3
        case BITMAP:
1760
3
            switch (_type) {
1761
0
            case EMPTY:
1762
0
                return rhs.cardinality();
1763
0
            case SINGLE:
1764
0
                return rhs.cardinality() + !rhs._bitmap->contains(_sv);
1765
3
            case BITMAP:
1766
3
                return _bitmap->orCardinality(*rhs._bitmap);
1767
0
            case SET: {
1768
0
                uint64_t cardinality = rhs._bitmap->cardinality();
1769
0
                for (auto v : _set) {
1770
0
                    if (!rhs._bitmap->contains(v)) {
1771
0
                        ++cardinality;
1772
0
                    }
1773
0
                }
1774
0
                return cardinality;
1775
0
            }
1776
3
            }
1777
0
            break;
1778
0
        case SET:
1779
0
            switch (_type) {
1780
0
            case EMPTY:
1781
0
                return rhs.cardinality();
1782
0
            case SINGLE:
1783
0
                return rhs.cardinality() + !rhs._set.contains(_sv);
1784
0
            case BITMAP: {
1785
0
                uint64_t cardinality = _bitmap->cardinality();
1786
0
                for (auto v : rhs._set) {
1787
0
                    if (!_bitmap->contains(v)) {
1788
0
                        ++cardinality;
1789
0
                    }
1790
0
                }
1791
0
                return cardinality;
1792
0
            }
1793
0
            case SET: {
1794
0
                uint64_t cardinality = _set.size();
1795
0
                for (auto v : _set) {
1796
0
                    if (!rhs._set.contains(v)) {
1797
0
                        ++cardinality;
1798
0
                    }
1799
0
                }
1800
0
                return cardinality;
1801
0
            }
1802
0
            }
1803
60.0k
        }
1804
0
        return 0;
1805
60.0k
    }
1806
1807
24
    uint64_t andnot_cardinality(const BitmapValue& rhs) const {
1808
24
        switch (rhs._type) {
1809
8
        case EMPTY:
1810
8
            return cardinality();
1811
4
        case SINGLE:
1812
4
            switch (_type) {
1813
1
            case EMPTY:
1814
1
                return 0;
1815
1
            case SINGLE:
1816
1
                return 1 - (_sv == rhs._sv);
1817
1
            case BITMAP:
1818
1
                return cardinality() - _bitmap->contains(rhs._sv);
1819
1
            case SET:
1820
1
                return cardinality() - _set.contains(rhs._sv);
1821
4
            }
1822
0
            break;
1823
4
        case BITMAP:
1824
4
            switch (_type) {
1825
1
            case EMPTY:
1826
1
                return 0;
1827
1
            case SINGLE:
1828
1
                return !rhs._bitmap->contains(_sv);
1829
1
            case BITMAP:
1830
1
                return _bitmap->andnotCardinality(*rhs._bitmap);
1831
1
            case SET: {
1832
1
                uint64_t cardinality = _set.size();
1833
31
                for (auto v : _set) {
1834
31
                    if (rhs._bitmap->contains(v)) {
1835
16
                        cardinality -= 1;
1836
16
                    }
1837
31
                }
1838
1
                return cardinality;
1839
0
            }
1840
4
            }
1841
0
            break;
1842
8
        case SET:
1843
8
            switch (_type) {
1844
1
            case EMPTY:
1845
1
                return 0;
1846
1
            case SINGLE:
1847
1
                return !rhs._set.contains(_sv);
1848
1
            case BITMAP: {
1849
1
                uint64_t cardinality = _bitmap->cardinality();
1850
31
                for (auto v : rhs._set) {
1851
31
                    if (_bitmap->contains(v)) {
1852
0
                        cardinality -= 1;
1853
0
                    }
1854
31
                }
1855
1
                return cardinality;
1856
0
            }
1857
5
            case SET: {
1858
5
                uint64_t cardinality = _set.size();
1859
33
                for (auto v : rhs._set) {
1860
33
                    if (_set.contains(v)) {
1861
16
                        cardinality -= 1;
1862
16
                    }
1863
33
                }
1864
5
                return cardinality;
1865
0
            }
1866
8
            }
1867
24
        }
1868
0
        return 0;
1869
24
    }
1870
1871
    // Return how many bytes are required to serialize this bitmap.
1872
    // See BitmapTypeCode for the serialized format.
1873
148k
    size_t getSizeInBytes() {
1874
148k
        size_t res = 0;
1875
148k
        switch (_type) {
1876
120k
        case EMPTY:
1877
120k
            res = 1;
1878
120k
            break;
1879
35
        case SINGLE:
1880
35
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1881
30
                res = 1 + sizeof(uint32_t);
1882
30
            } else {
1883
5
                res = 1 + sizeof(uint64_t);
1884
5
            }
1885
35
            break;
1886
27.3k
        case BITMAP:
1887
27.3k
            _prepare_bitmap_for_write();
1888
27.3k
            _bitmap->runOptimize();
1889
27.3k
            _bitmap->shrinkToFit();
1890
27.3k
            res = _bitmap->getSizeInBytes(config::bitmap_serialize_version);
1891
27.3k
            break;
1892
679
        case SET:
1893
            /// 1 byte for type, 1 byte for count
1894
679
            res = 2 + sizeof(uint64_t) * _set.size();
1895
679
            break;
1896
148k
        }
1897
148k
        return res;
1898
148k
    }
1899
1900
    // Serialize the bitmap value to dst, which should be large enough.
1901
    // Client should call `getSizeInBytes` first to get the serialized size.
1902
79.7k
    void write_to(char* dst) const {
1903
79.7k
        switch (_type) {
1904
60.0k
        case EMPTY:
1905
60.0k
            *dst = BitmapTypeCode::EMPTY;
1906
60.0k
            break;
1907
25
        case SINGLE:
1908
25
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1909
22
                *(dst++) = BitmapTypeCode::SINGLE32;
1910
22
                encode_fixed32_le(reinterpret_cast<uint8_t*>(dst), static_cast<uint32_t>(_sv));
1911
22
            } else {
1912
3
                *(dst++) = BitmapTypeCode::SINGLE64;
1913
3
                encode_fixed64_le(reinterpret_cast<uint8_t*>(dst), _sv);
1914
3
            }
1915
25
            break;
1916
548
        case SET:
1917
548
            DCHECK(config::enable_set_in_bitmap_value);
1918
548
            *dst = BitmapTypeCode::SET;
1919
548
            ++dst;
1920
548
            *dst = static_cast<uint8_t>(_set.size());
1921
548
            ++dst;
1922
8.76k
            for (auto v : _set) {
1923
8.76k
                encode_fixed64_le(reinterpret_cast<uint8_t*>(dst), v);
1924
8.76k
                dst += sizeof(uint64_t);
1925
8.76k
            }
1926
548
            break;
1927
19.1k
        case BITMAP:
1928
19.1k
            _bitmap->write(dst, config::bitmap_serialize_version);
1929
19.1k
            break;
1930
79.7k
        }
1931
79.7k
    }
1932
1933
    // Deserialize a bitmap value from `src`.
1934
    // Return false if `src` begins with unknown type code, true otherwise.
1935
71.4k
    bool deserialize(const char* src) {
1936
71.4k
        switch (*src) {
1937
60.0k
        case BitmapTypeCode::EMPTY:
1938
60.0k
            _type = EMPTY;
1939
60.0k
            break;
1940
15
        case BitmapTypeCode::SINGLE32:
1941
15
            _type = SINGLE;
1942
15
            _sv = decode_fixed32_le(reinterpret_cast<const uint8_t*>(src + 1));
1943
15
            if (config::enable_set_in_bitmap_value) {
1944
2
                _type = SET;
1945
2
                _set.insert(_sv);
1946
2
            }
1947
15
            break;
1948
3
        case BitmapTypeCode::SINGLE64:
1949
3
            _type = SINGLE;
1950
3
            _sv = decode_fixed64_le(reinterpret_cast<const uint8_t*>(src + 1));
1951
3
            if (config::enable_set_in_bitmap_value) {
1952
1
                _type = SET;
1953
1
                _set.insert(_sv);
1954
1
            }
1955
3
            break;
1956
5.13k
        case BitmapTypeCode::BITMAP32:
1957
5.13k
        case BitmapTypeCode::BITMAP64:
1958
11.0k
        case BitmapTypeCode::BITMAP32_V2:
1959
11.0k
        case BitmapTypeCode::BITMAP64_V2:
1960
11.0k
            _type = BITMAP;
1961
11.0k
            _is_shared = false;
1962
11.0k
            try {
1963
11.0k
                _bitmap = std::make_shared<detail::Roaring64Map>(detail::Roaring64Map::read(src));
1964
11.0k
            } catch (const std::runtime_error& e) {
1965
1
                LOG(ERROR) << "Decode roaring bitmap failed, " << e.what();
1966
1
                return false;
1967
1
            }
1968
11.0k
            break;
1969
355
        case BitmapTypeCode::SET: {
1970
355
            _type = SET;
1971
355
            ++src;
1972
355
            uint8_t count = *src;
1973
355
            ++src;
1974
355
            CHECK(count <= SET_TYPE_THRESHOLD) << "bitmap value with incorrect set count";
1975
6.04k
            for (uint8_t i = 0; i != count; ++i, src += sizeof(uint64_t)) {
1976
5.68k
                _set.insert(decode_fixed64_le(reinterpret_cast<const uint8_t*>(src)));
1977
5.68k
            }
1978
355
            CHECK_EQ(count, _set.size()) << "bitmap value with incorrect set count";
1979
1980
355
            if (!config::enable_set_in_bitmap_value) {
1981
0
                _prepare_bitmap_for_write();
1982
0
                for (auto v : _set) {
1983
0
                    _bitmap->add(v);
1984
0
                }
1985
0
                _type = BITMAP;
1986
0
                _set.clear();
1987
0
            }
1988
355
            break;
1989
11.0k
        }
1990
1
        case BitmapTypeCode::SET_V2: {
1991
1
            uint32_t size = 0;
1992
1
            memcpy(&size, src + 1, sizeof(uint32_t));
1993
1
            src += sizeof(uint32_t) + 1;
1994
1995
1
            if (!config::enable_set_in_bitmap_value || size > SET_TYPE_THRESHOLD) {
1996
0
                _type = BITMAP;
1997
0
                _prepare_bitmap_for_write();
1998
1999
0
                for (int i = 0; i < size; ++i) {
2000
0
                    uint64_t key {};
2001
0
                    memcpy(&key, src, sizeof(uint64_t));
2002
0
                    _bitmap->add(key);
2003
0
                    src += sizeof(uint64_t);
2004
0
                }
2005
1
            } else {
2006
1
                _type = SET;
2007
1
                _set.reserve(size);
2008
2009
3
                for (int i = 0; i < size; ++i) {
2010
2
                    uint64_t key {};
2011
2
                    memcpy(&key, src, sizeof(uint64_t));
2012
2
                    _set.insert(key);
2013
2
                    src += sizeof(uint64_t);
2014
2
                }
2015
1
            }
2016
1
            break;
2017
11.0k
        }
2018
0
        default:
2019
0
            LOG(ERROR) << "BitmapTypeCode invalid, should between: " << BitmapTypeCode::EMPTY
2020
0
                       << " and " << BitmapTypeCode::BITMAP64 << " actual is "
2021
0
                       << static_cast<int>(*src);
2022
0
            return false;
2023
71.4k
        }
2024
71.4k
        return true;
2025
71.4k
    }
2026
2027
13
    int64_t minimum() const {
2028
13
        switch (_type) {
2029
2
        case SINGLE:
2030
2
            return _sv;
2031
4
        case BITMAP:
2032
4
            return _bitmap->minimum();
2033
5
        case SET:
2034
5
            return _min_in_set();
2035
2
        default:
2036
2
            return 0;
2037
13
        }
2038
13
    }
2039
2040
    // TODO limit string size to avoid OOM
2041
9.33k
    std::string to_string() const {
2042
9.33k
        std::stringstream ss;
2043
9.33k
        switch (_type) {
2044
16
        case EMPTY:
2045
16
            break;
2046
58
        case SINGLE:
2047
58
            ss << _sv;
2048
58
            break;
2049
8.73k
        case BITMAP: {
2050
8.73k
            struct IterCtx {
2051
8.73k
                std::stringstream* ss = nullptr;
2052
8.73k
                bool first = true;
2053
8.73k
            } iter_ctx;
2054
8.73k
            iter_ctx.ss = &ss;
2055
2056
8.73k
            _bitmap->iterate(
2057
4.23M
                    [](uint64_t value, void* c) -> bool {
2058
4.23M
                        auto ctx = reinterpret_cast<IterCtx*>(c);
2059
4.23M
                        if (ctx->first) {
2060
8.73k
                            ctx->first = false;
2061
4.22M
                        } else {
2062
4.22M
                            (*ctx->ss) << ",";
2063
4.22M
                        }
2064
4.23M
                        (*ctx->ss) << value;
2065
4.23M
                        return true;
2066
4.23M
                    },
2067
8.73k
                    &iter_ctx);
2068
8.73k
            break;
2069
0
        }
2070
530
        case SET: {
2071
530
            struct IterCtx {
2072
530
                std::stringstream* ss = nullptr;
2073
530
                bool first = true;
2074
530
            } iter_ctx;
2075
530
            iter_ctx.ss = &ss;
2076
2077
530
            std::vector<uint64_t> values(_set.begin(), _set.end());
2078
530
            std::sort(values.begin(), values.end());
2079
2080
8.31k
            for (auto v : values) {
2081
8.31k
                if (iter_ctx.first) {
2082
530
                    iter_ctx.first = false;
2083
7.78k
                } else {
2084
7.78k
                    (*iter_ctx.ss) << ",";
2085
7.78k
                }
2086
8.31k
                (*iter_ctx.ss) << v;
2087
8.31k
            }
2088
530
            break;
2089
0
        }
2090
9.33k
        }
2091
9.33k
        return ss.str();
2092
9.33k
    }
2093
2094
14
    int64_t maximum() const {
2095
14
        switch (_type) {
2096
2
        case SINGLE:
2097
2
            return _sv;
2098
5
        case BITMAP:
2099
5
            return _bitmap->maximum();
2100
5
        case SET:
2101
5
            return _max_in_set();
2102
2
        default:
2103
2
            return 0;
2104
14
        }
2105
14
    }
2106
2107
4
    uint64_t max(bool* empty) const {
2108
4
        return min_or_max(empty, [&]() { return maximum(); });
2109
4
    }
2110
2111
4
    uint64_t min(bool* empty) const {
2112
4
        return min_or_max(empty, [&]() { return minimum(); });
2113
4
    }
2114
2115
5
    uint64_t _min_in_set() const {
2116
5
        DCHECK_EQ(_type, SET);
2117
5
        return *std::min_element(_set.begin(), _set.end());
2118
5
    }
2119
2120
5
    uint64_t _max_in_set() const {
2121
5
        DCHECK_EQ(_type, SET);
2122
5
        return *std::max_element(_set.begin(), _set.end());
2123
5
    }
2124
2125
6
    bool empty() const { return _type == EMPTY; }
2126
2127
    /**
2128
     * Return new set with specified range (not include the range_end)
2129
     */
2130
    int64_t sub_range(const int64_t& range_start, const int64_t& range_end,
2131
7
                      BitmapValue* ret_bitmap) {
2132
7
        switch (_type) {
2133
1
        case EMPTY:
2134
1
            return 0;
2135
2
        case SINGLE: {
2136
            //only single value, so _sv must in [range_start,range_end)
2137
2
            if (range_start <= _sv && _sv < range_end) {
2138
1
                ret_bitmap->add(_sv);
2139
1
                return 1;
2140
1
            } else {
2141
1
                return 0;
2142
1
            }
2143
2
        }
2144
2
        case BITMAP: {
2145
2
            int64_t count = 0;
2146
146
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2147
145
                if (*it < range_start) {
2148
0
                    continue;
2149
0
                }
2150
145
                if (*it < range_end) {
2151
144
                    ret_bitmap->add(*it);
2152
144
                    ++count;
2153
144
                } else {
2154
1
                    break;
2155
1
                }
2156
145
            }
2157
2
            return count;
2158
2
        }
2159
2
        case SET: {
2160
2
            int64_t count = 0;
2161
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2162
2
            std::sort(values.begin(), values.end());
2163
6
            for (auto it = values.begin(); it != values.end(); ++it) {
2164
4
                if (*it < range_start || *it >= range_end) {
2165
1
                    continue;
2166
1
                }
2167
3
                ret_bitmap->add(*it);
2168
3
                ++count;
2169
3
            }
2170
2
            return count;
2171
2
        }
2172
7
        }
2173
0
        return 0;
2174
7
    }
2175
2176
    /**
2177
     * Return new set with specified start and limit
2178
     * @param range_start the start value for the range
2179
     * @param cardinality_limit the length of the subset
2180
     * @return the real count for subset, maybe less than cardinality_limit
2181
     */
2182
    int64_t sub_limit(const int64_t& range_start, const int64_t& cardinality_limit,
2183
9
                      BitmapValue* ret_bitmap) {
2184
9
        switch (_type) {
2185
1
        case EMPTY:
2186
1
            return 0;
2187
2
        case SINGLE: {
2188
            //only single value, so range_start must less than _sv
2189
2
            if (range_start > _sv) {
2190
0
                return 0;
2191
2
            } else {
2192
2
                ret_bitmap->add(_sv);
2193
2
                return 1;
2194
2
            }
2195
2
        }
2196
4
        case BITMAP: {
2197
4
            int64_t count = 0;
2198
31
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2199
30
                if (*it < range_start) {
2200
8
                    continue;
2201
8
                }
2202
22
                if (count < cardinality_limit) {
2203
19
                    ret_bitmap->add(*it);
2204
19
                    ++count;
2205
19
                } else {
2206
3
                    break;
2207
3
                }
2208
22
            }
2209
4
            return count;
2210
2
        }
2211
2
        case SET: {
2212
2
            int64_t count = 0;
2213
2214
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2215
2
            std::sort(values.begin(), values.end());
2216
6
            for (auto it = values.begin(); it != values.end(); ++it) {
2217
4
                if (*it < range_start) {
2218
0
                    continue;
2219
0
                }
2220
4
                if (count < cardinality_limit) {
2221
4
                    ret_bitmap->add(*it);
2222
4
                    ++count;
2223
4
                } else {
2224
0
                    break;
2225
0
                }
2226
4
            }
2227
2
            return count;
2228
2
        }
2229
9
        }
2230
0
        return 0;
2231
9
    }
2232
2233
    /**
2234
     * Returns the bitmap elements, starting from the offset position.
2235
     * The number of returned elements is limited by the cardinality_limit parameter.
2236
     * Analog of the substring string function, but for bitmap.
2237
     */
2238
8
    int64_t offset_limit(const int64_t& offset, const int64_t& limit, BitmapValue* ret_bitmap) {
2239
8
        switch (_type) {
2240
1
        case EMPTY:
2241
1
            return 0;
2242
2
        case SINGLE: {
2243
            //only single value, so offset must start 0
2244
2
            if (offset == 0) {
2245
1
                ret_bitmap->add(_sv);
2246
1
                return 1;
2247
1
            } else {
2248
1
                return 0;
2249
1
            }
2250
2
        }
2251
5
        default:
2252
5
            break;
2253
8
        }
2254
5
        if (_type == BITMAP) {
2255
3
            if (std::abs(offset) >= _bitmap->cardinality()) {
2256
0
                return 0;
2257
0
            }
2258
3
            int64_t abs_offset = offset;
2259
3
            if (offset < 0) {
2260
0
                abs_offset = _bitmap->cardinality() + offset;
2261
0
            }
2262
2263
3
            int64_t count = 0;
2264
3
            int64_t offset_count = 0;
2265
3
            auto it = _bitmap->begin();
2266
123
            for (; it != _bitmap->end() && offset_count < abs_offset; ++it) {
2267
120
                ++offset_count;
2268
120
            }
2269
33
            for (; it != _bitmap->end() && count < limit; ++it, ++count) {
2270
30
                ret_bitmap->add(*it);
2271
30
            }
2272
3
            return count;
2273
3
        } else {
2274
2
            if (std::abs(offset) > _set.size()) {
2275
0
                return 0;
2276
0
            }
2277
2278
2
            int64_t abs_offset = offset;
2279
2
            if (offset < 0) {
2280
0
                abs_offset = _set.size() + offset;
2281
0
            }
2282
2283
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2284
2
            std::sort(values.begin(), values.end());
2285
2286
2
            int64_t count = 0;
2287
2
            size_t index = 0;
2288
4
            for (auto v : values) {
2289
4
                if (index < abs_offset) {
2290
1
                    ++index;
2291
1
                    continue;
2292
1
                }
2293
3
                if (count == limit || index == values.size()) {
2294
0
                    break;
2295
0
                }
2296
3
                ++count;
2297
3
                ++index;
2298
3
                ret_bitmap->add(v);
2299
3
            }
2300
2
            return count;
2301
2
        }
2302
5
    }
2303
2304
    //for function bitmap_to_array
2305
3
    void to_array(vectorized::PaddedPODArray<int64_t>& data) const {
2306
3
        switch (_type) {
2307
0
        case EMPTY:
2308
0
            break;
2309
1
        case SINGLE: {
2310
1
            data.emplace_back(_sv);
2311
1
            break;
2312
0
        }
2313
1
        case BITMAP: {
2314
129
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2315
128
                data.emplace_back(*it);
2316
128
            }
2317
1
            break;
2318
0
        }
2319
1
        case SET: {
2320
1
            std::vector<uint64_t> values(_set.begin(), _set.end());
2321
1
            std::sort(values.begin(), values.end());
2322
31
            for (auto v : values) {
2323
31
                data.emplace_back(v);
2324
31
            }
2325
1
            break;
2326
0
        }
2327
3
        }
2328
3
    }
2329
2330
153
    void reset() {
2331
153
        _type = EMPTY;
2332
153
        _sv = 0;
2333
153
        _set.clear();
2334
153
        _is_shared = false;
2335
153
        _bitmap = nullptr;
2336
153
    }
2337
    // Implement an iterator for convenience
2338
    friend class BitmapValueIterator;
2339
    typedef BitmapValueIterator b_iterator;
2340
2341
    b_iterator begin() const;
2342
    b_iterator end() const;
2343
    b_iterator lower_bound(uint64_t val) const;
2344
2345
private:
2346
44
    void _convert_to_smaller_type() {
2347
44
        if (_type == BITMAP) {
2348
16
            uint64_t c = _bitmap->cardinality();
2349
16
            if (config::enable_set_in_bitmap_value && c > SET_TYPE_THRESHOLD) {
2350
7
                return;
2351
9
            } else if (c > 1) {
2352
7
                return;
2353
7
            }
2354
2
            if (c == 0) {
2355
1
                _type = EMPTY;
2356
1
            } else if (c == 1 && !config::enable_set_in_bitmap_value) {
2357
1
                _type = SINGLE;
2358
1
                _sv = _bitmap->minimum();
2359
1
            } else {
2360
0
                _type = SET;
2361
0
                for (auto v : *_bitmap) {
2362
0
                    _set.insert(v);
2363
0
                }
2364
0
            }
2365
2
            _bitmap.reset();
2366
28
        } else if (_type == SET) {
2367
28
            if (_set.size() == 1 && !config::enable_set_in_bitmap_value) {
2368
0
                _type = SINGLE;
2369
0
                _sv = *_set.begin();
2370
0
                _set.clear();
2371
0
            }
2372
28
        }
2373
44
    }
2374
2375
8
    uint64_t min_or_max(bool* empty, std::function<uint64_t()> func) const {
2376
8
        bool is_empty = false;
2377
8
        uint64_t result = 0;
2378
8
        switch (_type) {
2379
0
        case SINGLE:
2380
0
            result = _sv;
2381
0
            break;
2382
2
        case BITMAP:
2383
6
        case SET:
2384
6
            result = func();
2385
6
            break;
2386
2
        default:
2387
2
            is_empty = true;
2388
8
        }
2389
8
        if (empty) {
2390
8
            *empty = is_empty;
2391
8
        }
2392
8
        return result;
2393
8
    }
2394
2395
4.62M
    void _prepare_bitmap_for_write() {
2396
4.62M
        if (!_bitmap) {
2397
9.28k
            _bitmap = std::make_shared<detail::Roaring64Map>();
2398
9.28k
            _is_shared = false;
2399
9.28k
            return;
2400
9.28k
        }
2401
2402
4.61M
        if (!_is_shared) {
2403
            // the state is not shared, not need to check use count any more
2404
4.60M
            return;
2405
4.60M
        }
2406
2407
9.17k
        if (_bitmap.use_count() > 1) {
2408
1.02k
            auto new_one = std::make_shared<detail::Roaring64Map>();
2409
1.02k
            *new_one = *_bitmap;
2410
1.02k
            _bitmap = new_one;
2411
1.02k
        }
2412
9.17k
        _is_shared = false;
2413
9.17k
    }
2414
2415
130k
    void _convert_to_bitmap_if_need() {
2416
130k
        if (_type != SET || _set.size() <= SET_TYPE_THRESHOLD) {
2417
126k
            return;
2418
126k
        }
2419
3.97k
        _prepare_bitmap_for_write();
2420
133k
        for (auto v : _set) {
2421
133k
            _bitmap->add(v);
2422
133k
        }
2423
3.97k
        _type = BITMAP;
2424
3.97k
        _set.clear();
2425
3.97k
    }
2426
2427
    enum BitmapDataType {
2428
        EMPTY = 0,
2429
        SINGLE = 1, // single element
2430
        BITMAP = 2, // more than one elements
2431
        SET = 3     // elements count less or equal than 32
2432
    };
2433
    uint64_t _sv = 0;                              // store the single value when _type == SINGLE
2434
    std::shared_ptr<detail::Roaring64Map> _bitmap; // used when _type == BITMAP
2435
    SetContainer<uint64_t> _set;
2436
    BitmapDataType _type {EMPTY};
2437
    // Indicate whether the state is shared among multi BitmapValue object
2438
    bool _is_shared = true;
2439
    static constexpr uint64_t SET_TYPE_THRESHOLD = 32;
2440
};
2441
2442
// A simple implement of bitmap value iterator(Read only)
2443
// Usage:
2444
//  BitmapValueIterator iter = bitmap_value.begin();
2445
//  BitmapValueIterator end = bitmap_value.end();
2446
//  for (; iter != end(); ++iter) {
2447
//      uint64_t v = *iter;
2448
//      ... do something with "v" ...
2449
//  }
2450
class BitmapValueIterator {
2451
public:
2452
37
    BitmapValueIterator(const BitmapValue& bitmap, bool end = false) : _bitmap(bitmap), _end(end) {
2453
37
        switch (_bitmap._type) {
2454
8
        case BitmapValue::BitmapDataType::EMPTY:
2455
8
            _end = true;
2456
8
            break;
2457
8
        case BitmapValue::BitmapDataType::SINGLE:
2458
8
            _sv = _bitmap._sv;
2459
8
            break;
2460
13
        case BitmapValue::BitmapDataType::BITMAP:
2461
13
            _iter = new detail::Roaring64MapSetBitForwardIterator(*_bitmap._bitmap, _end);
2462
13
            break;
2463
8
        case BitmapValue::BitmapDataType::SET:
2464
8
            _set_iter = _end ? _bitmap._set.end() : _bitmap._set.begin();
2465
8
            break;
2466
0
        default:
2467
0
            CHECK(false) << _bitmap._type;
2468
37
        }
2469
37
    }
2470
2471
    BitmapValueIterator(const BitmapValueIterator& other)
2472
168
            : _bitmap(other._bitmap), _sv(other._sv), _end(other._end) {
2473
168
        _iter = other._iter ? new detail::Roaring64MapSetBitForwardIterator(*other._iter) : nullptr;
2474
168
        if (_bitmap._type == BitmapValue::BitmapDataType::SET) {
2475
33
            _set_iter = other._set_iter;
2476
33
        }
2477
168
    }
2478
2479
205
    ~BitmapValueIterator() {
2480
205
        if (_iter != nullptr) {
2481
143
            delete _iter;
2482
143
            _iter = nullptr;
2483
143
        }
2484
205
    }
2485
2486
646
    uint64_t operator*() const {
2487
646
        CHECK(!_end) << "should not get value of end iterator";
2488
646
        switch (_bitmap._type) {
2489
4
        case BitmapValue::BitmapDataType::SINGLE:
2490
4
            return _sv;
2491
517
        case BitmapValue::BitmapDataType::BITMAP:
2492
517
            return *(*_iter);
2493
125
        case BitmapValue::BitmapDataType::SET: {
2494
125
            return *_set_iter;
2495
0
        }
2496
0
        default:
2497
0
            CHECK(false) << _bitmap._type;
2498
646
        }
2499
0
        return 0;
2500
646
    }
2501
2502
486
    BitmapValueIterator& operator++() { // ++i, must returned inc. value
2503
486
        CHECK(!_end) << "should not forward when iterator ends";
2504
486
        switch (_bitmap._type) {
2505
3
        case BitmapValue::BitmapDataType::SINGLE:
2506
3
            _end = true;
2507
3
            break;
2508
389
        case BitmapValue::BitmapDataType::BITMAP:
2509
389
            ++(*_iter);
2510
389
            break;
2511
94
        case BitmapValue::BitmapDataType::SET:
2512
94
            ++_set_iter;
2513
94
            break;
2514
0
        default:
2515
0
            CHECK(false) << _bitmap._type;
2516
486
        }
2517
486
        return *this;
2518
486
    }
2519
2520
160
    BitmapValueIterator operator++(int) { // i++, must return orig. value
2521
160
        CHECK(!_end) << "should not forward when iterator ends";
2522
160
        BitmapValueIterator orig(*this);
2523
160
        switch (_bitmap._type) {
2524
1
        case BitmapValue::BitmapDataType::SINGLE:
2525
1
            _end = true;
2526
1
            break;
2527
128
        case BitmapValue::BitmapDataType::BITMAP:
2528
128
            ++(*_iter);
2529
128
            break;
2530
31
        case BitmapValue::BitmapDataType::SET:
2531
31
            ++_set_iter;
2532
31
            break;
2533
0
        default:
2534
0
            CHECK(false) << _bitmap._type;
2535
160
        }
2536
160
        return orig;
2537
160
    }
2538
2539
674
    bool operator==(const BitmapValueIterator& other) const {
2540
674
        if (_end && other._end) {
2541
13
            return true;
2542
13
        }
2543
2544
661
        switch (_bitmap._type) {
2545
0
        case BitmapValue::BitmapDataType::EMPTY:
2546
0
            return other._bitmap._type == BitmapValue::BitmapDataType::EMPTY;
2547
5
        case BitmapValue::BitmapDataType::SINGLE:
2548
5
            return _end == other._end && _sv == other._sv;
2549
524
        case BitmapValue::BitmapDataType::BITMAP:
2550
524
            return *_iter == *(other._iter);
2551
132
        case BitmapValue::BitmapDataType::SET:
2552
132
            return _set_iter == other._set_iter;
2553
0
        default:
2554
0
            CHECK(false) << _bitmap._type;
2555
661
        }
2556
0
        return false;
2557
661
    }
2558
2559
664
    bool operator!=(const BitmapValueIterator& other) const { return !(*this == other); }
2560
2561
    /**
2562
    * Move the iterator to the first value >= `val`.
2563
    */
2564
0
    BitmapValueIterator& move(uint64_t val) {
2565
0
        switch (_bitmap._type) {
2566
0
        case BitmapValue::BitmapDataType::SINGLE:
2567
0
            if (_sv < val) {
2568
0
                _end = true;
2569
0
            }
2570
0
            break;
2571
0
        case BitmapValue::BitmapDataType::BITMAP:
2572
0
            if (!_iter->move(val)) {
2573
0
                _end = true;
2574
0
            }
2575
0
            break;
2576
0
        case BitmapValue::BitmapDataType::SET: {
2577
0
            throw Exception(Status::FatalError("BitmapValue with set do not support move"));
2578
0
        }
2579
0
        default:
2580
0
            break;
2581
0
        }
2582
0
        return *this;
2583
0
    }
2584
2585
private:
2586
    const BitmapValue& _bitmap;
2587
    detail::Roaring64MapSetBitForwardIterator* _iter = nullptr;
2588
    BitmapValue::SetContainer<uint64_t>::const_iterator _set_iter;
2589
    uint64_t _sv = 0;
2590
    bool _end = false;
2591
};
2592
2593
16
inline BitmapValueIterator BitmapValue::begin() const {
2594
16
    return {*this};
2595
16
}
2596
2597
21
inline BitmapValueIterator BitmapValue::end() const {
2598
21
    return {*this, true};
2599
21
}
2600
2601
0
inline BitmapValueIterator BitmapValue::lower_bound(uint64_t val) const {
2602
0
    return BitmapValueIterator(*this).move(val);
2603
0
}
2604
2605
7
inline bool BitmapValue::contains_any(uint64_t left, uint64_t right) const {
2606
7
    if (left > right) {
2607
0
        return false;
2608
0
    }
2609
2610
7
    if (_type == SET) {
2611
25
        for (auto v : _set) {
2612
25
            if (v >= left && v <= right) {
2613
4
                return true;
2614
4
            }
2615
25
        }
2616
3
        return false;
2617
7
    }
2618
0
    auto it = lower_bound(left);
2619
0
    return it != end() && *it <= right;
2620
7
}
2621
2622
} // namespace doris