Coverage Report

Created: 2025-07-26 00:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/util/bitmap_value.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <parallel_hashmap/btree.h>
21
#include <parallel_hashmap/phmap.h>
22
23
#include <algorithm>
24
#include <cstdarg>
25
#include <cstdint>
26
#include <cstdio>
27
#include <limits>
28
#include <map>
29
#include <memory>
30
#include <new>
31
#include <numeric>
32
#include <queue>
33
#include <roaring/roaring.hh>
34
#include <set>
35
#include <stdexcept>
36
#include <string>
37
#include <utility>
38
39
#include "common/config.h"
40
#include "common/exception.h"
41
#include "common/logging.h"
42
#include "udf/udf.h"
43
#include "util/coding.h"
44
#include "vec/common/pod_array.h"
45
#include "vec/common/pod_array_fwd.h"
46
namespace doris {
47
48
// serialized bitmap := TypeCode(1), Payload
49
// The format of payload depends on value of TypeCode which is defined below
50
struct BitmapTypeCode {
51
    enum type {
52
        // An empty bitmap. Payload is 0 byte.
53
        // added in 0.11
54
        EMPTY = 0,
55
        // A bitmap containing only one element that is in [0, UINT32_MAX]
56
        // Payload := UInt32LittleEndian(4 byte)
57
        // added in 0.11
58
        SINGLE32 = 1,
59
        // A bitmap whose maximum element is in [0, UINT32_MAX]
60
        // Payload := the standard RoaringBitmap format described by
61
        // https://github.com/RoaringBitmap/RoaringFormatSpec/
62
        // added in 0.11
63
        BITMAP32 = 2,
64
        // A bitmap containing only one element that is in (UINT32_MAX, UINT64_MAX]
65
        // Payload := UInt64LittleEndian(8 byte)
66
        // added in 0.12
67
        SINGLE64 = 3,
68
        // A bitmap whose maximum element is in (UINT32_MAX, UINT64_MAX].
69
        //
70
        // To support 64-bits elements, all elements with the same high 32 bits are stored in a
71
        // RoaringBitmap containing only the lower 32 bits. Thus we could use
72
        // map<uint32_t, RoaringBitmap> to represent bitmap of 64-bits ints.
73
        //
74
        // Since there is no standard format for 64-bits RoaringBitmap, we define our own as below
75
        // Payload := NumRoaring(vint64), { MapKey, MapValue }^NumRoaring
76
        // - MapKey := the shared high 32 bits in UInt32LittleEndian(4 byte)
77
        // - MapValue := the standard RoaringBitmap format
78
        //
79
        // added in 0.12
80
        BITMAP64 = 4,
81
        SET = 5, // V1
82
        SET_V2 = 10,
83
        BITMAP32_V2 = 12,
84
        BITMAP64_V2 = 13,
85
        TYPE_MAX
86
    };
87
1
    Status static inline validate(int bitmap_type) {
88
1
        if (UNLIKELY(bitmap_type < type::EMPTY || bitmap_type >= type::TYPE_MAX)) {
89
1
            std::string err_msg =
90
1
                    fmt::format("BitmapTypeCode invalid, should between: {} and {} actrual is {}",
91
1
                                BitmapTypeCode::EMPTY, BitmapTypeCode::BITMAP64, bitmap_type);
92
1
            LOG(ERROR) << err_msg;
93
1
            return Status::Corruption(err_msg);
94
1
        }
95
0
        return Status::OK();
96
1
    }
97
};
98
99
namespace detail {
100
101
class Roaring64MapSetBitForwardIterator;
102
103
// Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.2.60/cpp/roaring64map.hh
104
// What we change includes
105
// - a custom serialization format is used inside read()/write()/getSizeInBytes()
106
// - added clear() and is32BitsEnough()
107
class Roaring64Map {
108
public:
109
    /**
110
     * Create an empty bitmap
111
     */
112
21.4k
    Roaring64Map() = default;
113
114
    /**
115
     * Construct a bitmap from a list of 32-bit integer values.
116
     */
117
2
    Roaring64Map(size_t n, const uint32_t* data) { addMany(n, data); }
118
119
    /**
120
     * Construct a bitmap from a list of 64-bit integer values.
121
     */
122
1
    Roaring64Map(size_t n, const uint64_t* data) { addMany(n, data); }
123
124
    /**
125
     * Construct a 64-bit map from a 32-bit one
126
     */
127
1
    explicit Roaring64Map(const roaring::Roaring& r) { emplaceOrInsert(0, r); }
128
129
9
    Roaring64Map(const Roaring64Map& r) = default;
130
131
11.0k
    Roaring64Map(Roaring64Map&& r) = default;
132
133
    /**
134
     * Assignment operator.
135
     */
136
1.03k
    Roaring64Map& operator=(const Roaring64Map& r) {
137
1.03k
        roarings = r.roarings;
138
1.03k
        return *this;
139
1.03k
    }
140
141
    /**
142
     * Add value x
143
     *
144
     */
145
3
    void add(uint32_t x) {
146
3
        roarings[0].add(x);
147
3
        roarings[0].setCopyOnWrite(copyOnWrite);
148
3
    }
149
4.73M
    void add(uint64_t x) {
150
4.73M
        roarings[highBytes(x)].add(lowBytes(x));
151
4.73M
        roarings[highBytes(x)].setCopyOnWrite(copyOnWrite);
152
4.73M
    }
153
154
    template <typename T>
155
9
    void addMany(size_t n_args, const T* vals) {
156
9
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
157
8
            auto& roaring = roarings[0];
158
8
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
159
8
            roaring.setCopyOnWrite(copyOnWrite);
160
8
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
161
1
            auto& roaring = roarings[0];
162
1
            std::vector<uint32_t> values(n_args);
163
8
            for (size_t i = 0; i != n_args; ++i) {
164
7
                values[i] = uint32_t(vals[i]);
165
7
            }
166
1
            roaring.addMany(n_args, values.data());
167
1
            roaring.setCopyOnWrite(copyOnWrite);
168
1
        } else {
169
0
            for (size_t lcv = 0; lcv < n_args; lcv++) {
170
0
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
171
0
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
172
0
            }
173
0
        }
174
9
    }
_ZN5doris6detail12Roaring64Map7addManyIjEEvmPKT_
Line
Count
Source
155
8
    void addMany(size_t n_args, const T* vals) {
156
8
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
157
8
            auto& roaring = roarings[0];
158
8
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
159
8
            roaring.setCopyOnWrite(copyOnWrite);
160
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
161
            auto& roaring = roarings[0];
162
            std::vector<uint32_t> values(n_args);
163
            for (size_t i = 0; i != n_args; ++i) {
164
                values[i] = uint32_t(vals[i]);
165
            }
166
            roaring.addMany(n_args, values.data());
167
            roaring.setCopyOnWrite(copyOnWrite);
168
        } else {
169
            for (size_t lcv = 0; lcv < n_args; lcv++) {
170
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
171
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
172
            }
173
        }
174
8
    }
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIiEEvmPKT_
_ZN5doris6detail12Roaring64Map7addManyItEEvmPKT_
Line
Count
Source
155
1
    void addMany(size_t n_args, const T* vals) {
156
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
157
            auto& roaring = roarings[0];
158
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
159
            roaring.setCopyOnWrite(copyOnWrite);
160
1
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
161
1
            auto& roaring = roarings[0];
162
1
            std::vector<uint32_t> values(n_args);
163
8
            for (size_t i = 0; i != n_args; ++i) {
164
7
                values[i] = uint32_t(vals[i]);
165
7
            }
166
1
            roaring.addMany(n_args, values.data());
167
1
            roaring.setCopyOnWrite(copyOnWrite);
168
        } else {
169
            for (size_t lcv = 0; lcv < n_args; lcv++) {
170
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
171
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
172
            }
173
        }
174
1
    }
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIaEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIsEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIlEEvmPKT_
175
176
40
    void addMany(size_t n_args, const uint64_t* vals) {
177
1.25k
        for (size_t lcv = 0; lcv < n_args; lcv++) {
178
1.21k
            roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
179
1.21k
            roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
180
1.21k
        }
181
40
    }
182
183
    /**
184
     * Remove value x
185
     *
186
     */
187
4
    void remove(uint32_t x) { roarings[0].remove(x); }
188
23
    void remove(uint64_t x) {
189
23
        auto roaring_iter = roarings.find(highBytes(x));
190
23
        if (roaring_iter != roarings.cend()) {
191
23
            roaring_iter->second.remove(lowBytes(x));
192
23
        }
193
23
    }
194
    /**
195
     * Return the largest value (if not empty)
196
     *
197
     */
198
46.5k
    uint64_t maximum() const {
199
46.5k
        for (auto roaring_iter = roarings.crbegin(); roaring_iter != roarings.crend();
200
46.5k
             ++roaring_iter) {
201
46.5k
            if (!roaring_iter->second.isEmpty()) {
202
46.5k
                return uniteBytes(roaring_iter->first, roaring_iter->second.maximum());
203
46.5k
            }
204
46.5k
        }
205
        // we put std::numeric_limits<>::max/min in parenthesis
206
        // to avoid a clash with the Windows.h header under Windows
207
5
        return (std::numeric_limits<uint64_t>::min)();
208
46.5k
    }
209
210
    /**
211
     * Return the smallest value (if not empty)
212
     *
213
     */
214
7
    uint64_t minimum() const {
215
7
        for (auto roaring_iter = roarings.cbegin(); roaring_iter != roarings.cend();
216
7
             ++roaring_iter) {
217
6
            if (!roaring_iter->second.isEmpty()) {
218
6
                return uniteBytes(roaring_iter->first, roaring_iter->second.minimum());
219
6
            }
220
6
        }
221
        // we put std::numeric_limits<>::max/min in parenthesis
222
        // to avoid a clash with the Windows.h header under Windows
223
1
        return (std::numeric_limits<uint64_t>::max)();
224
7
    }
225
226
    /**
227
     * Check if value x is present
228
     */
229
11
    bool contains(uint32_t x) const {
230
11
        return roarings.count(0) == 0 ? false : roarings.at(0).contains(x);
231
11
    }
232
9.24k
    bool contains(uint64_t x) const {
233
9.24k
        return roarings.count(highBytes(x)) == 0 ? false
234
9.24k
                                                 : roarings.at(highBytes(x)).contains(lowBytes(x));
235
9.24k
    }
236
237
    /**
238
     * Compute the intersection between the current bitmap and the provided
239
     * bitmap,
240
     * writing the result in the current bitmap. The provided bitmap is not
241
     * modified.
242
     */
243
6
    Roaring64Map& operator&=(const Roaring64Map& r) {
244
11
        for (auto& map_entry : roarings) {
245
11
            if (r.roarings.count(map_entry.first) == 1) {
246
10
                map_entry.second &= r.roarings.at(map_entry.first);
247
10
            } else {
248
1
                map_entry.second = roaring::Roaring();
249
1
            }
250
11
        }
251
6
        return *this;
252
6
    }
253
254
    /**
255
     * Compute the difference between the current bitmap and the provided
256
     * bitmap,
257
     * writing the result in the current bitmap. The provided bitmap is not
258
     * modified.
259
     */
260
1
    Roaring64Map& operator-=(const Roaring64Map& r) {
261
4
        for (auto& map_entry : roarings) {
262
4
            if (r.roarings.count(map_entry.first) == 1) {
263
4
                map_entry.second -= r.roarings.at(map_entry.first);
264
4
            }
265
4
        }
266
1
        return *this;
267
1
    }
268
269
    /**
270
     * Compute the union between the current bitmap and the provided bitmap,
271
     * writing the result in the current bitmap. The provided bitmap is not
272
     * modified.
273
     *
274
     * See also the fastunion function to aggregate many bitmaps more quickly.
275
     */
276
5
    Roaring64Map& operator|=(const Roaring64Map& r) {
277
11
        for (const auto& map_entry : r.roarings) {
278
11
            if (roarings.count(map_entry.first) == 0) {
279
2
                roarings[map_entry.first] = map_entry.second;
280
2
                roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
281
9
            } else {
282
9
                roarings[map_entry.first] |= map_entry.second;
283
9
            }
284
11
        }
285
5
        return *this;
286
5
    }
287
288
    /**
289
     * Compute the symmetric union between the current bitmap and the provided
290
     * bitmap,
291
     * writing the result in the current bitmap. The provided bitmap is not
292
     * modified.
293
     */
294
4
    Roaring64Map& operator^=(const Roaring64Map& r) {
295
7
        for (const auto& map_entry : r.roarings) {
296
7
            if (roarings.count(map_entry.first) == 0) {
297
0
                roarings[map_entry.first] = map_entry.second;
298
0
                roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
299
7
            } else {
300
7
                roarings[map_entry.first] ^= map_entry.second;
301
7
            }
302
7
        }
303
4
        return *this;
304
4
    }
305
306
    /**
307
     * Exchange the content of this bitmap with another.
308
     */
309
1
    void swap(Roaring64Map& r) { roarings.swap(r.roarings); }
310
311
    /**
312
     * Get the cardinality of the bitmap (number of elements).
313
     * Throws std::length_error in the special case where the bitmap is full
314
     * (cardinality() == 2^64). Check isFull() before calling to avoid
315
     * exception.
316
     */
317
246
    uint64_t cardinality() const {
318
246
        if (isFull()) {
319
0
            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
320
0
                                   "bitmap is full, cardinality is 2^64, "
321
0
                                   "unable to represent in a 64-bit integer");
322
0
        }
323
246
        return std::accumulate(roarings.cbegin(), roarings.cend(), (uint64_t)0,
324
246
                               [](uint64_t previous,
325
418
                                  const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
326
418
                                   return previous + map_entry.second.cardinality();
327
418
                               });
328
246
    }
329
    /**
330
     * Computes the size of the intersection between two bitmaps.
331
     *
332
     */
333
6
    uint64_t andCardinality(const Roaring64Map& r) const {
334
6
        uint64_t card = 0;
335
10
        for (const auto& map_entry : roarings) {
336
10
            if (r.roarings.count(map_entry.first) == 1) {
337
9
                card += map_entry.second.and_cardinality(r.roarings.at(map_entry.first));
338
9
            }
339
10
        }
340
6
        return card;
341
6
    }
342
343
    /**
344
     * Computes the size of the union between two bitmaps.
345
     *
346
     */
347
4
    uint64_t orCardinality(const Roaring64Map& r) const {
348
4
        uint64_t card = 0;
349
4
        for (const auto& map_entry : roarings) {
350
4
            if (r.roarings.count(map_entry.first) == 0) {
351
0
                card += map_entry.second.cardinality();
352
0
            }
353
4
        }
354
4
        for (const auto& map_entry : r.roarings) {
355
4
            if (roarings.count(map_entry.first) == 0) {
356
0
                card += map_entry.second.cardinality();
357
4
            } else {
358
4
                card += roarings.at(map_entry.first).or_cardinality(map_entry.second);
359
4
            }
360
4
        }
361
4
        return card;
362
4
    }
363
364
    /**
365
     * Computes the size of the difference (andnot) between two bitmaps.
366
     * r1.cardinality - (r1 & r2).cardinality
367
     */
368
1
    uint64_t andnotCardinality(const Roaring64Map& r) const {
369
1
        uint64_t card = 0;
370
4
        for (const auto& map_entry : roarings) {
371
4
            card += map_entry.second.cardinality();
372
4
            if (r.roarings.count(map_entry.first) == 1) {
373
4
                card -= r.roarings.at(map_entry.first).and_cardinality(map_entry.second);
374
4
            }
375
4
        }
376
1
        return card;
377
1
    }
378
379
    /**
380
     * Computes the size of the symmetric difference (andnot) between two
381
     * bitmaps.
382
     * r1.cardinality + r2.cardinality - 2 * (r1 & r2).cardinality
383
     */
384
1
    uint64_t xorCardinality(const Roaring64Map& r) const {
385
1
        uint64_t card = 0;
386
1
        for (const auto& map_entry : roarings) {
387
1
            card += map_entry.second.cardinality();
388
1
        }
389
1
        for (const auto& map_entry : r.roarings) {
390
1
            card += map_entry.second.cardinality();
391
1
            if (roarings.count(map_entry.first) == 1) {
392
1
                card -= 2 * roarings.at(map_entry.first).and_cardinality(map_entry.second);
393
1
            }
394
1
        }
395
1
        return card;
396
1
    }
397
398
    /**
399
    * Returns true if the bitmap is empty (cardinality is zero).
400
    */
401
1
    bool isEmpty() const {
402
1
        return std::all_of(roarings.cbegin(), roarings.cend(),
403
1
                           [](const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
404
0
                               return map_entry.second.isEmpty();
405
0
                           });
406
1
    }
407
408
    /**
409
    * Returns true if the bitmap is full (cardinality is max uint64_t + 1).
410
    */
411
246
    bool isFull() const {
412
        // only bother to check if map is fully saturated
413
        //
414
        // we put std::numeric_limits<>::max/min in parenthesis
415
        // to avoid a clash with the Windows.h header under Windows
416
246
        return roarings.size() == ((uint64_t)(std::numeric_limits<uint32_t>::max)()) + 1
417
246
                       ? std::all_of(roarings.cbegin(), roarings.cend(),
418
0
                                     [](const std::pair<const uint32_t, roaring::Roaring>&
419
0
                                                roaring_map_entry) {
420
                                         // roarings within map are saturated if cardinality
421
                                         // is uint32_t max + 1
422
0
                                         return roaring_map_entry.second.cardinality() ==
423
0
                                                ((uint64_t)(std::numeric_limits<uint32_t>::max)()) +
424
0
                                                        1;
425
0
                                     })
426
246
                       : false;
427
246
    }
428
429
    /**
430
     * Return true if the two bitmaps contain the same elements.
431
     */
432
14
    bool operator==(const Roaring64Map& r) const {
433
        // we cannot use operator == on the map because either side may contain
434
        // empty Roaring Bitmaps
435
14
        auto lhs_iter = roarings.cbegin();
436
14
        auto lhs_cend = roarings.cend();
437
14
        auto rhs_iter = r.roarings.cbegin();
438
14
        auto rhs_cend = r.roarings.cend();
439
27
        while (lhs_iter != lhs_cend && rhs_iter != rhs_cend) {
440
13
            auto lhs_key = lhs_iter->first;
441
13
            auto rhs_key = rhs_iter->first;
442
13
            const auto& lhs_map = lhs_iter->second;
443
13
            const auto& rhs_map = rhs_iter->second;
444
13
            if (lhs_map.isEmpty()) {
445
1
                ++lhs_iter;
446
1
                continue;
447
1
            }
448
12
            if (rhs_map.isEmpty()) {
449
2
                ++rhs_iter;
450
2
                continue;
451
2
            }
452
10
            if (!(lhs_key == rhs_key)) {
453
0
                return false;
454
0
            }
455
10
            if (!(lhs_map == rhs_map)) {
456
0
                return false;
457
0
            }
458
10
            ++lhs_iter;
459
10
            ++rhs_iter;
460
10
        }
461
16
        while (lhs_iter != lhs_cend) {
462
4
            if (!lhs_iter->second.isEmpty()) {
463
2
                return false;
464
2
            }
465
2
            ++lhs_iter;
466
2
        }
467
14
        while (rhs_iter != rhs_cend) {
468
3
            if (!rhs_iter->second.isEmpty()) {
469
1
                return false;
470
1
            }
471
2
            ++rhs_iter;
472
2
        }
473
11
        return true;
474
12
    }
475
476
    /** convert array and bitmap containers to run containers when it is more
477
     * efficient;
478
     * also convert from run containers when more space efficient.  Returns
479
     * true if the result has at least one run container.
480
     * Additional savings might be possible by calling shrinkToFit().
481
     */
482
27.3k
    bool runOptimize() {
483
27.3k
        return std::accumulate(
484
27.3k
                roarings.begin(), roarings.end(), true,
485
27.3k
                [](bool previous, std::pair<const uint32_t, roaring::Roaring>& map_entry) {
486
27.3k
                    return map_entry.second.runOptimize() && previous;
487
27.3k
                });
488
27.3k
    }
489
490
    /**
491
     * If needed, reallocate memory to shrink the memory usage. Returns
492
     * the number of bytes saved.
493
    */
494
27.3k
    size_t shrinkToFit() {
495
27.3k
        size_t savedBytes = 0;
496
27.3k
        auto iter = roarings.begin();
497
54.6k
        while (iter != roarings.cend()) {
498
27.3k
            if (iter->second.isEmpty()) {
499
                // empty Roarings are 84 bytes
500
1
                savedBytes += 88;
501
1
                iter = roarings.erase(iter);
502
27.3k
            } else {
503
27.3k
                savedBytes += iter->second.shrinkToFit();
504
27.3k
                iter++;
505
27.3k
            }
506
27.3k
        }
507
27.3k
        return savedBytes;
508
27.3k
    }
509
510
    /**
511
     * Iterate over the bitmap elements. The function iterator is called once
512
     * for all the values with ptr (can be nullptr) as the second parameter of each
513
     * call.
514
     *
515
     * roaring_iterator is simply a pointer to a function that returns bool
516
     * (true means that the iteration should continue while false means that it
517
     * should stop), and takes (uint32_t,void*) as inputs.
518
     */
519
8.73k
    void iterate(roaring::api::roaring_iterator64 iterator, void* ptr) const {
520
8.76k
        for (const auto& map_entry : roarings) {
521
8.76k
            bool should_continue = roaring_iterate64(&map_entry.second.roaring, iterator,
522
8.76k
                                                     uint64_t(map_entry.first) << 32, ptr);
523
8.76k
            if (!should_continue) {
524
1
                break;
525
1
            }
526
8.76k
        }
527
8.73k
    }
528
529
    /**
530
     * write a bitmap to a char buffer.
531
     * Returns how many bytes were written which should be getSizeInBytes().
532
     */
533
19.1k
    size_t write(char* buf, int serialize_version) const {
534
19.1k
        bool is_v1 = serialize_version == 1;
535
19.1k
        BitmapTypeCode::type type_bitmap32 =
536
19.1k
                is_v1 ? BitmapTypeCode::type::BITMAP32 : BitmapTypeCode::type::BITMAP32_V2;
537
19.1k
        BitmapTypeCode::type type_bitmap64 =
538
19.1k
                is_v1 ? BitmapTypeCode::type::BITMAP64 : BitmapTypeCode::type::BITMAP64_V2;
539
540
19.1k
        if (is32BitsEnough()) {
541
19.1k
            *(buf++) = type_bitmap32;
542
19.1k
            auto it = roarings.find(0);
543
19.1k
            if (it == roarings.end()) { // empty bitmap
544
2
                roaring::Roaring r;
545
2
                return r.write(buf, is_v1) + 1;
546
2
            }
547
19.1k
            return it->second.write(buf, is_v1) + 1;
548
19.1k
        }
549
550
7
        const char* orig = buf;
551
        // put type code
552
7
        *(buf++) = type_bitmap64;
553
        // push map size
554
7
        buf = (char*)encode_varint64((uint8_t*)buf, roarings.size());
555
7
        std::for_each(roarings.cbegin(), roarings.cend(),
556
16
                      [&buf, is_v1](const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
557
                          // push map key
558
16
                          encode_fixed32_le((uint8_t*)buf, map_entry.first);
559
16
                          buf += sizeof(uint32_t);
560
                          // push map value Roaring
561
16
                          buf += map_entry.second.write(buf, is_v1);
562
16
                      });
563
7
        return buf - orig;
564
19.1k
    }
565
566
    /**
567
     * read a bitmap from a serialized version.
568
     *
569
     * This function is unsafe in the sense that if you provide bad data,
570
     * many bytes could be read, possibly causing a buffer overflow. See also readSafe.
571
     */
572
11.0k
    static Roaring64Map read(const char* buf) {
573
11.0k
        Roaring64Map result;
574
575
11.0k
        bool is_v1 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP64 == *buf;
576
11.0k
        bool is_bitmap32 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP32_V2 == *buf;
577
11.0k
        bool is_bitmap64 = BitmapTypeCode::BITMAP64 == *buf || BitmapTypeCode::BITMAP64_V2 == *buf;
578
11.0k
        if (is_bitmap32) {
579
11.0k
            roaring::Roaring read = roaring::Roaring::read(buf + 1, is_v1);
580
11.0k
            result.emplaceOrInsert(0, std::move(read));
581
11.0k
            return result;
582
11.0k
        }
583
584
11.0k
        DCHECK(is_bitmap64);
585
8
        buf++;
586
587
        // get map size (varint64 took 1~10 bytes)
588
8
        uint64_t map_size;
589
8
        buf = reinterpret_cast<const char*>(
590
8
                decode_varint64_ptr(reinterpret_cast<const uint8_t*>(buf),
591
8
                                    reinterpret_cast<const uint8_t*>(buf + 10), &map_size));
592
8
        DCHECK(buf != nullptr);
593
26
        for (uint64_t lcv = 0; lcv < map_size; lcv++) {
594
            // get map key
595
18
            uint32_t key = decode_fixed32_le(reinterpret_cast<const uint8_t*>(buf));
596
18
            buf += sizeof(uint32_t);
597
            // read map value Roaring
598
18
            roaring::Roaring read_var = roaring::Roaring::read(buf, is_v1);
599
            // forward buffer past the last Roaring Bitmap
600
18
            buf += read_var.getSizeInBytes(is_v1);
601
18
            result.emplaceOrInsert(key, std::move(read_var));
602
18
        }
603
8
        return result;
604
11.0k
    }
605
606
    /**
607
     * How many bytes are required to serialize this bitmap
608
     */
609
27.3k
    size_t getSizeInBytes(int serialize_version) const {
610
27.3k
        bool is_v1 = serialize_version == 1;
611
27.3k
        if (is32BitsEnough()) {
612
27.3k
            auto it = roarings.find(0);
613
27.3k
            if (it == roarings.end()) { // empty bitmap
614
2
                roaring::Roaring r;
615
2
                return r.getSizeInBytes(is_v1) + 1;
616
2
            }
617
27.3k
            return it->second.getSizeInBytes(is_v1) + 1;
618
27.3k
        }
619
        // start with type code, map size and size of keys for each map entry
620
13
        size_t init = 1 + varint_length(roarings.size()) + roarings.size() * sizeof(uint32_t);
621
13
        return std::accumulate(
622
13
                roarings.cbegin(), roarings.cend(), init,
623
28
                [=](size_t previous, const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
624
                    // add in bytes used by each Roaring
625
28
                    return previous + map_entry.second.getSizeInBytes(is_v1);
626
28
                });
627
27.3k
    }
628
629
    /**
630
     * remove all elements
631
     */
632
0
    void clear() { roarings.clear(); }
633
634
    /**
635
     * Return whether all elements can be represented in 32 bits
636
     */
637
46.5k
    bool is32BitsEnough() const { return maximum() <= std::numeric_limits<uint32_t>::max(); }
638
639
    /**
640
     * Computes the intersection between two bitmaps and returns new bitmap.
641
     * The current bitmap and the provided bitmap are unchanged.
642
     */
643
2
    Roaring64Map operator&(const Roaring64Map& o) const { return Roaring64Map(*this) &= o; }
644
645
    /**
646
     * Computes the difference between two bitmaps and returns new bitmap.
647
     * The current bitmap and the provided bitmap are unchanged.
648
     */
649
0
    Roaring64Map operator-(const Roaring64Map& o) const { return Roaring64Map(*this) -= o; }
650
651
    /**
652
     * Computes the union between two bitmaps and returns new bitmap.
653
     * The current bitmap and the provided bitmap are unchanged.
654
     */
655
1
    Roaring64Map operator|(const Roaring64Map& o) const { return Roaring64Map(*this) |= o; }
656
657
    /**
658
     * Computes the symmetric union between two bitmaps and returns new bitmap.
659
     * The current bitmap and the provided bitmap are unchanged.
660
     */
661
1
    Roaring64Map operator^(const Roaring64Map& o) const { return Roaring64Map(*this) ^= o; }
662
663
    /**
664
     * computes the logical or (union) between "n" bitmaps (referenced by a
665
     * pointer).
666
     */
667
5
    static Roaring64Map fastunion(size_t n, const Roaring64Map** inputs) {
668
5
        struct pq_entry {
669
5
            phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator iterator;
670
5
            phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator end;
671
5
        };
672
673
5
        struct pq_comp {
674
9
            bool operator()(const pq_entry& lhs, const pq_entry& rhs) const {
675
9
                auto left_key = lhs.iterator->first;
676
9
                auto right_key = rhs.iterator->first;
677
                // We compare in the opposite direction than normal because priority
678
                // queues normally order from largest to smallest, but we want
679
                // smallest to largest.
680
9
                return left_key > right_key;
681
9
            }
682
5
        };
683
684
5
        std::priority_queue<pq_entry, std::vector<pq_entry>, pq_comp> pq;
685
686
12
        for (auto i = 0; i < n; ++i) {
687
7
            const auto& roaring = inputs[i]->roarings;
688
7
            if (roaring.begin() != roaring.end()) {
689
7
                pq.push({roaring.begin(), roaring.end()});
690
7
            }
691
7
        }
692
693
5
        std::vector<const roaring::api::roaring_bitmap_t*> group_bitmaps;
694
5
        Roaring64Map result;
695
13
        while (!pq.empty()) {
696
8
            auto group_key = pq.top().iterator->first;
697
8
            group_bitmaps.clear();
698
699
19
            while (!pq.empty()) {
700
14
                auto candidate_current_iter = pq.top().iterator;
701
14
                auto candidate_end_iter = pq.top().end;
702
703
14
                auto candidate_key = candidate_current_iter->first;
704
14
                const auto& candidate_bitmap = candidate_current_iter->second;
705
706
14
                if (candidate_key != group_key) {
707
3
                    break;
708
3
                }
709
710
11
                group_bitmaps.push_back(&candidate_bitmap.roaring);
711
11
                pq.pop();
712
11
                ++candidate_current_iter;
713
11
                if (candidate_current_iter != candidate_end_iter) {
714
4
                    pq.push({candidate_current_iter, candidate_end_iter});
715
4
                }
716
11
            }
717
8
            auto* inner_result = roaring::api::roaring_bitmap_or_many(group_bitmaps.size(),
718
8
                                                                      group_bitmaps.data());
719
8
            result.roarings.insert(result.roarings.end(),
720
8
                                   std::make_pair(group_key, roaring::Roaring(inner_result)));
721
8
        }
722
723
5
        return result;
724
5
    }
725
726
    friend class Roaring64MapSetBitForwardIterator;
727
    using const_iterator = Roaring64MapSetBitForwardIterator;
728
729
    /**
730
    * Returns an iterator that can be used to access the position of the
731
    * set bits. The running time complexity of a full scan is proportional to
732
    * the
733
    * number
734
    * of set bits: be aware that if you have long strings of 1s, this can be
735
    * very inefficient.
736
    *
737
    * It can be much faster to use the toArray method if you want to
738
    * retrieve the set bits.
739
    */
740
    const_iterator begin() const;
741
742
    /**
743
    * A bogus iterator that can be used together with begin()
744
    * for constructions such as for(auto i = b.begin();
745
    * i!=b.end(); ++i) {}
746
    */
747
    const_iterator end() const;
748
749
private:
750
    phmap::btree_map<uint32_t, roaring::Roaring> roarings {};
751
    bool copyOnWrite {false};
752
9.48M
    static uint32_t highBytes(const uint64_t in) { return uint32_t(in >> 32); }
753
4.74M
    static uint32_t lowBytes(const uint64_t in) { return uint32_t(in); }
754
49.4k
    static uint64_t uniteBytes(const uint32_t highBytes, const uint32_t lowBytes) {
755
49.4k
        return (uint64_t(highBytes) << 32) | uint64_t(lowBytes);
756
49.4k
    }
757
1
    void emplaceOrInsert(const uint32_t key, const roaring::Roaring& value) {
758
1
        roarings.emplace(std::make_pair(key, value));
759
1
    }
760
761
11.1k
    void emplaceOrInsert(const uint32_t key, roaring::Roaring&& value) {
762
11.1k
        roarings.emplace(key, value);
763
11.1k
    }
764
};
765
766
// Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.4.0/cpp/roaring64map.hh
767
// Used to go through the set bits. Not optimally fast, but convenient.
768
class Roaring64MapSetBitForwardIterator {
769
public:
770
    using type_of_iterator = Roaring64MapSetBitForwardIterator;
771
772
    /**
773
     * Provides the location of the set bit.
774
     */
775
2.96k
    uint64_t operator*() const {
776
2.96k
        return Roaring64Map::uniteBytes(map_iter->first, i.current_value);
777
2.96k
    }
778
779
975
    type_of_iterator& operator++() { // ++i, must returned inc. value
780
975
        if (i.has_value) {
781
975
            roaring_advance_uint32_iterator(&i);
782
975
        }
783
1.00k
        while (!i.has_value) {
784
34
            map_iter++;
785
34
            if (map_iter == map_end) return *this;
786
25
            roaring_init_iterator(&map_iter->second.roaring, &i);
787
25
        }
788
966
        return *this;
789
975
    }
790
791
1.80k
    type_of_iterator operator++(int) { // i++, must return orig. value
792
1.80k
        Roaring64MapSetBitForwardIterator orig(*this);
793
1.80k
        roaring_advance_uint32_iterator(&i);
794
1.81k
        while (!i.has_value) {
795
4
            map_iter++;
796
4
            if (map_iter == map_end) return orig;
797
2
            roaring_init_iterator(&map_iter->second.roaring, &i);
798
2
        }
799
1.80k
        return orig;
800
1.80k
    }
801
802
5
    bool move(const uint64_t& x) {
803
5
        map_iter = p.lower_bound(Roaring64Map::highBytes(x));
804
5
        if (map_iter != p.cend()) {
805
5
            roaring_init_iterator(&map_iter->second.roaring, &i);
806
5
            if (map_iter->first == Roaring64Map::highBytes(x)) {
807
5
                if (roaring_move_uint32_iterator_equalorlarger(&i, Roaring64Map::lowBytes(x)))
808
4
                    return true;
809
1
                map_iter++;
810
1
                if (map_iter == map_end) return false;
811
0
                roaring_init_iterator(&map_iter->second.roaring, &i);
812
0
            }
813
0
            return true;
814
5
        }
815
0
        return false;
816
5
    }
817
818
527
    bool operator==(const Roaring64MapSetBitForwardIterator& o) const {
819
527
        if (map_iter == map_end && o.map_iter == o.map_end) return true;
820
519
        if (o.map_iter == o.map_end) return false;
821
1
        return **this == *o;
822
519
    }
823
824
2.30k
    bool operator!=(const Roaring64MapSetBitForwardIterator& o) const {
825
2.30k
        if (map_iter == map_end && o.map_iter == o.map_end) return false;
826
2.29k
        if (o.map_iter == o.map_end) return true;
827
0
        return **this != *o;
828
2.29k
    }
829
5
    Roaring64MapSetBitForwardIterator& operator=(const Roaring64MapSetBitForwardIterator& r) {
830
5
        map_iter = r.map_iter;
831
5
        map_end = r.map_end;
832
5
        i = r.i;
833
5
        return *this;
834
5
    }
835
836
    Roaring64MapSetBitForwardIterator(const Roaring64MapSetBitForwardIterator& r) = default;
837
838
    Roaring64MapSetBitForwardIterator(const Roaring64Map& parent, bool exhausted = false)
839
2.29k
            : p(parent.roarings), map_end(parent.roarings.cend()) {
840
2.29k
        if (exhausted || parent.roarings.empty()) {
841
2.27k
            map_iter = parent.roarings.cend();
842
2.27k
        } else {
843
21
            map_iter = parent.roarings.cbegin();
844
21
            roaring_init_iterator(&map_iter->second.roaring, &i);
845
21
            while (!i.has_value) {
846
0
                map_iter++;
847
0
                if (map_iter == map_end) return;
848
0
                roaring_init_iterator(&map_iter->second.roaring, &i);
849
0
            }
850
21
        }
851
2.29k
    }
852
853
protected:
854
    const phmap::btree_map<uint32_t, roaring::Roaring>& p;
855
    phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_iter {};
856
    phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_end {};
857
    roaring::api::roaring_uint32_iterator_t i {};
858
};
859
860
18
inline Roaring64MapSetBitForwardIterator Roaring64Map::begin() const {
861
18
    return {*this};
862
18
}
863
864
2.26k
inline Roaring64MapSetBitForwardIterator Roaring64Map::end() const {
865
2.26k
    return {*this, true};
866
2.26k
}
867
868
} // namespace detail
869
870
// Represent the in-memory and on-disk structure of Doris's BITMAP data type.
871
// Optimize for the case where the bitmap contains 0 or 1 element which is common
872
// for streaming load scenario.
873
class BitmapValueIterator;
874
class BitmapValue {
875
public:
876
    template <typename T>
877
    using SetContainer = phmap::flat_hash_set<T>;
878
879
    // Construct an empty bitmap.
880
100k
    BitmapValue() : _sv(0), _bitmap(nullptr), _type(EMPTY), _is_shared(false) { _set.clear(); }
881
882
    // Construct a bitmap with one element.
883
    explicit BitmapValue(uint64_t value)
884
71
            : _sv(value), _bitmap(nullptr), _type(SINGLE), _is_shared(false) {}
885
886
    // Construct a bitmap from serialized data.
887
3.08k
    explicit BitmapValue(const char* src) : _is_shared(false) {
888
3.08k
        bool res = deserialize(src);
889
3.08k
        DCHECK(res);
890
3.08k
    }
891
892
20.5k
    BitmapValue(const BitmapValue& other) {
893
20.5k
        _type = other._type;
894
20.5k
        switch (other._type) {
895
196
        case EMPTY:
896
196
            break;
897
172
        case SINGLE:
898
172
            _sv = other._sv;
899
172
            break;
900
17.5k
        case BITMAP:
901
17.5k
            _bitmap = other._bitmap;
902
17.5k
            break;
903
2.68k
        case SET:
904
2.68k
            _set = other._set;
905
2.68k
            break;
906
20.5k
        }
907
908
20.5k
        if (other._type == BITMAP) {
909
17.5k
            _is_shared = true;
910
            // should also set other's state to shared, so that other bitmap value will
911
            // create a new bitmap when it wants to modify it.
912
17.5k
            const_cast<BitmapValue&>(other)._is_shared = true;
913
17.5k
        }
914
20.5k
    }
915
916
25.2k
    BitmapValue(BitmapValue&& other) noexcept {
917
25.2k
        _type = other._type;
918
25.2k
        switch (other._type) {
919
64
        case EMPTY:
920
64
            break;
921
241
        case SINGLE:
922
241
            _sv = other._sv;
923
241
            break;
924
21.0k
        case BITMAP:
925
21.0k
            _bitmap = std::move(other._bitmap);
926
21.0k
            other._bitmap = nullptr;
927
21.0k
            break;
928
3.90k
        case SET:
929
3.90k
            _set = std::move(other._set);
930
3.90k
            break;
931
25.2k
        }
932
25.2k
        _is_shared = other._is_shared;
933
25.2k
        other._type = EMPTY;
934
25.2k
        other._is_shared = false;
935
25.2k
    }
936
937
108
    BitmapValue& operator=(const BitmapValue& other) {
938
108
        if (this == &other) {
939
5
            return *this;
940
5
        }
941
103
        reset();
942
103
        _type = other._type;
943
103
        switch (other._type) {
944
20
        case EMPTY:
945
20
            break;
946
30
        case SINGLE:
947
30
            _sv = other._sv;
948
30
            break;
949
26
        case BITMAP:
950
26
            _bitmap = other._bitmap;
951
26
            break;
952
27
        case SET:
953
27
            _set = other._set;
954
27
            break;
955
103
        }
956
957
103
        if (other._type == BITMAP) {
958
26
            _is_shared = true;
959
            // should also set other's state to shared, so that other bitmap value will
960
            // create a new bitmap when it wants to modify it.
961
26
            const_cast<BitmapValue&>(other)._is_shared = true;
962
26
        }
963
103
        return *this;
964
103
    }
965
966
22
    static BitmapValue empty_bitmap() { return BitmapValue {}; }
967
968
23
    BitmapValue& operator=(BitmapValue&& other) noexcept {
969
23
        if (this == &other) {
970
0
            return *this;
971
0
        }
972
23
        reset();
973
974
23
        _type = other._type;
975
23
        switch (other._type) {
976
1
        case EMPTY:
977
1
            break;
978
12
        case SINGLE:
979
12
            _sv = other._sv;
980
12
            break;
981
4
        case BITMAP:
982
4
            _bitmap = std::move(other._bitmap);
983
4
            other._bitmap = nullptr;
984
4
            break;
985
6
        case SET:
986
6
            _set = std::move(other._set);
987
6
            break;
988
23
        }
989
23
        _is_shared = other._is_shared;
990
23
        return *this;
991
23
    }
992
993
    // Construct a bitmap from given elements.
994
62
    explicit BitmapValue(const std::vector<uint64_t>& bits) : _is_shared(false) {
995
62
        if (bits.size() == 0) {
996
0
            _type = EMPTY;
997
0
            return;
998
0
        }
999
1000
62
        if (bits.size() == 1) {
1001
1
            _type = SINGLE;
1002
1
            _sv = bits[0];
1003
1
            return;
1004
1
        }
1005
1006
61
        if (!config::enable_set_in_bitmap_value || bits.size() > SET_TYPE_THRESHOLD) {
1007
26
            _type = BITMAP;
1008
26
            _prepare_bitmap_for_write();
1009
26
            _bitmap->addMany(bits.size(), &bits[0]);
1010
35
        } else {
1011
35
            _type = SET;
1012
137
            for (auto v : bits) {
1013
137
                _set.insert(v);
1014
137
            }
1015
35
        }
1016
61
    }
1017
1018
31
    BitmapTypeCode::type get_type_code() const {
1019
31
        switch (_type) {
1020
1
        case EMPTY:
1021
1
            return BitmapTypeCode::EMPTY;
1022
7
        case SINGLE:
1023
7
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1024
4
                return BitmapTypeCode::SINGLE32;
1025
4
            } else {
1026
3
                return BitmapTypeCode::SINGLE64;
1027
3
            }
1028
9
        case SET:
1029
9
            return BitmapTypeCode::SET;
1030
14
        case BITMAP:
1031
14
            bool is_v1 = (config::bitmap_serialize_version == 1);
1032
14
            if (_bitmap->is32BitsEnough()) {
1033
9
                return is_v1 ? BitmapTypeCode::type::BITMAP32 : BitmapTypeCode::type::BITMAP32_V2;
1034
9
            } else {
1035
5
                return is_v1 ? BitmapTypeCode::type::BITMAP64 : BitmapTypeCode::type::BITMAP64_V2;
1036
5
            }
1037
31
        }
1038
0
        __builtin_unreachable();
1039
31
    }
1040
1041
    template <typename T>
1042
22
    void add_many(const T* values, const size_t count) {
1043
22
        switch (_type) {
1044
17
        case EMPTY:
1045
17
            if (count == 1) {
1046
0
                _sv = values[0];
1047
0
                _type = SINGLE;
1048
17
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1049
270
                for (size_t i = 0; i != count; ++i) {
1050
259
                    _set.insert(values[i]);
1051
259
                }
1052
11
                _type = SET;
1053
11
            } else {
1054
6
                _prepare_bitmap_for_write();
1055
6
                _bitmap->addMany(count, values);
1056
6
                _type = BITMAP;
1057
6
            }
1058
17
            break;
1059
0
        case SINGLE:
1060
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1061
0
                _set.insert(_sv);
1062
0
                for (size_t i = 0; i != count; ++i) {
1063
0
                    _set.insert(values[i]);
1064
0
                }
1065
0
                _type = SET;
1066
0
                _convert_to_bitmap_if_need();
1067
0
            } else {
1068
0
                _prepare_bitmap_for_write();
1069
0
                _bitmap->add(_sv);
1070
0
                _bitmap->addMany(count, values);
1071
0
                _type = BITMAP;
1072
0
            }
1073
0
            break;
1074
1
        case BITMAP:
1075
1
            _prepare_bitmap_for_write();
1076
1
            _bitmap->addMany(count, values);
1077
1
            break;
1078
4
        case SET:
1079
324
            for (size_t i = 0; i != count; ++i) {
1080
320
                _set.insert(values[i]);
1081
320
            }
1082
4
            _convert_to_bitmap_if_need();
1083
4
            break;
1084
22
        }
1085
22
    }
_ZN5doris11BitmapValue8add_manyIiEEvPKT_m
Line
Count
Source
1042
5
    void add_many(const T* values, const size_t count) {
1043
5
        switch (_type) {
1044
5
        case EMPTY:
1045
5
            if (count == 1) {
1046
0
                _sv = values[0];
1047
0
                _type = SINGLE;
1048
5
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1049
78
                for (size_t i = 0; i != count; ++i) {
1050
73
                    _set.insert(values[i]);
1051
73
                }
1052
5
                _type = SET;
1053
5
            } else {
1054
0
                _prepare_bitmap_for_write();
1055
0
                _bitmap->addMany(count, values);
1056
0
                _type = BITMAP;
1057
0
            }
1058
5
            break;
1059
0
        case SINGLE:
1060
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1061
0
                _set.insert(_sv);
1062
0
                for (size_t i = 0; i != count; ++i) {
1063
0
                    _set.insert(values[i]);
1064
0
                }
1065
0
                _type = SET;
1066
0
                _convert_to_bitmap_if_need();
1067
0
            } else {
1068
0
                _prepare_bitmap_for_write();
1069
0
                _bitmap->add(_sv);
1070
0
                _bitmap->addMany(count, values);
1071
0
                _type = BITMAP;
1072
0
            }
1073
0
            break;
1074
0
        case BITMAP:
1075
0
            _prepare_bitmap_for_write();
1076
0
            _bitmap->addMany(count, values);
1077
0
            break;
1078
0
        case SET:
1079
0
            for (size_t i = 0; i != count; ++i) {
1080
0
                _set.insert(values[i]);
1081
0
            }
1082
0
            _convert_to_bitmap_if_need();
1083
0
            break;
1084
5
        }
1085
5
    }
_ZN5doris11BitmapValue8add_manyImEEvPKT_m
Line
Count
Source
1042
17
    void add_many(const T* values, const size_t count) {
1043
17
        switch (_type) {
1044
12
        case EMPTY:
1045
12
            if (count == 1) {
1046
0
                _sv = values[0];
1047
0
                _type = SINGLE;
1048
12
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1049
192
                for (size_t i = 0; i != count; ++i) {
1050
186
                    _set.insert(values[i]);
1051
186
                }
1052
6
                _type = SET;
1053
6
            } else {
1054
6
                _prepare_bitmap_for_write();
1055
6
                _bitmap->addMany(count, values);
1056
6
                _type = BITMAP;
1057
6
            }
1058
12
            break;
1059
0
        case SINGLE:
1060
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1061
0
                _set.insert(_sv);
1062
0
                for (size_t i = 0; i != count; ++i) {
1063
0
                    _set.insert(values[i]);
1064
0
                }
1065
0
                _type = SET;
1066
0
                _convert_to_bitmap_if_need();
1067
0
            } else {
1068
0
                _prepare_bitmap_for_write();
1069
0
                _bitmap->add(_sv);
1070
0
                _bitmap->addMany(count, values);
1071
0
                _type = BITMAP;
1072
0
            }
1073
0
            break;
1074
1
        case BITMAP:
1075
1
            _prepare_bitmap_for_write();
1076
1
            _bitmap->addMany(count, values);
1077
1
            break;
1078
4
        case SET:
1079
324
            for (size_t i = 0; i != count; ++i) {
1080
320
                _set.insert(values[i]);
1081
320
            }
1082
4
            _convert_to_bitmap_if_need();
1083
4
            break;
1084
17
        }
1085
17
    }
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIaEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIsEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIlEEvPKT_m
1086
1087
4.73M
    void add(uint64_t value) {
1088
4.73M
        switch (_type) {
1089
11.5k
        case EMPTY:
1090
11.5k
            if (!config::enable_set_in_bitmap_value) {
1091
5.32k
                _sv = value;
1092
5.32k
                _type = SINGLE;
1093
6.25k
            } else {
1094
6.25k
                _set.insert(value);
1095
6.25k
                _type = SET;
1096
6.25k
            }
1097
11.5k
            break;
1098
5.28k
        case SINGLE:
1099
            //there is no need to convert the type if two variables are equal
1100
5.28k
            if (_sv == value) {
1101
2
                break;
1102
2
            }
1103
5.27k
            if (config::enable_set_in_bitmap_value) {
1104
5
                _set.insert(_sv);
1105
5
                _set.insert(value);
1106
5
                _type = SET;
1107
5.27k
            } else {
1108
5.27k
                _prepare_bitmap_for_write();
1109
5.27k
                _bitmap->add(_sv);
1110
5.27k
                _bitmap->add(value);
1111
5.27k
                _type = BITMAP;
1112
5.27k
            }
1113
5.27k
            break;
1114
4.58M
        case BITMAP:
1115
4.58M
            _prepare_bitmap_for_write();
1116
4.58M
            _bitmap->add(value);
1117
4.58M
            break;
1118
130k
        case SET:
1119
130k
            _set.insert(value);
1120
130k
            _convert_to_bitmap_if_need();
1121
130k
            break;
1122
4.73M
        }
1123
4.73M
    }
1124
1125
8
    void remove(uint64_t value) {
1126
8
        switch (_type) {
1127
1
        case EMPTY:
1128
1
            break;
1129
1
        case SINGLE:
1130
            //there is need to convert the type if two variables are equal
1131
1
            if (_sv == value) {
1132
0
                _type = EMPTY;
1133
0
            }
1134
1
            break;
1135
4
        case BITMAP:
1136
4
            _prepare_bitmap_for_write();
1137
4
            _bitmap->remove(value);
1138
4
            _convert_to_smaller_type();
1139
4
            break;
1140
2
        case SET:
1141
2
            _set.erase(value);
1142
2
            _convert_to_smaller_type();
1143
2
            break;
1144
8
        }
1145
8
    }
1146
1147
    // Compute the union between the current bitmap and the provided bitmap.
1148
16
    BitmapValue& operator-=(const BitmapValue& rhs) {
1149
16
        switch (rhs._type) {
1150
4
        case EMPTY:
1151
4
            break;
1152
4
        case SINGLE:
1153
4
            remove(rhs._sv);
1154
4
            break;
1155
4
        case BITMAP:
1156
4
            switch (_type) {
1157
1
            case EMPTY:
1158
1
                break;
1159
1
            case SINGLE:
1160
1
                if (rhs._bitmap->contains(_sv)) {
1161
0
                    _type = EMPTY;
1162
0
                }
1163
1
                break;
1164
1
            case BITMAP:
1165
1
                _prepare_bitmap_for_write();
1166
1
                *_bitmap -= *rhs._bitmap;
1167
1
                _convert_to_smaller_type();
1168
1
                break;
1169
1
            case SET: {
1170
32
                for (auto it = _set.begin(); it != _set.end();) {
1171
31
                    if (rhs.contains(*it)) {
1172
16
                        it = _set.erase(it);
1173
16
                    } else {
1174
15
                        ++it;
1175
15
                    }
1176
31
                }
1177
1
                _convert_to_smaller_type();
1178
1
                break;
1179
0
            }
1180
4
            }
1181
4
            break;
1182
4
        case SET: {
1183
4
            switch (_type) {
1184
1
            case EMPTY:
1185
1
                break;
1186
1
            case SINGLE:
1187
1
                if (rhs._set.contains(_sv)) {
1188
0
                    _type = EMPTY;
1189
0
                }
1190
1
                break;
1191
1
            case BITMAP:
1192
1
                _prepare_bitmap_for_write();
1193
31
                for (auto v : rhs._set) {
1194
31
                    if (_bitmap->contains(v)) {
1195
0
                        _bitmap->remove(v);
1196
0
                    }
1197
31
                }
1198
1
                _convert_to_smaller_type();
1199
1
                break;
1200
1
            case SET: {
1201
32
                for (auto it = _set.begin(); it != _set.end();) {
1202
31
                    if (rhs.contains(*it)) {
1203
14
                        it = _set.erase(it);
1204
17
                    } else {
1205
17
                        ++it;
1206
17
                    }
1207
31
                }
1208
1
                _convert_to_smaller_type();
1209
1
                break;
1210
0
            }
1211
4
            }
1212
4
        }
1213
16
        }
1214
16
        return *this;
1215
16
    }
1216
1217
    // Compute the union between the current bitmap and the provided bitmap.
1218
    // Possible type transitions are:
1219
    // EMPTY  -> SINGLE
1220
    // EMPTY  -> BITMAP
1221
    // SINGLE -> BITMAP
1222
64
    BitmapValue& operator|=(const BitmapValue& rhs) {
1223
64
        switch (rhs._type) {
1224
12
        case EMPTY:
1225
12
            break;
1226
30
        case SINGLE:
1227
30
            add(rhs._sv);
1228
30
            break;
1229
8
        case BITMAP:
1230
8
            switch (_type) {
1231
2
            case EMPTY:
1232
2
                _bitmap = rhs._bitmap;
1233
2
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1234
2
                _is_shared = true;
1235
2
                _type = BITMAP;
1236
2
                break;
1237
3
            case SINGLE:
1238
3
                _bitmap = rhs._bitmap;
1239
3
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1240
3
                _is_shared = true;
1241
3
                _prepare_bitmap_for_write();
1242
3
                _bitmap->add(_sv);
1243
3
                _type = BITMAP;
1244
3
                break;
1245
2
            case BITMAP:
1246
2
                _prepare_bitmap_for_write();
1247
2
                *_bitmap |= *rhs._bitmap;
1248
2
                break;
1249
1
            case SET: {
1250
1
                _prepare_bitmap_for_write();
1251
1
                *_bitmap = *rhs._bitmap;
1252
1253
31
                for (auto v : _set) {
1254
31
                    _bitmap->add(v);
1255
31
                }
1256
1
                _type = BITMAP;
1257
1
                break;
1258
0
            }
1259
8
            }
1260
8
            break;
1261
14
        case SET:
1262
14
            switch (_type) {
1263
1
            case EMPTY:
1264
1
                _set = rhs._set;
1265
1
                _type = SET;
1266
1
                break;
1267
1
            case SINGLE: {
1268
1
                if ((rhs._set.size() + rhs._set.contains(_sv) > SET_TYPE_THRESHOLD)) {
1269
0
                    _prepare_bitmap_for_write();
1270
0
                    _bitmap->add(_sv);
1271
0
                    for (auto v : rhs._set) {
1272
0
                        _bitmap->add(v);
1273
0
                    }
1274
0
                    _type = BITMAP;
1275
1
                } else {
1276
1
                    _set = rhs._set;
1277
1
                    _set.insert(_sv);
1278
1
                    _type = SET;
1279
1
                }
1280
1
                break;
1281
0
            }
1282
1
            case BITMAP:
1283
1
                _prepare_bitmap_for_write();
1284
31
                for (auto v : rhs._set) {
1285
31
                    _bitmap->add(v);
1286
31
                }
1287
1
                break;
1288
11
            case SET: {
1289
58
                for (auto v : rhs._set) {
1290
58
                    _set.insert(v);
1291
58
                }
1292
11
                _convert_to_bitmap_if_need();
1293
11
                break;
1294
0
            }
1295
14
            }
1296
14
            break;
1297
64
        }
1298
64
        return *this;
1299
64
    }
1300
1301
24
    BitmapValue& fastunion(const std::vector<const BitmapValue*>& values) {
1302
24
        std::vector<const detail::Roaring64Map*> bitmaps;
1303
24
        std::vector<uint64_t> single_values;
1304
24
        std::vector<const SetContainer<uint64_t>*> sets;
1305
1.73k
        for (const auto* value : values) {
1306
1.73k
            switch (value->_type) {
1307
5
            case EMPTY:
1308
5
                break;
1309
11
            case SINGLE:
1310
11
                single_values.push_back(value->_sv);
1311
11
                break;
1312
5
            case BITMAP:
1313
5
                bitmaps.push_back(value->_bitmap.get());
1314
5
                break;
1315
1.71k
            case SET:
1316
1.71k
                sets.push_back(&value->_set);
1317
1.71k
                break;
1318
1.73k
            }
1319
1.73k
        }
1320
1321
24
        if (!bitmaps.empty()) {
1322
5
            _prepare_bitmap_for_write();
1323
5
            switch (_type) {
1324
2
            case EMPTY:
1325
2
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1326
2
                break;
1327
2
            case SINGLE:
1328
2
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1329
2
                _bitmap->add(_sv);
1330
2
                break;
1331
1
            case BITMAP:
1332
1
                for (const auto* bitmap : bitmaps) {
1333
1
                    *_bitmap |= *bitmap;
1334
1
                }
1335
1
                break;
1336
0
            case SET: {
1337
0
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1338
0
                for (auto v : _set) {
1339
0
                    _bitmap->add(v);
1340
0
                }
1341
0
                _set.clear();
1342
0
                break;
1343
0
            }
1344
5
            }
1345
5
            _type = BITMAP;
1346
5
        }
1347
1348
24
        if (!sets.empty()) {
1349
1.71k
            for (auto& set : sets) {
1350
1.80k
                for (auto v : *set) {
1351
1.80k
                    _set.insert(v);
1352
1.80k
                }
1353
1.71k
            }
1354
10
            switch (_type) {
1355
9
            case EMPTY:
1356
9
                _type = SET;
1357
9
                break;
1358
0
            case SINGLE: {
1359
0
                _set.insert(_sv);
1360
0
                _type = SET;
1361
0
                break;
1362
0
            }
1363
0
            case BITMAP:
1364
0
                _prepare_bitmap_for_write();
1365
0
                for (auto v : _set) {
1366
0
                    _bitmap->add(v);
1367
0
                }
1368
0
                _type = BITMAP;
1369
0
                _set.clear();
1370
0
                break;
1371
1
            case SET: {
1372
1
                break;
1373
0
            }
1374
10
            }
1375
10
            if (_type == SET) {
1376
10
                _convert_to_bitmap_if_need();
1377
10
            }
1378
10
        }
1379
1380
24
        if (_type == EMPTY && single_values.size() == 1) {
1381
2
            if (config::enable_set_in_bitmap_value) {
1382
1
                _type = SET;
1383
1
                _set.insert(single_values[0]);
1384
1
            } else {
1385
1
                _sv = single_values[0];
1386
1
                _type = SINGLE;
1387
1
            }
1388
22
        } else if (!single_values.empty()) {
1389
6
            switch (_type) {
1390
2
            case EMPTY:
1391
3
            case SINGLE:
1392
3
                if (config::enable_set_in_bitmap_value) {
1393
2
                    _set.insert(single_values.cbegin(), single_values.cend());
1394
2
                    if (_type == SINGLE) {
1395
0
                        _set.insert(_sv);
1396
0
                    }
1397
2
                    _type = SET;
1398
2
                    _convert_to_bitmap_if_need();
1399
2
                } else {
1400
1
                    _prepare_bitmap_for_write();
1401
1
                    _bitmap->addMany(single_values.size(), single_values.data());
1402
1
                    if (_type == SINGLE) {
1403
1
                        _bitmap->add(_sv);
1404
1
                    }
1405
1
                    _type = BITMAP;
1406
1
                    _convert_to_smaller_type();
1407
1
                }
1408
3
                break;
1409
3
            case BITMAP: {
1410
3
                _prepare_bitmap_for_write();
1411
3
                _bitmap->addMany(single_values.size(), single_values.data());
1412
3
                break;
1413
2
            }
1414
0
            case SET:
1415
0
                _set.insert(single_values.cbegin(), single_values.cend());
1416
0
                _convert_to_bitmap_if_need();
1417
0
                break;
1418
6
            }
1419
6
        }
1420
1421
24
        return *this;
1422
24
    }
1423
1424
    // Compute the intersection between the current bitmap and the provided bitmap.
1425
    // Possible type transitions are:
1426
    // SINGLE -> EMPTY
1427
    // BITMAP -> EMPTY
1428
    // BITMAP -> SINGLE
1429
51
    BitmapValue& operator&=(const BitmapValue& rhs) {
1430
51
        switch (rhs._type) {
1431
14
        case EMPTY:
1432
14
            reset(); // empty & any = empty
1433
14
            break;
1434
10
        case SINGLE:
1435
10
            switch (_type) {
1436
2
            case EMPTY:
1437
2
                break;
1438
3
            case SINGLE:
1439
3
                if (_sv != rhs._sv) {
1440
2
                    _type = EMPTY;
1441
2
                }
1442
3
                break;
1443
4
            case BITMAP:
1444
4
                if (!_bitmap->contains(rhs._sv)) {
1445
1
                    _type = EMPTY;
1446
3
                } else {
1447
3
                    _type = SINGLE;
1448
3
                    _sv = rhs._sv;
1449
3
                }
1450
4
                _bitmap.reset();
1451
4
                _is_shared = false;
1452
4
                break;
1453
1
            case SET:
1454
1
                if (!_set.contains(rhs._sv)) {
1455
0
                    _type = EMPTY;
1456
1
                } else {
1457
1
                    _type = SINGLE;
1458
1
                    _sv = rhs._sv;
1459
1
                }
1460
1
                _set.clear();
1461
1
                break;
1462
10
            }
1463
10
            break;
1464
10
        case BITMAP:
1465
10
            switch (_type) {
1466
2
            case EMPTY:
1467
2
                break;
1468
3
            case SINGLE:
1469
3
                if (!rhs._bitmap->contains(_sv)) {
1470
2
                    _type = EMPTY;
1471
2
                }
1472
3
                break;
1473
4
            case BITMAP:
1474
4
                _prepare_bitmap_for_write();
1475
4
                *_bitmap &= *rhs._bitmap;
1476
4
                _convert_to_smaller_type();
1477
4
                break;
1478
1
            case SET:
1479
32
                for (auto it = _set.begin(); it != _set.end();) {
1480
31
                    if (!rhs._bitmap->contains(*it)) {
1481
15
                        it = _set.erase(it);
1482
16
                    } else {
1483
16
                        ++it;
1484
16
                    }
1485
31
                }
1486
1
                _convert_to_smaller_type();
1487
1
                break;
1488
10
            }
1489
10
            break;
1490
17
        case SET:
1491
17
            switch (_type) {
1492
1
            case EMPTY:
1493
1
                break;
1494
1
            case SINGLE:
1495
1
                if (!rhs._set.contains(_sv)) {
1496
1
                    _type = EMPTY;
1497
1
                }
1498
1
                break;
1499
1
            case BITMAP:
1500
1
                _prepare_bitmap_for_write();
1501
31
                for (auto v : rhs._set) {
1502
31
                    if (_bitmap->contains(v)) {
1503
0
                        _set.insert(v);
1504
0
                    }
1505
31
                }
1506
1
                _type = SET;
1507
1
                _bitmap.reset();
1508
1
                _is_shared = false;
1509
1
                _convert_to_smaller_type();
1510
1
                break;
1511
14
            case SET:
1512
79
                for (auto it = _set.begin(); it != _set.end();) {
1513
65
                    if (!rhs._set.contains(*it)) {
1514
37
                        it = _set.erase(it);
1515
37
                    } else {
1516
28
                        ++it;
1517
28
                    }
1518
65
                }
1519
14
                _convert_to_smaller_type();
1520
14
                break;
1521
17
            }
1522
17
            break;
1523
51
        }
1524
51
        return *this;
1525
51
    }
1526
1527
    // Compute the symmetric union between the current bitmap and the provided bitmap.
1528
    // Possible type transitions are:
1529
    // SINGLE -> EMPTY
1530
    // BITMAP -> EMPTY
1531
    // BITMAP -> SINGLE
1532
29
    BitmapValue& operator^=(const BitmapValue& rhs) {
1533
29
        switch (rhs._type) {
1534
8
        case EMPTY:
1535
8
            break;
1536
4
        case SINGLE:
1537
4
            switch (_type) {
1538
1
            case EMPTY:
1539
1
                add(rhs._sv);
1540
1
                break;
1541
1
            case SINGLE:
1542
1
                if (_sv == rhs._sv) {
1543
0
                    _type = EMPTY;
1544
1
                } else {
1545
1
                    add(rhs._sv);
1546
1
                }
1547
1
                break;
1548
1
            case BITMAP:
1549
1
                if (!_bitmap->contains(rhs._sv)) {
1550
0
                    add(rhs._sv);
1551
1
                } else {
1552
1
                    _prepare_bitmap_for_write();
1553
1
                    _bitmap->remove(rhs._sv);
1554
1
                }
1555
1
                break;
1556
1
            case SET:
1557
1
                if (!_set.contains(rhs._sv)) {
1558
0
                    _set.insert(rhs._sv);
1559
1
                } else {
1560
1
                    _set.erase(rhs._sv);
1561
1
                }
1562
1
                break;
1563
4
            }
1564
4
            break;
1565
6
        case BITMAP:
1566
6
            switch (_type) {
1567
1
            case EMPTY:
1568
1
                _bitmap = rhs._bitmap;
1569
1
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1570
1
                _is_shared = true;
1571
1
                _type = BITMAP;
1572
1
                break;
1573
1
            case SINGLE:
1574
1
                _bitmap = rhs._bitmap;
1575
1
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1576
1
                _is_shared = true;
1577
1
                _type = BITMAP;
1578
1
                _prepare_bitmap_for_write();
1579
1
                if (!rhs._bitmap->contains(_sv)) {
1580
1
                    _bitmap->add(_sv);
1581
1
                } else {
1582
0
                    _bitmap->remove(_sv);
1583
0
                }
1584
1
                break;
1585
3
            case BITMAP:
1586
3
                _prepare_bitmap_for_write();
1587
3
                *_bitmap ^= *rhs._bitmap;
1588
3
                _convert_to_smaller_type();
1589
3
                break;
1590
1
            case SET:
1591
1
                _prepare_bitmap_for_write();
1592
1
                *_bitmap = *rhs._bitmap;
1593
31
                for (auto v : _set) {
1594
31
                    if (_bitmap->contains(v)) {
1595
16
                        _bitmap->remove(v);
1596
16
                    } else {
1597
15
                        _bitmap->add(v);
1598
15
                    }
1599
31
                }
1600
1
                _type = BITMAP;
1601
1
                _convert_to_smaller_type();
1602
1
                break;
1603
6
            }
1604
6
            break;
1605
11
        case SET:
1606
11
            switch (_type) {
1607
1
            case EMPTY:
1608
1
                _set = rhs._set;
1609
1
                _type = SET;
1610
1
                break;
1611
1
            case SINGLE:
1612
1
                _set = rhs._set;
1613
1
                if (!rhs._set.contains(_sv)) {
1614
1
                    _set.insert(_sv);
1615
1
                } else {
1616
0
                    _set.erase(_sv);
1617
0
                }
1618
1
                _type = SET;
1619
1
                break;
1620
1
            case BITMAP:
1621
1
                _prepare_bitmap_for_write();
1622
31
                for (auto v : rhs._set) {
1623
31
                    if (_bitmap->contains(v)) {
1624
0
                        _bitmap->remove(v);
1625
31
                    } else {
1626
31
                        _bitmap->add(v);
1627
31
                    }
1628
31
                }
1629
1
                _convert_to_smaller_type();
1630
1
                break;
1631
8
            case SET:
1632
49
                for (auto v : rhs._set) {
1633
49
                    if (_set.contains(v)) {
1634
22
                        _set.erase(v);
1635
27
                    } else {
1636
27
                        _set.insert(v);
1637
27
                    }
1638
49
                }
1639
8
                _convert_to_smaller_type();
1640
8
                break;
1641
11
            }
1642
11
            break;
1643
29
        }
1644
29
        return *this;
1645
29
    }
1646
1647
    // check if value x is present
1648
9.19k
    bool contains(uint64_t x) const {
1649
9.19k
        switch (_type) {
1650
0
        case EMPTY:
1651
0
            return false;
1652
14
        case SINGLE:
1653
14
            return _sv == x;
1654
8.93k
        case BITMAP:
1655
8.93k
            return _bitmap->contains(x);
1656
238
        case SET:
1657
238
            return _set.contains(x);
1658
9.19k
        }
1659
0
        return false;
1660
9.19k
    }
1661
1662
    // true if contains a value that belongs to the range [left, right].
1663
    bool contains_any(uint64_t left, uint64_t right) const;
1664
1665
240k
    uint64_t cardinality() const {
1666
240k
        switch (_type) {
1667
240k
        case EMPTY:
1668
240k
            return 0;
1669
36
        case SINGLE:
1670
36
            return 1;
1671
210
        case BITMAP:
1672
210
            return _bitmap->cardinality();
1673
74
        case SET:
1674
74
            return _set.size();
1675
240k
        }
1676
0
        return 0;
1677
240k
    }
1678
1679
60.0k
    uint64_t and_cardinality(const BitmapValue& rhs) const {
1680
60.0k
        switch (rhs._type) {
1681
60.0k
        case EMPTY:
1682
60.0k
            return 0;
1683
4
        case SINGLE:
1684
4
            switch (_type) {
1685
1
            case EMPTY:
1686
1
                return 0;
1687
1
            case SINGLE:
1688
1
                return _sv == rhs._sv;
1689
1
            case BITMAP:
1690
1
                return _bitmap->contains(rhs._sv);
1691
1
            case SET:
1692
1
                return _set.contains(rhs._sv);
1693
4
            }
1694
0
            break;
1695
7
        case BITMAP:
1696
7
            switch (_type) {
1697
1
            case EMPTY:
1698
1
                return 0;
1699
1
            case SINGLE:
1700
1
                return rhs._bitmap->contains(_sv);
1701
4
            case BITMAP:
1702
4
                return _bitmap->andCardinality(*rhs._bitmap);
1703
1
            case SET: {
1704
1
                uint64_t cardinality = 0;
1705
31
                for (auto v : _set) {
1706
31
                    if (rhs._bitmap->contains(v)) {
1707
16
                        ++cardinality;
1708
16
                    }
1709
31
                }
1710
1
                return cardinality;
1711
0
            }
1712
7
            }
1713
0
            break;
1714
4
        case SET:
1715
4
            switch (_type) {
1716
1
            case EMPTY:
1717
1
                return 0;
1718
1
            case SINGLE:
1719
1
                return rhs._set.contains(_sv);
1720
1
            case BITMAP: {
1721
1
                uint64_t cardinality = 0;
1722
31
                for (auto v : rhs._set) {
1723
31
                    if (_bitmap->contains(v)) {
1724
0
                        ++cardinality;
1725
0
                    }
1726
31
                }
1727
1
                return cardinality;
1728
0
            }
1729
1
            case SET: {
1730
1
                uint64_t cardinality = 0;
1731
31
                for (auto v : _set) {
1732
31
                    if (rhs._set.contains(v)) {
1733
14
                        ++cardinality;
1734
14
                    }
1735
31
                }
1736
1
                return cardinality;
1737
0
            }
1738
4
            }
1739
60.0k
        }
1740
0
        return 0;
1741
60.0k
    }
1742
1743
60.0k
    uint64_t or_cardinality(const BitmapValue& rhs) const {
1744
60.0k
        switch (rhs._type) {
1745
59.9k
        case EMPTY:
1746
59.9k
            return cardinality();
1747
0
        case SINGLE:
1748
0
            switch (_type) {
1749
0
            case EMPTY:
1750
0
                return 1;
1751
0
            case SINGLE:
1752
0
                return 1 + (_sv != rhs._sv);
1753
0
            case BITMAP:
1754
0
                return cardinality() + !_bitmap->contains(rhs._sv);
1755
0
            case SET:
1756
0
                return _set.size() + !_set.contains(rhs._sv);
1757
0
            }
1758
0
            break;
1759
3
        case BITMAP:
1760
3
            switch (_type) {
1761
0
            case EMPTY:
1762
0
                return rhs.cardinality();
1763
0
            case SINGLE:
1764
0
                return rhs.cardinality() + !rhs._bitmap->contains(_sv);
1765
3
            case BITMAP:
1766
3
                return _bitmap->orCardinality(*rhs._bitmap);
1767
0
            case SET: {
1768
0
                uint64_t cardinality = rhs._bitmap->cardinality();
1769
0
                for (auto v : _set) {
1770
0
                    if (!rhs._bitmap->contains(v)) {
1771
0
                        ++cardinality;
1772
0
                    }
1773
0
                }
1774
0
                return cardinality;
1775
0
            }
1776
3
            }
1777
0
            break;
1778
0
        case SET:
1779
0
            switch (_type) {
1780
0
            case EMPTY:
1781
0
                return rhs.cardinality();
1782
0
            case SINGLE:
1783
0
                return rhs.cardinality() + !rhs._set.contains(_sv);
1784
0
            case BITMAP: {
1785
0
                uint64_t cardinality = _bitmap->cardinality();
1786
0
                for (auto v : rhs._set) {
1787
0
                    if (!_bitmap->contains(v)) {
1788
0
                        ++cardinality;
1789
0
                    }
1790
0
                }
1791
0
                return cardinality;
1792
0
            }
1793
0
            case SET: {
1794
0
                uint64_t cardinality = _set.size();
1795
0
                for (auto v : _set) {
1796
0
                    if (!rhs._set.contains(v)) {
1797
0
                        ++cardinality;
1798
0
                    }
1799
0
                }
1800
0
                return cardinality;
1801
0
            }
1802
0
            }
1803
60.0k
        }
1804
0
        return 0;
1805
60.0k
    }
1806
1807
24
    uint64_t andnot_cardinality(const BitmapValue& rhs) const {
1808
24
        switch (rhs._type) {
1809
8
        case EMPTY:
1810
8
            return cardinality();
1811
4
        case SINGLE:
1812
4
            switch (_type) {
1813
1
            case EMPTY:
1814
1
                return 0;
1815
1
            case SINGLE:
1816
1
                return 1 - (_sv == rhs._sv);
1817
1
            case BITMAP:
1818
1
                return cardinality() - _bitmap->contains(rhs._sv);
1819
1
            case SET:
1820
1
                return cardinality() - _set.contains(rhs._sv);
1821
4
            }
1822
0
            break;
1823
4
        case BITMAP:
1824
4
            switch (_type) {
1825
1
            case EMPTY:
1826
1
                return 0;
1827
1
            case SINGLE:
1828
1
                return !rhs._bitmap->contains(_sv);
1829
1
            case BITMAP:
1830
1
                return _bitmap->andnotCardinality(*rhs._bitmap);
1831
1
            case SET: {
1832
1
                uint64_t cardinality = _set.size();
1833
31
                for (auto v : _set) {
1834
31
                    if (rhs._bitmap->contains(v)) {
1835
16
                        cardinality -= 1;
1836
16
                    }
1837
31
                }
1838
1
                return cardinality;
1839
0
            }
1840
4
            }
1841
0
            break;
1842
8
        case SET:
1843
8
            switch (_type) {
1844
1
            case EMPTY:
1845
1
                return 0;
1846
1
            case SINGLE:
1847
1
                return !rhs._set.contains(_sv);
1848
1
            case BITMAP: {
1849
1
                uint64_t cardinality = _bitmap->cardinality();
1850
31
                for (auto v : rhs._set) {
1851
31
                    if (_bitmap->contains(v)) {
1852
0
                        cardinality -= 1;
1853
0
                    }
1854
31
                }
1855
1
                return cardinality;
1856
0
            }
1857
5
            case SET: {
1858
5
                uint64_t cardinality = _set.size();
1859
33
                for (auto v : rhs._set) {
1860
33
                    if (_set.contains(v)) {
1861
16
                        cardinality -= 1;
1862
16
                    }
1863
33
                }
1864
5
                return cardinality;
1865
0
            }
1866
8
            }
1867
24
        }
1868
0
        return 0;
1869
24
    }
1870
1871
    // Return how many bytes are required to serialize this bitmap.
1872
    // See BitmapTypeCode for the serialized format.
1873
148k
    size_t getSizeInBytes() {
1874
148k
        size_t res = 0;
1875
148k
        switch (_type) {
1876
120k
        case EMPTY:
1877
120k
            res = 1;
1878
120k
            break;
1879
35
        case SINGLE:
1880
35
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1881
30
                res = 1 + sizeof(uint32_t);
1882
30
            } else {
1883
5
                res = 1 + sizeof(uint64_t);
1884
5
            }
1885
35
            break;
1886
27.3k
        case BITMAP:
1887
27.3k
            _prepare_bitmap_for_write();
1888
27.3k
            _bitmap->runOptimize();
1889
27.3k
            _bitmap->shrinkToFit();
1890
27.3k
            res = _bitmap->getSizeInBytes(config::bitmap_serialize_version);
1891
27.3k
            break;
1892
679
        case SET:
1893
            /// 1 byte for type, 1 byte for count
1894
679
            res = 2 + sizeof(uint64_t) * _set.size();
1895
679
            break;
1896
148k
        }
1897
148k
        return res;
1898
148k
    }
1899
1900
    // Serialize the bitmap value to dst, which should be large enough.
1901
    // Client should call `getSizeInBytes` first to get the serialized size.
1902
79.7k
    void write_to(char* dst) const {
1903
79.7k
        switch (_type) {
1904
60.0k
        case EMPTY:
1905
60.0k
            *dst = BitmapTypeCode::EMPTY;
1906
60.0k
            break;
1907
25
        case SINGLE:
1908
25
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1909
22
                *(dst++) = BitmapTypeCode::SINGLE32;
1910
22
                encode_fixed32_le(reinterpret_cast<uint8_t*>(dst), static_cast<uint32_t>(_sv));
1911
22
            } else {
1912
3
                *(dst++) = BitmapTypeCode::SINGLE64;
1913
3
                encode_fixed64_le(reinterpret_cast<uint8_t*>(dst), _sv);
1914
3
            }
1915
25
            break;
1916
548
        case SET:
1917
548
            DCHECK(config::enable_set_in_bitmap_value);
1918
548
            *dst = BitmapTypeCode::SET;
1919
548
            ++dst;
1920
548
            *dst = static_cast<uint8_t>(_set.size());
1921
548
            ++dst;
1922
8.76k
            for (auto v : _set) {
1923
8.76k
                encode_fixed64_le(reinterpret_cast<uint8_t*>(dst), v);
1924
8.76k
                dst += sizeof(uint64_t);
1925
8.76k
            }
1926
548
            break;
1927
19.1k
        case BITMAP:
1928
19.1k
            _bitmap->write(dst, config::bitmap_serialize_version);
1929
19.1k
            break;
1930
79.7k
        }
1931
79.7k
    }
1932
1933
    // Deserialize a bitmap value from `src`.
1934
    // Return false if `src` begins with unknown type code, true otherwise.
1935
71.4k
    bool deserialize(const char* src) {
1936
71.4k
        switch (*src) {
1937
60.0k
        case BitmapTypeCode::EMPTY:
1938
60.0k
            _type = EMPTY;
1939
60.0k
            break;
1940
15
        case BitmapTypeCode::SINGLE32:
1941
15
            _type = SINGLE;
1942
15
            _sv = decode_fixed32_le(reinterpret_cast<const uint8_t*>(src + 1));
1943
15
            if (config::enable_set_in_bitmap_value) {
1944
2
                _type = SET;
1945
2
                _set.insert(_sv);
1946
2
            }
1947
15
            break;
1948
3
        case BitmapTypeCode::SINGLE64:
1949
3
            _type = SINGLE;
1950
3
            _sv = decode_fixed64_le(reinterpret_cast<const uint8_t*>(src + 1));
1951
3
            if (config::enable_set_in_bitmap_value) {
1952
1
                _type = SET;
1953
1
                _set.insert(_sv);
1954
1
            }
1955
3
            break;
1956
5.12k
        case BitmapTypeCode::BITMAP32:
1957
5.13k
        case BitmapTypeCode::BITMAP64:
1958
11.0k
        case BitmapTypeCode::BITMAP32_V2:
1959
11.0k
        case BitmapTypeCode::BITMAP64_V2:
1960
11.0k
            _type = BITMAP;
1961
11.0k
            _is_shared = false;
1962
11.0k
            _bitmap = std::make_shared<detail::Roaring64Map>(detail::Roaring64Map::read(src));
1963
11.0k
            break;
1964
355
        case BitmapTypeCode::SET: {
1965
355
            _type = SET;
1966
355
            ++src;
1967
355
            uint8_t count = *src;
1968
355
            ++src;
1969
355
            CHECK(count <= SET_TYPE_THRESHOLD) << "bitmap value with incorrect set count";
1970
6.04k
            for (uint8_t i = 0; i != count; ++i, src += sizeof(uint64_t)) {
1971
5.68k
                _set.insert(decode_fixed64_le(reinterpret_cast<const uint8_t*>(src)));
1972
5.68k
            }
1973
355
            CHECK_EQ(count, _set.size()) << "bitmap value with incorrect set count";
1974
1975
355
            if (!config::enable_set_in_bitmap_value) {
1976
0
                _prepare_bitmap_for_write();
1977
0
                for (auto v : _set) {
1978
0
                    _bitmap->add(v);
1979
0
                }
1980
0
                _type = BITMAP;
1981
0
                _set.clear();
1982
0
            }
1983
355
            break;
1984
11.0k
        }
1985
1
        case BitmapTypeCode::SET_V2: {
1986
1
            uint32_t size = 0;
1987
1
            memcpy(&size, src + 1, sizeof(uint32_t));
1988
1
            src += sizeof(uint32_t) + 1;
1989
1990
1
            if (!config::enable_set_in_bitmap_value || size > SET_TYPE_THRESHOLD) {
1991
0
                _type = BITMAP;
1992
0
                _prepare_bitmap_for_write();
1993
1994
0
                for (int i = 0; i < size; ++i) {
1995
0
                    uint64_t key {};
1996
0
                    memcpy(&key, src, sizeof(uint64_t));
1997
0
                    _bitmap->add(key);
1998
0
                    src += sizeof(uint64_t);
1999
0
                }
2000
1
            } else {
2001
1
                _type = SET;
2002
1
                _set.reserve(size);
2003
2004
3
                for (int i = 0; i < size; ++i) {
2005
2
                    uint64_t key {};
2006
2
                    memcpy(&key, src, sizeof(uint64_t));
2007
2
                    _set.insert(key);
2008
2
                    src += sizeof(uint64_t);
2009
2
                }
2010
1
            }
2011
1
            break;
2012
11.0k
        }
2013
0
        default:
2014
0
            LOG(ERROR) << "BitmapTypeCode invalid, should between: " << BitmapTypeCode::EMPTY
2015
0
                       << " and " << BitmapTypeCode::BITMAP64 << " actual is "
2016
0
                       << static_cast<int>(*src);
2017
0
            return false;
2018
71.4k
        }
2019
71.4k
        return true;
2020
71.4k
    }
2021
2022
13
    int64_t minimum() const {
2023
13
        switch (_type) {
2024
2
        case SINGLE:
2025
2
            return _sv;
2026
4
        case BITMAP:
2027
4
            return _bitmap->minimum();
2028
5
        case SET:
2029
5
            return _min_in_set();
2030
2
        default:
2031
2
            return 0;
2032
13
        }
2033
13
    }
2034
2035
    // TODO limit string size to avoid OOM
2036
9.33k
    std::string to_string() const {
2037
9.33k
        std::stringstream ss;
2038
9.33k
        switch (_type) {
2039
16
        case EMPTY:
2040
16
            break;
2041
58
        case SINGLE:
2042
58
            ss << _sv;
2043
58
            break;
2044
8.73k
        case BITMAP: {
2045
8.73k
            struct IterCtx {
2046
8.73k
                std::stringstream* ss = nullptr;
2047
8.73k
                bool first = true;
2048
8.73k
            } iter_ctx;
2049
8.73k
            iter_ctx.ss = &ss;
2050
2051
8.73k
            _bitmap->iterate(
2052
4.23M
                    [](uint64_t value, void* c) -> bool {
2053
4.23M
                        auto ctx = reinterpret_cast<IterCtx*>(c);
2054
4.23M
                        if (ctx->first) {
2055
8.73k
                            ctx->first = false;
2056
4.22M
                        } else {
2057
4.22M
                            (*ctx->ss) << ",";
2058
4.22M
                        }
2059
4.23M
                        (*ctx->ss) << value;
2060
4.23M
                        return true;
2061
4.23M
                    },
2062
8.73k
                    &iter_ctx);
2063
8.73k
            break;
2064
0
        }
2065
530
        case SET: {
2066
530
            struct IterCtx {
2067
530
                std::stringstream* ss = nullptr;
2068
530
                bool first = true;
2069
530
            } iter_ctx;
2070
530
            iter_ctx.ss = &ss;
2071
2072
530
            std::vector<uint64_t> values(_set.begin(), _set.end());
2073
530
            std::sort(values.begin(), values.end());
2074
2075
8.31k
            for (auto v : values) {
2076
8.31k
                if (iter_ctx.first) {
2077
530
                    iter_ctx.first = false;
2078
7.78k
                } else {
2079
7.78k
                    (*iter_ctx.ss) << ",";
2080
7.78k
                }
2081
8.31k
                (*iter_ctx.ss) << v;
2082
8.31k
            }
2083
530
            break;
2084
0
        }
2085
9.33k
        }
2086
9.33k
        return ss.str();
2087
9.33k
    }
2088
2089
14
    int64_t maximum() const {
2090
14
        switch (_type) {
2091
2
        case SINGLE:
2092
2
            return _sv;
2093
5
        case BITMAP:
2094
5
            return _bitmap->maximum();
2095
5
        case SET:
2096
5
            return _max_in_set();
2097
2
        default:
2098
2
            return 0;
2099
14
        }
2100
14
    }
2101
2102
4
    uint64_t max(bool* empty) const {
2103
4
        return min_or_max(empty, [&]() { return maximum(); });
2104
4
    }
2105
2106
4
    uint64_t min(bool* empty) const {
2107
4
        return min_or_max(empty, [&]() { return minimum(); });
2108
4
    }
2109
2110
5
    uint64_t _min_in_set() const {
2111
5
        DCHECK_EQ(_type, SET);
2112
5
        return *std::min_element(_set.begin(), _set.end());
2113
5
    }
2114
2115
5
    uint64_t _max_in_set() const {
2116
5
        DCHECK_EQ(_type, SET);
2117
5
        return *std::max_element(_set.begin(), _set.end());
2118
5
    }
2119
2120
6
    bool empty() const { return _type == EMPTY; }
2121
2122
    /**
2123
     * Return new set with specified range (not include the range_end)
2124
     */
2125
    int64_t sub_range(const int64_t& range_start, const int64_t& range_end,
2126
7
                      BitmapValue* ret_bitmap) {
2127
7
        switch (_type) {
2128
1
        case EMPTY:
2129
1
            return 0;
2130
2
        case SINGLE: {
2131
            //only single value, so _sv must in [range_start,range_end)
2132
2
            if (range_start <= _sv && _sv < range_end) {
2133
1
                ret_bitmap->add(_sv);
2134
1
                return 1;
2135
1
            } else {
2136
1
                return 0;
2137
1
            }
2138
2
        }
2139
2
        case BITMAP: {
2140
2
            int64_t count = 0;
2141
146
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2142
145
                if (*it < range_start) {
2143
0
                    continue;
2144
0
                }
2145
145
                if (*it < range_end) {
2146
144
                    ret_bitmap->add(*it);
2147
144
                    ++count;
2148
144
                } else {
2149
1
                    break;
2150
1
                }
2151
145
            }
2152
2
            return count;
2153
2
        }
2154
2
        case SET: {
2155
2
            int64_t count = 0;
2156
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2157
2
            std::sort(values.begin(), values.end());
2158
6
            for (auto it = values.begin(); it != values.end(); ++it) {
2159
4
                if (*it < range_start || *it >= range_end) {
2160
1
                    continue;
2161
1
                }
2162
3
                ret_bitmap->add(*it);
2163
3
                ++count;
2164
3
            }
2165
2
            return count;
2166
2
        }
2167
7
        }
2168
0
        return 0;
2169
7
    }
2170
2171
    /**
2172
     * Return new set with specified start and limit
2173
     * @param range_start the start value for the range
2174
     * @param cardinality_limit the length of the subset
2175
     * @return the real count for subset, maybe less than cardinality_limit
2176
     */
2177
    int64_t sub_limit(const int64_t& range_start, const int64_t& cardinality_limit,
2178
9
                      BitmapValue* ret_bitmap) {
2179
9
        switch (_type) {
2180
1
        case EMPTY:
2181
1
            return 0;
2182
2
        case SINGLE: {
2183
            //only single value, so range_start must less than _sv
2184
2
            if (range_start > _sv) {
2185
0
                return 0;
2186
2
            } else {
2187
2
                ret_bitmap->add(_sv);
2188
2
                return 1;
2189
2
            }
2190
2
        }
2191
4
        case BITMAP: {
2192
4
            int64_t count = 0;
2193
31
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2194
30
                if (*it < range_start) {
2195
8
                    continue;
2196
8
                }
2197
22
                if (count < cardinality_limit) {
2198
19
                    ret_bitmap->add(*it);
2199
19
                    ++count;
2200
19
                } else {
2201
3
                    break;
2202
3
                }
2203
22
            }
2204
4
            return count;
2205
2
        }
2206
2
        case SET: {
2207
2
            int64_t count = 0;
2208
2209
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2210
2
            std::sort(values.begin(), values.end());
2211
6
            for (auto it = values.begin(); it != values.end(); ++it) {
2212
4
                if (*it < range_start) {
2213
0
                    continue;
2214
0
                }
2215
4
                if (count < cardinality_limit) {
2216
4
                    ret_bitmap->add(*it);
2217
4
                    ++count;
2218
4
                } else {
2219
0
                    break;
2220
0
                }
2221
4
            }
2222
2
            return count;
2223
2
        }
2224
9
        }
2225
0
        return 0;
2226
9
    }
2227
2228
    /**
2229
     * Returns the bitmap elements, starting from the offset position.
2230
     * The number of returned elements is limited by the cardinality_limit parameter.
2231
     * Analog of the substring string function, but for bitmap.
2232
     */
2233
8
    int64_t offset_limit(const int64_t& offset, const int64_t& limit, BitmapValue* ret_bitmap) {
2234
8
        switch (_type) {
2235
1
        case EMPTY:
2236
1
            return 0;
2237
2
        case SINGLE: {
2238
            //only single value, so offset must start 0
2239
2
            if (offset == 0) {
2240
1
                ret_bitmap->add(_sv);
2241
1
                return 1;
2242
1
            } else {
2243
1
                return 0;
2244
1
            }
2245
2
        }
2246
5
        default:
2247
5
            break;
2248
8
        }
2249
5
        if (_type == BITMAP) {
2250
3
            if (std::abs(offset) >= _bitmap->cardinality()) {
2251
0
                return 0;
2252
0
            }
2253
3
            int64_t abs_offset = offset;
2254
3
            if (offset < 0) {
2255
0
                abs_offset = _bitmap->cardinality() + offset;
2256
0
            }
2257
2258
3
            int64_t count = 0;
2259
3
            int64_t offset_count = 0;
2260
3
            auto it = _bitmap->begin();
2261
123
            for (; it != _bitmap->end() && offset_count < abs_offset; ++it) {
2262
120
                ++offset_count;
2263
120
            }
2264
33
            for (; it != _bitmap->end() && count < limit; ++it, ++count) {
2265
30
                ret_bitmap->add(*it);
2266
30
            }
2267
3
            return count;
2268
3
        } else {
2269
2
            if (std::abs(offset) > _set.size()) {
2270
0
                return 0;
2271
0
            }
2272
2273
2
            int64_t abs_offset = offset;
2274
2
            if (offset < 0) {
2275
0
                abs_offset = _set.size() + offset;
2276
0
            }
2277
2278
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2279
2
            std::sort(values.begin(), values.end());
2280
2281
2
            int64_t count = 0;
2282
2
            size_t index = 0;
2283
4
            for (auto v : values) {
2284
4
                if (index < abs_offset) {
2285
1
                    ++index;
2286
1
                    continue;
2287
1
                }
2288
3
                if (count == limit || index == values.size()) {
2289
0
                    break;
2290
0
                }
2291
3
                ++count;
2292
3
                ++index;
2293
3
                ret_bitmap->add(v);
2294
3
            }
2295
2
            return count;
2296
2
        }
2297
5
    }
2298
2299
    //for function bitmap_to_array
2300
3
    void to_array(vectorized::PaddedPODArray<int64_t>& data) const {
2301
3
        switch (_type) {
2302
0
        case EMPTY:
2303
0
            break;
2304
1
        case SINGLE: {
2305
1
            data.emplace_back(_sv);
2306
1
            break;
2307
0
        }
2308
1
        case BITMAP: {
2309
129
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2310
128
                data.emplace_back(*it);
2311
128
            }
2312
1
            break;
2313
0
        }
2314
1
        case SET: {
2315
1
            std::vector<uint64_t> values(_set.begin(), _set.end());
2316
1
            std::sort(values.begin(), values.end());
2317
31
            for (auto v : values) {
2318
31
                data.emplace_back(v);
2319
31
            }
2320
1
            break;
2321
0
        }
2322
3
        }
2323
3
    }
2324
2325
153
    void reset() {
2326
153
        _type = EMPTY;
2327
153
        _sv = 0;
2328
153
        _set.clear();
2329
153
        _is_shared = false;
2330
153
        _bitmap = nullptr;
2331
153
    }
2332
    // Implement an iterator for convenience
2333
    friend class BitmapValueIterator;
2334
    typedef BitmapValueIterator b_iterator;
2335
2336
    b_iterator begin() const;
2337
    b_iterator end() const;
2338
    b_iterator lower_bound(uint64_t val) const;
2339
2340
private:
2341
44
    void _convert_to_smaller_type() {
2342
44
        if (_type == BITMAP) {
2343
16
            uint64_t c = _bitmap->cardinality();
2344
16
            if (config::enable_set_in_bitmap_value && c > SET_TYPE_THRESHOLD) {
2345
7
                return;
2346
9
            } else if (c > 1) {
2347
7
                return;
2348
7
            }
2349
2
            if (c == 0) {
2350
1
                _type = EMPTY;
2351
1
            } else if (c == 1 && !config::enable_set_in_bitmap_value) {
2352
1
                _type = SINGLE;
2353
1
                _sv = _bitmap->minimum();
2354
1
            } else {
2355
0
                _type = SET;
2356
0
                for (auto v : *_bitmap) {
2357
0
                    _set.insert(v);
2358
0
                }
2359
0
            }
2360
2
            _bitmap.reset();
2361
28
        } else if (_type == SET) {
2362
28
            if (_set.size() == 1 && !config::enable_set_in_bitmap_value) {
2363
0
                _type = SINGLE;
2364
0
                _sv = *_set.begin();
2365
0
                _set.clear();
2366
0
            }
2367
28
        }
2368
44
    }
2369
2370
8
    uint64_t min_or_max(bool* empty, std::function<uint64_t()> func) const {
2371
8
        bool is_empty = false;
2372
8
        uint64_t result = 0;
2373
8
        switch (_type) {
2374
0
        case SINGLE:
2375
0
            result = _sv;
2376
0
            break;
2377
2
        case BITMAP:
2378
6
        case SET:
2379
6
            result = func();
2380
6
            break;
2381
2
        default:
2382
2
            is_empty = true;
2383
8
        }
2384
8
        if (empty) {
2385
8
            *empty = is_empty;
2386
8
        }
2387
8
        return result;
2388
8
    }
2389
2390
4.62M
    void _prepare_bitmap_for_write() {
2391
4.62M
        if (!_bitmap) {
2392
9.28k
            _bitmap = std::make_shared<detail::Roaring64Map>();
2393
9.28k
            _is_shared = false;
2394
9.28k
            return;
2395
9.28k
        }
2396
2397
4.61M
        if (!_is_shared) {
2398
            // the state is not shared, not need to check use count any more
2399
4.60M
            return;
2400
4.60M
        }
2401
2402
9.17k
        if (_bitmap.use_count() > 1) {
2403
1.02k
            auto new_one = std::make_shared<detail::Roaring64Map>();
2404
1.02k
            *new_one = *_bitmap;
2405
1.02k
            _bitmap = new_one;
2406
1.02k
        }
2407
9.17k
        _is_shared = false;
2408
9.17k
    }
2409
2410
130k
    void _convert_to_bitmap_if_need() {
2411
130k
        if (_type != SET || _set.size() <= SET_TYPE_THRESHOLD) {
2412
126k
            return;
2413
126k
        }
2414
3.97k
        _prepare_bitmap_for_write();
2415
133k
        for (auto v : _set) {
2416
133k
            _bitmap->add(v);
2417
133k
        }
2418
3.97k
        _type = BITMAP;
2419
3.97k
        _set.clear();
2420
3.97k
    }
2421
2422
    enum BitmapDataType {
2423
        EMPTY = 0,
2424
        SINGLE = 1, // single element
2425
        BITMAP = 2, // more than one elements
2426
        SET = 3     // elements count less or equal than 32
2427
    };
2428
    uint64_t _sv = 0;                              // store the single value when _type == SINGLE
2429
    std::shared_ptr<detail::Roaring64Map> _bitmap; // used when _type == BITMAP
2430
    SetContainer<uint64_t> _set;
2431
    BitmapDataType _type {EMPTY};
2432
    // Indicate whether the state is shared among multi BitmapValue object
2433
    bool _is_shared = true;
2434
    static constexpr uint64_t SET_TYPE_THRESHOLD = 32;
2435
};
2436
2437
// A simple implement of bitmap value iterator(Read only)
2438
// Usage:
2439
//  BitmapValueIterator iter = bitmap_value.begin();
2440
//  BitmapValueIterator end = bitmap_value.end();
2441
//  for (; iter != end(); ++iter) {
2442
//      uint64_t v = *iter;
2443
//      ... do something with "v" ...
2444
//  }
2445
class BitmapValueIterator {
2446
public:
2447
37
    BitmapValueIterator(const BitmapValue& bitmap, bool end = false) : _bitmap(bitmap), _end(end) {
2448
37
        switch (_bitmap._type) {
2449
8
        case BitmapValue::BitmapDataType::EMPTY:
2450
8
            _end = true;
2451
8
            break;
2452
8
        case BitmapValue::BitmapDataType::SINGLE:
2453
8
            _sv = _bitmap._sv;
2454
8
            break;
2455
13
        case BitmapValue::BitmapDataType::BITMAP:
2456
13
            _iter = new detail::Roaring64MapSetBitForwardIterator(*_bitmap._bitmap, _end);
2457
13
            break;
2458
8
        case BitmapValue::BitmapDataType::SET:
2459
8
            _set_iter = _end ? _bitmap._set.end() : _bitmap._set.begin();
2460
8
            break;
2461
0
        default:
2462
0
            CHECK(false) << _bitmap._type;
2463
37
        }
2464
37
    }
2465
2466
    BitmapValueIterator(const BitmapValueIterator& other)
2467
168
            : _bitmap(other._bitmap), _sv(other._sv), _end(other._end) {
2468
168
        _iter = other._iter ? new detail::Roaring64MapSetBitForwardIterator(*other._iter) : nullptr;
2469
168
        if (_bitmap._type == BitmapValue::BitmapDataType::SET) {
2470
33
            _set_iter = other._set_iter;
2471
33
        }
2472
168
    }
2473
2474
205
    ~BitmapValueIterator() {
2475
205
        if (_iter != nullptr) {
2476
143
            delete _iter;
2477
143
            _iter = nullptr;
2478
143
        }
2479
205
    }
2480
2481
646
    uint64_t operator*() const {
2482
646
        CHECK(!_end) << "should not get value of end iterator";
2483
646
        switch (_bitmap._type) {
2484
4
        case BitmapValue::BitmapDataType::SINGLE:
2485
4
            return _sv;
2486
517
        case BitmapValue::BitmapDataType::BITMAP:
2487
517
            return *(*_iter);
2488
125
        case BitmapValue::BitmapDataType::SET: {
2489
125
            return *_set_iter;
2490
0
        }
2491
0
        default:
2492
0
            CHECK(false) << _bitmap._type;
2493
646
        }
2494
0
        return 0;
2495
646
    }
2496
2497
486
    BitmapValueIterator& operator++() { // ++i, must returned inc. value
2498
486
        CHECK(!_end) << "should not forward when iterator ends";
2499
486
        switch (_bitmap._type) {
2500
3
        case BitmapValue::BitmapDataType::SINGLE:
2501
3
            _end = true;
2502
3
            break;
2503
389
        case BitmapValue::BitmapDataType::BITMAP:
2504
389
            ++(*_iter);
2505
389
            break;
2506
94
        case BitmapValue::BitmapDataType::SET:
2507
94
            ++_set_iter;
2508
94
            break;
2509
0
        default:
2510
0
            CHECK(false) << _bitmap._type;
2511
486
        }
2512
486
        return *this;
2513
486
    }
2514
2515
160
    BitmapValueIterator operator++(int) { // i++, must return orig. value
2516
160
        CHECK(!_end) << "should not forward when iterator ends";
2517
160
        BitmapValueIterator orig(*this);
2518
160
        switch (_bitmap._type) {
2519
1
        case BitmapValue::BitmapDataType::SINGLE:
2520
1
            _end = true;
2521
1
            break;
2522
128
        case BitmapValue::BitmapDataType::BITMAP:
2523
128
            ++(*_iter);
2524
128
            break;
2525
31
        case BitmapValue::BitmapDataType::SET:
2526
31
            ++_set_iter;
2527
31
            break;
2528
0
        default:
2529
0
            CHECK(false) << _bitmap._type;
2530
160
        }
2531
160
        return orig;
2532
160
    }
2533
2534
674
    bool operator==(const BitmapValueIterator& other) const {
2535
674
        if (_end && other._end) {
2536
13
            return true;
2537
13
        }
2538
2539
661
        switch (_bitmap._type) {
2540
0
        case BitmapValue::BitmapDataType::EMPTY:
2541
0
            return other._bitmap._type == BitmapValue::BitmapDataType::EMPTY;
2542
5
        case BitmapValue::BitmapDataType::SINGLE:
2543
5
            return _end == other._end && _sv == other._sv;
2544
524
        case BitmapValue::BitmapDataType::BITMAP:
2545
524
            return *_iter == *(other._iter);
2546
132
        case BitmapValue::BitmapDataType::SET:
2547
132
            return _set_iter == other._set_iter;
2548
0
        default:
2549
0
            CHECK(false) << _bitmap._type;
2550
661
        }
2551
0
        return false;
2552
661
    }
2553
2554
664
    bool operator!=(const BitmapValueIterator& other) const { return !(*this == other); }
2555
2556
    /**
2557
    * Move the iterator to the first value >= `val`.
2558
    */
2559
0
    BitmapValueIterator& move(uint64_t val) {
2560
0
        switch (_bitmap._type) {
2561
0
        case BitmapValue::BitmapDataType::SINGLE:
2562
0
            if (_sv < val) {
2563
0
                _end = true;
2564
0
            }
2565
0
            break;
2566
0
        case BitmapValue::BitmapDataType::BITMAP:
2567
0
            if (!_iter->move(val)) {
2568
0
                _end = true;
2569
0
            }
2570
0
            break;
2571
0
        case BitmapValue::BitmapDataType::SET: {
2572
0
            throw Exception(Status::FatalError("BitmapValue with set do not support move"));
2573
0
        }
2574
0
        default:
2575
0
            break;
2576
0
        }
2577
0
        return *this;
2578
0
    }
2579
2580
private:
2581
    const BitmapValue& _bitmap;
2582
    detail::Roaring64MapSetBitForwardIterator* _iter = nullptr;
2583
    BitmapValue::SetContainer<uint64_t>::const_iterator _set_iter;
2584
    uint64_t _sv = 0;
2585
    bool _end = false;
2586
};
2587
2588
16
inline BitmapValueIterator BitmapValue::begin() const {
2589
16
    return {*this};
2590
16
}
2591
2592
21
inline BitmapValueIterator BitmapValue::end() const {
2593
21
    return {*this, true};
2594
21
}
2595
2596
0
inline BitmapValueIterator BitmapValue::lower_bound(uint64_t val) const {
2597
0
    return BitmapValueIterator(*this).move(val);
2598
0
}
2599
2600
7
inline bool BitmapValue::contains_any(uint64_t left, uint64_t right) const {
2601
7
    if (left > right) {
2602
0
        return false;
2603
0
    }
2604
2605
7
    if (_type == SET) {
2606
25
        for (auto v : _set) {
2607
25
            if (v >= left && v <= right) {
2608
4
                return true;
2609
4
            }
2610
25
        }
2611
3
        return false;
2612
7
    }
2613
0
    auto it = lower_bound(left);
2614
0
    return it != end() && *it <= right;
2615
7
}
2616
2617
} // namespace doris