Coverage Report

Created: 2025-10-24 13:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/util/bitmap_value.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <parallel_hashmap/btree.h>
21
#include <parallel_hashmap/phmap.h>
22
23
#include <algorithm>
24
#include <cstdarg>
25
#include <cstdint>
26
#include <cstdio>
27
#include <limits>
28
#include <map>
29
#include <memory>
30
#include <new>
31
#include <numeric>
32
#include <queue>
33
#include <roaring/roaring.hh>
34
#include <set>
35
#include <stdexcept>
36
#include <string>
37
#include <utility>
38
39
#include "common/config.h"
40
#include "common/exception.h"
41
#include "common/logging.h"
42
#include "udf/udf.h"
43
#include "util/coding.h"
44
#include "vec/common/pod_array.h"
45
#include "vec/common/pod_array_fwd.h"
46
namespace doris {
47
48
// serialized bitmap := TypeCode(1), Payload
49
// The format of payload depends on value of TypeCode which is defined below
50
struct BitmapTypeCode {
51
    enum type {
52
        // An empty bitmap. Payload is 0 byte.
53
        // added in 0.11
54
        EMPTY = 0,
55
        // A bitmap containing only one element that is in [0, UINT32_MAX]
56
        // Payload := UInt32LittleEndian(4 byte)
57
        // added in 0.11
58
        SINGLE32 = 1,
59
        // A bitmap whose maximum element is in [0, UINT32_MAX]
60
        // Payload := the standard RoaringBitmap format described by
61
        // https://github.com/RoaringBitmap/RoaringFormatSpec/
62
        // added in 0.11
63
        BITMAP32 = 2,
64
        // A bitmap containing only one element that is in (UINT32_MAX, UINT64_MAX]
65
        // Payload := UInt64LittleEndian(8 byte)
66
        // added in 0.12
67
        SINGLE64 = 3,
68
        // A bitmap whose maximum element is in (UINT32_MAX, UINT64_MAX].
69
        //
70
        // To support 64-bits elements, all elements with the same high 32 bits are stored in a
71
        // RoaringBitmap containing only the lower 32 bits. Thus we could use
72
        // map<uint32_t, RoaringBitmap> to represent bitmap of 64-bits ints.
73
        //
74
        // Since there is no standard format for 64-bits RoaringBitmap, we define our own as below
75
        // Payload := NumRoaring(vint64), { MapKey, MapValue }^NumRoaring
76
        // - MapKey := the shared high 32 bits in UInt32LittleEndian(4 byte)
77
        // - MapValue := the standard RoaringBitmap format
78
        //
79
        // added in 0.12
80
        BITMAP64 = 4,
81
        SET = 5, // V1
82
        SET_V2 = 10,
83
        BITMAP32_V2 = 12,
84
        BITMAP64_V2 = 13,
85
        TYPE_MAX
86
    };
87
1
    Status static inline validate(int bitmap_type) {
88
1
        if (UNLIKELY(bitmap_type < type::EMPTY || bitmap_type >= type::TYPE_MAX)) {
89
1
            std::string err_msg =
90
1
                    fmt::format("BitmapTypeCode invalid, should between: {} and {} actrual is {}",
91
1
                                BitmapTypeCode::EMPTY, BitmapTypeCode::BITMAP64, bitmap_type);
92
1
            LOG(ERROR) << err_msg;
93
1
            return Status::Corruption(err_msg);
94
1
        }
95
0
        return Status::OK();
96
1
    }
97
};
98
99
namespace detail {
100
101
class Roaring64MapSetBitForwardIterator;
102
103
// Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.2.60/cpp/roaring64map.hh
104
// What we change includes
105
// - a custom serialization format is used inside read()/write()/getSizeInBytes()
106
// - added clear() and is32BitsEnough()
107
class Roaring64Map {
108
public:
109
    /**
110
     * Create an empty bitmap
111
     */
112
18.4k
    Roaring64Map() = default;
113
114
    /**
115
     * Construct a bitmap from a list of 32-bit integer values.
116
     */
117
2
    Roaring64Map(size_t n, const uint32_t* data) { addMany(n, data); }
118
119
    /**
120
     * Construct a bitmap from a list of 64-bit integer values.
121
     */
122
1
    Roaring64Map(size_t n, const uint64_t* data) { addMany(n, data); }
123
124
    /**
125
     * Construct a 64-bit map from a 32-bit one
126
     */
127
1
    explicit Roaring64Map(const roaring::Roaring& r) { emplaceOrInsert(0, r); }
128
129
9
    Roaring64Map(const Roaring64Map& r) = default;
130
131
9.10k
    Roaring64Map(Roaring64Map&& r) = default;
132
133
    /**
134
     * Assignment operator.
135
     */
136
1.05k
    Roaring64Map& operator=(const Roaring64Map& r) {
137
1.05k
        roarings = r.roarings;
138
1.05k
        return *this;
139
1.05k
    }
140
141
    /**
142
     * Add value x
143
     *
144
     */
145
3
    void add(uint32_t x) {
146
3
        roarings[0].add(x);
147
3
        roarings[0].setCopyOnWrite(copyOnWrite);
148
3
    }
149
4.20M
    void add(uint64_t x) {
150
4.20M
        roarings[highBytes(x)].add(lowBytes(x));
151
4.20M
        roarings[highBytes(x)].setCopyOnWrite(copyOnWrite);
152
4.20M
    }
153
154
    template <typename T>
155
9
    void addMany(size_t n_args, const T* vals) {
156
9
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
157
8
            auto& roaring = roarings[0];
158
8
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
159
8
            roaring.setCopyOnWrite(copyOnWrite);
160
8
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
161
1
            auto& roaring = roarings[0];
162
1
            std::vector<uint32_t> values(n_args);
163
8
            for (size_t i = 0; i != n_args; ++i) {
164
7
                values[i] = uint32_t(vals[i]);
165
7
            }
166
1
            roaring.addMany(n_args, values.data());
167
1
            roaring.setCopyOnWrite(copyOnWrite);
168
1
        } else {
169
0
            for (size_t lcv = 0; lcv < n_args; lcv++) {
170
0
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
171
0
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
172
0
            }
173
0
        }
174
9
    }
_ZN5doris6detail12Roaring64Map7addManyIjEEvmPKT_
Line
Count
Source
155
8
    void addMany(size_t n_args, const T* vals) {
156
8
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
157
8
            auto& roaring = roarings[0];
158
8
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
159
8
            roaring.setCopyOnWrite(copyOnWrite);
160
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
161
            auto& roaring = roarings[0];
162
            std::vector<uint32_t> values(n_args);
163
            for (size_t i = 0; i != n_args; ++i) {
164
                values[i] = uint32_t(vals[i]);
165
            }
166
            roaring.addMany(n_args, values.data());
167
            roaring.setCopyOnWrite(copyOnWrite);
168
        } else {
169
            for (size_t lcv = 0; lcv < n_args; lcv++) {
170
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
171
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
172
            }
173
        }
174
8
    }
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIiEEvmPKT_
_ZN5doris6detail12Roaring64Map7addManyItEEvmPKT_
Line
Count
Source
155
1
    void addMany(size_t n_args, const T* vals) {
156
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
157
            auto& roaring = roarings[0];
158
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
159
            roaring.setCopyOnWrite(copyOnWrite);
160
1
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
161
1
            auto& roaring = roarings[0];
162
1
            std::vector<uint32_t> values(n_args);
163
8
            for (size_t i = 0; i != n_args; ++i) {
164
7
                values[i] = uint32_t(vals[i]);
165
7
            }
166
1
            roaring.addMany(n_args, values.data());
167
1
            roaring.setCopyOnWrite(copyOnWrite);
168
        } else {
169
            for (size_t lcv = 0; lcv < n_args; lcv++) {
170
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
171
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
172
            }
173
        }
174
1
    }
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIaEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIsEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIlEEvmPKT_
175
176
40
    void addMany(size_t n_args, const uint64_t* vals) {
177
1.25k
        for (size_t lcv = 0; lcv < n_args; lcv++) {
178
1.21k
            roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
179
1.21k
            roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
180
1.21k
        }
181
40
    }
182
183
    /**
184
     * Remove value x
185
     *
186
     */
187
4
    void remove(uint32_t x) { roarings[0].remove(x); }
188
23
    void remove(uint64_t x) {
189
23
        auto roaring_iter = roarings.find(highBytes(x));
190
23
        if (roaring_iter != roarings.cend()) {
191
23
            roaring_iter->second.remove(lowBytes(x));
192
23
        }
193
23
    }
194
    /**
195
     * Return the largest value (if not empty)
196
     *
197
     */
198
50.1k
    uint64_t maximum() const {
199
50.1k
        for (auto roaring_iter = roarings.crbegin(); roaring_iter != roarings.crend();
200
50.1k
             ++roaring_iter) {
201
50.1k
            if (!roaring_iter->second.isEmpty()) {
202
50.1k
                return uniteBytes(roaring_iter->first, roaring_iter->second.maximum());
203
50.1k
            }
204
50.1k
        }
205
        // we put std::numeric_limits<>::max/min in parenthesis
206
        // to avoid a clash with the Windows.h header under Windows
207
5
        return (std::numeric_limits<uint64_t>::min)();
208
50.1k
    }
209
210
    /**
211
     * Return the smallest value (if not empty)
212
     *
213
     */
214
7
    uint64_t minimum() const {
215
7
        for (auto roaring_iter = roarings.cbegin(); roaring_iter != roarings.cend();
216
7
             ++roaring_iter) {
217
6
            if (!roaring_iter->second.isEmpty()) {
218
6
                return uniteBytes(roaring_iter->first, roaring_iter->second.minimum());
219
6
            }
220
6
        }
221
        // we put std::numeric_limits<>::max/min in parenthesis
222
        // to avoid a clash with the Windows.h header under Windows
223
1
        return (std::numeric_limits<uint64_t>::max)();
224
7
    }
225
226
    /**
227
     * Check if value x is present
228
     */
229
11
    bool contains(uint32_t x) const {
230
11
        return roarings.count(0) == 0 ? false : roarings.at(0).contains(x);
231
11
    }
232
9.24k
    bool contains(uint64_t x) const {
233
9.24k
        return roarings.count(highBytes(x)) == 0 ? false
234
9.24k
                                                 : roarings.at(highBytes(x)).contains(lowBytes(x));
235
9.24k
    }
236
237
    /**
238
     * Compute the intersection between the current bitmap and the provided
239
     * bitmap,
240
     * writing the result in the current bitmap. The provided bitmap is not
241
     * modified.
242
     */
243
6
    Roaring64Map& operator&=(const Roaring64Map& r) {
244
11
        for (auto& map_entry : roarings) {
245
11
            if (r.roarings.count(map_entry.first) == 1) {
246
10
                map_entry.second &= r.roarings.at(map_entry.first);
247
10
            } else {
248
1
                map_entry.second = roaring::Roaring();
249
1
            }
250
11
        }
251
6
        return *this;
252
6
    }
253
254
    /**
255
     * Compute the difference between the current bitmap and the provided
256
     * bitmap,
257
     * writing the result in the current bitmap. The provided bitmap is not
258
     * modified.
259
     */
260
1
    Roaring64Map& operator-=(const Roaring64Map& r) {
261
4
        for (auto& map_entry : roarings) {
262
4
            if (r.roarings.count(map_entry.first) == 1) {
263
4
                map_entry.second -= r.roarings.at(map_entry.first);
264
4
            }
265
4
        }
266
1
        return *this;
267
1
    }
268
269
    /**
270
     * Compute the union between the current bitmap and the provided bitmap,
271
     * writing the result in the current bitmap. The provided bitmap is not
272
     * modified.
273
     *
274
     * See also the fastunion function to aggregate many bitmaps more quickly.
275
     */
276
5
    Roaring64Map& operator|=(const Roaring64Map& r) {
277
11
        for (const auto& map_entry : r.roarings) {
278
11
            if (roarings.count(map_entry.first) == 0) {
279
2
                roarings[map_entry.first] = map_entry.second;
280
2
                roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
281
9
            } else {
282
9
                roarings[map_entry.first] |= map_entry.second;
283
9
            }
284
11
        }
285
5
        return *this;
286
5
    }
287
288
    /**
289
     * Compute the symmetric union between the current bitmap and the provided
290
     * bitmap,
291
     * writing the result in the current bitmap. The provided bitmap is not
292
     * modified.
293
     */
294
4
    Roaring64Map& operator^=(const Roaring64Map& r) {
295
7
        for (const auto& map_entry : r.roarings) {
296
7
            if (roarings.count(map_entry.first) == 0) {
297
0
                roarings[map_entry.first] = map_entry.second;
298
0
                roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
299
7
            } else {
300
7
                roarings[map_entry.first] ^= map_entry.second;
301
7
            }
302
7
        }
303
4
        return *this;
304
4
    }
305
306
    /**
307
     * Exchange the content of this bitmap with another.
308
     */
309
1
    void swap(Roaring64Map& r) { roarings.swap(r.roarings); }
310
311
    /**
312
     * Get the cardinality of the bitmap (number of elements).
313
     * Throws std::length_error in the special case where the bitmap is full
314
     * (cardinality() == 2^64). Check isFull() before calling to avoid
315
     * exception.
316
     */
317
246
    uint64_t cardinality() const {
318
246
        if (isFull()) {
319
0
            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
320
0
                                   "bitmap is full, cardinality is 2^64, "
321
0
                                   "unable to represent in a 64-bit integer");
322
0
        }
323
246
        return std::accumulate(roarings.cbegin(), roarings.cend(), (uint64_t)0,
324
246
                               [](uint64_t previous,
325
418
                                  const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
326
418
                                   return previous + map_entry.second.cardinality();
327
418
                               });
328
246
    }
329
    /**
330
     * Computes the size of the intersection between two bitmaps.
331
     *
332
     */
333
6
    uint64_t andCardinality(const Roaring64Map& r) const {
334
6
        uint64_t card = 0;
335
10
        for (const auto& map_entry : roarings) {
336
10
            if (r.roarings.count(map_entry.first) == 1) {
337
9
                card += map_entry.second.and_cardinality(r.roarings.at(map_entry.first));
338
9
            }
339
10
        }
340
6
        return card;
341
6
    }
342
343
    /**
344
     * Computes the size of the union between two bitmaps.
345
     *
346
     */
347
4
    uint64_t orCardinality(const Roaring64Map& r) const {
348
4
        uint64_t card = 0;
349
4
        for (const auto& map_entry : roarings) {
350
4
            if (r.roarings.count(map_entry.first) == 0) {
351
0
                card += map_entry.second.cardinality();
352
0
            }
353
4
        }
354
4
        for (const auto& map_entry : r.roarings) {
355
4
            if (roarings.count(map_entry.first) == 0) {
356
0
                card += map_entry.second.cardinality();
357
4
            } else {
358
4
                card += roarings.at(map_entry.first).or_cardinality(map_entry.second);
359
4
            }
360
4
        }
361
4
        return card;
362
4
    }
363
364
    /**
365
     * Computes the size of the difference (andnot) between two bitmaps.
366
     * r1.cardinality - (r1 & r2).cardinality
367
     */
368
1
    uint64_t andnotCardinality(const Roaring64Map& r) const {
369
1
        uint64_t card = 0;
370
4
        for (const auto& map_entry : roarings) {
371
4
            card += map_entry.second.cardinality();
372
4
            if (r.roarings.count(map_entry.first) == 1) {
373
4
                card -= r.roarings.at(map_entry.first).and_cardinality(map_entry.second);
374
4
            }
375
4
        }
376
1
        return card;
377
1
    }
378
379
    /**
380
     * Computes the size of the symmetric difference (andnot) between two
381
     * bitmaps.
382
     * r1.cardinality + r2.cardinality - 2 * (r1 & r2).cardinality
383
     */
384
1
    uint64_t xorCardinality(const Roaring64Map& r) const {
385
1
        uint64_t card = 0;
386
1
        for (const auto& map_entry : roarings) {
387
1
            card += map_entry.second.cardinality();
388
1
        }
389
1
        for (const auto& map_entry : r.roarings) {
390
1
            card += map_entry.second.cardinality();
391
1
            if (roarings.count(map_entry.first) == 1) {
392
1
                card -= 2 * roarings.at(map_entry.first).and_cardinality(map_entry.second);
393
1
            }
394
1
        }
395
1
        return card;
396
1
    }
397
398
    /**
399
    * Returns true if the bitmap is empty (cardinality is zero).
400
    */
401
1
    bool isEmpty() const {
402
1
        return std::all_of(roarings.cbegin(), roarings.cend(),
403
1
                           [](const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
404
0
                               return map_entry.second.isEmpty();
405
0
                           });
406
1
    }
407
408
    /**
409
    * Returns true if the bitmap is full (cardinality is max uint64_t + 1).
410
    */
411
246
    bool isFull() const {
412
        // only bother to check if map is fully saturated
413
        //
414
        // we put std::numeric_limits<>::max/min in parenthesis
415
        // to avoid a clash with the Windows.h header under Windows
416
246
        return roarings.size() == ((uint64_t)(std::numeric_limits<uint32_t>::max)()) + 1
417
246
                       ? std::all_of(roarings.cbegin(), roarings.cend(),
418
0
                                     [](const std::pair<const uint32_t, roaring::Roaring>&
419
0
                                                roaring_map_entry) {
420
                                         // roarings within map are saturated if cardinality
421
                                         // is uint32_t max + 1
422
0
                                         return roaring_map_entry.second.cardinality() ==
423
0
                                                ((uint64_t)(std::numeric_limits<uint32_t>::max)()) +
424
0
                                                        1;
425
0
                                     })
426
246
                       : false;
427
246
    }
428
429
    /**
430
     * Return true if the two bitmaps contain the same elements.
431
     */
432
14
    bool operator==(const Roaring64Map& r) const {
433
        // we cannot use operator == on the map because either side may contain
434
        // empty Roaring Bitmaps
435
14
        auto lhs_iter = roarings.cbegin();
436
14
        auto lhs_cend = roarings.cend();
437
14
        auto rhs_iter = r.roarings.cbegin();
438
14
        auto rhs_cend = r.roarings.cend();
439
27
        while (lhs_iter != lhs_cend && rhs_iter != rhs_cend) {
440
13
            auto lhs_key = lhs_iter->first;
441
13
            auto rhs_key = rhs_iter->first;
442
13
            const auto& lhs_map = lhs_iter->second;
443
13
            const auto& rhs_map = rhs_iter->second;
444
13
            if (lhs_map.isEmpty()) {
445
1
                ++lhs_iter;
446
1
                continue;
447
1
            }
448
12
            if (rhs_map.isEmpty()) {
449
2
                ++rhs_iter;
450
2
                continue;
451
2
            }
452
10
            if (!(lhs_key == rhs_key)) {
453
0
                return false;
454
0
            }
455
10
            if (!(lhs_map == rhs_map)) {
456
0
                return false;
457
0
            }
458
10
            ++lhs_iter;
459
10
            ++rhs_iter;
460
10
        }
461
16
        while (lhs_iter != lhs_cend) {
462
4
            if (!lhs_iter->second.isEmpty()) {
463
2
                return false;
464
2
            }
465
2
            ++lhs_iter;
466
2
        }
467
14
        while (rhs_iter != rhs_cend) {
468
3
            if (!rhs_iter->second.isEmpty()) {
469
1
                return false;
470
1
            }
471
2
            ++rhs_iter;
472
2
        }
473
11
        return true;
474
12
    }
475
476
    /** convert array and bitmap containers to run containers when it is more
477
     * efficient;
478
     * also convert from run containers when more space efficient.  Returns
479
     * true if the result has at least one run container.
480
     * Additional savings might be possible by calling shrinkToFit().
481
     */
482
29.1k
    bool runOptimize() {
483
29.1k
        return std::accumulate(
484
29.1k
                roarings.begin(), roarings.end(), true,
485
29.1k
                [](bool previous, std::pair<const uint32_t, roaring::Roaring>& map_entry) {
486
29.1k
                    return map_entry.second.runOptimize() && previous;
487
29.1k
                });
488
29.1k
    }
489
490
    /**
491
     * If needed, reallocate memory to shrink the memory usage. Returns
492
     * the number of bytes saved.
493
    */
494
29.1k
    size_t shrinkToFit() {
495
29.1k
        size_t savedBytes = 0;
496
29.1k
        auto iter = roarings.begin();
497
58.2k
        while (iter != roarings.cend()) {
498
29.1k
            if (iter->second.isEmpty()) {
499
                // empty Roarings are 84 bytes
500
1
                savedBytes += 88;
501
1
                iter = roarings.erase(iter);
502
29.1k
            } else {
503
29.1k
                savedBytes += iter->second.shrinkToFit();
504
29.1k
                iter++;
505
29.1k
            }
506
29.1k
        }
507
29.1k
        return savedBytes;
508
29.1k
    }
509
510
    /**
511
     * Iterate over the bitmap elements. The function iterator is called once
512
     * for all the values with ptr (can be nullptr) as the second parameter of each
513
     * call.
514
     *
515
     * roaring_iterator is simply a pointer to a function that returns bool
516
     * (true means that the iteration should continue while false means that it
517
     * should stop), and takes (uint32_t,void*) as inputs.
518
     */
519
4.01k
    void iterate(roaring::api::roaring_iterator64 iterator, void* ptr) const {
520
4.04k
        for (const auto& map_entry : roarings) {
521
4.04k
            bool should_continue = roaring_iterate64(&map_entry.second.roaring, iterator,
522
4.04k
                                                     uint64_t(map_entry.first) << 32, ptr);
523
4.04k
            if (!should_continue) {
524
1
                break;
525
1
            }
526
4.04k
        }
527
4.01k
    }
528
529
    /**
530
     * write a bitmap to a char buffer.
531
     * Returns how many bytes were written which should be getSizeInBytes().
532
     */
533
20.9k
    size_t write(char* buf, int serialize_version) const {
534
20.9k
        bool is_v1 = serialize_version == 1;
535
20.9k
        BitmapTypeCode::type type_bitmap32 =
536
20.9k
                is_v1 ? BitmapTypeCode::type::BITMAP32 : BitmapTypeCode::type::BITMAP32_V2;
537
20.9k
        BitmapTypeCode::type type_bitmap64 =
538
20.9k
                is_v1 ? BitmapTypeCode::type::BITMAP64 : BitmapTypeCode::type::BITMAP64_V2;
539
540
20.9k
        if (is32BitsEnough()) {
541
20.9k
            *(buf++) = type_bitmap32;
542
20.9k
            auto it = roarings.find(0);
543
20.9k
            if (it == roarings.end()) { // empty bitmap
544
2
                roaring::Roaring r;
545
2
                return r.write(buf, is_v1) + 1;
546
2
            }
547
20.9k
            return it->second.write(buf, is_v1) + 1;
548
20.9k
        }
549
550
7
        const char* orig = buf;
551
        // put type code
552
7
        *(buf++) = type_bitmap64;
553
        // push map size
554
7
        buf = (char*)encode_varint64((uint8_t*)buf, roarings.size());
555
7
        std::for_each(roarings.cbegin(), roarings.cend(),
556
16
                      [&buf, is_v1](const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
557
                          // push map key
558
16
                          encode_fixed32_le((uint8_t*)buf, map_entry.first);
559
16
                          buf += sizeof(uint32_t);
560
                          // push map value Roaring
561
16
                          buf += map_entry.second.write(buf, is_v1);
562
16
                      });
563
7
        return buf - orig;
564
20.9k
    }
565
566
    /**
567
     * read a bitmap from a serialized version.
568
     *
569
     * This function is unsafe in the sense that if you provide bad data,
570
     * many bytes could be read, possibly causing a buffer overflow. See also readSafe.
571
     */
572
9.11k
    static Roaring64Map read(const char* buf) {
573
9.11k
        Roaring64Map result;
574
575
9.11k
        bool is_v1 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP64 == *buf;
576
9.11k
        bool is_bitmap32 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP32_V2 == *buf;
577
9.11k
        bool is_bitmap64 = BitmapTypeCode::BITMAP64 == *buf || BitmapTypeCode::BITMAP64_V2 == *buf;
578
9.11k
        if (is_bitmap32) {
579
9.10k
            roaring::Roaring read = roaring::Roaring::read(buf + 1, is_v1);
580
9.10k
            result.emplaceOrInsert(0, std::move(read));
581
9.10k
            return result;
582
9.10k
        }
583
584
9.11k
        DCHECK(is_bitmap64);
585
8
        buf++;
586
587
        // get map size (varint64 took 1~10 bytes)
588
8
        uint64_t map_size;
589
8
        buf = reinterpret_cast<const char*>(
590
8
                decode_varint64_ptr(reinterpret_cast<const uint8_t*>(buf),
591
8
                                    reinterpret_cast<const uint8_t*>(buf + 10), &map_size));
592
8
        DCHECK(buf != nullptr);
593
26
        for (uint64_t lcv = 0; lcv < map_size; lcv++) {
594
            // get map key
595
18
            uint32_t key = decode_fixed32_le(reinterpret_cast<const uint8_t*>(buf));
596
18
            buf += sizeof(uint32_t);
597
            // read map value Roaring
598
18
            roaring::Roaring read_var = roaring::Roaring::read(buf, is_v1);
599
            // forward buffer past the last Roaring Bitmap
600
18
            buf += read_var.getSizeInBytes(is_v1);
601
18
            result.emplaceOrInsert(key, std::move(read_var));
602
18
        }
603
8
        return result;
604
9.11k
    }
605
606
    /**
607
     * How many bytes are required to serialize this bitmap
608
     */
609
29.1k
    size_t getSizeInBytes(int serialize_version) const {
610
29.1k
        bool is_v1 = serialize_version == 1;
611
29.1k
        if (is32BitsEnough()) {
612
29.1k
            auto it = roarings.find(0);
613
29.1k
            if (it == roarings.end()) { // empty bitmap
614
2
                roaring::Roaring r;
615
2
                return r.getSizeInBytes(is_v1) + 1;
616
2
            }
617
29.1k
            return it->second.getSizeInBytes(is_v1) + 1;
618
29.1k
        }
619
        // start with type code, map size and size of keys for each map entry
620
13
        size_t init = 1 + varint_length(roarings.size()) + roarings.size() * sizeof(uint32_t);
621
13
        return std::accumulate(
622
13
                roarings.cbegin(), roarings.cend(), init,
623
28
                [=](size_t previous, const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
624
                    // add in bytes used by each Roaring
625
28
                    return previous + map_entry.second.getSizeInBytes(is_v1);
626
28
                });
627
29.1k
    }
628
629
    /**
630
     * remove all elements
631
     */
632
0
    void clear() { roarings.clear(); }
633
634
    /**
635
     * Return whether all elements can be represented in 32 bits
636
     */
637
50.1k
    bool is32BitsEnough() const { return maximum() <= std::numeric_limits<uint32_t>::max(); }
638
639
    /**
640
     * Computes the intersection between two bitmaps and returns new bitmap.
641
     * The current bitmap and the provided bitmap are unchanged.
642
     */
643
2
    Roaring64Map operator&(const Roaring64Map& o) const { return Roaring64Map(*this) &= o; }
644
645
    /**
646
     * Computes the difference between two bitmaps and returns new bitmap.
647
     * The current bitmap and the provided bitmap are unchanged.
648
     */
649
0
    Roaring64Map operator-(const Roaring64Map& o) const { return Roaring64Map(*this) -= o; }
650
651
    /**
652
     * Computes the union between two bitmaps and returns new bitmap.
653
     * The current bitmap and the provided bitmap are unchanged.
654
     */
655
1
    Roaring64Map operator|(const Roaring64Map& o) const { return Roaring64Map(*this) |= o; }
656
657
    /**
658
     * Computes the symmetric union between two bitmaps and returns new bitmap.
659
     * The current bitmap and the provided bitmap are unchanged.
660
     */
661
1
    Roaring64Map operator^(const Roaring64Map& o) const { return Roaring64Map(*this) ^= o; }
662
663
    /**
664
     * computes the logical or (union) between "n" bitmaps (referenced by a
665
     * pointer).
666
     */
667
5
    static Roaring64Map fastunion(size_t n, const Roaring64Map** inputs) {
668
5
        struct pq_entry {
669
5
            phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator iterator;
670
5
            phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator end;
671
5
        };
672
673
5
        struct pq_comp {
674
9
            bool operator()(const pq_entry& lhs, const pq_entry& rhs) const {
675
9
                auto left_key = lhs.iterator->first;
676
9
                auto right_key = rhs.iterator->first;
677
                // We compare in the opposite direction than normal because priority
678
                // queues normally order from largest to smallest, but we want
679
                // smallest to largest.
680
9
                return left_key > right_key;
681
9
            }
682
5
        };
683
684
5
        std::priority_queue<pq_entry, std::vector<pq_entry>, pq_comp> pq;
685
686
12
        for (auto i = 0; i < n; ++i) {
687
7
            const auto& roaring = inputs[i]->roarings;
688
7
            if (roaring.begin() != roaring.end()) {
689
7
                pq.push({roaring.begin(), roaring.end()});
690
7
            }
691
7
        }
692
693
5
        std::vector<const roaring::api::roaring_bitmap_t*> group_bitmaps;
694
5
        Roaring64Map result;
695
13
        while (!pq.empty()) {
696
8
            auto group_key = pq.top().iterator->first;
697
8
            group_bitmaps.clear();
698
699
19
            while (!pq.empty()) {
700
14
                auto candidate_current_iter = pq.top().iterator;
701
14
                auto candidate_end_iter = pq.top().end;
702
703
14
                auto candidate_key = candidate_current_iter->first;
704
14
                const auto& candidate_bitmap = candidate_current_iter->second;
705
706
14
                if (candidate_key != group_key) {
707
3
                    break;
708
3
                }
709
710
11
                group_bitmaps.push_back(&candidate_bitmap.roaring);
711
11
                pq.pop();
712
11
                ++candidate_current_iter;
713
11
                if (candidate_current_iter != candidate_end_iter) {
714
4
                    pq.push({candidate_current_iter, candidate_end_iter});
715
4
                }
716
11
            }
717
8
            auto* inner_result = roaring::api::roaring_bitmap_or_many(group_bitmaps.size(),
718
8
                                                                      group_bitmaps.data());
719
8
            result.roarings.insert(result.roarings.end(),
720
8
                                   std::make_pair(group_key, roaring::Roaring(inner_result)));
721
8
        }
722
723
5
        return result;
724
5
    }
725
726
    friend class Roaring64MapSetBitForwardIterator;
727
    using const_iterator = Roaring64MapSetBitForwardIterator;
728
729
    /**
730
    * Returns an iterator that can be used to access the position of the
731
    * set bits. The running time complexity of a full scan is proportional to
732
    * the
733
    * number
734
    * of set bits: be aware that if you have long strings of 1s, this can be
735
    * very inefficient.
736
    *
737
    * It can be much faster to use the toArray method if you want to
738
    * retrieve the set bits.
739
    */
740
    const_iterator begin() const;
741
742
    /**
743
    * A bogus iterator that can be used together with begin()
744
    * for constructions such as for(auto i = b.begin();
745
    * i!=b.end(); ++i) {}
746
    */
747
    const_iterator end() const;
748
749
private:
750
    phmap::btree_map<uint32_t, roaring::Roaring> roarings {};
751
    bool copyOnWrite {false};
752
8.44M
    static uint32_t highBytes(const uint64_t in) { return uint32_t(in >> 32); }
753
4.22M
    static uint32_t lowBytes(const uint64_t in) { return uint32_t(in); }
754
53.0k
    static uint64_t uniteBytes(const uint32_t highBytes, const uint32_t lowBytes) {
755
53.0k
        return (uint64_t(highBytes) << 32) | uint64_t(lowBytes);
756
53.0k
    }
757
1
    void emplaceOrInsert(const uint32_t key, const roaring::Roaring& value) {
758
1
        roarings.emplace(std::make_pair(key, value));
759
1
    }
760
761
9.12k
    void emplaceOrInsert(const uint32_t key, roaring::Roaring&& value) {
762
9.12k
        roarings.emplace(key, value);
763
9.12k
    }
764
};
765
766
// Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.4.0/cpp/roaring64map.hh
767
// Used to go through the set bits. Not optimally fast, but convenient.
768
class Roaring64MapSetBitForwardIterator {
769
public:
770
    using type_of_iterator = Roaring64MapSetBitForwardIterator;
771
772
    /**
773
     * Provides the location of the set bit.
774
     */
775
2.96k
    uint64_t operator*() const {
776
2.96k
        return Roaring64Map::uniteBytes(map_iter->first, i.current_value);
777
2.96k
    }
778
779
975
    type_of_iterator& operator++() { // ++i, must returned inc. value
780
975
        if (i.has_value) {
781
975
            roaring_advance_uint32_iterator(&i);
782
975
        }
783
1.00k
        while (!i.has_value) {
784
34
            map_iter++;
785
34
            if (map_iter == map_end) return *this;
786
25
            roaring_init_iterator(&map_iter->second.roaring, &i);
787
25
        }
788
966
        return *this;
789
975
    }
790
791
1.80k
    type_of_iterator operator++(int) { // i++, must return orig. value
792
1.80k
        Roaring64MapSetBitForwardIterator orig(*this);
793
1.80k
        roaring_advance_uint32_iterator(&i);
794
1.81k
        while (!i.has_value) {
795
4
            map_iter++;
796
4
            if (map_iter == map_end) return orig;
797
2
            roaring_init_iterator(&map_iter->second.roaring, &i);
798
2
        }
799
1.80k
        return orig;
800
1.80k
    }
801
802
5
    bool move(const uint64_t& x) {
803
5
        map_iter = p.lower_bound(Roaring64Map::highBytes(x));
804
5
        if (map_iter != p.cend()) {
805
5
            roaring_init_iterator(&map_iter->second.roaring, &i);
806
5
            if (map_iter->first == Roaring64Map::highBytes(x)) {
807
5
                if (roaring_move_uint32_iterator_equalorlarger(&i, Roaring64Map::lowBytes(x)))
808
4
                    return true;
809
1
                map_iter++;
810
1
                if (map_iter == map_end) return false;
811
0
                roaring_init_iterator(&map_iter->second.roaring, &i);
812
0
            }
813
0
            return true;
814
5
        }
815
0
        return false;
816
5
    }
817
818
527
    bool operator==(const Roaring64MapSetBitForwardIterator& o) const {
819
527
        if (map_iter == map_end && o.map_iter == o.map_end) return true;
820
519
        if (o.map_iter == o.map_end) return false;
821
1
        return **this == *o;
822
519
    }
823
824
2.30k
    bool operator!=(const Roaring64MapSetBitForwardIterator& o) const {
825
2.30k
        if (map_iter == map_end && o.map_iter == o.map_end) return false;
826
2.29k
        if (o.map_iter == o.map_end) return true;
827
0
        return **this != *o;
828
2.29k
    }
829
5
    Roaring64MapSetBitForwardIterator& operator=(const Roaring64MapSetBitForwardIterator& r) {
830
5
        map_iter = r.map_iter;
831
5
        map_end = r.map_end;
832
5
        i = r.i;
833
5
        return *this;
834
5
    }
835
836
    Roaring64MapSetBitForwardIterator(const Roaring64MapSetBitForwardIterator& r) = default;
837
838
    Roaring64MapSetBitForwardIterator(const Roaring64Map& parent, bool exhausted = false)
839
2.29k
            : p(parent.roarings), map_end(parent.roarings.cend()) {
840
2.29k
        if (exhausted || parent.roarings.empty()) {
841
2.27k
            map_iter = parent.roarings.cend();
842
2.27k
        } else {
843
21
            map_iter = parent.roarings.cbegin();
844
21
            roaring_init_iterator(&map_iter->second.roaring, &i);
845
21
            while (!i.has_value) {
846
0
                map_iter++;
847
0
                if (map_iter == map_end) return;
848
0
                roaring_init_iterator(&map_iter->second.roaring, &i);
849
0
            }
850
21
        }
851
2.29k
    }
852
853
protected:
854
    const phmap::btree_map<uint32_t, roaring::Roaring>& p;
855
    phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_iter {};
856
    phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_end {};
857
    roaring::api::roaring_uint32_iterator_t i {};
858
};
859
860
18
inline Roaring64MapSetBitForwardIterator Roaring64Map::begin() const {
861
18
    return {*this};
862
18
}
863
864
2.26k
inline Roaring64MapSetBitForwardIterator Roaring64Map::end() const {
865
2.26k
    return {*this, true};
866
2.26k
}
867
868
} // namespace detail
869
870
// Represent the in-memory and on-disk structure of Doris's BITMAP data type.
871
// Optimize for the case where the bitmap contains 0 or 1 element which is common
872
// for streaming load scenario.
873
class BitmapValueIterator;
874
class BitmapValue {
875
public:
876
    template <typename T>
877
    using SetContainer = phmap::flat_hash_set<T>;
878
879
    // Construct an empty bitmap.
880
96.9k
    BitmapValue() : _sv(0), _bitmap(nullptr), _type(EMPTY), _is_shared(false) { _set.clear(); }
881
882
    // Construct a bitmap with one element.
883
    explicit BitmapValue(uint64_t value)
884
75
            : _sv(value), _bitmap(nullptr), _type(SINGLE), _is_shared(false) {}
885
886
    // Construct a bitmap from serialized data.
887
3.08k
    explicit BitmapValue(const char* src) : _is_shared(false) {
888
3.08k
        bool res = deserialize(src);
889
3.08k
        DCHECK(res);
890
3.08k
    }
891
892
    // !FIXME: We should rethink the logic here
893
15.2k
    BitmapValue(const BitmapValue& other) {
894
15.2k
        _type = other._type;
895
15.2k
        switch (other._type) {
896
162
        case EMPTY:
897
162
            break;
898
160
        case SINGLE:
899
160
            _sv = other._sv;
900
160
            break;
901
12.5k
        case BITMAP:
902
12.5k
            _bitmap = other._bitmap;
903
12.5k
            break;
904
2.36k
        case SET:
905
2.36k
            _set = other._set;
906
2.36k
            break;
907
15.2k
        }
908
909
15.2k
        if (other._type == BITMAP) {
910
12.5k
            _is_shared = true;
911
            // should also set other's state to shared, so that other bitmap value will
912
            // create a new bitmap when it wants to modify it.
913
12.5k
            other._is_shared = true;
914
12.5k
        }
915
15.2k
    }
916
917
    // !FIXME: We should rethink the logic here
918
19.2k
    BitmapValue(BitmapValue&& other) noexcept {
919
19.2k
        _type = other._type;
920
19.2k
        switch (other._type) {
921
80
        case EMPTY:
922
80
            break;
923
248
        case SINGLE:
924
248
            _sv = other._sv;
925
248
            break;
926
15.9k
        case BITMAP:
927
15.9k
            _bitmap = std::move(other._bitmap);
928
15.9k
            other._bitmap = nullptr;
929
15.9k
            break;
930
3.04k
        case SET:
931
3.04k
            _set = std::move(other._set);
932
3.04k
            break;
933
19.2k
        }
934
19.2k
        _is_shared = other._is_shared;
935
19.2k
        other._type = EMPTY;
936
19.2k
        other._is_shared = false;
937
19.2k
    }
938
939
    // !FIXME: We should rethink the logic here
940
110
    BitmapValue& operator=(const BitmapValue& other) {
941
110
        if (this == &other) {
942
5
            return *this;
943
5
        }
944
105
        reset();
945
105
        _type = other._type;
946
105
        switch (other._type) {
947
20
        case EMPTY:
948
20
            break;
949
32
        case SINGLE:
950
32
            _sv = other._sv;
951
32
            break;
952
26
        case BITMAP:
953
26
            _bitmap = other._bitmap;
954
26
            break;
955
27
        case SET:
956
27
            _set = other._set;
957
27
            break;
958
105
        }
959
960
105
        if (other._type == BITMAP) {
961
26
            _is_shared = true;
962
            // should also set other's state to shared, so that other bitmap value will
963
            // create a new bitmap when it wants to modify it.
964
26
            other._is_shared = true;
965
26
        }
966
105
        return *this;
967
105
    }
968
969
22
    static BitmapValue empty_bitmap() { return BitmapValue {}; }
970
971
25
    BitmapValue& operator=(BitmapValue&& other) noexcept {
972
25
        if (this == &other) {
973
0
            return *this;
974
0
        }
975
25
        reset();
976
977
25
        _type = other._type;
978
25
        switch (other._type) {
979
1
        case EMPTY:
980
1
            break;
981
14
        case SINGLE:
982
14
            _sv = other._sv;
983
14
            break;
984
4
        case BITMAP:
985
4
            _bitmap = std::move(other._bitmap);
986
4
            other._bitmap = nullptr;
987
4
            break;
988
6
        case SET:
989
6
            _set = std::move(other._set);
990
6
            break;
991
25
        }
992
25
        _is_shared = other._is_shared;
993
25
        return *this;
994
25
    }
995
996
    // Construct a bitmap from given elements.
997
62
    explicit BitmapValue(const std::vector<uint64_t>& bits) : _is_shared(false) {
998
62
        if (bits.size() == 0) {
999
0
            _type = EMPTY;
1000
0
            return;
1001
0
        }
1002
1003
62
        if (bits.size() == 1) {
1004
1
            _type = SINGLE;
1005
1
            _sv = bits[0];
1006
1
            return;
1007
1
        }
1008
1009
61
        if (!config::enable_set_in_bitmap_value || bits.size() > SET_TYPE_THRESHOLD) {
1010
26
            _type = BITMAP;
1011
26
            _prepare_bitmap_for_write();
1012
26
            _bitmap->addMany(bits.size(), &bits[0]);
1013
35
        } else {
1014
35
            _type = SET;
1015
137
            for (auto v : bits) {
1016
137
                _set.insert(v);
1017
137
            }
1018
35
        }
1019
61
    }
1020
1021
31
    BitmapTypeCode::type get_type_code() const {
1022
31
        switch (_type) {
1023
1
        case EMPTY:
1024
1
            return BitmapTypeCode::EMPTY;
1025
7
        case SINGLE:
1026
7
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1027
4
                return BitmapTypeCode::SINGLE32;
1028
4
            } else {
1029
3
                return BitmapTypeCode::SINGLE64;
1030
3
            }
1031
9
        case SET:
1032
9
            return BitmapTypeCode::SET;
1033
14
        case BITMAP:
1034
14
            bool is_v1 = (config::bitmap_serialize_version == 1);
1035
14
            if (_bitmap->is32BitsEnough()) {
1036
9
                return is_v1 ? BitmapTypeCode::type::BITMAP32 : BitmapTypeCode::type::BITMAP32_V2;
1037
9
            } else {
1038
5
                return is_v1 ? BitmapTypeCode::type::BITMAP64 : BitmapTypeCode::type::BITMAP64_V2;
1039
5
            }
1040
31
        }
1041
0
        __builtin_unreachable();
1042
31
    }
1043
1044
    template <typename T>
1045
22
    void add_many(const T* values, const size_t count) {
1046
22
        switch (_type) {
1047
17
        case EMPTY:
1048
17
            if (count == 1) {
1049
0
                _sv = values[0];
1050
0
                _type = SINGLE;
1051
17
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1052
270
                for (size_t i = 0; i != count; ++i) {
1053
259
                    _set.insert(values[i]);
1054
259
                }
1055
11
                _type = SET;
1056
11
            } else {
1057
6
                _prepare_bitmap_for_write();
1058
6
                _bitmap->addMany(count, values);
1059
6
                _type = BITMAP;
1060
6
            }
1061
17
            break;
1062
0
        case SINGLE:
1063
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1064
0
                _set.insert(_sv);
1065
0
                for (size_t i = 0; i != count; ++i) {
1066
0
                    _set.insert(values[i]);
1067
0
                }
1068
0
                _type = SET;
1069
0
                _convert_to_bitmap_if_need();
1070
0
            } else {
1071
0
                _prepare_bitmap_for_write();
1072
0
                _bitmap->add(_sv);
1073
0
                _bitmap->addMany(count, values);
1074
0
                _type = BITMAP;
1075
0
            }
1076
0
            break;
1077
1
        case BITMAP:
1078
1
            _prepare_bitmap_for_write();
1079
1
            _bitmap->addMany(count, values);
1080
1
            break;
1081
4
        case SET:
1082
324
            for (size_t i = 0; i != count; ++i) {
1083
320
                _set.insert(values[i]);
1084
320
            }
1085
4
            _convert_to_bitmap_if_need();
1086
4
            break;
1087
22
        }
1088
22
    }
_ZN5doris11BitmapValue8add_manyIiEEvPKT_m
Line
Count
Source
1045
5
    void add_many(const T* values, const size_t count) {
1046
5
        switch (_type) {
1047
5
        case EMPTY:
1048
5
            if (count == 1) {
1049
0
                _sv = values[0];
1050
0
                _type = SINGLE;
1051
5
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1052
78
                for (size_t i = 0; i != count; ++i) {
1053
73
                    _set.insert(values[i]);
1054
73
                }
1055
5
                _type = SET;
1056
5
            } else {
1057
0
                _prepare_bitmap_for_write();
1058
0
                _bitmap->addMany(count, values);
1059
0
                _type = BITMAP;
1060
0
            }
1061
5
            break;
1062
0
        case SINGLE:
1063
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1064
0
                _set.insert(_sv);
1065
0
                for (size_t i = 0; i != count; ++i) {
1066
0
                    _set.insert(values[i]);
1067
0
                }
1068
0
                _type = SET;
1069
0
                _convert_to_bitmap_if_need();
1070
0
            } else {
1071
0
                _prepare_bitmap_for_write();
1072
0
                _bitmap->add(_sv);
1073
0
                _bitmap->addMany(count, values);
1074
0
                _type = BITMAP;
1075
0
            }
1076
0
            break;
1077
0
        case BITMAP:
1078
0
            _prepare_bitmap_for_write();
1079
0
            _bitmap->addMany(count, values);
1080
0
            break;
1081
0
        case SET:
1082
0
            for (size_t i = 0; i != count; ++i) {
1083
0
                _set.insert(values[i]);
1084
0
            }
1085
0
            _convert_to_bitmap_if_need();
1086
0
            break;
1087
5
        }
1088
5
    }
_ZN5doris11BitmapValue8add_manyImEEvPKT_m
Line
Count
Source
1045
17
    void add_many(const T* values, const size_t count) {
1046
17
        switch (_type) {
1047
12
        case EMPTY:
1048
12
            if (count == 1) {
1049
0
                _sv = values[0];
1050
0
                _type = SINGLE;
1051
12
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1052
192
                for (size_t i = 0; i != count; ++i) {
1053
186
                    _set.insert(values[i]);
1054
186
                }
1055
6
                _type = SET;
1056
6
            } else {
1057
6
                _prepare_bitmap_for_write();
1058
6
                _bitmap->addMany(count, values);
1059
6
                _type = BITMAP;
1060
6
            }
1061
12
            break;
1062
0
        case SINGLE:
1063
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1064
0
                _set.insert(_sv);
1065
0
                for (size_t i = 0; i != count; ++i) {
1066
0
                    _set.insert(values[i]);
1067
0
                }
1068
0
                _type = SET;
1069
0
                _convert_to_bitmap_if_need();
1070
0
            } else {
1071
0
                _prepare_bitmap_for_write();
1072
0
                _bitmap->add(_sv);
1073
0
                _bitmap->addMany(count, values);
1074
0
                _type = BITMAP;
1075
0
            }
1076
0
            break;
1077
1
        case BITMAP:
1078
1
            _prepare_bitmap_for_write();
1079
1
            _bitmap->addMany(count, values);
1080
1
            break;
1081
4
        case SET:
1082
324
            for (size_t i = 0; i != count; ++i) {
1083
320
                _set.insert(values[i]);
1084
320
            }
1085
4
            _convert_to_bitmap_if_need();
1086
4
            break;
1087
17
        }
1088
17
    }
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIaEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIsEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIlEEvPKT_m
1089
1090
4.21M
    void add(uint64_t value) {
1091
4.21M
        switch (_type) {
1092
10.5k
        case EMPTY:
1093
10.5k
            if (!config::enable_set_in_bitmap_value) {
1094
5.32k
                _sv = value;
1095
5.32k
                _type = SINGLE;
1096
5.32k
            } else {
1097
5.19k
                _set.insert(value);
1098
5.19k
                _type = SET;
1099
5.19k
            }
1100
10.5k
            break;
1101
5.28k
        case SINGLE:
1102
            //there is no need to convert the type if two variables are equal
1103
5.28k
            if (_sv == value) {
1104
2
                break;
1105
2
            }
1106
5.27k
            if (config::enable_set_in_bitmap_value) {
1107
5
                _set.insert(_sv);
1108
5
                _set.insert(value);
1109
5
                _type = SET;
1110
5.27k
            } else {
1111
5.27k
                _prepare_bitmap_for_write();
1112
5.27k
                _bitmap->add(_sv);
1113
5.27k
                _bitmap->add(value);
1114
5.27k
                _type = BITMAP;
1115
5.27k
            }
1116
5.27k
            break;
1117
4.09M
        case BITMAP:
1118
4.09M
            _prepare_bitmap_for_write();
1119
4.09M
            _bitmap->add(value);
1120
4.09M
            break;
1121
97.7k
        case SET:
1122
97.7k
            _set.insert(value);
1123
97.7k
            _convert_to_bitmap_if_need();
1124
97.7k
            break;
1125
4.21M
        }
1126
4.21M
    }
1127
1128
8
    void remove(uint64_t value) {
1129
8
        switch (_type) {
1130
1
        case EMPTY:
1131
1
            break;
1132
1
        case SINGLE:
1133
            //there is need to convert the type if two variables are equal
1134
1
            if (_sv == value) {
1135
0
                _type = EMPTY;
1136
0
            }
1137
1
            break;
1138
4
        case BITMAP:
1139
4
            _prepare_bitmap_for_write();
1140
4
            _bitmap->remove(value);
1141
4
            _convert_to_smaller_type();
1142
4
            break;
1143
2
        case SET:
1144
2
            _set.erase(value);
1145
2
            _convert_to_smaller_type();
1146
2
            break;
1147
8
        }
1148
8
    }
1149
1150
    // Compute the union between the current bitmap and the provided bitmap.
1151
16
    BitmapValue& operator-=(const BitmapValue& rhs) {
1152
16
        switch (rhs._type) {
1153
4
        case EMPTY:
1154
4
            break;
1155
4
        case SINGLE:
1156
4
            remove(rhs._sv);
1157
4
            break;
1158
4
        case BITMAP:
1159
4
            switch (_type) {
1160
1
            case EMPTY:
1161
1
                break;
1162
1
            case SINGLE:
1163
1
                if (rhs._bitmap->contains(_sv)) {
1164
0
                    _type = EMPTY;
1165
0
                }
1166
1
                break;
1167
1
            case BITMAP:
1168
1
                _prepare_bitmap_for_write();
1169
1
                *_bitmap -= *rhs._bitmap;
1170
1
                _convert_to_smaller_type();
1171
1
                break;
1172
1
            case SET: {
1173
32
                for (auto it = _set.begin(); it != _set.end();) {
1174
31
                    if (rhs.contains(*it)) {
1175
16
                        it = _set.erase(it);
1176
16
                    } else {
1177
15
                        ++it;
1178
15
                    }
1179
31
                }
1180
1
                _convert_to_smaller_type();
1181
1
                break;
1182
0
            }
1183
4
            }
1184
4
            break;
1185
4
        case SET: {
1186
4
            switch (_type) {
1187
1
            case EMPTY:
1188
1
                break;
1189
1
            case SINGLE:
1190
1
                if (rhs._set.contains(_sv)) {
1191
0
                    _type = EMPTY;
1192
0
                }
1193
1
                break;
1194
1
            case BITMAP:
1195
1
                _prepare_bitmap_for_write();
1196
31
                for (auto v : rhs._set) {
1197
31
                    if (_bitmap->contains(v)) {
1198
0
                        _bitmap->remove(v);
1199
0
                    }
1200
31
                }
1201
1
                _convert_to_smaller_type();
1202
1
                break;
1203
1
            case SET: {
1204
32
                for (auto it = _set.begin(); it != _set.end();) {
1205
31
                    if (rhs.contains(*it)) {
1206
14
                        it = _set.erase(it);
1207
17
                    } else {
1208
17
                        ++it;
1209
17
                    }
1210
31
                }
1211
1
                _convert_to_smaller_type();
1212
1
                break;
1213
0
            }
1214
4
            }
1215
4
        }
1216
16
        }
1217
16
        return *this;
1218
16
    }
1219
1220
    // Compute the union between the current bitmap and the provided bitmap.
1221
    // Possible type transitions are:
1222
    // EMPTY  -> SINGLE
1223
    // EMPTY  -> BITMAP
1224
    // SINGLE -> BITMAP
1225
    // !FIXME: We should rethink the logic here
1226
64
    BitmapValue& operator|=(const BitmapValue& rhs) {
1227
64
        switch (rhs._type) {
1228
12
        case EMPTY:
1229
12
            break;
1230
30
        case SINGLE:
1231
30
            add(rhs._sv);
1232
30
            break;
1233
8
        case BITMAP:
1234
8
            switch (_type) {
1235
2
            case EMPTY:
1236
2
                _bitmap = rhs._bitmap;
1237
2
                rhs._is_shared = true;
1238
2
                _is_shared = true;
1239
2
                _type = BITMAP;
1240
2
                break;
1241
3
            case SINGLE:
1242
3
                _bitmap = rhs._bitmap;
1243
3
                rhs._is_shared = true;
1244
3
                _is_shared = true;
1245
3
                _prepare_bitmap_for_write();
1246
3
                _bitmap->add(_sv);
1247
3
                _type = BITMAP;
1248
3
                break;
1249
2
            case BITMAP:
1250
2
                _prepare_bitmap_for_write();
1251
2
                *_bitmap |= *rhs._bitmap;
1252
2
                break;
1253
1
            case SET: {
1254
1
                _prepare_bitmap_for_write();
1255
1
                *_bitmap = *rhs._bitmap;
1256
1257
31
                for (auto v : _set) {
1258
31
                    _bitmap->add(v);
1259
31
                }
1260
1
                _type = BITMAP;
1261
1
                break;
1262
0
            }
1263
8
            }
1264
8
            break;
1265
14
        case SET:
1266
14
            switch (_type) {
1267
1
            case EMPTY:
1268
1
                _set = rhs._set;
1269
1
                _type = SET;
1270
1
                break;
1271
1
            case SINGLE: {
1272
1
                if ((rhs._set.size() + rhs._set.contains(_sv) > SET_TYPE_THRESHOLD)) {
1273
0
                    _prepare_bitmap_for_write();
1274
0
                    _bitmap->add(_sv);
1275
0
                    for (auto v : rhs._set) {
1276
0
                        _bitmap->add(v);
1277
0
                    }
1278
0
                    _type = BITMAP;
1279
1
                } else {
1280
1
                    _set = rhs._set;
1281
1
                    _set.insert(_sv);
1282
1
                    _type = SET;
1283
1
                }
1284
1
                break;
1285
0
            }
1286
1
            case BITMAP:
1287
1
                _prepare_bitmap_for_write();
1288
31
                for (auto v : rhs._set) {
1289
31
                    _bitmap->add(v);
1290
31
                }
1291
1
                break;
1292
11
            case SET: {
1293
58
                for (auto v : rhs._set) {
1294
58
                    _set.insert(v);
1295
58
                }
1296
11
                _convert_to_bitmap_if_need();
1297
11
                break;
1298
0
            }
1299
14
            }
1300
14
            break;
1301
64
        }
1302
64
        return *this;
1303
64
    }
1304
1305
24
    BitmapValue& fastunion(const std::vector<const BitmapValue*>& values) {
1306
24
        std::vector<const detail::Roaring64Map*> bitmaps;
1307
24
        std::vector<uint64_t> single_values;
1308
24
        std::vector<const SetContainer<uint64_t>*> sets;
1309
1.73k
        for (const auto* value : values) {
1310
1.73k
            switch (value->_type) {
1311
5
            case EMPTY:
1312
5
                break;
1313
11
            case SINGLE:
1314
11
                single_values.push_back(value->_sv);
1315
11
                break;
1316
5
            case BITMAP:
1317
5
                bitmaps.push_back(value->_bitmap.get());
1318
5
                break;
1319
1.71k
            case SET:
1320
1.71k
                sets.push_back(&value->_set);
1321
1.71k
                break;
1322
1.73k
            }
1323
1.73k
        }
1324
1325
24
        if (!bitmaps.empty()) {
1326
5
            _prepare_bitmap_for_write();
1327
5
            switch (_type) {
1328
2
            case EMPTY:
1329
2
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1330
2
                break;
1331
2
            case SINGLE:
1332
2
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1333
2
                _bitmap->add(_sv);
1334
2
                break;
1335
1
            case BITMAP:
1336
1
                for (const auto* bitmap : bitmaps) {
1337
1
                    *_bitmap |= *bitmap;
1338
1
                }
1339
1
                break;
1340
0
            case SET: {
1341
0
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1342
0
                for (auto v : _set) {
1343
0
                    _bitmap->add(v);
1344
0
                }
1345
0
                _set.clear();
1346
0
                break;
1347
0
            }
1348
5
            }
1349
5
            _type = BITMAP;
1350
5
        }
1351
1352
24
        if (!sets.empty()) {
1353
1.71k
            for (auto& set : sets) {
1354
1.80k
                for (auto v : *set) {
1355
1.80k
                    _set.insert(v);
1356
1.80k
                }
1357
1.71k
            }
1358
10
            switch (_type) {
1359
9
            case EMPTY:
1360
9
                _type = SET;
1361
9
                break;
1362
0
            case SINGLE: {
1363
0
                _set.insert(_sv);
1364
0
                _type = SET;
1365
0
                break;
1366
0
            }
1367
0
            case BITMAP:
1368
0
                _prepare_bitmap_for_write();
1369
0
                for (auto v : _set) {
1370
0
                    _bitmap->add(v);
1371
0
                }
1372
0
                _type = BITMAP;
1373
0
                _set.clear();
1374
0
                break;
1375
1
            case SET: {
1376
1
                break;
1377
0
            }
1378
10
            }
1379
10
            if (_type == SET) {
1380
10
                _convert_to_bitmap_if_need();
1381
10
            }
1382
10
        }
1383
1384
24
        if (_type == EMPTY && single_values.size() == 1) {
1385
2
            if (config::enable_set_in_bitmap_value) {
1386
1
                _type = SET;
1387
1
                _set.insert(single_values[0]);
1388
1
            } else {
1389
1
                _sv = single_values[0];
1390
1
                _type = SINGLE;
1391
1
            }
1392
22
        } else if (!single_values.empty()) {
1393
6
            switch (_type) {
1394
2
            case EMPTY:
1395
3
            case SINGLE:
1396
3
                if (config::enable_set_in_bitmap_value) {
1397
2
                    _set.insert(single_values.cbegin(), single_values.cend());
1398
2
                    if (_type == SINGLE) {
1399
0
                        _set.insert(_sv);
1400
0
                    }
1401
2
                    _type = SET;
1402
2
                    _convert_to_bitmap_if_need();
1403
2
                } else {
1404
1
                    _prepare_bitmap_for_write();
1405
1
                    _bitmap->addMany(single_values.size(), single_values.data());
1406
1
                    if (_type == SINGLE) {
1407
1
                        _bitmap->add(_sv);
1408
1
                    }
1409
1
                    _type = BITMAP;
1410
1
                    _convert_to_smaller_type();
1411
1
                }
1412
3
                break;
1413
3
            case BITMAP: {
1414
3
                _prepare_bitmap_for_write();
1415
3
                _bitmap->addMany(single_values.size(), single_values.data());
1416
3
                break;
1417
2
            }
1418
0
            case SET:
1419
0
                _set.insert(single_values.cbegin(), single_values.cend());
1420
0
                _convert_to_bitmap_if_need();
1421
0
                break;
1422
6
            }
1423
6
        }
1424
1425
24
        return *this;
1426
24
    }
1427
1428
    // Compute the intersection between the current bitmap and the provided bitmap.
1429
    // Possible type transitions are:
1430
    // SINGLE -> EMPTY
1431
    // BITMAP -> EMPTY
1432
    // BITMAP -> SINGLE
1433
51
    BitmapValue& operator&=(const BitmapValue& rhs) {
1434
51
        switch (rhs._type) {
1435
14
        case EMPTY:
1436
14
            reset(); // empty & any = empty
1437
14
            break;
1438
10
        case SINGLE:
1439
10
            switch (_type) {
1440
2
            case EMPTY:
1441
2
                break;
1442
3
            case SINGLE:
1443
3
                if (_sv != rhs._sv) {
1444
2
                    _type = EMPTY;
1445
2
                }
1446
3
                break;
1447
4
            case BITMAP:
1448
4
                if (!_bitmap->contains(rhs._sv)) {
1449
1
                    _type = EMPTY;
1450
3
                } else {
1451
3
                    _type = SINGLE;
1452
3
                    _sv = rhs._sv;
1453
3
                }
1454
4
                _bitmap.reset();
1455
4
                _is_shared = false;
1456
4
                break;
1457
1
            case SET:
1458
1
                if (!_set.contains(rhs._sv)) {
1459
0
                    _type = EMPTY;
1460
1
                } else {
1461
1
                    _type = SINGLE;
1462
1
                    _sv = rhs._sv;
1463
1
                }
1464
1
                _set.clear();
1465
1
                break;
1466
10
            }
1467
10
            break;
1468
10
        case BITMAP:
1469
10
            switch (_type) {
1470
2
            case EMPTY:
1471
2
                break;
1472
3
            case SINGLE:
1473
3
                if (!rhs._bitmap->contains(_sv)) {
1474
2
                    _type = EMPTY;
1475
2
                }
1476
3
                break;
1477
4
            case BITMAP:
1478
4
                _prepare_bitmap_for_write();
1479
4
                *_bitmap &= *rhs._bitmap;
1480
4
                _convert_to_smaller_type();
1481
4
                break;
1482
1
            case SET:
1483
32
                for (auto it = _set.begin(); it != _set.end();) {
1484
31
                    if (!rhs._bitmap->contains(*it)) {
1485
15
                        it = _set.erase(it);
1486
16
                    } else {
1487
16
                        ++it;
1488
16
                    }
1489
31
                }
1490
1
                _convert_to_smaller_type();
1491
1
                break;
1492
10
            }
1493
10
            break;
1494
17
        case SET:
1495
17
            switch (_type) {
1496
1
            case EMPTY:
1497
1
                break;
1498
1
            case SINGLE:
1499
1
                if (!rhs._set.contains(_sv)) {
1500
1
                    _type = EMPTY;
1501
1
                }
1502
1
                break;
1503
1
            case BITMAP:
1504
1
                _prepare_bitmap_for_write();
1505
31
                for (auto v : rhs._set) {
1506
31
                    if (_bitmap->contains(v)) {
1507
0
                        _set.insert(v);
1508
0
                    }
1509
31
                }
1510
1
                _type = SET;
1511
1
                _bitmap.reset();
1512
1
                _is_shared = false;
1513
1
                _convert_to_smaller_type();
1514
1
                break;
1515
14
            case SET:
1516
79
                for (auto it = _set.begin(); it != _set.end();) {
1517
65
                    if (!rhs._set.contains(*it)) {
1518
37
                        it = _set.erase(it);
1519
37
                    } else {
1520
28
                        ++it;
1521
28
                    }
1522
65
                }
1523
14
                _convert_to_smaller_type();
1524
14
                break;
1525
17
            }
1526
17
            break;
1527
51
        }
1528
51
        return *this;
1529
51
    }
1530
1531
    // Compute the symmetric union between the current bitmap and the provided bitmap.
1532
    // Possible type transitions are:
1533
    // SINGLE -> EMPTY
1534
    // BITMAP -> EMPTY
1535
    // BITMAP -> SINGLE
1536
    // !FIXME: We should rethink the logic here
1537
29
    BitmapValue& operator^=(const BitmapValue& rhs) {
1538
29
        switch (rhs._type) {
1539
8
        case EMPTY:
1540
8
            break;
1541
4
        case SINGLE:
1542
4
            switch (_type) {
1543
1
            case EMPTY:
1544
1
                add(rhs._sv);
1545
1
                break;
1546
1
            case SINGLE:
1547
1
                if (_sv == rhs._sv) {
1548
0
                    _type = EMPTY;
1549
1
                } else {
1550
1
                    add(rhs._sv);
1551
1
                }
1552
1
                break;
1553
1
            case BITMAP:
1554
1
                if (!_bitmap->contains(rhs._sv)) {
1555
0
                    add(rhs._sv);
1556
1
                } else {
1557
1
                    _prepare_bitmap_for_write();
1558
1
                    _bitmap->remove(rhs._sv);
1559
1
                }
1560
1
                break;
1561
1
            case SET:
1562
1
                if (!_set.contains(rhs._sv)) {
1563
0
                    _set.insert(rhs._sv);
1564
1
                } else {
1565
1
                    _set.erase(rhs._sv);
1566
1
                }
1567
1
                break;
1568
4
            }
1569
4
            break;
1570
6
        case BITMAP:
1571
6
            switch (_type) {
1572
1
            case EMPTY:
1573
1
                _bitmap = rhs._bitmap;
1574
1
                rhs._is_shared = true;
1575
1
                _is_shared = true;
1576
1
                _type = BITMAP;
1577
1
                break;
1578
1
            case SINGLE:
1579
1
                _bitmap = rhs._bitmap;
1580
1
                rhs._is_shared = true;
1581
1
                _is_shared = true;
1582
1
                _type = BITMAP;
1583
1
                _prepare_bitmap_for_write();
1584
1
                if (!rhs._bitmap->contains(_sv)) {
1585
1
                    _bitmap->add(_sv);
1586
1
                } else {
1587
0
                    _bitmap->remove(_sv);
1588
0
                }
1589
1
                break;
1590
3
            case BITMAP:
1591
3
                _prepare_bitmap_for_write();
1592
3
                *_bitmap ^= *rhs._bitmap;
1593
3
                _convert_to_smaller_type();
1594
3
                break;
1595
1
            case SET:
1596
1
                _prepare_bitmap_for_write();
1597
1
                *_bitmap = *rhs._bitmap;
1598
31
                for (auto v : _set) {
1599
31
                    if (_bitmap->contains(v)) {
1600
16
                        _bitmap->remove(v);
1601
16
                    } else {
1602
15
                        _bitmap->add(v);
1603
15
                    }
1604
31
                }
1605
1
                _type = BITMAP;
1606
1
                _convert_to_smaller_type();
1607
1
                break;
1608
6
            }
1609
6
            break;
1610
11
        case SET:
1611
11
            switch (_type) {
1612
1
            case EMPTY:
1613
1
                _set = rhs._set;
1614
1
                _type = SET;
1615
1
                break;
1616
1
            case SINGLE:
1617
1
                _set = rhs._set;
1618
1
                if (!rhs._set.contains(_sv)) {
1619
1
                    _set.insert(_sv);
1620
1
                } else {
1621
0
                    _set.erase(_sv);
1622
0
                }
1623
1
                _type = SET;
1624
1
                break;
1625
1
            case BITMAP:
1626
1
                _prepare_bitmap_for_write();
1627
31
                for (auto v : rhs._set) {
1628
31
                    if (_bitmap->contains(v)) {
1629
0
                        _bitmap->remove(v);
1630
31
                    } else {
1631
31
                        _bitmap->add(v);
1632
31
                    }
1633
31
                }
1634
1
                _convert_to_smaller_type();
1635
1
                break;
1636
8
            case SET:
1637
49
                for (auto v : rhs._set) {
1638
49
                    if (_set.contains(v)) {
1639
22
                        _set.erase(v);
1640
27
                    } else {
1641
27
                        _set.insert(v);
1642
27
                    }
1643
49
                }
1644
8
                _convert_to_smaller_type();
1645
8
                break;
1646
11
            }
1647
11
            break;
1648
29
        }
1649
29
        return *this;
1650
29
    }
1651
1652
    // check if value x is present
1653
9.19k
    bool contains(uint64_t x) const {
1654
9.19k
        switch (_type) {
1655
0
        case EMPTY:
1656
0
            return false;
1657
14
        case SINGLE:
1658
14
            return _sv == x;
1659
8.93k
        case BITMAP:
1660
8.93k
            return _bitmap->contains(x);
1661
238
        case SET:
1662
238
            return _set.contains(x);
1663
9.19k
        }
1664
0
        return false;
1665
9.19k
    }
1666
1667
    // true if contains a value that belongs to the range [left, right].
1668
    bool contains_any(uint64_t left, uint64_t right) const;
1669
1670
240k
    uint64_t cardinality() const {
1671
240k
        switch (_type) {
1672
240k
        case EMPTY:
1673
240k
            return 0;
1674
36
        case SINGLE:
1675
36
            return 1;
1676
210
        case BITMAP:
1677
210
            return _bitmap->cardinality();
1678
74
        case SET:
1679
74
            return _set.size();
1680
240k
        }
1681
0
        return 0;
1682
240k
    }
1683
1684
60.0k
    uint64_t and_cardinality(const BitmapValue& rhs) const {
1685
60.0k
        switch (rhs._type) {
1686
60.0k
        case EMPTY:
1687
60.0k
            return 0;
1688
4
        case SINGLE:
1689
4
            switch (_type) {
1690
1
            case EMPTY:
1691
1
                return 0;
1692
1
            case SINGLE:
1693
1
                return _sv == rhs._sv;
1694
1
            case BITMAP:
1695
1
                return _bitmap->contains(rhs._sv);
1696
1
            case SET:
1697
1
                return _set.contains(rhs._sv);
1698
4
            }
1699
0
            break;
1700
7
        case BITMAP:
1701
7
            switch (_type) {
1702
1
            case EMPTY:
1703
1
                return 0;
1704
1
            case SINGLE:
1705
1
                return rhs._bitmap->contains(_sv);
1706
4
            case BITMAP:
1707
4
                return _bitmap->andCardinality(*rhs._bitmap);
1708
1
            case SET: {
1709
1
                uint64_t cardinality = 0;
1710
31
                for (auto v : _set) {
1711
31
                    if (rhs._bitmap->contains(v)) {
1712
16
                        ++cardinality;
1713
16
                    }
1714
31
                }
1715
1
                return cardinality;
1716
0
            }
1717
7
            }
1718
0
            break;
1719
4
        case SET:
1720
4
            switch (_type) {
1721
1
            case EMPTY:
1722
1
                return 0;
1723
1
            case SINGLE:
1724
1
                return rhs._set.contains(_sv);
1725
1
            case BITMAP: {
1726
1
                uint64_t cardinality = 0;
1727
31
                for (auto v : rhs._set) {
1728
31
                    if (_bitmap->contains(v)) {
1729
0
                        ++cardinality;
1730
0
                    }
1731
31
                }
1732
1
                return cardinality;
1733
0
            }
1734
1
            case SET: {
1735
1
                uint64_t cardinality = 0;
1736
31
                for (auto v : _set) {
1737
31
                    if (rhs._set.contains(v)) {
1738
14
                        ++cardinality;
1739
14
                    }
1740
31
                }
1741
1
                return cardinality;
1742
0
            }
1743
4
            }
1744
60.0k
        }
1745
0
        return 0;
1746
60.0k
    }
1747
1748
60.0k
    uint64_t or_cardinality(const BitmapValue& rhs) const {
1749
60.0k
        switch (rhs._type) {
1750
59.9k
        case EMPTY:
1751
59.9k
            return cardinality();
1752
0
        case SINGLE:
1753
0
            switch (_type) {
1754
0
            case EMPTY:
1755
0
                return 1;
1756
0
            case SINGLE:
1757
0
                return 1 + (_sv != rhs._sv);
1758
0
            case BITMAP:
1759
0
                return cardinality() + !_bitmap->contains(rhs._sv);
1760
0
            case SET:
1761
0
                return _set.size() + !_set.contains(rhs._sv);
1762
0
            }
1763
0
            break;
1764
3
        case BITMAP:
1765
3
            switch (_type) {
1766
0
            case EMPTY:
1767
0
                return rhs.cardinality();
1768
0
            case SINGLE:
1769
0
                return rhs.cardinality() + !rhs._bitmap->contains(_sv);
1770
3
            case BITMAP:
1771
3
                return _bitmap->orCardinality(*rhs._bitmap);
1772
0
            case SET: {
1773
0
                uint64_t cardinality = rhs._bitmap->cardinality();
1774
0
                for (auto v : _set) {
1775
0
                    if (!rhs._bitmap->contains(v)) {
1776
0
                        ++cardinality;
1777
0
                    }
1778
0
                }
1779
0
                return cardinality;
1780
0
            }
1781
3
            }
1782
0
            break;
1783
0
        case SET:
1784
0
            switch (_type) {
1785
0
            case EMPTY:
1786
0
                return rhs.cardinality();
1787
0
            case SINGLE:
1788
0
                return rhs.cardinality() + !rhs._set.contains(_sv);
1789
0
            case BITMAP: {
1790
0
                uint64_t cardinality = _bitmap->cardinality();
1791
0
                for (auto v : rhs._set) {
1792
0
                    if (!_bitmap->contains(v)) {
1793
0
                        ++cardinality;
1794
0
                    }
1795
0
                }
1796
0
                return cardinality;
1797
0
            }
1798
0
            case SET: {
1799
0
                uint64_t cardinality = _set.size();
1800
0
                for (auto v : _set) {
1801
0
                    if (!rhs._set.contains(v)) {
1802
0
                        ++cardinality;
1803
0
                    }
1804
0
                }
1805
0
                return cardinality;
1806
0
            }
1807
0
            }
1808
60.0k
        }
1809
0
        return 0;
1810
60.0k
    }
1811
1812
24
    uint64_t andnot_cardinality(const BitmapValue& rhs) const {
1813
24
        switch (rhs._type) {
1814
8
        case EMPTY:
1815
8
            return cardinality();
1816
4
        case SINGLE:
1817
4
            switch (_type) {
1818
1
            case EMPTY:
1819
1
                return 0;
1820
1
            case SINGLE:
1821
1
                return 1 - (_sv == rhs._sv);
1822
1
            case BITMAP:
1823
1
                return cardinality() - _bitmap->contains(rhs._sv);
1824
1
            case SET:
1825
1
                return cardinality() - _set.contains(rhs._sv);
1826
4
            }
1827
0
            break;
1828
4
        case BITMAP:
1829
4
            switch (_type) {
1830
1
            case EMPTY:
1831
1
                return 0;
1832
1
            case SINGLE:
1833
1
                return !rhs._bitmap->contains(_sv);
1834
1
            case BITMAP:
1835
1
                return _bitmap->andnotCardinality(*rhs._bitmap);
1836
1
            case SET: {
1837
1
                uint64_t cardinality = _set.size();
1838
31
                for (auto v : _set) {
1839
31
                    if (rhs._bitmap->contains(v)) {
1840
16
                        cardinality -= 1;
1841
16
                    }
1842
31
                }
1843
1
                return cardinality;
1844
0
            }
1845
4
            }
1846
0
            break;
1847
8
        case SET:
1848
8
            switch (_type) {
1849
1
            case EMPTY:
1850
1
                return 0;
1851
1
            case SINGLE:
1852
1
                return !rhs._set.contains(_sv);
1853
1
            case BITMAP: {
1854
1
                uint64_t cardinality = _bitmap->cardinality();
1855
31
                for (auto v : rhs._set) {
1856
31
                    if (_bitmap->contains(v)) {
1857
0
                        cardinality -= 1;
1858
0
                    }
1859
31
                }
1860
1
                return cardinality;
1861
0
            }
1862
5
            case SET: {
1863
5
                uint64_t cardinality = _set.size();
1864
33
                for (auto v : rhs._set) {
1865
33
                    if (_set.contains(v)) {
1866
16
                        cardinality -= 1;
1867
16
                    }
1868
33
                }
1869
5
                return cardinality;
1870
0
            }
1871
8
            }
1872
24
        }
1873
0
        return 0;
1874
24
    }
1875
1876
    // Return how many bytes are required to serialize this bitmap.
1877
    // See BitmapTypeCode for the serialized format.
1878
    // !FIXME: We should rethink the logic here
1879
149k
    size_t getSizeInBytes() const {
1880
149k
        size_t res = 0;
1881
149k
        switch (_type) {
1882
120k
        case EMPTY:
1883
120k
            res = 1;
1884
120k
            break;
1885
55
        case SINGLE:
1886
55
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1887
50
                res = 1 + sizeof(uint32_t);
1888
50
            } else {
1889
5
                res = 1 + sizeof(uint64_t);
1890
5
            }
1891
55
            break;
1892
29.1k
        case BITMAP:
1893
29.1k
            _prepare_bitmap_for_write();
1894
29.1k
            _bitmap->runOptimize();
1895
29.1k
            _bitmap->shrinkToFit();
1896
29.1k
            res = _bitmap->getSizeInBytes(config::bitmap_serialize_version);
1897
29.1k
            break;
1898
619
        case SET:
1899
            /// 1 byte for type, 1 byte for count
1900
619
            res = 2 + sizeof(uint64_t) * _set.size();
1901
619
            break;
1902
149k
        }
1903
149k
        return res;
1904
149k
    }
1905
1906
    // Serialize the bitmap value to dst, which should be large enough.
1907
    // Client should call `getSizeInBytes` first to get the serialized size.
1908
81.5k
    void write_to(char* dst) const {
1909
81.5k
        switch (_type) {
1910
60.0k
        case EMPTY:
1911
60.0k
            *dst = BitmapTypeCode::EMPTY;
1912
60.0k
            break;
1913
45
        case SINGLE:
1914
45
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1915
42
                *(dst++) = BitmapTypeCode::SINGLE32;
1916
42
                encode_fixed32_le(reinterpret_cast<uint8_t*>(dst), static_cast<uint32_t>(_sv));
1917
42
            } else {
1918
3
                *(dst++) = BitmapTypeCode::SINGLE64;
1919
3
                encode_fixed64_le(reinterpret_cast<uint8_t*>(dst), _sv);
1920
3
            }
1921
45
            break;
1922
488
        case SET:
1923
488
            DCHECK(config::enable_set_in_bitmap_value);
1924
488
            *dst = BitmapTypeCode::SET;
1925
488
            ++dst;
1926
488
            *dst = static_cast<uint8_t>(_set.size());
1927
488
            ++dst;
1928
7.81k
            for (auto v : _set) {
1929
7.81k
                encode_fixed64_le(reinterpret_cast<uint8_t*>(dst), v);
1930
7.81k
                dst += sizeof(uint64_t);
1931
7.81k
            }
1932
488
            break;
1933
20.9k
        case BITMAP:
1934
20.9k
            _bitmap->write(dst, config::bitmap_serialize_version);
1935
20.9k
            break;
1936
81.5k
        }
1937
81.5k
    }
1938
1939
    // Deserialize a bitmap value from `src`.
1940
    // Return false if `src` begins with unknown type code, true otherwise.
1941
69.3k
    bool deserialize(const char* src) {
1942
69.3k
        switch (*src) {
1943
60.0k
        case BitmapTypeCode::EMPTY:
1944
60.0k
            _type = EMPTY;
1945
60.0k
            break;
1946
15
        case BitmapTypeCode::SINGLE32:
1947
15
            _type = SINGLE;
1948
15
            _sv = decode_fixed32_le(reinterpret_cast<const uint8_t*>(src + 1));
1949
15
            if (config::enable_set_in_bitmap_value) {
1950
2
                _type = SET;
1951
2
                _set.insert(_sv);
1952
2
            }
1953
15
            break;
1954
3
        case BitmapTypeCode::SINGLE64:
1955
3
            _type = SINGLE;
1956
3
            _sv = decode_fixed64_le(reinterpret_cast<const uint8_t*>(src + 1));
1957
3
            if (config::enable_set_in_bitmap_value) {
1958
1
                _type = SET;
1959
1
                _set.insert(_sv);
1960
1
            }
1961
3
            break;
1962
5.13k
        case BitmapTypeCode::BITMAP32:
1963
5.13k
        case BitmapTypeCode::BITMAP64:
1964
9.10k
        case BitmapTypeCode::BITMAP32_V2:
1965
9.10k
        case BitmapTypeCode::BITMAP64_V2:
1966
9.10k
            _type = BITMAP;
1967
9.10k
            _is_shared = false;
1968
9.10k
            try {
1969
9.10k
                _bitmap = std::make_shared<detail::Roaring64Map>(detail::Roaring64Map::read(src));
1970
9.10k
            } catch (const std::runtime_error& e) {
1971
1
                LOG(ERROR) << "Decode roaring bitmap failed, " << e.what();
1972
1
                return false;
1973
1
            }
1974
9.10k
            break;
1975
227
        case BitmapTypeCode::SET: {
1976
227
            _type = SET;
1977
227
            ++src;
1978
227
            uint8_t count = *src;
1979
227
            ++src;
1980
227
            CHECK(count <= SET_TYPE_THRESHOLD) << "bitmap value with incorrect set count";
1981
227
            _set.reserve(count);
1982
3.86k
            for (uint8_t i = 0; i != count; ++i, src += sizeof(uint64_t)) {
1983
3.63k
                _set.insert(decode_fixed64_le(reinterpret_cast<const uint8_t*>(src)));
1984
3.63k
            }
1985
227
            CHECK_EQ(count, _set.size()) << "bitmap value with incorrect set count";
1986
1987
227
            if (!config::enable_set_in_bitmap_value) {
1988
0
                _prepare_bitmap_for_write();
1989
0
                for (auto v : _set) {
1990
0
                    _bitmap->add(v);
1991
0
                }
1992
0
                _type = BITMAP;
1993
0
                _set.clear();
1994
0
            }
1995
227
            break;
1996
9.10k
        }
1997
1
        case BitmapTypeCode::SET_V2: {
1998
1
            uint32_t size = 0;
1999
1
            memcpy(&size, src + 1, sizeof(uint32_t));
2000
1
            src += sizeof(uint32_t) + 1;
2001
2002
1
            if (!config::enable_set_in_bitmap_value || size > SET_TYPE_THRESHOLD) {
2003
0
                _type = BITMAP;
2004
0
                _prepare_bitmap_for_write();
2005
2006
0
                for (int i = 0; i < size; ++i) {
2007
0
                    uint64_t key {};
2008
0
                    memcpy(&key, src, sizeof(uint64_t));
2009
0
                    _bitmap->add(key);
2010
0
                    src += sizeof(uint64_t);
2011
0
                }
2012
1
            } else {
2013
1
                _type = SET;
2014
1
                _set.reserve(size);
2015
2016
3
                for (int i = 0; i < size; ++i) {
2017
2
                    uint64_t key {};
2018
2
                    memcpy(&key, src, sizeof(uint64_t));
2019
2
                    _set.insert(key);
2020
2
                    src += sizeof(uint64_t);
2021
2
                }
2022
1
            }
2023
1
            break;
2024
9.10k
        }
2025
2
        default:
2026
2
            LOG(ERROR) << "BitmapTypeCode invalid, should between: " << BitmapTypeCode::EMPTY
2027
2
                       << " and " << BitmapTypeCode::BITMAP64 << " actual is "
2028
2
                       << static_cast<int>(*src);
2029
2
            return false;
2030
69.3k
        }
2031
69.3k
        return true;
2032
69.3k
    }
2033
2034
13
    int64_t minimum() const {
2035
13
        switch (_type) {
2036
2
        case SINGLE:
2037
2
            return _sv;
2038
4
        case BITMAP:
2039
4
            return _bitmap->minimum();
2040
5
        case SET:
2041
5
            return _min_in_set();
2042
2
        default:
2043
2
            return 0;
2044
13
        }
2045
13
    }
2046
2047
    // TODO limit string size to avoid OOM
2048
4.33k
    std::string to_string() const {
2049
4.33k
        std::stringstream ss;
2050
4.33k
        switch (_type) {
2051
16
        case EMPTY:
2052
16
            break;
2053
39
        case SINGLE:
2054
39
            ss << _sv;
2055
39
            break;
2056
4.01k
        case BITMAP: {
2057
4.01k
            struct IterCtx {
2058
4.01k
                std::stringstream* ss = nullptr;
2059
4.01k
                bool first = true;
2060
4.01k
            } iter_ctx;
2061
4.01k
            iter_ctx.ss = &ss;
2062
2063
4.01k
            _bitmap->iterate(
2064
2.09M
                    [](uint64_t value, void* c) -> bool {
2065
2.09M
                        auto ctx = reinterpret_cast<IterCtx*>(c);
2066
2.09M
                        if (ctx->first) {
2067
4.01k
                            ctx->first = false;
2068
2.09M
                        } else {
2069
2.09M
                            (*ctx->ss) << ",";
2070
2.09M
                        }
2071
2.09M
                        (*ctx->ss) << value;
2072
2.09M
                        return true;
2073
2.09M
                    },
2074
4.01k
                    &iter_ctx);
2075
4.01k
            break;
2076
0
        }
2077
263
        case SET: {
2078
263
            struct IterCtx {
2079
263
                std::stringstream* ss = nullptr;
2080
263
                bool first = true;
2081
263
            } iter_ctx;
2082
263
            iter_ctx.ss = &ss;
2083
2084
263
            std::vector<uint64_t> values(_set.begin(), _set.end());
2085
263
            std::sort(values.begin(), values.end());
2086
2087
4.11k
            for (auto v : values) {
2088
4.11k
                if (iter_ctx.first) {
2089
263
                    iter_ctx.first = false;
2090
3.85k
                } else {
2091
3.85k
                    (*iter_ctx.ss) << ",";
2092
3.85k
                }
2093
4.11k
                (*iter_ctx.ss) << v;
2094
4.11k
            }
2095
263
            break;
2096
0
        }
2097
4.33k
        }
2098
4.33k
        return ss.str();
2099
4.33k
    }
2100
2101
14
    int64_t maximum() const {
2102
14
        switch (_type) {
2103
2
        case SINGLE:
2104
2
            return _sv;
2105
5
        case BITMAP:
2106
5
            return _bitmap->maximum();
2107
5
        case SET:
2108
5
            return _max_in_set();
2109
2
        default:
2110
2
            return 0;
2111
14
        }
2112
14
    }
2113
2114
4
    uint64_t max(bool* empty) const {
2115
4
        return min_or_max(empty, [&]() { return maximum(); });
2116
4
    }
2117
2118
4
    uint64_t min(bool* empty) const {
2119
4
        return min_or_max(empty, [&]() { return minimum(); });
2120
4
    }
2121
2122
5
    uint64_t _min_in_set() const {
2123
5
        DCHECK_EQ(_type, SET);
2124
5
        return *std::min_element(_set.begin(), _set.end());
2125
5
    }
2126
2127
5
    uint64_t _max_in_set() const {
2128
5
        DCHECK_EQ(_type, SET);
2129
5
        return *std::max_element(_set.begin(), _set.end());
2130
5
    }
2131
2132
6
    bool empty() const { return _type == EMPTY; }
2133
2134
    /**
2135
     * Return new set with specified range (not include the range_end)
2136
     */
2137
    int64_t sub_range(const int64_t& range_start, const int64_t& range_end,
2138
7
                      BitmapValue* ret_bitmap) const {
2139
7
        switch (_type) {
2140
1
        case EMPTY:
2141
1
            return 0;
2142
2
        case SINGLE: {
2143
            //only single value, so _sv must in [range_start,range_end)
2144
2
            if (range_start <= _sv && _sv < range_end) {
2145
1
                ret_bitmap->add(_sv);
2146
1
                return 1;
2147
1
            } else {
2148
1
                return 0;
2149
1
            }
2150
2
        }
2151
2
        case BITMAP: {
2152
2
            int64_t count = 0;
2153
146
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2154
145
                if (*it < range_start) {
2155
0
                    continue;
2156
0
                }
2157
145
                if (*it < range_end) {
2158
144
                    ret_bitmap->add(*it);
2159
144
                    ++count;
2160
144
                } else {
2161
1
                    break;
2162
1
                }
2163
145
            }
2164
2
            return count;
2165
2
        }
2166
2
        case SET: {
2167
2
            int64_t count = 0;
2168
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2169
2
            std::sort(values.begin(), values.end());
2170
6
            for (auto it = values.begin(); it != values.end(); ++it) {
2171
4
                if (*it < range_start || *it >= range_end) {
2172
1
                    continue;
2173
1
                }
2174
3
                ret_bitmap->add(*it);
2175
3
                ++count;
2176
3
            }
2177
2
            return count;
2178
2
        }
2179
7
        }
2180
0
        return 0;
2181
7
    }
2182
2183
    /**
2184
     * Return new set with specified start and limit
2185
     * @param range_start the start value for the range
2186
     * @param cardinality_limit the length of the subset
2187
     * @return the real count for subset, maybe less than cardinality_limit
2188
     */
2189
    int64_t sub_limit(const int64_t& range_start, const int64_t& cardinality_limit,
2190
9
                      BitmapValue* ret_bitmap) const {
2191
9
        switch (_type) {
2192
1
        case EMPTY:
2193
1
            return 0;
2194
2
        case SINGLE: {
2195
            //only single value, so range_start must less than _sv
2196
2
            if (range_start > _sv) {
2197
0
                return 0;
2198
2
            } else {
2199
2
                ret_bitmap->add(_sv);
2200
2
                return 1;
2201
2
            }
2202
2
        }
2203
4
        case BITMAP: {
2204
4
            int64_t count = 0;
2205
31
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2206
30
                if (*it < range_start) {
2207
8
                    continue;
2208
8
                }
2209
22
                if (count < cardinality_limit) {
2210
19
                    ret_bitmap->add(*it);
2211
19
                    ++count;
2212
19
                } else {
2213
3
                    break;
2214
3
                }
2215
22
            }
2216
4
            return count;
2217
2
        }
2218
2
        case SET: {
2219
2
            int64_t count = 0;
2220
2221
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2222
2
            std::sort(values.begin(), values.end());
2223
6
            for (auto it = values.begin(); it != values.end(); ++it) {
2224
4
                if (*it < range_start) {
2225
0
                    continue;
2226
0
                }
2227
4
                if (count < cardinality_limit) {
2228
4
                    ret_bitmap->add(*it);
2229
4
                    ++count;
2230
4
                } else {
2231
0
                    break;
2232
0
                }
2233
4
            }
2234
2
            return count;
2235
2
        }
2236
9
        }
2237
0
        return 0;
2238
9
    }
2239
2240
    /**
2241
     * Returns the bitmap elements, starting from the offset position.
2242
     * The number of returned elements is limited by the cardinality_limit parameter.
2243
     * Analog of the substring string function, but for bitmap.
2244
     */
2245
    int64_t offset_limit(const int64_t& offset, const int64_t& limit,
2246
8
                         BitmapValue* ret_bitmap) const {
2247
8
        switch (_type) {
2248
1
        case EMPTY:
2249
1
            return 0;
2250
2
        case SINGLE: {
2251
            //only single value, so offset must start 0
2252
2
            if (offset == 0) {
2253
1
                ret_bitmap->add(_sv);
2254
1
                return 1;
2255
1
            } else {
2256
1
                return 0;
2257
1
            }
2258
2
        }
2259
5
        default:
2260
5
            break;
2261
8
        }
2262
5
        if (_type == BITMAP) {
2263
3
            if (std::abs(offset) >= _bitmap->cardinality()) {
2264
0
                return 0;
2265
0
            }
2266
3
            int64_t abs_offset = offset;
2267
3
            if (offset < 0) {
2268
0
                abs_offset = _bitmap->cardinality() + offset;
2269
0
            }
2270
2271
3
            int64_t count = 0;
2272
3
            int64_t offset_count = 0;
2273
3
            auto it = _bitmap->begin();
2274
123
            for (; it != _bitmap->end() && offset_count < abs_offset; ++it) {
2275
120
                ++offset_count;
2276
120
            }
2277
33
            for (; it != _bitmap->end() && count < limit; ++it, ++count) {
2278
30
                ret_bitmap->add(*it);
2279
30
            }
2280
3
            return count;
2281
3
        } else {
2282
2
            if (std::abs(offset) > _set.size()) {
2283
0
                return 0;
2284
0
            }
2285
2286
2
            int64_t abs_offset = offset;
2287
2
            if (offset < 0) {
2288
0
                abs_offset = _set.size() + offset;
2289
0
            }
2290
2291
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2292
2
            std::sort(values.begin(), values.end());
2293
2294
2
            int64_t count = 0;
2295
2
            size_t index = 0;
2296
4
            for (auto v : values) {
2297
4
                if (index < abs_offset) {
2298
1
                    ++index;
2299
1
                    continue;
2300
1
                }
2301
3
                if (count == limit || index == values.size()) {
2302
0
                    break;
2303
0
                }
2304
3
                ++count;
2305
3
                ++index;
2306
3
                ret_bitmap->add(v);
2307
3
            }
2308
2
            return count;
2309
2
        }
2310
5
    }
2311
2312
    //for function bitmap_to_array
2313
3
    void to_array(vectorized::PaddedPODArray<int64_t>& data) const {
2314
3
        switch (_type) {
2315
0
        case EMPTY:
2316
0
            break;
2317
1
        case SINGLE: {
2318
1
            data.emplace_back(_sv);
2319
1
            break;
2320
0
        }
2321
1
        case BITMAP: {
2322
129
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2323
128
                data.emplace_back(*it);
2324
128
            }
2325
1
            break;
2326
0
        }
2327
1
        case SET: {
2328
1
            std::vector<uint64_t> values(_set.begin(), _set.end());
2329
1
            std::sort(values.begin(), values.end());
2330
31
            for (auto v : values) {
2331
31
                data.emplace_back(v);
2332
31
            }
2333
1
            break;
2334
0
        }
2335
3
        }
2336
3
    }
2337
2338
157
    void reset() {
2339
157
        _type = EMPTY;
2340
157
        _sv = 0;
2341
157
        _set.clear();
2342
157
        _is_shared = false;
2343
157
        _bitmap = nullptr;
2344
157
    }
2345
    // Implement an iterator for convenience
2346
    friend class BitmapValueIterator;
2347
    typedef BitmapValueIterator b_iterator;
2348
2349
    b_iterator begin() const;
2350
    b_iterator end() const;
2351
    b_iterator lower_bound(uint64_t val) const;
2352
2353
private:
2354
44
    void _convert_to_smaller_type() {
2355
44
        if (_type == BITMAP) {
2356
16
            uint64_t c = _bitmap->cardinality();
2357
16
            if (config::enable_set_in_bitmap_value && c > SET_TYPE_THRESHOLD) {
2358
7
                return;
2359
9
            } else if (c > 1) {
2360
7
                return;
2361
7
            }
2362
2
            if (c == 0) {
2363
1
                _type = EMPTY;
2364
1
            } else if (c == 1 && !config::enable_set_in_bitmap_value) {
2365
1
                _type = SINGLE;
2366
1
                _sv = _bitmap->minimum();
2367
1
            } else {
2368
0
                _type = SET;
2369
0
                for (auto v : *_bitmap) {
2370
0
                    _set.insert(v);
2371
0
                }
2372
0
            }
2373
2
            _bitmap.reset();
2374
28
        } else if (_type == SET) {
2375
28
            if (_set.size() == 1 && !config::enable_set_in_bitmap_value) {
2376
0
                _type = SINGLE;
2377
0
                _sv = *_set.begin();
2378
0
                _set.clear();
2379
0
            }
2380
28
        }
2381
44
    }
2382
2383
8
    uint64_t min_or_max(bool* empty, std::function<uint64_t()> func) const {
2384
8
        bool is_empty = false;
2385
8
        uint64_t result = 0;
2386
8
        switch (_type) {
2387
0
        case SINGLE:
2388
0
            result = _sv;
2389
0
            break;
2390
2
        case BITMAP:
2391
6
        case SET:
2392
6
            result = func();
2393
6
            break;
2394
2
        default:
2395
2
            is_empty = true;
2396
8
        }
2397
8
        if (empty) {
2398
8
            *empty = is_empty;
2399
8
        }
2400
8
        return result;
2401
8
    }
2402
2403
4.13M
    void _prepare_bitmap_for_write() const {
2404
4.13M
        if (!_bitmap) {
2405
8.29k
            _bitmap = std::make_shared<detail::Roaring64Map>();
2406
8.29k
            _is_shared = false;
2407
8.29k
            return;
2408
8.29k
        }
2409
2410
4.12M
        if (!_is_shared) {
2411
            // the state is not shared, not need to check use count any more
2412
4.11M
            return;
2413
4.11M
        }
2414
2415
8.34k
        if (_bitmap.use_count() > 1) {
2416
1.04k
            auto new_one = std::make_shared<detail::Roaring64Map>();
2417
1.04k
            *new_one = *_bitmap;
2418
1.04k
            _bitmap = new_one;
2419
1.04k
        }
2420
8.34k
        _is_shared = false;
2421
8.34k
    }
2422
2423
97.7k
    void _convert_to_bitmap_if_need() {
2424
97.7k
        if (_type != SET || _set.size() <= SET_TYPE_THRESHOLD) {
2425
94.7k
            return;
2426
94.7k
        }
2427
2.98k
        _prepare_bitmap_for_write();
2428
100k
        for (auto v : _set) {
2429
100k
            _bitmap->add(v);
2430
100k
        }
2431
2.98k
        _type = BITMAP;
2432
2.98k
        _set.clear();
2433
2.98k
    }
2434
2435
    enum BitmapDataType {
2436
        EMPTY = 0,
2437
        SINGLE = 1, // single element
2438
        BITMAP = 2, // more than one elements
2439
        SET = 3     // elements count less or equal than 32
2440
    };
2441
    uint64_t _sv = 0; // store the single value when _type == SINGLE
2442
    // !FIXME: We should rethink the logic about _bitmap and _is_shared
2443
    mutable std::shared_ptr<detail::Roaring64Map> _bitmap; // used when _type == BITMAP
2444
    SetContainer<uint64_t> _set;
2445
    BitmapDataType _type {EMPTY};
2446
    // Indicate whether the state is shared among multi BitmapValue object
2447
    mutable bool _is_shared = true;
2448
    static constexpr uint64_t SET_TYPE_THRESHOLD = 32;
2449
};
2450
2451
// A simple implement of bitmap value iterator(Read only)
2452
// Usage:
2453
//  BitmapValueIterator iter = bitmap_value.begin();
2454
//  BitmapValueIterator end = bitmap_value.end();
2455
//  for (; iter != end(); ++iter) {
2456
//      uint64_t v = *iter;
2457
//      ... do something with "v" ...
2458
//  }
2459
class BitmapValueIterator {
2460
public:
2461
37
    BitmapValueIterator(const BitmapValue& bitmap, bool end = false) : _bitmap(bitmap), _end(end) {
2462
37
        switch (_bitmap._type) {
2463
8
        case BitmapValue::BitmapDataType::EMPTY:
2464
8
            _end = true;
2465
8
            break;
2466
8
        case BitmapValue::BitmapDataType::SINGLE:
2467
8
            _sv = _bitmap._sv;
2468
8
            break;
2469
13
        case BitmapValue::BitmapDataType::BITMAP:
2470
13
            _iter = new detail::Roaring64MapSetBitForwardIterator(*_bitmap._bitmap, _end);
2471
13
            break;
2472
8
        case BitmapValue::BitmapDataType::SET:
2473
8
            _set_iter = _end ? _bitmap._set.end() : _bitmap._set.begin();
2474
8
            break;
2475
0
        default:
2476
0
            CHECK(false) << _bitmap._type;
2477
37
        }
2478
37
    }
2479
2480
    BitmapValueIterator(const BitmapValueIterator& other)
2481
168
            : _bitmap(other._bitmap), _sv(other._sv), _end(other._end) {
2482
168
        _iter = other._iter ? new detail::Roaring64MapSetBitForwardIterator(*other._iter) : nullptr;
2483
168
        if (_bitmap._type == BitmapValue::BitmapDataType::SET) {
2484
33
            _set_iter = other._set_iter;
2485
33
        }
2486
168
    }
2487
2488
205
    ~BitmapValueIterator() {
2489
205
        if (_iter != nullptr) {
2490
143
            delete _iter;
2491
143
            _iter = nullptr;
2492
143
        }
2493
205
    }
2494
2495
646
    uint64_t operator*() const {
2496
646
        CHECK(!_end) << "should not get value of end iterator";
2497
646
        switch (_bitmap._type) {
2498
4
        case BitmapValue::BitmapDataType::SINGLE:
2499
4
            return _sv;
2500
517
        case BitmapValue::BitmapDataType::BITMAP:
2501
517
            return *(*_iter);
2502
125
        case BitmapValue::BitmapDataType::SET: {
2503
125
            return *_set_iter;
2504
0
        }
2505
0
        default:
2506
0
            CHECK(false) << _bitmap._type;
2507
646
        }
2508
0
        return 0;
2509
646
    }
2510
2511
486
    BitmapValueIterator& operator++() { // ++i, must returned inc. value
2512
486
        CHECK(!_end) << "should not forward when iterator ends";
2513
486
        switch (_bitmap._type) {
2514
3
        case BitmapValue::BitmapDataType::SINGLE:
2515
3
            _end = true;
2516
3
            break;
2517
389
        case BitmapValue::BitmapDataType::BITMAP:
2518
389
            ++(*_iter);
2519
389
            break;
2520
94
        case BitmapValue::BitmapDataType::SET:
2521
94
            ++_set_iter;
2522
94
            break;
2523
0
        default:
2524
0
            CHECK(false) << _bitmap._type;
2525
486
        }
2526
486
        return *this;
2527
486
    }
2528
2529
160
    BitmapValueIterator operator++(int) { // i++, must return orig. value
2530
160
        CHECK(!_end) << "should not forward when iterator ends";
2531
160
        BitmapValueIterator orig(*this);
2532
160
        switch (_bitmap._type) {
2533
1
        case BitmapValue::BitmapDataType::SINGLE:
2534
1
            _end = true;
2535
1
            break;
2536
128
        case BitmapValue::BitmapDataType::BITMAP:
2537
128
            ++(*_iter);
2538
128
            break;
2539
31
        case BitmapValue::BitmapDataType::SET:
2540
31
            ++_set_iter;
2541
31
            break;
2542
0
        default:
2543
0
            CHECK(false) << _bitmap._type;
2544
160
        }
2545
160
        return orig;
2546
160
    }
2547
2548
674
    bool operator==(const BitmapValueIterator& other) const {
2549
674
        if (_end && other._end) {
2550
13
            return true;
2551
13
        }
2552
2553
661
        switch (_bitmap._type) {
2554
0
        case BitmapValue::BitmapDataType::EMPTY:
2555
0
            return other._bitmap._type == BitmapValue::BitmapDataType::EMPTY;
2556
5
        case BitmapValue::BitmapDataType::SINGLE:
2557
5
            return _end == other._end && _sv == other._sv;
2558
524
        case BitmapValue::BitmapDataType::BITMAP:
2559
524
            return *_iter == *(other._iter);
2560
132
        case BitmapValue::BitmapDataType::SET:
2561
132
            return _set_iter == other._set_iter;
2562
0
        default:
2563
0
            CHECK(false) << _bitmap._type;
2564
661
        }
2565
0
        return false;
2566
661
    }
2567
2568
664
    bool operator!=(const BitmapValueIterator& other) const { return !(*this == other); }
2569
2570
    /**
2571
    * Move the iterator to the first value >= `val`.
2572
    */
2573
0
    BitmapValueIterator& move(uint64_t val) {
2574
0
        switch (_bitmap._type) {
2575
0
        case BitmapValue::BitmapDataType::SINGLE:
2576
0
            if (_sv < val) {
2577
0
                _end = true;
2578
0
            }
2579
0
            break;
2580
0
        case BitmapValue::BitmapDataType::BITMAP:
2581
0
            if (!_iter->move(val)) {
2582
0
                _end = true;
2583
0
            }
2584
0
            break;
2585
0
        case BitmapValue::BitmapDataType::SET: {
2586
0
            throw Exception(Status::FatalError("BitmapValue with set do not support move"));
2587
0
        }
2588
0
        default:
2589
0
            break;
2590
0
        }
2591
0
        return *this;
2592
0
    }
2593
2594
private:
2595
    const BitmapValue& _bitmap;
2596
    detail::Roaring64MapSetBitForwardIterator* _iter = nullptr;
2597
    BitmapValue::SetContainer<uint64_t>::const_iterator _set_iter;
2598
    uint64_t _sv = 0;
2599
    bool _end = false;
2600
};
2601
2602
16
inline BitmapValueIterator BitmapValue::begin() const {
2603
16
    return {*this};
2604
16
}
2605
2606
21
inline BitmapValueIterator BitmapValue::end() const {
2607
21
    return {*this, true};
2608
21
}
2609
2610
0
inline BitmapValueIterator BitmapValue::lower_bound(uint64_t val) const {
2611
0
    return BitmapValueIterator(*this).move(val);
2612
0
}
2613
2614
7
inline bool BitmapValue::contains_any(uint64_t left, uint64_t right) const {
2615
7
    if (left > right) {
2616
0
        return false;
2617
0
    }
2618
2619
7
    if (_type == SET) {
2620
25
        for (auto v : _set) {
2621
25
            if (v >= left && v <= right) {
2622
4
                return true;
2623
4
            }
2624
25
        }
2625
3
        return false;
2626
7
    }
2627
0
    auto it = lower_bound(left);
2628
0
    return it != end() && *it <= right;
2629
7
}
2630
2631
} // namespace doris