Coverage Report

Created: 2024-11-21 13:02

/root/doris/be/src/util/bitmap_value.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <parallel_hashmap/btree.h>
21
#include <parallel_hashmap/phmap.h>
22
23
#include <algorithm>
24
#include <cstdarg>
25
#include <cstdint>
26
#include <cstdio>
27
#include <limits>
28
#include <map>
29
#include <memory>
30
#include <new>
31
#include <numeric>
32
#include <roaring/roaring.hh>
33
#include <set>
34
#include <stdexcept>
35
#include <string>
36
#include <utility>
37
38
#include "common/config.h"
39
#include "common/logging.h"
40
#include "gutil/integral_types.h"
41
#include "udf/udf.h"
42
#include "util/coding.h"
43
#include "vec/common/pod_array.h"
44
#include "vec/common/pod_array_fwd.h"
45
namespace doris {
46
47
// serialized bitmap := TypeCode(1), Payload
48
// The format of payload depends on value of TypeCode which is defined below
49
struct BitmapTypeCode {
50
    enum type {
51
        // An empty bitmap. Payload is 0 byte.
52
        // added in 0.11
53
        EMPTY = 0,
54
        // A bitmap containing only one element that is in [0, UINT32_MAX]
55
        // Payload := UInt32LittleEndian(4 byte)
56
        // added in 0.11
57
        SINGLE32 = 1,
58
        // A bitmap whose maximum element is in [0, UINT32_MAX]
59
        // Payload := the standard RoaringBitmap format described by
60
        // https://github.com/RoaringBitmap/RoaringFormatSpec/
61
        // added in 0.11
62
        BITMAP32 = 2,
63
        // A bitmap containing only one element that is in (UINT32_MAX, UINT64_MAX]
64
        // Payload := UInt64LittleEndian(8 byte)
65
        // added in 0.12
66
        SINGLE64 = 3,
67
        // A bitmap whose maximum element is in (UINT32_MAX, UINT64_MAX].
68
        //
69
        // To support 64-bits elements, all elements with the same high 32 bits are stored in a
70
        // RoaringBitmap containing only the lower 32 bits. Thus we could use
71
        // map<uint32_t, RoaringBitmap> to represent bitmap of 64-bits ints.
72
        //
73
        // Since there is no standard format for 64-bits RoaringBitmap, we define our own as below
74
        // Payload := NumRoaring(vint64), { MapKey, MapValue }^NumRoaring
75
        // - MapKey := the shared high 32 bits in UInt32LittleEndian(4 byte)
76
        // - MapValue := the standard RoaringBitmap format
77
        //
78
        // added in 0.12
79
        BITMAP64 = 4,
80
        SET = 5, // V1
81
        SET_V2 = 10,
82
        BITMAP32_V2 = 12,
83
        BITMAP64_V2 = 13,
84
        TYPE_MAX
85
    };
86
1
    Status static inline validate(int bitmap_type) {
87
1
        if (UNLIKELY(bitmap_type < type::EMPTY || bitmap_type >= type::TYPE_MAX)) {
88
1
            std::string err_msg =
89
1
                    fmt::format("BitmapTypeCode invalid, should between: {} and {} actrual is {}",
90
1
                                BitmapTypeCode::EMPTY, BitmapTypeCode::BITMAP64, bitmap_type);
91
1
            LOG(ERROR) << err_msg;
92
1
            return Status::Corruption(err_msg);
93
1
        }
94
0
        return Status::OK();
95
1
    }
96
};
97
98
namespace detail {
99
100
class Roaring64MapSetBitForwardIterator;
101
102
// Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.2.60/cpp/roaring64map.hh
103
// What we change includes
104
// - a custom serialization format is used inside read()/write()/getSizeInBytes()
105
// - added clear() and is32BitsEnough()
106
class Roaring64Map {
107
public:
108
    /**
109
     * Create an empty bitmap
110
     */
111
11.4k
    Roaring64Map() = default;
112
113
    /**
114
     * Construct a bitmap from a list of 32-bit integer values.
115
     */
116
2
    Roaring64Map(size_t n, const uint32_t* data) { addMany(n, data); }
117
118
    /**
119
     * Construct a bitmap from a list of 64-bit integer values.
120
     */
121
1
    Roaring64Map(size_t n, const uint64_t* data) { addMany(n, data); }
122
123
    /**
124
     * Construct a 64-bit map from a 32-bit one
125
     */
126
1
    explicit Roaring64Map(const roaring::Roaring& r) { emplaceOrInsert(0, r); }
127
128
9
    Roaring64Map(const Roaring64Map& r) = default;
129
130
5.10k
    Roaring64Map(Roaring64Map&& r) = default;
131
132
    /**
133
     * Assignment operator.
134
     */
135
1.03k
    Roaring64Map& operator=(const Roaring64Map& r) {
136
1.03k
        roarings = r.roarings;
137
1.03k
        return *this;
138
1.03k
    }
139
140
    /**
141
     * Add value x
142
     *
143
     */
144
3
    void add(uint32_t x) {
145
3
        roarings[0].add(x);
146
3
        roarings[0].setCopyOnWrite(copyOnWrite);
147
3
    }
148
2.63M
    void add(uint64_t x) {
149
2.63M
        roarings[highBytes(x)].add(lowBytes(x));
150
2.63M
        roarings[highBytes(x)].setCopyOnWrite(copyOnWrite);
151
2.63M
    }
152
153
    template <typename T>
154
11
    void addMany(size_t n_args, const T* vals) {
155
11
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
156
1
            auto& roaring = roarings[0];
157
1
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
158
1
            roaring.setCopyOnWrite(copyOnWrite);
159
1
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
160
0
            auto& roaring = roarings[0];
161
0
            std::vector<uint32_t> values(n_args);
162
8
            for (size_t i = 0; i != n_args; ++i) {
163
7
                values[i] = uint32_t(vals[i]);
164
7
            }
165
1
            roaring.addMany(n_args, values.data());
166
1
            roaring.setCopyOnWrite(copyOnWrite);
167
1
        } else {
168
0
            for (size_t lcv = 0; lcv < n_args; lcv++) {
169
0
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
170
0
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
171
0
            }
172
0
        }
173
11
    }
_ZN5doris6detail12Roaring64Map7addManyIjEEvmPKT_
Line
Count
Source
154
8
    void addMany(size_t n_args, const T* vals) {
155
8
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
156
8
            auto& roaring = roarings[0];
157
8
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
158
8
            roaring.setCopyOnWrite(copyOnWrite);
159
8
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
160
8
            auto& roaring = roarings[0];
161
8
            std::vector<uint32_t> values(n_args);
162
8
            for (size_t i = 0; i != n_args; ++i) {
163
8
                values[i] = uint32_t(vals[i]);
164
8
            }
165
8
            roaring.addMany(n_args, values.data());
166
8
            roaring.setCopyOnWrite(copyOnWrite);
167
8
        } else {
168
8
            for (size_t lcv = 0; lcv < n_args; lcv++) {
169
8
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
170
8
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
171
8
            }
172
8
        }
173
8
    }
_ZN5doris6detail12Roaring64Map7addManyIiEEvmPKT_
Line
Count
Source
154
2
    void addMany(size_t n_args, const T* vals) {
155
2
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
156
2
            auto& roaring = roarings[0];
157
2
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
158
2
            roaring.setCopyOnWrite(copyOnWrite);
159
2
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
160
2
            auto& roaring = roarings[0];
161
2
            std::vector<uint32_t> values(n_args);
162
2
            for (size_t i = 0; i != n_args; ++i) {
163
2
                values[i] = uint32_t(vals[i]);
164
2
            }
165
2
            roaring.addMany(n_args, values.data());
166
2
            roaring.setCopyOnWrite(copyOnWrite);
167
2
        } else {
168
2
            for (size_t lcv = 0; lcv < n_args; lcv++) {
169
2
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
170
2
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
171
2
            }
172
2
        }
173
2
    }
_ZN5doris6detail12Roaring64Map7addManyItEEvmPKT_
Line
Count
Source
154
1
    void addMany(size_t n_args, const T* vals) {
155
1
        if constexpr (sizeof(T) == sizeof(uint32_t)) {
156
1
            auto& roaring = roarings[0];
157
1
            roaring.addMany(n_args, reinterpret_cast<const uint32_t*>(vals));
158
1
            roaring.setCopyOnWrite(copyOnWrite);
159
1
        } else if constexpr (sizeof(T) < sizeof(uint32_t)) {
160
1
            auto& roaring = roarings[0];
161
1
            std::vector<uint32_t> values(n_args);
162
8
            for (size_t i = 0; i != n_args; ++i) {
163
7
                values[i] = uint32_t(vals[i]);
164
7
            }
165
1
            roaring.addMany(n_args, values.data());
166
1
            roaring.setCopyOnWrite(copyOnWrite);
167
1
        } else {
168
1
            for (size_t lcv = 0; lcv < n_args; lcv++) {
169
1
                roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
170
1
                roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
171
1
            }
172
1
        }
173
1
    }
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIhEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIaEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIsEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyIlEEvmPKT_
Unexecuted instantiation: _ZN5doris6detail12Roaring64Map7addManyInEEvmPKT_
174
175
40
    void addMany(size_t n_args, const uint64_t* vals) {
176
1.25k
        for (size_t lcv = 0; lcv < n_args; lcv++) {
177
1.21k
            roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
178
1.21k
            roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
179
1.21k
        }
180
40
    }
181
182
    /**
183
     * Remove value x
184
     *
185
     */
186
4
    void remove(uint32_t x) { roarings[0].remove(x); }
187
23
    void remove(uint64_t x) {
188
23
        auto roaring_iter = roarings.find(highBytes(x));
189
23
        if (roaring_iter != roarings.cend()) {
190
23
            roaring_iter->second.remove(lowBytes(x));
191
23
        }
192
23
    }
193
    /**
194
     * Return the largest value (if not empty)
195
     *
196
     */
197
22.5k
    uint64_t maximum() const {
198
22.5k
        for (auto roaring_iter = roarings.crbegin(); roaring_iter != roarings.crend();
199
22.5k
             ++roaring_iter) {
200
22.5k
            if (!roaring_iter->second.isEmpty()) {
201
22.5k
                return uniteBytes(roaring_iter->first, roaring_iter->second.maximum());
202
22.5k
            }
203
22.5k
        }
204
        // we put std::numeric_limits<>::max/min in parenthesis
205
        // to avoid a clash with the Windows.h header under Windows
206
5
        return (std::numeric_limits<uint64_t>::min)();
207
22.5k
    }
208
209
    /**
210
     * Return the smallest value (if not empty)
211
     *
212
     */
213
7
    uint64_t minimum() const {
214
7
        for (auto roaring_iter = roarings.cbegin(); roaring_iter != roarings.cend();
215
7
             ++roaring_iter) {
216
6
            if (!roaring_iter->second.isEmpty()) {
217
6
                return uniteBytes(roaring_iter->first, roaring_iter->second.minimum());
218
6
            }
219
6
        }
220
        // we put std::numeric_limits<>::max/min in parenthesis
221
        // to avoid a clash with the Windows.h header under Windows
222
1
        return (std::numeric_limits<uint64_t>::max)();
223
7
    }
224
225
    /**
226
     * Check if value x is present
227
     */
228
11
    bool contains(uint32_t x) const {
229
11
        return roarings.count(0) == 0 ? false : roarings.at(0).contains(x);
230
11
    }
231
9.31k
    bool contains(uint64_t x) const {
232
9.31k
        return roarings.count(highBytes(x)) == 0 ? false
233
9.31k
                                                 : roarings.at(highBytes(x)).contains(lowBytes(x));
234
9.31k
    }
235
236
    /**
237
     * Compute the intersection between the current bitmap and the provided
238
     * bitmap,
239
     * writing the result in the current bitmap. The provided bitmap is not
240
     * modified.
241
     */
242
5
    Roaring64Map& operator&=(const Roaring64Map& r) {
243
10
        for (auto& map_entry : roarings) {
244
10
            if (r.roarings.count(map_entry.first) == 1) {
245
9
                map_entry.second &= r.roarings.at(map_entry.first);
246
9
            } else {
247
1
                map_entry.second = roaring::Roaring();
248
1
            }
249
10
        }
250
5
        return *this;
251
5
    }
252
253
    /**
254
     * Compute the difference between the current bitmap and the provided
255
     * bitmap,
256
     * writing the result in the current bitmap. The provided bitmap is not
257
     * modified.
258
     */
259
1
    Roaring64Map& operator-=(const Roaring64Map& r) {
260
4
        for (auto& map_entry : roarings) {
261
4
            if (r.roarings.count(map_entry.first) == 1) {
262
4
                map_entry.second -= r.roarings.at(map_entry.first);
263
4
            }
264
4
        }
265
1
        return *this;
266
1
    }
267
268
    /**
269
     * Compute the union between the current bitmap and the provided bitmap,
270
     * writing the result in the current bitmap. The provided bitmap is not
271
     * modified.
272
     *
273
     * See also the fastunion function to aggregate many bitmaps more quickly.
274
     */
275
14
    Roaring64Map& operator|=(const Roaring64Map& r) {
276
24
        for (const auto& map_entry : r.roarings) {
277
24
            if (roarings.count(map_entry.first) == 0) {
278
11
                roarings[map_entry.first] = map_entry.second;
279
11
                roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
280
13
            } else {
281
13
                roarings[map_entry.first] |= map_entry.second;
282
13
            }
283
24
        }
284
14
        return *this;
285
14
    }
286
287
    /**
288
     * Compute the symmetric union between the current bitmap and the provided
289
     * bitmap,
290
     * writing the result in the current bitmap. The provided bitmap is not
291
     * modified.
292
     */
293
2
    Roaring64Map& operator^=(const Roaring64Map& r) {
294
5
        for (const auto& map_entry : r.roarings) {
295
5
            if (roarings.count(map_entry.first) == 0) {
296
0
                roarings[map_entry.first] = map_entry.second;
297
0
                roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
298
5
            } else {
299
5
                roarings[map_entry.first] ^= map_entry.second;
300
5
            }
301
5
        }
302
2
        return *this;
303
2
    }
304
305
    /**
306
     * Exchange the content of this bitmap with another.
307
     */
308
1
    void swap(Roaring64Map& r) { roarings.swap(r.roarings); }
309
310
    /**
311
     * Get the cardinality of the bitmap (number of elements).
312
     * Throws std::length_error in the special case where the bitmap is full
313
     * (cardinality() == 2^64). Check isFull() before calling to avoid
314
     * exception.
315
     */
316
235
    uint64_t cardinality() const {
317
235
        if (isFull()) {
318
0
            throw std::length_error(
319
0
                    "bitmap is full, cardinality is 2^64, "
320
0
                    "unable to represent in a 64-bit integer");
321
0
        }
322
235
        return std::accumulate(roarings.cbegin(), roarings.cend(), (uint64_t)0,
323
235
                               [](uint64_t previous,
324
407
                                  const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
325
407
                                   return previous + map_entry.second.cardinality();
326
407
                               });
327
235
    }
328
    /**
329
     * Computes the size of the intersection between two bitmaps.
330
     *
331
     */
332
6
    uint64_t andCardinality(const Roaring64Map& r) const {
333
6
        uint64_t card = 0;
334
10
        for (const auto& map_entry : roarings) {
335
10
            if (r.roarings.count(map_entry.first) == 1) {
336
9
                card += map_entry.second.and_cardinality(r.roarings.at(map_entry.first));
337
9
            }
338
10
        }
339
6
        return card;
340
6
    }
341
342
    /**
343
     * Computes the size of the union between two bitmaps.
344
     *
345
     */
346
4
    uint64_t orCardinality(const Roaring64Map& r) const {
347
4
        uint64_t card = 0;
348
4
        for (const auto& map_entry : roarings) {
349
4
            if (r.roarings.count(map_entry.first) == 0) {
350
0
                card += map_entry.second.cardinality();
351
0
            }
352
4
        }
353
4
        for (const auto& map_entry : r.roarings) {
354
4
            if (roarings.count(map_entry.first) == 0) {
355
0
                card += map_entry.second.cardinality();
356
4
            } else {
357
4
                card += roarings.at(map_entry.first).or_cardinality(map_entry.second);
358
4
            }
359
4
        }
360
4
        return card;
361
4
    }
362
363
    /**
364
     * Computes the size of the difference (andnot) between two bitmaps.
365
     * r1.cardinality - (r1 & r2).cardinality
366
     */
367
1
    uint64_t andnotCardinality(const Roaring64Map& r) const {
368
1
        uint64_t card = 0;
369
4
        for (const auto& map_entry : roarings) {
370
4
            card += map_entry.second.cardinality();
371
4
            if (r.roarings.count(map_entry.first) == 1) {
372
4
                card -= r.roarings.at(map_entry.first).and_cardinality(map_entry.second);
373
4
            }
374
4
        }
375
1
        return card;
376
1
    }
377
378
    /**
379
     * Computes the size of the symmetric difference (andnot) between two
380
     * bitmaps.
381
     * r1.cardinality + r2.cardinality - 2 * (r1 & r2).cardinality
382
     */
383
1
    uint64_t xorCardinality(const Roaring64Map& r) const {
384
1
        uint64_t card = 0;
385
1
        for (const auto& map_entry : roarings) {
386
1
            card += map_entry.second.cardinality();
387
1
        }
388
1
        for (const auto& map_entry : r.roarings) {
389
1
            card += map_entry.second.cardinality();
390
1
            if (roarings.count(map_entry.first) == 1) {
391
1
                card -= 2 * roarings.at(map_entry.first).and_cardinality(map_entry.second);
392
1
            }
393
1
        }
394
1
        return card;
395
1
    }
396
397
    /**
398
    * Returns true if the bitmap is empty (cardinality is zero).
399
    */
400
1
    bool isEmpty() const {
401
1
        return std::all_of(roarings.cbegin(), roarings.cend(),
402
1
                           [](const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
403
0
                               return map_entry.second.isEmpty();
404
0
                           });
405
1
    }
406
407
    /**
408
    * Returns true if the bitmap is full (cardinality is max uint64_t + 1).
409
    */
410
235
    bool isFull() const {
411
        // only bother to check if map is fully saturated
412
        //
413
        // we put std::numeric_limits<>::max/min in parenthesis
414
        // to avoid a clash with the Windows.h header under Windows
415
235
        return roarings.size() == ((uint64_t)(std::numeric_limits<uint32_t>::max)()) + 1
416
235
                       ? std::all_of(roarings.cbegin(), roarings.cend(),
417
0
                                     [](const std::pair<const uint32_t, roaring::Roaring>&
418
0
                                                roaring_map_entry) {
419
                                         // roarings within map are saturated if cardinality
420
                                         // is uint32_t max + 1
421
0
                                         return roaring_map_entry.second.cardinality() ==
422
0
                                                ((uint64_t)(std::numeric_limits<uint32_t>::max)()) +
423
0
                                                        1;
424
0
                                     })
425
235
                       : false;
426
235
    }
427
428
    /**
429
     * Return true if the two bitmaps contain the same elements.
430
     */
431
14
    bool operator==(const Roaring64Map& r) const {
432
        // we cannot use operator == on the map because either side may contain
433
        // empty Roaring Bitmaps
434
14
        auto lhs_iter = roarings.cbegin();
435
14
        auto lhs_cend = roarings.cend();
436
14
        auto rhs_iter = r.roarings.cbegin();
437
14
        auto rhs_cend = r.roarings.cend();
438
27
        while (lhs_iter != lhs_cend && rhs_iter != rhs_cend) {
439
13
            auto lhs_key = lhs_iter->first;
440
13
            auto rhs_key = rhs_iter->first;
441
13
            const auto& lhs_map = lhs_iter->second;
442
13
            const auto& rhs_map = rhs_iter->second;
443
13
            if (lhs_map.isEmpty()) {
444
1
                ++lhs_iter;
445
1
                continue;
446
1
            }
447
12
            if (rhs_map.isEmpty()) {
448
2
                ++rhs_iter;
449
2
                continue;
450
2
            }
451
10
            if (!(lhs_key == rhs_key)) {
452
0
                return false;
453
0
            }
454
10
            if (!(lhs_map == rhs_map)) {
455
0
                return false;
456
0
            }
457
10
            ++lhs_iter;
458
10
            ++rhs_iter;
459
10
        }
460
16
        while (lhs_iter != lhs_cend) {
461
4
            if (!lhs_iter->second.isEmpty()) {
462
2
                return false;
463
2
            }
464
2
            ++lhs_iter;
465
2
        }
466
14
        while (rhs_iter != rhs_cend) {
467
3
            if (!rhs_iter->second.isEmpty()) {
468
1
                return false;
469
1
            }
470
2
            ++rhs_iter;
471
2
        }
472
11
        return true;
473
12
    }
474
475
    /** convert array and bitmap containers to run containers when it is more
476
     * efficient;
477
     * also convert from run containers when more space efficient.  Returns
478
     * true if the result has at least one run container.
479
     * Additional savings might be possible by calling shrinkToFit().
480
     */
481
13.3k
    bool runOptimize() {
482
13.3k
        return std::accumulate(
483
13.3k
                roarings.begin(), roarings.end(), true,
484
13.3k
                [](bool previous, std::pair<const uint32_t, roaring::Roaring>& map_entry) {
485
13.3k
                    return map_entry.second.runOptimize() && previous;
486
13.3k
                });
487
13.3k
    }
488
489
    /**
490
     * If needed, reallocate memory to shrink the memory usage. Returns
491
     * the number of bytes saved.
492
    */
493
13.3k
    size_t shrinkToFit() {
494
13.3k
        size_t savedBytes = 0;
495
13.3k
        auto iter = roarings.begin();
496
26.6k
        while (iter != roarings.cend()) {
497
13.3k
            if (iter->second.isEmpty()) {
498
                // empty Roarings are 84 bytes
499
1
                savedBytes += 88;
500
1
                iter = roarings.erase(iter);
501
13.3k
            } else {
502
13.3k
                savedBytes += iter->second.shrinkToFit();
503
13.3k
                iter++;
504
13.3k
            }
505
13.3k
        }
506
13.3k
        return savedBytes;
507
13.3k
    }
508
509
    /**
510
     * Iterate over the bitmap elements. The function iterator is called once
511
     * for all the values with ptr (can be nullptr) as the second parameter of each
512
     * call.
513
     *
514
     * roaring_iterator is simply a pointer to a function that returns bool
515
     * (true means that the iteration should continue while false means that it
516
     * should stop), and takes (uint32_t,void*) as inputs.
517
     */
518
706
    void iterate(roaring::api::roaring_iterator64 iterator, void* ptr) const {
519
736
        for (const auto& map_entry : roarings) {
520
736
            bool should_continue = roaring_iterate64(&map_entry.second.roaring, iterator,
521
736
                                                     uint64_t(map_entry.first) << 32, ptr);
522
736
            if (!should_continue) {
523
1
                break;
524
1
            }
525
736
        }
526
706
    }
527
528
    /**
529
     * write a bitmap to a char buffer.
530
     * Returns how many bytes were written which should be getSizeInBytes().
531
     */
532
9.22k
    size_t write(char* buf, int serialize_version) const {
533
9.22k
        bool is_v1 = serialize_version == 1;
534
9.22k
        BitmapTypeCode::type type_bitmap32 =
535
9.22k
                is_v1 ? BitmapTypeCode::type::BITMAP32 : BitmapTypeCode::type::BITMAP32_V2;
536
9.22k
        BitmapTypeCode::type type_bitmap64 =
537
9.22k
                is_v1 ? BitmapTypeCode::type::BITMAP64 : BitmapTypeCode::type::BITMAP64_V2;
538
539
9.22k
        if (is32BitsEnough()) {
540
9.21k
            *(buf++) = type_bitmap32;
541
9.21k
            auto it = roarings.find(0);
542
9.21k
            if (it == roarings.end()) { // empty bitmap
543
2
                roaring::Roaring r;
544
2
                return r.write(buf, is_v1) + 1;
545
2
            }
546
9.21k
            return it->second.write(buf, is_v1) + 1;
547
9.21k
        }
548
549
7
        const char* orig = buf;
550
        // put type code
551
7
        *(buf++) = type_bitmap64;
552
        // push map size
553
7
        buf = (char*)encode_varint64((uint8_t*)buf, roarings.size());
554
7
        std::for_each(roarings.cbegin(), roarings.cend(),
555
16
                      [&buf, is_v1](const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
556
                          // push map key
557
16
                          encode_fixed32_le((uint8_t*)buf, map_entry.first);
558
16
                          buf += sizeof(uint32_t);
559
                          // push map value Roaring
560
16
                          buf += map_entry.second.write(buf, is_v1);
561
16
                      });
562
7
        return buf - orig;
563
9.22k
    }
564
565
    /**
566
     * read a bitmap from a serialized version.
567
     *
568
     * This function is unsafe in the sense that if you provide bad data,
569
     * many bytes could be read, possibly causing a buffer overflow. See also readSafe.
570
     */
571
5.11k
    static Roaring64Map read(const char* buf) {
572
5.11k
        Roaring64Map result;
573
574
5.11k
        bool is_v1 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP64 == *buf;
575
5.11k
        bool is_bitmap32 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP32_V2 == *buf;
576
5.11k
        bool is_bitmap64 = BitmapTypeCode::BITMAP64 == *buf || BitmapTypeCode::BITMAP64_V2 == *buf;
577
5.11k
        if (is_bitmap32) {
578
5.10k
            roaring::Roaring read = roaring::Roaring::read(buf + 1, is_v1);
579
5.10k
            result.emplaceOrInsert(0, std::move(read));
580
5.10k
            return result;
581
5.10k
        }
582
583
8
        DCHECK(is_bitmap64);
584
8
        buf++;
585
586
        // get map size (varint64 took 1~10 bytes)
587
8
        uint64_t map_size;
588
8
        buf = reinterpret_cast<const char*>(
589
8
                decode_varint64_ptr(reinterpret_cast<const uint8_t*>(buf),
590
8
                                    reinterpret_cast<const uint8_t*>(buf + 10), &map_size));
591
8
        DCHECK(buf != nullptr);
592
26
        for (uint64_t lcv = 0; lcv < map_size; lcv++) {
593
            // get map key
594
18
            uint32_t key = decode_fixed32_le(reinterpret_cast<const uint8_t*>(buf));
595
18
            buf += sizeof(uint32_t);
596
            // read map value Roaring
597
18
            roaring::Roaring read_var = roaring::Roaring::read(buf, is_v1);
598
            // forward buffer past the last Roaring Bitmap
599
18
            buf += read_var.getSizeInBytes(is_v1);
600
18
            result.emplaceOrInsert(key, std::move(read_var));
601
18
        }
602
8
        return result;
603
5.11k
    }
604
605
    /**
606
     * How many bytes are required to serialize this bitmap
607
     */
608
13.3k
    size_t getSizeInBytes(int serialize_version) const {
609
13.3k
        bool is_v1 = serialize_version == 1;
610
13.3k
        if (is32BitsEnough()) {
611
13.3k
            auto it = roarings.find(0);
612
13.3k
            if (it == roarings.end()) { // empty bitmap
613
2
                roaring::Roaring r;
614
2
                return r.getSizeInBytes(is_v1) + 1;
615
2
            }
616
13.3k
            return it->second.getSizeInBytes(is_v1) + 1;
617
13.3k
        }
618
        // start with type code, map size and size of keys for each map entry
619
13
        size_t init = 1 + varint_length(roarings.size()) + roarings.size() * sizeof(uint32_t);
620
13
        return std::accumulate(
621
13
                roarings.cbegin(), roarings.cend(), init,
622
28
                [=](size_t previous, const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
623
                    // add in bytes used by each Roaring
624
28
                    return previous + map_entry.second.getSizeInBytes(is_v1);
625
28
                });
626
13.3k
    }
627
628
    /**
629
     * remove all elements
630
     */
631
0
    void clear() { roarings.clear(); }
632
633
    /**
634
     * Return whether all elements can be represented in 32 bits
635
     */
636
22.5k
    bool is32BitsEnough() const { return maximum() <= std::numeric_limits<uint32_t>::max(); }
637
638
    /**
639
     * Computes the intersection between two bitmaps and returns new bitmap.
640
     * The current bitmap and the provided bitmap are unchanged.
641
     */
642
2
    Roaring64Map operator&(const Roaring64Map& o) const { return Roaring64Map(*this) &= o; }
643
644
    /**
645
     * Computes the difference between two bitmaps and returns new bitmap.
646
     * The current bitmap and the provided bitmap are unchanged.
647
     */
648
0
    Roaring64Map operator-(const Roaring64Map& o) const { return Roaring64Map(*this) -= o; }
649
650
    /**
651
     * Computes the union between two bitmaps and returns new bitmap.
652
     * The current bitmap and the provided bitmap are unchanged.
653
     */
654
1
    Roaring64Map operator|(const Roaring64Map& o) const { return Roaring64Map(*this) |= o; }
655
656
    /**
657
     * Computes the symmetric union between two bitmaps and returns new bitmap.
658
     * The current bitmap and the provided bitmap are unchanged.
659
     */
660
1
    Roaring64Map operator^(const Roaring64Map& o) const { return Roaring64Map(*this) ^= o; }
661
662
    /**
663
     * computes the logical or (union) between "n" bitmaps (referenced by a
664
     * pointer).
665
     */
666
6
    static Roaring64Map fastunion(size_t n, const Roaring64Map** inputs) {
667
6
        Roaring64Map ans;
668
        // not particularly fast
669
14
        for (size_t lcv = 0; lcv < n; ++lcv) {
670
8
            ans |= *(inputs[lcv]);
671
8
        }
672
6
        return ans;
673
6
    }
674
675
    friend class Roaring64MapSetBitForwardIterator;
676
    using const_iterator = Roaring64MapSetBitForwardIterator;
677
678
    /**
679
    * Returns an iterator that can be used to access the position of the
680
    * set bits. The running time complexity of a full scan is proportional to
681
    * the
682
    * number
683
    * of set bits: be aware that if you have long strings of 1s, this can be
684
    * very inefficient.
685
    *
686
    * It can be much faster to use the toArray method if you want to
687
    * retrieve the set bits.
688
    */
689
    const_iterator begin() const;
690
691
    /**
692
    * A bogus iterator that can be used together with begin()
693
    * for constructions such as for(auto i = b.begin();
694
    * i!=b.end(); ++i) {}
695
    */
696
    const_iterator end() const;
697
698
private:
699
    phmap::btree_map<uint32_t, roaring::Roaring> roarings {};
700
    bool copyOnWrite {false};
701
5.28M
    static uint32_t highBytes(const uint64_t in) { return uint32_t(in >> 32); }
702
2.64M
    static uint32_t lowBytes(const uint64_t in) { return uint32_t(in); }
703
25.5k
    static uint64_t uniteBytes(const uint32_t highBytes, const uint32_t lowBytes) {
704
25.5k
        return (uint64_t(highBytes) << 32) | uint64_t(lowBytes);
705
25.5k
    }
706
1
    void emplaceOrInsert(const uint32_t key, const roaring::Roaring& value) {
707
1
        roarings.emplace(std::make_pair(key, value));
708
1
    }
709
710
5.12k
    void emplaceOrInsert(const uint32_t key, roaring::Roaring&& value) {
711
5.12k
        roarings.emplace(key, value);
712
5.12k
    }
713
};
714
715
// Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.4.0/cpp/roaring64map.hh
716
// Used to go through the set bits. Not optimally fast, but convenient.
717
class Roaring64MapSetBitForwardIterator {
718
public:
719
    using type_of_iterator = Roaring64MapSetBitForwardIterator;
720
721
    /**
722
     * Provides the location of the set bit.
723
     */
724
2.96k
    uint64_t operator*() const {
725
2.96k
        return Roaring64Map::uniteBytes(map_iter->first, i.current_value);
726
2.96k
    }
727
728
975
    type_of_iterator& operator++() { // ++i, must returned inc. value
729
975
        if (i.has_value) {
730
975
            roaring_advance_uint32_iterator(&i);
731
975
        }
732
1.00k
        while (!i.has_value) {
733
34
            map_iter++;
734
34
            if (map_iter == map_end) return *this;
735
25
            roaring_init_iterator(&map_iter->second.roaring, &i);
736
25
        }
737
966
        return *this;
738
975
    }
739
740
1.80k
    type_of_iterator operator++(int) { // i++, must return orig. value
741
1.80k
        Roaring64MapSetBitForwardIterator orig(*this);
742
1.80k
        roaring_advance_uint32_iterator(&i);
743
1.81k
        while (!i.has_value) {
744
4
            map_iter++;
745
4
            if (map_iter == map_end) return orig;
746
2
            roaring_init_iterator(&map_iter->second.roaring, &i);
747
2
        }
748
1.80k
        return orig;
749
1.80k
    }
750
751
12
    bool move(const uint64_t& x) {
752
12
        map_iter = p.lower_bound(Roaring64Map::highBytes(x));
753
12
        if (map_iter != p.cend()) {
754
12
            roaring_init_iterator(&map_iter->second.roaring, &i);
755
12
            if (map_iter->first == Roaring64Map::highBytes(x)) {
756
12
                if (roaring_move_uint32_iterator_equalorlarger(&i, Roaring64Map::lowBytes(x)))
757
11
                    return true;
758
1
                map_iter++;
759
1
                if (map_iter == map_end) return false;
760
0
                roaring_init_iterator(&map_iter->second.roaring, &i);
761
0
            }
762
0
            return true;
763
12
        }
764
0
        return false;
765
12
    }
766
767
534
    bool operator==(const Roaring64MapSetBitForwardIterator& o) const {
768
534
        if (map_iter == map_end && o.map_iter == o.map_end) return true;
769
526
        if (o.map_iter == o.map_end) return false;
770
1
        return **this == *o;
771
526
    }
772
773
2.30k
    bool operator!=(const Roaring64MapSetBitForwardIterator& o) const {
774
2.30k
        if (map_iter == map_end && o.map_iter == o.map_end) return false;
775
2.29k
        if (o.map_iter == o.map_end) return true;
776
0
        return **this != *o;
777
2.29k
    }
778
5
    Roaring64MapSetBitForwardIterator& operator=(const Roaring64MapSetBitForwardIterator& r) {
779
5
        map_iter = r.map_iter;
780
5
        map_end = r.map_end;
781
5
        i = r.i;
782
5
        return *this;
783
5
    }
784
785
    Roaring64MapSetBitForwardIterator(const Roaring64MapSetBitForwardIterator& r) = default;
786
787
    Roaring64MapSetBitForwardIterator(const Roaring64Map& parent, bool exhausted = false)
788
2.31k
            : p(parent.roarings), map_end(parent.roarings.cend()) {
789
2.31k
        if (exhausted || parent.roarings.empty()) {
790
2.28k
            map_iter = parent.roarings.cend();
791
2.28k
        } else {
792
28
            map_iter = parent.roarings.cbegin();
793
28
            roaring_init_iterator(&map_iter->second.roaring, &i);
794
28
            while (!i.has_value) {
795
0
                map_iter++;
796
0
                if (map_iter == map_end) return;
797
0
                roaring_init_iterator(&map_iter->second.roaring, &i);
798
0
            }
799
28
        }
800
2.31k
    }
801
802
protected:
803
    const phmap::btree_map<uint32_t, roaring::Roaring>& p;
804
    phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_iter {};
805
    phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_end {};
806
    roaring::api::roaring_uint32_iterator_t i {};
807
};
808
809
18
inline Roaring64MapSetBitForwardIterator Roaring64Map::begin() const {
810
18
    return {*this};
811
18
}
812
813
2.26k
inline Roaring64MapSetBitForwardIterator Roaring64Map::end() const {
814
2.26k
    return {*this, true};
815
2.26k
}
816
817
} // namespace detail
818
819
// Represent the in-memory and on-disk structure of Doris's BITMAP data type.
820
// Optimize for the case where the bitmap contains 0 or 1 element which is common
821
// for streaming load scenario.
822
class BitmapValueIterator;
823
class BitmapValue {
824
public:
825
    template <typename T>
826
    using SetContainer = phmap::flat_hash_set<T>;
827
828
    // Construct an empty bitmap.
829
87.5k
    BitmapValue() : _sv(0), _bitmap(nullptr), _type(EMPTY), _is_shared(false) { _set.clear(); }
830
831
    // Construct a bitmap with one element.
832
    explicit BitmapValue(uint64_t value)
833
43
            : _sv(value), _bitmap(nullptr), _type(SINGLE), _is_shared(false) {}
834
835
    // Construct a bitmap from serialized data.
836
3.07k
    explicit BitmapValue(const char* src) : _is_shared(false) {
837
3.07k
        bool res = deserialize(src);
838
3.07k
        DCHECK(res);
839
3.07k
    }
840
841
9.65k
    BitmapValue(const BitmapValue& other) {
842
9.65k
        _type = other._type;
843
9.65k
        switch (other._type) {
844
36
        case EMPTY:
845
36
            break;
846
96
        case SINGLE:
847
96
            _sv = other._sv;
848
96
            break;
849
9.33k
        case BITMAP:
850
9.33k
            _bitmap = other._bitmap;
851
9.33k
            break;
852
190
        case SET:
853
190
            _set = other._set;
854
190
            break;
855
9.65k
        }
856
857
9.65k
        if (other._type == BITMAP) {
858
9.33k
            _is_shared = true;
859
            // should also set other's state to shared, so that other bitmap value will
860
            // create a new bitmap when it wants to modify it.
861
9.33k
            const_cast<BitmapValue&>(other)._is_shared = true;
862
9.33k
        }
863
9.65k
    }
864
865
11.5k
    BitmapValue(BitmapValue&& other) noexcept {
866
11.5k
        _type = other._type;
867
11.5k
        switch (other._type) {
868
19
        case EMPTY:
869
19
            break;
870
164
        case SINGLE:
871
164
            _sv = other._sv;
872
164
            break;
873
10.9k
        case BITMAP:
874
10.9k
            _bitmap = std::move(other._bitmap);
875
10.9k
            other._bitmap = nullptr;
876
10.9k
            break;
877
427
        case SET:
878
427
            _set = std::move(other._set);
879
427
            break;
880
11.5k
        }
881
11.5k
        _is_shared = other._is_shared;
882
11.5k
        other._type = EMPTY;
883
11.5k
        other._is_shared = false;
884
11.5k
    }
885
886
85
    BitmapValue& operator=(const BitmapValue& other) {
887
85
        if (this == &other) {
888
0
            return *this;
889
0
        }
890
85
        reset();
891
85
        _type = other._type;
892
85
        switch (other._type) {
893
13
        case EMPTY:
894
13
            break;
895
24
        case SINGLE:
896
24
            _sv = other._sv;
897
24
            break;
898
21
        case BITMAP:
899
21
            _bitmap = other._bitmap;
900
21
            break;
901
27
        case SET:
902
27
            _set = other._set;
903
27
            break;
904
85
        }
905
906
85
        if (other._type == BITMAP) {
907
21
            _is_shared = true;
908
            // should also set other's state to shared, so that other bitmap value will
909
            // create a new bitmap when it wants to modify it.
910
21
            const_cast<BitmapValue&>(other)._is_shared = true;
911
21
        }
912
85
        return *this;
913
85
    }
914
915
0
    static BitmapValue empty_bitmap() { return BitmapValue {}; }
916
917
18
    BitmapValue& operator=(BitmapValue&& other) noexcept {
918
18
        if (this == &other) {
919
0
            return *this;
920
0
        }
921
18
        reset();
922
923
18
        _type = other._type;
924
18
        switch (other._type) {
925
0
        case EMPTY:
926
0
            break;
927
12
        case SINGLE:
928
12
            _sv = other._sv;
929
12
            break;
930
4
        case BITMAP:
931
4
            _bitmap = std::move(other._bitmap);
932
4
            other._bitmap = nullptr;
933
4
            break;
934
2
        case SET:
935
2
            _set = std::move(other._set);
936
2
            break;
937
18
        }
938
18
        _is_shared = other._is_shared;
939
18
        return *this;
940
18
    }
941
942
    // Construct a bitmap from given elements.
943
59
    explicit BitmapValue(const std::vector<uint64_t>& bits) : _is_shared(false) {
944
59
        if (bits.size() == 0) {
945
0
            _type = EMPTY;
946
0
            return;
947
0
        }
948
949
59
        if (bits.size() == 1) {
950
1
            _type = SINGLE;
951
1
            _sv = bits[0];
952
1
            return;
953
1
        }
954
955
58
        if (!config::enable_set_in_bitmap_value || bits.size() > SET_TYPE_THRESHOLD) {
956
26
            _type = BITMAP;
957
26
            _prepare_bitmap_for_write();
958
26
            _bitmap->addMany(bits.size(), &bits[0]);
959
32
        } else {
960
32
            _type = SET;
961
126
            for (auto v : bits) {
962
126
                _set.insert(v);
963
126
            }
964
32
        }
965
58
    }
966
967
31
    BitmapTypeCode::type get_type_code() const {
968
31
        switch (_type) {
969
1
        case EMPTY:
970
1
            return BitmapTypeCode::EMPTY;
971
7
        case SINGLE:
972
7
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
973
4
                return BitmapTypeCode::SINGLE32;
974
4
            } else {
975
3
                return BitmapTypeCode::SINGLE64;
976
3
            }
977
9
        case SET:
978
9
            return BitmapTypeCode::SET;
979
14
        case BITMAP:
980
14
            bool is_v1 = (config::bitmap_serialize_version == 1);
981
14
            if (_bitmap->is32BitsEnough()) {
982
9
                return is_v1 ? BitmapTypeCode::type::BITMAP32 : BitmapTypeCode::type::BITMAP32_V2;
983
9
            } else {
984
5
                return is_v1 ? BitmapTypeCode::type::BITMAP64 : BitmapTypeCode::type::BITMAP64_V2;
985
5
            }
986
31
        }
987
0
        __builtin_unreachable();
988
31
    }
989
990
    template <typename T>
991
22
    void add_many(const T* values, const size_t count) {
992
22
        switch (_type) {
993
17
        case EMPTY:
994
17
            if (count == 1) {
995
0
                _sv = values[0];
996
0
                _type = SINGLE;
997
17
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
998
240
                for (size_t i = 0; i != count; ++i) {
999
231
                    _set.insert(values[i]);
1000
231
                }
1001
9
                _type = SET;
1002
9
            } else {
1003
8
                _prepare_bitmap_for_write();
1004
8
                _bitmap->addMany(count, values);
1005
8
                _type = BITMAP;
1006
8
            }
1007
17
            break;
1008
0
        case SINGLE:
1009
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1010
0
                _set.insert(_sv);
1011
0
                for (size_t i = 0; i != count; ++i) {
1012
0
                    _set.insert(values[i]);
1013
0
                }
1014
0
                _type = SET;
1015
0
                _convert_to_bitmap_if_need();
1016
0
            } else {
1017
0
                _prepare_bitmap_for_write();
1018
0
                _bitmap->add(_sv);
1019
0
                _bitmap->addMany(count, values);
1020
0
                _type = BITMAP;
1021
0
            }
1022
0
            break;
1023
1
        case BITMAP:
1024
1
            _prepare_bitmap_for_write();
1025
1
            _bitmap->addMany(count, values);
1026
1
            break;
1027
4
        case SET:
1028
324
            for (size_t i = 0; i != count; ++i) {
1029
320
                _set.insert(values[i]);
1030
320
            }
1031
4
            _convert_to_bitmap_if_need();
1032
4
            break;
1033
22
        }
1034
22
    }
_ZN5doris11BitmapValue8add_manyIiEEvPKT_m
Line
Count
Source
991
5
    void add_many(const T* values, const size_t count) {
992
5
        switch (_type) {
993
5
        case EMPTY:
994
5
            if (count == 1) {
995
0
                _sv = values[0];
996
0
                _type = SINGLE;
997
5
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
998
48
                for (size_t i = 0; i != count; ++i) {
999
45
                    _set.insert(values[i]);
1000
45
                }
1001
3
                _type = SET;
1002
3
            } else {
1003
2
                _prepare_bitmap_for_write();
1004
2
                _bitmap->addMany(count, values);
1005
2
                _type = BITMAP;
1006
2
            }
1007
5
            break;
1008
0
        case SINGLE:
1009
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1010
0
                _set.insert(_sv);
1011
0
                for (size_t i = 0; i != count; ++i) {
1012
0
                    _set.insert(values[i]);
1013
0
                }
1014
0
                _type = SET;
1015
0
                _convert_to_bitmap_if_need();
1016
0
            } else {
1017
0
                _prepare_bitmap_for_write();
1018
0
                _bitmap->add(_sv);
1019
0
                _bitmap->addMany(count, values);
1020
0
                _type = BITMAP;
1021
0
            }
1022
0
            break;
1023
0
        case BITMAP:
1024
0
            _prepare_bitmap_for_write();
1025
0
            _bitmap->addMany(count, values);
1026
0
            break;
1027
0
        case SET:
1028
0
            for (size_t i = 0; i != count; ++i) {
1029
0
                _set.insert(values[i]);
1030
0
            }
1031
0
            _convert_to_bitmap_if_need();
1032
0
            break;
1033
5
        }
1034
5
    }
_ZN5doris11BitmapValue8add_manyImEEvPKT_m
Line
Count
Source
991
17
    void add_many(const T* values, const size_t count) {
992
17
        switch (_type) {
993
12
        case EMPTY:
994
12
            if (count == 1) {
995
0
                _sv = values[0];
996
0
                _type = SINGLE;
997
12
            } else if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
998
192
                for (size_t i = 0; i != count; ++i) {
999
186
                    _set.insert(values[i]);
1000
186
                }
1001
6
                _type = SET;
1002
6
            } else {
1003
6
                _prepare_bitmap_for_write();
1004
6
                _bitmap->addMany(count, values);
1005
6
                _type = BITMAP;
1006
6
            }
1007
12
            break;
1008
0
        case SINGLE:
1009
0
            if (config::enable_set_in_bitmap_value && count < SET_TYPE_THRESHOLD) {
1010
0
                _set.insert(_sv);
1011
0
                for (size_t i = 0; i != count; ++i) {
1012
0
                    _set.insert(values[i]);
1013
0
                }
1014
0
                _type = SET;
1015
0
                _convert_to_bitmap_if_need();
1016
0
            } else {
1017
0
                _prepare_bitmap_for_write();
1018
0
                _bitmap->add(_sv);
1019
0
                _bitmap->addMany(count, values);
1020
0
                _type = BITMAP;
1021
0
            }
1022
0
            break;
1023
1
        case BITMAP:
1024
1
            _prepare_bitmap_for_write();
1025
1
            _bitmap->addMany(count, values);
1026
1
            break;
1027
4
        case SET:
1028
324
            for (size_t i = 0; i != count; ++i) {
1029
320
                _set.insert(values[i]);
1030
320
            }
1031
4
            _convert_to_bitmap_if_need();
1032
4
            break;
1033
17
        }
1034
17
    }
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIhEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIaEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIsEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyIlEEvPKT_m
Unexecuted instantiation: _ZN5doris11BitmapValue8add_manyInEEvPKT_m
1035
1036
2.63M
    void add(uint64_t value) {
1037
2.63M
        switch (_type) {
1038
5.33k
        case EMPTY:
1039
5.33k
            if (!config::enable_set_in_bitmap_value) {
1040
4.30k
                _sv = value;
1041
4.30k
                _type = SINGLE;
1042
4.30k
            } else {
1043
1.03k
                _set.insert(value);
1044
1.03k
                _type = SET;
1045
1.03k
            }
1046
5.33k
            break;
1047
4.25k
        case SINGLE:
1048
            //there is no need to convert the type if two variables are equal
1049
4.25k
            if (_sv == value) {
1050
2
                break;
1051
2
            }
1052
4.25k
            if (config::enable_set_in_bitmap_value) {
1053
4
                _set.insert(_sv);
1054
4
                _set.insert(value);
1055
4
                _type = SET;
1056
4.25k
            } else {
1057
4.25k
                _prepare_bitmap_for_write();
1058
4.25k
                _bitmap->add(_sv);
1059
4.25k
                _bitmap->add(value);
1060
4.25k
                _type = BITMAP;
1061
4.25k
            }
1062
4.25k
            break;
1063
2.59M
        case BITMAP:
1064
2.59M
            _prepare_bitmap_for_write();
1065
2.59M
            _bitmap->add(value);
1066
2.59M
            break;
1067
32.2k
        case SET:
1068
32.2k
            _set.insert(value);
1069
32.2k
            _convert_to_bitmap_if_need();
1070
32.2k
            break;
1071
2.63M
        }
1072
2.63M
    }
1073
1074
8
    void remove(uint64_t value) {
1075
8
        switch (_type) {
1076
1
        case EMPTY:
1077
1
            break;
1078
1
        case SINGLE:
1079
            //there is need to convert the type if two variables are equal
1080
1
            if (_sv == value) {
1081
0
                _type = EMPTY;
1082
0
            }
1083
1
            break;
1084
4
        case BITMAP:
1085
4
            _prepare_bitmap_for_write();
1086
4
            _bitmap->remove(value);
1087
4
            _convert_to_smaller_type();
1088
4
            break;
1089
2
        case SET:
1090
2
            _set.erase(value);
1091
2
            _convert_to_smaller_type();
1092
2
            break;
1093
8
        }
1094
8
    }
1095
1096
    // Compute the union between the current bitmap and the provided bitmap.
1097
16
    BitmapValue& operator-=(const BitmapValue& rhs) {
1098
16
        switch (rhs._type) {
1099
4
        case EMPTY:
1100
4
            break;
1101
4
        case SINGLE:
1102
4
            remove(rhs._sv);
1103
4
            break;
1104
4
        case BITMAP:
1105
4
            switch (_type) {
1106
1
            case EMPTY:
1107
1
                break;
1108
1
            case SINGLE:
1109
1
                if (rhs._bitmap->contains(_sv)) {
1110
0
                    _type = EMPTY;
1111
0
                }
1112
1
                break;
1113
1
            case BITMAP:
1114
1
                _prepare_bitmap_for_write();
1115
1
                *_bitmap -= *rhs._bitmap;
1116
1
                _convert_to_smaller_type();
1117
1
                break;
1118
1
            case SET: {
1119
32
                for (auto it = _set.begin(); it != _set.end();) {
1120
31
                    if (rhs.contains(*it)) {
1121
16
                        it = _set.erase(it);
1122
16
                    } else {
1123
15
                        ++it;
1124
15
                    }
1125
31
                }
1126
1
                _convert_to_smaller_type();
1127
1
                break;
1128
0
            }
1129
4
            }
1130
4
            break;
1131
4
        case SET: {
1132
4
            switch (_type) {
1133
1
            case EMPTY:
1134
1
                break;
1135
1
            case SINGLE:
1136
1
                if (rhs._set.contains(_sv)) {
1137
0
                    _type = EMPTY;
1138
0
                }
1139
1
                break;
1140
1
            case BITMAP:
1141
1
                _prepare_bitmap_for_write();
1142
31
                for (auto v : rhs._set) {
1143
31
                    if (_bitmap->contains(v)) {
1144
0
                        _bitmap->remove(v);
1145
0
                    }
1146
31
                }
1147
1
                _convert_to_smaller_type();
1148
1
                break;
1149
1
            case SET: {
1150
32
                for (auto it = _set.begin(); it != _set.end();) {
1151
31
                    if (rhs.contains(*it)) {
1152
14
                        it = _set.erase(it);
1153
17
                    } else {
1154
17
                        ++it;
1155
17
                    }
1156
31
                }
1157
1
                _convert_to_smaller_type();
1158
1
                break;
1159
0
            }
1160
4
            }
1161
4
        }
1162
16
        }
1163
16
        return *this;
1164
16
    }
1165
1166
    // Compute the union between the current bitmap and the provided bitmap.
1167
    // Possible type transitions are:
1168
    // EMPTY  -> SINGLE
1169
    // EMPTY  -> BITMAP
1170
    // SINGLE -> BITMAP
1171
56
    BitmapValue& operator|=(const BitmapValue& rhs) {
1172
56
        switch (rhs._type) {
1173
12
        case EMPTY:
1174
12
            break;
1175
17
        case SINGLE:
1176
17
            add(rhs._sv);
1177
17
            break;
1178
13
        case BITMAP:
1179
13
            switch (_type) {
1180
6
            case EMPTY:
1181
6
                _bitmap = rhs._bitmap;
1182
6
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1183
6
                _is_shared = true;
1184
6
                _type = BITMAP;
1185
6
                break;
1186
3
            case SINGLE:
1187
3
                _bitmap = rhs._bitmap;
1188
3
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1189
3
                _is_shared = true;
1190
3
                _prepare_bitmap_for_write();
1191
3
                _bitmap->add(_sv);
1192
3
                _type = BITMAP;
1193
3
                break;
1194
3
            case BITMAP:
1195
3
                _prepare_bitmap_for_write();
1196
3
                *_bitmap |= *rhs._bitmap;
1197
3
                break;
1198
1
            case SET: {
1199
1
                _prepare_bitmap_for_write();
1200
1
                *_bitmap = *rhs._bitmap;
1201
1202
31
                for (auto v : _set) {
1203
31
                    _bitmap->add(v);
1204
31
                }
1205
1
                _type = BITMAP;
1206
1
                break;
1207
0
            }
1208
13
            }
1209
13
            break;
1210
14
        case SET:
1211
14
            switch (_type) {
1212
1
            case EMPTY:
1213
1
                _set = rhs._set;
1214
1
                _type = SET;
1215
1
                break;
1216
1
            case SINGLE: {
1217
1
                if ((rhs._set.size() + rhs._set.contains(_sv) > SET_TYPE_THRESHOLD)) {
1218
0
                    _prepare_bitmap_for_write();
1219
0
                    _bitmap->add(_sv);
1220
0
                    for (auto v : rhs._set) {
1221
0
                        _bitmap->add(v);
1222
0
                    }
1223
0
                    _type = BITMAP;
1224
1
                } else {
1225
1
                    _set = rhs._set;
1226
1
                    _set.insert(_sv);
1227
1
                    _type = SET;
1228
1
                }
1229
1
                break;
1230
0
            }
1231
1
            case BITMAP:
1232
1
                _prepare_bitmap_for_write();
1233
31
                for (auto v : rhs._set) {
1234
31
                    _bitmap->add(v);
1235
31
                }
1236
1
                break;
1237
11
            case SET: {
1238
58
                for (auto v : rhs._set) {
1239
58
                    _set.insert(v);
1240
58
                }
1241
11
                _convert_to_bitmap_if_need();
1242
11
                break;
1243
0
            }
1244
14
            }
1245
14
            break;
1246
56
        }
1247
56
        return *this;
1248
56
    }
1249
1250
13
    BitmapValue& fastunion(const std::vector<const BitmapValue*>& values) {
1251
13
        std::vector<const detail::Roaring64Map*> bitmaps;
1252
13
        std::vector<uint64_t> single_values;
1253
13
        std::vector<const SetContainer<uint64_t>*> sets;
1254
19
        for (const auto* value : values) {
1255
19
            switch (value->_type) {
1256
5
            case EMPTY:
1257
5
                break;
1258
5
            case SINGLE:
1259
5
                single_values.push_back(value->_sv);
1260
5
                break;
1261
6
            case BITMAP:
1262
6
                bitmaps.push_back(value->_bitmap.get());
1263
6
                break;
1264
3
            case SET:
1265
3
                sets.push_back(&value->_set);
1266
3
                break;
1267
19
            }
1268
19
        }
1269
1270
13
        if (!bitmaps.empty()) {
1271
6
            _prepare_bitmap_for_write();
1272
6
            switch (_type) {
1273
3
            case EMPTY:
1274
3
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1275
3
                break;
1276
2
            case SINGLE:
1277
2
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1278
2
                _bitmap->add(_sv);
1279
2
                break;
1280
1
            case BITMAP:
1281
1
                for (const auto* bitmap : bitmaps) {
1282
1
                    *_bitmap |= *bitmap;
1283
1
                }
1284
1
                break;
1285
0
            case SET: {
1286
0
                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
1287
0
                for (auto v : _set) {
1288
0
                    _bitmap->add(v);
1289
0
                }
1290
0
                _set.clear();
1291
0
                break;
1292
0
            }
1293
6
            }
1294
6
            _type = BITMAP;
1295
6
        }
1296
1297
13
        if (!sets.empty()) {
1298
3
            for (auto& set : sets) {
1299
45
                for (auto v : *set) {
1300
45
                    _set.insert(v);
1301
45
                }
1302
3
            }
1303
1
            switch (_type) {
1304
1
            case EMPTY:
1305
1
                _type = SET;
1306
1
                break;
1307
0
            case SINGLE: {
1308
0
                _set.insert(_sv);
1309
0
                _type = SET;
1310
0
                break;
1311
0
            }
1312
0
            case BITMAP:
1313
0
                _prepare_bitmap_for_write();
1314
0
                for (auto v : _set) {
1315
0
                    _bitmap->add(v);
1316
0
                }
1317
0
                _type = BITMAP;
1318
0
                _set.clear();
1319
0
                break;
1320
0
            case SET: {
1321
0
                break;
1322
0
            }
1323
1
            }
1324
1
            if (_type == SET) {
1325
1
                _convert_to_bitmap_if_need();
1326
1
            }
1327
1
        }
1328
1329
13
        if (_type == EMPTY && single_values.size() == 1) {
1330
1
            if (config::enable_set_in_bitmap_value) {
1331
0
                _type = SET;
1332
0
                _set.insert(single_values[0]);
1333
1
            } else {
1334
1
                _sv = single_values[0];
1335
1
                _type = SINGLE;
1336
1
            }
1337
12
        } else if (!single_values.empty()) {
1338
4
            switch (_type) {
1339
0
            case EMPTY:
1340
1
            case SINGLE:
1341
1
                if (config::enable_set_in_bitmap_value) {
1342
0
                    _set.insert(single_values.cbegin(), single_values.cend());
1343
0
                    if (_type == SINGLE) {
1344
0
                        _set.insert(_sv);
1345
0
                    }
1346
0
                    _type = SET;
1347
0
                    _convert_to_bitmap_if_need();
1348
1
                } else {
1349
1
                    _prepare_bitmap_for_write();
1350
1
                    _bitmap->addMany(single_values.size(), single_values.data());
1351
1
                    if (_type == SINGLE) {
1352
1
                        _bitmap->add(_sv);
1353
1
                    }
1354
1
                    _type = BITMAP;
1355
1
                    _convert_to_smaller_type();
1356
1
                }
1357
1
                break;
1358
3
            case BITMAP: {
1359
3
                _prepare_bitmap_for_write();
1360
3
                _bitmap->addMany(single_values.size(), single_values.data());
1361
3
                break;
1362
0
            }
1363
0
            case SET:
1364
0
                _set.insert(single_values.cbegin(), single_values.cend());
1365
0
                _convert_to_bitmap_if_need();
1366
0
                break;
1367
4
            }
1368
4
        }
1369
1370
13
        return *this;
1371
13
    }
1372
1373
    // Compute the intersection between the current bitmap and the provided bitmap.
1374
    // Possible type transitions are:
1375
    // SINGLE -> EMPTY
1376
    // BITMAP -> EMPTY
1377
    // BITMAP -> SINGLE
1378
50
    BitmapValue& operator&=(const BitmapValue& rhs) {
1379
50
        switch (rhs._type) {
1380
14
        case EMPTY:
1381
14
            reset(); // empty & any = empty
1382
14
            break;
1383
10
        case SINGLE:
1384
10
            switch (_type) {
1385
2
            case EMPTY:
1386
2
                break;
1387
3
            case SINGLE:
1388
3
                if (_sv != rhs._sv) {
1389
2
                    _type = EMPTY;
1390
2
                }
1391
3
                break;
1392
4
            case BITMAP:
1393
4
                if (!_bitmap->contains(rhs._sv)) {
1394
1
                    _type = EMPTY;
1395
3
                } else {
1396
3
                    _type = SINGLE;
1397
3
                    _sv = rhs._sv;
1398
3
                }
1399
4
                _bitmap.reset();
1400
4
                _is_shared = false;
1401
4
                break;
1402
1
            case SET:
1403
1
                if (!_set.contains(rhs._sv)) {
1404
0
                    _type = EMPTY;
1405
1
                } else {
1406
1
                    _type = SINGLE;
1407
1
                    _sv = rhs._sv;
1408
1
                }
1409
1
                _set.clear();
1410
1
                break;
1411
10
            }
1412
10
            break;
1413
10
        case BITMAP:
1414
9
            switch (_type) {
1415
2
            case EMPTY:
1416
2
                break;
1417
3
            case SINGLE:
1418
3
                if (!rhs._bitmap->contains(_sv)) {
1419
2
                    _type = EMPTY;
1420
2
                }
1421
3
                break;
1422
3
            case BITMAP:
1423
3
                _prepare_bitmap_for_write();
1424
3
                *_bitmap &= *rhs._bitmap;
1425
3
                _convert_to_smaller_type();
1426
3
                break;
1427
1
            case SET:
1428
32
                for (auto it = _set.begin(); it != _set.end();) {
1429
31
                    if (!rhs._bitmap->contains(*it)) {
1430
15
                        it = _set.erase(it);
1431
16
                    } else {
1432
16
                        ++it;
1433
16
                    }
1434
31
                }
1435
1
                _convert_to_smaller_type();
1436
1
                break;
1437
9
            }
1438
9
            break;
1439
17
        case SET:
1440
17
            switch (_type) {
1441
1
            case EMPTY:
1442
1
                break;
1443
1
            case SINGLE:
1444
1
                if (!rhs._set.contains(_sv)) {
1445
1
                    _type = EMPTY;
1446
1
                }
1447
1
                break;
1448
1
            case BITMAP:
1449
1
                _prepare_bitmap_for_write();
1450
31
                for (auto v : rhs._set) {
1451
31
                    if (_bitmap->contains(v)) {
1452
0
                        _set.insert(v);
1453
0
                    }
1454
31
                }
1455
1
                _type = SET;
1456
1
                _bitmap.reset();
1457
1
                _is_shared = false;
1458
1
                _convert_to_smaller_type();
1459
1
                break;
1460
14
            case SET:
1461
79
                for (auto it = _set.begin(); it != _set.end();) {
1462
65
                    if (!rhs._set.contains(*it)) {
1463
37
                        it = _set.erase(it);
1464
37
                    } else {
1465
28
                        ++it;
1466
28
                    }
1467
65
                }
1468
14
                _convert_to_smaller_type();
1469
14
                break;
1470
17
            }
1471
17
            break;
1472
50
        }
1473
50
        return *this;
1474
50
    }
1475
1476
    // Compute the symmetric union between the current bitmap and the provided bitmap.
1477
    // Possible type transitions are:
1478
    // SINGLE -> EMPTY
1479
    // BITMAP -> EMPTY
1480
    // BITMAP -> SINGLE
1481
27
    BitmapValue& operator^=(const BitmapValue& rhs) {
1482
27
        switch (rhs._type) {
1483
8
        case EMPTY:
1484
8
            break;
1485
4
        case SINGLE:
1486
4
            switch (_type) {
1487
1
            case EMPTY:
1488
1
                add(rhs._sv);
1489
1
                break;
1490
1
            case SINGLE:
1491
1
                if (_sv == rhs._sv) {
1492
0
                    _type = EMPTY;
1493
1
                } else {
1494
1
                    add(rhs._sv);
1495
1
                }
1496
1
                break;
1497
1
            case BITMAP:
1498
1
                if (!_bitmap->contains(rhs._sv)) {
1499
0
                    add(rhs._sv);
1500
1
                } else {
1501
1
                    _prepare_bitmap_for_write();
1502
1
                    _bitmap->remove(rhs._sv);
1503
1
                }
1504
1
                break;
1505
1
            case SET:
1506
1
                if (!_set.contains(rhs._sv)) {
1507
0
                    _set.insert(rhs._sv);
1508
1
                } else {
1509
1
                    _set.erase(rhs._sv);
1510
1
                }
1511
1
                break;
1512
4
            }
1513
4
            break;
1514
4
        case BITMAP:
1515
4
            switch (_type) {
1516
1
            case EMPTY:
1517
1
                _bitmap = rhs._bitmap;
1518
1
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1519
1
                _is_shared = true;
1520
1
                _type = BITMAP;
1521
1
                break;
1522
1
            case SINGLE:
1523
1
                _bitmap = rhs._bitmap;
1524
1
                const_cast<BitmapValue&>(rhs)._is_shared = true;
1525
1
                _is_shared = true;
1526
1
                _type = BITMAP;
1527
1
                _prepare_bitmap_for_write();
1528
1
                if (!rhs._bitmap->contains(_sv)) {
1529
1
                    _bitmap->add(_sv);
1530
1
                } else {
1531
0
                    _bitmap->remove(_sv);
1532
0
                }
1533
1
                break;
1534
1
            case BITMAP:
1535
1
                _prepare_bitmap_for_write();
1536
1
                *_bitmap ^= *rhs._bitmap;
1537
1
                _convert_to_smaller_type();
1538
1
                break;
1539
1
            case SET:
1540
1
                _prepare_bitmap_for_write();
1541
1
                *_bitmap = *rhs._bitmap;
1542
31
                for (auto v : _set) {
1543
31
                    if (_bitmap->contains(v)) {
1544
16
                        _bitmap->remove(v);
1545
16
                    } else {
1546
15
                        _bitmap->add(v);
1547
15
                    }
1548
31
                }
1549
1
                _type = BITMAP;
1550
1
                _convert_to_smaller_type();
1551
1
                break;
1552
4
            }
1553
4
            break;
1554
11
        case SET:
1555
11
            switch (_type) {
1556
1
            case EMPTY:
1557
1
                _set = rhs._set;
1558
1
                _type = SET;
1559
1
                break;
1560
1
            case SINGLE:
1561
1
                _set = rhs._set;
1562
1
                if (!rhs._set.contains(_sv)) {
1563
1
                    _set.insert(_sv);
1564
1
                } else {
1565
0
                    _set.erase(_sv);
1566
0
                }
1567
1
                _type = SET;
1568
1
                break;
1569
1
            case BITMAP:
1570
1
                _prepare_bitmap_for_write();
1571
31
                for (auto v : rhs._set) {
1572
31
                    if (_bitmap->contains(v)) {
1573
0
                        _bitmap->remove(v);
1574
31
                    } else {
1575
31
                        _bitmap->add(v);
1576
31
                    }
1577
31
                }
1578
1
                _convert_to_smaller_type();
1579
1
                break;
1580
8
            case SET:
1581
49
                for (auto v : rhs._set) {
1582
49
                    if (_set.contains(v)) {
1583
22
                        _set.erase(v);
1584
27
                    } else {
1585
27
                        _set.insert(v);
1586
27
                    }
1587
49
                }
1588
8
                _convert_to_smaller_type();
1589
8
                break;
1590
11
            }
1591
11
            break;
1592
27
        }
1593
27
        return *this;
1594
27
    }
1595
1596
    // check if value x is present
1597
9.21k
    bool contains(uint64_t x) const {
1598
9.21k
        switch (_type) {
1599
0
        case EMPTY:
1600
0
            return false;
1601
14
        case SINGLE:
1602
14
            return _sv == x;
1603
9.01k
        case BITMAP:
1604
9.01k
            return _bitmap->contains(x);
1605
188
        case SET:
1606
188
            return _set.contains(x);
1607
9.21k
        }
1608
0
        return false;
1609
9.21k
    }
1610
1611
    // true if contains a value that belongs to the range [left, right].
1612
    bool contains_any(uint64_t left, uint64_t right) const;
1613
1614
240k
    uint64_t cardinality() const {
1615
240k
        switch (_type) {
1616
240k
        case EMPTY:
1617
240k
            return 0;
1618
36
        case SINGLE:
1619
36
            return 1;
1620
202
        case BITMAP:
1621
202
            return _bitmap->cardinality();
1622
74
        case SET:
1623
74
            return _set.size();
1624
240k
        }
1625
0
        return 0;
1626
240k
    }
1627
1628
60.0k
    uint64_t and_cardinality(const BitmapValue& rhs) const {
1629
60.0k
        switch (rhs._type) {
1630
60.0k
        case EMPTY:
1631
60.0k
            return 0;
1632
4
        case SINGLE:
1633
4
            switch (_type) {
1634
1
            case EMPTY:
1635
1
                return 0;
1636
1
            case SINGLE:
1637
1
                return _sv == rhs._sv;
1638
1
            case BITMAP:
1639
1
                return _bitmap->contains(rhs._sv);
1640
1
            case SET:
1641
1
                return _set.contains(rhs._sv);
1642
4
            }
1643
0
            break;
1644
7
        case BITMAP:
1645
7
            switch (_type) {
1646
1
            case EMPTY:
1647
1
                return 0;
1648
1
            case SINGLE:
1649
1
                return rhs._bitmap->contains(_sv);
1650
4
            case BITMAP:
1651
4
                return _bitmap->andCardinality(*rhs._bitmap);
1652
1
            case SET: {
1653
1
                uint64_t cardinality = 0;
1654
31
                for (auto v : _set) {
1655
31
                    if (rhs._bitmap->contains(v)) {
1656
16
                        ++cardinality;
1657
16
                    }
1658
31
                }
1659
1
                return cardinality;
1660
0
            }
1661
7
            }
1662
0
            break;
1663
4
        case SET:
1664
4
            switch (_type) {
1665
1
            case EMPTY:
1666
1
                return 0;
1667
1
            case SINGLE:
1668
1
                return rhs._set.contains(_sv);
1669
1
            case BITMAP: {
1670
1
                uint64_t cardinality = 0;
1671
31
                for (auto v : rhs._set) {
1672
31
                    if (_bitmap->contains(v)) {
1673
0
                        ++cardinality;
1674
0
                    }
1675
31
                }
1676
1
                return cardinality;
1677
0
            }
1678
1
            case SET: {
1679
1
                uint64_t cardinality = 0;
1680
31
                for (auto v : _set) {
1681
31
                    if (rhs._set.contains(v)) {
1682
14
                        ++cardinality;
1683
14
                    }
1684
31
                }
1685
1
                return cardinality;
1686
0
            }
1687
4
            }
1688
60.0k
        }
1689
0
        return 0;
1690
60.0k
    }
1691
1692
60.0k
    uint64_t or_cardinality(const BitmapValue& rhs) const {
1693
60.0k
        switch (rhs._type) {
1694
59.9k
        case EMPTY:
1695
59.9k
            return cardinality();
1696
0
        case SINGLE:
1697
0
            switch (_type) {
1698
0
            case EMPTY:
1699
0
                return 1;
1700
0
            case SINGLE:
1701
0
                return 1 + (_sv != rhs._sv);
1702
0
            case BITMAP:
1703
0
                return cardinality() + !_bitmap->contains(rhs._sv);
1704
0
            case SET:
1705
0
                return _set.size() + !_set.contains(rhs._sv);
1706
0
            }
1707
0
            break;
1708
3
        case BITMAP:
1709
3
            switch (_type) {
1710
0
            case EMPTY:
1711
0
                return rhs.cardinality();
1712
0
            case SINGLE:
1713
0
                return rhs.cardinality() + !rhs._bitmap->contains(_sv);
1714
3
            case BITMAP:
1715
3
                return _bitmap->orCardinality(*rhs._bitmap);
1716
0
            case SET: {
1717
0
                uint64_t cardinality = rhs._bitmap->cardinality();
1718
0
                for (auto v : _set) {
1719
0
                    if (!rhs._bitmap->contains(v)) {
1720
0
                        ++cardinality;
1721
0
                    }
1722
0
                }
1723
0
                return cardinality;
1724
0
            }
1725
3
            }
1726
0
            break;
1727
0
        case SET:
1728
0
            switch (_type) {
1729
0
            case EMPTY:
1730
0
                return rhs.cardinality();
1731
0
            case SINGLE:
1732
0
                return rhs.cardinality() + !rhs._set.contains(_sv);
1733
0
            case BITMAP: {
1734
0
                uint64_t cardinality = _bitmap->cardinality();
1735
0
                for (auto v : rhs._set) {
1736
0
                    if (!_bitmap->contains(v)) {
1737
0
                        ++cardinality;
1738
0
                    }
1739
0
                }
1740
0
                return cardinality;
1741
0
            }
1742
0
            case SET: {
1743
0
                uint64_t cardinality = _set.size();
1744
0
                for (auto v : _set) {
1745
0
                    if (!rhs._set.contains(v)) {
1746
0
                        ++cardinality;
1747
0
                    }
1748
0
                }
1749
0
                return cardinality;
1750
0
            }
1751
0
            }
1752
60.0k
        }
1753
0
        return 0;
1754
60.0k
    }
1755
1756
24
    uint64_t andnot_cardinality(const BitmapValue& rhs) const {
1757
24
        switch (rhs._type) {
1758
8
        case EMPTY:
1759
8
            return cardinality();
1760
4
        case SINGLE:
1761
4
            switch (_type) {
1762
1
            case EMPTY:
1763
1
                return 0;
1764
1
            case SINGLE:
1765
1
                return 1 - (_sv == rhs._sv);
1766
1
            case BITMAP:
1767
1
                return cardinality() - _bitmap->contains(rhs._sv);
1768
1
            case SET:
1769
1
                return cardinality() - _set.contains(rhs._sv);
1770
4
            }
1771
0
            break;
1772
4
        case BITMAP:
1773
4
            switch (_type) {
1774
1
            case EMPTY:
1775
1
                return 0;
1776
1
            case SINGLE:
1777
1
                return !rhs._bitmap->contains(_sv);
1778
1
            case BITMAP:
1779
1
                return _bitmap->andnotCardinality(*rhs._bitmap);
1780
1
            case SET: {
1781
1
                uint64_t cardinality = _set.size();
1782
31
                for (auto v : _set) {
1783
31
                    if (rhs._bitmap->contains(v)) {
1784
16
                        cardinality -= 1;
1785
16
                    }
1786
31
                }
1787
1
                return cardinality;
1788
0
            }
1789
4
            }
1790
0
            break;
1791
8
        case SET:
1792
8
            switch (_type) {
1793
1
            case EMPTY:
1794
1
                return 0;
1795
1
            case SINGLE:
1796
1
                return !rhs._set.contains(_sv);
1797
1
            case BITMAP: {
1798
1
                uint64_t cardinality = _bitmap->cardinality();
1799
31
                for (auto v : rhs._set) {
1800
31
                    if (_bitmap->contains(v)) {
1801
0
                        cardinality -= 1;
1802
0
                    }
1803
31
                }
1804
1
                return cardinality;
1805
0
            }
1806
5
            case SET: {
1807
5
                uint64_t cardinality = _set.size();
1808
33
                for (auto v : rhs._set) {
1809
33
                    if (_set.contains(v)) {
1810
16
                        cardinality -= 1;
1811
16
                    }
1812
33
                }
1813
5
                return cardinality;
1814
0
            }
1815
8
            }
1816
24
        }
1817
0
        return 0;
1818
24
    }
1819
1820
    // Return how many bytes are required to serialize this bitmap.
1821
    // See BitmapTypeCode for the serialized format.
1822
133k
    size_t getSizeInBytes() {
1823
133k
        size_t res = 0;
1824
133k
        switch (_type) {
1825
120k
        case EMPTY:
1826
120k
            res = 1;
1827
120k
            break;
1828
27
        case SINGLE:
1829
27
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1830
22
                res = 1 + sizeof(uint32_t);
1831
22
            } else {
1832
5
                res = 1 + sizeof(uint64_t);
1833
5
            }
1834
27
            break;
1835
13.3k
        case BITMAP:
1836
13.3k
            _prepare_bitmap_for_write();
1837
13.3k
            _bitmap->runOptimize();
1838
13.3k
            _bitmap->shrinkToFit();
1839
13.3k
            res = _bitmap->getSizeInBytes(config::bitmap_serialize_version);
1840
13.3k
            break;
1841
39
        case SET:
1842
            /// 1 byte for type, 1 byte for count
1843
39
            res = 2 + sizeof(uint64_t) * _set.size();
1844
39
            break;
1845
133k
        }
1846
133k
        return res;
1847
133k
    }
1848
1849
    // Serialize the bitmap value to dst, which should be large enough.
1850
    // Client should call `getSizeInBytes` first to get the serialized size.
1851
69.2k
    void write_to(char* dst) const {
1852
69.2k
        switch (_type) {
1853
60.0k
        case EMPTY:
1854
60.0k
            *dst = BitmapTypeCode::EMPTY;
1855
60.0k
            break;
1856
18
        case SINGLE:
1857
18
            if (_sv <= std::numeric_limits<uint32_t>::max()) {
1858
15
                *(dst++) = BitmapTypeCode::SINGLE32;
1859
15
                encode_fixed32_le(reinterpret_cast<uint8_t*>(dst), static_cast<uint32_t>(_sv));
1860
15
            } else {
1861
3
                *(dst++) = BitmapTypeCode::SINGLE64;
1862
3
                encode_fixed64_le(reinterpret_cast<uint8_t*>(dst), _sv);
1863
3
            }
1864
18
            break;
1865
36
        case SET:
1866
36
            DCHECK(config::enable_set_in_bitmap_value);
1867
36
            *dst = BitmapTypeCode::SET;
1868
36
            ++dst;
1869
36
            *dst = static_cast<uint8_t>(_set.size());
1870
36
            ++dst;
1871
567
            for (auto v : _set) {
1872
567
                encode_fixed64_le(reinterpret_cast<uint8_t*>(dst), v);
1873
567
                dst += sizeof(uint64_t);
1874
567
            }
1875
36
            break;
1876
9.21k
        case BITMAP:
1877
9.21k
            _bitmap->write(dst, config::bitmap_serialize_version);
1878
9.21k
            break;
1879
69.2k
        }
1880
69.2k
    }
1881
1882
    // Deserialize a bitmap value from `src`.
1883
    // Return false if `src` begins with unknown type code, true otherwise.
1884
65.1k
    bool deserialize(const char* src) {
1885
65.1k
        switch (*src) {
1886
60.0k
        case BitmapTypeCode::EMPTY:
1887
60.0k
            _type = EMPTY;
1888
60.0k
            break;
1889
9
        case BitmapTypeCode::SINGLE32:
1890
9
            _type = SINGLE;
1891
9
            _sv = decode_fixed32_le(reinterpret_cast<const uint8_t*>(src + 1));
1892
9
            if (config::enable_set_in_bitmap_value) {
1893
2
                _type = SET;
1894
2
                _set.insert(_sv);
1895
2
            }
1896
9
            break;
1897
3
        case BitmapTypeCode::SINGLE64:
1898
3
            _type = SINGLE;
1899
3
            _sv = decode_fixed64_le(reinterpret_cast<const uint8_t*>(src + 1));
1900
3
            if (config::enable_set_in_bitmap_value) {
1901
1
                _type = SET;
1902
1
                _set.insert(_sv);
1903
1
            }
1904
3
            break;
1905
4.10k
        case BitmapTypeCode::BITMAP32:
1906
4.11k
        case BitmapTypeCode::BITMAP64:
1907
5.10k
        case BitmapTypeCode::BITMAP32_V2:
1908
5.10k
        case BitmapTypeCode::BITMAP64_V2:
1909
5.10k
            _type = BITMAP;
1910
5.10k
            _is_shared = false;
1911
5.10k
            _bitmap = std::make_shared<detail::Roaring64Map>(detail::Roaring64Map::read(src));
1912
5.10k
            break;
1913
35
        case BitmapTypeCode::SET: {
1914
35
            _type = SET;
1915
35
            ++src;
1916
35
            uint8_t count = *src;
1917
35
            ++src;
1918
35
            CHECK(count <= SET_TYPE_THRESHOLD) << "bitmap value with incorrect set count";
1919
598
            for (uint8_t i = 0; i != count; ++i, src += sizeof(uint64_t)) {
1920
563
                _set.insert(decode_fixed64_le(reinterpret_cast<const uint8_t*>(src)));
1921
563
            }
1922
35
            CHECK_EQ(count, _set.size()) << "bitmap value with incorrect set count";
1923
1924
35
            if (!config::enable_set_in_bitmap_value) {
1925
0
                _prepare_bitmap_for_write();
1926
0
                for (auto v : _set) {
1927
0
                    _bitmap->add(v);
1928
0
                }
1929
0
                _type = BITMAP;
1930
0
                _set.clear();
1931
0
            }
1932
35
            break;
1933
5.10k
        }
1934
1
        case BitmapTypeCode::SET_V2: {
1935
1
            uint32_t size = 0;
1936
1
            memcpy(&size, src + 1, sizeof(uint32_t));
1937
1
            src += sizeof(uint32_t) + 1;
1938
1939
1
            if (!config::enable_set_in_bitmap_value || size > SET_TYPE_THRESHOLD) {
1940
0
                _type = BITMAP;
1941
0
                _prepare_bitmap_for_write();
1942
1943
0
                for (int i = 0; i < size; ++i) {
1944
0
                    uint64_t key {};
1945
0
                    memcpy(&key, src, sizeof(uint64_t));
1946
0
                    _bitmap->add(key);
1947
0
                    src += sizeof(uint64_t);
1948
0
                }
1949
1
            } else {
1950
1
                _type = SET;
1951
1
                _set.reserve(size);
1952
1953
3
                for (int i = 0; i < size; ++i) {
1954
2
                    uint64_t key {};
1955
2
                    memcpy(&key, src, sizeof(uint64_t));
1956
2
                    _set.insert(key);
1957
2
                    src += sizeof(uint64_t);
1958
2
                }
1959
1
            }
1960
1
            break;
1961
5.10k
        }
1962
0
        default:
1963
0
            LOG(ERROR) << "BitmapTypeCode invalid, should between: " << BitmapTypeCode::EMPTY
1964
0
                       << " and " << BitmapTypeCode::BITMAP64 << " actual is "
1965
0
                       << static_cast<int>(*src);
1966
0
            return false;
1967
65.1k
        }
1968
65.1k
        return true;
1969
65.1k
    }
1970
1971
13
    int64_t minimum() const {
1972
13
        switch (_type) {
1973
2
        case SINGLE:
1974
2
            return _sv;
1975
4
        case BITMAP:
1976
4
            return _bitmap->minimum();
1977
5
        case SET:
1978
5
            return _min_in_set();
1979
2
        default:
1980
2
            return 0;
1981
13
        }
1982
13
    }
1983
1984
    // TODO limit string size to avoid OOM
1985
875
    std::string to_string() const {
1986
875
        std::stringstream ss;
1987
875
        switch (_type) {
1988
8
        case EMPTY:
1989
8
            break;
1990
28
        case SINGLE:
1991
28
            ss << _sv;
1992
28
            break;
1993
704
        case BITMAP: {
1994
704
            struct IterCtx {
1995
704
                std::stringstream* ss = nullptr;
1996
704
                bool first = true;
1997
704
            } iter_ctx;
1998
704
            iter_ctx.ss = &ss;
1999
2000
704
            _bitmap->iterate(
2001
39.6k
                    [](uint64_t value, void* c) -> bool {
2002
39.6k
                        auto ctx = reinterpret_cast<IterCtx*>(c);
2003
39.6k
                        if (ctx->first) {
2004
704
                            ctx->first = false;
2005
38.9k
                        } else {
2006
38.9k
                            (*ctx->ss) << ",";
2007
38.9k
                        }
2008
39.6k
                        (*ctx->ss) << value;
2009
39.6k
                        return true;
2010
39.6k
                    },
2011
704
                    &iter_ctx);
2012
704
            break;
2013
0
        }
2014
135
        case SET: {
2015
135
            struct IterCtx {
2016
135
                std::stringstream* ss = nullptr;
2017
135
                bool first = true;
2018
135
            } iter_ctx;
2019
135
            iter_ctx.ss = &ss;
2020
2021
135
            std::vector<uint64_t> values(_set.begin(), _set.end());
2022
135
            std::sort(values.begin(), values.end());
2023
2024
2.12k
            for (auto v : values) {
2025
2.12k
                if (iter_ctx.first) {
2026
135
                    iter_ctx.first = false;
2027
1.99k
                } else {
2028
1.99k
                    (*iter_ctx.ss) << ",";
2029
1.99k
                }
2030
2.12k
                (*iter_ctx.ss) << v;
2031
2.12k
            }
2032
135
            break;
2033
0
        }
2034
875
        }
2035
875
        return ss.str();
2036
875
    }
2037
2038
14
    int64_t maximum() const {
2039
14
        switch (_type) {
2040
2
        case SINGLE:
2041
2
            return _sv;
2042
5
        case BITMAP:
2043
5
            return _bitmap->maximum();
2044
5
        case SET:
2045
5
            return _max_in_set();
2046
2
        default:
2047
2
            return 0;
2048
14
        }
2049
14
    }
2050
2051
4
    uint64_t max(bool* empty) const {
2052
4
        return min_or_max(empty, [&]() { return maximum(); });
2053
4
    }
2054
2055
4
    uint64_t min(bool* empty) const {
2056
4
        return min_or_max(empty, [&]() { return minimum(); });
2057
4
    }
2058
2059
5
    uint64_t _min_in_set() const {
2060
5
        DCHECK_EQ(_type, SET);
2061
5
        return *std::min_element(_set.begin(), _set.end());
2062
5
    }
2063
2064
5
    uint64_t _max_in_set() const {
2065
5
        DCHECK_EQ(_type, SET);
2066
5
        return *std::max_element(_set.begin(), _set.end());
2067
5
    }
2068
2069
9
    bool empty() const { return _type == EMPTY; }
2070
2071
    /**
2072
     * Return new set with specified range (not include the range_end)
2073
     */
2074
    int64_t sub_range(const int64_t& range_start, const int64_t& range_end,
2075
7
                      BitmapValue* ret_bitmap) {
2076
7
        switch (_type) {
2077
1
        case EMPTY:
2078
1
            return 0;
2079
2
        case SINGLE: {
2080
            //only single value, so _sv must in [range_start,range_end)
2081
2
            if (range_start <= _sv && _sv < range_end) {
2082
1
                ret_bitmap->add(_sv);
2083
1
                return 1;
2084
1
            } else {
2085
1
                return 0;
2086
1
            }
2087
2
        }
2088
2
        case BITMAP: {
2089
2
            int64_t count = 0;
2090
146
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2091
145
                if (*it < range_start) {
2092
0
                    continue;
2093
0
                }
2094
145
                if (*it < range_end) {
2095
144
                    ret_bitmap->add(*it);
2096
144
                    ++count;
2097
144
                } else {
2098
1
                    break;
2099
1
                }
2100
145
            }
2101
2
            return count;
2102
2
        }
2103
2
        case SET: {
2104
2
            int64_t count = 0;
2105
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2106
2
            std::sort(values.begin(), values.end());
2107
6
            for (auto it = values.begin(); it != values.end(); ++it) {
2108
4
                if (*it < range_start || *it >= range_end) {
2109
1
                    continue;
2110
1
                }
2111
3
                ret_bitmap->add(*it);
2112
3
                ++count;
2113
3
            }
2114
2
            return count;
2115
2
        }
2116
7
        }
2117
0
        return 0;
2118
7
    }
2119
2120
    /**
2121
     * Return new set with specified start and limit
2122
     * @param range_start the start value for the range
2123
     * @param cardinality_limit the length of the subset
2124
     * @return the real count for subset, maybe less than cardinality_limit
2125
     */
2126
    int64_t sub_limit(const int64_t& range_start, const int64_t& cardinality_limit,
2127
9
                      BitmapValue* ret_bitmap) {
2128
9
        switch (_type) {
2129
1
        case EMPTY:
2130
1
            return 0;
2131
2
        case SINGLE: {
2132
            //only single value, so range_start must less than _sv
2133
2
            if (range_start > _sv) {
2134
0
                return 0;
2135
2
            } else {
2136
2
                ret_bitmap->add(_sv);
2137
2
                return 1;
2138
2
            }
2139
2
        }
2140
4
        case BITMAP: {
2141
4
            int64_t count = 0;
2142
31
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2143
30
                if (*it < range_start) {
2144
8
                    continue;
2145
8
                }
2146
22
                if (count < cardinality_limit) {
2147
19
                    ret_bitmap->add(*it);
2148
19
                    ++count;
2149
19
                } else {
2150
3
                    break;
2151
3
                }
2152
22
            }
2153
4
            return count;
2154
2
        }
2155
2
        case SET: {
2156
2
            int64_t count = 0;
2157
2158
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2159
2
            std::sort(values.begin(), values.end());
2160
6
            for (auto it = values.begin(); it != values.end(); ++it) {
2161
4
                if (*it < range_start) {
2162
0
                    continue;
2163
0
                }
2164
4
                if (count < cardinality_limit) {
2165
4
                    ret_bitmap->add(*it);
2166
4
                    ++count;
2167
4
                } else {
2168
0
                    break;
2169
0
                }
2170
4
            }
2171
2
            return count;
2172
2
        }
2173
9
        }
2174
0
        return 0;
2175
9
    }
2176
2177
    /**
2178
     * Returns the bitmap elements, starting from the offset position.
2179
     * The number of returned elements is limited by the cardinality_limit parameter.
2180
     * Analog of the substring string function, but for bitmap.
2181
     */
2182
8
    int64_t offset_limit(const int64_t& offset, const int64_t& limit, BitmapValue* ret_bitmap) {
2183
8
        switch (_type) {
2184
1
        case EMPTY:
2185
1
            return 0;
2186
2
        case SINGLE: {
2187
            //only single value, so offset must start 0
2188
2
            if (offset == 0) {
2189
1
                ret_bitmap->add(_sv);
2190
1
                return 1;
2191
1
            } else {
2192
1
                return 0;
2193
1
            }
2194
2
        }
2195
5
        default:
2196
5
            break;
2197
8
        }
2198
5
        if (_type == BITMAP) {
2199
3
            if (std::abs(offset) >= _bitmap->cardinality()) {
2200
0
                return 0;
2201
0
            }
2202
3
            int64_t abs_offset = offset;
2203
3
            if (offset < 0) {
2204
0
                abs_offset = _bitmap->cardinality() + offset;
2205
0
            }
2206
2207
3
            int64_t count = 0;
2208
3
            int64_t offset_count = 0;
2209
3
            auto it = _bitmap->begin();
2210
123
            for (; it != _bitmap->end() && offset_count < abs_offset; ++it) {
2211
120
                ++offset_count;
2212
120
            }
2213
33
            for (; it != _bitmap->end() && count < limit; ++it, ++count) {
2214
30
                ret_bitmap->add(*it);
2215
30
            }
2216
3
            return count;
2217
3
        } else {
2218
2
            if (std::abs(offset) > _set.size()) {
2219
0
                return 0;
2220
0
            }
2221
2222
2
            int64_t abs_offset = offset;
2223
2
            if (offset < 0) {
2224
0
                abs_offset = _set.size() + offset;
2225
0
            }
2226
2227
2
            std::vector<uint64_t> values(_set.begin(), _set.end());
2228
2
            std::sort(values.begin(), values.end());
2229
2230
2
            int64_t count = 0;
2231
2
            size_t index = 0;
2232
4
            for (auto v : values) {
2233
4
                if (index < abs_offset) {
2234
1
                    ++index;
2235
1
                    continue;
2236
1
                }
2237
3
                if (count == limit || index == values.size()) {
2238
0
                    break;
2239
0
                }
2240
3
                ++count;
2241
3
                ++index;
2242
3
                ret_bitmap->add(v);
2243
3
            }
2244
2
            return count;
2245
2
        }
2246
5
    }
2247
2248
    //for function bitmap_to_array
2249
3
    void to_array(vectorized::PaddedPODArray<int64_t>& data) const {
2250
3
        switch (_type) {
2251
0
        case EMPTY:
2252
0
            break;
2253
1
        case SINGLE: {
2254
1
            data.emplace_back(_sv);
2255
1
            break;
2256
0
        }
2257
1
        case BITMAP: {
2258
129
            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
2259
128
                data.emplace_back(*it);
2260
128
            }
2261
1
            break;
2262
0
        }
2263
1
        case SET: {
2264
1
            std::vector<uint64_t> values(_set.begin(), _set.end());
2265
1
            std::sort(values.begin(), values.end());
2266
31
            for (auto v : values) {
2267
31
                data.emplace_back(v);
2268
31
            }
2269
1
            break;
2270
0
        }
2271
3
        }
2272
3
    }
2273
2274
130
    void reset() {
2275
130
        _type = EMPTY;
2276
130
        _sv = 0;
2277
130
        _set.clear();
2278
130
        _is_shared = false;
2279
130
        _bitmap = nullptr;
2280
130
    }
2281
    // Implement an iterator for convenience
2282
    friend class BitmapValueIterator;
2283
    typedef BitmapValueIterator b_iterator;
2284
2285
    b_iterator begin() const;
2286
    b_iterator end() const;
2287
    b_iterator lower_bound(uint64_t val) const;
2288
2289
private:
2290
41
    void _convert_to_smaller_type() {
2291
41
        if (_type == BITMAP) {
2292
13
            uint64_t c = _bitmap->cardinality();
2293
13
            if (config::enable_set_in_bitmap_value && c > SET_TYPE_THRESHOLD) {
2294
7
                return;
2295
7
            } else if (c > 1) {
2296
4
                return;
2297
4
            }
2298
2
            if (c == 0) {
2299
1
                _type = EMPTY;
2300
1
            } else if (c == 1 && !config::enable_set_in_bitmap_value) {
2301
1
                _type = SINGLE;
2302
1
                _sv = _bitmap->minimum();
2303
1
            } else {
2304
0
                _type = SET;
2305
0
                for (auto v : *_bitmap) {
2306
0
                    _set.insert(v);
2307
0
                }
2308
0
            }
2309
2
            _bitmap.reset();
2310
28
        } else if (_type == SET) {
2311
28
            if (_set.size() == 1 && !config::enable_set_in_bitmap_value) {
2312
0
                _type = SINGLE;
2313
0
                _sv = *_set.begin();
2314
0
                _set.clear();
2315
0
            }
2316
28
        }
2317
41
    }
2318
2319
8
    uint64_t min_or_max(bool* empty, std::function<uint64_t()> func) const {
2320
8
        bool is_empty = false;
2321
8
        uint64_t result = 0;
2322
8
        switch (_type) {
2323
0
        case SINGLE:
2324
0
            result = _sv;
2325
0
            break;
2326
2
        case BITMAP:
2327
6
        case SET:
2328
6
            result = func();
2329
6
            break;
2330
2
        default:
2331
2
            is_empty = true;
2332
8
        }
2333
8
        if (empty) {
2334
8
            *empty = is_empty;
2335
8
        }
2336
8
        return result;
2337
8
    }
2338
2339
2.60M
    void _prepare_bitmap_for_write() {
2340
2.60M
        if (!_bitmap) {
2341
5.29k
            _bitmap = std::make_shared<detail::Roaring64Map>();
2342
5.29k
            _is_shared = false;
2343
5.29k
            return;
2344
5.29k
        }
2345
2346
2.60M
        if (!_is_shared) {
2347
            // the state is not shared, not need to check use count any more
2348
2.59M
            return;
2349
2.59M
        }
2350
2351
7.16k
        if (_bitmap.use_count() > 1) {
2352
1.02k
            auto new_one = std::make_shared<detail::Roaring64Map>();
2353
1.02k
            *new_one = *_bitmap;
2354
1.02k
            _bitmap = new_one;
2355
1.02k
        }
2356
7.16k
        _is_shared = false;
2357
7.16k
    }
2358
2359
32.3k
    void _convert_to_bitmap_if_need() {
2360
32.3k
        if (_type != SET || _set.size() <= SET_TYPE_THRESHOLD) {
2361
31.3k
            return;
2362
31.3k
        }
2363
999
        _prepare_bitmap_for_write();
2364
33.2k
        for (auto v : _set) {
2365
33.2k
            _bitmap->add(v);
2366
33.2k
        }
2367
999
        _type = BITMAP;
2368
999
        _set.clear();
2369
999
    }
2370
2371
    enum BitmapDataType {
2372
        EMPTY = 0,
2373
        SINGLE = 1, // single element
2374
        BITMAP = 2, // more than one elements
2375
        SET = 3     // elements count less or equal than 32
2376
    };
2377
    uint64_t _sv = 0;                              // store the single value when _type == SINGLE
2378
    std::shared_ptr<detail::Roaring64Map> _bitmap; // used when _type == BITMAP
2379
    SetContainer<uint64_t> _set;
2380
    BitmapDataType _type {EMPTY};
2381
    // Indicate whether the state is shared among multi BitmapValue object
2382
    bool _is_shared = true;
2383
    static constexpr uint64_t SET_TYPE_THRESHOLD = 32;
2384
};
2385
2386
// A simple implement of bitmap value iterator(Read only)
2387
// Usage:
2388
//  BitmapValueIterator iter = bitmap_value.begin();
2389
//  BitmapValueIterator end = bitmap_value.end();
2390
//  for (; iter != end(); ++iter) {
2391
//      uint64_t v = *iter;
2392
//      ... do something with "v" ...
2393
//  }
2394
class BitmapValueIterator {
2395
public:
2396
51
    BitmapValueIterator(const BitmapValue& bitmap, bool end = false) : _bitmap(bitmap), _end(end) {
2397
51
        switch (_bitmap._type) {
2398
8
        case BitmapValue::BitmapDataType::EMPTY:
2399
8
            _end = true;
2400
8
            break;
2401
8
        case BitmapValue::BitmapDataType::SINGLE:
2402
8
            _sv = _bitmap._sv;
2403
8
            break;
2404
27
        case BitmapValue::BitmapDataType::BITMAP:
2405
27
            _iter = new detail::Roaring64MapSetBitForwardIterator(*_bitmap._bitmap, _end);
2406
27
            break;
2407
8
        case BitmapValue::BitmapDataType::SET:
2408
8
            _set_iter = _end ? _bitmap._set.end() : _bitmap._set.begin();
2409
8
            break;
2410
0
        default:
2411
0
            CHECK(false) << _bitmap._type;
2412
51
        }
2413
51
    }
2414
2415
    BitmapValueIterator(const BitmapValueIterator& other)
2416
175
            : _bitmap(other._bitmap), _sv(other._sv), _end(other._end) {
2417
175
        _iter = other._iter ? new detail::Roaring64MapSetBitForwardIterator(*other._iter) : nullptr;
2418
175
        if (_bitmap._type == BitmapValue::BitmapDataType::SET) {
2419
33
            _set_iter = other._set_iter;
2420
33
        }
2421
175
    }
2422
2423
226
    ~BitmapValueIterator() {
2424
226
        if (_iter != nullptr) {
2425
164
            delete _iter;
2426
164
            _iter = nullptr;
2427
164
        }
2428
226
    }
2429
2430
653
    uint64_t operator*() const {
2431
653
        CHECK(!_end) << "should not get value of end iterator";
2432
653
        switch (_bitmap._type) {
2433
4
        case BitmapValue::BitmapDataType::SINGLE:
2434
4
            return _sv;
2435
524
        case BitmapValue::BitmapDataType::BITMAP:
2436
524
            return *(*_iter);
2437
125
        case BitmapValue::BitmapDataType::SET: {
2438
125
            return *_set_iter;
2439
0
        }
2440
0
        default:
2441
0
            CHECK(false) << _bitmap._type;
2442
653
        }
2443
0
        return 0;
2444
653
    }
2445
2446
486
    BitmapValueIterator& operator++() { // ++i, must returned inc. value
2447
486
        CHECK(!_end) << "should not forward when iterator ends";
2448
486
        switch (_bitmap._type) {
2449
3
        case BitmapValue::BitmapDataType::SINGLE:
2450
3
            _end = true;
2451
3
            break;
2452
389
        case BitmapValue::BitmapDataType::BITMAP:
2453
389
            ++(*_iter);
2454
389
            break;
2455
94
        case BitmapValue::BitmapDataType::SET:
2456
94
            ++_set_iter;
2457
94
            break;
2458
0
        default:
2459
0
            CHECK(false) << _bitmap._type;
2460
486
        }
2461
486
        return *this;
2462
486
    }
2463
2464
160
    BitmapValueIterator operator++(int) { // i++, must return orig. value
2465
160
        CHECK(!_end) << "should not forward when iterator ends";
2466
160
        BitmapValueIterator orig(*this);
2467
160
        switch (_bitmap._type) {
2468
1
        case BitmapValue::BitmapDataType::SINGLE:
2469
1
            _end = true;
2470
1
            break;
2471
128
        case BitmapValue::BitmapDataType::BITMAP:
2472
128
            ++(*_iter);
2473
128
            break;
2474
31
        case BitmapValue::BitmapDataType::SET:
2475
31
            ++_set_iter;
2476
31
            break;
2477
0
        default:
2478
0
            CHECK(false) << _bitmap._type;
2479
160
        }
2480
160
        return orig;
2481
160
    }
2482
2483
681
    bool operator==(const BitmapValueIterator& other) const {
2484
681
        if (_end && other._end) {
2485
13
            return true;
2486
13
        }
2487
2488
668
        switch (_bitmap._type) {
2489
0
        case BitmapValue::BitmapDataType::EMPTY:
2490
0
            return other._bitmap._type == BitmapValue::BitmapDataType::EMPTY;
2491
5
        case BitmapValue::BitmapDataType::SINGLE:
2492
5
            return _end == other._end && _sv == other._sv;
2493
531
        case BitmapValue::BitmapDataType::BITMAP:
2494
531
            return *_iter == *(other._iter);
2495
132
        case BitmapValue::BitmapDataType::SET:
2496
132
            return _set_iter == other._set_iter;
2497
0
        default:
2498
0
            CHECK(false) << _bitmap._type;
2499
668
        }
2500
0
        return false;
2501
668
    }
2502
2503
671
    bool operator!=(const BitmapValueIterator& other) const { return !(*this == other); }
2504
2505
    /**
2506
    * Move the iterator to the first value >= `val`.
2507
    */
2508
7
    BitmapValueIterator& move(uint64_t val) {
2509
7
        switch (_bitmap._type) {
2510
0
        case BitmapValue::BitmapDataType::SINGLE:
2511
0
            if (_sv < val) {
2512
0
                _end = true;
2513
0
            }
2514
0
            break;
2515
7
        case BitmapValue::BitmapDataType::BITMAP:
2516
7
            if (!_iter->move(val)) {
2517
0
                _end = true;
2518
0
            }
2519
7
            break;
2520
0
        case BitmapValue::BitmapDataType::SET: {
2521
0
            LOG(FATAL) << "BitmapValue with set do not support move";
2522
0
            break;
2523
0
        }
2524
0
        default:
2525
0
            break;
2526
7
        }
2527
7
        return *this;
2528
7
    }
2529
2530
private:
2531
    const BitmapValue& _bitmap;
2532
    detail::Roaring64MapSetBitForwardIterator* _iter = nullptr;
2533
    BitmapValue::SetContainer<uint64_t>::const_iterator _set_iter;
2534
    uint64_t _sv = 0;
2535
    bool _end = false;
2536
};
2537
2538
16
inline BitmapValueIterator BitmapValue::begin() const {
2539
16
    return {*this};
2540
16
}
2541
2542
28
inline BitmapValueIterator BitmapValue::end() const {
2543
28
    return {*this, true};
2544
28
}
2545
2546
7
inline BitmapValueIterator BitmapValue::lower_bound(uint64_t val) const {
2547
7
    return BitmapValueIterator(*this).move(val);
2548
7
}
2549
2550
7
inline bool BitmapValue::contains_any(uint64_t left, uint64_t right) const {
2551
7
    if (left > right) {
2552
0
        return false;
2553
0
    }
2554
2555
7
    if (_type == SET) {
2556
0
        for (auto v : _set) {
2557
0
            if (v >= left && v <= right) {
2558
0
                return true;
2559
0
            }
2560
0
        }
2561
0
        return false;
2562
0
    }
2563
7
    auto it = lower_bound(left);
2564
7
    return it != end() && *it <= right;
2565
7
}
2566
2567
} // namespace doris