Coverage Report

Created: 2026-04-14 12:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_bitmap.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionBitmap.h
19
// and modified by Doris
20
21
#include <absl/strings/numbers.h>
22
#include <absl/strings/str_split.h>
23
#include <glog/logging.h>
24
#include <stdint.h>
25
#include <string.h>
26
27
#include <algorithm>
28
#include <boost/iterator/iterator_facade.hpp>
29
#include <functional>
30
#include <memory>
31
#include <ostream>
32
#include <string>
33
#include <utility>
34
#include <vector>
35
36
#include "common/compiler_util.h" // IWYU pragma: keep
37
#include "common/status.h"
38
#include "core/assert_cast.h"
39
#include "core/block/block.h"
40
#include "core/block/column_numbers.h"
41
#include "core/block/column_with_type_and_name.h"
42
#include "core/column/column.h"
43
#include "core/column/column_array.h"
44
#include "core/column/column_complex.h"
45
#include "core/column/column_const.h"
46
#include "core/column/column_nullable.h"
47
#include "core/column/column_string.h"
48
#include "core/column/column_vector.h"
49
#include "core/data_type/data_type.h"
50
#include "core/data_type/data_type_array.h"
51
#include "core/data_type/data_type_bitmap.h"
52
#include "core/data_type/data_type_nullable.h"
53
#include "core/data_type/data_type_number.h"
54
#include "core/data_type/data_type_string.h"
55
#include "core/field.h"
56
#include "core/types.h"
57
#include "core/value/bitmap_value.h"
58
#include "exec/common/stringop_substring.h"
59
#include "exec/common/util.hpp"
60
#include "exprs/aggregate/aggregate_function.h"
61
#include "exprs/function/function.h"
62
#include "exprs/function/function_always_not_nullable.h"
63
#include "exprs/function/function_bitmap_min_or_max.h"
64
#include "exprs/function/function_const.h"
65
#include "exprs/function/function_helpers.h"
66
#include "exprs/function/function_totype.h"
67
#include "exprs/function/simple_function_factory.h"
68
#include "util/hash/murmur_hash3.h"
69
#include "util/hash_util.hpp"
70
#include "util/string_parser.hpp"
71
#include "util/url_coding.h"
72
73
namespace doris {
74
class FunctionContext;
75
} // namespace doris
76
77
namespace doris {
78
79
struct BitmapEmpty {
80
    static constexpr auto name = "bitmap_empty";
81
    using ReturnColVec = ColumnBitmap;
82
273
    static DataTypePtr get_return_type() { return std::make_shared<DataTypeBitMap>(); }
83
273
    static auto init_value() { return BitmapValue {}; }
84
};
85
86
struct ToBitmap {
87
    static constexpr auto name = "to_bitmap";
88
    using ReturnType = DataTypeBitMap;
89
90
    template <typename ColumnType>
91
1.10k
    static void vector(const ColumnType* col, MutableColumnPtr& col_res) {
92
1.10k
        execute<ColumnType, false>(col, nullptr, col_res);
93
1.10k
    }
_ZN5doris8ToBitmap6vectorINS_9ColumnStrIjEEEEvPKT_RNS_3COWINS_7IColumnEE11mutable_ptrIS8_EE
Line
Count
Source
91
12
    static void vector(const ColumnType* col, MutableColumnPtr& col_res) {
92
12
        execute<ColumnType, false>(col, nullptr, col_res);
93
12
    }
_ZN5doris8ToBitmap6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvPKT_RNS_3COWINS_7IColumnEE11mutable_ptrIS9_EE
Line
Count
Source
91
1.09k
    static void vector(const ColumnType* col, MutableColumnPtr& col_res) {
92
1.09k
        execute<ColumnType, false>(col, nullptr, col_res);
93
1.09k
    }
94
    template <typename ColumnType>
95
    static void vector_nullable(const ColumnType* col, const NullMap& nullmap,
96
574
                                MutableColumnPtr& col_res) {
97
574
        execute<ColumnType, true>(col, &nullmap, col_res);
98
574
    }
_ZN5doris8ToBitmap15vector_nullableINS_9ColumnStrIjEEEEvPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISF_EE
Line
Count
Source
96
49
                                MutableColumnPtr& col_res) {
97
49
        execute<ColumnType, true>(col, &nullmap, col_res);
98
49
    }
_ZN5doris8ToBitmap15vector_nullableINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE
Line
Count
Source
96
525
                                MutableColumnPtr& col_res) {
97
525
        execute<ColumnType, true>(col, &nullmap, col_res);
98
525
    }
99
    template <typename ColumnType, bool arg_is_nullable>
100
1.67k
    static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) {
101
1.67k
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
102
61
            const ColumnString::Chars& data = col->get_chars();
103
61
            const ColumnString::Offsets& offsets = col->get_offsets();
104
105
61
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
106
61
            auto& res_data = res_column->get_data();
107
61
            size_t size = offsets.size();
108
109
3.56k
            for (size_t i = 0; i < size; ++i) {
110
3.49k
                if (arg_is_nullable && ((*nullmap)[i])) {
111
26
                    continue;
112
3.47k
                } else {
113
3.47k
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
114
3.47k
                    int str_size = cast_set<int>(offsets[i] - offsets[i - 1]);
115
3.47k
                    StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
116
3.47k
                    uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
117
3.47k
                            raw_str, str_size, &parse_result);
118
3.47k
                    if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
119
3.26k
                        res_data[i].add(int_value);
120
3.26k
                    }
121
3.47k
                }
122
3.49k
            }
123
1.61k
        } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
124
1.61k
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
125
1.61k
            auto& res_data = res_column->get_data();
126
1.61k
            size_t size = col->size();
127
128
134k
            for (size_t i = 0; i < size; ++i) {
129
131k
                if constexpr (arg_is_nullable) {
130
2.33k
                    if ((*nullmap)[i]) {
131
1.21k
                        continue;
132
1.21k
                    }
133
2.33k
                }
134
131k
                if (auto value = col->get_data()[i]; value >= 0) {
135
128k
                    res_data[i].add(value);
136
128k
                }
137
131k
            }
138
1.61k
        }
139
1.67k
    }
_ZN5doris8ToBitmap7executeINS_9ColumnStrIjEELb1EEEvPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISF_EE
Line
Count
Source
100
49
    static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) {
101
49
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
102
49
            const ColumnString::Chars& data = col->get_chars();
103
49
            const ColumnString::Offsets& offsets = col->get_offsets();
104
105
49
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
106
49
            auto& res_data = res_column->get_data();
107
49
            size_t size = offsets.size();
108
109
3.52k
            for (size_t i = 0; i < size; ++i) {
110
3.47k
                if (arg_is_nullable && ((*nullmap)[i])) {
111
26
                    continue;
112
3.44k
                } else {
113
3.44k
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
114
3.44k
                    int str_size = cast_set<int>(offsets[i] - offsets[i - 1]);
115
3.44k
                    StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
116
3.44k
                    uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
117
3.44k
                            raw_str, str_size, &parse_result);
118
3.44k
                    if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
119
3.26k
                        res_data[i].add(int_value);
120
3.26k
                    }
121
3.44k
                }
122
3.47k
            }
123
        } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
124
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
125
            auto& res_data = res_column->get_data();
126
            size_t size = col->size();
127
128
            for (size_t i = 0; i < size; ++i) {
129
                if constexpr (arg_is_nullable) {
130
                    if ((*nullmap)[i]) {
131
                        continue;
132
                    }
133
                }
134
                if (auto value = col->get_data()[i]; value >= 0) {
135
                    res_data[i].add(value);
136
                }
137
            }
138
        }
139
49
    }
_ZN5doris8ToBitmap7executeINS_12ColumnVectorILNS_13PrimitiveTypeE6EEELb1EEEvPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE
Line
Count
Source
100
525
    static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) {
101
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
102
            const ColumnString::Chars& data = col->get_chars();
103
            const ColumnString::Offsets& offsets = col->get_offsets();
104
105
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
106
            auto& res_data = res_column->get_data();
107
            size_t size = offsets.size();
108
109
            for (size_t i = 0; i < size; ++i) {
110
                if (arg_is_nullable && ((*nullmap)[i])) {
111
                    continue;
112
                } else {
113
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
114
                    int str_size = cast_set<int>(offsets[i] - offsets[i - 1]);
115
                    StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
116
                    uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
117
                            raw_str, str_size, &parse_result);
118
                    if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
119
                        res_data[i].add(int_value);
120
                    }
121
                }
122
            }
123
525
        } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
124
525
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
125
525
            auto& res_data = res_column->get_data();
126
525
            size_t size = col->size();
127
128
4.07k
            for (size_t i = 0; i < size; ++i) {
129
2.33k
                if constexpr (arg_is_nullable) {
130
2.33k
                    if ((*nullmap)[i]) {
131
1.21k
                        continue;
132
1.21k
                    }
133
2.33k
                }
134
2.33k
                if (auto value = col->get_data()[i]; value >= 0) {
135
786
                    res_data[i].add(value);
136
786
                }
137
2.33k
            }
138
525
        }
139
525
    }
_ZN5doris8ToBitmap7executeINS_9ColumnStrIjEELb0EEEvPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISF_EE
Line
Count
Source
100
12
    static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) {
101
12
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
102
12
            const ColumnString::Chars& data = col->get_chars();
103
12
            const ColumnString::Offsets& offsets = col->get_offsets();
104
105
12
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
106
12
            auto& res_data = res_column->get_data();
107
12
            size_t size = offsets.size();
108
109
40
            for (size_t i = 0; i < size; ++i) {
110
28
                if (arg_is_nullable && ((*nullmap)[i])) {
111
0
                    continue;
112
28
                } else {
113
28
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
114
28
                    int str_size = cast_set<int>(offsets[i] - offsets[i - 1]);
115
28
                    StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
116
28
                    uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
117
28
                            raw_str, str_size, &parse_result);
118
28
                    if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
119
0
                        res_data[i].add(int_value);
120
0
                    }
121
28
                }
122
28
            }
123
        } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
124
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
125
            auto& res_data = res_column->get_data();
126
            size_t size = col->size();
127
128
            for (size_t i = 0; i < size; ++i) {
129
                if constexpr (arg_is_nullable) {
130
                    if ((*nullmap)[i]) {
131
                        continue;
132
                    }
133
                }
134
                if (auto value = col->get_data()[i]; value >= 0) {
135
                    res_data[i].add(value);
136
                }
137
            }
138
        }
139
12
    }
_ZN5doris8ToBitmap7executeINS_12ColumnVectorILNS_13PrimitiveTypeE6EEELb0EEEvPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE
Line
Count
Source
100
1.09k
    static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) {
101
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
102
            const ColumnString::Chars& data = col->get_chars();
103
            const ColumnString::Offsets& offsets = col->get_offsets();
104
105
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
106
            auto& res_data = res_column->get_data();
107
            size_t size = offsets.size();
108
109
            for (size_t i = 0; i < size; ++i) {
110
                if (arg_is_nullable && ((*nullmap)[i])) {
111
                    continue;
112
                } else {
113
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
114
                    int str_size = cast_set<int>(offsets[i] - offsets[i - 1]);
115
                    StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
116
                    uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
117
                            raw_str, str_size, &parse_result);
118
                    if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
119
                        res_data[i].add(int_value);
120
                    }
121
                }
122
            }
123
1.09k
        } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
124
1.09k
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
125
1.09k
            auto& res_data = res_column->get_data();
126
1.09k
            size_t size = col->size();
127
128
130k
            for (size_t i = 0; i < size; ++i) {
129
                if constexpr (arg_is_nullable) {
130
                    if ((*nullmap)[i]) {
131
                        continue;
132
                    }
133
                }
134
129k
                if (auto value = col->get_data()[i]; value >= 0) {
135
127k
                    res_data[i].add(value);
136
127k
                }
137
129k
            }
138
1.09k
        }
139
1.09k
    }
140
};
141
142
struct ToBitmapWithCheck {
143
    static constexpr auto name = "to_bitmap_with_check";
144
    using ReturnType = DataTypeBitMap;
145
146
    template <typename ColumnType>
147
49
    static Status vector(const ColumnType* col, MutableColumnPtr& col_res) {
148
49
        return execute<ColumnType, false>(col, nullptr, col_res);
149
49
    }
Unexecuted instantiation: _ZN5doris17ToBitmapWithCheck6vectorINS_9ColumnStrIjEEEENS_6StatusEPKT_RNS_3COWINS_7IColumnEE11mutable_ptrIS9_EE
_ZN5doris17ToBitmapWithCheck6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEENS_6StatusEPKT_RNS_3COWINS_7IColumnEE11mutable_ptrISA_EE
Line
Count
Source
147
49
    static Status vector(const ColumnType* col, MutableColumnPtr& col_res) {
148
49
        return execute<ColumnType, false>(col, nullptr, col_res);
149
49
    }
150
    template <typename ColumnType>
151
    static Status vector_nullable(const ColumnType* col, const NullMap& nullmap,
152
46
                                  MutableColumnPtr& col_res) {
153
46
        return execute<ColumnType, true>(col, &nullmap, col_res);
154
46
    }
Unexecuted instantiation: _ZN5doris17ToBitmapWithCheck15vector_nullableINS_9ColumnStrIjEEEENS_6StatusEPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE
_ZN5doris17ToBitmapWithCheck15vector_nullableINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEENS_6StatusEPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISH_EE
Line
Count
Source
152
46
                                  MutableColumnPtr& col_res) {
153
46
        return execute<ColumnType, true>(col, &nullmap, col_res);
154
46
    }
155
    template <typename ColumnType, bool arg_is_nullable>
156
    static Status execute(const ColumnType* col, const NullMap* nullmap,
157
95
                          MutableColumnPtr& col_res) {
158
95
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
159
0
            const ColumnString::Chars& data = col->get_chars();
160
0
            const ColumnString::Offsets& offsets = col->get_offsets();
161
0
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
162
0
            auto& res_data = res_column->get_data();
163
0
            size_t size = offsets.size();
164
165
0
            for (size_t i = 0; i < size; ++i) {
166
0
                if (arg_is_nullable && ((*nullmap)[i])) {
167
0
                    continue;
168
0
                } else {
169
0
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
170
                    // The string lenght is less than 2G, so that cast the str size to int, not use size_t
171
0
                    int str_size = cast_set<int>(offsets[i] - offsets[i - 1]);
172
0
                    StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
173
0
                    uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
174
0
                            raw_str, str_size, &parse_result);
175
0
                    if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
176
0
                        res_data[i].add(int_value);
177
0
                    } else {
178
0
                        return Status::InvalidArgument(
179
0
                                "The input: {} is not valid, to_bitmap only support bigint value "
180
0
                                "from 0 to 18446744073709551615 currently, cannot create MV with "
181
0
                                "to_bitmap on column with negative values or cannot load negative "
182
0
                                "values to column with to_bitmap MV on it.",
183
0
                                std::string(raw_str, str_size));
184
0
                    }
185
0
                }
186
0
            }
187
95
        } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
188
95
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
189
95
            auto& res_data = res_column->get_data();
190
95
            size_t size = col->size();
191
192
213
            for (size_t i = 0; i < size; ++i) {
193
120
                if (arg_is_nullable && ((*nullmap)[i])) {
194
21
                    continue;
195
99
                } else {
196
99
                    int64_t int_value = col->get_data()[i];
197
99
                    if (LIKELY(int_value >= 0)) {
198
97
                        res_data[i].add(int_value);
199
97
                    } else {
200
2
                        return Status::InvalidArgument(
201
2
                                "The input: {} is not valid, to_bitmap only support bigint value "
202
2
                                "from 0 to 18446744073709551615 currently, cannot create MV with "
203
2
                                "to_bitmap on column with negative values or cannot load negative "
204
2
                                "values to column with to_bitmap MV on it.",
205
2
                                int_value);
206
2
                    }
207
99
                }
208
120
            }
209
        } else {
210
            return Status::InvalidArgument("not support type");
211
        }
212
93
        return Status::OK();
213
95
    }
Unexecuted instantiation: _ZN5doris17ToBitmapWithCheck7executeINS_9ColumnStrIjEELb1EEENS_6StatusEPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE
_ZN5doris17ToBitmapWithCheck7executeINS_12ColumnVectorILNS_13PrimitiveTypeE6EEELb1EEENS_6StatusEPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISH_EE
Line
Count
Source
157
46
                          MutableColumnPtr& col_res) {
158
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
159
            const ColumnString::Chars& data = col->get_chars();
160
            const ColumnString::Offsets& offsets = col->get_offsets();
161
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
162
            auto& res_data = res_column->get_data();
163
            size_t size = offsets.size();
164
165
            for (size_t i = 0; i < size; ++i) {
166
                if (arg_is_nullable && ((*nullmap)[i])) {
167
                    continue;
168
                } else {
169
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
170
                    // The string lenght is less than 2G, so that cast the str size to int, not use size_t
171
                    int str_size = cast_set<int>(offsets[i] - offsets[i - 1]);
172
                    StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
173
                    uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
174
                            raw_str, str_size, &parse_result);
175
                    if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
176
                        res_data[i].add(int_value);
177
                    } else {
178
                        return Status::InvalidArgument(
179
                                "The input: {} is not valid, to_bitmap only support bigint value "
180
                                "from 0 to 18446744073709551615 currently, cannot create MV with "
181
                                "to_bitmap on column with negative values or cannot load negative "
182
                                "values to column with to_bitmap MV on it.",
183
                                std::string(raw_str, str_size));
184
                    }
185
                }
186
            }
187
46
        } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
188
46
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
189
46
            auto& res_data = res_column->get_data();
190
46
            size_t size = col->size();
191
192
110
            for (size_t i = 0; i < size; ++i) {
193
63
                if (arg_is_nullable && ((*nullmap)[i])) {
194
21
                    continue;
195
42
                } else {
196
42
                    int64_t int_value = col->get_data()[i];
197
43
                    if (LIKELY(int_value >= 0)) {
198
43
                        res_data[i].add(int_value);
199
18.4E
                    } else {
200
18.4E
                        return Status::InvalidArgument(
201
18.4E
                                "The input: {} is not valid, to_bitmap only support bigint value "
202
18.4E
                                "from 0 to 18446744073709551615 currently, cannot create MV with "
203
18.4E
                                "to_bitmap on column with negative values or cannot load negative "
204
18.4E
                                "values to column with to_bitmap MV on it.",
205
18.4E
                                int_value);
206
18.4E
                    }
207
42
                }
208
63
            }
209
        } else {
210
            return Status::InvalidArgument("not support type");
211
        }
212
47
        return Status::OK();
213
46
    }
Unexecuted instantiation: _ZN5doris17ToBitmapWithCheck7executeINS_9ColumnStrIjEELb0EEENS_6StatusEPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE
_ZN5doris17ToBitmapWithCheck7executeINS_12ColumnVectorILNS_13PrimitiveTypeE6EEELb0EEENS_6StatusEPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISH_EE
Line
Count
Source
157
49
                          MutableColumnPtr& col_res) {
158
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
159
            const ColumnString::Chars& data = col->get_chars();
160
            const ColumnString::Offsets& offsets = col->get_offsets();
161
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
162
            auto& res_data = res_column->get_data();
163
            size_t size = offsets.size();
164
165
            for (size_t i = 0; i < size; ++i) {
166
                if (arg_is_nullable && ((*nullmap)[i])) {
167
                    continue;
168
                } else {
169
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
170
                    // The string lenght is less than 2G, so that cast the str size to int, not use size_t
171
                    int str_size = cast_set<int>(offsets[i] - offsets[i - 1]);
172
                    StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
173
                    uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
174
                            raw_str, str_size, &parse_result);
175
                    if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
176
                        res_data[i].add(int_value);
177
                    } else {
178
                        return Status::InvalidArgument(
179
                                "The input: {} is not valid, to_bitmap only support bigint value "
180
                                "from 0 to 18446744073709551615 currently, cannot create MV with "
181
                                "to_bitmap on column with negative values or cannot load negative "
182
                                "values to column with to_bitmap MV on it.",
183
                                std::string(raw_str, str_size));
184
                    }
185
                }
186
            }
187
49
        } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
188
49
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
189
49
            auto& res_data = res_column->get_data();
190
49
            size_t size = col->size();
191
192
103
            for (size_t i = 0; i < size; ++i) {
193
57
                if (arg_is_nullable && ((*nullmap)[i])) {
194
0
                    continue;
195
57
                } else {
196
57
                    int64_t int_value = col->get_data()[i];
197
57
                    if (LIKELY(int_value >= 0)) {
198
54
                        res_data[i].add(int_value);
199
54
                    } else {
200
3
                        return Status::InvalidArgument(
201
3
                                "The input: {} is not valid, to_bitmap only support bigint value "
202
3
                                "from 0 to 18446744073709551615 currently, cannot create MV with "
203
3
                                "to_bitmap on column with negative values or cannot load negative "
204
3
                                "values to column with to_bitmap MV on it.",
205
3
                                int_value);
206
3
                    }
207
57
                }
208
57
            }
209
        } else {
210
            return Status::InvalidArgument("not support type");
211
        }
212
46
        return Status::OK();
213
49
    }
214
};
215
216
struct BitmapFromString {
217
    using ArgumentType = DataTypeString;
218
219
    static constexpr auto name = "bitmap_from_string";
220
221
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
222
                         std::vector<BitmapValue>& res, NullMap& null_map,
223
278
                         size_t input_rows_count) {
224
278
        res.reserve(input_rows_count);
225
278
        std::vector<uint64_t> bits;
226
278
        if (offsets.size() == 0 && input_rows_count == 1) {
227
            // For NULL constant
228
0
            res.emplace_back();
229
0
            null_map[0] = 1;
230
0
            return Status::OK();
231
0
        }
232
233
367
        auto split_and_parse = [&bits](const char* raw_str, size_t str_size) {
234
367
            bits.clear();
235
367
            auto res = absl::StrSplit(std::string_view {raw_str, str_size}, ",", absl::SkipEmpty());
236
367
            uint64_t value = 0;
237
1.28k
            for (auto s : res) {
238
1.28k
                if (!absl::SimpleAtoi(s, &value)) {
239
66
                    return false;
240
66
                }
241
1.21k
                bits.push_back(value);
242
1.21k
            }
243
301
            return true;
244
367
        };
245
246
        // split by comma
247
248
645
        for (size_t i = 0; i < input_rows_count; ++i) {
249
367
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
250
367
            int64_t str_size = offsets[i] - offsets[i - 1];
251
252
367
            if ((str_size > INT32_MAX) || !split_and_parse(raw_str, str_size)) {
253
66
                res.emplace_back();
254
66
                null_map[i] = 1;
255
66
                continue;
256
66
            }
257
301
            res.emplace_back(bits);
258
301
        }
259
278
        return Status::OK();
260
278
    }
261
};
262
263
struct NameBitmapFromBase64 {
264
    static constexpr auto name = "bitmap_from_base64";
265
};
266
struct BitmapFromBase64 {
267
    using ArgumentType = DataTypeString;
268
269
    static constexpr auto name = "bitmap_from_base64";
270
271
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
272
                         std::vector<BitmapValue>& res, NullMap& null_map,
273
18
                         size_t input_rows_count) {
274
18
        res.reserve(input_rows_count);
275
18
        if (offsets.size() == 0 && input_rows_count == 1) {
276
            // For NULL constant
277
0
            res.emplace_back();
278
0
            null_map[0] = 1;
279
0
            return Status::OK();
280
0
        }
281
18
        std::string decode_buff;
282
18
        size_t last_decode_buff_len = 0;
283
18
        size_t curr_decode_buff_len = 0;
284
58
        for (size_t i = 0; i < input_rows_count; ++i) {
285
41
            const char* src_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
286
41
            size_t src_size = offsets[i] - offsets[i - 1];
287
41
            if (0 != src_size % 4) {
288
                // return Status::InvalidArgument(
289
                //         fmt::format("invalid base64: {}", std::string(src_str, src_size)));
290
0
                res.emplace_back();
291
0
                null_map[i] = 1;
292
0
                continue;
293
0
            }
294
41
            curr_decode_buff_len = src_size + 3;
295
41
            if (curr_decode_buff_len > last_decode_buff_len) {
296
27
                decode_buff.resize(curr_decode_buff_len);
297
27
                last_decode_buff_len = curr_decode_buff_len;
298
27
            }
299
41
            auto outlen = base64_decode(src_str, src_size, decode_buff.data());
300
41
            if (outlen < 0) {
301
0
                res.emplace_back();
302
0
                null_map[i] = 1;
303
41
            } else {
304
41
                BitmapValue bitmap_val;
305
41
                if (!bitmap_val.deserialize(decode_buff.data())) {
306
1
                    return Status::RuntimeError("bitmap_from_base64 decode failed: base64: {}",
307
1
                                                std::string(src_str, src_size));
308
1
                }
309
40
                res.emplace_back(std::move(bitmap_val));
310
40
            }
311
41
        }
312
17
        return Status::OK();
313
18
    }
314
};
315
struct BitmapFromArray {
316
    using ArgumentType = DataTypeArray;
317
    static constexpr auto name = "bitmap_from_array";
318
319
    template <typename ColumnType>
320
    static Status vector(const ColumnArray::Offsets64& offset_column_data,
321
                         const IColumn& nested_column, const NullMap& nested_null_map,
322
19
                         std::vector<BitmapValue>& res, NullMap& null_map) {
323
19
        const auto& nested_column_data = static_cast<const ColumnType&>(nested_column).get_data();
324
19
        auto size = offset_column_data.size();
325
19
        res.reserve(size);
326
19
        std::vector<uint64_t> bits;
327
48
        for (size_t i = 0; i < size; ++i) {
328
29
            auto curr_offset = offset_column_data[i];
329
29
            auto prev_offset = offset_column_data[i - 1];
330
134
            for (auto j = prev_offset; j < curr_offset; ++j) {
331
107
                auto data = nested_column_data[j];
332
                // invaild value
333
107
                if (UNLIKELY(data < 0) || UNLIKELY(nested_null_map[j])) {
334
2
                    res.emplace_back();
335
2
                    null_map[i] = 1;
336
2
                    break;
337
105
                } else {
338
105
                    bits.push_back(data);
339
105
                }
340
107
            }
341
            //input is valid value
342
29
            if (!null_map[i]) {
343
27
                res.emplace_back(bits);
344
27
            }
345
29
            bits.clear();
346
29
        }
347
19
        return Status::OK();
348
19
    }
_ZN5doris15BitmapFromArray6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE3EEEEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_7IColumnERKNS6_IhLm4096ES9_Lm16ELm15EEERSt6vectorINS_11BitmapValueESaISK_EERSG_
Line
Count
Source
322
3
                         std::vector<BitmapValue>& res, NullMap& null_map) {
323
3
        const auto& nested_column_data = static_cast<const ColumnType&>(nested_column).get_data();
324
3
        auto size = offset_column_data.size();
325
3
        res.reserve(size);
326
3
        std::vector<uint64_t> bits;
327
6
        for (size_t i = 0; i < size; ++i) {
328
3
            auto curr_offset = offset_column_data[i];
329
3
            auto prev_offset = offset_column_data[i - 1];
330
16
            for (auto j = prev_offset; j < curr_offset; ++j) {
331
13
                auto data = nested_column_data[j];
332
                // invaild value
333
13
                if (UNLIKELY(data < 0) || UNLIKELY(nested_null_map[j])) {
334
0
                    res.emplace_back();
335
0
                    null_map[i] = 1;
336
0
                    break;
337
13
                } else {
338
13
                    bits.push_back(data);
339
13
                }
340
13
            }
341
            //input is valid value
342
3
            if (!null_map[i]) {
343
3
                res.emplace_back(bits);
344
3
            }
345
3
            bits.clear();
346
3
        }
347
3
        return Status::OK();
348
3
    }
Unexecuted instantiation: _ZN5doris15BitmapFromArray6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE2EEEEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_7IColumnERKNS6_IhLm4096ES9_Lm16ELm15EEERSt6vectorINS_11BitmapValueESaISK_EERSG_
Unexecuted instantiation: _ZN5doris15BitmapFromArray6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE4EEEEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_7IColumnERKNS6_IhLm4096ES9_Lm16ELm15EEERSt6vectorINS_11BitmapValueESaISK_EERSG_
_ZN5doris15BitmapFromArray6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE5EEEEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_7IColumnERKNS6_IhLm4096ES9_Lm16ELm15EEERSt6vectorINS_11BitmapValueESaISK_EERSG_
Line
Count
Source
322
9
                         std::vector<BitmapValue>& res, NullMap& null_map) {
323
9
        const auto& nested_column_data = static_cast<const ColumnType&>(nested_column).get_data();
324
9
        auto size = offset_column_data.size();
325
9
        res.reserve(size);
326
9
        std::vector<uint64_t> bits;
327
26
        for (size_t i = 0; i < size; ++i) {
328
17
            auto curr_offset = offset_column_data[i];
329
17
            auto prev_offset = offset_column_data[i - 1];
330
98
            for (auto j = prev_offset; j < curr_offset; ++j) {
331
83
                auto data = nested_column_data[j];
332
                // invaild value
333
83
                if (UNLIKELY(data < 0) || UNLIKELY(nested_null_map[j])) {
334
2
                    res.emplace_back();
335
2
                    null_map[i] = 1;
336
2
                    break;
337
81
                } else {
338
81
                    bits.push_back(data);
339
81
                }
340
83
            }
341
            //input is valid value
342
17
            if (!null_map[i]) {
343
15
                res.emplace_back(bits);
344
15
            }
345
17
            bits.clear();
346
17
        }
347
9
        return Status::OK();
348
9
    }
_ZN5doris15BitmapFromArray6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_7IColumnERKNS6_IhLm4096ES9_Lm16ELm15EEERSt6vectorINS_11BitmapValueESaISK_EERSG_
Line
Count
Source
322
7
                         std::vector<BitmapValue>& res, NullMap& null_map) {
323
7
        const auto& nested_column_data = static_cast<const ColumnType&>(nested_column).get_data();
324
7
        auto size = offset_column_data.size();
325
7
        res.reserve(size);
326
7
        std::vector<uint64_t> bits;
327
16
        for (size_t i = 0; i < size; ++i) {
328
9
            auto curr_offset = offset_column_data[i];
329
9
            auto prev_offset = offset_column_data[i - 1];
330
20
            for (auto j = prev_offset; j < curr_offset; ++j) {
331
11
                auto data = nested_column_data[j];
332
                // invaild value
333
11
                if (UNLIKELY(data < 0) || UNLIKELY(nested_null_map[j])) {
334
0
                    res.emplace_back();
335
0
                    null_map[i] = 1;
336
0
                    break;
337
11
                } else {
338
11
                    bits.push_back(data);
339
11
                }
340
11
            }
341
            //input is valid value
342
9
            if (!null_map[i]) {
343
9
                res.emplace_back(bits);
344
9
            }
345
9
            bits.clear();
346
9
        }
347
7
        return Status::OK();
348
7
    }
349
};
350
351
template <typename Impl>
352
class FunctionBitmapAlwaysNull : public IFunction {
353
public:
354
    static constexpr auto name = Impl::name;
355
356
3
    String get_name() const override { return name; }
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromStringEE8get_nameB5cxx11Ev
Line
Count
Source
356
1
    String get_name() const override { return name; }
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromBase64EE8get_nameB5cxx11Ev
Line
Count
Source
356
1
    String get_name() const override { return name; }
_ZNK5doris24FunctionBitmapAlwaysNullINS_15BitmapFromArrayEE8get_nameB5cxx11Ev
Line
Count
Source
356
1
    String get_name() const override { return name; }
357
358
328
    static FunctionPtr create() { return std::make_shared<FunctionBitmapAlwaysNull>(); }
_ZN5doris24FunctionBitmapAlwaysNullINS_16BitmapFromStringEE6createEv
Line
Count
Source
358
273
    static FunctionPtr create() { return std::make_shared<FunctionBitmapAlwaysNull>(); }
_ZN5doris24FunctionBitmapAlwaysNullINS_16BitmapFromBase64EE6createEv
Line
Count
Source
358
26
    static FunctionPtr create() { return std::make_shared<FunctionBitmapAlwaysNull>(); }
_ZN5doris24FunctionBitmapAlwaysNullINS_15BitmapFromArrayEE6createEv
Line
Count
Source
358
29
    static FunctionPtr create() { return std::make_shared<FunctionBitmapAlwaysNull>(); }
359
360
301
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
361
301
        return make_nullable(std::make_shared<DataTypeBitMap>());
362
301
    }
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromStringEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
360
264
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
361
264
        return make_nullable(std::make_shared<DataTypeBitMap>());
362
264
    }
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromBase64EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
360
17
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
361
17
        return make_nullable(std::make_shared<DataTypeBitMap>());
362
17
    }
_ZNK5doris24FunctionBitmapAlwaysNullINS_15BitmapFromArrayEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
360
20
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
361
20
        return make_nullable(std::make_shared<DataTypeBitMap>());
362
20
    }
363
364
301
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromStringEE23get_number_of_argumentsEv
Line
Count
Source
364
264
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromBase64EE23get_number_of_argumentsEv
Line
Count
Source
364
17
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris24FunctionBitmapAlwaysNullINS_15BitmapFromArrayEE23get_number_of_argumentsEv
Line
Count
Source
364
20
    size_t get_number_of_arguments() const override { return 1; }
365
366
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
367
315
                        uint32_t result, size_t input_rows_count) const override {
368
315
        auto res_null_map = ColumnUInt8::create(input_rows_count, 0);
369
315
        auto res_data_column = ColumnBitmap::create();
370
315
        auto& null_map = res_null_map->get_data();
371
315
        auto& res = res_data_column->get_data();
372
373
315
        ColumnPtr& argument_column = block.get_by_position(arguments[0]).column;
374
315
        if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeString>) {
375
296
            const auto& str_column = static_cast<const ColumnString&>(*argument_column);
376
296
            const ColumnString::Chars& data = str_column.get_chars();
377
296
            const ColumnString::Offsets& offsets = str_column.get_offsets();
378
296
            RETURN_IF_ERROR(Impl::vector(data, offsets, res, null_map, input_rows_count));
379
296
        } else if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeArray>) {
380
19
            auto argument_type = remove_nullable(
381
19
                    assert_cast<const DataTypeArray&>(*block.get_by_position(arguments[0]).type)
382
19
                            .get_nested_type());
383
19
            const auto& array_column = static_cast<const ColumnArray&>(*argument_column);
384
19
            const auto& offset_column_data = array_column.get_offsets();
385
19
            const auto& nested_nullable_column =
386
19
                    static_cast<const ColumnNullable&>(array_column.get_data());
387
19
            const auto& nested_column = nested_nullable_column.get_nested_column();
388
19
            const auto& nested_null_map = nested_nullable_column.get_null_map_column().get_data();
389
390
19
            switch (argument_type->get_primitive_type()) {
391
3
            case PrimitiveType::TYPE_TINYINT:
392
3
                RETURN_IF_ERROR(Impl::template vector<ColumnInt8>(offset_column_data, nested_column,
393
3
                                                                  nested_null_map, res, null_map));
394
3
                break;
395
3
            case PrimitiveType::TYPE_BOOLEAN:
396
0
                RETURN_IF_ERROR(Impl::template vector<ColumnUInt8>(
397
0
                        offset_column_data, nested_column, nested_null_map, res, null_map));
398
0
                break;
399
0
            case PrimitiveType::TYPE_SMALLINT:
400
0
                RETURN_IF_ERROR(Impl::template vector<ColumnInt16>(
401
0
                        offset_column_data, nested_column, nested_null_map, res, null_map));
402
0
                break;
403
9
            case PrimitiveType::TYPE_INT:
404
9
                RETURN_IF_ERROR(Impl::template vector<ColumnInt32>(
405
9
                        offset_column_data, nested_column, nested_null_map, res, null_map));
406
9
                break;
407
9
            case PrimitiveType::TYPE_BIGINT:
408
7
                RETURN_IF_ERROR(Impl::template vector<ColumnInt64>(
409
7
                        offset_column_data, nested_column, nested_null_map, res, null_map));
410
7
                break;
411
7
            default:
412
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
413
0
                                            block.get_by_position(arguments[0]).column->get_name(),
414
0
                                            get_name());
415
19
            }
416
        } else {
417
            return Status::RuntimeError("Illegal column {} of argument of function {}",
418
                                        block.get_by_position(arguments[0]).column->get_name(),
419
                                        get_name());
420
        }
421
314
        block.get_by_position(result).column =
422
315
                ColumnNullable::create(std::move(res_data_column), std::move(res_null_map));
423
315
        return Status::OK();
424
315
    }
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromStringEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
367
278
                        uint32_t result, size_t input_rows_count) const override {
368
278
        auto res_null_map = ColumnUInt8::create(input_rows_count, 0);
369
278
        auto res_data_column = ColumnBitmap::create();
370
278
        auto& null_map = res_null_map->get_data();
371
278
        auto& res = res_data_column->get_data();
372
373
278
        ColumnPtr& argument_column = block.get_by_position(arguments[0]).column;
374
278
        if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeString>) {
375
278
            const auto& str_column = static_cast<const ColumnString&>(*argument_column);
376
278
            const ColumnString::Chars& data = str_column.get_chars();
377
278
            const ColumnString::Offsets& offsets = str_column.get_offsets();
378
278
            RETURN_IF_ERROR(Impl::vector(data, offsets, res, null_map, input_rows_count));
379
        } else if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeArray>) {
380
            auto argument_type = remove_nullable(
381
                    assert_cast<const DataTypeArray&>(*block.get_by_position(arguments[0]).type)
382
                            .get_nested_type());
383
            const auto& array_column = static_cast<const ColumnArray&>(*argument_column);
384
            const auto& offset_column_data = array_column.get_offsets();
385
            const auto& nested_nullable_column =
386
                    static_cast<const ColumnNullable&>(array_column.get_data());
387
            const auto& nested_column = nested_nullable_column.get_nested_column();
388
            const auto& nested_null_map = nested_nullable_column.get_null_map_column().get_data();
389
390
            switch (argument_type->get_primitive_type()) {
391
            case PrimitiveType::TYPE_TINYINT:
392
                RETURN_IF_ERROR(Impl::template vector<ColumnInt8>(offset_column_data, nested_column,
393
                                                                  nested_null_map, res, null_map));
394
                break;
395
            case PrimitiveType::TYPE_BOOLEAN:
396
                RETURN_IF_ERROR(Impl::template vector<ColumnUInt8>(
397
                        offset_column_data, nested_column, nested_null_map, res, null_map));
398
                break;
399
            case PrimitiveType::TYPE_SMALLINT:
400
                RETURN_IF_ERROR(Impl::template vector<ColumnInt16>(
401
                        offset_column_data, nested_column, nested_null_map, res, null_map));
402
                break;
403
            case PrimitiveType::TYPE_INT:
404
                RETURN_IF_ERROR(Impl::template vector<ColumnInt32>(
405
                        offset_column_data, nested_column, nested_null_map, res, null_map));
406
                break;
407
            case PrimitiveType::TYPE_BIGINT:
408
                RETURN_IF_ERROR(Impl::template vector<ColumnInt64>(
409
                        offset_column_data, nested_column, nested_null_map, res, null_map));
410
                break;
411
            default:
412
                return Status::RuntimeError("Illegal column {} of argument of function {}",
413
                                            block.get_by_position(arguments[0]).column->get_name(),
414
                                            get_name());
415
            }
416
        } else {
417
            return Status::RuntimeError("Illegal column {} of argument of function {}",
418
                                        block.get_by_position(arguments[0]).column->get_name(),
419
                                        get_name());
420
        }
421
278
        block.get_by_position(result).column =
422
278
                ColumnNullable::create(std::move(res_data_column), std::move(res_null_map));
423
278
        return Status::OK();
424
278
    }
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromBase64EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
367
18
                        uint32_t result, size_t input_rows_count) const override {
368
18
        auto res_null_map = ColumnUInt8::create(input_rows_count, 0);
369
18
        auto res_data_column = ColumnBitmap::create();
370
18
        auto& null_map = res_null_map->get_data();
371
18
        auto& res = res_data_column->get_data();
372
373
18
        ColumnPtr& argument_column = block.get_by_position(arguments[0]).column;
374
18
        if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeString>) {
375
18
            const auto& str_column = static_cast<const ColumnString&>(*argument_column);
376
18
            const ColumnString::Chars& data = str_column.get_chars();
377
18
            const ColumnString::Offsets& offsets = str_column.get_offsets();
378
18
            RETURN_IF_ERROR(Impl::vector(data, offsets, res, null_map, input_rows_count));
379
        } else if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeArray>) {
380
            auto argument_type = remove_nullable(
381
                    assert_cast<const DataTypeArray&>(*block.get_by_position(arguments[0]).type)
382
                            .get_nested_type());
383
            const auto& array_column = static_cast<const ColumnArray&>(*argument_column);
384
            const auto& offset_column_data = array_column.get_offsets();
385
            const auto& nested_nullable_column =
386
                    static_cast<const ColumnNullable&>(array_column.get_data());
387
            const auto& nested_column = nested_nullable_column.get_nested_column();
388
            const auto& nested_null_map = nested_nullable_column.get_null_map_column().get_data();
389
390
            switch (argument_type->get_primitive_type()) {
391
            case PrimitiveType::TYPE_TINYINT:
392
                RETURN_IF_ERROR(Impl::template vector<ColumnInt8>(offset_column_data, nested_column,
393
                                                                  nested_null_map, res, null_map));
394
                break;
395
            case PrimitiveType::TYPE_BOOLEAN:
396
                RETURN_IF_ERROR(Impl::template vector<ColumnUInt8>(
397
                        offset_column_data, nested_column, nested_null_map, res, null_map));
398
                break;
399
            case PrimitiveType::TYPE_SMALLINT:
400
                RETURN_IF_ERROR(Impl::template vector<ColumnInt16>(
401
                        offset_column_data, nested_column, nested_null_map, res, null_map));
402
                break;
403
            case PrimitiveType::TYPE_INT:
404
                RETURN_IF_ERROR(Impl::template vector<ColumnInt32>(
405
                        offset_column_data, nested_column, nested_null_map, res, null_map));
406
                break;
407
            case PrimitiveType::TYPE_BIGINT:
408
                RETURN_IF_ERROR(Impl::template vector<ColumnInt64>(
409
                        offset_column_data, nested_column, nested_null_map, res, null_map));
410
                break;
411
            default:
412
                return Status::RuntimeError("Illegal column {} of argument of function {}",
413
                                            block.get_by_position(arguments[0]).column->get_name(),
414
                                            get_name());
415
            }
416
        } else {
417
            return Status::RuntimeError("Illegal column {} of argument of function {}",
418
                                        block.get_by_position(arguments[0]).column->get_name(),
419
                                        get_name());
420
        }
421
17
        block.get_by_position(result).column =
422
18
                ColumnNullable::create(std::move(res_data_column), std::move(res_null_map));
423
18
        return Status::OK();
424
18
    }
_ZNK5doris24FunctionBitmapAlwaysNullINS_15BitmapFromArrayEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
367
19
                        uint32_t result, size_t input_rows_count) const override {
368
19
        auto res_null_map = ColumnUInt8::create(input_rows_count, 0);
369
19
        auto res_data_column = ColumnBitmap::create();
370
19
        auto& null_map = res_null_map->get_data();
371
19
        auto& res = res_data_column->get_data();
372
373
19
        ColumnPtr& argument_column = block.get_by_position(arguments[0]).column;
374
        if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeString>) {
375
            const auto& str_column = static_cast<const ColumnString&>(*argument_column);
376
            const ColumnString::Chars& data = str_column.get_chars();
377
            const ColumnString::Offsets& offsets = str_column.get_offsets();
378
            RETURN_IF_ERROR(Impl::vector(data, offsets, res, null_map, input_rows_count));
379
19
        } else if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeArray>) {
380
19
            auto argument_type = remove_nullable(
381
19
                    assert_cast<const DataTypeArray&>(*block.get_by_position(arguments[0]).type)
382
19
                            .get_nested_type());
383
19
            const auto& array_column = static_cast<const ColumnArray&>(*argument_column);
384
19
            const auto& offset_column_data = array_column.get_offsets();
385
19
            const auto& nested_nullable_column =
386
19
                    static_cast<const ColumnNullable&>(array_column.get_data());
387
19
            const auto& nested_column = nested_nullable_column.get_nested_column();
388
19
            const auto& nested_null_map = nested_nullable_column.get_null_map_column().get_data();
389
390
19
            switch (argument_type->get_primitive_type()) {
391
3
            case PrimitiveType::TYPE_TINYINT:
392
3
                RETURN_IF_ERROR(Impl::template vector<ColumnInt8>(offset_column_data, nested_column,
393
3
                                                                  nested_null_map, res, null_map));
394
3
                break;
395
3
            case PrimitiveType::TYPE_BOOLEAN:
396
0
                RETURN_IF_ERROR(Impl::template vector<ColumnUInt8>(
397
0
                        offset_column_data, nested_column, nested_null_map, res, null_map));
398
0
                break;
399
0
            case PrimitiveType::TYPE_SMALLINT:
400
0
                RETURN_IF_ERROR(Impl::template vector<ColumnInt16>(
401
0
                        offset_column_data, nested_column, nested_null_map, res, null_map));
402
0
                break;
403
9
            case PrimitiveType::TYPE_INT:
404
9
                RETURN_IF_ERROR(Impl::template vector<ColumnInt32>(
405
9
                        offset_column_data, nested_column, nested_null_map, res, null_map));
406
9
                break;
407
9
            case PrimitiveType::TYPE_BIGINT:
408
7
                RETURN_IF_ERROR(Impl::template vector<ColumnInt64>(
409
7
                        offset_column_data, nested_column, nested_null_map, res, null_map));
410
7
                break;
411
7
            default:
412
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
413
0
                                            block.get_by_position(arguments[0]).column->get_name(),
414
0
                                            get_name());
415
19
            }
416
        } else {
417
            return Status::RuntimeError("Illegal column {} of argument of function {}",
418
                                        block.get_by_position(arguments[0]).column->get_name(),
419
                                        get_name());
420
        }
421
19
        block.get_by_position(result).column =
422
19
                ColumnNullable::create(std::move(res_data_column), std::move(res_null_map));
423
19
        return Status::OK();
424
19
    }
425
};
426
427
/// Maps the hash width (in bits) to the SQL name of the bitmap hash function.
/// Only the widths specialised below provide a `name`.
template <int HashBits>
struct BitmapHashName {};

/// 32-bit hash variant.
template <>
struct BitmapHashName<32> {
    static constexpr const char* name = "bitmap_hash";
};

/// 64-bit hash variant.
template <>
struct BitmapHashName<64> {
    static constexpr const char* name = "bitmap_hash64";
};
439
440
template <int HashBits>
441
struct BitmapHash {
442
    static constexpr auto name = BitmapHashName<HashBits>::name;
443
444
    using ReturnType = DataTypeBitMap;
445
446
    template <typename ColumnType>
447
128
    static void vector(const ColumnType* col, MutableColumnPtr& col_res) {
448
128
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
449
128
            const ColumnString::Chars& data = col->get_chars();
450
128
            const ColumnString::Offsets& offsets = col->get_offsets();
451
128
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
452
128
            auto& res_data = res_column->get_data();
453
128
            size_t size = offsets.size();
454
455
433
            for (size_t i = 0; i < size; ++i) {
456
305
                const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
457
305
                size_t str_size = offsets[i] - offsets[i - 1];
458
305
                if constexpr (HashBits == 32) {
459
273
                    uint32_t hash_value =
460
273
                            HashUtil::murmur_hash3_32(raw_str, str_size, HashUtil::MURMUR3_32_SEED);
461
273
                    res_data[i].add(hash_value);
462
273
                } else {
463
32
                    uint64_t hash_value = 0;
464
32
                    murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value);
465
32
                    res_data[i].add(hash_value);
466
32
                }
467
305
            }
468
128
        }
469
128
    }
_ZN5doris10BitmapHashILi32EE6vectorINS_9ColumnStrIjEEEEvPKT_RNS_3COWINS_7IColumnEE11mutable_ptrIS9_EE
Line
Count
Source
447
112
    static void vector(const ColumnType* col, MutableColumnPtr& col_res) {
448
112
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
449
112
            const ColumnString::Chars& data = col->get_chars();
450
112
            const ColumnString::Offsets& offsets = col->get_offsets();
451
112
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
452
112
            auto& res_data = res_column->get_data();
453
112
            size_t size = offsets.size();
454
455
385
            for (size_t i = 0; i < size; ++i) {
456
273
                const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
457
273
                size_t str_size = offsets[i] - offsets[i - 1];
458
273
                if constexpr (HashBits == 32) {
459
273
                    uint32_t hash_value =
460
273
                            HashUtil::murmur_hash3_32(raw_str, str_size, HashUtil::MURMUR3_32_SEED);
461
273
                    res_data[i].add(hash_value);
462
                } else {
463
                    uint64_t hash_value = 0;
464
                    murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value);
465
                    res_data[i].add(hash_value);
466
                }
467
273
            }
468
112
        }
469
112
    }
Unexecuted instantiation: _ZN5doris10BitmapHashILi32EE6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvPKT_RNS_3COWINS_7IColumnEE11mutable_ptrISA_EE
_ZN5doris10BitmapHashILi64EE6vectorINS_9ColumnStrIjEEEEvPKT_RNS_3COWINS_7IColumnEE11mutable_ptrIS9_EE
Line
Count
Source
447
16
    static void vector(const ColumnType* col, MutableColumnPtr& col_res) {
448
16
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
449
16
            const ColumnString::Chars& data = col->get_chars();
450
16
            const ColumnString::Offsets& offsets = col->get_offsets();
451
16
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
452
16
            auto& res_data = res_column->get_data();
453
16
            size_t size = offsets.size();
454
455
48
            for (size_t i = 0; i < size; ++i) {
456
32
                const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
457
32
                size_t str_size = offsets[i] - offsets[i - 1];
458
                if constexpr (HashBits == 32) {
459
                    uint32_t hash_value =
460
                            HashUtil::murmur_hash3_32(raw_str, str_size, HashUtil::MURMUR3_32_SEED);
461
                    res_data[i].add(hash_value);
462
32
                } else {
463
32
                    uint64_t hash_value = 0;
464
32
                    murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value);
465
32
                    res_data[i].add(hash_value);
466
32
                }
467
32
            }
468
16
        }
469
16
    }
Unexecuted instantiation: _ZN5doris10BitmapHashILi64EE6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvPKT_RNS_3COWINS_7IColumnEE11mutable_ptrISA_EE
470
471
    template <typename ColumnType>
472
    static void vector_nullable(const ColumnType* col, const NullMap& nullmap,
473
190
                                MutableColumnPtr& col_res) {
474
190
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
475
190
            const ColumnString::Chars& data = col->get_chars();
476
190
            const ColumnString::Offsets& offsets = col->get_offsets();
477
190
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
478
190
            auto& res_data = res_column->get_data();
479
190
            size_t size = offsets.size();
480
481
990
            for (size_t i = 0; i < size; ++i) {
482
800
                if (nullmap[i]) {
483
57
                    continue;
484
743
                } else {
485
743
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
486
743
                    size_t str_size = offsets[i] - offsets[i - 1];
487
743
                    if constexpr (HashBits == 32) {
488
717
                        uint32_t hash_value = HashUtil::murmur_hash3_32(raw_str, str_size,
489
717
                                                                        HashUtil::MURMUR3_32_SEED);
490
717
                        res_data[i].add(hash_value);
491
717
                    } else {
492
26
                        uint64_t hash_value = 0;
493
26
                        murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value);
494
26
                        res_data[i].add(hash_value);
495
26
                    }
496
743
                }
497
800
            }
498
190
        }
499
190
    }
_ZN5doris10BitmapHashILi32EE15vector_nullableINS_9ColumnStrIjEEEEvPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE
Line
Count
Source
473
181
                                MutableColumnPtr& col_res) {
474
181
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
475
181
            const ColumnString::Chars& data = col->get_chars();
476
181
            const ColumnString::Offsets& offsets = col->get_offsets();
477
181
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
478
181
            auto& res_data = res_column->get_data();
479
181
            size_t size = offsets.size();
480
481
954
            for (size_t i = 0; i < size; ++i) {
482
773
                if (nullmap[i]) {
483
56
                    continue;
484
717
                } else {
485
717
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
486
717
                    size_t str_size = offsets[i] - offsets[i - 1];
487
717
                    if constexpr (HashBits == 32) {
488
717
                        uint32_t hash_value = HashUtil::murmur_hash3_32(raw_str, str_size,
489
717
                                                                        HashUtil::MURMUR3_32_SEED);
490
717
                        res_data[i].add(hash_value);
491
                    } else {
492
                        uint64_t hash_value = 0;
493
                        murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value);
494
                        res_data[i].add(hash_value);
495
                    }
496
717
                }
497
773
            }
498
181
        }
499
181
    }
Unexecuted instantiation: _ZN5doris10BitmapHashILi32EE15vector_nullableINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISH_EE
_ZN5doris10BitmapHashILi64EE15vector_nullableINS_9ColumnStrIjEEEEvPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE
Line
Count
Source
473
9
                                MutableColumnPtr& col_res) {
474
9
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
475
9
            const ColumnString::Chars& data = col->get_chars();
476
9
            const ColumnString::Offsets& offsets = col->get_offsets();
477
9
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
478
9
            auto& res_data = res_column->get_data();
479
9
            size_t size = offsets.size();
480
481
36
            for (size_t i = 0; i < size; ++i) {
482
27
                if (nullmap[i]) {
483
1
                    continue;
484
26
                } else {
485
26
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
486
26
                    size_t str_size = offsets[i] - offsets[i - 1];
487
                    if constexpr (HashBits == 32) {
488
                        uint32_t hash_value = HashUtil::murmur_hash3_32(raw_str, str_size,
489
                                                                        HashUtil::MURMUR3_32_SEED);
490
                        res_data[i].add(hash_value);
491
26
                    } else {
492
26
                        uint64_t hash_value = 0;
493
26
                        murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value);
494
26
                        res_data[i].add(hash_value);
495
26
                    }
496
26
                }
497
27
            }
498
9
        }
499
9
    }
Unexecuted instantiation: _ZN5doris10BitmapHashILi64EE15vector_nullableINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISH_EE
500
};
501
502
class FunctionBitmapCount : public IFunction {
503
public:
504
    static constexpr auto name = "bitmap_count";
505
506
1
    String get_name() const override { return name; }
507
508
79
    static FunctionPtr create() { return std::make_shared<FunctionBitmapCount>(); }
509
510
70
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
511
70
        return std::make_shared<DataTypeInt64>();
512
70
    }
513
514
70
    size_t get_number_of_arguments() const override { return 1; }
515
516
183
    bool use_default_implementation_for_nulls() const override { return false; }
517
518
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
519
113
                        uint32_t result, size_t input_rows_count) const override {
520
113
        auto res_data_column = ColumnInt64::create();
521
113
        auto& res = res_data_column->get_data();
522
113
        auto data_null_map = ColumnUInt8::create(input_rows_count, 0);
523
113
        auto& null_map = data_null_map->get_data();
524
525
113
        auto column = block.get_by_position(arguments[0]).column;
526
113
        if (auto* nullable = check_and_get_column<const ColumnNullable>(*column)) {
527
19
            VectorizedUtils::update_null_map(null_map, nullable->get_null_map_data());
528
19
            column = nullable->get_nested_column_ptr();
529
19
        }
530
113
        auto str_col = assert_cast<const ColumnBitmap*>(column.get());
531
113
        const auto& col_data = str_col->get_data();
532
533
113
        res.reserve(input_rows_count);
534
313
        for (size_t i = 0; i < input_rows_count; ++i) {
535
200
            if (null_map[i]) {
536
3
                res.push_back(0);
537
3
                continue;
538
3
            }
539
197
            res.push_back(col_data[i].cardinality());
540
197
        }
541
113
        block.replace_by_position(result, std::move(res_data_column));
542
113
        return Status::OK();
543
113
    }
544
};
545
546
/// Name tag carrying the SQL-visible function name "bitmap_not".
struct NameBitmapNot {
    static constexpr const char* name = "bitmap_not";
};
549
550
template <typename LeftDataType, typename RightDataType>
551
struct BitmapNot {
552
    using ResultDataType = DataTypeBitMap;
553
    using T0 = typename LeftDataType::FieldType;
554
    using T1 = typename RightDataType::FieldType;
555
    using TData = std::vector<BitmapValue>;
556
557
10
    static void vector_vector(const TData& lvec, const TData& rvec, TData& res) {
558
10
        size_t size = lvec.size();
559
37
        for (size_t i = 0; i < size; ++i) {
560
27
            res[i] = lvec[i];
561
27
            res[i] -= rvec[i];
562
27
        }
563
10
    }
564
0
    static void vector_scalar(const TData& lvec, const BitmapValue& rval, TData& res) {
565
0
        size_t size = lvec.size();
566
0
        for (size_t i = 0; i < size; ++i) {
567
0
            res[i] = lvec[i];
568
0
            res[i] -= rval;
569
0
        }
570
0
    }
571
0
    static void scalar_vector(const BitmapValue& lval, const TData& rvec, TData& res) {
572
0
        size_t size = rvec.size();
573
0
        for (size_t i = 0; i < size; ++i) {
574
0
            res[i] = lval;
575
0
            res[i] -= rvec[i];
576
0
        }
577
0
    }
578
};
579
580
/// Name tag carrying the SQL-visible function name "bitmap_and_not".
struct NameBitmapAndNot {
    static constexpr const char* name = "bitmap_and_not";
};
583
584
template <typename LeftDataType, typename RightDataType>
585
struct BitmapAndNot {
586
    using ResultDataType = DataTypeBitMap;
587
    using T0 = typename LeftDataType::FieldType;
588
    using T1 = typename RightDataType::FieldType;
589
    using TData = std::vector<BitmapValue>;
590
591
19
    static void vector_vector(const TData& lvec, const TData& rvec, TData& res) {
592
19
        size_t size = lvec.size();
593
19
        BitmapValue mid_data;
594
74
        for (size_t i = 0; i < size; ++i) {
595
55
            mid_data = lvec[i];
596
55
            mid_data &= rvec[i];
597
55
            res[i] = lvec[i];
598
55
            res[i] -= mid_data;
599
55
            mid_data.reset();
600
55
        }
601
19
    }
602
0
    static void vector_scalar(const TData& lvec, const BitmapValue& rval, TData& res) {
603
0
        size_t size = lvec.size();
604
0
        BitmapValue mid_data;
605
0
        for (size_t i = 0; i < size; ++i) {
606
0
            mid_data = lvec[i];
607
0
            mid_data &= rval;
608
0
            res[i] = lvec[i];
609
0
            res[i] -= mid_data;
610
0
            mid_data.reset();
611
0
        }
612
0
    }
613
0
    static void scalar_vector(const BitmapValue& lval, const TData& rvec, TData& res) {
614
0
        size_t size = rvec.size();
615
0
        BitmapValue mid_data;
616
0
        for (size_t i = 0; i < size; ++i) {
617
0
            mid_data = lval;
618
0
            mid_data &= rvec[i];
619
0
            res[i] = lval;
620
0
            res[i] -= mid_data;
621
0
            mid_data.reset();
622
0
        }
623
0
    }
624
};
625
626
/// Name tag carrying the SQL-visible function name "bitmap_and_not_count".
struct NameBitmapAndNotCount {
    static constexpr const char* name = "bitmap_and_not_count";
};
629
630
template <typename LeftDataType, typename RightDataType>
631
struct BitmapAndNotCount {
632
    using ResultDataType = DataTypeInt64;
633
    using T0 = typename LeftDataType::FieldType;
634
    using T1 = typename RightDataType::FieldType;
635
    using TData = std::vector<BitmapValue>;
636
    using ResTData = typename ColumnInt64::Container::value_type;
637
638
29
    static void vector_vector(const TData& lvec, const TData& rvec, ResTData* res) {
639
29
        size_t size = lvec.size();
640
29
        BitmapValue mid_data;
641
107
        for (size_t i = 0; i < size; ++i) {
642
78
            mid_data = lvec[i];
643
78
            mid_data &= rvec[i];
644
78
            res[i] = lvec[i].andnot_cardinality(mid_data);
645
78
            mid_data.reset();
646
78
        }
647
29
    }
648
0
    static void scalar_vector(const BitmapValue& lval, const TData& rvec, ResTData* res) {
649
0
        size_t size = rvec.size();
650
0
        BitmapValue mid_data;
651
0
        for (size_t i = 0; i < size; ++i) {
652
0
            mid_data = lval;
653
0
            mid_data &= rvec[i];
654
0
            res[i] = lval.andnot_cardinality(mid_data);
655
0
            mid_data.reset();
656
0
        }
657
0
    }
658
0
    static void vector_scalar(const TData& lvec, const BitmapValue& rval, ResTData* res) {
659
0
        size_t size = lvec.size();
660
0
        BitmapValue mid_data;
661
0
        for (size_t i = 0; i < size; ++i) {
662
0
            mid_data = lvec[i];
663
0
            mid_data &= rval;
664
0
            res[i] = lvec[i].andnot_cardinality(mid_data);
665
0
            mid_data.reset();
666
0
        }
667
0
    }
668
};
669
670
63
void update_bitmap_op_count(int64_t* __restrict count, const NullMap& null_map) {
671
63
    static constexpr int64_t flags[2] = {-1, 0};
672
63
    size_t size = null_map.size();
673
63
    auto* __restrict null_map_data = null_map.data();
674
178
    for (size_t i = 0; i < size; ++i) {
675
115
        count[i] &= flags[null_map_data[i]];
676
115
    }
677
63
}
678
679
// for bitmap_and_count, bitmap_xor_count and bitmap_and_not_count,
680
// the result is 0 for every row in which any argument column is NULL
681
/// Forces the count to 0 for every row in which a nullable argument is NULL.
///
/// @param src   nullable column whose nested column is a ColumnInt64 of counts;
///              the nested counts are modified in place.
/// @param block block holding the original (possibly nullable) argument columns.
/// @param args  positions of the arguments inside `block`.
/// @param result position of the result inside `block`; only its type is used.
/// @param input_rows_count row count used if a constant result has to be built.
/// @return `src`, or a constant column holding 0 (created from the result type)
///         when any nullable argument is a constant that is entirely NULL.
ColumnPtr handle_bitmap_op_count_null_value(ColumnPtr& src, const Block& block,
                                            const ColumnNumbers& args, uint32_t result,
                                            size_t input_rows_count) {
    const auto* nullable_result = assert_cast<const ColumnNullable*>(src.get());
    ColumnPtr nested_result = nullable_result->get_nested_column_ptr();
    MutableColumnPtr nested_result_mutable = (*std::move(nested_result)).assume_mutable();
    auto* __restrict counts =
            assert_cast<ColumnInt64*>(nested_result_mutable.get())->get_data().data();

    for (const auto& arg : args) {
        const ColumnWithTypeAndName& elem = block.get_by_position(arg);
        // Arguments that cannot be NULL never affect the counts.
        if (!elem.type->is_nullable()) {
            continue;
        }

        if (is_column_const(*elem.column)) {
            /// Const Nullable that are NULL.
            if (assert_cast<const ColumnConst*>(elem.column.get())->only_null()) {
                return block.get_by_position(result).type->create_column_const(
                        input_rows_count, Field::create_field<TYPE_BIGINT>(0));
            }
            // Any other constant argument is skipped.
            continue;
        }

        if (const auto* nullable_arg = assert_cast<const ColumnNullable*>(elem.column.get())) {
            const ColumnPtr& arg_null_map_column = nullable_arg->get_null_map_column_ptr();
            const NullMap& arg_null_map =
                    assert_cast<const ColumnUInt8&>(*arg_null_map_column).get_data();

            update_bitmap_op_count(counts, arg_null_map);
        }
    }

    return src;
}
717
718
/// Runs a bitmap "count" operation so that NULL inputs yield a count of 0.
///
/// When none of the arguments is a null column, `exec_impl_func` is run directly
/// on `block`. Otherwise it is run on a temporary block built from the nested
/// columns, and the counts it produces are post-processed by
/// handle_bitmap_op_count_null_value() before being stored at `result`.
///
/// @return the status of `exec_impl_func`; OK after a successful null-aware run.
Status execute_bitmap_op_count_null_to_zero(
        FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result,
        size_t input_rows_count,
        const std::function<Status(FunctionContext*, Block&, const ColumnNumbers&, size_t, size_t)>&
                exec_impl_func) {
    // Fast path: nothing to patch up afterwards.
    if (!have_null_column(block, arguments)) {
        return exec_impl_func(context, block, arguments, result, input_rows_count);
    }

    auto [temporary_block, new_args, new_result] =
            create_block_with_nested_columns(block, arguments, result);
    RETURN_IF_ERROR(exec_impl_func(context, temporary_block, new_args, new_result,
                                   temporary_block.rows()));
    block.get_by_position(result).column = handle_bitmap_op_count_null_value(
            temporary_block.get_by_position(new_result).column, block, arguments, result,
            input_rows_count);
    return Status::OK();
}
736
737
template <typename FunctionName>
738
class FunctionBitmapAndNotCount : public IFunction {
739
public:
740
    using LeftDataType = DataTypeBitMap;
741
    using RightDataType = DataTypeBitMap;
742
    using ResultDataType = typename BitmapAndNotCount<LeftDataType, RightDataType>::ResultDataType;
743
744
    static constexpr auto name = FunctionName::name;
745
23
    static FunctionPtr create() { return std::make_shared<FunctionBitmapAndNotCount>(); }
746
1
    String get_name() const override { return name; }
747
14
    size_t get_number_of_arguments() const override { return 2; }
748
14
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
749
14
        bool return_nullable = false;
750
        // result is nullable only when any columns is nullable for bitmap_and_not_count
751
31
        for (size_t i = 0; i < arguments.size(); ++i) {
752
24
            if (arguments[i]->is_nullable()) {
753
7
                return_nullable = true;
754
7
                break;
755
7
            }
756
24
        }
757
14
        auto result_type = std::make_shared<ResultDataType>();
758
14
        return return_nullable ? make_nullable(result_type) : result_type;
759
14
    }
760
761
43
    bool use_default_implementation_for_nulls() const override {
762
        // for bitmap_and_not_count, result is always not null, and if the bitmap op result is null,
763
        // the count is 0
764
43
        return false;
765
43
    }
766
767
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
768
28
                        uint32_t result, size_t input_rows_count) const override {
769
28
        DCHECK_EQ(arguments.size(), 2);
770
28
        auto impl_func = [&](FunctionContext* context, Block& block, const ColumnNumbers& arguments,
771
29
                             uint32_t result, size_t input_rows_count) {
772
29
            return execute_impl_internal(context, block, arguments, result, input_rows_count);
773
29
        };
774
28
        return execute_bitmap_op_count_null_to_zero(context, block, arguments, result,
775
28
                                                    input_rows_count, impl_func);
776
28
    }
777
778
    Status execute_impl_internal(FunctionContext* context, Block& block,
779
                                 const ColumnNumbers& arguments, uint32_t result,
780
29
                                 size_t input_rows_count) const {
781
29
        using ColVecResult = ColumnVector<ResultDataType::PType>;
782
783
29
        typename ColVecResult::MutablePtr col_res = ColVecResult::create();
784
29
        auto& vec_res = col_res->get_data();
785
29
        vec_res.resize(block.rows());
786
787
29
        const auto& left = block.get_by_position(arguments[0]);
788
29
        auto lcol = left.column;
789
29
        const auto& right = block.get_by_position(arguments[1]);
790
29
        auto rcol = right.column;
791
792
29
        if (is_column_const(*left.column)) {
793
0
            BitmapAndNotCount<LeftDataType, RightDataType>::scalar_vector(
794
0
                    assert_cast<const ColumnBitmap&>(
795
0
                            assert_cast<const ColumnConst*>(lcol.get())->get_data_column())
796
0
                            .get_data()[0],
797
0
                    assert_cast<const ColumnBitmap*>(rcol.get())->get_data(), vec_res.data());
798
29
        } else if (is_column_const(*right.column)) {
799
0
            BitmapAndNotCount<LeftDataType, RightDataType>::vector_scalar(
800
0
                    assert_cast<const ColumnBitmap*>(lcol.get())->get_data(),
801
0
                    assert_cast<const ColumnBitmap&>(
802
0
                            assert_cast<const ColumnConst*>(rcol.get())->get_data_column())
803
0
                            .get_data()[0],
804
0
                    vec_res.data());
805
29
        } else {
806
29
            BitmapAndNotCount<LeftDataType, RightDataType>::vector_vector(
807
29
                    assert_cast<const ColumnBitmap*>(lcol.get())->get_data(),
808
29
                    assert_cast<const ColumnBitmap*>(rcol.get())->get_data(), vec_res.data());
809
29
        }
810
811
29
        auto& result_info = block.get_by_position(result);
812
29
        if (result_info.type->is_nullable()) {
813
10
            block.replace_by_position(
814
10
                    result, ColumnNullable::create(std::move(col_res),
815
10
                                                   ColumnUInt8::create(input_rows_count, 0)));
816
19
        } else {
817
19
            block.replace_by_position(result, std::move(col_res));
818
19
        }
819
29
        return Status::OK();
820
29
    }
821
};
822
823
/// Name tag carrying the SQL-visible function name "bitmap_contains".
struct NameBitmapContains {
    static constexpr const char* name = "bitmap_contains";
};
826
827
template <typename LeftDataType, typename RightDataType>
828
struct BitmapContains {
829
    using ResultDataType = DataTypeUInt8;
830
    using T0 = typename LeftDataType::FieldType;
831
    using T1 = typename RightDataType::FieldType;
832
    using LTData = std::vector<BitmapValue>;
833
    using RTData = typename ColumnVector<RightDataType::PType>::Container;
834
    using ResTData = typename ColumnUInt8::Container;
835
836
31
    static void vector_vector(const LTData& lvec, const RTData& rvec, ResTData& res) {
837
31
        size_t size = lvec.size();
838
81
        for (size_t i = 0; i < size; ++i) {
839
50
            res[i] = lvec[i].contains(rvec[i]);
840
50
        }
841
31
    }
842
7
    static void vector_scalar(const LTData& lvec, const T1& rval, ResTData& res) {
843
7
        size_t size = lvec.size();
844
14
        for (size_t i = 0; i < size; ++i) {
845
7
            res[i] = lvec[i].contains(rval);
846
7
        }
847
7
    }
848
0
    static void scalar_vector(const BitmapValue& lval, const RTData& rvec, ResTData& res) {
849
0
        size_t size = rvec.size();
850
0
        for (size_t i = 0; i < size; ++i) {
851
0
            res[i] = lval.contains(rvec[i]);
852
0
        }
853
0
    }
854
};
855
856
/// Name tag carrying the SQL-visible function name "bitmap_remove".
struct NameBitmapRemove {
    static constexpr const char* name = "bitmap_remove";
};
859
860
template <typename LeftDataType, typename RightDataType>
861
struct BitmapRemove {
862
    using ResultDataType = DataTypeBitMap;
863
    using T0 = typename LeftDataType::FieldType;
864
    using T1 = typename RightDataType::FieldType;
865
    using LTData = std::vector<BitmapValue>;
866
    using RTData = typename ColumnVector<RightDataType::PType>::Container;
867
    using ResTData = std::vector<BitmapValue>;
868
869
2
    static void vector_vector(const LTData& lvec, const RTData& rvec, ResTData& res) {
870
2
        size_t size = lvec.size();
871
6
        for (size_t i = 0; i < size; ++i) {
872
4
            res[i] = lvec[i];
873
4
            res[i].remove(rvec[i]);
874
4
        }
875
2
    }
876
12
    static void vector_scalar(const LTData& lvec, const T1& rval, ResTData& res) {
877
12
        size_t size = lvec.size();
878
45
        for (size_t i = 0; i < size; ++i) {
879
33
            res[i] = lvec[i];
880
33
            res[i].remove(rval);
881
33
        }
882
12
    }
883
0
    static void scalar_vector(const BitmapValue& lval, const RTData& rvec, ResTData& res) {
884
0
        size_t size = rvec.size();
885
0
        for (size_t i = 0; i < size; ++i) {
886
0
            res[i] = lval;
887
0
            res[i].remove(rvec[i]);
888
0
        }
889
0
    }
890
};
891
892
/// Name tag carrying the SQL-visible function name "bitmap_has_any".
struct NameBitmapHasAny {
    static constexpr const char* name = "bitmap_has_any";
};
895
896
template <typename LeftDataType, typename RightDataType>
897
struct BitmapHasAny {
898
    using ResultDataType = DataTypeUInt8;
899
    using T0 = typename LeftDataType::FieldType;
900
    using T1 = typename RightDataType::FieldType;
901
    using TData = std::vector<BitmapValue>;
902
    using ResTData = typename ColumnUInt8::Container;
903
904
10
    static void vector_vector(const TData& lvec, const TData& rvec, ResTData& res) {
905
10
        size_t size = lvec.size();
906
37
        for (size_t i = 0; i < size; ++i) {
907
27
            auto bitmap = lvec[i];
908
27
            bitmap &= rvec[i];
909
27
            res[i] = bitmap.cardinality() != 0;
910
27
        }
911
10
    }
912
0
    static void vector_scalar(const TData& lvec, const BitmapValue& rval, ResTData& res) {
913
0
        size_t size = lvec.size();
914
0
        for (size_t i = 0; i < size; ++i) {
915
0
            auto bitmap = lvec[i];
916
0
            bitmap &= rval;
917
0
            res[i] = bitmap.cardinality() != 0;
918
0
        }
919
0
    }
920
0
    static void scalar_vector(const BitmapValue& lval, const TData& rvec, ResTData& res) {
921
0
        size_t size = rvec.size();
922
0
        for (size_t i = 0; i < size; ++i) {
923
0
            auto bitmap = lval;
924
0
            bitmap &= rvec[i];
925
0
            res[i] = bitmap.cardinality() != 0;
926
0
        }
927
0
    }
928
};
929
930
/// Name tag carrying the SQL-visible function name "bitmap_has_all".
struct NameBitmapHasAll {
    static constexpr const char* name = "bitmap_has_all";
};
933
934
template <typename LeftDataType, typename RightDataType>
935
struct BitmapHasAll {
936
    using ResultDataType = DataTypeUInt8;
937
    using T0 = typename LeftDataType::FieldType;
938
    using T1 = typename RightDataType::FieldType;
939
    using TData = std::vector<BitmapValue>;
940
    using ResTData = typename ColumnUInt8::Container;
941
942
11
    static void vector_vector(const TData& lvec, const TData& rvec, ResTData& res) {
943
11
        size_t size = lvec.size();
944
43
        for (size_t i = 0; i < size; ++i) {
945
32
            uint64_t lhs_cardinality = lvec[i].cardinality();
946
32
            auto bitmap = lvec[i];
947
32
            bitmap |= rvec[i];
948
32
            res[i] = bitmap.cardinality() == lhs_cardinality;
949
32
        }
950
11
    }
951
0
    static void vector_scalar(const TData& lvec, const BitmapValue& rval, ResTData& res) {
952
0
        size_t size = lvec.size();
953
0
        for (size_t i = 0; i < size; ++i) {
954
0
            uint64_t lhs_cardinality = lvec[i].cardinality();
955
0
            auto bitmap = lvec[i];
956
0
            bitmap |= rval;
957
0
            res[i] = bitmap.cardinality() == lhs_cardinality;
958
0
        }
959
0
    }
960
0
    static void scalar_vector(const BitmapValue& lval, const TData& rvec, ResTData& res) {
961
0
        size_t size = rvec.size();
962
0
        uint64_t lhs_cardinality = lval.cardinality();
963
0
        for (size_t i = 0; i < size; ++i) {
964
0
            auto bitmap = lval;
965
0
            bitmap |= rvec[i];
966
0
            res[i] = bitmap.cardinality() == lhs_cardinality;
967
0
        }
968
0
    }
969
};
970
971
/// Name tag carrying the SQL-visible function name "bitmap_to_string".
struct NameBitmapToString {
    static constexpr const char* name = "bitmap_to_string";
};
974
975
struct BitmapToString {
976
    using ReturnType = DataTypeString;
977
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_BITMAP;
978
    using Type = DataTypeBitMap::FieldType;
979
    using ReturnColumnType = ColumnString;
980
    using Chars = ColumnString::Chars;
981
    using Offsets = ColumnString::Offsets;
982
983
224
    static Status vector(const std::vector<BitmapValue>& data, Chars& chars, Offsets& offsets) {
984
224
        size_t size = data.size();
985
224
        offsets.resize(size);
986
224
        chars.reserve(size);
987
1.17k
        for (size_t i = 0; i < size; ++i) {
988
951
            StringOP::push_value_string(data[i].to_string(), i, chars, offsets);
989
951
        }
990
224
        return Status::OK();
991
224
    }
992
};
993
994
/// Name tag that carries the function name "bitmap_to_base64".
struct NameBitmapToBase64 {
    static constexpr const char* name = "bitmap_to_base64";
};
997
998
struct BitmapToBase64 {
999
    using ReturnType = DataTypeString;
1000
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_BITMAP;
1001
    using Type = DataTypeBitMap::FieldType;
1002
    using ReturnColumnType = ColumnString;
1003
    using Chars = ColumnString::Chars;
1004
    using Offsets = ColumnString::Offsets;
1005
1006
    // ColumnString not support 64bit, only 32bit, so that the max size is 4G
1007
15
    static Status vector(const std::vector<BitmapValue>& data, Chars& chars, Offsets& offsets) {
1008
15
        size_t size = data.size();
1009
15
        offsets.resize(size);
1010
15
        size_t output_char_size = 0;
1011
52
        for (size_t i = 0; i < size; ++i) {
1012
37
            const BitmapValue& bitmap_val = data[i];
1013
37
            auto ser_size = bitmap_val.getSizeInBytes();
1014
37
            output_char_size += (int)(4.0 * ceil((double)ser_size / 3.0));
1015
37
        }
1016
15
        ColumnString::check_chars_length(output_char_size, size);
1017
15
        chars.resize(output_char_size);
1018
15
        auto chars_data = chars.data();
1019
1020
15
        size_t cur_ser_size = 0;
1021
15
        size_t last_ser_size = 0;
1022
15
        std::string ser_buff;
1023
15
        size_t encoded_offset = 0;
1024
52
        for (size_t i = 0; i < size; ++i) {
1025
37
            const BitmapValue& bitmap_val = data[i];
1026
37
            cur_ser_size = bitmap_val.getSizeInBytes();
1027
37
            if (cur_ser_size > last_ser_size) {
1028
23
                last_ser_size = cur_ser_size;
1029
23
                ser_buff.resize(cur_ser_size);
1030
23
            }
1031
37
            bitmap_val.write_to(ser_buff.data());
1032
1033
37
            auto outlen = base64_encode((const unsigned char*)ser_buff.data(), cur_ser_size,
1034
37
                                        chars_data + encoded_offset);
1035
37
            DCHECK(outlen > 0);
1036
1037
37
            encoded_offset += (int)(4.0 * ceil((double)cur_ser_size / 3.0));
1038
37
            offsets[i] = cast_set<UInt32>(encoded_offset);
1039
37
        }
1040
15
        return Status::OK();
1041
15
    }
1042
};
1043
1044
struct SubBitmap {
1045
    static constexpr auto name = "sub_bitmap";
1046
    using TData1 = std::vector<BitmapValue>;
1047
    using TData2 = typename ColumnInt64::Container;
1048
1049
    static void vector3(const TData1& bitmap_data, const TData2& offset_data,
1050
                        const TData2& limit_data, NullMap& null_map, size_t input_rows_count,
1051
13
                        TData1& res) {
1052
43
        for (int i = 0; i < input_rows_count; ++i) {
1053
30
            if (null_map[i]) {
1054
0
                continue;
1055
0
            }
1056
30
            if (limit_data[i] <= 0) {
1057
1
                null_map[i] = 1;
1058
1
                continue;
1059
1
            }
1060
29
            if (bitmap_data[i].offset_limit(offset_data[i], limit_data[i], &res[i]) == 0) {
1061
24
                null_map[i] = 1;
1062
24
            }
1063
29
        }
1064
13
    }
1065
    static void vector_scalars(const TData1& bitmap_data, const Int64& offset_data,
1066
                               const Int64& limit_data, NullMap& null_map, size_t input_rows_count,
1067
0
                               TData1& res) {
1068
0
        for (int i = 0; i < input_rows_count; ++i) {
1069
0
            if (null_map[i]) {
1070
0
                continue;
1071
0
            }
1072
0
            if (limit_data <= 0) {
1073
0
                null_map[i] = 1;
1074
0
                continue;
1075
0
            }
1076
0
            if (bitmap_data[i].offset_limit(offset_data, limit_data, &res[i]) == 0) {
1077
0
                null_map[i] = 1;
1078
0
            }
1079
0
        }
1080
0
    }
1081
};
1082
1083
struct BitmapSubsetLimit {
1084
    static constexpr auto name = "bitmap_subset_limit";
1085
    using TData1 = std::vector<BitmapValue>;
1086
    using TData2 = typename ColumnInt64::Container;
1087
1088
    static void vector3(const TData1& bitmap_data, const TData2& offset_data,
1089
                        const TData2& limit_data, NullMap& null_map, size_t input_rows_count,
1090
11
                        TData1& res) {
1091
39
        for (int i = 0; i < input_rows_count; ++i) {
1092
28
            if (null_map[i]) {
1093
0
                continue;
1094
0
            }
1095
28
            if (offset_data[i] < 0 || limit_data[i] < 0) {
1096
0
                null_map[i] = 1;
1097
0
                continue;
1098
0
            }
1099
28
            bitmap_data[i].sub_limit(offset_data[i], limit_data[i], &res[i]);
1100
28
        }
1101
11
    }
1102
    static void vector_scalars(const TData1& bitmap_data, const Int64& offset_data,
1103
                               const Int64& limit_data, NullMap& null_map, size_t input_rows_count,
1104
0
                               TData1& res) {
1105
0
        for (int i = 0; i < input_rows_count; ++i) {
1106
0
            if (null_map[i]) {
1107
0
                continue;
1108
0
            }
1109
0
            if (offset_data < 0 || limit_data < 0) {
1110
0
                null_map[i] = 1;
1111
0
                continue;
1112
0
            }
1113
0
            bitmap_data[i].sub_limit(offset_data, limit_data, &res[i]);
1114
0
        }
1115
0
    }
1116
};
1117
1118
struct BitmapSubsetInRange {
1119
    static constexpr auto name = "bitmap_subset_in_range";
1120
    using TData1 = std::vector<BitmapValue>;
1121
    using TData2 = typename ColumnInt64::Container;
1122
1123
    static void vector3(const TData1& bitmap_data, const TData2& range_start,
1124
                        const TData2& range_end, NullMap& null_map, size_t input_rows_count,
1125
11
                        TData1& res) {
1126
39
        for (int i = 0; i < input_rows_count; ++i) {
1127
28
            if (null_map[i]) {
1128
0
                continue;
1129
0
            }
1130
28
            if (range_start[i] >= range_end[i] || range_start[i] < 0 || range_end[i] < 0) {
1131
25
                null_map[i] = 1;
1132
25
                continue;
1133
25
            }
1134
3
            bitmap_data[i].sub_range(range_start[i], range_end[i], &res[i]);
1135
3
        }
1136
11
    }
1137
    static void vector_scalars(const TData1& bitmap_data, const Int64& range_start,
1138
                               const Int64& range_end, NullMap& null_map, size_t input_rows_count,
1139
0
                               TData1& res) {
1140
0
        for (int i = 0; i < input_rows_count; ++i) {
1141
0
            if (null_map[i]) {
1142
0
                continue;
1143
0
            }
1144
0
            if (range_start >= range_end || range_start < 0 || range_end < 0) {
1145
0
                null_map[i] = 1;
1146
0
                continue;
1147
0
            }
1148
0
            bitmap_data[i].sub_range(range_start, range_end, &res[i]);
1149
0
        }
1150
0
    }
1151
};
1152
1153
template <typename Impl>
1154
class FunctionBitmapSubs : public IFunction {
1155
public:
1156
    static constexpr auto name = Impl::name;
1157
3
    String get_name() const override { return name; }
_ZNK5doris18FunctionBitmapSubsINS_9SubBitmapEE8get_nameB5cxx11Ev
Line
Count
Source
1157
1
    String get_name() const override { return name; }
_ZNK5doris18FunctionBitmapSubsINS_17BitmapSubsetLimitEE8get_nameB5cxx11Ev
Line
Count
Source
1157
1
    String get_name() const override { return name; }
_ZNK5doris18FunctionBitmapSubsINS_19BitmapSubsetInRangeEE8get_nameB5cxx11Ev
Line
Count
Source
1157
1
    String get_name() const override { return name; }
1158
1159
44
    static FunctionPtr create() { return std::make_shared<FunctionBitmapSubs>(); }
_ZN5doris18FunctionBitmapSubsINS_9SubBitmapEE6createEv
Line
Count
Source
1159
16
    static FunctionPtr create() { return std::make_shared<FunctionBitmapSubs>(); }
_ZN5doris18FunctionBitmapSubsINS_17BitmapSubsetLimitEE6createEv
Line
Count
Source
1159
14
    static FunctionPtr create() { return std::make_shared<FunctionBitmapSubs>(); }
_ZN5doris18FunctionBitmapSubsINS_19BitmapSubsetInRangeEE6createEv
Line
Count
Source
1159
14
    static FunctionPtr create() { return std::make_shared<FunctionBitmapSubs>(); }
1160
1161
17
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1162
17
        return make_nullable(std::make_shared<DataTypeBitMap>());
1163
17
    }
_ZNK5doris18FunctionBitmapSubsINS_9SubBitmapEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1161
7
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1162
7
        return make_nullable(std::make_shared<DataTypeBitMap>());
1163
7
    }
_ZNK5doris18FunctionBitmapSubsINS_17BitmapSubsetLimitEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1161
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1162
5
        return make_nullable(std::make_shared<DataTypeBitMap>());
1163
5
    }
_ZNK5doris18FunctionBitmapSubsINS_19BitmapSubsetInRangeEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1161
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1162
5
        return make_nullable(std::make_shared<DataTypeBitMap>());
1163
5
    }
1164
1165
17
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris18FunctionBitmapSubsINS_9SubBitmapEE23get_number_of_argumentsEv
Line
Count
Source
1165
7
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris18FunctionBitmapSubsINS_17BitmapSubsetLimitEE23get_number_of_argumentsEv
Line
Count
Source
1165
5
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris18FunctionBitmapSubsINS_19BitmapSubsetInRangeEE23get_number_of_argumentsEv
Line
Count
Source
1165
5
    size_t get_number_of_arguments() const override { return 3; }
1166
1167
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1168
35
                        uint32_t result, size_t input_rows_count) const override {
1169
35
        DCHECK_EQ(arguments.size(), 3);
1170
35
        auto res_null_map = ColumnUInt8::create(input_rows_count, 0);
1171
35
        auto res_data_column = ColumnBitmap::create(input_rows_count);
1172
1173
35
        bool col_const[3];
1174
35
        ColumnPtr argument_columns[3];
1175
140
        for (int i = 0; i < 3; ++i) {
1176
105
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
1177
105
        }
1178
35
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
1179
0
                                                     *block.get_by_position(arguments[0]).column)
1180
0
                                                     .convert_to_full_column()
1181
35
                                           : block.get_by_position(arguments[0]).column;
1182
1183
35
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
1184
1185
35
        auto bitmap_column = assert_cast<const ColumnBitmap*>(argument_columns[0].get());
1186
35
        auto offset_column = assert_cast<const ColumnInt64*>(argument_columns[1].get());
1187
35
        auto limit_column = assert_cast<const ColumnInt64*>(argument_columns[2].get());
1188
1189
35
        if (col_const[1] && col_const[2]) {
1190
0
            Impl::vector_scalars(bitmap_column->get_data(), offset_column->get_element(0),
1191
0
                                 limit_column->get_element(0), res_null_map->get_data(),
1192
0
                                 input_rows_count, res_data_column->get_data());
1193
35
        } else {
1194
35
            Impl::vector3(bitmap_column->get_data(), offset_column->get_data(),
1195
35
                          limit_column->get_data(), res_null_map->get_data(), input_rows_count,
1196
35
                          res_data_column->get_data());
1197
35
        }
1198
1199
35
        block.get_by_position(result).column =
1200
35
                ColumnNullable::create(std::move(res_data_column), std::move(res_null_map));
1201
35
        return Status::OK();
1202
35
    }
_ZNK5doris18FunctionBitmapSubsINS_9SubBitmapEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1168
13
                        uint32_t result, size_t input_rows_count) const override {
1169
13
        DCHECK_EQ(arguments.size(), 3);
1170
13
        auto res_null_map = ColumnUInt8::create(input_rows_count, 0);
1171
13
        auto res_data_column = ColumnBitmap::create(input_rows_count);
1172
1173
13
        bool col_const[3];
1174
13
        ColumnPtr argument_columns[3];
1175
52
        for (int i = 0; i < 3; ++i) {
1176
39
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
1177
39
        }
1178
13
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
1179
0
                                                     *block.get_by_position(arguments[0]).column)
1180
0
                                                     .convert_to_full_column()
1181
13
                                           : block.get_by_position(arguments[0]).column;
1182
1183
13
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
1184
1185
13
        auto bitmap_column = assert_cast<const ColumnBitmap*>(argument_columns[0].get());
1186
13
        auto offset_column = assert_cast<const ColumnInt64*>(argument_columns[1].get());
1187
13
        auto limit_column = assert_cast<const ColumnInt64*>(argument_columns[2].get());
1188
1189
13
        if (col_const[1] && col_const[2]) {
1190
0
            Impl::vector_scalars(bitmap_column->get_data(), offset_column->get_element(0),
1191
0
                                 limit_column->get_element(0), res_null_map->get_data(),
1192
0
                                 input_rows_count, res_data_column->get_data());
1193
13
        } else {
1194
13
            Impl::vector3(bitmap_column->get_data(), offset_column->get_data(),
1195
13
                          limit_column->get_data(), res_null_map->get_data(), input_rows_count,
1196
13
                          res_data_column->get_data());
1197
13
        }
1198
1199
13
        block.get_by_position(result).column =
1200
13
                ColumnNullable::create(std::move(res_data_column), std::move(res_null_map));
1201
13
        return Status::OK();
1202
13
    }
_ZNK5doris18FunctionBitmapSubsINS_17BitmapSubsetLimitEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1168
11
                        uint32_t result, size_t input_rows_count) const override {
1169
11
        DCHECK_EQ(arguments.size(), 3);
1170
11
        auto res_null_map = ColumnUInt8::create(input_rows_count, 0);
1171
11
        auto res_data_column = ColumnBitmap::create(input_rows_count);
1172
1173
11
        bool col_const[3];
1174
11
        ColumnPtr argument_columns[3];
1175
44
        for (int i = 0; i < 3; ++i) {
1176
33
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
1177
33
        }
1178
11
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
1179
0
                                                     *block.get_by_position(arguments[0]).column)
1180
0
                                                     .convert_to_full_column()
1181
11
                                           : block.get_by_position(arguments[0]).column;
1182
1183
11
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
1184
1185
11
        auto bitmap_column = assert_cast<const ColumnBitmap*>(argument_columns[0].get());
1186
11
        auto offset_column = assert_cast<const ColumnInt64*>(argument_columns[1].get());
1187
11
        auto limit_column = assert_cast<const ColumnInt64*>(argument_columns[2].get());
1188
1189
11
        if (col_const[1] && col_const[2]) {
1190
0
            Impl::vector_scalars(bitmap_column->get_data(), offset_column->get_element(0),
1191
0
                                 limit_column->get_element(0), res_null_map->get_data(),
1192
0
                                 input_rows_count, res_data_column->get_data());
1193
11
        } else {
1194
11
            Impl::vector3(bitmap_column->get_data(), offset_column->get_data(),
1195
11
                          limit_column->get_data(), res_null_map->get_data(), input_rows_count,
1196
11
                          res_data_column->get_data());
1197
11
        }
1198
1199
11
        block.get_by_position(result).column =
1200
11
                ColumnNullable::create(std::move(res_data_column), std::move(res_null_map));
1201
11
        return Status::OK();
1202
11
    }
_ZNK5doris18FunctionBitmapSubsINS_19BitmapSubsetInRangeEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1168
11
                        uint32_t result, size_t input_rows_count) const override {
1169
11
        DCHECK_EQ(arguments.size(), 3);
1170
11
        auto res_null_map = ColumnUInt8::create(input_rows_count, 0);
1171
11
        auto res_data_column = ColumnBitmap::create(input_rows_count);
1172
1173
11
        bool col_const[3];
1174
11
        ColumnPtr argument_columns[3];
1175
44
        for (int i = 0; i < 3; ++i) {
1176
33
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
1177
33
        }
1178
11
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
1179
0
                                                     *block.get_by_position(arguments[0]).column)
1180
0
                                                     .convert_to_full_column()
1181
11
                                           : block.get_by_position(arguments[0]).column;
1182
1183
11
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
1184
1185
11
        auto bitmap_column = assert_cast<const ColumnBitmap*>(argument_columns[0].get());
1186
11
        auto offset_column = assert_cast<const ColumnInt64*>(argument_columns[1].get());
1187
11
        auto limit_column = assert_cast<const ColumnInt64*>(argument_columns[2].get());
1188
1189
11
        if (col_const[1] && col_const[2]) {
1190
0
            Impl::vector_scalars(bitmap_column->get_data(), offset_column->get_element(0),
1191
0
                                 limit_column->get_element(0), res_null_map->get_data(),
1192
0
                                 input_rows_count, res_data_column->get_data());
1193
11
        } else {
1194
11
            Impl::vector3(bitmap_column->get_data(), offset_column->get_data(),
1195
11
                          limit_column->get_data(), res_null_map->get_data(), input_rows_count,
1196
11
                          res_data_column->get_data());
1197
11
        }
1198
1199
11
        block.get_by_position(result).column =
1200
11
                ColumnNullable::create(std::move(res_data_column), std::move(res_null_map));
1201
11
        return Status::OK();
1202
11
    }
1203
};
1204
1205
class FunctionBitmapToArray : public IFunction {
1206
public:
1207
    static constexpr auto name = "bitmap_to_array";
1208
1209
1
    String get_name() const override { return name; }
1210
1211
14
    static FunctionPtr create() { return std::make_shared<FunctionBitmapToArray>(); }
1212
1213
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1214
5
        auto nested_type = make_nullable(std::make_shared<DataTypeInt64>());
1215
5
        return std::make_shared<DataTypeArray>(nested_type);
1216
5
    }
1217
1218
5
    size_t get_number_of_arguments() const override { return 1; }
1219
1220
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1221
8
                        uint32_t result, size_t input_rows_count) const override {
1222
8
        auto return_nested_type = make_nullable(std::make_shared<DataTypeInt64>());
1223
8
        auto dest_array_column_ptr = ColumnArray::create(return_nested_type->create_column(),
1224
8
                                                         ColumnArray::ColumnOffsets::create());
1225
1226
8
        IColumn* dest_nested_column = &dest_array_column_ptr->get_data();
1227
8
        ColumnNullable* dest_nested_nullable_col =
1228
8
                reinterpret_cast<ColumnNullable*>(dest_nested_column);
1229
8
        dest_nested_column = dest_nested_nullable_col->get_nested_column_ptr().get();
1230
8
        auto& dest_nested_null_map = dest_nested_nullable_col->get_null_map_column().get_data();
1231
1232
8
        auto& arg_col = block.get_by_position(arguments[0]).column;
1233
8
        auto bitmap_col = assert_cast<const ColumnBitmap*>(arg_col.get());
1234
8
        const auto& bitmap_col_data = bitmap_col->get_data();
1235
8
        auto& nested_column_data = assert_cast<ColumnInt64*>(dest_nested_column)->get_data();
1236
8
        auto& dest_offsets = dest_array_column_ptr->get_offsets();
1237
8
        dest_offsets.reserve(input_rows_count);
1238
1239
16
        for (int i = 0; i < input_rows_count; ++i) {
1240
8
            bitmap_col_data[i].to_array(nested_column_data);
1241
8
            dest_nested_null_map.resize_fill(nested_column_data.size(), 0);
1242
8
            dest_offsets.push_back(nested_column_data.size());
1243
8
        }
1244
1245
8
        block.replace_by_position(result, std::move(dest_array_column_ptr));
1246
8
        return Status::OK();
1247
8
    }
1248
};
1249
1250
using FunctionBitmapEmpty = FunctionConst<BitmapEmpty, false>;
1251
using FunctionToBitmap = FunctionAlwaysNotNullable<ToBitmap>;
1252
using FunctionToBitmapWithCheck = FunctionAlwaysNotNullable<ToBitmapWithCheck, true>;
1253
1254
using FunctionBitmapFromString = FunctionBitmapAlwaysNull<BitmapFromString>;
1255
using FunctionBitmapFromArray = FunctionBitmapAlwaysNull<BitmapFromArray>;
1256
using FunctionBitmapHash = FunctionAlwaysNotNullable<BitmapHash<32>>;
1257
using FunctionBitmapHash64 = FunctionAlwaysNotNullable<BitmapHash<64>>;
1258
1259
using FunctionBitmapMin = FunctionBitmapSingle<FunctionBitmapMinImpl>;
1260
using FunctionBitmapMax = FunctionBitmapSingle<FunctionBitmapMaxImpl>;
1261
1262
using FunctionBitmapToString = FunctionUnaryToType<BitmapToString, NameBitmapToString>;
1263
using FunctionBitmapToBase64 = FunctionUnaryToType<BitmapToBase64, NameBitmapToBase64>;
1264
using FunctionBitmapFromBase64 = FunctionBitmapAlwaysNull<BitmapFromBase64>;
1265
using FunctionBitmapNot =
1266
        FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapNot, NameBitmapNot>;
1267
using FunctionBitmapAndNot =
1268
        FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapAndNot, NameBitmapAndNot>;
1269
using FunctionBitmapContains =
1270
        FunctionBinaryToType<DataTypeBitMap, DataTypeInt64, BitmapContains, NameBitmapContains>;
1271
using FunctionBitmapRemove =
1272
        FunctionBinaryToType<DataTypeBitMap, DataTypeInt64, BitmapRemove, NameBitmapRemove>;
1273
1274
using FunctionBitmapHasAny =
1275
        FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapHasAny, NameBitmapHasAny>;
1276
using FunctionBitmapHasAll =
1277
        FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapHasAll, NameBitmapHasAll>;
1278
using FunctionSubBitmap = FunctionBitmapSubs<SubBitmap>;
1279
using FunctionBitmapSubsetLimit = FunctionBitmapSubs<BitmapSubsetLimit>;
1280
using FunctionBitmapSubsetInRange = FunctionBitmapSubs<BitmapSubsetInRange>;
1281
1282
8
/// Registers every bitmap function defined in this file, plus two aliases,
/// with `factory`.
void register_function_bitmap(SimpleFunctionFactory& factory) {
    // Constructors and conversions.
    factory.register_function<FunctionBitmapEmpty>();
    factory.register_function<FunctionToBitmap>();
    factory.register_function<FunctionToBitmapWithCheck>();
    factory.register_function<FunctionBitmapFromString>();
    factory.register_function<FunctionBitmapToBase64>();
    factory.register_function<FunctionBitmapFromBase64>();
    factory.register_function<FunctionBitmapFromArray>();
    factory.register_function<FunctionBitmapHash>();
    factory.register_function<FunctionBitmapHash64>();

    // Single-bitmap functions.
    factory.register_function<FunctionBitmapCount>();
    factory.register_function<FunctionBitmapMin>();
    factory.register_function<FunctionBitmapMax>();
    factory.register_function<FunctionBitmapToString>();

    // Set operations; the and-not functions are also reachable under an alias.
    factory.register_function<FunctionBitmapNot>();
    factory.register_function<FunctionBitmapAndNot>();
    factory.register_alias(NameBitmapAndNot::name, "bitmap_andnot");
    factory.register_function<FunctionBitmapAndNotCount<NameBitmapAndNotCount>>();
    factory.register_alias(NameBitmapAndNotCount::name, "bitmap_andnot_count");

    // Membership tests and element removal.
    factory.register_function<FunctionBitmapContains>();
    factory.register_function<FunctionBitmapRemove>();
    factory.register_function<FunctionBitmapHasAny>();
    factory.register_function<FunctionBitmapHasAll>();

    // Slicing and array conversion.
    factory.register_function<FunctionSubBitmap>();
    factory.register_function<FunctionBitmapSubsetLimit>();
    factory.register_function<FunctionBitmapSubsetInRange>();
    factory.register_function<FunctionBitmapToArray>();
}
1310
1311
} // namespace doris