Coverage Report

Created: 2026-03-12 17:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_bitmap.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionBitmap.h
19
// and modified by Doris
20
21
#include <absl/strings/numbers.h>
22
#include <absl/strings/str_split.h>
23
#include <glog/logging.h>
24
#include <stdint.h>
25
#include <string.h>
26
27
#include <algorithm>
28
#include <boost/iterator/iterator_facade.hpp>
29
#include <functional>
30
#include <memory>
31
#include <ostream>
32
#include <string>
33
#include <utility>
34
#include <vector>
35
36
#include "common/compiler_util.h" // IWYU pragma: keep
37
#include "common/status.h"
38
#include "core/assert_cast.h"
39
#include "core/block/block.h"
40
#include "core/block/column_numbers.h"
41
#include "core/block/column_with_type_and_name.h"
42
#include "core/column/column.h"
43
#include "core/column/column_array.h"
44
#include "core/column/column_complex.h"
45
#include "core/column/column_const.h"
46
#include "core/column/column_nullable.h"
47
#include "core/column/column_string.h"
48
#include "core/column/column_vector.h"
49
#include "core/data_type/data_type.h"
50
#include "core/data_type/data_type_array.h"
51
#include "core/data_type/data_type_bitmap.h"
52
#include "core/data_type/data_type_nullable.h"
53
#include "core/data_type/data_type_number.h"
54
#include "core/data_type/data_type_string.h"
55
#include "core/field.h"
56
#include "core/types.h"
57
#include "core/value/bitmap_value.h"
58
#include "exec/common/stringop_substring.h"
59
#include "exec/common/util.hpp"
60
#include "exprs/aggregate/aggregate_function.h"
61
#include "exprs/function/function.h"
62
#include "exprs/function/function_always_not_nullable.h"
63
#include "exprs/function/function_bitmap_min_or_max.h"
64
#include "exprs/function/function_const.h"
65
#include "exprs/function/function_helpers.h"
66
#include "exprs/function/function_totype.h"
67
#include "exprs/function/simple_function_factory.h"
68
#include "util/hash/murmur_hash3.h"
69
#include "util/hash_util.hpp"
70
#include "util/string_parser.hpp"
71
#include "util/url_coding.h"
72
73
namespace doris {
74
class FunctionContext;
75
} // namespace doris
76
77
namespace doris {
78
#include "common/compile_check_begin.h"
79
80
struct BitmapEmpty {
81
    static constexpr auto name = "bitmap_empty";
82
    using ReturnColVec = ColumnBitmap;
83
284
    static DataTypePtr get_return_type() { return std::make_shared<DataTypeBitMap>(); }
84
284
    static auto init_value() { return BitmapValue {}; }
85
};
86
87
struct ToBitmap {
88
    static constexpr auto name = "to_bitmap";
89
    using ReturnType = DataTypeBitMap;
90
91
    template <typename ColumnType>
92
1.15k
    static void vector(const ColumnType* col, MutableColumnPtr& col_res) {
93
1.15k
        execute<ColumnType, false>(col, nullptr, col_res);
94
1.15k
    }
_ZN5doris8ToBitmap6vectorINS_9ColumnStrIjEEEEvPKT_RNS_3COWINS_7IColumnEE11mutable_ptrIS8_EE
Line
Count
Source
92
13
    static void vector(const ColumnType* col, MutableColumnPtr& col_res) {
93
13
        execute<ColumnType, false>(col, nullptr, col_res);
94
13
    }
_ZN5doris8ToBitmap6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvPKT_RNS_3COWINS_7IColumnEE11mutable_ptrIS9_EE
Line
Count
Source
92
1.13k
    static void vector(const ColumnType* col, MutableColumnPtr& col_res) {
93
1.13k
        execute<ColumnType, false>(col, nullptr, col_res);
94
1.13k
    }
95
    template <typename ColumnType>
96
    static void vector_nullable(const ColumnType* col, const NullMap& nullmap,
97
679
                                MutableColumnPtr& col_res) {
98
679
        execute<ColumnType, true>(col, &nullmap, col_res);
99
679
    }
_ZN5doris8ToBitmap15vector_nullableINS_9ColumnStrIjEEEEvPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISF_EE
Line
Count
Source
97
49
                                MutableColumnPtr& col_res) {
98
49
        execute<ColumnType, true>(col, &nullmap, col_res);
99
49
    }
_ZN5doris8ToBitmap15vector_nullableINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE
Line
Count
Source
97
630
                                MutableColumnPtr& col_res) {
98
630
        execute<ColumnType, true>(col, &nullmap, col_res);
99
630
    }
100
    template <typename ColumnType, bool arg_is_nullable>
101
1.82k
    static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) {
102
1.82k
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
103
62
            const ColumnString::Chars& data = col->get_chars();
104
62
            const ColumnString::Offsets& offsets = col->get_offsets();
105
106
62
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
107
62
            auto& res_data = res_column->get_data();
108
62
            size_t size = offsets.size();
109
110
3.56k
            for (size_t i = 0; i < size; ++i) {
111
3.50k
                if (arg_is_nullable && ((*nullmap)[i])) {
112
26
                    continue;
113
3.47k
                } else {
114
3.47k
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
115
3.47k
                    int str_size = cast_set<int>(offsets[i] - offsets[i - 1]);
116
3.47k
                    StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
117
3.47k
                    uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
118
3.47k
                            raw_str, str_size, &parse_result);
119
3.47k
                    if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
120
3.26k
                        res_data[i].add(int_value);
121
3.26k
                    }
122
3.47k
                }
123
3.50k
            }
124
1.76k
        } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
125
1.76k
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
126
1.76k
            auto& res_data = res_column->get_data();
127
1.76k
            size_t size = col->size();
128
129
131k
            for (size_t i = 0; i < size; ++i) {
130
128k
                if constexpr (arg_is_nullable) {
131
2.62k
                    if ((*nullmap)[i]) {
132
1.44k
                        continue;
133
1.44k
                    }
134
2.62k
                }
135
128k
                if (auto value = col->get_data()[i]; value >= 0) {
136
119k
                    res_data[i].add(value);
137
119k
                }
138
128k
            }
139
1.76k
        }
140
1.82k
    }
_ZN5doris8ToBitmap7executeINS_9ColumnStrIjEELb1EEEvPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISF_EE
Line
Count
Source
101
49
    static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) {
102
49
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
103
49
            const ColumnString::Chars& data = col->get_chars();
104
49
            const ColumnString::Offsets& offsets = col->get_offsets();
105
106
49
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
107
49
            auto& res_data = res_column->get_data();
108
49
            size_t size = offsets.size();
109
110
3.52k
            for (size_t i = 0; i < size; ++i) {
111
3.47k
                if (arg_is_nullable && ((*nullmap)[i])) {
112
26
                    continue;
113
3.44k
                } else {
114
3.44k
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
115
3.44k
                    int str_size = cast_set<int>(offsets[i] - offsets[i - 1]);
116
3.44k
                    StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
117
3.44k
                    uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
118
3.44k
                            raw_str, str_size, &parse_result);
119
3.44k
                    if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
120
3.26k
                        res_data[i].add(int_value);
121
3.26k
                    }
122
3.44k
                }
123
3.47k
            }
124
        } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
125
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
126
            auto& res_data = res_column->get_data();
127
            size_t size = col->size();
128
129
            for (size_t i = 0; i < size; ++i) {
130
                if constexpr (arg_is_nullable) {
131
                    if ((*nullmap)[i]) {
132
                        continue;
133
                    }
134
                }
135
                if (auto value = col->get_data()[i]; value >= 0) {
136
                    res_data[i].add(value);
137
                }
138
            }
139
        }
140
49
    }
_ZN5doris8ToBitmap7executeINS_12ColumnVectorILNS_13PrimitiveTypeE6EEELb1EEEvPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE
Line
Count
Source
101
630
    static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) {
102
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
103
            const ColumnString::Chars& data = col->get_chars();
104
            const ColumnString::Offsets& offsets = col->get_offsets();
105
106
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
107
            auto& res_data = res_column->get_data();
108
            size_t size = offsets.size();
109
110
            for (size_t i = 0; i < size; ++i) {
111
                if (arg_is_nullable && ((*nullmap)[i])) {
112
                    continue;
113
                } else {
114
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
115
                    int str_size = cast_set<int>(offsets[i] - offsets[i - 1]);
116
                    StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
117
                    uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
118
                            raw_str, str_size, &parse_result);
119
                    if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
120
                        res_data[i].add(int_value);
121
                    }
122
                }
123
            }
124
630
        } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
125
630
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
126
630
            auto& res_data = res_column->get_data();
127
630
            size_t size = col->size();
128
129
4.70k
            for (size_t i = 0; i < size; ++i) {
130
2.62k
                if constexpr (arg_is_nullable) {
131
2.62k
                    if ((*nullmap)[i]) {
132
1.44k
                        continue;
133
1.44k
                    }
134
2.62k
                }
135
2.62k
                if (auto value = col->get_data()[i]; value >= 0) {
136
844
                    res_data[i].add(value);
137
844
                }
138
2.62k
            }
139
630
        }
140
630
    }
_ZN5doris8ToBitmap7executeINS_9ColumnStrIjEELb0EEEvPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISF_EE
Line
Count
Source
101
13
    static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) {
102
13
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
103
13
            const ColumnString::Chars& data = col->get_chars();
104
13
            const ColumnString::Offsets& offsets = col->get_offsets();
105
106
13
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
107
13
            auto& res_data = res_column->get_data();
108
13
            size_t size = offsets.size();
109
110
42
            for (size_t i = 0; i < size; ++i) {
111
29
                if (arg_is_nullable && ((*nullmap)[i])) {
112
0
                    continue;
113
29
                } else {
114
29
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
115
29
                    int str_size = cast_set<int>(offsets[i] - offsets[i - 1]);
116
29
                    StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
117
29
                    uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
118
29
                            raw_str, str_size, &parse_result);
119
29
                    if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
120
0
                        res_data[i].add(int_value);
121
0
                    }
122
29
                }
123
29
            }
124
        } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
125
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
126
            auto& res_data = res_column->get_data();
127
            size_t size = col->size();
128
129
            for (size_t i = 0; i < size; ++i) {
130
                if constexpr (arg_is_nullable) {
131
                    if ((*nullmap)[i]) {
132
                        continue;
133
                    }
134
                }
135
                if (auto value = col->get_data()[i]; value >= 0) {
136
                    res_data[i].add(value);
137
                }
138
            }
139
        }
140
13
    }
_ZN5doris8ToBitmap7executeINS_12ColumnVectorILNS_13PrimitiveTypeE6EEELb0EEEvPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE
Line
Count
Source
101
1.13k
    static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) {
102
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
103
            const ColumnString::Chars& data = col->get_chars();
104
            const ColumnString::Offsets& offsets = col->get_offsets();
105
106
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
107
            auto& res_data = res_column->get_data();
108
            size_t size = offsets.size();
109
110
            for (size_t i = 0; i < size; ++i) {
111
                if (arg_is_nullable && ((*nullmap)[i])) {
112
                    continue;
113
                } else {
114
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
115
                    int str_size = cast_set<int>(offsets[i] - offsets[i - 1]);
116
                    StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
117
                    uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
118
                            raw_str, str_size, &parse_result);
119
                    if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
120
                        res_data[i].add(int_value);
121
                    }
122
                }
123
            }
124
1.13k
        } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
125
1.13k
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
126
1.13k
            auto& res_data = res_column->get_data();
127
1.13k
            size_t size = col->size();
128
129
126k
            for (size_t i = 0; i < size; ++i) {
130
                if constexpr (arg_is_nullable) {
131
                    if ((*nullmap)[i]) {
132
                        continue;
133
                    }
134
                }
135
125k
                if (auto value = col->get_data()[i]; value >= 0) {
136
118k
                    res_data[i].add(value);
137
118k
                }
138
125k
            }
139
1.13k
        }
140
1.13k
    }
141
};
142
143
struct ToBitmapWithCheck {
144
    static constexpr auto name = "to_bitmap_with_check";
145
    using ReturnType = DataTypeBitMap;
146
147
    template <typename ColumnType>
148
50
    static Status vector(const ColumnType* col, MutableColumnPtr& col_res) {
149
50
        return execute<ColumnType, false>(col, nullptr, col_res);
150
50
    }
Unexecuted instantiation: _ZN5doris17ToBitmapWithCheck6vectorINS_9ColumnStrIjEEEENS_6StatusEPKT_RNS_3COWINS_7IColumnEE11mutable_ptrIS9_EE
_ZN5doris17ToBitmapWithCheck6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEENS_6StatusEPKT_RNS_3COWINS_7IColumnEE11mutable_ptrISA_EE
Line
Count
Source
148
50
    static Status vector(const ColumnType* col, MutableColumnPtr& col_res) {
149
50
        return execute<ColumnType, false>(col, nullptr, col_res);
150
50
    }
151
    template <typename ColumnType>
152
    static Status vector_nullable(const ColumnType* col, const NullMap& nullmap,
153
46
                                  MutableColumnPtr& col_res) {
154
46
        return execute<ColumnType, true>(col, &nullmap, col_res);
155
46
    }
Unexecuted instantiation: _ZN5doris17ToBitmapWithCheck15vector_nullableINS_9ColumnStrIjEEEENS_6StatusEPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE
_ZN5doris17ToBitmapWithCheck15vector_nullableINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEENS_6StatusEPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISH_EE
Line
Count
Source
153
46
                                  MutableColumnPtr& col_res) {
154
46
        return execute<ColumnType, true>(col, &nullmap, col_res);
155
46
    }
156
    template <typename ColumnType, bool arg_is_nullable>
157
    static Status execute(const ColumnType* col, const NullMap* nullmap,
158
96
                          MutableColumnPtr& col_res) {
159
96
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
160
0
            const ColumnString::Chars& data = col->get_chars();
161
0
            const ColumnString::Offsets& offsets = col->get_offsets();
162
0
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
163
0
            auto& res_data = res_column->get_data();
164
0
            size_t size = offsets.size();
165
166
0
            for (size_t i = 0; i < size; ++i) {
167
0
                if (arg_is_nullable && ((*nullmap)[i])) {
168
0
                    continue;
169
0
                } else {
170
0
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
171
                    // The string lenght is less than 2G, so that cast the str size to int, not use size_t
172
0
                    int str_size = cast_set<int>(offsets[i] - offsets[i - 1]);
173
0
                    StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
174
0
                    uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
175
0
                            raw_str, str_size, &parse_result);
176
0
                    if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
177
0
                        res_data[i].add(int_value);
178
0
                    } else {
179
0
                        return Status::InvalidArgument(
180
0
                                "The input: {} is not valid, to_bitmap only support bigint value "
181
0
                                "from 0 to 18446744073709551615 currently, cannot create MV with "
182
0
                                "to_bitmap on column with negative values or cannot load negative "
183
0
                                "values to column with to_bitmap MV on it.",
184
0
                                std::string(raw_str, str_size));
185
0
                    }
186
0
                }
187
0
            }
188
96
        } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
189
96
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
190
96
            auto& res_data = res_column->get_data();
191
96
            size_t size = col->size();
192
193
215
            for (size_t i = 0; i < size; ++i) {
194
123
                if (arg_is_nullable && ((*nullmap)[i])) {
195
21
                    continue;
196
102
                } else {
197
102
                    int64_t int_value = col->get_data()[i];
198
102
                    if (LIKELY(int_value >= 0)) {
199
98
                        res_data[i].add(int_value);
200
98
                    } else {
201
4
                        return Status::InvalidArgument(
202
4
                                "The input: {} is not valid, to_bitmap only support bigint value "
203
4
                                "from 0 to 18446744073709551615 currently, cannot create MV with "
204
4
                                "to_bitmap on column with negative values or cannot load negative "
205
4
                                "values to column with to_bitmap MV on it.",
206
4
                                int_value);
207
4
                    }
208
102
                }
209
123
            }
210
        } else {
211
            return Status::InvalidArgument("not support type");
212
        }
213
92
        return Status::OK();
214
96
    }
Unexecuted instantiation: _ZN5doris17ToBitmapWithCheck7executeINS_9ColumnStrIjEELb1EEENS_6StatusEPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE
_ZN5doris17ToBitmapWithCheck7executeINS_12ColumnVectorILNS_13PrimitiveTypeE6EEELb1EEENS_6StatusEPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISH_EE
Line
Count
Source
158
46
                          MutableColumnPtr& col_res) {
159
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
160
            const ColumnString::Chars& data = col->get_chars();
161
            const ColumnString::Offsets& offsets = col->get_offsets();
162
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
163
            auto& res_data = res_column->get_data();
164
            size_t size = offsets.size();
165
166
            for (size_t i = 0; i < size; ++i) {
167
                if (arg_is_nullable && ((*nullmap)[i])) {
168
                    continue;
169
                } else {
170
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
171
                    // The string lenght is less than 2G, so that cast the str size to int, not use size_t
172
                    int str_size = cast_set<int>(offsets[i] - offsets[i - 1]);
173
                    StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
174
                    uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
175
                            raw_str, str_size, &parse_result);
176
                    if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
177
                        res_data[i].add(int_value);
178
                    } else {
179
                        return Status::InvalidArgument(
180
                                "The input: {} is not valid, to_bitmap only support bigint value "
181
                                "from 0 to 18446744073709551615 currently, cannot create MV with "
182
                                "to_bitmap on column with negative values or cannot load negative "
183
                                "values to column with to_bitmap MV on it.",
184
                                std::string(raw_str, str_size));
185
                    }
186
                }
187
            }
188
46
        } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
189
46
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
190
46
            auto& res_data = res_column->get_data();
191
46
            size_t size = col->size();
192
193
111
            for (size_t i = 0; i < size; ++i) {
194
65
                if (arg_is_nullable && ((*nullmap)[i])) {
195
21
                    continue;
196
44
                } else {
197
44
                    int64_t int_value = col->get_data()[i];
198
44
                    if (LIKELY(int_value >= 0)) {
199
44
                        res_data[i].add(int_value);
200
44
                    } else {
201
0
                        return Status::InvalidArgument(
202
0
                                "The input: {} is not valid, to_bitmap only support bigint value "
203
0
                                "from 0 to 18446744073709551615 currently, cannot create MV with "
204
0
                                "to_bitmap on column with negative values or cannot load negative "
205
0
                                "values to column with to_bitmap MV on it.",
206
0
                                int_value);
207
0
                    }
208
44
                }
209
65
            }
210
        } else {
211
            return Status::InvalidArgument("not support type");
212
        }
213
46
        return Status::OK();
214
46
    }
Unexecuted instantiation: _ZN5doris17ToBitmapWithCheck7executeINS_9ColumnStrIjEELb0EEENS_6StatusEPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE
_ZN5doris17ToBitmapWithCheck7executeINS_12ColumnVectorILNS_13PrimitiveTypeE6EEELb0EEENS_6StatusEPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISH_EE
Line
Count
Source
158
50
                          MutableColumnPtr& col_res) {
159
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
160
            const ColumnString::Chars& data = col->get_chars();
161
            const ColumnString::Offsets& offsets = col->get_offsets();
162
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
163
            auto& res_data = res_column->get_data();
164
            size_t size = offsets.size();
165
166
            for (size_t i = 0; i < size; ++i) {
167
                if (arg_is_nullable && ((*nullmap)[i])) {
168
                    continue;
169
                } else {
170
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
171
                    // The string lenght is less than 2G, so that cast the str size to int, not use size_t
172
                    int str_size = cast_set<int>(offsets[i] - offsets[i - 1]);
173
                    StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
174
                    uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
175
                            raw_str, str_size, &parse_result);
176
                    if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
177
                        res_data[i].add(int_value);
178
                    } else {
179
                        return Status::InvalidArgument(
180
                                "The input: {} is not valid, to_bitmap only support bigint value "
181
                                "from 0 to 18446744073709551615 currently, cannot create MV with "
182
                                "to_bitmap on column with negative values or cannot load negative "
183
                                "values to column with to_bitmap MV on it.",
184
                                std::string(raw_str, str_size));
185
                    }
186
                }
187
            }
188
50
        } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
189
50
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
190
50
            auto& res_data = res_column->get_data();
191
50
            size_t size = col->size();
192
193
104
            for (size_t i = 0; i < size; ++i) {
194
58
                if (arg_is_nullable && ((*nullmap)[i])) {
195
0
                    continue;
196
58
                } else {
197
58
                    int64_t int_value = col->get_data()[i];
198
58
                    if (LIKELY(int_value >= 0)) {
199
54
                        res_data[i].add(int_value);
200
54
                    } else {
201
4
                        return Status::InvalidArgument(
202
4
                                "The input: {} is not valid, to_bitmap only support bigint value "
203
4
                                "from 0 to 18446744073709551615 currently, cannot create MV with "
204
4
                                "to_bitmap on column with negative values or cannot load negative "
205
4
                                "values to column with to_bitmap MV on it.",
206
4
                                int_value);
207
4
                    }
208
58
                }
209
58
            }
210
        } else {
211
            return Status::InvalidArgument("not support type");
212
        }
213
46
        return Status::OK();
214
50
    }
215
};
216
217
struct BitmapFromString {
218
    using ArgumentType = DataTypeString;
219
220
    static constexpr auto name = "bitmap_from_string";
221
222
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
223
                         std::vector<BitmapValue>& res, NullMap& null_map,
224
399
                         size_t input_rows_count) {
225
399
        res.reserve(input_rows_count);
226
399
        std::vector<uint64_t> bits;
227
399
        if (offsets.size() == 0 && input_rows_count == 1) {
228
            // For NULL constant
229
0
            res.emplace_back();
230
0
            null_map[0] = 1;
231
0
            return Status::OK();
232
0
        }
233
234
488
        auto split_and_parse = [&bits](const char* raw_str, size_t str_size) {
235
488
            bits.clear();
236
488
            auto res = absl::StrSplit(std::string_view {raw_str, str_size}, ",", absl::SkipEmpty());
237
488
            uint64_t value = 0;
238
1.64k
            for (auto s : res) {
239
1.64k
                if (!absl::SimpleAtoi(s, &value)) {
240
76
                    return false;
241
76
                }
242
1.56k
                bits.push_back(value);
243
1.56k
            }
244
412
            return true;
245
488
        };
246
247
        // split by comma
248
249
887
        for (size_t i = 0; i < input_rows_count; ++i) {
250
488
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
251
488
            int64_t str_size = offsets[i] - offsets[i - 1];
252
253
488
            if ((str_size > INT32_MAX) || !split_and_parse(raw_str, str_size)) {
254
76
                res.emplace_back();
255
76
                null_map[i] = 1;
256
76
                continue;
257
76
            }
258
412
            res.emplace_back(bits);
259
412
        }
260
399
        return Status::OK();
261
399
    }
262
};
263
264
struct NameBitmapFromBase64 {
265
    static constexpr auto name = "bitmap_from_base64";
266
};
267
struct BitmapFromBase64 {
268
    using ArgumentType = DataTypeString;
269
270
    static constexpr auto name = "bitmap_from_base64";
271
272
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
273
                         std::vector<BitmapValue>& res, NullMap& null_map,
274
18
                         size_t input_rows_count) {
275
18
        res.reserve(input_rows_count);
276
18
        if (offsets.size() == 0 && input_rows_count == 1) {
277
            // For NULL constant
278
0
            res.emplace_back();
279
0
            null_map[0] = 1;
280
0
            return Status::OK();
281
0
        }
282
18
        std::string decode_buff;
283
18
        size_t last_decode_buff_len = 0;
284
18
        size_t curr_decode_buff_len = 0;
285
58
        for (size_t i = 0; i < input_rows_count; ++i) {
286
41
            const char* src_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
287
41
            size_t src_size = offsets[i] - offsets[i - 1];
288
41
            if (0 != src_size % 4) {
289
                // return Status::InvalidArgument(
290
                //         fmt::format("invalid base64: {}", std::string(src_str, src_size)));
291
0
                res.emplace_back();
292
0
                null_map[i] = 1;
293
0
                continue;
294
0
            }
295
41
            curr_decode_buff_len = src_size + 3;
296
41
            if (curr_decode_buff_len > last_decode_buff_len) {
297
29
                decode_buff.resize(curr_decode_buff_len);
298
29
                last_decode_buff_len = curr_decode_buff_len;
299
29
            }
300
41
            auto outlen = base64_decode(src_str, src_size, decode_buff.data());
301
41
            if (outlen < 0) {
302
0
                res.emplace_back();
303
0
                null_map[i] = 1;
304
41
            } else {
305
41
                BitmapValue bitmap_val;
306
41
                if (!bitmap_val.deserialize(decode_buff.data())) {
307
1
                    return Status::RuntimeError("bitmap_from_base64 decode failed: base64: {}",
308
1
                                                std::string(src_str, src_size));
309
1
                }
310
40
                res.emplace_back(std::move(bitmap_val));
311
40
            }
312
41
        }
313
17
        return Status::OK();
314
18
    }
315
};
316
struct BitmapFromArray {
317
    using ArgumentType = DataTypeArray;
318
    static constexpr auto name = "bitmap_from_array";
319
320
    template <typename ColumnType>
321
    static Status vector(const ColumnArray::Offsets64& offset_column_data,
322
                         const IColumn& nested_column, const NullMap& nested_null_map,
323
13
                         std::vector<BitmapValue>& res, NullMap& null_map) {
324
13
        const auto& nested_column_data = static_cast<const ColumnType&>(nested_column).get_data();
325
13
        auto size = offset_column_data.size();
326
13
        res.reserve(size);
327
13
        std::vector<uint64_t> bits;
328
36
        for (size_t i = 0; i < size; ++i) {
329
23
            auto curr_offset = offset_column_data[i];
330
23
            auto prev_offset = offset_column_data[i - 1];
331
92
            for (auto j = prev_offset; j < curr_offset; ++j) {
332
71
                auto data = nested_column_data[j];
333
                // invaild value
334
71
                if (UNLIKELY(data < 0) || UNLIKELY(nested_null_map[j])) {
335
2
                    res.emplace_back();
336
2
                    null_map[i] = 1;
337
2
                    break;
338
69
                } else {
339
69
                    bits.push_back(data);
340
69
                }
341
71
            }
342
            //input is valid value
343
23
            if (!null_map[i]) {
344
21
                res.emplace_back(bits);
345
21
            }
346
23
            bits.clear();
347
23
        }
348
13
        return Status::OK();
349
13
    }
_ZN5doris15BitmapFromArray6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE3EEEEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_7IColumnERKNS6_IhLm4096ES9_Lm16ELm15EEERSt6vectorINS_11BitmapValueESaISK_EERSG_
Line
Count
Source
323
3
                         std::vector<BitmapValue>& res, NullMap& null_map) {
324
3
        const auto& nested_column_data = static_cast<const ColumnType&>(nested_column).get_data();
325
3
        auto size = offset_column_data.size();
326
3
        res.reserve(size);
327
3
        std::vector<uint64_t> bits;
328
6
        for (size_t i = 0; i < size; ++i) {
329
3
            auto curr_offset = offset_column_data[i];
330
3
            auto prev_offset = offset_column_data[i - 1];
331
16
            for (auto j = prev_offset; j < curr_offset; ++j) {
332
13
                auto data = nested_column_data[j];
333
                // invaild value
334
13
                if (UNLIKELY(data < 0) || UNLIKELY(nested_null_map[j])) {
335
0
                    res.emplace_back();
336
0
                    null_map[i] = 1;
337
0
                    break;
338
13
                } else {
339
13
                    bits.push_back(data);
340
13
                }
341
13
            }
342
            //input is valid value
343
3
            if (!null_map[i]) {
344
3
                res.emplace_back(bits);
345
3
            }
346
3
            bits.clear();
347
3
        }
348
3
        return Status::OK();
349
3
    }
Unexecuted instantiation: _ZN5doris15BitmapFromArray6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE2EEEEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_7IColumnERKNS6_IhLm4096ES9_Lm16ELm15EEERSt6vectorINS_11BitmapValueESaISK_EERSG_
Unexecuted instantiation: _ZN5doris15BitmapFromArray6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE4EEEEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_7IColumnERKNS6_IhLm4096ES9_Lm16ELm15EEERSt6vectorINS_11BitmapValueESaISK_EERSG_
_ZN5doris15BitmapFromArray6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE5EEEEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_7IColumnERKNS6_IhLm4096ES9_Lm16ELm15EEERSt6vectorINS_11BitmapValueESaISK_EERSG_
Line
Count
Source
323
3
                         std::vector<BitmapValue>& res, NullMap& null_map) {
324
3
        const auto& nested_column_data = static_cast<const ColumnType&>(nested_column).get_data();
325
3
        auto size = offset_column_data.size();
326
3
        res.reserve(size);
327
3
        std::vector<uint64_t> bits;
328
14
        for (size_t i = 0; i < size; ++i) {
329
11
            auto curr_offset = offset_column_data[i];
330
11
            auto prev_offset = offset_column_data[i - 1];
331
56
            for (auto j = prev_offset; j < curr_offset; ++j) {
332
47
                auto data = nested_column_data[j];
333
                // invaild value
334
47
                if (UNLIKELY(data < 0) || UNLIKELY(nested_null_map[j])) {
335
2
                    res.emplace_back();
336
2
                    null_map[i] = 1;
337
2
                    break;
338
45
                } else {
339
45
                    bits.push_back(data);
340
45
                }
341
47
            }
342
            //input is valid value
343
11
            if (!null_map[i]) {
344
9
                res.emplace_back(bits);
345
9
            }
346
11
            bits.clear();
347
11
        }
348
3
        return Status::OK();
349
3
    }
_ZN5doris15BitmapFromArray6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_7IColumnERKNS6_IhLm4096ES9_Lm16ELm15EEERSt6vectorINS_11BitmapValueESaISK_EERSG_
Line
Count
Source
323
7
                         std::vector<BitmapValue>& res, NullMap& null_map) {
324
7
        const auto& nested_column_data = static_cast<const ColumnType&>(nested_column).get_data();
325
7
        auto size = offset_column_data.size();
326
7
        res.reserve(size);
327
7
        std::vector<uint64_t> bits;
328
16
        for (size_t i = 0; i < size; ++i) {
329
9
            auto curr_offset = offset_column_data[i];
330
9
            auto prev_offset = offset_column_data[i - 1];
331
20
            for (auto j = prev_offset; j < curr_offset; ++j) {
332
11
                auto data = nested_column_data[j];
333
                // invaild value
334
11
                if (UNLIKELY(data < 0) || UNLIKELY(nested_null_map[j])) {
335
0
                    res.emplace_back();
336
0
                    null_map[i] = 1;
337
0
                    break;
338
11
                } else {
339
11
                    bits.push_back(data);
340
11
                }
341
11
            }
342
            //input is valid value
343
9
            if (!null_map[i]) {
344
9
                res.emplace_back(bits);
345
9
            }
346
9
            bits.clear();
347
9
        }
348
7
        return Status::OK();
349
7
    }
350
};
351
352
template <typename Impl>
353
class FunctionBitmapAlwaysNull : public IFunction {
354
public:
355
    static constexpr auto name = Impl::name;
356
357
3
    String get_name() const override { return name; }
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromStringEE8get_nameB5cxx11Ev
Line
Count
Source
357
1
    String get_name() const override { return name; }
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromBase64EE8get_nameB5cxx11Ev
Line
Count
Source
357
1
    String get_name() const override { return name; }
_ZNK5doris24FunctionBitmapAlwaysNullINS_15BitmapFromArrayEE8get_nameB5cxx11Ev
Line
Count
Source
357
1
    String get_name() const override { return name; }
358
359
440
    static FunctionPtr create() { return std::make_shared<FunctionBitmapAlwaysNull>(); }
_ZN5doris24FunctionBitmapAlwaysNullINS_16BitmapFromStringEE6createEv
Line
Count
Source
359
392
    static FunctionPtr create() { return std::make_shared<FunctionBitmapAlwaysNull>(); }
_ZN5doris24FunctionBitmapAlwaysNullINS_16BitmapFromBase64EE6createEv
Line
Count
Source
359
26
    static FunctionPtr create() { return std::make_shared<FunctionBitmapAlwaysNull>(); }
_ZN5doris24FunctionBitmapAlwaysNullINS_15BitmapFromArrayEE6createEv
Line
Count
Source
359
22
    static FunctionPtr create() { return std::make_shared<FunctionBitmapAlwaysNull>(); }
360
361
413
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
362
413
        return make_nullable(std::make_shared<DataTypeBitMap>());
363
413
    }
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromStringEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
361
383
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
362
383
        return make_nullable(std::make_shared<DataTypeBitMap>());
363
383
    }
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromBase64EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
361
17
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
362
17
        return make_nullable(std::make_shared<DataTypeBitMap>());
363
17
    }
_ZNK5doris24FunctionBitmapAlwaysNullINS_15BitmapFromArrayEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
361
13
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
362
13
        return make_nullable(std::make_shared<DataTypeBitMap>());
363
13
    }
364
365
413
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromStringEE23get_number_of_argumentsEv
Line
Count
Source
365
383
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromBase64EE23get_number_of_argumentsEv
Line
Count
Source
365
17
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris24FunctionBitmapAlwaysNullINS_15BitmapFromArrayEE23get_number_of_argumentsEv
Line
Count
Source
365
13
    size_t get_number_of_arguments() const override { return 1; }
366
367
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
368
430
                        uint32_t result, size_t input_rows_count) const override {
369
430
        auto res_null_map = ColumnUInt8::create(input_rows_count, 0);
370
430
        auto res_data_column = ColumnBitmap::create();
371
430
        auto& null_map = res_null_map->get_data();
372
430
        auto& res = res_data_column->get_data();
373
374
430
        ColumnPtr& argument_column = block.get_by_position(arguments[0]).column;
375
430
        if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeString>) {
376
417
            const auto& str_column = static_cast<const ColumnString&>(*argument_column);
377
417
            const ColumnString::Chars& data = str_column.get_chars();
378
417
            const ColumnString::Offsets& offsets = str_column.get_offsets();
379
417
            RETURN_IF_ERROR(Impl::vector(data, offsets, res, null_map, input_rows_count));
380
417
        } else if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeArray>) {
381
13
            auto argument_type = remove_nullable(
382
13
                    assert_cast<const DataTypeArray&>(*block.get_by_position(arguments[0]).type)
383
13
                            .get_nested_type());
384
13
            const auto& array_column = static_cast<const ColumnArray&>(*argument_column);
385
13
            const auto& offset_column_data = array_column.get_offsets();
386
13
            const auto& nested_nullable_column =
387
13
                    static_cast<const ColumnNullable&>(array_column.get_data());
388
13
            const auto& nested_column = nested_nullable_column.get_nested_column();
389
13
            const auto& nested_null_map = nested_nullable_column.get_null_map_column().get_data();
390
391
13
            switch (argument_type->get_primitive_type()) {
392
3
            case PrimitiveType::TYPE_TINYINT:
393
3
                RETURN_IF_ERROR(Impl::template vector<ColumnInt8>(offset_column_data, nested_column,
394
3
                                                                  nested_null_map, res, null_map));
395
3
                break;
396
3
            case PrimitiveType::TYPE_BOOLEAN:
397
0
                RETURN_IF_ERROR(Impl::template vector<ColumnUInt8>(
398
0
                        offset_column_data, nested_column, nested_null_map, res, null_map));
399
0
                break;
400
0
            case PrimitiveType::TYPE_SMALLINT:
401
0
                RETURN_IF_ERROR(Impl::template vector<ColumnInt16>(
402
0
                        offset_column_data, nested_column, nested_null_map, res, null_map));
403
0
                break;
404
3
            case PrimitiveType::TYPE_INT:
405
3
                RETURN_IF_ERROR(Impl::template vector<ColumnInt32>(
406
3
                        offset_column_data, nested_column, nested_null_map, res, null_map));
407
3
                break;
408
7
            case PrimitiveType::TYPE_BIGINT:
409
7
                RETURN_IF_ERROR(Impl::template vector<ColumnInt64>(
410
7
                        offset_column_data, nested_column, nested_null_map, res, null_map));
411
7
                break;
412
7
            default:
413
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
414
0
                                            block.get_by_position(arguments[0]).column->get_name(),
415
0
                                            get_name());
416
13
            }
417
        } else {
418
            return Status::RuntimeError("Illegal column {} of argument of function {}",
419
                                        block.get_by_position(arguments[0]).column->get_name(),
420
                                        get_name());
421
        }
422
429
        block.get_by_position(result).column =
423
430
                ColumnNullable::create(std::move(res_data_column), std::move(res_null_map));
424
430
        return Status::OK();
425
430
    }
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromStringEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
368
399
                        uint32_t result, size_t input_rows_count) const override {
369
399
        auto res_null_map = ColumnUInt8::create(input_rows_count, 0);
370
399
        auto res_data_column = ColumnBitmap::create();
371
399
        auto& null_map = res_null_map->get_data();
372
399
        auto& res = res_data_column->get_data();
373
374
399
        ColumnPtr& argument_column = block.get_by_position(arguments[0]).column;
375
399
        if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeString>) {
376
399
            const auto& str_column = static_cast<const ColumnString&>(*argument_column);
377
399
            const ColumnString::Chars& data = str_column.get_chars();
378
399
            const ColumnString::Offsets& offsets = str_column.get_offsets();
379
399
            RETURN_IF_ERROR(Impl::vector(data, offsets, res, null_map, input_rows_count));
380
        } else if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeArray>) {
381
            auto argument_type = remove_nullable(
382
                    assert_cast<const DataTypeArray&>(*block.get_by_position(arguments[0]).type)
383
                            .get_nested_type());
384
            const auto& array_column = static_cast<const ColumnArray&>(*argument_column);
385
            const auto& offset_column_data = array_column.get_offsets();
386
            const auto& nested_nullable_column =
387
                    static_cast<const ColumnNullable&>(array_column.get_data());
388
            const auto& nested_column = nested_nullable_column.get_nested_column();
389
            const auto& nested_null_map = nested_nullable_column.get_null_map_column().get_data();
390
391
            switch (argument_type->get_primitive_type()) {
392
            case PrimitiveType::TYPE_TINYINT:
393
                RETURN_IF_ERROR(Impl::template vector<ColumnInt8>(offset_column_data, nested_column,
394
                                                                  nested_null_map, res, null_map));
395
                break;
396
            case PrimitiveType::TYPE_BOOLEAN:
397
                RETURN_IF_ERROR(Impl::template vector<ColumnUInt8>(
398
                        offset_column_data, nested_column, nested_null_map, res, null_map));
399
                break;
400
            case PrimitiveType::TYPE_SMALLINT:
401
                RETURN_IF_ERROR(Impl::template vector<ColumnInt16>(
402
                        offset_column_data, nested_column, nested_null_map, res, null_map));
403
                break;
404
            case PrimitiveType::TYPE_INT:
405
                RETURN_IF_ERROR(Impl::template vector<ColumnInt32>(
406
                        offset_column_data, nested_column, nested_null_map, res, null_map));
407
                break;
408
            case PrimitiveType::TYPE_BIGINT:
409
                RETURN_IF_ERROR(Impl::template vector<ColumnInt64>(
410
                        offset_column_data, nested_column, nested_null_map, res, null_map));
411
                break;
412
            default:
413
                return Status::RuntimeError("Illegal column {} of argument of function {}",
414
                                            block.get_by_position(arguments[0]).column->get_name(),
415
                                            get_name());
416
            }
417
        } else {
418
            return Status::RuntimeError("Illegal column {} of argument of function {}",
419
                                        block.get_by_position(arguments[0]).column->get_name(),
420
                                        get_name());
421
        }
422
399
        block.get_by_position(result).column =
423
399
                ColumnNullable::create(std::move(res_data_column), std::move(res_null_map));
424
399
        return Status::OK();
425
399
    }
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromBase64EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
368
18
                        uint32_t result, size_t input_rows_count) const override {
369
18
        auto res_null_map = ColumnUInt8::create(input_rows_count, 0);
370
18
        auto res_data_column = ColumnBitmap::create();
371
18
        auto& null_map = res_null_map->get_data();
372
18
        auto& res = res_data_column->get_data();
373
374
18
        ColumnPtr& argument_column = block.get_by_position(arguments[0]).column;
375
18
        if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeString>) {
376
18
            const auto& str_column = static_cast<const ColumnString&>(*argument_column);
377
18
            const ColumnString::Chars& data = str_column.get_chars();
378
18
            const ColumnString::Offsets& offsets = str_column.get_offsets();
379
18
            RETURN_IF_ERROR(Impl::vector(data, offsets, res, null_map, input_rows_count));
380
        } else if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeArray>) {
381
            auto argument_type = remove_nullable(
382
                    assert_cast<const DataTypeArray&>(*block.get_by_position(arguments[0]).type)
383
                            .get_nested_type());
384
            const auto& array_column = static_cast<const ColumnArray&>(*argument_column);
385
            const auto& offset_column_data = array_column.get_offsets();
386
            const auto& nested_nullable_column =
387
                    static_cast<const ColumnNullable&>(array_column.get_data());
388
            const auto& nested_column = nested_nullable_column.get_nested_column();
389
            const auto& nested_null_map = nested_nullable_column.get_null_map_column().get_data();
390
391
            switch (argument_type->get_primitive_type()) {
392
            case PrimitiveType::TYPE_TINYINT:
393
                RETURN_IF_ERROR(Impl::template vector<ColumnInt8>(offset_column_data, nested_column,
394
                                                                  nested_null_map, res, null_map));
395
                break;
396
            case PrimitiveType::TYPE_BOOLEAN:
397
                RETURN_IF_ERROR(Impl::template vector<ColumnUInt8>(
398
                        offset_column_data, nested_column, nested_null_map, res, null_map));
399
                break;
400
            case PrimitiveType::TYPE_SMALLINT:
401
                RETURN_IF_ERROR(Impl::template vector<ColumnInt16>(
402
                        offset_column_data, nested_column, nested_null_map, res, null_map));
403
                break;
404
            case PrimitiveType::TYPE_INT:
405
                RETURN_IF_ERROR(Impl::template vector<ColumnInt32>(
406
                        offset_column_data, nested_column, nested_null_map, res, null_map));
407
                break;
408
            case PrimitiveType::TYPE_BIGINT:
409
                RETURN_IF_ERROR(Impl::template vector<ColumnInt64>(
410
                        offset_column_data, nested_column, nested_null_map, res, null_map));
411
                break;
412
            default:
413
                return Status::RuntimeError("Illegal column {} of argument of function {}",
414
                                            block.get_by_position(arguments[0]).column->get_name(),
415
                                            get_name());
416
            }
417
        } else {
418
            return Status::RuntimeError("Illegal column {} of argument of function {}",
419
                                        block.get_by_position(arguments[0]).column->get_name(),
420
                                        get_name());
421
        }
422
17
        block.get_by_position(result).column =
423
18
                ColumnNullable::create(std::move(res_data_column), std::move(res_null_map));
424
18
        return Status::OK();
425
18
    }
_ZNK5doris24FunctionBitmapAlwaysNullINS_15BitmapFromArrayEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
368
13
                        uint32_t result, size_t input_rows_count) const override {
369
13
        auto res_null_map = ColumnUInt8::create(input_rows_count, 0);
370
13
        auto res_data_column = ColumnBitmap::create();
371
13
        auto& null_map = res_null_map->get_data();
372
13
        auto& res = res_data_column->get_data();
373
374
13
        ColumnPtr& argument_column = block.get_by_position(arguments[0]).column;
375
        if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeString>) {
376
            const auto& str_column = static_cast<const ColumnString&>(*argument_column);
377
            const ColumnString::Chars& data = str_column.get_chars();
378
            const ColumnString::Offsets& offsets = str_column.get_offsets();
379
            RETURN_IF_ERROR(Impl::vector(data, offsets, res, null_map, input_rows_count));
380
13
        } else if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeArray>) {
381
13
            auto argument_type = remove_nullable(
382
13
                    assert_cast<const DataTypeArray&>(*block.get_by_position(arguments[0]).type)
383
13
                            .get_nested_type());
384
13
            const auto& array_column = static_cast<const ColumnArray&>(*argument_column);
385
13
            const auto& offset_column_data = array_column.get_offsets();
386
13
            const auto& nested_nullable_column =
387
13
                    static_cast<const ColumnNullable&>(array_column.get_data());
388
13
            const auto& nested_column = nested_nullable_column.get_nested_column();
389
13
            const auto& nested_null_map = nested_nullable_column.get_null_map_column().get_data();
390
391
13
            switch (argument_type->get_primitive_type()) {
392
3
            case PrimitiveType::TYPE_TINYINT:
393
3
                RETURN_IF_ERROR(Impl::template vector<ColumnInt8>(offset_column_data, nested_column,
394
3
                                                                  nested_null_map, res, null_map));
395
3
                break;
396
3
            case PrimitiveType::TYPE_BOOLEAN:
397
0
                RETURN_IF_ERROR(Impl::template vector<ColumnUInt8>(
398
0
                        offset_column_data, nested_column, nested_null_map, res, null_map));
399
0
                break;
400
0
            case PrimitiveType::TYPE_SMALLINT:
401
0
                RETURN_IF_ERROR(Impl::template vector<ColumnInt16>(
402
0
                        offset_column_data, nested_column, nested_null_map, res, null_map));
403
0
                break;
404
3
            case PrimitiveType::TYPE_INT:
405
3
                RETURN_IF_ERROR(Impl::template vector<ColumnInt32>(
406
3
                        offset_column_data, nested_column, nested_null_map, res, null_map));
407
3
                break;
408
7
            case PrimitiveType::TYPE_BIGINT:
409
7
                RETURN_IF_ERROR(Impl::template vector<ColumnInt64>(
410
7
                        offset_column_data, nested_column, nested_null_map, res, null_map));
411
7
                break;
412
7
            default:
413
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
414
0
                                            block.get_by_position(arguments[0]).column->get_name(),
415
0
                                            get_name());
416
13
            }
417
        } else {
418
            return Status::RuntimeError("Illegal column {} of argument of function {}",
419
                                        block.get_by_position(arguments[0]).column->get_name(),
420
                                        get_name());
421
        }
422
13
        block.get_by_position(result).column =
423
13
                ColumnNullable::create(std::move(res_data_column), std::move(res_null_map));
424
13
        return Status::OK();
425
13
    }
426
};
427
428
template <int HashBits>
429
struct BitmapHashName {};
430
431
template <>
432
struct BitmapHashName<32> {
433
    static constexpr auto name = "bitmap_hash";
434
};
435
436
template <>
437
struct BitmapHashName<64> {
438
    static constexpr auto name = "bitmap_hash64";
439
};
440
441
template <int HashBits>
442
struct BitmapHash {
443
    static constexpr auto name = BitmapHashName<HashBits>::name;
444
445
    using ReturnType = DataTypeBitMap;
446
447
    template <typename ColumnType>
448
151
    static void vector(const ColumnType* col, MutableColumnPtr& col_res) {
449
151
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
450
151
            const ColumnString::Chars& data = col->get_chars();
451
151
            const ColumnString::Offsets& offsets = col->get_offsets();
452
151
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
453
151
            auto& res_data = res_column->get_data();
454
151
            size_t size = offsets.size();
455
456
462
            for (size_t i = 0; i < size; ++i) {
457
311
                const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
458
311
                size_t str_size = offsets[i] - offsets[i - 1];
459
311
                if constexpr (HashBits == 32) {
460
277
                    uint32_t hash_value =
461
277
                            HashUtil::murmur_hash3_32(raw_str, str_size, HashUtil::MURMUR3_32_SEED);
462
277
                    res_data[i].add(hash_value);
463
277
                } else {
464
34
                    uint64_t hash_value = 0;
465
34
                    murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value);
466
34
                    res_data[i].add(hash_value);
467
34
                }
468
311
            }
469
151
        }
470
151
    }
_ZN5doris10BitmapHashILi32EE6vectorINS_9ColumnStrIjEEEEvPKT_RNS_3COWINS_7IColumnEE11mutable_ptrIS9_EE
Line
Count
Source
448
133
    static void vector(const ColumnType* col, MutableColumnPtr& col_res) {
449
133
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
450
133
            const ColumnString::Chars& data = col->get_chars();
451
133
            const ColumnString::Offsets& offsets = col->get_offsets();
452
133
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
453
133
            auto& res_data = res_column->get_data();
454
133
            size_t size = offsets.size();
455
456
410
            for (size_t i = 0; i < size; ++i) {
457
277
                const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
458
277
                size_t str_size = offsets[i] - offsets[i - 1];
459
277
                if constexpr (HashBits == 32) {
460
277
                    uint32_t hash_value =
461
277
                            HashUtil::murmur_hash3_32(raw_str, str_size, HashUtil::MURMUR3_32_SEED);
462
277
                    res_data[i].add(hash_value);
463
                } else {
464
                    uint64_t hash_value = 0;
465
                    murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value);
466
                    res_data[i].add(hash_value);
467
                }
468
277
            }
469
133
        }
470
133
    }
Unexecuted instantiation: _ZN5doris10BitmapHashILi32EE6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvPKT_RNS_3COWINS_7IColumnEE11mutable_ptrISA_EE
_ZN5doris10BitmapHashILi64EE6vectorINS_9ColumnStrIjEEEEvPKT_RNS_3COWINS_7IColumnEE11mutable_ptrIS9_EE
Line
Count
Source
448
18
    static void vector(const ColumnType* col, MutableColumnPtr& col_res) {
449
18
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
450
18
            const ColumnString::Chars& data = col->get_chars();
451
18
            const ColumnString::Offsets& offsets = col->get_offsets();
452
18
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
453
18
            auto& res_data = res_column->get_data();
454
18
            size_t size = offsets.size();
455
456
52
            for (size_t i = 0; i < size; ++i) {
457
34
                const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
458
34
                size_t str_size = offsets[i] - offsets[i - 1];
459
                if constexpr (HashBits == 32) {
460
                    uint32_t hash_value =
461
                            HashUtil::murmur_hash3_32(raw_str, str_size, HashUtil::MURMUR3_32_SEED);
462
                    res_data[i].add(hash_value);
463
34
                } else {
464
34
                    uint64_t hash_value = 0;
465
34
                    murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value);
466
34
                    res_data[i].add(hash_value);
467
34
                }
468
34
            }
469
18
        }
470
18
    }
Unexecuted instantiation: _ZN5doris10BitmapHashILi64EE6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvPKT_RNS_3COWINS_7IColumnEE11mutable_ptrISA_EE
471
472
    template <typename ColumnType>
473
    static void vector_nullable(const ColumnType* col, const NullMap& nullmap,
474
285
                                MutableColumnPtr& col_res) {
475
285
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
476
285
            const ColumnString::Chars& data = col->get_chars();
477
285
            const ColumnString::Offsets& offsets = col->get_offsets();
478
285
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
479
285
            auto& res_data = res_column->get_data();
480
285
            size_t size = offsets.size();
481
482
1.09k
            for (size_t i = 0; i < size; ++i) {
483
806
                if (nullmap[i]) {
484
59
                    continue;
485
747
                } else {
486
747
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
487
747
                    size_t str_size = offsets[i] - offsets[i - 1];
488
747
                    if constexpr (HashBits == 32) {
489
721
                        uint32_t hash_value = HashUtil::murmur_hash3_32(raw_str, str_size,
490
721
                                                                        HashUtil::MURMUR3_32_SEED);
491
721
                        res_data[i].add(hash_value);
492
721
                    } else {
493
26
                        uint64_t hash_value = 0;
494
26
                        murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value);
495
26
                        res_data[i].add(hash_value);
496
26
                    }
497
747
                }
498
806
            }
499
285
        }
500
285
    }
_ZN5doris10BitmapHashILi32EE15vector_nullableINS_9ColumnStrIjEEEEvPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE
Line
Count
Source
474
275
                                MutableColumnPtr& col_res) {
475
275
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
476
275
            const ColumnString::Chars& data = col->get_chars();
477
275
            const ColumnString::Offsets& offsets = col->get_offsets();
478
275
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
479
275
            auto& res_data = res_column->get_data();
480
275
            size_t size = offsets.size();
481
482
1.05k
            for (size_t i = 0; i < size; ++i) {
483
778
                if (nullmap[i]) {
484
57
                    continue;
485
721
                } else {
486
721
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
487
721
                    size_t str_size = offsets[i] - offsets[i - 1];
488
721
                    if constexpr (HashBits == 32) {
489
721
                        uint32_t hash_value = HashUtil::murmur_hash3_32(raw_str, str_size,
490
721
                                                                        HashUtil::MURMUR3_32_SEED);
491
721
                        res_data[i].add(hash_value);
492
                    } else {
493
                        uint64_t hash_value = 0;
494
                        murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value);
495
                        res_data[i].add(hash_value);
496
                    }
497
721
                }
498
778
            }
499
275
        }
500
275
    }
Unexecuted instantiation: _ZN5doris10BitmapHashILi32EE15vector_nullableINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISH_EE
_ZN5doris10BitmapHashILi64EE15vector_nullableINS_9ColumnStrIjEEEEvPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE
Line
Count
Source
474
10
                                MutableColumnPtr& col_res) {
475
10
        if constexpr (std::is_same_v<ColumnType, ColumnString>) {
476
10
            const ColumnString::Chars& data = col->get_chars();
477
10
            const ColumnString::Offsets& offsets = col->get_offsets();
478
10
            auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
479
10
            auto& res_data = res_column->get_data();
480
10
            size_t size = offsets.size();
481
482
38
            for (size_t i = 0; i < size; ++i) {
483
28
                if (nullmap[i]) {
484
2
                    continue;
485
26
                } else {
486
26
                    const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
487
26
                    size_t str_size = offsets[i] - offsets[i - 1];
488
                    if constexpr (HashBits == 32) {
489
                        uint32_t hash_value = HashUtil::murmur_hash3_32(raw_str, str_size,
490
                                                                        HashUtil::MURMUR3_32_SEED);
491
                        res_data[i].add(hash_value);
492
26
                    } else {
493
26
                        uint64_t hash_value = 0;
494
26
                        murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value);
495
26
                        res_data[i].add(hash_value);
496
26
                    }
497
26
                }
498
28
            }
499
10
        }
500
10
    }
Unexecuted instantiation: _ZN5doris10BitmapHashILi64EE15vector_nullableINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISH_EE
501
};
502
503
class FunctionBitmapCount : public IFunction {
504
public:
505
    static constexpr auto name = "bitmap_count";
506
507
1
    String get_name() const override { return name; }
508
509
108
    static FunctionPtr create() { return std::make_shared<FunctionBitmapCount>(); }
510
511
99
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
512
99
        return std::make_shared<DataTypeInt64>();
513
99
    }
514
515
99
    size_t get_number_of_arguments() const override { return 1; }
516
517
249
    bool use_default_implementation_for_nulls() const override { return false; }
518
519
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
520
150
                        uint32_t result, size_t input_rows_count) const override {
521
150
        auto res_data_column = ColumnInt64::create();
522
150
        auto& res = res_data_column->get_data();
523
150
        auto data_null_map = ColumnUInt8::create(input_rows_count, 0);
524
150
        auto& null_map = data_null_map->get_data();
525
526
150
        auto column = block.get_by_position(arguments[0]).column;
527
150
        if (auto* nullable = check_and_get_column<const ColumnNullable>(*column)) {
528
31
            VectorizedUtils::update_null_map(null_map, nullable->get_null_map_data());
529
31
            column = nullable->get_nested_column_ptr();
530
31
        }
531
150
        auto str_col = assert_cast<const ColumnBitmap*>(column.get());
532
150
        const auto& col_data = str_col->get_data();
533
534
150
        res.reserve(input_rows_count);
535
389
        for (size_t i = 0; i < input_rows_count; ++i) {
536
239
            if (null_map[i]) {
537
6
                res.push_back(0);
538
6
                continue;
539
6
            }
540
233
            res.push_back(col_data[i].cardinality());
541
233
        }
542
150
        block.replace_by_position(result, std::move(res_data_column));
543
150
        return Status::OK();
544
150
    }
545
};
546
547
struct NameBitmapNot {
548
    static constexpr auto name = "bitmap_not";
549
};
550
551
template <typename LeftDataType, typename RightDataType>
552
struct BitmapNot {
553
    using ResultDataType = DataTypeBitMap;
554
    using T0 = typename LeftDataType::FieldType;
555
    using T1 = typename RightDataType::FieldType;
556
    using TData = std::vector<BitmapValue>;
557
558
12
    static void vector_vector(const TData& lvec, const TData& rvec, TData& res) {
559
12
        size_t size = lvec.size();
560
41
        for (size_t i = 0; i < size; ++i) {
561
29
            res[i] = lvec[i];
562
29
            res[i] -= rvec[i];
563
29
        }
564
12
    }
565
0
    static void vector_scalar(const TData& lvec, const BitmapValue& rval, TData& res) {
566
0
        size_t size = lvec.size();
567
0
        for (size_t i = 0; i < size; ++i) {
568
0
            res[i] = lvec[i];
569
0
            res[i] -= rval;
570
0
        }
571
0
    }
572
0
    static void scalar_vector(const BitmapValue& lval, const TData& rvec, TData& res) {
573
0
        size_t size = rvec.size();
574
0
        for (size_t i = 0; i < size; ++i) {
575
0
            res[i] = lval;
576
0
            res[i] -= rvec[i];
577
0
        }
578
0
    }
579
};
580
581
struct NameBitmapAndNot {
582
    static constexpr auto name = "bitmap_and_not";
583
};
584
585
template <typename LeftDataType, typename RightDataType>
586
struct BitmapAndNot {
587
    using ResultDataType = DataTypeBitMap;
588
    using T0 = typename LeftDataType::FieldType;
589
    using T1 = typename RightDataType::FieldType;
590
    using TData = std::vector<BitmapValue>;
591
592
19
    static void vector_vector(const TData& lvec, const TData& rvec, TData& res) {
593
19
        size_t size = lvec.size();
594
19
        BitmapValue mid_data;
595
75
        for (size_t i = 0; i < size; ++i) {
596
56
            mid_data = lvec[i];
597
56
            mid_data &= rvec[i];
598
56
            res[i] = lvec[i];
599
56
            res[i] -= mid_data;
600
56
            mid_data.reset();
601
56
        }
602
19
    }
603
0
    static void vector_scalar(const TData& lvec, const BitmapValue& rval, TData& res) {
604
0
        size_t size = lvec.size();
605
0
        BitmapValue mid_data;
606
0
        for (size_t i = 0; i < size; ++i) {
607
0
            mid_data = lvec[i];
608
0
            mid_data &= rval;
609
0
            res[i] = lvec[i];
610
0
            res[i] -= mid_data;
611
0
            mid_data.reset();
612
0
        }
613
0
    }
614
0
    static void scalar_vector(const BitmapValue& lval, const TData& rvec, TData& res) {
615
0
        size_t size = rvec.size();
616
0
        BitmapValue mid_data;
617
0
        for (size_t i = 0; i < size; ++i) {
618
0
            mid_data = lval;
619
0
            mid_data &= rvec[i];
620
0
            res[i] = lval;
621
0
            res[i] -= mid_data;
622
0
            mid_data.reset();
623
0
        }
624
0
    }
625
};
626
627
struct NameBitmapAndNotCount {
628
    static constexpr auto name = "bitmap_and_not_count";
629
};
630
631
template <typename LeftDataType, typename RightDataType>
632
struct BitmapAndNotCount {
633
    using ResultDataType = DataTypeInt64;
634
    using T0 = typename LeftDataType::FieldType;
635
    using T1 = typename RightDataType::FieldType;
636
    using TData = std::vector<BitmapValue>;
637
    using ResTData = typename ColumnInt64::Container::value_type;
638
639
38
    static void vector_vector(const TData& lvec, const TData& rvec, ResTData* res) {
640
38
        size_t size = lvec.size();
641
38
        BitmapValue mid_data;
642
132
        for (size_t i = 0; i < size; ++i) {
643
94
            mid_data = lvec[i];
644
94
            mid_data &= rvec[i];
645
94
            res[i] = lvec[i].andnot_cardinality(mid_data);
646
94
            mid_data.reset();
647
94
        }
648
38
    }
649
0
    static void scalar_vector(const BitmapValue& lval, const TData& rvec, ResTData* res) {
650
0
        size_t size = rvec.size();
651
0
        BitmapValue mid_data;
652
0
        for (size_t i = 0; i < size; ++i) {
653
0
            mid_data = lval;
654
0
            mid_data &= rvec[i];
655
0
            res[i] = lval.andnot_cardinality(mid_data);
656
0
            mid_data.reset();
657
0
        }
658
0
    }
659
0
    static void vector_scalar(const TData& lvec, const BitmapValue& rval, ResTData* res) {
660
0
        size_t size = lvec.size();
661
0
        BitmapValue mid_data;
662
0
        for (size_t i = 0; i < size; ++i) {
663
0
            mid_data = lvec[i];
664
0
            mid_data &= rval;
665
0
            res[i] = lvec[i].andnot_cardinality(mid_data);
666
0
            mid_data.reset();
667
0
        }
668
0
    }
669
};
670
671
110
void update_bitmap_op_count(int64_t* __restrict count, const NullMap& null_map) {
672
110
    static constexpr int64_t flags[2] = {-1, 0};
673
110
    size_t size = null_map.size();
674
110
    auto* __restrict null_map_data = null_map.data();
675
284
    for (size_t i = 0; i < size; ++i) {
676
174
        count[i] &= flags[null_map_data[i]];
677
174
    }
678
110
}
679
680
// for bitmap_and_count, bitmap_xor_count and bitmap_and_not_count,
681
// result is 0 for rows that if any column is null value
682
ColumnPtr handle_bitmap_op_count_null_value(ColumnPtr& src, const Block& block,
683
                                            const ColumnNumbers& args, uint32_t result,
684
60
                                            size_t input_rows_count) {
685
60
    auto* nullable = assert_cast<const ColumnNullable*>(src.get());
686
60
    ColumnPtr src_not_nullable = nullable->get_nested_column_ptr();
687
60
    MutableColumnPtr src_not_nullable_mutable = (*std::move(src_not_nullable)).assume_mutable();
688
60
    auto* __restrict count_data =
689
60
            assert_cast<ColumnInt64*>(src_not_nullable_mutable.get())->get_data().data();
690
691
142
    for (const auto& arg : args) {
692
142
        const ColumnWithTypeAndName& elem = block.get_by_position(arg);
693
142
        if (!elem.type->is_nullable()) {
694
32
            continue;
695
32
        }
696
697
110
        bool is_const = is_column_const(*elem.column);
698
        /// Const Nullable that are NULL.
699
110
        if (is_const && assert_cast<const ColumnConst*>(elem.column.get())->only_null()) {
700
0
            return block.get_by_position(result).type->create_column_const(
701
0
                    input_rows_count, Field::create_field<TYPE_BIGINT>(0));
702
0
        }
703
110
        if (is_const) {
704
0
            continue;
705
0
        }
706
707
110
        if (const auto* nullable_column = assert_cast<const ColumnNullable*>(elem.column.get())) {
708
110
            const ColumnPtr& null_map_column = nullable_column->get_null_map_column_ptr();
709
110
            const NullMap& src_null_map =
710
110
                    assert_cast<const ColumnUInt8&>(*null_map_column).get_data();
711
712
110
            update_bitmap_op_count(count_data, src_null_map);
713
110
        }
714
110
    }
715
716
60
    return src;
717
60
}
718
719
Status execute_bitmap_op_count_null_to_zero(
720
        FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result,
721
        size_t input_rows_count,
722
        const std::function<Status(FunctionContext*, Block&, const ColumnNumbers&, size_t, size_t)>&
723
110
                exec_impl_func) {
724
110
    if (have_null_column(block, arguments)) {
725
60
        auto [temporary_block, new_args, new_result] =
726
60
                create_block_with_nested_columns(block, arguments, result);
727
60
        RETURN_IF_ERROR(exec_impl_func(context, temporary_block, new_args, new_result,
728
60
                                       temporary_block.rows()));
729
60
        block.get_by_position(result).column = handle_bitmap_op_count_null_value(
730
60
                temporary_block.get_by_position(new_result).column, block, arguments, result,
731
60
                input_rows_count);
732
60
    } else {
733
50
        return exec_impl_func(context, block, arguments, result, input_rows_count);
734
50
    }
735
60
    return Status::OK();
736
110
}
737
738
template <typename FunctionName>
739
class FunctionBitmapAndNotCount : public IFunction {
740
public:
741
    using LeftDataType = DataTypeBitMap;
742
    using RightDataType = DataTypeBitMap;
743
    using ResultDataType = typename BitmapAndNotCount<LeftDataType, RightDataType>::ResultDataType;
744
745
    static constexpr auto name = FunctionName::name;
746
30
    static FunctionPtr create() { return std::make_shared<FunctionBitmapAndNotCount>(); }
747
1
    String get_name() const override { return name; }
748
21
    size_t get_number_of_arguments() const override { return 2; }
749
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
750
21
        bool return_nullable = false;
751
        // result is nullable only when any columns is nullable for bitmap_and_not_count
752
46
        for (size_t i = 0; i < arguments.size(); ++i) {
753
36
            if (arguments[i]->is_nullable()) {
754
11
                return_nullable = true;
755
11
                break;
756
11
            }
757
36
        }
758
21
        auto result_type = std::make_shared<ResultDataType>();
759
21
        return return_nullable ? make_nullable(result_type) : result_type;
760
21
    }
761
762
59
    bool use_default_implementation_for_nulls() const override {
763
        // for bitmap_and_not_count, result is always not null, and if the bitmap op result is null,
764
        // the count is 0
765
59
        return false;
766
59
    }
767
768
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
769
38
                        uint32_t result, size_t input_rows_count) const override {
770
38
        DCHECK_EQ(arguments.size(), 2);
771
38
        auto impl_func = [&](FunctionContext* context, Block& block, const ColumnNumbers& arguments,
772
38
                             uint32_t result, size_t input_rows_count) {
773
38
            return execute_impl_internal(context, block, arguments, result, input_rows_count);
774
38
        };
775
38
        return execute_bitmap_op_count_null_to_zero(context, block, arguments, result,
776
38
                                                    input_rows_count, impl_func);
777
38
    }
778
779
    Status execute_impl_internal(FunctionContext* context, Block& block,
780
                                 const ColumnNumbers& arguments, uint32_t result,
781
38
                                 size_t input_rows_count) const {
782
38
        using ColVecResult = ColumnVector<ResultDataType::PType>;
783
784
38
        typename ColVecResult::MutablePtr col_res = ColVecResult::create();
785
38
        auto& vec_res = col_res->get_data();
786
38
        vec_res.resize(block.rows());
787
788
38
        const auto& left = block.get_by_position(arguments[0]);
789
38
        auto lcol = left.column;
790
38
        const auto& right = block.get_by_position(arguments[1]);
791
38
        auto rcol = right.column;
792
793
38
        if (is_column_const(*left.column)) {
794
0
            BitmapAndNotCount<LeftDataType, RightDataType>::scalar_vector(
795
0
                    assert_cast<const ColumnBitmap&>(
796
0
                            assert_cast<const ColumnConst*>(lcol.get())->get_data_column())
797
0
                            .get_data()[0],
798
0
                    assert_cast<const ColumnBitmap*>(rcol.get())->get_data(), vec_res.data());
799
38
        } else if (is_column_const(*right.column)) {
800
0
            BitmapAndNotCount<LeftDataType, RightDataType>::vector_scalar(
801
0
                    assert_cast<const ColumnBitmap*>(lcol.get())->get_data(),
802
0
                    assert_cast<const ColumnBitmap&>(
803
0
                            assert_cast<const ColumnConst*>(rcol.get())->get_data_column())
804
0
                            .get_data()[0],
805
0
                    vec_res.data());
806
38
        } else {
807
38
            BitmapAndNotCount<LeftDataType, RightDataType>::vector_vector(
808
38
                    assert_cast<const ColumnBitmap*>(lcol.get())->get_data(),
809
38
                    assert_cast<const ColumnBitmap*>(rcol.get())->get_data(), vec_res.data());
810
38
        }
811
812
38
        auto& result_info = block.get_by_position(result);
813
38
        if (result_info.type->is_nullable()) {
814
16
            block.replace_by_position(
815
16
                    result, ColumnNullable::create(std::move(col_res),
816
16
                                                   ColumnUInt8::create(input_rows_count, 0)));
817
22
        } else {
818
22
            block.replace_by_position(result, std::move(col_res));
819
22
        }
820
38
        return Status::OK();
821
38
    }
822
};
823
824
struct NameBitmapContains {
825
    static constexpr auto name = "bitmap_contains";
826
};
827
828
template <typename LeftDataType, typename RightDataType>
829
struct BitmapContains {
830
    using ResultDataType = DataTypeUInt8;
831
    using T0 = typename LeftDataType::FieldType;
832
    using T1 = typename RightDataType::FieldType;
833
    using LTData = std::vector<BitmapValue>;
834
    using RTData = typename ColumnVector<RightDataType::PType>::Container;
835
    using ResTData = typename ColumnUInt8::Container;
836
837
33
    static void vector_vector(const LTData& lvec, const RTData& rvec, ResTData& res) {
838
33
        size_t size = lvec.size();
839
85
        for (size_t i = 0; i < size; ++i) {
840
52
            res[i] = lvec[i].contains(rvec[i]);
841
52
        }
842
33
    }
843
7
    static void vector_scalar(const LTData& lvec, const T1& rval, ResTData& res) {
844
7
        size_t size = lvec.size();
845
14
        for (size_t i = 0; i < size; ++i) {
846
7
            res[i] = lvec[i].contains(rval);
847
7
        }
848
7
    }
849
0
    static void scalar_vector(const BitmapValue& lval, const RTData& rvec, ResTData& res) {
850
0
        size_t size = rvec.size();
851
0
        for (size_t i = 0; i < size; ++i) {
852
0
            res[i] = lval.contains(rvec[i]);
853
0
        }
854
0
    }
855
};
856
857
struct NameBitmapRemove {
858
    static constexpr auto name = "bitmap_remove";
859
};
860
861
template <typename LeftDataType, typename RightDataType>
862
struct BitmapRemove {
863
    using ResultDataType = DataTypeBitMap;
864
    using T0 = typename LeftDataType::FieldType;
865
    using T1 = typename RightDataType::FieldType;
866
    using LTData = std::vector<BitmapValue>;
867
    using RTData = typename ColumnVector<RightDataType::PType>::Container;
868
    using ResTData = std::vector<BitmapValue>;
869
870
2
    static void vector_vector(const LTData& lvec, const RTData& rvec, ResTData& res) {
871
2
        size_t size = lvec.size();
872
6
        for (size_t i = 0; i < size; ++i) {
873
4
            res[i] = lvec[i];
874
4
            res[i].remove(rvec[i]);
875
4
        }
876
2
    }
877
12
    static void vector_scalar(const LTData& lvec, const T1& rval, ResTData& res) {
878
12
        size_t size = lvec.size();
879
45
        for (size_t i = 0; i < size; ++i) {
880
33
            res[i] = lvec[i];
881
33
            res[i].remove(rval);
882
33
        }
883
12
    }
884
0
    static void scalar_vector(const BitmapValue& lval, const RTData& rvec, ResTData& res) {
885
0
        size_t size = rvec.size();
886
0
        for (size_t i = 0; i < size; ++i) {
887
0
            res[i] = lval;
888
0
            res[i].remove(rvec[i]);
889
0
        }
890
0
    }
891
};
892
893
struct NameBitmapHasAny {
894
    static constexpr auto name = "bitmap_has_any";
895
};
896
897
template <typename LeftDataType, typename RightDataType>
898
struct BitmapHasAny {
899
    using ResultDataType = DataTypeUInt8;
900
    using T0 = typename LeftDataType::FieldType;
901
    using T1 = typename RightDataType::FieldType;
902
    using TData = std::vector<BitmapValue>;
903
    using ResTData = typename ColumnUInt8::Container;
904
905
12
    static void vector_vector(const TData& lvec, const TData& rvec, ResTData& res) {
906
12
        size_t size = lvec.size();
907
41
        for (size_t i = 0; i < size; ++i) {
908
29
            auto bitmap = lvec[i];
909
29
            bitmap &= rvec[i];
910
29
            res[i] = bitmap.cardinality() != 0;
911
29
        }
912
12
    }
913
0
    static void vector_scalar(const TData& lvec, const BitmapValue& rval, ResTData& res) {
914
0
        size_t size = lvec.size();
915
0
        for (size_t i = 0; i < size; ++i) {
916
0
            auto bitmap = lvec[i];
917
0
            bitmap &= rval;
918
0
            res[i] = bitmap.cardinality() != 0;
919
0
        }
920
0
    }
921
0
    static void scalar_vector(const BitmapValue& lval, const TData& rvec, ResTData& res) {
922
0
        size_t size = rvec.size();
923
0
        for (size_t i = 0; i < size; ++i) {
924
0
            auto bitmap = lval;
925
0
            bitmap &= rvec[i];
926
0
            res[i] = bitmap.cardinality() != 0;
927
0
        }
928
0
    }
929
};
930
931
struct NameBitmapHasAll {
932
    static constexpr auto name = "bitmap_has_all";
933
};
934
935
template <typename LeftDataType, typename RightDataType>
936
struct BitmapHasAll {
937
    using ResultDataType = DataTypeUInt8;
938
    using T0 = typename LeftDataType::FieldType;
939
    using T1 = typename RightDataType::FieldType;
940
    using TData = std::vector<BitmapValue>;
941
    using ResTData = typename ColumnUInt8::Container;
942
943
13
    static void vector_vector(const TData& lvec, const TData& rvec, ResTData& res) {
944
13
        size_t size = lvec.size();
945
47
        for (size_t i = 0; i < size; ++i) {
946
34
            uint64_t lhs_cardinality = lvec[i].cardinality();
947
34
            auto bitmap = lvec[i];
948
34
            bitmap |= rvec[i];
949
34
            res[i] = bitmap.cardinality() == lhs_cardinality;
950
34
        }
951
13
    }
952
0
    static void vector_scalar(const TData& lvec, const BitmapValue& rval, ResTData& res) {
953
0
        size_t size = lvec.size();
954
0
        for (size_t i = 0; i < size; ++i) {
955
0
            uint64_t lhs_cardinality = lvec[i].cardinality();
956
0
            auto bitmap = lvec[i];
957
0
            bitmap |= rval;
958
0
            res[i] = bitmap.cardinality() == lhs_cardinality;
959
0
        }
960
0
    }
961
0
    static void scalar_vector(const BitmapValue& lval, const TData& rvec, ResTData& res) {
962
0
        size_t size = rvec.size();
963
0
        uint64_t lhs_cardinality = lval.cardinality();
964
0
        for (size_t i = 0; i < size; ++i) {
965
0
            auto bitmap = lval;
966
0
            bitmap |= rvec[i];
967
0
            res[i] = bitmap.cardinality() == lhs_cardinality;
968
0
        }
969
0
    }
970
};
971
972
struct NameBitmapToString {
973
    static constexpr auto name = "bitmap_to_string";
974
};
975
976
struct BitmapToString {
977
    using ReturnType = DataTypeString;
978
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_BITMAP;
979
    using Type = DataTypeBitMap::FieldType;
980
    using ReturnColumnType = ColumnString;
981
    using Chars = ColumnString::Chars;
982
    using Offsets = ColumnString::Offsets;
983
984
266
    static Status vector(const std::vector<BitmapValue>& data, Chars& chars, Offsets& offsets) {
985
266
        size_t size = data.size();
986
266
        offsets.resize(size);
987
266
        chars.reserve(size);
988
1.26k
        for (size_t i = 0; i < size; ++i) {
989
999
            StringOP::push_value_string(data[i].to_string(), i, chars, offsets);
990
999
        }
991
266
        return Status::OK();
992
266
    }
993
};
994
995
/// Name tag paired with BitmapToBase64 when instantiating FunctionUnaryToType.
struct NameBitmapToBase64 {
    static constexpr const char* name = "bitmap_to_base64";
};
998
999
struct BitmapToBase64 {
1000
    using ReturnType = DataTypeString;
1001
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_BITMAP;
1002
    using Type = DataTypeBitMap::FieldType;
1003
    using ReturnColumnType = ColumnString;
1004
    using Chars = ColumnString::Chars;
1005
    using Offsets = ColumnString::Offsets;
1006
1007
    // ColumnString not support 64bit, only 32bit, so that the max size is 4G
1008
15
    static Status vector(const std::vector<BitmapValue>& data, Chars& chars, Offsets& offsets) {
1009
15
        size_t size = data.size();
1010
15
        offsets.resize(size);
1011
15
        size_t output_char_size = 0;
1012
52
        for (size_t i = 0; i < size; ++i) {
1013
37
            const BitmapValue& bitmap_val = data[i];
1014
37
            auto ser_size = bitmap_val.getSizeInBytes();
1015
37
            output_char_size += (int)(4.0 * ceil((double)ser_size / 3.0));
1016
37
        }
1017
15
        ColumnString::check_chars_length(output_char_size, size);
1018
15
        chars.resize(output_char_size);
1019
15
        auto chars_data = chars.data();
1020
1021
15
        size_t cur_ser_size = 0;
1022
15
        size_t last_ser_size = 0;
1023
15
        std::string ser_buff;
1024
15
        size_t encoded_offset = 0;
1025
52
        for (size_t i = 0; i < size; ++i) {
1026
37
            const BitmapValue& bitmap_val = data[i];
1027
37
            cur_ser_size = bitmap_val.getSizeInBytes();
1028
37
            if (cur_ser_size > last_ser_size) {
1029
25
                last_ser_size = cur_ser_size;
1030
25
                ser_buff.resize(cur_ser_size);
1031
25
            }
1032
37
            bitmap_val.write_to(ser_buff.data());
1033
1034
37
            auto outlen = base64_encode((const unsigned char*)ser_buff.data(), cur_ser_size,
1035
37
                                        chars_data + encoded_offset);
1036
37
            DCHECK(outlen > 0);
1037
1038
37
            encoded_offset += (int)(4.0 * ceil((double)cur_ser_size / 3.0));
1039
37
            offsets[i] = cast_set<UInt32>(encoded_offset);
1040
37
        }
1041
15
        return Status::OK();
1042
15
    }
1043
};
1044
1045
struct SubBitmap {
1046
    static constexpr auto name = "sub_bitmap";
1047
    using TData1 = std::vector<BitmapValue>;
1048
    using TData2 = typename ColumnInt64::Container;
1049
1050
    static void vector3(const TData1& bitmap_data, const TData2& offset_data,
1051
                        const TData2& limit_data, NullMap& null_map, size_t input_rows_count,
1052
18
                        TData1& res) {
1053
53
        for (int i = 0; i < input_rows_count; ++i) {
1054
35
            if (null_map[i]) {
1055
0
                continue;
1056
0
            }
1057
35
            if (limit_data[i] <= 0) {
1058
1
                null_map[i] = 1;
1059
1
                continue;
1060
1
            }
1061
34
            if (bitmap_data[i].offset_limit(offset_data[i], limit_data[i], &res[i]) == 0) {
1062
24
                null_map[i] = 1;
1063
24
            }
1064
34
        }
1065
18
    }
1066
    static void vector_scalars(const TData1& bitmap_data, const Int64& offset_data,
1067
                               const Int64& limit_data, NullMap& null_map, size_t input_rows_count,
1068
0
                               TData1& res) {
1069
0
        for (int i = 0; i < input_rows_count; ++i) {
1070
0
            if (null_map[i]) {
1071
0
                continue;
1072
0
            }
1073
0
            if (limit_data <= 0) {
1074
0
                null_map[i] = 1;
1075
0
                continue;
1076
0
            }
1077
0
            if (bitmap_data[i].offset_limit(offset_data, limit_data, &res[i]) == 0) {
1078
0
                null_map[i] = 1;
1079
0
            }
1080
0
        }
1081
0
    }
1082
};
1083
1084
struct BitmapSubsetLimit {
1085
    static constexpr auto name = "bitmap_subset_limit";
1086
    using TData1 = std::vector<BitmapValue>;
1087
    using TData2 = typename ColumnInt64::Container;
1088
1089
    static void vector3(const TData1& bitmap_data, const TData2& offset_data,
1090
                        const TData2& limit_data, NullMap& null_map, size_t input_rows_count,
1091
14
                        TData1& res) {
1092
45
        for (int i = 0; i < input_rows_count; ++i) {
1093
31
            if (null_map[i]) {
1094
0
                continue;
1095
0
            }
1096
31
            if (offset_data[i] < 0 || limit_data[i] < 0) {
1097
0
                null_map[i] = 1;
1098
0
                continue;
1099
0
            }
1100
31
            bitmap_data[i].sub_limit(offset_data[i], limit_data[i], &res[i]);
1101
31
        }
1102
14
    }
1103
    static void vector_scalars(const TData1& bitmap_data, const Int64& offset_data,
1104
                               const Int64& limit_data, NullMap& null_map, size_t input_rows_count,
1105
0
                               TData1& res) {
1106
0
        for (int i = 0; i < input_rows_count; ++i) {
1107
0
            if (null_map[i]) {
1108
0
                continue;
1109
0
            }
1110
0
            if (offset_data < 0 || limit_data < 0) {
1111
0
                null_map[i] = 1;
1112
0
                continue;
1113
0
            }
1114
0
            bitmap_data[i].sub_limit(offset_data, limit_data, &res[i]);
1115
0
        }
1116
0
    }
1117
};
1118
1119
struct BitmapSubsetInRange {
1120
    static constexpr auto name = "bitmap_subset_in_range";
1121
    using TData1 = std::vector<BitmapValue>;
1122
    using TData2 = typename ColumnInt64::Container;
1123
1124
    static void vector3(const TData1& bitmap_data, const TData2& range_start,
1125
                        const TData2& range_end, NullMap& null_map, size_t input_rows_count,
1126
14
                        TData1& res) {
1127
45
        for (int i = 0; i < input_rows_count; ++i) {
1128
31
            if (null_map[i]) {
1129
0
                continue;
1130
0
            }
1131
31
            if (range_start[i] >= range_end[i] || range_start[i] < 0 || range_end[i] < 0) {
1132
25
                null_map[i] = 1;
1133
25
                continue;
1134
25
            }
1135
6
            bitmap_data[i].sub_range(range_start[i], range_end[i], &res[i]);
1136
6
        }
1137
14
    }
1138
    static void vector_scalars(const TData1& bitmap_data, const Int64& range_start,
1139
                               const Int64& range_end, NullMap& null_map, size_t input_rows_count,
1140
0
                               TData1& res) {
1141
0
        for (int i = 0; i < input_rows_count; ++i) {
1142
0
            if (null_map[i]) {
1143
0
                continue;
1144
0
            }
1145
0
            if (range_start >= range_end || range_start < 0 || range_end < 0) {
1146
0
                null_map[i] = 1;
1147
0
                continue;
1148
0
            }
1149
0
            bitmap_data[i].sub_range(range_start, range_end, &res[i]);
1150
0
        }
1151
0
    }
1152
};
1153
1154
template <typename Impl>
1155
class FunctionBitmapSubs : public IFunction {
1156
public:
1157
    static constexpr auto name = Impl::name;
1158
3
    String get_name() const override { return name; }
_ZNK5doris18FunctionBitmapSubsINS_9SubBitmapEE8get_nameB5cxx11Ev
Line
Count
Source
1158
1
    String get_name() const override { return name; }
_ZNK5doris18FunctionBitmapSubsINS_17BitmapSubsetLimitEE8get_nameB5cxx11Ev
Line
Count
Source
1158
1
    String get_name() const override { return name; }
_ZNK5doris18FunctionBitmapSubsINS_19BitmapSubsetInRangeEE8get_nameB5cxx11Ev
Line
Count
Source
1158
1
    String get_name() const override { return name; }
1159
1160
55
    static FunctionPtr create() { return std::make_shared<FunctionBitmapSubs>(); }
_ZN5doris18FunctionBitmapSubsINS_9SubBitmapEE6createEv
Line
Count
Source
1160
21
    static FunctionPtr create() { return std::make_shared<FunctionBitmapSubs>(); }
_ZN5doris18FunctionBitmapSubsINS_17BitmapSubsetLimitEE6createEv
Line
Count
Source
1160
17
    static FunctionPtr create() { return std::make_shared<FunctionBitmapSubs>(); }
_ZN5doris18FunctionBitmapSubsINS_19BitmapSubsetInRangeEE6createEv
Line
Count
Source
1160
17
    static FunctionPtr create() { return std::make_shared<FunctionBitmapSubs>(); }
1161
1162
28
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1163
28
        return make_nullable(std::make_shared<DataTypeBitMap>());
1164
28
    }
_ZNK5doris18FunctionBitmapSubsINS_9SubBitmapEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1162
12
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1163
12
        return make_nullable(std::make_shared<DataTypeBitMap>());
1164
12
    }
_ZNK5doris18FunctionBitmapSubsINS_17BitmapSubsetLimitEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1162
8
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1163
8
        return make_nullable(std::make_shared<DataTypeBitMap>());
1164
8
    }
_ZNK5doris18FunctionBitmapSubsINS_19BitmapSubsetInRangeEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1162
8
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1163
8
        return make_nullable(std::make_shared<DataTypeBitMap>());
1164
8
    }
1165
1166
28
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris18FunctionBitmapSubsINS_9SubBitmapEE23get_number_of_argumentsEv
Line
Count
Source
1166
12
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris18FunctionBitmapSubsINS_17BitmapSubsetLimitEE23get_number_of_argumentsEv
Line
Count
Source
1166
8
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris18FunctionBitmapSubsINS_19BitmapSubsetInRangeEE23get_number_of_argumentsEv
Line
Count
Source
1166
8
    size_t get_number_of_arguments() const override { return 3; }
1167
1168
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1169
46
                        uint32_t result, size_t input_rows_count) const override {
1170
46
        DCHECK_EQ(arguments.size(), 3);
1171
46
        auto res_null_map = ColumnUInt8::create(input_rows_count, 0);
1172
46
        auto res_data_column = ColumnBitmap::create(input_rows_count);
1173
1174
46
        bool col_const[3];
1175
46
        ColumnPtr argument_columns[3];
1176
184
        for (int i = 0; i < 3; ++i) {
1177
138
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
1178
138
        }
1179
46
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
1180
0
                                                     *block.get_by_position(arguments[0]).column)
1181
0
                                                     .convert_to_full_column()
1182
46
                                           : block.get_by_position(arguments[0]).column;
1183
1184
46
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
1185
1186
46
        auto bitmap_column = assert_cast<const ColumnBitmap*>(argument_columns[0].get());
1187
46
        auto offset_column = assert_cast<const ColumnInt64*>(argument_columns[1].get());
1188
46
        auto limit_column = assert_cast<const ColumnInt64*>(argument_columns[2].get());
1189
1190
46
        if (col_const[1] && col_const[2]) {
1191
0
            Impl::vector_scalars(bitmap_column->get_data(), offset_column->get_element(0),
1192
0
                                 limit_column->get_element(0), res_null_map->get_data(),
1193
0
                                 input_rows_count, res_data_column->get_data());
1194
46
        } else {
1195
46
            Impl::vector3(bitmap_column->get_data(), offset_column->get_data(),
1196
46
                          limit_column->get_data(), res_null_map->get_data(), input_rows_count,
1197
46
                          res_data_column->get_data());
1198
46
        }
1199
1200
46
        block.get_by_position(result).column =
1201
46
                ColumnNullable::create(std::move(res_data_column), std::move(res_null_map));
1202
46
        return Status::OK();
1203
46
    }
_ZNK5doris18FunctionBitmapSubsINS_9SubBitmapEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1169
18
                        uint32_t result, size_t input_rows_count) const override {
1170
18
        DCHECK_EQ(arguments.size(), 3);
1171
18
        auto res_null_map = ColumnUInt8::create(input_rows_count, 0);
1172
18
        auto res_data_column = ColumnBitmap::create(input_rows_count);
1173
1174
18
        bool col_const[3];
1175
18
        ColumnPtr argument_columns[3];
1176
72
        for (int i = 0; i < 3; ++i) {
1177
54
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
1178
54
        }
1179
18
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
1180
0
                                                     *block.get_by_position(arguments[0]).column)
1181
0
                                                     .convert_to_full_column()
1182
18
                                           : block.get_by_position(arguments[0]).column;
1183
1184
18
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
1185
1186
18
        auto bitmap_column = assert_cast<const ColumnBitmap*>(argument_columns[0].get());
1187
18
        auto offset_column = assert_cast<const ColumnInt64*>(argument_columns[1].get());
1188
18
        auto limit_column = assert_cast<const ColumnInt64*>(argument_columns[2].get());
1189
1190
18
        if (col_const[1] && col_const[2]) {
1191
0
            Impl::vector_scalars(bitmap_column->get_data(), offset_column->get_element(0),
1192
0
                                 limit_column->get_element(0), res_null_map->get_data(),
1193
0
                                 input_rows_count, res_data_column->get_data());
1194
18
        } else {
1195
18
            Impl::vector3(bitmap_column->get_data(), offset_column->get_data(),
1196
18
                          limit_column->get_data(), res_null_map->get_data(), input_rows_count,
1197
18
                          res_data_column->get_data());
1198
18
        }
1199
1200
18
        block.get_by_position(result).column =
1201
18
                ColumnNullable::create(std::move(res_data_column), std::move(res_null_map));
1202
18
        return Status::OK();
1203
18
    }
_ZNK5doris18FunctionBitmapSubsINS_17BitmapSubsetLimitEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1169
14
                        uint32_t result, size_t input_rows_count) const override {
1170
14
        DCHECK_EQ(arguments.size(), 3);
1171
14
        auto res_null_map = ColumnUInt8::create(input_rows_count, 0);
1172
14
        auto res_data_column = ColumnBitmap::create(input_rows_count);
1173
1174
14
        bool col_const[3];
1175
14
        ColumnPtr argument_columns[3];
1176
56
        for (int i = 0; i < 3; ++i) {
1177
42
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
1178
42
        }
1179
14
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
1180
0
                                                     *block.get_by_position(arguments[0]).column)
1181
0
                                                     .convert_to_full_column()
1182
14
                                           : block.get_by_position(arguments[0]).column;
1183
1184
14
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
1185
1186
14
        auto bitmap_column = assert_cast<const ColumnBitmap*>(argument_columns[0].get());
1187
14
        auto offset_column = assert_cast<const ColumnInt64*>(argument_columns[1].get());
1188
14
        auto limit_column = assert_cast<const ColumnInt64*>(argument_columns[2].get());
1189
1190
14
        if (col_const[1] && col_const[2]) {
1191
0
            Impl::vector_scalars(bitmap_column->get_data(), offset_column->get_element(0),
1192
0
                                 limit_column->get_element(0), res_null_map->get_data(),
1193
0
                                 input_rows_count, res_data_column->get_data());
1194
14
        } else {
1195
14
            Impl::vector3(bitmap_column->get_data(), offset_column->get_data(),
1196
14
                          limit_column->get_data(), res_null_map->get_data(), input_rows_count,
1197
14
                          res_data_column->get_data());
1198
14
        }
1199
1200
14
        block.get_by_position(result).column =
1201
14
                ColumnNullable::create(std::move(res_data_column), std::move(res_null_map));
1202
14
        return Status::OK();
1203
14
    }
_ZNK5doris18FunctionBitmapSubsINS_19BitmapSubsetInRangeEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1169
14
                        uint32_t result, size_t input_rows_count) const override {
1170
14
        DCHECK_EQ(arguments.size(), 3);
1171
14
        auto res_null_map = ColumnUInt8::create(input_rows_count, 0);
1172
14
        auto res_data_column = ColumnBitmap::create(input_rows_count);
1173
1174
14
        bool col_const[3];
1175
14
        ColumnPtr argument_columns[3];
1176
56
        for (int i = 0; i < 3; ++i) {
1177
42
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
1178
42
        }
1179
14
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
1180
0
                                                     *block.get_by_position(arguments[0]).column)
1181
0
                                                     .convert_to_full_column()
1182
14
                                           : block.get_by_position(arguments[0]).column;
1183
1184
14
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
1185
1186
14
        auto bitmap_column = assert_cast<const ColumnBitmap*>(argument_columns[0].get());
1187
14
        auto offset_column = assert_cast<const ColumnInt64*>(argument_columns[1].get());
1188
14
        auto limit_column = assert_cast<const ColumnInt64*>(argument_columns[2].get());
1189
1190
14
        if (col_const[1] && col_const[2]) {
1191
0
            Impl::vector_scalars(bitmap_column->get_data(), offset_column->get_element(0),
1192
0
                                 limit_column->get_element(0), res_null_map->get_data(),
1193
0
                                 input_rows_count, res_data_column->get_data());
1194
14
        } else {
1195
14
            Impl::vector3(bitmap_column->get_data(), offset_column->get_data(),
1196
14
                          limit_column->get_data(), res_null_map->get_data(), input_rows_count,
1197
14
                          res_data_column->get_data());
1198
14
        }
1199
1200
14
        block.get_by_position(result).column =
1201
14
                ColumnNullable::create(std::move(res_data_column), std::move(res_null_map));
1202
14
        return Status::OK();
1203
14
    }
1204
};
1205
1206
class FunctionBitmapToArray : public IFunction {
1207
public:
1208
    static constexpr auto name = "bitmap_to_array";
1209
1210
1
    String get_name() const override { return name; }
1211
1212
14
    static FunctionPtr create() { return std::make_shared<FunctionBitmapToArray>(); }
1213
1214
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1215
5
        auto nested_type = make_nullable(std::make_shared<DataTypeInt64>());
1216
5
        return std::make_shared<DataTypeArray>(nested_type);
1217
5
    }
1218
1219
5
    size_t get_number_of_arguments() const override { return 1; }
1220
1221
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1222
8
                        uint32_t result, size_t input_rows_count) const override {
1223
8
        auto return_nested_type = make_nullable(std::make_shared<DataTypeInt64>());
1224
8
        auto dest_array_column_ptr = ColumnArray::create(return_nested_type->create_column(),
1225
8
                                                         ColumnArray::ColumnOffsets::create());
1226
1227
8
        IColumn* dest_nested_column = &dest_array_column_ptr->get_data();
1228
8
        ColumnNullable* dest_nested_nullable_col =
1229
8
                reinterpret_cast<ColumnNullable*>(dest_nested_column);
1230
8
        dest_nested_column = dest_nested_nullable_col->get_nested_column_ptr().get();
1231
8
        auto& dest_nested_null_map = dest_nested_nullable_col->get_null_map_column().get_data();
1232
1233
8
        auto& arg_col = block.get_by_position(arguments[0]).column;
1234
8
        auto bitmap_col = assert_cast<const ColumnBitmap*>(arg_col.get());
1235
8
        const auto& bitmap_col_data = bitmap_col->get_data();
1236
8
        auto& nested_column_data = assert_cast<ColumnInt64*>(dest_nested_column)->get_data();
1237
8
        auto& dest_offsets = dest_array_column_ptr->get_offsets();
1238
8
        dest_offsets.reserve(input_rows_count);
1239
1240
16
        for (int i = 0; i < input_rows_count; ++i) {
1241
8
            bitmap_col_data[i].to_array(nested_column_data);
1242
8
            dest_nested_null_map.resize_fill(nested_column_data.size(), 0);
1243
8
            dest_offsets.push_back(nested_column_data.size());
1244
8
        }
1245
1246
8
        block.replace_by_position(result, std::move(dest_array_column_ptr));
1247
8
        return Status::OK();
1248
8
    }
1249
};
1250
1251
using FunctionBitmapEmpty = FunctionConst<BitmapEmpty, false>;
1252
using FunctionToBitmap = FunctionAlwaysNotNullable<ToBitmap>;
1253
using FunctionToBitmapWithCheck = FunctionAlwaysNotNullable<ToBitmapWithCheck, true>;
1254
1255
using FunctionBitmapFromString = FunctionBitmapAlwaysNull<BitmapFromString>;
1256
using FunctionBitmapFromArray = FunctionBitmapAlwaysNull<BitmapFromArray>;
1257
using FunctionBitmapHash = FunctionAlwaysNotNullable<BitmapHash<32>>;
1258
using FunctionBitmapHash64 = FunctionAlwaysNotNullable<BitmapHash<64>>;
1259
1260
using FunctionBitmapMin = FunctionBitmapSingle<FunctionBitmapMinImpl>;
1261
using FunctionBitmapMax = FunctionBitmapSingle<FunctionBitmapMaxImpl>;
1262
1263
using FunctionBitmapToString = FunctionUnaryToType<BitmapToString, NameBitmapToString>;
1264
using FunctionBitmapToBase64 = FunctionUnaryToType<BitmapToBase64, NameBitmapToBase64>;
1265
using FunctionBitmapFromBase64 = FunctionBitmapAlwaysNull<BitmapFromBase64>;
1266
using FunctionBitmapNot =
1267
        FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapNot, NameBitmapNot>;
1268
using FunctionBitmapAndNot =
1269
        FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapAndNot, NameBitmapAndNot>;
1270
using FunctionBitmapContains =
1271
        FunctionBinaryToType<DataTypeBitMap, DataTypeInt64, BitmapContains, NameBitmapContains>;
1272
using FunctionBitmapRemove =
1273
        FunctionBinaryToType<DataTypeBitMap, DataTypeInt64, BitmapRemove, NameBitmapRemove>;
1274
1275
using FunctionBitmapHasAny =
1276
        FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapHasAny, NameBitmapHasAny>;
1277
using FunctionBitmapHasAll =
1278
        FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapHasAll, NameBitmapHasAll>;
1279
using FunctionSubBitmap = FunctionBitmapSubs<SubBitmap>;
1280
using FunctionBitmapSubsetLimit = FunctionBitmapSubs<BitmapSubsetLimit>;
1281
using FunctionBitmapSubsetInRange = FunctionBitmapSubs<BitmapSubsetInRange>;
1282
1283
8
/// Registers every bitmap function defined in this file, plus the two
/// "andnot" aliases, with `factory`.
void register_function_bitmap(SimpleFunctionFactory& factory) {
    // Constructors and conversions into bitmaps.
    factory.register_function<FunctionBitmapEmpty>();
    factory.register_function<FunctionToBitmap>();
    factory.register_function<FunctionToBitmapWithCheck>();
    factory.register_function<FunctionBitmapFromString>();
    factory.register_function<FunctionBitmapToBase64>();
    factory.register_function<FunctionBitmapFromBase64>();
    factory.register_function<FunctionBitmapFromArray>();
    factory.register_function<FunctionBitmapHash>();
    factory.register_function<FunctionBitmapHash64>();

    // Single-bitmap functions.
    factory.register_function<FunctionBitmapCount>();
    factory.register_function<FunctionBitmapMin>();
    factory.register_function<FunctionBitmapMax>();
    factory.register_function<FunctionBitmapToString>();

    // Binary functions; the and-not variants are also reachable through the
    // aliases "bitmap_andnot" and "bitmap_andnot_count".
    factory.register_function<FunctionBitmapNot>();
    factory.register_function<FunctionBitmapAndNot>();
    factory.register_alias(NameBitmapAndNot::name, "bitmap_andnot");
    factory.register_function<FunctionBitmapAndNotCount<NameBitmapAndNotCount>>();
    factory.register_alias(NameBitmapAndNotCount::name, "bitmap_andnot_count");
    factory.register_function<FunctionBitmapContains>();
    factory.register_function<FunctionBitmapRemove>();
    factory.register_function<FunctionBitmapHasAny>();
    factory.register_function<FunctionBitmapHasAll>();

    // Three-argument subset functions and array conversion.
    factory.register_function<FunctionSubBitmap>();
    factory.register_function<FunctionBitmapSubsetLimit>();
    factory.register_function<FunctionBitmapSubsetInRange>();
    factory.register_function<FunctionBitmapToArray>();
}
1311
1312
} // namespace doris