be/src/exprs/function/function_bitmap.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionBitmap.h |
19 | | // and modified by Doris |
20 | | |
21 | | #include <absl/strings/numbers.h> |
22 | | #include <absl/strings/str_split.h> |
23 | | #include <glog/logging.h> |
24 | | #include <stdint.h> |
25 | | #include <string.h> |
26 | | |
27 | | #include <algorithm> |
28 | | #include <boost/iterator/iterator_facade.hpp> |
29 | | #include <functional> |
30 | | #include <memory> |
31 | | #include <ostream> |
32 | | #include <string> |
33 | | #include <utility> |
34 | | #include <vector> |
35 | | |
36 | | #include "common/compiler_util.h" // IWYU pragma: keep |
37 | | #include "common/status.h" |
38 | | #include "core/assert_cast.h" |
39 | | #include "core/block/block.h" |
40 | | #include "core/block/column_numbers.h" |
41 | | #include "core/block/column_with_type_and_name.h" |
42 | | #include "core/column/column.h" |
43 | | #include "core/column/column_array.h" |
44 | | #include "core/column/column_complex.h" |
45 | | #include "core/column/column_const.h" |
46 | | #include "core/column/column_nullable.h" |
47 | | #include "core/column/column_string.h" |
48 | | #include "core/column/column_vector.h" |
49 | | #include "core/data_type/data_type.h" |
50 | | #include "core/data_type/data_type_array.h" |
51 | | #include "core/data_type/data_type_bitmap.h" |
52 | | #include "core/data_type/data_type_nullable.h" |
53 | | #include "core/data_type/data_type_number.h" |
54 | | #include "core/data_type/data_type_string.h" |
55 | | #include "core/field.h" |
56 | | #include "core/types.h" |
57 | | #include "core/value/bitmap_value.h" |
58 | | #include "exec/common/stringop_substring.h" |
59 | | #include "exec/common/util.hpp" |
60 | | #include "exprs/aggregate/aggregate_function.h" |
61 | | #include "exprs/function/function.h" |
62 | | #include "exprs/function/function_always_not_nullable.h" |
63 | | #include "exprs/function/function_bitmap_min_or_max.h" |
64 | | #include "exprs/function/function_const.h" |
65 | | #include "exprs/function/function_helpers.h" |
66 | | #include "exprs/function/function_totype.h" |
67 | | #include "exprs/function/simple_function_factory.h" |
68 | | #include "util/hash/murmur_hash3.h" |
69 | | #include "util/hash_util.hpp" |
70 | | #include "util/string_parser.hpp" |
71 | | #include "util/url_coding.h" |
72 | | |
73 | | namespace doris { |
74 | | class FunctionContext; |
75 | | } // namespace doris |
76 | | |
77 | | namespace doris { |
78 | | #include "common/compile_check_begin.h" |
79 | | |
80 | | struct BitmapEmpty { |
81 | | static constexpr auto name = "bitmap_empty"; |
82 | | using ReturnColVec = ColumnBitmap; |
83 | 284 | static DataTypePtr get_return_type() { return std::make_shared<DataTypeBitMap>(); } |
84 | 284 | static auto init_value() { return BitmapValue {}; } |
85 | | }; |
86 | | |
87 | | struct ToBitmap { |
88 | | static constexpr auto name = "to_bitmap"; |
89 | | using ReturnType = DataTypeBitMap; |
90 | | |
91 | | template <typename ColumnType> |
92 | 1.15k | static void vector(const ColumnType* col, MutableColumnPtr& col_res) { |
93 | 1.15k | execute<ColumnType, false>(col, nullptr, col_res); |
94 | 1.15k | } _ZN5doris8ToBitmap6vectorINS_9ColumnStrIjEEEEvPKT_RNS_3COWINS_7IColumnEE11mutable_ptrIS8_EE Line | Count | Source | 92 | 13 | static void vector(const ColumnType* col, MutableColumnPtr& col_res) { | 93 | 13 | execute<ColumnType, false>(col, nullptr, col_res); | 94 | 13 | } |
_ZN5doris8ToBitmap6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvPKT_RNS_3COWINS_7IColumnEE11mutable_ptrIS9_EE Line | Count | Source | 92 | 1.13k | static void vector(const ColumnType* col, MutableColumnPtr& col_res) { | 93 | 1.13k | execute<ColumnType, false>(col, nullptr, col_res); | 94 | 1.13k | } |
|
95 | | template <typename ColumnType> |
96 | | static void vector_nullable(const ColumnType* col, const NullMap& nullmap, |
97 | 679 | MutableColumnPtr& col_res) { |
98 | 679 | execute<ColumnType, true>(col, &nullmap, col_res); |
99 | 679 | } _ZN5doris8ToBitmap15vector_nullableINS_9ColumnStrIjEEEEvPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISF_EE Line | Count | Source | 97 | 49 | MutableColumnPtr& col_res) { | 98 | 49 | execute<ColumnType, true>(col, &nullmap, col_res); | 99 | 49 | } |
_ZN5doris8ToBitmap15vector_nullableINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE Line | Count | Source | 97 | 630 | MutableColumnPtr& col_res) { | 98 | 630 | execute<ColumnType, true>(col, &nullmap, col_res); | 99 | 630 | } |
|
100 | | template <typename ColumnType, bool arg_is_nullable> |
101 | 1.82k | static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) { |
102 | 1.82k | if constexpr (std::is_same_v<ColumnType, ColumnString>) { |
103 | 62 | const ColumnString::Chars& data = col->get_chars(); |
104 | 62 | const ColumnString::Offsets& offsets = col->get_offsets(); |
105 | | |
106 | 62 | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); |
107 | 62 | auto& res_data = res_column->get_data(); |
108 | 62 | size_t size = offsets.size(); |
109 | | |
110 | 3.56k | for (size_t i = 0; i < size; ++i) { |
111 | 3.50k | if (arg_is_nullable && ((*nullmap)[i])) { |
112 | 26 | continue; |
113 | 3.47k | } else { |
114 | 3.47k | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
115 | 3.47k | int str_size = cast_set<int>(offsets[i] - offsets[i - 1]); |
116 | 3.47k | StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; |
117 | 3.47k | uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>( |
118 | 3.47k | raw_str, str_size, &parse_result); |
119 | 3.47k | if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) { |
120 | 3.26k | res_data[i].add(int_value); |
121 | 3.26k | } |
122 | 3.47k | } |
123 | 3.50k | } |
124 | 1.76k | } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) { |
125 | 1.76k | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); |
126 | 1.76k | auto& res_data = res_column->get_data(); |
127 | 1.76k | size_t size = col->size(); |
128 | | |
129 | 131k | for (size_t i = 0; i < size; ++i) { |
130 | 128k | if constexpr (arg_is_nullable) { |
131 | 2.62k | if ((*nullmap)[i]) { |
132 | 1.44k | continue; |
133 | 1.44k | } |
134 | 2.62k | } |
135 | 128k | if (auto value = col->get_data()[i]; value >= 0) { |
136 | 119k | res_data[i].add(value); |
137 | 119k | } |
138 | 128k | } |
139 | 1.76k | } |
140 | 1.82k | } _ZN5doris8ToBitmap7executeINS_9ColumnStrIjEELb1EEEvPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISF_EE Line | Count | Source | 101 | 49 | static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) { | 102 | 49 | if constexpr (std::is_same_v<ColumnType, ColumnString>) { | 103 | 49 | const ColumnString::Chars& data = col->get_chars(); | 104 | 49 | const ColumnString::Offsets& offsets = col->get_offsets(); | 105 | | | 106 | 49 | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); | 107 | 49 | auto& res_data = res_column->get_data(); | 108 | 49 | size_t size = offsets.size(); | 109 | | | 110 | 3.52k | for (size_t i = 0; i < size; ++i) { | 111 | 3.47k | if (arg_is_nullable && ((*nullmap)[i])) { | 112 | 26 | continue; | 113 | 3.44k | } else { | 114 | 3.44k | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); | 115 | 3.44k | int str_size = cast_set<int>(offsets[i] - offsets[i - 1]); | 116 | 3.44k | StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; | 117 | 3.44k | uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>( | 118 | 3.44k | raw_str, str_size, &parse_result); | 119 | 3.44k | if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) { | 120 | 3.26k | res_data[i].add(int_value); | 121 | 3.26k | } | 122 | 3.44k | } | 123 | 3.47k | } | 124 | | } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) { | 125 | | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); | 126 | | auto& res_data = res_column->get_data(); | 127 | | size_t size = col->size(); | 128 | | | 129 | | for (size_t i = 0; i < size; ++i) { | 130 | | if constexpr (arg_is_nullable) { | 131 | | if ((*nullmap)[i]) { | 132 | | continue; | 133 | | } | 134 | | } | 135 | | if (auto value = col->get_data()[i]; value >= 0) { | 136 | | res_data[i].add(value); | 137 | | } | 138 | | } | 
139 | | } | 140 | 49 | } |
_ZN5doris8ToBitmap7executeINS_12ColumnVectorILNS_13PrimitiveTypeE6EEELb1EEEvPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE Line | Count | Source | 101 | 630 | static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) { | 102 | | if constexpr (std::is_same_v<ColumnType, ColumnString>) { | 103 | | const ColumnString::Chars& data = col->get_chars(); | 104 | | const ColumnString::Offsets& offsets = col->get_offsets(); | 105 | | | 106 | | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); | 107 | | auto& res_data = res_column->get_data(); | 108 | | size_t size = offsets.size(); | 109 | | | 110 | | for (size_t i = 0; i < size; ++i) { | 111 | | if (arg_is_nullable && ((*nullmap)[i])) { | 112 | | continue; | 113 | | } else { | 114 | | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); | 115 | | int str_size = cast_set<int>(offsets[i] - offsets[i - 1]); | 116 | | StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; | 117 | | uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>( | 118 | | raw_str, str_size, &parse_result); | 119 | | if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) { | 120 | | res_data[i].add(int_value); | 121 | | } | 122 | | } | 123 | | } | 124 | 630 | } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) { | 125 | 630 | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); | 126 | 630 | auto& res_data = res_column->get_data(); | 127 | 630 | size_t size = col->size(); | 128 | | | 129 | 4.70k | for (size_t i = 0; i < size; ++i) { | 130 | 2.62k | if constexpr (arg_is_nullable) { | 131 | 2.62k | if ((*nullmap)[i]) { | 132 | 1.44k | continue; | 133 | 1.44k | } | 134 | 2.62k | } | 135 | 2.62k | if (auto value = col->get_data()[i]; value >= 0) { | 136 | 844 | res_data[i].add(value); | 137 | 844 | } | 138 | 2.62k | } | 139 | 630 | } | 
140 | 630 | } |
_ZN5doris8ToBitmap7executeINS_9ColumnStrIjEELb0EEEvPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISF_EE Line | Count | Source | 101 | 13 | static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) { | 102 | 13 | if constexpr (std::is_same_v<ColumnType, ColumnString>) { | 103 | 13 | const ColumnString::Chars& data = col->get_chars(); | 104 | 13 | const ColumnString::Offsets& offsets = col->get_offsets(); | 105 | | | 106 | 13 | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); | 107 | 13 | auto& res_data = res_column->get_data(); | 108 | 13 | size_t size = offsets.size(); | 109 | | | 110 | 42 | for (size_t i = 0; i < size; ++i) { | 111 | 29 | if (arg_is_nullable && ((*nullmap)[i])) { | 112 | 0 | continue; | 113 | 29 | } else { | 114 | 29 | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); | 115 | 29 | int str_size = cast_set<int>(offsets[i] - offsets[i - 1]); | 116 | 29 | StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; | 117 | 29 | uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>( | 118 | 29 | raw_str, str_size, &parse_result); | 119 | 29 | if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) { | 120 | 0 | res_data[i].add(int_value); | 121 | 0 | } | 122 | 29 | } | 123 | 29 | } | 124 | | } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) { | 125 | | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); | 126 | | auto& res_data = res_column->get_data(); | 127 | | size_t size = col->size(); | 128 | | | 129 | | for (size_t i = 0; i < size; ++i) { | 130 | | if constexpr (arg_is_nullable) { | 131 | | if ((*nullmap)[i]) { | 132 | | continue; | 133 | | } | 134 | | } | 135 | | if (auto value = col->get_data()[i]; value >= 0) { | 136 | | res_data[i].add(value); | 137 | | } | 138 | | } | 139 | | } | 140 | 13 | } |
_ZN5doris8ToBitmap7executeINS_12ColumnVectorILNS_13PrimitiveTypeE6EEELb0EEEvPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE Line | Count | Source | 101 | 1.13k | static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) { | 102 | | if constexpr (std::is_same_v<ColumnType, ColumnString>) { | 103 | | const ColumnString::Chars& data = col->get_chars(); | 104 | | const ColumnString::Offsets& offsets = col->get_offsets(); | 105 | | | 106 | | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); | 107 | | auto& res_data = res_column->get_data(); | 108 | | size_t size = offsets.size(); | 109 | | | 110 | | for (size_t i = 0; i < size; ++i) { | 111 | | if (arg_is_nullable && ((*nullmap)[i])) { | 112 | | continue; | 113 | | } else { | 114 | | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); | 115 | | int str_size = cast_set<int>(offsets[i] - offsets[i - 1]); | 116 | | StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; | 117 | | uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>( | 118 | | raw_str, str_size, &parse_result); | 119 | | if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) { | 120 | | res_data[i].add(int_value); | 121 | | } | 122 | | } | 123 | | } | 124 | 1.13k | } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) { | 125 | 1.13k | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); | 126 | 1.13k | auto& res_data = res_column->get_data(); | 127 | 1.13k | size_t size = col->size(); | 128 | | | 129 | 126k | for (size_t i = 0; i < size; ++i) { | 130 | | if constexpr (arg_is_nullable) { | 131 | | if ((*nullmap)[i]) { | 132 | | continue; | 133 | | } | 134 | | } | 135 | 125k | if (auto value = col->get_data()[i]; value >= 0) { | 136 | 118k | res_data[i].add(value); | 137 | 118k | } | 138 | 125k | } | 139 | 1.13k | } | 140 | 1.13k | } |
|
141 | | }; |
142 | | |
143 | | struct ToBitmapWithCheck { |
144 | | static constexpr auto name = "to_bitmap_with_check"; |
145 | | using ReturnType = DataTypeBitMap; |
146 | | |
147 | | template <typename ColumnType> |
148 | 50 | static Status vector(const ColumnType* col, MutableColumnPtr& col_res) { |
149 | 50 | return execute<ColumnType, false>(col, nullptr, col_res); |
150 | 50 | } Unexecuted instantiation: _ZN5doris17ToBitmapWithCheck6vectorINS_9ColumnStrIjEEEENS_6StatusEPKT_RNS_3COWINS_7IColumnEE11mutable_ptrIS9_EE _ZN5doris17ToBitmapWithCheck6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEENS_6StatusEPKT_RNS_3COWINS_7IColumnEE11mutable_ptrISA_EE Line | Count | Source | 148 | 50 | static Status vector(const ColumnType* col, MutableColumnPtr& col_res) { | 149 | 50 | return execute<ColumnType, false>(col, nullptr, col_res); | 150 | 50 | } |
|
151 | | template <typename ColumnType> |
152 | | static Status vector_nullable(const ColumnType* col, const NullMap& nullmap, |
153 | 46 | MutableColumnPtr& col_res) { |
154 | 46 | return execute<ColumnType, true>(col, &nullmap, col_res); |
155 | 46 | } Unexecuted instantiation: _ZN5doris17ToBitmapWithCheck15vector_nullableINS_9ColumnStrIjEEEENS_6StatusEPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE _ZN5doris17ToBitmapWithCheck15vector_nullableINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEENS_6StatusEPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISH_EE Line | Count | Source | 153 | 46 | MutableColumnPtr& col_res) { | 154 | 46 | return execute<ColumnType, true>(col, &nullmap, col_res); | 155 | 46 | } |
|
156 | | template <typename ColumnType, bool arg_is_nullable> |
157 | | static Status execute(const ColumnType* col, const NullMap* nullmap, |
158 | 96 | MutableColumnPtr& col_res) { |
159 | 96 | if constexpr (std::is_same_v<ColumnType, ColumnString>) { |
160 | 0 | const ColumnString::Chars& data = col->get_chars(); |
161 | 0 | const ColumnString::Offsets& offsets = col->get_offsets(); |
162 | 0 | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); |
163 | 0 | auto& res_data = res_column->get_data(); |
164 | 0 | size_t size = offsets.size(); |
165 | |
|
166 | 0 | for (size_t i = 0; i < size; ++i) { |
167 | 0 | if (arg_is_nullable && ((*nullmap)[i])) { |
168 | 0 | continue; |
169 | 0 | } else { |
170 | 0 | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
171 | | // The string lenght is less than 2G, so that cast the str size to int, not use size_t |
172 | 0 | int str_size = cast_set<int>(offsets[i] - offsets[i - 1]); |
173 | 0 | StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; |
174 | 0 | uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>( |
175 | 0 | raw_str, str_size, &parse_result); |
176 | 0 | if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) { |
177 | 0 | res_data[i].add(int_value); |
178 | 0 | } else { |
179 | 0 | return Status::InvalidArgument( |
180 | 0 | "The input: {} is not valid, to_bitmap only support bigint value " |
181 | 0 | "from 0 to 18446744073709551615 currently, cannot create MV with " |
182 | 0 | "to_bitmap on column with negative values or cannot load negative " |
183 | 0 | "values to column with to_bitmap MV on it.", |
184 | 0 | std::string(raw_str, str_size)); |
185 | 0 | } |
186 | 0 | } |
187 | 0 | } |
188 | 96 | } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) { |
189 | 96 | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); |
190 | 96 | auto& res_data = res_column->get_data(); |
191 | 96 | size_t size = col->size(); |
192 | | |
193 | 215 | for (size_t i = 0; i < size; ++i) { |
194 | 123 | if (arg_is_nullable && ((*nullmap)[i])) { |
195 | 21 | continue; |
196 | 102 | } else { |
197 | 102 | int64_t int_value = col->get_data()[i]; |
198 | 102 | if (LIKELY(int_value >= 0)) { |
199 | 98 | res_data[i].add(int_value); |
200 | 98 | } else { |
201 | 4 | return Status::InvalidArgument( |
202 | 4 | "The input: {} is not valid, to_bitmap only support bigint value " |
203 | 4 | "from 0 to 18446744073709551615 currently, cannot create MV with " |
204 | 4 | "to_bitmap on column with negative values or cannot load negative " |
205 | 4 | "values to column with to_bitmap MV on it.", |
206 | 4 | int_value); |
207 | 4 | } |
208 | 102 | } |
209 | 123 | } |
210 | | } else { |
211 | | return Status::InvalidArgument("not support type"); |
212 | | } |
213 | 92 | return Status::OK(); |
214 | 96 | } Unexecuted instantiation: _ZN5doris17ToBitmapWithCheck7executeINS_9ColumnStrIjEELb1EEENS_6StatusEPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE _ZN5doris17ToBitmapWithCheck7executeINS_12ColumnVectorILNS_13PrimitiveTypeE6EEELb1EEENS_6StatusEPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISH_EE Line | Count | Source | 158 | 46 | MutableColumnPtr& col_res) { | 159 | | if constexpr (std::is_same_v<ColumnType, ColumnString>) { | 160 | | const ColumnString::Chars& data = col->get_chars(); | 161 | | const ColumnString::Offsets& offsets = col->get_offsets(); | 162 | | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); | 163 | | auto& res_data = res_column->get_data(); | 164 | | size_t size = offsets.size(); | 165 | | | 166 | | for (size_t i = 0; i < size; ++i) { | 167 | | if (arg_is_nullable && ((*nullmap)[i])) { | 168 | | continue; | 169 | | } else { | 170 | | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); | 171 | | // The string lenght is less than 2G, so that cast the str size to int, not use size_t | 172 | | int str_size = cast_set<int>(offsets[i] - offsets[i - 1]); | 173 | | StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; | 174 | | uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>( | 175 | | raw_str, str_size, &parse_result); | 176 | | if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) { | 177 | | res_data[i].add(int_value); | 178 | | } else { | 179 | | return Status::InvalidArgument( | 180 | | "The input: {} is not valid, to_bitmap only support bigint value " | 181 | | "from 0 to 18446744073709551615 currently, cannot create MV with " | 182 | | "to_bitmap on column with negative values or cannot load negative " | 183 | | "values to column with to_bitmap MV on it.", | 184 | | 
std::string(raw_str, str_size)); | 185 | | } | 186 | | } | 187 | | } | 188 | 46 | } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) { | 189 | 46 | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); | 190 | 46 | auto& res_data = res_column->get_data(); | 191 | 46 | size_t size = col->size(); | 192 | | | 193 | 111 | for (size_t i = 0; i < size; ++i) { | 194 | 65 | if (arg_is_nullable && ((*nullmap)[i])) { | 195 | 21 | continue; | 196 | 44 | } else { | 197 | 44 | int64_t int_value = col->get_data()[i]; | 198 | 44 | if (LIKELY(int_value >= 0)) { | 199 | 44 | res_data[i].add(int_value); | 200 | 44 | } else { | 201 | 0 | return Status::InvalidArgument( | 202 | 0 | "The input: {} is not valid, to_bitmap only support bigint value " | 203 | 0 | "from 0 to 18446744073709551615 currently, cannot create MV with " | 204 | 0 | "to_bitmap on column with negative values or cannot load negative " | 205 | 0 | "values to column with to_bitmap MV on it.", | 206 | 0 | int_value); | 207 | 0 | } | 208 | 44 | } | 209 | 65 | } | 210 | | } else { | 211 | | return Status::InvalidArgument("not support type"); | 212 | | } | 213 | 46 | return Status::OK(); | 214 | 46 | } |
Unexecuted instantiation: _ZN5doris17ToBitmapWithCheck7executeINS_9ColumnStrIjEELb0EEENS_6StatusEPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE _ZN5doris17ToBitmapWithCheck7executeINS_12ColumnVectorILNS_13PrimitiveTypeE6EEELb0EEENS_6StatusEPKT_PKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISH_EE Line | Count | Source | 158 | 50 | MutableColumnPtr& col_res) { | 159 | | if constexpr (std::is_same_v<ColumnType, ColumnString>) { | 160 | | const ColumnString::Chars& data = col->get_chars(); | 161 | | const ColumnString::Offsets& offsets = col->get_offsets(); | 162 | | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); | 163 | | auto& res_data = res_column->get_data(); | 164 | | size_t size = offsets.size(); | 165 | | | 166 | | for (size_t i = 0; i < size; ++i) { | 167 | | if (arg_is_nullable && ((*nullmap)[i])) { | 168 | | continue; | 169 | | } else { | 170 | | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); | 171 | | // The string lenght is less than 2G, so that cast the str size to int, not use size_t | 172 | | int str_size = cast_set<int>(offsets[i] - offsets[i - 1]); | 173 | | StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; | 174 | | uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>( | 175 | | raw_str, str_size, &parse_result); | 176 | | if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) { | 177 | | res_data[i].add(int_value); | 178 | | } else { | 179 | | return Status::InvalidArgument( | 180 | | "The input: {} is not valid, to_bitmap only support bigint value " | 181 | | "from 0 to 18446744073709551615 currently, cannot create MV with " | 182 | | "to_bitmap on column with negative values or cannot load negative " | 183 | | "values to column with to_bitmap MV on it.", | 184 | | std::string(raw_str, 
str_size)); | 185 | | } | 186 | | } | 187 | | } | 188 | 50 | } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) { | 189 | 50 | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); | 190 | 50 | auto& res_data = res_column->get_data(); | 191 | 50 | size_t size = col->size(); | 192 | | | 193 | 104 | for (size_t i = 0; i < size; ++i) { | 194 | 58 | if (arg_is_nullable && ((*nullmap)[i])) { | 195 | 0 | continue; | 196 | 58 | } else { | 197 | 58 | int64_t int_value = col->get_data()[i]; | 198 | 58 | if (LIKELY(int_value >= 0)) { | 199 | 54 | res_data[i].add(int_value); | 200 | 54 | } else { | 201 | 4 | return Status::InvalidArgument( | 202 | 4 | "The input: {} is not valid, to_bitmap only support bigint value " | 203 | 4 | "from 0 to 18446744073709551615 currently, cannot create MV with " | 204 | 4 | "to_bitmap on column with negative values or cannot load negative " | 205 | 4 | "values to column with to_bitmap MV on it.", | 206 | 4 | int_value); | 207 | 4 | } | 208 | 58 | } | 209 | 58 | } | 210 | | } else { | 211 | | return Status::InvalidArgument("not support type"); | 212 | | } | 213 | 46 | return Status::OK(); | 214 | 50 | } |
|
215 | | }; |
216 | | |
217 | | struct BitmapFromString { |
218 | | using ArgumentType = DataTypeString; |
219 | | |
220 | | static constexpr auto name = "bitmap_from_string"; |
221 | | |
222 | | static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
223 | | std::vector<BitmapValue>& res, NullMap& null_map, |
224 | 399 | size_t input_rows_count) { |
225 | 399 | res.reserve(input_rows_count); |
226 | 399 | std::vector<uint64_t> bits; |
227 | 399 | if (offsets.size() == 0 && input_rows_count == 1) { |
228 | | // For NULL constant |
229 | 0 | res.emplace_back(); |
230 | 0 | null_map[0] = 1; |
231 | 0 | return Status::OK(); |
232 | 0 | } |
233 | | |
234 | 488 | auto split_and_parse = [&bits](const char* raw_str, size_t str_size) { |
235 | 488 | bits.clear(); |
236 | 488 | auto res = absl::StrSplit(std::string_view {raw_str, str_size}, ",", absl::SkipEmpty()); |
237 | 488 | uint64_t value = 0; |
238 | 1.64k | for (auto s : res) { |
239 | 1.64k | if (!absl::SimpleAtoi(s, &value)) { |
240 | 76 | return false; |
241 | 76 | } |
242 | 1.56k | bits.push_back(value); |
243 | 1.56k | } |
244 | 412 | return true; |
245 | 488 | }; |
246 | | |
247 | | // split by comma |
248 | | |
249 | 887 | for (size_t i = 0; i < input_rows_count; ++i) { |
250 | 488 | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
251 | 488 | int64_t str_size = offsets[i] - offsets[i - 1]; |
252 | | |
253 | 488 | if ((str_size > INT32_MAX) || !split_and_parse(raw_str, str_size)) { |
254 | 76 | res.emplace_back(); |
255 | 76 | null_map[i] = 1; |
256 | 76 | continue; |
257 | 76 | } |
258 | 412 | res.emplace_back(bits); |
259 | 412 | } |
260 | 399 | return Status::OK(); |
261 | 399 | } |
262 | | }; |
263 | | |
/// Name tag carrying the SQL-visible function name `bitmap_from_base64`.
struct NameBitmapFromBase64 {
    static constexpr auto name = "bitmap_from_base64";
};
267 | | struct BitmapFromBase64 { |
268 | | using ArgumentType = DataTypeString; |
269 | | |
270 | | static constexpr auto name = "bitmap_from_base64"; |
271 | | |
272 | | static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
273 | | std::vector<BitmapValue>& res, NullMap& null_map, |
274 | 18 | size_t input_rows_count) { |
275 | 18 | res.reserve(input_rows_count); |
276 | 18 | if (offsets.size() == 0 && input_rows_count == 1) { |
277 | | // For NULL constant |
278 | 0 | res.emplace_back(); |
279 | 0 | null_map[0] = 1; |
280 | 0 | return Status::OK(); |
281 | 0 | } |
282 | 18 | std::string decode_buff; |
283 | 18 | size_t last_decode_buff_len = 0; |
284 | 18 | size_t curr_decode_buff_len = 0; |
285 | 58 | for (size_t i = 0; i < input_rows_count; ++i) { |
286 | 41 | const char* src_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
287 | 41 | size_t src_size = offsets[i] - offsets[i - 1]; |
288 | 41 | if (0 != src_size % 4) { |
289 | | // return Status::InvalidArgument( |
290 | | // fmt::format("invalid base64: {}", std::string(src_str, src_size))); |
291 | 0 | res.emplace_back(); |
292 | 0 | null_map[i] = 1; |
293 | 0 | continue; |
294 | 0 | } |
295 | 41 | curr_decode_buff_len = src_size + 3; |
296 | 41 | if (curr_decode_buff_len > last_decode_buff_len) { |
297 | 29 | decode_buff.resize(curr_decode_buff_len); |
298 | 29 | last_decode_buff_len = curr_decode_buff_len; |
299 | 29 | } |
300 | 41 | auto outlen = base64_decode(src_str, src_size, decode_buff.data()); |
301 | 41 | if (outlen < 0) { |
302 | 0 | res.emplace_back(); |
303 | 0 | null_map[i] = 1; |
304 | 41 | } else { |
305 | 41 | BitmapValue bitmap_val; |
306 | 41 | if (!bitmap_val.deserialize(decode_buff.data())) { |
307 | 1 | return Status::RuntimeError("bitmap_from_base64 decode failed: base64: {}", |
308 | 1 | std::string(src_str, src_size)); |
309 | 1 | } |
310 | 40 | res.emplace_back(std::move(bitmap_val)); |
311 | 40 | } |
312 | 41 | } |
313 | 17 | return Status::OK(); |
314 | 18 | } |
315 | | }; |
316 | | struct BitmapFromArray { |
317 | | using ArgumentType = DataTypeArray; |
318 | | static constexpr auto name = "bitmap_from_array"; |
319 | | |
320 | | template <typename ColumnType> |
321 | | static Status vector(const ColumnArray::Offsets64& offset_column_data, |
322 | | const IColumn& nested_column, const NullMap& nested_null_map, |
323 | 13 | std::vector<BitmapValue>& res, NullMap& null_map) { |
324 | 13 | const auto& nested_column_data = static_cast<const ColumnType&>(nested_column).get_data(); |
325 | 13 | auto size = offset_column_data.size(); |
326 | 13 | res.reserve(size); |
327 | 13 | std::vector<uint64_t> bits; |
328 | 36 | for (size_t i = 0; i < size; ++i) { |
329 | 23 | auto curr_offset = offset_column_data[i]; |
330 | 23 | auto prev_offset = offset_column_data[i - 1]; |
331 | 92 | for (auto j = prev_offset; j < curr_offset; ++j) { |
332 | 71 | auto data = nested_column_data[j]; |
333 | | // invaild value |
334 | 71 | if (UNLIKELY(data < 0) || UNLIKELY(nested_null_map[j])) { |
335 | 2 | res.emplace_back(); |
336 | 2 | null_map[i] = 1; |
337 | 2 | break; |
338 | 69 | } else { |
339 | 69 | bits.push_back(data); |
340 | 69 | } |
341 | 71 | } |
342 | | //input is valid value |
343 | 23 | if (!null_map[i]) { |
344 | 21 | res.emplace_back(bits); |
345 | 21 | } |
346 | 23 | bits.clear(); |
347 | 23 | } |
348 | 13 | return Status::OK(); |
349 | 13 | } _ZN5doris15BitmapFromArray6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE3EEEEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_7IColumnERKNS6_IhLm4096ES9_Lm16ELm15EEERSt6vectorINS_11BitmapValueESaISK_EERSG_ Line | Count | Source | 323 | 3 | std::vector<BitmapValue>& res, NullMap& null_map) { | 324 | 3 | const auto& nested_column_data = static_cast<const ColumnType&>(nested_column).get_data(); | 325 | 3 | auto size = offset_column_data.size(); | 326 | 3 | res.reserve(size); | 327 | 3 | std::vector<uint64_t> bits; | 328 | 6 | for (size_t i = 0; i < size; ++i) { | 329 | 3 | auto curr_offset = offset_column_data[i]; | 330 | 3 | auto prev_offset = offset_column_data[i - 1]; | 331 | 16 | for (auto j = prev_offset; j < curr_offset; ++j) { | 332 | 13 | auto data = nested_column_data[j]; | 333 | | // invaild value | 334 | 13 | if (UNLIKELY(data < 0) || UNLIKELY(nested_null_map[j])) { | 335 | 0 | res.emplace_back(); | 336 | 0 | null_map[i] = 1; | 337 | 0 | break; | 338 | 13 | } else { | 339 | 13 | bits.push_back(data); | 340 | 13 | } | 341 | 13 | } | 342 | | //input is valid value | 343 | 3 | if (!null_map[i]) { | 344 | 3 | res.emplace_back(bits); | 345 | 3 | } | 346 | 3 | bits.clear(); | 347 | 3 | } | 348 | 3 | return Status::OK(); | 349 | 3 | } |
Unexecuted instantiation: _ZN5doris15BitmapFromArray6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE2EEEEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_7IColumnERKNS6_IhLm4096ES9_Lm16ELm15EEERSt6vectorINS_11BitmapValueESaISK_EERSG_ Unexecuted instantiation: _ZN5doris15BitmapFromArray6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE4EEEEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_7IColumnERKNS6_IhLm4096ES9_Lm16ELm15EEERSt6vectorINS_11BitmapValueESaISK_EERSG_ _ZN5doris15BitmapFromArray6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE5EEEEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_7IColumnERKNS6_IhLm4096ES9_Lm16ELm15EEERSt6vectorINS_11BitmapValueESaISK_EERSG_ Line | Count | Source | 323 | 3 | std::vector<BitmapValue>& res, NullMap& null_map) { | 324 | 3 | const auto& nested_column_data = static_cast<const ColumnType&>(nested_column).get_data(); | 325 | 3 | auto size = offset_column_data.size(); | 326 | 3 | res.reserve(size); | 327 | 3 | std::vector<uint64_t> bits; | 328 | 14 | for (size_t i = 0; i < size; ++i) { | 329 | 11 | auto curr_offset = offset_column_data[i]; | 330 | 11 | auto prev_offset = offset_column_data[i - 1]; | 331 | 56 | for (auto j = prev_offset; j < curr_offset; ++j) { | 332 | 47 | auto data = nested_column_data[j]; | 333 | | // invaild value | 334 | 47 | if (UNLIKELY(data < 0) || UNLIKELY(nested_null_map[j])) { | 335 | 2 | res.emplace_back(); | 336 | 2 | null_map[i] = 1; | 337 | 2 | break; | 338 | 45 | } else { | 339 | 45 | bits.push_back(data); | 340 | 45 | } | 341 | 47 | } | 342 | | //input is valid value | 343 | 11 | if (!null_map[i]) { | 344 | 9 | res.emplace_back(bits); | 345 | 9 | } | 346 | 11 | bits.clear(); | 347 | 11 | } | 348 | 3 | return Status::OK(); | 349 | 3 | } |
_ZN5doris15BitmapFromArray6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_7IColumnERKNS6_IhLm4096ES9_Lm16ELm15EEERSt6vectorINS_11BitmapValueESaISK_EERSG_ Line | Count | Source | 323 | 7 | std::vector<BitmapValue>& res, NullMap& null_map) { | 324 | 7 | const auto& nested_column_data = static_cast<const ColumnType&>(nested_column).get_data(); | 325 | 7 | auto size = offset_column_data.size(); | 326 | 7 | res.reserve(size); | 327 | 7 | std::vector<uint64_t> bits; | 328 | 16 | for (size_t i = 0; i < size; ++i) { | 329 | 9 | auto curr_offset = offset_column_data[i]; | 330 | 9 | auto prev_offset = offset_column_data[i - 1]; | 331 | 20 | for (auto j = prev_offset; j < curr_offset; ++j) { | 332 | 11 | auto data = nested_column_data[j]; | 333 | | // invaild value | 334 | 11 | if (UNLIKELY(data < 0) || UNLIKELY(nested_null_map[j])) { | 335 | 0 | res.emplace_back(); | 336 | 0 | null_map[i] = 1; | 337 | 0 | break; | 338 | 11 | } else { | 339 | 11 | bits.push_back(data); | 340 | 11 | } | 341 | 11 | } | 342 | | //input is valid value | 343 | 9 | if (!null_map[i]) { | 344 | 9 | res.emplace_back(bits); | 345 | 9 | } | 346 | 9 | bits.clear(); | 347 | 9 | } | 348 | 7 | return Status::OK(); | 349 | 7 | } |
|
350 | | }; |
351 | | |
352 | | template <typename Impl> |
353 | | class FunctionBitmapAlwaysNull : public IFunction { |
354 | | public: |
355 | | static constexpr auto name = Impl::name; |
356 | | |
357 | 3 | String get_name() const override { return name; }_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromStringEE8get_nameB5cxx11Ev Line | Count | Source | 357 | 1 | String get_name() const override { return name; } |
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromBase64EE8get_nameB5cxx11Ev Line | Count | Source | 357 | 1 | String get_name() const override { return name; } |
_ZNK5doris24FunctionBitmapAlwaysNullINS_15BitmapFromArrayEE8get_nameB5cxx11Ev Line | Count | Source | 357 | 1 | String get_name() const override { return name; } |
|
358 | | |
359 | 440 | static FunctionPtr create() { return std::make_shared<FunctionBitmapAlwaysNull>(); }_ZN5doris24FunctionBitmapAlwaysNullINS_16BitmapFromStringEE6createEv Line | Count | Source | 359 | 392 | static FunctionPtr create() { return std::make_shared<FunctionBitmapAlwaysNull>(); } |
_ZN5doris24FunctionBitmapAlwaysNullINS_16BitmapFromBase64EE6createEv Line | Count | Source | 359 | 26 | static FunctionPtr create() { return std::make_shared<FunctionBitmapAlwaysNull>(); } |
_ZN5doris24FunctionBitmapAlwaysNullINS_15BitmapFromArrayEE6createEv Line | Count | Source | 359 | 22 | static FunctionPtr create() { return std::make_shared<FunctionBitmapAlwaysNull>(); } |
|
360 | | |
361 | 413 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
362 | 413 | return make_nullable(std::make_shared<DataTypeBitMap>()); |
363 | 413 | } _ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromStringEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 361 | 383 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 362 | 383 | return make_nullable(std::make_shared<DataTypeBitMap>()); | 363 | 383 | } |
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromBase64EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 361 | 17 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 362 | 17 | return make_nullable(std::make_shared<DataTypeBitMap>()); | 363 | 17 | } |
_ZNK5doris24FunctionBitmapAlwaysNullINS_15BitmapFromArrayEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 361 | 13 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 362 | 13 | return make_nullable(std::make_shared<DataTypeBitMap>()); | 363 | 13 | } |
|
364 | | |
365 | 413 | size_t get_number_of_arguments() const override { return 1; }_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromStringEE23get_number_of_argumentsEv Line | Count | Source | 365 | 383 | size_t get_number_of_arguments() const override { return 1; } |
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromBase64EE23get_number_of_argumentsEv Line | Count | Source | 365 | 17 | size_t get_number_of_arguments() const override { return 1; } |
_ZNK5doris24FunctionBitmapAlwaysNullINS_15BitmapFromArrayEE23get_number_of_argumentsEv Line | Count | Source | 365 | 13 | size_t get_number_of_arguments() const override { return 1; } |
|
366 | | |
367 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
368 | 430 | uint32_t result, size_t input_rows_count) const override { |
369 | 430 | auto res_null_map = ColumnUInt8::create(input_rows_count, 0); |
370 | 430 | auto res_data_column = ColumnBitmap::create(); |
371 | 430 | auto& null_map = res_null_map->get_data(); |
372 | 430 | auto& res = res_data_column->get_data(); |
373 | | |
374 | 430 | ColumnPtr& argument_column = block.get_by_position(arguments[0]).column; |
375 | 430 | if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeString>) { |
376 | 417 | const auto& str_column = static_cast<const ColumnString&>(*argument_column); |
377 | 417 | const ColumnString::Chars& data = str_column.get_chars(); |
378 | 417 | const ColumnString::Offsets& offsets = str_column.get_offsets(); |
379 | 417 | RETURN_IF_ERROR(Impl::vector(data, offsets, res, null_map, input_rows_count)); |
380 | 417 | } else if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeArray>) { |
381 | 13 | auto argument_type = remove_nullable( |
382 | 13 | assert_cast<const DataTypeArray&>(*block.get_by_position(arguments[0]).type) |
383 | 13 | .get_nested_type()); |
384 | 13 | const auto& array_column = static_cast<const ColumnArray&>(*argument_column); |
385 | 13 | const auto& offset_column_data = array_column.get_offsets(); |
386 | 13 | const auto& nested_nullable_column = |
387 | 13 | static_cast<const ColumnNullable&>(array_column.get_data()); |
388 | 13 | const auto& nested_column = nested_nullable_column.get_nested_column(); |
389 | 13 | const auto& nested_null_map = nested_nullable_column.get_null_map_column().get_data(); |
390 | | |
391 | 13 | switch (argument_type->get_primitive_type()) { |
392 | 3 | case PrimitiveType::TYPE_TINYINT: |
393 | 3 | RETURN_IF_ERROR(Impl::template vector<ColumnInt8>(offset_column_data, nested_column, |
394 | 3 | nested_null_map, res, null_map)); |
395 | 3 | break; |
396 | 3 | case PrimitiveType::TYPE_BOOLEAN: |
397 | 0 | RETURN_IF_ERROR(Impl::template vector<ColumnUInt8>( |
398 | 0 | offset_column_data, nested_column, nested_null_map, res, null_map)); |
399 | 0 | break; |
400 | 0 | case PrimitiveType::TYPE_SMALLINT: |
401 | 0 | RETURN_IF_ERROR(Impl::template vector<ColumnInt16>( |
402 | 0 | offset_column_data, nested_column, nested_null_map, res, null_map)); |
403 | 0 | break; |
404 | 3 | case PrimitiveType::TYPE_INT: |
405 | 3 | RETURN_IF_ERROR(Impl::template vector<ColumnInt32>( |
406 | 3 | offset_column_data, nested_column, nested_null_map, res, null_map)); |
407 | 3 | break; |
408 | 7 | case PrimitiveType::TYPE_BIGINT: |
409 | 7 | RETURN_IF_ERROR(Impl::template vector<ColumnInt64>( |
410 | 7 | offset_column_data, nested_column, nested_null_map, res, null_map)); |
411 | 7 | break; |
412 | 7 | default: |
413 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", |
414 | 0 | block.get_by_position(arguments[0]).column->get_name(), |
415 | 0 | get_name()); |
416 | 13 | } |
417 | | } else { |
418 | | return Status::RuntimeError("Illegal column {} of argument of function {}", |
419 | | block.get_by_position(arguments[0]).column->get_name(), |
420 | | get_name()); |
421 | | } |
422 | 429 | block.get_by_position(result).column = |
423 | 430 | ColumnNullable::create(std::move(res_data_column), std::move(res_null_map)); |
424 | 430 | return Status::OK(); |
425 | 430 | } _ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromStringEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 368 | 399 | uint32_t result, size_t input_rows_count) const override { | 369 | 399 | auto res_null_map = ColumnUInt8::create(input_rows_count, 0); | 370 | 399 | auto res_data_column = ColumnBitmap::create(); | 371 | 399 | auto& null_map = res_null_map->get_data(); | 372 | 399 | auto& res = res_data_column->get_data(); | 373 | | | 374 | 399 | ColumnPtr& argument_column = block.get_by_position(arguments[0]).column; | 375 | 399 | if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeString>) { | 376 | 399 | const auto& str_column = static_cast<const ColumnString&>(*argument_column); | 377 | 399 | const ColumnString::Chars& data = str_column.get_chars(); | 378 | 399 | const ColumnString::Offsets& offsets = str_column.get_offsets(); | 379 | 399 | RETURN_IF_ERROR(Impl::vector(data, offsets, res, null_map, input_rows_count)); | 380 | | } else if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeArray>) { | 381 | | auto argument_type = remove_nullable( | 382 | | assert_cast<const DataTypeArray&>(*block.get_by_position(arguments[0]).type) | 383 | | .get_nested_type()); | 384 | | const auto& array_column = static_cast<const ColumnArray&>(*argument_column); | 385 | | const auto& offset_column_data = array_column.get_offsets(); | 386 | | const auto& nested_nullable_column = | 387 | | static_cast<const ColumnNullable&>(array_column.get_data()); | 388 | | const auto& nested_column = nested_nullable_column.get_nested_column(); | 389 | | const auto& nested_null_map = nested_nullable_column.get_null_map_column().get_data(); | 390 | | | 391 | | switch (argument_type->get_primitive_type()) { | 392 | | case PrimitiveType::TYPE_TINYINT: | 393 | | RETURN_IF_ERROR(Impl::template vector<ColumnInt8>(offset_column_data, nested_column, | 394 | | nested_null_map, res, null_map)); | 395 | | break; | 396 
| | case PrimitiveType::TYPE_BOOLEAN: | 397 | | RETURN_IF_ERROR(Impl::template vector<ColumnUInt8>( | 398 | | offset_column_data, nested_column, nested_null_map, res, null_map)); | 399 | | break; | 400 | | case PrimitiveType::TYPE_SMALLINT: | 401 | | RETURN_IF_ERROR(Impl::template vector<ColumnInt16>( | 402 | | offset_column_data, nested_column, nested_null_map, res, null_map)); | 403 | | break; | 404 | | case PrimitiveType::TYPE_INT: | 405 | | RETURN_IF_ERROR(Impl::template vector<ColumnInt32>( | 406 | | offset_column_data, nested_column, nested_null_map, res, null_map)); | 407 | | break; | 408 | | case PrimitiveType::TYPE_BIGINT: | 409 | | RETURN_IF_ERROR(Impl::template vector<ColumnInt64>( | 410 | | offset_column_data, nested_column, nested_null_map, res, null_map)); | 411 | | break; | 412 | | default: | 413 | | return Status::RuntimeError("Illegal column {} of argument of function {}", | 414 | | block.get_by_position(arguments[0]).column->get_name(), | 415 | | get_name()); | 416 | | } | 417 | | } else { | 418 | | return Status::RuntimeError("Illegal column {} of argument of function {}", | 419 | | block.get_by_position(arguments[0]).column->get_name(), | 420 | | get_name()); | 421 | | } | 422 | 399 | block.get_by_position(result).column = | 423 | 399 | ColumnNullable::create(std::move(res_data_column), std::move(res_null_map)); | 424 | 399 | return Status::OK(); | 425 | 399 | } |
_ZNK5doris24FunctionBitmapAlwaysNullINS_16BitmapFromBase64EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 368 | 18 | uint32_t result, size_t input_rows_count) const override { | 369 | 18 | auto res_null_map = ColumnUInt8::create(input_rows_count, 0); | 370 | 18 | auto res_data_column = ColumnBitmap::create(); | 371 | 18 | auto& null_map = res_null_map->get_data(); | 372 | 18 | auto& res = res_data_column->get_data(); | 373 | | | 374 | 18 | ColumnPtr& argument_column = block.get_by_position(arguments[0]).column; | 375 | 18 | if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeString>) { | 376 | 18 | const auto& str_column = static_cast<const ColumnString&>(*argument_column); | 377 | 18 | const ColumnString::Chars& data = str_column.get_chars(); | 378 | 18 | const ColumnString::Offsets& offsets = str_column.get_offsets(); | 379 | 18 | RETURN_IF_ERROR(Impl::vector(data, offsets, res, null_map, input_rows_count)); | 380 | | } else if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeArray>) { | 381 | | auto argument_type = remove_nullable( | 382 | | assert_cast<const DataTypeArray&>(*block.get_by_position(arguments[0]).type) | 383 | | .get_nested_type()); | 384 | | const auto& array_column = static_cast<const ColumnArray&>(*argument_column); | 385 | | const auto& offset_column_data = array_column.get_offsets(); | 386 | | const auto& nested_nullable_column = | 387 | | static_cast<const ColumnNullable&>(array_column.get_data()); | 388 | | const auto& nested_column = nested_nullable_column.get_nested_column(); | 389 | | const auto& nested_null_map = nested_nullable_column.get_null_map_column().get_data(); | 390 | | | 391 | | switch (argument_type->get_primitive_type()) { | 392 | | case PrimitiveType::TYPE_TINYINT: | 393 | | RETURN_IF_ERROR(Impl::template vector<ColumnInt8>(offset_column_data, nested_column, | 394 | | nested_null_map, res, null_map)); | 395 | | break; | 396 | | case 
PrimitiveType::TYPE_BOOLEAN: | 397 | | RETURN_IF_ERROR(Impl::template vector<ColumnUInt8>( | 398 | | offset_column_data, nested_column, nested_null_map, res, null_map)); | 399 | | break; | 400 | | case PrimitiveType::TYPE_SMALLINT: | 401 | | RETURN_IF_ERROR(Impl::template vector<ColumnInt16>( | 402 | | offset_column_data, nested_column, nested_null_map, res, null_map)); | 403 | | break; | 404 | | case PrimitiveType::TYPE_INT: | 405 | | RETURN_IF_ERROR(Impl::template vector<ColumnInt32>( | 406 | | offset_column_data, nested_column, nested_null_map, res, null_map)); | 407 | | break; | 408 | | case PrimitiveType::TYPE_BIGINT: | 409 | | RETURN_IF_ERROR(Impl::template vector<ColumnInt64>( | 410 | | offset_column_data, nested_column, nested_null_map, res, null_map)); | 411 | | break; | 412 | | default: | 413 | | return Status::RuntimeError("Illegal column {} of argument of function {}", | 414 | | block.get_by_position(arguments[0]).column->get_name(), | 415 | | get_name()); | 416 | | } | 417 | | } else { | 418 | | return Status::RuntimeError("Illegal column {} of argument of function {}", | 419 | | block.get_by_position(arguments[0]).column->get_name(), | 420 | | get_name()); | 421 | | } | 422 | 17 | block.get_by_position(result).column = | 423 | 18 | ColumnNullable::create(std::move(res_data_column), std::move(res_null_map)); | 424 | 18 | return Status::OK(); | 425 | 18 | } |
_ZNK5doris24FunctionBitmapAlwaysNullINS_15BitmapFromArrayEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 368 | 13 | uint32_t result, size_t input_rows_count) const override { | 369 | 13 | auto res_null_map = ColumnUInt8::create(input_rows_count, 0); | 370 | 13 | auto res_data_column = ColumnBitmap::create(); | 371 | 13 | auto& null_map = res_null_map->get_data(); | 372 | 13 | auto& res = res_data_column->get_data(); | 373 | | | 374 | 13 | ColumnPtr& argument_column = block.get_by_position(arguments[0]).column; | 375 | | if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeString>) { | 376 | | const auto& str_column = static_cast<const ColumnString&>(*argument_column); | 377 | | const ColumnString::Chars& data = str_column.get_chars(); | 378 | | const ColumnString::Offsets& offsets = str_column.get_offsets(); | 379 | | RETURN_IF_ERROR(Impl::vector(data, offsets, res, null_map, input_rows_count)); | 380 | 13 | } else if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeArray>) { | 381 | 13 | auto argument_type = remove_nullable( | 382 | 13 | assert_cast<const DataTypeArray&>(*block.get_by_position(arguments[0]).type) | 383 | 13 | .get_nested_type()); | 384 | 13 | const auto& array_column = static_cast<const ColumnArray&>(*argument_column); | 385 | 13 | const auto& offset_column_data = array_column.get_offsets(); | 386 | 13 | const auto& nested_nullable_column = | 387 | 13 | static_cast<const ColumnNullable&>(array_column.get_data()); | 388 | 13 | const auto& nested_column = nested_nullable_column.get_nested_column(); | 389 | 13 | const auto& nested_null_map = nested_nullable_column.get_null_map_column().get_data(); | 390 | | | 391 | 13 | switch (argument_type->get_primitive_type()) { | 392 | 3 | case PrimitiveType::TYPE_TINYINT: | 393 | 3 | RETURN_IF_ERROR(Impl::template vector<ColumnInt8>(offset_column_data, nested_column, | 394 | 3 | nested_null_map, res, null_map)); | 395 | 3 | break; | 396 
| 3 | case PrimitiveType::TYPE_BOOLEAN: | 397 | 0 | RETURN_IF_ERROR(Impl::template vector<ColumnUInt8>( | 398 | 0 | offset_column_data, nested_column, nested_null_map, res, null_map)); | 399 | 0 | break; | 400 | 0 | case PrimitiveType::TYPE_SMALLINT: | 401 | 0 | RETURN_IF_ERROR(Impl::template vector<ColumnInt16>( | 402 | 0 | offset_column_data, nested_column, nested_null_map, res, null_map)); | 403 | 0 | break; | 404 | 3 | case PrimitiveType::TYPE_INT: | 405 | 3 | RETURN_IF_ERROR(Impl::template vector<ColumnInt32>( | 406 | 3 | offset_column_data, nested_column, nested_null_map, res, null_map)); | 407 | 3 | break; | 408 | 7 | case PrimitiveType::TYPE_BIGINT: | 409 | 7 | RETURN_IF_ERROR(Impl::template vector<ColumnInt64>( | 410 | 7 | offset_column_data, nested_column, nested_null_map, res, null_map)); | 411 | 7 | break; | 412 | 7 | default: | 413 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 414 | 0 | block.get_by_position(arguments[0]).column->get_name(), | 415 | 0 | get_name()); | 416 | 13 | } | 417 | | } else { | 418 | | return Status::RuntimeError("Illegal column {} of argument of function {}", | 419 | | block.get_by_position(arguments[0]).column->get_name(), | 420 | | get_name()); | 421 | | } | 422 | 13 | block.get_by_position(result).column = | 423 | 13 | ColumnNullable::create(std::move(res_data_column), std::move(res_null_map)); | 424 | 13 | return Status::OK(); | 425 | 13 | } |
|
426 | | }; |
427 | | |
428 | | template <int HashBits> |
429 | | struct BitmapHashName {}; |
430 | | |
431 | | template <> |
432 | | struct BitmapHashName<32> { |
433 | | static constexpr auto name = "bitmap_hash"; |
434 | | }; |
435 | | |
436 | | template <> |
437 | | struct BitmapHashName<64> { |
438 | | static constexpr auto name = "bitmap_hash64"; |
439 | | }; |
440 | | |
441 | | template <int HashBits> |
442 | | struct BitmapHash { |
443 | | static constexpr auto name = BitmapHashName<HashBits>::name; |
444 | | |
445 | | using ReturnType = DataTypeBitMap; |
446 | | |
447 | | template <typename ColumnType> |
448 | 151 | static void vector(const ColumnType* col, MutableColumnPtr& col_res) { |
449 | 151 | if constexpr (std::is_same_v<ColumnType, ColumnString>) { |
450 | 151 | const ColumnString::Chars& data = col->get_chars(); |
451 | 151 | const ColumnString::Offsets& offsets = col->get_offsets(); |
452 | 151 | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); |
453 | 151 | auto& res_data = res_column->get_data(); |
454 | 151 | size_t size = offsets.size(); |
455 | | |
456 | 462 | for (size_t i = 0; i < size; ++i) { |
457 | 311 | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
458 | 311 | size_t str_size = offsets[i] - offsets[i - 1]; |
459 | 311 | if constexpr (HashBits == 32) { |
460 | 277 | uint32_t hash_value = |
461 | 277 | HashUtil::murmur_hash3_32(raw_str, str_size, HashUtil::MURMUR3_32_SEED); |
462 | 277 | res_data[i].add(hash_value); |
463 | 277 | } else { |
464 | 34 | uint64_t hash_value = 0; |
465 | 34 | murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value); |
466 | 34 | res_data[i].add(hash_value); |
467 | 34 | } |
468 | 311 | } |
469 | 151 | } |
470 | 151 | } _ZN5doris10BitmapHashILi32EE6vectorINS_9ColumnStrIjEEEEvPKT_RNS_3COWINS_7IColumnEE11mutable_ptrIS9_EE Line | Count | Source | 448 | 133 | static void vector(const ColumnType* col, MutableColumnPtr& col_res) { | 449 | 133 | if constexpr (std::is_same_v<ColumnType, ColumnString>) { | 450 | 133 | const ColumnString::Chars& data = col->get_chars(); | 451 | 133 | const ColumnString::Offsets& offsets = col->get_offsets(); | 452 | 133 | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); | 453 | 133 | auto& res_data = res_column->get_data(); | 454 | 133 | size_t size = offsets.size(); | 455 | | | 456 | 410 | for (size_t i = 0; i < size; ++i) { | 457 | 277 | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); | 458 | 277 | size_t str_size = offsets[i] - offsets[i - 1]; | 459 | 277 | if constexpr (HashBits == 32) { | 460 | 277 | uint32_t hash_value = | 461 | 277 | HashUtil::murmur_hash3_32(raw_str, str_size, HashUtil::MURMUR3_32_SEED); | 462 | 277 | res_data[i].add(hash_value); | 463 | | } else { | 464 | | uint64_t hash_value = 0; | 465 | | murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value); | 466 | | res_data[i].add(hash_value); | 467 | | } | 468 | 277 | } | 469 | 133 | } | 470 | 133 | } |
Unexecuted instantiation: _ZN5doris10BitmapHashILi32EE6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvPKT_RNS_3COWINS_7IColumnEE11mutable_ptrISA_EE _ZN5doris10BitmapHashILi64EE6vectorINS_9ColumnStrIjEEEEvPKT_RNS_3COWINS_7IColumnEE11mutable_ptrIS9_EE Line | Count | Source | 448 | 18 | static void vector(const ColumnType* col, MutableColumnPtr& col_res) { | 449 | 18 | if constexpr (std::is_same_v<ColumnType, ColumnString>) { | 450 | 18 | const ColumnString::Chars& data = col->get_chars(); | 451 | 18 | const ColumnString::Offsets& offsets = col->get_offsets(); | 452 | 18 | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); | 453 | 18 | auto& res_data = res_column->get_data(); | 454 | 18 | size_t size = offsets.size(); | 455 | | | 456 | 52 | for (size_t i = 0; i < size; ++i) { | 457 | 34 | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); | 458 | 34 | size_t str_size = offsets[i] - offsets[i - 1]; | 459 | | if constexpr (HashBits == 32) { | 460 | | uint32_t hash_value = | 461 | | HashUtil::murmur_hash3_32(raw_str, str_size, HashUtil::MURMUR3_32_SEED); | 462 | | res_data[i].add(hash_value); | 463 | 34 | } else { | 464 | 34 | uint64_t hash_value = 0; | 465 | 34 | murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value); | 466 | 34 | res_data[i].add(hash_value); | 467 | 34 | } | 468 | 34 | } | 469 | 18 | } | 470 | 18 | } |
Unexecuted instantiation: _ZN5doris10BitmapHashILi64EE6vectorINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvPKT_RNS_3COWINS_7IColumnEE11mutable_ptrISA_EE |
471 | | |
472 | | template <typename ColumnType> |
473 | | static void vector_nullable(const ColumnType* col, const NullMap& nullmap, |
474 | 285 | MutableColumnPtr& col_res) { |
475 | 285 | if constexpr (std::is_same_v<ColumnType, ColumnString>) { |
476 | 285 | const ColumnString::Chars& data = col->get_chars(); |
477 | 285 | const ColumnString::Offsets& offsets = col->get_offsets(); |
478 | 285 | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); |
479 | 285 | auto& res_data = res_column->get_data(); |
480 | 285 | size_t size = offsets.size(); |
481 | | |
482 | 1.09k | for (size_t i = 0; i < size; ++i) { |
483 | 806 | if (nullmap[i]) { |
484 | 59 | continue; |
485 | 747 | } else { |
486 | 747 | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
487 | 747 | size_t str_size = offsets[i] - offsets[i - 1]; |
488 | 747 | if constexpr (HashBits == 32) { |
489 | 721 | uint32_t hash_value = HashUtil::murmur_hash3_32(raw_str, str_size, |
490 | 721 | HashUtil::MURMUR3_32_SEED); |
491 | 721 | res_data[i].add(hash_value); |
492 | 721 | } else { |
493 | 26 | uint64_t hash_value = 0; |
494 | 26 | murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value); |
495 | 26 | res_data[i].add(hash_value); |
496 | 26 | } |
497 | 747 | } |
498 | 806 | } |
499 | 285 | } |
500 | 285 | } _ZN5doris10BitmapHashILi32EE15vector_nullableINS_9ColumnStrIjEEEEvPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE Line | Count | Source | 474 | 275 | MutableColumnPtr& col_res) { | 475 | 275 | if constexpr (std::is_same_v<ColumnType, ColumnString>) { | 476 | 275 | const ColumnString::Chars& data = col->get_chars(); | 477 | 275 | const ColumnString::Offsets& offsets = col->get_offsets(); | 478 | 275 | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); | 479 | 275 | auto& res_data = res_column->get_data(); | 480 | 275 | size_t size = offsets.size(); | 481 | | | 482 | 1.05k | for (size_t i = 0; i < size; ++i) { | 483 | 778 | if (nullmap[i]) { | 484 | 57 | continue; | 485 | 721 | } else { | 486 | 721 | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); | 487 | 721 | size_t str_size = offsets[i] - offsets[i - 1]; | 488 | 721 | if constexpr (HashBits == 32) { | 489 | 721 | uint32_t hash_value = HashUtil::murmur_hash3_32(raw_str, str_size, | 490 | 721 | HashUtil::MURMUR3_32_SEED); | 491 | 721 | res_data[i].add(hash_value); | 492 | | } else { | 493 | | uint64_t hash_value = 0; | 494 | | murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value); | 495 | | res_data[i].add(hash_value); | 496 | | } | 497 | 721 | } | 498 | 778 | } | 499 | 275 | } | 500 | 275 | } |
Unexecuted instantiation: _ZN5doris10BitmapHashILi32EE15vector_nullableINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISH_EE _ZN5doris10BitmapHashILi64EE15vector_nullableINS_9ColumnStrIjEEEEvPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISG_EE Line | Count | Source | 474 | 10 | MutableColumnPtr& col_res) { | 475 | 10 | if constexpr (std::is_same_v<ColumnType, ColumnString>) { | 476 | 10 | const ColumnString::Chars& data = col->get_chars(); | 477 | 10 | const ColumnString::Offsets& offsets = col->get_offsets(); | 478 | 10 | auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); | 479 | 10 | auto& res_data = res_column->get_data(); | 480 | 10 | size_t size = offsets.size(); | 481 | | | 482 | 38 | for (size_t i = 0; i < size; ++i) { | 483 | 28 | if (nullmap[i]) { | 484 | 2 | continue; | 485 | 26 | } else { | 486 | 26 | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); | 487 | 26 | size_t str_size = offsets[i] - offsets[i - 1]; | 488 | | if constexpr (HashBits == 32) { | 489 | | uint32_t hash_value = HashUtil::murmur_hash3_32(raw_str, str_size, | 490 | | HashUtil::MURMUR3_32_SEED); | 491 | | res_data[i].add(hash_value); | 492 | 26 | } else { | 493 | 26 | uint64_t hash_value = 0; | 494 | 26 | murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value); | 495 | 26 | res_data[i].add(hash_value); | 496 | 26 | } | 497 | 26 | } | 498 | 28 | } | 499 | 10 | } | 500 | 10 | } |
Unexecuted instantiation: _ZN5doris10BitmapHashILi64EE15vector_nullableINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvPKT_RKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS_3COWINS_7IColumnEE11mutable_ptrISH_EE |
501 | | }; |
502 | | |
503 | | class FunctionBitmapCount : public IFunction { |
504 | | public: |
505 | | static constexpr auto name = "bitmap_count"; |
506 | | |
507 | 1 | String get_name() const override { return name; } |
508 | | |
509 | 108 | static FunctionPtr create() { return std::make_shared<FunctionBitmapCount>(); } |
510 | | |
511 | 99 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
512 | 99 | return std::make_shared<DataTypeInt64>(); |
513 | 99 | } |
514 | | |
515 | 99 | size_t get_number_of_arguments() const override { return 1; } |
516 | | |
517 | 249 | bool use_default_implementation_for_nulls() const override { return false; } |
518 | | |
519 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
520 | 150 | uint32_t result, size_t input_rows_count) const override { |
521 | 150 | auto res_data_column = ColumnInt64::create(); |
522 | 150 | auto& res = res_data_column->get_data(); |
523 | 150 | auto data_null_map = ColumnUInt8::create(input_rows_count, 0); |
524 | 150 | auto& null_map = data_null_map->get_data(); |
525 | | |
526 | 150 | auto column = block.get_by_position(arguments[0]).column; |
527 | 150 | if (auto* nullable = check_and_get_column<const ColumnNullable>(*column)) { |
528 | 31 | VectorizedUtils::update_null_map(null_map, nullable->get_null_map_data()); |
529 | 31 | column = nullable->get_nested_column_ptr(); |
530 | 31 | } |
531 | 150 | auto str_col = assert_cast<const ColumnBitmap*>(column.get()); |
532 | 150 | const auto& col_data = str_col->get_data(); |
533 | | |
534 | 150 | res.reserve(input_rows_count); |
535 | 389 | for (size_t i = 0; i < input_rows_count; ++i) { |
536 | 239 | if (null_map[i]) { |
537 | 6 | res.push_back(0); |
538 | 6 | continue; |
539 | 6 | } |
540 | 233 | res.push_back(col_data[i].cardinality()); |
541 | 233 | } |
542 | 150 | block.replace_by_position(result, std::move(res_data_column)); |
543 | 150 | return Status::OK(); |
544 | 150 | } |
545 | | }; |
546 | | |
547 | | struct NameBitmapNot { |
548 | | static constexpr auto name = "bitmap_not"; |
549 | | }; |
550 | | |
551 | | template <typename LeftDataType, typename RightDataType> |
552 | | struct BitmapNot { |
553 | | using ResultDataType = DataTypeBitMap; |
554 | | using T0 = typename LeftDataType::FieldType; |
555 | | using T1 = typename RightDataType::FieldType; |
556 | | using TData = std::vector<BitmapValue>; |
557 | | |
558 | 12 | static void vector_vector(const TData& lvec, const TData& rvec, TData& res) { |
559 | 12 | size_t size = lvec.size(); |
560 | 41 | for (size_t i = 0; i < size; ++i) { |
561 | 29 | res[i] = lvec[i]; |
562 | 29 | res[i] -= rvec[i]; |
563 | 29 | } |
564 | 12 | } |
565 | 0 | static void vector_scalar(const TData& lvec, const BitmapValue& rval, TData& res) { |
566 | 0 | size_t size = lvec.size(); |
567 | 0 | for (size_t i = 0; i < size; ++i) { |
568 | 0 | res[i] = lvec[i]; |
569 | 0 | res[i] -= rval; |
570 | 0 | } |
571 | 0 | } |
572 | 0 | static void scalar_vector(const BitmapValue& lval, const TData& rvec, TData& res) { |
573 | 0 | size_t size = rvec.size(); |
574 | 0 | for (size_t i = 0; i < size; ++i) { |
575 | 0 | res[i] = lval; |
576 | 0 | res[i] -= rvec[i]; |
577 | 0 | } |
578 | 0 | } |
579 | | }; |
580 | | |
/// Tag type carrying the function name string "bitmap_and_not".
struct NameBitmapAndNot {
    static constexpr const char* name = "bitmap_and_not";
};
584 | | |
585 | | template <typename LeftDataType, typename RightDataType> |
586 | | struct BitmapAndNot { |
587 | | using ResultDataType = DataTypeBitMap; |
588 | | using T0 = typename LeftDataType::FieldType; |
589 | | using T1 = typename RightDataType::FieldType; |
590 | | using TData = std::vector<BitmapValue>; |
591 | | |
592 | 19 | static void vector_vector(const TData& lvec, const TData& rvec, TData& res) { |
593 | 19 | size_t size = lvec.size(); |
594 | 19 | BitmapValue mid_data; |
595 | 75 | for (size_t i = 0; i < size; ++i) { |
596 | 56 | mid_data = lvec[i]; |
597 | 56 | mid_data &= rvec[i]; |
598 | 56 | res[i] = lvec[i]; |
599 | 56 | res[i] -= mid_data; |
600 | 56 | mid_data.reset(); |
601 | 56 | } |
602 | 19 | } |
603 | 0 | static void vector_scalar(const TData& lvec, const BitmapValue& rval, TData& res) { |
604 | 0 | size_t size = lvec.size(); |
605 | 0 | BitmapValue mid_data; |
606 | 0 | for (size_t i = 0; i < size; ++i) { |
607 | 0 | mid_data = lvec[i]; |
608 | 0 | mid_data &= rval; |
609 | 0 | res[i] = lvec[i]; |
610 | 0 | res[i] -= mid_data; |
611 | 0 | mid_data.reset(); |
612 | 0 | } |
613 | 0 | } |
614 | 0 | static void scalar_vector(const BitmapValue& lval, const TData& rvec, TData& res) { |
615 | 0 | size_t size = rvec.size(); |
616 | 0 | BitmapValue mid_data; |
617 | 0 | for (size_t i = 0; i < size; ++i) { |
618 | 0 | mid_data = lval; |
619 | 0 | mid_data &= rvec[i]; |
620 | 0 | res[i] = lval; |
621 | 0 | res[i] -= mid_data; |
622 | 0 | mid_data.reset(); |
623 | 0 | } |
624 | 0 | } |
625 | | }; |
626 | | |
/// Tag type carrying the function name string "bitmap_and_not_count".
struct NameBitmapAndNotCount {
    static constexpr const char* name = "bitmap_and_not_count";
};
630 | | |
631 | | template <typename LeftDataType, typename RightDataType> |
632 | | struct BitmapAndNotCount { |
633 | | using ResultDataType = DataTypeInt64; |
634 | | using T0 = typename LeftDataType::FieldType; |
635 | | using T1 = typename RightDataType::FieldType; |
636 | | using TData = std::vector<BitmapValue>; |
637 | | using ResTData = typename ColumnInt64::Container::value_type; |
638 | | |
639 | 38 | static void vector_vector(const TData& lvec, const TData& rvec, ResTData* res) { |
640 | 38 | size_t size = lvec.size(); |
641 | 38 | BitmapValue mid_data; |
642 | 132 | for (size_t i = 0; i < size; ++i) { |
643 | 94 | mid_data = lvec[i]; |
644 | 94 | mid_data &= rvec[i]; |
645 | 94 | res[i] = lvec[i].andnot_cardinality(mid_data); |
646 | 94 | mid_data.reset(); |
647 | 94 | } |
648 | 38 | } |
649 | 0 | static void scalar_vector(const BitmapValue& lval, const TData& rvec, ResTData* res) { |
650 | 0 | size_t size = rvec.size(); |
651 | 0 | BitmapValue mid_data; |
652 | 0 | for (size_t i = 0; i < size; ++i) { |
653 | 0 | mid_data = lval; |
654 | 0 | mid_data &= rvec[i]; |
655 | 0 | res[i] = lval.andnot_cardinality(mid_data); |
656 | 0 | mid_data.reset(); |
657 | 0 | } |
658 | 0 | } |
659 | 0 | static void vector_scalar(const TData& lvec, const BitmapValue& rval, ResTData* res) { |
660 | 0 | size_t size = lvec.size(); |
661 | 0 | BitmapValue mid_data; |
662 | 0 | for (size_t i = 0; i < size; ++i) { |
663 | 0 | mid_data = lvec[i]; |
664 | 0 | mid_data &= rval; |
665 | 0 | res[i] = lvec[i].andnot_cardinality(mid_data); |
666 | 0 | mid_data.reset(); |
667 | 0 | } |
668 | 0 | } |
669 | | }; |
670 | | |
671 | 110 | void update_bitmap_op_count(int64_t* __restrict count, const NullMap& null_map) { |
672 | 110 | static constexpr int64_t flags[2] = {-1, 0}; |
673 | 110 | size_t size = null_map.size(); |
674 | 110 | auto* __restrict null_map_data = null_map.data(); |
675 | 284 | for (size_t i = 0; i < size; ++i) { |
676 | 174 | count[i] &= flags[null_map_data[i]]; |
677 | 174 | } |
678 | 110 | } |
679 | | |
680 | | // for bitmap_and_count, bitmap_xor_count and bitmap_and_not_count, |
681 | | // result is 0 for rows that if any column is null value |
682 | | ColumnPtr handle_bitmap_op_count_null_value(ColumnPtr& src, const Block& block, |
683 | | const ColumnNumbers& args, uint32_t result, |
684 | 60 | size_t input_rows_count) { |
685 | 60 | auto* nullable = assert_cast<const ColumnNullable*>(src.get()); |
686 | 60 | ColumnPtr src_not_nullable = nullable->get_nested_column_ptr(); |
687 | 60 | MutableColumnPtr src_not_nullable_mutable = (*std::move(src_not_nullable)).assume_mutable(); |
688 | 60 | auto* __restrict count_data = |
689 | 60 | assert_cast<ColumnInt64*>(src_not_nullable_mutable.get())->get_data().data(); |
690 | | |
691 | 142 | for (const auto& arg : args) { |
692 | 142 | const ColumnWithTypeAndName& elem = block.get_by_position(arg); |
693 | 142 | if (!elem.type->is_nullable()) { |
694 | 32 | continue; |
695 | 32 | } |
696 | | |
697 | 110 | bool is_const = is_column_const(*elem.column); |
698 | | /// Const Nullable that are NULL. |
699 | 110 | if (is_const && assert_cast<const ColumnConst*>(elem.column.get())->only_null()) { |
700 | 0 | return block.get_by_position(result).type->create_column_const( |
701 | 0 | input_rows_count, Field::create_field<TYPE_BIGINT>(0)); |
702 | 0 | } |
703 | 110 | if (is_const) { |
704 | 0 | continue; |
705 | 0 | } |
706 | | |
707 | 110 | if (const auto* nullable_column = assert_cast<const ColumnNullable*>(elem.column.get())) { |
708 | 110 | const ColumnPtr& null_map_column = nullable_column->get_null_map_column_ptr(); |
709 | 110 | const NullMap& src_null_map = |
710 | 110 | assert_cast<const ColumnUInt8&>(*null_map_column).get_data(); |
711 | | |
712 | 110 | update_bitmap_op_count(count_data, src_null_map); |
713 | 110 | } |
714 | 110 | } |
715 | | |
716 | 60 | return src; |
717 | 60 | } |
718 | | |
719 | | Status execute_bitmap_op_count_null_to_zero( |
720 | | FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, |
721 | | size_t input_rows_count, |
722 | | const std::function<Status(FunctionContext*, Block&, const ColumnNumbers&, size_t, size_t)>& |
723 | 110 | exec_impl_func) { |
724 | 110 | if (have_null_column(block, arguments)) { |
725 | 60 | auto [temporary_block, new_args, new_result] = |
726 | 60 | create_block_with_nested_columns(block, arguments, result); |
727 | 60 | RETURN_IF_ERROR(exec_impl_func(context, temporary_block, new_args, new_result, |
728 | 60 | temporary_block.rows())); |
729 | 60 | block.get_by_position(result).column = handle_bitmap_op_count_null_value( |
730 | 60 | temporary_block.get_by_position(new_result).column, block, arguments, result, |
731 | 60 | input_rows_count); |
732 | 60 | } else { |
733 | 50 | return exec_impl_func(context, block, arguments, result, input_rows_count); |
734 | 50 | } |
735 | 60 | return Status::OK(); |
736 | 110 | } |
737 | | |
738 | | template <typename FunctionName> |
739 | | class FunctionBitmapAndNotCount : public IFunction { |
740 | | public: |
741 | | using LeftDataType = DataTypeBitMap; |
742 | | using RightDataType = DataTypeBitMap; |
743 | | using ResultDataType = typename BitmapAndNotCount<LeftDataType, RightDataType>::ResultDataType; |
744 | | |
745 | | static constexpr auto name = FunctionName::name; |
746 | 30 | static FunctionPtr create() { return std::make_shared<FunctionBitmapAndNotCount>(); } |
747 | 1 | String get_name() const override { return name; } |
748 | 21 | size_t get_number_of_arguments() const override { return 2; } |
749 | 21 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
750 | 21 | bool return_nullable = false; |
751 | | // result is nullable only when any columns is nullable for bitmap_and_not_count |
752 | 46 | for (size_t i = 0; i < arguments.size(); ++i) { |
753 | 36 | if (arguments[i]->is_nullable()) { |
754 | 11 | return_nullable = true; |
755 | 11 | break; |
756 | 11 | } |
757 | 36 | } |
758 | 21 | auto result_type = std::make_shared<ResultDataType>(); |
759 | 21 | return return_nullable ? make_nullable(result_type) : result_type; |
760 | 21 | } |
761 | | |
762 | 59 | bool use_default_implementation_for_nulls() const override { |
763 | | // for bitmap_and_not_count, result is always not null, and if the bitmap op result is null, |
764 | | // the count is 0 |
765 | 59 | return false; |
766 | 59 | } |
767 | | |
768 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
769 | 38 | uint32_t result, size_t input_rows_count) const override { |
770 | 38 | DCHECK_EQ(arguments.size(), 2); |
771 | 38 | auto impl_func = [&](FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
772 | 38 | uint32_t result, size_t input_rows_count) { |
773 | 38 | return execute_impl_internal(context, block, arguments, result, input_rows_count); |
774 | 38 | }; |
775 | 38 | return execute_bitmap_op_count_null_to_zero(context, block, arguments, result, |
776 | 38 | input_rows_count, impl_func); |
777 | 38 | } |
778 | | |
779 | | Status execute_impl_internal(FunctionContext* context, Block& block, |
780 | | const ColumnNumbers& arguments, uint32_t result, |
781 | 38 | size_t input_rows_count) const { |
782 | 38 | using ColVecResult = ColumnVector<ResultDataType::PType>; |
783 | | |
784 | 38 | typename ColVecResult::MutablePtr col_res = ColVecResult::create(); |
785 | 38 | auto& vec_res = col_res->get_data(); |
786 | 38 | vec_res.resize(block.rows()); |
787 | | |
788 | 38 | const auto& left = block.get_by_position(arguments[0]); |
789 | 38 | auto lcol = left.column; |
790 | 38 | const auto& right = block.get_by_position(arguments[1]); |
791 | 38 | auto rcol = right.column; |
792 | | |
793 | 38 | if (is_column_const(*left.column)) { |
794 | 0 | BitmapAndNotCount<LeftDataType, RightDataType>::scalar_vector( |
795 | 0 | assert_cast<const ColumnBitmap&>( |
796 | 0 | assert_cast<const ColumnConst*>(lcol.get())->get_data_column()) |
797 | 0 | .get_data()[0], |
798 | 0 | assert_cast<const ColumnBitmap*>(rcol.get())->get_data(), vec_res.data()); |
799 | 38 | } else if (is_column_const(*right.column)) { |
800 | 0 | BitmapAndNotCount<LeftDataType, RightDataType>::vector_scalar( |
801 | 0 | assert_cast<const ColumnBitmap*>(lcol.get())->get_data(), |
802 | 0 | assert_cast<const ColumnBitmap&>( |
803 | 0 | assert_cast<const ColumnConst*>(rcol.get())->get_data_column()) |
804 | 0 | .get_data()[0], |
805 | 0 | vec_res.data()); |
806 | 38 | } else { |
807 | 38 | BitmapAndNotCount<LeftDataType, RightDataType>::vector_vector( |
808 | 38 | assert_cast<const ColumnBitmap*>(lcol.get())->get_data(), |
809 | 38 | assert_cast<const ColumnBitmap*>(rcol.get())->get_data(), vec_res.data()); |
810 | 38 | } |
811 | | |
812 | 38 | auto& result_info = block.get_by_position(result); |
813 | 38 | if (result_info.type->is_nullable()) { |
814 | 16 | block.replace_by_position( |
815 | 16 | result, ColumnNullable::create(std::move(col_res), |
816 | 16 | ColumnUInt8::create(input_rows_count, 0))); |
817 | 22 | } else { |
818 | 22 | block.replace_by_position(result, std::move(col_res)); |
819 | 22 | } |
820 | 38 | return Status::OK(); |
821 | 38 | } |
822 | | }; |
823 | | |
/// Tag type carrying the function name string "bitmap_contains".
struct NameBitmapContains {
    static constexpr const char* name = "bitmap_contains";
};
827 | | |
828 | | template <typename LeftDataType, typename RightDataType> |
829 | | struct BitmapContains { |
830 | | using ResultDataType = DataTypeUInt8; |
831 | | using T0 = typename LeftDataType::FieldType; |
832 | | using T1 = typename RightDataType::FieldType; |
833 | | using LTData = std::vector<BitmapValue>; |
834 | | using RTData = typename ColumnVector<RightDataType::PType>::Container; |
835 | | using ResTData = typename ColumnUInt8::Container; |
836 | | |
837 | 33 | static void vector_vector(const LTData& lvec, const RTData& rvec, ResTData& res) { |
838 | 33 | size_t size = lvec.size(); |
839 | 85 | for (size_t i = 0; i < size; ++i) { |
840 | 52 | res[i] = lvec[i].contains(rvec[i]); |
841 | 52 | } |
842 | 33 | } |
843 | 7 | static void vector_scalar(const LTData& lvec, const T1& rval, ResTData& res) { |
844 | 7 | size_t size = lvec.size(); |
845 | 14 | for (size_t i = 0; i < size; ++i) { |
846 | 7 | res[i] = lvec[i].contains(rval); |
847 | 7 | } |
848 | 7 | } |
849 | 0 | static void scalar_vector(const BitmapValue& lval, const RTData& rvec, ResTData& res) { |
850 | 0 | size_t size = rvec.size(); |
851 | 0 | for (size_t i = 0; i < size; ++i) { |
852 | 0 | res[i] = lval.contains(rvec[i]); |
853 | 0 | } |
854 | 0 | } |
855 | | }; |
856 | | |
/// Tag type carrying the function name string "bitmap_remove".
struct NameBitmapRemove {
    static constexpr const char* name = "bitmap_remove";
};
860 | | |
861 | | template <typename LeftDataType, typename RightDataType> |
862 | | struct BitmapRemove { |
863 | | using ResultDataType = DataTypeBitMap; |
864 | | using T0 = typename LeftDataType::FieldType; |
865 | | using T1 = typename RightDataType::FieldType; |
866 | | using LTData = std::vector<BitmapValue>; |
867 | | using RTData = typename ColumnVector<RightDataType::PType>::Container; |
868 | | using ResTData = std::vector<BitmapValue>; |
869 | | |
870 | 2 | static void vector_vector(const LTData& lvec, const RTData& rvec, ResTData& res) { |
871 | 2 | size_t size = lvec.size(); |
872 | 6 | for (size_t i = 0; i < size; ++i) { |
873 | 4 | res[i] = lvec[i]; |
874 | 4 | res[i].remove(rvec[i]); |
875 | 4 | } |
876 | 2 | } |
877 | 12 | static void vector_scalar(const LTData& lvec, const T1& rval, ResTData& res) { |
878 | 12 | size_t size = lvec.size(); |
879 | 45 | for (size_t i = 0; i < size; ++i) { |
880 | 33 | res[i] = lvec[i]; |
881 | 33 | res[i].remove(rval); |
882 | 33 | } |
883 | 12 | } |
884 | 0 | static void scalar_vector(const BitmapValue& lval, const RTData& rvec, ResTData& res) { |
885 | 0 | size_t size = rvec.size(); |
886 | 0 | for (size_t i = 0; i < size; ++i) { |
887 | 0 | res[i] = lval; |
888 | 0 | res[i].remove(rvec[i]); |
889 | 0 | } |
890 | 0 | } |
891 | | }; |
892 | | |
/// Tag type carrying the function name string "bitmap_has_any".
struct NameBitmapHasAny {
    static constexpr const char* name = "bitmap_has_any";
};
896 | | |
897 | | template <typename LeftDataType, typename RightDataType> |
898 | | struct BitmapHasAny { |
899 | | using ResultDataType = DataTypeUInt8; |
900 | | using T0 = typename LeftDataType::FieldType; |
901 | | using T1 = typename RightDataType::FieldType; |
902 | | using TData = std::vector<BitmapValue>; |
903 | | using ResTData = typename ColumnUInt8::Container; |
904 | | |
905 | 12 | static void vector_vector(const TData& lvec, const TData& rvec, ResTData& res) { |
906 | 12 | size_t size = lvec.size(); |
907 | 41 | for (size_t i = 0; i < size; ++i) { |
908 | 29 | auto bitmap = lvec[i]; |
909 | 29 | bitmap &= rvec[i]; |
910 | 29 | res[i] = bitmap.cardinality() != 0; |
911 | 29 | } |
912 | 12 | } |
913 | 0 | static void vector_scalar(const TData& lvec, const BitmapValue& rval, ResTData& res) { |
914 | 0 | size_t size = lvec.size(); |
915 | 0 | for (size_t i = 0; i < size; ++i) { |
916 | 0 | auto bitmap = lvec[i]; |
917 | 0 | bitmap &= rval; |
918 | 0 | res[i] = bitmap.cardinality() != 0; |
919 | 0 | } |
920 | 0 | } |
921 | 0 | static void scalar_vector(const BitmapValue& lval, const TData& rvec, ResTData& res) { |
922 | 0 | size_t size = rvec.size(); |
923 | 0 | for (size_t i = 0; i < size; ++i) { |
924 | 0 | auto bitmap = lval; |
925 | 0 | bitmap &= rvec[i]; |
926 | 0 | res[i] = bitmap.cardinality() != 0; |
927 | 0 | } |
928 | 0 | } |
929 | | }; |
930 | | |
/// Tag type carrying the function name string "bitmap_has_all".
struct NameBitmapHasAll {
    static constexpr const char* name = "bitmap_has_all";
};
934 | | |
935 | | template <typename LeftDataType, typename RightDataType> |
936 | | struct BitmapHasAll { |
937 | | using ResultDataType = DataTypeUInt8; |
938 | | using T0 = typename LeftDataType::FieldType; |
939 | | using T1 = typename RightDataType::FieldType; |
940 | | using TData = std::vector<BitmapValue>; |
941 | | using ResTData = typename ColumnUInt8::Container; |
942 | | |
943 | 13 | static void vector_vector(const TData& lvec, const TData& rvec, ResTData& res) { |
944 | 13 | size_t size = lvec.size(); |
945 | 47 | for (size_t i = 0; i < size; ++i) { |
946 | 34 | uint64_t lhs_cardinality = lvec[i].cardinality(); |
947 | 34 | auto bitmap = lvec[i]; |
948 | 34 | bitmap |= rvec[i]; |
949 | 34 | res[i] = bitmap.cardinality() == lhs_cardinality; |
950 | 34 | } |
951 | 13 | } |
952 | 0 | static void vector_scalar(const TData& lvec, const BitmapValue& rval, ResTData& res) { |
953 | 0 | size_t size = lvec.size(); |
954 | 0 | for (size_t i = 0; i < size; ++i) { |
955 | 0 | uint64_t lhs_cardinality = lvec[i].cardinality(); |
956 | 0 | auto bitmap = lvec[i]; |
957 | 0 | bitmap |= rval; |
958 | 0 | res[i] = bitmap.cardinality() == lhs_cardinality; |
959 | 0 | } |
960 | 0 | } |
961 | 0 | static void scalar_vector(const BitmapValue& lval, const TData& rvec, ResTData& res) { |
962 | 0 | size_t size = rvec.size(); |
963 | 0 | uint64_t lhs_cardinality = lval.cardinality(); |
964 | 0 | for (size_t i = 0; i < size; ++i) { |
965 | 0 | auto bitmap = lval; |
966 | 0 | bitmap |= rvec[i]; |
967 | 0 | res[i] = bitmap.cardinality() == lhs_cardinality; |
968 | 0 | } |
969 | 0 | } |
970 | | }; |
971 | | |
/// Tag type carrying the function name string "bitmap_to_string".
struct NameBitmapToString {
    static constexpr const char* name = "bitmap_to_string";
};
975 | | |
976 | | struct BitmapToString { |
977 | | using ReturnType = DataTypeString; |
978 | | static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_BITMAP; |
979 | | using Type = DataTypeBitMap::FieldType; |
980 | | using ReturnColumnType = ColumnString; |
981 | | using Chars = ColumnString::Chars; |
982 | | using Offsets = ColumnString::Offsets; |
983 | | |
984 | 266 | static Status vector(const std::vector<BitmapValue>& data, Chars& chars, Offsets& offsets) { |
985 | 266 | size_t size = data.size(); |
986 | 266 | offsets.resize(size); |
987 | 266 | chars.reserve(size); |
988 | 1.26k | for (size_t i = 0; i < size; ++i) { |
989 | 999 | StringOP::push_value_string(data[i].to_string(), i, chars, offsets); |
990 | 999 | } |
991 | 266 | return Status::OK(); |
992 | 266 | } |
993 | | }; |
994 | | |
/// Tag type carrying the function name string "bitmap_to_base64".
struct NameBitmapToBase64 {
    static constexpr const char* name = "bitmap_to_base64";
};
998 | | |
999 | | struct BitmapToBase64 { |
1000 | | using ReturnType = DataTypeString; |
1001 | | static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_BITMAP; |
1002 | | using Type = DataTypeBitMap::FieldType; |
1003 | | using ReturnColumnType = ColumnString; |
1004 | | using Chars = ColumnString::Chars; |
1005 | | using Offsets = ColumnString::Offsets; |
1006 | | |
1007 | | // ColumnString not support 64bit, only 32bit, so that the max size is 4G |
1008 | 15 | static Status vector(const std::vector<BitmapValue>& data, Chars& chars, Offsets& offsets) { |
1009 | 15 | size_t size = data.size(); |
1010 | 15 | offsets.resize(size); |
1011 | 15 | size_t output_char_size = 0; |
1012 | 52 | for (size_t i = 0; i < size; ++i) { |
1013 | 37 | const BitmapValue& bitmap_val = data[i]; |
1014 | 37 | auto ser_size = bitmap_val.getSizeInBytes(); |
1015 | 37 | output_char_size += (int)(4.0 * ceil((double)ser_size / 3.0)); |
1016 | 37 | } |
1017 | 15 | ColumnString::check_chars_length(output_char_size, size); |
1018 | 15 | chars.resize(output_char_size); |
1019 | 15 | auto chars_data = chars.data(); |
1020 | | |
1021 | 15 | size_t cur_ser_size = 0; |
1022 | 15 | size_t last_ser_size = 0; |
1023 | 15 | std::string ser_buff; |
1024 | 15 | size_t encoded_offset = 0; |
1025 | 52 | for (size_t i = 0; i < size; ++i) { |
1026 | 37 | const BitmapValue& bitmap_val = data[i]; |
1027 | 37 | cur_ser_size = bitmap_val.getSizeInBytes(); |
1028 | 37 | if (cur_ser_size > last_ser_size) { |
1029 | 25 | last_ser_size = cur_ser_size; |
1030 | 25 | ser_buff.resize(cur_ser_size); |
1031 | 25 | } |
1032 | 37 | bitmap_val.write_to(ser_buff.data()); |
1033 | | |
1034 | 37 | auto outlen = base64_encode((const unsigned char*)ser_buff.data(), cur_ser_size, |
1035 | 37 | chars_data + encoded_offset); |
1036 | 37 | DCHECK(outlen > 0); |
1037 | | |
1038 | 37 | encoded_offset += (int)(4.0 * ceil((double)cur_ser_size / 3.0)); |
1039 | 37 | offsets[i] = cast_set<UInt32>(encoded_offset); |
1040 | 37 | } |
1041 | 15 | return Status::OK(); |
1042 | 15 | } |
1043 | | }; |
1044 | | |
1045 | | struct SubBitmap { |
1046 | | static constexpr auto name = "sub_bitmap"; |
1047 | | using TData1 = std::vector<BitmapValue>; |
1048 | | using TData2 = typename ColumnInt64::Container; |
1049 | | |
1050 | | static void vector3(const TData1& bitmap_data, const TData2& offset_data, |
1051 | | const TData2& limit_data, NullMap& null_map, size_t input_rows_count, |
1052 | 18 | TData1& res) { |
1053 | 53 | for (int i = 0; i < input_rows_count; ++i) { |
1054 | 35 | if (null_map[i]) { |
1055 | 0 | continue; |
1056 | 0 | } |
1057 | 35 | if (limit_data[i] <= 0) { |
1058 | 1 | null_map[i] = 1; |
1059 | 1 | continue; |
1060 | 1 | } |
1061 | 34 | if (bitmap_data[i].offset_limit(offset_data[i], limit_data[i], &res[i]) == 0) { |
1062 | 24 | null_map[i] = 1; |
1063 | 24 | } |
1064 | 34 | } |
1065 | 18 | } |
1066 | | static void vector_scalars(const TData1& bitmap_data, const Int64& offset_data, |
1067 | | const Int64& limit_data, NullMap& null_map, size_t input_rows_count, |
1068 | 0 | TData1& res) { |
1069 | 0 | for (int i = 0; i < input_rows_count; ++i) { |
1070 | 0 | if (null_map[i]) { |
1071 | 0 | continue; |
1072 | 0 | } |
1073 | 0 | if (limit_data <= 0) { |
1074 | 0 | null_map[i] = 1; |
1075 | 0 | continue; |
1076 | 0 | } |
1077 | 0 | if (bitmap_data[i].offset_limit(offset_data, limit_data, &res[i]) == 0) { |
1078 | 0 | null_map[i] = 1; |
1079 | 0 | } |
1080 | 0 | } |
1081 | 0 | } |
1082 | | }; |
1083 | | |
1084 | | struct BitmapSubsetLimit { |
1085 | | static constexpr auto name = "bitmap_subset_limit"; |
1086 | | using TData1 = std::vector<BitmapValue>; |
1087 | | using TData2 = typename ColumnInt64::Container; |
1088 | | |
1089 | | static void vector3(const TData1& bitmap_data, const TData2& offset_data, |
1090 | | const TData2& limit_data, NullMap& null_map, size_t input_rows_count, |
1091 | 14 | TData1& res) { |
1092 | 45 | for (int i = 0; i < input_rows_count; ++i) { |
1093 | 31 | if (null_map[i]) { |
1094 | 0 | continue; |
1095 | 0 | } |
1096 | 31 | if (offset_data[i] < 0 || limit_data[i] < 0) { |
1097 | 0 | null_map[i] = 1; |
1098 | 0 | continue; |
1099 | 0 | } |
1100 | 31 | bitmap_data[i].sub_limit(offset_data[i], limit_data[i], &res[i]); |
1101 | 31 | } |
1102 | 14 | } |
1103 | | static void vector_scalars(const TData1& bitmap_data, const Int64& offset_data, |
1104 | | const Int64& limit_data, NullMap& null_map, size_t input_rows_count, |
1105 | 0 | TData1& res) { |
1106 | 0 | for (int i = 0; i < input_rows_count; ++i) { |
1107 | 0 | if (null_map[i]) { |
1108 | 0 | continue; |
1109 | 0 | } |
1110 | 0 | if (offset_data < 0 || limit_data < 0) { |
1111 | 0 | null_map[i] = 1; |
1112 | 0 | continue; |
1113 | 0 | } |
1114 | 0 | bitmap_data[i].sub_limit(offset_data, limit_data, &res[i]); |
1115 | 0 | } |
1116 | 0 | } |
1117 | | }; |
1118 | | |
1119 | | struct BitmapSubsetInRange { |
1120 | | static constexpr auto name = "bitmap_subset_in_range"; |
1121 | | using TData1 = std::vector<BitmapValue>; |
1122 | | using TData2 = typename ColumnInt64::Container; |
1123 | | |
1124 | | static void vector3(const TData1& bitmap_data, const TData2& range_start, |
1125 | | const TData2& range_end, NullMap& null_map, size_t input_rows_count, |
1126 | 14 | TData1& res) { |
1127 | 45 | for (int i = 0; i < input_rows_count; ++i) { |
1128 | 31 | if (null_map[i]) { |
1129 | 0 | continue; |
1130 | 0 | } |
1131 | 31 | if (range_start[i] >= range_end[i] || range_start[i] < 0 || range_end[i] < 0) { |
1132 | 25 | null_map[i] = 1; |
1133 | 25 | continue; |
1134 | 25 | } |
1135 | 6 | bitmap_data[i].sub_range(range_start[i], range_end[i], &res[i]); |
1136 | 6 | } |
1137 | 14 | } |
1138 | | static void vector_scalars(const TData1& bitmap_data, const Int64& range_start, |
1139 | | const Int64& range_end, NullMap& null_map, size_t input_rows_count, |
1140 | 0 | TData1& res) { |
1141 | 0 | for (int i = 0; i < input_rows_count; ++i) { |
1142 | 0 | if (null_map[i]) { |
1143 | 0 | continue; |
1144 | 0 | } |
1145 | 0 | if (range_start >= range_end || range_start < 0 || range_end < 0) { |
1146 | 0 | null_map[i] = 1; |
1147 | 0 | continue; |
1148 | 0 | } |
1149 | 0 | bitmap_data[i].sub_range(range_start, range_end, &res[i]); |
1150 | 0 | } |
1151 | 0 | } |
1152 | | }; |
1153 | | |
1154 | | template <typename Impl> |
1155 | | class FunctionBitmapSubs : public IFunction { |
1156 | | public: |
1157 | | static constexpr auto name = Impl::name; |
1158 | 3 | String get_name() const override { return name; }_ZNK5doris18FunctionBitmapSubsINS_9SubBitmapEE8get_nameB5cxx11Ev Line | Count | Source | 1158 | 1 | String get_name() const override { return name; } |
_ZNK5doris18FunctionBitmapSubsINS_17BitmapSubsetLimitEE8get_nameB5cxx11Ev Line | Count | Source | 1158 | 1 | String get_name() const override { return name; } |
_ZNK5doris18FunctionBitmapSubsINS_19BitmapSubsetInRangeEE8get_nameB5cxx11Ev Line | Count | Source | 1158 | 1 | String get_name() const override { return name; } |
|
1159 | | |
1160 | 55 | static FunctionPtr create() { return std::make_shared<FunctionBitmapSubs>(); }_ZN5doris18FunctionBitmapSubsINS_9SubBitmapEE6createEv Line | Count | Source | 1160 | 21 | static FunctionPtr create() { return std::make_shared<FunctionBitmapSubs>(); } |
_ZN5doris18FunctionBitmapSubsINS_17BitmapSubsetLimitEE6createEv Line | Count | Source | 1160 | 17 | static FunctionPtr create() { return std::make_shared<FunctionBitmapSubs>(); } |
_ZN5doris18FunctionBitmapSubsINS_19BitmapSubsetInRangeEE6createEv Line | Count | Source | 1160 | 17 | static FunctionPtr create() { return std::make_shared<FunctionBitmapSubs>(); } |
|
1161 | | |
1162 | 28 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1163 | 28 | return make_nullable(std::make_shared<DataTypeBitMap>()); |
1164 | 28 | } _ZNK5doris18FunctionBitmapSubsINS_9SubBitmapEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 1162 | 12 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1163 | 12 | return make_nullable(std::make_shared<DataTypeBitMap>()); | 1164 | 12 | } |
_ZNK5doris18FunctionBitmapSubsINS_17BitmapSubsetLimitEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 1162 | 8 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1163 | 8 | return make_nullable(std::make_shared<DataTypeBitMap>()); | 1164 | 8 | } |
_ZNK5doris18FunctionBitmapSubsINS_19BitmapSubsetInRangeEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 1162 | 8 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1163 | 8 | return make_nullable(std::make_shared<DataTypeBitMap>()); | 1164 | 8 | } |
|
1165 | | |
1166 | 28 | size_t get_number_of_arguments() const override { return 3; }_ZNK5doris18FunctionBitmapSubsINS_9SubBitmapEE23get_number_of_argumentsEv Line | Count | Source | 1166 | 12 | size_t get_number_of_arguments() const override { return 3; } |
_ZNK5doris18FunctionBitmapSubsINS_17BitmapSubsetLimitEE23get_number_of_argumentsEv Line | Count | Source | 1166 | 8 | size_t get_number_of_arguments() const override { return 3; } |
_ZNK5doris18FunctionBitmapSubsINS_19BitmapSubsetInRangeEE23get_number_of_argumentsEv Line | Count | Source | 1166 | 8 | size_t get_number_of_arguments() const override { return 3; } |
|
1167 | | |
1168 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1169 | 46 | uint32_t result, size_t input_rows_count) const override { |
1170 | 46 | DCHECK_EQ(arguments.size(), 3); |
1171 | 46 | auto res_null_map = ColumnUInt8::create(input_rows_count, 0); |
1172 | 46 | auto res_data_column = ColumnBitmap::create(input_rows_count); |
1173 | | |
1174 | 46 | bool col_const[3]; |
1175 | 46 | ColumnPtr argument_columns[3]; |
1176 | 184 | for (int i = 0; i < 3; ++i) { |
1177 | 138 | col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); |
1178 | 138 | } |
1179 | 46 | argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>( |
1180 | 0 | *block.get_by_position(arguments[0]).column) |
1181 | 0 | .convert_to_full_column() |
1182 | 46 | : block.get_by_position(arguments[0]).column; |
1183 | | |
1184 | 46 | default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments); |
1185 | | |
1186 | 46 | auto bitmap_column = assert_cast<const ColumnBitmap*>(argument_columns[0].get()); |
1187 | 46 | auto offset_column = assert_cast<const ColumnInt64*>(argument_columns[1].get()); |
1188 | 46 | auto limit_column = assert_cast<const ColumnInt64*>(argument_columns[2].get()); |
1189 | | |
1190 | 46 | if (col_const[1] && col_const[2]) { |
1191 | 0 | Impl::vector_scalars(bitmap_column->get_data(), offset_column->get_element(0), |
1192 | 0 | limit_column->get_element(0), res_null_map->get_data(), |
1193 | 0 | input_rows_count, res_data_column->get_data()); |
1194 | 46 | } else { |
1195 | 46 | Impl::vector3(bitmap_column->get_data(), offset_column->get_data(), |
1196 | 46 | limit_column->get_data(), res_null_map->get_data(), input_rows_count, |
1197 | 46 | res_data_column->get_data()); |
1198 | 46 | } |
1199 | | |
1200 | 46 | block.get_by_position(result).column = |
1201 | 46 | ColumnNullable::create(std::move(res_data_column), std::move(res_null_map)); |
1202 | 46 | return Status::OK(); |
1203 | 46 | } _ZNK5doris18FunctionBitmapSubsINS_9SubBitmapEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1169 | 18 | uint32_t result, size_t input_rows_count) const override { | 1170 | 18 | DCHECK_EQ(arguments.size(), 3); | 1171 | 18 | auto res_null_map = ColumnUInt8::create(input_rows_count, 0); | 1172 | 18 | auto res_data_column = ColumnBitmap::create(input_rows_count); | 1173 | | | 1174 | 18 | bool col_const[3]; | 1175 | 18 | ColumnPtr argument_columns[3]; | 1176 | 72 | for (int i = 0; i < 3; ++i) { | 1177 | 54 | col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); | 1178 | 54 | } | 1179 | 18 | argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>( | 1180 | 0 | *block.get_by_position(arguments[0]).column) | 1181 | 0 | .convert_to_full_column() | 1182 | 18 | : block.get_by_position(arguments[0]).column; | 1183 | | | 1184 | 18 | default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments); | 1185 | | | 1186 | 18 | auto bitmap_column = assert_cast<const ColumnBitmap*>(argument_columns[0].get()); | 1187 | 18 | auto offset_column = assert_cast<const ColumnInt64*>(argument_columns[1].get()); | 1188 | 18 | auto limit_column = assert_cast<const ColumnInt64*>(argument_columns[2].get()); | 1189 | | | 1190 | 18 | if (col_const[1] && col_const[2]) { | 1191 | 0 | Impl::vector_scalars(bitmap_column->get_data(), offset_column->get_element(0), | 1192 | 0 | limit_column->get_element(0), res_null_map->get_data(), | 1193 | 0 | input_rows_count, res_data_column->get_data()); | 1194 | 18 | } else { | 1195 | 18 | Impl::vector3(bitmap_column->get_data(), offset_column->get_data(), | 1196 | 18 | limit_column->get_data(), res_null_map->get_data(), input_rows_count, | 1197 | 18 | res_data_column->get_data()); | 1198 | 18 | } | 1199 | | | 1200 | 18 | block.get_by_position(result).column = | 1201 | 18 | ColumnNullable::create(std::move(res_data_column), 
std::move(res_null_map)); | 1202 | 18 | return Status::OK(); | 1203 | 18 | } |
_ZNK5doris18FunctionBitmapSubsINS_17BitmapSubsetLimitEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1169 | 14 | uint32_t result, size_t input_rows_count) const override { | 1170 | 14 | DCHECK_EQ(arguments.size(), 3); | 1171 | 14 | auto res_null_map = ColumnUInt8::create(input_rows_count, 0); | 1172 | 14 | auto res_data_column = ColumnBitmap::create(input_rows_count); | 1173 | | | 1174 | 14 | bool col_const[3]; | 1175 | 14 | ColumnPtr argument_columns[3]; | 1176 | 56 | for (int i = 0; i < 3; ++i) { | 1177 | 42 | col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); | 1178 | 42 | } | 1179 | 14 | argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>( | 1180 | 0 | *block.get_by_position(arguments[0]).column) | 1181 | 0 | .convert_to_full_column() | 1182 | 14 | : block.get_by_position(arguments[0]).column; | 1183 | | | 1184 | 14 | default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments); | 1185 | | | 1186 | 14 | auto bitmap_column = assert_cast<const ColumnBitmap*>(argument_columns[0].get()); | 1187 | 14 | auto offset_column = assert_cast<const ColumnInt64*>(argument_columns[1].get()); | 1188 | 14 | auto limit_column = assert_cast<const ColumnInt64*>(argument_columns[2].get()); | 1189 | | | 1190 | 14 | if (col_const[1] && col_const[2]) { | 1191 | 0 | Impl::vector_scalars(bitmap_column->get_data(), offset_column->get_element(0), | 1192 | 0 | limit_column->get_element(0), res_null_map->get_data(), | 1193 | 0 | input_rows_count, res_data_column->get_data()); | 1194 | 14 | } else { | 1195 | 14 | Impl::vector3(bitmap_column->get_data(), offset_column->get_data(), | 1196 | 14 | limit_column->get_data(), res_null_map->get_data(), input_rows_count, | 1197 | 14 | res_data_column->get_data()); | 1198 | 14 | } | 1199 | | | 1200 | 14 | block.get_by_position(result).column = | 1201 | 14 | ColumnNullable::create(std::move(res_data_column), 
std::move(res_null_map)); | 1202 | 14 | return Status::OK(); | 1203 | 14 | } |
_ZNK5doris18FunctionBitmapSubsINS_19BitmapSubsetInRangeEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1169 | 14 | uint32_t result, size_t input_rows_count) const override { | 1170 | 14 | DCHECK_EQ(arguments.size(), 3); | 1171 | 14 | auto res_null_map = ColumnUInt8::create(input_rows_count, 0); | 1172 | 14 | auto res_data_column = ColumnBitmap::create(input_rows_count); | 1173 | | | 1174 | 14 | bool col_const[3]; | 1175 | 14 | ColumnPtr argument_columns[3]; | 1176 | 56 | for (int i = 0; i < 3; ++i) { | 1177 | 42 | col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); | 1178 | 42 | } | 1179 | 14 | argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>( | 1180 | 0 | *block.get_by_position(arguments[0]).column) | 1181 | 0 | .convert_to_full_column() | 1182 | 14 | : block.get_by_position(arguments[0]).column; | 1183 | | | 1184 | 14 | default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments); | 1185 | | | 1186 | 14 | auto bitmap_column = assert_cast<const ColumnBitmap*>(argument_columns[0].get()); | 1187 | 14 | auto offset_column = assert_cast<const ColumnInt64*>(argument_columns[1].get()); | 1188 | 14 | auto limit_column = assert_cast<const ColumnInt64*>(argument_columns[2].get()); | 1189 | | | 1190 | 14 | if (col_const[1] && col_const[2]) { | 1191 | 0 | Impl::vector_scalars(bitmap_column->get_data(), offset_column->get_element(0), | 1192 | 0 | limit_column->get_element(0), res_null_map->get_data(), | 1193 | 0 | input_rows_count, res_data_column->get_data()); | 1194 | 14 | } else { | 1195 | 14 | Impl::vector3(bitmap_column->get_data(), offset_column->get_data(), | 1196 | 14 | limit_column->get_data(), res_null_map->get_data(), input_rows_count, | 1197 | 14 | res_data_column->get_data()); | 1198 | 14 | } | 1199 | | | 1200 | 14 | block.get_by_position(result).column = | 1201 | 14 | ColumnNullable::create(std::move(res_data_column), 
std::move(res_null_map)); | 1202 | 14 | return Status::OK(); | 1203 | 14 | } |
|
1204 | | }; |
1205 | | |
1206 | | class FunctionBitmapToArray : public IFunction { |
1207 | | public: |
1208 | | static constexpr auto name = "bitmap_to_array"; |
1209 | | |
1210 | 1 | String get_name() const override { return name; } |
1211 | | |
1212 | 14 | static FunctionPtr create() { return std::make_shared<FunctionBitmapToArray>(); } |
1213 | | |
1214 | 5 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1215 | 5 | auto nested_type = make_nullable(std::make_shared<DataTypeInt64>()); |
1216 | 5 | return std::make_shared<DataTypeArray>(nested_type); |
1217 | 5 | } |
1218 | | |
1219 | 5 | size_t get_number_of_arguments() const override { return 1; } |
1220 | | |
1221 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1222 | 8 | uint32_t result, size_t input_rows_count) const override { |
1223 | 8 | auto return_nested_type = make_nullable(std::make_shared<DataTypeInt64>()); |
1224 | 8 | auto dest_array_column_ptr = ColumnArray::create(return_nested_type->create_column(), |
1225 | 8 | ColumnArray::ColumnOffsets::create()); |
1226 | | |
1227 | 8 | IColumn* dest_nested_column = &dest_array_column_ptr->get_data(); |
1228 | 8 | ColumnNullable* dest_nested_nullable_col = |
1229 | 8 | reinterpret_cast<ColumnNullable*>(dest_nested_column); |
1230 | 8 | dest_nested_column = dest_nested_nullable_col->get_nested_column_ptr().get(); |
1231 | 8 | auto& dest_nested_null_map = dest_nested_nullable_col->get_null_map_column().get_data(); |
1232 | | |
1233 | 8 | auto& arg_col = block.get_by_position(arguments[0]).column; |
1234 | 8 | auto bitmap_col = assert_cast<const ColumnBitmap*>(arg_col.get()); |
1235 | 8 | const auto& bitmap_col_data = bitmap_col->get_data(); |
1236 | 8 | auto& nested_column_data = assert_cast<ColumnInt64*>(dest_nested_column)->get_data(); |
1237 | 8 | auto& dest_offsets = dest_array_column_ptr->get_offsets(); |
1238 | 8 | dest_offsets.reserve(input_rows_count); |
1239 | | |
1240 | 16 | for (int i = 0; i < input_rows_count; ++i) { |
1241 | 8 | bitmap_col_data[i].to_array(nested_column_data); |
1242 | 8 | dest_nested_null_map.resize_fill(nested_column_data.size(), 0); |
1243 | 8 | dest_offsets.push_back(nested_column_data.size()); |
1244 | 8 | } |
1245 | | |
1246 | 8 | block.replace_by_position(result, std::move(dest_array_column_ptr)); |
1247 | 8 | return Status::OK(); |
1248 | 8 | } |
1249 | | }; |
1250 | | |
1251 | | using FunctionBitmapEmpty = FunctionConst<BitmapEmpty, false>; |
1252 | | using FunctionToBitmap = FunctionAlwaysNotNullable<ToBitmap>; |
1253 | | using FunctionToBitmapWithCheck = FunctionAlwaysNotNullable<ToBitmapWithCheck, true>; |
1254 | | |
1255 | | using FunctionBitmapFromString = FunctionBitmapAlwaysNull<BitmapFromString>; |
1256 | | using FunctionBitmapFromArray = FunctionBitmapAlwaysNull<BitmapFromArray>; |
1257 | | using FunctionBitmapHash = FunctionAlwaysNotNullable<BitmapHash<32>>; |
1258 | | using FunctionBitmapHash64 = FunctionAlwaysNotNullable<BitmapHash<64>>; |
1259 | | |
1260 | | using FunctionBitmapMin = FunctionBitmapSingle<FunctionBitmapMinImpl>; |
1261 | | using FunctionBitmapMax = FunctionBitmapSingle<FunctionBitmapMaxImpl>; |
1262 | | |
1263 | | using FunctionBitmapToString = FunctionUnaryToType<BitmapToString, NameBitmapToString>; |
1264 | | using FunctionBitmapToBase64 = FunctionUnaryToType<BitmapToBase64, NameBitmapToBase64>; |
1265 | | using FunctionBitmapFromBase64 = FunctionBitmapAlwaysNull<BitmapFromBase64>; |
1266 | | using FunctionBitmapNot = |
1267 | | FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapNot, NameBitmapNot>; |
1268 | | using FunctionBitmapAndNot = |
1269 | | FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapAndNot, NameBitmapAndNot>; |
1270 | | using FunctionBitmapContains = |
1271 | | FunctionBinaryToType<DataTypeBitMap, DataTypeInt64, BitmapContains, NameBitmapContains>; |
1272 | | using FunctionBitmapRemove = |
1273 | | FunctionBinaryToType<DataTypeBitMap, DataTypeInt64, BitmapRemove, NameBitmapRemove>; |
1274 | | |
1275 | | using FunctionBitmapHasAny = |
1276 | | FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapHasAny, NameBitmapHasAny>; |
1277 | | using FunctionBitmapHasAll = |
1278 | | FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapHasAll, NameBitmapHasAll>; |
1279 | | using FunctionSubBitmap = FunctionBitmapSubs<SubBitmap>; |
1280 | | using FunctionBitmapSubsetLimit = FunctionBitmapSubs<BitmapSubsetLimit>; |
1281 | | using FunctionBitmapSubsetInRange = FunctionBitmapSubs<BitmapSubsetInRange>; |
1282 | | |
1283 | 8 | void register_function_bitmap(SimpleFunctionFactory& factory) { |
1284 | 8 | factory.register_function<FunctionBitmapEmpty>(); |
1285 | 8 | factory.register_function<FunctionToBitmap>(); |
1286 | 8 | factory.register_function<FunctionToBitmapWithCheck>(); |
1287 | 8 | factory.register_function<FunctionBitmapFromString>(); |
1288 | 8 | factory.register_function<FunctionBitmapToBase64>(); |
1289 | 8 | factory.register_function<FunctionBitmapFromBase64>(); |
1290 | 8 | factory.register_function<FunctionBitmapFromArray>(); |
1291 | 8 | factory.register_function<FunctionBitmapHash>(); |
1292 | 8 | factory.register_function<FunctionBitmapHash64>(); |
1293 | 8 | factory.register_function<FunctionBitmapCount>(); |
1294 | 8 | factory.register_function<FunctionBitmapMin>(); |
1295 | 8 | factory.register_function<FunctionBitmapMax>(); |
1296 | 8 | factory.register_function<FunctionBitmapToString>(); |
1297 | 8 | factory.register_function<FunctionBitmapNot>(); |
1298 | 8 | factory.register_function<FunctionBitmapAndNot>(); |
1299 | 8 | factory.register_alias(NameBitmapAndNot::name, "bitmap_andnot"); |
1300 | 8 | factory.register_function<FunctionBitmapAndNotCount<NameBitmapAndNotCount>>(); |
1301 | 8 | factory.register_alias(NameBitmapAndNotCount::name, "bitmap_andnot_count"); |
1302 | 8 | factory.register_function<FunctionBitmapContains>(); |
1303 | 8 | factory.register_function<FunctionBitmapRemove>(); |
1304 | 8 | factory.register_function<FunctionBitmapHasAny>(); |
1305 | 8 | factory.register_function<FunctionBitmapHasAll>(); |
1306 | 8 | factory.register_function<FunctionSubBitmap>(); |
1307 | 8 | factory.register_function<FunctionBitmapSubsetLimit>(); |
1308 | 8 | factory.register_function<FunctionBitmapSubsetInRange>(); |
1309 | 8 | factory.register_function<FunctionBitmapToArray>(); |
1310 | 8 | } |
1311 | | |
1312 | | } // namespace doris |