be/src/exprs/function/function_varbinary.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "exprs/function/function_varbinary.h" |
19 | | |
20 | | #include <glog/logging.h> |
21 | | |
22 | | #include <cstddef> |
23 | | #include <memory> |
24 | | |
25 | | #include "common/status.h" |
26 | | #include "core/block/block.h" |
27 | | #include "core/column/column_const.h" |
28 | | #include "core/column/column_nullable.h" |
29 | | #include "core/column/column_string.h" |
30 | | #include "core/column/column_varbinary.h" |
31 | | #include "core/data_type/data_type.h" |
32 | | #include "core/data_type/data_type_nullable.h" |
33 | | #include "core/data_type/data_type_string.h" |
34 | | #include "core/data_type/data_type_varbinary.h" |
35 | | #include "exprs/function/function.h" |
36 | | #include "exprs/function/function_helpers.h" |
37 | | #include "exprs/function/function_totype.h" |
38 | | #include "exprs/function/simple_function_factory.h" |
39 | | #include "exprs/function/string_hex_util.h" |
40 | | #include "util/url_coding.h" |
41 | | |
42 | | namespace doris { |
43 | | #include "common/compile_check_begin.h" |
44 | | |
45 | | class FunctionToBinary : public IFunction { |
46 | | public: |
47 | | static constexpr auto name = "to_binary"; |
48 | | |
49 | 29 | static FunctionPtr create() { return std::make_shared<FunctionToBinary>(); } |
50 | | |
51 | 1 | String get_name() const override { return name; } |
52 | | |
53 | 20 | size_t get_number_of_arguments() const override { return 1; } |
54 | | |
55 | 20 | DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override { |
56 | 20 | return make_nullable(std::make_shared<DataTypeVarbinary>()); |
57 | 20 | } |
58 | | |
59 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
60 | 19 | uint32_t result, size_t input_rows_count) const override { |
61 | 19 | auto& col_ptr = block.get_by_position(arguments[0]).column; |
62 | 19 | if (const auto* col = check_and_get_column<ColumnString>(col_ptr.get())) { |
63 | 19 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
64 | 19 | auto col_res = ColumnVarbinary::create(); |
65 | 19 | const auto& data = col->get_chars(); |
66 | 19 | const auto& offsets = col->get_offsets(); |
67 | 19 | col_res->get_data().assign(input_rows_count, StringView()); |
68 | | |
69 | 61 | for (int i = 0; i < input_rows_count; ++i) { |
70 | 42 | const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
71 | 42 | ColumnString::Offset srclen = offsets[i] - offsets[i - 1]; |
72 | | |
73 | 42 | int cipher_len = srclen / 2; |
74 | 42 | auto [cipher_inline, dst] = VarBinaryOP::alloc(col_res.get(), i, cipher_len); |
75 | | |
76 | 42 | int outlen = string_hex::hex_decode(source, srclen, dst); |
77 | | |
78 | | // if empty string or decode failed, may return NULL |
79 | 42 | if (outlen == 0) { |
80 | 27 | null_map->get_data()[i] = 1; |
81 | 27 | continue; |
82 | 27 | } |
83 | 15 | VarBinaryOP::check_and_insert_data(col_res->get_data()[i], dst, |
84 | 15 | cast_set<uint32_t>(outlen), cipher_inline); |
85 | 15 | } |
86 | 19 | block.replace_by_position( |
87 | 19 | result, ColumnNullable::create(std::move(col_res), std::move(null_map))); |
88 | 19 | } else { |
89 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", |
90 | 0 | block.get_by_position(arguments[0]).column->get_name(), |
91 | 0 | get_name()); |
92 | 0 | } |
93 | 19 | return Status::OK(); |
94 | 19 | } |
95 | | }; |
96 | | |
97 | | class FunctionFromBinary : public IFunction { |
98 | | public: |
99 | | static constexpr auto name = "from_binary"; |
100 | | |
101 | 33 | static FunctionPtr create() { return std::make_shared<FunctionFromBinary>(); } |
102 | | |
103 | 1 | String get_name() const override { return name; } |
104 | | |
105 | 24 | size_t get_number_of_arguments() const override { return 1; } |
106 | | |
107 | 24 | DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override { |
108 | 24 | return make_nullable(std::make_shared<DataTypeString>()); |
109 | 24 | } |
110 | | |
111 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
112 | 24 | uint32_t result, size_t input_rows_count) const override { |
113 | 24 | auto& col_ptr = block.get_by_position(arguments[0]).column; |
114 | 24 | if (const auto* col = check_and_get_column<ColumnVarbinary>(col_ptr.get())) { |
115 | 24 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
116 | 24 | auto col_res = ColumnString::create(); |
117 | 24 | auto& data = col_res->get_chars(); |
118 | 24 | auto& offsets = col_res->get_offsets(); |
119 | 24 | offsets.resize(input_rows_count); |
120 | 24 | size_t total_len = 0; |
121 | 128 | for (size_t i = 0; i < input_rows_count; ++i) { |
122 | 104 | total_len += col->get_data()[i].size() * 2; |
123 | 104 | } |
124 | 24 | data.resize(total_len); |
125 | | |
126 | 24 | size_t offset = 0; |
127 | 24 | auto* dst_ptr = reinterpret_cast<unsigned char*>(data.data()); |
128 | 128 | for (int i = 0; i < input_rows_count; ++i) { |
129 | 104 | const auto& val = col->get_data()[i]; |
130 | 104 | string_hex::hex_encode(reinterpret_cast<const unsigned char*>(val.data()), |
131 | 104 | val.size(), dst_ptr, offset); |
132 | 104 | offsets[i] = cast_set<uint32_t>(offset); |
133 | 104 | } |
134 | 24 | block.replace_by_position( |
135 | 24 | result, ColumnNullable::create(std::move(col_res), std::move(null_map))); |
136 | 24 | } else { |
137 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", |
138 | 0 | block.get_by_position(arguments[0]).column->get_name(), |
139 | 0 | get_name()); |
140 | 0 | } |
141 | 24 | return Status::OK(); |
142 | 24 | } |
143 | | }; |
144 | | |
// Name tag carrying the SQL name ("length") that the FunctionBinaryLength alias is
// instantiated with.
struct NameVarbinaryLength {
    static constexpr const char* name = "length";
};
148 | | |
149 | | struct VarbinaryLengthImpl { |
150 | | using ReturnType = DataTypeInt32; |
151 | | using ReturnColumnType = ColumnInt32; |
152 | | static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_VARBINARY; |
153 | | |
154 | 8 | static DataTypes get_variadic_argument_types() { |
155 | 8 | return {std::make_shared<DataTypeVarbinary>()}; |
156 | 8 | } |
157 | | |
158 | | static Status vector(const PaddedPODArray<doris::StringView>& data, |
159 | 38 | PaddedPODArray<Int32>& res) { |
160 | 38 | size_t rows_count = data.size(); |
161 | 38 | res.resize(rows_count); |
162 | 119 | for (size_t i = 0; i < rows_count; ++i) { |
163 | 81 | res[i] = data[i].size(); |
164 | 81 | } |
165 | 38 | return Status::OK(); |
166 | 38 | } |
167 | | }; |
168 | | |
169 | | using FunctionBinaryLength = FunctionUnaryToType<VarbinaryLengthImpl, NameVarbinaryLength>; |
170 | | |
171 | | struct ToBase64BinaryImpl { |
172 | | static constexpr auto name = "to_base64_binary"; |
173 | | using ReturnType = DataTypeString; |
174 | | using ColumnType = ColumnString; |
175 | | static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_VARBINARY; |
176 | | |
177 | | static Status vector(const PaddedPODArray<doris::StringView>& data, |
178 | 13 | ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) { |
179 | 13 | auto rows_count = data.size(); |
180 | 13 | dst_offsets.resize(rows_count); |
181 | | |
182 | 13 | size_t total_size = 0; |
183 | 48 | for (size_t i = 0; i < rows_count; i++) { |
184 | 35 | total_size += 4 * ((data[i].size() + 2) / 3); |
185 | 35 | } |
186 | 13 | ColumnString::check_chars_length(total_size, rows_count); |
187 | 13 | dst_data.resize(total_size); |
188 | 13 | auto* dst_data_ptr = dst_data.data(); |
189 | 13 | size_t offset = 0; |
190 | | |
191 | 48 | for (size_t i = 0; i < rows_count; i++) { |
192 | 35 | auto binary = data[i]; |
193 | 35 | auto binlen = binary.size(); |
194 | | |
195 | 35 | if (UNLIKELY(binlen == 0)) { |
196 | 5 | dst_offsets[i] = cast_set<uint32_t>(offset); |
197 | 5 | continue; |
198 | 5 | } |
199 | | |
200 | 30 | auto outlen = doris::base64_encode( |
201 | 30 | reinterpret_cast<const unsigned char*>(binary.data()), binlen, |
202 | 30 | reinterpret_cast<unsigned char*>(dst_data_ptr + offset)); |
203 | | |
204 | 30 | offset += outlen; |
205 | 30 | dst_offsets[i] = cast_set<uint32_t>(offset); |
206 | 30 | } |
207 | | |
208 | 13 | dst_data.pop_back(total_size - offset); |
209 | | |
210 | 13 | return Status::OK(); |
211 | 13 | } |
212 | | }; |
213 | | |
214 | | using FunctionToBase64Binary = FunctionStringEncode<ToBase64BinaryImpl, false>; |
215 | | |
216 | | struct FromBase64BinaryImpl { |
217 | | static constexpr auto name = "from_base64_binary"; |
218 | | using ReturnType = DataTypeVarbinary; |
219 | | using ColumnType = ColumnVarbinary; |
220 | | |
221 | | static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
222 | 14 | ColumnVarbinary* res, NullMap& null_map) { |
223 | 14 | auto rows_count = offsets.size(); |
224 | 14 | res->get_data().assign(rows_count, StringView()); |
225 | | |
226 | 51 | for (size_t i = 0; i < rows_count; i++) { |
227 | 37 | const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
228 | 37 | ColumnString::Offset slen = offsets[i] - offsets[i - 1]; |
229 | | |
230 | 37 | if (UNLIKELY(slen == 0)) { |
231 | 5 | continue; |
232 | 5 | } |
233 | | |
234 | 32 | int cipher_len = slen / 4 * 3; |
235 | 32 | auto [cipher_inline, dst] = VarBinaryOP::alloc(res, i, cipher_len); |
236 | | |
237 | 32 | auto outlen = doris::base64_decode(source, slen, dst); |
238 | | |
239 | 32 | if (outlen < 0) { |
240 | 14 | null_map[i] = 1; |
241 | 18 | } else { |
242 | 18 | VarBinaryOP::check_and_insert_data(res->get_data()[i], dst, |
243 | 18 | cast_set<uint32_t>(outlen), cipher_inline); |
244 | 18 | } |
245 | 32 | } |
246 | | |
247 | 14 | return Status::OK(); |
248 | 14 | } |
249 | | }; |
250 | | |
251 | | using FunctionFromBase64Binary = FunctionStringOperateToNullType<FromBase64BinaryImpl>; |
252 | | |
253 | 8 | void register_function_binary(SimpleFunctionFactory& factory) { |
254 | 8 | factory.register_function<FunctionBinaryLength>(); |
255 | 8 | factory.register_function<FunctionToBase64Binary>(); |
256 | 8 | factory.register_function<FunctionFromBase64Binary>(); |
257 | 8 | factory.register_function<FunctionSubBinary>(); |
258 | 8 | factory.register_function<FunctionToBinary>(); |
259 | 8 | factory.register_function<FunctionFromBinary>(); |
260 | 8 | factory.register_alias("from_binary", "from_hex"); |
261 | 8 | factory.register_alias("to_binary", "to_hex"); |
262 | 8 | } |
263 | | |
264 | | #include "common/compile_check_end.h" |
265 | | } // namespace doris |