Coverage Report

Created: 2026-03-13 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_varbinary.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exprs/function/function_varbinary.h"
19
20
#include <glog/logging.h>
21
22
#include <cstddef>
23
#include <memory>
24
25
#include "common/status.h"
26
#include "core/block/block.h"
27
#include "core/column/column_const.h"
28
#include "core/column/column_nullable.h"
29
#include "core/column/column_string.h"
30
#include "core/column/column_varbinary.h"
31
#include "core/data_type/data_type.h"
32
#include "core/data_type/data_type_nullable.h"
33
#include "core/data_type/data_type_string.h"
34
#include "core/data_type/data_type_varbinary.h"
35
#include "exprs/function/function.h"
36
#include "exprs/function/function_helpers.h"
37
#include "exprs/function/function_totype.h"
38
#include "exprs/function/simple_function_factory.h"
39
#include "exprs/function/string_hex_util.h"
40
#include "util/url_coding.h"
41
42
namespace doris {
43
#include "common/compile_check_begin.h"
44
45
class FunctionToBinary : public IFunction {
46
public:
47
    static constexpr auto name = "to_binary";
48
49
29
    static FunctionPtr create() { return std::make_shared<FunctionToBinary>(); }
50
51
1
    String get_name() const override { return name; }
52
53
20
    size_t get_number_of_arguments() const override { return 1; }
54
55
20
    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
56
20
        return make_nullable(std::make_shared<DataTypeVarbinary>());
57
20
    }
58
59
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
60
19
                        uint32_t result, size_t input_rows_count) const override {
61
19
        auto& col_ptr = block.get_by_position(arguments[0]).column;
62
19
        if (const auto* col = check_and_get_column<ColumnString>(col_ptr.get())) {
63
19
            auto null_map = ColumnUInt8::create(input_rows_count, 0);
64
19
            auto col_res = ColumnVarbinary::create();
65
19
            const auto& data = col->get_chars();
66
19
            const auto& offsets = col->get_offsets();
67
19
            col_res->get_data().assign(input_rows_count, StringView());
68
69
61
            for (int i = 0; i < input_rows_count; ++i) {
70
42
                const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
71
42
                ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
72
73
42
                int cipher_len = srclen / 2;
74
42
                auto [cipher_inline, dst] = VarBinaryOP::alloc(col_res.get(), i, cipher_len);
75
76
42
                int outlen = string_hex::hex_decode(source, srclen, dst);
77
78
                // if empty string or decode failed, may return NULL
79
42
                if (outlen == 0) {
80
27
                    null_map->get_data()[i] = 1;
81
27
                    continue;
82
27
                }
83
15
                VarBinaryOP::check_and_insert_data(col_res->get_data()[i], dst,
84
15
                                                   cast_set<uint32_t>(outlen), cipher_inline);
85
15
            }
86
19
            block.replace_by_position(
87
19
                    result, ColumnNullable::create(std::move(col_res), std::move(null_map)));
88
19
        } else {
89
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
90
0
                                        block.get_by_position(arguments[0]).column->get_name(),
91
0
                                        get_name());
92
0
        }
93
19
        return Status::OK();
94
19
    }
95
};
96
97
class FunctionFromBinary : public IFunction {
98
public:
99
    static constexpr auto name = "from_binary";
100
101
33
    static FunctionPtr create() { return std::make_shared<FunctionFromBinary>(); }
102
103
1
    String get_name() const override { return name; }
104
105
24
    size_t get_number_of_arguments() const override { return 1; }
106
107
24
    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
108
24
        return make_nullable(std::make_shared<DataTypeString>());
109
24
    }
110
111
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
112
24
                        uint32_t result, size_t input_rows_count) const override {
113
24
        auto& col_ptr = block.get_by_position(arguments[0]).column;
114
24
        if (const auto* col = check_and_get_column<ColumnVarbinary>(col_ptr.get())) {
115
24
            auto null_map = ColumnUInt8::create(input_rows_count, 0);
116
24
            auto col_res = ColumnString::create();
117
24
            auto& data = col_res->get_chars();
118
24
            auto& offsets = col_res->get_offsets();
119
24
            offsets.resize(input_rows_count);
120
24
            size_t total_len = 0;
121
128
            for (size_t i = 0; i < input_rows_count; ++i) {
122
104
                total_len += col->get_data()[i].size() * 2;
123
104
            }
124
24
            data.resize(total_len);
125
126
24
            size_t offset = 0;
127
24
            auto* dst_ptr = reinterpret_cast<unsigned char*>(data.data());
128
128
            for (int i = 0; i < input_rows_count; ++i) {
129
104
                const auto& val = col->get_data()[i];
130
104
                string_hex::hex_encode(reinterpret_cast<const unsigned char*>(val.data()),
131
104
                                       val.size(), dst_ptr, offset);
132
104
                offsets[i] = cast_set<uint32_t>(offset);
133
104
            }
134
24
            block.replace_by_position(
135
24
                    result, ColumnNullable::create(std::move(col_res), std::move(null_map)));
136
24
        } else {
137
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
138
0
                                        block.get_by_position(arguments[0]).column->get_name(),
139
0
                                        get_name());
140
0
        }
141
24
        return Status::OK();
142
24
    }
143
};
144
145
/// Name tag carrying the SQL name ("length") that is passed to
/// FunctionUnaryToType for the VARBINARY length function.
struct NameVarbinaryLength {
    static constexpr auto name = "length";
};
148
149
struct VarbinaryLengthImpl {
150
    using ReturnType = DataTypeInt32;
151
    using ReturnColumnType = ColumnInt32;
152
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_VARBINARY;
153
154
8
    static DataTypes get_variadic_argument_types() {
155
8
        return {std::make_shared<DataTypeVarbinary>()};
156
8
    }
157
158
    static Status vector(const PaddedPODArray<doris::StringView>& data,
159
38
                         PaddedPODArray<Int32>& res) {
160
38
        size_t rows_count = data.size();
161
38
        res.resize(rows_count);
162
119
        for (size_t i = 0; i < rows_count; ++i) {
163
81
            res[i] = data[i].size();
164
81
        }
165
38
        return Status::OK();
166
38
    }
167
};
168
169
// `length` for VARBINARY input: FunctionUnaryToType instantiated with the
// VarbinaryLengthImpl policy and the NameVarbinaryLength name tag.
using FunctionBinaryLength = FunctionUnaryToType<VarbinaryLengthImpl, NameVarbinaryLength>;
170
171
struct ToBase64BinaryImpl {
172
    static constexpr auto name = "to_base64_binary";
173
    using ReturnType = DataTypeString;
174
    using ColumnType = ColumnString;
175
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_VARBINARY;
176
177
    static Status vector(const PaddedPODArray<doris::StringView>& data,
178
13
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
179
13
        auto rows_count = data.size();
180
13
        dst_offsets.resize(rows_count);
181
182
13
        size_t total_size = 0;
183
48
        for (size_t i = 0; i < rows_count; i++) {
184
35
            total_size += 4 * ((data[i].size() + 2) / 3);
185
35
        }
186
13
        ColumnString::check_chars_length(total_size, rows_count);
187
13
        dst_data.resize(total_size);
188
13
        auto* dst_data_ptr = dst_data.data();
189
13
        size_t offset = 0;
190
191
48
        for (size_t i = 0; i < rows_count; i++) {
192
35
            auto binary = data[i];
193
35
            auto binlen = binary.size();
194
195
35
            if (UNLIKELY(binlen == 0)) {
196
5
                dst_offsets[i] = cast_set<uint32_t>(offset);
197
5
                continue;
198
5
            }
199
200
30
            auto outlen = doris::base64_encode(
201
30
                    reinterpret_cast<const unsigned char*>(binary.data()), binlen,
202
30
                    reinterpret_cast<unsigned char*>(dst_data_ptr + offset));
203
204
30
            offset += outlen;
205
30
            dst_offsets[i] = cast_set<uint32_t>(offset);
206
30
        }
207
208
13
        dst_data.pop_back(total_size - offset);
209
210
13
        return Status::OK();
211
13
    }
212
};
213
214
// `to_base64_binary`: FunctionStringEncode instantiated with ToBase64BinaryImpl.
// NOTE(review): the meaning of the `false` template argument is defined by
// FunctionStringEncode, which is not visible in this file.
using FunctionToBase64Binary = FunctionStringEncode<ToBase64BinaryImpl, false>;
215
216
struct FromBase64BinaryImpl {
217
    static constexpr auto name = "from_base64_binary";
218
    using ReturnType = DataTypeVarbinary;
219
    using ColumnType = ColumnVarbinary;
220
221
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
222
14
                         ColumnVarbinary* res, NullMap& null_map) {
223
14
        auto rows_count = offsets.size();
224
14
        res->get_data().assign(rows_count, StringView());
225
226
51
        for (size_t i = 0; i < rows_count; i++) {
227
37
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
228
37
            ColumnString::Offset slen = offsets[i] - offsets[i - 1];
229
230
37
            if (UNLIKELY(slen == 0)) {
231
5
                continue;
232
5
            }
233
234
32
            int cipher_len = slen / 4 * 3;
235
32
            auto [cipher_inline, dst] = VarBinaryOP::alloc(res, i, cipher_len);
236
237
32
            auto outlen = doris::base64_decode(source, slen, dst);
238
239
32
            if (outlen < 0) {
240
14
                null_map[i] = 1;
241
18
            } else {
242
18
                VarBinaryOP::check_and_insert_data(res->get_data()[i], dst,
243
18
                                                   cast_set<uint32_t>(outlen), cipher_inline);
244
18
            }
245
32
        }
246
247
14
        return Status::OK();
248
14
    }
249
};
250
251
// `from_base64_binary`: FunctionStringOperateToNullType instantiated with
// FromBase64BinaryImpl, whose vector() receives a NullMap to flag failed rows.
using FunctionFromBase64Binary = FunctionStringOperateToNullType<FromBase64BinaryImpl>;
252
253
8
/// Registers every VARBINARY function of this file with `factory`, followed by
/// the SQL aliases for the hex conversion functions.
void register_function_binary(SimpleFunctionFactory& factory) {
    // Length and Base64 conversions.
    factory.register_function<FunctionBinaryLength>();
    factory.register_function<FunctionToBase64Binary>();
    factory.register_function<FunctionFromBase64Binary>();

    // Sub-binary extraction (FunctionSubBinary is declared outside this file's body).
    factory.register_function<FunctionSubBinary>();

    // Hex conversions.
    factory.register_function<FunctionToBinary>();
    factory.register_function<FunctionFromBinary>();

    // NOTE(review): to_binary decodes hex text (hex_decode) yet is aliased
    // "to_hex", and from_binary encodes to hex text (hex_encode) yet is aliased
    // "from_hex". Confirm this pairing is intended; the names are user-facing.
    factory.register_alias("from_binary", "from_hex");
    factory.register_alias("to_binary", "to_hex");
}
263
264
#include "common/compile_check_end.h"
265
} // namespace doris