Coverage Report

Created: 2026-05-22 15:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string_digest.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <cstddef>
19
#include <cstring>
20
#include <string_view>
21
#include <type_traits>
22
#include <vector>
23
24
#include "common/status.h"
25
#include "core/assert_cast.h"
26
#include "core/block/block.h"
27
#include "core/block/column_numbers.h"
28
#include "core/column/column_string.h"
29
#include "core/column/column_varbinary.h"
30
#include "core/column/column_vector.h"
31
#include "core/data_type/data_type_string.h"
32
#include "core/string_ref.h"
33
#include "exec/common/stringop_substring.h"
34
#include "exprs/function/function.h"
35
#include "exprs/function/simple_function_factory.h"
36
#include "exprs/function_context.h"
37
#include "util/md5.h"
38
#include "util/sha.h"
39
#include "util/sm3.h"
40
41
namespace doris {
42
#include "common/compile_check_avoid_begin.h"
43
44
struct SM3Sum {
45
    static constexpr auto name = "sm3sum";
46
    using ObjectData = SM3Digest;
47
};
48
49
struct MD5Sum {
50
    static constexpr auto name = "md5sum";
51
    using ObjectData = Md5Digest;
52
};
53
54
template <typename Impl>
55
class FunctionStringDigestMulti : public IFunction {
56
public:
57
    static constexpr auto name = Impl::name;
58
382
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestMulti>(); }
_ZN5doris25FunctionStringDigestMultiINS_6SM3SumEE6createEv
Line
Count
Source
58
133
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestMulti>(); }
_ZN5doris25FunctionStringDigestMultiINS_6MD5SumEE6createEv
Line
Count
Source
58
249
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestMulti>(); }
59
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE8get_nameB5cxx11Ev
60
0
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE23get_number_of_argumentsEv
61
366
    bool is_variadic() const override { return true; }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE11is_variadicEv
Line
Count
Source
61
125
    bool is_variadic() const override { return true; }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE11is_variadicEv
Line
Count
Source
61
241
    bool is_variadic() const override { return true; }
62
63
364
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
64
364
        return std::make_shared<DataTypeString>();
65
364
    }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
63
124
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
64
124
        return std::make_shared<DataTypeString>();
65
124
    }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
63
240
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
64
240
        return std::make_shared<DataTypeString>();
65
240
    }
66
67
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
68
367
                        uint32_t result, size_t input_rows_count) const override {
69
367
        DCHECK_GE(arguments.size(), 1);
70
71
367
        auto res = ColumnString::create();
72
367
        auto& res_data = res->get_chars();
73
367
        auto& res_offset = res->get_offsets();
74
367
        res_offset.resize(input_rows_count);
75
76
367
        std::vector<ColumnPtr> argument_columns(arguments.size());
77
367
        std::vector<uint8_t> is_const(arguments.size(), 0);
78
1.03k
        for (size_t i = 0; i < arguments.size(); ++i) {
79
665
            std::tie(argument_columns[i], is_const[i]) =
80
665
                    unpack_if_const(block.get_by_position(arguments[i]).column);
81
665
        }
82
83
367
        if (check_and_get_column<ColumnString>(argument_columns[0].get())) {
84
233
            vector_execute<ColumnString>(block, input_rows_count, argument_columns, is_const,
85
233
                                         res_data, res_offset);
86
233
        } else if (check_and_get_column<ColumnVarbinary>(argument_columns[0].get())) {
87
135
            vector_execute<ColumnVarbinary>(block, input_rows_count, argument_columns, is_const,
88
135
                                            res_data, res_offset);
89
18.4E
        } else {
90
18.4E
            return Status::RuntimeError("Illegal column {} of argument of function {}",
91
18.4E
                                        argument_columns[0]->get_name(), get_name());
92
18.4E
        }
93
94
368
        block.replace_by_position(result, std::move(res));
95
368
        return Status::OK();
96
367
    }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
68
123
                        uint32_t result, size_t input_rows_count) const override {
69
123
        DCHECK_GE(arguments.size(), 1);
70
71
123
        auto res = ColumnString::create();
72
123
        auto& res_data = res->get_chars();
73
123
        auto& res_offset = res->get_offsets();
74
123
        res_offset.resize(input_rows_count);
75
76
123
        std::vector<ColumnPtr> argument_columns(arguments.size());
77
123
        std::vector<uint8_t> is_const(arguments.size(), 0);
78
364
        for (size_t i = 0; i < arguments.size(); ++i) {
79
241
            std::tie(argument_columns[i], is_const[i]) =
80
241
                    unpack_if_const(block.get_by_position(arguments[i]).column);
81
241
        }
82
83
123
        if (check_and_get_column<ColumnString>(argument_columns[0].get())) {
84
80
            vector_execute<ColumnString>(block, input_rows_count, argument_columns, is_const,
85
80
                                         res_data, res_offset);
86
80
        } else if (check_and_get_column<ColumnVarbinary>(argument_columns[0].get())) {
87
44
            vector_execute<ColumnVarbinary>(block, input_rows_count, argument_columns, is_const,
88
44
                                            res_data, res_offset);
89
18.4E
        } else {
90
18.4E
            return Status::RuntimeError("Illegal column {} of argument of function {}",
91
18.4E
                                        argument_columns[0]->get_name(), get_name());
92
18.4E
        }
93
94
124
        block.replace_by_position(result, std::move(res));
95
124
        return Status::OK();
96
123
    }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
68
244
                        uint32_t result, size_t input_rows_count) const override {
69
244
        DCHECK_GE(arguments.size(), 1);
70
71
244
        auto res = ColumnString::create();
72
244
        auto& res_data = res->get_chars();
73
244
        auto& res_offset = res->get_offsets();
74
244
        res_offset.resize(input_rows_count);
75
76
244
        std::vector<ColumnPtr> argument_columns(arguments.size());
77
244
        std::vector<uint8_t> is_const(arguments.size(), 0);
78
668
        for (size_t i = 0; i < arguments.size(); ++i) {
79
424
            std::tie(argument_columns[i], is_const[i]) =
80
424
                    unpack_if_const(block.get_by_position(arguments[i]).column);
81
424
        }
82
83
244
        if (check_and_get_column<ColumnString>(argument_columns[0].get())) {
84
153
            vector_execute<ColumnString>(block, input_rows_count, argument_columns, is_const,
85
153
                                         res_data, res_offset);
86
153
        } else if (check_and_get_column<ColumnVarbinary>(argument_columns[0].get())) {
87
91
            vector_execute<ColumnVarbinary>(block, input_rows_count, argument_columns, is_const,
88
91
                                            res_data, res_offset);
89
91
        } else {
90
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
91
0
                                        argument_columns[0]->get_name(), get_name());
92
0
        }
93
94
244
        block.replace_by_position(result, std::move(res));
95
244
        return Status::OK();
96
244
    }
97
98
private:
99
    template <typename ColumnType>
100
    void vector_execute(Block& block, size_t input_rows_count,
101
                        const std::vector<ColumnPtr>& argument_columns,
102
                        const std::vector<uint8_t>& is_const, ColumnString::Chars& res_data,
103
368
                        ColumnString::Offsets& res_offset) const {
104
368
        if constexpr (std::is_same_v<Impl, MD5Sum>) {
105
244
            if (argument_columns.size() == 1) {
106
140
                const auto* col = assert_cast<const ColumnType*>(argument_columns[0].get());
107
140
                vector_execute_single_md5(col, input_rows_count, is_const[0], res_data, res_offset);
108
140
                return;
109
140
            }
110
244
        }
111
112
104
        using ObjectData = typename Impl::ObjectData;
113
755
        for (size_t i = 0; i < input_rows_count; ++i) {
114
387
            ObjectData digest;
115
1.18k
            for (size_t j = 0; j < argument_columns.size(); ++j) {
116
793
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
117
793
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
118
793
                if (data_ref.size < 1) {
119
194
                    continue;
120
194
                }
121
599
                digest.update(data_ref.data, data_ref.size);
122
599
            }
123
387
            digest.digest();
124
387
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
125
387
                                        i, res_data, res_offset);
126
387
        }
127
368
    }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE14vector_executeINS_9ColumnStrIjEEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrISA_EESaISD_EERKS8_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSM_IjLm4096ESP_Lm16ELm15EEE
Line
Count
Source
103
80
                        ColumnString::Offsets& res_offset) const {
104
        if constexpr (std::is_same_v<Impl, MD5Sum>) {
105
            if (argument_columns.size() == 1) {
106
                const auto* col = assert_cast<const ColumnType*>(argument_columns[0].get());
107
                vector_execute_single_md5(col, input_rows_count, is_const[0], res_data, res_offset);
108
                return;
109
            }
110
        }
111
112
80
        using ObjectData = typename Impl::ObjectData;
113
257
        for (size_t i = 0; i < input_rows_count; ++i) {
114
177
            ObjectData digest;
115
438
            for (size_t j = 0; j < argument_columns.size(); ++j) {
116
261
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
117
261
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
118
261
                if (data_ref.size < 1) {
119
38
                    continue;
120
38
                }
121
223
                digest.update(data_ref.data, data_ref.size);
122
223
            }
123
177
            digest.digest();
124
177
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
125
177
                                        i, res_data, res_offset);
126
177
        }
127
80
    }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE14vector_executeINS_15ColumnVarbinaryEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS9_EESaISC_EERKS7_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSL_IjLm4096ESO_Lm16ELm15EEE
Line
Count
Source
103
44
                        ColumnString::Offsets& res_offset) const {
104
        if constexpr (std::is_same_v<Impl, MD5Sum>) {
105
            if (argument_columns.size() == 1) {
106
                const auto* col = assert_cast<const ColumnType*>(argument_columns[0].get());
107
                vector_execute_single_md5(col, input_rows_count, is_const[0], res_data, res_offset);
108
                return;
109
            }
110
        }
111
112
44
        using ObjectData = typename Impl::ObjectData;
113
117
        for (size_t i = 0; i < input_rows_count; ++i) {
114
73
            ObjectData digest;
115
229
            for (size_t j = 0; j < argument_columns.size(); ++j) {
116
156
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
117
156
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
118
156
                if (data_ref.size < 1) {
119
38
                    continue;
120
38
                }
121
118
                digest.update(data_ref.data, data_ref.size);
122
118
            }
123
73
            digest.digest();
124
73
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
125
73
                                        i, res_data, res_offset);
126
73
        }
127
44
    }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE14vector_executeINS_9ColumnStrIjEEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrISA_EESaISD_EERKS8_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSM_IjLm4096ESP_Lm16ELm15EEE
Line
Count
Source
103
153
                        ColumnString::Offsets& res_offset) const {
104
153
        if constexpr (std::is_same_v<Impl, MD5Sum>) {
105
153
            if (argument_columns.size() == 1) {
106
104
                const auto* col = assert_cast<const ColumnType*>(argument_columns[0].get());
107
104
                vector_execute_single_md5(col, input_rows_count, is_const[0], res_data, res_offset);
108
104
                return;
109
104
            }
110
153
        }
111
112
49
        using ObjectData = typename Impl::ObjectData;
113
218
        for (size_t i = 0; i < input_rows_count; ++i) {
114
65
            ObjectData digest;
115
246
            for (size_t j = 0; j < argument_columns.size(); ++j) {
116
181
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
117
181
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
118
181
                if (data_ref.size < 1) {
119
57
                    continue;
120
57
                }
121
124
                digest.update(data_ref.data, data_ref.size);
122
124
            }
123
65
            digest.digest();
124
65
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
125
65
                                        i, res_data, res_offset);
126
65
        }
127
153
    }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE14vector_executeINS_15ColumnVarbinaryEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS9_EESaISC_EERKS7_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSL_IjLm4096ESO_Lm16ELm15EEE
Line
Count
Source
103
91
                        ColumnString::Offsets& res_offset) const {
104
91
        if constexpr (std::is_same_v<Impl, MD5Sum>) {
105
91
            if (argument_columns.size() == 1) {
106
36
                const auto* col = assert_cast<const ColumnType*>(argument_columns[0].get());
107
36
                vector_execute_single_md5(col, input_rows_count, is_const[0], res_data, res_offset);
108
36
                return;
109
36
            }
110
91
        }
111
112
55
        using ObjectData = typename Impl::ObjectData;
113
163
        for (size_t i = 0; i < input_rows_count; ++i) {
114
72
            ObjectData digest;
115
267
            for (size_t j = 0; j < argument_columns.size(); ++j) {
116
195
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
117
195
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
118
195
                if (data_ref.size < 1) {
119
61
                    continue;
120
61
                }
121
134
                digest.update(data_ref.data, data_ref.size);
122
134
            }
123
72
            digest.digest();
124
72
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
125
72
                                        i, res_data, res_offset);
126
72
        }
127
91
    }
128
129
    template <typename ColumnType>
130
    void vector_execute_single_md5(const ColumnType* col, size_t input_rows_count, bool is_const,
131
                                   ColumnString::Chars& res_data,
132
140
                                   ColumnString::Offsets& res_offset) const {
133
140
        ColumnString::check_chars_length(input_rows_count * MD5_HEX_LENGTH, input_rows_count);
134
140
        res_data.resize(input_rows_count * MD5_HEX_LENGTH);
135
442
        for (size_t i = 0; i < input_rows_count; ++i) {
136
302
            res_offset[i] = (i + 1) * MD5_HEX_LENGTH;
137
302
        }
138
140
        if (input_rows_count == 0) {
139
0
            return;
140
0
        }
141
142
140
        if (is_const) {
143
0
            StringRef data_ref = col->get_data_at(0);
144
0
            const unsigned char* input = reinterpret_cast<const unsigned char*>(data_ref.data);
145
0
            size_t length = data_ref.size;
146
0
            char digest[MD5_HEX_LENGTH];
147
0
            md5_hex_batch(&input, &length, digest, 1);
148
0
            for (size_t i = 0; i < input_rows_count; ++i) {
149
0
                std::memcpy(res_data.data() + i * MD5_HEX_LENGTH, digest, MD5_HEX_LENGTH);
150
0
            }
151
0
            return;
152
0
        }
153
154
140
        std::vector<const unsigned char*> inputs(input_rows_count);
155
140
        std::vector<size_t> lengths(input_rows_count);
156
442
        for (size_t i = 0; i < input_rows_count; ++i) {
157
302
            StringRef data_ref = col->get_data_at(i);
158
302
            inputs[i] = reinterpret_cast<const unsigned char*>(data_ref.data);
159
302
            lengths[i] = data_ref.size;
160
302
        }
161
140
        md5_hex_batch(inputs.data(), lengths.data(), reinterpret_cast<char*>(res_data.data()),
162
140
                      input_rows_count);
163
140
    }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE25vector_execute_single_md5INS_9ColumnStrIjEEEEvPKT_mbRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS9_IjLm4096ESC_Lm16ELm15EEE
Line
Count
Source
132
104
                                   ColumnString::Offsets& res_offset) const {
133
104
        ColumnString::check_chars_length(input_rows_count * MD5_HEX_LENGTH, input_rows_count);
134
104
        res_data.resize(input_rows_count * MD5_HEX_LENGTH);
135
333
        for (size_t i = 0; i < input_rows_count; ++i) {
136
229
            res_offset[i] = (i + 1) * MD5_HEX_LENGTH;
137
229
        }
138
104
        if (input_rows_count == 0) {
139
0
            return;
140
0
        }
141
142
104
        if (is_const) {
143
0
            StringRef data_ref = col->get_data_at(0);
144
0
            const unsigned char* input = reinterpret_cast<const unsigned char*>(data_ref.data);
145
0
            size_t length = data_ref.size;
146
0
            char digest[MD5_HEX_LENGTH];
147
0
            md5_hex_batch(&input, &length, digest, 1);
148
0
            for (size_t i = 0; i < input_rows_count; ++i) {
149
0
                std::memcpy(res_data.data() + i * MD5_HEX_LENGTH, digest, MD5_HEX_LENGTH);
150
0
            }
151
0
            return;
152
0
        }
153
154
104
        std::vector<const unsigned char*> inputs(input_rows_count);
155
104
        std::vector<size_t> lengths(input_rows_count);
156
333
        for (size_t i = 0; i < input_rows_count; ++i) {
157
229
            StringRef data_ref = col->get_data_at(i);
158
229
            inputs[i] = reinterpret_cast<const unsigned char*>(data_ref.data);
159
229
            lengths[i] = data_ref.size;
160
229
        }
161
104
        md5_hex_batch(inputs.data(), lengths.data(), reinterpret_cast<char*>(res_data.data()),
162
104
                      input_rows_count);
163
104
    }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE25vector_execute_single_md5INS_15ColumnVarbinaryEEEvPKT_mbRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Line
Count
Source
132
36
                                   ColumnString::Offsets& res_offset) const {
133
36
        ColumnString::check_chars_length(input_rows_count * MD5_HEX_LENGTH, input_rows_count);
134
36
        res_data.resize(input_rows_count * MD5_HEX_LENGTH);
135
109
        for (size_t i = 0; i < input_rows_count; ++i) {
136
73
            res_offset[i] = (i + 1) * MD5_HEX_LENGTH;
137
73
        }
138
36
        if (input_rows_count == 0) {
139
0
            return;
140
0
        }
141
142
36
        if (is_const) {
143
0
            StringRef data_ref = col->get_data_at(0);
144
0
            const unsigned char* input = reinterpret_cast<const unsigned char*>(data_ref.data);
145
0
            size_t length = data_ref.size;
146
0
            char digest[MD5_HEX_LENGTH];
147
0
            md5_hex_batch(&input, &length, digest, 1);
148
0
            for (size_t i = 0; i < input_rows_count; ++i) {
149
0
                std::memcpy(res_data.data() + i * MD5_HEX_LENGTH, digest, MD5_HEX_LENGTH);
150
0
            }
151
0
            return;
152
0
        }
153
154
36
        std::vector<const unsigned char*> inputs(input_rows_count);
155
36
        std::vector<size_t> lengths(input_rows_count);
156
109
        for (size_t i = 0; i < input_rows_count; ++i) {
157
73
            StringRef data_ref = col->get_data_at(i);
158
73
            inputs[i] = reinterpret_cast<const unsigned char*>(data_ref.data);
159
73
            lengths[i] = data_ref.size;
160
73
        }
161
36
        md5_hex_batch(inputs.data(), lengths.data(), reinterpret_cast<char*>(res_data.data()),
162
36
                      input_rows_count);
163
36
    }
164
};
165
166
class FunctionStringDigestSHA1 : public IFunction {
167
public:
168
    static constexpr auto name = "sha1";
169
28
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestSHA1>(); }
170
0
    String get_name() const override { return name; }
171
0
    size_t get_number_of_arguments() const override { return 1; }
172
20
    bool is_variadic() const override { return true; }
173
174
19
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
175
19
        return std::make_shared<DataTypeString>();
176
19
    }
177
178
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
179
18
                        uint32_t result, size_t input_rows_count) const override {
180
18
        DCHECK_EQ(arguments.size(), 1);
181
18
        ColumnPtr data_col = block.get_by_position(arguments[0]).column;
182
183
18
        auto res_col = ColumnString::create();
184
18
        auto& res_data = res_col->get_chars();
185
18
        auto& res_offset = res_col->get_offsets();
186
18
        res_offset.resize(input_rows_count);
187
18
        if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) {
188
11
            vector_execute(str_col, input_rows_count, res_data, res_offset);
189
11
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) {
190
7
            vector_execute(vb_col, input_rows_count, res_data, res_offset);
191
7
        } else {
192
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
193
0
                                        data_col->get_name(), get_name());
194
0
        }
195
196
18
        block.replace_by_position(result, std::move(res_col));
197
18
        return Status::OK();
198
18
    }
199
200
private:
201
    template <typename ColumnType>
202
    void vector_execute(const ColumnType* col, size_t input_rows_count,
203
18
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
204
18
        SHA1Digest digest;
205
61
        for (size_t i = 0; i < input_rows_count; ++i) {
206
43
            StringRef data_ref = col->get_data_at(i);
207
43
            digest.reset(data_ref.data, data_ref.size);
208
43
            std::string_view ans = digest.digest();
209
210
43
            StringOP::push_value_string(ans, i, res_data, res_offset);
211
43
        }
212
18
    }
_ZNK5doris24FunctionStringDigestSHA114vector_executeINS_9ColumnStrIjEEEEvPKT_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
Line
Count
Source
203
11
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
204
11
        SHA1Digest digest;
205
35
        for (size_t i = 0; i < input_rows_count; ++i) {
206
24
            StringRef data_ref = col->get_data_at(i);
207
24
            digest.reset(data_ref.data, data_ref.size);
208
24
            std::string_view ans = digest.digest();
209
210
24
            StringOP::push_value_string(ans, i, res_data, res_offset);
211
24
        }
212
11
    }
_ZNK5doris24FunctionStringDigestSHA114vector_executeINS_15ColumnVarbinaryEEEvPKT_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS6_IjLm4096ES9_Lm16ELm15EEE
Line
Count
Source
203
7
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
204
7
        SHA1Digest digest;
205
26
        for (size_t i = 0; i < input_rows_count; ++i) {
206
19
            StringRef data_ref = col->get_data_at(i);
207
19
            digest.reset(data_ref.data, data_ref.size);
208
19
            std::string_view ans = digest.digest();
209
210
19
            StringOP::push_value_string(ans, i, res_data, res_offset);
211
19
        }
212
7
    }
213
};
214
215
class FunctionStringDigestSHA2 : public IFunction {
216
public:
217
    static constexpr auto name = "sha2";
218
30
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestSHA2>(); }
219
0
    String get_name() const override { return name; }
220
0
    size_t get_number_of_arguments() const override { return 2; }
221
22
    bool is_variadic() const override { return true; }
222
223
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
224
21
        return std::make_shared<DataTypeString>();
225
21
    }
226
227
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
228
22
                        uint32_t result, size_t input_rows_count) const override {
229
22
        DCHECK(!is_column_const(*block.get_by_position(arguments[0]).column));
230
231
22
        ColumnPtr data_col = block.get_by_position(arguments[0]).column;
232
233
22
        [[maybe_unused]] const auto& [right_column, right_const] =
234
22
                unpack_if_const(block.get_by_position(arguments[1]).column);
235
22
        auto digest_length = assert_cast<const ColumnInt32*>(right_column.get())->get_data()[0];
236
237
22
        auto res_col = ColumnString::create();
238
22
        auto& res_data = res_col->get_chars();
239
22
        auto& res_offset = res_col->get_offsets();
240
22
        res_offset.resize(input_rows_count);
241
242
22
        if (digest_length == 224) {
243
5
            execute_base<SHA224Digest>(data_col, input_rows_count, res_data, res_offset);
244
17
        } else if (digest_length == 256) {
245
6
            execute_base<SHA256Digest>(data_col, input_rows_count, res_data, res_offset);
246
11
        } else if (digest_length == 384) {
247
5
            execute_base<SHA384Digest>(data_col, input_rows_count, res_data, res_offset);
248
6
        } else if (digest_length == 512) {
249
6
            execute_base<SHA512Digest>(data_col, input_rows_count, res_data, res_offset);
250
6
        } else {
251
0
            return Status::InvalidArgument(
252
0
                    "sha2's digest length only support 224/256/384/512 but meet {}", digest_length);
253
0
        }
254
255
22
        block.replace_by_position(result, std::move(res_col));
256
22
        return Status::OK();
257
22
    }
258
259
private:
260
    template <typename T>
261
    void execute_base(ColumnPtr data_col, int input_rows_count, ColumnString::Chars& res_data,
262
22
                      ColumnString::Offsets& res_offset) const {
263
22
        if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) {
264
22
            vector_execute<T>(str_col, input_rows_count, res_data, res_offset);
265
22
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) {
266
0
            vector_execute<T>(vb_col, input_rows_count, res_data, res_offset);
267
0
        } else {
268
0
            throw Exception(ErrorCode::RUNTIME_ERROR,
269
0
                            "Illegal column {} of argument of function {}", data_col->get_name(),
270
0
                            get_name());
271
0
        }
272
22
    }
_ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA224DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Line
Count
Source
262
5
                      ColumnString::Offsets& res_offset) const {
263
5
        if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) {
264
5
            vector_execute<T>(str_col, input_rows_count, res_data, res_offset);
265
5
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) {
266
0
            vector_execute<T>(vb_col, input_rows_count, res_data, res_offset);
267
0
        } else {
268
0
            throw Exception(ErrorCode::RUNTIME_ERROR,
269
0
                            "Illegal column {} of argument of function {}", data_col->get_name(),
270
0
                            get_name());
271
0
        }
272
5
    }
_ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA256DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Line
Count
Source
262
6
                      ColumnString::Offsets& res_offset) const {
263
6
        if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) {
264
6
            vector_execute<T>(str_col, input_rows_count, res_data, res_offset);
265
6
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) {
266
0
            vector_execute<T>(vb_col, input_rows_count, res_data, res_offset);
267
0
        } else {
268
0
            throw Exception(ErrorCode::RUNTIME_ERROR,
269
0
                            "Illegal column {} of argument of function {}", data_col->get_name(),
270
0
                            get_name());
271
0
        }
272
6
    }
_ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA384DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Line
Count
Source
262
5
                      ColumnString::Offsets& res_offset) const {
263
5
        if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) {
264
5
            vector_execute<T>(str_col, input_rows_count, res_data, res_offset);
265
5
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) {
266
0
            vector_execute<T>(vb_col, input_rows_count, res_data, res_offset);
267
0
        } else {
268
0
            throw Exception(ErrorCode::RUNTIME_ERROR,
269
0
                            "Illegal column {} of argument of function {}", data_col->get_name(),
270
0
                            get_name());
271
0
        }
272
5
    }
_ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA512DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Line
Count
Source
262
6
                      ColumnString::Offsets& res_offset) const {
263
6
        if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) {
264
6
            vector_execute<T>(str_col, input_rows_count, res_data, res_offset);
265
6
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) {
266
0
            vector_execute<T>(vb_col, input_rows_count, res_data, res_offset);
267
0
        } else {
268
0
            throw Exception(ErrorCode::RUNTIME_ERROR,
269
0
                            "Illegal column {} of argument of function {}", data_col->get_name(),
270
0
                            get_name());
271
0
        }
272
6
    }
273
274
    template <typename DigestType, typename ColumnType>
275
    void vector_execute(const ColumnType* col, size_t input_rows_count,
276
22
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
277
22
        DigestType digest;
278
110
        for (size_t i = 0; i < input_rows_count; ++i) {
279
88
            StringRef data_ref = col->get_data_at(i);
280
88
            digest.reset(data_ref.data, data_ref.size);
281
88
            std::string_view ans = digest.digest();
282
283
88
            StringOP::push_value_string(ans, i, res_data, res_offset);
284
88
        }
285
22
    }
_ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA224DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Line
Count
Source
276
5
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
277
5
        DigestType digest;
278
26
        for (size_t i = 0; i < input_rows_count; ++i) {
279
21
            StringRef data_ref = col->get_data_at(i);
280
21
            digest.reset(data_ref.data, data_ref.size);
281
21
            std::string_view ans = digest.digest();
282
283
21
            StringOP::push_value_string(ans, i, res_data, res_offset);
284
21
        }
285
5
    }
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA224DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
_ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA256DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Line
Count
Source
276
6
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
277
6
        DigestType digest;
278
29
        for (size_t i = 0; i < input_rows_count; ++i) {
279
23
            StringRef data_ref = col->get_data_at(i);
280
23
            digest.reset(data_ref.data, data_ref.size);
281
23
            std::string_view ans = digest.digest();
282
283
23
            StringOP::push_value_string(ans, i, res_data, res_offset);
284
23
        }
285
6
    }
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA256DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
_ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA384DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Line
Count
Source
276
5
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
277
5
        DigestType digest;
278
26
        for (size_t i = 0; i < input_rows_count; ++i) {
279
21
            StringRef data_ref = col->get_data_at(i);
280
21
            digest.reset(data_ref.data, data_ref.size);
281
21
            std::string_view ans = digest.digest();
282
283
21
            StringOP::push_value_string(ans, i, res_data, res_offset);
284
21
        }
285
5
    }
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA384DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
_ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA512DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Line
Count
Source
276
6
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
277
6
        DigestType digest;
278
29
        for (size_t i = 0; i < input_rows_count; ++i) {
279
23
            StringRef data_ref = col->get_data_at(i);
280
23
            digest.reset(data_ref.data, data_ref.size);
281
23
            std::string_view ans = digest.digest();
282
283
23
            StringOP::push_value_string(ans, i, res_data, res_offset);
284
23
        }
285
6
    }
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA512DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
286
};
287
288
8
/// Registers every digest function defined in this file with `factory`,
/// followed by their alternative SQL names.
void register_function_string_digest(SimpleFunctionFactory& factory) {
    // Local shorthands for the two template-based digest functions.
    using Sm3Function = FunctionStringDigestMulti<SM3Sum>;
    using Md5Function = FunctionStringDigestMulti<MD5Sum>;

    // Primary registrations, in the same order as before.
    factory.register_function<Sm3Function>();
    factory.register_function<Md5Function>();
    factory.register_function<FunctionStringDigestSHA1>();
    factory.register_function<FunctionStringDigestSHA2>();

    // Aliases: "md5", "sm3" and "sha" map to the registered names of the
    // corresponding functions above.
    factory.register_alias(Md5Function::name, "md5");
    factory.register_alias(Sm3Function::name, "sm3");
    factory.register_alias(FunctionStringDigestSHA1::name, "sha");
}
298
299
#include "common/compile_check_avoid_end.h"
300
} // namespace doris