be/src/exprs/function/function_string_digest.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <cstddef> |
19 | | #include <cstring> |
20 | | #include <string_view> |
21 | | #include <type_traits> |
22 | | #include <vector> |
23 | | |
24 | | #include "common/status.h" |
25 | | #include "core/assert_cast.h" |
26 | | #include "core/block/block.h" |
27 | | #include "core/block/column_numbers.h" |
28 | | #include "core/column/column_string.h" |
29 | | #include "core/column/column_varbinary.h" |
30 | | #include "core/column/column_vector.h" |
31 | | #include "core/data_type/data_type_string.h" |
32 | | #include "core/string_ref.h" |
33 | | #include "exec/common/stringop_substring.h" |
34 | | #include "exprs/function/function.h" |
35 | | #include "exprs/function/simple_function_factory.h" |
36 | | #include "exprs/function_context.h" |
37 | | #include "util/md5.h" |
38 | | #include "util/sha.h" |
39 | | #include "util/sm3.h" |
40 | | |
41 | | namespace doris { |
42 | | #include "common/compile_check_avoid_begin.h" |
43 | | |
44 | | struct SM3Sum { |
45 | | static constexpr auto name = "sm3sum"; |
46 | | using ObjectData = SM3Digest; |
47 | | }; |
48 | | |
49 | | struct MD5Sum { |
50 | | static constexpr auto name = "md5sum"; |
51 | | using ObjectData = Md5Digest; |
52 | | }; |
53 | | |
54 | | template <typename Impl> |
55 | | class FunctionStringDigestMulti : public IFunction { |
56 | | public: |
57 | | static constexpr auto name = Impl::name; |
58 | 367 | static FunctionPtr create() { return std::make_shared<FunctionStringDigestMulti>(); }_ZN5doris25FunctionStringDigestMultiINS_6SM3SumEE6createEv Line | Count | Source | 58 | 125 | static FunctionPtr create() { return std::make_shared<FunctionStringDigestMulti>(); } |
_ZN5doris25FunctionStringDigestMultiINS_6MD5SumEE6createEv Line | Count | Source | 58 | 242 | static FunctionPtr create() { return std::make_shared<FunctionStringDigestMulti>(); } |
|
59 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE8get_nameB5cxx11Ev |
60 | 0 | size_t get_number_of_arguments() const override { return 0; }Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE23get_number_of_argumentsEv |
61 | 351 | bool is_variadic() const override { return true; }_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE11is_variadicEv Line | Count | Source | 61 | 117 | bool is_variadic() const override { return true; } |
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE11is_variadicEv Line | Count | Source | 61 | 234 | bool is_variadic() const override { return true; } |
|
62 | | |
63 | 349 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
64 | 349 | return std::make_shared<DataTypeString>(); |
65 | 349 | } _ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 63 | 116 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 64 | 116 | return std::make_shared<DataTypeString>(); | 65 | 116 | } |
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 63 | 233 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 64 | 233 | return std::make_shared<DataTypeString>(); | 65 | 233 | } |
|
66 | | |
67 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
68 | 353 | uint32_t result, size_t input_rows_count) const override { |
69 | 353 | DCHECK_GE(arguments.size(), 1); |
70 | | |
71 | 353 | auto res = ColumnString::create(); |
72 | 353 | auto& res_data = res->get_chars(); |
73 | 353 | auto& res_offset = res->get_offsets(); |
74 | 353 | res_offset.resize(input_rows_count); |
75 | | |
76 | 353 | std::vector<ColumnPtr> argument_columns(arguments.size()); |
77 | 353 | std::vector<uint8_t> is_const(arguments.size(), 0); |
78 | 988 | for (size_t i = 0; i < arguments.size(); ++i) { |
79 | 635 | std::tie(argument_columns[i], is_const[i]) = |
80 | 635 | unpack_if_const(block.get_by_position(arguments[i]).column); |
81 | 635 | } |
82 | | |
83 | 353 | if (check_and_get_column<ColumnString>(argument_columns[0].get())) { |
84 | 227 | vector_execute<ColumnString>(block, input_rows_count, argument_columns, is_const, |
85 | 227 | res_data, res_offset); |
86 | 227 | } else if (check_and_get_column<ColumnVarbinary>(argument_columns[0].get())) { |
87 | 127 | vector_execute<ColumnVarbinary>(block, input_rows_count, argument_columns, is_const, |
88 | 127 | res_data, res_offset); |
89 | 18.4E | } else { |
90 | 18.4E | return Status::RuntimeError("Illegal column {} of argument of function {}", |
91 | 18.4E | argument_columns[0]->get_name(), get_name()); |
92 | 18.4E | } |
93 | | |
94 | 354 | block.replace_by_position(result, std::move(res)); |
95 | 354 | return Status::OK(); |
96 | 353 | } _ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 68 | 116 | uint32_t result, size_t input_rows_count) const override { | 69 | 116 | DCHECK_GE(arguments.size(), 1); | 70 | | | 71 | 116 | auto res = ColumnString::create(); | 72 | 116 | auto& res_data = res->get_chars(); | 73 | 116 | auto& res_offset = res->get_offsets(); | 74 | 116 | res_offset.resize(input_rows_count); | 75 | | | 76 | 116 | std::vector<ColumnPtr> argument_columns(arguments.size()); | 77 | 116 | std::vector<uint8_t> is_const(arguments.size(), 0); | 78 | 341 | for (size_t i = 0; i < arguments.size(); ++i) { | 79 | 225 | std::tie(argument_columns[i], is_const[i]) = | 80 | 225 | unpack_if_const(block.get_by_position(arguments[i]).column); | 81 | 225 | } | 82 | | | 83 | 116 | if (check_and_get_column<ColumnString>(argument_columns[0].get())) { | 84 | 76 | vector_execute<ColumnString>(block, input_rows_count, argument_columns, is_const, | 85 | 76 | res_data, res_offset); | 86 | 76 | } else if (check_and_get_column<ColumnVarbinary>(argument_columns[0].get())) { | 87 | 40 | vector_execute<ColumnVarbinary>(block, input_rows_count, argument_columns, is_const, | 88 | 40 | res_data, res_offset); | 89 | 40 | } else { | 90 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 91 | 0 | argument_columns[0]->get_name(), get_name()); | 92 | 0 | } | 93 | | | 94 | 116 | block.replace_by_position(result, std::move(res)); | 95 | 116 | return Status::OK(); | 96 | 116 | } |
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 68 | 237 | uint32_t result, size_t input_rows_count) const override { | 69 | 237 | DCHECK_GE(arguments.size(), 1); | 70 | | | 71 | 237 | auto res = ColumnString::create(); | 72 | 237 | auto& res_data = res->get_chars(); | 73 | 237 | auto& res_offset = res->get_offsets(); | 74 | 237 | res_offset.resize(input_rows_count); | 75 | | | 76 | 237 | std::vector<ColumnPtr> argument_columns(arguments.size()); | 77 | 237 | std::vector<uint8_t> is_const(arguments.size(), 0); | 78 | 647 | for (size_t i = 0; i < arguments.size(); ++i) { | 79 | 410 | std::tie(argument_columns[i], is_const[i]) = | 80 | 410 | unpack_if_const(block.get_by_position(arguments[i]).column); | 81 | 410 | } | 82 | | | 83 | 237 | if (check_and_get_column<ColumnString>(argument_columns[0].get())) { | 84 | 151 | vector_execute<ColumnString>(block, input_rows_count, argument_columns, is_const, | 85 | 151 | res_data, res_offset); | 86 | 151 | } else if (check_and_get_column<ColumnVarbinary>(argument_columns[0].get())) { | 87 | 87 | vector_execute<ColumnVarbinary>(block, input_rows_count, argument_columns, is_const, | 88 | 87 | res_data, res_offset); | 89 | 18.4E | } else { | 90 | 18.4E | return Status::RuntimeError("Illegal column {} of argument of function {}", | 91 | 18.4E | argument_columns[0]->get_name(), get_name()); | 92 | 18.4E | } | 93 | | | 94 | 238 | block.replace_by_position(result, std::move(res)); | 95 | 238 | return Status::OK(); | 96 | 237 | } |
|
97 | | |
98 | | private: |
99 | | template <typename ColumnType> |
100 | | void vector_execute(Block& block, size_t input_rows_count, |
101 | | const std::vector<ColumnPtr>& argument_columns, |
102 | | const std::vector<uint8_t>& is_const, ColumnString::Chars& res_data, |
103 | 354 | ColumnString::Offsets& res_offset) const { |
104 | 354 | if constexpr (std::is_same_v<Impl, MD5Sum>) { |
105 | 238 | if (argument_columns.size() == 1) { |
106 | 138 | const auto* col = assert_cast<const ColumnType*>(argument_columns[0].get()); |
107 | 138 | vector_execute_single_md5(col, input_rows_count, is_const[0], res_data, res_offset); |
108 | 138 | return; |
109 | 138 | } |
110 | 238 | } |
111 | | |
112 | 100 | using ObjectData = typename Impl::ObjectData; |
113 | 681 | for (size_t i = 0; i < input_rows_count; ++i) { |
114 | 327 | ObjectData digest; |
115 | 980 | for (size_t j = 0; j < argument_columns.size(); ++j) { |
116 | 653 | const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get()); |
117 | 653 | StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i); |
118 | 653 | if (data_ref.size < 1) { |
119 | 166 | continue; |
120 | 166 | } |
121 | 487 | digest.update(data_ref.data, data_ref.size); |
122 | 487 | } |
123 | 327 | digest.digest(); |
124 | 327 | StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()), |
125 | 327 | i, res_data, res_offset); |
126 | 327 | } |
127 | 354 | } _ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE14vector_executeINS_9ColumnStrIjEEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrISA_EESaISD_EERKS8_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSM_IjLm4096ESP_Lm16ELm15EEE Line | Count | Source | 103 | 76 | ColumnString::Offsets& res_offset) const { | 104 | | if constexpr (std::is_same_v<Impl, MD5Sum>) { | 105 | | if (argument_columns.size() == 1) { | 106 | | const auto* col = assert_cast<const ColumnType*>(argument_columns[0].get()); | 107 | | vector_execute_single_md5(col, input_rows_count, is_const[0], res_data, res_offset); | 108 | | return; | 109 | | } | 110 | | } | 111 | | | 112 | 76 | using ObjectData = typename Impl::ObjectData; | 113 | 233 | for (size_t i = 0; i < input_rows_count; ++i) { | 114 | 157 | ObjectData digest; | 115 | 378 | for (size_t j = 0; j < argument_columns.size(); ++j) { | 116 | 221 | const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get()); | 117 | 221 | StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i); | 118 | 221 | if (data_ref.size < 1) { | 119 | 30 | continue; | 120 | 30 | } | 121 | 191 | digest.update(data_ref.data, data_ref.size); | 122 | 191 | } | 123 | 157 | digest.digest(); | 124 | 157 | StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()), | 125 | 157 | i, res_data, res_offset); | 126 | 157 | } | 127 | 76 | } |
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE14vector_executeINS_15ColumnVarbinaryEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS9_EESaISC_EERKS7_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSL_IjLm4096ESO_Lm16ELm15EEE Line | Count | Source | 103 | 40 | ColumnString::Offsets& res_offset) const { | 104 | | if constexpr (std::is_same_v<Impl, MD5Sum>) { | 105 | | if (argument_columns.size() == 1) { | 106 | | const auto* col = assert_cast<const ColumnType*>(argument_columns[0].get()); | 107 | | vector_execute_single_md5(col, input_rows_count, is_const[0], res_data, res_offset); | 108 | | return; | 109 | | } | 110 | | } | 111 | | | 112 | 40 | using ObjectData = typename Impl::ObjectData; | 113 | 93 | for (size_t i = 0; i < input_rows_count; ++i) { | 114 | 53 | ObjectData digest; | 115 | 169 | for (size_t j = 0; j < argument_columns.size(); ++j) { | 116 | 116 | const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get()); | 117 | 116 | StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i); | 118 | 116 | if (data_ref.size < 1) { | 119 | 30 | continue; | 120 | 30 | } | 121 | 86 | digest.update(data_ref.data, data_ref.size); | 122 | 86 | } | 123 | 53 | digest.digest(); | 124 | 53 | StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()), | 125 | 53 | i, res_data, res_offset); | 126 | 53 | } | 127 | 40 | } |
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE14vector_executeINS_9ColumnStrIjEEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrISA_EESaISD_EERKS8_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSM_IjLm4096ESP_Lm16ELm15EEE Line | Count | Source | 103 | 151 | ColumnString::Offsets& res_offset) const { | 104 | 151 | if constexpr (std::is_same_v<Impl, MD5Sum>) { | 105 | 151 | if (argument_columns.size() == 1) { | 106 | 104 | const auto* col = assert_cast<const ColumnType*>(argument_columns[0].get()); | 107 | 104 | vector_execute_single_md5(col, input_rows_count, is_const[0], res_data, res_offset); | 108 | 104 | return; | 109 | 104 | } | 110 | 151 | } | 111 | | | 112 | 47 | using ObjectData = typename Impl::ObjectData; | 113 | 206 | for (size_t i = 0; i < input_rows_count; ++i) { | 114 | 55 | ObjectData digest; | 115 | 206 | for (size_t j = 0; j < argument_columns.size(); ++j) { | 116 | 151 | const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get()); | 117 | 151 | StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i); | 118 | 151 | if (data_ref.size < 1) { | 119 | 51 | continue; | 120 | 51 | } | 121 | 100 | digest.update(data_ref.data, data_ref.size); | 122 | 100 | } | 123 | 55 | digest.digest(); | 124 | 55 | StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()), | 125 | 55 | i, res_data, res_offset); | 126 | 55 | } | 127 | 151 | } |
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE14vector_executeINS_15ColumnVarbinaryEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS9_EESaISC_EERKS7_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSL_IjLm4096ESO_Lm16ELm15EEE Line | Count | Source | 103 | 87 | ColumnString::Offsets& res_offset) const { | 104 | 87 | if constexpr (std::is_same_v<Impl, MD5Sum>) { | 105 | 87 | if (argument_columns.size() == 1) { | 106 | 34 | const auto* col = assert_cast<const ColumnType*>(argument_columns[0].get()); | 107 | 34 | vector_execute_single_md5(col, input_rows_count, is_const[0], res_data, res_offset); | 108 | 34 | return; | 109 | 34 | } | 110 | 87 | } | 111 | | | 112 | 53 | using ObjectData = typename Impl::ObjectData; | 113 | 149 | for (size_t i = 0; i < input_rows_count; ++i) { | 114 | 62 | ObjectData digest; | 115 | 227 | for (size_t j = 0; j < argument_columns.size(); ++j) { | 116 | 165 | const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get()); | 117 | 165 | StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i); | 118 | 165 | if (data_ref.size < 1) { | 119 | 55 | continue; | 120 | 55 | } | 121 | 110 | digest.update(data_ref.data, data_ref.size); | 122 | 110 | } | 123 | 62 | digest.digest(); | 124 | 62 | StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()), | 125 | 62 | i, res_data, res_offset); | 126 | 62 | } | 127 | 87 | } |
|
128 | | |
129 | | template <typename ColumnType> |
130 | | void vector_execute_single_md5(const ColumnType* col, size_t input_rows_count, bool is_const, |
131 | | ColumnString::Chars& res_data, |
132 | 138 | ColumnString::Offsets& res_offset) const { |
133 | 138 | ColumnString::check_chars_length(input_rows_count * MD5_HEX_LENGTH, input_rows_count); |
134 | 138 | res_data.resize(input_rows_count * MD5_HEX_LENGTH); |
135 | 423 | for (size_t i = 0; i < input_rows_count; ++i) { |
136 | 285 | res_offset[i] = (i + 1) * MD5_HEX_LENGTH; |
137 | 285 | } |
138 | 138 | if (input_rows_count == 0) { |
139 | 0 | return; |
140 | 0 | } |
141 | | |
142 | 138 | if (is_const) { |
143 | 0 | StringRef data_ref = col->get_data_at(0); |
144 | 0 | const unsigned char* input = reinterpret_cast<const unsigned char*>(data_ref.data); |
145 | 0 | size_t length = data_ref.size; |
146 | 0 | char digest[MD5_HEX_LENGTH]; |
147 | 0 | md5_hex_batch(&input, &length, digest, 1); |
148 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
149 | 0 | std::memcpy(res_data.data() + i * MD5_HEX_LENGTH, digest, MD5_HEX_LENGTH); |
150 | 0 | } |
151 | 0 | return; |
152 | 0 | } |
153 | | |
154 | 138 | std::vector<const unsigned char*> inputs(input_rows_count); |
155 | 138 | std::vector<size_t> lengths(input_rows_count); |
156 | 423 | for (size_t i = 0; i < input_rows_count; ++i) { |
157 | 285 | StringRef data_ref = col->get_data_at(i); |
158 | 285 | inputs[i] = reinterpret_cast<const unsigned char*>(data_ref.data); |
159 | 285 | lengths[i] = data_ref.size; |
160 | 285 | } |
161 | 138 | md5_hex_batch(inputs.data(), lengths.data(), reinterpret_cast<char*>(res_data.data()), |
162 | 138 | input_rows_count); |
163 | 138 | } _ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE25vector_execute_single_md5INS_9ColumnStrIjEEEEvPKT_mbRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS9_IjLm4096ESC_Lm16ELm15EEE Line | Count | Source | 132 | 104 | ColumnString::Offsets& res_offset) const { | 133 | 104 | ColumnString::check_chars_length(input_rows_count * MD5_HEX_LENGTH, input_rows_count); | 134 | 104 | res_data.resize(input_rows_count * MD5_HEX_LENGTH); | 135 | 326 | for (size_t i = 0; i < input_rows_count; ++i) { | 136 | 222 | res_offset[i] = (i + 1) * MD5_HEX_LENGTH; | 137 | 222 | } | 138 | 104 | if (input_rows_count == 0) { | 139 | 0 | return; | 140 | 0 | } | 141 | | | 142 | 104 | if (is_const) { | 143 | 0 | StringRef data_ref = col->get_data_at(0); | 144 | 0 | const unsigned char* input = reinterpret_cast<const unsigned char*>(data_ref.data); | 145 | 0 | size_t length = data_ref.size; | 146 | 0 | char digest[MD5_HEX_LENGTH]; | 147 | 0 | md5_hex_batch(&input, &length, digest, 1); | 148 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 149 | 0 | std::memcpy(res_data.data() + i * MD5_HEX_LENGTH, digest, MD5_HEX_LENGTH); | 150 | 0 | } | 151 | 0 | return; | 152 | 0 | } | 153 | | | 154 | 104 | std::vector<const unsigned char*> inputs(input_rows_count); | 155 | 104 | std::vector<size_t> lengths(input_rows_count); | 156 | 326 | for (size_t i = 0; i < input_rows_count; ++i) { | 157 | 222 | StringRef data_ref = col->get_data_at(i); | 158 | 222 | inputs[i] = reinterpret_cast<const unsigned char*>(data_ref.data); | 159 | 222 | lengths[i] = data_ref.size; | 160 | 222 | } | 161 | 104 | md5_hex_batch(inputs.data(), lengths.data(), reinterpret_cast<char*>(res_data.data()), | 162 | 104 | input_rows_count); | 163 | 104 | } |
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE25vector_execute_single_md5INS_15ColumnVarbinaryEEEvPKT_mbRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE Line | Count | Source | 132 | 34 | ColumnString::Offsets& res_offset) const { | 133 | 34 | ColumnString::check_chars_length(input_rows_count * MD5_HEX_LENGTH, input_rows_count); | 134 | 34 | res_data.resize(input_rows_count * MD5_HEX_LENGTH); | 135 | 97 | for (size_t i = 0; i < input_rows_count; ++i) { | 136 | 63 | res_offset[i] = (i + 1) * MD5_HEX_LENGTH; | 137 | 63 | } | 138 | 34 | if (input_rows_count == 0) { | 139 | 0 | return; | 140 | 0 | } | 141 | | | 142 | 34 | if (is_const) { | 143 | 0 | StringRef data_ref = col->get_data_at(0); | 144 | 0 | const unsigned char* input = reinterpret_cast<const unsigned char*>(data_ref.data); | 145 | 0 | size_t length = data_ref.size; | 146 | 0 | char digest[MD5_HEX_LENGTH]; | 147 | 0 | md5_hex_batch(&input, &length, digest, 1); | 148 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 149 | 0 | std::memcpy(res_data.data() + i * MD5_HEX_LENGTH, digest, MD5_HEX_LENGTH); | 150 | 0 | } | 151 | 0 | return; | 152 | 0 | } | 153 | | | 154 | 34 | std::vector<const unsigned char*> inputs(input_rows_count); | 155 | 34 | std::vector<size_t> lengths(input_rows_count); | 156 | 97 | for (size_t i = 0; i < input_rows_count; ++i) { | 157 | 63 | StringRef data_ref = col->get_data_at(i); | 158 | 63 | inputs[i] = reinterpret_cast<const unsigned char*>(data_ref.data); | 159 | 63 | lengths[i] = data_ref.size; | 160 | 63 | } | 161 | 34 | md5_hex_batch(inputs.data(), lengths.data(), reinterpret_cast<char*>(res_data.data()), | 162 | 34 | input_rows_count); | 163 | 34 | } |
|
164 | | }; |
165 | | |
166 | | class FunctionStringDigestSHA1 : public IFunction { |
167 | | public: |
168 | | static constexpr auto name = "sha1"; |
169 | 24 | static FunctionPtr create() { return std::make_shared<FunctionStringDigestSHA1>(); } |
170 | 0 | String get_name() const override { return name; } |
171 | 0 | size_t get_number_of_arguments() const override { return 1; } |
172 | 16 | bool is_variadic() const override { return true; } |
173 | | |
174 | 15 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
175 | 15 | return std::make_shared<DataTypeString>(); |
176 | 15 | } |
177 | | |
178 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
179 | 14 | uint32_t result, size_t input_rows_count) const override { |
180 | 14 | DCHECK_EQ(arguments.size(), 1); |
181 | 14 | ColumnPtr data_col = block.get_by_position(arguments[0]).column; |
182 | | |
183 | 14 | auto res_col = ColumnString::create(); |
184 | 14 | auto& res_data = res_col->get_chars(); |
185 | 14 | auto& res_offset = res_col->get_offsets(); |
186 | 14 | res_offset.resize(input_rows_count); |
187 | 14 | if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) { |
188 | 9 | vector_execute(str_col, input_rows_count, res_data, res_offset); |
189 | 9 | } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) { |
190 | 5 | vector_execute(vb_col, input_rows_count, res_data, res_offset); |
191 | 5 | } else { |
192 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", |
193 | 0 | data_col->get_name(), get_name()); |
194 | 0 | } |
195 | | |
196 | 14 | block.replace_by_position(result, std::move(res_col)); |
197 | 14 | return Status::OK(); |
198 | 14 | } |
199 | | |
200 | | private: |
201 | | template <typename ColumnType> |
202 | | void vector_execute(const ColumnType* col, size_t input_rows_count, |
203 | 14 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const { |
204 | 14 | SHA1Digest digest; |
205 | 37 | for (size_t i = 0; i < input_rows_count; ++i) { |
206 | 23 | StringRef data_ref = col->get_data_at(i); |
207 | 23 | digest.reset(data_ref.data, data_ref.size); |
208 | 23 | std::string_view ans = digest.digest(); |
209 | | |
210 | 23 | StringOP::push_value_string(ans, i, res_data, res_offset); |
211 | 23 | } |
212 | 14 | } _ZNK5doris24FunctionStringDigestSHA114vector_executeINS_9ColumnStrIjEEEEvPKT_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE Line | Count | Source | 203 | 9 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const { | 204 | 9 | SHA1Digest digest; | 205 | 23 | for (size_t i = 0; i < input_rows_count; ++i) { | 206 | 14 | StringRef data_ref = col->get_data_at(i); | 207 | 14 | digest.reset(data_ref.data, data_ref.size); | 208 | 14 | std::string_view ans = digest.digest(); | 209 | | | 210 | 14 | StringOP::push_value_string(ans, i, res_data, res_offset); | 211 | 14 | } | 212 | 9 | } |
_ZNK5doris24FunctionStringDigestSHA114vector_executeINS_15ColumnVarbinaryEEEvPKT_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS6_IjLm4096ES9_Lm16ELm15EEE Line | Count | Source | 203 | 5 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const { | 204 | 5 | SHA1Digest digest; | 205 | 14 | for (size_t i = 0; i < input_rows_count; ++i) { | 206 | 9 | StringRef data_ref = col->get_data_at(i); | 207 | 9 | digest.reset(data_ref.data, data_ref.size); | 208 | 9 | std::string_view ans = digest.digest(); | 209 | | | 210 | 9 | StringOP::push_value_string(ans, i, res_data, res_offset); | 211 | 9 | } | 212 | 5 | } |
|
213 | | }; |
214 | | |
215 | | class FunctionStringDigestSHA2 : public IFunction { |
216 | | public: |
217 | | static constexpr auto name = "sha2"; |
218 | 14 | static FunctionPtr create() { return std::make_shared<FunctionStringDigestSHA2>(); } |
219 | 0 | String get_name() const override { return name; } |
220 | 0 | size_t get_number_of_arguments() const override { return 2; } |
221 | 6 | bool is_variadic() const override { return true; } |
222 | | |
223 | 5 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
224 | 5 | return std::make_shared<DataTypeString>(); |
225 | 5 | } |
226 | | |
227 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
228 | 6 | uint32_t result, size_t input_rows_count) const override { |
229 | 6 | DCHECK(!is_column_const(*block.get_by_position(arguments[0]).column)); |
230 | | |
231 | 6 | ColumnPtr data_col = block.get_by_position(arguments[0]).column; |
232 | | |
233 | 6 | [[maybe_unused]] const auto& [right_column, right_const] = |
234 | 6 | unpack_if_const(block.get_by_position(arguments[1]).column); |
235 | 6 | auto digest_length = assert_cast<const ColumnInt32*>(right_column.get())->get_data()[0]; |
236 | | |
237 | 6 | auto res_col = ColumnString::create(); |
238 | 6 | auto& res_data = res_col->get_chars(); |
239 | 6 | auto& res_offset = res_col->get_offsets(); |
240 | 6 | res_offset.resize(input_rows_count); |
241 | | |
242 | 6 | if (digest_length == 224) { |
243 | 1 | execute_base<SHA224Digest>(data_col, input_rows_count, res_data, res_offset); |
244 | 5 | } else if (digest_length == 256) { |
245 | 2 | execute_base<SHA256Digest>(data_col, input_rows_count, res_data, res_offset); |
246 | 3 | } else if (digest_length == 384) { |
247 | 1 | execute_base<SHA384Digest>(data_col, input_rows_count, res_data, res_offset); |
248 | 2 | } else if (digest_length == 512) { |
249 | 2 | execute_base<SHA512Digest>(data_col, input_rows_count, res_data, res_offset); |
250 | 2 | } else { |
251 | 0 | return Status::InvalidArgument( |
252 | 0 | "sha2's digest length only support 224/256/384/512 but meet {}", digest_length); |
253 | 0 | } |
254 | | |
255 | 6 | block.replace_by_position(result, std::move(res_col)); |
256 | 6 | return Status::OK(); |
257 | 6 | } |
258 | | |
259 | | private: |
260 | | template <typename T> |
261 | | void execute_base(ColumnPtr data_col, int input_rows_count, ColumnString::Chars& res_data, |
262 | 6 | ColumnString::Offsets& res_offset) const { |
263 | 6 | if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) { |
264 | 6 | vector_execute<T>(str_col, input_rows_count, res_data, res_offset); |
265 | 6 | } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) { |
266 | 0 | vector_execute<T>(vb_col, input_rows_count, res_data, res_offset); |
267 | 0 | } else { |
268 | 0 | throw Exception(ErrorCode::RUNTIME_ERROR, |
269 | 0 | "Illegal column {} of argument of function {}", data_col->get_name(), |
270 | 0 | get_name()); |
271 | 0 | } |
272 | 6 | } _ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA224DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE Line | Count | Source | 262 | 1 | ColumnString::Offsets& res_offset) const { | 263 | 1 | if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) { | 264 | 1 | vector_execute<T>(str_col, input_rows_count, res_data, res_offset); | 265 | 1 | } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) { | 266 | 0 | vector_execute<T>(vb_col, input_rows_count, res_data, res_offset); | 267 | 0 | } else { | 268 | 0 | throw Exception(ErrorCode::RUNTIME_ERROR, | 269 | 0 | "Illegal column {} of argument of function {}", data_col->get_name(), | 270 | 0 | get_name()); | 271 | 0 | } | 272 | 1 | } |
_ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA256DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE Line | Count | Source | 262 | 2 | ColumnString::Offsets& res_offset) const { | 263 | 2 | if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) { | 264 | 2 | vector_execute<T>(str_col, input_rows_count, res_data, res_offset); | 265 | 2 | } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) { | 266 | 0 | vector_execute<T>(vb_col, input_rows_count, res_data, res_offset); | 267 | 0 | } else { | 268 | 0 | throw Exception(ErrorCode::RUNTIME_ERROR, | 269 | 0 | "Illegal column {} of argument of function {}", data_col->get_name(), | 270 | 0 | get_name()); | 271 | 0 | } | 272 | 2 | } |
_ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA384DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE Line | Count | Source | 262 | 1 | ColumnString::Offsets& res_offset) const { | 263 | 1 | if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) { | 264 | 1 | vector_execute<T>(str_col, input_rows_count, res_data, res_offset); | 265 | 1 | } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) { | 266 | 0 | vector_execute<T>(vb_col, input_rows_count, res_data, res_offset); | 267 | 0 | } else { | 268 | 0 | throw Exception(ErrorCode::RUNTIME_ERROR, | 269 | 0 | "Illegal column {} of argument of function {}", data_col->get_name(), | 270 | 0 | get_name()); | 271 | 0 | } | 272 | 1 | } |
_ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA512DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE Line | Count | Source | 262 | 2 | ColumnString::Offsets& res_offset) const { | 263 | 2 | if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) { | 264 | 2 | vector_execute<T>(str_col, input_rows_count, res_data, res_offset); | 265 | 2 | } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) { | 266 | 0 | vector_execute<T>(vb_col, input_rows_count, res_data, res_offset); | 267 | 0 | } else { | 268 | 0 | throw Exception(ErrorCode::RUNTIME_ERROR, | 269 | 0 | "Illegal column {} of argument of function {}", data_col->get_name(), | 270 | 0 | get_name()); | 271 | 0 | } | 272 | 2 | } |
|
273 | | |
274 | | template <typename DigestType, typename ColumnType> |
275 | | void vector_execute(const ColumnType* col, size_t input_rows_count, |
276 | 6 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const { |
277 | 6 | DigestType digest; |
278 | 14 | for (size_t i = 0; i < input_rows_count; ++i) { |
279 | 8 | StringRef data_ref = col->get_data_at(i); |
280 | 8 | digest.reset(data_ref.data, data_ref.size); |
281 | 8 | std::string_view ans = digest.digest(); |
282 | | |
283 | 8 | StringOP::push_value_string(ans, i, res_data, res_offset); |
284 | 8 | } |
285 | 6 | } _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA224DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE Line | Count | Source | 276 | 1 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const { | 277 | 1 | DigestType digest; | 278 | 2 | for (size_t i = 0; i < input_rows_count; ++i) { | 279 | 1 | StringRef data_ref = col->get_data_at(i); | 280 | 1 | digest.reset(data_ref.data, data_ref.size); | 281 | 1 | std::string_view ans = digest.digest(); | 282 | | | 283 | 1 | StringOP::push_value_string(ans, i, res_data, res_offset); | 284 | 1 | } | 285 | 1 | } |
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA224DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA256DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE Line | Count | Source | 276 | 2 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const { | 277 | 2 | DigestType digest; | 278 | 5 | for (size_t i = 0; i < input_rows_count; ++i) { | 279 | 3 | StringRef data_ref = col->get_data_at(i); | 280 | 3 | digest.reset(data_ref.data, data_ref.size); | 281 | 3 | std::string_view ans = digest.digest(); | 282 | | | 283 | 3 | StringOP::push_value_string(ans, i, res_data, res_offset); | 284 | 3 | } | 285 | 2 | } |
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA256DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA384DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE Line | Count | Source | 276 | 1 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const { | 277 | 1 | DigestType digest; | 278 | 2 | for (size_t i = 0; i < input_rows_count; ++i) { | 279 | 1 | StringRef data_ref = col->get_data_at(i); | 280 | 1 | digest.reset(data_ref.data, data_ref.size); | 281 | 1 | std::string_view ans = digest.digest(); | 282 | | | 283 | 1 | StringOP::push_value_string(ans, i, res_data, res_offset); | 284 | 1 | } | 285 | 1 | } |
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA384DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA512DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE Line | Count | Source | 276 | 2 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const { | 277 | 2 | DigestType digest; | 278 | 5 | for (size_t i = 0; i < input_rows_count; ++i) { | 279 | 3 | StringRef data_ref = col->get_data_at(i); | 280 | 3 | digest.reset(data_ref.data, data_ref.size); | 281 | 3 | std::string_view ans = digest.digest(); | 282 | | | 283 | 3 | StringOP::push_value_string(ans, i, res_data, res_offset); | 284 | 3 | } | 285 | 2 | } |
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA512DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE |
286 | | }; |
287 | | |
288 | 8 | void register_function_string_digest(SimpleFunctionFactory& factory) { |
289 | 8 | factory.register_function<FunctionStringDigestMulti<SM3Sum>>(); |
290 | 8 | factory.register_function<FunctionStringDigestMulti<MD5Sum>>(); |
291 | 8 | factory.register_function<FunctionStringDigestSHA1>(); |
292 | 8 | factory.register_function<FunctionStringDigestSHA2>(); |
293 | | |
294 | 8 | factory.register_alias(FunctionStringDigestMulti<MD5Sum>::name, "md5"); |
295 | 8 | factory.register_alias(FunctionStringDigestMulti<SM3Sum>::name, "sm3"); |
296 | 8 | factory.register_alias(FunctionStringDigestSHA1::name, "sha"); |
297 | 8 | } |
298 | | |
299 | | #include "common/compile_check_avoid_end.h" |
300 | | } // namespace doris |