be/src/exprs/function/function_hash.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionHash.cpp |
19 | | // and modified by Doris |
20 | | |
21 | | #include "exprs/function/function_hash.h" |
22 | | |
23 | | #include <vector> |
24 | | |
25 | | #include "common/status.h" |
26 | | #include "core/assert_cast.h" |
27 | | #include "core/column/column.h" |
28 | | #include "core/column/column_const.h" |
29 | | #include "core/column/column_string.h" |
30 | | #include "core/column/column_varbinary.h" |
31 | | #include "core/column/column_vector.h" |
32 | | #include "core/data_type/data_type.h" |
33 | | #include "core/data_type/data_type_number.h" |
34 | | #include "core/data_type/data_type_string.h" |
35 | | #include "core/field.h" |
36 | | #include "core/value/large_int_value.h" |
37 | | #include "exec/common/template_helpers.hpp" |
38 | | #include "exprs/function/function_helpers.h" |
39 | | #include "exprs/function/function_variadic_arguments.h" |
40 | | #include "exprs/function/simple_function_factory.h" |
41 | | #include "util/hash/murmur_hash3.h" |
42 | | #include "util/hash_util.hpp" |
43 | | |
44 | | namespace doris { |
45 | | constexpr uint64_t emtpy_value = 0xe28dbde7fe22e41c; |
46 | | |
47 | | namespace { |
48 | | |
49 | 250 | __int128_t pack_murmur_hash3_128(uint64_t h1, uint64_t h2) { |
50 | 250 | static_assert(sizeof(__int128_t) == sizeof(uint64_t) * 2); |
51 | | // Store the two MurmurHash3 x64 128-bit lanes in a single LARGEINT value. Keep h1 in the |
52 | | // low 64 bits and h2 in the high 64 bits to match murmur_hash3_x64_128's out[0]/out[1]. |
53 | 250 | const auto value = |
54 | 250 | (static_cast<unsigned __int128>(h2) << 64) | static_cast<unsigned __int128>(h1); |
55 | 250 | return static_cast<__int128_t>(value); |
56 | 250 | } |
57 | | |
58 | 110 | void unpack_murmur_hash3_128(__int128_t value, uint64_t& h1, uint64_t& h2) { |
59 | 110 | static_assert(sizeof(__int128_t) == sizeof(uint64_t) * 2); |
60 | 110 | const auto unsigned_value = static_cast<unsigned __int128>(value); |
61 | 110 | h1 = static_cast<uint64_t>(unsigned_value); |
62 | 110 | h2 = static_cast<uint64_t>(unsigned_value >> 64); |
63 | 110 | } |
64 | | |
65 | 140 | void init_murmur_hash3_128(__int128_t& value, const void* data, size_t size) { |
66 | 140 | uint64_t hash[2] = {0, 0}; |
67 | 140 | murmur_hash3_x64_128(data, size, 0, hash); |
68 | 140 | value = pack_murmur_hash3_128(hash[0], hash[1]); |
69 | 140 | } |
70 | | |
71 | 110 | void update_murmur_hash3_128(__int128_t& value, const void* data, size_t size) { |
72 | 110 | uint64_t h1 = 0; |
73 | 110 | uint64_t h2 = 0; |
74 | 110 | unpack_murmur_hash3_128(value, h1, h2); |
75 | 110 | murmur_hash3_x64_process(data, size, h1, h2); |
76 | 110 | value = pack_murmur_hash3_128(h1, h2); |
77 | 110 | } |
78 | | |
79 | | template <bool first, typename StateContainer> |
80 | | Status execute_murmur_hash3_128_column(const IColumn* column, size_t input_rows_count, |
81 | 118 | StateContainer& state, const char* function_name) { |
82 | 118 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { |
83 | 92 | const typename ColumnString::Chars& data = col_from->get_chars(); |
84 | 92 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); |
85 | 92 | size_t size = offsets.size(); |
86 | 92 | ColumnString::Offset current_offset = 0; |
87 | 284 | for (size_t i = 0; i < size; ++i) { |
88 | 192 | if constexpr (first) { |
89 | 114 | init_murmur_hash3_128(state[i], |
90 | 114 | reinterpret_cast<const char*>(&data[current_offset]), |
91 | 114 | offsets[i] - current_offset); |
92 | 114 | } else { |
93 | 78 | update_murmur_hash3_128(state[i], |
94 | 78 | reinterpret_cast<const char*>(&data[current_offset]), |
95 | 78 | offsets[i] - current_offset); |
96 | 78 | } |
97 | 192 | current_offset = offsets[i]; |
98 | 192 | } |
99 | 92 | } else if (const ColumnConst* col_from_const = |
100 | 26 | check_and_get_column_const_string_or_fixedstring(column)) { |
101 | 26 | auto value = col_from_const->get_value<TYPE_STRING>(); |
102 | 84 | for (size_t i = 0; i < input_rows_count; ++i) { |
103 | 58 | if constexpr (first) { |
104 | 26 | init_murmur_hash3_128(state[i], value.data(), value.size()); |
105 | 32 | } else { |
106 | 32 | update_murmur_hash3_128(state[i], value.data(), value.size()); |
107 | 32 | } |
108 | 58 | } |
109 | 26 | } else { |
110 | 0 | DCHECK(false); |
111 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", |
112 | 0 | column->get_name(), function_name); |
113 | 0 | } |
114 | 118 | return Status::OK(); |
115 | 118 | } function_hash.cpp:_ZN5doris12_GLOBAL__N_131execute_murmur_hash3_128_columnILb1ENS_8PODArrayInLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEEENS_6StatusEPKNS_7IColumnEmRT0_PKc Line | Count | Source | 81 | 31 | StateContainer& state, const char* function_name) { | 82 | 31 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 83 | 26 | const typename ColumnString::Chars& data = col_from->get_chars(); | 84 | 26 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 85 | 26 | size_t size = offsets.size(); | 86 | 26 | ColumnString::Offset current_offset = 0; | 87 | 81 | for (size_t i = 0; i < size; ++i) { | 88 | 55 | if constexpr (first) { | 89 | 55 | init_murmur_hash3_128(state[i], | 90 | 55 | reinterpret_cast<const char*>(&data[current_offset]), | 91 | 55 | offsets[i] - current_offset); | 92 | | } else { | 93 | | update_murmur_hash3_128(state[i], | 94 | | reinterpret_cast<const char*>(&data[current_offset]), | 95 | | offsets[i] - current_offset); | 96 | | } | 97 | 55 | current_offset = offsets[i]; | 98 | 55 | } | 99 | 26 | } else if (const ColumnConst* col_from_const = | 100 | 5 | check_and_get_column_const_string_or_fixedstring(column)) { | 101 | 5 | auto value = col_from_const->get_value<TYPE_STRING>(); | 102 | 18 | for (size_t i = 0; i < input_rows_count; ++i) { | 103 | 13 | if constexpr (first) { | 104 | 13 | init_murmur_hash3_128(state[i], value.data(), value.size()); | 105 | | } else { | 106 | | update_murmur_hash3_128(state[i], value.data(), value.size()); | 107 | | } | 108 | 13 | } | 109 | 5 | } else { | 110 | 0 | DCHECK(false); | 111 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 112 | 0 | column->get_name(), function_name); | 113 | 0 | } | 114 | 31 | return Status::OK(); | 115 | 31 | } |
function_hash.cpp:_ZN5doris12_GLOBAL__N_131execute_murmur_hash3_128_columnILb0ENS_8PODArrayInLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEEENS_6StatusEPKNS_7IColumnEmRT0_PKc Line | Count | Source | 81 | 27 | StateContainer& state, const char* function_name) { | 82 | 27 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 83 | 19 | const typename ColumnString::Chars& data = col_from->get_chars(); | 84 | 19 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 85 | 19 | size_t size = offsets.size(); | 86 | 19 | ColumnString::Offset current_offset = 0; | 87 | 58 | for (size_t i = 0; i < size; ++i) { | 88 | | if constexpr (first) { | 89 | | init_murmur_hash3_128(state[i], | 90 | | reinterpret_cast<const char*>(&data[current_offset]), | 91 | | offsets[i] - current_offset); | 92 | 39 | } else { | 93 | 39 | update_murmur_hash3_128(state[i], | 94 | 39 | reinterpret_cast<const char*>(&data[current_offset]), | 95 | 39 | offsets[i] - current_offset); | 96 | 39 | } | 97 | 39 | current_offset = offsets[i]; | 98 | 39 | } | 99 | 19 | } else if (const ColumnConst* col_from_const = | 100 | 8 | check_and_get_column_const_string_or_fixedstring(column)) { | 101 | 8 | auto value = col_from_const->get_value<TYPE_STRING>(); | 102 | 24 | for (size_t i = 0; i < input_rows_count; ++i) { | 103 | | if constexpr (first) { | 104 | | init_murmur_hash3_128(state[i], value.data(), value.size()); | 105 | 16 | } else { | 106 | 16 | update_murmur_hash3_128(state[i], value.data(), value.size()); | 107 | 16 | } | 108 | 16 | } | 109 | 8 | } else { | 110 | 0 | DCHECK(false); | 111 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 112 | 0 | column->get_name(), function_name); | 113 | 0 | } | 114 | 27 | return Status::OK(); | 115 | 27 | } |
function_hash.cpp:_ZN5doris12_GLOBAL__N_131execute_murmur_hash3_128_columnILb1ESt6vectorInSaInEEEENS_6StatusEPKNS_7IColumnEmRT0_PKc Line | Count | Source | 81 | 33 | StateContainer& state, const char* function_name) { | 82 | 33 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 83 | 28 | const typename ColumnString::Chars& data = col_from->get_chars(); | 84 | 28 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 85 | 28 | size_t size = offsets.size(); | 86 | 28 | ColumnString::Offset current_offset = 0; | 87 | 87 | for (size_t i = 0; i < size; ++i) { | 88 | 59 | if constexpr (first) { | 89 | 59 | init_murmur_hash3_128(state[i], | 90 | 59 | reinterpret_cast<const char*>(&data[current_offset]), | 91 | 59 | offsets[i] - current_offset); | 92 | | } else { | 93 | | update_murmur_hash3_128(state[i], | 94 | | reinterpret_cast<const char*>(&data[current_offset]), | 95 | | offsets[i] - current_offset); | 96 | | } | 97 | 59 | current_offset = offsets[i]; | 98 | 59 | } | 99 | 28 | } else if (const ColumnConst* col_from_const = | 100 | 5 | check_and_get_column_const_string_or_fixedstring(column)) { | 101 | 5 | auto value = col_from_const->get_value<TYPE_STRING>(); | 102 | 18 | for (size_t i = 0; i < input_rows_count; ++i) { | 103 | 13 | if constexpr (first) { | 104 | 13 | init_murmur_hash3_128(state[i], value.data(), value.size()); | 105 | | } else { | 106 | | update_murmur_hash3_128(state[i], value.data(), value.size()); | 107 | | } | 108 | 13 | } | 109 | 5 | } else { | 110 | 0 | DCHECK(false); | 111 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 112 | 0 | column->get_name(), function_name); | 113 | 0 | } | 114 | 33 | return Status::OK(); | 115 | 33 | } |
function_hash.cpp:_ZN5doris12_GLOBAL__N_131execute_murmur_hash3_128_columnILb0ESt6vectorInSaInEEEENS_6StatusEPKNS_7IColumnEmRT0_PKc Line | Count | Source | 81 | 27 | StateContainer& state, const char* function_name) { | 82 | 27 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 83 | 19 | const typename ColumnString::Chars& data = col_from->get_chars(); | 84 | 19 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 85 | 19 | size_t size = offsets.size(); | 86 | 19 | ColumnString::Offset current_offset = 0; | 87 | 58 | for (size_t i = 0; i < size; ++i) { | 88 | | if constexpr (first) { | 89 | | init_murmur_hash3_128(state[i], | 90 | | reinterpret_cast<const char*>(&data[current_offset]), | 91 | | offsets[i] - current_offset); | 92 | 39 | } else { | 93 | 39 | update_murmur_hash3_128(state[i], | 94 | 39 | reinterpret_cast<const char*>(&data[current_offset]), | 95 | 39 | offsets[i] - current_offset); | 96 | 39 | } | 97 | 39 | current_offset = offsets[i]; | 98 | 39 | } | 99 | 19 | } else if (const ColumnConst* col_from_const = | 100 | 8 | check_and_get_column_const_string_or_fixedstring(column)) { | 101 | 8 | auto value = col_from_const->get_value<TYPE_STRING>(); | 102 | 24 | for (size_t i = 0; i < input_rows_count; ++i) { | 103 | | if constexpr (first) { | 104 | | init_murmur_hash3_128(state[i], value.data(), value.size()); | 105 | 16 | } else { | 106 | 16 | update_murmur_hash3_128(state[i], value.data(), value.size()); | 107 | 16 | } | 108 | 16 | } | 109 | 8 | } else { | 110 | 0 | DCHECK(false); | 111 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 112 | 0 | column->get_name(), function_name); | 113 | 0 | } | 114 | 27 | return Status::OK(); | 115 | 27 | } |
|
116 | | |
117 | | } // namespace |
118 | | |
119 | | template <PrimitiveType ReturnType, bool is_mmh64_v2 = false> |
120 | | struct MurmurHash3Impl { |
121 | 0 | static constexpr auto get_name() { |
122 | 0 | if constexpr (ReturnType == TYPE_INT) { |
123 | 0 | return "murmur_hash3_32"; |
124 | 0 | } else if constexpr (ReturnType == TYPE_LARGEINT) { |
125 | 0 | return "murmur_hash3_u64_v2"; |
126 | 0 | } else if constexpr (is_mmh64_v2) { |
127 | 0 | return "murmur_hash3_64_v2"; |
128 | 0 | } else { |
129 | 0 | return "murmur_hash3_64"; |
130 | 0 | } |
131 | 0 | } |
132 | | static constexpr auto name = get_name(); |
133 | | |
134 | 0 | static Status empty_apply(IColumn& icolumn, size_t input_rows_count) { |
135 | 0 | ColumnVector<ReturnType>& vec_to = assert_cast<ColumnVector<ReturnType>&>(icolumn); |
136 | 0 | vec_to.get_data().assign( |
137 | 0 | input_rows_count, |
138 | 0 | static_cast<typename PrimitiveTypeTraits<ReturnType>::CppType>(emtpy_value)); |
139 | 0 | return Status::OK(); |
140 | 0 | } Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE11empty_applyERNS_7IColumnEm Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE11empty_applyERNS_7IColumnEm Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE11empty_applyERNS_7IColumnEm Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE11empty_applyERNS_7IColumnEm |
141 | | |
142 | | static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count, |
143 | 81 | IColumn& icolumn) { |
144 | 81 | return execute<true>(type, column, input_rows_count, icolumn); |
145 | 81 | } _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 143 | 21 | IColumn& icolumn) { | 144 | 21 | return execute<true>(type, column, input_rows_count, icolumn); | 145 | 21 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 143 | 21 | IColumn& icolumn) { | 144 | 21 | return execute<true>(type, column, input_rows_count, icolumn); | 145 | 21 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 143 | 18 | IColumn& icolumn) { | 144 | 18 | return execute<true>(type, column, input_rows_count, icolumn); | 145 | 18 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 143 | 21 | IColumn& icolumn) { | 144 | 21 | return execute<true>(type, column, input_rows_count, icolumn); | 145 | 21 | } |
|
146 | | |
147 | | static Status combine_apply(const IDataType* type, const IColumn* column, |
148 | 13 | size_t input_rows_count, IColumn& icolumn) { |
149 | 13 | return execute<false>(type, column, input_rows_count, icolumn); |
150 | 13 | } _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 148 | 4 | size_t input_rows_count, IColumn& icolumn) { | 149 | 4 | return execute<false>(type, column, input_rows_count, icolumn); | 150 | 4 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 148 | 4 | size_t input_rows_count, IColumn& icolumn) { | 149 | 4 | return execute<false>(type, column, input_rows_count, icolumn); | 150 | 4 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 148 | 1 | size_t input_rows_count, IColumn& icolumn) { | 149 | 1 | return execute<false>(type, column, input_rows_count, icolumn); | 150 | 1 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 148 | 4 | size_t input_rows_count, IColumn& icolumn) { | 149 | 4 | return execute<false>(type, column, input_rows_count, icolumn); | 150 | 4 | } |
|
151 | | |
152 | | template <bool first> |
153 | | static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count, |
154 | 94 | IColumn& col_to) { |
155 | 94 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); |
156 | 94 | if constexpr (first) { |
157 | 81 | if constexpr (ReturnType == TYPE_INT) { |
158 | 21 | to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED), |
159 | 21 | input_rows_count); |
160 | 60 | } else { |
161 | 60 | to_column.insert_many_defaults(input_rows_count); |
162 | 60 | } |
163 | 81 | } |
164 | 94 | auto& col_to_data = to_column.get_data(); |
165 | 94 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { |
166 | 94 | const typename ColumnString::Chars& data = col_from->get_chars(); |
167 | 94 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); |
168 | 94 | size_t size = offsets.size(); |
169 | 94 | ColumnString::Offset current_offset = 0; |
170 | 286 | for (size_t i = 0; i < size; ++i) { |
171 | 192 | if constexpr (ReturnType == TYPE_INT) { |
172 | 65 | col_to_data[i] = HashUtil::murmur_hash3_32( |
173 | 65 | reinterpret_cast<const char*>(&data[current_offset]), |
174 | 65 | offsets[i] - current_offset, col_to_data[i]); |
175 | 127 | } else { |
176 | 127 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( |
177 | 127 | reinterpret_cast<const char*>(&data[current_offset]), |
178 | 127 | offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i])); |
179 | 127 | } |
180 | 192 | current_offset = offsets[i]; |
181 | 192 | } |
182 | 94 | } else if (const ColumnConst* col_from_const = |
183 | 0 | check_and_get_column_const_string_or_fixedstring(column)) { |
184 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); |
185 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
186 | 0 | if constexpr (ReturnType == TYPE_INT) { |
187 | 0 | col_to_data[i] = |
188 | 0 | HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); |
189 | 0 | } else { |
190 | 0 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( |
191 | 0 | value.data(), value.size(), static_cast<uint64_t>(col_to_data[i])); |
192 | 0 | } |
193 | 0 | } |
194 | 0 | } else { |
195 | 0 | DCHECK(false); |
196 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", |
197 | 0 | column->get_name(), name); |
198 | 0 | } |
199 | 94 | return Status::OK(); |
200 | 94 | } _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 154 | 21 | IColumn& col_to) { | 155 | 21 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 156 | 21 | if constexpr (first) { | 157 | 21 | if constexpr (ReturnType == TYPE_INT) { | 158 | 21 | to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED), | 159 | 21 | input_rows_count); | 160 | | } else { | 161 | | to_column.insert_many_defaults(input_rows_count); | 162 | | } | 163 | 21 | } | 164 | 21 | auto& col_to_data = to_column.get_data(); | 165 | 21 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 166 | 21 | const typename ColumnString::Chars& data = col_from->get_chars(); | 167 | 21 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 168 | 21 | size_t size = offsets.size(); | 169 | 21 | ColumnString::Offset current_offset = 0; | 170 | 79 | for (size_t i = 0; i < size; ++i) { | 171 | 58 | if constexpr (ReturnType == TYPE_INT) { | 172 | 58 | col_to_data[i] = HashUtil::murmur_hash3_32( | 173 | 58 | reinterpret_cast<const char*>(&data[current_offset]), | 174 | 58 | offsets[i] - current_offset, col_to_data[i]); | 175 | | } else { | 176 | | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 177 | | reinterpret_cast<const char*>(&data[current_offset]), | 178 | | offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i])); | 179 | | } | 180 | 58 | current_offset = offsets[i]; | 181 | 58 | } | 182 | 21 | } else if (const ColumnConst* col_from_const = | 183 | 0 | check_and_get_column_const_string_or_fixedstring(column)) { | 184 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 185 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 186 | 0 | if constexpr (ReturnType == TYPE_INT) { | 187 | 0 | col_to_data[i] = | 188 | 0 | HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); | 189 | | } 
else { | 190 | | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 191 | | value.data(), value.size(), static_cast<uint64_t>(col_to_data[i])); | 192 | | } | 193 | 0 | } | 194 | 0 | } else { | 195 | 0 | DCHECK(false); | 196 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 197 | 0 | column->get_name(), name); | 198 | 0 | } | 199 | 21 | return Status::OK(); | 200 | 21 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 154 | 4 | IColumn& col_to) { | 155 | 4 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 156 | | if constexpr (first) { | 157 | | if constexpr (ReturnType == TYPE_INT) { | 158 | | to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED), | 159 | | input_rows_count); | 160 | | } else { | 161 | | to_column.insert_many_defaults(input_rows_count); | 162 | | } | 163 | | } | 164 | 4 | auto& col_to_data = to_column.get_data(); | 165 | 4 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 166 | 4 | const typename ColumnString::Chars& data = col_from->get_chars(); | 167 | 4 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 168 | 4 | size_t size = offsets.size(); | 169 | 4 | ColumnString::Offset current_offset = 0; | 170 | 11 | for (size_t i = 0; i < size; ++i) { | 171 | 7 | if constexpr (ReturnType == TYPE_INT) { | 172 | 7 | col_to_data[i] = HashUtil::murmur_hash3_32( | 173 | 7 | reinterpret_cast<const char*>(&data[current_offset]), | 174 | 7 | offsets[i] - current_offset, col_to_data[i]); | 175 | | } else { | 176 | | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 177 | | reinterpret_cast<const char*>(&data[current_offset]), | 178 | | offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i])); | 179 | | } | 180 | 7 | current_offset = offsets[i]; | 181 | 7 | } | 182 | 4 | } else if (const ColumnConst* col_from_const = | 183 | 0 | check_and_get_column_const_string_or_fixedstring(column)) { | 184 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 185 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 186 | 0 | if constexpr (ReturnType == TYPE_INT) { | 187 | 0 | col_to_data[i] = | 188 | 0 | HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); | 189 | | } else { | 190 | | col_to_data[i] = 
HashUtil::murmur_hash3_64<is_mmh64_v2>( | 191 | | value.data(), value.size(), static_cast<uint64_t>(col_to_data[i])); | 192 | | } | 193 | 0 | } | 194 | 0 | } else { | 195 | 0 | DCHECK(false); | 196 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 197 | 0 | column->get_name(), name); | 198 | 0 | } | 199 | 4 | return Status::OK(); | 200 | 4 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 154 | 21 | IColumn& col_to) { | 155 | 21 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 156 | 21 | if constexpr (first) { | 157 | | if constexpr (ReturnType == TYPE_INT) { | 158 | | to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED), | 159 | | input_rows_count); | 160 | 21 | } else { | 161 | 21 | to_column.insert_many_defaults(input_rows_count); | 162 | 21 | } | 163 | 21 | } | 164 | 21 | auto& col_to_data = to_column.get_data(); | 165 | 21 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 166 | 21 | const typename ColumnString::Chars& data = col_from->get_chars(); | 167 | 21 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 168 | 21 | size_t size = offsets.size(); | 169 | 21 | ColumnString::Offset current_offset = 0; | 170 | 79 | for (size_t i = 0; i < size; ++i) { | 171 | | if constexpr (ReturnType == TYPE_INT) { | 172 | | col_to_data[i] = HashUtil::murmur_hash3_32( | 173 | | reinterpret_cast<const char*>(&data[current_offset]), | 174 | | offsets[i] - current_offset, col_to_data[i]); | 175 | 58 | } else { | 176 | 58 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 177 | 58 | reinterpret_cast<const char*>(&data[current_offset]), | 178 | 58 | offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i])); | 179 | 58 | } | 180 | 58 | current_offset = offsets[i]; | 181 | 58 | } | 182 | 21 | } else if (const ColumnConst* col_from_const = | 183 | 0 | check_and_get_column_const_string_or_fixedstring(column)) { | 184 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 185 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 186 | | if constexpr (ReturnType == TYPE_INT) { | 187 | | col_to_data[i] = | 188 | | HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); | 189 | 0 | } else { | 190 | 0 
| col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 191 | 0 | value.data(), value.size(), static_cast<uint64_t>(col_to_data[i])); | 192 | 0 | } | 193 | 0 | } | 194 | 0 | } else { | 195 | 0 | DCHECK(false); | 196 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 197 | 0 | column->get_name(), name); | 198 | 0 | } | 199 | 21 | return Status::OK(); | 200 | 21 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 154 | 4 | IColumn& col_to) { | 155 | 4 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 156 | | if constexpr (first) { | 157 | | if constexpr (ReturnType == TYPE_INT) { | 158 | | to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED), | 159 | | input_rows_count); | 160 | | } else { | 161 | | to_column.insert_many_defaults(input_rows_count); | 162 | | } | 163 | | } | 164 | 4 | auto& col_to_data = to_column.get_data(); | 165 | 4 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 166 | 4 | const typename ColumnString::Chars& data = col_from->get_chars(); | 167 | 4 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 168 | 4 | size_t size = offsets.size(); | 169 | 4 | ColumnString::Offset current_offset = 0; | 170 | 11 | for (size_t i = 0; i < size; ++i) { | 171 | | if constexpr (ReturnType == TYPE_INT) { | 172 | | col_to_data[i] = HashUtil::murmur_hash3_32( | 173 | | reinterpret_cast<const char*>(&data[current_offset]), | 174 | | offsets[i] - current_offset, col_to_data[i]); | 175 | 7 | } else { | 176 | 7 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 177 | 7 | reinterpret_cast<const char*>(&data[current_offset]), | 178 | 7 | offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i])); | 179 | 7 | } | 180 | 7 | current_offset = offsets[i]; | 181 | 7 | } | 182 | 4 | } else if (const ColumnConst* col_from_const = | 183 | 0 | check_and_get_column_const_string_or_fixedstring(column)) { | 184 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 185 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 186 | | if constexpr (ReturnType == TYPE_INT) { | 187 | | col_to_data[i] = | 188 | | HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); | 189 | 0 | } else { | 190 | 0 | col_to_data[i] = 
HashUtil::murmur_hash3_64<is_mmh64_v2>( | 191 | 0 | value.data(), value.size(), static_cast<uint64_t>(col_to_data[i])); | 192 | 0 | } | 193 | 0 | } | 194 | 0 | } else { | 195 | 0 | DCHECK(false); | 196 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 197 | 0 | column->get_name(), name); | 198 | 0 | } | 199 | 4 | return Status::OK(); | 200 | 4 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 154 | 18 | IColumn& col_to) { | 155 | 18 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 156 | 18 | if constexpr (first) { | 157 | | if constexpr (ReturnType == TYPE_INT) { | 158 | | to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED), | 159 | | input_rows_count); | 160 | 18 | } else { | 161 | 18 | to_column.insert_many_defaults(input_rows_count); | 162 | 18 | } | 163 | 18 | } | 164 | 18 | auto& col_to_data = to_column.get_data(); | 165 | 18 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 166 | 18 | const typename ColumnString::Chars& data = col_from->get_chars(); | 167 | 18 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 168 | 18 | size_t size = offsets.size(); | 169 | 18 | ColumnString::Offset current_offset = 0; | 170 | 46 | for (size_t i = 0; i < size; ++i) { | 171 | | if constexpr (ReturnType == TYPE_INT) { | 172 | | col_to_data[i] = HashUtil::murmur_hash3_32( | 173 | | reinterpret_cast<const char*>(&data[current_offset]), | 174 | | offsets[i] - current_offset, col_to_data[i]); | 175 | 28 | } else { | 176 | 28 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 177 | 28 | reinterpret_cast<const char*>(&data[current_offset]), | 178 | 28 | offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i])); | 179 | 28 | } | 180 | 28 | current_offset = offsets[i]; | 181 | 28 | } | 182 | 18 | } else if (const ColumnConst* col_from_const = | 183 | 0 | check_and_get_column_const_string_or_fixedstring(column)) { | 184 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 185 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 186 | | if constexpr (ReturnType == TYPE_INT) { | 187 | | col_to_data[i] = | 188 | | HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); | 189 | 0 | } else { | 190 | 0 
| col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 191 | 0 | value.data(), value.size(), static_cast<uint64_t>(col_to_data[i])); | 192 | 0 | } | 193 | 0 | } | 194 | 0 | } else { | 195 | 0 | DCHECK(false); | 196 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 197 | 0 | column->get_name(), name); | 198 | 0 | } | 199 | 18 | return Status::OK(); | 200 | 18 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 154 | 1 | IColumn& col_to) { | 155 | 1 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 156 | | if constexpr (first) { | 157 | | if constexpr (ReturnType == TYPE_INT) { | 158 | | to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED), | 159 | | input_rows_count); | 160 | | } else { | 161 | | to_column.insert_many_defaults(input_rows_count); | 162 | | } | 163 | | } | 164 | 1 | auto& col_to_data = to_column.get_data(); | 165 | 1 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 166 | 1 | const typename ColumnString::Chars& data = col_from->get_chars(); | 167 | 1 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 168 | 1 | size_t size = offsets.size(); | 169 | 1 | ColumnString::Offset current_offset = 0; | 170 | 2 | for (size_t i = 0; i < size; ++i) { | 171 | | if constexpr (ReturnType == TYPE_INT) { | 172 | | col_to_data[i] = HashUtil::murmur_hash3_32( | 173 | | reinterpret_cast<const char*>(&data[current_offset]), | 174 | | offsets[i] - current_offset, col_to_data[i]); | 175 | 1 | } else { | 176 | 1 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 177 | 1 | reinterpret_cast<const char*>(&data[current_offset]), | 178 | 1 | offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i])); | 179 | 1 | } | 180 | 1 | current_offset = offsets[i]; | 181 | 1 | } | 182 | 1 | } else if (const ColumnConst* col_from_const = | 183 | 0 | check_and_get_column_const_string_or_fixedstring(column)) { | 184 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 185 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 186 | | if constexpr (ReturnType == TYPE_INT) { | 187 | | col_to_data[i] = | 188 | | HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); | 189 | 0 | } else { | 190 | 0 | col_to_data[i] = 
HashUtil::murmur_hash3_64<is_mmh64_v2>( | 191 | 0 | value.data(), value.size(), static_cast<uint64_t>(col_to_data[i])); | 192 | 0 | } | 193 | 0 | } | 194 | 0 | } else { | 195 | 0 | DCHECK(false); | 196 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 197 | 0 | column->get_name(), name); | 198 | 0 | } | 199 | 1 | return Status::OK(); | 200 | 1 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 154 | 21 | IColumn& col_to) { | 155 | 21 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 156 | 21 | if constexpr (first) { | 157 | | if constexpr (ReturnType == TYPE_INT) { | 158 | | to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED), | 159 | | input_rows_count); | 160 | 21 | } else { | 161 | 21 | to_column.insert_many_defaults(input_rows_count); | 162 | 21 | } | 163 | 21 | } | 164 | 21 | auto& col_to_data = to_column.get_data(); | 165 | 21 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 166 | 21 | const typename ColumnString::Chars& data = col_from->get_chars(); | 167 | 21 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 168 | 21 | size_t size = offsets.size(); | 169 | 21 | ColumnString::Offset current_offset = 0; | 170 | 50 | for (size_t i = 0; i < size; ++i) { | 171 | | if constexpr (ReturnType == TYPE_INT) { | 172 | | col_to_data[i] = HashUtil::murmur_hash3_32( | 173 | | reinterpret_cast<const char*>(&data[current_offset]), | 174 | | offsets[i] - current_offset, col_to_data[i]); | 175 | 29 | } else { | 176 | 29 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 177 | 29 | reinterpret_cast<const char*>(&data[current_offset]), | 178 | 29 | offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i])); | 179 | 29 | } | 180 | 29 | current_offset = offsets[i]; | 181 | 29 | } | 182 | 21 | } else if (const ColumnConst* col_from_const = | 183 | 0 | check_and_get_column_const_string_or_fixedstring(column)) { | 184 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 185 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 186 | | if constexpr (ReturnType == TYPE_INT) { | 187 | | col_to_data[i] = | 188 | | HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); | 189 | 0 | } else { | 190 | 0 
| col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 191 | 0 | value.data(), value.size(), static_cast<uint64_t>(col_to_data[i])); | 192 | 0 | } | 193 | 0 | } | 194 | 0 | } else { | 195 | 0 | DCHECK(false); | 196 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 197 | 0 | column->get_name(), name); | 198 | 0 | } | 199 | 21 | return Status::OK(); | 200 | 21 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 154 | 4 | IColumn& col_to) { | 155 | 4 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 156 | | if constexpr (first) { | 157 | | if constexpr (ReturnType == TYPE_INT) { | 158 | | to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED), | 159 | | input_rows_count); | 160 | | } else { | 161 | | to_column.insert_many_defaults(input_rows_count); | 162 | | } | 163 | | } | 164 | 4 | auto& col_to_data = to_column.get_data(); | 165 | 4 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 166 | 4 | const typename ColumnString::Chars& data = col_from->get_chars(); | 167 | 4 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 168 | 4 | size_t size = offsets.size(); | 169 | 4 | ColumnString::Offset current_offset = 0; | 170 | 8 | for (size_t i = 0; i < size; ++i) { | 171 | | if constexpr (ReturnType == TYPE_INT) { | 172 | | col_to_data[i] = HashUtil::murmur_hash3_32( | 173 | | reinterpret_cast<const char*>(&data[current_offset]), | 174 | | offsets[i] - current_offset, col_to_data[i]); | 175 | 4 | } else { | 176 | 4 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 177 | 4 | reinterpret_cast<const char*>(&data[current_offset]), | 178 | 4 | offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i])); | 179 | 4 | } | 180 | 4 | current_offset = offsets[i]; | 181 | 4 | } | 182 | 4 | } else if (const ColumnConst* col_from_const = | 183 | 0 | check_and_get_column_const_string_or_fixedstring(column)) { | 184 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 185 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 186 | | if constexpr (ReturnType == TYPE_INT) { | 187 | | col_to_data[i] = | 188 | | HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); | 189 | 0 | } else { | 190 | 0 | col_to_data[i] = 
HashUtil::murmur_hash3_64<is_mmh64_v2>( | 191 | 0 | value.data(), value.size(), static_cast<uint64_t>(col_to_data[i])); | 192 | 0 | } | 193 | 0 | } | 194 | 0 | } else { | 195 | 0 | DCHECK(false); | 196 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 197 | 0 | column->get_name(), name); | 198 | 0 | } | 199 | 4 | return Status::OK(); | 200 | 4 | } |
|
201 | | }; |
202 | | |
// Concrete function types for the MurmurHash3 family. Each alias instantiates the generic
// FunctionVariadicArgumentsBase wrapper with a result data type (first argument) and a
// MurmurHash3Impl policy for the matching primitive type (second argument).
// NOTE(review): the trailing `true` is the policy's bool template parameter; the execute body
// visible in this listing uses it as `is_mmh64_v2` when calling HashUtil::murmur_hash3_64.
// The MurmurHash3Impl declaration itself is above this excerpt -- confirm there.
203 | | using FunctionMurmurHash3_32 =
204 | | FunctionVariadicArgumentsBase<DataTypeInt32, MurmurHash3Impl<TYPE_INT>>;
205 | | using FunctionMurmurHash3_64 =
206 | | FunctionVariadicArgumentsBase<DataTypeInt64, MurmurHash3Impl<TYPE_BIGINT>>;
207 | | using FunctionMurmurHash3_64_V2 =
208 | | FunctionVariadicArgumentsBase<DataTypeInt64, MurmurHash3Impl<TYPE_BIGINT, true>>;
// Same policy with a TYPE_LARGEINT result column, exposed through DataTypeInt128.
209 | | using FunctionMurmurHash3U64V2 =
210 | | FunctionVariadicArgumentsBase<DataTypeInt128, MurmurHash3Impl<TYPE_LARGEINT, true>>;
211 | | |
// Policy struct for the "murmur_hash3_128" function. It supplies the static hooks
// (name / empty_apply / first_apply / combine_apply) that FunctionVariadicArgumentsBase is
// instantiated with further down. The per-row hash state is kept in a
// ColumnVector<TYPE_LARGEINT> result column.
212 | | struct MurmurHash3128Impl {
// Function name; also interpolated into the error message below.
213 | | static constexpr auto name = "murmur_hash3_128";
214 | |
// Zero-argument case: there is nothing to hash, so an InvalidArgument status is returned
// and the result column is left untouched.
215 | 1 | static Status empty_apply(IColumn& /*icolumn*/, size_t /*input_rows_count*/) {
216 | 1 | return Status::InvalidArgument("Function {} requires at least one argument", name);
217 | 1 | }
218 | |
// First argument: forwards to execute<true>, which also creates the per-row state.
219 | | static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
220 | 31 | IColumn& icolumn) {
221 | 31 | return execute<true>(type, column, input_rows_count, icolumn);
222 | 31 | }
223 | |
// Second and later arguments: forwards to execute<false>, which folds the column into the
// state already stored in icolumn.
224 | | static Status combine_apply(const IDataType* type, const IColumn* column,
225 | 27 | size_t input_rows_count, IColumn& icolumn) {
226 | 27 | return execute<false>(type, column, input_rows_count, icolumn);
227 | 27 | }
228 | |
// Shared implementation behind first_apply / combine_apply.
//   first            - true only for the first argument; then the state rows are appended.
//   type             - not read anywhere in this body.
//   column           - argument column to hash.
//   input_rows_count - number of rows; used to size the state and passed to the helper.
//   col_to           - result column; assert_cast to ColumnVector<TYPE_LARGEINT>.
// Returns whatever execute_murmur_hash3_128_column<first> returns. That helper is declared
// outside this excerpt, so its supported column kinds are not visible here.
229 | | template <bool first>
230 | | static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count,
231 | 58 | IColumn& col_to) {
232 | 58 | auto& to_column = assert_cast<ColumnVector<TYPE_LARGEINT>&>(col_to);
233 | 58 | if constexpr (first) {
234 | | // The first argument initializes one 128-bit hash state per row. Later arguments reuse
235 | | // the same result column and update the saved state in place.
236 | 31 | to_column.insert_many_defaults(input_rows_count);
237 | 31 | }
238 | 58 | auto& col_to_data = to_column.get_data();
239 | 58 | return execute_murmur_hash3_128_column<first>(column, input_rows_count, col_to_data, name);
// The rest of the next row and the row after it are the coverage tool's per-instantiation
// breakdown of execute (first = true, then first = false); they repeat lines 231-240 with
// separate hit counts and are not additional source.
240 | 58 | } _ZN5doris18MurmurHash3128Impl7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 231 | 31 | IColumn& col_to) { | 232 | 31 | auto& to_column = assert_cast<ColumnVector<TYPE_LARGEINT>&>(col_to); | 233 | 31 | if constexpr (first) { | 234 | | // The first argument initializes one 128-bit hash state per row. Later arguments reuse | 235 | | // the same result column and update the saved state in place. | 236 | 31 | to_column.insert_many_defaults(input_rows_count); | 237 | 31 | } | 238 | 31 | auto& col_to_data = to_column.get_data(); | 239 | 31 | return execute_murmur_hash3_128_column<first>(column, input_rows_count, col_to_data, name); | 240 | 31 | } |
_ZN5doris18MurmurHash3128Impl7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 231 | 27 | IColumn& col_to) { | 232 | 27 | auto& to_column = assert_cast<ColumnVector<TYPE_LARGEINT>&>(col_to); | 233 | | if constexpr (first) { | 234 | | // The first argument initializes one 128-bit hash state per row. Later arguments reuse | 235 | | // the same result column and update the saved state in place. | 236 | | to_column.insert_many_defaults(input_rows_count); | 237 | | } | 238 | 27 | auto& col_to_data = to_column.get_data(); | 239 | 27 | return execute_murmur_hash3_128_column<first>(column, input_rows_count, col_to_data, name); | 240 | 27 | } |
|
241 | | };
242 | | |
// murmur_hash3_128 as a registrable function: the variadic wrapper with a DataTypeInt128
// result and the MurmurHash3128Impl policy.
243 | | using FunctionMurmurHash3_128 = FunctionVariadicArgumentsBase<DataTypeInt128, MurmurHash3128Impl>;
244 | | |
// "murmur_hash3_u128": hashes one or more argument columns with the same 128-bit helper used
// by murmur_hash3_128, but returns the result as a string column holding the value
// reinterpreted as unsigned. It is written directly against IFunction instead of going
// through FunctionVariadicArgumentsBase.
245 | | class FunctionMurmurHash3U128 : public IFunction {
246 | | public:
247 | | static constexpr auto name = "murmur_hash3_u128";
248 | |
// Factory used at registration time (see register_function_hash).
249 | 54 | static FunctionPtr create() { return std::make_shared<FunctionMurmurHash3U128>(); }
250 | |
251 | 0 | String get_name() const override { return name; }
252 | |
// Variadic: the argument count is not fixed, so get_number_of_arguments reports 0.
253 | 46 | bool is_variadic() const override { return true; }
254 | |
255 | 0 | size_t get_number_of_arguments() const override { return 0; }
256 | |
// The result type is always a string, regardless of the argument types.
257 | 45 | DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& /*arguments*/) const override {
258 | 45 | return std::make_shared<DataTypeString>();
259 | 45 | }
260 | |
// Computes the hash for every row.
//   block            - holds the argument columns and receives the result column.
//   arguments        - positions of the argument columns inside block.
//   result           - position in block where the result column is stored.
//   input_rows_count - number of rows; sizes the temporary state vector.
// Returns InvalidArgument when called with no arguments, propagates any error from
// execute_murmur_hash3_128_column, and returns OK otherwise.
261 | | Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
262 | 34 | uint32_t result, size_t input_rows_count) const override {
263 | 34 | if (arguments.empty()) {
264 | 1 | return Status::InvalidArgument("Function {} requires at least one argument", name);
265 | 1 | }
266 | |
// One value-initialized 128-bit state per row, held in a plain std::vector rather than in
// a result column because the final column is a string column.
267 | 33 | std::vector<__int128_t> state(input_rows_count);
268 | 33 | const ColumnWithTypeAndName& first_col = block.get_by_position(arguments[0]);
// First argument goes through the <true> instantiation of the helper ...
269 | 33 | RETURN_IF_ERROR(execute_murmur_hash3_128_column<true>(first_col.column.get(),
270 | 33 | input_rows_count, state, name));
271 | |
// ... and every remaining argument through <false>, updating the same state in place.
272 | 60 | for (size_t i = 1; i < arguments.size(); ++i) {
273 | 27 | const ColumnWithTypeAndName& col = block.get_by_position(arguments[i]);
274 | 27 | RETURN_IF_ERROR(execute_murmur_hash3_128_column<false>(col.column.get(),
275 | 27 | input_rows_count, state, name));
276 | 27 | }
277 | |
// Render each state as text: the signed value is cast to __uint128_t first, so the string is
// produced from the unsigned interpretation of the same bits.
// NOTE(review): presumably LargeIntValue::to_string emits decimal digits -- confirm.
278 | 33 | auto result_column = ColumnString::create();
279 | 33 | result_column->reserve(input_rows_count);
280 | 72 | for (const auto value : state) {
281 | 72 | auto unsigned_value = static_cast<__uint128_t>(value);
282 | 72 | std::string value_str = LargeIntValue::to_string(unsigned_value);
283 | 72 | result_column->insert_data(value_str.data(), value_str.size());
284 | 72 | }
285 | 33 | block.get_by_position(result).column = std::move(result_column);
286 | 33 | return Status::OK();
287 | 33 | }
288 | | };
289 | | |
// Test-only accessors, compiled only when BE_TEST is defined. Each one returns
// MurmurHash3Impl<...>::get_name() for one instantiation of the policy, as a free function.
// NOTE(review): there is no accessor for the <TYPE_LARGEINT, true> instantiation used by
// FunctionMurmurHash3U64V2 -- confirm whether that omission is intentional.
290 | | #ifdef BE_TEST
// Name reported by the TYPE_INT instantiation.
291 | | const char* murmur_hash3_get_name_type_int_for_test() {
292 | | return MurmurHash3Impl<TYPE_INT>::get_name();
293 | | }
294 | |
// Name reported by the TYPE_BIGINT instantiation (default second template argument).
295 | | const char* murmur_hash3_get_name_type_bigint_for_test() {
296 | | return MurmurHash3Impl<TYPE_BIGINT>::get_name();
297 | | }
298 | |
// Name reported by the TYPE_BIGINT instantiation with the second template argument true.
299 | | const char* murmur_hash3_get_name_type_bigint_v2_for_test() {
300 | | return MurmurHash3Impl<TYPE_BIGINT, true>::get_name();
301 | | }
302 | | #endif
303 | | |
// Policy struct for the xxHash functions, parameterised on the result primitive type:
// TYPE_INT selects the 32-bit hash ("xxhash_32"), any other ReturnType the 64-bit hash
// ("xxhash_64"). It supplies the static hooks (name / empty_apply / first_apply /
// combine_apply) that FunctionVariadicArgumentsBase is instantiated with further down.
// In this listing, rows that contain only a mangled symbol followed by `Line | Count | Source`
// are the coverage tool's per-instantiation breakdowns of the preceding member function; they
// repeat its source with separate hit counts and are not additional code.
304 | | template <PrimitiveType ReturnType>
305 | | struct XxHashImpl {
306 | | static constexpr auto name = ReturnType == TYPE_INT ? "xxhash_32" : "xxhash_64";
307 | |
// Zero-argument case: fills the result column with input_rows_count copies of `emtpy_value`
// cast to the column's C++ type. `emtpy_value` (spelled this way) is declared outside this
// excerpt, so its value is not visible here.
// NOTE(review): both instantiations are reported as "Unexecuted instantiation" below, i.e.
// this path has no recorded executions in this run.
308 | 0 | static Status empty_apply(IColumn& icolumn, size_t input_rows_count) {
309 | 0 | ColumnVector<ReturnType>& vec_to = assert_cast<ColumnVector<ReturnType>&>(icolumn);
310 | 0 | vec_to.get_data().assign(
311 | 0 | input_rows_count,
312 | 0 | static_cast<typename PrimitiveTypeTraits<ReturnType>::CppType>(emtpy_value));
313 | 0 | return Status::OK();
314 | 0 | } Unexecuted instantiation: _ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE11empty_applyERNS_7IColumnEm Unexecuted instantiation: _ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE11empty_applyERNS_7IColumnEm |
315 | |
// First argument: forwards to execute<true>, which also appends the per-row seed values.
316 | | static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
317 | 2.29k | IColumn& icolumn) {
318 | 2.29k | return execute<true>(type, column, input_rows_count, icolumn);
319 | 2.29k | } _ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 317 | 86 | IColumn& icolumn) { | 318 | 86 | return execute<true>(type, column, input_rows_count, icolumn); | 319 | 86 | } |
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 317 | 2.21k | IColumn& icolumn) { | 318 | 2.21k | return execute<true>(type, column, input_rows_count, icolumn); | 319 | 2.21k | } |
|
320 | |
// Second and later arguments: forwards to execute<false>, which reuses the hash already
// stored in icolumn as the seed for the next column.
321 | | static Status combine_apply(const IDataType* type, const IColumn* column,
322 | 24 | size_t input_rows_count, IColumn& icolumn) {
323 | 24 | return execute<false>(type, column, input_rows_count, icolumn);
324 | 24 | } _ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 322 | 11 | size_t input_rows_count, IColumn& icolumn) { | 323 | 11 | return execute<false>(type, column, input_rows_count, icolumn); | 324 | 11 | } |
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 322 | 13 | size_t input_rows_count, IColumn& icolumn) { | 323 | 13 | return execute<false>(type, column, input_rows_count, icolumn); | 324 | 13 | } |
|
325 | |
// Shared implementation behind first_apply / combine_apply.
//   first            - true only for the first argument; then the result rows are appended.
//   type             - not read anywhere in this body.
//   column           - argument column; handled kinds are ColumnString, a constant string
//                      column, and ColumnVarbinary.
//   input_rows_count - number of rows; bounds the const and varbinary loops.
//   col_to           - result column; assert_cast to ColumnVector<ReturnType>.
// Every branch passes the current col_to_data[i] as the seed and stores the new hash back
// into col_to_data[i], which is how hashes chain across several arguments.
// Returns OK on success, NotSupported for any other column kind.
326 | | template <bool first>
327 | | static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count,
328 | 2.32k | IColumn& col_to) {
329 | 2.32k | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
330 | 2.32k | if constexpr (first) {
// NOTE(review): presumably the inserted defaults are zero, making 0 the initial seed -- confirm.
331 | 2.29k | to_column.insert_many_defaults(input_rows_count);
332 | 2.29k | }
333 | 2.32k | auto& col_to_data = to_column.get_data();
334 | 2.32k | if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
// String column: row i occupies chars [current_offset, offsets[i]). The loop is bounded by
// offsets.size() rather than input_rows_count.
// NOTE(review): this assumes the two are equal -- confirm.
335 | 2.29k | const typename ColumnString::Chars& data = col_from->get_chars();
336 | 2.29k | const typename ColumnString::Offsets& offsets = col_from->get_offsets();
337 | 2.29k | size_t size = offsets.size();
338 | 2.29k | ColumnString::Offset current_offset = 0;
339 | 173k | for (size_t i = 0; i < size; ++i) {
340 | 170k | if constexpr (ReturnType == TYPE_INT) {
341 | 405 | col_to_data[i] = HashUtil::xxHash32WithSeed(
342 | 405 | reinterpret_cast<const char*>(&data[current_offset]),
343 | 405 | offsets[i] - current_offset, col_to_data[i]);
344 | 170k | } else {
345 | 170k | col_to_data[i] = HashUtil::xxHash64WithSeed(
346 | 170k | reinterpret_cast<const char*>(&data[current_offset]),
347 | 170k | offsets[i] - current_offset, col_to_data[i]);
348 | 170k | }
349 | 170k | current_offset = offsets[i];
350 | 170k | }
351 | 2.29k | } else if (const ColumnConst* col_from_const =
352 | 24 | check_and_get_column_const_string_or_fixedstring(column)) {
// Constant string column: the same value is hashed for every row, each with that row's seed.
// NOTE(review): this branch has zero recorded executions in this listing.
353 | 0 | auto value = col_from_const->get_value<TYPE_STRING>();
354 | 0 | for (size_t i = 0; i < input_rows_count; ++i) {
355 | 0 | if constexpr (ReturnType == TYPE_INT) {
356 | 0 | col_to_data[i] =
357 | 0 | HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
358 | 0 | } else {
359 | 0 | col_to_data[i] =
360 | 0 | HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
361 | 0 | }
362 | 0 | }
363 | 24 | } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
// Varbinary column: each row is fetched with get_data_at(i) and hashed as raw bytes.
364 | 108 | for (size_t i = 0; i < input_rows_count; ++i) {
365 | 84 | auto data_ref = vb_col->get_data_at(i);
366 | 84 | if constexpr (ReturnType == TYPE_INT) {
367 | 42 | col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
368 | 42 | col_to_data[i]);
369 | 42 | } else {
370 | 42 | col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
371 | 42 | col_to_data[i]);
372 | 42 | }
373 | 84 | }
374 | 24 | } else {
// Any other column kind trips the DCHECK first, then returns NotSupported.
375 | 0 | DCHECK(false);
376 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}",
377 | 0 | column->get_name(), name);
378 | 0 | }
379 | 2.32k | return Status::OK();
380 | 2.32k | } _ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 328 | 86 | IColumn& col_to) { | 329 | 86 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 330 | 86 | if constexpr (first) { | 331 | 86 | to_column.insert_many_defaults(input_rows_count); | 332 | 86 | } | 333 | 86 | auto& col_to_data = to_column.get_data(); | 334 | 86 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 335 | 79 | const typename ColumnString::Chars& data = col_from->get_chars(); | 336 | 79 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 337 | 79 | size_t size = offsets.size(); | 338 | 79 | ColumnString::Offset current_offset = 0; | 339 | 467 | for (size_t i = 0; i < size; ++i) { | 340 | 388 | if constexpr (ReturnType == TYPE_INT) { | 341 | 388 | col_to_data[i] = HashUtil::xxHash32WithSeed( | 342 | 388 | reinterpret_cast<const char*>(&data[current_offset]), | 343 | 388 | offsets[i] - current_offset, col_to_data[i]); | 344 | | } else { | 345 | | col_to_data[i] = HashUtil::xxHash64WithSeed( | 346 | | reinterpret_cast<const char*>(&data[current_offset]), | 347 | | offsets[i] - current_offset, col_to_data[i]); | 348 | | } | 349 | 388 | current_offset = offsets[i]; | 350 | 388 | } | 351 | 79 | } else if (const ColumnConst* col_from_const = | 352 | 7 | check_and_get_column_const_string_or_fixedstring(column)) { | 353 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 354 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 355 | 0 | if constexpr (ReturnType == TYPE_INT) { | 356 | 0 | col_to_data[i] = | 357 | 0 | HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]); | 358 | | } else { | 359 | | col_to_data[i] = | 360 | | HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]); | 361 | | } | 362 | 0 | } | 363 | 7 | } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) { 
| 364 | 33 | for (size_t i = 0; i < input_rows_count; ++i) { | 365 | 26 | auto data_ref = vb_col->get_data_at(i); | 366 | 26 | if constexpr (ReturnType == TYPE_INT) { | 367 | 26 | col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size, | 368 | 26 | col_to_data[i]); | 369 | | } else { | 370 | | col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size, | 371 | | col_to_data[i]); | 372 | | } | 373 | 26 | } | 374 | 7 | } else { | 375 | 0 | DCHECK(false); | 376 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 377 | 0 | column->get_name(), name); | 378 | 0 | } | 379 | 86 | return Status::OK(); | 380 | 86 | } |
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 328 | 11 | IColumn& col_to) { | 329 | 11 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 330 | | if constexpr (first) { | 331 | | to_column.insert_many_defaults(input_rows_count); | 332 | | } | 333 | 11 | auto& col_to_data = to_column.get_data(); | 334 | 11 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 335 | 6 | const typename ColumnString::Chars& data = col_from->get_chars(); | 336 | 6 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 337 | 6 | size_t size = offsets.size(); | 338 | 6 | ColumnString::Offset current_offset = 0; | 339 | 23 | for (size_t i = 0; i < size; ++i) { | 340 | 17 | if constexpr (ReturnType == TYPE_INT) { | 341 | 17 | col_to_data[i] = HashUtil::xxHash32WithSeed( | 342 | 17 | reinterpret_cast<const char*>(&data[current_offset]), | 343 | 17 | offsets[i] - current_offset, col_to_data[i]); | 344 | | } else { | 345 | | col_to_data[i] = HashUtil::xxHash64WithSeed( | 346 | | reinterpret_cast<const char*>(&data[current_offset]), | 347 | | offsets[i] - current_offset, col_to_data[i]); | 348 | | } | 349 | 17 | current_offset = offsets[i]; | 350 | 17 | } | 351 | 6 | } else if (const ColumnConst* col_from_const = | 352 | 5 | check_and_get_column_const_string_or_fixedstring(column)) { | 353 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 354 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 355 | 0 | if constexpr (ReturnType == TYPE_INT) { | 356 | 0 | col_to_data[i] = | 357 | 0 | HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]); | 358 | | } else { | 359 | | col_to_data[i] = | 360 | | HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]); | 361 | | } | 362 | 0 | } | 363 | 5 | } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) { | 364 | 21 | for (size_t i = 0; i < 
input_rows_count; ++i) { | 365 | 16 | auto data_ref = vb_col->get_data_at(i); | 366 | 16 | if constexpr (ReturnType == TYPE_INT) { | 367 | 16 | col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size, | 368 | 16 | col_to_data[i]); | 369 | | } else { | 370 | | col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size, | 371 | | col_to_data[i]); | 372 | | } | 373 | 16 | } | 374 | 5 | } else { | 375 | 0 | DCHECK(false); | 376 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 377 | 0 | column->get_name(), name); | 378 | 0 | } | 379 | 11 | return Status::OK(); | 380 | 11 | } |
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 328 | 2.21k | IColumn& col_to) { | 329 | 2.21k | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 330 | 2.21k | if constexpr (first) { | 331 | 2.21k | to_column.insert_many_defaults(input_rows_count); | 332 | 2.21k | } | 333 | 2.21k | auto& col_to_data = to_column.get_data(); | 334 | 2.21k | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 335 | 2.20k | const typename ColumnString::Chars& data = col_from->get_chars(); | 336 | 2.20k | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 337 | 2.20k | size_t size = offsets.size(); | 338 | 2.20k | ColumnString::Offset current_offset = 0; | 339 | 172k | for (size_t i = 0; i < size; ++i) { | 340 | | if constexpr (ReturnType == TYPE_INT) { | 341 | | col_to_data[i] = HashUtil::xxHash32WithSeed( | 342 | | reinterpret_cast<const char*>(&data[current_offset]), | 343 | | offsets[i] - current_offset, col_to_data[i]); | 344 | 170k | } else { | 345 | 170k | col_to_data[i] = HashUtil::xxHash64WithSeed( | 346 | 170k | reinterpret_cast<const char*>(&data[current_offset]), | 347 | 170k | offsets[i] - current_offset, col_to_data[i]); | 348 | 170k | } | 349 | 170k | current_offset = offsets[i]; | 350 | 170k | } | 351 | 2.20k | } else if (const ColumnConst* col_from_const = | 352 | 7 | check_and_get_column_const_string_or_fixedstring(column)) { | 353 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 354 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 355 | | if constexpr (ReturnType == TYPE_INT) { | 356 | | col_to_data[i] = | 357 | | HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]); | 358 | 0 | } else { | 359 | 0 | col_to_data[i] = | 360 | 0 | HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]); | 361 | 0 | } | 362 | 0 | } | 363 | 7 | } else if (const auto* vb_col = 
check_and_get_column<ColumnVarbinary>(column)) { | 364 | 33 | for (size_t i = 0; i < input_rows_count; ++i) { | 365 | 26 | auto data_ref = vb_col->get_data_at(i); | 366 | | if constexpr (ReturnType == TYPE_INT) { | 367 | | col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size, | 368 | | col_to_data[i]); | 369 | 26 | } else { | 370 | 26 | col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size, | 371 | 26 | col_to_data[i]); | 372 | 26 | } | 373 | 26 | } | 374 | 7 | } else { | 375 | 0 | DCHECK(false); | 376 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 377 | 0 | column->get_name(), name); | 378 | 0 | } | 379 | 2.21k | return Status::OK(); | 380 | 2.21k | } |
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 328 | 13 | IColumn& col_to) { | 329 | 13 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 330 | | if constexpr (first) { | 331 | | to_column.insert_many_defaults(input_rows_count); | 332 | | } | 333 | 13 | auto& col_to_data = to_column.get_data(); | 334 | 13 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 335 | 8 | const typename ColumnString::Chars& data = col_from->get_chars(); | 336 | 8 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 337 | 8 | size_t size = offsets.size(); | 338 | 8 | ColumnString::Offset current_offset = 0; | 339 | 27 | for (size_t i = 0; i < size; ++i) { | 340 | | if constexpr (ReturnType == TYPE_INT) { | 341 | | col_to_data[i] = HashUtil::xxHash32WithSeed( | 342 | | reinterpret_cast<const char*>(&data[current_offset]), | 343 | | offsets[i] - current_offset, col_to_data[i]); | 344 | 19 | } else { | 345 | 19 | col_to_data[i] = HashUtil::xxHash64WithSeed( | 346 | 19 | reinterpret_cast<const char*>(&data[current_offset]), | 347 | 19 | offsets[i] - current_offset, col_to_data[i]); | 348 | 19 | } | 349 | 19 | current_offset = offsets[i]; | 350 | 19 | } | 351 | 8 | } else if (const ColumnConst* col_from_const = | 352 | 5 | check_and_get_column_const_string_or_fixedstring(column)) { | 353 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 354 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 355 | | if constexpr (ReturnType == TYPE_INT) { | 356 | | col_to_data[i] = | 357 | | HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]); | 358 | 0 | } else { | 359 | 0 | col_to_data[i] = | 360 | 0 | HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]); | 361 | 0 | } | 362 | 0 | } | 363 | 5 | } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) { | 364 | 21 | for (size_t i = 0; 
i < input_rows_count; ++i) { | 365 | 16 | auto data_ref = vb_col->get_data_at(i); | 366 | | if constexpr (ReturnType == TYPE_INT) { | 367 | | col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size, | 368 | | col_to_data[i]); | 369 | 16 | } else { | 370 | 16 | col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size, | 371 | 16 | col_to_data[i]); | 372 | 16 | } | 373 | 16 | } | 374 | 5 | } else { | 375 | 0 | DCHECK(false); | 376 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 377 | 0 | column->get_name(), name); | 378 | 0 | } | 379 | 13 | return Status::OK(); | 380 | 13 | } |
|
381 | | };
382 | | |
// Concrete xxHash function types: the variadic wrapper paired with the XxHashImpl policy for
// a 32-bit (DataTypeInt32 / TYPE_INT) and a 64-bit (DataTypeInt64 / TYPE_BIGINT) result.
383 | | using FunctionXxHash_32 = FunctionVariadicArgumentsBase<DataTypeInt32, XxHashImpl<TYPE_INT>>;
384 | | using FunctionXxHash_64 = FunctionVariadicArgumentsBase<DataTypeInt64, XxHashImpl<TYPE_BIGINT>>;
385 | | |
// Registers every hash function type defined in this file with the given factory: six
// MurmurHash3 types and two xxHash types.
386 | 8 | void register_function_hash(SimpleFunctionFactory& factory) {
387 | 8 | factory.register_function<FunctionMurmurHash3_32>();
388 | 8 | factory.register_function<FunctionMurmurHash3_64>();
389 | 8 | factory.register_function<FunctionMurmurHash3_64_V2>();
390 | 8 | factory.register_function<FunctionMurmurHash3U64V2>();
391 | 8 | factory.register_function<FunctionMurmurHash3_128>();
392 | 8 | factory.register_function<FunctionMurmurHash3U128>();
393 | 8 | factory.register_function<FunctionXxHash_32>();
394 | 8 | factory.register_function<FunctionXxHash_64>();
// NOTE(review): presumably the arguments are (existing name, alias), which would make
// "xxhash3_64" an alias of "xxhash_64" -- confirm against SimpleFunctionFactory::register_alias.
395 | 8 | factory.register_alias("xxhash_64", "xxhash3_64");
396 | 8 | }
397 | | } // namespace doris |