be/src/exprs/function/function_hash.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionHash.cpp |
19 | | // and modified by Doris |
20 | | |
21 | | #include "exprs/function/function_hash.h" |
22 | | |
23 | | #include "common/status.h" |
24 | | #include "core/assert_cast.h" |
25 | | #include "core/column/column.h" |
26 | | #include "core/column/column_const.h" |
27 | | #include "core/column/column_string.h" |
28 | | #include "core/column/column_varbinary.h" |
29 | | #include "core/column/column_vector.h" |
30 | | #include "core/data_type/data_type.h" |
31 | | #include "core/data_type/data_type_number.h" |
32 | | #include "core/field.h" |
33 | | #include "exec/common/template_helpers.hpp" |
34 | | #include "exprs/function/function_helpers.h" |
35 | | #include "exprs/function/function_variadic_arguments.h" |
36 | | #include "exprs/function/simple_function_factory.h" |
37 | | #include "util/hash/murmur_hash3.h" |
38 | | #include "util/hash_util.hpp" |
39 | | |
40 | | namespace doris { |
41 | | #include "common/compile_check_begin.h" |
42 | | constexpr uint64_t emtpy_value = 0xe28dbde7fe22e41c; |
43 | | |
44 | | template <PrimitiveType ReturnType, bool is_mmh64_v2 = false> |
45 | | struct MurmurHash3Impl { |
46 | 0 | static constexpr auto get_name() { |
47 | 0 | if constexpr (ReturnType == TYPE_INT) { |
48 | 0 | return "murmur_hash3_32"; |
49 | 0 | } else if constexpr (ReturnType == TYPE_LARGEINT) { |
50 | 0 | return "murmur_hash3_u64_v2"; |
51 | 0 | } else if constexpr (is_mmh64_v2) { |
52 | 0 | return "murmur_hash3_64_v2"; |
53 | 0 | } else { |
54 | 0 | return "murmur_hash3_64"; |
55 | 0 | } |
56 | 0 | } |
57 | | static constexpr auto name = get_name(); |
58 | | |
59 | 0 | static Status empty_apply(IColumn& icolumn, size_t input_rows_count) { |
60 | 0 | ColumnVector<ReturnType>& vec_to = assert_cast<ColumnVector<ReturnType>&>(icolumn); |
61 | 0 | vec_to.get_data().assign( |
62 | 0 | input_rows_count, |
63 | 0 | static_cast<typename PrimitiveTypeTraits<ReturnType>::CppType>(emtpy_value)); |
64 | 0 | return Status::OK(); |
65 | 0 | } Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE11empty_applyERNS_7IColumnEm Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE11empty_applyERNS_7IColumnEm Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE11empty_applyERNS_7IColumnEm Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE11empty_applyERNS_7IColumnEm |
66 | | |
67 | | static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count, |
68 | 81 | IColumn& icolumn) { |
69 | 81 | return execute<true>(type, column, input_rows_count, icolumn); |
70 | 81 | } _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 68 | 21 | IColumn& icolumn) { | 69 | 21 | return execute<true>(type, column, input_rows_count, icolumn); | 70 | 21 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 68 | 21 | IColumn& icolumn) { | 69 | 21 | return execute<true>(type, column, input_rows_count, icolumn); | 70 | 21 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 68 | 18 | IColumn& icolumn) { | 69 | 18 | return execute<true>(type, column, input_rows_count, icolumn); | 70 | 18 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 68 | 21 | IColumn& icolumn) { | 69 | 21 | return execute<true>(type, column, input_rows_count, icolumn); | 70 | 21 | } |
|
71 | | |
72 | | static Status combine_apply(const IDataType* type, const IColumn* column, |
73 | 13 | size_t input_rows_count, IColumn& icolumn) { |
74 | 13 | return execute<false>(type, column, input_rows_count, icolumn); |
75 | 13 | } _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 73 | 4 | size_t input_rows_count, IColumn& icolumn) { | 74 | 4 | return execute<false>(type, column, input_rows_count, icolumn); | 75 | 4 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 73 | 4 | size_t input_rows_count, IColumn& icolumn) { | 74 | 4 | return execute<false>(type, column, input_rows_count, icolumn); | 75 | 4 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 73 | 1 | size_t input_rows_count, IColumn& icolumn) { | 74 | 1 | return execute<false>(type, column, input_rows_count, icolumn); | 75 | 1 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 73 | 4 | size_t input_rows_count, IColumn& icolumn) { | 74 | 4 | return execute<false>(type, column, input_rows_count, icolumn); | 75 | 4 | } |
|
76 | | |
77 | | template <bool first> |
78 | | static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count, |
79 | 94 | IColumn& col_to) { |
80 | 94 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); |
81 | 94 | if constexpr (first) { |
82 | 81 | if constexpr (ReturnType == TYPE_INT) { |
83 | 21 | to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED), |
84 | 21 | input_rows_count); |
85 | 60 | } else { |
86 | 60 | to_column.insert_many_defaults(input_rows_count); |
87 | 60 | } |
88 | 81 | } |
89 | 94 | auto& col_to_data = to_column.get_data(); |
90 | 94 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { |
91 | 94 | const typename ColumnString::Chars& data = col_from->get_chars(); |
92 | 94 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); |
93 | 94 | size_t size = offsets.size(); |
94 | 94 | ColumnString::Offset current_offset = 0; |
95 | 286 | for (size_t i = 0; i < size; ++i) { |
96 | 192 | if constexpr (ReturnType == TYPE_INT) { |
97 | 65 | col_to_data[i] = HashUtil::murmur_hash3_32( |
98 | 65 | reinterpret_cast<const char*>(&data[current_offset]), |
99 | 65 | offsets[i] - current_offset, col_to_data[i]); |
100 | 127 | } else { |
101 | 127 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( |
102 | 127 | reinterpret_cast<const char*>(&data[current_offset]), |
103 | 127 | offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i])); |
104 | 127 | } |
105 | 192 | current_offset = offsets[i]; |
106 | 192 | } |
107 | 94 | } else if (const ColumnConst* col_from_const = |
108 | 0 | check_and_get_column_const_string_or_fixedstring(column)) { |
109 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); |
110 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
111 | 0 | if constexpr (ReturnType == TYPE_INT) { |
112 | 0 | col_to_data[i] = |
113 | 0 | HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); |
114 | 0 | } else { |
115 | 0 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( |
116 | 0 | value.data(), value.size(), static_cast<uint64_t>(col_to_data[i])); |
117 | 0 | } |
118 | 0 | } |
119 | 0 | } else { |
120 | 0 | DCHECK(false); |
121 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", |
122 | 0 | column->get_name(), name); |
123 | 0 | } |
124 | 94 | return Status::OK(); |
125 | 94 | } _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 79 | 21 | IColumn& col_to) { | 80 | 21 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 81 | 21 | if constexpr (first) { | 82 | 21 | if constexpr (ReturnType == TYPE_INT) { | 83 | 21 | to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED), | 84 | 21 | input_rows_count); | 85 | | } else { | 86 | | to_column.insert_many_defaults(input_rows_count); | 87 | | } | 88 | 21 | } | 89 | 21 | auto& col_to_data = to_column.get_data(); | 90 | 21 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 91 | 21 | const typename ColumnString::Chars& data = col_from->get_chars(); | 92 | 21 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 93 | 21 | size_t size = offsets.size(); | 94 | 21 | ColumnString::Offset current_offset = 0; | 95 | 79 | for (size_t i = 0; i < size; ++i) { | 96 | 58 | if constexpr (ReturnType == TYPE_INT) { | 97 | 58 | col_to_data[i] = HashUtil::murmur_hash3_32( | 98 | 58 | reinterpret_cast<const char*>(&data[current_offset]), | 99 | 58 | offsets[i] - current_offset, col_to_data[i]); | 100 | | } else { | 101 | | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 102 | | reinterpret_cast<const char*>(&data[current_offset]), | 103 | | offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i])); | 104 | | } | 105 | 58 | current_offset = offsets[i]; | 106 | 58 | } | 107 | 21 | } else if (const ColumnConst* col_from_const = | 108 | 0 | check_and_get_column_const_string_or_fixedstring(column)) { | 109 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 110 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 111 | 0 | if constexpr (ReturnType == TYPE_INT) { | 112 | 0 | col_to_data[i] = | 113 | 0 | HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); | 114 | | } else { | 115 | | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 116 | | value.data(), value.size(), static_cast<uint64_t>(col_to_data[i])); | 117 | | } | 118 | 0 | } | 119 | 0 | } else { | 120 | 0 | DCHECK(false); | 121 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 122 | 0 | column->get_name(), name); | 123 | 0 | } | 124 | 21 | return Status::OK(); | 125 | 21 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 79 | 4 | IColumn& col_to) { | 80 | 4 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 81 | | if constexpr (first) { | 82 | | if constexpr (ReturnType == TYPE_INT) { | 83 | | to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED), | 84 | | input_rows_count); | 85 | | } else { | 86 | | to_column.insert_many_defaults(input_rows_count); | 87 | | } | 88 | | } | 89 | 4 | auto& col_to_data = to_column.get_data(); | 90 | 4 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 91 | 4 | const typename ColumnString::Chars& data = col_from->get_chars(); | 92 | 4 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 93 | 4 | size_t size = offsets.size(); | 94 | 4 | ColumnString::Offset current_offset = 0; | 95 | 11 | for (size_t i = 0; i < size; ++i) { | 96 | 7 | if constexpr (ReturnType == TYPE_INT) { | 97 | 7 | col_to_data[i] = HashUtil::murmur_hash3_32( | 98 | 7 | reinterpret_cast<const char*>(&data[current_offset]), | 99 | 7 | offsets[i] - current_offset, col_to_data[i]); | 100 | | } else { | 101 | | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 102 | | reinterpret_cast<const char*>(&data[current_offset]), | 103 | | offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i])); | 104 | | } | 105 | 7 | current_offset = offsets[i]; | 106 | 7 | } | 107 | 4 | } else if (const ColumnConst* col_from_const = | 108 | 0 | check_and_get_column_const_string_or_fixedstring(column)) { | 109 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 110 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 111 | 0 | if constexpr (ReturnType == TYPE_INT) { | 112 | 0 | col_to_data[i] = | 113 | 0 | HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); | 114 | | } else { | 115 | | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 116 | | value.data(), value.size(), static_cast<uint64_t>(col_to_data[i])); | 117 | | } | 118 | 0 | } | 119 | 0 | } else { | 120 | 0 | DCHECK(false); | 121 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 122 | 0 | column->get_name(), name); | 123 | 0 | } | 124 | 4 | return Status::OK(); | 125 | 4 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 79 | 21 | IColumn& col_to) { | 80 | 21 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 81 | 21 | if constexpr (first) { | 82 | | if constexpr (ReturnType == TYPE_INT) { | 83 | | to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED), | 84 | | input_rows_count); | 85 | 21 | } else { | 86 | 21 | to_column.insert_many_defaults(input_rows_count); | 87 | 21 | } | 88 | 21 | } | 89 | 21 | auto& col_to_data = to_column.get_data(); | 90 | 21 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 91 | 21 | const typename ColumnString::Chars& data = col_from->get_chars(); | 92 | 21 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 93 | 21 | size_t size = offsets.size(); | 94 | 21 | ColumnString::Offset current_offset = 0; | 95 | 79 | for (size_t i = 0; i < size; ++i) { | 96 | | if constexpr (ReturnType == TYPE_INT) { | 97 | | col_to_data[i] = HashUtil::murmur_hash3_32( | 98 | | reinterpret_cast<const char*>(&data[current_offset]), | 99 | | offsets[i] - current_offset, col_to_data[i]); | 100 | 58 | } else { | 101 | 58 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 102 | 58 | reinterpret_cast<const char*>(&data[current_offset]), | 103 | 58 | offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i])); | 104 | 58 | } | 105 | 58 | current_offset = offsets[i]; | 106 | 58 | } | 107 | 21 | } else if (const ColumnConst* col_from_const = | 108 | 0 | check_and_get_column_const_string_or_fixedstring(column)) { | 109 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 110 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 111 | | if constexpr (ReturnType == TYPE_INT) { | 112 | | col_to_data[i] = | 113 | | HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); | 114 | 0 | } else { | 115 | 0 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 116 | 0 | value.data(), value.size(), static_cast<uint64_t>(col_to_data[i])); | 117 | 0 | } | 118 | 0 | } | 119 | 0 | } else { | 120 | 0 | DCHECK(false); | 121 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 122 | 0 | column->get_name(), name); | 123 | 0 | } | 124 | 21 | return Status::OK(); | 125 | 21 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 79 | 4 | IColumn& col_to) { | 80 | 4 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 81 | | if constexpr (first) { | 82 | | if constexpr (ReturnType == TYPE_INT) { | 83 | | to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED), | 84 | | input_rows_count); | 85 | | } else { | 86 | | to_column.insert_many_defaults(input_rows_count); | 87 | | } | 88 | | } | 89 | 4 | auto& col_to_data = to_column.get_data(); | 90 | 4 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 91 | 4 | const typename ColumnString::Chars& data = col_from->get_chars(); | 92 | 4 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 93 | 4 | size_t size = offsets.size(); | 94 | 4 | ColumnString::Offset current_offset = 0; | 95 | 11 | for (size_t i = 0; i < size; ++i) { | 96 | | if constexpr (ReturnType == TYPE_INT) { | 97 | | col_to_data[i] = HashUtil::murmur_hash3_32( | 98 | | reinterpret_cast<const char*>(&data[current_offset]), | 99 | | offsets[i] - current_offset, col_to_data[i]); | 100 | 7 | } else { | 101 | 7 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 102 | 7 | reinterpret_cast<const char*>(&data[current_offset]), | 103 | 7 | offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i])); | 104 | 7 | } | 105 | 7 | current_offset = offsets[i]; | 106 | 7 | } | 107 | 4 | } else if (const ColumnConst* col_from_const = | 108 | 0 | check_and_get_column_const_string_or_fixedstring(column)) { | 109 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 110 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 111 | | if constexpr (ReturnType == TYPE_INT) { | 112 | | col_to_data[i] = | 113 | | HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); | 114 | 0 | } else { | 115 | 0 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 116 | 0 | value.data(), value.size(), static_cast<uint64_t>(col_to_data[i])); | 117 | 0 | } | 118 | 0 | } | 119 | 0 | } else { | 120 | 0 | DCHECK(false); | 121 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 122 | 0 | column->get_name(), name); | 123 | 0 | } | 124 | 4 | return Status::OK(); | 125 | 4 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 79 | 18 | IColumn& col_to) { | 80 | 18 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 81 | 18 | if constexpr (first) { | 82 | | if constexpr (ReturnType == TYPE_INT) { | 83 | | to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED), | 84 | | input_rows_count); | 85 | 18 | } else { | 86 | 18 | to_column.insert_many_defaults(input_rows_count); | 87 | 18 | } | 88 | 18 | } | 89 | 18 | auto& col_to_data = to_column.get_data(); | 90 | 18 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 91 | 18 | const typename ColumnString::Chars& data = col_from->get_chars(); | 92 | 18 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 93 | 18 | size_t size = offsets.size(); | 94 | 18 | ColumnString::Offset current_offset = 0; | 95 | 46 | for (size_t i = 0; i < size; ++i) { | 96 | | if constexpr (ReturnType == TYPE_INT) { | 97 | | col_to_data[i] = HashUtil::murmur_hash3_32( | 98 | | reinterpret_cast<const char*>(&data[current_offset]), | 99 | | offsets[i] - current_offset, col_to_data[i]); | 100 | 28 | } else { | 101 | 28 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 102 | 28 | reinterpret_cast<const char*>(&data[current_offset]), | 103 | 28 | offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i])); | 104 | 28 | } | 105 | 28 | current_offset = offsets[i]; | 106 | 28 | } | 107 | 18 | } else if (const ColumnConst* col_from_const = | 108 | 0 | check_and_get_column_const_string_or_fixedstring(column)) { | 109 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 110 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 111 | | if constexpr (ReturnType == TYPE_INT) { | 112 | | col_to_data[i] = | 113 | | HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); | 114 | 0 | } else { | 115 | 0 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 116 | 0 | value.data(), value.size(), static_cast<uint64_t>(col_to_data[i])); | 117 | 0 | } | 118 | 0 | } | 119 | 0 | } else { | 120 | 0 | DCHECK(false); | 121 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 122 | 0 | column->get_name(), name); | 123 | 0 | } | 124 | 18 | return Status::OK(); | 125 | 18 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 79 | 1 | IColumn& col_to) { | 80 | 1 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 81 | | if constexpr (first) { | 82 | | if constexpr (ReturnType == TYPE_INT) { | 83 | | to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED), | 84 | | input_rows_count); | 85 | | } else { | 86 | | to_column.insert_many_defaults(input_rows_count); | 87 | | } | 88 | | } | 89 | 1 | auto& col_to_data = to_column.get_data(); | 90 | 1 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 91 | 1 | const typename ColumnString::Chars& data = col_from->get_chars(); | 92 | 1 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 93 | 1 | size_t size = offsets.size(); | 94 | 1 | ColumnString::Offset current_offset = 0; | 95 | 2 | for (size_t i = 0; i < size; ++i) { | 96 | | if constexpr (ReturnType == TYPE_INT) { | 97 | | col_to_data[i] = HashUtil::murmur_hash3_32( | 98 | | reinterpret_cast<const char*>(&data[current_offset]), | 99 | | offsets[i] - current_offset, col_to_data[i]); | 100 | 1 | } else { | 101 | 1 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 102 | 1 | reinterpret_cast<const char*>(&data[current_offset]), | 103 | 1 | offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i])); | 104 | 1 | } | 105 | 1 | current_offset = offsets[i]; | 106 | 1 | } | 107 | 1 | } else if (const ColumnConst* col_from_const = | 108 | 0 | check_and_get_column_const_string_or_fixedstring(column)) { | 109 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 110 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 111 | | if constexpr (ReturnType == TYPE_INT) { | 112 | | col_to_data[i] = | 113 | | HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); | 114 | 0 | } else { | 115 | 0 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 116 | 0 | value.data(), value.size(), static_cast<uint64_t>(col_to_data[i])); | 117 | 0 | } | 118 | 0 | } | 119 | 0 | } else { | 120 | 0 | DCHECK(false); | 121 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 122 | 0 | column->get_name(), name); | 123 | 0 | } | 124 | 1 | return Status::OK(); | 125 | 1 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 79 | 21 | IColumn& col_to) { | 80 | 21 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 81 | 21 | if constexpr (first) { | 82 | | if constexpr (ReturnType == TYPE_INT) { | 83 | | to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED), | 84 | | input_rows_count); | 85 | 21 | } else { | 86 | 21 | to_column.insert_many_defaults(input_rows_count); | 87 | 21 | } | 88 | 21 | } | 89 | 21 | auto& col_to_data = to_column.get_data(); | 90 | 21 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 91 | 21 | const typename ColumnString::Chars& data = col_from->get_chars(); | 92 | 21 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 93 | 21 | size_t size = offsets.size(); | 94 | 21 | ColumnString::Offset current_offset = 0; | 95 | 50 | for (size_t i = 0; i < size; ++i) { | 96 | | if constexpr (ReturnType == TYPE_INT) { | 97 | | col_to_data[i] = HashUtil::murmur_hash3_32( | 98 | | reinterpret_cast<const char*>(&data[current_offset]), | 99 | | offsets[i] - current_offset, col_to_data[i]); | 100 | 29 | } else { | 101 | 29 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 102 | 29 | reinterpret_cast<const char*>(&data[current_offset]), | 103 | 29 | offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i])); | 104 | 29 | } | 105 | 29 | current_offset = offsets[i]; | 106 | 29 | } | 107 | 21 | } else if (const ColumnConst* col_from_const = | 108 | 0 | check_and_get_column_const_string_or_fixedstring(column)) { | 109 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 110 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 111 | | if constexpr (ReturnType == TYPE_INT) { | 112 | | col_to_data[i] = | 113 | | HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); | 114 | 0 | } else { | 115 | 0 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 116 | 0 | value.data(), value.size(), static_cast<uint64_t>(col_to_data[i])); | 117 | 0 | } | 118 | 0 | } | 119 | 0 | } else { | 120 | 0 | DCHECK(false); | 121 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 122 | 0 | column->get_name(), name); | 123 | 0 | } | 124 | 21 | return Status::OK(); | 125 | 21 | } |
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 79 | 4 | IColumn& col_to) { | 80 | 4 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 81 | | if constexpr (first) { | 82 | | if constexpr (ReturnType == TYPE_INT) { | 83 | | to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED), | 84 | | input_rows_count); | 85 | | } else { | 86 | | to_column.insert_many_defaults(input_rows_count); | 87 | | } | 88 | | } | 89 | 4 | auto& col_to_data = to_column.get_data(); | 90 | 4 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 91 | 4 | const typename ColumnString::Chars& data = col_from->get_chars(); | 92 | 4 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 93 | 4 | size_t size = offsets.size(); | 94 | 4 | ColumnString::Offset current_offset = 0; | 95 | 8 | for (size_t i = 0; i < size; ++i) { | 96 | | if constexpr (ReturnType == TYPE_INT) { | 97 | | col_to_data[i] = HashUtil::murmur_hash3_32( | 98 | | reinterpret_cast<const char*>(&data[current_offset]), | 99 | | offsets[i] - current_offset, col_to_data[i]); | 100 | 4 | } else { | 101 | 4 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 102 | 4 | reinterpret_cast<const char*>(&data[current_offset]), | 103 | 4 | offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i])); | 104 | 4 | } | 105 | 4 | current_offset = offsets[i]; | 106 | 4 | } | 107 | 4 | } else if (const ColumnConst* col_from_const = | 108 | 0 | check_and_get_column_const_string_or_fixedstring(column)) { | 109 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 110 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 111 | | if constexpr (ReturnType == TYPE_INT) { | 112 | | col_to_data[i] = | 113 | | HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); | 114 | 0 | } else { | 115 | 0 | col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>( | 116 | 0 | value.data(), value.size(), static_cast<uint64_t>(col_to_data[i])); | 117 | 0 | } | 118 | 0 | } | 119 | 0 | } else { | 120 | 0 | DCHECK(false); | 121 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 122 | 0 | column->get_name(), name); | 123 | 0 | } | 124 | 4 | return Status::OK(); | 125 | 4 | } |
|
126 | | }; |
127 | | |
128 | | using FunctionMurmurHash3_32 = |
129 | | FunctionVariadicArgumentsBase<DataTypeInt32, MurmurHash3Impl<TYPE_INT>>; |
130 | | using FunctionMurmurHash3_64 = |
131 | | FunctionVariadicArgumentsBase<DataTypeInt64, MurmurHash3Impl<TYPE_BIGINT>>; |
132 | | using FunctionMurmurHash3_64_V2 = |
133 | | FunctionVariadicArgumentsBase<DataTypeInt64, MurmurHash3Impl<TYPE_BIGINT, true>>; |
134 | | using FunctionMurmurHash3U64V2 = |
135 | | FunctionVariadicArgumentsBase<DataTypeInt128, MurmurHash3Impl<TYPE_LARGEINT, true>>; |
136 | | |
137 | | #ifdef BE_TEST |
138 | | const char* murmur_hash3_get_name_type_int_for_test() { |
139 | | return MurmurHash3Impl<TYPE_INT>::get_name(); |
140 | | } |
141 | | |
142 | | const char* murmur_hash3_get_name_type_bigint_for_test() { |
143 | | return MurmurHash3Impl<TYPE_BIGINT>::get_name(); |
144 | | } |
145 | | |
146 | | const char* murmur_hash3_get_name_type_bigint_v2_for_test() { |
147 | | return MurmurHash3Impl<TYPE_BIGINT, true>::get_name(); |
148 | | } |
149 | | #endif |
150 | | |
151 | | template <PrimitiveType ReturnType> |
152 | | struct XxHashImpl { |
153 | | static constexpr auto name = ReturnType == TYPE_INT ? "xxhash_32" : "xxhash_64"; |
154 | | |
155 | 0 | static Status empty_apply(IColumn& icolumn, size_t input_rows_count) { |
156 | 0 | ColumnVector<ReturnType>& vec_to = assert_cast<ColumnVector<ReturnType>&>(icolumn); |
157 | 0 | vec_to.get_data().assign( |
158 | 0 | input_rows_count, |
159 | 0 | static_cast<typename PrimitiveTypeTraits<ReturnType>::CppType>(emtpy_value)); |
160 | 0 | return Status::OK(); |
161 | 0 | } Unexecuted instantiation: _ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE11empty_applyERNS_7IColumnEm Unexecuted instantiation: _ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE11empty_applyERNS_7IColumnEm |
162 | | |
163 | | static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count, |
164 | 1.08k | IColumn& icolumn) { |
165 | 1.08k | return execute<true>(type, column, input_rows_count, icolumn); |
166 | 1.08k | } _ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 164 | 104 | IColumn& icolumn) { | 165 | 104 | return execute<true>(type, column, input_rows_count, icolumn); | 166 | 104 | } |
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 164 | 981 | IColumn& icolumn) { | 165 | 981 | return execute<true>(type, column, input_rows_count, icolumn); | 166 | 981 | } |
|
167 | | |
168 | | static Status combine_apply(const IDataType* type, const IColumn* column, |
169 | 24 | size_t input_rows_count, IColumn& icolumn) { |
170 | 24 | return execute<false>(type, column, input_rows_count, icolumn); |
171 | 24 | } _ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 169 | 11 | size_t input_rows_count, IColumn& icolumn) { | 170 | 11 | return execute<false>(type, column, input_rows_count, icolumn); | 171 | 11 | } |
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_ Line | Count | Source | 169 | 13 | size_t input_rows_count, IColumn& icolumn) { | 170 | 13 | return execute<false>(type, column, input_rows_count, icolumn); | 171 | 13 | } |
|
172 | | |
173 | | template <bool first> |
174 | | static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count, |
175 | 1.10k | IColumn& col_to) { |
176 | 1.10k | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); |
177 | 1.10k | if constexpr (first) { |
178 | 1.08k | to_column.insert_many_defaults(input_rows_count); |
179 | 1.08k | } |
180 | 1.10k | auto& col_to_data = to_column.get_data(); |
181 | 1.10k | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { |
182 | 1.08k | const typename ColumnString::Chars& data = col_from->get_chars(); |
183 | 1.08k | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); |
184 | 1.08k | size_t size = offsets.size(); |
185 | 1.08k | ColumnString::Offset current_offset = 0; |
186 | 95.2k | for (size_t i = 0; i < size; ++i) { |
187 | 94.1k | if constexpr (ReturnType == TYPE_INT) { |
188 | 410 | col_to_data[i] = HashUtil::xxHash32WithSeed( |
189 | 410 | reinterpret_cast<const char*>(&data[current_offset]), |
190 | 410 | offsets[i] - current_offset, col_to_data[i]); |
191 | 93.7k | } else { |
192 | 93.7k | col_to_data[i] = HashUtil::xxHash64WithSeed( |
193 | 93.7k | reinterpret_cast<const char*>(&data[current_offset]), |
194 | 93.7k | offsets[i] - current_offset, col_to_data[i]); |
195 | 93.7k | } |
196 | 94.1k | current_offset = offsets[i]; |
197 | 94.1k | } |
198 | 1.08k | } else if (const ColumnConst* col_from_const = |
199 | 24 | check_and_get_column_const_string_or_fixedstring(column)) { |
200 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); |
201 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
202 | 0 | if constexpr (ReturnType == TYPE_INT) { |
203 | 0 | col_to_data[i] = |
204 | 0 | HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]); |
205 | 0 | } else { |
206 | 0 | col_to_data[i] = |
207 | 0 | HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]); |
208 | 0 | } |
209 | 0 | } |
210 | 24 | } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) { |
211 | 108 | for (size_t i = 0; i < input_rows_count; ++i) { |
212 | 84 | auto data_ref = vb_col->get_data_at(i); |
213 | 84 | if constexpr (ReturnType == TYPE_INT) { |
214 | 42 | col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size, |
215 | 42 | col_to_data[i]); |
216 | 42 | } else { |
217 | 42 | col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size, |
218 | 42 | col_to_data[i]); |
219 | 42 | } |
220 | 84 | } |
221 | 24 | } else { |
222 | 0 | DCHECK(false); |
223 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", |
224 | 0 | column->get_name(), name); |
225 | 0 | } |
226 | 1.10k | return Status::OK(); |
227 | 1.10k | } _ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 175 | 104 | IColumn& col_to) { | 176 | 104 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 177 | 104 | if constexpr (first) { | 178 | 104 | to_column.insert_many_defaults(input_rows_count); | 179 | 104 | } | 180 | 104 | auto& col_to_data = to_column.get_data(); | 181 | 104 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 182 | 97 | const typename ColumnString::Chars& data = col_from->get_chars(); | 183 | 97 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 184 | 97 | size_t size = offsets.size(); | 185 | 97 | ColumnString::Offset current_offset = 0; | 186 | 490 | for (size_t i = 0; i < size; ++i) { | 187 | 393 | if constexpr (ReturnType == TYPE_INT) { | 188 | 393 | col_to_data[i] = HashUtil::xxHash32WithSeed( | 189 | 393 | reinterpret_cast<const char*>(&data[current_offset]), | 190 | 393 | offsets[i] - current_offset, col_to_data[i]); | 191 | | } else { | 192 | | col_to_data[i] = HashUtil::xxHash64WithSeed( | 193 | | reinterpret_cast<const char*>(&data[current_offset]), | 194 | | offsets[i] - current_offset, col_to_data[i]); | 195 | | } | 196 | 393 | current_offset = offsets[i]; | 197 | 393 | } | 198 | 97 | } else if (const ColumnConst* col_from_const = | 199 | 7 | check_and_get_column_const_string_or_fixedstring(column)) { | 200 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 201 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 202 | 0 | if constexpr (ReturnType == TYPE_INT) { | 203 | 0 | col_to_data[i] = | 204 | 0 | HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]); | 205 | | } else { | 206 | | col_to_data[i] = | 207 | | HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]); | 208 | | } | 209 | 0 | } | 210 | 7 | } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) { | 211 | 33 | for (size_t i = 0; i < input_rows_count; ++i) { | 212 | 26 | auto data_ref = vb_col->get_data_at(i); | 213 | 26 | if constexpr (ReturnType == TYPE_INT) { | 214 | 26 | col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size, | 215 | 26 | col_to_data[i]); | 216 | | } else { | 217 | | col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size, | 218 | | col_to_data[i]); | 219 | | } | 220 | 26 | } | 221 | 7 | } else { | 222 | 0 | DCHECK(false); | 223 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 224 | 0 | column->get_name(), name); | 225 | 0 | } | 226 | 104 | return Status::OK(); | 227 | 104 | } |
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 175 | 11 | IColumn& col_to) { | 176 | 11 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 177 | | if constexpr (first) { | 178 | | to_column.insert_many_defaults(input_rows_count); | 179 | | } | 180 | 11 | auto& col_to_data = to_column.get_data(); | 181 | 11 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 182 | 6 | const typename ColumnString::Chars& data = col_from->get_chars(); | 183 | 6 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 184 | 6 | size_t size = offsets.size(); | 185 | 6 | ColumnString::Offset current_offset = 0; | 186 | 23 | for (size_t i = 0; i < size; ++i) { | 187 | 17 | if constexpr (ReturnType == TYPE_INT) { | 188 | 17 | col_to_data[i] = HashUtil::xxHash32WithSeed( | 189 | 17 | reinterpret_cast<const char*>(&data[current_offset]), | 190 | 17 | offsets[i] - current_offset, col_to_data[i]); | 191 | | } else { | 192 | | col_to_data[i] = HashUtil::xxHash64WithSeed( | 193 | | reinterpret_cast<const char*>(&data[current_offset]), | 194 | | offsets[i] - current_offset, col_to_data[i]); | 195 | | } | 196 | 17 | current_offset = offsets[i]; | 197 | 17 | } | 198 | 6 | } else if (const ColumnConst* col_from_const = | 199 | 5 | check_and_get_column_const_string_or_fixedstring(column)) { | 200 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 201 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 202 | 0 | if constexpr (ReturnType == TYPE_INT) { | 203 | 0 | col_to_data[i] = | 204 | 0 | HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]); | 205 | | } else { | 206 | | col_to_data[i] = | 207 | | HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]); | 208 | | } | 209 | 0 | } | 210 | 5 | } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) { | 211 | 21 | for (size_t i = 0; i < input_rows_count; ++i) { | 212 | 16 | auto data_ref = vb_col->get_data_at(i); | 213 | 16 | if constexpr (ReturnType == TYPE_INT) { | 214 | 16 | col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size, | 215 | 16 | col_to_data[i]); | 216 | | } else { | 217 | | col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size, | 218 | | col_to_data[i]); | 219 | | } | 220 | 16 | } | 221 | 5 | } else { | 222 | 0 | DCHECK(false); | 223 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 224 | 0 | column->get_name(), name); | 225 | 0 | } | 226 | 11 | return Status::OK(); | 227 | 11 | } |
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 175 | 981 | IColumn& col_to) { | 176 | 981 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 177 | 981 | if constexpr (first) { | 178 | 981 | to_column.insert_many_defaults(input_rows_count); | 179 | 981 | } | 180 | 981 | auto& col_to_data = to_column.get_data(); | 181 | 981 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 182 | 974 | const typename ColumnString::Chars& data = col_from->get_chars(); | 183 | 974 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 184 | 974 | size_t size = offsets.size(); | 185 | 974 | ColumnString::Offset current_offset = 0; | 186 | 94.6k | for (size_t i = 0; i < size; ++i) { | 187 | | if constexpr (ReturnType == TYPE_INT) { | 188 | | col_to_data[i] = HashUtil::xxHash32WithSeed( | 189 | | reinterpret_cast<const char*>(&data[current_offset]), | 190 | | offsets[i] - current_offset, col_to_data[i]); | 191 | 93.7k | } else { | 192 | 93.7k | col_to_data[i] = HashUtil::xxHash64WithSeed( | 193 | 93.7k | reinterpret_cast<const char*>(&data[current_offset]), | 194 | 93.7k | offsets[i] - current_offset, col_to_data[i]); | 195 | 93.7k | } | 196 | 93.7k | current_offset = offsets[i]; | 197 | 93.7k | } | 198 | 974 | } else if (const ColumnConst* col_from_const = | 199 | 7 | check_and_get_column_const_string_or_fixedstring(column)) { | 200 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 201 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 202 | | if constexpr (ReturnType == TYPE_INT) { | 203 | | col_to_data[i] = | 204 | | HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]); | 205 | 0 | } else { | 206 | 0 | col_to_data[i] = | 207 | 0 | HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]); | 208 | 0 | } | 209 | 0 | } | 210 | 7 | } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) { | 211 | 33 | for (size_t i = 0; i < input_rows_count; ++i) { | 212 | 26 | auto data_ref = vb_col->get_data_at(i); | 213 | | if constexpr (ReturnType == TYPE_INT) { | 214 | | col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size, | 215 | | col_to_data[i]); | 216 | 26 | } else { | 217 | 26 | col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size, | 218 | 26 | col_to_data[i]); | 219 | 26 | } | 220 | 26 | } | 221 | 7 | } else { | 222 | 0 | DCHECK(false); | 223 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 224 | 0 | column->get_name(), name); | 225 | 0 | } | 226 | 981 | return Status::OK(); | 227 | 981 | } |
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_ Line | Count | Source | 175 | 13 | IColumn& col_to) { | 176 | 13 | auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to); | 177 | | if constexpr (first) { | 178 | | to_column.insert_many_defaults(input_rows_count); | 179 | | } | 180 | 13 | auto& col_to_data = to_column.get_data(); | 181 | 13 | if (const auto* col_from = check_and_get_column<ColumnString>(column)) { | 182 | 8 | const typename ColumnString::Chars& data = col_from->get_chars(); | 183 | 8 | const typename ColumnString::Offsets& offsets = col_from->get_offsets(); | 184 | 8 | size_t size = offsets.size(); | 185 | 8 | ColumnString::Offset current_offset = 0; | 186 | 27 | for (size_t i = 0; i < size; ++i) { | 187 | | if constexpr (ReturnType == TYPE_INT) { | 188 | | col_to_data[i] = HashUtil::xxHash32WithSeed( | 189 | | reinterpret_cast<const char*>(&data[current_offset]), | 190 | | offsets[i] - current_offset, col_to_data[i]); | 191 | 19 | } else { | 192 | 19 | col_to_data[i] = HashUtil::xxHash64WithSeed( | 193 | 19 | reinterpret_cast<const char*>(&data[current_offset]), | 194 | 19 | offsets[i] - current_offset, col_to_data[i]); | 195 | 19 | } | 196 | 19 | current_offset = offsets[i]; | 197 | 19 | } | 198 | 8 | } else if (const ColumnConst* col_from_const = | 199 | 5 | check_and_get_column_const_string_or_fixedstring(column)) { | 200 | 0 | auto value = col_from_const->get_value<TYPE_STRING>(); | 201 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 202 | | if constexpr (ReturnType == TYPE_INT) { | 203 | | col_to_data[i] = | 204 | | HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]); | 205 | 0 | } else { | 206 | 0 | col_to_data[i] = | 207 | 0 | HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]); | 208 | 0 | } | 209 | 0 | } | 210 | 5 | } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) { | 211 | 21 | for (size_t i = 0; i < input_rows_count; ++i) { | 212 | 16 | auto data_ref = vb_col->get_data_at(i); | 213 | | if constexpr (ReturnType == TYPE_INT) { | 214 | | col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size, | 215 | | col_to_data[i]); | 216 | 16 | } else { | 217 | 16 | col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size, | 218 | 16 | col_to_data[i]); | 219 | 16 | } | 220 | 16 | } | 221 | 5 | } else { | 222 | 0 | DCHECK(false); | 223 | 0 | return Status::NotSupported("Illegal column {} of argument of function {}", | 224 | 0 | column->get_name(), name); | 225 | 0 | } | 226 | 13 | return Status::OK(); | 227 | 13 | } |
|
228 | | }; |
229 | | |
230 | | using FunctionXxHash_32 = FunctionVariadicArgumentsBase<DataTypeInt32, XxHashImpl<TYPE_INT>>; |
231 | | using FunctionXxHash_64 = FunctionVariadicArgumentsBase<DataTypeInt64, XxHashImpl<TYPE_BIGINT>>; |
232 | | |
233 | 8 | void register_function_hash(SimpleFunctionFactory& factory) { |
234 | 8 | factory.register_function<FunctionMurmurHash3_32>(); |
235 | 8 | factory.register_function<FunctionMurmurHash3_64>(); |
236 | 8 | factory.register_function<FunctionMurmurHash3_64_V2>(); |
237 | 8 | factory.register_function<FunctionMurmurHash3U64V2>(); |
238 | 8 | factory.register_function<FunctionXxHash_32>(); |
239 | 8 | factory.register_function<FunctionXxHash_64>(); |
240 | 8 | factory.register_alias("xxhash_64", "xxhash3_64"); |
241 | 8 | } |
242 | | } // namespace doris |