Coverage Report

Created: 2026-06-03 03:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_hash.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionHash.cpp
19
// and modified by Doris
20
21
#include "exprs/function/function_hash.h"
22
23
#include <vector>
24
25
#include "common/status.h"
26
#include "core/assert_cast.h"
27
#include "core/column/column.h"
28
#include "core/column/column_const.h"
29
#include "core/column/column_string.h"
30
#include "core/column/column_varbinary.h"
31
#include "core/column/column_vector.h"
32
#include "core/data_type/data_type.h"
33
#include "core/data_type/data_type_number.h"
34
#include "core/data_type/data_type_string.h"
35
#include "core/field.h"
36
#include "core/value/large_int_value.h"
37
#include "exec/common/template_helpers.hpp"
38
#include "exprs/function/function_helpers.h"
39
#include "exprs/function/function_variadic_arguments.h"
40
#include "exprs/function/simple_function_factory.h"
41
#include "util/hash/murmur_hash3.h"
42
#include "util/hash_util.hpp"
43
44
namespace doris {
45
constexpr uint64_t emtpy_value = 0xe28dbde7fe22e41c;
46
47
namespace {
48
49
250
__int128_t pack_murmur_hash3_128(uint64_t h1, uint64_t h2) {
50
250
    static_assert(sizeof(__int128_t) == sizeof(uint64_t) * 2);
51
    // Store the two MurmurHash3 x64 128-bit lanes in a single LARGEINT value. Keep h1 in the
52
    // low 64 bits and h2 in the high 64 bits to match murmur_hash3_x64_128's out[0]/out[1].
53
250
    const auto value =
54
250
            (static_cast<unsigned __int128>(h2) << 64) | static_cast<unsigned __int128>(h1);
55
250
    return static_cast<__int128_t>(value);
56
250
}
57
58
110
void unpack_murmur_hash3_128(__int128_t value, uint64_t& h1, uint64_t& h2) {
59
110
    static_assert(sizeof(__int128_t) == sizeof(uint64_t) * 2);
60
110
    const auto unsigned_value = static_cast<unsigned __int128>(value);
61
110
    h1 = static_cast<uint64_t>(unsigned_value);
62
110
    h2 = static_cast<uint64_t>(unsigned_value >> 64);
63
110
}
64
65
140
void init_murmur_hash3_128(__int128_t& value, const void* data, size_t size) {
66
140
    uint64_t hash[2] = {0, 0};
67
140
    murmur_hash3_x64_128(data, size, 0, hash);
68
140
    value = pack_murmur_hash3_128(hash[0], hash[1]);
69
140
}
70
71
110
void update_murmur_hash3_128(__int128_t& value, const void* data, size_t size) {
72
110
    uint64_t h1 = 0;
73
110
    uint64_t h2 = 0;
74
110
    unpack_murmur_hash3_128(value, h1, h2);
75
110
    murmur_hash3_x64_process(data, size, h1, h2);
76
110
    value = pack_murmur_hash3_128(h1, h2);
77
110
}
78
79
template <bool first, typename StateContainer>
80
Status execute_murmur_hash3_128_column(const IColumn* column, size_t input_rows_count,
81
118
                                       StateContainer& state, const char* function_name) {
82
118
    if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
83
92
        const typename ColumnString::Chars& data = col_from->get_chars();
84
92
        const typename ColumnString::Offsets& offsets = col_from->get_offsets();
85
92
        size_t size = offsets.size();
86
92
        ColumnString::Offset current_offset = 0;
87
284
        for (size_t i = 0; i < size; ++i) {
88
192
            if constexpr (first) {
89
114
                init_murmur_hash3_128(state[i],
90
114
                                      reinterpret_cast<const char*>(&data[current_offset]),
91
114
                                      offsets[i] - current_offset);
92
114
            } else {
93
78
                update_murmur_hash3_128(state[i],
94
78
                                        reinterpret_cast<const char*>(&data[current_offset]),
95
78
                                        offsets[i] - current_offset);
96
78
            }
97
192
            current_offset = offsets[i];
98
192
        }
99
92
    } else if (const ColumnConst* col_from_const =
100
26
                       check_and_get_column_const_string_or_fixedstring(column)) {
101
26
        auto value = col_from_const->get_value<TYPE_STRING>();
102
84
        for (size_t i = 0; i < input_rows_count; ++i) {
103
58
            if constexpr (first) {
104
26
                init_murmur_hash3_128(state[i], value.data(), value.size());
105
32
            } else {
106
32
                update_murmur_hash3_128(state[i], value.data(), value.size());
107
32
            }
108
58
        }
109
26
    } else {
110
0
        DCHECK(false);
111
0
        return Status::NotSupported("Illegal column {} of argument of function {}",
112
0
                                    column->get_name(), function_name);
113
0
    }
114
118
    return Status::OK();
115
118
}
function_hash.cpp:_ZN5doris12_GLOBAL__N_131execute_murmur_hash3_128_columnILb1ENS_8PODArrayInLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEEENS_6StatusEPKNS_7IColumnEmRT0_PKc
Line
Count
Source
81
31
                                       StateContainer& state, const char* function_name) {
82
31
    if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
83
26
        const typename ColumnString::Chars& data = col_from->get_chars();
84
26
        const typename ColumnString::Offsets& offsets = col_from->get_offsets();
85
26
        size_t size = offsets.size();
86
26
        ColumnString::Offset current_offset = 0;
87
81
        for (size_t i = 0; i < size; ++i) {
88
55
            if constexpr (first) {
89
55
                init_murmur_hash3_128(state[i],
90
55
                                      reinterpret_cast<const char*>(&data[current_offset]),
91
55
                                      offsets[i] - current_offset);
92
            } else {
93
                update_murmur_hash3_128(state[i],
94
                                        reinterpret_cast<const char*>(&data[current_offset]),
95
                                        offsets[i] - current_offset);
96
            }
97
55
            current_offset = offsets[i];
98
55
        }
99
26
    } else if (const ColumnConst* col_from_const =
100
5
                       check_and_get_column_const_string_or_fixedstring(column)) {
101
5
        auto value = col_from_const->get_value<TYPE_STRING>();
102
18
        for (size_t i = 0; i < input_rows_count; ++i) {
103
13
            if constexpr (first) {
104
13
                init_murmur_hash3_128(state[i], value.data(), value.size());
105
            } else {
106
                update_murmur_hash3_128(state[i], value.data(), value.size());
107
            }
108
13
        }
109
5
    } else {
110
0
        DCHECK(false);
111
0
        return Status::NotSupported("Illegal column {} of argument of function {}",
112
0
                                    column->get_name(), function_name);
113
0
    }
114
31
    return Status::OK();
115
31
}
function_hash.cpp:_ZN5doris12_GLOBAL__N_131execute_murmur_hash3_128_columnILb0ENS_8PODArrayInLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEEENS_6StatusEPKNS_7IColumnEmRT0_PKc
Line
Count
Source
81
27
                                       StateContainer& state, const char* function_name) {
82
27
    if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
83
19
        const typename ColumnString::Chars& data = col_from->get_chars();
84
19
        const typename ColumnString::Offsets& offsets = col_from->get_offsets();
85
19
        size_t size = offsets.size();
86
19
        ColumnString::Offset current_offset = 0;
87
58
        for (size_t i = 0; i < size; ++i) {
88
            if constexpr (first) {
89
                init_murmur_hash3_128(state[i],
90
                                      reinterpret_cast<const char*>(&data[current_offset]),
91
                                      offsets[i] - current_offset);
92
39
            } else {
93
39
                update_murmur_hash3_128(state[i],
94
39
                                        reinterpret_cast<const char*>(&data[current_offset]),
95
39
                                        offsets[i] - current_offset);
96
39
            }
97
39
            current_offset = offsets[i];
98
39
        }
99
19
    } else if (const ColumnConst* col_from_const =
100
8
                       check_and_get_column_const_string_or_fixedstring(column)) {
101
8
        auto value = col_from_const->get_value<TYPE_STRING>();
102
24
        for (size_t i = 0; i < input_rows_count; ++i) {
103
            if constexpr (first) {
104
                init_murmur_hash3_128(state[i], value.data(), value.size());
105
16
            } else {
106
16
                update_murmur_hash3_128(state[i], value.data(), value.size());
107
16
            }
108
16
        }
109
8
    } else {
110
0
        DCHECK(false);
111
0
        return Status::NotSupported("Illegal column {} of argument of function {}",
112
0
                                    column->get_name(), function_name);
113
0
    }
114
27
    return Status::OK();
115
27
}
function_hash.cpp:_ZN5doris12_GLOBAL__N_131execute_murmur_hash3_128_columnILb1ESt6vectorInSaInEEEENS_6StatusEPKNS_7IColumnEmRT0_PKc
Line
Count
Source
81
33
                                       StateContainer& state, const char* function_name) {
82
33
    if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
83
28
        const typename ColumnString::Chars& data = col_from->get_chars();
84
28
        const typename ColumnString::Offsets& offsets = col_from->get_offsets();
85
28
        size_t size = offsets.size();
86
28
        ColumnString::Offset current_offset = 0;
87
87
        for (size_t i = 0; i < size; ++i) {
88
59
            if constexpr (first) {
89
59
                init_murmur_hash3_128(state[i],
90
59
                                      reinterpret_cast<const char*>(&data[current_offset]),
91
59
                                      offsets[i] - current_offset);
92
            } else {
93
                update_murmur_hash3_128(state[i],
94
                                        reinterpret_cast<const char*>(&data[current_offset]),
95
                                        offsets[i] - current_offset);
96
            }
97
59
            current_offset = offsets[i];
98
59
        }
99
28
    } else if (const ColumnConst* col_from_const =
100
5
                       check_and_get_column_const_string_or_fixedstring(column)) {
101
5
        auto value = col_from_const->get_value<TYPE_STRING>();
102
18
        for (size_t i = 0; i < input_rows_count; ++i) {
103
13
            if constexpr (first) {
104
13
                init_murmur_hash3_128(state[i], value.data(), value.size());
105
            } else {
106
                update_murmur_hash3_128(state[i], value.data(), value.size());
107
            }
108
13
        }
109
5
    } else {
110
0
        DCHECK(false);
111
0
        return Status::NotSupported("Illegal column {} of argument of function {}",
112
0
                                    column->get_name(), function_name);
113
0
    }
114
33
    return Status::OK();
115
33
}
function_hash.cpp:_ZN5doris12_GLOBAL__N_131execute_murmur_hash3_128_columnILb0ESt6vectorInSaInEEEENS_6StatusEPKNS_7IColumnEmRT0_PKc
Line
Count
Source
81
27
                                       StateContainer& state, const char* function_name) {
82
27
    if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
83
19
        const typename ColumnString::Chars& data = col_from->get_chars();
84
19
        const typename ColumnString::Offsets& offsets = col_from->get_offsets();
85
19
        size_t size = offsets.size();
86
19
        ColumnString::Offset current_offset = 0;
87
58
        for (size_t i = 0; i < size; ++i) {
88
            if constexpr (first) {
89
                init_murmur_hash3_128(state[i],
90
                                      reinterpret_cast<const char*>(&data[current_offset]),
91
                                      offsets[i] - current_offset);
92
39
            } else {
93
39
                update_murmur_hash3_128(state[i],
94
39
                                        reinterpret_cast<const char*>(&data[current_offset]),
95
39
                                        offsets[i] - current_offset);
96
39
            }
97
39
            current_offset = offsets[i];
98
39
        }
99
19
    } else if (const ColumnConst* col_from_const =
100
8
                       check_and_get_column_const_string_or_fixedstring(column)) {
101
8
        auto value = col_from_const->get_value<TYPE_STRING>();
102
24
        for (size_t i = 0; i < input_rows_count; ++i) {
103
            if constexpr (first) {
104
                init_murmur_hash3_128(state[i], value.data(), value.size());
105
16
            } else {
106
16
                update_murmur_hash3_128(state[i], value.data(), value.size());
107
16
            }
108
16
        }
109
8
    } else {
110
0
        DCHECK(false);
111
0
        return Status::NotSupported("Illegal column {} of argument of function {}",
112
0
                                    column->get_name(), function_name);
113
0
    }
114
27
    return Status::OK();
115
27
}
116
117
} // namespace
118
119
template <PrimitiveType ReturnType, bool is_mmh64_v2 = false>
120
struct MurmurHash3Impl {
121
0
    static constexpr auto get_name() {
122
0
        if constexpr (ReturnType == TYPE_INT) {
123
0
            return "murmur_hash3_32";
124
0
        } else if constexpr (ReturnType == TYPE_LARGEINT) {
125
0
            return "murmur_hash3_u64_v2";
126
0
        } else if constexpr (is_mmh64_v2) {
127
0
            return "murmur_hash3_64_v2";
128
0
        } else {
129
0
            return "murmur_hash3_64";
130
0
        }
131
0
    }
132
    static constexpr auto name = get_name();
133
134
0
    static Status empty_apply(IColumn& icolumn, size_t input_rows_count) {
135
0
        ColumnVector<ReturnType>& vec_to = assert_cast<ColumnVector<ReturnType>&>(icolumn);
136
0
        vec_to.get_data().assign(
137
0
                input_rows_count,
138
0
                static_cast<typename PrimitiveTypeTraits<ReturnType>::CppType>(emtpy_value));
139
0
        return Status::OK();
140
0
    }
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE11empty_applyERNS_7IColumnEm
141
142
    static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
143
81
                              IColumn& icolumn) {
144
81
        return execute<true>(type, column, input_rows_count, icolumn);
145
81
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
143
21
                              IColumn& icolumn) {
144
21
        return execute<true>(type, column, input_rows_count, icolumn);
145
21
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
143
21
                              IColumn& icolumn) {
144
21
        return execute<true>(type, column, input_rows_count, icolumn);
145
21
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
143
18
                              IColumn& icolumn) {
144
18
        return execute<true>(type, column, input_rows_count, icolumn);
145
18
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
143
21
                              IColumn& icolumn) {
144
21
        return execute<true>(type, column, input_rows_count, icolumn);
145
21
    }
146
147
    static Status combine_apply(const IDataType* type, const IColumn* column,
148
13
                                size_t input_rows_count, IColumn& icolumn) {
149
13
        return execute<false>(type, column, input_rows_count, icolumn);
150
13
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
148
4
                                size_t input_rows_count, IColumn& icolumn) {
149
4
        return execute<false>(type, column, input_rows_count, icolumn);
150
4
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
148
4
                                size_t input_rows_count, IColumn& icolumn) {
149
4
        return execute<false>(type, column, input_rows_count, icolumn);
150
4
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
148
1
                                size_t input_rows_count, IColumn& icolumn) {
149
1
        return execute<false>(type, column, input_rows_count, icolumn);
150
1
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
148
4
                                size_t input_rows_count, IColumn& icolumn) {
149
4
        return execute<false>(type, column, input_rows_count, icolumn);
150
4
    }
151
152
    template <bool first>
153
    static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count,
154
94
                          IColumn& col_to) {
155
94
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
94
        if constexpr (first) {
157
81
            if constexpr (ReturnType == TYPE_INT) {
158
21
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
21
                                           input_rows_count);
160
60
            } else {
161
60
                to_column.insert_many_defaults(input_rows_count);
162
60
            }
163
81
        }
164
94
        auto& col_to_data = to_column.get_data();
165
94
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
94
            const typename ColumnString::Chars& data = col_from->get_chars();
167
94
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
94
            size_t size = offsets.size();
169
94
            ColumnString::Offset current_offset = 0;
170
286
            for (size_t i = 0; i < size; ++i) {
171
192
                if constexpr (ReturnType == TYPE_INT) {
172
65
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
65
                            reinterpret_cast<const char*>(&data[current_offset]),
174
65
                            offsets[i] - current_offset, col_to_data[i]);
175
127
                } else {
176
127
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
127
                            reinterpret_cast<const char*>(&data[current_offset]),
178
127
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
127
                }
180
192
                current_offset = offsets[i];
181
192
            }
182
94
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
0
                if constexpr (ReturnType == TYPE_INT) {
187
0
                    col_to_data[i] =
188
0
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
0
                } else {
190
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
0
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
94
        return Status::OK();
200
94
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
154
21
                          IColumn& col_to) {
155
21
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
21
        if constexpr (first) {
157
21
            if constexpr (ReturnType == TYPE_INT) {
158
21
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
21
                                           input_rows_count);
160
            } else {
161
                to_column.insert_many_defaults(input_rows_count);
162
            }
163
21
        }
164
21
        auto& col_to_data = to_column.get_data();
165
21
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
21
            const typename ColumnString::Chars& data = col_from->get_chars();
167
21
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
21
            size_t size = offsets.size();
169
21
            ColumnString::Offset current_offset = 0;
170
79
            for (size_t i = 0; i < size; ++i) {
171
58
                if constexpr (ReturnType == TYPE_INT) {
172
58
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
58
                            reinterpret_cast<const char*>(&data[current_offset]),
174
58
                            offsets[i] - current_offset, col_to_data[i]);
175
                } else {
176
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
                            reinterpret_cast<const char*>(&data[current_offset]),
178
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
                }
180
58
                current_offset = offsets[i];
181
58
            }
182
21
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
0
                if constexpr (ReturnType == TYPE_INT) {
187
0
                    col_to_data[i] =
188
0
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
                } else {
190
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
21
        return Status::OK();
200
21
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
154
4
                          IColumn& col_to) {
155
4
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
        if constexpr (first) {
157
            if constexpr (ReturnType == TYPE_INT) {
158
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
                                           input_rows_count);
160
            } else {
161
                to_column.insert_many_defaults(input_rows_count);
162
            }
163
        }
164
4
        auto& col_to_data = to_column.get_data();
165
4
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
4
            const typename ColumnString::Chars& data = col_from->get_chars();
167
4
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
4
            size_t size = offsets.size();
169
4
            ColumnString::Offset current_offset = 0;
170
11
            for (size_t i = 0; i < size; ++i) {
171
7
                if constexpr (ReturnType == TYPE_INT) {
172
7
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
7
                            reinterpret_cast<const char*>(&data[current_offset]),
174
7
                            offsets[i] - current_offset, col_to_data[i]);
175
                } else {
176
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
                            reinterpret_cast<const char*>(&data[current_offset]),
178
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
                }
180
7
                current_offset = offsets[i];
181
7
            }
182
4
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
0
                if constexpr (ReturnType == TYPE_INT) {
187
0
                    col_to_data[i] =
188
0
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
                } else {
190
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
4
        return Status::OK();
200
4
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
154
21
                          IColumn& col_to) {
155
21
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
21
        if constexpr (first) {
157
            if constexpr (ReturnType == TYPE_INT) {
158
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
                                           input_rows_count);
160
21
            } else {
161
21
                to_column.insert_many_defaults(input_rows_count);
162
21
            }
163
21
        }
164
21
        auto& col_to_data = to_column.get_data();
165
21
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
21
            const typename ColumnString::Chars& data = col_from->get_chars();
167
21
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
21
            size_t size = offsets.size();
169
21
            ColumnString::Offset current_offset = 0;
170
79
            for (size_t i = 0; i < size; ++i) {
171
                if constexpr (ReturnType == TYPE_INT) {
172
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
                            reinterpret_cast<const char*>(&data[current_offset]),
174
                            offsets[i] - current_offset, col_to_data[i]);
175
58
                } else {
176
58
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
58
                            reinterpret_cast<const char*>(&data[current_offset]),
178
58
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
58
                }
180
58
                current_offset = offsets[i];
181
58
            }
182
21
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
                if constexpr (ReturnType == TYPE_INT) {
187
                    col_to_data[i] =
188
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
0
                } else {
190
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
0
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
21
        return Status::OK();
200
21
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
154
4
                          IColumn& col_to) {
155
4
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
        if constexpr (first) {
157
            if constexpr (ReturnType == TYPE_INT) {
158
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
                                           input_rows_count);
160
            } else {
161
                to_column.insert_many_defaults(input_rows_count);
162
            }
163
        }
164
4
        auto& col_to_data = to_column.get_data();
165
4
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
4
            const typename ColumnString::Chars& data = col_from->get_chars();
167
4
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
4
            size_t size = offsets.size();
169
4
            ColumnString::Offset current_offset = 0;
170
11
            for (size_t i = 0; i < size; ++i) {
171
                if constexpr (ReturnType == TYPE_INT) {
172
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
                            reinterpret_cast<const char*>(&data[current_offset]),
174
                            offsets[i] - current_offset, col_to_data[i]);
175
7
                } else {
176
7
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
7
                            reinterpret_cast<const char*>(&data[current_offset]),
178
7
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
7
                }
180
7
                current_offset = offsets[i];
181
7
            }
182
4
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
                if constexpr (ReturnType == TYPE_INT) {
187
                    col_to_data[i] =
188
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
0
                } else {
190
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
0
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
4
        return Status::OK();
200
4
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
154
18
                          IColumn& col_to) {
155
18
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
18
        if constexpr (first) {
157
            if constexpr (ReturnType == TYPE_INT) {
158
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
                                           input_rows_count);
160
18
            } else {
161
18
                to_column.insert_many_defaults(input_rows_count);
162
18
            }
163
18
        }
164
18
        auto& col_to_data = to_column.get_data();
165
18
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
18
            const typename ColumnString::Chars& data = col_from->get_chars();
167
18
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
18
            size_t size = offsets.size();
169
18
            ColumnString::Offset current_offset = 0;
170
46
            for (size_t i = 0; i < size; ++i) {
171
                if constexpr (ReturnType == TYPE_INT) {
172
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
                            reinterpret_cast<const char*>(&data[current_offset]),
174
                            offsets[i] - current_offset, col_to_data[i]);
175
28
                } else {
176
28
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
28
                            reinterpret_cast<const char*>(&data[current_offset]),
178
28
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
28
                }
180
28
                current_offset = offsets[i];
181
28
            }
182
18
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
                if constexpr (ReturnType == TYPE_INT) {
187
                    col_to_data[i] =
188
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
0
                } else {
190
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
0
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
18
        return Status::OK();
200
18
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
154
1
                          IColumn& col_to) {
155
1
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
        if constexpr (first) {
157
            if constexpr (ReturnType == TYPE_INT) {
158
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
                                           input_rows_count);
160
            } else {
161
                to_column.insert_many_defaults(input_rows_count);
162
            }
163
        }
164
1
        auto& col_to_data = to_column.get_data();
165
1
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
1
            const typename ColumnString::Chars& data = col_from->get_chars();
167
1
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
1
            size_t size = offsets.size();
169
1
            ColumnString::Offset current_offset = 0;
170
2
            for (size_t i = 0; i < size; ++i) {
171
                if constexpr (ReturnType == TYPE_INT) {
172
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
                            reinterpret_cast<const char*>(&data[current_offset]),
174
                            offsets[i] - current_offset, col_to_data[i]);
175
1
                } else {
176
1
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
1
                            reinterpret_cast<const char*>(&data[current_offset]),
178
1
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
1
                }
180
1
                current_offset = offsets[i];
181
1
            }
182
1
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
                if constexpr (ReturnType == TYPE_INT) {
187
                    col_to_data[i] =
188
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
0
                } else {
190
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
0
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
1
        return Status::OK();
200
1
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
154
21
                          IColumn& col_to) {
155
21
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
21
        if constexpr (first) {
157
            if constexpr (ReturnType == TYPE_INT) {
158
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
                                           input_rows_count);
160
21
            } else {
161
21
                to_column.insert_many_defaults(input_rows_count);
162
21
            }
163
21
        }
164
21
        auto& col_to_data = to_column.get_data();
165
21
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
21
            const typename ColumnString::Chars& data = col_from->get_chars();
167
21
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
21
            size_t size = offsets.size();
169
21
            ColumnString::Offset current_offset = 0;
170
50
            for (size_t i = 0; i < size; ++i) {
171
                if constexpr (ReturnType == TYPE_INT) {
172
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
                            reinterpret_cast<const char*>(&data[current_offset]),
174
                            offsets[i] - current_offset, col_to_data[i]);
175
29
                } else {
176
29
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
29
                            reinterpret_cast<const char*>(&data[current_offset]),
178
29
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
29
                }
180
29
                current_offset = offsets[i];
181
29
            }
182
21
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
                if constexpr (ReturnType == TYPE_INT) {
187
                    col_to_data[i] =
188
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
0
                } else {
190
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
0
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
21
        return Status::OK();
200
21
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
154
4
                          IColumn& col_to) {
155
4
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
        if constexpr (first) {
157
            if constexpr (ReturnType == TYPE_INT) {
158
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
                                           input_rows_count);
160
            } else {
161
                to_column.insert_many_defaults(input_rows_count);
162
            }
163
        }
164
4
        auto& col_to_data = to_column.get_data();
165
4
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
4
            const typename ColumnString::Chars& data = col_from->get_chars();
167
4
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
4
            size_t size = offsets.size();
169
4
            ColumnString::Offset current_offset = 0;
170
8
            for (size_t i = 0; i < size; ++i) {
171
                if constexpr (ReturnType == TYPE_INT) {
172
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
                            reinterpret_cast<const char*>(&data[current_offset]),
174
                            offsets[i] - current_offset, col_to_data[i]);
175
4
                } else {
176
4
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
4
                            reinterpret_cast<const char*>(&data[current_offset]),
178
4
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
4
                }
180
4
                current_offset = offsets[i];
181
4
            }
182
4
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
                if constexpr (ReturnType == TYPE_INT) {
187
                    col_to_data[i] =
188
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
0
                } else {
190
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
0
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
4
        return Status::OK();
200
4
    }
201
};
202
203
using FunctionMurmurHash3_32 =
204
        FunctionVariadicArgumentsBase<DataTypeInt32, MurmurHash3Impl<TYPE_INT>>;
205
using FunctionMurmurHash3_64 =
206
        FunctionVariadicArgumentsBase<DataTypeInt64, MurmurHash3Impl<TYPE_BIGINT>>;
207
using FunctionMurmurHash3_64_V2 =
208
        FunctionVariadicArgumentsBase<DataTypeInt64, MurmurHash3Impl<TYPE_BIGINT, true>>;
209
using FunctionMurmurHash3U64V2 =
210
        FunctionVariadicArgumentsBase<DataTypeInt128, MurmurHash3Impl<TYPE_LARGEINT, true>>;
211
212
struct MurmurHash3128Impl {
213
    static constexpr auto name = "murmur_hash3_128";
214
215
1
    static Status empty_apply(IColumn& /*icolumn*/, size_t /*input_rows_count*/) {
216
1
        return Status::InvalidArgument("Function {} requires at least one argument", name);
217
1
    }
218
219
    static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
220
31
                              IColumn& icolumn) {
221
31
        return execute<true>(type, column, input_rows_count, icolumn);
222
31
    }
223
224
    static Status combine_apply(const IDataType* type, const IColumn* column,
225
27
                                size_t input_rows_count, IColumn& icolumn) {
226
27
        return execute<false>(type, column, input_rows_count, icolumn);
227
27
    }
228
229
    template <bool first>
230
    static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count,
231
58
                          IColumn& col_to) {
232
58
        auto& to_column = assert_cast<ColumnVector<TYPE_LARGEINT>&>(col_to);
233
58
        if constexpr (first) {
234
            // The first argument initializes one 128-bit hash state per row. Later arguments reuse
235
            // the same result column and update the saved state in place.
236
31
            to_column.insert_many_defaults(input_rows_count);
237
31
        }
238
58
        auto& col_to_data = to_column.get_data();
239
58
        return execute_murmur_hash3_128_column<first>(column, input_rows_count, col_to_data, name);
240
58
    }
_ZN5doris18MurmurHash3128Impl7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
231
31
                          IColumn& col_to) {
232
31
        auto& to_column = assert_cast<ColumnVector<TYPE_LARGEINT>&>(col_to);
233
31
        if constexpr (first) {
234
            // The first argument initializes one 128-bit hash state per row. Later arguments reuse
235
            // the same result column and update the saved state in place.
236
31
            to_column.insert_many_defaults(input_rows_count);
237
31
        }
238
31
        auto& col_to_data = to_column.get_data();
239
31
        return execute_murmur_hash3_128_column<first>(column, input_rows_count, col_to_data, name);
240
31
    }
_ZN5doris18MurmurHash3128Impl7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
231
27
                          IColumn& col_to) {
232
27
        auto& to_column = assert_cast<ColumnVector<TYPE_LARGEINT>&>(col_to);
233
        if constexpr (first) {
234
            // The first argument initializes one 128-bit hash state per row. Later arguments reuse
235
            // the same result column and update the saved state in place.
236
            to_column.insert_many_defaults(input_rows_count);
237
        }
238
27
        auto& col_to_data = to_column.get_data();
239
27
        return execute_murmur_hash3_128_column<first>(column, input_rows_count, col_to_data, name);
240
27
    }
241
};
242
243
using FunctionMurmurHash3_128 = FunctionVariadicArgumentsBase<DataTypeInt128, MurmurHash3128Impl>;
244
245
class FunctionMurmurHash3U128 : public IFunction {
246
public:
247
    static constexpr auto name = "murmur_hash3_u128";
248
249
54
    static FunctionPtr create() { return std::make_shared<FunctionMurmurHash3U128>(); }
250
251
0
    String get_name() const override { return name; }
252
253
46
    bool is_variadic() const override { return true; }
254
255
0
    size_t get_number_of_arguments() const override { return 0; }
256
257
45
    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& /*arguments*/) const override {
258
45
        return std::make_shared<DataTypeString>();
259
45
    }
260
261
    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
262
34
                        uint32_t result, size_t input_rows_count) const override {
263
34
        if (arguments.empty()) {
264
1
            return Status::InvalidArgument("Function {} requires at least one argument", name);
265
1
        }
266
267
33
        std::vector<__int128_t> state(input_rows_count);
268
33
        const ColumnWithTypeAndName& first_col = block.get_by_position(arguments[0]);
269
33
        RETURN_IF_ERROR(execute_murmur_hash3_128_column<true>(first_col.column.get(),
270
33
                                                              input_rows_count, state, name));
271
272
60
        for (size_t i = 1; i < arguments.size(); ++i) {
273
27
            const ColumnWithTypeAndName& col = block.get_by_position(arguments[i]);
274
27
            RETURN_IF_ERROR(execute_murmur_hash3_128_column<false>(col.column.get(),
275
27
                                                                   input_rows_count, state, name));
276
27
        }
277
278
33
        auto result_column = ColumnString::create();
279
33
        result_column->reserve(input_rows_count);
280
72
        for (const auto value : state) {
281
72
            auto unsigned_value = static_cast<__uint128_t>(value);
282
72
            std::string value_str = LargeIntValue::to_string(unsigned_value);
283
72
            result_column->insert_data(value_str.data(), value_str.size());
284
72
        }
285
33
        block.get_by_position(result).column = std::move(result_column);
286
33
        return Status::OK();
287
33
    }
288
};
289
290
#ifdef BE_TEST
291
const char* murmur_hash3_get_name_type_int_for_test() {
292
    return MurmurHash3Impl<TYPE_INT>::get_name();
293
}
294
295
const char* murmur_hash3_get_name_type_bigint_for_test() {
296
    return MurmurHash3Impl<TYPE_BIGINT>::get_name();
297
}
298
299
const char* murmur_hash3_get_name_type_bigint_v2_for_test() {
300
    return MurmurHash3Impl<TYPE_BIGINT, true>::get_name();
301
}
302
#endif
303
304
template <PrimitiveType ReturnType>
305
struct XxHashImpl {
306
    static constexpr auto name = ReturnType == TYPE_INT ? "xxhash_32" : "xxhash_64";
307
308
0
    static Status empty_apply(IColumn& icolumn, size_t input_rows_count) {
309
0
        ColumnVector<ReturnType>& vec_to = assert_cast<ColumnVector<ReturnType>&>(icolumn);
310
0
        vec_to.get_data().assign(
311
0
                input_rows_count,
312
0
                static_cast<typename PrimitiveTypeTraits<ReturnType>::CppType>(emtpy_value));
313
0
        return Status::OK();
314
0
    }
Unexecuted instantiation: _ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE11empty_applyERNS_7IColumnEm
315
316
    static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
317
2.39k
                              IColumn& icolumn) {
318
2.39k
        return execute<true>(type, column, input_rows_count, icolumn);
319
2.39k
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
317
85
                              IColumn& icolumn) {
318
85
        return execute<true>(type, column, input_rows_count, icolumn);
319
85
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
317
2.30k
                              IColumn& icolumn) {
318
2.30k
        return execute<true>(type, column, input_rows_count, icolumn);
319
2.30k
    }
320
321
    static Status combine_apply(const IDataType* type, const IColumn* column,
322
24
                                size_t input_rows_count, IColumn& icolumn) {
323
24
        return execute<false>(type, column, input_rows_count, icolumn);
324
24
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
322
11
                                size_t input_rows_count, IColumn& icolumn) {
323
11
        return execute<false>(type, column, input_rows_count, icolumn);
324
11
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
322
13
                                size_t input_rows_count, IColumn& icolumn) {
323
13
        return execute<false>(type, column, input_rows_count, icolumn);
324
13
    }
325
326
    template <bool first>
327
    static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count,
328
2.41k
                          IColumn& col_to) {
329
2.41k
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
330
2.41k
        if constexpr (first) {
331
2.39k
            to_column.insert_many_defaults(input_rows_count);
332
2.39k
        }
333
2.41k
        auto& col_to_data = to_column.get_data();
334
2.41k
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
335
2.39k
            const typename ColumnString::Chars& data = col_from->get_chars();
336
2.39k
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
337
2.39k
            size_t size = offsets.size();
338
2.39k
            ColumnString::Offset current_offset = 0;
339
174k
            for (size_t i = 0; i < size; ++i) {
340
172k
                if constexpr (ReturnType == TYPE_INT) {
341
403
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
342
403
                            reinterpret_cast<const char*>(&data[current_offset]),
343
403
                            offsets[i] - current_offset, col_to_data[i]);
344
171k
                } else {
345
171k
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
346
171k
                            reinterpret_cast<const char*>(&data[current_offset]),
347
171k
                            offsets[i] - current_offset, col_to_data[i]);
348
171k
                }
349
172k
                current_offset = offsets[i];
350
172k
            }
351
2.39k
        } else if (const ColumnConst* col_from_const =
352
24
                           check_and_get_column_const_string_or_fixedstring(column)) {
353
0
            auto value = col_from_const->get_value<TYPE_STRING>();
354
0
            for (size_t i = 0; i < input_rows_count; ++i) {
355
0
                if constexpr (ReturnType == TYPE_INT) {
356
0
                    col_to_data[i] =
357
0
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
358
0
                } else {
359
0
                    col_to_data[i] =
360
0
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
361
0
                }
362
0
            }
363
24
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
364
108
            for (size_t i = 0; i < input_rows_count; ++i) {
365
84
                auto data_ref = vb_col->get_data_at(i);
366
84
                if constexpr (ReturnType == TYPE_INT) {
367
42
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
368
42
                                                                col_to_data[i]);
369
42
                } else {
370
42
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
371
42
                                                                col_to_data[i]);
372
42
                }
373
84
            }
374
24
        } else {
375
0
            DCHECK(false);
376
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
377
0
                                        column->get_name(), name);
378
0
        }
379
2.41k
        return Status::OK();
380
2.41k
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
328
85
                          IColumn& col_to) {
329
85
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
330
85
        if constexpr (first) {
331
85
            to_column.insert_many_defaults(input_rows_count);
332
85
        }
333
85
        auto& col_to_data = to_column.get_data();
334
85
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
335
78
            const typename ColumnString::Chars& data = col_from->get_chars();
336
78
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
337
78
            size_t size = offsets.size();
338
78
            ColumnString::Offset current_offset = 0;
339
464
            for (size_t i = 0; i < size; ++i) {
340
386
                if constexpr (ReturnType == TYPE_INT) {
341
386
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
342
386
                            reinterpret_cast<const char*>(&data[current_offset]),
343
386
                            offsets[i] - current_offset, col_to_data[i]);
344
                } else {
345
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
346
                            reinterpret_cast<const char*>(&data[current_offset]),
347
                            offsets[i] - current_offset, col_to_data[i]);
348
                }
349
386
                current_offset = offsets[i];
350
386
            }
351
78
        } else if (const ColumnConst* col_from_const =
352
7
                           check_and_get_column_const_string_or_fixedstring(column)) {
353
0
            auto value = col_from_const->get_value<TYPE_STRING>();
354
0
            for (size_t i = 0; i < input_rows_count; ++i) {
355
0
                if constexpr (ReturnType == TYPE_INT) {
356
0
                    col_to_data[i] =
357
0
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
358
                } else {
359
                    col_to_data[i] =
360
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
361
                }
362
0
            }
363
7
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
364
33
            for (size_t i = 0; i < input_rows_count; ++i) {
365
26
                auto data_ref = vb_col->get_data_at(i);
366
26
                if constexpr (ReturnType == TYPE_INT) {
367
26
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
368
26
                                                                col_to_data[i]);
369
                } else {
370
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
371
                                                                col_to_data[i]);
372
                }
373
26
            }
374
7
        } else {
375
0
            DCHECK(false);
376
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
377
0
                                        column->get_name(), name);
378
0
        }
379
85
        return Status::OK();
380
85
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
328
11
                          IColumn& col_to) {
329
11
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
330
        if constexpr (first) {
331
            to_column.insert_many_defaults(input_rows_count);
332
        }
333
11
        auto& col_to_data = to_column.get_data();
334
11
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
335
6
            const typename ColumnString::Chars& data = col_from->get_chars();
336
6
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
337
6
            size_t size = offsets.size();
338
6
            ColumnString::Offset current_offset = 0;
339
23
            for (size_t i = 0; i < size; ++i) {
340
17
                if constexpr (ReturnType == TYPE_INT) {
341
17
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
342
17
                            reinterpret_cast<const char*>(&data[current_offset]),
343
17
                            offsets[i] - current_offset, col_to_data[i]);
344
                } else {
345
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
346
                            reinterpret_cast<const char*>(&data[current_offset]),
347
                            offsets[i] - current_offset, col_to_data[i]);
348
                }
349
17
                current_offset = offsets[i];
350
17
            }
351
6
        } else if (const ColumnConst* col_from_const =
352
5
                           check_and_get_column_const_string_or_fixedstring(column)) {
353
0
            auto value = col_from_const->get_value<TYPE_STRING>();
354
0
            for (size_t i = 0; i < input_rows_count; ++i) {
355
0
                if constexpr (ReturnType == TYPE_INT) {
356
0
                    col_to_data[i] =
357
0
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
358
                } else {
359
                    col_to_data[i] =
360
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
361
                }
362
0
            }
363
5
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
364
21
            for (size_t i = 0; i < input_rows_count; ++i) {
365
16
                auto data_ref = vb_col->get_data_at(i);
366
16
                if constexpr (ReturnType == TYPE_INT) {
367
16
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
368
16
                                                                col_to_data[i]);
369
                } else {
370
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
371
                                                                col_to_data[i]);
372
                }
373
16
            }
374
5
        } else {
375
0
            DCHECK(false);
376
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
377
0
                                        column->get_name(), name);
378
0
        }
379
11
        return Status::OK();
380
11
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
328
2.30k
                          IColumn& col_to) {
329
2.30k
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
330
2.30k
        if constexpr (first) {
331
2.30k
            to_column.insert_many_defaults(input_rows_count);
332
2.30k
        }
333
2.30k
        auto& col_to_data = to_column.get_data();
334
2.30k
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
335
2.30k
            const typename ColumnString::Chars& data = col_from->get_chars();
336
2.30k
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
337
2.30k
            size_t size = offsets.size();
338
2.30k
            ColumnString::Offset current_offset = 0;
339
174k
            for (size_t i = 0; i < size; ++i) {
340
                if constexpr (ReturnType == TYPE_INT) {
341
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
342
                            reinterpret_cast<const char*>(&data[current_offset]),
343
                            offsets[i] - current_offset, col_to_data[i]);
344
171k
                } else {
345
171k
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
346
171k
                            reinterpret_cast<const char*>(&data[current_offset]),
347
171k
                            offsets[i] - current_offset, col_to_data[i]);
348
171k
                }
349
171k
                current_offset = offsets[i];
350
171k
            }
351
2.30k
        } else if (const ColumnConst* col_from_const =
352
7
                           check_and_get_column_const_string_or_fixedstring(column)) {
353
0
            auto value = col_from_const->get_value<TYPE_STRING>();
354
0
            for (size_t i = 0; i < input_rows_count; ++i) {
355
                if constexpr (ReturnType == TYPE_INT) {
356
                    col_to_data[i] =
357
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
358
0
                } else {
359
0
                    col_to_data[i] =
360
0
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
361
0
                }
362
0
            }
363
7
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
364
33
            for (size_t i = 0; i < input_rows_count; ++i) {
365
26
                auto data_ref = vb_col->get_data_at(i);
366
                if constexpr (ReturnType == TYPE_INT) {
367
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
368
                                                                col_to_data[i]);
369
26
                } else {
370
26
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
371
26
                                                                col_to_data[i]);
372
26
                }
373
26
            }
374
7
        } else {
375
0
            DCHECK(false);
376
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
377
0
                                        column->get_name(), name);
378
0
        }
379
2.30k
        return Status::OK();
380
2.30k
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
328
13
                          IColumn& col_to) {
329
13
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
330
        if constexpr (first) {
331
            to_column.insert_many_defaults(input_rows_count);
332
        }
333
13
        auto& col_to_data = to_column.get_data();
334
13
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
335
8
            const typename ColumnString::Chars& data = col_from->get_chars();
336
8
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
337
8
            size_t size = offsets.size();
338
8
            ColumnString::Offset current_offset = 0;
339
27
            for (size_t i = 0; i < size; ++i) {
340
                if constexpr (ReturnType == TYPE_INT) {
341
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
342
                            reinterpret_cast<const char*>(&data[current_offset]),
343
                            offsets[i] - current_offset, col_to_data[i]);
344
19
                } else {
345
19
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
346
19
                            reinterpret_cast<const char*>(&data[current_offset]),
347
19
                            offsets[i] - current_offset, col_to_data[i]);
348
19
                }
349
19
                current_offset = offsets[i];
350
19
            }
351
8
        } else if (const ColumnConst* col_from_const =
352
5
                           check_and_get_column_const_string_or_fixedstring(column)) {
353
0
            auto value = col_from_const->get_value<TYPE_STRING>();
354
0
            for (size_t i = 0; i < input_rows_count; ++i) {
355
                if constexpr (ReturnType == TYPE_INT) {
356
                    col_to_data[i] =
357
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
358
0
                } else {
359
0
                    col_to_data[i] =
360
0
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
361
0
                }
362
0
            }
363
5
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
364
21
            for (size_t i = 0; i < input_rows_count; ++i) {
365
16
                auto data_ref = vb_col->get_data_at(i);
366
                if constexpr (ReturnType == TYPE_INT) {
367
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
368
                                                                col_to_data[i]);
369
16
                } else {
370
16
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
371
16
                                                                col_to_data[i]);
372
16
                }
373
16
            }
374
5
        } else {
375
0
            DCHECK(false);
376
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
377
0
                                        column->get_name(), name);
378
0
        }
379
13
        return Status::OK();
380
13
    }
381
};
382
383
// xxHash function variant built on XxHashImpl<TYPE_INT>. With ReturnType == TYPE_INT,
// XxHashImpl::execute hashes every row with HashUtil::xxHash32WithSeed, seeding each
// call with the value already stored in the result column for that row.
// NOTE(review): DataTypeInt32 is presumably the function's return data type — confirm
// against FunctionVariadicArgumentsBase.
using FunctionXxHash_32 = FunctionVariadicArgumentsBase<DataTypeInt32, XxHashImpl<TYPE_INT>>;
384
// xxHash function variant built on XxHashImpl<TYPE_BIGINT>. For any ReturnType other
// than TYPE_INT, XxHashImpl::execute hashes every row with HashUtil::xxHash64WithSeed,
// seeding each call with the value already stored in the result column for that row.
// NOTE(review): DataTypeInt64 is presumably the function's return data type — confirm
// against FunctionVariadicArgumentsBase.
using FunctionXxHash_64 = FunctionVariadicArgumentsBase<DataTypeInt64, XxHashImpl<TYPE_BIGINT>>;
385
386
8
void register_function_hash(SimpleFunctionFactory& factory) {
387
8
    factory.register_function<FunctionMurmurHash3_32>();
388
8
    factory.register_function<FunctionMurmurHash3_64>();
389
8
    factory.register_function<FunctionMurmurHash3_64_V2>();
390
8
    factory.register_function<FunctionMurmurHash3U64V2>();
391
8
    factory.register_function<FunctionMurmurHash3_128>();
392
8
    factory.register_function<FunctionMurmurHash3U128>();
393
8
    factory.register_function<FunctionXxHash_32>();
394
8
    factory.register_function<FunctionXxHash_64>();
395
8
    factory.register_alias("xxhash_64", "xxhash3_64");
396
8
}
397
} // namespace doris