Coverage Report

Created: 2026-07-02 14:47

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_hash.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionHash.cpp
19
// and modified by Doris
20
21
#include "exprs/function/function_hash.h"
22
23
#include <vector>
24
25
#include "common/status.h"
26
#include "core/assert_cast.h"
27
#include "core/column/column.h"
28
#include "core/column/column_const.h"
29
#include "core/column/column_string.h"
30
#include "core/column/column_varbinary.h"
31
#include "core/column/column_vector.h"
32
#include "core/data_type/data_type.h"
33
#include "core/data_type/data_type_number.h"
34
#include "core/data_type/data_type_string.h"
35
#include "core/field.h"
36
#include "core/value/large_int_value.h"
37
#include "exec/common/template_helpers.hpp"
38
#include "exprs/function/function_helpers.h"
39
#include "exprs/function/function_variadic_arguments.h"
40
#include "exprs/function/simple_function_factory.h"
41
#include "util/hash/murmur_hash3.h"
42
#include "util/hash_util.hpp"
43
44
namespace doris {
45
constexpr uint64_t emtpy_value = 0xe28dbde7fe22e41c;
46
47
namespace {
48
49
192
__int128_t pack_murmur_hash3_128(uint64_t h1, uint64_t h2) {
50
192
    static_assert(sizeof(__int128_t) == sizeof(uint64_t) * 2);
51
    // Store the two MurmurHash3 x64 128-bit lanes in a single LARGEINT value. Keep h1 in the
52
    // low 64 bits and h2 in the high 64 bits to match murmur_hash3_x64_128's out[0]/out[1].
53
192
    const auto value =
54
192
            (static_cast<unsigned __int128>(h2) << 64) | static_cast<unsigned __int128>(h1);
55
192
    return static_cast<__int128_t>(value);
56
192
}
57
58
100
void unpack_murmur_hash3_128(__int128_t value, uint64_t& h1, uint64_t& h2) {
59
100
    static_assert(sizeof(__int128_t) == sizeof(uint64_t) * 2);
60
100
    const auto unsigned_value = static_cast<unsigned __int128>(value);
61
100
    h1 = static_cast<uint64_t>(unsigned_value);
62
100
    h2 = static_cast<uint64_t>(unsigned_value >> 64);
63
100
}
64
65
92
void init_murmur_hash3_128(__int128_t& value, const void* data, size_t size) {
66
92
    uint64_t hash[2] = {0, 0};
67
92
    murmur_hash3_x64_128(data, size, 0, hash);
68
92
    value = pack_murmur_hash3_128(hash[0], hash[1]);
69
92
}
70
71
100
void update_murmur_hash3_128(__int128_t& value, const void* data, size_t size) {
72
100
    uint64_t h1 = 0;
73
100
    uint64_t h2 = 0;
74
100
    unpack_murmur_hash3_128(value, h1, h2);
75
100
    murmur_hash3_x64_process(data, size, h1, h2);
76
100
    value = pack_murmur_hash3_128(h1, h2);
77
100
}
78
79
template <bool first, typename StateContainer>
80
Status execute_murmur_hash3_128_column(const IColumn* column, size_t input_rows_count,
81
152
                                       StateContainer& state, const char* function_name) {
82
152
    if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
83
108
        const typename ColumnString::Chars& data = col_from->get_chars();
84
108
        const typename ColumnString::Offsets& offsets = col_from->get_offsets();
85
108
        size_t size = offsets.size();
86
108
        ColumnString::Offset current_offset = 0;
87
256
        for (size_t i = 0; i < size; ++i) {
88
148
            if constexpr (first) {
89
76
                init_murmur_hash3_128(state[i],
90
76
                                      reinterpret_cast<const char*>(&data[current_offset]),
91
76
                                      offsets[i] - current_offset);
92
76
            } else {
93
72
                update_murmur_hash3_128(state[i],
94
72
                                        reinterpret_cast<const char*>(&data[current_offset]),
95
72
                                        offsets[i] - current_offset);
96
72
            }
97
148
            current_offset = offsets[i];
98
148
        }
99
108
    } else if (const ColumnConst* col_from_const =
100
44
                       check_and_get_column_const_string_or_fixedstring(column)) {
101
44
        auto value = col_from_const->get_value<TYPE_STRING>();
102
88
        for (size_t i = 0; i < input_rows_count; ++i) {
103
44
            if constexpr (first) {
104
16
                init_murmur_hash3_128(state[i], value.data(), value.size());
105
28
            } else {
106
28
                update_murmur_hash3_128(state[i], value.data(), value.size());
107
28
            }
108
44
        }
109
44
    } else {
110
0
        DCHECK(false);
111
0
        return Status::NotSupported("Illegal column {} of argument of function {}",
112
0
                                    column->get_name(), function_name);
113
0
    }
114
152
    return Status::OK();
115
152
}
function_hash.cpp:_ZN5doris12_GLOBAL__N_131execute_murmur_hash3_128_columnILb1ENS_8PODArrayInLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEEENS_6StatusEPKNS_7IColumnEmRT0_PKc
Line
Count
Source
81
32
                                       StateContainer& state, const char* function_name) {
82
32
    if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
83
24
        const typename ColumnString::Chars& data = col_from->get_chars();
84
24
        const typename ColumnString::Offsets& offsets = col_from->get_offsets();
85
24
        size_t size = offsets.size();
86
24
        ColumnString::Offset current_offset = 0;
87
58
        for (size_t i = 0; i < size; ++i) {
88
34
            if constexpr (first) {
89
34
                init_murmur_hash3_128(state[i],
90
34
                                      reinterpret_cast<const char*>(&data[current_offset]),
91
34
                                      offsets[i] - current_offset);
92
            } else {
93
                update_murmur_hash3_128(state[i],
94
                                        reinterpret_cast<const char*>(&data[current_offset]),
95
                                        offsets[i] - current_offset);
96
            }
97
34
            current_offset = offsets[i];
98
34
        }
99
24
    } else if (const ColumnConst* col_from_const =
100
8
                       check_and_get_column_const_string_or_fixedstring(column)) {
101
8
        auto value = col_from_const->get_value<TYPE_STRING>();
102
16
        for (size_t i = 0; i < input_rows_count; ++i) {
103
8
            if constexpr (first) {
104
8
                init_murmur_hash3_128(state[i], value.data(), value.size());
105
            } else {
106
                update_murmur_hash3_128(state[i], value.data(), value.size());
107
            }
108
8
        }
109
8
    } else {
110
0
        DCHECK(false);
111
0
        return Status::NotSupported("Illegal column {} of argument of function {}",
112
0
                                    column->get_name(), function_name);
113
0
    }
114
32
    return Status::OK();
115
32
}
function_hash.cpp:_ZN5doris12_GLOBAL__N_131execute_murmur_hash3_128_columnILb0ENS_8PODArrayInLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEEENS_6StatusEPKNS_7IColumnEmRT0_PKc
Line
Count
Source
81
42
                                       StateContainer& state, const char* function_name) {
82
42
    if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
83
28
        const typename ColumnString::Chars& data = col_from->get_chars();
84
28
        const typename ColumnString::Offsets& offsets = col_from->get_offsets();
85
28
        size_t size = offsets.size();
86
28
        ColumnString::Offset current_offset = 0;
87
64
        for (size_t i = 0; i < size; ++i) {
88
            if constexpr (first) {
89
                init_murmur_hash3_128(state[i],
90
                                      reinterpret_cast<const char*>(&data[current_offset]),
91
                                      offsets[i] - current_offset);
92
36
            } else {
93
36
                update_murmur_hash3_128(state[i],
94
36
                                        reinterpret_cast<const char*>(&data[current_offset]),
95
36
                                        offsets[i] - current_offset);
96
36
            }
97
36
            current_offset = offsets[i];
98
36
        }
99
28
    } else if (const ColumnConst* col_from_const =
100
14
                       check_and_get_column_const_string_or_fixedstring(column)) {
101
14
        auto value = col_from_const->get_value<TYPE_STRING>();
102
28
        for (size_t i = 0; i < input_rows_count; ++i) {
103
            if constexpr (first) {
104
                init_murmur_hash3_128(state[i], value.data(), value.size());
105
14
            } else {
106
14
                update_murmur_hash3_128(state[i], value.data(), value.size());
107
14
            }
108
14
        }
109
14
    } else {
110
0
        DCHECK(false);
111
0
        return Status::NotSupported("Illegal column {} of argument of function {}",
112
0
                                    column->get_name(), function_name);
113
0
    }
114
42
    return Status::OK();
115
42
}
function_hash.cpp:_ZN5doris12_GLOBAL__N_131execute_murmur_hash3_128_columnILb1ESt6vectorInSaInEEEENS_6StatusEPKNS_7IColumnEmRT0_PKc
Line
Count
Source
81
36
                                       StateContainer& state, const char* function_name) {
82
36
    if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
83
28
        const typename ColumnString::Chars& data = col_from->get_chars();
84
28
        const typename ColumnString::Offsets& offsets = col_from->get_offsets();
85
28
        size_t size = offsets.size();
86
28
        ColumnString::Offset current_offset = 0;
87
70
        for (size_t i = 0; i < size; ++i) {
88
42
            if constexpr (first) {
89
42
                init_murmur_hash3_128(state[i],
90
42
                                      reinterpret_cast<const char*>(&data[current_offset]),
91
42
                                      offsets[i] - current_offset);
92
            } else {
93
                update_murmur_hash3_128(state[i],
94
                                        reinterpret_cast<const char*>(&data[current_offset]),
95
                                        offsets[i] - current_offset);
96
            }
97
42
            current_offset = offsets[i];
98
42
        }
99
28
    } else if (const ColumnConst* col_from_const =
100
8
                       check_and_get_column_const_string_or_fixedstring(column)) {
101
8
        auto value = col_from_const->get_value<TYPE_STRING>();
102
16
        for (size_t i = 0; i < input_rows_count; ++i) {
103
8
            if constexpr (first) {
104
8
                init_murmur_hash3_128(state[i], value.data(), value.size());
105
            } else {
106
                update_murmur_hash3_128(state[i], value.data(), value.size());
107
            }
108
8
        }
109
8
    } else {
110
0
        DCHECK(false);
111
0
        return Status::NotSupported("Illegal column {} of argument of function {}",
112
0
                                    column->get_name(), function_name);
113
0
    }
114
36
    return Status::OK();
115
36
}
function_hash.cpp:_ZN5doris12_GLOBAL__N_131execute_murmur_hash3_128_columnILb0ESt6vectorInSaInEEEENS_6StatusEPKNS_7IColumnEmRT0_PKc
Line
Count
Source
81
42
                                       StateContainer& state, const char* function_name) {
82
42
    if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
83
28
        const typename ColumnString::Chars& data = col_from->get_chars();
84
28
        const typename ColumnString::Offsets& offsets = col_from->get_offsets();
85
28
        size_t size = offsets.size();
86
28
        ColumnString::Offset current_offset = 0;
87
64
        for (size_t i = 0; i < size; ++i) {
88
            if constexpr (first) {
89
                init_murmur_hash3_128(state[i],
90
                                      reinterpret_cast<const char*>(&data[current_offset]),
91
                                      offsets[i] - current_offset);
92
36
            } else {
93
36
                update_murmur_hash3_128(state[i],
94
36
                                        reinterpret_cast<const char*>(&data[current_offset]),
95
36
                                        offsets[i] - current_offset);
96
36
            }
97
36
            current_offset = offsets[i];
98
36
        }
99
28
    } else if (const ColumnConst* col_from_const =
100
14
                       check_and_get_column_const_string_or_fixedstring(column)) {
101
14
        auto value = col_from_const->get_value<TYPE_STRING>();
102
28
        for (size_t i = 0; i < input_rows_count; ++i) {
103
            if constexpr (first) {
104
                init_murmur_hash3_128(state[i], value.data(), value.size());
105
14
            } else {
106
14
                update_murmur_hash3_128(state[i], value.data(), value.size());
107
14
            }
108
14
        }
109
14
    } else {
110
0
        DCHECK(false);
111
0
        return Status::NotSupported("Illegal column {} of argument of function {}",
112
0
                                    column->get_name(), function_name);
113
0
    }
114
42
    return Status::OK();
115
42
}
116
117
} // namespace
118
119
template <PrimitiveType ReturnType, bool is_mmh64_v2 = false>
120
struct MurmurHash3Impl {
121
6
    static constexpr auto get_name() {
122
6
        if constexpr (ReturnType == TYPE_INT) {
123
2
            return "murmur_hash3_32";
124
        } else if constexpr (ReturnType == TYPE_LARGEINT) {
125
            return "murmur_hash3_u64_v2";
126
2
        } else if constexpr (is_mmh64_v2) {
127
2
            return "murmur_hash3_64_v2";
128
2
        } else {
129
2
            return "murmur_hash3_64";
130
2
        }
131
6
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE8get_nameEv
Line
Count
Source
121
2
    static constexpr auto get_name() {
122
2
        if constexpr (ReturnType == TYPE_INT) {
123
2
            return "murmur_hash3_32";
124
        } else if constexpr (ReturnType == TYPE_LARGEINT) {
125
            return "murmur_hash3_u64_v2";
126
        } else if constexpr (is_mmh64_v2) {
127
            return "murmur_hash3_64_v2";
128
        } else {
129
            return "murmur_hash3_64";
130
        }
131
2
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE8get_nameEv
Line
Count
Source
121
2
    static constexpr auto get_name() {
122
        if constexpr (ReturnType == TYPE_INT) {
123
            return "murmur_hash3_32";
124
        } else if constexpr (ReturnType == TYPE_LARGEINT) {
125
            return "murmur_hash3_u64_v2";
126
        } else if constexpr (is_mmh64_v2) {
127
            return "murmur_hash3_64_v2";
128
2
        } else {
129
2
            return "murmur_hash3_64";
130
2
        }
131
2
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE8get_nameEv
Line
Count
Source
121
2
    static constexpr auto get_name() {
122
        if constexpr (ReturnType == TYPE_INT) {
123
            return "murmur_hash3_32";
124
        } else if constexpr (ReturnType == TYPE_LARGEINT) {
125
            return "murmur_hash3_u64_v2";
126
2
        } else if constexpr (is_mmh64_v2) {
127
2
            return "murmur_hash3_64_v2";
128
        } else {
129
            return "murmur_hash3_64";
130
        }
131
2
    }
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE8get_nameEv
132
    static constexpr auto name = get_name();
133
134
0
    static Status empty_apply(IColumn& icolumn, size_t input_rows_count) {
135
0
        ColumnVector<ReturnType>& vec_to = assert_cast<ColumnVector<ReturnType>&>(icolumn);
136
0
        vec_to.get_data().assign(
137
0
                input_rows_count,
138
0
                static_cast<typename PrimitiveTypeTraits<ReturnType>::CppType>(emtpy_value));
139
0
        return Status::OK();
140
0
    }
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE11empty_applyERNS_7IColumnEm
141
142
    static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
143
14
                              IColumn& icolumn) {
144
14
        return execute<true>(type, column, input_rows_count, icolumn);
145
14
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
143
6
                              IColumn& icolumn) {
144
6
        return execute<true>(type, column, input_rows_count, icolumn);
145
6
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
143
6
                              IColumn& icolumn) {
144
6
        return execute<true>(type, column, input_rows_count, icolumn);
145
6
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
143
2
                              IColumn& icolumn) {
144
2
        return execute<true>(type, column, input_rows_count, icolumn);
145
2
    }
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
146
147
    static Status combine_apply(const IDataType* type, const IColumn* column,
148
12
                                size_t input_rows_count, IColumn& icolumn) {
149
12
        return execute<false>(type, column, input_rows_count, icolumn);
150
12
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
148
6
                                size_t input_rows_count, IColumn& icolumn) {
149
6
        return execute<false>(type, column, input_rows_count, icolumn);
150
6
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
148
6
                                size_t input_rows_count, IColumn& icolumn) {
149
6
        return execute<false>(type, column, input_rows_count, icolumn);
150
6
    }
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
151
152
    template <bool first>
153
    static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count,
154
26
                          IColumn& col_to) {
155
26
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
26
        if constexpr (first) {
157
14
            if constexpr (ReturnType == TYPE_INT) {
158
6
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
6
                                           input_rows_count);
160
8
            } else {
161
8
                to_column.insert_many_defaults(input_rows_count);
162
8
            }
163
14
        }
164
26
        auto& col_to_data = to_column.get_data();
165
26
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
26
            const typename ColumnString::Chars& data = col_from->get_chars();
167
26
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
26
            size_t size = offsets.size();
169
26
            ColumnString::Offset current_offset = 0;
170
80
            for (size_t i = 0; i < size; ++i) {
171
54
                if constexpr (ReturnType == TYPE_INT) {
172
24
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
24
                            reinterpret_cast<const char*>(&data[current_offset]),
174
24
                            offsets[i] - current_offset, col_to_data[i]);
175
30
                } else {
176
30
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
30
                            reinterpret_cast<const char*>(&data[current_offset]),
178
30
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
30
                }
180
54
                current_offset = offsets[i];
181
54
            }
182
26
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
0
                if constexpr (ReturnType == TYPE_INT) {
187
0
                    col_to_data[i] =
188
0
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
0
                } else {
190
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
0
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
26
        return Status::OK();
200
26
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
154
6
                          IColumn& col_to) {
155
6
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
6
        if constexpr (first) {
157
6
            if constexpr (ReturnType == TYPE_INT) {
158
6
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
6
                                           input_rows_count);
160
            } else {
161
                to_column.insert_many_defaults(input_rows_count);
162
            }
163
6
        }
164
6
        auto& col_to_data = to_column.get_data();
165
6
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
6
            const typename ColumnString::Chars& data = col_from->get_chars();
167
6
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
6
            size_t size = offsets.size();
169
6
            ColumnString::Offset current_offset = 0;
170
18
            for (size_t i = 0; i < size; ++i) {
171
12
                if constexpr (ReturnType == TYPE_INT) {
172
12
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
12
                            reinterpret_cast<const char*>(&data[current_offset]),
174
12
                            offsets[i] - current_offset, col_to_data[i]);
175
                } else {
176
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
                            reinterpret_cast<const char*>(&data[current_offset]),
178
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
                }
180
12
                current_offset = offsets[i];
181
12
            }
182
6
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
0
                if constexpr (ReturnType == TYPE_INT) {
187
0
                    col_to_data[i] =
188
0
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
                } else {
190
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
6
        return Status::OK();
200
6
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
154
6
                          IColumn& col_to) {
155
6
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
        if constexpr (first) {
157
            if constexpr (ReturnType == TYPE_INT) {
158
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
                                           input_rows_count);
160
            } else {
161
                to_column.insert_many_defaults(input_rows_count);
162
            }
163
        }
164
6
        auto& col_to_data = to_column.get_data();
165
6
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
6
            const typename ColumnString::Chars& data = col_from->get_chars();
167
6
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
6
            size_t size = offsets.size();
169
6
            ColumnString::Offset current_offset = 0;
170
18
            for (size_t i = 0; i < size; ++i) {
171
12
                if constexpr (ReturnType == TYPE_INT) {
172
12
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
12
                            reinterpret_cast<const char*>(&data[current_offset]),
174
12
                            offsets[i] - current_offset, col_to_data[i]);
175
                } else {
176
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
                            reinterpret_cast<const char*>(&data[current_offset]),
178
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
                }
180
12
                current_offset = offsets[i];
181
12
            }
182
6
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
0
                if constexpr (ReturnType == TYPE_INT) {
187
0
                    col_to_data[i] =
188
0
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
                } else {
190
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
6
        return Status::OK();
200
6
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
154
6
                          IColumn& col_to) {
155
6
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
6
        if constexpr (first) {
157
            if constexpr (ReturnType == TYPE_INT) {
158
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
                                           input_rows_count);
160
6
            } else {
161
6
                to_column.insert_many_defaults(input_rows_count);
162
6
            }
163
6
        }
164
6
        auto& col_to_data = to_column.get_data();
165
6
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
6
            const typename ColumnString::Chars& data = col_from->get_chars();
167
6
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
6
            size_t size = offsets.size();
169
6
            ColumnString::Offset current_offset = 0;
170
18
            for (size_t i = 0; i < size; ++i) {
171
                if constexpr (ReturnType == TYPE_INT) {
172
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
                            reinterpret_cast<const char*>(&data[current_offset]),
174
                            offsets[i] - current_offset, col_to_data[i]);
175
12
                } else {
176
12
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
12
                            reinterpret_cast<const char*>(&data[current_offset]),
178
12
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
12
                }
180
12
                current_offset = offsets[i];
181
12
            }
182
6
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
                if constexpr (ReturnType == TYPE_INT) {
187
                    col_to_data[i] =
188
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
0
                } else {
190
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
0
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
6
        return Status::OK();
200
6
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
154
6
                          IColumn& col_to) {
155
6
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
        if constexpr (first) {
157
            if constexpr (ReturnType == TYPE_INT) {
158
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
                                           input_rows_count);
160
            } else {
161
                to_column.insert_many_defaults(input_rows_count);
162
            }
163
        }
164
6
        auto& col_to_data = to_column.get_data();
165
6
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
6
            const typename ColumnString::Chars& data = col_from->get_chars();
167
6
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
6
            size_t size = offsets.size();
169
6
            ColumnString::Offset current_offset = 0;
170
18
            for (size_t i = 0; i < size; ++i) {
171
                if constexpr (ReturnType == TYPE_INT) {
172
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
                            reinterpret_cast<const char*>(&data[current_offset]),
174
                            offsets[i] - current_offset, col_to_data[i]);
175
12
                } else {
176
12
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
12
                            reinterpret_cast<const char*>(&data[current_offset]),
178
12
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
12
                }
180
12
                current_offset = offsets[i];
181
12
            }
182
6
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
                if constexpr (ReturnType == TYPE_INT) {
187
                    col_to_data[i] =
188
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
0
                } else {
190
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
0
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
6
        return Status::OK();
200
6
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
154
2
                          IColumn& col_to) {
155
2
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
2
        if constexpr (first) {
157
            if constexpr (ReturnType == TYPE_INT) {
158
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
                                           input_rows_count);
160
2
            } else {
161
2
                to_column.insert_many_defaults(input_rows_count);
162
2
            }
163
2
        }
164
2
        auto& col_to_data = to_column.get_data();
165
2
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
2
            const typename ColumnString::Chars& data = col_from->get_chars();
167
2
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
2
            size_t size = offsets.size();
169
2
            ColumnString::Offset current_offset = 0;
170
8
            for (size_t i = 0; i < size; ++i) {
171
                if constexpr (ReturnType == TYPE_INT) {
172
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
                            reinterpret_cast<const char*>(&data[current_offset]),
174
                            offsets[i] - current_offset, col_to_data[i]);
175
6
                } else {
176
6
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
6
                            reinterpret_cast<const char*>(&data[current_offset]),
178
6
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
6
                }
180
6
                current_offset = offsets[i];
181
6
            }
182
2
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
                if constexpr (ReturnType == TYPE_INT) {
187
                    col_to_data[i] =
188
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
0
                } else {
190
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
0
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
2
        return Status::OK();
200
2
    }
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
201
};
202
203
using FunctionMurmurHash3_32 =
204
        FunctionVariadicArgumentsBase<DataTypeInt32, MurmurHash3Impl<TYPE_INT>>;
205
using FunctionMurmurHash3_64 =
206
        FunctionVariadicArgumentsBase<DataTypeInt64, MurmurHash3Impl<TYPE_BIGINT>>;
207
using FunctionMurmurHash3_64_V2 =
208
        FunctionVariadicArgumentsBase<DataTypeInt64, MurmurHash3Impl<TYPE_BIGINT, true>>;
209
using FunctionMurmurHash3U64V2 =
210
        FunctionVariadicArgumentsBase<DataTypeInt128, MurmurHash3Impl<TYPE_LARGEINT, true>>;
211
212
struct MurmurHash3128Impl {
213
    static constexpr auto name = "murmur_hash3_128";
214
215
2
    static Status empty_apply(IColumn& /*icolumn*/, size_t /*input_rows_count*/) {
216
2
        return Status::InvalidArgument("Function {} requires at least one argument", name);
217
2
    }
218
219
    static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
220
32
                              IColumn& icolumn) {
221
32
        return execute<true>(type, column, input_rows_count, icolumn);
222
32
    }
223
224
    static Status combine_apply(const IDataType* type, const IColumn* column,
225
42
                                size_t input_rows_count, IColumn& icolumn) {
226
42
        return execute<false>(type, column, input_rows_count, icolumn);
227
42
    }
228
229
    template <bool first>
230
    static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count,
231
74
                          IColumn& col_to) {
232
74
        auto& to_column = assert_cast<ColumnVector<TYPE_LARGEINT>&>(col_to);
233
74
        if constexpr (first) {
234
            // The first argument initializes one 128-bit hash state per row. Later arguments reuse
235
            // the same result column and update the saved state in place.
236
32
            to_column.insert_many_defaults(input_rows_count);
237
32
        }
238
74
        auto& col_to_data = to_column.get_data();
239
74
        return execute_murmur_hash3_128_column<first>(column, input_rows_count, col_to_data, name);
240
74
    }
_ZN5doris18MurmurHash3128Impl7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
231
32
                          IColumn& col_to) {
232
32
        auto& to_column = assert_cast<ColumnVector<TYPE_LARGEINT>&>(col_to);
233
32
        if constexpr (first) {
234
            // The first argument initializes one 128-bit hash state per row. Later arguments reuse
235
            // the same result column and update the saved state in place.
236
32
            to_column.insert_many_defaults(input_rows_count);
237
32
        }
238
32
        auto& col_to_data = to_column.get_data();
239
32
        return execute_murmur_hash3_128_column<first>(column, input_rows_count, col_to_data, name);
240
32
    }
_ZN5doris18MurmurHash3128Impl7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
231
42
                          IColumn& col_to) {
232
42
        auto& to_column = assert_cast<ColumnVector<TYPE_LARGEINT>&>(col_to);
233
        if constexpr (first) {
234
            // The first argument initializes one 128-bit hash state per row. Later arguments reuse
235
            // the same result column and update the saved state in place.
236
            to_column.insert_many_defaults(input_rows_count);
237
        }
238
42
        auto& col_to_data = to_column.get_data();
239
42
        return execute_murmur_hash3_128_column<first>(column, input_rows_count, col_to_data, name);
240
42
    }
241
};
242
243
using FunctionMurmurHash3_128 = FunctionVariadicArgumentsBase<DataTypeInt128, MurmurHash3128Impl>;
244
245
class FunctionMurmurHash3U128 : public IFunction {
246
public:
247
    static constexpr auto name = "murmur_hash3_u128";
248
249
64
    static FunctionPtr create() { return std::make_shared<FunctionMurmurHash3U128>(); }
250
251
0
    String get_name() const override { return name; }
252
253
62
    bool is_variadic() const override { return true; }
254
255
0
    size_t get_number_of_arguments() const override { return 0; }
256
257
60
    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& /*arguments*/) const override {
258
60
        return std::make_shared<DataTypeString>();
259
60
    }
260
261
    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
262
38
                        uint32_t result, size_t input_rows_count) const override {
263
38
        if (arguments.empty()) {
264
2
            return Status::InvalidArgument("Function {} requires at least one argument", name);
265
2
        }
266
267
36
        std::vector<__int128_t> state(input_rows_count);
268
36
        const ColumnWithTypeAndName& first_col = block.get_by_position(arguments[0]);
269
36
        RETURN_IF_ERROR(execute_murmur_hash3_128_column<true>(first_col.column.get(),
270
36
                                                              input_rows_count, state, name));
271
272
78
        for (size_t i = 1; i < arguments.size(); ++i) {
273
42
            const ColumnWithTypeAndName& col = block.get_by_position(arguments[i]);
274
42
            RETURN_IF_ERROR(execute_murmur_hash3_128_column<false>(col.column.get(),
275
42
                                                                   input_rows_count, state, name));
276
42
        }
277
278
36
        auto result_column = ColumnString::create();
279
36
        result_column->reserve(input_rows_count);
280
50
        for (const auto value : state) {
281
50
            auto unsigned_value = static_cast<__uint128_t>(value);
282
50
            std::string value_str = LargeIntValue::to_string(unsigned_value);
283
50
            result_column->insert_data(value_str.data(), value_str.size());
284
50
        }
285
36
        block.get_by_position(result).column = std::move(result_column);
286
36
        return Status::OK();
287
36
    }
288
};
289
290
#ifdef BE_TEST
291
2
const char* murmur_hash3_get_name_type_int_for_test() {
292
2
    return MurmurHash3Impl<TYPE_INT>::get_name();
293
2
}
294
295
2
const char* murmur_hash3_get_name_type_bigint_for_test() {
296
2
    return MurmurHash3Impl<TYPE_BIGINT>::get_name();
297
2
}
298
299
2
const char* murmur_hash3_get_name_type_bigint_v2_for_test() {
300
2
    return MurmurHash3Impl<TYPE_BIGINT, true>::get_name();
301
2
}
302
#endif
303
304
template <PrimitiveType ReturnType>
305
struct XxHashImpl {
306
    static constexpr auto name = ReturnType == TYPE_INT ? "xxhash_32" : "xxhash_64";
307
308
0
    static Status empty_apply(IColumn& icolumn, size_t input_rows_count) {
309
0
        ColumnVector<ReturnType>& vec_to = assert_cast<ColumnVector<ReturnType>&>(icolumn);
310
0
        vec_to.get_data().assign(
311
0
                input_rows_count,
312
0
                static_cast<typename PrimitiveTypeTraits<ReturnType>::CppType>(emtpy_value));
313
0
        return Status::OK();
314
0
    }
Unexecuted instantiation: _ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE11empty_applyERNS_7IColumnEm
315
316
    static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
317
24
                              IColumn& icolumn) {
318
24
        return execute<true>(type, column, input_rows_count, icolumn);
319
24
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
317
12
                              IColumn& icolumn) {
318
12
        return execute<true>(type, column, input_rows_count, icolumn);
319
12
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
317
12
                              IColumn& icolumn) {
318
12
        return execute<true>(type, column, input_rows_count, icolumn);
319
12
    }
320
321
    static Status combine_apply(const IDataType* type, const IColumn* column,
322
24
                                size_t input_rows_count, IColumn& icolumn) {
323
24
        return execute<false>(type, column, input_rows_count, icolumn);
324
24
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
322
12
                                size_t input_rows_count, IColumn& icolumn) {
323
12
        return execute<false>(type, column, input_rows_count, icolumn);
324
12
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
322
12
                                size_t input_rows_count, IColumn& icolumn) {
323
12
        return execute<false>(type, column, input_rows_count, icolumn);
324
12
    }
325
326
    template <bool first>
327
    static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count,
328
48
                          IColumn& col_to) {
329
48
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
330
48
        if constexpr (first) {
331
24
            to_column.insert_many_defaults(input_rows_count);
332
24
        }
333
48
        auto& col_to_data = to_column.get_data();
334
48
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
335
24
            const typename ColumnString::Chars& data = col_from->get_chars();
336
24
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
337
24
            size_t size = offsets.size();
338
24
            ColumnString::Offset current_offset = 0;
339
72
            for (size_t i = 0; i < size; ++i) {
340
48
                if constexpr (ReturnType == TYPE_INT) {
341
24
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
342
24
                            reinterpret_cast<const char*>(&data[current_offset]),
343
24
                            offsets[i] - current_offset, col_to_data[i]);
344
24
                } else {
345
24
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
346
24
                            reinterpret_cast<const char*>(&data[current_offset]),
347
24
                            offsets[i] - current_offset, col_to_data[i]);
348
24
                }
349
48
                current_offset = offsets[i];
350
48
            }
351
24
        } else if (const ColumnConst* col_from_const =
352
24
                           check_and_get_column_const_string_or_fixedstring(column)) {
353
0
            auto value = col_from_const->get_value<TYPE_STRING>();
354
0
            for (size_t i = 0; i < input_rows_count; ++i) {
355
0
                if constexpr (ReturnType == TYPE_INT) {
356
0
                    col_to_data[i] =
357
0
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
358
0
                } else {
359
0
                    col_to_data[i] =
360
0
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
361
0
                }
362
0
            }
363
24
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
364
72
            for (size_t i = 0; i < input_rows_count; ++i) {
365
48
                auto data_ref = vb_col->get_data_at(i);
366
48
                if constexpr (ReturnType == TYPE_INT) {
367
24
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
368
24
                                                                col_to_data[i]);
369
24
                } else {
370
24
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
371
24
                                                                col_to_data[i]);
372
24
                }
373
48
            }
374
24
        } else {
375
0
            DCHECK(false);
376
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
377
0
                                        column->get_name(), name);
378
0
        }
379
48
        return Status::OK();
380
48
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
328
12
                          IColumn& col_to) {
329
12
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
330
12
        if constexpr (first) {
331
12
            to_column.insert_many_defaults(input_rows_count);
332
12
        }
333
12
        auto& col_to_data = to_column.get_data();
334
12
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
335
6
            const typename ColumnString::Chars& data = col_from->get_chars();
336
6
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
337
6
            size_t size = offsets.size();
338
6
            ColumnString::Offset current_offset = 0;
339
18
            for (size_t i = 0; i < size; ++i) {
340
12
                if constexpr (ReturnType == TYPE_INT) {
341
12
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
342
12
                            reinterpret_cast<const char*>(&data[current_offset]),
343
12
                            offsets[i] - current_offset, col_to_data[i]);
344
                } else {
345
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
346
                            reinterpret_cast<const char*>(&data[current_offset]),
347
                            offsets[i] - current_offset, col_to_data[i]);
348
                }
349
12
                current_offset = offsets[i];
350
12
            }
351
6
        } else if (const ColumnConst* col_from_const =
352
6
                           check_and_get_column_const_string_or_fixedstring(column)) {
353
0
            auto value = col_from_const->get_value<TYPE_STRING>();
354
0
            for (size_t i = 0; i < input_rows_count; ++i) {
355
0
                if constexpr (ReturnType == TYPE_INT) {
356
0
                    col_to_data[i] =
357
0
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
358
                } else {
359
                    col_to_data[i] =
360
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
361
                }
362
0
            }
363
6
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
364
18
            for (size_t i = 0; i < input_rows_count; ++i) {
365
12
                auto data_ref = vb_col->get_data_at(i);
366
12
                if constexpr (ReturnType == TYPE_INT) {
367
12
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
368
12
                                                                col_to_data[i]);
369
                } else {
370
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
371
                                                                col_to_data[i]);
372
                }
373
12
            }
374
6
        } else {
375
0
            DCHECK(false);
376
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
377
0
                                        column->get_name(), name);
378
0
        }
379
12
        return Status::OK();
380
12
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
328
12
                          IColumn& col_to) {
329
12
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
330
        if constexpr (first) {
331
            to_column.insert_many_defaults(input_rows_count);
332
        }
333
12
        auto& col_to_data = to_column.get_data();
334
12
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
335
6
            const typename ColumnString::Chars& data = col_from->get_chars();
336
6
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
337
6
            size_t size = offsets.size();
338
6
            ColumnString::Offset current_offset = 0;
339
18
            for (size_t i = 0; i < size; ++i) {
340
12
                if constexpr (ReturnType == TYPE_INT) {
341
12
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
342
12
                            reinterpret_cast<const char*>(&data[current_offset]),
343
12
                            offsets[i] - current_offset, col_to_data[i]);
344
                } else {
345
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
346
                            reinterpret_cast<const char*>(&data[current_offset]),
347
                            offsets[i] - current_offset, col_to_data[i]);
348
                }
349
12
                current_offset = offsets[i];
350
12
            }
351
6
        } else if (const ColumnConst* col_from_const =
352
6
                           check_and_get_column_const_string_or_fixedstring(column)) {
353
0
            auto value = col_from_const->get_value<TYPE_STRING>();
354
0
            for (size_t i = 0; i < input_rows_count; ++i) {
355
0
                if constexpr (ReturnType == TYPE_INT) {
356
0
                    col_to_data[i] =
357
0
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
358
                } else {
359
                    col_to_data[i] =
360
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
361
                }
362
0
            }
363
6
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
364
18
            for (size_t i = 0; i < input_rows_count; ++i) {
365
12
                auto data_ref = vb_col->get_data_at(i);
366
12
                if constexpr (ReturnType == TYPE_INT) {
367
12
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
368
12
                                                                col_to_data[i]);
369
                } else {
370
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
371
                                                                col_to_data[i]);
372
                }
373
12
            }
374
6
        } else {
375
0
            DCHECK(false);
376
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
377
0
                                        column->get_name(), name);
378
0
        }
379
12
        return Status::OK();
380
12
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
328
12
                          IColumn& col_to) {
329
12
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
330
12
        if constexpr (first) {
331
12
            to_column.insert_many_defaults(input_rows_count);
332
12
        }
333
12
        auto& col_to_data = to_column.get_data();
334
12
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
335
6
            const typename ColumnString::Chars& data = col_from->get_chars();
336
6
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
337
6
            size_t size = offsets.size();
338
6
            ColumnString::Offset current_offset = 0;
339
18
            for (size_t i = 0; i < size; ++i) {
340
                if constexpr (ReturnType == TYPE_INT) {
341
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
342
                            reinterpret_cast<const char*>(&data[current_offset]),
343
                            offsets[i] - current_offset, col_to_data[i]);
344
12
                } else {
345
12
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
346
12
                            reinterpret_cast<const char*>(&data[current_offset]),
347
12
                            offsets[i] - current_offset, col_to_data[i]);
348
12
                }
349
12
                current_offset = offsets[i];
350
12
            }
351
6
        } else if (const ColumnConst* col_from_const =
352
6
                           check_and_get_column_const_string_or_fixedstring(column)) {
353
0
            auto value = col_from_const->get_value<TYPE_STRING>();
354
0
            for (size_t i = 0; i < input_rows_count; ++i) {
355
                if constexpr (ReturnType == TYPE_INT) {
356
                    col_to_data[i] =
357
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
358
0
                } else {
359
0
                    col_to_data[i] =
360
0
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
361
0
                }
362
0
            }
363
6
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
364
18
            for (size_t i = 0; i < input_rows_count; ++i) {
365
12
                auto data_ref = vb_col->get_data_at(i);
366
                if constexpr (ReturnType == TYPE_INT) {
367
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
368
                                                                col_to_data[i]);
369
12
                } else {
370
12
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
371
12
                                                                col_to_data[i]);
372
12
                }
373
12
            }
374
6
        } else {
375
0
            DCHECK(false);
376
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
377
0
                                        column->get_name(), name);
378
0
        }
379
12
        return Status::OK();
380
12
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
328
12
                          IColumn& col_to) {
329
12
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
330
        if constexpr (first) {
331
            to_column.insert_many_defaults(input_rows_count);
332
        }
333
12
        auto& col_to_data = to_column.get_data();
334
12
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
335
6
            const typename ColumnString::Chars& data = col_from->get_chars();
336
6
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
337
6
            size_t size = offsets.size();
338
6
            ColumnString::Offset current_offset = 0;
339
18
            for (size_t i = 0; i < size; ++i) {
340
                if constexpr (ReturnType == TYPE_INT) {
341
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
342
                            reinterpret_cast<const char*>(&data[current_offset]),
343
                            offsets[i] - current_offset, col_to_data[i]);
344
12
                } else {
345
12
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
346
12
                            reinterpret_cast<const char*>(&data[current_offset]),
347
12
                            offsets[i] - current_offset, col_to_data[i]);
348
12
                }
349
12
                current_offset = offsets[i];
350
12
            }
351
6
        } else if (const ColumnConst* col_from_const =
352
6
                           check_and_get_column_const_string_or_fixedstring(column)) {
353
0
            auto value = col_from_const->get_value<TYPE_STRING>();
354
0
            for (size_t i = 0; i < input_rows_count; ++i) {
355
                if constexpr (ReturnType == TYPE_INT) {
356
                    col_to_data[i] =
357
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
358
0
                } else {
359
0
                    col_to_data[i] =
360
0
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
361
0
                }
362
0
            }
363
6
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
364
18
            for (size_t i = 0; i < input_rows_count; ++i) {
365
12
                auto data_ref = vb_col->get_data_at(i);
366
                if constexpr (ReturnType == TYPE_INT) {
367
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
368
                                                                col_to_data[i]);
369
12
                } else {
370
12
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
371
12
                                                                col_to_data[i]);
372
12
                }
373
12
            }
374
6
        } else {
375
0
            DCHECK(false);
376
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
377
0
                                        column->get_name(), name);
378
0
        }
379
12
        return Status::OK();
380
12
    }
381
};
382
383
using FunctionXxHash_32 = FunctionVariadicArgumentsBase<DataTypeInt32, XxHashImpl<TYPE_INT>>;
384
using FunctionXxHash_64 = FunctionVariadicArgumentsBase<DataTypeInt64, XxHashImpl<TYPE_BIGINT>>;
385
386
2
void register_function_hash(SimpleFunctionFactory& factory) {
387
2
    factory.register_function<FunctionMurmurHash3_32>();
388
2
    factory.register_function<FunctionMurmurHash3_64>();
389
2
    factory.register_function<FunctionMurmurHash3_64_V2>();
390
2
    factory.register_function<FunctionMurmurHash3U64V2>();
391
2
    factory.register_function<FunctionMurmurHash3_128>();
392
2
    factory.register_function<FunctionMurmurHash3U128>();
393
2
    factory.register_function<FunctionXxHash_32>();
394
2
    factory.register_function<FunctionXxHash_64>();
395
2
    factory.register_alias("xxhash_64", "xxhash3_64");
396
2
}
397
} // namespace doris