Coverage Report

Created: 2026-06-02 21:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_hash.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionHash.cpp
19
// and modified by Doris
20
21
#include "exprs/function/function_hash.h"
22
23
#include <vector>
24
25
#include "common/status.h"
26
#include "core/assert_cast.h"
27
#include "core/column/column.h"
28
#include "core/column/column_const.h"
29
#include "core/column/column_string.h"
30
#include "core/column/column_varbinary.h"
31
#include "core/column/column_vector.h"
32
#include "core/data_type/data_type.h"
33
#include "core/data_type/data_type_number.h"
34
#include "core/data_type/data_type_string.h"
35
#include "core/field.h"
36
#include "core/value/large_int_value.h"
37
#include "exec/common/template_helpers.hpp"
38
#include "exprs/function/function_helpers.h"
39
#include "exprs/function/function_variadic_arguments.h"
40
#include "exprs/function/simple_function_factory.h"
41
#include "util/hash/murmur_hash3.h"
42
#include "util/hash_util.hpp"
43
44
namespace doris {
45
constexpr uint64_t emtpy_value = 0xe28dbde7fe22e41c;
46
47
namespace {
48
49
96
__int128_t pack_murmur_hash3_128(uint64_t h1, uint64_t h2) {
50
96
    static_assert(sizeof(__int128_t) == sizeof(uint64_t) * 2);
51
    // Store the two MurmurHash3 x64 128-bit lanes in a single LARGEINT value. Keep h1 in the
52
    // low 64 bits and h2 in the high 64 bits to match murmur_hash3_x64_128's out[0]/out[1].
53
96
    const auto value =
54
96
            (static_cast<unsigned __int128>(h2) << 64) | static_cast<unsigned __int128>(h1);
55
96
    return static_cast<__int128_t>(value);
56
96
}
57
58
50
void unpack_murmur_hash3_128(__int128_t value, uint64_t& h1, uint64_t& h2) {
59
50
    static_assert(sizeof(__int128_t) == sizeof(uint64_t) * 2);
60
50
    const auto unsigned_value = static_cast<unsigned __int128>(value);
61
50
    h1 = static_cast<uint64_t>(unsigned_value);
62
50
    h2 = static_cast<uint64_t>(unsigned_value >> 64);
63
50
}
64
65
46
void init_murmur_hash3_128(__int128_t& value, const void* data, size_t size) {
66
46
    uint64_t hash[2] = {0, 0};
67
46
    murmur_hash3_x64_128(data, size, 0, hash);
68
46
    value = pack_murmur_hash3_128(hash[0], hash[1]);
69
46
}
70
71
50
void update_murmur_hash3_128(__int128_t& value, const void* data, size_t size) {
72
50
    uint64_t h1 = 0;
73
50
    uint64_t h2 = 0;
74
50
    unpack_murmur_hash3_128(value, h1, h2);
75
50
    murmur_hash3_x64_process(data, size, h1, h2);
76
50
    value = pack_murmur_hash3_128(h1, h2);
77
50
}
78
79
template <bool first, typename StateContainer>
80
Status execute_murmur_hash3_128_column(const IColumn* column, size_t input_rows_count,
81
76
                                       StateContainer& state, const char* function_name) {
82
76
    if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
83
54
        const typename ColumnString::Chars& data = col_from->get_chars();
84
54
        const typename ColumnString::Offsets& offsets = col_from->get_offsets();
85
54
        size_t size = offsets.size();
86
54
        ColumnString::Offset current_offset = 0;
87
128
        for (size_t i = 0; i < size; ++i) {
88
74
            if constexpr (first) {
89
38
                init_murmur_hash3_128(state[i],
90
38
                                      reinterpret_cast<const char*>(&data[current_offset]),
91
38
                                      offsets[i] - current_offset);
92
38
            } else {
93
36
                update_murmur_hash3_128(state[i],
94
36
                                        reinterpret_cast<const char*>(&data[current_offset]),
95
36
                                        offsets[i] - current_offset);
96
36
            }
97
74
            current_offset = offsets[i];
98
74
        }
99
54
    } else if (const ColumnConst* col_from_const =
100
22
                       check_and_get_column_const_string_or_fixedstring(column)) {
101
22
        auto value = col_from_const->get_value<TYPE_STRING>();
102
44
        for (size_t i = 0; i < input_rows_count; ++i) {
103
22
            if constexpr (first) {
104
8
                init_murmur_hash3_128(state[i], value.data(), value.size());
105
14
            } else {
106
14
                update_murmur_hash3_128(state[i], value.data(), value.size());
107
14
            }
108
22
        }
109
22
    } else {
110
0
        DCHECK(false);
111
0
        return Status::NotSupported("Illegal column {} of argument of function {}",
112
0
                                    column->get_name(), function_name);
113
0
    }
114
76
    return Status::OK();
115
76
}
function_hash.cpp:_ZN5doris12_GLOBAL__N_131execute_murmur_hash3_128_columnILb1ENS_8PODArrayInLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEEENS_6StatusEPKNS_7IColumnEmRT0_PKc
Line
Count
Source
81
16
                                       StateContainer& state, const char* function_name) {
82
16
    if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
83
12
        const typename ColumnString::Chars& data = col_from->get_chars();
84
12
        const typename ColumnString::Offsets& offsets = col_from->get_offsets();
85
12
        size_t size = offsets.size();
86
12
        ColumnString::Offset current_offset = 0;
87
29
        for (size_t i = 0; i < size; ++i) {
88
17
            if constexpr (first) {
89
17
                init_murmur_hash3_128(state[i],
90
17
                                      reinterpret_cast<const char*>(&data[current_offset]),
91
17
                                      offsets[i] - current_offset);
92
            } else {
93
                update_murmur_hash3_128(state[i],
94
                                        reinterpret_cast<const char*>(&data[current_offset]),
95
                                        offsets[i] - current_offset);
96
            }
97
17
            current_offset = offsets[i];
98
17
        }
99
12
    } else if (const ColumnConst* col_from_const =
100
4
                       check_and_get_column_const_string_or_fixedstring(column)) {
101
4
        auto value = col_from_const->get_value<TYPE_STRING>();
102
8
        for (size_t i = 0; i < input_rows_count; ++i) {
103
4
            if constexpr (first) {
104
4
                init_murmur_hash3_128(state[i], value.data(), value.size());
105
            } else {
106
                update_murmur_hash3_128(state[i], value.data(), value.size());
107
            }
108
4
        }
109
4
    } else {
110
0
        DCHECK(false);
111
0
        return Status::NotSupported("Illegal column {} of argument of function {}",
112
0
                                    column->get_name(), function_name);
113
0
    }
114
16
    return Status::OK();
115
16
}
function_hash.cpp:_ZN5doris12_GLOBAL__N_131execute_murmur_hash3_128_columnILb0ENS_8PODArrayInLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEEENS_6StatusEPKNS_7IColumnEmRT0_PKc
Line
Count
Source
81
21
                                       StateContainer& state, const char* function_name) {
82
21
    if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
83
14
        const typename ColumnString::Chars& data = col_from->get_chars();
84
14
        const typename ColumnString::Offsets& offsets = col_from->get_offsets();
85
14
        size_t size = offsets.size();
86
14
        ColumnString::Offset current_offset = 0;
87
32
        for (size_t i = 0; i < size; ++i) {
88
            if constexpr (first) {
89
                init_murmur_hash3_128(state[i],
90
                                      reinterpret_cast<const char*>(&data[current_offset]),
91
                                      offsets[i] - current_offset);
92
18
            } else {
93
18
                update_murmur_hash3_128(state[i],
94
18
                                        reinterpret_cast<const char*>(&data[current_offset]),
95
18
                                        offsets[i] - current_offset);
96
18
            }
97
18
            current_offset = offsets[i];
98
18
        }
99
14
    } else if (const ColumnConst* col_from_const =
100
7
                       check_and_get_column_const_string_or_fixedstring(column)) {
101
7
        auto value = col_from_const->get_value<TYPE_STRING>();
102
14
        for (size_t i = 0; i < input_rows_count; ++i) {
103
            if constexpr (first) {
104
                init_murmur_hash3_128(state[i], value.data(), value.size());
105
7
            } else {
106
7
                update_murmur_hash3_128(state[i], value.data(), value.size());
107
7
            }
108
7
        }
109
7
    } else {
110
0
        DCHECK(false);
111
0
        return Status::NotSupported("Illegal column {} of argument of function {}",
112
0
                                    column->get_name(), function_name);
113
0
    }
114
21
    return Status::OK();
115
21
}
function_hash.cpp:_ZN5doris12_GLOBAL__N_131execute_murmur_hash3_128_columnILb1ESt6vectorInSaInEEEENS_6StatusEPKNS_7IColumnEmRT0_PKc
Line
Count
Source
81
18
                                       StateContainer& state, const char* function_name) {
82
18
    if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
83
14
        const typename ColumnString::Chars& data = col_from->get_chars();
84
14
        const typename ColumnString::Offsets& offsets = col_from->get_offsets();
85
14
        size_t size = offsets.size();
86
14
        ColumnString::Offset current_offset = 0;
87
35
        for (size_t i = 0; i < size; ++i) {
88
21
            if constexpr (first) {
89
21
                init_murmur_hash3_128(state[i],
90
21
                                      reinterpret_cast<const char*>(&data[current_offset]),
91
21
                                      offsets[i] - current_offset);
92
            } else {
93
                update_murmur_hash3_128(state[i],
94
                                        reinterpret_cast<const char*>(&data[current_offset]),
95
                                        offsets[i] - current_offset);
96
            }
97
21
            current_offset = offsets[i];
98
21
        }
99
14
    } else if (const ColumnConst* col_from_const =
100
4
                       check_and_get_column_const_string_or_fixedstring(column)) {
101
4
        auto value = col_from_const->get_value<TYPE_STRING>();
102
8
        for (size_t i = 0; i < input_rows_count; ++i) {
103
4
            if constexpr (first) {
104
4
                init_murmur_hash3_128(state[i], value.data(), value.size());
105
            } else {
106
                update_murmur_hash3_128(state[i], value.data(), value.size());
107
            }
108
4
        }
109
4
    } else {
110
0
        DCHECK(false);
111
0
        return Status::NotSupported("Illegal column {} of argument of function {}",
112
0
                                    column->get_name(), function_name);
113
0
    }
114
18
    return Status::OK();
115
18
}
function_hash.cpp:_ZN5doris12_GLOBAL__N_131execute_murmur_hash3_128_columnILb0ESt6vectorInSaInEEEENS_6StatusEPKNS_7IColumnEmRT0_PKc
Line
Count
Source
81
21
                                       StateContainer& state, const char* function_name) {
82
21
    if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
83
14
        const typename ColumnString::Chars& data = col_from->get_chars();
84
14
        const typename ColumnString::Offsets& offsets = col_from->get_offsets();
85
14
        size_t size = offsets.size();
86
14
        ColumnString::Offset current_offset = 0;
87
32
        for (size_t i = 0; i < size; ++i) {
88
            if constexpr (first) {
89
                init_murmur_hash3_128(state[i],
90
                                      reinterpret_cast<const char*>(&data[current_offset]),
91
                                      offsets[i] - current_offset);
92
18
            } else {
93
18
                update_murmur_hash3_128(state[i],
94
18
                                        reinterpret_cast<const char*>(&data[current_offset]),
95
18
                                        offsets[i] - current_offset);
96
18
            }
97
18
            current_offset = offsets[i];
98
18
        }
99
14
    } else if (const ColumnConst* col_from_const =
100
7
                       check_and_get_column_const_string_or_fixedstring(column)) {
101
7
        auto value = col_from_const->get_value<TYPE_STRING>();
102
14
        for (size_t i = 0; i < input_rows_count; ++i) {
103
            if constexpr (first) {
104
                init_murmur_hash3_128(state[i], value.data(), value.size());
105
7
            } else {
106
7
                update_murmur_hash3_128(state[i], value.data(), value.size());
107
7
            }
108
7
        }
109
7
    } else {
110
0
        DCHECK(false);
111
0
        return Status::NotSupported("Illegal column {} of argument of function {}",
112
0
                                    column->get_name(), function_name);
113
0
    }
114
21
    return Status::OK();
115
21
}
116
117
} // namespace
118
119
template <PrimitiveType ReturnType, bool is_mmh64_v2 = false>
120
struct MurmurHash3Impl {
121
3
    static constexpr auto get_name() {
122
3
        if constexpr (ReturnType == TYPE_INT) {
123
1
            return "murmur_hash3_32";
124
        } else if constexpr (ReturnType == TYPE_LARGEINT) {
125
            return "murmur_hash3_u64_v2";
126
1
        } else if constexpr (is_mmh64_v2) {
127
1
            return "murmur_hash3_64_v2";
128
1
        } else {
129
1
            return "murmur_hash3_64";
130
1
        }
131
3
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE8get_nameEv
Line
Count
Source
121
1
    static constexpr auto get_name() {
122
1
        if constexpr (ReturnType == TYPE_INT) {
123
1
            return "murmur_hash3_32";
124
        } else if constexpr (ReturnType == TYPE_LARGEINT) {
125
            return "murmur_hash3_u64_v2";
126
        } else if constexpr (is_mmh64_v2) {
127
            return "murmur_hash3_64_v2";
128
        } else {
129
            return "murmur_hash3_64";
130
        }
131
1
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE8get_nameEv
Line
Count
Source
121
1
    static constexpr auto get_name() {
122
        if constexpr (ReturnType == TYPE_INT) {
123
            return "murmur_hash3_32";
124
        } else if constexpr (ReturnType == TYPE_LARGEINT) {
125
            return "murmur_hash3_u64_v2";
126
        } else if constexpr (is_mmh64_v2) {
127
            return "murmur_hash3_64_v2";
128
1
        } else {
129
1
            return "murmur_hash3_64";
130
1
        }
131
1
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE8get_nameEv
Line
Count
Source
121
1
    static constexpr auto get_name() {
122
        if constexpr (ReturnType == TYPE_INT) {
123
            return "murmur_hash3_32";
124
        } else if constexpr (ReturnType == TYPE_LARGEINT) {
125
            return "murmur_hash3_u64_v2";
126
1
        } else if constexpr (is_mmh64_v2) {
127
1
            return "murmur_hash3_64_v2";
128
        } else {
129
            return "murmur_hash3_64";
130
        }
131
1
    }
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE8get_nameEv
132
    static constexpr auto name = get_name();
133
134
0
    static Status empty_apply(IColumn& icolumn, size_t input_rows_count) {
135
0
        ColumnVector<ReturnType>& vec_to = assert_cast<ColumnVector<ReturnType>&>(icolumn);
136
0
        vec_to.get_data().assign(
137
0
                input_rows_count,
138
0
                static_cast<typename PrimitiveTypeTraits<ReturnType>::CppType>(emtpy_value));
139
0
        return Status::OK();
140
0
    }
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE11empty_applyERNS_7IColumnEm
141
142
    static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
143
7
                              IColumn& icolumn) {
144
7
        return execute<true>(type, column, input_rows_count, icolumn);
145
7
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
143
3
                              IColumn& icolumn) {
144
3
        return execute<true>(type, column, input_rows_count, icolumn);
145
3
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
143
3
                              IColumn& icolumn) {
144
3
        return execute<true>(type, column, input_rows_count, icolumn);
145
3
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
143
1
                              IColumn& icolumn) {
144
1
        return execute<true>(type, column, input_rows_count, icolumn);
145
1
    }
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
146
147
    static Status combine_apply(const IDataType* type, const IColumn* column,
148
6
                                size_t input_rows_count, IColumn& icolumn) {
149
6
        return execute<false>(type, column, input_rows_count, icolumn);
150
6
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
148
3
                                size_t input_rows_count, IColumn& icolumn) {
149
3
        return execute<false>(type, column, input_rows_count, icolumn);
150
3
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
148
3
                                size_t input_rows_count, IColumn& icolumn) {
149
3
        return execute<false>(type, column, input_rows_count, icolumn);
150
3
    }
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
151
152
    template <bool first>
153
    static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count,
154
13
                          IColumn& col_to) {
155
13
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
13
        if constexpr (first) {
157
7
            if constexpr (ReturnType == TYPE_INT) {
158
3
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
3
                                           input_rows_count);
160
4
            } else {
161
4
                to_column.insert_many_defaults(input_rows_count);
162
4
            }
163
7
        }
164
13
        auto& col_to_data = to_column.get_data();
165
13
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
13
            const typename ColumnString::Chars& data = col_from->get_chars();
167
13
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
13
            size_t size = offsets.size();
169
13
            ColumnString::Offset current_offset = 0;
170
40
            for (size_t i = 0; i < size; ++i) {
171
27
                if constexpr (ReturnType == TYPE_INT) {
172
12
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
12
                            reinterpret_cast<const char*>(&data[current_offset]),
174
12
                            offsets[i] - current_offset, col_to_data[i]);
175
15
                } else {
176
15
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
15
                            reinterpret_cast<const char*>(&data[current_offset]),
178
15
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
15
                }
180
27
                current_offset = offsets[i];
181
27
            }
182
13
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
0
                if constexpr (ReturnType == TYPE_INT) {
187
0
                    col_to_data[i] =
188
0
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
0
                } else {
190
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
0
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
13
        return Status::OK();
200
13
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
154
3
                          IColumn& col_to) {
155
3
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
3
        if constexpr (first) {
157
3
            if constexpr (ReturnType == TYPE_INT) {
158
3
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
3
                                           input_rows_count);
160
            } else {
161
                to_column.insert_many_defaults(input_rows_count);
162
            }
163
3
        }
164
3
        auto& col_to_data = to_column.get_data();
165
3
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
3
            const typename ColumnString::Chars& data = col_from->get_chars();
167
3
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
3
            size_t size = offsets.size();
169
3
            ColumnString::Offset current_offset = 0;
170
9
            for (size_t i = 0; i < size; ++i) {
171
6
                if constexpr (ReturnType == TYPE_INT) {
172
6
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
6
                            reinterpret_cast<const char*>(&data[current_offset]),
174
6
                            offsets[i] - current_offset, col_to_data[i]);
175
                } else {
176
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
                            reinterpret_cast<const char*>(&data[current_offset]),
178
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
                }
180
6
                current_offset = offsets[i];
181
6
            }
182
3
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
0
                if constexpr (ReturnType == TYPE_INT) {
187
0
                    col_to_data[i] =
188
0
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
                } else {
190
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
3
        return Status::OK();
200
3
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
154
3
                          IColumn& col_to) {
155
3
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
        if constexpr (first) {
157
            if constexpr (ReturnType == TYPE_INT) {
158
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
                                           input_rows_count);
160
            } else {
161
                to_column.insert_many_defaults(input_rows_count);
162
            }
163
        }
164
3
        auto& col_to_data = to_column.get_data();
165
3
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
3
            const typename ColumnString::Chars& data = col_from->get_chars();
167
3
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
3
            size_t size = offsets.size();
169
3
            ColumnString::Offset current_offset = 0;
170
9
            for (size_t i = 0; i < size; ++i) {
171
6
                if constexpr (ReturnType == TYPE_INT) {
172
6
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
6
                            reinterpret_cast<const char*>(&data[current_offset]),
174
6
                            offsets[i] - current_offset, col_to_data[i]);
175
                } else {
176
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
                            reinterpret_cast<const char*>(&data[current_offset]),
178
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
                }
180
6
                current_offset = offsets[i];
181
6
            }
182
3
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
0
                if constexpr (ReturnType == TYPE_INT) {
187
0
                    col_to_data[i] =
188
0
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
                } else {
190
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
3
        return Status::OK();
200
3
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
154
3
                          IColumn& col_to) {
155
3
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
3
        if constexpr (first) {
157
            if constexpr (ReturnType == TYPE_INT) {
158
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
                                           input_rows_count);
160
3
            } else {
161
3
                to_column.insert_many_defaults(input_rows_count);
162
3
            }
163
3
        }
164
3
        auto& col_to_data = to_column.get_data();
165
3
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
3
            const typename ColumnString::Chars& data = col_from->get_chars();
167
3
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
3
            size_t size = offsets.size();
169
3
            ColumnString::Offset current_offset = 0;
170
9
            for (size_t i = 0; i < size; ++i) {
171
                if constexpr (ReturnType == TYPE_INT) {
172
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
                            reinterpret_cast<const char*>(&data[current_offset]),
174
                            offsets[i] - current_offset, col_to_data[i]);
175
6
                } else {
176
6
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
6
                            reinterpret_cast<const char*>(&data[current_offset]),
178
6
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
6
                }
180
6
                current_offset = offsets[i];
181
6
            }
182
3
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
                if constexpr (ReturnType == TYPE_INT) {
187
                    col_to_data[i] =
188
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
0
                } else {
190
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
0
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
3
        return Status::OK();
200
3
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
154
3
                          IColumn& col_to) {
155
3
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
        if constexpr (first) {
157
            if constexpr (ReturnType == TYPE_INT) {
158
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
                                           input_rows_count);
160
            } else {
161
                to_column.insert_many_defaults(input_rows_count);
162
            }
163
        }
164
3
        auto& col_to_data = to_column.get_data();
165
3
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
3
            const typename ColumnString::Chars& data = col_from->get_chars();
167
3
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
3
            size_t size = offsets.size();
169
3
            ColumnString::Offset current_offset = 0;
170
9
            for (size_t i = 0; i < size; ++i) {
171
                if constexpr (ReturnType == TYPE_INT) {
172
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
                            reinterpret_cast<const char*>(&data[current_offset]),
174
                            offsets[i] - current_offset, col_to_data[i]);
175
6
                } else {
176
6
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
6
                            reinterpret_cast<const char*>(&data[current_offset]),
178
6
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
6
                }
180
6
                current_offset = offsets[i];
181
6
            }
182
3
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
                if constexpr (ReturnType == TYPE_INT) {
187
                    col_to_data[i] =
188
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
0
                } else {
190
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
0
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
3
        return Status::OK();
200
3
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
154
1
                          IColumn& col_to) {
155
1
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
156
1
        if constexpr (first) {
157
            if constexpr (ReturnType == TYPE_INT) {
158
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
159
                                           input_rows_count);
160
1
            } else {
161
1
                to_column.insert_many_defaults(input_rows_count);
162
1
            }
163
1
        }
164
1
        auto& col_to_data = to_column.get_data();
165
1
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
166
1
            const typename ColumnString::Chars& data = col_from->get_chars();
167
1
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
168
1
            size_t size = offsets.size();
169
1
            ColumnString::Offset current_offset = 0;
170
4
            for (size_t i = 0; i < size; ++i) {
171
                if constexpr (ReturnType == TYPE_INT) {
172
                    col_to_data[i] = HashUtil::murmur_hash3_32(
173
                            reinterpret_cast<const char*>(&data[current_offset]),
174
                            offsets[i] - current_offset, col_to_data[i]);
175
3
                } else {
176
3
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
177
3
                            reinterpret_cast<const char*>(&data[current_offset]),
178
3
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
179
3
                }
180
3
                current_offset = offsets[i];
181
3
            }
182
1
        } else if (const ColumnConst* col_from_const =
183
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
184
0
            auto value = col_from_const->get_value<TYPE_STRING>();
185
0
            for (size_t i = 0; i < input_rows_count; ++i) {
186
                if constexpr (ReturnType == TYPE_INT) {
187
                    col_to_data[i] =
188
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
189
0
                } else {
190
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
191
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
192
0
                }
193
0
            }
194
0
        } else {
195
0
            DCHECK(false);
196
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
197
0
                                        column->get_name(), name);
198
0
        }
199
1
        return Status::OK();
200
1
    }
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
201
};
202
203
using FunctionMurmurHash3_32 =
204
        FunctionVariadicArgumentsBase<DataTypeInt32, MurmurHash3Impl<TYPE_INT>>;
205
using FunctionMurmurHash3_64 =
206
        FunctionVariadicArgumentsBase<DataTypeInt64, MurmurHash3Impl<TYPE_BIGINT>>;
207
using FunctionMurmurHash3_64_V2 =
208
        FunctionVariadicArgumentsBase<DataTypeInt64, MurmurHash3Impl<TYPE_BIGINT, true>>;
209
using FunctionMurmurHash3U64V2 =
210
        FunctionVariadicArgumentsBase<DataTypeInt128, MurmurHash3Impl<TYPE_LARGEINT, true>>;
211
212
struct MurmurHash3128Impl {
213
    static constexpr auto name = "murmur_hash3_128";
214
215
1
    static Status empty_apply(IColumn& /*icolumn*/, size_t /*input_rows_count*/) {
216
1
        return Status::InvalidArgument("Function {} requires at least one argument", name);
217
1
    }
218
219
    static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
220
16
                              IColumn& icolumn) {
221
16
        return execute<true>(type, column, input_rows_count, icolumn);
222
16
    }
223
224
    static Status combine_apply(const IDataType* type, const IColumn* column,
225
21
                                size_t input_rows_count, IColumn& icolumn) {
226
21
        return execute<false>(type, column, input_rows_count, icolumn);
227
21
    }
228
229
    template <bool first>
230
    static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count,
231
37
                          IColumn& col_to) {
232
37
        auto& to_column = assert_cast<ColumnVector<TYPE_LARGEINT>&>(col_to);
233
37
        if constexpr (first) {
234
            // The first argument initializes one 128-bit hash state per row. Later arguments reuse
235
            // the same result column and update the saved state in place.
236
16
            to_column.insert_many_defaults(input_rows_count);
237
16
        }
238
37
        auto& col_to_data = to_column.get_data();
239
37
        return execute_murmur_hash3_128_column<first>(column, input_rows_count, col_to_data, name);
240
37
    }
_ZN5doris18MurmurHash3128Impl7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
231
16
                          IColumn& col_to) {
232
16
        auto& to_column = assert_cast<ColumnVector<TYPE_LARGEINT>&>(col_to);
233
16
        if constexpr (first) {
234
            // The first argument initializes one 128-bit hash state per row. Later arguments reuse
235
            // the same result column and update the saved state in place.
236
16
            to_column.insert_many_defaults(input_rows_count);
237
16
        }
238
16
        auto& col_to_data = to_column.get_data();
239
16
        return execute_murmur_hash3_128_column<first>(column, input_rows_count, col_to_data, name);
240
16
    }
_ZN5doris18MurmurHash3128Impl7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
231
21
                          IColumn& col_to) {
232
21
        auto& to_column = assert_cast<ColumnVector<TYPE_LARGEINT>&>(col_to);
233
        if constexpr (first) {
234
            // The first argument initializes one 128-bit hash state per row. Later arguments reuse
235
            // the same result column and update the saved state in place.
236
            to_column.insert_many_defaults(input_rows_count);
237
        }
238
21
        auto& col_to_data = to_column.get_data();
239
21
        return execute_murmur_hash3_128_column<first>(column, input_rows_count, col_to_data, name);
240
21
    }
241
};
242
243
using FunctionMurmurHash3_128 = FunctionVariadicArgumentsBase<DataTypeInt128, MurmurHash3128Impl>;
244
245
class FunctionMurmurHash3U128 : public IFunction {
246
public:
247
    static constexpr auto name = "murmur_hash3_u128";
248
249
32
    static FunctionPtr create() { return std::make_shared<FunctionMurmurHash3U128>(); }
250
251
0
    String get_name() const override { return name; }
252
253
31
    bool is_variadic() const override { return true; }
254
255
0
    size_t get_number_of_arguments() const override { return 0; }
256
257
30
    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& /*arguments*/) const override {
258
30
        return std::make_shared<DataTypeString>();
259
30
    }
260
261
    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
262
19
                        uint32_t result, size_t input_rows_count) const override {
263
19
        if (arguments.empty()) {
264
1
            return Status::InvalidArgument("Function {} requires at least one argument", name);
265
1
        }
266
267
18
        std::vector<__int128_t> state(input_rows_count);
268
18
        const ColumnWithTypeAndName& first_col = block.get_by_position(arguments[0]);
269
18
        RETURN_IF_ERROR(execute_murmur_hash3_128_column<true>(first_col.column.get(),
270
18
                                                              input_rows_count, state, name));
271
272
39
        for (size_t i = 1; i < arguments.size(); ++i) {
273
21
            const ColumnWithTypeAndName& col = block.get_by_position(arguments[i]);
274
21
            RETURN_IF_ERROR(execute_murmur_hash3_128_column<false>(col.column.get(),
275
21
                                                                   input_rows_count, state, name));
276
21
        }
277
278
18
        auto result_column = ColumnString::create();
279
18
        result_column->reserve(input_rows_count);
280
25
        for (const auto value : state) {
281
25
            auto unsigned_value = static_cast<__uint128_t>(value);
282
25
            std::string value_str = LargeIntValue::to_string(unsigned_value);
283
25
            result_column->insert_data(value_str.data(), value_str.size());
284
25
        }
285
18
        block.get_by_position(result).column = std::move(result_column);
286
18
        return Status::OK();
287
18
    }
288
};
289
290
#ifdef BE_TEST
291
1
const char* murmur_hash3_get_name_type_int_for_test() {
292
1
    return MurmurHash3Impl<TYPE_INT>::get_name();
293
1
}
294
295
1
const char* murmur_hash3_get_name_type_bigint_for_test() {
296
1
    return MurmurHash3Impl<TYPE_BIGINT>::get_name();
297
1
}
298
299
1
const char* murmur_hash3_get_name_type_bigint_v2_for_test() {
300
1
    return MurmurHash3Impl<TYPE_BIGINT, true>::get_name();
301
1
}
302
#endif
303
304
template <PrimitiveType ReturnType>
305
struct XxHashImpl {
306
    static constexpr auto name = ReturnType == TYPE_INT ? "xxhash_32" : "xxhash_64";
307
308
0
    static Status empty_apply(IColumn& icolumn, size_t input_rows_count) {
309
0
        ColumnVector<ReturnType>& vec_to = assert_cast<ColumnVector<ReturnType>&>(icolumn);
310
0
        vec_to.get_data().assign(
311
0
                input_rows_count,
312
0
                static_cast<typename PrimitiveTypeTraits<ReturnType>::CppType>(emtpy_value));
313
0
        return Status::OK();
314
0
    }
Unexecuted instantiation: _ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE11empty_applyERNS_7IColumnEm
315
316
    static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
317
12
                              IColumn& icolumn) {
318
12
        return execute<true>(type, column, input_rows_count, icolumn);
319
12
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
317
6
                              IColumn& icolumn) {
318
6
        return execute<true>(type, column, input_rows_count, icolumn);
319
6
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
317
6
                              IColumn& icolumn) {
318
6
        return execute<true>(type, column, input_rows_count, icolumn);
319
6
    }
320
321
    static Status combine_apply(const IDataType* type, const IColumn* column,
322
12
                                size_t input_rows_count, IColumn& icolumn) {
323
12
        return execute<false>(type, column, input_rows_count, icolumn);
324
12
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
322
6
                                size_t input_rows_count, IColumn& icolumn) {
323
6
        return execute<false>(type, column, input_rows_count, icolumn);
324
6
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
322
6
                                size_t input_rows_count, IColumn& icolumn) {
323
6
        return execute<false>(type, column, input_rows_count, icolumn);
324
6
    }
325
326
    template <bool first>
327
    static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count,
328
24
                          IColumn& col_to) {
329
24
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
330
24
        if constexpr (first) {
331
12
            to_column.insert_many_defaults(input_rows_count);
332
12
        }
333
24
        auto& col_to_data = to_column.get_data();
334
24
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
335
12
            const typename ColumnString::Chars& data = col_from->get_chars();
336
12
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
337
12
            size_t size = offsets.size();
338
12
            ColumnString::Offset current_offset = 0;
339
36
            for (size_t i = 0; i < size; ++i) {
340
24
                if constexpr (ReturnType == TYPE_INT) {
341
12
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
342
12
                            reinterpret_cast<const char*>(&data[current_offset]),
343
12
                            offsets[i] - current_offset, col_to_data[i]);
344
12
                } else {
345
12
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
346
12
                            reinterpret_cast<const char*>(&data[current_offset]),
347
12
                            offsets[i] - current_offset, col_to_data[i]);
348
12
                }
349
24
                current_offset = offsets[i];
350
24
            }
351
12
        } else if (const ColumnConst* col_from_const =
352
12
                           check_and_get_column_const_string_or_fixedstring(column)) {
353
0
            auto value = col_from_const->get_value<TYPE_STRING>();
354
0
            for (size_t i = 0; i < input_rows_count; ++i) {
355
0
                if constexpr (ReturnType == TYPE_INT) {
356
0
                    col_to_data[i] =
357
0
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
358
0
                } else {
359
0
                    col_to_data[i] =
360
0
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
361
0
                }
362
0
            }
363
12
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
364
36
            for (size_t i = 0; i < input_rows_count; ++i) {
365
24
                auto data_ref = vb_col->get_data_at(i);
366
24
                if constexpr (ReturnType == TYPE_INT) {
367
12
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
368
12
                                                                col_to_data[i]);
369
12
                } else {
370
12
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
371
12
                                                                col_to_data[i]);
372
12
                }
373
24
            }
374
12
        } else {
375
0
            DCHECK(false);
376
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
377
0
                                        column->get_name(), name);
378
0
        }
379
24
        return Status::OK();
380
24
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
328
6
                          IColumn& col_to) {
329
6
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
330
6
        if constexpr (first) {
331
6
            to_column.insert_many_defaults(input_rows_count);
332
6
        }
333
6
        auto& col_to_data = to_column.get_data();
334
6
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
335
3
            const typename ColumnString::Chars& data = col_from->get_chars();
336
3
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
337
3
            size_t size = offsets.size();
338
3
            ColumnString::Offset current_offset = 0;
339
9
            for (size_t i = 0; i < size; ++i) {
340
6
                if constexpr (ReturnType == TYPE_INT) {
341
6
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
342
6
                            reinterpret_cast<const char*>(&data[current_offset]),
343
6
                            offsets[i] - current_offset, col_to_data[i]);
344
                } else {
345
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
346
                            reinterpret_cast<const char*>(&data[current_offset]),
347
                            offsets[i] - current_offset, col_to_data[i]);
348
                }
349
6
                current_offset = offsets[i];
350
6
            }
351
3
        } else if (const ColumnConst* col_from_const =
352
3
                           check_and_get_column_const_string_or_fixedstring(column)) {
353
0
            auto value = col_from_const->get_value<TYPE_STRING>();
354
0
            for (size_t i = 0; i < input_rows_count; ++i) {
355
0
                if constexpr (ReturnType == TYPE_INT) {
356
0
                    col_to_data[i] =
357
0
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
358
                } else {
359
                    col_to_data[i] =
360
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
361
                }
362
0
            }
363
3
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
364
9
            for (size_t i = 0; i < input_rows_count; ++i) {
365
6
                auto data_ref = vb_col->get_data_at(i);
366
6
                if constexpr (ReturnType == TYPE_INT) {
367
6
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
368
6
                                                                col_to_data[i]);
369
                } else {
370
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
371
                                                                col_to_data[i]);
372
                }
373
6
            }
374
3
        } else {
375
0
            DCHECK(false);
376
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
377
0
                                        column->get_name(), name);
378
0
        }
379
6
        return Status::OK();
380
6
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
328
6
                          IColumn& col_to) {
329
6
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
330
        if constexpr (first) {
331
            to_column.insert_many_defaults(input_rows_count);
332
        }
333
6
        auto& col_to_data = to_column.get_data();
334
6
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
335
3
            const typename ColumnString::Chars& data = col_from->get_chars();
336
3
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
337
3
            size_t size = offsets.size();
338
3
            ColumnString::Offset current_offset = 0;
339
9
            for (size_t i = 0; i < size; ++i) {
340
6
                if constexpr (ReturnType == TYPE_INT) {
341
6
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
342
6
                            reinterpret_cast<const char*>(&data[current_offset]),
343
6
                            offsets[i] - current_offset, col_to_data[i]);
344
                } else {
345
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
346
                            reinterpret_cast<const char*>(&data[current_offset]),
347
                            offsets[i] - current_offset, col_to_data[i]);
348
                }
349
6
                current_offset = offsets[i];
350
6
            }
351
3
        } else if (const ColumnConst* col_from_const =
352
3
                           check_and_get_column_const_string_or_fixedstring(column)) {
353
0
            auto value = col_from_const->get_value<TYPE_STRING>();
354
0
            for (size_t i = 0; i < input_rows_count; ++i) {
355
0
                if constexpr (ReturnType == TYPE_INT) {
356
0
                    col_to_data[i] =
357
0
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
358
                } else {
359
                    col_to_data[i] =
360
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
361
                }
362
0
            }
363
3
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
364
9
            for (size_t i = 0; i < input_rows_count; ++i) {
365
6
                auto data_ref = vb_col->get_data_at(i);
366
6
                if constexpr (ReturnType == TYPE_INT) {
367
6
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
368
6
                                                                col_to_data[i]);
369
                } else {
370
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
371
                                                                col_to_data[i]);
372
                }
373
6
            }
374
3
        } else {
375
0
            DCHECK(false);
376
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
377
0
                                        column->get_name(), name);
378
0
        }
379
6
        return Status::OK();
380
6
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
328
6
                          IColumn& col_to) {
329
6
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
330
6
        if constexpr (first) {
331
6
            to_column.insert_many_defaults(input_rows_count);
332
6
        }
333
6
        auto& col_to_data = to_column.get_data();
334
6
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
335
3
            const typename ColumnString::Chars& data = col_from->get_chars();
336
3
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
337
3
            size_t size = offsets.size();
338
3
            ColumnString::Offset current_offset = 0;
339
9
            for (size_t i = 0; i < size; ++i) {
340
                if constexpr (ReturnType == TYPE_INT) {
341
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
342
                            reinterpret_cast<const char*>(&data[current_offset]),
343
                            offsets[i] - current_offset, col_to_data[i]);
344
6
                } else {
345
6
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
346
6
                            reinterpret_cast<const char*>(&data[current_offset]),
347
6
                            offsets[i] - current_offset, col_to_data[i]);
348
6
                }
349
6
                current_offset = offsets[i];
350
6
            }
351
3
        } else if (const ColumnConst* col_from_const =
352
3
                           check_and_get_column_const_string_or_fixedstring(column)) {
353
0
            auto value = col_from_const->get_value<TYPE_STRING>();
354
0
            for (size_t i = 0; i < input_rows_count; ++i) {
355
                if constexpr (ReturnType == TYPE_INT) {
356
                    col_to_data[i] =
357
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
358
0
                } else {
359
0
                    col_to_data[i] =
360
0
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
361
0
                }
362
0
            }
363
3
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
364
9
            for (size_t i = 0; i < input_rows_count; ++i) {
365
6
                auto data_ref = vb_col->get_data_at(i);
366
                if constexpr (ReturnType == TYPE_INT) {
367
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
368
                                                                col_to_data[i]);
369
6
                } else {
370
6
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
371
6
                                                                col_to_data[i]);
372
6
                }
373
6
            }
374
3
        } else {
375
0
            DCHECK(false);
376
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
377
0
                                        column->get_name(), name);
378
0
        }
379
6
        return Status::OK();
380
6
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
328
6
                          IColumn& col_to) {
329
6
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
330
        if constexpr (first) {
331
            to_column.insert_many_defaults(input_rows_count);
332
        }
333
6
        auto& col_to_data = to_column.get_data();
334
6
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
335
3
            const typename ColumnString::Chars& data = col_from->get_chars();
336
3
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
337
3
            size_t size = offsets.size();
338
3
            ColumnString::Offset current_offset = 0;
339
9
            for (size_t i = 0; i < size; ++i) {
340
                if constexpr (ReturnType == TYPE_INT) {
341
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
342
                            reinterpret_cast<const char*>(&data[current_offset]),
343
                            offsets[i] - current_offset, col_to_data[i]);
344
6
                } else {
345
6
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
346
6
                            reinterpret_cast<const char*>(&data[current_offset]),
347
6
                            offsets[i] - current_offset, col_to_data[i]);
348
6
                }
349
6
                current_offset = offsets[i];
350
6
            }
351
3
        } else if (const ColumnConst* col_from_const =
352
3
                           check_and_get_column_const_string_or_fixedstring(column)) {
353
0
            auto value = col_from_const->get_value<TYPE_STRING>();
354
0
            for (size_t i = 0; i < input_rows_count; ++i) {
355
                if constexpr (ReturnType == TYPE_INT) {
356
                    col_to_data[i] =
357
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
358
0
                } else {
359
0
                    col_to_data[i] =
360
0
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
361
0
                }
362
0
            }
363
3
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
364
9
            for (size_t i = 0; i < input_rows_count; ++i) {
365
6
                auto data_ref = vb_col->get_data_at(i);
366
                if constexpr (ReturnType == TYPE_INT) {
367
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
368
                                                                col_to_data[i]);
369
6
                } else {
370
6
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
371
6
                                                                col_to_data[i]);
372
6
                }
373
6
            }
374
3
        } else {
375
0
            DCHECK(false);
376
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
377
0
                                        column->get_name(), name);
378
0
        }
379
6
        return Status::OK();
380
6
    }
381
};
382
383
using FunctionXxHash_32 = FunctionVariadicArgumentsBase<DataTypeInt32, XxHashImpl<TYPE_INT>>;
384
using FunctionXxHash_64 = FunctionVariadicArgumentsBase<DataTypeInt64, XxHashImpl<TYPE_BIGINT>>;
385
386
1
void register_function_hash(SimpleFunctionFactory& factory) {
387
1
    factory.register_function<FunctionMurmurHash3_32>();
388
1
    factory.register_function<FunctionMurmurHash3_64>();
389
1
    factory.register_function<FunctionMurmurHash3_64_V2>();
390
1
    factory.register_function<FunctionMurmurHash3U64V2>();
391
1
    factory.register_function<FunctionMurmurHash3_128>();
392
1
    factory.register_function<FunctionMurmurHash3U128>();
393
1
    factory.register_function<FunctionXxHash_32>();
394
1
    factory.register_function<FunctionXxHash_64>();
395
1
    factory.register_alias("xxhash_64", "xxhash3_64");
396
1
}
397
} // namespace doris