Coverage Report

Created: 2026-04-15 12:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_hash.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionHash.cpp
19
// and modified by Doris
20
21
#include "exprs/function/function_hash.h"
22
23
#include "common/status.h"
24
#include "core/assert_cast.h"
25
#include "core/column/column.h"
26
#include "core/column/column_const.h"
27
#include "core/column/column_string.h"
28
#include "core/column/column_varbinary.h"
29
#include "core/column/column_vector.h"
30
#include "core/data_type/data_type.h"
31
#include "core/data_type/data_type_number.h"
32
#include "core/field.h"
33
#include "exec/common/template_helpers.hpp"
34
#include "exprs/function/function_helpers.h"
35
#include "exprs/function/function_variadic_arguments.h"
36
#include "exprs/function/simple_function_factory.h"
37
#include "util/hash/murmur_hash3.h"
38
#include "util/hash_util.hpp"
39
40
namespace doris {
41
constexpr uint64_t emtpy_value = 0xe28dbde7fe22e41c;
42
43
template <PrimitiveType ReturnType, bool is_mmh64_v2 = false>
44
struct MurmurHash3Impl {
45
0
    static constexpr auto get_name() {
46
0
        if constexpr (ReturnType == TYPE_INT) {
47
0
            return "murmur_hash3_32";
48
0
        } else if constexpr (ReturnType == TYPE_LARGEINT) {
49
0
            return "murmur_hash3_u64_v2";
50
0
        } else if constexpr (is_mmh64_v2) {
51
0
            return "murmur_hash3_64_v2";
52
0
        } else {
53
0
            return "murmur_hash3_64";
54
0
        }
55
0
    }
56
    static constexpr auto name = get_name();
57
58
0
    static Status empty_apply(IColumn& icolumn, size_t input_rows_count) {
59
0
        ColumnVector<ReturnType>& vec_to = assert_cast<ColumnVector<ReturnType>&>(icolumn);
60
0
        vec_to.get_data().assign(
61
0
                input_rows_count,
62
0
                static_cast<typename PrimitiveTypeTraits<ReturnType>::CppType>(emtpy_value));
63
0
        return Status::OK();
64
0
    }
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE11empty_applyERNS_7IColumnEm
65
66
    static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
67
88
                              IColumn& icolumn) {
68
88
        return execute<true>(type, column, input_rows_count, icolumn);
69
88
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
67
24
                              IColumn& icolumn) {
68
24
        return execute<true>(type, column, input_rows_count, icolumn);
69
24
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
67
24
                              IColumn& icolumn) {
68
24
        return execute<true>(type, column, input_rows_count, icolumn);
69
24
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
67
19
                              IColumn& icolumn) {
68
19
        return execute<true>(type, column, input_rows_count, icolumn);
69
19
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
67
21
                              IColumn& icolumn) {
68
21
        return execute<true>(type, column, input_rows_count, icolumn);
69
21
    }
70
71
    static Status combine_apply(const IDataType* type, const IColumn* column,
72
19
                                size_t input_rows_count, IColumn& icolumn) {
73
19
        return execute<false>(type, column, input_rows_count, icolumn);
74
19
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
72
7
                                size_t input_rows_count, IColumn& icolumn) {
73
7
        return execute<false>(type, column, input_rows_count, icolumn);
74
7
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
72
7
                                size_t input_rows_count, IColumn& icolumn) {
73
7
        return execute<false>(type, column, input_rows_count, icolumn);
74
7
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
72
1
                                size_t input_rows_count, IColumn& icolumn) {
73
1
        return execute<false>(type, column, input_rows_count, icolumn);
74
1
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
72
4
                                size_t input_rows_count, IColumn& icolumn) {
73
4
        return execute<false>(type, column, input_rows_count, icolumn);
74
4
    }
75
76
    template <bool first>
77
    static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count,
78
107
                          IColumn& col_to) {
79
107
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
80
107
        if constexpr (first) {
81
88
            if constexpr (ReturnType == TYPE_INT) {
82
24
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
83
24
                                           input_rows_count);
84
64
            } else {
85
64
                to_column.insert_many_defaults(input_rows_count);
86
64
            }
87
88
        }
88
107
        auto& col_to_data = to_column.get_data();
89
107
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
90
107
            const typename ColumnString::Chars& data = col_from->get_chars();
91
107
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
92
107
            size_t size = offsets.size();
93
107
            ColumnString::Offset current_offset = 0;
94
326
            for (size_t i = 0; i < size; ++i) {
95
219
                if constexpr (ReturnType == TYPE_INT) {
96
77
                    col_to_data[i] = HashUtil::murmur_hash3_32(
97
77
                            reinterpret_cast<const char*>(&data[current_offset]),
98
77
                            offsets[i] - current_offset, col_to_data[i]);
99
142
                } else {
100
142
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
101
142
                            reinterpret_cast<const char*>(&data[current_offset]),
102
142
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
103
142
                }
104
219
                current_offset = offsets[i];
105
219
            }
106
107
        } else if (const ColumnConst* col_from_const =
107
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
108
0
            auto value = col_from_const->get_value<TYPE_STRING>();
109
0
            for (size_t i = 0; i < input_rows_count; ++i) {
110
0
                if constexpr (ReturnType == TYPE_INT) {
111
0
                    col_to_data[i] =
112
0
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
113
0
                } else {
114
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
115
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
116
0
                }
117
0
            }
118
0
        } else {
119
0
            DCHECK(false);
120
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
121
0
                                        column->get_name(), name);
122
0
        }
123
107
        return Status::OK();
124
107
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
78
24
                          IColumn& col_to) {
79
24
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
80
24
        if constexpr (first) {
81
24
            if constexpr (ReturnType == TYPE_INT) {
82
24
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
83
24
                                           input_rows_count);
84
            } else {
85
                to_column.insert_many_defaults(input_rows_count);
86
            }
87
24
        }
88
24
        auto& col_to_data = to_column.get_data();
89
24
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
90
24
            const typename ColumnString::Chars& data = col_from->get_chars();
91
24
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
92
24
            size_t size = offsets.size();
93
24
            ColumnString::Offset current_offset = 0;
94
88
            for (size_t i = 0; i < size; ++i) {
95
64
                if constexpr (ReturnType == TYPE_INT) {
96
64
                    col_to_data[i] = HashUtil::murmur_hash3_32(
97
64
                            reinterpret_cast<const char*>(&data[current_offset]),
98
64
                            offsets[i] - current_offset, col_to_data[i]);
99
                } else {
100
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
101
                            reinterpret_cast<const char*>(&data[current_offset]),
102
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
103
                }
104
64
                current_offset = offsets[i];
105
64
            }
106
24
        } else if (const ColumnConst* col_from_const =
107
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
108
0
            auto value = col_from_const->get_value<TYPE_STRING>();
109
0
            for (size_t i = 0; i < input_rows_count; ++i) {
110
0
                if constexpr (ReturnType == TYPE_INT) {
111
0
                    col_to_data[i] =
112
0
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
113
                } else {
114
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
115
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
116
                }
117
0
            }
118
0
        } else {
119
0
            DCHECK(false);
120
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
121
0
                                        column->get_name(), name);
122
0
        }
123
24
        return Status::OK();
124
24
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
78
7
                          IColumn& col_to) {
79
7
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
80
        if constexpr (first) {
81
            if constexpr (ReturnType == TYPE_INT) {
82
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
83
                                           input_rows_count);
84
            } else {
85
                to_column.insert_many_defaults(input_rows_count);
86
            }
87
        }
88
7
        auto& col_to_data = to_column.get_data();
89
7
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
90
7
            const typename ColumnString::Chars& data = col_from->get_chars();
91
7
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
92
7
            size_t size = offsets.size();
93
7
            ColumnString::Offset current_offset = 0;
94
20
            for (size_t i = 0; i < size; ++i) {
95
13
                if constexpr (ReturnType == TYPE_INT) {
96
13
                    col_to_data[i] = HashUtil::murmur_hash3_32(
97
13
                            reinterpret_cast<const char*>(&data[current_offset]),
98
13
                            offsets[i] - current_offset, col_to_data[i]);
99
                } else {
100
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
101
                            reinterpret_cast<const char*>(&data[current_offset]),
102
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
103
                }
104
13
                current_offset = offsets[i];
105
13
            }
106
7
        } else if (const ColumnConst* col_from_const =
107
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
108
0
            auto value = col_from_const->get_value<TYPE_STRING>();
109
0
            for (size_t i = 0; i < input_rows_count; ++i) {
110
0
                if constexpr (ReturnType == TYPE_INT) {
111
0
                    col_to_data[i] =
112
0
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
113
                } else {
114
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
115
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
116
                }
117
0
            }
118
0
        } else {
119
0
            DCHECK(false);
120
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
121
0
                                        column->get_name(), name);
122
0
        }
123
7
        return Status::OK();
124
7
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
78
24
                          IColumn& col_to) {
79
24
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
80
24
        if constexpr (first) {
81
            if constexpr (ReturnType == TYPE_INT) {
82
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
83
                                           input_rows_count);
84
24
            } else {
85
24
                to_column.insert_many_defaults(input_rows_count);
86
24
            }
87
24
        }
88
24
        auto& col_to_data = to_column.get_data();
89
24
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
90
24
            const typename ColumnString::Chars& data = col_from->get_chars();
91
24
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
92
24
            size_t size = offsets.size();
93
24
            ColumnString::Offset current_offset = 0;
94
88
            for (size_t i = 0; i < size; ++i) {
95
                if constexpr (ReturnType == TYPE_INT) {
96
                    col_to_data[i] = HashUtil::murmur_hash3_32(
97
                            reinterpret_cast<const char*>(&data[current_offset]),
98
                            offsets[i] - current_offset, col_to_data[i]);
99
64
                } else {
100
64
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
101
64
                            reinterpret_cast<const char*>(&data[current_offset]),
102
64
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
103
64
                }
104
64
                current_offset = offsets[i];
105
64
            }
106
24
        } else if (const ColumnConst* col_from_const =
107
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
108
0
            auto value = col_from_const->get_value<TYPE_STRING>();
109
0
            for (size_t i = 0; i < input_rows_count; ++i) {
110
                if constexpr (ReturnType == TYPE_INT) {
111
                    col_to_data[i] =
112
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
113
0
                } else {
114
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
115
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
116
0
                }
117
0
            }
118
0
        } else {
119
0
            DCHECK(false);
120
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
121
0
                                        column->get_name(), name);
122
0
        }
123
24
        return Status::OK();
124
24
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
78
7
                          IColumn& col_to) {
79
7
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
80
        if constexpr (first) {
81
            if constexpr (ReturnType == TYPE_INT) {
82
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
83
                                           input_rows_count);
84
            } else {
85
                to_column.insert_many_defaults(input_rows_count);
86
            }
87
        }
88
7
        auto& col_to_data = to_column.get_data();
89
7
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
90
7
            const typename ColumnString::Chars& data = col_from->get_chars();
91
7
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
92
7
            size_t size = offsets.size();
93
7
            ColumnString::Offset current_offset = 0;
94
20
            for (size_t i = 0; i < size; ++i) {
95
                if constexpr (ReturnType == TYPE_INT) {
96
                    col_to_data[i] = HashUtil::murmur_hash3_32(
97
                            reinterpret_cast<const char*>(&data[current_offset]),
98
                            offsets[i] - current_offset, col_to_data[i]);
99
13
                } else {
100
13
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
101
13
                            reinterpret_cast<const char*>(&data[current_offset]),
102
13
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
103
13
                }
104
13
                current_offset = offsets[i];
105
13
            }
106
7
        } else if (const ColumnConst* col_from_const =
107
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
108
0
            auto value = col_from_const->get_value<TYPE_STRING>();
109
0
            for (size_t i = 0; i < input_rows_count; ++i) {
110
                if constexpr (ReturnType == TYPE_INT) {
111
                    col_to_data[i] =
112
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
113
0
                } else {
114
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
115
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
116
0
                }
117
0
            }
118
0
        } else {
119
0
            DCHECK(false);
120
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
121
0
                                        column->get_name(), name);
122
0
        }
123
7
        return Status::OK();
124
7
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
78
19
                          IColumn& col_to) {
79
19
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
80
19
        if constexpr (first) {
81
            if constexpr (ReturnType == TYPE_INT) {
82
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
83
                                           input_rows_count);
84
19
            } else {
85
19
                to_column.insert_many_defaults(input_rows_count);
86
19
            }
87
19
        }
88
19
        auto& col_to_data = to_column.get_data();
89
19
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
90
19
            const typename ColumnString::Chars& data = col_from->get_chars();
91
19
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
92
19
            size_t size = offsets.size();
93
19
            ColumnString::Offset current_offset = 0;
94
50
            for (size_t i = 0; i < size; ++i) {
95
                if constexpr (ReturnType == TYPE_INT) {
96
                    col_to_data[i] = HashUtil::murmur_hash3_32(
97
                            reinterpret_cast<const char*>(&data[current_offset]),
98
                            offsets[i] - current_offset, col_to_data[i]);
99
31
                } else {
100
31
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
101
31
                            reinterpret_cast<const char*>(&data[current_offset]),
102
31
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
103
31
                }
104
31
                current_offset = offsets[i];
105
31
            }
106
19
        } else if (const ColumnConst* col_from_const =
107
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
108
0
            auto value = col_from_const->get_value<TYPE_STRING>();
109
0
            for (size_t i = 0; i < input_rows_count; ++i) {
110
                if constexpr (ReturnType == TYPE_INT) {
111
                    col_to_data[i] =
112
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
113
0
                } else {
114
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
115
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
116
0
                }
117
0
            }
118
0
        } else {
119
0
            DCHECK(false);
120
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
121
0
                                        column->get_name(), name);
122
0
        }
123
19
        return Status::OK();
124
19
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
78
1
                          IColumn& col_to) {
79
1
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
80
        if constexpr (first) {
81
            if constexpr (ReturnType == TYPE_INT) {
82
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
83
                                           input_rows_count);
84
            } else {
85
                to_column.insert_many_defaults(input_rows_count);
86
            }
87
        }
88
1
        auto& col_to_data = to_column.get_data();
89
1
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
90
1
            const typename ColumnString::Chars& data = col_from->get_chars();
91
1
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
92
1
            size_t size = offsets.size();
93
1
            ColumnString::Offset current_offset = 0;
94
2
            for (size_t i = 0; i < size; ++i) {
95
                if constexpr (ReturnType == TYPE_INT) {
96
                    col_to_data[i] = HashUtil::murmur_hash3_32(
97
                            reinterpret_cast<const char*>(&data[current_offset]),
98
                            offsets[i] - current_offset, col_to_data[i]);
99
1
                } else {
100
1
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
101
1
                            reinterpret_cast<const char*>(&data[current_offset]),
102
1
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
103
1
                }
104
1
                current_offset = offsets[i];
105
1
            }
106
1
        } else if (const ColumnConst* col_from_const =
107
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
108
0
            auto value = col_from_const->get_value<TYPE_STRING>();
109
0
            for (size_t i = 0; i < input_rows_count; ++i) {
110
                if constexpr (ReturnType == TYPE_INT) {
111
                    col_to_data[i] =
112
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
113
0
                } else {
114
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
115
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
116
0
                }
117
0
            }
118
0
        } else {
119
0
            DCHECK(false);
120
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
121
0
                                        column->get_name(), name);
122
0
        }
123
1
        return Status::OK();
124
1
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
78
21
                          IColumn& col_to) {
79
21
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
80
21
        if constexpr (first) {
81
            if constexpr (ReturnType == TYPE_INT) {
82
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
83
                                           input_rows_count);
84
21
            } else {
85
21
                to_column.insert_many_defaults(input_rows_count);
86
21
            }
87
21
        }
88
21
        auto& col_to_data = to_column.get_data();
89
21
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
90
21
            const typename ColumnString::Chars& data = col_from->get_chars();
91
21
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
92
21
            size_t size = offsets.size();
93
21
            ColumnString::Offset current_offset = 0;
94
50
            for (size_t i = 0; i < size; ++i) {
95
                if constexpr (ReturnType == TYPE_INT) {
96
                    col_to_data[i] = HashUtil::murmur_hash3_32(
97
                            reinterpret_cast<const char*>(&data[current_offset]),
98
                            offsets[i] - current_offset, col_to_data[i]);
99
29
                } else {
100
29
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
101
29
                            reinterpret_cast<const char*>(&data[current_offset]),
102
29
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
103
29
                }
104
29
                current_offset = offsets[i];
105
29
            }
106
21
        } else if (const ColumnConst* col_from_const =
107
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
108
0
            auto value = col_from_const->get_value<TYPE_STRING>();
109
0
            for (size_t i = 0; i < input_rows_count; ++i) {
110
                if constexpr (ReturnType == TYPE_INT) {
111
                    col_to_data[i] =
112
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
113
0
                } else {
114
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
115
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
116
0
                }
117
0
            }
118
0
        } else {
119
0
            DCHECK(false);
120
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
121
0
                                        column->get_name(), name);
122
0
        }
123
21
        return Status::OK();
124
21
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
78
4
                          IColumn& col_to) {
79
4
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
80
        if constexpr (first) {
81
            if constexpr (ReturnType == TYPE_INT) {
82
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
83
                                           input_rows_count);
84
            } else {
85
                to_column.insert_many_defaults(input_rows_count);
86
            }
87
        }
88
4
        auto& col_to_data = to_column.get_data();
89
4
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
90
4
            const typename ColumnString::Chars& data = col_from->get_chars();
91
4
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
92
4
            size_t size = offsets.size();
93
4
            ColumnString::Offset current_offset = 0;
94
8
            for (size_t i = 0; i < size; ++i) {
95
                if constexpr (ReturnType == TYPE_INT) {
96
                    col_to_data[i] = HashUtil::murmur_hash3_32(
97
                            reinterpret_cast<const char*>(&data[current_offset]),
98
                            offsets[i] - current_offset, col_to_data[i]);
99
4
                } else {
100
4
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
101
4
                            reinterpret_cast<const char*>(&data[current_offset]),
102
4
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
103
4
                }
104
4
                current_offset = offsets[i];
105
4
            }
106
4
        } else if (const ColumnConst* col_from_const =
107
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
108
0
            auto value = col_from_const->get_value<TYPE_STRING>();
109
0
            for (size_t i = 0; i < input_rows_count; ++i) {
110
                if constexpr (ReturnType == TYPE_INT) {
111
                    col_to_data[i] =
112
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
113
0
                } else {
114
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
115
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
116
0
                }
117
0
            }
118
0
        } else {
119
0
            DCHECK(false);
120
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
121
0
                                        column->get_name(), name);
122
0
        }
123
4
        return Status::OK();
124
4
    }
125
};
126
127
using FunctionMurmurHash3_32 =
128
        FunctionVariadicArgumentsBase<DataTypeInt32, MurmurHash3Impl<TYPE_INT>>;
129
using FunctionMurmurHash3_64 =
130
        FunctionVariadicArgumentsBase<DataTypeInt64, MurmurHash3Impl<TYPE_BIGINT>>;
131
using FunctionMurmurHash3_64_V2 =
132
        FunctionVariadicArgumentsBase<DataTypeInt64, MurmurHash3Impl<TYPE_BIGINT, true>>;
133
using FunctionMurmurHash3U64V2 =
134
        FunctionVariadicArgumentsBase<DataTypeInt128, MurmurHash3Impl<TYPE_LARGEINT, true>>;
135
136
#ifdef BE_TEST
137
const char* murmur_hash3_get_name_type_int_for_test() {
138
    return MurmurHash3Impl<TYPE_INT>::get_name();
139
}
140
141
const char* murmur_hash3_get_name_type_bigint_for_test() {
142
    return MurmurHash3Impl<TYPE_BIGINT>::get_name();
143
}
144
145
const char* murmur_hash3_get_name_type_bigint_v2_for_test() {
146
    return MurmurHash3Impl<TYPE_BIGINT, true>::get_name();
147
}
148
#endif
149
150
template <PrimitiveType ReturnType>
151
struct XxHashImpl {
152
    static constexpr auto name = ReturnType == TYPE_INT ? "xxhash_32" : "xxhash_64";
153
154
0
    static Status empty_apply(IColumn& icolumn, size_t input_rows_count) {
155
0
        ColumnVector<ReturnType>& vec_to = assert_cast<ColumnVector<ReturnType>&>(icolumn);
156
0
        vec_to.get_data().assign(
157
0
                input_rows_count,
158
0
                static_cast<typename PrimitiveTypeTraits<ReturnType>::CppType>(emtpy_value));
159
0
        return Status::OK();
160
0
    }
Unexecuted instantiation: _ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE11empty_applyERNS_7IColumnEm
161
162
    static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
163
1.10k
                              IColumn& icolumn) {
164
1.10k
        return execute<true>(type, column, input_rows_count, icolumn);
165
1.10k
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
163
110
                              IColumn& icolumn) {
164
110
        return execute<true>(type, column, input_rows_count, icolumn);
165
110
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
163
999
                              IColumn& icolumn) {
164
999
        return execute<true>(type, column, input_rows_count, icolumn);
165
999
    }
166
167
    static Status combine_apply(const IDataType* type, const IColumn* column,
168
36
                                size_t input_rows_count, IColumn& icolumn) {
169
36
        return execute<false>(type, column, input_rows_count, icolumn);
170
36
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
168
17
                                size_t input_rows_count, IColumn& icolumn) {
169
17
        return execute<false>(type, column, input_rows_count, icolumn);
170
17
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
168
19
                                size_t input_rows_count, IColumn& icolumn) {
169
19
        return execute<false>(type, column, input_rows_count, icolumn);
170
19
    }
171
172
    template <bool first>
173
    static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count,
174
1.14k
                          IColumn& col_to) {
175
1.14k
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
176
1.14k
        if constexpr (first) {
177
1.10k
            to_column.insert_many_defaults(input_rows_count);
178
1.10k
        }
179
1.14k
        auto& col_to_data = to_column.get_data();
180
1.14k
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
181
1.10k
            const typename ColumnString::Chars& data = col_from->get_chars();
182
1.10k
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
183
1.10k
            size_t size = offsets.size();
184
1.10k
            ColumnString::Offset current_offset = 0;
185
93.2k
            for (size_t i = 0; i < size; ++i) {
186
92.1k
                if constexpr (ReturnType == TYPE_INT) {
187
422
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
188
422
                            reinterpret_cast<const char*>(&data[current_offset]),
189
422
                            offsets[i] - current_offset, col_to_data[i]);
190
91.6k
                } else {
191
91.6k
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
192
91.6k
                            reinterpret_cast<const char*>(&data[current_offset]),
193
91.6k
                            offsets[i] - current_offset, col_to_data[i]);
194
91.6k
                }
195
92.1k
                current_offset = offsets[i];
196
92.1k
            }
197
1.10k
        } else if (const ColumnConst* col_from_const =
198
36
                           check_and_get_column_const_string_or_fixedstring(column)) {
199
0
            auto value = col_from_const->get_value<TYPE_STRING>();
200
0
            for (size_t i = 0; i < input_rows_count; ++i) {
201
0
                if constexpr (ReturnType == TYPE_INT) {
202
0
                    col_to_data[i] =
203
0
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
204
0
                } else {
205
0
                    col_to_data[i] =
206
0
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
207
0
                }
208
0
            }
209
36
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
210
144
            for (size_t i = 0; i < input_rows_count; ++i) {
211
108
                auto data_ref = vb_col->get_data_at(i);
212
108
                if constexpr (ReturnType == TYPE_INT) {
213
54
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
214
54
                                                                col_to_data[i]);
215
54
                } else {
216
54
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
217
54
                                                                col_to_data[i]);
218
54
                }
219
108
            }
220
36
        } else {
221
0
            DCHECK(false);
222
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
223
0
                                        column->get_name(), name);
224
0
        }
225
1.14k
        return Status::OK();
226
1.14k
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
174
110
                          IColumn& col_to) {
175
110
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
176
110
        if constexpr (first) {
177
110
            to_column.insert_many_defaults(input_rows_count);
178
110
        }
179
110
        auto& col_to_data = to_column.get_data();
180
110
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
181
100
            const typename ColumnString::Chars& data = col_from->get_chars();
182
100
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
183
100
            size_t size = offsets.size();
184
100
            ColumnString::Offset current_offset = 0;
185
499
            for (size_t i = 0; i < size; ++i) {
186
399
                if constexpr (ReturnType == TYPE_INT) {
187
399
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
188
399
                            reinterpret_cast<const char*>(&data[current_offset]),
189
399
                            offsets[i] - current_offset, col_to_data[i]);
190
                } else {
191
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
192
                            reinterpret_cast<const char*>(&data[current_offset]),
193
                            offsets[i] - current_offset, col_to_data[i]);
194
                }
195
399
                current_offset = offsets[i];
196
399
            }
197
100
        } else if (const ColumnConst* col_from_const =
198
10
                           check_and_get_column_const_string_or_fixedstring(column)) {
199
0
            auto value = col_from_const->get_value<TYPE_STRING>();
200
0
            for (size_t i = 0; i < input_rows_count; ++i) {
201
0
                if constexpr (ReturnType == TYPE_INT) {
202
0
                    col_to_data[i] =
203
0
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
204
                } else {
205
                    col_to_data[i] =
206
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
207
                }
208
0
            }
209
10
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
210
42
            for (size_t i = 0; i < input_rows_count; ++i) {
211
32
                auto data_ref = vb_col->get_data_at(i);
212
32
                if constexpr (ReturnType == TYPE_INT) {
213
32
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
214
32
                                                                col_to_data[i]);
215
                } else {
216
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
217
                                                                col_to_data[i]);
218
                }
219
32
            }
220
10
        } else {
221
0
            DCHECK(false);
222
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
223
0
                                        column->get_name(), name);
224
0
        }
225
110
        return Status::OK();
226
110
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
174
17
                          IColumn& col_to) {
175
17
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
176
        if constexpr (first) {
177
            to_column.insert_many_defaults(input_rows_count);
178
        }
179
17
        auto& col_to_data = to_column.get_data();
180
17
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
181
9
            const typename ColumnString::Chars& data = col_from->get_chars();
182
9
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
183
9
            size_t size = offsets.size();
184
9
            ColumnString::Offset current_offset = 0;
185
32
            for (size_t i = 0; i < size; ++i) {
186
23
                if constexpr (ReturnType == TYPE_INT) {
187
23
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
188
23
                            reinterpret_cast<const char*>(&data[current_offset]),
189
23
                            offsets[i] - current_offset, col_to_data[i]);
190
                } else {
191
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
192
                            reinterpret_cast<const char*>(&data[current_offset]),
193
                            offsets[i] - current_offset, col_to_data[i]);
194
                }
195
23
                current_offset = offsets[i];
196
23
            }
197
9
        } else if (const ColumnConst* col_from_const =
198
8
                           check_and_get_column_const_string_or_fixedstring(column)) {
199
0
            auto value = col_from_const->get_value<TYPE_STRING>();
200
0
            for (size_t i = 0; i < input_rows_count; ++i) {
201
0
                if constexpr (ReturnType == TYPE_INT) {
202
0
                    col_to_data[i] =
203
0
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
204
                } else {
205
                    col_to_data[i] =
206
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
207
                }
208
0
            }
209
8
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
210
30
            for (size_t i = 0; i < input_rows_count; ++i) {
211
22
                auto data_ref = vb_col->get_data_at(i);
212
22
                if constexpr (ReturnType == TYPE_INT) {
213
22
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
214
22
                                                                col_to_data[i]);
215
                } else {
216
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
217
                                                                col_to_data[i]);
218
                }
219
22
            }
220
8
        } else {
221
0
            DCHECK(false);
222
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
223
0
                                        column->get_name(), name);
224
0
        }
225
17
        return Status::OK();
226
17
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
174
999
                          IColumn& col_to) {
175
999
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
176
999
        if constexpr (first) {
177
999
            to_column.insert_many_defaults(input_rows_count);
178
999
        }
179
999
        auto& col_to_data = to_column.get_data();
180
999
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
181
989
            const typename ColumnString::Chars& data = col_from->get_chars();
182
989
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
183
989
            size_t size = offsets.size();
184
989
            ColumnString::Offset current_offset = 0;
185
92.6k
            for (size_t i = 0; i < size; ++i) {
186
                if constexpr (ReturnType == TYPE_INT) {
187
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
188
                            reinterpret_cast<const char*>(&data[current_offset]),
189
                            offsets[i] - current_offset, col_to_data[i]);
190
91.6k
                } else {
191
91.6k
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
192
91.6k
                            reinterpret_cast<const char*>(&data[current_offset]),
193
91.6k
                            offsets[i] - current_offset, col_to_data[i]);
194
91.6k
                }
195
91.6k
                current_offset = offsets[i];
196
91.6k
            }
197
989
        } else if (const ColumnConst* col_from_const =
198
10
                           check_and_get_column_const_string_or_fixedstring(column)) {
199
0
            auto value = col_from_const->get_value<TYPE_STRING>();
200
0
            for (size_t i = 0; i < input_rows_count; ++i) {
201
                if constexpr (ReturnType == TYPE_INT) {
202
                    col_to_data[i] =
203
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
204
0
                } else {
205
0
                    col_to_data[i] =
206
0
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
207
0
                }
208
0
            }
209
10
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
210
42
            for (size_t i = 0; i < input_rows_count; ++i) {
211
32
                auto data_ref = vb_col->get_data_at(i);
212
                if constexpr (ReturnType == TYPE_INT) {
213
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
214
                                                                col_to_data[i]);
215
32
                } else {
216
32
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
217
32
                                                                col_to_data[i]);
218
32
                }
219
32
            }
220
10
        } else {
221
0
            DCHECK(false);
222
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
223
0
                                        column->get_name(), name);
224
0
        }
225
999
        return Status::OK();
226
999
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
174
19
                          IColumn& col_to) {
175
19
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
176
        if constexpr (first) {
177
            to_column.insert_many_defaults(input_rows_count);
178
        }
179
19
        auto& col_to_data = to_column.get_data();
180
19
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
181
11
            const typename ColumnString::Chars& data = col_from->get_chars();
182
11
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
183
11
            size_t size = offsets.size();
184
11
            ColumnString::Offset current_offset = 0;
185
36
            for (size_t i = 0; i < size; ++i) {
186
                if constexpr (ReturnType == TYPE_INT) {
187
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
188
                            reinterpret_cast<const char*>(&data[current_offset]),
189
                            offsets[i] - current_offset, col_to_data[i]);
190
25
                } else {
191
25
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
192
25
                            reinterpret_cast<const char*>(&data[current_offset]),
193
25
                            offsets[i] - current_offset, col_to_data[i]);
194
25
                }
195
25
                current_offset = offsets[i];
196
25
            }
197
11
        } else if (const ColumnConst* col_from_const =
198
8
                           check_and_get_column_const_string_or_fixedstring(column)) {
199
0
            auto value = col_from_const->get_value<TYPE_STRING>();
200
0
            for (size_t i = 0; i < input_rows_count; ++i) {
201
                if constexpr (ReturnType == TYPE_INT) {
202
                    col_to_data[i] =
203
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
204
0
                } else {
205
0
                    col_to_data[i] =
206
0
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
207
0
                }
208
0
            }
209
8
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
210
30
            for (size_t i = 0; i < input_rows_count; ++i) {
211
22
                auto data_ref = vb_col->get_data_at(i);
212
                if constexpr (ReturnType == TYPE_INT) {
213
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
214
                                                                col_to_data[i]);
215
22
                } else {
216
22
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
217
22
                                                                col_to_data[i]);
218
22
                }
219
22
            }
220
8
        } else {
221
0
            DCHECK(false);
222
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
223
0
                                        column->get_name(), name);
224
0
        }
225
19
        return Status::OK();
226
19
    }
227
};
228
229
using FunctionXxHash_32 = FunctionVariadicArgumentsBase<DataTypeInt32, XxHashImpl<TYPE_INT>>;
230
using FunctionXxHash_64 = FunctionVariadicArgumentsBase<DataTypeInt64, XxHashImpl<TYPE_BIGINT>>;
231
232
9
void register_function_hash(SimpleFunctionFactory& factory) {
233
9
    factory.register_function<FunctionMurmurHash3_32>();
234
9
    factory.register_function<FunctionMurmurHash3_64>();
235
9
    factory.register_function<FunctionMurmurHash3_64_V2>();
236
9
    factory.register_function<FunctionMurmurHash3U64V2>();
237
9
    factory.register_function<FunctionXxHash_32>();
238
9
    factory.register_function<FunctionXxHash_64>();
239
9
    factory.register_alias("xxhash_64", "xxhash3_64");
240
9
}
241
} // namespace doris