Coverage Report

Created: 2026-04-02 09:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_hash.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionHash.cpp
19
// and modified by Doris
20
21
#include "exprs/function/function_hash.h"
22
23
#include "common/status.h"
24
#include "core/assert_cast.h"
25
#include "core/column/column.h"
26
#include "core/column/column_const.h"
27
#include "core/column/column_string.h"
28
#include "core/column/column_varbinary.h"
29
#include "core/column/column_vector.h"
30
#include "core/data_type/data_type.h"
31
#include "core/data_type/data_type_number.h"
32
#include "core/field.h"
33
#include "exec/common/template_helpers.hpp"
34
#include "exprs/function/function_helpers.h"
35
#include "exprs/function/function_variadic_arguments.h"
36
#include "exprs/function/simple_function_factory.h"
37
#include "util/hash/murmur_hash3.h"
38
#include "util/hash_util.hpp"
39
40
namespace doris {
41
#include "common/compile_check_begin.h"
42
constexpr uint64_t emtpy_value = 0xe28dbde7fe22e41c;
43
44
template <PrimitiveType ReturnType, bool is_mmh64_v2 = false>
45
struct MurmurHash3Impl {
46
0
    static constexpr auto get_name() {
47
0
        if constexpr (ReturnType == TYPE_INT) {
48
0
            return "murmur_hash3_32";
49
0
        } else if constexpr (ReturnType == TYPE_LARGEINT) {
50
0
            return "murmur_hash3_u64_v2";
51
0
        } else if constexpr (is_mmh64_v2) {
52
0
            return "murmur_hash3_64_v2";
53
0
        } else {
54
0
            return "murmur_hash3_64";
55
0
        }
56
0
    }
57
    static constexpr auto name = get_name();
58
59
0
    static Status empty_apply(IColumn& icolumn, size_t input_rows_count) {
60
0
        ColumnVector<ReturnType>& vec_to = assert_cast<ColumnVector<ReturnType>&>(icolumn);
61
0
        vec_to.get_data().assign(
62
0
                input_rows_count,
63
0
                static_cast<typename PrimitiveTypeTraits<ReturnType>::CppType>(emtpy_value));
64
0
        return Status::OK();
65
0
    }
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE11empty_applyERNS_7IColumnEm
66
67
    static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
68
81
                              IColumn& icolumn) {
69
81
        return execute<true>(type, column, input_rows_count, icolumn);
70
81
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
68
21
                              IColumn& icolumn) {
69
21
        return execute<true>(type, column, input_rows_count, icolumn);
70
21
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
68
21
                              IColumn& icolumn) {
69
21
        return execute<true>(type, column, input_rows_count, icolumn);
70
21
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
68
18
                              IColumn& icolumn) {
69
18
        return execute<true>(type, column, input_rows_count, icolumn);
70
18
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
68
21
                              IColumn& icolumn) {
69
21
        return execute<true>(type, column, input_rows_count, icolumn);
70
21
    }
71
72
    static Status combine_apply(const IDataType* type, const IColumn* column,
73
13
                                size_t input_rows_count, IColumn& icolumn) {
74
13
        return execute<false>(type, column, input_rows_count, icolumn);
75
13
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
73
4
                                size_t input_rows_count, IColumn& icolumn) {
74
4
        return execute<false>(type, column, input_rows_count, icolumn);
75
4
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
73
4
                                size_t input_rows_count, IColumn& icolumn) {
74
4
        return execute<false>(type, column, input_rows_count, icolumn);
75
4
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
73
1
                                size_t input_rows_count, IColumn& icolumn) {
74
1
        return execute<false>(type, column, input_rows_count, icolumn);
75
1
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
73
4
                                size_t input_rows_count, IColumn& icolumn) {
74
4
        return execute<false>(type, column, input_rows_count, icolumn);
75
4
    }
76
77
    template <bool first>
78
    static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count,
79
94
                          IColumn& col_to) {
80
94
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
81
94
        if constexpr (first) {
82
81
            if constexpr (ReturnType == TYPE_INT) {
83
21
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
84
21
                                           input_rows_count);
85
60
            } else {
86
60
                to_column.insert_many_defaults(input_rows_count);
87
60
            }
88
81
        }
89
94
        auto& col_to_data = to_column.get_data();
90
94
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
91
94
            const typename ColumnString::Chars& data = col_from->get_chars();
92
94
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
93
94
            size_t size = offsets.size();
94
94
            ColumnString::Offset current_offset = 0;
95
286
            for (size_t i = 0; i < size; ++i) {
96
192
                if constexpr (ReturnType == TYPE_INT) {
97
65
                    col_to_data[i] = HashUtil::murmur_hash3_32(
98
65
                            reinterpret_cast<const char*>(&data[current_offset]),
99
65
                            offsets[i] - current_offset, col_to_data[i]);
100
127
                } else {
101
127
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
102
127
                            reinterpret_cast<const char*>(&data[current_offset]),
103
127
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
104
127
                }
105
192
                current_offset = offsets[i];
106
192
            }
107
94
        } else if (const ColumnConst* col_from_const =
108
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
109
0
            auto value = col_from_const->get_value<TYPE_STRING>();
110
0
            for (size_t i = 0; i < input_rows_count; ++i) {
111
0
                if constexpr (ReturnType == TYPE_INT) {
112
0
                    col_to_data[i] =
113
0
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
114
0
                } else {
115
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
116
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
117
0
                }
118
0
            }
119
0
        } else {
120
0
            DCHECK(false);
121
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
122
0
                                        column->get_name(), name);
123
0
        }
124
94
        return Status::OK();
125
94
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
79
21
                          IColumn& col_to) {
80
21
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
81
21
        if constexpr (first) {
82
21
            if constexpr (ReturnType == TYPE_INT) {
83
21
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
84
21
                                           input_rows_count);
85
            } else {
86
                to_column.insert_many_defaults(input_rows_count);
87
            }
88
21
        }
89
21
        auto& col_to_data = to_column.get_data();
90
21
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
91
21
            const typename ColumnString::Chars& data = col_from->get_chars();
92
21
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
93
21
            size_t size = offsets.size();
94
21
            ColumnString::Offset current_offset = 0;
95
79
            for (size_t i = 0; i < size; ++i) {
96
58
                if constexpr (ReturnType == TYPE_INT) {
97
58
                    col_to_data[i] = HashUtil::murmur_hash3_32(
98
58
                            reinterpret_cast<const char*>(&data[current_offset]),
99
58
                            offsets[i] - current_offset, col_to_data[i]);
100
                } else {
101
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
102
                            reinterpret_cast<const char*>(&data[current_offset]),
103
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
104
                }
105
58
                current_offset = offsets[i];
106
58
            }
107
21
        } else if (const ColumnConst* col_from_const =
108
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
109
0
            auto value = col_from_const->get_value<TYPE_STRING>();
110
0
            for (size_t i = 0; i < input_rows_count; ++i) {
111
0
                if constexpr (ReturnType == TYPE_INT) {
112
0
                    col_to_data[i] =
113
0
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
114
                } else {
115
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
116
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
117
                }
118
0
            }
119
0
        } else {
120
0
            DCHECK(false);
121
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
122
0
                                        column->get_name(), name);
123
0
        }
124
21
        return Status::OK();
125
21
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
79
4
                          IColumn& col_to) {
80
4
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
81
        if constexpr (first) {
82
            if constexpr (ReturnType == TYPE_INT) {
83
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
84
                                           input_rows_count);
85
            } else {
86
                to_column.insert_many_defaults(input_rows_count);
87
            }
88
        }
89
4
        auto& col_to_data = to_column.get_data();
90
4
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
91
4
            const typename ColumnString::Chars& data = col_from->get_chars();
92
4
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
93
4
            size_t size = offsets.size();
94
4
            ColumnString::Offset current_offset = 0;
95
11
            for (size_t i = 0; i < size; ++i) {
96
7
                if constexpr (ReturnType == TYPE_INT) {
97
7
                    col_to_data[i] = HashUtil::murmur_hash3_32(
98
7
                            reinterpret_cast<const char*>(&data[current_offset]),
99
7
                            offsets[i] - current_offset, col_to_data[i]);
100
                } else {
101
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
102
                            reinterpret_cast<const char*>(&data[current_offset]),
103
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
104
                }
105
7
                current_offset = offsets[i];
106
7
            }
107
4
        } else if (const ColumnConst* col_from_const =
108
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
109
0
            auto value = col_from_const->get_value<TYPE_STRING>();
110
0
            for (size_t i = 0; i < input_rows_count; ++i) {
111
0
                if constexpr (ReturnType == TYPE_INT) {
112
0
                    col_to_data[i] =
113
0
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
114
                } else {
115
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
116
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
117
                }
118
0
            }
119
0
        } else {
120
0
            DCHECK(false);
121
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
122
0
                                        column->get_name(), name);
123
0
        }
124
4
        return Status::OK();
125
4
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
79
21
                          IColumn& col_to) {
80
21
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
81
21
        if constexpr (first) {
82
            if constexpr (ReturnType == TYPE_INT) {
83
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
84
                                           input_rows_count);
85
21
            } else {
86
21
                to_column.insert_many_defaults(input_rows_count);
87
21
            }
88
21
        }
89
21
        auto& col_to_data = to_column.get_data();
90
21
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
91
21
            const typename ColumnString::Chars& data = col_from->get_chars();
92
21
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
93
21
            size_t size = offsets.size();
94
21
            ColumnString::Offset current_offset = 0;
95
79
            for (size_t i = 0; i < size; ++i) {
96
                if constexpr (ReturnType == TYPE_INT) {
97
                    col_to_data[i] = HashUtil::murmur_hash3_32(
98
                            reinterpret_cast<const char*>(&data[current_offset]),
99
                            offsets[i] - current_offset, col_to_data[i]);
100
58
                } else {
101
58
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
102
58
                            reinterpret_cast<const char*>(&data[current_offset]),
103
58
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
104
58
                }
105
58
                current_offset = offsets[i];
106
58
            }
107
21
        } else if (const ColumnConst* col_from_const =
108
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
109
0
            auto value = col_from_const->get_value<TYPE_STRING>();
110
0
            for (size_t i = 0; i < input_rows_count; ++i) {
111
                if constexpr (ReturnType == TYPE_INT) {
112
                    col_to_data[i] =
113
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
114
0
                } else {
115
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
116
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
117
0
                }
118
0
            }
119
0
        } else {
120
0
            DCHECK(false);
121
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
122
0
                                        column->get_name(), name);
123
0
        }
124
21
        return Status::OK();
125
21
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
79
4
                          IColumn& col_to) {
80
4
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
81
        if constexpr (first) {
82
            if constexpr (ReturnType == TYPE_INT) {
83
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
84
                                           input_rows_count);
85
            } else {
86
                to_column.insert_many_defaults(input_rows_count);
87
            }
88
        }
89
4
        auto& col_to_data = to_column.get_data();
90
4
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
91
4
            const typename ColumnString::Chars& data = col_from->get_chars();
92
4
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
93
4
            size_t size = offsets.size();
94
4
            ColumnString::Offset current_offset = 0;
95
11
            for (size_t i = 0; i < size; ++i) {
96
                if constexpr (ReturnType == TYPE_INT) {
97
                    col_to_data[i] = HashUtil::murmur_hash3_32(
98
                            reinterpret_cast<const char*>(&data[current_offset]),
99
                            offsets[i] - current_offset, col_to_data[i]);
100
7
                } else {
101
7
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
102
7
                            reinterpret_cast<const char*>(&data[current_offset]),
103
7
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
104
7
                }
105
7
                current_offset = offsets[i];
106
7
            }
107
4
        } else if (const ColumnConst* col_from_const =
108
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
109
0
            auto value = col_from_const->get_value<TYPE_STRING>();
110
0
            for (size_t i = 0; i < input_rows_count; ++i) {
111
                if constexpr (ReturnType == TYPE_INT) {
112
                    col_to_data[i] =
113
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
114
0
                } else {
115
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
116
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
117
0
                }
118
0
            }
119
0
        } else {
120
0
            DCHECK(false);
121
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
122
0
                                        column->get_name(), name);
123
0
        }
124
4
        return Status::OK();
125
4
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
79
18
                          IColumn& col_to) {
80
18
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
81
18
        if constexpr (first) {
82
            if constexpr (ReturnType == TYPE_INT) {
83
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
84
                                           input_rows_count);
85
18
            } else {
86
18
                to_column.insert_many_defaults(input_rows_count);
87
18
            }
88
18
        }
89
18
        auto& col_to_data = to_column.get_data();
90
18
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
91
18
            const typename ColumnString::Chars& data = col_from->get_chars();
92
18
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
93
18
            size_t size = offsets.size();
94
18
            ColumnString::Offset current_offset = 0;
95
46
            for (size_t i = 0; i < size; ++i) {
96
                if constexpr (ReturnType == TYPE_INT) {
97
                    col_to_data[i] = HashUtil::murmur_hash3_32(
98
                            reinterpret_cast<const char*>(&data[current_offset]),
99
                            offsets[i] - current_offset, col_to_data[i]);
100
28
                } else {
101
28
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
102
28
                            reinterpret_cast<const char*>(&data[current_offset]),
103
28
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
104
28
                }
105
28
                current_offset = offsets[i];
106
28
            }
107
18
        } else if (const ColumnConst* col_from_const =
108
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
109
0
            auto value = col_from_const->get_value<TYPE_STRING>();
110
0
            for (size_t i = 0; i < input_rows_count; ++i) {
111
                if constexpr (ReturnType == TYPE_INT) {
112
                    col_to_data[i] =
113
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
114
0
                } else {
115
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
116
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
117
0
                }
118
0
            }
119
0
        } else {
120
0
            DCHECK(false);
121
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
122
0
                                        column->get_name(), name);
123
0
        }
124
18
        return Status::OK();
125
18
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
79
1
                          IColumn& col_to) {
80
1
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
81
        if constexpr (first) {
82
            if constexpr (ReturnType == TYPE_INT) {
83
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
84
                                           input_rows_count);
85
            } else {
86
                to_column.insert_many_defaults(input_rows_count);
87
            }
88
        }
89
1
        auto& col_to_data = to_column.get_data();
90
1
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
91
1
            const typename ColumnString::Chars& data = col_from->get_chars();
92
1
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
93
1
            size_t size = offsets.size();
94
1
            ColumnString::Offset current_offset = 0;
95
2
            for (size_t i = 0; i < size; ++i) {
96
                if constexpr (ReturnType == TYPE_INT) {
97
                    col_to_data[i] = HashUtil::murmur_hash3_32(
98
                            reinterpret_cast<const char*>(&data[current_offset]),
99
                            offsets[i] - current_offset, col_to_data[i]);
100
1
                } else {
101
1
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
102
1
                            reinterpret_cast<const char*>(&data[current_offset]),
103
1
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
104
1
                }
105
1
                current_offset = offsets[i];
106
1
            }
107
1
        } else if (const ColumnConst* col_from_const =
108
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
109
0
            auto value = col_from_const->get_value<TYPE_STRING>();
110
0
            for (size_t i = 0; i < input_rows_count; ++i) {
111
                if constexpr (ReturnType == TYPE_INT) {
112
                    col_to_data[i] =
113
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
114
0
                } else {
115
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
116
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
117
0
                }
118
0
            }
119
0
        } else {
120
0
            DCHECK(false);
121
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
122
0
                                        column->get_name(), name);
123
0
        }
124
1
        return Status::OK();
125
1
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
79
21
                          IColumn& col_to) {
80
21
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
81
21
        if constexpr (first) {
82
            if constexpr (ReturnType == TYPE_INT) {
83
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
84
                                           input_rows_count);
85
21
            } else {
86
21
                to_column.insert_many_defaults(input_rows_count);
87
21
            }
88
21
        }
89
21
        auto& col_to_data = to_column.get_data();
90
21
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
91
21
            const typename ColumnString::Chars& data = col_from->get_chars();
92
21
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
93
21
            size_t size = offsets.size();
94
21
            ColumnString::Offset current_offset = 0;
95
50
            for (size_t i = 0; i < size; ++i) {
96
                if constexpr (ReturnType == TYPE_INT) {
97
                    col_to_data[i] = HashUtil::murmur_hash3_32(
98
                            reinterpret_cast<const char*>(&data[current_offset]),
99
                            offsets[i] - current_offset, col_to_data[i]);
100
29
                } else {
101
29
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
102
29
                            reinterpret_cast<const char*>(&data[current_offset]),
103
29
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
104
29
                }
105
29
                current_offset = offsets[i];
106
29
            }
107
21
        } else if (const ColumnConst* col_from_const =
108
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
109
0
            auto value = col_from_const->get_value<TYPE_STRING>();
110
0
            for (size_t i = 0; i < input_rows_count; ++i) {
111
                if constexpr (ReturnType == TYPE_INT) {
112
                    col_to_data[i] =
113
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
114
0
                } else {
115
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
116
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
117
0
                }
118
0
            }
119
0
        } else {
120
0
            DCHECK(false);
121
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
122
0
                                        column->get_name(), name);
123
0
        }
124
21
        return Status::OK();
125
21
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE7ELb1EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
79
4
                          IColumn& col_to) {
80
4
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
81
        if constexpr (first) {
82
            if constexpr (ReturnType == TYPE_INT) {
83
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
84
                                           input_rows_count);
85
            } else {
86
                to_column.insert_many_defaults(input_rows_count);
87
            }
88
        }
89
4
        auto& col_to_data = to_column.get_data();
90
4
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
91
4
            const typename ColumnString::Chars& data = col_from->get_chars();
92
4
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
93
4
            size_t size = offsets.size();
94
4
            ColumnString::Offset current_offset = 0;
95
8
            for (size_t i = 0; i < size; ++i) {
96
                if constexpr (ReturnType == TYPE_INT) {
97
                    col_to_data[i] = HashUtil::murmur_hash3_32(
98
                            reinterpret_cast<const char*>(&data[current_offset]),
99
                            offsets[i] - current_offset, col_to_data[i]);
100
4
                } else {
101
4
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
102
4
                            reinterpret_cast<const char*>(&data[current_offset]),
103
4
                            offsets[i] - current_offset, static_cast<uint64_t>(col_to_data[i]));
104
4
                }
105
4
                current_offset = offsets[i];
106
4
            }
107
4
        } else if (const ColumnConst* col_from_const =
108
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
109
0
            auto value = col_from_const->get_value<TYPE_STRING>();
110
0
            for (size_t i = 0; i < input_rows_count; ++i) {
111
                if constexpr (ReturnType == TYPE_INT) {
112
                    col_to_data[i] =
113
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
114
0
                } else {
115
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
116
0
                            value.data(), value.size(), static_cast<uint64_t>(col_to_data[i]));
117
0
                }
118
0
            }
119
0
        } else {
120
0
            DCHECK(false);
121
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
122
0
                                        column->get_name(), name);
123
0
        }
124
4
        return Status::OK();
125
4
    }
126
};
127
128
using FunctionMurmurHash3_32 =
129
        FunctionVariadicArgumentsBase<DataTypeInt32, MurmurHash3Impl<TYPE_INT>>;
130
using FunctionMurmurHash3_64 =
131
        FunctionVariadicArgumentsBase<DataTypeInt64, MurmurHash3Impl<TYPE_BIGINT>>;
132
using FunctionMurmurHash3_64_V2 =
133
        FunctionVariadicArgumentsBase<DataTypeInt64, MurmurHash3Impl<TYPE_BIGINT, true>>;
134
using FunctionMurmurHash3U64V2 =
135
        FunctionVariadicArgumentsBase<DataTypeInt128, MurmurHash3Impl<TYPE_LARGEINT, true>>;
136
137
#ifdef BE_TEST
138
const char* murmur_hash3_get_name_type_int_for_test() {
139
    return MurmurHash3Impl<TYPE_INT>::get_name();
140
}
141
142
const char* murmur_hash3_get_name_type_bigint_for_test() {
143
    return MurmurHash3Impl<TYPE_BIGINT>::get_name();
144
}
145
146
const char* murmur_hash3_get_name_type_bigint_v2_for_test() {
147
    return MurmurHash3Impl<TYPE_BIGINT, true>::get_name();
148
}
149
#endif
150
151
template <PrimitiveType ReturnType>
152
struct XxHashImpl {
153
    static constexpr auto name = ReturnType == TYPE_INT ? "xxhash_32" : "xxhash_64";
154
155
0
    static Status empty_apply(IColumn& icolumn, size_t input_rows_count) {
156
0
        ColumnVector<ReturnType>& vec_to = assert_cast<ColumnVector<ReturnType>&>(icolumn);
157
0
        vec_to.get_data().assign(
158
0
                input_rows_count,
159
0
                static_cast<typename PrimitiveTypeTraits<ReturnType>::CppType>(emtpy_value));
160
0
        return Status::OK();
161
0
    }
Unexecuted instantiation: _ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE11empty_applyERNS_7IColumnEm
162
163
    static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
164
1.16k
                              IColumn& icolumn) {
165
1.16k
        return execute<true>(type, column, input_rows_count, icolumn);
166
1.16k
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
164
104
                              IColumn& icolumn) {
165
104
        return execute<true>(type, column, input_rows_count, icolumn);
166
104
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
164
1.06k
                              IColumn& icolumn) {
165
1.06k
        return execute<true>(type, column, input_rows_count, icolumn);
166
1.06k
    }
167
168
    static Status combine_apply(const IDataType* type, const IColumn* column,
169
24
                                size_t input_rows_count, IColumn& icolumn) {
170
24
        return execute<false>(type, column, input_rows_count, icolumn);
171
24
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
169
11
                                size_t input_rows_count, IColumn& icolumn) {
170
11
        return execute<false>(type, column, input_rows_count, icolumn);
171
11
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
169
13
                                size_t input_rows_count, IColumn& icolumn) {
170
13
        return execute<false>(type, column, input_rows_count, icolumn);
171
13
    }
172
173
    template <bool first>
174
    static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count,
175
1.19k
                          IColumn& col_to) {
176
1.19k
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
177
1.19k
        if constexpr (first) {
178
1.16k
            to_column.insert_many_defaults(input_rows_count);
179
1.16k
        }
180
1.19k
        auto& col_to_data = to_column.get_data();
181
1.19k
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
182
1.16k
            const typename ColumnString::Chars& data = col_from->get_chars();
183
1.16k
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
184
1.16k
            size_t size = offsets.size();
185
1.16k
            ColumnString::Offset current_offset = 0;
186
89.2k
            for (size_t i = 0; i < size; ++i) {
187
88.0k
                if constexpr (ReturnType == TYPE_INT) {
188
410
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
189
410
                            reinterpret_cast<const char*>(&data[current_offset]),
190
410
                            offsets[i] - current_offset, col_to_data[i]);
191
87.6k
                } else {
192
87.6k
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
193
87.6k
                            reinterpret_cast<const char*>(&data[current_offset]),
194
87.6k
                            offsets[i] - current_offset, col_to_data[i]);
195
87.6k
                }
196
88.0k
                current_offset = offsets[i];
197
88.0k
            }
198
1.16k
        } else if (const ColumnConst* col_from_const =
199
24
                           check_and_get_column_const_string_or_fixedstring(column)) {
200
0
            auto value = col_from_const->get_value<TYPE_STRING>();
201
0
            for (size_t i = 0; i < input_rows_count; ++i) {
202
0
                if constexpr (ReturnType == TYPE_INT) {
203
0
                    col_to_data[i] =
204
0
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
205
0
                } else {
206
0
                    col_to_data[i] =
207
0
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
208
0
                }
209
0
            }
210
24
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
211
108
            for (size_t i = 0; i < input_rows_count; ++i) {
212
84
                auto data_ref = vb_col->get_data_at(i);
213
84
                if constexpr (ReturnType == TYPE_INT) {
214
42
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
215
42
                                                                col_to_data[i]);
216
42
                } else {
217
42
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
218
42
                                                                col_to_data[i]);
219
42
                }
220
84
            }
221
24
        } else {
222
0
            DCHECK(false);
223
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
224
0
                                        column->get_name(), name);
225
0
        }
226
1.19k
        return Status::OK();
227
1.19k
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
175
104
                          IColumn& col_to) {
176
104
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
177
104
        if constexpr (first) {
178
104
            to_column.insert_many_defaults(input_rows_count);
179
104
        }
180
104
        auto& col_to_data = to_column.get_data();
181
104
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
182
97
            const typename ColumnString::Chars& data = col_from->get_chars();
183
97
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
184
97
            size_t size = offsets.size();
185
97
            ColumnString::Offset current_offset = 0;
186
490
            for (size_t i = 0; i < size; ++i) {
187
393
                if constexpr (ReturnType == TYPE_INT) {
188
393
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
189
393
                            reinterpret_cast<const char*>(&data[current_offset]),
190
393
                            offsets[i] - current_offset, col_to_data[i]);
191
                } else {
192
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
193
                            reinterpret_cast<const char*>(&data[current_offset]),
194
                            offsets[i] - current_offset, col_to_data[i]);
195
                }
196
393
                current_offset = offsets[i];
197
393
            }
198
97
        } else if (const ColumnConst* col_from_const =
199
7
                           check_and_get_column_const_string_or_fixedstring(column)) {
200
0
            auto value = col_from_const->get_value<TYPE_STRING>();
201
0
            for (size_t i = 0; i < input_rows_count; ++i) {
202
0
                if constexpr (ReturnType == TYPE_INT) {
203
0
                    col_to_data[i] =
204
0
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
205
                } else {
206
                    col_to_data[i] =
207
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
208
                }
209
0
            }
210
7
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
211
33
            for (size_t i = 0; i < input_rows_count; ++i) {
212
26
                auto data_ref = vb_col->get_data_at(i);
213
26
                if constexpr (ReturnType == TYPE_INT) {
214
26
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
215
26
                                                                col_to_data[i]);
216
                } else {
217
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
218
                                                                col_to_data[i]);
219
                }
220
26
            }
221
7
        } else {
222
0
            DCHECK(false);
223
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
224
0
                                        column->get_name(), name);
225
0
        }
226
104
        return Status::OK();
227
104
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
175
11
                          IColumn& col_to) {
176
11
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
177
        if constexpr (first) {
178
            to_column.insert_many_defaults(input_rows_count);
179
        }
180
11
        auto& col_to_data = to_column.get_data();
181
11
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
182
6
            const typename ColumnString::Chars& data = col_from->get_chars();
183
6
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
184
6
            size_t size = offsets.size();
185
6
            ColumnString::Offset current_offset = 0;
186
23
            for (size_t i = 0; i < size; ++i) {
187
17
                if constexpr (ReturnType == TYPE_INT) {
188
17
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
189
17
                            reinterpret_cast<const char*>(&data[current_offset]),
190
17
                            offsets[i] - current_offset, col_to_data[i]);
191
                } else {
192
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
193
                            reinterpret_cast<const char*>(&data[current_offset]),
194
                            offsets[i] - current_offset, col_to_data[i]);
195
                }
196
17
                current_offset = offsets[i];
197
17
            }
198
6
        } else if (const ColumnConst* col_from_const =
199
5
                           check_and_get_column_const_string_or_fixedstring(column)) {
200
0
            auto value = col_from_const->get_value<TYPE_STRING>();
201
0
            for (size_t i = 0; i < input_rows_count; ++i) {
202
0
                if constexpr (ReturnType == TYPE_INT) {
203
0
                    col_to_data[i] =
204
0
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
205
                } else {
206
                    col_to_data[i] =
207
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
208
                }
209
0
            }
210
5
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
211
21
            for (size_t i = 0; i < input_rows_count; ++i) {
212
16
                auto data_ref = vb_col->get_data_at(i);
213
16
                if constexpr (ReturnType == TYPE_INT) {
214
16
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
215
16
                                                                col_to_data[i]);
216
                } else {
217
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
218
                                                                col_to_data[i]);
219
                }
220
16
            }
221
5
        } else {
222
0
            DCHECK(false);
223
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
224
0
                                        column->get_name(), name);
225
0
        }
226
11
        return Status::OK();
227
11
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
175
1.06k
                          IColumn& col_to) {
176
1.06k
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
177
1.06k
        if constexpr (first) {
178
1.06k
            to_column.insert_many_defaults(input_rows_count);
179
1.06k
        }
180
1.06k
        auto& col_to_data = to_column.get_data();
181
1.06k
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
182
1.05k
            const typename ColumnString::Chars& data = col_from->get_chars();
183
1.05k
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
184
1.05k
            size_t size = offsets.size();
185
1.05k
            ColumnString::Offset current_offset = 0;
186
88.7k
            for (size_t i = 0; i < size; ++i) {
187
                if constexpr (ReturnType == TYPE_INT) {
188
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
189
                            reinterpret_cast<const char*>(&data[current_offset]),
190
                            offsets[i] - current_offset, col_to_data[i]);
191
87.6k
                } else {
192
87.6k
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
193
87.6k
                            reinterpret_cast<const char*>(&data[current_offset]),
194
87.6k
                            offsets[i] - current_offset, col_to_data[i]);
195
87.6k
                }
196
87.6k
                current_offset = offsets[i];
197
87.6k
            }
198
1.05k
        } else if (const ColumnConst* col_from_const =
199
7
                           check_and_get_column_const_string_or_fixedstring(column)) {
200
0
            auto value = col_from_const->get_value<TYPE_STRING>();
201
0
            for (size_t i = 0; i < input_rows_count; ++i) {
202
                if constexpr (ReturnType == TYPE_INT) {
203
                    col_to_data[i] =
204
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
205
0
                } else {
206
0
                    col_to_data[i] =
207
0
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
208
0
                }
209
0
            }
210
7
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
211
33
            for (size_t i = 0; i < input_rows_count; ++i) {
212
26
                auto data_ref = vb_col->get_data_at(i);
213
                if constexpr (ReturnType == TYPE_INT) {
214
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
215
                                                                col_to_data[i]);
216
26
                } else {
217
26
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
218
26
                                                                col_to_data[i]);
219
26
                }
220
26
            }
221
7
        } else {
222
0
            DCHECK(false);
223
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
224
0
                                        column->get_name(), name);
225
0
        }
226
1.06k
        return Status::OK();
227
1.06k
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
175
13
                          IColumn& col_to) {
176
13
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
177
        if constexpr (first) {
178
            to_column.insert_many_defaults(input_rows_count);
179
        }
180
13
        auto& col_to_data = to_column.get_data();
181
13
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
182
8
            const typename ColumnString::Chars& data = col_from->get_chars();
183
8
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
184
8
            size_t size = offsets.size();
185
8
            ColumnString::Offset current_offset = 0;
186
27
            for (size_t i = 0; i < size; ++i) {
187
                if constexpr (ReturnType == TYPE_INT) {
188
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
189
                            reinterpret_cast<const char*>(&data[current_offset]),
190
                            offsets[i] - current_offset, col_to_data[i]);
191
19
                } else {
192
19
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
193
19
                            reinterpret_cast<const char*>(&data[current_offset]),
194
19
                            offsets[i] - current_offset, col_to_data[i]);
195
19
                }
196
19
                current_offset = offsets[i];
197
19
            }
198
8
        } else if (const ColumnConst* col_from_const =
199
5
                           check_and_get_column_const_string_or_fixedstring(column)) {
200
0
            auto value = col_from_const->get_value<TYPE_STRING>();
201
0
            for (size_t i = 0; i < input_rows_count; ++i) {
202
                if constexpr (ReturnType == TYPE_INT) {
203
                    col_to_data[i] =
204
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
205
0
                } else {
206
0
                    col_to_data[i] =
207
0
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
208
0
                }
209
0
            }
210
5
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
211
21
            for (size_t i = 0; i < input_rows_count; ++i) {
212
16
                auto data_ref = vb_col->get_data_at(i);
213
                if constexpr (ReturnType == TYPE_INT) {
214
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
215
                                                                col_to_data[i]);
216
16
                } else {
217
16
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
218
16
                                                                col_to_data[i]);
219
16
                }
220
16
            }
221
5
        } else {
222
0
            DCHECK(false);
223
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
224
0
                                        column->get_name(), name);
225
0
        }
226
13
        return Status::OK();
227
13
    }
228
};
229
230
using FunctionXxHash_32 = FunctionVariadicArgumentsBase<DataTypeInt32, XxHashImpl<TYPE_INT>>;
231
using FunctionXxHash_64 = FunctionVariadicArgumentsBase<DataTypeInt64, XxHashImpl<TYPE_BIGINT>>;
232
233
8
void register_function_hash(SimpleFunctionFactory& factory) {
234
8
    factory.register_function<FunctionMurmurHash3_32>();
235
8
    factory.register_function<FunctionMurmurHash3_64>();
236
8
    factory.register_function<FunctionMurmurHash3_64_V2>();
237
8
    factory.register_function<FunctionMurmurHash3U64V2>();
238
8
    factory.register_function<FunctionXxHash_32>();
239
8
    factory.register_function<FunctionXxHash_64>();
240
8
    factory.register_alias("xxhash_64", "xxhash3_64");
241
8
}
242
} // namespace doris