Coverage Report

Created: 2026-03-14 13:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_hash.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionHash.cpp
19
// and modified by Doris
20
21
#include "exprs/function/function_hash.h"
22
23
#include "common/status.h"
24
#include "core/assert_cast.h"
25
#include "core/column/column.h"
26
#include "core/column/column_const.h"
27
#include "core/column/column_string.h"
28
#include "core/column/column_varbinary.h"
29
#include "core/column/column_vector.h"
30
#include "core/data_type/data_type.h"
31
#include "core/data_type/data_type_number.h"
32
#include "core/field.h"
33
#include "exec/common/template_helpers.hpp"
34
#include "exprs/function/function_helpers.h"
35
#include "exprs/function/function_variadic_arguments.h"
36
#include "exprs/function/simple_function_factory.h"
37
#include "util/hash/murmur_hash3.h"
38
#include "util/hash_util.hpp"
39
40
namespace doris {
41
#include "common/compile_check_begin.h"
42
constexpr uint64_t emtpy_value = 0xe28dbde7fe22e41c;
43
44
template <PrimitiveType ReturnType, bool is_mmh64_v2 = false>
45
struct MurmurHash3Impl {
46
    static constexpr auto get_name() {
47
        if constexpr (ReturnType == TYPE_INT) {
48
            return "murmur_hash3_32";
49
        } else if constexpr (is_mmh64_v2) {
50
            return "murmur_hash3_64_v2";
51
        } else {
52
            return "murmur_hash3_64";
53
        }
54
    }
55
    static constexpr auto name = get_name();
56
57
0
    static Status empty_apply(IColumn& icolumn, size_t input_rows_count) {
58
0
        ColumnVector<ReturnType>& vec_to = assert_cast<ColumnVector<ReturnType>&>(icolumn);
59
0
        vec_to.get_data().assign(
60
0
                input_rows_count,
61
0
                static_cast<typename PrimitiveTypeTraits<ReturnType>::CppType>(emtpy_value));
62
0
        return Status::OK();
63
0
    }
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE11empty_applyERNS_7IColumnEm
64
65
    static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
66
50
                              IColumn& icolumn) {
67
50
        return execute<true>(type, column, input_rows_count, icolumn);
68
50
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
66
23
                              IColumn& icolumn) {
67
23
        return execute<true>(type, column, input_rows_count, icolumn);
68
23
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
66
23
                              IColumn& icolumn) {
67
23
        return execute<true>(type, column, input_rows_count, icolumn);
68
23
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
66
4
                              IColumn& icolumn) {
67
4
        return execute<true>(type, column, input_rows_count, icolumn);
68
4
    }
69
70
    static Status combine_apply(const IDataType* type, const IColumn* column,
71
10
                                size_t input_rows_count, IColumn& icolumn) {
72
10
        return execute<false>(type, column, input_rows_count, icolumn);
73
10
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
71
5
                                size_t input_rows_count, IColumn& icolumn) {
72
5
        return execute<false>(type, column, input_rows_count, icolumn);
73
5
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
71
5
                                size_t input_rows_count, IColumn& icolumn) {
72
5
        return execute<false>(type, column, input_rows_count, icolumn);
73
5
    }
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
74
75
    template <bool first>
76
    static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count,
77
60
                          IColumn& col_to) {
78
60
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
79
60
        if constexpr (first) {
80
50
            if constexpr (ReturnType == TYPE_INT) {
81
23
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
82
23
                                           input_rows_count);
83
27
            } else {
84
27
                to_column.insert_many_defaults(input_rows_count);
85
27
            }
86
50
        }
87
60
        auto& col_to_data = to_column.get_data();
88
60
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
89
60
            const typename ColumnString::Chars& data = col_from->get_chars();
90
60
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
91
60
            size_t size = offsets.size();
92
60
            ColumnString::Offset current_offset = 0;
93
202
            for (size_t i = 0; i < size; ++i) {
94
142
                if constexpr (ReturnType == TYPE_INT) {
95
68
                    col_to_data[i] = HashUtil::murmur_hash3_32(
96
68
                            reinterpret_cast<const char*>(&data[current_offset]),
97
68
                            offsets[i] - current_offset, col_to_data[i]);
98
74
                } else {
99
74
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
100
74
                            reinterpret_cast<const char*>(&data[current_offset]),
101
74
                            offsets[i] - current_offset, col_to_data[i]);
102
74
                }
103
142
                current_offset = offsets[i];
104
142
            }
105
60
        } else if (const ColumnConst* col_from_const =
106
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
107
0
            auto value = col_from_const->get_value<TYPE_STRING>();
108
0
            for (size_t i = 0; i < input_rows_count; ++i) {
109
0
                if constexpr (ReturnType == TYPE_INT) {
110
0
                    col_to_data[i] =
111
0
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
112
0
                } else {
113
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
114
0
                            value.data(), value.size(), col_to_data[i]);
115
0
                }
116
0
            }
117
0
        } else {
118
0
            DCHECK(false);
119
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
120
0
                                        column->get_name(), name);
121
0
        }
122
60
        return Status::OK();
123
60
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
77
23
                          IColumn& col_to) {
78
23
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
79
23
        if constexpr (first) {
80
23
            if constexpr (ReturnType == TYPE_INT) {
81
23
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
82
23
                                           input_rows_count);
83
            } else {
84
                to_column.insert_many_defaults(input_rows_count);
85
            }
86
23
        }
87
23
        auto& col_to_data = to_column.get_data();
88
23
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
89
23
            const typename ColumnString::Chars& data = col_from->get_chars();
90
23
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
91
23
            size_t size = offsets.size();
92
23
            ColumnString::Offset current_offset = 0;
93
83
            for (size_t i = 0; i < size; ++i) {
94
60
                if constexpr (ReturnType == TYPE_INT) {
95
60
                    col_to_data[i] = HashUtil::murmur_hash3_32(
96
60
                            reinterpret_cast<const char*>(&data[current_offset]),
97
60
                            offsets[i] - current_offset, col_to_data[i]);
98
                } else {
99
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
100
                            reinterpret_cast<const char*>(&data[current_offset]),
101
                            offsets[i] - current_offset, col_to_data[i]);
102
                }
103
60
                current_offset = offsets[i];
104
60
            }
105
23
        } else if (const ColumnConst* col_from_const =
106
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
107
0
            auto value = col_from_const->get_value<TYPE_STRING>();
108
0
            for (size_t i = 0; i < input_rows_count; ++i) {
109
0
                if constexpr (ReturnType == TYPE_INT) {
110
0
                    col_to_data[i] =
111
0
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
112
                } else {
113
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
114
                            value.data(), value.size(), col_to_data[i]);
115
                }
116
0
            }
117
0
        } else {
118
0
            DCHECK(false);
119
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
120
0
                                        column->get_name(), name);
121
0
        }
122
23
        return Status::OK();
123
23
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE5ELb0EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
77
5
                          IColumn& col_to) {
78
5
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
79
        if constexpr (first) {
80
            if constexpr (ReturnType == TYPE_INT) {
81
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
82
                                           input_rows_count);
83
            } else {
84
                to_column.insert_many_defaults(input_rows_count);
85
            }
86
        }
87
5
        auto& col_to_data = to_column.get_data();
88
5
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
89
5
            const typename ColumnString::Chars& data = col_from->get_chars();
90
5
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
91
5
            size_t size = offsets.size();
92
5
            ColumnString::Offset current_offset = 0;
93
13
            for (size_t i = 0; i < size; ++i) {
94
8
                if constexpr (ReturnType == TYPE_INT) {
95
8
                    col_to_data[i] = HashUtil::murmur_hash3_32(
96
8
                            reinterpret_cast<const char*>(&data[current_offset]),
97
8
                            offsets[i] - current_offset, col_to_data[i]);
98
                } else {
99
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
100
                            reinterpret_cast<const char*>(&data[current_offset]),
101
                            offsets[i] - current_offset, col_to_data[i]);
102
                }
103
8
                current_offset = offsets[i];
104
8
            }
105
5
        } else if (const ColumnConst* col_from_const =
106
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
107
0
            auto value = col_from_const->get_value<TYPE_STRING>();
108
0
            for (size_t i = 0; i < input_rows_count; ++i) {
109
0
                if constexpr (ReturnType == TYPE_INT) {
110
0
                    col_to_data[i] =
111
0
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
112
                } else {
113
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
114
                            value.data(), value.size(), col_to_data[i]);
115
                }
116
0
            }
117
0
        } else {
118
0
            DCHECK(false);
119
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
120
0
                                        column->get_name(), name);
121
0
        }
122
5
        return Status::OK();
123
5
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
77
23
                          IColumn& col_to) {
78
23
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
79
23
        if constexpr (first) {
80
            if constexpr (ReturnType == TYPE_INT) {
81
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
82
                                           input_rows_count);
83
23
            } else {
84
23
                to_column.insert_many_defaults(input_rows_count);
85
23
            }
86
23
        }
87
23
        auto& col_to_data = to_column.get_data();
88
23
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
89
23
            const typename ColumnString::Chars& data = col_from->get_chars();
90
23
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
91
23
            size_t size = offsets.size();
92
23
            ColumnString::Offset current_offset = 0;
93
83
            for (size_t i = 0; i < size; ++i) {
94
                if constexpr (ReturnType == TYPE_INT) {
95
                    col_to_data[i] = HashUtil::murmur_hash3_32(
96
                            reinterpret_cast<const char*>(&data[current_offset]),
97
                            offsets[i] - current_offset, col_to_data[i]);
98
60
                } else {
99
60
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
100
60
                            reinterpret_cast<const char*>(&data[current_offset]),
101
60
                            offsets[i] - current_offset, col_to_data[i]);
102
60
                }
103
60
                current_offset = offsets[i];
104
60
            }
105
23
        } else if (const ColumnConst* col_from_const =
106
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
107
0
            auto value = col_from_const->get_value<TYPE_STRING>();
108
0
            for (size_t i = 0; i < input_rows_count; ++i) {
109
                if constexpr (ReturnType == TYPE_INT) {
110
                    col_to_data[i] =
111
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
112
0
                } else {
113
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
114
0
                            value.data(), value.size(), col_to_data[i]);
115
0
                }
116
0
            }
117
0
        } else {
118
0
            DCHECK(false);
119
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
120
0
                                        column->get_name(), name);
121
0
        }
122
23
        return Status::OK();
123
23
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb0EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
77
5
                          IColumn& col_to) {
78
5
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
79
        if constexpr (first) {
80
            if constexpr (ReturnType == TYPE_INT) {
81
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
82
                                           input_rows_count);
83
            } else {
84
                to_column.insert_many_defaults(input_rows_count);
85
            }
86
        }
87
5
        auto& col_to_data = to_column.get_data();
88
5
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
89
5
            const typename ColumnString::Chars& data = col_from->get_chars();
90
5
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
91
5
            size_t size = offsets.size();
92
5
            ColumnString::Offset current_offset = 0;
93
13
            for (size_t i = 0; i < size; ++i) {
94
                if constexpr (ReturnType == TYPE_INT) {
95
                    col_to_data[i] = HashUtil::murmur_hash3_32(
96
                            reinterpret_cast<const char*>(&data[current_offset]),
97
                            offsets[i] - current_offset, col_to_data[i]);
98
8
                } else {
99
8
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
100
8
                            reinterpret_cast<const char*>(&data[current_offset]),
101
8
                            offsets[i] - current_offset, col_to_data[i]);
102
8
                }
103
8
                current_offset = offsets[i];
104
8
            }
105
5
        } else if (const ColumnConst* col_from_const =
106
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
107
0
            auto value = col_from_const->get_value<TYPE_STRING>();
108
0
            for (size_t i = 0; i < input_rows_count; ++i) {
109
                if constexpr (ReturnType == TYPE_INT) {
110
                    col_to_data[i] =
111
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
112
0
                } else {
113
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
114
0
                            value.data(), value.size(), col_to_data[i]);
115
0
                }
116
0
            }
117
0
        } else {
118
0
            DCHECK(false);
119
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
120
0
                                        column->get_name(), name);
121
0
        }
122
5
        return Status::OK();
123
5
    }
_ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
77
4
                          IColumn& col_to) {
78
4
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
79
4
        if constexpr (first) {
80
            if constexpr (ReturnType == TYPE_INT) {
81
                to_column.insert_many_vals(static_cast<Int32>(HashUtil::MURMUR3_32_SEED),
82
                                           input_rows_count);
83
4
            } else {
84
4
                to_column.insert_many_defaults(input_rows_count);
85
4
            }
86
4
        }
87
4
        auto& col_to_data = to_column.get_data();
88
4
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
89
4
            const typename ColumnString::Chars& data = col_from->get_chars();
90
4
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
91
4
            size_t size = offsets.size();
92
4
            ColumnString::Offset current_offset = 0;
93
10
            for (size_t i = 0; i < size; ++i) {
94
                if constexpr (ReturnType == TYPE_INT) {
95
                    col_to_data[i] = HashUtil::murmur_hash3_32(
96
                            reinterpret_cast<const char*>(&data[current_offset]),
97
                            offsets[i] - current_offset, col_to_data[i]);
98
6
                } else {
99
6
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
100
6
                            reinterpret_cast<const char*>(&data[current_offset]),
101
6
                            offsets[i] - current_offset, col_to_data[i]);
102
6
                }
103
6
                current_offset = offsets[i];
104
6
            }
105
4
        } else if (const ColumnConst* col_from_const =
106
0
                           check_and_get_column_const_string_or_fixedstring(column)) {
107
0
            auto value = col_from_const->get_value<TYPE_STRING>();
108
0
            for (size_t i = 0; i < input_rows_count; ++i) {
109
                if constexpr (ReturnType == TYPE_INT) {
110
                    col_to_data[i] =
111
                            HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]);
112
0
                } else {
113
0
                    col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
114
0
                            value.data(), value.size(), col_to_data[i]);
115
0
                }
116
0
            }
117
0
        } else {
118
0
            DCHECK(false);
119
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
120
0
                                        column->get_name(), name);
121
0
        }
122
4
        return Status::OK();
123
4
    }
Unexecuted instantiation: _ZN5doris15MurmurHash3ImplILNS_13PrimitiveTypeE6ELb1EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
124
};
125
126
using FunctionMurmurHash3_32 =
127
        FunctionVariadicArgumentsBase<DataTypeInt32, MurmurHash3Impl<TYPE_INT>>;
128
using FunctionMurmurHash3_64 =
129
        FunctionVariadicArgumentsBase<DataTypeInt64, MurmurHash3Impl<TYPE_BIGINT>>;
130
using FunctionMurmurHash3_64_V2 =
131
        FunctionVariadicArgumentsBase<DataTypeInt64, MurmurHash3Impl<TYPE_BIGINT, true>>;
132
133
#ifdef BE_TEST
134
const char* murmur_hash3_get_name_type_int_for_test() {
135
    return MurmurHash3Impl<TYPE_INT>::get_name();
136
}
137
138
const char* murmur_hash3_get_name_type_bigint_for_test() {
139
    return MurmurHash3Impl<TYPE_BIGINT>::get_name();
140
}
141
142
const char* murmur_hash3_get_name_type_bigint_v2_for_test() {
143
    return MurmurHash3Impl<TYPE_BIGINT, true>::get_name();
144
}
145
#endif
146
147
template <PrimitiveType ReturnType>
148
struct XxHashImpl {
149
    static constexpr auto name = ReturnType == TYPE_INT ? "xxhash_32" : "xxhash_64";
150
151
0
    static Status empty_apply(IColumn& icolumn, size_t input_rows_count) {
152
0
        ColumnVector<ReturnType>& vec_to = assert_cast<ColumnVector<ReturnType>&>(icolumn);
153
0
        vec_to.get_data().assign(
154
0
                input_rows_count,
155
0
                static_cast<typename PrimitiveTypeTraits<ReturnType>::CppType>(emtpy_value));
156
0
        return Status::OK();
157
0
    }
Unexecuted instantiation: _ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE11empty_applyERNS_7IColumnEm
Unexecuted instantiation: _ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE11empty_applyERNS_7IColumnEm
158
159
    static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
160
914
                              IColumn& icolumn) {
161
914
        return execute<true>(type, column, input_rows_count, icolumn);
162
914
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
160
79
                              IColumn& icolumn) {
161
79
        return execute<true>(type, column, input_rows_count, icolumn);
162
79
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE11first_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
160
835
                              IColumn& icolumn) {
161
835
        return execute<true>(type, column, input_rows_count, icolumn);
162
835
    }
163
164
    static Status combine_apply(const IDataType* type, const IColumn* column,
165
18
                                size_t input_rows_count, IColumn& icolumn) {
166
18
        return execute<false>(type, column, input_rows_count, icolumn);
167
18
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
165
8
                                size_t input_rows_count, IColumn& icolumn) {
166
8
        return execute<false>(type, column, input_rows_count, icolumn);
167
8
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE13combine_applyEPKNS_9IDataTypeEPKNS_7IColumnEmRS6_
Line
Count
Source
165
10
                                size_t input_rows_count, IColumn& icolumn) {
166
10
        return execute<false>(type, column, input_rows_count, icolumn);
167
10
    }
168
169
    template <bool first>
170
    static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count,
171
932
                          IColumn& col_to) {
172
932
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
173
932
        if constexpr (first) {
174
914
            to_column.insert_many_defaults(input_rows_count);
175
914
        }
176
932
        auto& col_to_data = to_column.get_data();
177
932
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
178
920
            const typename ColumnString::Chars& data = col_from->get_chars();
179
920
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
180
920
            size_t size = offsets.size();
181
920
            ColumnString::Offset current_offset = 0;
182
39.0k
            for (size_t i = 0; i < size; ++i) {
183
38.1k
                if constexpr (ReturnType == TYPE_INT) {
184
376
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
185
376
                            reinterpret_cast<const char*>(&data[current_offset]),
186
376
                            offsets[i] - current_offset, col_to_data[i]);
187
37.7k
                } else {
188
37.7k
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
189
37.7k
                            reinterpret_cast<const char*>(&data[current_offset]),
190
37.7k
                            offsets[i] - current_offset, col_to_data[i]);
191
37.7k
                }
192
38.1k
                current_offset = offsets[i];
193
38.1k
            }
194
920
        } else if (const ColumnConst* col_from_const =
195
12
                           check_and_get_column_const_string_or_fixedstring(column)) {
196
0
            auto value = col_from_const->get_value<TYPE_STRING>();
197
0
            for (size_t i = 0; i < input_rows_count; ++i) {
198
0
                if constexpr (ReturnType == TYPE_INT) {
199
0
                    col_to_data[i] =
200
0
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
201
0
                } else {
202
0
                    col_to_data[i] =
203
0
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
204
0
                }
205
0
            }
206
12
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
207
36
            for (size_t i = 0; i < input_rows_count; ++i) {
208
24
                auto data_ref = vb_col->get_data_at(i);
209
24
                if constexpr (ReturnType == TYPE_INT) {
210
12
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
211
12
                                                                col_to_data[i]);
212
12
                } else {
213
12
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
214
12
                                                                col_to_data[i]);
215
12
                }
216
24
            }
217
12
        } else {
218
0
            DCHECK(false);
219
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
220
0
                                        column->get_name(), name);
221
0
        }
222
932
        return Status::OK();
223
932
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
171
79
                          IColumn& col_to) {
172
79
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
173
79
        if constexpr (first) {
174
79
            to_column.insert_many_defaults(input_rows_count);
175
79
        }
176
79
        auto& col_to_data = to_column.get_data();
177
79
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
178
76
            const typename ColumnString::Chars& data = col_from->get_chars();
179
76
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
180
76
            size_t size = offsets.size();
181
76
            ColumnString::Offset current_offset = 0;
182
444
            for (size_t i = 0; i < size; ++i) {
183
368
                if constexpr (ReturnType == TYPE_INT) {
184
368
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
185
368
                            reinterpret_cast<const char*>(&data[current_offset]),
186
368
                            offsets[i] - current_offset, col_to_data[i]);
187
                } else {
188
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
189
                            reinterpret_cast<const char*>(&data[current_offset]),
190
                            offsets[i] - current_offset, col_to_data[i]);
191
                }
192
368
                current_offset = offsets[i];
193
368
            }
194
76
        } else if (const ColumnConst* col_from_const =
195
3
                           check_and_get_column_const_string_or_fixedstring(column)) {
196
0
            auto value = col_from_const->get_value<TYPE_STRING>();
197
0
            for (size_t i = 0; i < input_rows_count; ++i) {
198
0
                if constexpr (ReturnType == TYPE_INT) {
199
0
                    col_to_data[i] =
200
0
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
201
                } else {
202
                    col_to_data[i] =
203
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
204
                }
205
0
            }
206
3
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
207
9
            for (size_t i = 0; i < input_rows_count; ++i) {
208
6
                auto data_ref = vb_col->get_data_at(i);
209
6
                if constexpr (ReturnType == TYPE_INT) {
210
6
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
211
6
                                                                col_to_data[i]);
212
                } else {
213
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
214
                                                                col_to_data[i]);
215
                }
216
6
            }
217
3
        } else {
218
0
            DCHECK(false);
219
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
220
0
                                        column->get_name(), name);
221
0
        }
222
79
        return Status::OK();
223
79
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE5EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
171
8
                          IColumn& col_to) {
172
8
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
173
        if constexpr (first) {
174
            to_column.insert_many_defaults(input_rows_count);
175
        }
176
8
        auto& col_to_data = to_column.get_data();
177
8
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
178
5
            const typename ColumnString::Chars& data = col_from->get_chars();
179
5
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
180
5
            size_t size = offsets.size();
181
5
            ColumnString::Offset current_offset = 0;
182
13
            for (size_t i = 0; i < size; ++i) {
183
8
                if constexpr (ReturnType == TYPE_INT) {
184
8
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
185
8
                            reinterpret_cast<const char*>(&data[current_offset]),
186
8
                            offsets[i] - current_offset, col_to_data[i]);
187
                } else {
188
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
189
                            reinterpret_cast<const char*>(&data[current_offset]),
190
                            offsets[i] - current_offset, col_to_data[i]);
191
                }
192
8
                current_offset = offsets[i];
193
8
            }
194
5
        } else if (const ColumnConst* col_from_const =
195
3
                           check_and_get_column_const_string_or_fixedstring(column)) {
196
0
            auto value = col_from_const->get_value<TYPE_STRING>();
197
0
            for (size_t i = 0; i < input_rows_count; ++i) {
198
0
                if constexpr (ReturnType == TYPE_INT) {
199
0
                    col_to_data[i] =
200
0
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
201
                } else {
202
                    col_to_data[i] =
203
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
204
                }
205
0
            }
206
3
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
207
9
            for (size_t i = 0; i < input_rows_count; ++i) {
208
6
                auto data_ref = vb_col->get_data_at(i);
209
6
                if constexpr (ReturnType == TYPE_INT) {
210
6
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
211
6
                                                                col_to_data[i]);
212
                } else {
213
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
214
                                                                col_to_data[i]);
215
                }
216
6
            }
217
3
        } else {
218
0
            DCHECK(false);
219
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
220
0
                                        column->get_name(), name);
221
0
        }
222
8
        return Status::OK();
223
8
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE7executeILb1EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
171
835
                          IColumn& col_to) {
172
835
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
173
835
        if constexpr (first) {
174
835
            to_column.insert_many_defaults(input_rows_count);
175
835
        }
176
835
        auto& col_to_data = to_column.get_data();
177
835
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
178
832
            const typename ColumnString::Chars& data = col_from->get_chars();
179
832
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
180
832
            size_t size = offsets.size();
181
832
            ColumnString::Offset current_offset = 0;
182
38.5k
            for (size_t i = 0; i < size; ++i) {
183
                if constexpr (ReturnType == TYPE_INT) {
184
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
185
                            reinterpret_cast<const char*>(&data[current_offset]),
186
                            offsets[i] - current_offset, col_to_data[i]);
187
37.7k
                } else {
188
37.7k
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
189
37.7k
                            reinterpret_cast<const char*>(&data[current_offset]),
190
37.7k
                            offsets[i] - current_offset, col_to_data[i]);
191
37.7k
                }
192
37.7k
                current_offset = offsets[i];
193
37.7k
            }
194
832
        } else if (const ColumnConst* col_from_const =
195
3
                           check_and_get_column_const_string_or_fixedstring(column)) {
196
0
            auto value = col_from_const->get_value<TYPE_STRING>();
197
0
            for (size_t i = 0; i < input_rows_count; ++i) {
198
                if constexpr (ReturnType == TYPE_INT) {
199
                    col_to_data[i] =
200
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
201
0
                } else {
202
0
                    col_to_data[i] =
203
0
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
204
0
                }
205
0
            }
206
3
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
207
9
            for (size_t i = 0; i < input_rows_count; ++i) {
208
6
                auto data_ref = vb_col->get_data_at(i);
209
                if constexpr (ReturnType == TYPE_INT) {
210
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
211
                                                                col_to_data[i]);
212
6
                } else {
213
6
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
214
6
                                                                col_to_data[i]);
215
6
                }
216
6
            }
217
3
        } else {
218
0
            DCHECK(false);
219
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
220
0
                                        column->get_name(), name);
221
0
        }
222
835
        return Status::OK();
223
835
    }
_ZN5doris10XxHashImplILNS_13PrimitiveTypeE6EE7executeILb0EEENS_6StatusEPKNS_9IDataTypeEPKNS_7IColumnEmRS8_
Line
Count
Source
171
10
                          IColumn& col_to) {
172
10
        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
173
        if constexpr (first) {
174
            to_column.insert_many_defaults(input_rows_count);
175
        }
176
10
        auto& col_to_data = to_column.get_data();
177
10
        if (const auto* col_from = check_and_get_column<ColumnString>(column)) {
178
7
            const typename ColumnString::Chars& data = col_from->get_chars();
179
7
            const typename ColumnString::Offsets& offsets = col_from->get_offsets();
180
7
            size_t size = offsets.size();
181
7
            ColumnString::Offset current_offset = 0;
182
17
            for (size_t i = 0; i < size; ++i) {
183
                if constexpr (ReturnType == TYPE_INT) {
184
                    col_to_data[i] = HashUtil::xxHash32WithSeed(
185
                            reinterpret_cast<const char*>(&data[current_offset]),
186
                            offsets[i] - current_offset, col_to_data[i]);
187
10
                } else {
188
10
                    col_to_data[i] = HashUtil::xxHash64WithSeed(
189
10
                            reinterpret_cast<const char*>(&data[current_offset]),
190
10
                            offsets[i] - current_offset, col_to_data[i]);
191
10
                }
192
10
                current_offset = offsets[i];
193
10
            }
194
7
        } else if (const ColumnConst* col_from_const =
195
3
                           check_and_get_column_const_string_or_fixedstring(column)) {
196
0
            auto value = col_from_const->get_value<TYPE_STRING>();
197
0
            for (size_t i = 0; i < input_rows_count; ++i) {
198
                if constexpr (ReturnType == TYPE_INT) {
199
                    col_to_data[i] =
200
                            HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]);
201
0
                } else {
202
0
                    col_to_data[i] =
203
0
                            HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]);
204
0
                }
205
0
            }
206
3
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(column)) {
207
9
            for (size_t i = 0; i < input_rows_count; ++i) {
208
6
                auto data_ref = vb_col->get_data_at(i);
209
                if constexpr (ReturnType == TYPE_INT) {
210
                    col_to_data[i] = HashUtil::xxHash32WithSeed(data_ref.data, data_ref.size,
211
                                                                col_to_data[i]);
212
6
                } else {
213
6
                    col_to_data[i] = HashUtil::xxHash64WithSeed(data_ref.data, data_ref.size,
214
6
                                                                col_to_data[i]);
215
6
                }
216
6
            }
217
3
        } else {
218
0
            DCHECK(false);
219
0
            return Status::NotSupported("Illegal column {} of argument of function {}",
220
0
                                        column->get_name(), name);
221
0
        }
222
10
        return Status::OK();
223
10
    }
224
};
225
226
// 32-bit xxHash function type: FunctionVariadicArgumentsBase instantiated with
// DataTypeInt32 and XxHashImpl<TYPE_INT>. With ReturnType == TYPE_INT,
// XxHashImpl::execute takes the HashUtil::xxHash32WithSeed branch and passes the
// value already stored in the output slot as the last argument.
// NOTE(review): DataTypeInt32 is presumably the SQL return type - confirm against
// FunctionVariadicArgumentsBase.
using FunctionXxHash_32 = FunctionVariadicArgumentsBase<DataTypeInt32, XxHashImpl<TYPE_INT>>;
227
// 64-bit xxHash function type: FunctionVariadicArgumentsBase instantiated with
// DataTypeInt64 and XxHashImpl<TYPE_BIGINT>. Any ReturnType other than TYPE_INT
// makes XxHashImpl::execute take the HashUtil::xxHash64WithSeed branch, again
// passing the value already stored in the output slot as the last argument.
// NOTE(review): DataTypeInt64 is presumably the SQL return type - confirm against
// FunctionVariadicArgumentsBase.
using FunctionXxHash_64 = FunctionVariadicArgumentsBase<DataTypeInt64, XxHashImpl<TYPE_BIGINT>>;
228
229
8
void register_function_hash(SimpleFunctionFactory& factory) {
230
8
    factory.register_function<FunctionMurmurHash3_32>();
231
8
    factory.register_function<FunctionMurmurHash3_64>();
232
8
    factory.register_function<FunctionMurmurHash3_64_V2>();
233
8
    factory.register_function<FunctionXxHash_32>();
234
8
    factory.register_function<FunctionXxHash_64>();
235
8
    factory.register_alias("xxhash_64", "xxhash3_64");
236
8
}
237
} // namespace doris