Coverage Report

Created: 2026-04-16 20:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/hash_util.hpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/hash-util.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <crc32c/crc32c.h>
24
#include <gen_cpp/Types_types.h>
25
#include <xxh3.h>
26
#include <xxhash.h>
27
#include <zlib.h>
28
29
#include <bit>
30
#include <functional>
31
32
#include "common/compiler_util.h" // IWYU pragma: keep
33
#include "exec/common/endian.h"
34
#include "util/cpu_info.h"
35
#include "util/hash/city.h"
36
#include "util/hash/murmur_hash3.h"
37
#include "util/sse_util.hpp"
38
39
namespace doris {
40
namespace detail {
41
// Slicing-by-4 table: t[0] is the standard byte-at-a-time table,
42
// t[1..3] are extended tables for parallel 4-byte processing.
43
struct CRC32SliceBy4Table {
44
    uint32_t t[4][256] {};
45
0
    constexpr CRC32SliceBy4Table() {
46
0
        // t[0]: standard CRC32 lookup table
47
0
        for (uint32_t i = 0; i < 256; i++) {
48
0
            uint32_t c = i;
49
0
            for (int j = 0; j < 8; j++) {
50
0
                c = (c & 1) ? ((c >> 1) ^ 0xEDB88320U) : (c >> 1);
51
0
            }
52
0
            t[0][i] = c;
53
0
        }
54
0
        // t[1..3]: each entry is one additional CRC byte-step applied to t[k-1]
55
0
        for (uint32_t i = 0; i < 256; i++) {
56
0
            uint32_t c = t[0][i];
57
0
            for (int k = 1; k < 4; k++) {
58
0
                c = t[0][c & 0xFF] ^ (c >> 8);
59
0
                t[k][i] = c;
60
0
            }
61
0
        }
62
0
    }
63
};
64
} // namespace detail
65
66
// Utility class to compute hash values.
67
class HashUtil {
68
private:
69
    static inline constexpr detail::CRC32SliceBy4Table CRC32_TABLE {};
70
71
public:
72
641k
    static uint32_t zlib_crc_hash(const void* data, uint32_t bytes, uint32_t hash) {
73
641k
        return (uint32_t)crc32(hash, (const unsigned char*)data, bytes);
74
641k
    }
75
76
    // Inline CRC32 (zlib-compatible, standard CRC32 polynomial) for fixed-size types.
77
    // Uses Slicing-by-4 technique for 4/8-byte types: processes 4 bytes at a time using
78
    // 4 precomputed lookup tables, reducing serial table lookups from 4 to 1 per 4-byte chunk.
79
    // Polynomial: 0xEDB88320 (reflected form of 0x04C11DB7).
80
    // Endian note: CRC32 reflected algorithm processes bytes in address order (byte[0] first).
81
    // Slicing-by-4 requires byte[0] at LSB of the loaded uint32_t, which is little-endian layout.
82
    // LittleEndian::Load32 provides this on ALL platforms: noop on LE, bswap on BE.
83
    template <typename T>
84
4.23M
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
85
4.23M
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
86
        // zlib convention: pre/post XOR with 0xFFFFFFFF
87
4.23M
        uint32_t crc = hash ^ 0xFFFFFFFFU;
88
89
4.23M
        if constexpr (sizeof(T) == 1) {
90
            // 1 byte: single table lookup
91
202
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
92
207
        } else if constexpr (sizeof(T) == 2) {
93
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
94
207
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
95
207
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
96
4.22M
        } else if constexpr (sizeof(T) == 4) {
97
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
98
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
99
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
100
4.22M
            uint32_t word = LittleEndian::Load32(p) ^ crc;
101
4.22M
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
102
4.22M
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
103
4.22M
        } else if constexpr (sizeof(T) == 8) {
104
            // 8 bytes: two Slicing-by-4 steps
105
1.34k
            uint32_t word = LittleEndian::Load32(p) ^ crc;
106
1.34k
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
107
1.34k
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
108
109
1.34k
            word = LittleEndian::Load32(p + 4) ^ crc;
110
1.34k
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
111
1.34k
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
112
2.88k
        } else {
113
            // Fallback to zlib for larger/unusual types
114
2.88k
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
115
2.88k
        }
116
0
        return crc ^ 0xFFFFFFFFU;
117
4.23M
    }
_ZN5doris8HashUtil16zlib_crc32_fixedIiEEjRKT_j
Line
Count
Source
84
4.22M
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
85
4.22M
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
86
        // zlib convention: pre/post XOR with 0xFFFFFFFF
87
4.22M
        uint32_t crc = hash ^ 0xFFFFFFFFU;
88
89
        if constexpr (sizeof(T) == 1) {
90
            // 1 byte: single table lookup
91
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
92
        } else if constexpr (sizeof(T) == 2) {
93
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
94
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
95
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
96
4.22M
        } else if constexpr (sizeof(T) == 4) {
97
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
98
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
99
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
100
4.22M
            uint32_t word = LittleEndian::Load32(p) ^ crc;
101
4.22M
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
102
4.22M
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
103
        } else if constexpr (sizeof(T) == 8) {
104
            // 8 bytes: two Slicing-by-4 steps
105
            uint32_t word = LittleEndian::Load32(p) ^ crc;
106
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
107
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
108
109
            word = LittleEndian::Load32(p + 4) ^ crc;
110
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
111
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
112
        } else {
113
            // Fallback to zlib for larger/unusual types
114
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
115
        }
116
4.22M
        return crc ^ 0xFFFFFFFFU;
117
4.22M
    }
_ZN5doris8HashUtil16zlib_crc32_fixedIhEEjRKT_j
Line
Count
Source
84
41
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
85
41
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
86
        // zlib convention: pre/post XOR with 0xFFFFFFFF
87
41
        uint32_t crc = hash ^ 0xFFFFFFFFU;
88
89
41
        if constexpr (sizeof(T) == 1) {
90
            // 1 byte: single table lookup
91
41
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
92
        } else if constexpr (sizeof(T) == 2) {
93
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
94
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
95
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
96
        } else if constexpr (sizeof(T) == 4) {
97
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
98
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
99
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
100
            uint32_t word = LittleEndian::Load32(p) ^ crc;
101
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
102
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
103
        } else if constexpr (sizeof(T) == 8) {
104
            // 8 bytes: two Slicing-by-4 steps
105
            uint32_t word = LittleEndian::Load32(p) ^ crc;
106
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
107
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
108
109
            word = LittleEndian::Load32(p + 4) ^ crc;
110
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
111
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
112
        } else {
113
            // Fallback to zlib for larger/unusual types
114
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
115
        }
116
41
        return crc ^ 0xFFFFFFFFU;
117
41
    }
_ZN5doris8HashUtil16zlib_crc32_fixedIsEEjRKT_j
Line
Count
Source
84
186
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
85
186
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
86
        // zlib convention: pre/post XOR with 0xFFFFFFFF
87
186
        uint32_t crc = hash ^ 0xFFFFFFFFU;
88
89
        if constexpr (sizeof(T) == 1) {
90
            // 1 byte: single table lookup
91
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
92
186
        } else if constexpr (sizeof(T) == 2) {
93
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
94
186
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
95
186
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
96
        } else if constexpr (sizeof(T) == 4) {
97
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
98
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
99
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
100
            uint32_t word = LittleEndian::Load32(p) ^ crc;
101
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
102
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
103
        } else if constexpr (sizeof(T) == 8) {
104
            // 8 bytes: two Slicing-by-4 steps
105
            uint32_t word = LittleEndian::Load32(p) ^ crc;
106
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
107
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
108
109
            word = LittleEndian::Load32(p + 4) ^ crc;
110
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
111
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
112
        } else {
113
            // Fallback to zlib for larger/unusual types
114
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
115
        }
116
186
        return crc ^ 0xFFFFFFFFU;
117
186
    }
_ZN5doris8HashUtil16zlib_crc32_fixedItEEjRKT_j
Line
Count
Source
84
21
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
85
21
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
86
        // zlib convention: pre/post XOR with 0xFFFFFFFF
87
21
        uint32_t crc = hash ^ 0xFFFFFFFFU;
88
89
        if constexpr (sizeof(T) == 1) {
90
            // 1 byte: single table lookup
91
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
92
21
        } else if constexpr (sizeof(T) == 2) {
93
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
94
21
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
95
21
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
96
        } else if constexpr (sizeof(T) == 4) {
97
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
98
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
99
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
100
            uint32_t word = LittleEndian::Load32(p) ^ crc;
101
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
102
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
103
        } else if constexpr (sizeof(T) == 8) {
104
            // 8 bytes: two Slicing-by-4 steps
105
            uint32_t word = LittleEndian::Load32(p) ^ crc;
106
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
107
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
108
109
            word = LittleEndian::Load32(p + 4) ^ crc;
110
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
111
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
112
        } else {
113
            // Fallback to zlib for larger/unusual types
114
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
115
        }
116
21
        return crc ^ 0xFFFFFFFFU;
117
21
    }
_ZN5doris8HashUtil16zlib_crc32_fixedIjEEjRKT_j
Line
Count
Source
84
51
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
85
51
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
86
        // zlib convention: pre/post XOR with 0xFFFFFFFF
87
51
        uint32_t crc = hash ^ 0xFFFFFFFFU;
88
89
        if constexpr (sizeof(T) == 1) {
90
            // 1 byte: single table lookup
91
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
92
        } else if constexpr (sizeof(T) == 2) {
93
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
94
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
95
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
96
51
        } else if constexpr (sizeof(T) == 4) {
97
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
98
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
99
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
100
51
            uint32_t word = LittleEndian::Load32(p) ^ crc;
101
51
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
102
51
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
103
        } else if constexpr (sizeof(T) == 8) {
104
            // 8 bytes: two Slicing-by-4 steps
105
            uint32_t word = LittleEndian::Load32(p) ^ crc;
106
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
107
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
108
109
            word = LittleEndian::Load32(p + 4) ^ crc;
110
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
111
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
112
        } else {
113
            // Fallback to zlib for larger/unusual types
114
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
115
        }
116
51
        return crc ^ 0xFFFFFFFFU;
117
51
    }
_ZN5doris8HashUtil16zlib_crc32_fixedIlEEjRKT_j
Line
Count
Source
84
206
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
85
206
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
86
        // zlib convention: pre/post XOR with 0xFFFFFFFF
87
206
        uint32_t crc = hash ^ 0xFFFFFFFFU;
88
89
        if constexpr (sizeof(T) == 1) {
90
            // 1 byte: single table lookup
91
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
92
        } else if constexpr (sizeof(T) == 2) {
93
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
94
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
95
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
96
        } else if constexpr (sizeof(T) == 4) {
97
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
98
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
99
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
100
            uint32_t word = LittleEndian::Load32(p) ^ crc;
101
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
102
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
103
206
        } else if constexpr (sizeof(T) == 8) {
104
            // 8 bytes: two Slicing-by-4 steps
105
206
            uint32_t word = LittleEndian::Load32(p) ^ crc;
106
206
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
107
206
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
108
109
206
            word = LittleEndian::Load32(p + 4) ^ crc;
110
206
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
111
206
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
112
        } else {
113
            // Fallback to zlib for larger/unusual types
114
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
115
        }
116
206
        return crc ^ 0xFFFFFFFFU;
117
206
    }
_ZN5doris8HashUtil16zlib_crc32_fixedImEEjRKT_j
Line
Count
Source
84
18
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
85
18
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
86
        // zlib convention: pre/post XOR with 0xFFFFFFFF
87
18
        uint32_t crc = hash ^ 0xFFFFFFFFU;
88
89
        if constexpr (sizeof(T) == 1) {
90
            // 1 byte: single table lookup
91
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
92
        } else if constexpr (sizeof(T) == 2) {
93
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
94
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
95
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
96
        } else if constexpr (sizeof(T) == 4) {
97
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
98
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
99
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
100
            uint32_t word = LittleEndian::Load32(p) ^ crc;
101
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
102
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
103
18
        } else if constexpr (sizeof(T) == 8) {
104
            // 8 bytes: two Slicing-by-4 steps
105
18
            uint32_t word = LittleEndian::Load32(p) ^ crc;
106
18
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
107
18
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
108
109
18
            word = LittleEndian::Load32(p + 4) ^ crc;
110
18
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
111
18
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
112
        } else {
113
            // Fallback to zlib for larger/unusual types
114
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
115
        }
116
18
        return crc ^ 0xFFFFFFFFU;
117
18
    }
_ZN5doris8HashUtil16zlib_crc32_fixedIfEEjRKT_j
Line
Count
Source
84
20
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
85
20
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
86
        // zlib convention: pre/post XOR with 0xFFFFFFFF
87
20
        uint32_t crc = hash ^ 0xFFFFFFFFU;
88
89
        if constexpr (sizeof(T) == 1) {
90
            // 1 byte: single table lookup
91
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
92
        } else if constexpr (sizeof(T) == 2) {
93
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
94
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
95
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
96
20
        } else if constexpr (sizeof(T) == 4) {
97
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
98
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
99
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
100
20
            uint32_t word = LittleEndian::Load32(p) ^ crc;
101
20
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
102
20
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
103
        } else if constexpr (sizeof(T) == 8) {
104
            // 8 bytes: two Slicing-by-4 steps
105
            uint32_t word = LittleEndian::Load32(p) ^ crc;
106
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
107
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
108
109
            word = LittleEndian::Load32(p + 4) ^ crc;
110
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
111
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
112
        } else {
113
            // Fallback to zlib for larger/unusual types
114
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
115
        }
116
20
        return crc ^ 0xFFFFFFFFU;
117
20
    }
_ZN5doris8HashUtil16zlib_crc32_fixedIdEEjRKT_j
Line
Count
Source
84
42
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
85
42
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
86
        // zlib convention: pre/post XOR with 0xFFFFFFFF
87
42
        uint32_t crc = hash ^ 0xFFFFFFFFU;
88
89
        if constexpr (sizeof(T) == 1) {
90
            // 1 byte: single table lookup
91
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
92
        } else if constexpr (sizeof(T) == 2) {
93
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
94
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
95
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
96
        } else if constexpr (sizeof(T) == 4) {
97
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
98
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
99
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
100
            uint32_t word = LittleEndian::Load32(p) ^ crc;
101
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
102
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
103
42
        } else if constexpr (sizeof(T) == 8) {
104
            // 8 bytes: two Slicing-by-4 steps
105
42
            uint32_t word = LittleEndian::Load32(p) ^ crc;
106
42
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
107
42
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
108
109
42
            word = LittleEndian::Load32(p + 4) ^ crc;
110
42
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
111
42
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
112
        } else {
113
            // Fallback to zlib for larger/unusual types
114
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
115
        }
116
42
        return crc ^ 0xFFFFFFFFU;
117
42
    }
_ZN5doris8HashUtil16zlib_crc32_fixedInEEjRKT_j
Line
Count
Source
84
158
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
85
158
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
86
        // zlib convention: pre/post XOR with 0xFFFFFFFF
87
158
        uint32_t crc = hash ^ 0xFFFFFFFFU;
88
89
        if constexpr (sizeof(T) == 1) {
90
            // 1 byte: single table lookup
91
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
92
        } else if constexpr (sizeof(T) == 2) {
93
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
94
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
95
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
96
        } else if constexpr (sizeof(T) == 4) {
97
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
98
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
99
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
100
            uint32_t word = LittleEndian::Load32(p) ^ crc;
101
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
102
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
103
        } else if constexpr (sizeof(T) == 8) {
104
            // 8 bytes: two Slicing-by-4 steps
105
            uint32_t word = LittleEndian::Load32(p) ^ crc;
106
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
107
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
108
109
            word = LittleEndian::Load32(p + 4) ^ crc;
110
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
111
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
112
158
        } else {
113
            // Fallback to zlib for larger/unusual types
114
158
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
115
158
        }
116
0
        return crc ^ 0xFFFFFFFFU;
117
158
    }
_ZN5doris8HashUtil16zlib_crc32_fixedINS_7DecimalIiEEEEjRKT_j
Line
Count
Source
84
519
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
85
519
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
86
        // zlib convention: pre/post XOR with 0xFFFFFFFF
87
519
        uint32_t crc = hash ^ 0xFFFFFFFFU;
88
89
        if constexpr (sizeof(T) == 1) {
90
            // 1 byte: single table lookup
91
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
92
        } else if constexpr (sizeof(T) == 2) {
93
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
94
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
95
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
96
519
        } else if constexpr (sizeof(T) == 4) {
97
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
98
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
99
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
100
519
            uint32_t word = LittleEndian::Load32(p) ^ crc;
101
519
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
102
519
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
103
        } else if constexpr (sizeof(T) == 8) {
104
            // 8 bytes: two Slicing-by-4 steps
105
            uint32_t word = LittleEndian::Load32(p) ^ crc;
106
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
107
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
108
109
            word = LittleEndian::Load32(p + 4) ^ crc;
110
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
111
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
112
        } else {
113
            // Fallback to zlib for larger/unusual types
114
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
115
        }
116
519
        return crc ^ 0xFFFFFFFFU;
117
519
    }
_ZN5doris8HashUtil16zlib_crc32_fixedINS_7DecimalIlEEEEjRKT_j
Line
Count
Source
84
966
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
85
966
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
86
        // zlib convention: pre/post XOR with 0xFFFFFFFF
87
966
        uint32_t crc = hash ^ 0xFFFFFFFFU;
88
89
        if constexpr (sizeof(T) == 1) {
90
            // 1 byte: single table lookup
91
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
92
        } else if constexpr (sizeof(T) == 2) {
93
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
94
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
95
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
96
        } else if constexpr (sizeof(T) == 4) {
97
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
98
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
99
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
100
            uint32_t word = LittleEndian::Load32(p) ^ crc;
101
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
102
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
103
966
        } else if constexpr (sizeof(T) == 8) {
104
            // 8 bytes: two Slicing-by-4 steps
105
966
            uint32_t word = LittleEndian::Load32(p) ^ crc;
106
966
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
107
966
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
108
109
966
            word = LittleEndian::Load32(p + 4) ^ crc;
110
966
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
111
966
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
112
        } else {
113
            // Fallback to zlib for larger/unusual types
114
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
115
        }
116
966
        return crc ^ 0xFFFFFFFFU;
117
966
    }
_ZN5doris8HashUtil16zlib_crc32_fixedINS_12Decimal128V3EEEjRKT_j
Line
Count
Source
84
1.05k
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
85
1.05k
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
86
        // zlib convention: pre/post XOR with 0xFFFFFFFF
87
1.05k
        uint32_t crc = hash ^ 0xFFFFFFFFU;
88
89
        if constexpr (sizeof(T) == 1) {
90
            // 1 byte: single table lookup
91
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
92
        } else if constexpr (sizeof(T) == 2) {
93
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
94
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
95
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
96
        } else if constexpr (sizeof(T) == 4) {
97
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
98
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
99
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
100
            uint32_t word = LittleEndian::Load32(p) ^ crc;
101
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
102
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
103
        } else if constexpr (sizeof(T) == 8) {
104
            // 8 bytes: two Slicing-by-4 steps
105
            uint32_t word = LittleEndian::Load32(p) ^ crc;
106
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
107
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
108
109
            word = LittleEndian::Load32(p + 4) ^ crc;
110
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
111
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
112
1.05k
        } else {
113
            // Fallback to zlib for larger/unusual types
114
1.05k
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
115
1.05k
        }
116
0
        return crc ^ 0xFFFFFFFFU;
117
1.05k
    }
_ZN5doris8HashUtil16zlib_crc32_fixedINS_7DecimalIN4wide7integerILm256EiEEEEEEjRKT_j
Line
Count
Source
84
1.65k
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
85
1.65k
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
86
        // zlib convention: pre/post XOR with 0xFFFFFFFF
87
1.65k
        uint32_t crc = hash ^ 0xFFFFFFFFU;
88
89
        if constexpr (sizeof(T) == 1) {
90
            // 1 byte: single table lookup
91
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
92
        } else if constexpr (sizeof(T) == 2) {
93
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
94
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
95
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
96
        } else if constexpr (sizeof(T) == 4) {
97
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
98
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
99
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
100
            uint32_t word = LittleEndian::Load32(p) ^ crc;
101
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
102
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
103
        } else if constexpr (sizeof(T) == 8) {
104
            // 8 bytes: two Slicing-by-4 steps
105
            uint32_t word = LittleEndian::Load32(p) ^ crc;
106
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
107
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
108
109
            word = LittleEndian::Load32(p + 4) ^ crc;
110
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
111
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
112
1.65k
        } else {
113
            // Fallback to zlib for larger/unusual types
114
1.65k
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
115
1.65k
        }
116
0
        return crc ^ 0xFFFFFFFFU;
117
1.65k
    }
_ZN5doris8HashUtil16zlib_crc32_fixedIaEEjRKT_j
Line
Count
Source
84
161
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
85
161
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
86
        // zlib convention: pre/post XOR with 0xFFFFFFFF
87
161
        uint32_t crc = hash ^ 0xFFFFFFFFU;
88
89
161
        if constexpr (sizeof(T) == 1) {
90
            // 1 byte: single table lookup
91
161
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
92
        } else if constexpr (sizeof(T) == 2) {
93
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
94
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
95
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
96
        } else if constexpr (sizeof(T) == 4) {
97
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
98
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
99
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
100
            uint32_t word = LittleEndian::Load32(p) ^ crc;
101
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
102
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
103
        } else if constexpr (sizeof(T) == 8) {
104
            // 8 bytes: two Slicing-by-4 steps
105
            uint32_t word = LittleEndian::Load32(p) ^ crc;
106
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
107
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
108
109
            word = LittleEndian::Load32(p + 4) ^ crc;
110
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
111
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
112
        } else {
113
            // Fallback to zlib for larger/unusual types
114
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
115
        }
116
161
        return crc ^ 0xFFFFFFFFU;
117
161
    }
_ZN5doris8HashUtil16zlib_crc32_fixedIoEEjRKT_j
Line
Count
Source
84
12
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
85
12
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
86
        // zlib convention: pre/post XOR with 0xFFFFFFFF
87
12
        uint32_t crc = hash ^ 0xFFFFFFFFU;
88
89
        if constexpr (sizeof(T) == 1) {
90
            // 1 byte: single table lookup
91
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
92
        } else if constexpr (sizeof(T) == 2) {
93
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
94
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
95
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
96
        } else if constexpr (sizeof(T) == 4) {
97
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
98
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
99
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
100
            uint32_t word = LittleEndian::Load32(p) ^ crc;
101
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
102
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
103
        } else if constexpr (sizeof(T) == 8) {
104
            // 8 bytes: two Slicing-by-4 steps
105
            uint32_t word = LittleEndian::Load32(p) ^ crc;
106
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
107
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
108
109
            word = LittleEndian::Load32(p + 4) ^ crc;
110
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
111
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
112
12
        } else {
113
            // Fallback to zlib for larger/unusual types
114
12
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
115
12
        }
116
0
        return crc ^ 0xFFFFFFFFU;
117
12
    }
_ZN5doris8HashUtil16zlib_crc32_fixedINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEjRKT_j
Line
Count
Source
84
37
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
85
37
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
86
        // zlib convention: pre/post XOR with 0xFFFFFFFF
87
37
        uint32_t crc = hash ^ 0xFFFFFFFFU;
88
89
        if constexpr (sizeof(T) == 1) {
90
            // 1 byte: single table lookup
91
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
92
        } else if constexpr (sizeof(T) == 2) {
93
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
94
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
95
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
96
37
        } else if constexpr (sizeof(T) == 4) {
97
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
98
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
99
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
100
37
            uint32_t word = LittleEndian::Load32(p) ^ crc;
101
37
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
102
37
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
103
        } else if constexpr (sizeof(T) == 8) {
104
            // 8 bytes: two Slicing-by-4 steps
105
            uint32_t word = LittleEndian::Load32(p) ^ crc;
106
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
107
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
108
109
            word = LittleEndian::Load32(p + 4) ^ crc;
110
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
111
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
112
        } else {
113
            // Fallback to zlib for larger/unusual types
114
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
115
        }
116
37
        return crc ^ 0xFFFFFFFFU;
117
37
    }
_ZN5doris8HashUtil16zlib_crc32_fixedINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEjRKT_j
Line
Count
Source
84
111
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
85
111
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
86
        // zlib convention: pre/post XOR with 0xFFFFFFFF
87
111
        uint32_t crc = hash ^ 0xFFFFFFFFU;
88
89
        if constexpr (sizeof(T) == 1) {
90
            // 1 byte: single table lookup
91
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
92
        } else if constexpr (sizeof(T) == 2) {
93
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
94
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
95
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
96
        } else if constexpr (sizeof(T) == 4) {
97
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
98
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
99
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
100
            uint32_t word = LittleEndian::Load32(p) ^ crc;
101
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
102
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
103
111
        } else if constexpr (sizeof(T) == 8) {
104
            // 8 bytes: two Slicing-by-4 steps
105
111
            uint32_t word = LittleEndian::Load32(p) ^ crc;
106
111
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
107
111
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
108
109
111
            word = LittleEndian::Load32(p + 4) ^ crc;
110
111
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
111
111
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
112
        } else {
113
            // Fallback to zlib for larger/unusual types
114
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
115
        }
116
111
        return crc ^ 0xFFFFFFFFU;
117
111
    }
Unexecuted instantiation: _ZN5doris8HashUtil16zlib_crc32_fixedINS_16TimestampTzValueEEEjRKT_j
118
119
1.08M
    static uint32_t zlib_crc_hash_null(uint32_t hash) {
120
        // null is treat as 0 when hash
121
1.08M
        static const int INT_VALUE = 0;
122
1.08M
        return zlib_crc32_fixed(INT_VALUE, hash);
123
1.08M
    }
124
125
    template <typename T>
126
83.7k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
127
83.7k
        if constexpr (sizeof(T) == 1) {
128
5.90k
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
129
5.90k
        } else if constexpr (sizeof(T) == 2) {
130
4.93k
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
131
26.9k
        } else if constexpr (sizeof(T) == 4) {
132
26.9k
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
133
26.9k
        } else if constexpr (sizeof(T) == 8) {
134
26.2k
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
135
26.2k
        } else {
136
19.6k
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
137
19.6k
        }
138
83.7k
    }
_ZN5doris8HashUtil12crc32c_fixedIiEEjRKT_j
Line
Count
Source
126
12.0k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
127
        if constexpr (sizeof(T) == 1) {
128
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
129
        } else if constexpr (sizeof(T) == 2) {
130
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
131
12.0k
        } else if constexpr (sizeof(T) == 4) {
132
12.0k
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
133
        } else if constexpr (sizeof(T) == 8) {
134
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
135
        } else {
136
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
137
        }
138
12.0k
    }
_ZN5doris8HashUtil12crc32c_fixedINS_7DecimalIiEEEEjRKT_j
Line
Count
Source
126
2.16k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
127
        if constexpr (sizeof(T) == 1) {
128
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
129
        } else if constexpr (sizeof(T) == 2) {
130
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
131
2.16k
        } else if constexpr (sizeof(T) == 4) {
132
2.16k
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
133
        } else if constexpr (sizeof(T) == 8) {
134
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
135
        } else {
136
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
137
        }
138
2.16k
    }
_ZN5doris8HashUtil12crc32c_fixedINS_7DecimalIlEEEEjRKT_j
Line
Count
Source
126
3.49k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
127
        if constexpr (sizeof(T) == 1) {
128
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
129
        } else if constexpr (sizeof(T) == 2) {
130
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
131
        } else if constexpr (sizeof(T) == 4) {
132
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
133
3.49k
        } else if constexpr (sizeof(T) == 8) {
134
3.49k
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
135
        } else {
136
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
137
        }
138
3.49k
    }
Unexecuted instantiation: _ZN5doris8HashUtil12crc32c_fixedINS_14DecimalV2ValueEEEjRKT_j
_ZN5doris8HashUtil12crc32c_fixedINS_12Decimal128V3EEEjRKT_j
Line
Count
Source
126
3.91k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
127
        if constexpr (sizeof(T) == 1) {
128
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
129
        } else if constexpr (sizeof(T) == 2) {
130
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
131
        } else if constexpr (sizeof(T) == 4) {
132
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
133
        } else if constexpr (sizeof(T) == 8) {
134
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
135
3.91k
        } else {
136
3.91k
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
137
3.91k
        }
138
3.91k
    }
_ZN5doris8HashUtil12crc32c_fixedINS_7DecimalIN4wide7integerILm256EiEEEEEEjRKT_j
Line
Count
Source
126
7.44k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
127
        if constexpr (sizeof(T) == 1) {
128
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
129
        } else if constexpr (sizeof(T) == 2) {
130
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
131
        } else if constexpr (sizeof(T) == 4) {
132
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
133
        } else if constexpr (sizeof(T) == 8) {
134
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
135
7.44k
        } else {
136
7.44k
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
137
7.44k
        }
138
7.44k
    }
_ZN5doris8HashUtil12crc32c_fixedIhEEjRKT_j
Line
Count
Source
126
804
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
127
804
        if constexpr (sizeof(T) == 1) {
128
804
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
129
        } else if constexpr (sizeof(T) == 2) {
130
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
131
        } else if constexpr (sizeof(T) == 4) {
132
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
133
        } else if constexpr (sizeof(T) == 8) {
134
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
135
        } else {
136
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
137
        }
138
804
    }
_ZN5doris8HashUtil12crc32c_fixedIaEEjRKT_j
Line
Count
Source
126
5.09k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
127
5.09k
        if constexpr (sizeof(T) == 1) {
128
5.09k
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
129
        } else if constexpr (sizeof(T) == 2) {
130
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
131
        } else if constexpr (sizeof(T) == 4) {
132
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
133
        } else if constexpr (sizeof(T) == 8) {
134
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
135
        } else {
136
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
137
        }
138
5.09k
    }
_ZN5doris8HashUtil12crc32c_fixedIsEEjRKT_j
Line
Count
Source
126
4.93k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
127
        if constexpr (sizeof(T) == 1) {
128
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
129
4.93k
        } else if constexpr (sizeof(T) == 2) {
130
4.93k
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
131
        } else if constexpr (sizeof(T) == 4) {
132
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
133
        } else if constexpr (sizeof(T) == 8) {
134
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
135
        } else {
136
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
137
        }
138
4.93k
    }
_ZN5doris8HashUtil12crc32c_fixedIlEEjRKT_j
Line
Count
Source
126
6.90k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
127
        if constexpr (sizeof(T) == 1) {
128
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
129
        } else if constexpr (sizeof(T) == 2) {
130
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
131
        } else if constexpr (sizeof(T) == 4) {
132
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
133
6.90k
        } else if constexpr (sizeof(T) == 8) {
134
6.90k
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
135
        } else {
136
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
137
        }
138
6.90k
    }
_ZN5doris8HashUtil12crc32c_fixedInEEjRKT_j
Line
Count
Source
126
3.77k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
127
        if constexpr (sizeof(T) == 1) {
128
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
129
        } else if constexpr (sizeof(T) == 2) {
130
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
131
        } else if constexpr (sizeof(T) == 4) {
132
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
133
        } else if constexpr (sizeof(T) == 8) {
134
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
135
3.77k
        } else {
136
3.77k
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
137
3.77k
        }
138
3.77k
    }
_ZN5doris8HashUtil12crc32c_fixedIfEEjRKT_j
Line
Count
Source
126
4.32k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
127
        if constexpr (sizeof(T) == 1) {
128
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
129
        } else if constexpr (sizeof(T) == 2) {
130
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
131
4.32k
        } else if constexpr (sizeof(T) == 4) {
132
4.32k
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
133
        } else if constexpr (sizeof(T) == 8) {
134
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
135
        } else {
136
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
137
        }
138
4.32k
    }
_ZN5doris8HashUtil12crc32c_fixedIdEEjRKT_j
Line
Count
Source
126
4.85k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
127
        if constexpr (sizeof(T) == 1) {
128
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
129
        } else if constexpr (sizeof(T) == 2) {
130
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
131
        } else if constexpr (sizeof(T) == 4) {
132
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
133
4.85k
        } else if constexpr (sizeof(T) == 8) {
134
4.85k
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
135
        } else {
136
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
137
        }
138
4.85k
    }
_ZN5doris8HashUtil12crc32c_fixedIjEEjRKT_j
Line
Count
Source
126
6.79k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
127
        if constexpr (sizeof(T) == 1) {
128
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
129
        } else if constexpr (sizeof(T) == 2) {
130
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
131
6.79k
        } else if constexpr (sizeof(T) == 4) {
132
6.79k
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
133
        } else if constexpr (sizeof(T) == 8) {
134
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
135
        } else {
136
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
137
        }
138
6.79k
    }
_ZN5doris8HashUtil12crc32c_fixedIoEEjRKT_j
Line
Count
Source
126
4.53k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
127
        if constexpr (sizeof(T) == 1) {
128
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
129
        } else if constexpr (sizeof(T) == 2) {
130
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
131
        } else if constexpr (sizeof(T) == 4) {
132
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
133
        } else if constexpr (sizeof(T) == 8) {
134
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
135
4.53k
        } else {
136
4.53k
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
137
4.53k
        }
138
4.53k
    }
_ZN5doris8HashUtil12crc32c_fixedINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEjRKT_j
Line
Count
Source
126
1.57k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
127
        if constexpr (sizeof(T) == 1) {
128
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
129
        } else if constexpr (sizeof(T) == 2) {
130
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
131
1.57k
        } else if constexpr (sizeof(T) == 4) {
132
1.57k
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
133
        } else if constexpr (sizeof(T) == 8) {
134
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
135
        } else {
136
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
137
        }
138
1.57k
    }
_ZN5doris8HashUtil12crc32c_fixedINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEjRKT_j
Line
Count
Source
126
11.0k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
127
        if constexpr (sizeof(T) == 1) {
128
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
129
        } else if constexpr (sizeof(T) == 2) {
130
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
131
        } else if constexpr (sizeof(T) == 4) {
132
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
133
11.0k
        } else if constexpr (sizeof(T) == 8) {
134
11.0k
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
135
        } else {
136
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
137
        }
138
11.0k
    }
Unexecuted instantiation: _ZN5doris8HashUtil12crc32c_fixedINS_16TimestampTzValueEEEjRKT_j
Unexecuted instantiation: _ZN5doris8HashUtil12crc32c_fixedImEEjRKT_j
139
140
460
    static uint32_t crc32c_null(uint32_t hash) {
141
        // null is treat as 0 when hash
142
460
        static const int INT_VALUE = 0;
143
460
        return crc32c_fixed(INT_VALUE, hash);
144
460
    }
145
146
    // Compute the Crc32 hash for data using SSE4 instructions.  The input hash parameter is
147
    // the current hash/seed value.
148
    // This should only be called if SSE is supported.
149
    // This is ~4x faster than Fnv/Boost Hash.
150
    // NOTE: DO NOT use this method for checksum! This does not generate the standard CRC32 checksum!
151
    //       For checksum, use CRC-32C algorithm from crc32c.h
152
    // NOTE: Any changes made to this function need to be reflected in Codegen::GetHashFn.
153
    // TODO: crc32 hashes with different seeds do not result in different hash functions.
154
    // The resulting hashes are correlated.
155
    // ATTN: prefer do not use this function anymore, use crc32c::Extend instead
156
    // This function is retained because it is not certain whether there are compatibility issues with historical data.
157
184k
    static uint32_t crc_hash(const void* data, uint32_t bytes, uint32_t hash) {
158
184k
        if (!CpuInfo::is_supported(CpuInfo::SSE4_2)) {
159
0
            return zlib_crc_hash(data, bytes, hash);
160
0
        }
161
184k
        uint32_t words = bytes / sizeof(uint32_t);
162
184k
        bytes = bytes % sizeof(uint32_t);
163
164
184k
        const uint32_t* p = reinterpret_cast<const uint32_t*>(data);
165
166
324k
        while (words--) {
167
140k
            hash = _mm_crc32_u32(hash, *p);
168
140k
            ++p;
169
140k
        }
170
171
184k
        const uint8_t* s = reinterpret_cast<const uint8_t*>(p);
172
173
189k
        while (bytes--) {
174
5.31k
            hash = _mm_crc32_u8(hash, *s);
175
5.31k
            ++s;
176
5.31k
        }
177
178
        // The lower half of the CRC hash has has poor uniformity, so swap the halves
179
        // for anyone who only uses the first several bits of the hash.
180
184k
        hash = (hash << 16) | (hash >> 16);
181
184k
        return hash;
182
184k
    }
183
184
0
    static uint64_t crc_hash64(const void* data, uint32_t bytes, uint64_t hash) {
185
0
        uint32_t words = bytes / sizeof(uint32_t);
186
0
        bytes = bytes % sizeof(uint32_t);
187
188
0
        uint32_t h1 = hash >> 32;
189
0
        uint32_t h2 = (hash << 32) >> 32;
190
191
0
        const uint32_t* p = reinterpret_cast<const uint32_t*>(data);
192
0
        while (words--) {
193
0
            (words & 1) ? (h1 = _mm_crc32_u32(h1, *p)) : (h2 = _mm_crc32_u32(h2, *p));
194
0
            ++p;
195
0
        }
196
197
0
        const uint8_t* s = reinterpret_cast<const uint8_t*>(p);
198
0
        while (bytes--) {
199
0
            (bytes & 1) ? (h1 = _mm_crc32_u8(h1, *s)) : (h2 = _mm_crc32_u8(h2, *s));
200
0
            ++s;
201
0
        }
202
0
        union {
203
0
            uint64_t u64;
204
0
            uint32_t u32[2];
205
0
        } converter;
206
0
        converter.u64 = hash;
207
208
0
        h1 = (h1 << 16) | (h1 >> 16);
209
0
        h2 = (h2 << 16) | (h2 >> 16);
210
0
        converter.u32[0] = h1;
211
0
        converter.u32[1] = h2;
212
213
0
        return converter.u64;
214
0
    }
215
216
    // refer to https://github.com/apache/commons-codec/blob/master/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java
217
    static const uint32_t MURMUR3_32_SEED = 104729;
218
219
    // modify from https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
220
20
    static uint32_t murmur_hash3_32(const void* key, int64_t len, uint32_t seed) {
221
20
        uint32_t out = 0;
222
20
        murmur_hash3_x86_32(key, len, seed, &out);
223
20
        return out;
224
20
    }
225
226
    template <bool is_mmh64_v2>
227
15
    static uint64_t murmur_hash3_64(const void* key, int64_t len, uint64_t seed) {
228
15
        uint64_t out = 0;
229
15
        if constexpr (is_mmh64_v2) {
230
3
            murmur_hash3_x64_64_shared(key, len, seed, &out);
231
12
        } else {
232
12
            murmur_hash3_x64_64(key, len, seed, &out);
233
12
        }
234
15
        return out;
235
15
    }
_ZN5doris8HashUtil15murmur_hash3_64ILb0EEEmPKvlm
Line
Count
Source
227
12
    static uint64_t murmur_hash3_64(const void* key, int64_t len, uint64_t seed) {
228
12
        uint64_t out = 0;
229
        if constexpr (is_mmh64_v2) {
230
            murmur_hash3_x64_64_shared(key, len, seed, &out);
231
12
        } else {
232
12
            murmur_hash3_x64_64(key, len, seed, &out);
233
12
        }
234
12
        return out;
235
12
    }
_ZN5doris8HashUtil15murmur_hash3_64ILb1EEEmPKvlm
Line
Count
Source
227
3
    static uint64_t murmur_hash3_64(const void* key, int64_t len, uint64_t seed) {
228
3
        uint64_t out = 0;
229
3
        if constexpr (is_mmh64_v2) {
230
3
            murmur_hash3_x64_64_shared(key, len, seed, &out);
231
        } else {
232
            murmur_hash3_x64_64(key, len, seed, &out);
233
        }
234
3
        return out;
235
3
    }
236
237
    static const int MURMUR_R = 47;
238
239
    // Murmur2 hash implementation returning 64-bit hashes.
240
0
    static uint64_t murmur_hash2_64(const void* input, int len, uint64_t seed) {
241
0
        uint64_t h = seed ^ (len * MURMUR_PRIME);
242
0
243
0
        const uint64_t* data = reinterpret_cast<const uint64_t*>(input);
244
0
        const uint64_t* end = data + (len / sizeof(uint64_t));
245
0
246
0
        while (data != end) {
247
0
            uint64_t k = *data++;
248
0
            k *= MURMUR_PRIME;
249
0
            k ^= k >> MURMUR_R;
250
0
            k *= MURMUR_PRIME;
251
0
            h ^= k;
252
0
            h *= MURMUR_PRIME;
253
0
        }
254
0
255
0
        const uint8_t* data2 = reinterpret_cast<const uint8_t*>(data);
256
0
        switch (len & 7) {
257
0
        case 7:
258
0
            h ^= uint64_t(data2[6]) << 48;
259
0
            [[fallthrough]];
260
0
        case 6:
261
0
            h ^= uint64_t(data2[5]) << 40;
262
0
            [[fallthrough]];
263
0
        case 5:
264
0
            h ^= uint64_t(data2[4]) << 32;
265
0
            [[fallthrough]];
266
0
        case 4:
267
0
            h ^= uint64_t(data2[3]) << 24;
268
0
            [[fallthrough]];
269
0
        case 3:
270
0
            h ^= uint64_t(data2[2]) << 16;
271
0
            [[fallthrough]];
272
0
        case 2:
273
0
            h ^= uint64_t(data2[1]) << 8;
274
0
            [[fallthrough]];
275
0
        case 1:
276
0
            h ^= uint64_t(data2[0]);
277
0
            h *= MURMUR_PRIME;
278
0
        }
279
0
280
0
        h ^= h >> MURMUR_R;
281
0
        h *= MURMUR_PRIME;
282
0
        h ^= h >> MURMUR_R;
283
0
        return h;
284
0
    }
285
286
    // FNV parameters: default values recommended by http://isthe.com/chongo/tech/comp/fnv/
287
    static const uint32_t FNV_PRIME = 0x01000193; //   16777619; per-byte multiplier in fnv_hash
288
    static const uint32_t FNV_SEED = 0x811C9DC5;  // 2166136261
289
    static const uint64_t FNV64_PRIME = 1099511628211UL; // per-byte multiplier in fnv_hash64
290
    static const uint64_t FNV64_SEED = 14695981039346656037UL;
291
    static const uint64_t MURMUR_PRIME = 0xc6a4a7935bd1e995ULL; // multiplier used by murmur_hash2_64 and murmur_hash64A
292
    static const uint32_t MURMUR_SEED = 0xadc83b19ULL; // NOTE(review): not referenced by the routines in this section; ULL suffix is redundant for a 32-bit constant
293
    // Implementation of the Fowler–Noll–Vo hash function.  This is not as performant
294
    // as boost's hash on int types (2x slower) but has bit entropy.
295
    // For ints, boost just returns the value of the int which can be pathological.
296
    // For example, if the data is <1000, 2000, 3000, 4000, ..> and then the mod of 1000
297
    // is taken on the hash, all values will collide to the same bucket.
298
    // For string values, Fnv is slightly faster than boost.
299
0
    static uint32_t fnv_hash(const void* data, uint32_t bytes, uint32_t hash) {
300
0
        const uint8_t* ptr = reinterpret_cast<const uint8_t*>(data);
301
302
0
        while (bytes--) {
303
0
            hash = (*ptr ^ hash) * FNV_PRIME;
304
0
            ++ptr;
305
0
        }
306
307
0
        return hash;
308
0
    }
309
310
0
    static uint64_t fnv_hash64(const void* data, uint32_t bytes, uint64_t hash) {
311
0
        const uint8_t* ptr = reinterpret_cast<const uint8_t*>(data);
312
0
313
0
        while (bytes--) {
314
0
            hash = (*ptr ^ hash) * FNV64_PRIME;
315
0
            ++ptr;
316
0
        }
317
0
318
0
        return hash;
319
0
    }
320
321
    // Our hash function is MurmurHash2, 64 bit version.
322
    // It was modified in order to provide the same result in
323
    // big and little endian archs (endian neutral).
324
67.8k
    static uint64_t murmur_hash64A(const void* key, int64_t len, unsigned int seed) {
325
67.8k
        const uint64_t m = MURMUR_PRIME;
326
67.8k
        const int r = 47;
327
67.8k
        uint64_t h = seed ^ (len * m);
328
67.8k
        const uint8_t* data = (const uint8_t*)key;
329
67.8k
        const uint8_t* end = data + (len - (len & 7));
330
331
135k
        while (data != end) {
332
67.8k
            uint64_t k;
333
            if constexpr (std::endian::native == std::endian::big) {
334
                k = (uint64_t)data[0];
335
                k |= (uint64_t)data[1] << 8;
336
                k |= (uint64_t)data[2] << 16;
337
                k |= (uint64_t)data[3] << 24;
338
                k |= (uint64_t)data[4] << 32;
339
                k |= (uint64_t)data[5] << 40;
340
                k |= (uint64_t)data[6] << 48;
341
                k |= (uint64_t)data[7] << 56;
342
67.8k
            } else if constexpr (std::endian::native == std::endian::little) {
343
67.8k
                memcpy(&k, data, sizeof(k));
344
            } else {
345
                static_assert(std::endian::native == std::endian::big ||
346
                                      std::endian::native == std::endian::little,
347
                              "Unsupported endianness");
348
            }
349
350
67.8k
            k *= m;
351
67.8k
            k ^= k >> r;
352
67.8k
            k *= m;
353
67.8k
            h ^= k;
354
67.8k
            h *= m;
355
67.8k
            data += 8;
356
67.8k
        }
357
358
67.8k
        switch (len & 7) {
359
0
        case 7:
360
0
            h ^= (uint64_t)data[6] << 48;
361
0
            [[fallthrough]];
362
0
        case 6:
363
0
            h ^= (uint64_t)data[5] << 40;
364
0
            [[fallthrough]];
365
0
        case 5:
366
0
            h ^= (uint64_t)data[4] << 32;
367
0
            [[fallthrough]];
368
3
        case 4:
369
3
            h ^= (uint64_t)data[3] << 24;
370
3
            [[fallthrough]];
371
3
        case 3:
372
3
            h ^= (uint64_t)data[2] << 16;
373
3
            [[fallthrough]];
374
3
        case 2:
375
3
            h ^= (uint64_t)data[1] << 8;
376
3
            [[fallthrough]];
377
6
        case 1:
378
6
            h ^= (uint64_t)data[0];
379
6
            h *= m;
380
67.8k
        }
381
382
67.8k
        h ^= h >> r;
383
67.8k
        h *= m;
384
67.8k
        h ^= h >> r;
385
67.8k
        return h;
386
67.8k
    }
387
388
    // Computes the hash value for data.  Will call either CrcHash or FnvHash
389
    // depending on hardware capabilities.
390
    // Seed values for different steps of the query execution should use different seeds
391
    // to prevent accidental key collisions. (See IMPALA-219 for more details).
392
184k
    static uint32_t hash(const void* data, uint32_t bytes, uint32_t seed) {
393
184k
#ifdef __SSE4_2__
394
395
184k
        if (LIKELY(CpuInfo::is_supported(CpuInfo::SSE4_2))) {
396
184k
            return crc_hash(data, bytes, seed);
397
184k
        } else {
398
0
            return fnv_hash(data, bytes, seed);
399
0
        }
400
401
#else
402
        return fnv_hash(data, bytes, seed);
403
#endif
404
184k
    }
405
406
89.8k
    static uint64_t hash64(const void* data, uint64_t bytes, uint64_t seed) {
407
#ifdef _SSE4_2_
408
        if (LIKELY(CpuInfo::is_supported(CpuInfo::SSE4_2))) {
409
            return crc_hash64(data, bytes, seed);
410
411
        } else {
412
            uint64_t hash = 0;
413
            murmur_hash3_x64_64(data, bytes, seed, &hash);
414
            return hash;
415
        }
416
#else
417
89.8k
        uint64_t hash = 0;
418
89.8k
        murmur_hash3_x64_64(data, bytes, seed, &hash);
419
89.8k
        return hash;
420
89.8k
#endif
421
89.8k
    }
422
    // Folds the hash of `v` into `seed`, in place.
    // Same mixing formula as boost's hash_combine, except that std::hash is used
    // in place of boost::hash.
    template <class T>
    static inline void hash_combine(std::size_t& seed, const T& v) {
        const std::size_t mixed = std::hash<T> {}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
        seed ^= mixed;
    }
_ZN5doris8HashUtil12hash_combineINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEvRmRKT_
Line
Count
Source
425
404
    static inline void hash_combine(std::size_t& seed, const T& v) {
426
404
        std::hash<T> hasher;
427
404
        seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
428
404
    }
_ZN5doris8HashUtil12hash_combineIlEEvRmRKT_
Line
Count
Source
425
18
    static inline void hash_combine(std::size_t& seed, const T& v) {
426
18
        std::hash<T> hasher;
427
18
        seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
428
18
    }
429
430
#if defined(__clang__)
431
#pragma clang diagnostic push
432
#pragma clang diagnostic ignored "-Wused-but-marked-unused"
433
#endif
434
    // xxHash function for a byte array.  For convenience, a 64-bit seed is also
435
    // hashed into the result.  The mapping may change from time to time.
436
24
    static xxh_u32 xxHash32WithSeed(const char* s, size_t len, xxh_u32 seed) {
437
24
        return XXH32(s, len, seed);
438
24
    }
439
440
    // same to the up function, just for null value
441
0
    static xxh_u32 xxHash32NullWithSeed(xxh_u32 seed) {
442
0
        static const int INT_VALUE = 0;
443
0
        return XXH32(reinterpret_cast<const char*>(&INT_VALUE), sizeof(int), seed);
444
0
    }
445
446
157k
    // 64-bit hash of the `len` bytes at `s` with the given `seed`.
    // Despite the name, this calls XXH3_64bits_withSeed (XXH3), not XXH64.
    static xxh_u64 xxHash64WithSeed(const char* s, size_t len, xxh_u64 seed) {
        const xxh_u64 digest = XXH3_64bits_withSeed(s, len, seed);
        return digest;
    }
449
450
    // same to the up function, just for null value
451
1.08M
    static xxh_u64 xxHash64NullWithSeed(xxh_u64 seed) {
452
1.08M
        static const int INT_VALUE = 0;
453
1.08M
        return XXH3_64bits_withSeed(reinterpret_cast<const char*>(&INT_VALUE), sizeof(int), seed);
454
1.08M
    }
455
456
21
    // 64-bit hash of the `len` bytes at `s` with the given `seed`, computed with the
    // XXH64 function (as opposed to xxHash64WithSeed, which uses XXH3).
    static xxh_u64 xxhash64_compat_with_seed(const char* s, size_t len, xxh_u64 seed) {
        const void* const input = s;
        return XXH64(input, len, seed);
    }
459
460
0
    // Null-value counterpart of xxhash64_compat_with_seed: hashes the sizeof(int) bytes
    // of a zero-valued int with XXH64 and the given `seed`.
    static xxh_u64 xxhash64_compat_null_with_seed(xxh_u64 seed) {
        static const int kNullStandIn = 0;
        const void* const zero_bytes = &kNullStandIn;
        return XXH64(zero_bytes, sizeof(kNullStandIn), seed);
    }
464
465
#if defined(__clang__)
466
#pragma clang diagnostic pop
467
#endif
468
};
469
470
} // namespace doris
471
472
template <>
473
struct std::hash<doris::TUniqueId> {
474
3.59k
    size_t operator()(const doris::TUniqueId& id) const {
475
3.59k
        uint32_t seed = 0;
476
3.59k
        seed = doris::HashUtil::hash(&id.lo, sizeof(id.lo), seed);
477
3.59k
        seed = doris::HashUtil::hash(&id.hi, sizeof(id.hi), seed);
478
3.59k
        return seed;
479
3.59k
    }
480
};
481
482
template <>
483
struct std::hash<doris::TNetworkAddress> {
484
0
    size_t operator()(const doris::TNetworkAddress& address) const {
485
0
        uint32_t seed = 0;
486
0
        seed = doris::HashUtil::hash(address.hostname.data(), (uint32_t)address.hostname.size(),
487
0
                                     seed);
488
0
        seed = doris::HashUtil::hash(&address.port, 4, seed);
489
0
        return seed;
490
0
    }
491
};
492
493
template <>
494
struct std::hash<std::pair<doris::TUniqueId, int64_t>> {
495
0
    size_t operator()(const std::pair<doris::TUniqueId, int64_t>& pair) const {
496
0
        uint32_t seed = 0;
497
0
        seed = doris::HashUtil::hash(&pair.first.lo, sizeof(pair.first.lo), seed);
498
0
        seed = doris::HashUtil::hash(&pair.first.hi, sizeof(pair.first.hi), seed);
499
0
        seed = doris::HashUtil::hash(&pair.second, sizeof(pair.second), seed);
500
0
        return seed;
501
0
    }
502
};
503
504
template <class First, class Second>
505
struct std::hash<std::pair<First, Second>> {
506
31.2k
    // Hashes each member with its own std::hash and merges the two values
    // with doris::util_hash::HashLen16.
    size_t operator()(const pair<First, Second>& p) const {
        const size_t first_digest = std::hash<First> {}(p.first);
        const size_t second_digest = std::hash<Second> {}(p.second);
        return doris::util_hash::HashLen16(first_digest, second_digest);
    }
Unexecuted instantiation: _ZNKSt4hashISt4pairIlN5doris8RowsetIdEEEclERKS3_
_ZNKSt4hashISt4pairINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEElEEclERKS7_
Line
Count
Source
506
32
    size_t operator()(const pair<First, Second>& p) const {
507
32
        size_t h1 = std::hash<First>()(p.first);
508
32
        size_t h2 = std::hash<Second>()(p.second);
509
32
        return doris::util_hash::HashLen16(h1, h2);
510
32
    }
_ZNKSt4hashISt4pairIiN5doris10PathInDataEEEclERKS3_
Line
Count
Source
506
31.1k
    size_t operator()(const pair<First, Second>& p) const {
507
31.1k
        size_t h1 = std::hash<First>()(p.first);
508
31.1k
        size_t h2 = std::hash<Second>()(p.second);
509
31.1k
        return doris::util_hash::HashLen16(h1, h2);
510
31.1k
    }
_ZNKSt4hashISt4pairIllEEclERKS1_
Line
Count
Source
506
64
    size_t operator()(const pair<First, Second>& p) const {
507
64
        size_t h1 = std::hash<First>()(p.first);
508
64
        size_t h2 = std::hash<Second>()(p.second);
509
64
        return doris::util_hash::HashLen16(h1, h2);
510
64
    }
Unexecuted instantiation: _ZNKSt4hashISt4pairIN5doris9TUniqueIdEiEEclERKS3_
511
};