Coverage Report

Created: 2026-03-12 16:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/hash_util.hpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/hash-util.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <crc32c/crc32c.h>
24
#include <gen_cpp/Types_types.h>
25
#include <xxh3.h>
26
#include <xxhash.h>
27
#include <zlib.h>
28
29
#include <bit>
30
#include <functional>
31
32
#include "common/compiler_util.h" // IWYU pragma: keep
33
#include "exec/common/endian.h"
34
#include "util/cpu_info.h"
35
#include "util/hash/city.h"
36
#include "util/hash/murmur_hash3.h"
37
#include "util/sse_util.hpp"
38
39
namespace doris {
40
#include "common/compile_check_begin.h"
41
namespace detail {
42
// Slicing-by-4 table: t[0] is the standard byte-at-a-time table,
43
// t[1..3] are extended tables for parallel 4-byte processing.
44
struct CRC32SliceBy4Table {
45
    uint32_t t[4][256] {};
46
0
    constexpr CRC32SliceBy4Table() {
47
0
        // t[0]: standard CRC32 lookup table
48
0
        for (uint32_t i = 0; i < 256; i++) {
49
0
            uint32_t c = i;
50
0
            for (int j = 0; j < 8; j++) {
51
0
                c = (c & 1) ? ((c >> 1) ^ 0xEDB88320U) : (c >> 1);
52
0
            }
53
0
            t[0][i] = c;
54
0
        }
55
0
        // t[1..3]: each entry is one additional CRC byte-step applied to t[k-1]
56
0
        for (uint32_t i = 0; i < 256; i++) {
57
0
            uint32_t c = t[0][i];
58
0
            for (int k = 1; k < 4; k++) {
59
0
                c = t[0][c & 0xFF] ^ (c >> 8);
60
0
                t[k][i] = c;
61
0
            }
62
0
        }
63
0
    }
64
};
65
} // namespace detail
66
67
// Utility class to compute hash values.
68
class HashUtil {
69
private:
70
    static inline constexpr detail::CRC32SliceBy4Table CRC32_TABLE {};
71
72
public:
73
44.2M
    static uint32_t zlib_crc_hash(const void* data, uint32_t bytes, uint32_t hash) {
74
44.2M
        return (uint32_t)crc32(hash, (const unsigned char*)data, bytes);
75
44.2M
    }
76
77
    // Inline CRC32 (zlib-compatible, standard CRC32 polynomial) for fixed-size types.
78
    // Uses Slicing-by-4 technique for 4/8-byte types: processes 4 bytes at a time using
79
    // 4 precomputed lookup tables, reducing serial table lookups from 4 to 1 per 4-byte chunk.
80
    // Polynomial: 0xEDB88320 (reflected form of 0x04C11DB7).
81
    // Endian note: CRC32 reflected algorithm processes bytes in address order (byte[0] first).
82
    // Slicing-by-4 requires byte[0] at LSB of the loaded uint32_t, which is little-endian layout.
83
    // LittleEndian::Load32 provides this on ALL platforms: noop on LE, bswap on BE.
84
    template <typename T>
85
17.2M
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
86
17.2M
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
87
        // zlib convention: pre/post XOR with 0xFFFFFFFF
88
17.2M
        uint32_t crc = hash ^ 0xFFFFFFFFU;
89
90
17.2M
        if constexpr (sizeof(T) == 1) {
91
            // 1 byte: single table lookup
92
864
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
93
864
        } else if constexpr (sizeof(T) == 2) {
94
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
95
186
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
96
186
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
97
17.2M
        } else if constexpr (sizeof(T) == 4) {
98
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
99
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
100
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
101
17.2M
            uint32_t word = LittleEndian::Load32(p) ^ crc;
102
17.2M
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
103
17.2M
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
104
17.2M
        } else if constexpr (sizeof(T) == 8) {
105
            // 8 bytes: two Slicing-by-4 steps
106
5.26k
            uint32_t word = LittleEndian::Load32(p) ^ crc;
107
5.26k
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
108
5.26k
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
109
110
5.26k
            word = LittleEndian::Load32(p + 4) ^ crc;
111
5.26k
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
112
5.26k
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
113
5.26k
        } else {
114
            // Fallback to zlib for larger/unusual types
115
2.90k
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
116
2.90k
        }
117
0
        return crc ^ 0xFFFFFFFFU;
118
17.2M
    }
_ZN5doris8HashUtil16zlib_crc32_fixedIiEEjRKT_j
Line
Count
Source
85
17.2M
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
86
17.2M
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
87
        // zlib convention: pre/post XOR with 0xFFFFFFFF
88
17.2M
        uint32_t crc = hash ^ 0xFFFFFFFFU;
89
90
        if constexpr (sizeof(T) == 1) {
91
            // 1 byte: single table lookup
92
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
93
        } else if constexpr (sizeof(T) == 2) {
94
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
95
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
96
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
97
17.2M
        } else if constexpr (sizeof(T) == 4) {
98
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
99
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
100
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
101
17.2M
            uint32_t word = LittleEndian::Load32(p) ^ crc;
102
17.2M
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
103
17.2M
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
104
        } else if constexpr (sizeof(T) == 8) {
105
            // 8 bytes: two Slicing-by-4 steps
106
            uint32_t word = LittleEndian::Load32(p) ^ crc;
107
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
108
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
109
110
            word = LittleEndian::Load32(p + 4) ^ crc;
111
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
112
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
113
        } else {
114
            // Fallback to zlib for larger/unusual types
115
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
116
        }
117
17.2M
        return crc ^ 0xFFFFFFFFU;
118
17.2M
    }
_ZN5doris8HashUtil16zlib_crc32_fixedINS_7DecimalIiEEEEjRKT_j
Line
Count
Source
85
521
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
86
521
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
87
        // zlib convention: pre/post XOR with 0xFFFFFFFF
88
521
        uint32_t crc = hash ^ 0xFFFFFFFFU;
89
90
        if constexpr (sizeof(T) == 1) {
91
            // 1 byte: single table lookup
92
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
93
        } else if constexpr (sizeof(T) == 2) {
94
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
95
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
96
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
97
521
        } else if constexpr (sizeof(T) == 4) {
98
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
99
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
100
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
101
521
            uint32_t word = LittleEndian::Load32(p) ^ crc;
102
521
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
103
521
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
104
        } else if constexpr (sizeof(T) == 8) {
105
            // 8 bytes: two Slicing-by-4 steps
106
            uint32_t word = LittleEndian::Load32(p) ^ crc;
107
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
108
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
109
110
            word = LittleEndian::Load32(p + 4) ^ crc;
111
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
112
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
113
        } else {
114
            // Fallback to zlib for larger/unusual types
115
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
116
        }
117
521
        return crc ^ 0xFFFFFFFFU;
118
521
    }
_ZN5doris8HashUtil16zlib_crc32_fixedINS_7DecimalIlEEEEjRKT_j
Line
Count
Source
85
966
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
86
966
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
87
        // zlib convention: pre/post XOR with 0xFFFFFFFF
88
966
        uint32_t crc = hash ^ 0xFFFFFFFFU;
89
90
        if constexpr (sizeof(T) == 1) {
91
            // 1 byte: single table lookup
92
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
93
        } else if constexpr (sizeof(T) == 2) {
94
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
95
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
96
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
97
        } else if constexpr (sizeof(T) == 4) {
98
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
99
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
100
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
101
            uint32_t word = LittleEndian::Load32(p) ^ crc;
102
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
103
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
104
966
        } else if constexpr (sizeof(T) == 8) {
105
            // 8 bytes: two Slicing-by-4 steps
106
966
            uint32_t word = LittleEndian::Load32(p) ^ crc;
107
966
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
108
966
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
109
110
966
            word = LittleEndian::Load32(p + 4) ^ crc;
111
966
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
112
966
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
113
        } else {
114
            // Fallback to zlib for larger/unusual types
115
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
116
        }
117
966
        return crc ^ 0xFFFFFFFFU;
118
966
    }
_ZN5doris8HashUtil16zlib_crc32_fixedINS_12Decimal128V3EEEjRKT_j
Line
Count
Source
85
1.07k
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
86
1.07k
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
87
        // zlib convention: pre/post XOR with 0xFFFFFFFF
88
1.07k
        uint32_t crc = hash ^ 0xFFFFFFFFU;
89
90
        if constexpr (sizeof(T) == 1) {
91
            // 1 byte: single table lookup
92
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
93
        } else if constexpr (sizeof(T) == 2) {
94
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
95
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
96
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
97
        } else if constexpr (sizeof(T) == 4) {
98
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
99
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
100
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
101
            uint32_t word = LittleEndian::Load32(p) ^ crc;
102
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
103
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
104
        } else if constexpr (sizeof(T) == 8) {
105
            // 8 bytes: two Slicing-by-4 steps
106
            uint32_t word = LittleEndian::Load32(p) ^ crc;
107
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
108
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
109
110
            word = LittleEndian::Load32(p + 4) ^ crc;
111
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
112
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
113
1.07k
        } else {
114
            // Fallback to zlib for larger/unusual types
115
1.07k
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
116
1.07k
        }
117
0
        return crc ^ 0xFFFFFFFFU;
118
1.07k
    }
_ZN5doris8HashUtil16zlib_crc32_fixedINS_7DecimalIN4wide7integerILm256EiEEEEEEjRKT_j
Line
Count
Source
85
1.65k
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
86
1.65k
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
87
        // zlib convention: pre/post XOR with 0xFFFFFFFF
88
1.65k
        uint32_t crc = hash ^ 0xFFFFFFFFU;
89
90
        if constexpr (sizeof(T) == 1) {
91
            // 1 byte: single table lookup
92
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
93
        } else if constexpr (sizeof(T) == 2) {
94
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
95
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
96
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
97
        } else if constexpr (sizeof(T) == 4) {
98
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
99
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
100
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
101
            uint32_t word = LittleEndian::Load32(p) ^ crc;
102
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
103
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
104
        } else if constexpr (sizeof(T) == 8) {
105
            // 8 bytes: two Slicing-by-4 steps
106
            uint32_t word = LittleEndian::Load32(p) ^ crc;
107
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
108
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
109
110
            word = LittleEndian::Load32(p + 4) ^ crc;
111
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
112
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
113
1.65k
        } else {
114
            // Fallback to zlib for larger/unusual types
115
1.65k
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
116
1.65k
        }
117
0
        return crc ^ 0xFFFFFFFFU;
118
1.65k
    }
_ZN5doris8HashUtil16zlib_crc32_fixedIhEEjRKT_j
Line
Count
Source
85
41
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
86
41
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
87
        // zlib convention: pre/post XOR with 0xFFFFFFFF
88
41
        uint32_t crc = hash ^ 0xFFFFFFFFU;
89
90
41
        if constexpr (sizeof(T) == 1) {
91
            // 1 byte: single table lookup
92
41
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
93
        } else if constexpr (sizeof(T) == 2) {
94
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
95
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
96
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
97
        } else if constexpr (sizeof(T) == 4) {
98
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
99
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
100
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
101
            uint32_t word = LittleEndian::Load32(p) ^ crc;
102
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
103
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
104
        } else if constexpr (sizeof(T) == 8) {
105
            // 8 bytes: two Slicing-by-4 steps
106
            uint32_t word = LittleEndian::Load32(p) ^ crc;
107
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
108
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
109
110
            word = LittleEndian::Load32(p + 4) ^ crc;
111
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
112
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
113
        } else {
114
            // Fallback to zlib for larger/unusual types
115
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
116
        }
117
41
        return crc ^ 0xFFFFFFFFU;
118
41
    }
_ZN5doris8HashUtil16zlib_crc32_fixedIaEEjRKT_j
Line
Count
Source
85
823
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
86
823
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
87
        // zlib convention: pre/post XOR with 0xFFFFFFFF
88
823
        uint32_t crc = hash ^ 0xFFFFFFFFU;
89
90
823
        if constexpr (sizeof(T) == 1) {
91
            // 1 byte: single table lookup
92
823
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
93
        } else if constexpr (sizeof(T) == 2) {
94
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
95
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
96
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
97
        } else if constexpr (sizeof(T) == 4) {
98
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
99
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
100
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
101
            uint32_t word = LittleEndian::Load32(p) ^ crc;
102
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
103
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
104
        } else if constexpr (sizeof(T) == 8) {
105
            // 8 bytes: two Slicing-by-4 steps
106
            uint32_t word = LittleEndian::Load32(p) ^ crc;
107
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
108
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
109
110
            word = LittleEndian::Load32(p + 4) ^ crc;
111
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
112
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
113
        } else {
114
            // Fallback to zlib for larger/unusual types
115
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
116
        }
117
823
        return crc ^ 0xFFFFFFFFU;
118
823
    }
_ZN5doris8HashUtil16zlib_crc32_fixedIsEEjRKT_j
Line
Count
Source
85
186
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
86
186
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
87
        // zlib convention: pre/post XOR with 0xFFFFFFFF
88
186
        uint32_t crc = hash ^ 0xFFFFFFFFU;
89
90
        if constexpr (sizeof(T) == 1) {
91
            // 1 byte: single table lookup
92
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
93
186
        } else if constexpr (sizeof(T) == 2) {
94
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
95
186
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
96
186
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
97
        } else if constexpr (sizeof(T) == 4) {
98
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
99
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
100
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
101
            uint32_t word = LittleEndian::Load32(p) ^ crc;
102
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
103
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
104
        } else if constexpr (sizeof(T) == 8) {
105
            // 8 bytes: two Slicing-by-4 steps
106
            uint32_t word = LittleEndian::Load32(p) ^ crc;
107
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
108
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
109
110
            word = LittleEndian::Load32(p + 4) ^ crc;
111
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
112
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
113
        } else {
114
            // Fallback to zlib for larger/unusual types
115
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
116
        }
117
186
        return crc ^ 0xFFFFFFFFU;
118
186
    }
_ZN5doris8HashUtil16zlib_crc32_fixedIlEEjRKT_j
Line
Count
Source
85
4.11k
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
86
4.11k
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
87
        // zlib convention: pre/post XOR with 0xFFFFFFFF
88
4.11k
        uint32_t crc = hash ^ 0xFFFFFFFFU;
89
90
        if constexpr (sizeof(T) == 1) {
91
            // 1 byte: single table lookup
92
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
93
        } else if constexpr (sizeof(T) == 2) {
94
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
95
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
96
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
97
        } else if constexpr (sizeof(T) == 4) {
98
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
99
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
100
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
101
            uint32_t word = LittleEndian::Load32(p) ^ crc;
102
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
103
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
104
4.11k
        } else if constexpr (sizeof(T) == 8) {
105
            // 8 bytes: two Slicing-by-4 steps
106
4.11k
            uint32_t word = LittleEndian::Load32(p) ^ crc;
107
4.11k
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
108
4.11k
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
109
110
4.11k
            word = LittleEndian::Load32(p + 4) ^ crc;
111
4.11k
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
112
4.11k
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
113
        } else {
114
            // Fallback to zlib for larger/unusual types
115
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
116
        }
117
4.11k
        return crc ^ 0xFFFFFFFFU;
118
4.11k
    }
_ZN5doris8HashUtil16zlib_crc32_fixedInEEjRKT_j
Line
Count
Source
85
163
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
86
163
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
87
        // zlib convention: pre/post XOR with 0xFFFFFFFF
88
163
        uint32_t crc = hash ^ 0xFFFFFFFFU;
89
90
        if constexpr (sizeof(T) == 1) {
91
            // 1 byte: single table lookup
92
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
93
        } else if constexpr (sizeof(T) == 2) {
94
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
95
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
96
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
97
        } else if constexpr (sizeof(T) == 4) {
98
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
99
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
100
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
101
            uint32_t word = LittleEndian::Load32(p) ^ crc;
102
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
103
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
104
        } else if constexpr (sizeof(T) == 8) {
105
            // 8 bytes: two Slicing-by-4 steps
106
            uint32_t word = LittleEndian::Load32(p) ^ crc;
107
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
108
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
109
110
            word = LittleEndian::Load32(p + 4) ^ crc;
111
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
112
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
113
163
        } else {
114
            // Fallback to zlib for larger/unusual types
115
163
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
116
163
        }
117
0
        return crc ^ 0xFFFFFFFFU;
118
163
    }
_ZN5doris8HashUtil16zlib_crc32_fixedIfEEjRKT_j
Line
Count
Source
85
20
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
86
20
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
87
        // zlib convention: pre/post XOR with 0xFFFFFFFF
88
20
        uint32_t crc = hash ^ 0xFFFFFFFFU;
89
90
        if constexpr (sizeof(T) == 1) {
91
            // 1 byte: single table lookup
92
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
93
        } else if constexpr (sizeof(T) == 2) {
94
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
95
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
96
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
97
20
        } else if constexpr (sizeof(T) == 4) {
98
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
99
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
100
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
101
20
            uint32_t word = LittleEndian::Load32(p) ^ crc;
102
20
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
103
20
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
104
        } else if constexpr (sizeof(T) == 8) {
105
            // 8 bytes: two Slicing-by-4 steps
106
            uint32_t word = LittleEndian::Load32(p) ^ crc;
107
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
108
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
109
110
            word = LittleEndian::Load32(p + 4) ^ crc;
111
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
112
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
113
        } else {
114
            // Fallback to zlib for larger/unusual types
115
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
116
        }
117
20
        return crc ^ 0xFFFFFFFFU;
118
20
    }
_ZN5doris8HashUtil16zlib_crc32_fixedIdEEjRKT_j
Line
Count
Source
85
42
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
86
42
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
87
        // zlib convention: pre/post XOR with 0xFFFFFFFF
88
42
        uint32_t crc = hash ^ 0xFFFFFFFFU;
89
90
        if constexpr (sizeof(T) == 1) {
91
            // 1 byte: single table lookup
92
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
93
        } else if constexpr (sizeof(T) == 2) {
94
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
95
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
96
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
97
        } else if constexpr (sizeof(T) == 4) {
98
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
99
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
100
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
101
            uint32_t word = LittleEndian::Load32(p) ^ crc;
102
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
103
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
104
42
        } else if constexpr (sizeof(T) == 8) {
105
            // 8 bytes: two Slicing-by-4 steps
106
42
            uint32_t word = LittleEndian::Load32(p) ^ crc;
107
42
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
108
42
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
109
110
42
            word = LittleEndian::Load32(p + 4) ^ crc;
111
42
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
112
42
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
113
        } else {
114
            // Fallback to zlib for larger/unusual types
115
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
116
        }
117
42
        return crc ^ 0xFFFFFFFFU;
118
42
    }
_ZN5doris8HashUtil16zlib_crc32_fixedIjEEjRKT_j
Line
Count
Source
85
51
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
86
51
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
87
        // zlib convention: pre/post XOR with 0xFFFFFFFF
88
51
        uint32_t crc = hash ^ 0xFFFFFFFFU;
89
90
        if constexpr (sizeof(T) == 1) {
91
            // 1 byte: single table lookup
92
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
93
        } else if constexpr (sizeof(T) == 2) {
94
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
95
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
96
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
97
51
        } else if constexpr (sizeof(T) == 4) {
98
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
99
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
100
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
101
51
            uint32_t word = LittleEndian::Load32(p) ^ crc;
102
51
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
103
51
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
104
        } else if constexpr (sizeof(T) == 8) {
105
            // 8 bytes: two Slicing-by-4 steps
106
            uint32_t word = LittleEndian::Load32(p) ^ crc;
107
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
108
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
109
110
            word = LittleEndian::Load32(p + 4) ^ crc;
111
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
112
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
113
        } else {
114
            // Fallback to zlib for larger/unusual types
115
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
116
        }
117
51
        return crc ^ 0xFFFFFFFFU;
118
51
    }
_ZN5doris8HashUtil16zlib_crc32_fixedIoEEjRKT_j
Line
Count
Source
85
12
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
86
12
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
87
        // zlib convention: pre/post XOR with 0xFFFFFFFF
88
12
        uint32_t crc = hash ^ 0xFFFFFFFFU;
89
90
        if constexpr (sizeof(T) == 1) {
91
            // 1 byte: single table lookup
92
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
93
        } else if constexpr (sizeof(T) == 2) {
94
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
95
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
96
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
97
        } else if constexpr (sizeof(T) == 4) {
98
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
99
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
100
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
101
            uint32_t word = LittleEndian::Load32(p) ^ crc;
102
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
103
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
104
        } else if constexpr (sizeof(T) == 8) {
105
            // 8 bytes: two Slicing-by-4 steps
106
            uint32_t word = LittleEndian::Load32(p) ^ crc;
107
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
108
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
109
110
            word = LittleEndian::Load32(p + 4) ^ crc;
111
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
112
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
113
12
        } else {
114
            // Fallback to zlib for larger/unusual types
115
12
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
116
12
        }
117
0
        return crc ^ 0xFFFFFFFFU;
118
12
    }
_ZN5doris8HashUtil16zlib_crc32_fixedINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEjRKT_j
Line
Count
Source
85
42
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
86
42
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
87
        // zlib convention: pre/post XOR with 0xFFFFFFFF
88
42
        uint32_t crc = hash ^ 0xFFFFFFFFU;
89
90
        if constexpr (sizeof(T) == 1) {
91
            // 1 byte: single table lookup
92
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
93
        } else if constexpr (sizeof(T) == 2) {
94
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
95
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
96
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
97
42
        } else if constexpr (sizeof(T) == 4) {
98
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
99
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
100
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
101
42
            uint32_t word = LittleEndian::Load32(p) ^ crc;
102
42
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
103
42
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
104
        } else if constexpr (sizeof(T) == 8) {
105
            // 8 bytes: two Slicing-by-4 steps
106
            uint32_t word = LittleEndian::Load32(p) ^ crc;
107
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
108
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
109
110
            word = LittleEndian::Load32(p + 4) ^ crc;
111
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
112
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
113
        } else {
114
            // Fallback to zlib for larger/unusual types
115
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
116
        }
117
42
        return crc ^ 0xFFFFFFFFU;
118
42
    }
_ZN5doris8HashUtil16zlib_crc32_fixedINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEjRKT_j
Line
Count
Source
85
126
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
86
126
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
87
        // zlib convention: pre/post XOR with 0xFFFFFFFF
88
126
        uint32_t crc = hash ^ 0xFFFFFFFFU;
89
90
        if constexpr (sizeof(T) == 1) {
91
            // 1 byte: single table lookup
92
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
93
        } else if constexpr (sizeof(T) == 2) {
94
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
95
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
96
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
97
        } else if constexpr (sizeof(T) == 4) {
98
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
99
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
100
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
101
            uint32_t word = LittleEndian::Load32(p) ^ crc;
102
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
103
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
104
126
        } else if constexpr (sizeof(T) == 8) {
105
            // 8 bytes: two Slicing-by-4 steps
106
126
            uint32_t word = LittleEndian::Load32(p) ^ crc;
107
126
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
108
126
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
109
110
126
            word = LittleEndian::Load32(p + 4) ^ crc;
111
126
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
112
126
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
113
        } else {
114
            // Fallback to zlib for larger/unusual types
115
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
116
        }
117
126
        return crc ^ 0xFFFFFFFFU;
118
126
    }
Unexecuted instantiation: _ZN5doris8HashUtil16zlib_crc32_fixedINS_16TimestampTzValueEEEjRKT_j
_ZN5doris8HashUtil16zlib_crc32_fixedImEEjRKT_j
Line
Count
Source
85
18
    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
86
18
        const auto* p = reinterpret_cast<const uint8_t*>(&value);
87
        // zlib convention: pre/post XOR with 0xFFFFFFFF
88
18
        uint32_t crc = hash ^ 0xFFFFFFFFU;
89
90
        if constexpr (sizeof(T) == 1) {
91
            // 1 byte: single table lookup
92
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
93
        } else if constexpr (sizeof(T) == 2) {
94
            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
95
            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
96
            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
97
        } else if constexpr (sizeof(T) == 4) {
98
            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
99
            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
100
            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
101
            uint32_t word = LittleEndian::Load32(p) ^ crc;
102
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
103
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
104
18
        } else if constexpr (sizeof(T) == 8) {
105
            // 8 bytes: two Slicing-by-4 steps
106
18
            uint32_t word = LittleEndian::Load32(p) ^ crc;
107
18
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
108
18
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
109
110
18
            word = LittleEndian::Load32(p + 4) ^ crc;
111
18
            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^
112
18
                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF];
113
        } else {
114
            // Fallback to zlib for larger/unusual types
115
            return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T));
116
        }
117
18
        return crc ^ 0xFFFFFFFFU;
118
18
    }
119
120
11.1M
    static uint32_t zlib_crc_hash_null(uint32_t hash) {
121
        // null is treat as 0 when hash
122
11.1M
        static const int INT_VALUE = 0;
123
11.1M
        return zlib_crc32_fixed(INT_VALUE, hash);
124
11.1M
    }
125
126
    template <typename T>
127
12.0M
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
128
12.0M
        if constexpr (sizeof(T) == 1) {
129
49.3k
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
130
81.8k
        } else if constexpr (sizeof(T) == 2) {
131
81.8k
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
132
5.19M
        } else if constexpr (sizeof(T) == 4) {
133
5.19M
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
134
6.64M
        } else if constexpr (sizeof(T) == 8) {
135
6.64M
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
136
6.64M
        } else {
137
44.0k
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
138
44.0k
        }
139
12.0M
    }
_ZN5doris8HashUtil12crc32c_fixedIiEEjRKT_j
Line
Count
Source
127
5.14M
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
128
        if constexpr (sizeof(T) == 1) {
129
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
130
        } else if constexpr (sizeof(T) == 2) {
131
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
132
5.14M
        } else if constexpr (sizeof(T) == 4) {
133
5.14M
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
134
        } else if constexpr (sizeof(T) == 8) {
135
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
136
        } else {
137
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
138
        }
139
5.14M
    }
_ZN5doris8HashUtil12crc32c_fixedINS_7DecimalIiEEEEjRKT_j
Line
Count
Source
127
4.96k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
128
        if constexpr (sizeof(T) == 1) {
129
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
130
        } else if constexpr (sizeof(T) == 2) {
131
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
132
4.96k
        } else if constexpr (sizeof(T) == 4) {
133
4.96k
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
134
        } else if constexpr (sizeof(T) == 8) {
135
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
136
        } else {
137
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
138
        }
139
4.96k
    }
_ZN5doris8HashUtil12crc32c_fixedINS_7DecimalIlEEEEjRKT_j
Line
Count
Source
127
63.3k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
128
        if constexpr (sizeof(T) == 1) {
129
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
130
        } else if constexpr (sizeof(T) == 2) {
131
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
132
        } else if constexpr (sizeof(T) == 4) {
133
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
134
63.3k
        } else if constexpr (sizeof(T) == 8) {
135
63.3k
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
136
        } else {
137
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
138
        }
139
63.3k
    }
Unexecuted instantiation: _ZN5doris8HashUtil12crc32c_fixedINS_14DecimalV2ValueEEEjRKT_j
_ZN5doris8HashUtil12crc32c_fixedINS_12Decimal128V3EEEjRKT_j
Line
Count
Source
127
5.93k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
128
        if constexpr (sizeof(T) == 1) {
129
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
130
        } else if constexpr (sizeof(T) == 2) {
131
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
132
        } else if constexpr (sizeof(T) == 4) {
133
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
134
        } else if constexpr (sizeof(T) == 8) {
135
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
136
5.93k
        } else {
137
5.93k
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
138
5.93k
        }
139
5.93k
    }
_ZN5doris8HashUtil12crc32c_fixedINS_7DecimalIN4wide7integerILm256EiEEEEEEjRKT_j
Line
Count
Source
127
7.48k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
128
        if constexpr (sizeof(T) == 1) {
129
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
130
        } else if constexpr (sizeof(T) == 2) {
131
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
132
        } else if constexpr (sizeof(T) == 4) {
133
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
134
        } else if constexpr (sizeof(T) == 8) {
135
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
136
7.48k
        } else {
137
7.48k
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
138
7.48k
        }
139
7.48k
    }
_ZN5doris8HashUtil12crc32c_fixedIhEEjRKT_j
Line
Count
Source
127
6.06k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
128
6.06k
        if constexpr (sizeof(T) == 1) {
129
6.06k
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
130
        } else if constexpr (sizeof(T) == 2) {
131
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
132
        } else if constexpr (sizeof(T) == 4) {
133
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
134
        } else if constexpr (sizeof(T) == 8) {
135
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
136
        } else {
137
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
138
        }
139
6.06k
    }
_ZN5doris8HashUtil12crc32c_fixedIaEEjRKT_j
Line
Count
Source
127
43.2k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
128
43.2k
        if constexpr (sizeof(T) == 1) {
129
43.2k
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
130
        } else if constexpr (sizeof(T) == 2) {
131
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
132
        } else if constexpr (sizeof(T) == 4) {
133
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
134
        } else if constexpr (sizeof(T) == 8) {
135
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
136
        } else {
137
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
138
        }
139
43.2k
    }
_ZN5doris8HashUtil12crc32c_fixedIsEEjRKT_j
Line
Count
Source
127
81.8k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
128
        if constexpr (sizeof(T) == 1) {
129
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
130
81.8k
        } else if constexpr (sizeof(T) == 2) {
131
81.8k
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
132
        } else if constexpr (sizeof(T) == 4) {
133
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
134
        } else if constexpr (sizeof(T) == 8) {
135
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
136
        } else {
137
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
138
        }
139
81.8k
    }
_ZN5doris8HashUtil12crc32c_fixedIlEEjRKT_j
Line
Count
Source
127
6.46M
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
128
        if constexpr (sizeof(T) == 1) {
129
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
130
        } else if constexpr (sizeof(T) == 2) {
131
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
132
        } else if constexpr (sizeof(T) == 4) {
133
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
134
6.46M
        } else if constexpr (sizeof(T) == 8) {
135
6.46M
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
136
        } else {
137
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
138
        }
139
6.46M
    }
_ZN5doris8HashUtil12crc32c_fixedInEEjRKT_j
Line
Count
Source
127
26.0k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
128
        if constexpr (sizeof(T) == 1) {
129
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
130
        } else if constexpr (sizeof(T) == 2) {
131
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
132
        } else if constexpr (sizeof(T) == 4) {
133
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
134
        } else if constexpr (sizeof(T) == 8) {
135
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
136
26.0k
        } else {
137
26.0k
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
138
26.0k
        }
139
26.0k
    }
_ZN5doris8HashUtil12crc32c_fixedIfEEjRKT_j
Line
Count
Source
127
8.46k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
128
        if constexpr (sizeof(T) == 1) {
129
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
130
        } else if constexpr (sizeof(T) == 2) {
131
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
132
8.46k
        } else if constexpr (sizeof(T) == 4) {
133
8.46k
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
134
        } else if constexpr (sizeof(T) == 8) {
135
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
136
        } else {
137
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
138
        }
139
8.46k
    }
_ZN5doris8HashUtil12crc32c_fixedIdEEjRKT_j
Line
Count
Source
127
86.8k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
128
        if constexpr (sizeof(T) == 1) {
129
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
130
        } else if constexpr (sizeof(T) == 2) {
131
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
132
        } else if constexpr (sizeof(T) == 4) {
133
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
134
86.8k
        } else if constexpr (sizeof(T) == 8) {
135
86.8k
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
136
        } else {
137
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
138
        }
139
86.8k
    }
_ZN5doris8HashUtil12crc32c_fixedIjEEjRKT_j
Line
Count
Source
127
6.81k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
128
        if constexpr (sizeof(T) == 1) {
129
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
130
        } else if constexpr (sizeof(T) == 2) {
131
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
132
6.81k
        } else if constexpr (sizeof(T) == 4) {
133
6.81k
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
134
        } else if constexpr (sizeof(T) == 8) {
135
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
136
        } else {
137
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
138
        }
139
6.81k
    }
_ZN5doris8HashUtil12crc32c_fixedIoEEjRKT_j
Line
Count
Source
127
4.54k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
128
        if constexpr (sizeof(T) == 1) {
129
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
130
        } else if constexpr (sizeof(T) == 2) {
131
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
132
        } else if constexpr (sizeof(T) == 4) {
133
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
134
        } else if constexpr (sizeof(T) == 8) {
135
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
136
4.54k
        } else {
137
4.54k
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
138
4.54k
        }
139
4.54k
    }
_ZN5doris8HashUtil12crc32c_fixedINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEjRKT_j
Line
Count
Source
127
34.1k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
128
        if constexpr (sizeof(T) == 1) {
129
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
130
        } else if constexpr (sizeof(T) == 2) {
131
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
132
34.1k
        } else if constexpr (sizeof(T) == 4) {
133
34.1k
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
134
        } else if constexpr (sizeof(T) == 8) {
135
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
136
        } else {
137
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
138
        }
139
34.1k
    }
_ZN5doris8HashUtil12crc32c_fixedINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEjRKT_j
Line
Count
Source
127
25.5k
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
128
        if constexpr (sizeof(T) == 1) {
129
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
130
        } else if constexpr (sizeof(T) == 2) {
131
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
132
        } else if constexpr (sizeof(T) == 4) {
133
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
134
25.5k
        } else if constexpr (sizeof(T) == 8) {
135
25.5k
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
136
        } else {
137
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
138
        }
139
25.5k
    }
_ZN5doris8HashUtil12crc32c_fixedINS_16TimestampTzValueEEEjRKT_j
Line
Count
Source
127
56
    static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
128
        if constexpr (sizeof(T) == 1) {
129
            return _mm_crc32_u8(hash, *reinterpret_cast<const uint8_t*>(&value));
130
        } else if constexpr (sizeof(T) == 2) {
131
            return _mm_crc32_u16(hash, *reinterpret_cast<const uint16_t*>(&value));
132
        } else if constexpr (sizeof(T) == 4) {
133
            return _mm_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(&value));
134
56
        } else if constexpr (sizeof(T) == 8) {
135
56
            return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(&value));
136
        } else {
137
            return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
138
        }
139
56
    }
Unexecuted instantiation: _ZN5doris8HashUtil12crc32c_fixedImEEjRKT_j
140
141
2.64k
    static uint32_t crc32c_null(uint32_t hash) {
142
        // null is treat as 0 when hash
143
2.64k
        static const int INT_VALUE = 0;
144
2.64k
        return crc32c_fixed(INT_VALUE, hash);
145
2.64k
    }
146
147
    // Compute the Crc32 hash for data using SSE4 instructions.  The input hash parameter is
148
    // the current hash/seed value.
149
    // This should only be called if SSE is supported.
150
    // This is ~4x faster than Fnv/Boost Hash.
151
    // NOTE: DO NOT use this method for checksum! This does not generate the standard CRC32 checksum!
152
    //       For checksum, use CRC-32C algorithm from crc32c.h
153
    // NOTE: Any changes made to this function need to be reflected in Codegen::GetHashFn.
154
    // TODO: crc32 hashes with different seeds do not result in different hash functions.
155
    // The resulting hashes are correlated.
156
    // ATTN: prefer do not use this function anymore, use crc32c::Extend instead
157
    // This function is retained because it is not certain whether there are compatibility issues with historical data.
158
136M
    static uint32_t crc_hash(const void* data, uint32_t bytes, uint32_t hash) {
159
136M
        if (!CpuInfo::is_supported(CpuInfo::SSE4_2)) {
160
0
            return zlib_crc_hash(data, bytes, hash);
161
0
        }
162
136M
        uint32_t words = bytes / sizeof(uint32_t);
163
136M
        bytes = bytes % sizeof(uint32_t);
164
165
136M
        const uint32_t* p = reinterpret_cast<const uint32_t*>(data);
166
167
368M
        while (words--) {
168
232M
            hash = _mm_crc32_u32(hash, *p);
169
232M
            ++p;
170
232M
        }
171
172
136M
        const uint8_t* s = reinterpret_cast<const uint8_t*>(p);
173
174
176M
        while (bytes--) {
175
39.8M
            hash = _mm_crc32_u8(hash, *s);
176
39.8M
            ++s;
177
39.8M
        }
178
179
        // The lower half of the CRC hash has has poor uniformity, so swap the halves
180
        // for anyone who only uses the first several bits of the hash.
181
136M
        hash = (hash << 16) | (hash >> 16);
182
136M
        return hash;
183
136M
    }
184
185
150k
    static uint64_t crc_hash64(const void* data, uint32_t bytes, uint64_t hash) {
186
150k
        uint32_t words = bytes / sizeof(uint32_t);
187
150k
        bytes = bytes % sizeof(uint32_t);
188
189
150k
        uint32_t h1 = hash >> 32;
190
150k
        uint32_t h2 = (hash << 32) >> 32;
191
192
150k
        const uint32_t* p = reinterpret_cast<const uint32_t*>(data);
193
554k
        while (words--) {
194
404k
            (words & 1) ? (h1 = _mm_crc32_u32(h1, *p)) : (h2 = _mm_crc32_u32(h2, *p));
195
404k
            ++p;
196
404k
        }
197
198
150k
        const uint8_t* s = reinterpret_cast<const uint8_t*>(p);
199
275k
        while (bytes--) {
200
125k
            (bytes & 1) ? (h1 = _mm_crc32_u8(h1, *s)) : (h2 = _mm_crc32_u8(h2, *s));
201
125k
            ++s;
202
125k
        }
203
150k
        union {
204
150k
            uint64_t u64;
205
150k
            uint32_t u32[2];
206
150k
        } converter;
207
150k
        converter.u64 = hash;
208
209
150k
        h1 = (h1 << 16) | (h1 >> 16);
210
150k
        h2 = (h2 << 16) | (h2 >> 16);
211
150k
        converter.u32[0] = h1;
212
150k
        converter.u32[1] = h2;
213
214
150k
        return converter.u64;
215
150k
    }
216
217
    // refer to https://github.com/apache/commons-codec/blob/master/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java
218
    // Seed constant for the 32-bit MurmurHash3 variant.
    // NOTE(review): presumably chosen to match the default seed of the Java
    // implementation referenced above — confirm before changing.
    static const uint32_t MURMUR3_32_SEED = 104729;
219
220
    // Modified from https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
221
1.07k
    static uint32_t murmur_hash3_32(const void* key, int64_t len, uint32_t seed) {
222
1.07k
        uint32_t out = 0;
223
1.07k
        murmur_hash3_x86_32(key, len, seed, &out);
224
1.07k
        return out;
225
1.07k
    }
226
227
    template <bool is_mmh64_v2>
228
74
    static uint64_t murmur_hash3_64(const void* key, int64_t len, uint64_t seed) {
229
74
        uint64_t out = 0;
230
74
        if constexpr (is_mmh64_v2) {
231
6
            murmur_hash3_x64_64_shared(key, len, seed, &out);
232
68
        } else {
233
68
            murmur_hash3_x64_64(key, len, seed, &out);
234
68
        }
235
74
        return out;
236
74
    }
_ZN5doris8HashUtil15murmur_hash3_64ILb0EEEmPKvlm
Line
Count
Source
228
68
    static uint64_t murmur_hash3_64(const void* key, int64_t len, uint64_t seed) {
229
68
        uint64_t out = 0;
230
        if constexpr (is_mmh64_v2) {
231
            murmur_hash3_x64_64_shared(key, len, seed, &out);
232
68
        } else {
233
68
            murmur_hash3_x64_64(key, len, seed, &out);
234
68
        }
235
68
        return out;
236
68
    }
_ZN5doris8HashUtil15murmur_hash3_64ILb1EEEmPKvlm
Line
Count
Source
228
6
    static uint64_t murmur_hash3_64(const void* key, int64_t len, uint64_t seed) {
229
6
        uint64_t out = 0;
230
6
        if constexpr (is_mmh64_v2) {
231
6
            murmur_hash3_x64_64_shared(key, len, seed, &out);
232
        } else {
233
            murmur_hash3_x64_64(key, len, seed, &out);
234
        }
235
6
        return out;
236
6
    }
237
238
    static const int MURMUR_R = 47;
239
240
    // Murmur2 hash implementation returning 64-bit hashes.
241
0
    static uint64_t murmur_hash2_64(const void* input, int len, uint64_t seed) {
242
0
        uint64_t h = seed ^ (len * MURMUR_PRIME);
243
0
244
0
        const uint64_t* data = reinterpret_cast<const uint64_t*>(input);
245
0
        const uint64_t* end = data + (len / sizeof(uint64_t));
246
0
247
0
        while (data != end) {
248
0
            uint64_t k = *data++;
249
0
            k *= MURMUR_PRIME;
250
0
            k ^= k >> MURMUR_R;
251
0
            k *= MURMUR_PRIME;
252
0
            h ^= k;
253
0
            h *= MURMUR_PRIME;
254
0
        }
255
0
256
0
        const uint8_t* data2 = reinterpret_cast<const uint8_t*>(data);
257
0
        switch (len & 7) {
258
0
        case 7:
259
0
            h ^= uint64_t(data2[6]) << 48;
260
0
            [[fallthrough]];
261
0
        case 6:
262
0
            h ^= uint64_t(data2[5]) << 40;
263
0
            [[fallthrough]];
264
0
        case 5:
265
0
            h ^= uint64_t(data2[4]) << 32;
266
0
            [[fallthrough]];
267
0
        case 4:
268
0
            h ^= uint64_t(data2[3]) << 24;
269
0
            [[fallthrough]];
270
0
        case 3:
271
0
            h ^= uint64_t(data2[2]) << 16;
272
0
            [[fallthrough]];
273
0
        case 2:
274
0
            h ^= uint64_t(data2[1]) << 8;
275
0
            [[fallthrough]];
276
0
        case 1:
277
0
            h ^= uint64_t(data2[0]);
278
0
            h *= MURMUR_PRIME;
279
0
        }
280
0
281
0
        h ^= h >> MURMUR_R;
282
0
        h *= MURMUR_PRIME;
283
0
        h ^= h >> MURMUR_R;
284
0
        return h;
285
0
    }
286
287
    // default values recommended by http://isthe.com/chongo/tech/comp/fnv/
288
    static const uint32_t FNV_PRIME = 0x01000193; //   16777619
289
    static const uint32_t FNV_SEED = 0x811C9DC5;  // 2166136261
290
    static const uint64_t FNV64_PRIME = 1099511628211UL;
291
    static const uint64_t FNV64_SEED = 14695981039346656037UL;
292
    static const uint64_t MURMUR_PRIME = 0xc6a4a7935bd1e995ULL;
293
    static const uint32_t MURMUR_SEED = 0xadc83b19ULL;
294
    // Implementation of the Fowler–Noll–Vo hash function.  This is not as performant
295
    // as boost's hash on int types (2x slower) but has bit entropy.
296
    // For ints, boost just returns the value of the int which can be pathological.
297
    // For example, if the data is <1000, 2000, 3000, 4000, ..> and then the mod of 1000
298
    // is taken on the hash, all values will collide to the same bucket.
299
    // For string values, Fnv is slightly faster than boost.
300
0
    static uint32_t fnv_hash(const void* data, uint32_t bytes, uint32_t hash) {
301
0
        const uint8_t* ptr = reinterpret_cast<const uint8_t*>(data);
302
303
0
        while (bytes--) {
304
0
            hash = (*ptr ^ hash) * FNV_PRIME;
305
0
            ++ptr;
306
0
        }
307
308
0
        return hash;
309
0
    }
310
311
0
    static uint64_t fnv_hash64(const void* data, uint32_t bytes, uint64_t hash) {
312
0
        const uint8_t* ptr = reinterpret_cast<const uint8_t*>(data);
313
0
314
0
        while (bytes--) {
315
0
            hash = (*ptr ^ hash) * FNV64_PRIME;
316
0
            ++ptr;
317
0
        }
318
0
319
0
        return hash;
320
0
    }
321
322
    // Our hash function is MurmurHash2, 64 bit version.
323
    // It was modified in order to provide the same result in
324
    // big and little endian archs (endian neutral).
325
12.2M
    static uint64_t murmur_hash64A(const void* key, int64_t len, unsigned int seed) {
326
12.2M
        const uint64_t m = MURMUR_PRIME;
327
12.2M
        const int r = 47;
328
12.2M
        uint64_t h = seed ^ (len * m);
329
12.2M
        const uint8_t* data = (const uint8_t*)key;
330
12.2M
        const uint8_t* end = data + (len - (len & 7));
331
332
20.3M
        while (data != end) {
333
8.09M
            uint64_t k;
334
            if constexpr (std::endian::native == std::endian::big) {
335
                k = (uint64_t)data[0];
336
                k |= (uint64_t)data[1] << 8;
337
                k |= (uint64_t)data[2] << 16;
338
                k |= (uint64_t)data[3] << 24;
339
                k |= (uint64_t)data[4] << 32;
340
                k |= (uint64_t)data[5] << 40;
341
                k |= (uint64_t)data[6] << 48;
342
                k |= (uint64_t)data[7] << 56;
343
8.09M
            } else if constexpr (std::endian::native == std::endian::little) {
344
8.09M
                memcpy(&k, data, sizeof(k));
345
            } else {
346
                static_assert(std::endian::native == std::endian::big ||
347
                                      std::endian::native == std::endian::little,
348
                              "Unsupported endianness");
349
            }
350
351
8.09M
            k *= m;
352
8.09M
            k ^= k >> r;
353
8.09M
            k *= m;
354
8.09M
            h ^= k;
355
8.09M
            h *= m;
356
8.09M
            data += 8;
357
8.09M
        }
358
359
12.2M
        switch (len & 7) {
360
406k
        case 7:
361
406k
            h ^= (uint64_t)data[6] << 48;
362
406k
            [[fallthrough]];
363
579k
        case 6:
364
579k
            h ^= (uint64_t)data[5] << 40;
365
579k
            [[fallthrough]];
366
824k
        case 5:
367
824k
            h ^= (uint64_t)data[4] << 32;
368
824k
            [[fallthrough]];
369
6.83M
        case 4:
370
6.83M
            h ^= (uint64_t)data[3] << 24;
371
6.83M
            [[fallthrough]];
372
7.29M
        case 3:
373
7.29M
            h ^= (uint64_t)data[2] << 16;
374
7.29M
            [[fallthrough]];
375
7.46M
        case 2:
376
7.46M
            h ^= (uint64_t)data[1] << 8;
377
7.46M
            [[fallthrough]];
378
9.11M
        case 1:
379
9.11M
            h ^= (uint64_t)data[0];
380
9.11M
            h *= m;
381
12.2M
        }
382
383
12.2M
        h ^= h >> r;
384
12.2M
        h *= m;
385
12.2M
        h ^= h >> r;
386
12.2M
        return h;
387
12.2M
    }
388
389
    // Computes the hash value for data.  Will call either CrcHash or FnvHash
390
    // depending on hardware capabilities.
391
    // Seed values for different steps of the query execution should use different seeds
392
    // to prevent accidental key collisions. (See IMPALA-219 for more details).
393
136M
    static uint32_t hash(const void* data, uint32_t bytes, uint32_t seed) {
394
136M
#ifdef __SSE4_2__
395
396
136M
        if (LIKELY(CpuInfo::is_supported(CpuInfo::SSE4_2))) {
397
136M
            return crc_hash(data, bytes, seed);
398
136M
        } else {
399
117k
            return fnv_hash(data, bytes, seed);
400
117k
        }
401
402
#else
403
        return fnv_hash(data, bytes, seed);
404
#endif
405
136M
    }
406
407
35.1M
    static uint64_t hash64(const void* data, uint64_t bytes, uint64_t seed) {
408
#ifdef _SSE4_2_
409
        if (LIKELY(CpuInfo::is_supported(CpuInfo::SSE4_2))) {
410
            return crc_hash64(data, bytes, seed);
411
412
        } else {
413
            uint64_t hash = 0;
414
            murmur_hash3_x64_64(data, bytes, seed, &hash);
415
            return hash;
416
        }
417
#else
418
35.1M
        uint64_t hash = 0;
419
35.1M
        murmur_hash3_x64_64(data, bytes, seed, &hash);
420
35.1M
        return hash;
421
35.1M
#endif
422
35.1M
    }
423
    // hash_combine is the same with boost hash_combine,
424
    // except replace boost::hash with std::hash
425
    // Folds std::hash<T>()(v) into `seed` in place:
    //   seed ^= hash(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2)
    // Calling it repeatedly on the same `seed` builds one combined hash out of
    // several values; the result depends on the order of the calls.
    template <class T>
    static inline void hash_combine(std::size_t& seed, const T& v) {
        const std::size_t value_hash = std::hash<T> {}(v);
        const std::size_t mixed = value_hash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
        seed ^= mixed;
    }
430
431
#if defined(__clang__)
432
#pragma clang diagnostic push
433
#pragma clang diagnostic ignored "-Wused-but-marked-unused"
434
#endif
435
    // xxHash function for a byte array.  For convenience, the caller-supplied seed is also
436
    // hashed into the result.  The mapping may change from time to time.
437
388
    // Returns XXH32() of the `len` bytes at `s`, seeded with `seed`.
    static xxh_u32 xxHash32WithSeed(const char* s, size_t len, xxh_u32 seed) {
        const xxh_u32 digest = XXH32(s, len, seed);
        return digest;
    }
440
441
    // Same as the function above, but for a null value.
442
0
    // Hash used for a null value: XXH32() over the bytes of an `int` holding 0,
    // seeded with `seed`.
    static xxh_u32 xxHash32NullWithSeed(xxh_u32 seed) {
        static const int NULL_PLACEHOLDER = 0;
        return XXH32(&NULL_PLACEHOLDER, sizeof(NULL_PLACEHOLDER), seed);
    }
446
447
13.5M
    // Returns XXH3_64bits_withSeed() of the `len` bytes at `s`, seeded with `seed`.
    // Note this is the XXH3 family, not the classic XXH64 used by
    // xxhash64_compat_with_seed().
    static xxh_u64 xxHash64WithSeed(const char* s, size_t len, xxh_u64 seed) {
        const xxh_u64 digest = XXH3_64bits_withSeed(s, len, seed);
        return digest;
    }
450
451
    // Same as the function above, but for a null value.
452
1.08M
    // Hash used for a null value: XXH3_64bits_withSeed() over the bytes of an
    // `int` holding 0, seeded with `seed`.
    static xxh_u64 xxHash64NullWithSeed(xxh_u64 seed) {
        static const int NULL_PLACEHOLDER = 0;
        return XXH3_64bits_withSeed(&NULL_PLACEHOLDER, sizeof(NULL_PLACEHOLDER), seed);
    }
456
457
21
    // Returns the classic XXH64() digest of the `len` bytes at `s`, seeded with
    // `seed` (as opposed to xxHash64WithSeed(), which uses XXH3).
    static xxh_u64 xxhash64_compat_with_seed(const char* s, size_t len, xxh_u64 seed) {
        const void* bytes = s;
        return XXH64(bytes, len, seed);
    }
460
461
0
    // Null-value counterpart of xxhash64_compat_with_seed(): XXH64() over the
    // bytes of an `int` holding 0, seeded with `seed`.
    static xxh_u64 xxhash64_compat_null_with_seed(xxh_u64 seed) {
        static const int NULL_PLACEHOLDER = 0;
        return XXH64(&NULL_PLACEHOLDER, sizeof(NULL_PLACEHOLDER), seed);
    }
465
466
#if defined(__clang__)
467
#pragma clang diagnostic pop
468
#endif
469
};
470
471
} // namespace doris
472
473
template <>
474
struct std::hash<doris::TUniqueId> {
475
3.80M
    size_t operator()(const doris::TUniqueId& id) const {
476
3.80M
        uint32_t seed = 0;
477
3.80M
        seed = doris::HashUtil::hash(&id.lo, sizeof(id.lo), seed);
478
3.80M
        seed = doris::HashUtil::hash(&id.hi, sizeof(id.hi), seed);
479
3.80M
        return seed;
480
3.80M
    }
481
};
482
483
template <>
484
struct std::hash<doris::TNetworkAddress> {
485
3.85M
    size_t operator()(const doris::TNetworkAddress& address) const {
486
3.85M
        uint32_t seed = 0;
487
3.85M
        seed = doris::HashUtil::hash(address.hostname.data(), (uint32_t)address.hostname.size(),
488
3.85M
                                     seed);
489
3.85M
        seed = doris::HashUtil::hash(&address.port, 4, seed);
490
3.85M
        return seed;
491
3.85M
    }
492
};
493
494
template <>
495
struct std::hash<std::pair<doris::TUniqueId, int64_t>> {
496
0
    size_t operator()(const std::pair<doris::TUniqueId, int64_t>& pair) const {
497
0
        uint32_t seed = 0;
498
0
        seed = doris::HashUtil::hash(&pair.first.lo, sizeof(pair.first.lo), seed);
499
0
        seed = doris::HashUtil::hash(&pair.first.hi, sizeof(pair.first.hi), seed);
500
0
        seed = doris::HashUtil::hash(&pair.second, sizeof(pair.second), seed);
501
0
        return seed;
502
0
    }
503
};
504
505
template <class First, class Second>
506
struct std::hash<std::pair<First, Second>> {
507
28.0M
    size_t operator()(const pair<First, Second>& p) const {
508
28.0M
        size_t h1 = std::hash<First>()(p.first);
509
28.0M
        size_t h2 = std::hash<Second>()(p.second);
510
28.0M
        return doris::util_hash::HashLen16(h1, h2);
511
28.0M
    }
_ZNKSt4hashISt4pairINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEElEEclERKS7_
Line
Count
Source
507
32
    size_t operator()(const pair<First, Second>& p) const {
508
32
        size_t h1 = std::hash<First>()(p.first);
509
32
        size_t h2 = std::hash<Second>()(p.second);
510
32
        return doris::util_hash::HashLen16(h1, h2);
511
32
    }
_ZNKSt4hashISt4pairIiN5doris10PathInDataEEEclERKS3_
Line
Count
Source
507
28.0M
    size_t operator()(const pair<First, Second>& p) const {
508
28.0M
        size_t h1 = std::hash<First>()(p.first);
509
28.0M
        size_t h2 = std::hash<Second>()(p.second);
510
28.0M
        return doris::util_hash::HashLen16(h1, h2);
511
28.0M
    }
_ZNKSt4hashISt4pairIlN5doris8RowsetIdEEEclERKS3_
Line
Count
Source
507
16.4k
    size_t operator()(const pair<First, Second>& p) const {
508
16.4k
        size_t h1 = std::hash<First>()(p.first);
509
16.4k
        size_t h2 = std::hash<Second>()(p.second);
510
16.4k
        return doris::util_hash::HashLen16(h1, h2);
511
16.4k
    }
_ZNKSt4hashISt4pairIllEEclERKS1_
Line
Count
Source
507
64
    size_t operator()(const pair<First, Second>& p) const {
508
64
        size_t h1 = std::hash<First>()(p.first);
509
64
        size_t h2 = std::hash<Second>()(p.second);
510
64
        return doris::util_hash::HashLen16(h1, h2);
511
64
    }
_ZNKSt4hashISt4pairIN5doris9TUniqueIdEiEEclERKS3_
Line
Count
Source
507
1.89k
    size_t operator()(const pair<First, Second>& p) const {
508
1.89k
        size_t h1 = std::hash<First>()(p.first);
509
1.89k
        size_t h2 = std::hash<Second>()(p.second);
510
1.89k
        return doris::util_hash::HashLen16(h1, h2);
511
1.89k
    }
512
};
513
514
#include "common/compile_check_end.h"