/root/doris/be/src/util/hash_util.hpp

Source (jump to first uncovered line)
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/hash-util.h
// and modified by Doris

#pragma once

#include <gen_cpp/Types_types.h>
#include <xxh3.h>
#include <zlib.h>

#include <cstring>
#include <functional>

#include "common/compiler_util.h" // IWYU pragma: keep
#include "gutil/hash/city.h"
#include "runtime/define_primitive_type.h"
#include "util/cpu_info.h"
#include "util/murmur_hash3.h"
#include "util/sse_util.hpp"

namespace doris {

// Utility class to compute hash values.
class HashUtil {
public:
    // Feeds `bytes` bytes starting at `data` into zlib's crc32(), continuing from the
    // running value `hash`, and returns the updated value.
    static uint32_t zlib_crc_hash(const void* data, uint32_t bytes, uint32_t hash) {
        const auto* buffer = static_cast<const unsigned char*>(data);
        return crc32(hash, buffer, bytes);
    }

    // Hash contribution of a NULL value: NULL is treated as a 32-bit zero, i.e. four zero
    // bytes are fed into zlib's crc32() on top of the running value `hash`.
    static uint32_t zlib_crc_hash_null(uint32_t hash) {
        // Fixed-width type + sizeof keep the buffer and the length passed to crc32() in sync;
        // the length is exactly 4 bytes regardless of the platform's `int` width.
        static const int32_t NULL_VALUE = 0;
        return crc32(hash, reinterpret_cast<const unsigned char*>(&NULL_VALUE),
                     sizeof(NULL_VALUE));
    }

#if defined(__SSE4_2__) || defined(__aarch64__)
    // Compute the Crc32 hash for data using the _mm_crc32_* intrinsics.
    //   data:  start of the buffer to hash (no alignment requirement)
    //   bytes: buffer length in bytes
    //   hash:  the current hash/seed value
    // Returns the updated hash with its 16-bit halves swapped (see the end of the function).
    // If CpuInfo reports at runtime that SSE4.2 is unavailable, the call is forwarded to
    // zlib_crc_hash() and that value is returned as-is, without the half swap.
    // This is ~4x faster than Fnv/Boost Hash.
    // NOTE: DO NOT use this method for checksum! This does not generate the standard CRC32 checksum!
    //       For checksum, use CRC-32C algorithm from crc32c.h
    // NOTE: Any changes made to this function need to be reflected in Codegen::GetHashFn.
    // TODO: crc32 hashes with different seeds do not result in different hash functions.
    // The resulting hashes are correlated.
    static uint32_t crc_hash(const void* data, uint32_t bytes, uint32_t hash) {
        if (!CpuInfo::is_supported(CpuInfo::SSE4_2)) {
            return zlib_crc_hash(data, bytes, hash);
        }

        const auto* cursor = static_cast<const uint8_t*>(data);

        // Consume the input one 32-bit word at a time. The word is fetched with memcpy because
        // `data` is an arbitrary byte pointer: reading it through a uint32_t* would be
        // undefined behavior when the address is not 4-byte aligned.
        for (uint32_t words = bytes / sizeof(uint32_t); words > 0; --words) {
            uint32_t word;
            std::memcpy(&word, cursor, sizeof(word));
            hash = _mm_crc32_u32(hash, word);
            cursor += sizeof(word);
        }

        // Then the 0-3 bytes that did not fill a whole word.
        for (uint32_t tail = bytes % sizeof(uint32_t); tail > 0; --tail) {
            hash = _mm_crc32_u8(hash, *cursor);
            ++cursor;
        }

        // The lower half of the CRC hash has poor uniformity, so swap the halves
        // for anyone who only uses the first several bits of the hash.
        return (hash << 16) | (hash >> 16);
    }

    // 64-bit variant of crc_hash(): runs two independent 32-bit CRC lanes over the input and
    // packs them into one 64-bit value.
    //   data:  start of the buffer to hash (no alignment requirement)
    //   bytes: buffer length in bytes
    //   hash:  64-bit seed; its upper 32 bits seed lane h1, its lower 32 bits seed lane h2
    // Unlike crc_hash(), this function performs no runtime CpuInfo check.
    static uint64_t crc_hash64(const void* data, uint32_t bytes, uint64_t hash) {
        uint32_t words = bytes / sizeof(uint32_t);
        bytes = bytes % sizeof(uint32_t);

        uint32_t h1 = static_cast<uint32_t>(hash >> 32);
        uint32_t h2 = static_cast<uint32_t>(hash);

        const auto* cursor = static_cast<const uint8_t*>(data);

        // Inside the loop `words` is already decremented, i.e. it is the number of words still
        // to come; its parity selects the lane that absorbs the current word.
        while (words--) {
            // memcpy instead of dereferencing a uint32_t*: `data` may be misaligned.
            uint32_t word;
            std::memcpy(&word, cursor, sizeof(word));
            if (words & 1) {
                h1 = _mm_crc32_u32(h1, word);
            } else {
                h2 = _mm_crc32_u32(h2, word);
            }
            cursor += sizeof(word);
        }

        // Same lane selection for the 0-3 trailing bytes.
        while (bytes--) {
            if (bytes & 1) {
                h1 = _mm_crc32_u8(h1, *cursor);
            } else {
                h2 = _mm_crc32_u8(h2, *cursor);
            }
            ++cursor;
        }

        // Swap the 16-bit halves of each lane, as crc_hash() does for its single lane.
        h1 = (h1 << 16) | (h1 >> 16);
        h2 = (h2 << 16) | (h2 >> 16);

        // Pack the lanes with the in-memory layout {h1, h2}; which lane ends up in the
        // numerically high half therefore depends on host byte order. memcpy performs the
        // reinterpretation without reading an inactive union member.
        const uint32_t lanes[2] = {h1, h2};
        static_assert(sizeof(lanes) == sizeof(uint64_t), "two 32-bit lanes must fill 64 bits");
        uint64_t packed;
        std::memcpy(&packed, lanes, sizeof(packed));
        return packed;
    }
#else
    // Variant compiled when neither __SSE4_2__ nor __aarch64__ is defined: crc_hash() is
    // simply zlib_crc_hash() with the same arguments.
    static uint32_t crc_hash(const void* data, uint32_t bytes, uint32_t hash) {
        const uint32_t zlib_value = zlib_crc_hash(data, bytes, hash);
        return zlib_value;
    }
#endif

    // refer to https://github.com/apache/commons-codec/blob/master/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java
    static const uint32_t MURMUR3_32_SEED = 104729;

    // 32-bit MurmurHash3 of the `len` bytes at `key`, seeded with `seed`.
    // Forwards to murmur_hash3_x86_32, which is modified from
    // https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
    static uint32_t murmur_hash3_32(const void* key, int64_t len, uint32_t seed) {
        uint32_t digest = 0;
        murmur_hash3_x86_32(key, len, seed, &digest);
        return digest;
    }

    static const int MURMUR_R = 47;

    // Murmur2 hash implementation returning 64-bit hashes.
    //   input: start of the buffer to hash (no alignment requirement)
    //   len:   buffer length in bytes
    //   seed:  initial hash state
    // Whole 8-byte blocks are read in host byte order, so the result is not portable between
    // little- and big-endian hosts; use murmur_hash64A() when an endian-neutral value is needed.
    static uint64_t murmur_hash2_64(const void* input, int len, uint64_t seed) {
        uint64_t h = seed ^ (len * MURMUR_PRIME);

        const auto* cursor = static_cast<const uint8_t*>(input);
        const uint8_t* const blocks_end = cursor + (len / sizeof(uint64_t)) * sizeof(uint64_t);

        while (cursor != blocks_end) {
            // memcpy instead of dereferencing a uint64_t*: `input` is an arbitrary byte pointer,
            // and a misaligned uint64_t load is undefined behavior.
            uint64_t k;
            std::memcpy(&k, cursor, sizeof(k));
            cursor += sizeof(k);

            k *= MURMUR_PRIME;
            k ^= k >> MURMUR_R;
            k *= MURMUR_PRIME;
            h ^= k;
            h *= MURMUR_PRIME;
        }

        // Fold in the 0-7 bytes left after the last whole block, lowest address in the lowest
        // bits. When there is no tail the switch (and the extra multiply) is skipped.
        switch (len & 7) {
        case 7:
            h ^= uint64_t(cursor[6]) << 48;
            [[fallthrough]];
        case 6:
            h ^= uint64_t(cursor[5]) << 40;
            [[fallthrough]];
        case 5:
            h ^= uint64_t(cursor[4]) << 32;
            [[fallthrough]];
        case 4:
            h ^= uint64_t(cursor[3]) << 24;
            [[fallthrough]];
        case 3:
            h ^= uint64_t(cursor[2]) << 16;
            [[fallthrough]];
        case 2:
            h ^= uint64_t(cursor[1]) << 8;
            [[fallthrough]];
        case 1:
            h ^= uint64_t(cursor[0]);
            h *= MURMUR_PRIME;
        }

        // Final avalanche.
        h ^= h >> MURMUR_R;
        h *= MURMUR_PRIME;
        h ^= h >> MURMUR_R;
        return h;
    }

    // default values recommended by http://isthe.com/chongo/tech/comp/fnv/
    static const uint32_t FNV_PRIME = 0x01000193; //   16777619
    static const uint32_t FNV_SEED = 0x811C9DC5;  // 2166136261
    static const uint64_t FNV64_PRIME = 1099511628211UL;
    static const uint64_t FNV64_SEED = 14695981039346656037UL;
    static const uint64_t MURMUR_PRIME = 0xc6a4a7935bd1e995ULL;
    static const uint32_t MURMUR_SEED = 0xadc83b19ULL;

    // Implementation of the Fowler–Noll–Vo hash function (32-bit): every input byte is XORed
    // into the state, which is then multiplied by FNV_PRIME.
    //   data:  start of the buffer to hash
    //   bytes: buffer length in bytes
    //   hash:  initial state (e.g. FNV_SEED); returned unchanged when `bytes` is 0
    // This is not as performant as boost's hash on int types (2x slower) but has bit entropy.
    // For ints, boost just returns the value of the int which can be pathological.
    // For example, if the data is <1000, 2000, 3000, 4000, ..> and then the mod of 1000
    // is taken on the hash, all values will collide to the same bucket.
    // For string values, Fnv is slightly faster than boost.
    static uint32_t fnv_hash(const void* data, uint32_t bytes, uint32_t hash) {
        const auto* octets = static_cast<const uint8_t*>(data);

        for (uint32_t i = 0; i < bytes; ++i) {
            hash ^= octets[i];
            hash *= FNV_PRIME;
        }

        return hash;
    }

    // 64-bit counterpart of fnv_hash(): same XOR-then-multiply step with FNV64_PRIME on a
    // 64-bit state (e.g. seeded with FNV64_SEED).
    static uint64_t fnv_hash64(const void* data, uint32_t bytes, uint64_t hash) {
        const auto* octets = static_cast<const uint8_t*>(data);

        for (uint32_t i = 0; i < bytes; ++i) {
            hash ^= octets[i];
            hash *= FNV64_PRIME;
        }

        return hash;
    }

    // Our hash function is MurmurHash2, 64 bit version.
    // It was modified in order to provide the same result in
    // big and little endian archs (endian neutral).
    //   key:  start of the buffer to hash (no alignment requirement)
    //   len:  buffer length in bytes
    //   seed: initial hash state
    static uint64_t murmur_hash64A(const void* key, int64_t len, unsigned int seed) {
        const uint64_t m = MURMUR_PRIME;
        const int r = 47;
        uint64_t h = seed ^ (len * m);
        const auto* data = static_cast<const uint8_t*>(key);
        const uint8_t* end = data + (len - (len & 7));

        while (data != end) {
            // Assemble each 8-byte block byte by byte, lowest address in the lowest bits.
            // This yields the same value on every host byte order and, unlike casting `data`
            // to uint64_t*, is well defined for misaligned input. Compilers typically fold
            // the sequence into a single load on little-endian targets.
            uint64_t k = static_cast<uint64_t>(data[0]);
            k |= static_cast<uint64_t>(data[1]) << 8;
            k |= static_cast<uint64_t>(data[2]) << 16;
            k |= static_cast<uint64_t>(data[3]) << 24;
            k |= static_cast<uint64_t>(data[4]) << 32;
            k |= static_cast<uint64_t>(data[5]) << 40;
            k |= static_cast<uint64_t>(data[6]) << 48;
            k |= static_cast<uint64_t>(data[7]) << 56;

            k *= m;
            k ^= k >> r;
            k *= m;
            h ^= k;
            h *= m;
            data += 8;
        }

        // Fold in the 0-7 trailing bytes; skipped entirely (including the multiply) when the
        // length is a multiple of 8.
        switch (len & 7) {
        case 7:
            h ^= static_cast<uint64_t>(data[6]) << 48;
            [[fallthrough]];
        case 6:
            h ^= static_cast<uint64_t>(data[5]) << 40;
            [[fallthrough]];
        case 5:
            h ^= static_cast<uint64_t>(data[4]) << 32;
            [[fallthrough]];
        case 4:
            h ^= static_cast<uint64_t>(data[3]) << 24;
            [[fallthrough]];
        case 3:
            h ^= static_cast<uint64_t>(data[2]) << 16;
            [[fallthrough]];
        case 2:
            h ^= static_cast<uint64_t>(data[1]) << 8;
            [[fallthrough]];
        case 1:
            h ^= static_cast<uint64_t>(data[0]);
            h *= m;
        }

        // Final avalanche.
        h ^= h >> r;
        h *= m;
        h ^= h >> r;
        return h;
    }

    // Computes the hash value for data: crc_hash() when the build defines __SSE4_2__ and
    // CpuInfo reports SSE4.2 support at runtime, fnv_hash() in every other case.
    // Seed values for different steps of the query execution should use different seeds
    // to prevent accidental key collisions. (See IMPALA-219 for more details).
    static uint32_t hash(const void* data, uint32_t bytes, uint32_t seed) {
#ifdef __SSE4_2__
        if (LIKELY(CpuInfo::is_supported(CpuInfo::SSE4_2))) {
            return crc_hash(data, bytes, seed);
        }
#endif
        // Shared fallback: SSE4.2 either not compiled in or not available on this CPU.
        return fnv_hash(data, bytes, seed);
    }

    // Computes a 64-bit hash of the `bytes` bytes at `data` with murmur_hash3_x64_64,
    // seeded with `seed`.
    // NOTE: this function used to contain a crc_hash64() branch guarded by `#ifdef _SSE4_2_`.
    // That macro is misspelled (the one tested elsewhere in this class is `__SSE4_2__`), so the
    // branch was never compiled and murmur_hash3_x64_64 has always been the effective
    // implementation. The dead branch is removed rather than "repaired": enabling it would
    // change every value this function returns and would truncate `bytes` to the uint32_t
    // length parameter of crc_hash64().
    static uint64_t hash64(const void* data, uint64_t bytes, uint64_t seed) {
        uint64_t hash = 0;
        murmur_hash3_x64_64(data, bytes, seed, &hash);
        return hash;
    }
    // hash_combine is the same with boost hash_combine,
    // except replace boost::hash with std::hash.
    // Mixes std::hash<T>(v) into `seed` in place.
    template <class T>
    static inline void hash_combine(std::size_t& seed, const T& v) {
        const std::size_t value_hash = std::hash<T> {}(v);
        const std::size_t mix = value_hash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
        seed ^= mix;
    }

#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wused-but-marked-unused"
#endif
    // 32-bit xxHash (XXH32) of the `len` bytes at `s`, seeded with the 32-bit `seed`.
    // The mapping may change from time to time.
    static xxh_u32 xxHash32WithSeed(const char* s, size_t len, xxh_u32 seed) {
        const xxh_u32 digest = XXH32(s, len, seed);
        return digest;
    }

    // Same as xxHash32WithSeed(), but for a NULL value: hashes the bytes of an `int` zero.
    static xxh_u32 xxHash32NullWithSeed(xxh_u32 seed) {
        static const int NULL_PLACEHOLDER = 0;
        const auto* raw = reinterpret_cast<const char*>(&NULL_PLACEHOLDER);
        return XXH32(raw, sizeof(NULL_PLACEHOLDER), seed);
    }

    // 64-bit hash of the `len` bytes at `s` computed with XXH3_64bits_withSeed, seeded with
    // the 64-bit `seed`.
    static xxh_u64 xxHash64WithSeed(const char* s, size_t len, xxh_u64 seed) {
        const xxh_u64 digest = XXH3_64bits_withSeed(s, len, seed);
        return digest;
    }

    // Same as xxHash64WithSeed(), but for a NULL value: hashes the bytes of an `int` zero.
    static xxh_u64 xxHash64NullWithSeed(xxh_u64 seed) {
        static const int NULL_PLACEHOLDER = 0;
        const auto* raw = reinterpret_cast<const char*>(&NULL_PLACEHOLDER);
        return XXH3_64bits_withSeed(raw, sizeof(NULL_PLACEHOLDER), seed);
    }

#if defined(__clang__)
#pragma clang diagnostic pop
#endif
};

} // namespace doris

// std::hash for doris::TUniqueId: hashes `lo` with seed 0, then hashes `hi` using the
// first result as the seed (both via doris::HashUtil::hash).
template <>
struct std::hash<doris::TUniqueId> {
    std::size_t operator()(const doris::TUniqueId& id) const {
        const std::size_t lo_hash = doris::HashUtil::hash(&id.lo, sizeof(id.lo), 0);
        const std::size_t combined = doris::HashUtil::hash(&id.hi, sizeof(id.hi), lo_hash);
        return combined;
    }
};

// std::hash for doris::TNetworkAddress: hashes the hostname bytes with seed 0, then hashes
// the port using the hostname hash as the seed (both via doris::HashUtil::hash).
template <>
struct std::hash<doris::TNetworkAddress> {
    size_t operator()(const doris::TNetworkAddress& address) const {
        const auto& host = address.hostname;
        const std::size_t host_hash = doris::HashUtil::hash(host.data(), host.size(), 0);
        // NOTE(review): the length 4 presumably matches a 32-bit `port` field - confirm
        // against the TNetworkAddress definition.
        return doris::HashUtil::hash(&address.port, 4, host_hash);
    }
};

template <>
struct std::hash<std::pair<doris::TUniqueId, int64_t>> {
    size_t operator()(const std::pair<doris::TUniqueId, int64_t>& pair) const {
        size_t seed = 0;
        seed = doris::HashUtil::hash(&pair.first.lo, sizeof(pair.first.lo), seed);
        seed = doris::HashUtil::hash(&pair.first.hi, sizeof(pair.first.hi), seed);
        seed = doris::HashUtil::hash(&pair.second, sizeof(pair.second), seed);
        return seed;
    }
};

// Generic std::hash for std::pair: hashes each member with its own std::hash and merges the
// two values with util_hash::HashLen16.
template <class First, class Second>
struct std::hash<std::pair<First, Second>> {
    size_t operator()(const pair<First, Second>& p) const {
        const size_t first_hash = std::hash<First> {}(p.first);
        const size_t second_hash = std::hash<Second> {}(p.second);
        return util_hash::HashLen16(first_hash, second_hash);
    }
};

Coverage Report

Created: 2025-03-11 14:37

Line	Count	Source (jump to first uncovered line)
1		// Licensed to the Apache Software Foundation (ASF) under one
2		// or more contributor license agreements. See the NOTICE file
3		// distributed with this work for additional information
4		// regarding copyright ownership. The ASF licenses this file
5		// to you under the Apache License, Version 2.0 (the
6		// "License"); you may not use this file except in compliance
7		// with the License. You may obtain a copy of the License at
8		//
9		// http://www.apache.org/licenses/LICENSE-2.0
10		//
11		// Unless required by applicable law or agreed to in writing,
12		// software distributed under the License is distributed on an
13		// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14		// KIND, either express or implied. See the License for the
15		// specific language governing permissions and limitations
16		// under the License.
17		// This file is copied from
18		// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/hash-util.h
19		// and modified by Doris
20
21		#pragma once
22
23		#include <gen_cpp/Types_types.h>
24		#include <xxh3.h>
25		#include <zlib.h>
26
27		#include <functional>
28
29		#include "common/compiler_util.h" // IWYU pragma: keep
30		#include "gutil/hash/city.h"
31		#include "runtime/define_primitive_type.h"
32		#include "util/cpu_info.h"
33		#include "util/murmur_hash3.h"
34		#include "util/sse_util.hpp"
35
36		namespace doris {
37
38		// Utility class to compute hash values.
39		class HashUtil {
40		public:
41	3.65M	static uint32_t zlib_crc_hash(const void* data, uint32_t bytes, uint32_t hash) {
42	3.65M	return crc32(hash, (const unsigned char*)data, bytes);
43	3.65M	}
44
45		static uint32_t zlib_crc_hash_null(uint32_t hash) {
46		// null is treat as 0 when hash
47		static const int INT_VALUE = 0;
48		return crc32(hash, (const unsigned char*)(&INT_VALUE), 4);
49		}
50
51		#if defined(__SSE4_2__) \|\| defined(__aarch64__)
52		// Compute the Crc32 hash for data using SSE4 instructions. The input hash parameter is
53		// the current hash/seed value.
54		// This should only be called if SSE is supported.
55		// This is ~4x faster than Fnv/Boost Hash.
56		// NOTE: DO NOT use this method for checksum! This does not generate the standard CRC32 checksum!
57		// For checksum, use CRC-32C algorithm from crc32c.h
58		// NOTE: Any changes made to this function need to be reflected in Codegen::GetHashFn.
59		// TODO: crc32 hashes with different seeds do not result in different hash functions.
60		// The resulting hashes are correlated.
61	15.4k	static uint32_t crc_hash(const void* data, uint32_t bytes, uint32_t hash) {
62	15.4k	if (!CpuInfo::is_supported(CpuInfo::SSE4_2)) {
63	0	return zlib_crc_hash(data, bytes, hash);
64	0	}
65	15.4k	uint32_t words = bytes / sizeof(uint32_t);
66	15.4k	bytes = bytes % sizeof(uint32_t);
67
68	15.4k	const uint32_t* p = reinterpret_cast<const uint32_t*>(data);
69
70	46.4k	while (words--) {
71	31.0k	hash = _mm_crc32_u32(hash, *p);
72	31.0k	++p;
73	31.0k	}
74
75	15.4k	const uint8_t* s = reinterpret_cast<const uint8_t*>(p);
76
77	15.5k	while (bytes--) {
78	70	hash = _mm_crc32_u8(hash, *s);
79	70	++s;
80	70	}
81
82		// The lower half of the CRC hash has has poor uniformity, so swap the halves
83		// for anyone who only uses the first several bits of the hash.
84	15.4k	hash = (hash << 16) \| (hash >> 16);
85	15.4k	return hash;
86	15.4k	}
87
88	0	static uint64_t crc_hash64(const void* data, uint32_t bytes, uint64_t hash) {
89	0	uint32_t words = bytes / sizeof(uint32_t);
90	0	bytes = bytes % sizeof(uint32_t);
91	0
92	0	uint32_t h1 = hash >> 32;
93	0	uint32_t h2 = (hash << 32) >> 32;
94	0
95	0	const uint32_t* p = reinterpret_cast<const uint32_t*>(data);
96	0	while (words--) {
97	0	(words & 1) ? (h1 = _mm_crc32_u32(h1, p)) : (h2 = _mm_crc32_u32(h2, p));
98	0	++p;
99	0	}
100	0
101	0	const uint8_t* s = reinterpret_cast<const uint8_t*>(p);
102	0	while (bytes--) {
103	0	(bytes & 1) ? (h1 = _mm_crc32_u8(h1, s)) : (h2 = _mm_crc32_u8(h2, s));
104	0	++s;
105	0	}
106	0	union {
107	0	uint64_t u64;
108	0	uint32_t u32[2];
109	0	} converter;
110	0	converter.u64 = hash;
111	0
112	0	h1 = (h1 << 16) \| (h1 >> 16);
113	0	h2 = (h2 << 16) \| (h2 >> 16);
114	0	converter.u32[0] = h1;
115	0	converter.u32[1] = h2;
116	0
117	0	return converter.u64;
118	0	}
119		#else
120		static uint32_t crc_hash(const void* data, uint32_t bytes, uint32_t hash) {
121		return zlib_crc_hash(data, bytes, hash);
122		}
123		#endif
124
125		// refer to https://github.com/apache/commons-codec/blob/master/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java
126		static const uint32_t MURMUR3_32_SEED = 104729;
127
128		// modify from https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
129	20	static uint32_t murmur_hash3_32(const void* key, int64_t len, uint32_t seed) {
130	20	uint32_t out = 0;
131	20	murmur_hash3_x86_32(key, len, seed, &out);
132	20	return out;
133	20	}
134
135		static const int MURMUR_R = 47;
136
137		// Murmur2 hash implementation returning 64-bit hashes.
138	0	static uint64_t murmur_hash2_64(const void* input, int len, uint64_t seed) {
139	0	uint64_t h = seed ^ (len * MURMUR_PRIME);
140	0
141	0	const uint64_t* data = reinterpret_cast<const uint64_t*>(input);
142	0	const uint64_t* end = data + (len / sizeof(uint64_t));
143	0
144	0	while (data != end) {
145	0	uint64_t k = *data++;
146	0	k *= MURMUR_PRIME;
147	0	k ^= k >> MURMUR_R;
148	0	k *= MURMUR_PRIME;
149	0	h ^= k;
150	0	h *= MURMUR_PRIME;
151	0	}
152	0
153	0	const uint8_t* data2 = reinterpret_cast<const uint8_t*>(data);
154	0	switch (len & 7) {
155	0	case 7:
156	0	h ^= uint64_t(data2[6]) << 48;
157	0	[[fallthrough]];
158	0	case 6:
159	0	h ^= uint64_t(data2[5]) << 40;
160	0	[[fallthrough]];
161	0	case 5:
162	0	h ^= uint64_t(data2[4]) << 32;
163	0	[[fallthrough]];
164	0	case 4:
165	0	h ^= uint64_t(data2[3]) << 24;
166	0	[[fallthrough]];
167	0	case 3:
168	0	h ^= uint64_t(data2[2]) << 16;
169	0	[[fallthrough]];
170	0	case 2:
171	0	h ^= uint64_t(data2[1]) << 8;
172	0	[[fallthrough]];
173	0	case 1:
174	0	h ^= uint64_t(data2[0]);
175	0	h *= MURMUR_PRIME;
176	0	}
177	0
178	0	h ^= h >> MURMUR_R;
179	0	h *= MURMUR_PRIME;
180	0	h ^= h >> MURMUR_R;
181	0	return h;
182	0	}
183
184		// default values recommended by http://isthe.com/chongo/tech/comp/fnv/
185		static const uint32_t FNV_PRIME = 0x01000193; // 16777619
186		static const uint32_t FNV_SEED = 0x811C9DC5; // 2166136261
187		static const uint64_t FNV64_PRIME = 1099511628211UL;
188		static const uint64_t FNV64_SEED = 14695981039346656037UL;
189		static const uint64_t MURMUR_PRIME = 0xc6a4a7935bd1e995ULL;
190		static const uint32_t MURMUR_SEED = 0xadc83b19ULL;
191		// Implementation of the Fowler–Noll–Vo hash function. This is not as performant
192		// as boost's hash on int types (2x slower) but has bit entropy.
193		// For ints, boost just returns the value of the int which can be pathological.
194		// For example, if the data is <1000, 2000, 3000, 4000, ..> and then the mod of 1000
195		// is taken on the hash, all values will collide to the same bucket.
196		// For string values, Fnv is slightly faster than boost.
197	0	static uint32_t fnv_hash(const void* data, uint32_t bytes, uint32_t hash) {
198	0	const uint8_t* ptr = reinterpret_cast<const uint8_t*>(data);
199
200	0	while (bytes--) {
201	0	hash = (ptr ^ hash) FNV_PRIME;
202	0	++ptr;
203	0	}
204
205	0	return hash;
206	0	}
207
208	0	static uint64_t fnv_hash64(const void* data, uint32_t bytes, uint64_t hash) {
209	0	const uint8_t* ptr = reinterpret_cast<const uint8_t*>(data);
210	0
211	0	while (bytes--) {
212	0	hash = (ptr ^ hash) FNV64_PRIME;
213	0	++ptr;
214	0	}
215	0
216	0	return hash;
217	0	}
218
219		// Our hash function is MurmurHash2, 64 bit version.
220		// It was modified in order to provide the same result in
221		// big and little endian archs (endian neutral).
222	67.8k	static uint64_t murmur_hash64A(const void* key, int64_t len, unsigned int seed) {
223	67.8k	const uint64_t m = MURMUR_PRIME;
224	67.8k	const int r = 47;
225	67.8k	uint64_t h = seed ^ (len * m);
226	67.8k	const uint8_t* data = (const uint8_t*)key;
227	67.8k	const uint8_t* end = data + (len - (len & 7));
228
229	135k	while (data != end) {
230	67.8k	uint64_t k;
231		#if (BYTE_ORDER == BIG_ENDIAN)
232		k = (uint64_t)data[0];
233		k \|= (uint64_t)data[1] << 8;
234		k \|= (uint64_t)data[2] << 16;
235		k \|= (uint64_t)data[3] << 24;
236		k \|= (uint64_t)data[4] << 32;
237		k \|= (uint64_t)data[5] << 40;
238		k \|= (uint64_t)data[6] << 48;
239		k \|= (uint64_t)data[7] << 56;
240		#else
241	67.8k	k = ((uint64_t)data);
242	67.8k	#endif
243
244	67.8k	k *= m;
245	67.8k	k ^= k >> r;
246	67.8k	k *= m;
247	67.8k	h ^= k;
248	67.8k	h *= m;
249	67.8k	data += 8;
250	67.8k	}
251
252	67.8k	switch (len & 7) {
253	0	case 7:
254	0	h ^= (uint64_t)data[6] << 48;
255	0	[[fallthrough]];
256	0	case 6:
257	0	h ^= (uint64_t)data[5] << 40;
258	0	[[fallthrough]];
259	0	case 5:
260	0	h ^= (uint64_t)data[4] << 32;
261	0	[[fallthrough]];
262	3	case 4:
263	3	h ^= (uint64_t)data[3] << 24;
264	3	[[fallthrough]];
265	3	case 3:
266	3	h ^= (uint64_t)data[2] << 16;
267	3	[[fallthrough]];
268	3	case 2:
269	3	h ^= (uint64_t)data[1] << 8;
270	3	[[fallthrough]];
271	6	case 1:
272	6	h ^= (uint64_t)data[0];
273	6	h *= m;
274	67.8k	}
275
276	67.8k	h ^= h >> r;
277	67.8k	h *= m;
278	67.8k	h ^= h >> r;
279	67.8k	return h;
280	67.8k	}
281
282		// Computes the hash value for data. Will call either CrcHash or FnvHash
283		// depending on hardware capabilities.
284		// Seed values for different steps of the query execution should use different seeds
285		// to prevent accidental key collisions. (See IMPALA-219 for more details).
286	15.4k	static uint32_t hash(const void* data, uint32_t bytes, uint32_t seed) {
287	15.4k	#ifdef __SSE4_2__
288
289	15.4k	if (LIKELY(CpuInfo::is_supported(CpuInfo::SSE4_2))) {
290	15.4k	return crc_hash(data, bytes, seed);
291	15.4k	} else {
292	0	return fnv_hash(data, bytes, seed);
293	0	}
294
295		#else
296		return fnv_hash(data, bytes, seed);
297		#endif
298	15.4k	}
299
300	23.2k	static uint64_t hash64(const void* data, uint64_t bytes, uint64_t seed) {
301		#ifdef _SSE4_2_
302		if (LIKELY(CpuInfo::is_supported(CpuInfo::SSE4_2))) {
303		return crc_hash64(data, bytes, seed);
304
305		} else {
306		uint64_t hash = 0;
307		murmur_hash3_x64_64(data, bytes, seed, &hash);
308		return hash;
309		}
310		#else
311	23.2k	uint64_t hash = 0;
312	23.2k	murmur_hash3_x64_64(data, bytes, seed, &hash);
313	23.2k	return hash;
314	23.2k	#endif
315	23.2k	}
316		// hash_combine is the same with boost hash_combine,
317		// except replace boost::hash with std::hash
318		template <class T>
319		static inline void hash_combine(std::size_t& seed, const T& v) {
320		std::hash<T> hasher;
321		seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
322		}
323
324		#if defined(__clang__)
325		#pragma clang diagnostic push
326		#pragma clang diagnostic ignored "-Wused-but-marked-unused"
327		#endif
328		// xxHash function for a byte array. For convenience, a 64-bit seed is also
329		// hashed into the result. The mapping may change from time to time.
330		static xxh_u32 xxHash32WithSeed(const char* s, size_t len, xxh_u32 seed) {
331		return XXH32(s, len, seed);
332		}
333
334		// same to the up function, just for null value
335	0	static xxh_u32 xxHash32NullWithSeed(xxh_u32 seed) {
336	0	static const int INT_VALUE = 0;
337	0	return XXH32(reinterpret_cast<const char*>(&INT_VALUE), sizeof(int), seed);
338	0	}
339
340	18.1k	static xxh_u64 xxHash64WithSeed(const char* s, size_t len, xxh_u64 seed) {
341	18.1k	return XXH3_64bits_withSeed(s, len, seed);
342	18.1k	}
343
344		// same to the up function, just for null value
345		static xxh_u64 xxHash64NullWithSeed(xxh_u64 seed) {
346		static const int INT_VALUE = 0;
347		return XXH3_64bits_withSeed(reinterpret_cast<const char*>(&INT_VALUE), sizeof(int), seed);
348		}
349
350		#if defined(__clang__)
351		#pragma clang diagnostic pop
352		#endif
353		};
354
355		} // namespace doris
356
357		template <>
358		struct std::hash<doris::TUniqueId> {
359	7.52k	std::size_t operator()(const doris::TUniqueId& id) const {
360	7.52k	std::size_t seed = 0;
361	7.52k	seed = doris::HashUtil::hash(&id.lo, sizeof(id.lo), seed);
362	7.52k	seed = doris::HashUtil::hash(&id.hi, sizeof(id.hi), seed);
363	7.52k	return seed;
364	7.52k	}
365		};
366
367		template <>
368		struct std::hash<doris::TNetworkAddress> {
369		size_t operator()(const doris::TNetworkAddress& address) const {
370		std::size_t seed = 0;
371		seed = doris::HashUtil::hash(address.hostname.data(), address.hostname.size(), seed);
372		seed = doris::HashUtil::hash(&address.port, 4, seed);
373		return seed;
374		}
375		};
376
377		template <>
378		struct std::hash<std::pair<doris::TUniqueId, int64_t>> {
379	0	size_t operator()(const std::pair<doris::TUniqueId, int64_t>& pair) const {
380	0	size_t seed = 0;
381	0	seed = doris::HashUtil::hash(&pair.first.lo, sizeof(pair.first.lo), seed);
382	0	seed = doris::HashUtil::hash(&pair.first.hi, sizeof(pair.first.hi), seed);
383	0	seed = doris::HashUtil::hash(&pair.second, sizeof(pair.second), seed);
384	0	return seed;
385	0	}
386		};
387
388		template <class First, class Second>
389		struct std::hash<std::pair<First, Second>> {
390		size_t operator()(const pair<First, Second>& p) const {
391		size_t h1 = std::hash<First>()(p.first);
392		size_t h2 = std::hash<Second>()(p.second);
393		return util_hash::HashLen16(h1, h2);
394		}
395		};