/root/doris/be/src/util/murmur_hash3.cpp
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.

#include "murmur_hash3.h"

#include "vec/common/unaligned.h"

namespace doris {

#include "common/compile_check_begin.h"
#if defined(_MSC_VER)

#define FORCE_INLINE __forceinline

#include <stdlib.h>

#define ROTL32(x, y) _rotl(x, y)
#define ROTL64(x, y) _rotl64(x, y)

#define BIG_CONSTANT(x) (x)

// Other compilers

#else // defined(_MSC_VER)

#define FORCE_INLINE inline __attribute__((always_inline))

FORCE_INLINE uint32_t rotl32(uint32_t x, int8_t r) {
    return (x << r) | (x >> (32 - r));
}

FORCE_INLINE uint64_t rotl64(uint64_t x, int8_t r) {
    return (x << r) | (x >> (64 - r));
}

#define ROTL32(x, y) rotl32(x, y)
#define ROTL64(x, y) rotl64(x, y)

#define BIG_CONSTANT(x) (x##LLU)

#endif // !defined(_MSC_VER)
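
// Note: the (x << r) | (x >> (width - r)) idiom above is recognized by GCC and
// Clang and lowered to a single rotate instruction. A rotation count of 0
// would make the complementary shift (by 32 or 64) undefined behavior, but
// every call site in this file passes a fixed nonzero constant below the word
// width, so the idiom is safe here.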

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

FORCE_INLINE uint32_t getblock32(const uint32_t* p, int i) {
    return unaligned_load<uint32_t>(&p[i]);
}

FORCE_INLINE uint64_t getblock64(const uint64_t* p, int i) {
    return unaligned_load<uint64_t>(&p[i]);
}
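
// unaligned_load<T> (from vec/common/unaligned.h) is the usual memcpy-based
// idiom for reading a T from a possibly misaligned address without undefined
// behavior from alignment or strict-aliasing violations. Conceptually (a
// sketch, not the exact Doris source):
//
//   template <typename T>
//   T unaligned_load(const void* address) {
//       T res;
//       memcpy(&res, address, sizeof(res));
//       return res;
//   }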

//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

FORCE_INLINE uint32_t fmix32(uint32_t h) {
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;

    return h;
}
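
// Both finalizers here are bijections on their word size: an xorshift
// (h ^= h >> k) is invertible, and multiplication by an odd constant is
// invertible modulo 2^32 / 2^64, so the mix never loses information; it only
// spreads every input bit across the whole word.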

//----------

FORCE_INLINE uint64_t fmix64(uint64_t k) {
    k ^= k >> 33;
    k *= BIG_CONSTANT(0xff51afd7ed558ccd);
    k ^= k >> 33;
    k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
    k ^= k >> 33;

    return k;
}

//-----------------------------------------------------------------------------

void murmur_hash3_x86_32(const void* key, int64_t len, uint32_t seed, void* out) {
    const uint8_t* data = (const uint8_t*)key;
    const int nblocks = (int)len / 4;

    uint32_t h1 = seed;

    const uint32_t c1 = 0xcc9e2d51;
    const uint32_t c2 = 0x1b873593;

    //----------
    // body

    const uint32_t* blocks = (const uint32_t*)(data + nblocks * 4);
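    // blocks points one-past-the-end of the full 4-byte blocks; the loop below
    // walks them with negative indices (i = -nblocks .. -1), which is
    // equivalent to iterating data[0 .. nblocks*4) four bytes at a time.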

    for (int i = -nblocks; i; i++) {
        uint32_t k1 = getblock32(blocks, i);

        k1 *= c1;
        k1 = ROTL32(k1, 15);
        k1 *= c2;

        h1 ^= k1;
        h1 = ROTL32(h1, 13);
        h1 = h1 * 5 + 0xe6546b64;
    }

    //----------
    // tail

    const uint8_t* tail = (const uint8_t*)(data + nblocks * 4);

    uint32_t k1 = 0;

    switch (len & 3) {
    case 3:
        k1 ^= tail[2] << 16;
        [[fallthrough]];
    case 2:
        k1 ^= tail[1] << 8;
        [[fallthrough]];
    case 1:
        k1 ^= tail[0];
        k1 *= c1;
        k1 = ROTL32(k1, 15);
        k1 *= c2;
        h1 ^= k1;
    }

    //----------
    // finalization

    h1 ^= len;

    h1 = fmix32(h1);

    *(uint32_t*)out = h1;
}
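
// Usage sketch (hypothetical caller; key and seed values are illustrative):
//
//   uint32_t h = 0;
//   murmur_hash3_x86_32("abc", 3, /*seed=*/0, &h);
//   // h now holds the 32-bit MurmurHash3_x86_32 digest of "abc".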

//-----------------------------------------------------------------------------

void murmur_hash3_x86_128(const void* key, const int len, uint32_t seed, void* out) {
    const uint8_t* data = (const uint8_t*)key;
    const int nblocks = len / 16;

    uint32_t h1 = seed;
    uint32_t h2 = seed;
    uint32_t h3 = seed;
    uint32_t h4 = seed;

    const uint32_t c1 = 0x239b961b;
    const uint32_t c2 = 0xab0e9789;
    const uint32_t c3 = 0x38b34ae5;
    const uint32_t c4 = 0xa1e38b93;

    //----------
    // body

    const uint32_t* blocks = (const uint32_t*)(data + nblocks * 16);
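    // The x86 128-bit variant keeps four 32-bit lanes (h1..h4); each 16-byte
    // block feeds one 4-byte word into each lane, and the lanes are
    // cross-coupled (h1 += h2, h2 += h3, ...) so the words interact.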

    for (int i = -nblocks; i; i++) {
        uint32_t k1 = getblock32(blocks, i * 4 + 0);
        uint32_t k2 = getblock32(blocks, i * 4 + 1);
        uint32_t k3 = getblock32(blocks, i * 4 + 2);
        uint32_t k4 = getblock32(blocks, i * 4 + 3);

        k1 *= c1;
        k1 = ROTL32(k1, 15);
        k1 *= c2;
        h1 ^= k1;

        h1 = ROTL32(h1, 19);
        h1 += h2;
        h1 = h1 * 5 + 0x561ccd1b;

        k2 *= c2;
        k2 = ROTL32(k2, 16);
        k2 *= c3;
        h2 ^= k2;

        h2 = ROTL32(h2, 17);
        h2 += h3;
        h2 = h2 * 5 + 0x0bcaa747;

        k3 *= c3;
        k3 = ROTL32(k3, 17);
        k3 *= c4;
        h3 ^= k3;

        h3 = ROTL32(h3, 15);
        h3 += h4;
        h3 = h3 * 5 + 0x96cd1c35;

        k4 *= c4;
        k4 = ROTL32(k4, 18);
        k4 *= c1;
        h4 ^= k4;

        h4 = ROTL32(h4, 13);
        h4 += h1;
        h4 = h4 * 5 + 0x32ac3b17;
    }

    //----------
    // tail

    const uint8_t* tail = (const uint8_t*)(data + nblocks * 16);

    uint32_t k1 = 0;
    uint32_t k2 = 0;
    uint32_t k3 = 0;
    uint32_t k4 = 0;

    switch (len & 15) {
    case 15:
        k4 ^= tail[14] << 16;
        [[fallthrough]];
    case 14:
        k4 ^= tail[13] << 8;
        [[fallthrough]];
    case 13:
        k4 ^= tail[12] << 0;
        k4 *= c4;
        k4 = ROTL32(k4, 18);
        k4 *= c1;
        h4 ^= k4;
        [[fallthrough]];
    case 12:
        k3 ^= tail[11] << 24;
        [[fallthrough]];
    case 11:
        k3 ^= tail[10] << 16;
        [[fallthrough]];
    case 10:
        k3 ^= tail[9] << 8;
        [[fallthrough]];
    case 9:
        k3 ^= tail[8] << 0;
        k3 *= c3;
        k3 = ROTL32(k3, 17);
        k3 *= c4;
        h3 ^= k3;
        [[fallthrough]];
    case 8:
        k2 ^= tail[7] << 24;
        [[fallthrough]];
    case 7:
        k2 ^= tail[6] << 16;
        [[fallthrough]];
    case 6:
        k2 ^= tail[5] << 8;
        [[fallthrough]];
    case 5:
        k2 ^= tail[4] << 0;
        k2 *= c2;
        k2 = ROTL32(k2, 16);
        k2 *= c3;
        h2 ^= k2;
        [[fallthrough]];
    case 4:
        k1 ^= tail[3] << 24;
        [[fallthrough]];
    case 3:
        k1 ^= tail[2] << 16;
        [[fallthrough]];
    case 2:
        k1 ^= tail[1] << 8;
        [[fallthrough]];
    case 1:
        k1 ^= tail[0] << 0;
        k1 *= c1;
        k1 = ROTL32(k1, 15);
        k1 *= c2;
        h1 ^= k1;
    }

    //----------
    // finalization

    h1 ^= len;
    h2 ^= len;
    h3 ^= len;
    h4 ^= len;

    h1 += h2;
    h1 += h3;
    h1 += h4;
    h2 += h1;
    h3 += h1;
    h4 += h1;

    h1 = fmix32(h1);
    h2 = fmix32(h2);
    h3 = fmix32(h3);
    h4 = fmix32(h4);

    h1 += h2;
    h1 += h3;
    h1 += h4;
    h2 += h1;
    h3 += h1;
    h4 += h1;

    ((uint32_t*)out)[0] = h1;
    ((uint32_t*)out)[1] = h2;
    ((uint32_t*)out)[2] = h3;
    ((uint32_t*)out)[3] = h4;
}
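
// Usage sketch (hypothetical caller; values are illustrative only):
//
//   uint32_t out128[4];
//   murmur_hash3_x86_128("abc", 3, /*seed=*/0, out128);
//   // out128 holds the 128-bit digest as four 32-bit words.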

//-----------------------------------------------------------------------------

// Helper that implements the core MurmurHash3 x64 128-bit algorithm. h1 and h2
// must be initialized to the seed by the caller; on return they hold the two
// 64-bit halves of the 128-bit digest.
void murmur_hash3_x64_process(const void* key, const int len, uint64_t& h1, uint64_t& h2) {
    const uint8_t* data = (const uint8_t*)key;
    const int nblocks = len / 16;

    const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
    const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);

    //----------
    // body

    const uint64_t* blocks = (const uint64_t*)(data);
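    // Unlike the x86 variants, the x64 variant indexes blocks forward from the
    // start of the data; each 16-byte block supplies one 64-bit word per lane.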

    for (int i = 0; i < nblocks; i++) {
        uint64_t k1 = getblock64(blocks, i * 2 + 0);
        uint64_t k2 = getblock64(blocks, i * 2 + 1);

        k1 *= c1;
        k1 = ROTL64(k1, 31);
        k1 *= c2;
        h1 ^= k1;

        h1 = ROTL64(h1, 27);
        h1 += h2;
        h1 = h1 * 5 + 0x52dce729;

        k2 *= c2;
        k2 = ROTL64(k2, 33);
        k2 *= c1;
        h2 ^= k2;

        h2 = ROTL64(h2, 31);
        h2 += h1;
        h2 = h2 * 5 + 0x38495ab5;
    }

    //----------
    // tail

    const uint8_t* tail = (const uint8_t*)(data + nblocks * 16);

    uint64_t k1 = 0;
    uint64_t k2 = 0;

    switch (len & 15) {
    case 15:
        k2 ^= ((uint64_t)tail[14]) << 48;
        [[fallthrough]];
    case 14:
        k2 ^= ((uint64_t)tail[13]) << 40;
        [[fallthrough]];
    case 13:
        k2 ^= ((uint64_t)tail[12]) << 32;
        [[fallthrough]];
    case 12:
        k2 ^= ((uint64_t)tail[11]) << 24;
        [[fallthrough]];
    case 11:
        k2 ^= ((uint64_t)tail[10]) << 16;
        [[fallthrough]];
    case 10:
        k2 ^= ((uint64_t)tail[9]) << 8;
        [[fallthrough]];
    case 9:
        k2 ^= ((uint64_t)tail[8]) << 0;
        k2 *= c2;
        k2 = ROTL64(k2, 33);
        k2 *= c1;
        h2 ^= k2;
        [[fallthrough]];
    case 8:
        k1 ^= ((uint64_t)tail[7]) << 56;
        [[fallthrough]];
    case 7:
        k1 ^= ((uint64_t)tail[6]) << 48;
        [[fallthrough]];
    case 6:
        k1 ^= ((uint64_t)tail[5]) << 40;
        [[fallthrough]];
    case 5:
        k1 ^= ((uint64_t)tail[4]) << 32;
        [[fallthrough]];
    case 4:
        k1 ^= ((uint64_t)tail[3]) << 24;
        [[fallthrough]];
    case 3:
        k1 ^= ((uint64_t)tail[2]) << 16;
        [[fallthrough]];
    case 2:
        k1 ^= ((uint64_t)tail[1]) << 8;
        [[fallthrough]];
    case 1:
        k1 ^= ((uint64_t)tail[0]) << 0;
        k1 *= c1;
        k1 = ROTL64(k1, 31);
        k1 *= c2;
        h1 ^= k1;
    }

    //----------
    // finalization

    h1 ^= len;
    h2 ^= len;

    h1 += h2;
    h2 += h1;

    h1 = fmix64(h1);
    h2 = fmix64(h2);

    h1 += h2;
    h2 += h1;
}

//-----------------------------------------------------------------------------

// The original `murmur_hash3_x64_128` was copied from:
// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
// Doris refactored its core into `murmur_hash3_x64_process`; this entry point
// is retained for reference even though it currently has no callers.
void murmur_hash3_x64_128(const void* key, const int len, const uint32_t seed, void* out) {
    uint64_t h1 = seed;
    uint64_t h2 = seed;
    murmur_hash3_x64_process(key, len, h1, h2);
    ((uint64_t*)out)[0] = h1;
    ((uint64_t*)out)[1] = h2;
}
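
// Usage sketch (hypothetical caller; values are illustrative only):
//
//   uint64_t out128[2];
//   murmur_hash3_x64_128("abc", 3, /*seed=*/0, out128);
//   // out128[0] and out128[1] are the two halves of the 128-bit digest.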

//-----------------------------------------------------------------------------

// MurmurHash3 x64 64-bit variant using the shared 128-bit processing function.
// It computes the full 128-bit state and outputs only the first 64-bit word,
// so it matches the first word of `murmur_hash3_x64_128` for the same seed.
// Used for function mmh3_64_v2.
void murmur_hash3_x64_64_shared(const void* key, const int64_t len, const uint64_t seed,
                                void* out) {
    uint64_t h1 = seed;
    uint64_t h2 = seed;
    // Note: len is narrowed to int here, so this path does not support inputs
    // longer than INT_MAX bytes.
    murmur_hash3_x64_process(key, static_cast<int>(len), h1, h2);
    ((uint64_t*)out)[0] = h1;
}
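
// Usage sketch (hypothetical caller; values are illustrative only):
//
//   uint64_t h = 0;
//   murmur_hash3_x64_64_shared("abc", 3, /*seed=*/0, &h);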

//-----------------------------------------------------------------------------

// MurmurHash3 x64 64-bit variant with an optimized standalone implementation.
// It processes the input as single 8-byte blocks with one 64-bit lane, so its
// digests differ from `murmur_hash3_x64_64_shared` (which truncates the
// 128-bit hash). Used for function mmh3_64.
void murmur_hash3_x64_64(const void* key, const int64_t len, const uint64_t seed, void* out) {
    const uint8_t* data = (const uint8_t*)key;
    const int nblocks = (int)len / 8;
    uint64_t h1 = seed;

    const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
    const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);

    //----------
    // body

    const uint64_t* blocks = (const uint64_t*)(data);

    for (int i = 0; i < nblocks; i++) {
        uint64_t k1 = getblock64(blocks, i);

        k1 *= c1;
        k1 = ROTL64(k1, 31);
        k1 *= c2;
        h1 ^= k1;

        h1 = ROTL64(h1, 27);
        h1 = h1 * 5 + 0x52dce729;
    }

    //----------
    // tail

    const uint8_t* tail = (const uint8_t*)(data + nblocks * 8);
    uint64_t k1 = 0;

    switch (len & 7) {
    case 7:
        k1 ^= ((uint64_t)tail[6]) << 48;
        [[fallthrough]];
    case 6:
        k1 ^= ((uint64_t)tail[5]) << 40;
        [[fallthrough]];
    case 5:
        k1 ^= ((uint64_t)tail[4]) << 32;
        [[fallthrough]];
    case 4:
        k1 ^= ((uint64_t)tail[3]) << 24;
        [[fallthrough]];
    case 3:
        k1 ^= ((uint64_t)tail[2]) << 16;
        [[fallthrough]];
    case 2:
        k1 ^= ((uint64_t)tail[1]) << 8;
        [[fallthrough]];
    case 1:
        k1 ^= ((uint64_t)tail[0]) << 0;
        k1 *= c1;
        k1 = ROTL64(k1, 31);
        k1 *= c2;
        h1 ^= k1;
    }

    //----------
    // finalization

    h1 ^= len;
    h1 = fmix64(h1);

    ((uint64_t*)out)[0] = h1;
}
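
// Usage sketch (hypothetical caller; values are illustrative only):
//
//   uint64_t h = 0;
//   murmur_hash3_x64_64("abc", 3, /*seed=*/0, &h);
//   // mmh3_64 (this function) and mmh3_64_v2 (the shared variant above) use
//   // different block schedules and therefore give different results.
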
#include "common/compile_check_end.h"

} // namespace doris