/root/doris/be/src/gutil/hash/jenkins.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2011 Google Inc. All Rights Reserved. |
2 | | // |
3 | | // Contains the legacy Bob Jenkins Lookup2-based hashing routines. These need to |
4 | | // always return the same results as their values have been recorded in various |
5 | | // places and cannot easily be updated. |
6 | | // |
7 | | // Original Author: Sanjay Ghemawat |
8 | | // |
9 | | // This is based on Bob Jenkins newhash function |
10 | | // see: http://burtleburtle.net/bob/hash/evahash.html |
11 | | // According to http://burtleburtle.net/bob/c/lookup2.c, |
12 | | // his implementation is public domain. |
13 | | // |
14 | | // The implementation here is backwards compatible with the google1 |
15 | | // implementation. The google1 implementation used a 'signed char *' |
16 | | // to load words from memory a byte at a time. See gwshash.cc for an |
17 | | // implementation that is compatible with Bob Jenkins' lookup2.c. |
18 | | |
19 | | #include "gutil/hash/jenkins.h" |
20 | | |
21 | | #include "common/logging.h" |
22 | | |
23 | | #include "gutil/hash/jenkins_lookup2.h" |
24 | | #include "gutil/integral_types.h" |
25 | | |
26 | 0 | static inline uint32 char2unsigned(char c) { |
27 | 0 | return static_cast<uint32>(static_cast<unsigned char>(c)); |
28 | 0 | } |
29 | | |
30 | 0 | static inline uint64 char2unsigned64(char c) { |
31 | 0 | return static_cast<uint64>(static_cast<unsigned char>(c)); |
32 | 0 | } |
33 | | |
34 | 0 | uint32 Hash32StringWithSeedReferenceImplementation(const char* s, uint32 len, uint32 c) { |
35 | 0 | uint32 a, b; |
36 | 0 | uint32 keylen; |
37 | |
|
38 | 0 | a = b = 0x9e3779b9UL; // the golden ratio; an arbitrary value |
39 | |
|
40 | 0 | for (keylen = len; keylen >= 3 * sizeof(a); |
41 | 0 | keylen -= static_cast<uint32>(3 * sizeof(a)), s += 3 * sizeof(a)) { |
42 | 0 | a += Google1At(s); |
43 | 0 | b += Google1At(s + sizeof(a)); |
44 | 0 | c += Google1At(s + sizeof(a) * 2); |
45 | 0 | mix(a, b, c); |
46 | 0 | } |
47 | |
|
48 | 0 | c += len; |
49 | 0 | switch (keylen) { // deal with rest. Cases fall through |
50 | 0 | case 11: |
51 | 0 | c += char2unsigned(s[10]) << 24; |
52 | 0 | case 10: |
53 | 0 | c += char2unsigned(s[9]) << 16; |
54 | 0 | case 9: |
55 | 0 | c += char2unsigned(s[8]) << 8; |
56 | | // the first byte of c is reserved for the length |
57 | 0 | case 8: |
58 | 0 | b += Google1At(s + 4); |
59 | 0 | a += Google1At(s); |
60 | 0 | break; |
61 | 0 | case 7: |
62 | 0 | b += char2unsigned(s[6]) << 16; |
63 | 0 | case 6: |
64 | 0 | b += char2unsigned(s[5]) << 8; |
65 | 0 | case 5: |
66 | 0 | b += char2unsigned(s[4]); |
67 | 0 | case 4: |
68 | 0 | a += Google1At(s); |
69 | 0 | break; |
70 | 0 | case 3: |
71 | 0 | a += char2unsigned(s[2]) << 16; |
72 | 0 | case 2: |
73 | 0 | a += char2unsigned(s[1]) << 8; |
74 | 0 | case 1: |
75 | 0 | a += char2unsigned(s[0]); |
76 | | // case 0 : nothing left to add |
77 | 0 | } |
78 | 0 | mix(a, b, c); |
79 | 0 | return c; |
80 | 0 | } |
81 | | |
82 | 0 | uint32 Hash32StringWithSeed(const char* s, uint32 len, uint32 c) { |
83 | 0 | uint32 a, b; |
84 | 0 | uint32 keylen; |
85 | |
|
86 | 0 | a = b = 0x9e3779b9UL; // the golden ratio; an arbitrary value |
87 | |
|
88 | 0 | keylen = len; |
89 | 0 | if (keylen >= 4 * sizeof(a)) { |
90 | 0 | uint32 word32AtOffset0 = Google1At(s); |
91 | 0 | do { |
92 | 0 | a += word32AtOffset0; |
93 | 0 | b += Google1At(s + sizeof(a)); |
94 | 0 | c += Google1At(s + sizeof(a) * 2); |
95 | 0 | s += 3 * sizeof(a); |
96 | 0 | word32AtOffset0 = Google1At(s); |
97 | 0 | mix(a, b, c); |
98 | 0 | keylen -= 3 * static_cast<uint32>(sizeof(a)); |
99 | 0 | } while (keylen >= 4 * sizeof(a)); |
100 | 0 | if (keylen >= 3 * sizeof(a)) { |
101 | 0 | a += word32AtOffset0; |
102 | 0 | b += Google1At(s + sizeof(a)); |
103 | 0 | c += Google1At(s + sizeof(a) * 2); |
104 | 0 | s += 3 * sizeof(a); |
105 | 0 | mix(a, b, c); |
106 | 0 | keylen -= 3 * static_cast<uint32>(sizeof(a)); |
107 | 0 | DCHECK_LT(keylen, sizeof(a)); |
108 | 0 | c += len; |
109 | 0 | switch (keylen) { // deal with rest. Cases fall through |
110 | 0 | case 3: |
111 | 0 | a += char2unsigned(s[2]) << 16; |
112 | 0 | case 2: |
113 | 0 | a += char2unsigned(s[1]) << 8; |
114 | 0 | case 1: |
115 | 0 | a += char2unsigned(s[0]); |
116 | 0 | } |
117 | 0 | } else { |
118 | 0 | DCHECK(sizeof(a) <= keylen && keylen < 3 * sizeof(a)); |
119 | 0 | c += len; |
120 | 0 | switch (keylen) { // deal with rest. Cases fall through |
121 | 0 | case 11: |
122 | 0 | c += char2unsigned(s[10]) << 24; |
123 | 0 | case 10: |
124 | 0 | c += char2unsigned(s[9]) << 16; |
125 | 0 | case 9: |
126 | 0 | c += char2unsigned(s[8]) << 8; |
127 | 0 | case 8: |
128 | 0 | b += Google1At(s + 4); |
129 | 0 | a += word32AtOffset0; |
130 | 0 | break; |
131 | 0 | case 7: |
132 | 0 | b += char2unsigned(s[6]) << 16; |
133 | 0 | case 6: |
134 | 0 | b += char2unsigned(s[5]) << 8; |
135 | 0 | case 5: |
136 | 0 | b += char2unsigned(s[4]); |
137 | 0 | case 4: |
138 | 0 | a += word32AtOffset0; |
139 | 0 | break; |
140 | 0 | } |
141 | 0 | } |
142 | 0 | } else { |
143 | 0 | if (keylen >= 3 * sizeof(a)) { |
144 | 0 | a += Google1At(s); |
145 | 0 | b += Google1At(s + sizeof(a)); |
146 | 0 | c += Google1At(s + sizeof(a) * 2); |
147 | 0 | s += 3 * sizeof(a); |
148 | 0 | mix(a, b, c); |
149 | 0 | keylen -= 3 * static_cast<uint32>(sizeof(a)); |
150 | 0 | } |
151 | 0 | c += len; |
152 | 0 | switch (keylen) { // deal with rest. Cases fall through |
153 | 0 | case 11: |
154 | 0 | c += char2unsigned(s[10]) << 24; |
155 | 0 | case 10: |
156 | 0 | c += char2unsigned(s[9]) << 16; |
157 | 0 | case 9: |
158 | 0 | c += char2unsigned(s[8]) << 8; |
159 | 0 | case 8: |
160 | 0 | b += Google1At(s + 4); |
161 | 0 | a += Google1At(s); |
162 | 0 | break; |
163 | 0 | case 7: |
164 | 0 | b += char2unsigned(s[6]) << 16; |
165 | 0 | case 6: |
166 | 0 | b += char2unsigned(s[5]) << 8; |
167 | 0 | case 5: |
168 | 0 | b += char2unsigned(s[4]); |
169 | 0 | case 4: |
170 | 0 | a += Google1At(s); |
171 | 0 | break; |
172 | 0 | case 3: |
173 | 0 | a += char2unsigned(s[2]) << 16; |
174 | 0 | case 2: |
175 | 0 | a += char2unsigned(s[1]) << 8; |
176 | 0 | case 1: |
177 | 0 | a += char2unsigned(s[0]); |
178 | 0 | } |
179 | 0 | } |
180 | 0 | mix(a, b, c); |
181 | 0 | return c; |
182 | 0 | } |
183 | | |
184 | 0 | uint64 Hash64StringWithSeed(const char* s, uint32 len, uint64 c) { |
185 | 0 | uint64 a, b; |
186 | 0 | uint32 keylen; |
187 | |
|
188 | 0 | a = b = GG_ULONGLONG(0xe08c1d668b756f82); // the golden ratio; an arbitrary value |
189 | |
|
190 | 0 | for (keylen = len; keylen >= 3 * sizeof(a); |
191 | 0 | keylen -= 3 * static_cast<uint32>(sizeof(a)), s += 3 * sizeof(a)) { |
192 | 0 | a += Word64At(s); |
193 | 0 | b += Word64At(s + sizeof(a)); |
194 | 0 | c += Word64At(s + sizeof(a) * 2); |
195 | 0 | mix(a, b, c); |
196 | 0 | } |
197 | |
|
198 | 0 | c += len; |
199 | 0 | switch (keylen) { // deal with rest. Cases fall through |
200 | 0 | case 23: |
201 | 0 | c += char2unsigned64(s[22]) << 56; |
202 | 0 | case 22: |
203 | 0 | c += char2unsigned64(s[21]) << 48; |
204 | 0 | case 21: |
205 | 0 | c += char2unsigned64(s[20]) << 40; |
206 | 0 | case 20: |
207 | 0 | c += char2unsigned64(s[19]) << 32; |
208 | 0 | case 19: |
209 | 0 | c += char2unsigned64(s[18]) << 24; |
210 | 0 | case 18: |
211 | 0 | c += char2unsigned64(s[17]) << 16; |
212 | 0 | case 17: |
213 | 0 | c += char2unsigned64(s[16]) << 8; |
214 | | // the first byte of c is reserved for the length |
215 | 0 | case 16: |
216 | 0 | b += Word64At(s + 8); |
217 | 0 | a += Word64At(s); |
218 | 0 | break; |
219 | 0 | case 15: |
220 | 0 | b += char2unsigned64(s[14]) << 48; |
221 | 0 | case 14: |
222 | 0 | b += char2unsigned64(s[13]) << 40; |
223 | 0 | case 13: |
224 | 0 | b += char2unsigned64(s[12]) << 32; |
225 | 0 | case 12: |
226 | 0 | b += char2unsigned64(s[11]) << 24; |
227 | 0 | case 11: |
228 | 0 | b += char2unsigned64(s[10]) << 16; |
229 | 0 | case 10: |
230 | 0 | b += char2unsigned64(s[9]) << 8; |
231 | 0 | case 9: |
232 | 0 | b += char2unsigned64(s[8]); |
233 | 0 | case 8: |
234 | 0 | a += Word64At(s); |
235 | 0 | break; |
236 | 0 | case 7: |
237 | 0 | a += char2unsigned64(s[6]) << 48; |
238 | 0 | case 6: |
239 | 0 | a += char2unsigned64(s[5]) << 40; |
240 | 0 | case 5: |
241 | 0 | a += char2unsigned64(s[4]) << 32; |
242 | 0 | case 4: |
243 | 0 | a += char2unsigned64(s[3]) << 24; |
244 | 0 | case 3: |
245 | 0 | a += char2unsigned64(s[2]) << 16; |
246 | 0 | case 2: |
247 | 0 | a += char2unsigned64(s[1]) << 8; |
248 | 0 | case 1: |
249 | 0 | a += char2unsigned64(s[0]); |
250 | | // case 0: nothing left to add |
251 | 0 | } |
252 | 0 | mix(a, b, c); |
253 | 0 | return c; |
254 | 0 | } |