MurmurHash3.cpp
// @HEADER
// *****************************************************************************
// Tpetra: Templated Linear Algebra Services Package
//
// Copyright 2008 NTESS and the Tpetra contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.

#include "MurmurHash3.hpp"

//-----------------------------------------------------------------------------
// Platform-specific functions and macros

// Microsoft Visual Studio
#if defined(_MSC_VER)

#define FORCE_INLINE __forceinline

#include <stdlib.h>

#define ROTL32(x, y) _rotl(x, y)
#define ROTL64(x, y) _rotl64(x, y)

#define BIG_CONSTANT(x) (x)

// Other compilers

#else // not defined(_MSC_VER)

namespace { // anonymous

inline uint32_t rotl32(uint32_t x, int8_t r) {
  return (x << r) | (x >> (32 - r));
}

inline uint64_t rotl64(uint64_t x, int8_t r) {
  return (x << r) | (x >> (64 - r));
}

} // namespace

#define ROTL32(x, y) rotl32(x, y)
#define ROTL64(x, y) rotl64(x, y)

#define BIG_CONSTANT(x) (x##LLU)

#endif // !defined(_MSC_VER)

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

#define GETBLOCK(lhs, p, i) \
  {                         \
    lhs = p[(i)];           \
  }
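
// A hedged sketch (not part of the original code): on a big-endian or
// alignment-restricted platform, the conversion mentioned above would go in
// this macro. GETBLOCK is used below for both 32-bit and 64-bit block reads,
// so a replacement has to work for both widths; one option is a small helper
// (names are illustrative only, and <cstring> would be needed for memcpy):
//
//   template <class Word>
//   inline Word getblock_portable(const Word *p, int i) {
//     Word w;
//     std::memcpy(&w, p + i, sizeof(Word)); // tolerates unaligned reads
//     return w;                             // byte-swap here on big-endian targets
//   }
//   #define GETBLOCK(lhs, p, i) { lhs = getblock_portable(p, (i)); }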

//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

#define FMIX_32(h)        \
  {                       \
    uint32_t t_h = (h);   \
    t_h ^= t_h >> 16;     \
    t_h *= 0x85ebca6b;    \
    t_h ^= t_h >> 13;     \
    t_h *= 0xc2b2ae35;    \
    t_h ^= t_h >> 16;     \
    h = t_h;              \
  }

//----------

#define FMIX_64(k)                           \
  {                                          \
    uint64_t t_k = (k);                      \
    t_k ^= t_k >> 33;                        \
    t_k *= BIG_CONSTANT(0xff51afd7ed558ccd); \
    t_k ^= t_k >> 33;                        \
    t_k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); \
    t_k ^= t_k >> 33;                        \
    k = t_k;                                 \
  }

//-----------------------------------------------------------------------------

namespace Tpetra {
namespace Details {

void MurmurHash3_x86_32(const void *key, int len,
                        uint32_t seed, void *out) {
  const uint8_t *data = (const uint8_t *)key;
  const int nblocks = len / 4;

  uint32_t h1 = seed;

  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;

  //----------
  // body

  const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4);

  for (int i = -nblocks; i; i++) {
    uint32_t k1;
    GETBLOCK(k1, blocks, i);

    k1 *= c1;
    k1 = ROTL32(k1, 15);
    k1 *= c2;

    h1 ^= k1;
    h1 = ROTL32(h1, 13);
    h1 = h1 * 5 + 0xe6546b64;
  }

  //----------
  // tail

  const uint8_t *tail = (const uint8_t *)(data + nblocks * 4);

  uint32_t k1 = 0;

  switch (len & 3) {
    case 3: k1 ^= tail[2] << 16;
    case 2: k1 ^= tail[1] << 8;
    case 1:
      k1 ^= tail[0];
      k1 *= c1;
      k1 = ROTL32(k1, 15);
      k1 *= c2;
      h1 ^= k1;
  };

  //----------
  // finalization

  h1 ^= len;

  FMIX_32(h1);

  *(uint32_t *)out = h1;
}
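
// A hedged usage sketch (not part of the original file): the 32-bit variant
// writes its result into caller-provided storage. The function name, seed
// value, and inputs below are illustrative only.
//
//   #include <cstring> // std::strlen
//
//   uint32_t hashOfName(const char *name) {
//     uint32_t result = 0;
//     Tpetra::Details::MurmurHash3_x86_32(
//         name, static_cast<int>(std::strlen(name)), /* seed = */ 42u, &result);
//     return result;
//   }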

//-----------------------------------------------------------------------------

void MurmurHash3_x86_128(const void *key, const int len,
                         uint32_t seed, void *out) {
  const uint8_t *data = (const uint8_t *)key;
  const int nblocks = len / 16;

  uint32_t h1 = seed;
  uint32_t h2 = seed;
  uint32_t h3 = seed;
  uint32_t h4 = seed;

  const uint32_t c1 = 0x239b961b;
  const uint32_t c2 = 0xab0e9789;
  const uint32_t c3 = 0x38b34ae5;
  const uint32_t c4 = 0xa1e38b93;

  //----------
  // body

  const uint32_t *blocks = (const uint32_t *)(data + nblocks * 16);

  for (int i = -nblocks; i; i++) {
    uint32_t k1, k2, k3, k4;
    GETBLOCK(k1, blocks, i * 4 + 0);
    GETBLOCK(k2, blocks, i * 4 + 1);
    GETBLOCK(k3, blocks, i * 4 + 2);
    GETBLOCK(k4, blocks, i * 4 + 3);

    k1 *= c1;
    k1 = ROTL32(k1, 15);
    k1 *= c2;
    h1 ^= k1;

    h1 = ROTL32(h1, 19);
    h1 += h2;
    h1 = h1 * 5 + 0x561ccd1b;

    k2 *= c2;
    k2 = ROTL32(k2, 16);
    k2 *= c3;
    h2 ^= k2;

    h2 = ROTL32(h2, 17);
    h2 += h3;
    h2 = h2 * 5 + 0x0bcaa747;

    k3 *= c3;
    k3 = ROTL32(k3, 17);
    k3 *= c4;
    h3 ^= k3;

    h3 = ROTL32(h3, 15);
    h3 += h4;
    h3 = h3 * 5 + 0x96cd1c35;

    k4 *= c4;
    k4 = ROTL32(k4, 18);
    k4 *= c1;
    h4 ^= k4;

    h4 = ROTL32(h4, 13);
    h4 += h1;
    h4 = h4 * 5 + 0x32ac3b17;
  }

  //----------
  // tail

  const uint8_t *tail = (const uint8_t *)(data + nblocks * 16);

  uint32_t k1 = 0;
  uint32_t k2 = 0;
  uint32_t k3 = 0;
  uint32_t k4 = 0;

  switch (len & 15) {
    case 15: k4 ^= tail[14] << 16;
    case 14: k4 ^= tail[13] << 8;
    case 13:
      k4 ^= tail[12] << 0;
      k4 *= c4;
      k4 = ROTL32(k4, 18);
      k4 *= c1;
      h4 ^= k4;

    case 12: k3 ^= tail[11] << 24;
    case 11: k3 ^= tail[10] << 16;
    case 10: k3 ^= tail[9] << 8;
    case 9:
      k3 ^= tail[8] << 0;
      k3 *= c3;
      k3 = ROTL32(k3, 17);
      k3 *= c4;
      h3 ^= k3;

    case 8: k2 ^= tail[7] << 24;
    case 7: k2 ^= tail[6] << 16;
    case 6: k2 ^= tail[5] << 8;
    case 5:
      k2 ^= tail[4] << 0;
      k2 *= c2;
      k2 = ROTL32(k2, 16);
      k2 *= c3;
      h2 ^= k2;

    case 4: k1 ^= tail[3] << 24;
    case 3: k1 ^= tail[2] << 16;
    case 2: k1 ^= tail[1] << 8;
    case 1:
      k1 ^= tail[0] << 0;
      k1 *= c1;
      k1 = ROTL32(k1, 15);
      k1 *= c2;
      h1 ^= k1;
  };

  //----------
  // finalization

  h1 ^= len;
  h2 ^= len;
  h3 ^= len;
  h4 ^= len;

  h1 += h2;
  h1 += h3;
  h1 += h4;
  h2 += h1;
  h3 += h1;
  h4 += h1;

  FMIX_32(h1);
  FMIX_32(h2);
  FMIX_32(h3);
  FMIX_32(h4);

  h1 += h2;
  h1 += h3;
  h1 += h4;
  h2 += h1;
  h3 += h1;
  h4 += h1;

  ((uint32_t *)out)[0] = h1;
  ((uint32_t *)out)[1] = h2;
  ((uint32_t *)out)[2] = h3;
  ((uint32_t *)out)[3] = h4;
}

//-----------------------------------------------------------------------------

void MurmurHash3_x64_128(const void *key, const int len,
                         const uint32_t seed, void *out) {
  const uint8_t *data = (const uint8_t *)key;
  const int nblocks = len / 16;

  uint64_t h1 = seed;
  uint64_t h2 = seed;

  const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
  const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);

  //----------
  // body

  const uint64_t *blocks = (const uint64_t *)(data);

  for (int i = 0; i < nblocks; i++) {
    uint64_t k1, k2;
    GETBLOCK(k1, blocks, i * 2 + 0);
    GETBLOCK(k2, blocks, i * 2 + 1);

    k1 *= c1;
    k1 = ROTL64(k1, 31);
    k1 *= c2;
    h1 ^= k1;

    h1 = ROTL64(h1, 27);
    h1 += h2;
    h1 = h1 * 5 + 0x52dce729;

    k2 *= c2;
    k2 = ROTL64(k2, 33);
    k2 *= c1;
    h2 ^= k2;

    h2 = ROTL64(h2, 31);
    h2 += h1;
    h2 = h2 * 5 + 0x38495ab5;
  }

  //----------
  // tail

  const uint8_t *tail = (const uint8_t *)(data + nblocks * 16);

  uint64_t k1 = 0;
  uint64_t k2 = 0;

  switch (len & 15) {
    case 15: k2 ^= uint64_t(tail[14]) << 48;
    case 14: k2 ^= uint64_t(tail[13]) << 40;
    case 13: k2 ^= uint64_t(tail[12]) << 32;
    case 12: k2 ^= uint64_t(tail[11]) << 24;
    case 11: k2 ^= uint64_t(tail[10]) << 16;
    case 10: k2 ^= uint64_t(tail[9]) << 8;
    case 9:
      k2 ^= uint64_t(tail[8]) << 0;
      k2 *= c2;
      k2 = ROTL64(k2, 33);
      k2 *= c1;
      h2 ^= k2;

    case 8: k1 ^= uint64_t(tail[7]) << 56;
    case 7: k1 ^= uint64_t(tail[6]) << 48;
    case 6: k1 ^= uint64_t(tail[5]) << 40;
    case 5: k1 ^= uint64_t(tail[4]) << 32;
    case 4: k1 ^= uint64_t(tail[3]) << 24;
    case 3: k1 ^= uint64_t(tail[2]) << 16;
    case 2: k1 ^= uint64_t(tail[1]) << 8;
    case 1:
      k1 ^= uint64_t(tail[0]) << 0;
      k1 *= c1;
      k1 = ROTL64(k1, 31);
      k1 *= c2;
      h1 ^= k1;
  };

  //----------
  // finalization

  h1 ^= len;
  h2 ^= len;

  h1 += h2;
  h2 += h1;

  FMIX_64(h1);
  FMIX_64(h2);

  h1 += h2;
  h2 += h1;

  ((uint64_t *)out)[0] = h1;
  ((uint64_t *)out)[1] = h2;
}

} // namespace Details
} // namespace Tpetra

//-----------------------------------------------------------------------------
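
// A hedged usage sketch (not part of the original file): the x64 128-bit
// variant fills two 64-bit words of caller-provided storage. Variable names
// and the seed below are illustrative only.
//
//   const char buffer[] = "example key";
//   uint64_t digest[2] = {0, 0};
//   Tpetra::Details::MurmurHash3_x64_128(
//       buffer, static_cast<int>(sizeof(buffer) - 1), /* seed = */ 0u, digest);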