|
| 1 | +/*----------------------------------------------------------------------------- |
| 2 | + * MurmurHash3 was written by Austin Appleby, and is placed in the public |
| 3 | + * domain. |
| 4 | + * |
| 5 | + * This implementation was written by Shane Day, and is also public domain. |
| 6 | + * |
| 7 | + * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A) |
| 8 | + * with support for progressive processing. |
| 9 | + */ |
| 10 | + |
| 11 | +/*----------------------------------------------------------------------------- |
| 12 | + |
| 13 | +If you want to understand the MurmurHash algorithm you would be much better |
| 14 | +off reading the original source. Just point your browser at: |
| 15 | +http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp |
| 16 | +
|
| 17 | +
|
| 18 | +What this version provides? |
| 19 | +
|
| 20 | +1. Progressive data feeding. Useful when the entire payload to be hashed |
| 21 | +does not fit in memory or when the data is streamed through the application. |
| 22 | +Also useful when hashing a number of strings with a common prefix. A partial |
| 23 | +hash of a prefix string can be generated and reused for each suffix string. |
| 24 | +
|
| 25 | +2. Portability. Plain old C so that it should compile on any old compiler. |
| 26 | +Both CPU endian and access-alignment neutral, but avoiding inefficient code |
| 27 | +when possible depending on CPU capabilities. |
| 28 | +
|
| 29 | +3. Drop in. I personally like nice self contained public domain code, making it |
| 30 | +easy to pilfer without loads of refactoring to work properly in the existing |
| 31 | +application code & makefile structure and mucking around with licence files. |
| 32 | +Just copy PMurHash.h and PMurHash.c and you're ready to go. |
| 33 | +
|
| 34 | +
|
| 35 | +How does it work? |
| 36 | +
|
| 37 | +We can only process entire 32 bit chunks of input, except for the very end |
| 38 | +that may be shorter. So along with the partial hash we need to give back to |
| 39 | +the caller a carry containing up to 3 bytes that we were unable to process. |
| 40 | +This carry also needs to record the number of bytes the carry holds. I use |
| 41 | +the low 2 bits as a count (0..3) and the carry bytes are shifted into the |
| 42 | +high byte in stream order. |
| 43 | +
|
| 44 | +To handle endianess I simply use a macro that reads a uint32_t and define |
| 45 | +that macro to be a direct read on little endian machines, a read and swap |
| 46 | +on big endian machines, or a byte-by-byte read if the endianess is unknown. |
| 47 | +
|
| 48 | +-----------------------------------------------------------------------------*/ |
| 49 | + |
| 50 | + |
| 51 | +#include "PMurHash.h" |
| 52 | + |
| 53 | +/* I used ugly type names in the header to avoid potential conflicts with |
| 54 | + * application or system typedefs & defines. Since I'm not including any more |
| 55 | + * headers below here I can rename these so that the code reads like C99 */ |
| 56 | +#undef uint32_t |
| 57 | +#define uint32_t MH_UINT32 |
| 58 | +#undef uint8_t |
| 59 | +#define uint8_t MH_UINT8 |
| 60 | + |
| 61 | +/* MSVC warnings we choose to ignore */ |
| 62 | +#if defined(_MSC_VER) |
| 63 | + #pragma warning(disable: 4127) /* conditional expression is constant */ |
| 64 | +#endif |
| 65 | + |
| 66 | +/*----------------------------------------------------------------------------- |
| 67 | + * Endianess, misalignment capabilities and util macros |
| 68 | + * |
| 69 | + * The following 3 macros are defined in this section. The other macros defined |
| 70 | + * are only needed to help derive these 3. |
| 71 | + * |
| 72 | + * READ_UINT32(x) Read a little endian unsigned 32-bit int |
| 73 | + * UNALIGNED_SAFE Defined if READ_UINT32 works on non-word boundaries |
| 74 | + * ROTL32(x,r) Rotate x left by r bits |
| 75 | + */ |
| 76 | + |
| 77 | +/* Convention is to define __BYTE_ORDER == to one of these values */ |
| 78 | +#if !defined(__BIG_ENDIAN) |
| 79 | + #define __BIG_ENDIAN 4321 |
| 80 | +#endif |
| 81 | +#if !defined(__LITTLE_ENDIAN) |
| 82 | + #define __LITTLE_ENDIAN 1234 |
| 83 | +#endif |
| 84 | + |
| 85 | +/* I386 */ |
| 86 | +#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(i386) |
| 87 | + #define __BYTE_ORDER __LITTLE_ENDIAN |
| 88 | + #define UNALIGNED_SAFE |
| 89 | +#endif |
| 90 | + |
| 91 | +/* gcc 'may' define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ to 1 (Note the trailing __), |
| 92 | + * or even _LITTLE_ENDIAN or _BIG_ENDIAN (Note the single _ prefix) */ |
| 93 | +#if !defined(__BYTE_ORDER) |
| 94 | + #if defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__==1 || defined(_LITTLE_ENDIAN) && _LITTLE_ENDIAN==1 |
| 95 | + #define __BYTE_ORDER __LITTLE_ENDIAN |
| 96 | + #elif defined(__BIG_ENDIAN__) && __BIG_ENDIAN__==1 || defined(_BIG_ENDIAN) && _BIG_ENDIAN==1 |
| 97 | + #define __BYTE_ORDER __BIG_ENDIAN |
| 98 | + #endif |
| 99 | +#endif |
| 100 | + |
| 101 | +/* gcc (usually) defines xEL/EB macros for ARM and MIPS endianess */ |
| 102 | +#if !defined(__BYTE_ORDER) |
| 103 | + #if defined(__ARMEL__) || defined(__MIPSEL__) |
| 104 | + #define __BYTE_ORDER __LITTLE_ENDIAN |
| 105 | + #endif |
| 106 | + #if defined(__ARMEB__) || defined(__MIPSEB__) |
| 107 | + #define __BYTE_ORDER __BIG_ENDIAN |
| 108 | + #endif |
| 109 | +#endif |
| 110 | + |
| 111 | +/* Now find best way we can to READ_UINT32 */ |
| 112 | +#if __BYTE_ORDER==__LITTLE_ENDIAN |
| 113 | + /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */ |
| 114 | + #define READ_UINT32(ptr) (*((uint32_t*)(ptr))) |
| 115 | +#elif __BYTE_ORDER==__BIG_ENDIAN |
| 116 | + /* TODO: Add additional cases below where a compiler provided bswap32 is available */ |
| 117 | + #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3)) |
| 118 | + #define READ_UINT32(ptr) (__builtin_bswap32(*((uint32_t*)(ptr)))) |
| 119 | + #else |
| 120 | + /* Without a known fast bswap32 we're just as well off doing this */ |
| 121 | + #define READ_UINT32(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) |
| 122 | + #define UNALIGNED_SAFE |
| 123 | + #endif |
| 124 | +#else |
| 125 | + /* Unknown endianess so last resort is to read individual bytes */ |
| 126 | + #define READ_UINT32(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) |
| 127 | + |
| 128 | + /* Since we're not doing word-reads we can skip the messing about with realignment */ |
| 129 | + #define UNALIGNED_SAFE |
| 130 | +#endif |
| 131 | + |
| 132 | +/* Find best way to ROTL32 */ |
| 133 | +#if defined(_MSC_VER) |
| 134 | + #include <stdlib.h> /* Microsoft put _rotl declaration in here */ |
| 135 | + #define ROTL32(x,r) _rotl(x,r) |
| 136 | +#else |
| 137 | + /* gcc recognises this code and generates a rotate instruction for CPUs with one */ |
| 138 | + #define ROTL32(x,r) (((uint32_t)x << r) | ((uint32_t)x >> (32 - r))) |
| 139 | +#endif |
| 140 | + |
| 141 | + |
| 142 | +/*----------------------------------------------------------------------------- |
| 143 | + * Core murmurhash algorithm macros */ |
| 144 | + |
| 145 | +#define C1 (0xcc9e2d51) |
| 146 | +#define C2 (0x1b873593) |
| 147 | + |
| 148 | +/* This is the main processing body of the algorithm. It operates |
| 149 | + * on each full 32-bits of input. */ |
| 150 | +#define DOBLOCK(h1, k1) do{ \ |
| 151 | + k1 *= C1; \ |
| 152 | + k1 = ROTL32(k1,15); \ |
| 153 | + k1 *= C2; \ |
| 154 | + \ |
| 155 | + h1 ^= k1; \ |
| 156 | + h1 = ROTL32(h1,13); \ |
| 157 | + h1 = h1*5+0xe6546b64; \ |
| 158 | + }while(0) |
| 159 | + |
| 160 | + |
| 161 | +/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */ |
| 162 | +/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */ |
| 163 | +#define DOBYTES(cnt, h1, c, n, ptr, len) do{ \ |
| 164 | + int _i = cnt; \ |
| 165 | + while(_i--) { \ |
| 166 | + c = c>>8 | *ptr++<<24; \ |
| 167 | + n++; len--; \ |
| 168 | + if(n==4) { \ |
| 169 | + DOBLOCK(h1, c); \ |
| 170 | + n = 0; \ |
| 171 | + } \ |
| 172 | + } }while(0) |
| 173 | + |
| 174 | +/*---------------------------------------------------------------------------*/ |
| 175 | + |
| 176 | +/* Main hashing function. Initialise carry to 0 and h1 to 0 or an initial seed |
| 177 | + * if wanted. Both ph1 and pcarry are required arguments. */ |
| 178 | +void PMurHash32_Process(uint32_t *ph1, uint32_t *pcarry, const void *key, int len) |
| 179 | +{ |
| 180 | + uint32_t h1 = *ph1; |
| 181 | + uint32_t c = *pcarry; |
| 182 | + |
| 183 | + const uint8_t *ptr = (uint8_t*)key; |
| 184 | + const uint8_t *end; |
| 185 | + |
| 186 | + /* Extract carry count from low 2 bits of c value */ |
| 187 | + int n = c & 3; |
| 188 | + |
| 189 | +#if defined(UNALIGNED_SAFE) |
| 190 | + /* This CPU handles unaligned word access */ |
| 191 | + |
| 192 | + /* Consume any carry bytes */ |
| 193 | + int i = (4-n) & 3; |
| 194 | + if(i && i <= len) { |
| 195 | + DOBYTES(i, h1, c, n, ptr, len); |
| 196 | + } |
| 197 | + |
| 198 | + /* Process 32-bit chunks */ |
| 199 | + end = ptr + len/4*4; |
| 200 | + for( ; ptr < end ; ptr+=4) { |
| 201 | + uint32_t k1 = READ_UINT32(ptr); |
| 202 | + DOBLOCK(h1, k1); |
| 203 | + } |
| 204 | + |
| 205 | +#else /*UNALIGNED_SAFE*/ |
| 206 | + /* This CPU does not handle unaligned word access */ |
| 207 | + |
| 208 | + /* Consume enough so that the next data byte is word aligned */ |
| 209 | + int i = -(long)ptr & 3; |
| 210 | + if(i && i <= len) { |
| 211 | + DOBYTES(i, h1, c, n, ptr, len); |
| 212 | + } |
| 213 | + |
| 214 | + /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */ |
| 215 | + end = ptr + len/4*4; |
| 216 | + switch(n) { /* how many bytes in c */ |
| 217 | + case 0: /* c=[----] w=[3210] b=[3210]=w c'=[----] */ |
| 218 | + for( ; ptr < end ; ptr+=4) { |
| 219 | + uint32_t k1 = READ_UINT32(ptr); |
| 220 | + DOBLOCK(h1, k1); |
| 221 | + } |
| 222 | + break; |
| 223 | + case 1: /* c=[0---] w=[4321] b=[3210]=c>>24|w<<8 c'=[4---] */ |
| 224 | + for( ; ptr < end ; ptr+=4) { |
| 225 | + uint32_t k1 = c>>24; |
| 226 | + c = READ_UINT32(ptr); |
| 227 | + k1 |= c<<8; |
| 228 | + DOBLOCK(h1, k1); |
| 229 | + } |
| 230 | + break; |
| 231 | + case 2: /* c=[10--] w=[5432] b=[3210]=c>>16|w<<16 c'=[54--] */ |
| 232 | + for( ; ptr < end ; ptr+=4) { |
| 233 | + uint32_t k1 = c>>16; |
| 234 | + c = READ_UINT32(ptr); |
| 235 | + k1 |= c<<16; |
| 236 | + DOBLOCK(h1, k1); |
| 237 | + } |
| 238 | + break; |
| 239 | + case 3: /* c=[210-] w=[6543] b=[3210]=c>>8|w<<24 c'=[654-] */ |
| 240 | + for( ; ptr < end ; ptr+=4) { |
| 241 | + uint32_t k1 = c>>8; |
| 242 | + c = READ_UINT32(ptr); |
| 243 | + k1 |= c<<24; |
| 244 | + DOBLOCK(h1, k1); |
| 245 | + } |
| 246 | + } |
| 247 | +#endif /*UNALIGNED_SAFE*/ |
| 248 | + |
| 249 | + /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */ |
| 250 | + len -= len/4*4; |
| 251 | + |
| 252 | + /* Append any remaining bytes into carry */ |
| 253 | + DOBYTES(len, h1, c, n, ptr, len); |
| 254 | + |
| 255 | + /* Copy out new running hash and carry */ |
| 256 | + *ph1 = h1; |
| 257 | + *pcarry = (c & ~0xff) | n; |
| 258 | +} |
| 259 | + |
| 260 | +/*---------------------------------------------------------------------------*/ |
| 261 | + |
| 262 | +/* Finalize a hash. To match the original Murmur3A the total_length must be provided */ |
| 263 | +uint32_t PMurHash32_Result(uint32_t h, uint32_t carry, uint32_t total_length) |
| 264 | +{ |
| 265 | + uint32_t k1; |
| 266 | + int n = carry & 3; |
| 267 | + if(n) { |
| 268 | + k1 = carry >> (4-n)*8; |
| 269 | + k1 *= C1; k1 = ROTL32(k1,15); k1 *= C2; h ^= k1; |
| 270 | + } |
| 271 | + h ^= total_length; |
| 272 | + |
| 273 | + /* fmix */ |
| 274 | + h ^= h >> 16; |
| 275 | + h *= 0x85ebca6b; |
| 276 | + h ^= h >> 13; |
| 277 | + h *= 0xc2b2ae35; |
| 278 | + h ^= h >> 16; |
| 279 | + |
| 280 | + return h; |
| 281 | +} |
| 282 | + |
| 283 | +/*---------------------------------------------------------------------------*/ |
| 284 | + |
| 285 | +/* Murmur3A compatable all-at-once */ |
| 286 | +uint32_t PMurHash32(uint32_t seed, const void *key, int len) |
| 287 | +{ |
| 288 | + uint32_t h1=seed, carry=0; |
| 289 | + PMurHash32_Process(&h1, &carry, key, len); |
| 290 | + return PMurHash32_Result(h1, carry, len); |
| 291 | +} |
| 292 | + |
| 293 | +/*---------------------------------------------------------------------------*/ |
| 294 | + |
| 295 | +/* Provide an API suitable for smhasher */ |
| 296 | +void PMurHash32_test(const void *key, int len, uint32_t seed, void *out) |
| 297 | +{ |
| 298 | + uint32_t h1=seed, carry=0; |
| 299 | + const uint8_t *ptr = (uint8_t*)key; |
| 300 | + const uint8_t *end = ptr + len; |
| 301 | + |
| 302 | +#if 0 /* Exercise the progressive processing */ |
| 303 | + while(ptr < end) { |
| 304 | + //const uint8_t *mid = ptr + rand()%(end-ptr)+1; |
| 305 | + const uint8_t *mid = ptr + (rand()&0xF); |
| 306 | + mid = mid<end?mid:end; |
| 307 | + PMurHash32_Process(&h1, &carry, ptr, mid-ptr); |
| 308 | + ptr = mid; |
| 309 | + } |
| 310 | +#else |
| 311 | + PMurHash32_Process(&h1, &carry, ptr, (int)(end-ptr)); |
| 312 | +#endif |
| 313 | + h1 = PMurHash32_Result(h1, carry, len); |
| 314 | + *(uint32_t*)out = h1; |
| 315 | +} |
| 316 | + |
| 317 | +/*---------------------------------------------------------------------------*/ |
0 commit comments