|
13 | 13 | // is 64 bits the truncation causes collision issues. Given all that, we use our own |
14 | 14 | // simple hash, viewing the double bytes as an int64 and using khash's default |
15 | 15 | // hash for 64 bit integers. |
16 | | -// GH 13436 |
| 16 | +// GH 13436 showed that _Py_HashDouble doesn't work well with khash |
| 17 | +// GH 28303 showed that the simple xoring version isn't good enough |
| 18 | +// thus the murmur2 hash is used |
| 19 | + |
17 | 20 | khint64_t PANDAS_INLINE asint64(double key) { |
18 | | - khint64_t val; |
19 | | - memcpy(&val, &key, sizeof(double)); |
20 | | - return val; |
| 21 | + khint64_t val; |
| 22 | + memcpy(&val, &key, sizeof(double)); |
| 23 | + return val; |
21 | 24 | } |
22 | 25 |
|
23 | | -// correct for all inputs but not -0.0 and NaNs |
24 | | -#define kh_float64_hash_func_0_NAN(key) (khint32_t)((asint64(key))>>33^(asint64(key))^(asint64(key))<<11) |
25 | | - |
26 | | -// correct for all inputs but not NaNs |
27 | | -#define kh_float64_hash_func_NAN(key) ((key) == 0.0 ? \ |
28 | | - kh_float64_hash_func_0_NAN(0.0) : \ |
29 | | - kh_float64_hash_func_0_NAN(key)) |
| 26 | +#define ZERO_HASH 0 |
| 27 | +#define NAN_HASH 0 |
30 | 28 |
|
31 | | -// correct for all |
32 | | -#define kh_float64_hash_func(key) ((key) != (key) ? \ |
33 | | - kh_float64_hash_func_NAN(Py_NAN) : \ |
34 | | - kh_float64_hash_func_NAN(key)) |
| 29 | +khint32_t PANDAS_INLINE kh_float64_hash_func(double val){ |
| 30 | + // 0.0 and -0.0 should have the same hash: |
| 31 | + if (val == 0.0){ |
| 32 | + return ZERO_HASH; |
| 33 | + } |
| 34 | + // all nans should have the same hash: |
| 35 | + if ( val!=val ){ |
| 36 | + return NAN_HASH; |
| 37 | + } |
| 38 | + khint64_t as_int = asint64(val); |
| 39 | + return murmur2_64to32(as_int); |
| 40 | +} |
35 | 41 |
|
36 | 42 | #define kh_float64_hash_equal(a, b) ((a) == (b) || ((b) != (b) && (a) != (a))) |
37 | 43 |
|
|
0 commit comments