Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions be/src/gutil/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@ SET(SOURCE_FILES
bits.cc
dynamic_annotations.c
hash/city.cc
hash/hash.cc
hash/jenkins.cc
int128.cc
ref_counted.cc
stringprintf.cc
strings/ascii_ctype.cc
Expand Down
50 changes: 0 additions & 50 deletions be/src/gutil/endian.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@

#include <assert.h>

#include "gutil/int128.h"
#include "gutil/integral_types.h"
#include "gutil/port.h"
#include "vec/core/wide_integer.h"
Expand Down Expand Up @@ -197,29 +196,6 @@ class LittleEndian {

static void Store64(void* p, uint64 v) { UNALIGNED_STORE64(p, FromHost64(v)); }

static uint128 Load128(const void* p) {
return uint128(ToHost64(UNALIGNED_LOAD64(reinterpret_cast<const uint64*>(p) + 1)),
ToHost64(UNALIGNED_LOAD64(p)));
}

static void Store128(void* p, const uint128 v) {
UNALIGNED_STORE64(p, FromHost64(Uint128Low64(v)));
UNALIGNED_STORE64(reinterpret_cast<uint64*>(p) + 1, FromHost64(Uint128High64(v)));
}

// Build a uint128 from 1-16 bytes.
// 8 * len least significant bits are loaded from the memory with
// LittleEndian order. The 128 - 8 * len most significant bits are
// set all to 0.
static uint128 Load128VariableLength(const void* p, int len) {
if (len <= 8) {
return uint128(Load64VariableLength(p, len));
} else {
return uint128(Load64VariableLength(static_cast<const char*>(p) + 8, len - 8),
Load64(p));
}
}

// Load & Store in machine's word size.
static uword_t LoadUnsignedWord(const void* p) {
if (sizeof(uword_t) == 8)
Expand Down Expand Up @@ -278,9 +254,6 @@ class BigEndian {
static uint64 FromHost64(uint64 x) { return x; }
static uint64 ToHost64(uint64 x) { return x; }

static uint128 FromHost128(uint128 x) { return x; }
static uint128 ToHost128(uint128 x) { return x; }

static wide::UInt256 FromHost256(wide::UInt256 x) { return x; }
static wide::UInt256 ToHost256(wide::UInt256 x) { return x; }

Expand Down Expand Up @@ -328,29 +301,6 @@ class BigEndian {

static void Store64(void* p, uint64 v) { UNALIGNED_STORE64(p, FromHost64(v)); }

static uint128 Load128(const void* p) {
return uint128(ToHost64(UNALIGNED_LOAD64(p)),
ToHost64(UNALIGNED_LOAD64(reinterpret_cast<const uint64*>(p) + 1)));
}

static void Store128(void* p, const uint128 v) {
UNALIGNED_STORE64(p, FromHost64(Uint128High64(v)));
UNALIGNED_STORE64(reinterpret_cast<uint64*>(p) + 1, FromHost64(Uint128Low64(v)));
}

// Build a uint128 from 1-16 bytes.
// 8 * len least significant bits are loaded from the memory with
// BigEndian order. The 128 - 8 * len most significant bits are
// set all to 0.
static uint128 Load128VariableLength(const void* p, int len) {
if (len <= 8) {
return uint128(Load64VariableLength(static_cast<const char*>(p) + 8, len));
} else {
return uint128(Load64VariableLength(p, len - 8),
Load64(static_cast<const char*>(p) + 8));
}
}

// Load & Store in machine's word size.
static uword_t LoadUnsignedWord(const void* p) {
if (sizeof(uword_t) == 8)
Expand Down
119 changes: 9 additions & 110 deletions be/src/gutil/hash/city.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
// IWYU pragma: no_include <pstl/glue_algorithm_defs.h>

#include <sys/types.h>

#include <algorithm>
#include <iterator>

Expand All @@ -34,10 +35,7 @@ using std::make_pair;
using std::pair;

#include "common/logging.h"

#include "gutil/endian.h"
#include "gutil/hash/hash128to64.h"
#include "gutil/int128.h"
#include "gutil/integral_types.h"
#include "gutil/port.h"

Expand Down Expand Up @@ -71,8 +69,14 @@ static uint64 ShiftMix(uint64 val) {
return val ^ (val >> 47);
}

static uint64 HashLen16(uint64 u, uint64 v) {
return Hash128to64(uint128(u, v));
uint64 HashLen16(uint64 u, uint64 v) {
const uint64 kMul = 0xc6a4a7935bd1e995ULL;
uint64 a = (u ^ v) * kMul;
a ^= (a >> 47);
uint64 b = (v ^ a) * kMul;
b ^= (b >> 47);
b *= kMul;
return b;
}

static uint64 HashLen0to16(const char* s, size_t len) {
Expand Down Expand Up @@ -202,109 +206,4 @@ uint64 CityHash64WithSeed(const char* s, size_t len, uint64 seed) {
uint64 CityHash64WithSeeds(const char* s, size_t len, uint64 seed0, uint64 seed1) {
return HashLen16(CityHash64(s, len) - seed0, seed1);
}

// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
// of any length representable in ssize_t. Based on City and Murmur128.
static uint128 CityMurmur(const char* s, size_t len, uint128 seed) {
uint64 a = Uint128Low64(seed);
uint64 b = Uint128High64(seed);
uint64 c = 0;
uint64 d = 0;
ssize_t l = len - 16;
if (l <= 0) { // len <= 16
c = b * k1 + HashLen0to16(s, len);
d = Rotate(a + (len >= 8 ? LittleEndian::Load64(s) : c), 32);
} else { // len > 16
c = HashLen16(LittleEndian::Load64(s + len - 8) + k1, a);
d = HashLen16(b + len, c + LittleEndian::Load64(s + len - 16));
a += d;
do {
a ^= ShiftMix(LittleEndian::Load64(s) * k1) * k1;
a *= k1;
b ^= a;
c ^= ShiftMix(LittleEndian::Load64(s + 8) * k1) * k1;
c *= k1;
d ^= c;
s += 16;
l -= 16;
} while (l > 0);
}
a = HashLen16(a, c);
b = HashLen16(d, b);
return uint128(a ^ b, HashLen16(b, a));
}

uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) {
// TODO(user): As of February 2011, there's a beta of Murmur3 that would
// most likely be useful here. E.g., if (len < 900) return Murmur3(...)
if (len < 128) {
return CityMurmur(s, len, seed);
}

// We expect len >= 128 to be the common case. Keep 56 bytes of state:
// v, w, x, y, and z.
pair<uint64, uint64> v, w;
uint64 x = Uint128Low64(seed);
uint64 y = Uint128High64(seed);
uint64 z = len * k1;
v.first = Rotate(y ^ k1, 49) * k1 + LittleEndian::Load64(s);
v.second = Rotate(v.first, 42) * k1 + LittleEndian::Load64(s + 8);
w.first = Rotate(y + z, 35) * k1 + x;
w.second = Rotate(x + LittleEndian::Load64(s + 88), 53) * k1;

// This is similar to the inner loop of CityHash64(), manually unrolled.
do {
x = Rotate(x + y + v.first + LittleEndian::Load64(s + 16), 37) * k1;
y = Rotate(y + v.second + LittleEndian::Load64(s + 48), 42) * k1;
x ^= w.second;
y ^= v.first;
z = Rotate(z ^ w.first, 33);
v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);
std::swap(z, x);
s += 64;
x = Rotate(x + y + v.first + LittleEndian::Load64(s + 16), 37) * k1;
y = Rotate(y + v.second + LittleEndian::Load64(s + 48), 42) * k1;
x ^= w.second;
y ^= v.first;
z = Rotate(z ^ w.first, 33);
v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);
std::swap(z, x);
s += 64;
len -= 128;
} while (PREDICT_TRUE(len >= 128));
y += Rotate(w.first, 37) * k0 + z;
x += Rotate(v.first + z, 49) * k0;
// If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
for (size_t tail_done = 0; tail_done < len;) {
tail_done += 32;
y = Rotate(y - x, 42) * k0 + v.second;
w.first += LittleEndian::Load64(s + len - tail_done + 16);
x = Rotate(x, 49) * k0 + w.first;
w.first += v.first;
v = WeakHashLen32WithSeeds(s + len - tail_done, v.first, v.second);
}
// At this point our 48 bytes of state should contain more than
// enough information for a strong 128-bit hash. We use two
// different 48-byte-to-8-byte hashes to get a 16-byte final result.
x = HashLen16(x, v.first);
y = HashLen16(y, w.first);
return uint128(HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second));
}

uint128 CityHash128(const char* s, size_t len) {
if (len >= 16) {
return CityHash128WithSeed(
s + 16, len - 16,
uint128(LittleEndian::Load64(s) ^ k3, LittleEndian::Load64(s + 8)));
} else if (len >= 8) {
return CityHash128WithSeed(nullptr, 0,
uint128(LittleEndian::Load64(s) ^ (len * k0),
LittleEndian::Load64(s + len - 8) ^ k1));
} else {
return CityHash128WithSeed(s, len, uint128(k0, k1));
}
}

} // namespace util_hash
10 changes: 2 additions & 8 deletions be/src/gutil/hash/city.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,12 @@

#include <stddef.h> // for size_t.

#include "gutil/int128.h"
#include "gutil/integral_types.h"

namespace util_hash {

uint64 HashLen16(uint64 u, uint64 v);

// Hash function for a byte array.
// The mapping may change from time to time.
uint64 CityHash64(const char* buf, size_t len);
Expand All @@ -39,11 +40,4 @@ uint64 CityHash64WithSeed(const char* buf, size_t len, uint64 seed);
// hashed into the result. The mapping may change from time to time.
uint64 CityHash64WithSeeds(const char* buf, size_t len, uint64 seed0, uint64 seed1);

// Hash function for a byte array. The mapping will never change.
uint128 CityHash128(const char* s, size_t len);

// Hash function for a byte array. For convenience, a 128-bit seed is also
// hashed into the result. The mapping will never change.
uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed);

} // namespace util_hash
Loading