From 8a8c0a707927d0ae79f594e6f001c92ed7ce9d78 Mon Sep 17 00:00:00 2001 From: Atsushi Kanamori Date: Fri, 7 Apr 2017 14:10:59 -0700 Subject: [PATCH] Incorporate CR feedback on Marvin hash - String.GetHashCode() now uses Marvin unconditionally. - Make the Marvin routines more general purpose and easier to use. - Expanded the testing to odd-sized byte arrays (i.e. not strings), and fixed the bug found. Original data obtained by exporting SymCryptMarvin32() from CoreClr.dll and P/Invoking to it from test app. --- .../src/System/Marvin.cs | 32 ++++++++----- .../src/System/String.Comparison.cs | 47 +------------------ 2 files changed, 22 insertions(+), 57 deletions(-) diff --git a/src/System.Private.CoreLib/src/System/Marvin.cs b/src/System.Private.CoreLib/src/System/Marvin.cs index b4ac62be0d8..91a7c1021bc 100644 --- a/src/System.Private.CoreLib/src/System/Marvin.cs +++ b/src/System.Private.CoreLib/src/System/Marvin.cs @@ -9,14 +9,28 @@ namespace System { internal static class Marvin { - public static uint ComputeStringHash(ref byte data, uint count, ulong seed) + /// + /// Convenience method to compute a Marvin hash and collapse it into a 32-bit hash. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int ComputeHash32(ref byte data, int count, ulong seed) + { + long hash64 = ComputeHash(ref data, count, seed); + return ((int)(hash64 >> 32)) ^ (int)hash64; + } + + /// + /// Computes a 64-hash using the Marvin algorithm. + /// + public static long ComputeHash(ref byte data, int count, ulong seed) { + uint ucount = (uint)count; uint p0 = (uint)seed; uint p1 = (uint)(seed >> 32); int byteOffset = 0; // declared as signed int so we don't have to cast everywhere (it's passed to Unsafe.Add() and used for nothing else.) - while (count >= 8) + while (ucount >= 8) { p0 += Unsafe.As(ref Unsafe.Add(ref data, byteOffset)); Block(ref p0, ref p1); @@ -25,10 +39,10 @@ public static uint ComputeStringHash(ref byte data, uint count, ulong seed) Block(ref p0, ref p1); byteOffset += 8; - count -= 8; + ucount -= 8; } - switch (count) + switch (ucount) { case 4: p0 += Unsafe.As(ref Unsafe.Add(ref data, byteOffset)); @@ -66,22 +80,18 @@ public static uint ComputeStringHash(ref byte data, uint count, ulong seed) goto case 3; case 3: - p0 += 0x80000000u | Unsafe.As(ref Unsafe.Add(ref data, byteOffset)) | Unsafe.Add(ref data, byteOffset + 2); + p0 += 0x80000000u | (((uint)(Unsafe.Add(ref data, byteOffset + 2))) << 16)| (uint)(Unsafe.As(ref Unsafe.Add(ref data, byteOffset))); break; default: Debug.Fail("Should not get here."); - throw new InvalidOperationException(); + break; } Block(ref p0, ref p1); Block(ref p0, ref p1); - // At this point, p0 and p1 contains the 8-byte Marvin hash result. If we need a general purpose Marvin implementation in the future, - // this could be refactored to stop here. For now, String.GetHashCode() is the only user of this function and he wants an 4-byte hash code - // so this last step is specific to String.GetHashCode(). - - return p0 ^ p1; + return (((long)p1) << 32) | p0; } [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/src/System.Private.CoreLib/src/System/String.Comparison.cs b/src/System.Private.CoreLib/src/System/String.Comparison.cs index 8280142cf18..09f28181cbc 100644 --- a/src/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/System.Private.CoreLib/src/System/String.Comparison.cs @@ -997,52 +997,7 @@ public static bool Equals(String a, String b, StringComparison comparisonType) // they will return the same hash code. public override int GetHashCode() { -#if FEATURE_RANDOMIZED_STRING_HASHING - return (int)Marvin.ComputeStringHash(ref Unsafe.As(ref _firstChar), (uint)(_stringLength * 2), Marvin.DefaultSeed); -#else - unsafe - { - fixed (char* src = &_firstChar) - { -#if BIT64 - int hash1 = 5381; -#else - int hash1 = (5381 << 16) + 5381; -#endif - int hash2 = hash1; - -#if BIT64 - int c; - char* s = src; - while ((c = s[0]) != 0) - { - hash1 = ((hash1 << 5) + hash1) ^ c; - c = s[1]; - if (c == 0) - break; - hash2 = ((hash2 << 5) + hash2) ^ c; - s += 2; - } -#else - // 32bit machines. - int* pint = (int*)src; - int len = this.Length; - while (len > 0) - { - hash1 = ((hash1 << 5) + hash1 + (hash1 >> 27)) ^ pint[0]; - if (len <= 2) - { - break; - } - hash2 = ((hash2 << 5) + hash2 + (hash2 >> 27)) ^ pint[1]; - pint += 2; - len -= 4; - } -#endif - return hash1 + (hash2 * 1566083941); - } - } -#endif // FEATURE_RANDOMIZED_STRING_HASHING + return Marvin.ComputeHash32(ref Unsafe.As(ref _firstChar), _stringLength * 2, Marvin.DefaultSeed); } // Determines whether a specified string is a prefix of the current instance