diff --git a/src/Common/src/System/Collections/HashHelpers.cs b/src/Common/src/System/Collections/HashHelpers.cs index 661e9faf2e9e..68a738409907 100644 --- a/src/Common/src/System/Collections/HashHelpers.cs +++ b/src/Common/src/System/Collections/HashHelpers.cs @@ -2,27 +2,18 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -/*============================================================ -** -** -** -** -** Purpose: Hash table implementation -** -** -===========================================================*/ -using System; using System.Diagnostics; -using System.Runtime; -using System.Runtime.CompilerServices; -using System.Runtime.Serialization; -using System.Threading; namespace System.Collections { internal static class HashHelpers { + // This is the maximum prime smaller than Array.MaxArrayLength + public const int MaxPrimeArrayLength = 0x7FEFFFFD; + + private const int HashPrime = 101; + // Table of prime numbers to use as hash table sizes. // A typical resize algorithm would pick the smallest prime number in this array // that is larger than twice the previous capacity. @@ -34,16 +25,29 @@ internal static class HashHelpers // hashtable operations such as add. Having a prime guarantees that double // hashing does not lead to infinite loops. IE, your hash function will be // h1(key) + i*h2(key), 0 <= i < size. h2 and the size must be relatively prime. + // We prefer the low computation costs of higher prime numbers over the increased + // memory allocation of a fixed prime number i.e. when right sizing a HashSet. 
public static readonly int[] primes = { 3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919, 1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591, 17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437, 187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263, - 1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369, 8639249, 10367101, - 12440537, 14928671, 17914409, 21497293, 25796759, 30956117, 37147349, 44576837, 53492207, 64190669, - 77028803, 92434613, 110921543, 133105859, 159727031, 191672443, 230006941, 276008387, 331210079, - 397452101, 476942527, 572331049, 686797261, 824156741, 988988137, 1186785773, 1424142949, 1708971541, - 2050765853, MaxPrimeArrayLength }; + 1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369 }; + + public static bool IsPrime(int candidate) + { + if ((candidate & 1) != 0) + { + int limit = (int)Math.Sqrt(candidate); + for (int divisor = 3; divisor <= limit; divisor += 2) + { + if ((candidate % divisor) == 0) + return false; + } + return true; + } + return (candidate == 2); + } public static int GetPrime(int min) { @@ -56,6 +60,13 @@ public static int GetPrime(int min) if (prime >= min) return prime; } + //outside of our predefined table. + //compute the hard way. + for (int i = (min | 1); i < int.MaxValue; i += 2) + { + if (IsPrime(i) && ((i - 1) % HashPrime != 0)) + return i; + } return min; } @@ -64,7 +75,7 @@ public static int ExpandPrime(int oldSize) { int newSize = 2 * oldSize; - // Allow the hashtables to grow to maximum possible size (~2G elements) before encoutering capacity overflow. + // Allow the hashtables to grow to maximum possible size (~2G elements) before encountering capacity overflow. 
// Note that this check works even when _items.Length overflowed thanks to the (uint) cast if ((uint)newSize > MaxPrimeArrayLength && MaxPrimeArrayLength > oldSize) { @@ -74,9 +85,5 @@ public static int ExpandPrime(int oldSize) return GetPrime(newSize); } - - - // This is the maximum prime smaller than Array.MaxArrayLength - public const int MaxPrimeArrayLength = 0x7FEFFFFD; } } diff --git a/src/System.Collections/src/System/Collections/Generic/HashSet.cs b/src/System.Collections/src/System/Collections/Generic/HashSet.cs index e13246b56257..06484e79a604 100644 --- a/src/System.Collections/src/System/Collections/Generic/HashSet.cs +++ b/src/System.Collections/src/System/Collections/Generic/HashSet.cs @@ -1162,6 +1162,7 @@ private void IncreaseCapacity() /// private void SetCapacity(int newSize) { + Debug.Assert(HashHelpers.IsPrime(newSize), "New size is not prime!"); Debug.Assert(_buckets != null, "SetCapacity called on a set with no elements"); Slot[] newSlots = new Slot[newSize]; diff --git a/src/System.Collections/tests/Generic/HashSet/HashSet.Generic.Tests.netcoreapp.cs b/src/System.Collections/tests/Generic/HashSet/HashSet.Generic.Tests.netcoreapp.cs index aff2d7c7e1a8..2f9226db0b21 100644 --- a/src/System.Collections/tests/Generic/HashSet/HashSet.Generic.Tests.netcoreapp.cs +++ b/src/System.Collections/tests/Generic/HashSet/HashSet.Generic.Tests.netcoreapp.cs @@ -30,6 +30,18 @@ public void HashSet_Generic_Constructor_int_AddUpToAndBeyondCapacity(int capacit Assert.Equal(capacity + 1, set.Count); } + [Fact] + public void HashSet_Generic_Constructor_Capacity_ToNextPrimeNumber() + { + // Highest pre-computed number + 1. + const int Capacity = 7199370; + var set = new HashSet(Capacity); + + // Assert that the HashSet's capacity is set to the next prime number following the given one. 
+ const int NextPrime = 7199371; + Assert.Equal(NextPrime, set.EnsureCapacity(0)); + } + [Fact] public void HashSet_Generic_Constructor_int_Negative_ThrowsArgumentOutOfRangeException() { diff --git a/src/System.Collections/tests/System.Collections.Tests.csproj b/src/System.Collections/tests/System.Collections.Tests.csproj index e85d478938a4..40039d447f5d 100644 --- a/src/System.Collections/tests/System.Collections.Tests.csproj +++ b/src/System.Collections/tests/System.Collections.Tests.csproj @@ -74,7 +74,7 @@ Common\System\Collections\DictionaryExtensions.cs - System\Runtime\Serialization\Formatters\BinaryFormatterHelpers.cs + Common\System\Runtime\Serialization\Formatters\BinaryFormatterHelpers.cs diff --git a/src/System.Runtime.Extensions/src/System.Runtime.Extensions.csproj b/src/System.Runtime.Extensions/src/System.Runtime.Extensions.csproj index d79435f95300..3637bd70d9ac 100644 --- a/src/System.Runtime.Extensions/src/System.Runtime.Extensions.csproj +++ b/src/System.Runtime.Extensions/src/System.Runtime.Extensions.csproj @@ -83,6 +83,9 @@ CoreLib\System\Text\ValueStringBuilder.cs + + Common\System\Collections\HashHelpers.cs + @@ -278,4 +281,4 @@ - \ No newline at end of file + diff --git a/src/System.Runtime.Extensions/src/System/Collections/Hashtable.cs b/src/System.Runtime.Extensions/src/System/Collections/Hashtable.cs index a279aada8bb0..2709e1172ed4 100644 --- a/src/System.Runtime.Extensions/src/System/Collections/Hashtable.cs +++ b/src/System.Runtime.Extensions/src/System/Collections/Hashtable.cs @@ -153,6 +153,9 @@ private struct bucket private IEqualityComparer _keycomparer; private Object _syncRoot; + private static ConditionalWeakTable s_serializationInfoTable; + private static ConditionalWeakTable SerializationInfoTable => LazyInitializer.EnsureInitialized(ref s_serializationInfoTable); + [Obsolete("Please use EqualityComparer property.")] protected IHashCodeProvider hcp { @@ -380,7 +383,7 @@ protected Hashtable(SerializationInfo info, 
StreamingContext context) //We can't do anything with the keys and values until the entire graph has been deserialized //and we have a reasonable estimate that GetHashCode is not going to fail. For the time being, //we'll just cache this. The graph is not valid until OnDeserialization has been called. - HashHelpers.SerializationInfoTable.Add(this, info); + SerializationInfoTable.Add(this, info); } // ?InitHash? is basically an implementation of classic DoubleHashing (see http://en.wikipedia.org/wiki/Double_hashing) @@ -1172,7 +1175,7 @@ public virtual void OnDeserialization(Object sender) } SerializationInfo siInfo; - HashHelpers.SerializationInfoTable.TryGetValue(this, out siInfo); + SerializationInfoTable.TryGetValue(this, out siInfo); if (siInfo == null) { @@ -1254,7 +1257,7 @@ public virtual void OnDeserialization(Object sender) _version = siInfo.GetInt32(VersionName); - HashHelpers.SerializationInfoTable.Remove(this); + SerializationInfoTable.Remove(this); } // Implements a Collection for the keys of a hashtable. An instance of this @@ -1640,84 +1643,4 @@ public KeyValuePairs[] Items } } } - - internal static class HashHelpers - { - // Table of prime numbers to use as hash table sizes. - // A typical resize algorithm would pick the smallest prime number in this array - // that is larger than twice the previous capacity. - // Suppose our Hashtable currently has capacity x and enough elements are added - // such that a resize needs to occur. Resizing first computes 2x then finds the - // first prime in the table greater than 2x, i.e. if primes are ordered - // p_1, p_2, ..., p_i, ..., it finds p_n such that p_n-1 < 2x < p_n. - // Doubling is important for preserving the asymptotic complexity of the - // hashtable operations such as add. Having a prime guarantees that double - // hashing does not lead to infinite loops. IE, your hash function will be - // h1(key) + i*h2(key), 0 <= i < size. h2 and the size must be relatively prime. 
- public static readonly int[] primes = { - 3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919, - 1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591, - 17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437, - 187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263, - 1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369}; - - public static bool IsPrime(int candidate) - { - if ((candidate & 1) != 0) - { - int limit = (int)Math.Sqrt(candidate); - for (int divisor = 3; divisor <= limit; divisor += 2) - { - if ((candidate % divisor) == 0) - return false; - } - return true; - } - return (candidate == 2); - } - - public static int GetPrime(int min) - { - if (min < 0) - throw new ArgumentException(SR.Arg_HTCapacityOverflow); - - for (int i = 0; i < primes.Length; i++) - { - int prime = primes[i]; - if (prime >= min) return prime; - } - - //outside of our predefined table. - //compute the hard way. - for (int i = (min | 1); i < Int32.MaxValue; i += 2) - { - if (IsPrime(i) && ((i - 1) % Hashtable.HashPrime != 0)) - return i; - } - return min; - } - - // Returns size of hashtable to grow to. - public static int ExpandPrime(int oldSize) - { - int newSize = 2 * oldSize; - - // Allow the hashtables to grow to maximum possible size (~2G elements) before encountering capacity overflow. 
- // Note that this check works even when _items.Length overflowed thanks to the (uint) cast - if ((uint)newSize > MaxPrimeArrayLength && MaxPrimeArrayLength > oldSize) - { - Debug.Assert(MaxPrimeArrayLength == GetPrime(MaxPrimeArrayLength), "Invalid MaxPrimeArrayLength"); - return MaxPrimeArrayLength; - } - - return GetPrime(newSize); - } - - - // This is the maximum prime smaller than Array.MaxArrayLength - public const int MaxPrimeArrayLength = 0x7FEFFFFD; - - private static ConditionalWeakTable s_serializationInfoTable; - public static ConditionalWeakTable SerializationInfoTable => LazyInitializer.EnsureInitialized(ref s_serializationInfoTable); - } } diff --git a/src/System.Runtime.Serialization.Formatters/src/Resources/Strings.resx b/src/System.Runtime.Serialization.Formatters/src/Resources/Strings.resx index 8311d781049f..4a01b4fadcbb 100644 --- a/src/System.Runtime.Serialization.Formatters/src/Resources/Strings.resx +++ b/src/System.Runtime.Serialization.Formatters/src/Resources/Strings.resx @@ -58,6 +58,9 @@ System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + Capacity overflowed and went negative. + Type '{0}' in Assembly '{1}' is not marked as serializable. 
diff --git a/src/System.Runtime.Serialization.Formatters/src/System.Runtime.Serialization.Formatters.csproj b/src/System.Runtime.Serialization.Formatters/src/System.Runtime.Serialization.Formatters.csproj index 7752ff98795d..ae432a86ca46 100644 --- a/src/System.Runtime.Serialization.Formatters/src/System.Runtime.Serialization.Formatters.csproj +++ b/src/System.Runtime.Serialization.Formatters/src/System.Runtime.Serialization.Formatters.csproj @@ -61,6 +61,9 @@ + + Common\System\Collections\HashHelpers.cs + @@ -77,4 +80,4 @@ - \ No newline at end of file + diff --git a/src/System.Runtime.Serialization.Formatters/src/System/Runtime/Serialization/ObjectIDGenerator.cs b/src/System.Runtime.Serialization.Formatters/src/System/Runtime/Serialization/ObjectIDGenerator.cs index e6b465333ef4..cc4a885a67d5 100644 --- a/src/System.Runtime.Serialization.Formatters/src/System/Runtime/Serialization/ObjectIDGenerator.cs +++ b/src/System.Runtime.Serialization.Formatters/src/System/Runtime/Serialization/ObjectIDGenerator.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Collections; using System.Runtime.CompilerServices; namespace System.Runtime.Serialization @@ -10,24 +11,16 @@ public class ObjectIDGenerator { private const int NumBins = 4; - // Table of prime numbers to use as hash table sizes. Each entry is the - // smallest prime number larger than twice the previous entry. - private static readonly int[] s_sizes = - { - 5, 11, 29, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853, 25717, 51437, - 102877, 205759, 411527, 823117, 1646237, 3292489, 6584983 - }; - internal int _currentCount; - internal int _currentSize; - internal long[] _ids; - internal object[] _objs; + private int _currentSize; + private long[] _ids; + private object[] _objs; // Constructs a new ObjectID generator, initializing all of the necessary variables. 
public ObjectIDGenerator() { _currentCount = 1; - _currentSize = s_sizes[0]; + _currentSize = HashHelpers.primes[0]; // Starting with 3 _ids = new long[_currentSize * NumBins]; _objs = new object[_currentSize * NumBins]; } @@ -106,13 +99,12 @@ public virtual long GetId(object obj, out bool firstTime) // we return that id, otherwise we return 0. public virtual long HasId(object obj, out bool firstTime) { - bool found; - if (obj == null) { throw new ArgumentNullException(nameof(obj)); } + bool found; int pos = FindElement(obj, out found); if (found) { @@ -129,14 +121,14 @@ public virtual long HasId(object obj, out bool firstTime) // the old arrays into the new ones. Expensive but necessary. private void Rehash() { - int i = 0; - for (int currSize = _currentSize; i < s_sizes.Length && s_sizes[i] <= currSize; i++) ; - if (i == s_sizes.Length) + int currSize = _currentSize; + int newSize = HashHelpers.ExpandPrime(currSize); + if (newSize == currSize) { // We just walked off the end of the array. 
throw new SerializationException(SR.Serialization_TooManyElements); } - _currentSize = s_sizes[i]; + _currentSize = newSize; long[] newIds = new long[_currentSize * NumBins]; object[] newObjs = new object[_currentSize * NumBins]; @@ -151,8 +143,7 @@ private void Rehash() { if (oldObjs[j] != null) { - bool found; - int pos = FindElement(oldObjs[j], out found); + int pos = FindElement(oldObjs[j], out _); _objs[pos] = oldObjs[j]; _ids[pos] = oldIds[j]; } diff --git a/src/System.Runtime.Serialization.Formatters/tests/BinaryFormatterTests.cs b/src/System.Runtime.Serialization.Formatters/tests/BinaryFormatterTests.cs index 077f99c3fe84..a6b814711ae1 100644 --- a/src/System.Runtime.Serialization.Formatters/tests/BinaryFormatterTests.cs +++ b/src/System.Runtime.Serialization.Formatters/tests/BinaryFormatterTests.cs @@ -18,6 +18,31 @@ namespace System.Runtime.Serialization.Formatters.Tests { public partial class BinaryFormatterTests : RemoteExecutorTestBase { + private static unsafe bool Is64Bit => sizeof(void*) == 8; + + // On 32-bit we can't test these high inputs as they cause OutOfMemoryExceptions. + [ConditionalTheory(nameof(Is64Bit))] + [InlineData(2 * 6_584_983 - 2)] // previous limit + [InlineData(2 * 7_199_369 - 2)] // last pre-computed prime number + public void SerializeHugeObjectGraphs(int limit) + { + Point[] pointArr = Enumerable.Range(0, limit) + .Select(i => new Point(i, i + 1)) + .ToArray(); + + // This should not throw a SerializationException as we removed the artificial limit in the ObjectIDGenerator. + // Instead of round tripping we only serialize to minimize test time. + // This will throw on .NET Framework as the artificial limit is still enabled. 
+ var bf = new BinaryFormatter(); + AssertExtensions.ThrowsIf(PlatformDetection.IsFullFramework, () => + { + using (MemoryStream ms = new MemoryStream()) + { + bf.Serialize(ms, pointArr); + } + }); + } + [Theory] [MemberData(nameof(BasicObjectsRoundtrip_MemberData))] public void ValidateBasicObjectsRoundtrip(object obj, FormatterAssemblyStyle assemblyFormat, TypeFilterLevel filterLevel, FormatterTypeStyle typeFormat) diff --git a/src/System.Runtime.Serialization.Formatters/tests/System.Runtime.Serialization.Formatters.Tests.csproj b/src/System.Runtime.Serialization.Formatters/tests/System.Runtime.Serialization.Formatters.Tests.csproj index 3660ed19c4b1..b51c89f48807 100644 --- a/src/System.Runtime.Serialization.Formatters/tests/System.Runtime.Serialization.Formatters.Tests.csproj +++ b/src/System.Runtime.Serialization.Formatters/tests/System.Runtime.Serialization.Formatters.Tests.csproj @@ -4,6 +4,7 @@ {13CE5E71-D373-4EA6-B3CB-166FF089A42A} true + true