Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 31 additions & 24 deletions src/Common/src/System/Collections/HashHelpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,18 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*============================================================
**
**
**
**
** Purpose: Hash table implementation
**
**
===========================================================*/

using System;
using System.Diagnostics;
using System.Runtime;
using System.Runtime.CompilerServices;
using System.Runtime.Serialization;
using System.Threading;

namespace System.Collections
{
internal static class HashHelpers
{
// This is the maximum prime smaller than Array.MaxArrayLength
public const int MaxPrimeArrayLength = 0x7FEFFFFD;

private const int HashPrime = 101;

// Table of prime numbers to use as hash table sizes.
// A typical resize algorithm would pick the smallest prime number in this array
// that is larger than twice the previous capacity.
Expand All @@ -34,16 +25,29 @@ internal static class HashHelpers
// hashtable operations such as add. Having a prime guarantees that double
// hashing does not lead to infinite loops. IE, your hash function will be
// h1(key) + i*h2(key), 0 <= i < size. h2 and the size must be relatively prime.
// We prefer the low computation costs of higher prime numbers over the increased
// memory allocation of a fixed prime number i.e. when right sizing a HashSet.
public static readonly int[] primes = {
3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919,
1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would add a comment explaining why we don't just grow the table... since apparently it wasn't obvious to us

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added.

17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437,
187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263,
1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369, 8639249, 10367101,
12440537, 14928671, 17914409, 21497293, 25796759, 30956117, 37147349, 44576837, 53492207, 64190669,
77028803, 92434613, 110921543, 133105859, 159727031, 191672443, 230006941, 276008387, 331210079,
397452101, 476942527, 572331049, 686797261, 824156741, 988988137, 1186785773, 1424142949, 1708971541,
2050765853, MaxPrimeArrayLength };
1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369 };

/// <summary>
/// Determines whether <paramref name="candidate"/> is a prime number by trial division.
/// </summary>
/// <param name="candidate">The value to test.</param>
/// <returns>
/// <c>true</c> when <paramref name="candidate"/> is prime; otherwise <c>false</c>.
/// Values below 2 (negative numbers, 0 and 1) are never prime.
/// </returns>
public static bool IsPrime(int candidate)
{
    // Negative numbers, 0 and 1 are not prime. Without this guard an odd value such as 1
    // skips the divisor loop below and is reported as prime, and a negative odd value
    // would be handed to Math.Sqrt.
    if (candidate < 2)
    {
        return false;
    }

    // 2 is the only even prime.
    if ((candidate & 1) == 0)
    {
        return candidate == 2;
    }

    // Odd candidate: any composite number has a divisor no larger than its square root,
    // and an odd number has no even divisors, so only odd divisors need to be tried.
    int limit = (int)Math.Sqrt(candidate);
    for (int divisor = 3; divisor <= limit; divisor += 2)
    {
        if ((candidate % divisor) == 0)
        {
            return false;
        }
    }

    return true;
}

public static int GetPrime(int min)
{
Expand All @@ -56,6 +60,13 @@ public static int GetPrime(int min)
if (prime >= min) return prime;
}

//outside of our predefined table.
//compute the hard way.
for (int i = (min | 1); i < int.MaxValue; i += 2)
{
if (IsPrime(i) && ((i - 1) % HashPrime != 0))
return i;
}
return min;
}

Expand All @@ -64,7 +75,7 @@ public static int ExpandPrime(int oldSize)
{
int newSize = 2 * oldSize;

// Allow the hashtables to grow to maximum possible size (~2G elements) before encoutering capacity overflow.
// Allow the hashtables to grow to maximum possible size (~2G elements) before encountering capacity overflow.
// Note that this check works even when _items.Length overflowed thanks to the (uint) cast
if ((uint)newSize > MaxPrimeArrayLength && MaxPrimeArrayLength > oldSize)
{
Expand All @@ -74,9 +85,5 @@ public static int ExpandPrime(int oldSize)

return GetPrime(newSize);
}


// This is the maximum prime smaller than Array.MaxArrayLength
public const int MaxPrimeArrayLength = 0x7FEFFFFD;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1162,6 +1162,7 @@ private void IncreaseCapacity()
/// </summary>
private void SetCapacity(int newSize)
{
Debug.Assert(HashHelpers.IsPrime(newSize), "New size is not prime!");
Debug.Assert(_buckets != null, "SetCapacity called on a set with no elements");

Slot[] newSlots = new Slot[newSize];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,18 @@ public void HashSet_Generic_Constructor_int_AddUpToAndBeyondCapacity(int capacit
Assert.Equal(capacity + 1, set.Count);
}

[Fact]
public void HashSet_Generic_Constructor_Capacity_ToNextPrimeNumber()
{
    // One more than 7199369, which this test treats as the highest pre-computed prime,
    // so the requested capacity cannot be satisfied by a pre-computed entry.
    const int RequestedCapacity = 7199369 + 1;

    // The prime the set's capacity is expected to be rounded up to.
    const int ExpectedCapacity = 7199371;

    HashSet<T> set = new HashSet<T>(RequestedCapacity);

    // EnsureCapacity returns the set's capacity; asking for 0 is used here only to read it back.
    int actualCapacity = set.EnsureCapacity(0);
    Assert.Equal(ExpectedCapacity, actualCapacity);
}

[Fact]
public void HashSet_Generic_Constructor_int_Negative_ThrowsArgumentOutOfRangeException()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
<Link>Common\System\Collections\DictionaryExtensions.cs</Link>
</Compile>
<Compile Include="$(CommonTestPath)\System\Runtime\Serialization\Formatters\BinaryFormatterHelpers.cs">
<Link>System\Runtime\Serialization\Formatters\BinaryFormatterHelpers.cs</Link>
<Link>Common\System\Runtime\Serialization\Formatters\BinaryFormatterHelpers.cs</Link>
</Compile>
<!-- Generic tests -->
<Compile Include="Generic\Dictionary\Dictionary.Generic.Tests.netcoreapp.cs" Condition="'$(TargetGroup)' == 'netcoreapp'" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@
<Compile Include="$(CommonPath)\CoreLib\System\Text\ValueStringBuilder.cs">
<Link>CoreLib\System\Text\ValueStringBuilder.cs</Link>
</Compile>
<Compile Include="$(CommonPath)\System\Collections\HashHelpers.cs">
<Link>Common\System\Collections\HashHelpers.cs</Link>
</Compile>
</ItemGroup>
<ItemGroup Condition="'$(TargetGroup)' == 'uapaot' or '$(TargetGroup)' == 'uap'">
<Compile Include="System\Environment.WinRT.cs" />
Expand Down Expand Up @@ -278,4 +281,4 @@
<ReferenceFromRuntime Include="System.Private.CoreLib" />
</ItemGroup>
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
</Project>
</Project>
89 changes: 6 additions & 83 deletions src/System.Runtime.Extensions/src/System/Collections/Hashtable.cs
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,9 @@ private struct bucket
private IEqualityComparer _keycomparer;
private Object _syncRoot;

private static ConditionalWeakTable<object, SerializationInfo> s_serializationInfoTable;
private static ConditionalWeakTable<object, SerializationInfo> SerializationInfoTable => LazyInitializer.EnsureInitialized(ref s_serializationInfoTable);
Copy link
Copy Markdown
Member Author

@ViktorHofer ViktorHofer Mar 28, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In CoreLib we use Interlocked.CompareExchange instead. @jkotas any preference here?

internal static ConditionalWeakTable<object, SerializationInfo> SerializationInfoTable
        {
            get
            {
                if (s_serializationInfoTable == null)
                    Interlocked.CompareExchange(ref s_serializationInfoTable, new ConditionalWeakTable<object, SerializationInfo>(), null);

                return s_serializationInfoTable;
            }
        }

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not have a preference.

LazyInitializer.EnsureInitialized is a convenience helper. It looks nicer, but it results in bigger, slower code.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks :) I'm fine with using LazyInitializer here, as the serialization code path isn't performance-critical.


[Obsolete("Please use EqualityComparer property.")]
protected IHashCodeProvider hcp
{
Expand Down Expand Up @@ -380,7 +383,7 @@ protected Hashtable(SerializationInfo info, StreamingContext context)
//We can't do anything with the keys and values until the entire graph has been deserialized
//and we have a reasonable estimate that GetHashCode is not going to fail. For the time being,
//we'll just cache this. The graph is not valid until OnDeserialization has been called.
HashHelpers.SerializationInfoTable.Add(this, info);
SerializationInfoTable.Add(this, info);
}

// ?InitHash? is basically an implementation of classic DoubleHashing (see http://en.wikipedia.org/wiki/Double_hashing)
Expand Down Expand Up @@ -1172,7 +1175,7 @@ public virtual void OnDeserialization(Object sender)
}

SerializationInfo siInfo;
HashHelpers.SerializationInfoTable.TryGetValue(this, out siInfo);
SerializationInfoTable.TryGetValue(this, out siInfo);

if (siInfo == null)
{
Expand Down Expand Up @@ -1254,7 +1257,7 @@ public virtual void OnDeserialization(Object sender)

_version = siInfo.GetInt32(VersionName);

HashHelpers.SerializationInfoTable.Remove(this);
SerializationInfoTable.Remove(this);
}

// Implements a Collection for the keys of a hashtable. An instance of this
Expand Down Expand Up @@ -1640,84 +1643,4 @@ public KeyValuePairs[] Items
}
}
}

// Helpers for choosing prime-sized hash table capacities, plus the side table in which
// Hashtable parks a SerializationInfo between its deserialization constructor and
// OnDeserialization.
internal static class HashHelpers
{
// Table of prime numbers to use as hash table sizes.
// A typical resize algorithm would pick the smallest prime number in this array
// that is larger than twice the previous capacity.
// Suppose our Hashtable currently has capacity x and enough elements are added
// such that a resize needs to occur. Resizing first computes 2x then finds the
// first prime in the table greater than 2x, i.e. if primes are ordered
// p_1, p_2, ..., p_i, ..., it finds p_n such that p_n-1 < 2x < p_n.
// Doubling is important for preserving the asymptotic complexity of the
// hashtable operations such as add. Having a prime guarantees that double
// hashing does not lead to infinite loops. I.e., your hash function will be
// h1(key) + i*h2(key), 0 <= i < size. h2 and the size must be relatively prime.
public static readonly int[] primes = {
3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919,
1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591,
17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437,
187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263,
1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369};

// Tests candidate for primality by trial division.
// Even values are prime only when equal to 2; odd values are divided by every odd
// number from 3 up to the integer square root of candidate.
// NOTE(review): an odd candidate below 3 (for example 1) never enters the loop and is
// therefore reported as prime; callers in this class only pass larger values.
public static bool IsPrime(int candidate)
{
if ((candidate & 1) != 0)
{
// No divisor larger than the square root needs to be checked.
int limit = (int)Math.Sqrt(candidate);
for (int divisor = 3; divisor <= limit; divisor += 2)
{
if ((candidate % divisor) == 0)
return false;
}
return true;
}
return (candidate == 2);
}

// Returns a prime that is at least min.
// The predefined table is consulted first; when min is larger than every entry, odd
// numbers starting at (min | 1) are tested one by one.
// Throws ArgumentException when min is negative.
// Returns min unchanged if the search reaches Int32.MaxValue without a match.
public static int GetPrime(int min)
{
if (min < 0)
throw new ArgumentException(SR.Arg_HTCapacityOverflow);

// Linear scan: the table is sorted ascending, so the first entry >= min is the smallest fit.
for (int i = 0; i < primes.Length; i++)
{
int prime = primes[i];
if (prime >= min) return prime;
}

// min is beyond the predefined table, so search for a suitable prime directly.
// Besides being prime, the result must not satisfy (i - 1) % Hashtable.HashPrime == 0.
// NOTE(review): presumably this keeps the size compatible with Hashtable's
// double-hashing increment - confirm against Hashtable.InitHash.
for (int i = (min | 1); i < Int32.MaxValue; i += 2)
{
if (IsPrime(i) && ((i - 1) % Hashtable.HashPrime != 0))
return i;
}
return min;
}

// Returns size of hashtable to grow to.
// The result is GetPrime(2 * oldSize), except that it is clamped to MaxPrimeArrayLength
// when doubling would exceed that limit and oldSize is still below it.
public static int ExpandPrime(int oldSize)
{
int newSize = 2 * oldSize;

// Allow the hashtables to grow to maximum possible size (~2G elements) before encountering capacity overflow.
// Note that this check works even when _items.Length overflowed thanks to the (uint) cast
// (a doubled value that wrapped to a negative int becomes a large unsigned value here).
if ((uint)newSize > MaxPrimeArrayLength && MaxPrimeArrayLength > oldSize)
{
Debug.Assert(MaxPrimeArrayLength == GetPrime(MaxPrimeArrayLength), "Invalid MaxPrimeArrayLength");
return MaxPrimeArrayLength;
}

return GetPrime(newSize);
}


// This is the maximum prime smaller than Array.MaxArrayLength
public const int MaxPrimeArrayLength = 0x7FEFFFFD;

// Backing field for SerializationInfoTable; stays null until the property is first read.
private static ConditionalWeakTable<object, SerializationInfo> s_serializationInfoTable;
// Created on first access through LazyInitializer.EnsureInitialized.
public static ConditionalWeakTable<object, SerializationInfo> SerializationInfoTable => LazyInitializer.EnsureInitialized(ref s_serializationInfoTable);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<data name="Arg_HTCapacityOverflow" xml:space="preserve">
<value>Capacity overflowed and went negative.</value>
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something like "Cannot add more than {0} objects." might be more useful/user oriented? They don't care whether we use signed ints.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't address this one, since we should now never hit this message unless the array overflows entirely.

</data>
<data name="Serialization_NonSerType" xml:space="preserve">
<value>Type '{0}' in Assembly '{1}' is not marked as serializable.</value>
</data>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@
<Compile Include="System\Runtime\Serialization\Formatters\Binary\BinaryObjectWriter.cs" />
<Compile Include="System\Runtime\Serialization\Formatters\Binary\BinaryParser.cs" />
<Compile Include="System\Runtime\Serialization\Formatters\Binary\BinaryUtilClasses.cs" />
<Compile Include="$(CommonPath)\System\Collections\HashHelpers.cs">
<Link>Common\System\Collections\HashHelpers.cs</Link>
</Compile>
</ItemGroup>
<ItemGroup>
<Reference Include="System.Collections" />
Expand All @@ -77,4 +80,4 @@
<EmbeddedResource Include="Resources\System.Runtime.Serialization.Formatters.rd.xml" />
</ItemGroup>
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
</Project>
</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections;
using System.Runtime.CompilerServices;

namespace System.Runtime.Serialization
Expand All @@ -10,24 +11,16 @@ public class ObjectIDGenerator
{
private const int NumBins = 4;

// Table of prime numbers to use as hash table sizes. Each entry is the
// smallest prime number larger than twice the previous entry.
private static readonly int[] s_sizes =
{
5, 11, 29, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853, 25717, 51437,
102877, 205759, 411527, 823117, 1646237, 3292489, 6584983
};

internal int _currentCount;
internal int _currentSize;
internal long[] _ids;
internal object[] _objs;
private int _currentSize;
private long[] _ids;
private object[] _objs;

// Constructs a new ObjectID generator, initializing all of the necessary variables.
public ObjectIDGenerator()
{
_currentCount = 1;
_currentSize = s_sizes[0];
_currentSize = HashHelpers.primes[0]; // Starting with 3
_ids = new long[_currentSize * NumBins];
_objs = new object[_currentSize * NumBins];
}
Expand Down Expand Up @@ -106,13 +99,12 @@ public virtual long GetId(object obj, out bool firstTime)
// we return that id, otherwise we return 0.
public virtual long HasId(object obj, out bool firstTime)
{
bool found;

if (obj == null)
{
throw new ArgumentNullException(nameof(obj));
}

bool found;
int pos = FindElement(obj, out found);
if (found)
{
Expand All @@ -129,14 +121,14 @@ public virtual long HasId(object obj, out bool firstTime)
// the old arrays into the new ones. Expensive but necessary.
private void Rehash()
{
int i = 0;
for (int currSize = _currentSize; i < s_sizes.Length && s_sizes[i] <= currSize; i++) ;
if (i == s_sizes.Length)
int currSize = _currentSize;
int newSize = HashHelpers.ExpandPrime(currSize);
if (newSize == currSize)
{
// We just walked off the end of the array.
throw new SerializationException(SR.Serialization_TooManyElements);
}
_currentSize = s_sizes[i];
_currentSize = newSize;

long[] newIds = new long[_currentSize * NumBins];
object[] newObjs = new object[_currentSize * NumBins];
Expand All @@ -151,8 +143,7 @@ private void Rehash()
{
if (oldObjs[j] != null)
{
bool found;
int pos = FindElement(oldObjs[j], out found);
int pos = FindElement(oldObjs[j], out _);
_objs[pos] = oldObjs[j];
_ids[pos] = oldIds[j];
}
Expand Down
Loading