From a096b4e82121d25d4ed473fd213613a3ba13f1d1 Mon Sep 17 00:00:00 2001 From: Jon Hanna Date: Thu, 28 Jan 2016 15:58:21 +0000 Subject: [PATCH 1/2] Use Remove instead of Contains + Remove on Set IntersectQueryOperator does a call to Contains followed by a call to Remove on the same set for the same value. Since Remove takes a very similar path to Contains in checking for existance, and returns false when the item to remove isn't found, this can be simplified to just Remove. Also remove unused constructor on Set. --- src/System.Linq.Parallel/src/System/Linq/Parallel/Helpers.cs | 2 -- .../Parallel/QueryOperators/Binary/IntersectQueryOperator.cs | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/System.Linq.Parallel/src/System/Linq/Parallel/Helpers.cs b/src/System.Linq.Parallel/src/System/Linq/Parallel/Helpers.cs index ed97ebfe9f9c..e13726691523 100644 --- a/src/System.Linq.Parallel/src/System/Linq/Parallel/Helpers.cs +++ b/src/System.Linq.Parallel/src/System/Linq/Parallel/Helpers.cs @@ -30,8 +30,6 @@ internal class Set private const int InitialSize = 7; private const int HashCodeMask = 0x7FFFFFFF; - public Set() : this(null) { } - public Set(IEqualityComparer comparer) { if (comparer == null) comparer = EqualityComparer.Default; diff --git a/src/System.Linq.Parallel/src/System/Linq/Parallel/QueryOperators/Binary/IntersectQueryOperator.cs b/src/System.Linq.Parallel/src/System/Linq/Parallel/QueryOperators/Binary/IntersectQueryOperator.cs index a7739da5ef1f..87f1a849c5db 100644 --- a/src/System.Linq.Parallel/src/System/Linq/Parallel/QueryOperators/Binary/IntersectQueryOperator.cs +++ b/src/System.Linq.Parallel/src/System/Linq/Parallel/QueryOperators/Binary/IntersectQueryOperator.cs @@ -191,9 +191,8 @@ internal override bool MoveNext(ref TInputOutput currentElement, ref int current // If we found the element in our set, and if we haven't returned it yet, // we can yield it to the caller. We also mark it so we know we've returned // it once already and never will again. - if (_hashLookup.Contains((TInputOutput)leftElement.First)) + if (_hashLookup.Remove((TInputOutput)leftElement.First)) { - _hashLookup.Remove((TInputOutput)leftElement.First); currentElement = (TInputOutput)leftElement.First; #if DEBUG currentKey = unchecked((int)0xdeadbeef); From a3faba45d0f3b7747bd8e54c4894e733ba4a76ec Mon Sep 17 00:00:00 2001 From: Jon Hanna Date: Thu, 28 Jan 2016 16:37:12 +0000 Subject: [PATCH 2/2] Remove unused paths in Set. Set in Linq and Linq.Parallel never has Add called after Remove. Remove paths and fields only necessary in that case. Add debug-only check on this assumption, so any new uses that break this assumption are flagged to the developer. --- .../src/System/Linq/Parallel/Helpers.cs | 32 +++++----- src/System.Linq/src/System/Linq/Enumerable.cs | 61 ++++++++----------- 2 files changed, 40 insertions(+), 53 deletions(-) diff --git a/src/System.Linq.Parallel/src/System/Linq/Parallel/Helpers.cs b/src/System.Linq.Parallel/src/System/Linq/Parallel/Helpers.cs index e13726691523..d3788c3fb252 100644 --- a/src/System.Linq.Parallel/src/System/Linq/Parallel/Helpers.cs +++ b/src/System.Linq.Parallel/src/System/Linq/Parallel/Helpers.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System.Collections.Generic; +using System.Diagnostics; namespace System.Linq.Parallel { @@ -24,8 +25,10 @@ internal class Set private int[] _buckets; private Slot[] _slots; private int _count; - private int _freeList; - private IEqualityComparer _comparer; + private readonly IEqualityComparer _comparer; +#if DEBUG + private bool _haveRemoved; +#endif private const int InitialSize = 7; private const int HashCodeMask = 0x7FFFFFFF; @@ -36,12 +39,14 @@ public Set(IEqualityComparer comparer) _comparer = comparer; _buckets = new int[InitialSize]; _slots = new Slot[InitialSize]; - _freeList = -1; } // If value is not in set, add it and return true; otherwise return false public bool Add(TElement value) { +#if DEBUG + Debug.Assert(!_haveRemoved, "This class is optimised for never calling Add after Remove. If your changes need to do so, undo that optimization."); +#endif return !Find(value, true); } @@ -54,6 +59,9 @@ public bool Contains(TElement value) // If value is in set, remove it and return true; otherwise return false public bool Remove(TElement value) { +#if DEBUG + _haveRemoved = true; +#endif int hashCode = InternalGetHashCode(value); int bucket = hashCode % _buckets.Length; int last = -1; @@ -71,8 +79,7 @@ public bool Remove(TElement value) } _slots[i].hashCode = -1; _slots[i].value = default(TElement); - _slots[i].next = _freeList; - _freeList = i; + _slots[i].next = -1; return true; } } @@ -88,18 +95,9 @@ bool Find(TElement value, bool add) } if (add) { - int index; - if (_freeList >= 0) - { - index = _freeList; - _freeList = _slots[index].next; - } - else - { - if (_count == _slots.Length) Resize(); - index = _count; - _count++; - } + if (_count == _slots.Length) Resize(); + int index = _count; + _count++; int bucket = hashCode % _buckets.Length; _slots[index].hashCode = hashCode; _slots[index].value = value; diff --git a/src/System.Linq/src/System/Linq/Enumerable.cs b/src/System.Linq/src/System/Linq/Enumerable.cs index d303be7545a3..92b51f5583a7 100644 --- a/src/System.Linq/src/System/Linq/Enumerable.cs +++ b/src/System.Linq/src/System/Linq/Enumerable.cs @@ -3667,8 +3667,10 @@ internal class Set private int[] _buckets; private Slot[] _slots; private int _count; - private int _freeList; - private IEqualityComparer _comparer; + private readonly IEqualityComparer _comparer; +#if DEBUG + private bool _haveRemoved; +#endif public Set(IEqualityComparer comparer) { @@ -3676,18 +3678,36 @@ public Set(IEqualityComparer comparer) _comparer = comparer; _buckets = new int[7]; _slots = new Slot[7]; - _freeList = -1; } // If value is not in set, add it and return true; otherwise return false public bool Add(TElement value) { - return !Find(value, true); +#if DEBUG + Debug.Assert(!_haveRemoved, "This class is optimised for never calling Add after Remove. If your changes need to do so, undo that optimization."); +#endif + int hashCode = InternalGetHashCode(value); + for (int i = _buckets[hashCode % _buckets.Length] - 1; i >= 0; i = _slots[i].next) + { + if (_slots[i].hashCode == hashCode && _comparer.Equals(_slots[i].value, value)) return false; + } + if (_count == _slots.Length) Resize(); + int index = _count; + _count++; + int bucket = hashCode % _buckets.Length; + _slots[index].hashCode = hashCode; + _slots[index].value = value; + _slots[index].next = _buckets[bucket] - 1; + _buckets[bucket] = index + 1; + return true; } // If value is in set, remove it and return true; otherwise return false public bool Remove(TElement value) { +#if DEBUG + _haveRemoved = true; +#endif int hashCode = InternalGetHashCode(value); int bucket = hashCode % _buckets.Length; int last = -1; @@ -3705,44 +3725,13 @@ public bool Remove(TElement value) } _slots[i].hashCode = -1; _slots[i].value = default(TElement); - _slots[i].next = _freeList; - _freeList = i; + _slots[i].next = -1; return true; } } return false; } - private bool Find(TElement value, bool add) - { - int hashCode = InternalGetHashCode(value); - for (int i = _buckets[hashCode % _buckets.Length] - 1; i >= 0; i = _slots[i].next) - { - if (_slots[i].hashCode == hashCode && _comparer.Equals(_slots[i].value, value)) return true; - } - if (add) - { - int index; - if (_freeList >= 0) - { - index = _freeList; - _freeList = _slots[index].next; - } - else - { - if (_count == _slots.Length) Resize(); - index = _count; - _count++; - } - int bucket = hashCode % _buckets.Length; - _slots[index].hashCode = hashCode; - _slots[index].value = value; - _slots[index].next = _buckets[bucket] - 1; - _buckets[bucket] = index + 1; - } - return false; - } - private void Resize() { int newSize = checked(_count * 2 + 1);