From daded037909e8e1f5706abab6ede1ad264eb43a0 Mon Sep 17 00:00:00 2001
From: Paul Westcott
Date: Wed, 27 Nov 2019 21:23:16 +1100
Subject: [PATCH 1/4] Sorting-by-layer for System.Linq.OrderBy
---
.../src/System/Linq/OrderedEnumerable.cs | 145 +++++++++++++++---
1 file changed, 123 insertions(+), 22 deletions(-)
diff --git a/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs b/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs
index c8610253f210b9..54ed82142a32e9 100644
--- a/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs
+++ b/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs
@@ -60,9 +60,9 @@ internal IEnumerator GetEnumerator(int minIdx, int maxIdx)
}
}
- private EnumerableSorter GetEnumerableSorter() => GetEnumerableSorter(null);
+ private EnumerableSorter GetEnumerableSorter() => GetEnumerableSorter(EnumerableSorterRoot.Instance);
- internal abstract EnumerableSorter GetEnumerableSorter(EnumerableSorter? next);
+ internal abstract EnumerableSorter GetEnumerableSorter(IEnumerableSorter next);
private CachingComparer GetComparer() => GetComparer(null);
@@ -167,7 +167,7 @@ internal OrderedEnumerable(IEnumerable source, Func ke
_descending = descending;
}
- internal override EnumerableSorter GetEnumerableSorter(EnumerableSorter? next)
+ internal override EnumerableSorter GetEnumerableSorter(IEnumerableSorter next)
{
// Special case the common use of string with default comparer. Comparer.Default checks the
// thread's Culture on each call which is an overhead which is not required, because we are about to
@@ -273,41 +273,94 @@ internal override void SetElement(TElement element)
}
}
- internal abstract class EnumerableSorter
+ internal interface IEnumerableSorter
{
- internal abstract void ComputeKeys(TElement[] elements, int count);
+ void ComputeKeys(TElement[] elements, int count);
- internal abstract int CompareAnyKeys(int index1, int index2);
+ int CompareAnyKeys(int index1, int index2);
- private int[] ComputeMap(TElement[] elements, int count)
+ bool IsAscending { get; }
+
+ void InitializeSortByLayer(int size);
+
+ void SortByLayer(TElement[] elements, int[] indexes, int startIdx, int count);
+ }
+
+ internal class EnumerableSorterRoot : IEnumerableSorter
+ {
+ public static IEnumerableSorter Instance { get; } = new EnumerableSorterRoot();
+
+ private EnumerableSorterRoot() { }
+
+ public int CompareAnyKeys(int index1, int index2) => index1 - index2;
+
+ public void ComputeKeys(TElement[] elements, int count) { }
+
+ public bool IsAscending => true;
+
+ public void SortByLayer(TElement[] elements, int[] indexes, int startIdx, int count) => Array.Sort(indexes, startIdx, count);
+
+ public void InitializeSortByLayer(int size) { }
+ }
+
+ internal abstract class EnumerableSorter : IEnumerableSorter
+ {
+ public abstract void InitializeSortByLayer(int size);
+ public abstract void SortByLayer(TElement[] elements, int[] indexes, int startIdx, int count);
+ public abstract void ComputeKeys(TElement[] elements, int count);
+ public abstract int CompareAnyKeys(int index1, int index2);
+
+ private int[] ComputeKeysAndMap(TElement[] elements, int count)
{
ComputeKeys(elements, count);
+ return ComputeMap(count);
+ }
+
+ private static int[] ComputeMap(int count)
+ {
int[] map = new int[count];
for (int i = 0; i < map.Length; i++)
{
map[i] = i;
}
-
return map;
}
+ protected abstract bool IsValueType { get; }
+ public abstract bool IsAscending { get; }
+
internal int[] Sort(TElement[] elements, int count)
{
- int[] map = ComputeMap(elements, count);
- QuickSort(map, 0, count - 1);
- return map;
+ // check that we can use the layered sort. This adds (as a first level approximation) O(N) comparisons
+ // to the sort, but this is offset by simpler comparers (i.e. removes a level of indirection),
+ // Array.Sort optimizations (for primitives), removes level of indirection from objects (i.e. accessing
+ // directly in array by sort, rather than an index into another array) and increases caching affects
+ // due to location in array.
+ if (IsValueType && IsAscending)
+ {
+ InitializeSortByLayer(count);
+ int[] map = ComputeMap(count);
+ SortByLayer(elements, map, 0, count);
+ return map;
+ }
+ else
+ {
+ int[] map = ComputeKeysAndMap(elements, count);
+ QuickSort(map, 0, count - 1);
+ return map;
+ }
}
internal int[] Sort(TElement[] elements, int count, int minIdx, int maxIdx)
{
- int[] map = ComputeMap(elements, count);
+ int[] map = ComputeKeysAndMap(elements, count);
PartialQuickSort(map, 0, count - 1, minIdx, maxIdx);
return map;
}
internal TElement ElementAt(TElement[] elements, int count, int idx)
{
- int[] map = ComputeMap(elements, count);
+ int[] map = ComputeKeysAndMap(elements, count);
return idx == 0 ?
elements[Min(map, count)] :
elements[QuickSelect(map, count - 1, idx)];
@@ -331,10 +384,10 @@ internal sealed class EnumerableSorter : EnumerableSorter _keySelector;
private readonly IComparer _comparer;
private readonly bool _descending;
- private readonly EnumerableSorter? _next;
+ private readonly IEnumerableSorter _next;
private TKey[]? _keys;
- internal EnumerableSorter(Func keySelector, IComparer comparer, bool descending, EnumerableSorter? next)
+ internal EnumerableSorter(Func keySelector, IComparer comparer, bool descending, IEnumerableSorter next)
{
_keySelector = keySelector;
_comparer = comparer;
@@ -342,7 +395,60 @@ internal EnumerableSorter(Func keySelector, IComparer comp
_next = next;
}
- internal override void ComputeKeys(TElement[] elements, int count)
+ protected override bool IsValueType => default(TKey)! != null;
+
+ public override bool IsAscending => !_descending && _next.IsAscending;
+
+ public override void InitializeSortByLayer(int size)
+ {
+ _keys = new TKey[size];
+ _next.InitializeSortByLayer(size);
+ }
+
+ public override void SortByLayer(TElement[] data, int[] indexes, int startIdx, int count)
+ {
+ Debug.Assert(_keys != null);
+ Debug.Assert(_next != null);
+
+ int exclusiveEndIdx = startIdx + count;
+
+ // copy the keys that we need
+ for (int idx = startIdx; idx < exclusiveEndIdx; ++idx)
+ {
+ _keys[idx] = _keySelector(data[indexes[idx]]);
+ }
+
+ // unstable sort; each run of equal keys is re-sorted by _next below, which settles the order of ties
+ Array.Sort(_keys, indexes, startIdx, count, _comparer);
+
+ // now find runs of equal keys, and hand each run of more than one element to the next layer to sort
+ TKey examplar = _keys[startIdx];
+ int examplarIdx = startIdx;
+
+ int batchCount;
+ for (int idx = startIdx + 1; idx < exclusiveEndIdx; ++idx)
+ {
+ if (_comparer.Compare(examplar, _keys[idx]) != 0)
+ {
+ batchCount = idx - examplarIdx;
+ if (batchCount > 1)
+ {
+ _next.SortByLayer(data, indexes, examplarIdx, batchCount);
+ }
+ examplar = _keys[idx];
+ examplarIdx = idx;
+ }
+ }
+
+ // handle the final run of equal keys, which the loop above never closes off
+ batchCount = exclusiveEndIdx - examplarIdx;
+ if (batchCount > 1)
+ {
+ _next.SortByLayer(data, indexes, examplarIdx, batchCount);
+ }
+ }
+
+ public override void ComputeKeys(TElement[] elements, int count)
{
_keys = new TKey[count];
for (int i = 0; i < count; i++)
@@ -353,18 +459,13 @@ internal override void ComputeKeys(TElement[] elements, int count)
_next?.ComputeKeys(elements, count);
}
- internal override int CompareAnyKeys(int index1, int index2)
+ public override int CompareAnyKeys(int index1, int index2)
{
Debug.Assert(_keys != null);
int c = _comparer.Compare(_keys[index1], _keys[index2]);
if (c == 0)
{
- if (_next == null)
- {
- return index1 - index2; // ensure stability of sort
- }
-
return _next.CompareAnyKeys(index1, index2);
}
From ba24847a6c88ac592bd3a73e69e21dce833ef71b Mon Sep 17 00:00:00 2001
From: Paul Westcott
Date: Fri, 29 Nov 2019 21:05:01 +1100
Subject: [PATCH 2/4] Initialize _keys lazily
---
.../src/System/Linq/OrderedEnumerable.cs | 18 +++++-------------
1 file changed, 5 insertions(+), 13 deletions(-)
diff --git a/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs b/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs
index 54ed82142a32e9..345c7e2d6bd587 100644
--- a/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs
+++ b/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs
@@ -281,8 +281,6 @@ internal interface IEnumerableSorter
bool IsAscending { get; }
- void InitializeSortByLayer(int size);
-
void SortByLayer(TElement[] elements, int[] indexes, int startIdx, int count);
}
@@ -299,13 +297,10 @@ public void ComputeKeys(TElement[] elements, int count) { }
public bool IsAscending => true;
public void SortByLayer(TElement[] elements, int[] indexes, int startIdx, int count) => Array.Sort(indexes, startIdx, count);
-
- public void InitializeSortByLayer(int size) { }
}
internal abstract class EnumerableSorter : IEnumerableSorter
{
- public abstract void InitializeSortByLayer(int size);
public abstract void SortByLayer(TElement[] elements, int[] indexes, int startIdx, int count);
public abstract void ComputeKeys(TElement[] elements, int count);
public abstract int CompareAnyKeys(int index1, int index2);
@@ -338,7 +333,6 @@ internal int[] Sort(TElement[] elements, int count)
// due to location in array.
if (IsValueType && IsAscending)
{
- InitializeSortByLayer(count);
int[] map = ComputeMap(count);
SortByLayer(elements, map, 0, count);
return map;
@@ -399,17 +393,15 @@ internal EnumerableSorter(Func keySelector, IComparer comp
public override bool IsAscending => !_descending && _next.IsAscending;
- public override void InitializeSortByLayer(int size)
- {
- _keys = new TKey[size];
- _next.InitializeSortByLayer(size);
- }
-
public override void SortByLayer(TElement[] data, int[] indexes, int startIdx, int count)
{
- Debug.Assert(_keys != null);
Debug.Assert(_next != null);
+ if (_keys == null)
+ {
+ _keys = new TKey[data.Length];
+ }
+
int exclusiveEndIdx = startIdx + count;
// copy the keys that we need
From c684269289b5c9238a8894f3b1ef8f6a733c828f Mon Sep 17 00:00:00 2001
From: Paul Westcott
Date: Sat, 30 Nov 2019 14:23:22 +1100
Subject: [PATCH 3/4] Greater descriptions on functions
---
.../src/System/Linq/OrderedEnumerable.cs | 22 ++++++++-----------
1 file changed, 9 insertions(+), 13 deletions(-)
diff --git a/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs b/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs
index 345c7e2d6bd587..60175c673c1ddf 100644
--- a/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs
+++ b/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs
@@ -279,7 +279,7 @@ internal interface IEnumerableSorter
int CompareAnyKeys(int index1, int index2);
- bool IsAscending { get; }
+ bool AreAllLayersAscending { get; }
void SortByLayer(TElement[] elements, int[] indexes, int startIdx, int count);
}
@@ -294,7 +294,7 @@ private EnumerableSorterRoot() { }
public void ComputeKeys(TElement[] elements, int count) { }
- public bool IsAscending => true;
+ public bool AreAllLayersAscending => true;
public void SortByLayer(TElement[] elements, int[] indexes, int startIdx, int count) => Array.Sort(indexes, startIdx, count);
}
@@ -321,8 +321,8 @@ private static int[] ComputeMap(int count)
return map;
}
- protected abstract bool IsValueType { get; }
- public abstract bool IsAscending { get; }
+ protected abstract bool IsOuterLayerValueType { get; }
+ public abstract bool AreAllLayersAscending { get; }
internal int[] Sort(TElement[] elements, int count)
{
@@ -331,7 +331,7 @@ internal int[] Sort(TElement[] elements, int count)
// Array.Sort optimizations (for primitives), removes level of indirection from objects (i.e. accessing
// directly in array by sort, rather than an index into another array) and increases caching affects
// due to location in array.
- if (IsValueType && IsAscending)
+ if (IsOuterLayerValueType && AreAllLayersAscending)
{
int[] map = ComputeMap(count);
SortByLayer(elements, map, 0, count);
@@ -379,6 +379,7 @@ internal sealed class EnumerableSorter : EnumerableSorter _comparer;
private readonly bool _descending;
private readonly IEnumerableSorter _next;
+
private TKey[]? _keys;
internal EnumerableSorter(Func keySelector, IComparer comparer, bool descending, IEnumerableSorter next)
@@ -389,14 +390,12 @@ internal EnumerableSorter(Func keySelector, IComparer comp
_next = next;
}
- protected override bool IsValueType => default(TKey)! != null;
+ protected override bool IsOuterLayerValueType => default(TKey)! != null;
- public override bool IsAscending => !_descending && _next.IsAscending;
+ public override bool AreAllLayersAscending => !_descending && _next.AreAllLayersAscending;
public override void SortByLayer(TElement[] data, int[] indexes, int startIdx, int count)
{
- Debug.Assert(_next != null);
-
if (_keys == null)
{
_keys = new TKey[data.Length];
@@ -448,7 +447,7 @@ public override void ComputeKeys(TElement[] elements, int count)
_keys[i] = _keySelector(elements[i]);
}
- _next?.ComputeKeys(elements, count);
+ _next.ComputeKeys(elements, count);
}
public override int CompareAnyKeys(int index1, int index2)
@@ -467,14 +466,11 @@ public override int CompareAnyKeys(int index1, int index2)
return (_descending != (c > 0)) ? 1 : -1;
}
-
private int CompareKeys(int index1, int index2) => index1 == index2 ? 0 : CompareAnyKeys(index1, index2);
protected override void QuickSort(int[] keys, int lo, int hi) =>
Array.Sort(keys, lo, hi - lo + 1, Comparer.Create(CompareAnyKeys)); // TODO #24115: Remove Create call when delegate-based overload is available
-
-
// Sorts the k elements between minIdx and maxIdx without sorting all elements
// Time complexity: O(n + k log k) best and average case. O(n^2) worse case.
protected override void PartialQuickSort(int[] map, int left, int right, int minIdx, int maxIdx)
From 03fc4f35151d22758504e9a0f97ac57b8e9aa10c Mon Sep 17 00:00:00 2001
From: Paul Westcott
Date: Tue, 3 Dec 2019 19:18:57 +1000
Subject: [PATCH 4/4] Update OrderedEnumerable.cs
---
src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs b/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs
index 60175c673c1ddf..d823da2d8c4278 100644
--- a/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs
+++ b/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs
@@ -326,7 +326,7 @@ private static int[] ComputeMap(int count)
internal int[] Sort(TElement[] elements, int count)
{
- // check that we can use the layered sort. This adds (as a first level approximation) O(N) comparisons
+ // Check that we can use the layered sort. This adds (as a first level approximation) O(N) comparisons
// to the sort, but this is offset by simpler comparers (i.e. removes a level of indirection),
// Array.Sort optimizations (for primitives), removes level of indirection from objects (i.e. accessing
// directly in array by sort, rather than an index into another array) and increases caching affects