From 0d4db3a0daffeb9ffcc88c86676d469f0ce860d1 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 7 Jul 2020 20:25:16 +0200 Subject: [PATCH 01/40] Added StringPool type --- .../Buffers/StringPool.cs | 171 ++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs new file mode 100644 index 00000000000..91f877870e3 --- /dev/null +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -0,0 +1,171 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using Microsoft.Toolkit.HighPerformance.Extensions; +using Microsoft.Toolkit.HighPerformance.Helpers; + +namespace Microsoft.Toolkit.HighPerformance.Buffers +{ + /// + /// A configurable pool for instances. This can be used to minimize allocations + /// when creating multiple instances from buffers of values. + /// The method provides a best-effort alternative to just creating a new + /// instance every time, in order to minimize the number of duplicated instances. + /// + public sealed class StringPool + { + /// + /// The default number of instances in . + /// + private const int DefaultNumberOfBuckets = 20; + + /// + /// The default number of entries stored in each instance. + /// + private const int DefaultEntriesPerBucket = 64; + + /// + /// The current array of instances in use. + /// + private readonly Bucket[] buckets; + + /// + /// Initializes a new instance of the class. + /// + /// The total number of buckets to use. + /// The maximum number of entries per bucket. + public StringPool(int numberOfBuckets, int entriesPerBucket) + { + Span span = this.buckets = new Bucket[numberOfBuckets]; + + // We preallocate the buckets in advance, since each bucket only contains the + // array field, which is not preinitialized, so the allocations are minimal. + // This lets us lock on each individual buckets when retrieving a string instance. + foreach (ref Bucket bucket in span) + { + bucket = new Bucket(); + } + + NumberOfBuckets = numberOfBuckets; + EntriesPerBucket = entriesPerBucket; + } + + /// + /// Gets the default instance. + /// + public static StringPool Default { get; } = new StringPool(DefaultNumberOfBuckets, DefaultEntriesPerBucket); + + /// + /// Gets the total number of buckets in use. + /// + public int NumberOfBuckets { get; } + + /// + /// Gets the maximum number of entries stored in each bucket. + /// + public int EntriesPerBucket { get; } + + /// + /// Gets a cached instance matching the input content, or creates a new one. + /// + /// The input with the contents to use. + /// A instance with the contents of , cached if possible. + public string GetOrAdd(ReadOnlySpan span) + { + if (span.IsEmpty) + { + return string.Empty; + } + + int bucketIndex = span.Length % NumberOfBuckets; + + Bucket bucket = this.buckets.DangerousGetReferenceAt(bucketIndex); + + lock (bucket) + { + return bucket.GetOrAdd(span, EntriesPerBucket); + } + } + + /// + /// Resets the current instance and its associated buckets. + /// + public void Reset() + { + foreach (Bucket bucket in this.buckets) + { + lock (bucket) + { + bucket.Reset(); + } + } + } + + /// + /// A configurable bucket containing a group of cached instances. + /// + private sealed class Bucket + { + /// + /// A bitmask used to avoid branches when computing the absolute value of an . + /// This will ignore overflows, as we just need this to map hashcodes into the valid bucket range. + /// + private const int SignMask = ~(1 << 31); + + /// + /// The array of entries currently in use. + /// + private string?[]? entries; + + /// + /// Implements for the current instance. + /// + /// The input with the contents to use. + /// The number of entries being used in the current instance. + /// A instance with the contents of , cached if possible. + public string GetOrAdd(ReadOnlySpan span, int entriesPerBucket) + { + ref string?[]? entries = ref this.entries; + + entries ??= new string[entriesPerBucket]; + + int entryIndex = +#if NETSTANDARD1_4 + (span.GetDjb2HashCode() & SignMask) % entriesPerBucket; +#else + (HashCode.Combine(span) & SignMask) % entriesPerBucket; +#endif + + ref string? entry = ref entries.DangerousGetReferenceAt(entryIndex); + + if (!(entry is null) && + entry.AsSpan().SequenceEqual(span)) + { + return entry; + } + +#if SPAN_RUNTIME_SUPPORT + return entry = new string(span); +#else + unsafe + { + fixed (char* c = span) + { + return entry = new string(c, 0, span.Length); + } + } +#endif + } + + /// + /// Resets the current array of entries. + /// + public void Reset() + { + this.entries = null; + } + } + } +} From 55ddf1d1884a22abdfa87ab736688c6102e4f2bd Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 7 Jul 2020 20:41:33 +0200 Subject: [PATCH 02/40] Added StringPool default constructor, input checks --- .../Buffers/StringPool.cs | 30 ++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index 91f877870e3..cd80e90da0e 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -4,7 +4,9 @@ using System; using Microsoft.Toolkit.HighPerformance.Extensions; +#if !NETSTANDARD1_4 using Microsoft.Toolkit.HighPerformance.Helpers; +#endif namespace Microsoft.Toolkit.HighPerformance.Buffers { @@ -31,6 +33,14 @@ public sealed class StringPool /// private readonly Bucket[] buckets; + /// + /// Initializes a new instance of the class. + /// + public StringPool() + : this(DefaultNumberOfBuckets, DefaultEntriesPerBucket) + { + } + /// /// Initializes a new instance of the class. /// @@ -38,6 +48,16 @@ public sealed class StringPool /// The maximum number of entries per bucket. public StringPool(int numberOfBuckets, int entriesPerBucket) { + if (numberOfBuckets < 0) + { + ThrowArgumentOutOfRangeException(nameof(numberOfBuckets)); + } + + if (entriesPerBucket < 0) + { + ThrowArgumentOutOfRangeException(nameof(entriesPerBucket)); + } + Span span = this.buckets = new Bucket[numberOfBuckets]; // We preallocate the buckets in advance, since each bucket only contains the @@ -55,7 +75,7 @@ public StringPool(int numberOfBuckets, int entriesPerBucket) /// /// Gets the default instance. /// - public static StringPool Default { get; } = new StringPool(DefaultNumberOfBuckets, DefaultEntriesPerBucket); + public static StringPool Default { get; } = new StringPool(); /// /// Gets the total number of buckets in use. @@ -167,5 +187,13 @@ public void Reset() this.entries = null; } } + + /// + /// Throws an when the requested size exceeds the capacity. + /// + private static void ThrowArgumentOutOfRangeException(string name) + { + throw new ArgumentOutOfRangeException(name, $"The input parameter must be greater than 0"); + } } } From 6f48d84c541720e59ff3f663f9b9ea9c6dff24f9 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 7 Jul 2020 20:46:33 +0200 Subject: [PATCH 03/40] Fixed input checks --- Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index cd80e90da0e..7a26065ce1d 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -48,12 +48,12 @@ public StringPool() /// The maximum number of entries per bucket. public StringPool(int numberOfBuckets, int entriesPerBucket) { - if (numberOfBuckets < 0) + if (numberOfBuckets <= 0) { ThrowArgumentOutOfRangeException(nameof(numberOfBuckets)); } - if (entriesPerBucket < 0) + if (entriesPerBucket <= 0) { ThrowArgumentOutOfRangeException(nameof(entriesPerBucket)); } @@ -193,7 +193,7 @@ public void Reset() /// private static void ThrowArgumentOutOfRangeException(string name) { - throw new ArgumentOutOfRangeException(name, $"The input parameter must be greater than 0"); + throw new ArgumentOutOfRangeException(name, "The input parameter must be greater than 0"); } } } From 77217a363476d656fbe734569f37f620f2008ff7 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 7 Jul 2020 20:46:48 +0200 Subject: [PATCH 04/40] Added StringPool tests --- .../Buffers/Test_StringPool.cs | 68 +++++++++++++++++++ ...UnitTests.HighPerformance.Shared.projitems | 1 + 2 files changed, 69 insertions(+) create mode 100644 UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs diff --git a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs new file mode 100644 index 00000000000..39b8733f1c6 --- /dev/null +++ b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs @@ -0,0 +1,68 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using Microsoft.Toolkit.HighPerformance.Buffers; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace UnitTests.HighPerformance.Buffers +{ + [TestClass] + public class Test_StringPool + { + [TestCategory("StringPool")] + [TestMethod] + public void Test_StringPool_Empty() + { + string empty = StringPool.Default.GetOrAdd(default); + + Assert.AreSame(string.Empty, empty); + } + + [TestCategory("StringPool")] + [TestMethod] + [DataRow(0, 0)] + [DataRow(1, 0)] + [DataRow(0, 1)] + [DataRow(-3248234, 22)] + [DataRow(int.MinValue, int.MinValue)] + [ExpectedException(typeof(ArgumentOutOfRangeException))] + public void Test_StringPool_Fail(int buckets, int entries) + { + var pool = new StringPool(buckets, entries); + + Assert.Fail(); + } + + [TestCategory("StringPool")] + [TestMethod] + public void Test_StringPool_Misc() + { + var pool = new StringPool(); + + string + hello = pool.GetOrAdd(nameof(hello)), + world = pool.GetOrAdd(nameof(world)), + windowsCommunityToolkit = pool.GetOrAdd(nameof(windowsCommunityToolkit)); + + Assert.AreEqual(nameof(hello), hello); + Assert.AreEqual(nameof(world), world); + Assert.AreEqual(nameof(windowsCommunityToolkit), windowsCommunityToolkit); + + Assert.AreSame(hello, pool.GetOrAdd(hello)); + Assert.AreSame(world, pool.GetOrAdd(world)); + Assert.AreSame(windowsCommunityToolkit, pool.GetOrAdd(windowsCommunityToolkit)); + + pool.Reset(); + + Assert.AreEqual(nameof(hello), hello); + Assert.AreEqual(nameof(world), world); + Assert.AreEqual(nameof(windowsCommunityToolkit), windowsCommunityToolkit); + + Assert.AreNotSame(hello, pool.GetOrAdd(hello)); + Assert.AreNotSame(world, pool.GetOrAdd(world)); + Assert.AreNotSame(windowsCommunityToolkit, pool.GetOrAdd(windowsCommunityToolkit)); + } + } +} diff --git a/UnitTests/UnitTests.HighPerformance.Shared/UnitTests.HighPerformance.Shared.projitems b/UnitTests/UnitTests.HighPerformance.Shared/UnitTests.HighPerformance.Shared.projitems index 06b7503c5f8..3b52650e85b 100644 --- a/UnitTests/UnitTests.HighPerformance.Shared/UnitTests.HighPerformance.Shared.projitems +++ b/UnitTests/UnitTests.HighPerformance.Shared/UnitTests.HighPerformance.Shared.projitems @@ -12,6 +12,7 @@ + From 4601f7e18cf17136553c47127893f95fa89b62bb Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 7 Jul 2020 22:42:32 +0200 Subject: [PATCH 05/40] Minor code refactoring --- .../Buffers/StringPool.cs | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index 7a26065ce1d..421120f7812 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -65,7 +65,7 @@ public StringPool(int numberOfBuckets, int entriesPerBucket) // This lets us lock on each individual buckets when retrieving a string instance. foreach (ref Bucket bucket in span) { - bucket = new Bucket(); + bucket = new Bucket(entriesPerBucket); } NumberOfBuckets = numberOfBuckets; @@ -105,7 +105,7 @@ public string GetOrAdd(ReadOnlySpan span) lock (bucket) { - return bucket.GetOrAdd(span, EntriesPerBucket); + return bucket.GetOrAdd(span); } } @@ -134,18 +134,31 @@ private sealed class Bucket /// private const int SignMask = ~(1 << 31); + /// + /// The number of entries being used in the current instance. + /// + private readonly int entriesPerBucket; + /// /// The array of entries currently in use. /// private string?[]? entries; + /// + /// Initializes a new instance of the class. + /// + /// The number of entries being used in the current instance. + public Bucket(int entriesPerBucket) + { + this.entriesPerBucket = entriesPerBucket; + } + /// /// Implements for the current instance. /// /// The input with the contents to use. - /// The number of entries being used in the current instance. /// A instance with the contents of , cached if possible. - public string GetOrAdd(ReadOnlySpan span, int entriesPerBucket) + public string GetOrAdd(ReadOnlySpan span) { ref string?[]? entries = ref this.entries; From 63446ce1649a312e9f59872ced53ab0e256a73d8 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 7 Jul 2020 22:48:00 +0200 Subject: [PATCH 06/40] Added StringPool.TryGet API --- .../Buffers/StringPool.cs | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index 421120f7812..471a93169bf 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System; +using System.Diagnostics.CodeAnalysis; using Microsoft.Toolkit.HighPerformance.Extensions; #if !NETSTANDARD1_4 using Microsoft.Toolkit.HighPerformance.Helpers; @@ -109,6 +110,31 @@ public string GetOrAdd(ReadOnlySpan span) } } + /// + /// Tries to get a cached instance matching the input content, if present. + /// + /// The input with the contents to use. + /// The resulting cached instance, if present + /// Whether or not the target instance was found. + public bool TryGet(ReadOnlySpan span, [NotNullWhen(true)] out string? value) + { + if (span.IsEmpty) + { + value = string.Empty; + + return true; + } + + int bucketIndex = span.Length % NumberOfBuckets; + + Bucket bucket = this.buckets.DangerousGetReferenceAt(bucketIndex); + + lock (bucket) + { + return bucket.TryGet(span, out value); + } + } + /// /// Resets the current instance and its associated buckets. /// @@ -192,6 +218,41 @@ public string GetOrAdd(ReadOnlySpan span) #endif } + /// + /// Implements for the current instance. + /// + /// The input with the contents to use. + /// The resulting cached instance, if present + /// Whether or not the target instance was found. + public bool TryGet(ReadOnlySpan span, [NotNullWhen(true)] out string? value) + { + ref string?[]? entries = ref this.entries; + + if (!(entries is null)) + { + int entryIndex = +#if NETSTANDARD1_4 + (span.GetDjb2HashCode() & SignMask) % entriesPerBucket; +#else + (HashCode.Combine(span) & SignMask) % entriesPerBucket; +#endif + + ref string? entry = ref entries.DangerousGetReferenceAt(entryIndex); + + if (!(entry is null) && + entry.AsSpan().SequenceEqual(span)) + { + value = entry; + + return true; + } + } + + value = null; + + return false; + } + /// /// Resets the current array of entries. /// From a877555910bd16218bd2a951d27222d538f2fa03 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 7 Jul 2020 22:58:32 +0200 Subject: [PATCH 07/40] Fixed build errors on UWP (string -> ReadOnlySpan) --- .../Buffers/Test_StringPool.cs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs index 39b8733f1c6..3dd5a7164ab 100644 --- a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs +++ b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs @@ -42,17 +42,17 @@ public void Test_StringPool_Misc() var pool = new StringPool(); string - hello = pool.GetOrAdd(nameof(hello)), - world = pool.GetOrAdd(nameof(world)), - windowsCommunityToolkit = pool.GetOrAdd(nameof(windowsCommunityToolkit)); + hello = pool.GetOrAdd(nameof(hello).AsSpan()), + world = pool.GetOrAdd(nameof(world).AsSpan()), + windowsCommunityToolkit = pool.GetOrAdd(nameof(windowsCommunityToolkit).AsSpan()); Assert.AreEqual(nameof(hello), hello); Assert.AreEqual(nameof(world), world); Assert.AreEqual(nameof(windowsCommunityToolkit), windowsCommunityToolkit); - Assert.AreSame(hello, pool.GetOrAdd(hello)); - Assert.AreSame(world, pool.GetOrAdd(world)); - Assert.AreSame(windowsCommunityToolkit, pool.GetOrAdd(windowsCommunityToolkit)); + Assert.AreSame(hello, pool.GetOrAdd(hello.AsSpan())); + Assert.AreSame(world, pool.GetOrAdd(world.AsSpan())); + Assert.AreSame(windowsCommunityToolkit, pool.GetOrAdd(windowsCommunityToolkit.AsSpan())); pool.Reset(); @@ -60,9 +60,9 @@ public void Test_StringPool_Misc() Assert.AreEqual(nameof(world), world); Assert.AreEqual(nameof(windowsCommunityToolkit), windowsCommunityToolkit); - Assert.AreNotSame(hello, pool.GetOrAdd(hello)); - Assert.AreNotSame(world, pool.GetOrAdd(world)); - Assert.AreNotSame(windowsCommunityToolkit, pool.GetOrAdd(windowsCommunityToolkit)); + Assert.AreNotSame(hello, pool.GetOrAdd(hello.AsSpan())); + Assert.AreNotSame(world, pool.GetOrAdd(world.AsSpan())); + Assert.AreNotSame(windowsCommunityToolkit, pool.GetOrAdd(windowsCommunityToolkit.AsSpan())); } } } From e2737e08e0cbe5d392a327ba63850c3cbbb22f00 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 8 Jul 2020 00:37:04 +0200 Subject: [PATCH 08/40] Switched Bucket type to a struct --- .../Buffers/StringPool.cs | 107 ++++++++++-------- 1 file changed, 57 insertions(+), 50 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index 471a93169bf..872e23cffee 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -102,12 +102,9 @@ public string GetOrAdd(ReadOnlySpan span) int bucketIndex = span.Length % NumberOfBuckets; - Bucket bucket = this.buckets.DangerousGetReferenceAt(bucketIndex); + ref Bucket bucket = ref this.buckets.DangerousGetReferenceAt(bucketIndex); - lock (bucket) - { - return bucket.GetOrAdd(span); - } + return bucket.GetOrAdd(span); } /// @@ -127,12 +124,9 @@ public bool TryGet(ReadOnlySpan span, [NotNullWhen(true)] out string? valu int bucketIndex = span.Length % NumberOfBuckets; - Bucket bucket = this.buckets.DangerousGetReferenceAt(bucketIndex); + ref Bucket bucket = ref this.buckets.DangerousGetReferenceAt(bucketIndex); - lock (bucket) - { - return bucket.TryGet(span, out value); - } + return bucket.TryGet(span, out value); } /// @@ -140,19 +134,16 @@ public bool TryGet(ReadOnlySpan span, [NotNullWhen(true)] out string? valu /// public void Reset() { - foreach (Bucket bucket in this.buckets) + foreach (ref Bucket bucket in this.buckets.AsSpan()) { - lock (bucket) - { - bucket.Reset(); - } + bucket.Reset(); } } /// /// A configurable bucket containing a group of cached instances. /// - private sealed class Bucket + private struct Bucket { /// /// A bitmask used to avoid branches when computing the absolute value of an . @@ -165,18 +156,25 @@ private sealed class Bucket /// private readonly int entriesPerBucket; + /// + /// A dummy used for synchronization. + /// + private readonly object dummy; + /// /// The array of entries currently in use. /// private string?[]? entries; /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the struct. /// /// The number of entries being used in the current instance. public Bucket(int entriesPerBucket) { this.entriesPerBucket = entriesPerBucket; + this.dummy = new object(); + this.entries = null; } /// @@ -186,36 +184,39 @@ public Bucket(int entriesPerBucket) /// A instance with the contents of , cached if possible. public string GetOrAdd(ReadOnlySpan span) { - ref string?[]? entries = ref this.entries; + lock (this.dummy) + { + ref string?[]? entries = ref this.entries; - entries ??= new string[entriesPerBucket]; + entries ??= new string[entriesPerBucket]; - int entryIndex = + int entryIndex = #if NETSTANDARD1_4 - (span.GetDjb2HashCode() & SignMask) % entriesPerBucket; + (span.GetDjb2HashCode() & SignMask) % entriesPerBucket; #else - (HashCode.Combine(span) & SignMask) % entriesPerBucket; + (HashCode.Combine(span) & SignMask) % entriesPerBucket; #endif - ref string? entry = ref entries.DangerousGetReferenceAt(entryIndex); + ref string? entry = ref entries.DangerousGetReferenceAt(entryIndex); - if (!(entry is null) && - entry.AsSpan().SequenceEqual(span)) - { - return entry; - } + if (!(entry is null) && + entry.AsSpan().SequenceEqual(span)) + { + return entry; + } #if SPAN_RUNTIME_SUPPORT - return entry = new string(span); + return entry = new string(span); #else - unsafe - { - fixed (char* c = span) + unsafe { - return entry = new string(c, 0, span.Length); + fixed (char* c = span) + { + return entry = new string(c, 0, span.Length); + } } - } #endif + } } /// @@ -226,31 +227,34 @@ public string GetOrAdd(ReadOnlySpan span) /// Whether or not the target instance was found. public bool TryGet(ReadOnlySpan span, [NotNullWhen(true)] out string? value) { - ref string?[]? entries = ref this.entries; - - if (!(entries is null)) + lock (this.dummy) { - int entryIndex = + ref string?[]? entries = ref this.entries; + + if (!(entries is null)) + { + int entryIndex = #if NETSTANDARD1_4 - (span.GetDjb2HashCode() & SignMask) % entriesPerBucket; + (span.GetDjb2HashCode() & SignMask) % entriesPerBucket; #else - (HashCode.Combine(span) & SignMask) % entriesPerBucket; + (HashCode.Combine(span) & SignMask) % entriesPerBucket; #endif - ref string? entry = ref entries.DangerousGetReferenceAt(entryIndex); + ref string? entry = ref entries.DangerousGetReferenceAt(entryIndex); - if (!(entry is null) && - entry.AsSpan().SequenceEqual(span)) - { - value = entry; + if (!(entry is null) && + entry.AsSpan().SequenceEqual(span)) + { + value = entry; - return true; + return true; + } } - } - value = null; + value = null; - return false; + return false; + } } /// @@ -258,7 +262,10 @@ public bool TryGet(ReadOnlySpan span, [NotNullWhen(true)] out string? valu /// public void Reset() { - this.entries = null; + lock (this.dummy) + { + this.entries = null; + } } } From 194e082f118db738af60fe16966dd1472b071dbe Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 8 Jul 2020 00:56:03 +0200 Subject: [PATCH 09/40] Added StringPool.Add(string) method --- .../Buffers/StringPool.cs | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index 872e23cffee..4c11994c47c 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -88,6 +88,24 @@ public StringPool(int numberOfBuckets, int entriesPerBucket) /// public int EntriesPerBucket { get; } + /// + /// Stores a instance in the internal cache. + /// + /// The input instance to cache. + public void Add(string value) + { + if (value.Length == 0) + { + return; + } + + int bucketIndex = value.Length % NumberOfBuckets; + + ref Bucket bucket = ref this.buckets.DangerousGetReferenceAt(bucketIndex); + + bucket.Add(value); + } + /// /// Gets a cached instance matching the input content, or creates a new one. /// @@ -177,6 +195,29 @@ public Bucket(int entriesPerBucket) this.entries = null; } + /// + /// Implements for the current instance. + /// + /// The input instance to cache. + public void Add(string value) + { + lock (this.dummy) + { + ref string?[]? entries = ref this.entries; + + entries ??= new string[entriesPerBucket]; + + int entryIndex = +#if NETSTANDARD1_4 + (value.GetDjb2HashCode() & SignMask) % entriesPerBucket; +#else + (HashCode.Combine(value.AsSpan()) & SignMask) % entriesPerBucket; +#endif + + entries.DangerousGetReferenceAt(entryIndex) = value; + } + } + /// /// Implements for the current instance. /// From 769126f45fa117ccfbfbb1c6112d7e664c953129 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 8 Jul 2020 01:08:32 +0200 Subject: [PATCH 10/40] Minor code refactoring --- .../Buffers/StringPool.cs | 39 ++++++++++--------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index 4c11994c47c..9d03eb12d2d 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -4,6 +4,8 @@ using System; using System.Diagnostics.CodeAnalysis; +using System.Diagnostics.Contracts; +using System.Runtime.CompilerServices; using Microsoft.Toolkit.HighPerformance.Extensions; #if !NETSTANDARD1_4 using Microsoft.Toolkit.HighPerformance.Helpers; @@ -207,12 +209,7 @@ public void Add(string value) entries ??= new string[entriesPerBucket]; - int entryIndex = -#if NETSTANDARD1_4 - (value.GetDjb2HashCode() & SignMask) % entriesPerBucket; -#else - (HashCode.Combine(value.AsSpan()) & SignMask) % entriesPerBucket; -#endif + int entryIndex = GetIndex(value.AsSpan()); entries.DangerousGetReferenceAt(entryIndex) = value; } @@ -231,12 +228,7 @@ public string GetOrAdd(ReadOnlySpan span) entries ??= new string[entriesPerBucket]; - int entryIndex = -#if NETSTANDARD1_4 - (span.GetDjb2HashCode() & SignMask) % entriesPerBucket; -#else - (HashCode.Combine(span) & SignMask) % entriesPerBucket; -#endif + int entryIndex = GetIndex(span); ref string? entry = ref entries.DangerousGetReferenceAt(entryIndex); @@ -274,12 +266,7 @@ public bool TryGet(ReadOnlySpan span, [NotNullWhen(true)] out string? valu if (!(entries is null)) { - int entryIndex = -#if NETSTANDARD1_4 - (span.GetDjb2HashCode() & SignMask) % entriesPerBucket; -#else - (HashCode.Combine(span) & SignMask) % entriesPerBucket; -#endif + int entryIndex = GetIndex(span); ref string? entry = ref entries.DangerousGetReferenceAt(entryIndex); @@ -308,6 +295,22 @@ public void Reset() this.entries = null; } } + + /// + /// Gets the target index for a given instance. + /// + /// The input instance. + /// The target bucket index for . + [Pure] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private int GetIndex(ReadOnlySpan span) + { +#if NETSTANDARD1_4 + return (span.GetDjb2HashCode() & SignMask) % entriesPerBucket; +#else + return (HashCode.Combine(span) & SignMask) % entriesPerBucket; +#endif + } } /// From a217cfe8136462f702c4d7f6c9870780b3f17acc Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 8 Jul 2020 01:15:56 +0200 Subject: [PATCH 11/40] Added uni tests for StringPool.Add and TryGet --- .../Buffers/Test_StringPool.cs | 64 +++++++++++++++---- 1 file changed, 53 insertions(+), 11 deletions(-) diff --git a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs index 3dd5a7164ab..34cd301139c 100644 --- a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs +++ b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs @@ -6,20 +6,13 @@ using Microsoft.Toolkit.HighPerformance.Buffers; using Microsoft.VisualStudio.TestTools.UnitTesting; +#nullable enable + namespace UnitTests.HighPerformance.Buffers { [TestClass] public class Test_StringPool { - [TestCategory("StringPool")] - [TestMethod] - public void Test_StringPool_Empty() - { - string empty = StringPool.Default.GetOrAdd(default); - - Assert.AreSame(string.Empty, empty); - } - [TestCategory("StringPool")] [TestMethod] [DataRow(0, 0)] @@ -28,7 +21,7 @@ public void Test_StringPool_Empty() [DataRow(-3248234, 22)] [DataRow(int.MinValue, int.MinValue)] [ExpectedException(typeof(ArgumentOutOfRangeException))] - public void Test_StringPool_Fail(int buckets, int entries) + public void Test_StringPool_Cctor_Fail(int buckets, int entries) { var pool = new StringPool(buckets, entries); @@ -37,7 +30,56 @@ public void Test_StringPool_Fail(int buckets, int entries) [TestCategory("StringPool")] [TestMethod] - public void Test_StringPool_Misc() + public void Test_StringPool_Add_Empty() + { + StringPool.Default.Add(string.Empty); + + bool found = StringPool.Default.TryGet(default, out string? text); + + Assert.IsTrue(found); + Assert.AreSame(string.Empty, text); + } + + [TestCategory("StringPool")] + [TestMethod] + public void Test_StringPool_Add_Misc() + { + var pool = new StringPool(); + + string + hello = nameof(hello), + world = nameof(world), + windowsCommunityToolkit = nameof(windowsCommunityToolkit); + + Assert.IsFalse(pool.TryGet(hello.AsSpan(), out _)); + Assert.IsFalse(pool.TryGet(world.AsSpan(), out _)); + Assert.IsFalse(pool.TryGet(windowsCommunityToolkit.AsSpan(), out _)); + + pool.Add(hello); + pool.Add(world); + pool.Add(windowsCommunityToolkit); + + Assert.IsTrue(pool.TryGet(hello.AsSpan(), out string? hello2)); + Assert.IsTrue(pool.TryGet(world.AsSpan(), out string? world2)); + Assert.IsTrue(pool.TryGet(windowsCommunityToolkit.AsSpan(), out string? windowsCommunityToolkit2)); + + Assert.AreSame(hello, hello2); + Assert.AreSame(world, world2); + Assert.AreSame(windowsCommunityToolkit, windowsCommunityToolkit2); + } + + [TestCategory("StringPool")] + [TestMethod] + public void Test_StringPool_GetOrAdd_Empty() + { + string empty = StringPool.Default.GetOrAdd(default); + + Assert.AreSame(string.Empty, empty); + } + + [TestCategory("StringPool")] + [TestMethod] + public void Test_StringPool_GetOrAdd_Misc() { var pool = new StringPool(); From 39949c81546a440113b1ca21204d2fe58897ee5e Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 8 Jul 2020 01:23:50 +0200 Subject: [PATCH 12/40] Fixed unit tests on UWP --- .../Buffers/Test_StringPool.cs | 37 ++++++++++++++----- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs index 34cd301139c..2e065009d70 100644 --- a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs +++ b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs @@ -40,6 +40,23 @@ public void Test_StringPool_Add_Empty() Assert.AreSame(string.Empty, text); } + [TestCategory("StringPool")] + [TestMethod] + public void Test_StringPool_Add_Single() + { + var pool = new StringPool(); + + string hello = nameof(hello); + + Assert.IsFalse(pool.TryGet(hello.AsSpan(), out _)); + + pool.Add(hello); + + Assert.IsTrue(pool.TryGet(hello.AsSpan(), out string? hello2)); + + Assert.AreSame(hello, hello2); + } + [TestCategory("StringPool")] [TestMethod] public void Test_StringPool_Add_Misc() @@ -48,23 +65,23 @@ public void Test_StringPool_Add_Misc() string hello = nameof(hello), - world = nameof(world), + helloworld = nameof(helloworld), windowsCommunityToolkit = nameof(windowsCommunityToolkit); Assert.IsFalse(pool.TryGet(hello.AsSpan(), out _)); - Assert.IsFalse(pool.TryGet(world.AsSpan(), out _)); + Assert.IsFalse(pool.TryGet(helloworld.AsSpan(), out _)); Assert.IsFalse(pool.TryGet(windowsCommunityToolkit.AsSpan(), out _)); pool.Add(hello); - pool.Add(world); + pool.Add(helloworld); pool.Add(windowsCommunityToolkit); Assert.IsTrue(pool.TryGet(hello.AsSpan(), out string? hello2)); - Assert.IsTrue(pool.TryGet(world.AsSpan(), out string? world2)); + Assert.IsTrue(pool.TryGet(helloworld.AsSpan(), out string? world2)); Assert.IsTrue(pool.TryGet(windowsCommunityToolkit.AsSpan(), out string? windowsCommunityToolkit2)); Assert.AreSame(hello, hello2); - Assert.AreSame(world, world2); + Assert.AreSame(helloworld, world2); Assert.AreSame(windowsCommunityToolkit, windowsCommunityToolkit2); } @@ -85,25 +102,25 @@ public void Test_StringPool_GetOrAdd_Misc() string hello = pool.GetOrAdd(nameof(hello).AsSpan()), - world = pool.GetOrAdd(nameof(world).AsSpan()), + helloworld = pool.GetOrAdd(nameof(helloworld).AsSpan()), windowsCommunityToolkit = pool.GetOrAdd(nameof(windowsCommunityToolkit).AsSpan()); Assert.AreEqual(nameof(hello), hello); - Assert.AreEqual(nameof(world), world); + Assert.AreEqual(nameof(helloworld), helloworld); Assert.AreEqual(nameof(windowsCommunityToolkit), windowsCommunityToolkit); Assert.AreSame(hello, pool.GetOrAdd(hello.AsSpan())); - Assert.AreSame(world, pool.GetOrAdd(world.AsSpan())); + Assert.AreSame(helloworld, pool.GetOrAdd(helloworld.AsSpan())); Assert.AreSame(windowsCommunityToolkit, pool.GetOrAdd(windowsCommunityToolkit.AsSpan())); pool.Reset(); Assert.AreEqual(nameof(hello), hello); - Assert.AreEqual(nameof(world), world); + Assert.AreEqual(nameof(helloworld), helloworld); Assert.AreEqual(nameof(windowsCommunityToolkit), windowsCommunityToolkit); Assert.AreNotSame(hello, pool.GetOrAdd(hello.AsSpan())); - Assert.AreNotSame(world, pool.GetOrAdd(world.AsSpan())); + Assert.AreNotSame(helloworld, pool.GetOrAdd(helloworld.AsSpan())); Assert.AreNotSame(windowsCommunityToolkit, pool.GetOrAdd(windowsCommunityToolkit.AsSpan())); } } From 76b3115ecfa43b5ffc8116994031f63f459830c6 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 8 Jul 2020 01:51:04 +0200 Subject: [PATCH 13/40] Added GetOrAdd(string) overload --- .../Buffers/StringPool.cs | 52 +++++++++++++++++-- 1 file changed, 49 insertions(+), 3 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index 9d03eb12d2d..b9cdab7dcd1 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -16,8 +16,8 @@ namespace Microsoft.Toolkit.HighPerformance.Buffers /// /// A configurable pool for instances. This can be used to minimize allocations /// when creating multiple instances from buffers of values. - /// The method provides a best-effort alternative to just creating a new - /// instance every time, in order to minimize the number of duplicated instances. + /// The method provides a best-effort alternative to just creating + /// a new instance every time, in order to minimize the number of duplicated instances. /// public sealed class StringPool { @@ -108,6 +108,25 @@ public void Add(string value) bucket.Add(value); } + /// + /// Gets a cached instance matching the input content, or stores the input one. + /// + /// The input instance with the contents to use. + /// A instance with the contents of , cached if possible. + public string GetOrAdd(string value) + { + if (value.Length == 0) + { + return string.Empty; + } + + int bucketIndex = value.Length % NumberOfBuckets; + + ref Bucket bucket = ref this.buckets.DangerousGetReferenceAt(bucketIndex); + + return bucket.GetOrAdd(value); + } + /// /// Gets a cached instance matching the input content, or creates a new one. /// @@ -216,7 +235,34 @@ public void Add(string value) } /// - /// Implements for the current instance. + /// Implements for the current instance. + /// + /// The input instance with the contents to use. + /// A instance with the contents of . + public string GetOrAdd(string value) + { + lock (this.dummy) + { + ref string?[]? entries = ref this.entries; + + entries ??= new string[entriesPerBucket]; + + int entryIndex = GetIndex(value.AsSpan()); + + ref string? entry = ref entries.DangerousGetReferenceAt(entryIndex); + + if (!(entry is null) && + entry.AsSpan().SequenceEqual(value.AsSpan())) + { + return entry; + } + + return entry = value; + } + } + + /// + /// Implements for the current instance. /// /// The input with the contents to use. /// A instance with the contents of , cached if possible. From bab3d82a27623d9314a997b8582b63f30b5a02b3 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 8 Jul 2020 01:52:54 +0200 Subject: [PATCH 14/40] Minor tweaks to unit tests --- .../Buffers/Test_StringPool.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs index 2e065009d70..bfc7521775f 100644 --- a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs +++ b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs @@ -34,7 +34,7 @@ public void Test_StringPool_Add_Empty() { StringPool.Default.Add(string.Empty); - bool found = StringPool.Default.TryGet(default, out string? text); + bool found = StringPool.Default.TryGet(ReadOnlySpan.Empty, out string? text); Assert.IsTrue(found); Assert.AreSame(string.Empty, text); @@ -89,7 +89,7 @@ public void Test_StringPool_Add_Misc() [TestMethod] public void Test_StringPool_GetOrAdd_Empty() { - string empty = StringPool.Default.GetOrAdd(default); + string empty = StringPool.Default.GetOrAdd(ReadOnlySpan.Empty); Assert.AreSame(string.Empty, empty); } From 1958ac6c3c3e2929fb0242b60e1f4fded182b066 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 8 Jul 2020 01:58:13 +0200 Subject: [PATCH 15/40] Minor code refactoring --- .../Buffers/StringPool.cs | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index b9cdab7dcd1..9c9e52667a2 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -284,17 +284,10 @@ public string GetOrAdd(ReadOnlySpan span) return entry; } -#if SPAN_RUNTIME_SUPPORT - return entry = new string(span); -#else - unsafe - { - fixed (char* c = span) - { - return entry = new string(c, 0, span.Length); - } - } -#endif + // ReadOnlySpan.ToString() creates a string with the span contents. + // This is equivalent to doing new string(span) on Span-enabled runtimes, + // or to using an unsafe block, a fixed statement and new string(char*, int, int). + return entry = span.ToString(); } } From 96196fee1a528b65636700ca17427757ac5e971f Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 8 Jul 2020 02:01:55 +0200 Subject: [PATCH 16/40] Added unit tests for GetOrAdd(string) overload --- .../Buffers/Test_StringPool.cs | 40 ++++++++++++++++++- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs index bfc7521775f..03fae39384e 100644 --- a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs +++ b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs @@ -87,7 +87,43 @@ public void Test_StringPool_Add_Misc() [TestCategory("StringPool")] [TestMethod] - public void Test_StringPool_GetOrAdd_Empty() + public void Test_StringPool_GetOrAdd_String_Empty() + { + string empty = StringPool.Default.GetOrAdd(string.Empty); + + Assert.AreSame(string.Empty, empty); + } + + [TestCategory("StringPool")] + [TestMethod] + public void Test_StringPool_GetOrAdd_String_Misc() + { + var pool = new StringPool(); + + string helloworld = nameof(helloworld); + + string cached = pool.GetOrAdd(helloworld); + + Assert.AreSame(helloworld, cached); + + Span span = stackalloc char[helloworld.Length]; + + helloworld.AsSpan().CopyTo(span); + + string helloworld2 = span.ToString(); + + cached = pool.GetOrAdd(helloworld2); + + Assert.AreSame(helloworld, cached); + + cached = pool.GetOrAdd(new string(helloworld.ToCharArray())); + + Assert.AreSame(helloworld, cached); + } + + [TestCategory("StringPool")] + [TestMethod] + public void Test_StringPool_GetOrAdd_ReadOnlySpan_Empty() { string empty = StringPool.Default.GetOrAdd(ReadOnlySpan.Empty); @@ -96,7 +132,7 @@ public void Test_StringPool_GetOrAdd_Empty() [TestCategory("StringPool")] [TestMethod] - public void Test_StringPool_GetOrAdd_Misc() + public void Test_StringPool_GetOrAdd_ReadOnlySpan_Misc() { var pool = new StringPool(); From cc143714022de85ab1c47d61263645b65552c1ef Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 8 Jul 2020 12:03:51 +0200 Subject: [PATCH 17/40] Added GetOrAdd(ReadOnlySpan, Encoding) overload --- .../Buffers/StringPool.cs | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index 9c9e52667a2..c8c9f2c37e4 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -6,6 +6,7 @@ using System.Diagnostics.CodeAnalysis; using System.Diagnostics.Contracts; using System.Runtime.CompilerServices; +using System.Text; using Microsoft.Toolkit.HighPerformance.Extensions; #if !NETSTANDARD1_4 using Microsoft.Toolkit.HighPerformance.Helpers; @@ -146,6 +147,32 @@ public string GetOrAdd(ReadOnlySpan span) return bucket.GetOrAdd(span); } + /// + /// Gets a cached instance matching the input content (converted to Unicode), or creates a new one. + /// + /// The input with the contents to use, in a specified encoding. + /// The instance to use to decode the contents of . + /// A instance with the contents of , cached if possible. + public unsafe string GetOrAdd(ReadOnlySpan span, Encoding encoding) + { + if (span.IsEmpty) + { + return string.Empty; + } + + int maxLength = encoding.GetMaxCharCount(span.Length); + + using SpanOwner buffer = SpanOwner.Allocate(maxLength); + + fixed (byte* source = span) + fixed (char* destination = &buffer.DangerousGetReference()) + { + int effectiveLength = encoding.GetChars(source, span.Length, destination, maxLength); + + return GetOrAdd(new ReadOnlySpan(destination, effectiveLength)); + } + } + /// /// Tries to get a cached instance matching the input content, if present. /// From cfb6d230ef623e325e04c6bbd06ae8dfc1ab1a54 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 8 Jul 2020 12:08:11 +0200 Subject: [PATCH 18/40] Added tests for GetOrAdd with encoding --- .../Buffers/Test_StringPool.cs | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs index 03fae39384e..249620ba440 100644 --- a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs +++ b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System; +using System.Text; using Microsoft.Toolkit.HighPerformance.Buffers; using Microsoft.VisualStudio.TestTools.UnitTesting; @@ -159,5 +160,41 @@ public void Test_StringPool_GetOrAdd_ReadOnlySpan_Misc() Assert.AreNotSame(helloworld, pool.GetOrAdd(helloworld.AsSpan())); Assert.AreNotSame(windowsCommunityToolkit, pool.GetOrAdd(windowsCommunityToolkit.AsSpan())); } + + [TestCategory("StringPool")] + [TestMethod] + public void Test_StringPool_GetOrAdd_Encoding_Empty() + { + string empty = StringPool.Default.GetOrAdd(ReadOnlySpan.Empty, Encoding.ASCII); + + Assert.AreSame(string.Empty, empty); + } + + [TestCategory("StringPool")] + [TestMethod] + public void Test_StringPool_GetOrAdd_Encoding_Misc() + { + var pool = new StringPool(); + + string helloworld = nameof(helloworld); + + pool.Add(helloworld); + + Span span = Encoding.UTF8.GetBytes(nameof(helloworld)); + + string helloworld2 = pool.GetOrAdd(span, Encoding.UTF8); + + Assert.AreSame(helloworld, helloworld2); + + string windowsCommunityToolkit = nameof(windowsCommunityToolkit); + + Span span2 = Encoding.ASCII.GetBytes(windowsCommunityToolkit); + + string + windowsCommunityToolkit2 = pool.GetOrAdd(span2, Encoding.ASCII), + windowsCommunityToolkit3 = pool.GetOrAdd(windowsCommunityToolkit); + + Assert.AreSame(windowsCommunityToolkit2, windowsCommunityToolkit3); + } } } From 3134668217bc3781ed2fccdf5993d6566e63bcb8 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 8 Jul 2020 16:12:18 +0200 Subject: [PATCH 19/40] Improved StringPool distribution We're now using the content hashcode to calculate both the bucket index and the entry index, so that even if a given StringPool instance is used just with a series of strings with a very small range of different lengths, all the available buckets will still be used --- .../Buffers/StringPool.cs | 85 +++++++++++-------- 1 file changed, 51 insertions(+), 34 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index c8c9f2c37e4..bfa98446209 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -32,6 +32,12 @@ public sealed class StringPool /// private const int DefaultEntriesPerBucket = 64; + /// + /// A bitmask used to avoid branches when computing the absolute value of an . + /// This will ignore overflows, as we just need this to map hashcodes into the valid bucket range. + /// + private const int SignMask = ~(1 << 31); + /// /// The current array of instances in use. /// @@ -102,11 +108,13 @@ public void Add(string value) return; } - int bucketIndex = value.Length % NumberOfBuckets; + int + hashcode = GetHashCode(value.AsSpan()), + bucketIndex = hashcode % NumberOfBuckets; ref Bucket bucket = ref this.buckets.DangerousGetReferenceAt(bucketIndex); - bucket.Add(value); + bucket.Add(value, hashcode); } /// @@ -121,11 +129,13 @@ public string GetOrAdd(string value) return string.Empty; } - int bucketIndex = value.Length % NumberOfBuckets; + int + hashcode = GetHashCode(value.AsSpan()), + bucketIndex = hashcode % NumberOfBuckets; ref Bucket bucket = ref this.buckets.DangerousGetReferenceAt(bucketIndex); - return bucket.GetOrAdd(value); + return bucket.GetOrAdd(value, hashcode); } /// @@ -140,11 +150,13 @@ public string GetOrAdd(ReadOnlySpan span) return string.Empty; } - int bucketIndex = span.Length % NumberOfBuckets; + int + hashcode = GetHashCode(span), + bucketIndex = hashcode % NumberOfBuckets; ref Bucket bucket = ref this.buckets.DangerousGetReferenceAt(bucketIndex); - return bucket.GetOrAdd(span); + return bucket.GetOrAdd(span, hashcode); } /// @@ -188,11 +200,13 @@ public bool TryGet(ReadOnlySpan span, [NotNullWhen(true)] out string? valu return true; } - int bucketIndex = span.Length % NumberOfBuckets; + int + hashcode = GetHashCode(span), + bucketIndex = hashcode % NumberOfBuckets; ref Bucket bucket = ref this.buckets.DangerousGetReferenceAt(bucketIndex); - return bucket.TryGet(span, out value); + return bucket.TryGet(span, hashcode, out value); } /// @@ -211,12 +225,6 @@ public void Reset() /// private struct Bucket { - /// - /// A bitmask used to avoid branches when computing the absolute value of an . - /// This will ignore overflows, as we just need this to map hashcodes into the valid bucket range. - /// - private const int SignMask = ~(1 << 31); - /// /// The number of entries being used in the current instance. /// @@ -247,7 +255,8 @@ public Bucket(int entriesPerBucket) /// Implements for the current instance. /// /// The input instance to cache. - public void Add(string value) + /// The precomputed hashcode for . + public void Add(string value, int hashcode) { lock (this.dummy) { @@ -255,7 +264,7 @@ public void Add(string value) entries ??= new string[entriesPerBucket]; - int entryIndex = GetIndex(value.AsSpan()); + int entryIndex = hashcode % this.entriesPerBucket; entries.DangerousGetReferenceAt(entryIndex) = value; } @@ -265,8 +274,9 @@ public void Add(string value) /// Implements for the current instance. /// /// The input instance with the contents to use. + /// The precomputed hashcode for . /// A instance with the contents of . - public string GetOrAdd(string value) + public string GetOrAdd(string value, int hashcode) { lock (this.dummy) { @@ -274,7 +284,7 @@ public string GetOrAdd(string value) entries ??= new string[entriesPerBucket]; - int entryIndex = GetIndex(value.AsSpan()); + int entryIndex = hashcode % this.entriesPerBucket; ref string? entry = ref entries.DangerousGetReferenceAt(entryIndex); @@ -292,8 +302,9 @@ public string GetOrAdd(string value) /// Implements for the current instance. /// /// The input with the contents to use. + /// The precomputed hashcode for . /// A instance with the contents of , cached if possible. - public string GetOrAdd(ReadOnlySpan span) + public string GetOrAdd(ReadOnlySpan span, int hashcode) { lock (this.dummy) { @@ -301,7 +312,7 @@ public string GetOrAdd(ReadOnlySpan span) entries ??= new string[entriesPerBucket]; - int entryIndex = GetIndex(span); + int entryIndex = hashcode % this.entriesPerBucket; ref string? entry = ref entries.DangerousGetReferenceAt(entryIndex); @@ -322,9 +333,10 @@ public string GetOrAdd(ReadOnlySpan span) /// Implements for the current instance. /// /// The input with the contents to use. + /// The precomputed hashcode for . /// The resulting cached instance, if present /// Whether or not the target instance was found. - public bool TryGet(ReadOnlySpan span, [NotNullWhen(true)] out string? value) + public bool TryGet(ReadOnlySpan span, int hashcode, [NotNullWhen(true)] out string? value) { lock (this.dummy) { @@ -332,7 +344,7 @@ public bool TryGet(ReadOnlySpan span, [NotNullWhen(true)] out string? valu if (!(entries is null)) { - int entryIndex = GetIndex(span); + int entryIndex = hashcode % this.entriesPerBucket; ref string? entry = ref entries.DangerousGetReferenceAt(entryIndex); @@ -361,22 +373,27 @@ public void Reset() this.entries = null; } } + } - /// - /// Gets the target index for a given instance. - /// - /// The input instance. - /// The target bucket index for . - [Pure] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private int GetIndex(ReadOnlySpan span) - { + /// + /// Gets the (positive) hashcode for a given instance. + /// + /// The input instance. + /// The hashcode for . + [Pure] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int GetHashCode(ReadOnlySpan span) + { + // We calculate the content hashcode for the input span and + // perform a XOR with the input length, to try to reduce collisions + // in case two sequences of different length result in the same one. + return + span.Length ^ #if NETSTANDARD1_4 - return (span.GetDjb2HashCode() & SignMask) % entriesPerBucket; + (span.GetDjb2HashCode() & SignMask); #else - return (HashCode.Combine(span) & SignMask) % entriesPerBucket; + (HashCode.Combine(span) & SignMask); #endif - } } /// From 5ebb510ca11361c2b176bc2fd8fab18a3729430d Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Thu, 9 Jul 2020 18:45:12 +0200 Subject: [PATCH 20/40] Restructured buckets/entries setup with target size --- .../Buffers/StringPool.cs | 137 ++++++++++++++---- .../Buffers/Test_StringPool.cs | 45 +++++- 2 files changed, 143 insertions(+), 39 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index bfa98446209..1a218ccbf80 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -5,6 +5,9 @@ using System; using System.Diagnostics.CodeAnalysis; using System.Diagnostics.Contracts; +#if NETCOREAPP3_1 +using System.Numerics; +#endif using System.Runtime.CompilerServices; using System.Text; using Microsoft.Toolkit.HighPerformance.Extensions; @@ -23,14 +26,14 @@ namespace Microsoft.Toolkit.HighPerformance.Buffers public sealed class StringPool { /// - /// The default number of instances in . + /// The size for the instance. /// - private const int DefaultNumberOfBuckets = 20; + private const int DefaultSize = 2048; /// - /// The default number of entries stored in each instance. + /// The minimum size for instances. /// - private const int DefaultEntriesPerBucket = 64; + private const int MinimumSize = 32; /// /// A bitmask used to avoid branches when computing the absolute value of an . @@ -43,59 +46,129 @@ public sealed class StringPool /// private readonly Bucket[] buckets; + /// + /// The total number of buckets in use. + /// + private readonly int numberOfBuckets; + /// /// Initializes a new instance of the class. /// public StringPool() - : this(DefaultNumberOfBuckets, DefaultEntriesPerBucket) + : this(DefaultSize) { } /// /// Initializes a new instance of the class. /// - /// The total number of buckets to use. - /// The maximum number of entries per bucket. - public StringPool(int numberOfBuckets, int entriesPerBucket) + /// The minimum size for the pool to create. + public StringPool(int minimumSize) { - if (numberOfBuckets <= 0) + if (minimumSize <= 0) + { + ThrowArgumentOutOfRangeException(); + } + + // Set the minimum size + minimumSize = Math.Max(minimumSize, MinimumSize); + + // Calculates the rounded up factors for a specific size/factor pair + static void FindFactors(int size, int factor, out int x, out int y) + { + double + a = Math.Sqrt((double)size / factor), + b = factor * a; + + x = RoundUpPowerOfTwo((int)a); + y = RoundUpPowerOfTwo((int)b); + } + + // We want to find two powers of 2 factors that produce a number + // that is at least equal to the requested size. In order to find the + // combination producing the optimal factors (with the product being as + // close as possible to the requested size), we test a number of ratios + // that we consider acceptable, and pick the best results produced. + // The ratio between buckets influences the number of objects being allocated, + // as well as the multithreading performance when locking on buckets. + // We still want to contraint this number to avoid situations where we + // have a way too high number of buckets compared to total size. + FindFactors(minimumSize, 2, out int x2, out int y2); + FindFactors(minimumSize, 3, out int x3, out int y3); + FindFactors(minimumSize, 4, out int x4, out int y4); + + int + p2 = x2 * y2, + p3 = x3 * y3, + p4 = x4 * y4; + + if (p3 < p2) { - ThrowArgumentOutOfRangeException(nameof(numberOfBuckets)); + p2 = p3; + x2 = x3; + y2 = y3; } - if (entriesPerBucket <= 0) + if (p4 < p2) { - ThrowArgumentOutOfRangeException(nameof(entriesPerBucket)); + p2 = p4; + x2 = x4; + y2 = y4; } - Span span = this.buckets = new Bucket[numberOfBuckets]; + Span span = this.buckets = new Bucket[x2]; // We preallocate the buckets in advance, since each bucket only contains the // array field, which is not preinitialized, so the allocations are minimal. // This lets us lock on each individual buckets when retrieving a string instance. foreach (ref Bucket bucket in span) { - bucket = new Bucket(entriesPerBucket); + bucket = new Bucket(y2); } - NumberOfBuckets = numberOfBuckets; - EntriesPerBucket = entriesPerBucket; + this.numberOfBuckets = x2; + + Size = p2; } /// - /// Gets the default instance. + /// Rounds up an value to a power of 2. /// - public static StringPool Default { get; } = new StringPool(); + /// The input value to round up. + /// The smallest power of two greater than or equal to . + [Pure] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int RoundUpPowerOfTwo(int x) + { +#if NETCOREAPP3_1 + return 1 << (32 - BitOperations.LeadingZeroCount((uint)(x - 1))); +#else + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + + return x; +#endif + } /// - /// Gets the total number of buckets in use. + /// Gets the default instance. /// - public int NumberOfBuckets { get; } + public static StringPool Default { get; } = new StringPool(); /// - /// Gets the maximum number of entries stored in each bucket. + /// Gets the total number of that can be stored in the current instance. + /// + /// This property only refers to the total number of available slots, ignoring collisions. + /// The requested size should always be set to a higher value than the target number of items + /// that will be stored in the cache, to reduce the number of collisions when caching values. + /// /// - public int EntriesPerBucket { get; } + public int Size { get; } /// /// Stores a instance in the internal cache. @@ -110,7 +183,7 @@ public void Add(string value) int hashcode = GetHashCode(value.AsSpan()), - bucketIndex = hashcode % NumberOfBuckets; + bucketIndex = hashcode & (this.numberOfBuckets - 1); ref Bucket bucket = ref this.buckets.DangerousGetReferenceAt(bucketIndex); @@ -131,7 +204,7 @@ public string GetOrAdd(string value) int hashcode = GetHashCode(value.AsSpan()), - bucketIndex = hashcode % NumberOfBuckets; + bucketIndex = hashcode & (this.numberOfBuckets - 1); ref Bucket bucket = ref this.buckets.DangerousGetReferenceAt(bucketIndex); @@ -152,7 +225,7 @@ public string GetOrAdd(ReadOnlySpan span) int hashcode = GetHashCode(span), - bucketIndex = hashcode % NumberOfBuckets; + bucketIndex = hashcode & (this.numberOfBuckets - 1); ref Bucket bucket = ref this.buckets.DangerousGetReferenceAt(bucketIndex); @@ -202,7 +275,7 @@ public bool TryGet(ReadOnlySpan span, [NotNullWhen(true)] out string? valu int hashcode = GetHashCode(span), - bucketIndex = hashcode % NumberOfBuckets; + bucketIndex = hashcode & (this.numberOfBuckets - 1); ref Bucket bucket = ref this.buckets.DangerousGetReferenceAt(bucketIndex); @@ -264,7 +337,7 @@ public void Add(string value, int hashcode) entries ??= new string[entriesPerBucket]; - int entryIndex = hashcode % this.entriesPerBucket; + int entryIndex = hashcode & (this.entriesPerBucket - 1); entries.DangerousGetReferenceAt(entryIndex) = value; } @@ -284,7 +357,7 @@ public string GetOrAdd(string value, int hashcode) entries ??= new string[entriesPerBucket]; - int entryIndex = hashcode % this.entriesPerBucket; + int entryIndex = hashcode & (this.entriesPerBucket - 1); ref string? entry = ref entries.DangerousGetReferenceAt(entryIndex); @@ -312,7 +385,7 @@ public string GetOrAdd(ReadOnlySpan span, int hashcode) entries ??= new string[entriesPerBucket]; - int entryIndex = hashcode % this.entriesPerBucket; + int entryIndex = hashcode & (this.entriesPerBucket - 1); ref string? entry = ref entries.DangerousGetReferenceAt(entryIndex); @@ -344,7 +417,7 @@ public bool TryGet(ReadOnlySpan span, int hashcode, [NotNullWhen(true)] ou if (!(entries is null)) { - int entryIndex = hashcode % this.entriesPerBucket; + int entryIndex = hashcode & (this.entriesPerBucket - 1); ref string? entry = ref entries.DangerousGetReferenceAt(entryIndex); @@ -399,9 +472,9 @@ private static int GetHashCode(ReadOnlySpan span) /// /// Throws an when the requested size exceeds the capacity. /// - private static void ThrowArgumentOutOfRangeException(string name) + private static void ThrowArgumentOutOfRangeException() { - throw new ArgumentOutOfRangeException(name, "The input parameter must be greater than 0"); + throw new ArgumentOutOfRangeException("minimumSize", "The requested size must be greater than 0"); } } } diff --git a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs index 249620ba440..e9da8268b70 100644 --- a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs +++ b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System; +using System.Reflection; using System.Text; using Microsoft.Toolkit.HighPerformance.Buffers; using Microsoft.VisualStudio.TestTools.UnitTesting; @@ -16,15 +17,45 @@ public class Test_StringPool { [TestCategory("StringPool")] [TestMethod] - [DataRow(0, 0)] - [DataRow(1, 0)] - [DataRow(0, 1)] - [DataRow(-3248234, 22)] - [DataRow(int.MinValue, int.MinValue)] + [DataRow(44, 4, 16, 64)] + [DataRow(76, 8, 16, 128)] + [DataRow(128, 8, 16, 128)] + [DataRow(179, 8, 32, 256)] + [DataRow(366, 16, 32, 512)] + [DataRow(512, 16, 32, 512)] + [DataRow(890, 16, 64, 1024)] + [DataRow(1280, 32, 64, 2048)] + [DataRow(2445, 32, 128, 4096)] + [DataRow(5000, 64, 128, 8192)] + [DataRow(8000, 64, 128, 8192)] + [DataRow(12442, 64, 256, 16384)] + [DataRow(234000, 256, 1024, 262144)] + public void Test_StringPool_Cctor_Ok(int minimumSize, int x, int y, int size) + { + var pool = new StringPool(minimumSize); + + Assert.AreEqual(size, pool.Size); + + Array buckets = (Array)typeof(StringPool).GetField("buckets", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(pool); + + Assert.AreEqual(x, buckets.Length); + + Type bucketType = Type.GetType("Microsoft.Toolkit.HighPerformance.Buffers.StringPool+Bucket, Microsoft.Toolkit.HighPerformance"); + + int bucketSize = (int)bucketType.GetField("entriesPerBucket", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(buckets.GetValue(0)); + + Assert.AreEqual(y, bucketSize); + } + + [TestCategory("StringPool")] + [TestMethod] + [DataRow(0)] + [DataRow(-3248234)] + [DataRow(int.MinValue)] [ExpectedException(typeof(ArgumentOutOfRangeException))] - public void Test_StringPool_Cctor_Fail(int buckets, int entries) + public void Test_StringPool_Cctor_Fail(int size) { - var pool = new StringPool(buckets, entries); + var pool = new StringPool(size); Assert.Fail(); } From 14896611b4c2e6133b02d201e7a15df083938b75 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Thu, 9 Jul 2020 18:46:16 +0200 Subject: [PATCH 21/40] Reverted hashcode computation to just SIMD djb2 --- Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index 1a218ccbf80..04012c29303 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -457,15 +457,10 @@ public void Reset() [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int GetHashCode(ReadOnlySpan span) { - // We calculate the content hashcode for the input span and - // perform a XOR with the input length, to try to reduce collisions - // in case two sequences of different length result in the same one. - return - span.Length ^ #if NETSTANDARD1_4 - (span.GetDjb2HashCode() & SignMask); + return span.GetDjb2HashCode() & SignMask; #else - (HashCode.Combine(span) & SignMask); + return HashCode.Combine(span) & SignMask; #endif } From 2fd35c8aa204e61608d3f0a0a33b2b8767cc87a1 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Mon, 13 Jul 2020 16:26:35 +0200 Subject: [PATCH 22/40] Updated StringPool with new priority-map backend --- .../Buffers/StringPool.cs | 515 ++++++++++++++---- .../Buffers/Test_StringPool.cs | 10 +- 2 files changed, 409 insertions(+), 116 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index 04012c29303..a9956477774 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System; +using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Diagnostics.Contracts; #if NETCOREAPP3_1 @@ -15,6 +16,8 @@ using Microsoft.Toolkit.HighPerformance.Helpers; #endif +#nullable enable + namespace Microsoft.Toolkit.HighPerformance.Buffers { /// @@ -22,6 +25,9 @@ namespace Microsoft.Toolkit.HighPerformance.Buffers /// when creating multiple instances from buffers of values. /// The method provides a best-effort alternative to just creating /// a new instance every time, in order to minimize the number of duplicated instances. + /// The type will internally manage a highly efficient priority queue for the + /// cached instances, so that when the full capacity is reached, the last recently + /// used values will be automatically discarded to leave room for new values to cache. /// public sealed class StringPool { @@ -36,20 +42,14 @@ public sealed class StringPool private const int MinimumSize = 32; /// - /// A bitmask used to avoid branches when computing the absolute value of an . - /// This will ignore overflows, as we just need this to map hashcodes into the valid bucket range. + /// The current array of instances in use. /// - private const int SignMask = ~(1 << 31); + private readonly FixedSizePriorityMap[] maps; /// - /// The current array of instances in use. + /// The total number of maps in use. /// - private readonly Bucket[] buckets; - - /// - /// The total number of buckets in use. - /// - private readonly int numberOfBuckets; + private readonly int numberOfMaps; /// /// Initializes a new instance of the class. @@ -89,10 +89,10 @@ static void FindFactors(int size, int factor, out int x, out int y) // combination producing the optimal factors (with the product being as // close as possible to the requested size), we test a number of ratios // that we consider acceptable, and pick the best results produced. - // The ratio between buckets influences the number of objects being allocated, - // as well as the multithreading performance when locking on buckets. + // The ratio between maps influences the number of objects being allocated, + // as well as the multithreading performance when locking on maps. // We still want to contraint this number to avoid situations where we - // have a way too high number of buckets compared to total size. + // have a way too high number of maps compared to total size. FindFactors(minimumSize, 2, out int x2, out int y2); FindFactors(minimumSize, 3, out int x3, out int y3); FindFactors(minimumSize, 4, out int x4, out int y4); @@ -116,17 +116,17 @@ static void FindFactors(int size, int factor, out int x, out int y) y2 = y4; } - Span span = this.buckets = new Bucket[x2]; + Span span = this.maps = new FixedSizePriorityMap[x2]; - // We preallocate the buckets in advance, since each bucket only contains the + // We preallocate the maps in advance, since each bucket only contains the // array field, which is not preinitialized, so the allocations are minimal. - // This lets us lock on each individual buckets when retrieving a string instance. - foreach (ref Bucket bucket in span) + // This lets us lock on each individual maps when retrieving a string instance. + foreach (ref FixedSizePriorityMap map in span) { - bucket = new Bucket(y2); + map = new FixedSizePriorityMap(y2); } - this.numberOfBuckets = x2; + this.numberOfMaps = x2; Size = p2; } @@ -183,11 +183,14 @@ public void Add(string value) int hashcode = GetHashCode(value.AsSpan()), - bucketIndex = hashcode & (this.numberOfBuckets - 1); + bucketIndex = hashcode & (this.numberOfMaps - 1); - ref Bucket bucket = ref this.buckets.DangerousGetReferenceAt(bucketIndex); + ref FixedSizePriorityMap map = ref this.maps.DangerousGetReferenceAt(bucketIndex); - bucket.Add(value, hashcode); + lock (map.SyncRoot) + { + map.Add(value, hashcode); + } } /// @@ -204,11 +207,14 @@ public string GetOrAdd(string value) int hashcode = GetHashCode(value.AsSpan()), - bucketIndex = hashcode & (this.numberOfBuckets - 1); + bucketIndex = hashcode & (this.numberOfMaps - 1); - ref Bucket bucket = ref this.buckets.DangerousGetReferenceAt(bucketIndex); + ref FixedSizePriorityMap map = ref this.maps.DangerousGetReferenceAt(bucketIndex); - return bucket.GetOrAdd(value, hashcode); + lock (map.SyncRoot) + { + return map.GetOrAdd(value, hashcode); + } } /// @@ -225,11 +231,14 @@ public string GetOrAdd(ReadOnlySpan span) int hashcode = GetHashCode(span), - bucketIndex = hashcode & (this.numberOfBuckets - 1); + bucketIndex = hashcode & (this.numberOfMaps - 1); - ref Bucket bucket = ref this.buckets.DangerousGetReferenceAt(bucketIndex); + ref FixedSizePriorityMap map = ref this.maps.DangerousGetReferenceAt(bucketIndex); - return bucket.GetOrAdd(span, hashcode); + lock (map.SyncRoot) + { + return map.GetOrAdd(span, hashcode); + } } /// @@ -275,175 +284,459 @@ public bool TryGet(ReadOnlySpan span, [NotNullWhen(true)] out string? valu int hashcode = GetHashCode(span), - bucketIndex = hashcode & (this.numberOfBuckets - 1); + bucketIndex = hashcode & (this.numberOfMaps - 1); - ref Bucket bucket = ref this.buckets.DangerousGetReferenceAt(bucketIndex); + ref FixedSizePriorityMap map = ref this.maps.DangerousGetReferenceAt(bucketIndex); - return bucket.TryGet(span, hashcode, out value); + lock (map.SyncRoot) + { + return map.TryGet(span, hashcode, out value); + } } /// - /// Resets the current instance and its associated buckets. + /// Resets the current instance and its associated maps. /// public void Reset() { - foreach (ref Bucket bucket in this.buckets.AsSpan()) + foreach (ref FixedSizePriorityMap map in this.maps.AsSpan()) { - bucket.Reset(); + lock (map.SyncRoot) + { + map.Reset(); + } } } /// - /// A configurable bucket containing a group of cached instances. + /// A configurable map containing a group of cached instances. /// - private struct Bucket + private struct FixedSizePriorityMap { /// - /// The number of entries being used in the current instance. + /// The index representing the end of a given list. + /// + private const int EndOfList = -1; + + /// + /// The index representing the negative offset for the start of the free list. /// - private readonly int entriesPerBucket; + private const int StartOfFreeList = -3; /// - /// A dummy used for synchronization. + /// The array of 1-based indices for the items stored in . /// - private readonly object dummy; + private readonly int[] buckets; /// - /// The array of entries currently in use. + /// The array of currently cached entries (ie. the lists for each hash group). /// - private string?[]? entries; + private readonly MapEntry[] mapEntries; /// - /// Initializes a new instance of the struct. + /// The array of priority values associated to each item stored in . /// - /// The number of entries being used in the current instance. - public Bucket(int entriesPerBucket) + private readonly HeapEntry[] heapEntries; + + /// + /// The current number of items stored in the map. + /// + private int count; + + /// + /// The 1-based index for the start of the free list within . + /// + private int freeList; + + /// + /// A type representing a map entry, ie. a node in a given list. + /// + private struct MapEntry { - this.entriesPerBucket = entriesPerBucket; - this.dummy = new object(); - this.entries = null; + /// + /// The precomputed hashcode for . + /// + public int HashCode; + + /// + /// The instance cached in this entry. + /// + public string Value; + + /// + /// The 0-based index for the next node in the current list. + /// + public int NextIndex; + + /// + /// The 0-based index for the heap entry corresponding to the current node. + /// + public int HeapIndex; } /// - /// Implements for the current instance. + /// A type representing a heap entry, used to associate priority to each item. /// - /// The input instance to cache. - /// The precomputed hashcode for . - public void Add(string value, int hashcode) + private struct HeapEntry { - lock (this.dummy) - { - ref string?[]? entries = ref this.entries; + /// + /// The timestamp for the current entry (ie. the priority for the item). + /// + public long Timestamp; + + /// + /// The instance cached in the associated map entry. + /// + public string Value; + + /// + /// The 0-based index for the map entry corresponding to the current item. + /// + public int MapIndex; + } + + /// + /// Initializes a new instance of the struct. + /// + /// The fixed capacity of the current map. + public FixedSizePriorityMap(int capacity) + { + this.buckets = new int[capacity]; + this.mapEntries = new MapEntry[capacity]; + this.heapEntries = new HeapEntry[capacity]; + this.count = 0; + this.freeList = EndOfList; + } - entries ??= new string[entriesPerBucket]; + /// + /// Gets an that can be used to synchronize access to the current instance. + /// + public object SyncRoot + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => this.buckets; + } - int entryIndex = hashcode & (this.entriesPerBucket - 1); + /// + /// Implements for the current instance. + /// + /// The input instance to cache. + /// The precomputed hashcode for . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe void Add(string value, int hashcode) + { + ref string? target = ref TryGet(value.AsSpan(), hashcode); - entries.DangerousGetReferenceAt(entryIndex) = value; + if (Unsafe.AreSame(ref target!, ref Unsafe.AsRef(null))) + { + Insert(value, hashcode); + } + else + { + target = value; } } /// - /// Implements for the current instance. + /// Implements for the current instance. /// /// The input instance with the contents to use. /// The precomputed hashcode for . /// A instance with the contents of . + [MethodImpl(MethodImplOptions.AggressiveInlining)] public string GetOrAdd(string value, int hashcode) { - lock (this.dummy) + if (TryGet(value.AsSpan(), hashcode, out string? result)) { - ref string?[]? entries = ref this.entries; - - entries ??= new string[entriesPerBucket]; - - int entryIndex = hashcode & (this.entriesPerBucket - 1); - - ref string? entry = ref entries.DangerousGetReferenceAt(entryIndex); + return result; + } - if (!(entry is null) && - entry.AsSpan().SequenceEqual(value.AsSpan())) - { - return entry; - } + Insert(value, hashcode); - return entry = value; - } + return value; } /// - /// Implements for the current instance. + /// Implements for the current instance. /// /// The input with the contents to use. /// The precomputed hashcode for . /// A instance with the contents of , cached if possible. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public string GetOrAdd(ReadOnlySpan span, int hashcode) { - lock (this.dummy) + if (TryGet(span, hashcode, out string? result)) { - ref string?[]? entries = ref this.entries; + return result!; + } - entries ??= new string[entriesPerBucket]; + string value = span.ToString(); - int entryIndex = hashcode & (this.entriesPerBucket - 1); + Insert(value, hashcode); - ref string? entry = ref entries.DangerousGetReferenceAt(entryIndex); + return value; + } - if (!(entry is null) && - entry.AsSpan().SequenceEqual(span)) - { - return entry; - } + /// + /// Implements for the current instance. + /// + /// The input with the contents to use. + /// The precomputed hashcode for . + /// The resulting cached instance, if present + /// Whether or not the target instance was found. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe bool TryGet(ReadOnlySpan span, int hashcode, [NotNullWhen(true)] out string? value) + { + ref string? result = ref TryGet(span, hashcode); + + if (!Unsafe.AreSame(ref result!, ref Unsafe.AsRef(null))) + { + value = result; - // ReadOnlySpan.ToString() creates a string with the span contents. - // This is equivalent to doing new string(span) on Span-enabled runtimes, - // or to using an unsafe block, a fixed statement and new string(char*, int, int). - return entry = span.ToString(); + return true; } + + value = null; + + return false; } /// - /// Implements for the current instance. + /// Resets the current instance and throws away all the cached values. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Reset() + { + this.buckets.AsSpan().Clear(); + this.mapEntries.AsSpan().Clear(); + this.heapEntries.AsSpan().Clear(); + this.count = 0; + this.freeList = EndOfList; + } + + /// + /// Tries to get a target instance, if it exists, and returns a reference to it. /// /// The input with the contents to use. /// The precomputed hashcode for . - /// The resulting cached instance, if present - /// Whether or not the target instance was found. - public bool TryGet(ReadOnlySpan span, int hashcode, [NotNullWhen(true)] out string? value) + /// A reference to the slot where the target instance could be. + [MethodImpl(MethodImplOptions.NoInlining)] + private unsafe ref string? TryGet(ReadOnlySpan span, int hashcode) { - lock (this.dummy) + ref MapEntry mapEntriesRef = ref this.mapEntries.DangerousGetReference(); + ref MapEntry entry = ref Unsafe.AsRef(null); + int + bucketIndex = hashcode & (this.buckets.Length - 1), + length = this.buckets.Length; + + for (int i = this.buckets.DangerousGetReferenceAt(bucketIndex) - 1; + (uint)i < (uint)length; + i = entry.NextIndex) { - ref string?[]? entries = ref this.entries; + entry = ref Unsafe.Add(ref mapEntriesRef, (IntPtr)(void*)(uint)i); - if (!(entries is null)) + if (entry.HashCode == hashcode && + entry.Value.AsSpan().SequenceEqual(span)) { - int entryIndex = hashcode & (this.entriesPerBucket - 1); + UpdateTimestamp(ref entry.HeapIndex); + + return ref entry.Value!; + } + } + + return ref Unsafe.AsRef(null); + } + + /// + /// Inserts a new instance in the current map, freeing up a space if needed. + /// + /// The new instance to store. + /// The precomputed hashcode for . + [MethodImpl(MethodImplOptions.NoInlining)] + private unsafe void Insert(string value, int hashcode) + { + ref int bucketsRef = ref this.buckets.DangerousGetReference(); + ref MapEntry mapEntriesRef = ref this.mapEntries.DangerousGetReference(); + ref HeapEntry heapEntriesRef = ref this.heapEntries.DangerousGetReference(); + int entryIndex, heapIndex; + + // If the free list is not empty, get that map node and update the field + if (this.freeList != EndOfList) + { + entryIndex = this.freeList; + heapIndex = this.count; + + int nextIndex = Unsafe.Add(ref mapEntriesRef, (IntPtr)(void*)(uint)entryIndex).NextIndex; + + // Update the free list pointer: either to the next free node or to the invalid marker + this.freeList = StartOfFreeList - nextIndex; + } + else if (this.count == this.mapEntries.Length) + { + // If the current map is empty, first get the oldest value, which is + // always the first item in the heap. Then, free up a slot by + // removing that, and insert the new value in that empty location. + string key = heapEntriesRef.Value; + + entryIndex = Remove(key); + heapIndex = 0; + } + else + { + entryIndex = this.count; + heapIndex = this.count; + } + + int bucketIndex = hashcode & (this.buckets.Length - 1); + ref int targetBucket = ref Unsafe.Add(ref bucketsRef, (IntPtr)(void*)(uint)bucketIndex); + ref MapEntry targetMapEntry = ref Unsafe.Add(ref mapEntriesRef, (IntPtr)(void*)(uint)entryIndex); + ref HeapEntry targetHeapEntry = ref Unsafe.Add(ref heapEntriesRef, (IntPtr)(void*)(uint)heapIndex); + + // Assign the values in the new map entry + targetMapEntry.HashCode = hashcode; + targetMapEntry.Value = value; + targetMapEntry.NextIndex = targetBucket - 1; + targetMapEntry.HeapIndex = heapIndex; + + // Update the bucket slot and the current count + targetBucket = entryIndex + 1; + this.count++; + + // Link the heap node with the current entry + targetHeapEntry.Value = value; + targetHeapEntry.MapIndex = entryIndex; + + // Update the timestamp and balance the heap again + UpdateTimestamp(ref targetMapEntry.HeapIndex); + } - ref string? entry = ref entries.DangerousGetReferenceAt(entryIndex); + /// + /// Removes a specified instance from the map to free up one slot. + /// + /// The target instance to remove. + /// The index of the freed up slot within . + /// The input instance needs to already exist in the map. + [MethodImpl(MethodImplOptions.NoInlining)] + private unsafe int Remove(string key) + { + ref MapEntry mapEntriesRef = ref this.mapEntries.DangerousGetReference(); + int + hashcode = StringPool.GetHashCode(key.AsSpan()), + bucketIndex = hashcode & (this.buckets.Length - 1), + entryIndex = this.buckets.DangerousGetReferenceAt(bucketIndex) - 1, + lastIndex = EndOfList; + + // We can just have an undefined loop, as the input + // value we're looking for is guaranteed to be present + while (true) + { + ref MapEntry candidate = ref Unsafe.Add(ref mapEntriesRef, (IntPtr)(void*)(uint)entryIndex); - if (!(entry is null) && - entry.AsSpan().SequenceEqual(span)) + // Check the current value for a match + if (candidate.HashCode == hashcode && + candidate.Value.Equals(key)) + { + // If this was not the first list node, update the parent as well + if (lastIndex != EndOfList) { - value = entry; + ref MapEntry lastEntry = ref Unsafe.Add(ref mapEntriesRef, (IntPtr)(void*)(uint)lastIndex); - return true; + lastEntry.NextIndex = candidate.NextIndex; + } + else + { + // Otherwise, update the target index from the bucket slot + this.buckets.DangerousGetReferenceAt(bucketIndex) = candidate.NextIndex + 1; } - } - value = null; + this.count--; - return false; + return entryIndex; + } + + // Move to the following node in the current list + lastIndex = entryIndex; + entryIndex = candidate.NextIndex; } } /// - /// Resets the current array of entries. + /// Updates the timestamp of a heap node at the specified index (which is then synced back). /// - public void Reset() + /// The index of the target heap node to update. + [MethodImpl(MethodImplOptions.NoInlining)] + private unsafe void UpdateTimestamp(ref int heapIndex) { - lock (this.dummy) + int currentIndex = heapIndex; + + ref MapEntry mapEntriesRef = ref this.mapEntries.DangerousGetReference(); + ref HeapEntry heapEntriesRef = ref this.heapEntries.DangerousGetReference(); + ref HeapEntry root = ref Unsafe.Add(ref heapEntriesRef, (IntPtr)(void*)(uint)currentIndex); + + // Assign a new timestamp to the target heap node + root.Timestamp = Stopwatch.GetTimestamp(); + + // Once the timestamp is updated (which will cause the heap to become + // unbalanced), start a sift down loop to balance the heap again. + while (true) { - this.entries = null; + root = ref Unsafe.Add(ref heapEntriesRef, (IntPtr)(void*)(uint)currentIndex); + + // The heap is 0-based (so that the array length can remain the same + // as the power of 2 value used for the other arrays in this type). + // This means that children of each node are at positions: + // - left: (2 * n) + 1 + // - right: (2 * n) + 2 + ref HeapEntry minimum = ref root; + int + left = (currentIndex * 2) + 1, + right = (currentIndex * 2) + 2, + targetIndex = currentIndex; + + // Check and update the left child, if necessary + if (left < this.count) + { + ref HeapEntry child = ref Unsafe.Add(ref heapEntriesRef, (IntPtr)(void*)(uint)left); + + if (child.Timestamp < minimum.Timestamp) + { + minimum = ref child; + targetIndex = left; + } + } + + // Same check as above for the right child + if (right < this.count) + { + ref HeapEntry child = ref Unsafe.Add(ref heapEntriesRef, (IntPtr)(void*)(uint)right); + + if (child.Timestamp < minimum.Timestamp) + { + minimum = ref child; + targetIndex = right; + } + } + + // If no swap is pending, we can just stop here. + // Before returning, we update the target index as well. + if (Unsafe.AreSame(ref root, ref minimum)) + { + heapIndex = targetIndex; + + return; + } + + // Update the indices in the respective map entries (accounting for the swap) + Unsafe.Add(ref mapEntriesRef, (IntPtr)(void*)(uint)root.MapIndex).HeapIndex = targetIndex; + Unsafe.Add(ref mapEntriesRef, (IntPtr)(void*)(uint)minimum.MapIndex).HeapIndex = currentIndex; + + currentIndex = targetIndex; + + // Swap the parent and child (so that the minimum value bubbles up) + HeapEntry temp = root; + + root = minimum; + minimum = temp; } } } @@ -458,9 +751,9 @@ public void Reset() private static int GetHashCode(ReadOnlySpan span) { #if NETSTANDARD1_4 - return span.GetDjb2HashCode() & SignMask; + return span.GetDjb2HashCode(); #else - return HashCode.Combine(span) & SignMask; + return HashCode.Combine(span); #endif } diff --git a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs index e9da8268b70..8afa8b87d9c 100644 --- a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs +++ b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs @@ -36,15 +36,15 @@ public void Test_StringPool_Cctor_Ok(int minimumSize, int x, int y, int size) Assert.AreEqual(size, pool.Size); - Array buckets = (Array)typeof(StringPool).GetField("buckets", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(pool); + Array maps = (Array)typeof(StringPool).GetField("maps", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(pool); - Assert.AreEqual(x, buckets.Length); + Assert.AreEqual(x, maps.Length); - Type bucketType = Type.GetType("Microsoft.Toolkit.HighPerformance.Buffers.StringPool+Bucket, Microsoft.Toolkit.HighPerformance"); + Type bucketType = Type.GetType("Microsoft.Toolkit.HighPerformance.Buffers.StringPool+FixedSizePriorityMap, Microsoft.Toolkit.HighPerformance"); - int bucketSize = (int)bucketType.GetField("entriesPerBucket", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(buckets.GetValue(0)); + int[] buckets = (int[])bucketType.GetField("buckets", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(maps.GetValue(0)); - Assert.AreEqual(y, bucketSize); + Assert.AreEqual(y, buckets.Length); } [TestCategory("StringPool")] From 62f713fe1d479af468d9cd5258c2011b9c705c3d Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Mon, 13 Jul 2020 17:10:59 +0200 Subject: [PATCH 23/40] ~40% speedup in StringPool, removed system call Replaced Stopwatch.GetTimestamp() with a local counter to remove the overhead of the system call, which was pretty high --- .../Buffers/StringPool.cs | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index a9956477774..0e78f4175fc 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -3,7 +3,6 @@ // See the LICENSE file in the project root for more information. using System; -using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Diagnostics.Contracts; #if NETCOREAPP3_1 @@ -348,6 +347,11 @@ private struct FixedSizePriorityMap /// private int freeList; + /// + /// The current incremental timestamp for the items stored in . + /// + private ulong timestamp; + /// /// A type representing a map entry, ie. a node in a given list. /// @@ -382,7 +386,7 @@ private struct HeapEntry /// /// The timestamp for the current entry (ie. the priority for the item). /// - public long Timestamp; + public ulong Timestamp; /// /// The instance cached in the associated map entry. @@ -406,6 +410,7 @@ public FixedSizePriorityMap(int capacity) this.heapEntries = new HeapEntry[capacity]; this.count = 0; this.freeList = EndOfList; + this.timestamp = 0; } /// @@ -673,8 +678,15 @@ private unsafe void UpdateTimestamp(ref int heapIndex) ref HeapEntry heapEntriesRef = ref this.heapEntries.DangerousGetReference(); ref HeapEntry root = ref Unsafe.Add(ref heapEntriesRef, (IntPtr)(void*)(uint)currentIndex); - // Assign a new timestamp to the target heap node - root.Timestamp = Stopwatch.GetTimestamp(); + // Assign a new timestamp to the target heap node. We use a + // local incremental timestamp instead of using the system timer + // as this greatly reduces the overhead and the time spent in system calls. + // The ulong type provides a massive range and it's unlikely users would ever + // exhaust it anyway (especially considering each map has a separate counter). + // Furthermore, even if this happened, the only consequence would be some newly + // used string instances potentially being discarded too early, but the map + // itself would still continue to work fine (the heap would remain balanced). + root.Timestamp = ++this.timestamp; // Once the timestamp is updated (which will cause the heap to become // unbalanced), start a sift down loop to balance the heap again. From e519f3d255e8d47d0b5fdb538d127b4aee7665c6 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Mon, 13 Jul 2020 20:30:31 +0200 Subject: [PATCH 24/40] Removed outdated remarks (before refactor to map) --- Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index 0e78f4175fc..6c72042b1e3 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -161,11 +161,6 @@ private static int RoundUpPowerOfTwo(int x) /// /// Gets the total number of that can be stored in the current instance. - /// - /// This property only refers to the total number of available slots, ignoring collisions. - /// The requested size should always be set to a higher value than the target number of items - /// that will be stored in the cache, to reduce the number of collisions when caching values. - /// /// public int Size { get; } From b885adcd372edaed86be7ea4cad464022328fb45 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 14 Jul 2020 13:30:45 +0200 Subject: [PATCH 25/40] Added missing timestamp reset --- Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index 6c72042b1e3..79629c6531e 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -512,6 +512,7 @@ public void Reset() this.heapEntries.AsSpan().Clear(); this.count = 0; this.freeList = EndOfList; + this.timestamp = 0; } /// @@ -573,7 +574,7 @@ private unsafe void Insert(string value, int hashcode) } else if (this.count == this.mapEntries.Length) { - // If the current map is empty, first get the oldest value, which is + // If the current map is full, first get the oldest value, which is // always the first item in the heap. Then, free up a slot by // removing that, and insert the new value in that empty location. string key = heapEntriesRef.Value; From 613cac01b00bf36370589675ea01f2955470fbc4 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 15 Jul 2020 04:10:06 +0200 Subject: [PATCH 26/40] ~90% speed improvements (!), reduced memory usage --- .../Buffers/StringPool.cs | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index 79629c6531e..2190d8795dd 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -345,7 +345,7 @@ private struct FixedSizePriorityMap /// /// The current incremental timestamp for the items stored in . /// - private ulong timestamp; + private uint timestamp; /// /// A type representing a map entry, ie. a node in a given list. @@ -381,12 +381,7 @@ private struct HeapEntry /// /// The timestamp for the current entry (ie. the priority for the item). /// - public ulong Timestamp; - - /// - /// The instance cached in the associated map entry. - /// - public string Value; + public uint Timestamp; /// /// The 0-based index for the map entry corresponding to the current item. @@ -577,10 +572,17 @@ private unsafe void Insert(string value, int hashcode) // If the current map is full, first get the oldest value, which is // always the first item in the heap. Then, free up a slot by // removing that, and insert the new value in that empty location. - string key = heapEntriesRef.Value; - - entryIndex = Remove(key); + entryIndex = heapEntriesRef.MapIndex; heapIndex = 0; + + ref MapEntry removedEntry = ref Unsafe.Add(ref mapEntriesRef, (IntPtr)(void*)(uint)entryIndex); + + // The removal logic can be extremely optimized in this case, as we + // can retrieve the precomputed hashcode for the target entry by doing + // a lookup on the target map node, and we can also skip all the comparisons + // while traversing the target chain since we know in advance the index of + // the target node which will contain the item to remove from the map. + Remove(removedEntry.HashCode, entryIndex); } else { @@ -604,7 +606,6 @@ private unsafe void Insert(string value, int hashcode) this.count++; // Link the heap node with the current entry - targetHeapEntry.Value = value; targetHeapEntry.MapIndex = entryIndex; // Update the timestamp and balance the heap again @@ -614,15 +615,14 @@ private unsafe void Insert(string value, int hashcode) /// /// Removes a specified instance from the map to free up one slot. /// - /// The target instance to remove. - /// The index of the freed up slot within . + /// The precomputed hashcode of the instance to remove. + /// The index of the target map node to remove. /// The input instance needs to already exist in the map. [MethodImpl(MethodImplOptions.NoInlining)] - private unsafe int Remove(string key) + private unsafe void Remove(int hashcode, int mapIndex) { ref MapEntry mapEntriesRef = ref this.mapEntries.DangerousGetReference(); int - hashcode = StringPool.GetHashCode(key.AsSpan()), bucketIndex = hashcode & (this.buckets.Length - 1), entryIndex = this.buckets.DangerousGetReferenceAt(bucketIndex) - 1, lastIndex = EndOfList; @@ -634,8 +634,7 @@ private unsafe int Remove(string key) ref MapEntry candidate = ref Unsafe.Add(ref mapEntriesRef, (IntPtr)(void*)(uint)entryIndex); // Check the current value for a match - if (candidate.HashCode == hashcode && - candidate.Value.Equals(key)) + if (entryIndex == mapIndex) { // If this was not the first list node, update the parent as well if (lastIndex != EndOfList) @@ -652,7 +651,7 @@ private unsafe int Remove(string key) this.count--; - return entryIndex; + return; } // Move to the following node in the current list @@ -677,7 +676,7 @@ private unsafe void UpdateTimestamp(ref int heapIndex) // Assign a new timestamp to the target heap node. We use a // local incremental timestamp instead of using the system timer // as this greatly reduces the overhead and the time spent in system calls. - // The ulong type provides a massive range and it's unlikely users would ever + // The uint type provides a large enough range and it's unlikely users would ever // exhaust it anyway (especially considering each map has a separate counter). // Furthermore, even if this happened, the only consequence would be some newly // used string instances potentially being discarded too early, but the map From 05b3bc8f84726232737fa3100bc88c2b8610b98e Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Mon, 20 Jul 2020 13:36:23 +0200 Subject: [PATCH 27/40] Tweaked nullability attributes --- Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index 2190d8795dd..8158cb61fd0 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -360,7 +360,7 @@ private struct MapEntry /// /// The instance cached in this entry. /// - public string Value; + public string? Value; /// /// The 0-based index for the next node in the current list. @@ -532,11 +532,11 @@ public void Reset() entry = ref Unsafe.Add(ref mapEntriesRef, (IntPtr)(void*)(uint)i); if (entry.HashCode == hashcode && - entry.Value.AsSpan().SequenceEqual(span)) + entry.Value!.AsSpan().SequenceEqual(span)) { UpdateTimestamp(ref entry.HeapIndex); - return ref entry.Value!; + return ref entry.Value; } } From 658c1a43ab14ab31536d85b58fb87c1497094333 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Thu, 23 Jul 2020 19:38:41 +0200 Subject: [PATCH 28/40] Minor code tweaks --- .../Buffers/StringPool.cs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index 8158cb61fd0..d7ead9e3612 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -522,8 +522,8 @@ public void Reset() ref MapEntry mapEntriesRef = ref this.mapEntries.DangerousGetReference(); ref MapEntry entry = ref Unsafe.AsRef(null); int - bucketIndex = hashcode & (this.buckets.Length - 1), - length = this.buckets.Length; + length = this.buckets.Length, + bucketIndex = hashcode & (length - 1); for (int i = this.buckets.DangerousGetReferenceAt(bucketIndex) - 1; (uint)i < (uint)length; @@ -667,8 +667,9 @@ private unsafe void Remove(int hashcode, int mapIndex) [MethodImpl(MethodImplOptions.NoInlining)] private unsafe void UpdateTimestamp(ref int heapIndex) { - int currentIndex = heapIndex; - + int + currentIndex = heapIndex, + count = this.count; ref MapEntry mapEntriesRef = ref this.mapEntries.DangerousGetReference(); ref HeapEntry heapEntriesRef = ref this.heapEntries.DangerousGetReference(); ref HeapEntry root = ref Unsafe.Add(ref heapEntriesRef, (IntPtr)(void*)(uint)currentIndex); @@ -701,7 +702,7 @@ private unsafe void UpdateTimestamp(ref int heapIndex) targetIndex = currentIndex; // Check and update the left child, if necessary - if (left < this.count) + if (left < count) { ref HeapEntry child = ref Unsafe.Add(ref heapEntriesRef, (IntPtr)(void*)(uint)left); @@ -713,7 +714,7 @@ private unsafe void UpdateTimestamp(ref int heapIndex) } // Same check as above for the right child - if (right < this.count) + if (right < count) { ref HeapEntry child = ref Unsafe.Add(ref heapEntriesRef, (IntPtr)(void*)(uint)right); From 73e5adbf4292ec99ff456c263cbc617304f45e27 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Mon, 27 Jul 2020 01:50:34 +0200 Subject: [PATCH 29/40] Minor codegen improvements --- .../Buffers/StringPool.cs | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index d7ead9e3612..ad9e024af25 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -420,9 +420,9 @@ public object SyncRoot [MethodImpl(MethodImplOptions.AggressiveInlining)] public unsafe void Add(string value, int hashcode) { - ref string? target = ref TryGet(value.AsSpan(), hashcode); + ref string target = ref TryGet(value.AsSpan(), hashcode); - if (Unsafe.AreSame(ref target!, ref Unsafe.AsRef(null))) + if (Unsafe.AreSame(ref target, ref Unsafe.AsRef(null))) { Insert(value, hashcode); } @@ -439,9 +439,11 @@ public unsafe void Add(string value, int hashcode) /// The precomputed hashcode for . /// A instance with the contents of . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public string GetOrAdd(string value, int hashcode) + public unsafe string GetOrAdd(string value, int hashcode) { - if (TryGet(value.AsSpan(), hashcode, out string? result)) + ref string result = ref TryGet(value.AsSpan(), hashcode); + + if (!Unsafe.AreSame(ref result, ref Unsafe.AsRef(null))) { return result; } @@ -458,11 +460,13 @@ public string GetOrAdd(string value, int hashcode) /// The precomputed hashcode for . /// A instance with the contents of , cached if possible. [MethodImpl(MethodImplOptions.AggressiveInlining)] - public string GetOrAdd(ReadOnlySpan span, int hashcode) + public unsafe string GetOrAdd(ReadOnlySpan span, int hashcode) { - if (TryGet(span, hashcode, out string? result)) + ref string result = ref TryGet(span, hashcode); + + if (!Unsafe.AreSame(ref result, ref Unsafe.AsRef(null))) { - return result!; + return result; } string value = span.ToString(); @@ -482,9 +486,9 @@ public string GetOrAdd(ReadOnlySpan span, int hashcode) [MethodImpl(MethodImplOptions.AggressiveInlining)] public unsafe bool TryGet(ReadOnlySpan span, int hashcode, [NotNullWhen(true)] out string? value) { - ref string? result = ref TryGet(span, hashcode); + ref string result = ref TryGet(span, hashcode); - if (!Unsafe.AreSame(ref result!, ref Unsafe.AsRef(null))) + if (!Unsafe.AreSame(ref result, ref Unsafe.AsRef(null))) { value = result; @@ -517,7 +521,7 @@ public void Reset() /// The precomputed hashcode for . /// A reference to the slot where the target instance could be. [MethodImpl(MethodImplOptions.NoInlining)] - private unsafe ref string? TryGet(ReadOnlySpan span, int hashcode) + private unsafe ref string TryGet(ReadOnlySpan span, int hashcode) { ref MapEntry mapEntriesRef = ref this.mapEntries.DangerousGetReference(); ref MapEntry entry = ref Unsafe.AsRef(null); @@ -536,11 +540,11 @@ public void Reset() { UpdateTimestamp(ref entry.HeapIndex); - return ref entry.Value; + return ref entry.Value!; } } - return ref Unsafe.AsRef(null); + return ref Unsafe.AsRef(null); } /// From 59ac91e24dab09d40f24fbcd97b05c7ef55aecab Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 29 Jul 2020 18:43:17 +0200 Subject: [PATCH 30/40] Fixed min-heap violation on timestamp overflow --- .../Buffers/StringPool.cs | 60 +++++++++++++++++-- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index ad9e024af25..bafaf79665f 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -677,16 +677,38 @@ private unsafe void UpdateTimestamp(ref int heapIndex) ref MapEntry mapEntriesRef = ref this.mapEntries.DangerousGetReference(); ref HeapEntry heapEntriesRef = ref this.heapEntries.DangerousGetReference(); ref HeapEntry root = ref Unsafe.Add(ref heapEntriesRef, (IntPtr)(void*)(uint)currentIndex); + uint timestamp = this.timestamp; + + // Check if incrementing the current timestamp for the heap node to update + // would result in an overflow. If that happened, we could end up violating + // the min-heap property (the value of each node has to always be <= than that + // of its child nodes), eg. if we were updating a node that was not the root. + // In that scenario, we could end up with a node somewhere along the heap with + // a value lower than that of its parent node (as the timestamp would be 0). + // To guard against this, we just check the current timestamp value, and if + // the maximum value has been reached, we reinitialize the entire heap. This + // is done in a non-inlined call, so we don't increase the codegen size in this + // method. The reinitialization simply traverses the heap in breadth-first order + // (ie. level by level), and assigns incrementing timestamp to all nodes starting + // from 0. The value of the current timestamp is then just set to the current size. + if (timestamp == uint.MaxValue) + { + // We use a goto here as this path is very rarely taken. Doing so + // causes the generated asm to contain a forward jump to the fallback + // path if this branch is taken, whereas the normal execution path will + // not need to execute any jumps at all. This is done to reduce the overhead + // introduced by this check in all the invocations where this point is not reached. + goto Fallback; + } + + Downheap: // Assign a new timestamp to the target heap node. We use a // local incremental timestamp instead of using the system timer // as this greatly reduces the overhead and the time spent in system calls. // The uint type provides a large enough range and it's unlikely users would ever // exhaust it anyway (especially considering each map has a separate counter). - // Furthermore, even if this happened, the only consequence would be some newly - // used string instances potentially being discarded too early, but the map - // itself would still continue to work fine (the heap would remain balanced). - root.Timestamp = ++this.timestamp; + root.Timestamp = this.timestamp = timestamp + 1; // Once the timestamp is updated (which will cause the heap to become // unbalanced), start a sift down loop to balance the heap again. @@ -750,6 +772,36 @@ private unsafe void UpdateTimestamp(ref int heapIndex) root = minimum; minimum = temp; } + + Fallback: + + UpdateAllTimestamps(); + + // After having updated all the timestamps, if the heap contains N items, the + // node in the bottom right corner will have a value of N - 1. Since the timestamp + // is incremented by 1 before starting the downheap execution, here we simply + // update the local timestamp to be N - 1, so that the code above will set the + // timestamp of the node currently being updated to exactly N. + timestamp = (uint)(count - 1); + + goto Downheap; + } + + /// + /// Updates the timestamp of all the current heap nodes in incrementing order. + /// The heap is always guaranteed to be complete binary tree, so when it contains + /// a given number of nodes, those are all contiguous from the start of the array. + /// + [MethodImpl(MethodImplOptions.NoInlining)] + private unsafe void UpdateAllTimestamps() + { + int count = this.count; + ref HeapEntry heapEntriesRef = ref this.heapEntries.DangerousGetReference(); + + for (int i = 0; i < count; i++) + { + Unsafe.Add(ref heapEntriesRef, (IntPtr)(void*)(uint)i).Timestamp = (uint)i; + } } } From 1754186b8ca3d4351029cb96d91ef6c781da2ca1 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 29 Jul 2020 19:05:21 +0200 Subject: [PATCH 31/40] Improved unit test for failed constructor --- .../Buffers/Test_StringPool.cs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs index 8afa8b87d9c..ea314abc9ba 100644 --- a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs +++ b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs @@ -52,12 +52,20 @@ public void Test_StringPool_Cctor_Ok(int minimumSize, int x, int y, int size) [DataRow(0)] [DataRow(-3248234)] [DataRow(int.MinValue)] - [ExpectedException(typeof(ArgumentOutOfRangeException))] public void Test_StringPool_Cctor_Fail(int size) { - var pool = new StringPool(size); - - Assert.Fail(); + try + { + var pool = new StringPool(size); + + Assert.Fail(); + } + catch (ArgumentOutOfRangeException e) + { + var cctor = typeof(StringPool).GetConstructor(new[] { typeof(int) }); + + Assert.AreEqual(cctor.GetParameters()[0].Name, e.ParamName); + } } [TestCategory("StringPool")] From 533973c9eaa6f022b75308c3ab198addff0971ab Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 29 Jul 2020 19:13:26 +0200 Subject: [PATCH 32/40] Minor optimization (removed duplicate ref computation) --- Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index bafaf79665f..4a7e31d44dd 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -714,8 +714,6 @@ private unsafe void UpdateTimestamp(ref int heapIndex) // unbalanced), start a sift down loop to balance the heap again. while (true) { - root = ref Unsafe.Add(ref heapEntriesRef, (IntPtr)(void*)(uint)currentIndex); - // The heap is 0-based (so that the array length can remain the same // as the power of 2 value used for the other arrays in this type). // This means that children of each node are at positions: @@ -771,6 +769,9 @@ private unsafe void UpdateTimestamp(ref int heapIndex) root = minimum; minimum = temp; + + // Update the reference to the root node + root = ref Unsafe.Add(ref heapEntriesRef, (IntPtr)(void*)(uint)currentIndex); } Fallback: From 1941370b0a0c673a2d874c207bac70909c99cbbe Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 29 Jul 2020 21:02:34 +0200 Subject: [PATCH 33/40] Added unit test for overflow fix --- .../Buffers/Test_StringPool.cs | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs index ea314abc9ba..43f08350847 100644 --- a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs +++ b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System; +using System.Linq; using System.Reflection; using System.Text; using Microsoft.Toolkit.HighPerformance.Buffers; @@ -235,5 +236,66 @@ public void Test_StringPool_GetOrAdd_Encoding_Misc() Assert.AreSame(windowsCommunityToolkit2, windowsCommunityToolkit3); } + + [TestCategory("StringPool")] + [TestMethod] + public void Test_StringPool_GetOrAdd_Overflow() + { + var pool = new StringPool(32); + + // Fill the pool + for (int i = 0; i < 4096; i++) + { + _ = pool.GetOrAdd(i.ToString()); + } + + // Force an overflow + string text = "Hello world"; + + for (uint i = 0; i < uint.MaxValue; i++) + { + _ = pool.GetOrAdd(text); + } + + // Get the buckets + Array maps = (Array)typeof(StringPool).GetField("maps", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(pool); + + Type + bucketType = Type.GetType("Microsoft.Toolkit.HighPerformance.Buffers.StringPool+FixedSizePriorityMap, Microsoft.Toolkit.HighPerformance"), + heapEntryType = Type.GetType("Microsoft.Toolkit.HighPerformance.Buffers.StringPool+FixedSizePriorityMap+HeapEntry, Microsoft.Toolkit.HighPerformance"); + + foreach (var map in maps) + { + // Get the heap for each bucket + Array heapEntries = (Array)bucketType.GetField("heapEntries", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(map); + FieldInfo fieldInfo = heapEntryType.GetField("Timestamp"); + + // Extract the array with the timestamps in the heap nodes + uint[] array = heapEntries.Cast().Select(entry => (uint)fieldInfo.GetValue(entry)).ToArray(); + + static bool IsMinHeap(uint[] array) + { + for (int i = 0; i < array.Length; i++) + { + int + left = (i * 2) + 1, + right = (i * 2) + 2; + + if ((left < array.Length && + array[left] <= array[i]) || + (right < array.Length && + array[right] <= array[i])) + { + return false; + } + } + + return true; + } + + // Verify that the current heap is indeed valid after the overflow + Assert.IsTrue(IsMinHeap(array)); + } + } } } From 92ed638165360738e9c3f891504df97a87bdaf91 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Thu, 30 Jul 2020 01:49:11 +0200 Subject: [PATCH 34/40] Reduced runtime of overflow unit test --- .../Buffers/Test_StringPool.cs | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs index 43f08350847..d8ea7f6b2cf 100644 --- a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs +++ b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs @@ -249,20 +249,28 @@ public void Test_StringPool_GetOrAdd_Overflow() _ = pool.GetOrAdd(i.ToString()); } - // Force an overflow - string text = "Hello world"; + // Get the buckets + Array maps = (Array)typeof(StringPool).GetField("maps", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(pool); - for (uint i = 0; i < uint.MaxValue; i++) + Type bucketType = Type.GetType("Microsoft.Toolkit.HighPerformance.Buffers.StringPool+FixedSizePriorityMap, Microsoft.Toolkit.HighPerformance"); + FieldInfo timestampInfo = bucketType.GetField("timestamp", BindingFlags.Instance | BindingFlags.NonPublic); + + // Force the timestamp to be the maximum value, or the test would take too long + for (int i = 0; i < maps.LongLength; i++) { - _ = pool.GetOrAdd(text); + object map = maps.GetValue(i); + + timestampInfo.SetValue(map, uint.MaxValue); + + maps.SetValue(map, i); } - // Get the buckets - Array maps = (Array)typeof(StringPool).GetField("maps", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(pool); + // Force an overflow + string text = "Hello world"; + + _ = pool.GetOrAdd(text); - Type - bucketType = Type.GetType("Microsoft.Toolkit.HighPerformance.Buffers.StringPool+FixedSizePriorityMap, Microsoft.Toolkit.HighPerformance"), - heapEntryType = Type.GetType("Microsoft.Toolkit.HighPerformance.Buffers.StringPool+FixedSizePriorityMap+HeapEntry, Microsoft.Toolkit.HighPerformance"); + Type heapEntryType = Type.GetType("Microsoft.Toolkit.HighPerformance.Buffers.StringPool+FixedSizePriorityMap+HeapEntry, Microsoft.Toolkit.HighPerformance"); foreach (var map in maps) { From 5def54982936433dfdd3c5e29ce953beaf000877 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Thu, 30 Jul 2020 13:30:43 +0200 Subject: [PATCH 35/40] Fixed a typo --- Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index 4a7e31d44dd..c1374077452 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -689,7 +689,7 @@ private unsafe void UpdateTimestamp(ref int heapIndex) // the maximum value has been reached, we reinitialize the entire heap. This // is done in a non-inlined call, so we don't increase the codegen size in this // method. The reinitialization simply traverses the heap in breadth-first order - // (ie. level by level), and assigns incrementing timestamp to all nodes starting + // (ie. level by level), and assigns incrementing timestamps to all nodes starting // from 0. The value of the current timestamp is then just set to the current size. if (timestamp == uint.MaxValue) { From 4a9f9710398e6ef8d36f588cc57b9d9c30a32733 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Sat, 1 Aug 2020 00:38:58 +0200 Subject: [PATCH 36/40] Renamed StringPool.Default -> Shared Done to keep the naming consistent with ArrayPool.Shared --- .../Buffers/StringPool.cs | 12 +++++++++--- .../Buffers/Test_StringPool.cs | 10 +++++----- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index c1374077452..988e1f44a94 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -31,7 +31,7 @@ namespace Microsoft.Toolkit.HighPerformance.Buffers public sealed class StringPool { /// - /// The size for the instance. + /// The size used by default by the parameterless constructor. /// private const int DefaultSize = 2048; @@ -155,9 +155,15 @@ private static int RoundUpPowerOfTwo(int x) } /// - /// Gets the default instance. + /// Gets the shared instance. /// - public static StringPool Default { get; } = new StringPool(); + /// + /// The shared pool provides a singleton, reusable instance that + /// can be accessed directly, and that pools instances for the entire + /// process. Since is thread-safe, the shared instance can be used + /// concurrently by multiple threads without the need for manual synchronization. + /// + public static StringPool Shared { get; } = new StringPool(); /// /// Gets the total number of that can be stored in the current instance. diff --git a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs index d8ea7f6b2cf..8c1ff9a7436 100644 --- a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs +++ b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs @@ -73,9 +73,9 @@ public void Test_StringPool_Cctor_Fail(int size) [TestMethod] public void Test_StringPool_Add_Empty() { - StringPool.Default.Add(string.Empty); + StringPool.Shared.Add(string.Empty); - bool found = StringPool.Default.TryGet(ReadOnlySpan.Empty, out string? text); + bool found = StringPool.Shared.TryGet(ReadOnlySpan.Empty, out string? text); Assert.IsTrue(found); Assert.AreSame(string.Empty, text); @@ -130,7 +130,7 @@ public void Test_StringPool_Add_Misc() [TestMethod] public void Test_StringPool_GetOrAdd_String_Empty() { - string empty = StringPool.Default.GetOrAdd(string.Empty); + string empty = StringPool.Shared.GetOrAdd(string.Empty); Assert.AreSame(string.Empty, empty); } @@ -166,7 +166,7 @@ public void Test_StringPool_GetOrAdd_String_Misc() [TestMethod] public void Test_StringPool_GetOrAdd_ReadOnlySpan_Empty() { - string empty = StringPool.Default.GetOrAdd(ReadOnlySpan.Empty); + string empty = StringPool.Shared.GetOrAdd(ReadOnlySpan.Empty); Assert.AreSame(string.Empty, empty); } @@ -205,7 +205,7 @@ public void Test_StringPool_GetOrAdd_ReadOnlySpan_Misc() [TestMethod] public void Test_StringPool_GetOrAdd_Encoding_Empty() { - string empty = StringPool.Default.GetOrAdd(ReadOnlySpan.Empty, Encoding.ASCII); + string empty = StringPool.Shared.GetOrAdd(ReadOnlySpan.Empty, Encoding.ASCII); Assert.AreSame(string.Empty, empty); } From 96e8e48381158531f725721e9740e0b858a361d7 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 8 Sep 2020 23:44:07 +0200 Subject: [PATCH 37/40] Improved phrasing of LFU cache mode Co-authored-by: Michael Hawker MSFT (XAML Llama) <24302614+michael-hawker@users.noreply.github.com> --- Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index 988e1f44a94..79322cb2f8f 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -25,7 +25,7 @@ namespace Microsoft.Toolkit.HighPerformance.Buffers /// The method provides a best-effort alternative to just creating /// a new instance every time, in order to minimize the number of duplicated instances. /// The type will internally manage a highly efficient priority queue for the - /// cached instances, so that when the full capacity is reached, the last recently + /// cached instances, so that when the full capacity is reached, the least frequently /// used values will be automatically discarded to leave room for new values to cache. /// public sealed class StringPool From 45e1329012ed9a5b52d5c3d4608f02d3ac0b5ce6 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 9 Sep 2020 13:30:36 +0200 Subject: [PATCH 38/40] Added XML docs about FixedSizePriorityMap --- Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index 79322cb2f8f..757b9b5fdb5 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -311,6 +311,15 @@ public void Reset() /// /// A configurable map containing a group of cached instances. /// + /// + /// Instances of this type are stored in an array within and they are + /// always accessed by reference - essentially as if this type had been a class. The type is + /// also private, so there's no risk for users to directly access it and accidentally copy an + /// instance, which would lead to bugs due to values becoming out of sync with the internal state + /// (that is, because instances would be copied by value, so primitive fields would not be shared). + /// The reason why we're using a struct here is to remove an indirection level and improve cache + /// locality when accessing individual buckets from the methods in the type. + /// private struct FixedSizePriorityMap { /// From 9b21cd0e30da9d3e4931af421410ac037a1fa104 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Mon, 14 Sep 2020 22:43:03 +0200 Subject: [PATCH 39/40] Removed unnecessary code --- .../Buffers/StringPool.cs | 32 +++---------------- 1 file changed, 5 insertions(+), 27 deletions(-) diff --git a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs index 757b9b5fdb5..d2a98ae8c94 100644 --- a/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs +++ b/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs @@ -327,11 +327,6 @@ private struct FixedSizePriorityMap /// private const int EndOfList = -1; - /// - /// The index representing the negative offset for the start of the free list. - /// - private const int StartOfFreeList = -3; - /// /// The array of 1-based indices for the items stored in . /// @@ -352,11 +347,6 @@ private struct FixedSizePriorityMap /// private int count; - /// - /// The 1-based index for the start of the free list within . - /// - private int freeList; - /// /// The current incremental timestamp for the items stored in . /// @@ -414,7 +404,6 @@ public FixedSizePriorityMap(int capacity) this.mapEntries = new MapEntry[capacity]; this.heapEntries = new HeapEntry[capacity]; this.count = 0; - this.freeList = EndOfList; this.timestamp = 0; } @@ -525,7 +514,6 @@ public void Reset() this.mapEntries.AsSpan().Clear(); this.heapEntries.AsSpan().Clear(); this.count = 0; - this.freeList = EndOfList; this.timestamp = 0; } @@ -575,22 +563,11 @@ private unsafe void Insert(string value, int hashcode) ref HeapEntry heapEntriesRef = ref this.heapEntries.DangerousGetReference(); int entryIndex, heapIndex; - // If the free list is not empty, get that map node and update the field - if (this.freeList != EndOfList) - { - entryIndex = this.freeList; - heapIndex = this.count; - - int nextIndex = Unsafe.Add(ref mapEntriesRef, (IntPtr)(void*)(uint)entryIndex).NextIndex; - - // Update the free list pointer: either to the next free node or to the invalid marker - this.freeList = StartOfFreeList - nextIndex; - } - else if (this.count == this.mapEntries.Length) + // If the current map is full, first get the oldest value, which is + // always the first item in the heap. Then, free up a slot by + // removing that, and insert the new value in that empty location. + if (this.count == this.mapEntries.Length) { - // If the current map is full, first get the oldest value, which is - // always the first item in the heap. Then, free up a slot by - // removing that, and insert the new value in that empty location. entryIndex = heapEntriesRef.MapIndex; heapIndex = 0; @@ -605,6 +582,7 @@ private unsafe void Insert(string value, int hashcode) } else { + // If the free list is not empty, get that map node and update the field entryIndex = this.count; heapIndex = this.count; } From 1739fb8195f2479772747328c2a755df6b987761 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Mon, 14 Sep 2020 22:51:06 +0200 Subject: [PATCH 40/40] Improved code coverage --- .../Buffers/Test_StringPool.cs | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs index 8c1ff9a7436..b679609d40a 100644 --- a/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs +++ b/UnitTests/UnitTests.HighPerformance.Shared/Buffers/Test_StringPool.cs @@ -5,6 +5,7 @@ using System; using System.Linq; using System.Reflection; +using System.Runtime.CompilerServices; using System.Text; using Microsoft.Toolkit.HighPerformance.Buffers; using Microsoft.VisualStudio.TestTools.UnitTesting; @@ -126,6 +127,40 @@ public void Test_StringPool_Add_Misc() Assert.AreSame(windowsCommunityToolkit, windowsCommunityToolkit2); } + [TestCategory("StringPool")] + [TestMethod] + public void Test_StringPool_Add_Overwrite() + { + var pool = new StringPool(); + + var today = DateTime.Today; + + var text1 = ToStringNoInlining(today); + + pool.Add(text1); + + Assert.IsTrue(pool.TryGet(text1.AsSpan(), out string? result)); + + Assert.AreSame(text1, result); + + var text2 = ToStringNoInlining(today); + + pool.Add(text2); + + Assert.IsTrue(pool.TryGet(text2.AsSpan(), out result)); + + Assert.AreNotSame(text1, result); + Assert.AreSame(text2, result); + } + + // Separate method just to ensure the JIT can't optimize things away + // and make the test fail because different string instances are interned + [MethodImpl(MethodImplOptions.NoInlining)] + private static string ToStringNoInlining(object obj) + { + return obj.ToString(); + } + [TestCategory("StringPool")] [TestMethod] public void Test_StringPool_GetOrAdd_String_Empty()