From 787d190ac280a1f711ec4a7307ca79623e498880 Mon Sep 17 00:00:00 2001 From: Jan Kotas Date: Fri, 24 Mar 2017 18:34:08 -0700 Subject: [PATCH 1/3] Expose Interlocked.MemoryBarrierProcessWide Fixes https://github.com/dotnet/corefx/issues/16799 --- src/System.Threading/ref/System.Threading.cs | 1 + .../tests/Configurations.props | 1 + .../tests/InterlockedTests.cs | 2 +- .../tests/InterlockedTests.netcoreapp.cs | 116 ++++++++++++++++++ .../tests/System.Threading.Tests.csproj | 1 + 5 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 src/System.Threading/tests/InterlockedTests.netcoreapp.cs diff --git a/src/System.Threading/ref/System.Threading.cs b/src/System.Threading/ref/System.Threading.cs index 2347a4f1d824..a564bfde4ca7 100644 --- a/src/System.Threading/ref/System.Threading.cs +++ b/src/System.Threading/ref/System.Threading.cs @@ -171,6 +171,7 @@ public static partial class Interlocked public static int Increment(ref int location) { throw null; } public static long Increment(ref long location) { throw null; } public static void MemoryBarrier() { } + public static void MemoryBarrierProcessWide() { } public static long Read(ref long location) { throw null; } } public static partial class LazyInitializer diff --git a/src/System.Threading/tests/Configurations.props b/src/System.Threading/tests/Configurations.props index c398e42e8994..77a4b65bc94a 100644 --- a/src/System.Threading/tests/Configurations.props +++ b/src/System.Threading/tests/Configurations.props @@ -3,6 +3,7 @@ netstandard; + netcoreapp; \ No newline at end of file diff --git a/src/System.Threading/tests/InterlockedTests.cs b/src/System.Threading/tests/InterlockedTests.cs index ebdd8e6ae16d..83cac4605280 100644 --- a/src/System.Threading/tests/InterlockedTests.cs +++ b/src/System.Threading/tests/InterlockedTests.cs @@ -8,7 +8,7 @@ namespace System.Threading.Tests { - public class InterlockedTests + public partial class InterlockedTests { [Fact] public void IncrementDecrement_int() diff --git a/src/System.Threading/tests/InterlockedTests.netcoreapp.cs b/src/System.Threading/tests/InterlockedTests.netcoreapp.cs new file mode 100644 index 000000000000..ff964d941cda --- /dev/null +++ b/src/System.Threading/tests/InterlockedTests.netcoreapp.cs @@ -0,0 +1,116 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Generic; +using System.Diagnostics; +using System.Threading.Tasks; +using Xunit; + +namespace System.Threading.Tests +{ + public partial class InterlockedTests + { + // Taking this lock on the same thread repeatedly is very fast because of it has no interlocked operations. + // Switching the thread where the lock is taken is expensive because of allocation and FlushProcessWriteBuffers. + class AsymmetricLock + { + public class LockCookie + { + internal LockCookie(int threadId) + { + ThreadId = threadId; + Taken = false; + } + + public void Exit() + { + Debug.Assert(ThreadId == Environment.CurrentManagedThreadId); + Taken = false; + } + + internal readonly int ThreadId; + internal bool Taken; + } + + LockCookie _current = new LockCookie(-1); + + // Returning LockCookie to call Exit on is the fastest implementation because of it works naturally with the RCU pattern. + // The traditional Enter/Exit lock interface would require thread local storage or some other scheme to reclaim the cookie. + public LockCookie Enter() + { + int currentThreadId = Environment.CurrentManagedThreadId; + + LockCookie entry = _current; + + if (entry.ThreadId == currentThreadId) + { + entry.Taken = true; + + // + // If other thread started stealing the ownership, we need to take slow path. + // + // Volatile works here, but it is too big of a hammer because of it will result into memory barrier on ARM that + // we do not need here. We really just need to make sure that the compiler won't reorder the read with the above write. + // RyuJIT won't reorder them today, but more advanced optimizers might. + // + if (Volatile.Read(ref _current) == entry) + { + return entry; + } + + entry.Taken = false; + } + + return EnterSlow(); + } + + private LockCookie EnterSlow() + { + // Attempt to steal the ownership. Take a regular lock to make sure that only thread is trying to steal it at a time. + lock (this) + { + // We are the new fast thread now! + var oldEntry = _current; + _current = new LockCookie(Environment.CurrentManagedThreadId); + + // After MemoryBarrierProcessWide, we can be sure that the Volatile.Read done by the fast thread will see that it is not a fast + // thread anymore, and thus it will not attempt to enter the lock. + Interlocked.MemoryBarrierProcessWide(); + + // Keep looping as long as the lock is taken by other thread + SpinWait sw = new SpinWait(); + while (oldEntry.Taken) + sw.SpinOnce(); + + _current.Taken = true; + return _current; + } + } + } + + [Fact] + public void MemoryBarrierProcessWide() + { + // Stress MemoryBarrierProcessWide correctness using a simple AsymmetricLock + + AsymmetricLock asymmetricLock = new AsymmetricLock(); + List threads = new List(); + int count = 0; + for (int i = 0; i < 1000; i++) + { + threads.Add(Task.Run(() => + { + for (int j = 0; j < 1000; j++) + { + var cookie = asymmetricLock.Enter(); + count++; + cookie.Exit(); + } + })); + } + Task.WaitAll(threads.ToArray()); + Assert.Equal(1000*1000, count); + } + } +} \ No newline at end of file diff --git a/src/System.Threading/tests/System.Threading.Tests.csproj b/src/System.Threading/tests/System.Threading.Tests.csproj index 0986636e9d74..48bf771e14f0 100644 --- a/src/System.Threading/tests/System.Threading.Tests.csproj +++ b/src/System.Threading/tests/System.Threading.Tests.csproj @@ -17,6 +17,7 @@ + From f477eee510fcd89e130ce572930340528517d88c Mon Sep 17 00:00:00 2001 From: Jan Kotas Date: Fri, 24 Mar 2017 19:11:49 -0700 Subject: [PATCH 2/3] CR feedback --- .../tests/InterlockedTests.netcoreapp.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/System.Threading/tests/InterlockedTests.netcoreapp.cs b/src/System.Threading/tests/InterlockedTests.netcoreapp.cs index ff964d941cda..0818d99fe674 100644 --- a/src/System.Threading/tests/InterlockedTests.netcoreapp.cs +++ b/src/System.Threading/tests/InterlockedTests.netcoreapp.cs @@ -11,9 +11,9 @@ namespace System.Threading.Tests { public partial class InterlockedTests { - // Taking this lock on the same thread repeatedly is very fast because of it has no interlocked operations. + // Taking this lock on the same thread repeatedly is very fast because it has no interlocked operations. // Switching the thread where the lock is taken is expensive because of allocation and FlushProcessWriteBuffers. - class AsymmetricLock + private class AsymmetricLock { public class LockCookie { @@ -33,7 +33,7 @@ public void Exit() internal bool Taken; } - LockCookie _current = new LockCookie(-1); + private LockCookie _current = new LockCookie(-1); // Returning LockCookie to call Exit on is the fastest implementation because of it works naturally with the RCU pattern. // The traditional Enter/Exit lock interface would require thread local storage or some other scheme to reclaim the cookie. @@ -67,7 +67,7 @@ public LockCookie Enter() private LockCookie EnterSlow() { - // Attempt to steal the ownership. Take a regular lock to make sure that only thread is trying to steal it at a time. + // Attempt to steal the ownership. Take a regular lock to ensure that only one thread is trying to steal it at a time. lock (this) { // We are the new fast thread now! @@ -113,4 +113,4 @@ public void MemoryBarrierProcessWide() Assert.Equal(1000*1000, count); } } -} \ No newline at end of file +} From 4f8db28518da2684adff68aee01cd830519e6df6 Mon Sep 17 00:00:00 2001 From: Jan Kotas Date: Sat, 25 Mar 2017 13:57:01 -0700 Subject: [PATCH 3/3] More CR feedback --- .../tests/InterlockedTests.netcoreapp.cs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/System.Threading/tests/InterlockedTests.netcoreapp.cs b/src/System.Threading/tests/InterlockedTests.netcoreapp.cs index 0818d99fe674..8349ffcf6ca9 100644 --- a/src/System.Threading/tests/InterlockedTests.netcoreapp.cs +++ b/src/System.Threading/tests/InterlockedTests.netcoreapp.cs @@ -5,6 +5,7 @@ using System.Collections.Generic; using System.Diagnostics; using System.Threading.Tasks; +using System.Runtime.CompilerServices; using Xunit; namespace System.Threading.Tests @@ -35,6 +36,12 @@ public void Exit() private LockCookie _current = new LockCookie(-1); + [MethodImpl(MethodImplOptions.NoInlining)] + private static T VolatileReadWithoutBarrier(ref T location) + { + return location; + } + // Returning LockCookie to call Exit on is the fastest implementation because of it works naturally with the RCU pattern. // The traditional Enter/Exit lock interface would require thread local storage or some other scheme to reclaim the cookie. public LockCookie Enter() @@ -50,11 +57,12 @@ public LockCookie Enter() // // If other thread started stealing the ownership, we need to take slow path. // - // Volatile works here, but it is too big of a hammer because of it will result into memory barrier on ARM that - // we do not need here. We really just need to make sure that the compiler won't reorder the read with the above write. - // RyuJIT won't reorder them today, but more advanced optimizers might. + // Make sure that the compiler won't reorder the read with the above write by wrapping the read in no-inline method. + // RyuJIT won't reorder them today, but more advanced optimizers might. Regular Volatile.Read would be too big of + // a hammer because of it will result into memory barrier on ARM that we do not need here. + // // - if (Volatile.Read(ref _current) == entry) + if (VolatileReadWithoutBarrier(ref _current) == entry) { return entry; }