From 386e64d55c3027acaaaf8959c493d51af1691db5 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Mon, 4 May 2020 16:21:47 +0200 Subject: [PATCH 01/14] use struct wrapper for better perf --- .../Net/Sockets/SocketAsyncContext.Unix.cs | 12 ++++++ .../Net/Sockets/SocketAsyncEngine.Unix.cs | 37 ++++++++++--------- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs index a268bfd6557ea4..da9928cbc85fee 100644 --- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs +++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs @@ -2067,4 +2067,16 @@ public static void OutputTrace(string s) public static string IdOf(object o) => o == null ? "(null)" : $"{o.GetType().Name}#{o.GetHashCode():X2}"; } + + // struct wrapper is used in order to improve the performance of the epoll thread hot path by up to 3% of some TechEmpower benchmarks + // the goal is to have a dedicated generic instantiation and using: + // System.Collections.Concurrent.ConcurrentDictionary`2[System.IntPtr,System.Net.Sockets.SocketAsyncContextWrapper]::TryGetValueInternal(!0,int32,!1&) + // instead of: + // System.Collections.Concurrent.ConcurrentDictionary`2[System.IntPtr,System.__Canon]::TryGetValueInternal(!0,int32,!1&) + internal readonly struct SocketAsyncContextWrapper + { + public SocketAsyncContextWrapper(SocketAsyncContext context) => Context = context; + + internal SocketAsyncContext Context { get; } + } } diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs index 26a8be4bf128a0..d2de4d1e41d13a 100644 --- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs +++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs @@ -129,7 +129,7 @@ public bool TryRegister(SafeSocketHandle socket, out Interop.Error error) // // Maps handle values to SocketAsyncContext instances. // - private readonly ConcurrentDictionary _handleToContextMap = new ConcurrentDictionary(); + private readonly ConcurrentDictionary _handleToContextMap = new ConcurrentDictionary(); // // Queue of events generated by EventLoop() that would be processed by the thread pool @@ -208,7 +208,7 @@ private IntPtr AllocateHandle(SocketAsyncContext context) Debug.Assert(!IsFull, "Expected !IsFull"); IntPtr handle = _nextHandle; - _handleToContextMap.TryAdd(handle, context); + _handleToContextMap.TryAdd(handle, new SocketAsyncContextWrapper(context)); _nextHandle = IntPtr.Add(_nextHandle, 1); _outstandingHandles = IntPtr.Add(_outstandingHandles, 1); @@ -318,7 +318,7 @@ private void EventLoop() { bool shutdown = false; Interop.Sys.SocketEvent* buffer = _buffer; - ConcurrentDictionary handleToContextMap = _handleToContextMap; + ConcurrentDictionary handleToContextMap = _handleToContextMap; ConcurrentQueue eventQueue = _eventQueue; while (!shutdown) { @@ -343,27 +343,30 @@ private void EventLoop() else { Debug.Assert(handle.ToInt64() < MaxHandles.ToInt64(), $"Unexpected values: handle={handle}, MaxHandles={MaxHandles}"); - handleToContextMap.TryGetValue(handle, out SocketAsyncContext? context); - if (context != null) + if (handleToContextMap.TryGetValue(handle, out SocketAsyncContextWrapper contextWrapper)) { - Interop.Sys.SocketEvents events = buffer[i].Events; - events = context.HandleSyncEventsSpeculatively(events); - if (events != Interop.Sys.SocketEvents.None) + SocketAsyncContext? context = contextWrapper.Context; + if (context != null) { - var ev = new SocketIOEvent(context, events); - eventQueue.Enqueue(ev); - enqueuedEvent = true; + Interop.Sys.SocketEvents events = buffer[i].Events; + events = context.HandleSyncEventsSpeculatively(events); + if (events != Interop.Sys.SocketEvents.None) + { + var ev = new SocketIOEvent(context, events); + eventQueue.Enqueue(ev); + enqueuedEvent = true; + + // This is necessary when the JIT generates unoptimized code (debug builds, live debugging, + // quick JIT, etc.) to ensure that the context does not remain referenced by this method, as + // such code may keep the stack location live for longer than necessary + ev = default; + } // This is necessary when the JIT generates unoptimized code (debug builds, live debugging, // quick JIT, etc.) to ensure that the context does not remain referenced by this method, as // such code may keep the stack location live for longer than necessary - ev = default; + context = null; } - - // This is necessary when the JIT generates unoptimized code (debug builds, live debugging, - // quick JIT, etc.) to ensure that the context does not remain referenced by this method, as - // such code may keep the stack location live for longer than necessary - context = null; } } } From dc6b496bdf8d8397fe7663ff021d808ccdd26281 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Mon, 4 May 2020 16:28:27 +0200 Subject: [PATCH 02/14] check the most common case first don't access static variable in a loop --- .../Net/Sockets/SocketAsyncEngine.Unix.cs | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs index d2de4d1e41d13a..c8dbb4d0be66cc 100644 --- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs +++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs @@ -320,6 +320,7 @@ private void EventLoop() Interop.Sys.SocketEvent* buffer = _buffer; ConcurrentDictionary handleToContextMap = _handleToContextMap; ConcurrentQueue eventQueue = _eventQueue; + IntPtr shutdownHandle = ShutdownHandle; while (!shutdown) { int numEvents = EventBufferCount; @@ -336,39 +337,38 @@ private void EventLoop() for (int i = 0; i < numEvents; i++) { IntPtr handle = buffer[i].Data; - if (handle == ShutdownHandle) - { - shutdown = true; - } - else + + if (handleToContextMap.TryGetValue(handle, out SocketAsyncContextWrapper contextWrapper)) { Debug.Assert(handle.ToInt64() < MaxHandles.ToInt64(), $"Unexpected values: handle={handle}, MaxHandles={MaxHandles}"); - if (handleToContextMap.TryGetValue(handle, out SocketAsyncContextWrapper contextWrapper)) + + SocketAsyncContext? context = contextWrapper.Context; + if (context != null) { - SocketAsyncContext? context = contextWrapper.Context; - if (context != null) + Interop.Sys.SocketEvents events = buffer[i].Events; + events = context.HandleSyncEventsSpeculatively(events); + if (events != Interop.Sys.SocketEvents.None) { - Interop.Sys.SocketEvents events = buffer[i].Events; - events = context.HandleSyncEventsSpeculatively(events); - if (events != Interop.Sys.SocketEvents.None) - { - var ev = new SocketIOEvent(context, events); - eventQueue.Enqueue(ev); - enqueuedEvent = true; - - // This is necessary when the JIT generates unoptimized code (debug builds, live debugging, - // quick JIT, etc.) to ensure that the context does not remain referenced by this method, as - // such code may keep the stack location live for longer than necessary - ev = default; - } + var ev = new SocketIOEvent(context, events); + eventQueue.Enqueue(ev); + enqueuedEvent = true; // This is necessary when the JIT generates unoptimized code (debug builds, live debugging, // quick JIT, etc.) to ensure that the context does not remain referenced by this method, as // such code may keep the stack location live for longer than necessary - context = null; + ev = default; } + + // This is necessary when the JIT generates unoptimized code (debug builds, live debugging, + // quick JIT, etc.) to ensure that the context does not remain referenced by this method, as + // such code may keep the stack location live for longer than necessary + context = null; } } + else if (handle == shutdownHandle) + { + shutdown = true; + } } if (enqueuedEvent) From 02092b71e1cb91b77ae85c8fbb6bb1fc9d56d061 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Mon, 4 May 2020 16:34:26 +0200 Subject: [PATCH 03/14] simplify the source code --- .../Net/Sockets/SocketAsyncEngine.Unix.cs | 28 ++++++++----------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs index c8dbb4d0be66cc..01319ceb04a5fd 100644 --- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs +++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs @@ -321,6 +321,7 @@ private void EventLoop() ConcurrentDictionary handleToContextMap = _handleToContextMap; ConcurrentQueue eventQueue = _eventQueue; IntPtr shutdownHandle = ShutdownHandle; + SocketAsyncContext? context = null; while (!shutdown) { int numEvents = EventBufferCount; @@ -338,32 +339,25 @@ private void EventLoop() { IntPtr handle = buffer[i].Data; - if (handleToContextMap.TryGetValue(handle, out SocketAsyncContextWrapper contextWrapper)) + if (handleToContextMap.TryGetValue(handle, out SocketAsyncContextWrapper contextWrapper) && (context = contextWrapper.Context) != null) { Debug.Assert(handle.ToInt64() < MaxHandles.ToInt64(), $"Unexpected values: handle={handle}, MaxHandles={MaxHandles}"); - SocketAsyncContext? context = contextWrapper.Context; - if (context != null) + Interop.Sys.SocketEvents events = buffer[i].Events; + events = context.HandleSyncEventsSpeculatively(events); + if (events != Interop.Sys.SocketEvents.None) { - Interop.Sys.SocketEvents events = buffer[i].Events; - events = context.HandleSyncEventsSpeculatively(events); - if (events != Interop.Sys.SocketEvents.None) - { - var ev = new SocketIOEvent(context, events); - eventQueue.Enqueue(ev); - enqueuedEvent = true; - - // This is necessary when the JIT generates unoptimized code (debug builds, live debugging, - // quick JIT, etc.) to ensure that the context does not remain referenced by this method, as - // such code may keep the stack location live for longer than necessary - ev = default; - } + var ev = new SocketIOEvent(context, events); + eventQueue.Enqueue(ev); + enqueuedEvent = true; // This is necessary when the JIT generates unoptimized code (debug builds, live debugging, // quick JIT, etc.) to ensure that the context does not remain referenced by this method, as // such code may keep the stack location live for longer than necessary - context = null; + ev = default; } + + context = null; } else if (handle == shutdownHandle) { From fb1ea27922491fe59da43bc3557ed5532cfd4cd3 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Mon, 4 May 2020 16:41:19 +0200 Subject: [PATCH 04/14] use Span instead of raw pointers --- .../src/System/Net/Sockets/SocketAsyncEngine.Unix.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs index 01319ceb04a5fd..1dd75c406c53e3 100644 --- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs +++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs @@ -335,15 +335,15 @@ private void EventLoop() Debug.Assert(numEvents > 0, $"Unexpected numEvents: {numEvents}"); bool enqueuedEvent = false; - for (int i = 0; i < numEvents; i++) + foreach (var socketEvent in new ReadOnlySpan(buffer, numEvents)) { - IntPtr handle = buffer[i].Data; + IntPtr handle = socketEvent.Data; if (handleToContextMap.TryGetValue(handle, out SocketAsyncContextWrapper contextWrapper) && (context = contextWrapper.Context) != null) { Debug.Assert(handle.ToInt64() < MaxHandles.ToInt64(), $"Unexpected values: handle={handle}, MaxHandles={MaxHandles}"); - Interop.Sys.SocketEvents events = buffer[i].Events; + Interop.Sys.SocketEvents events = socketEvent.Events; events = context.HandleSyncEventsSpeculatively(events); if (events != Interop.Sys.SocketEvents.None) { From c8c3fba07b51018a289f2f747abe17496abf6952 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Mon, 4 May 2020 17:00:39 +0200 Subject: [PATCH 05/14] force inline HandleSyncEventsSpeculatively --- .../src/System/Net/Sockets/SocketAsyncContext.Unix.cs | 3 ++- .../src/System/Net/Sockets/SocketAsyncEngine.Unix.cs | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs index da9928cbc85fee..aa1f5f462cd147 100644 --- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs +++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs @@ -1986,7 +1986,8 @@ public SocketError SendFileAsync(SafeFileHandle fileHandle, long offset, long co // be scheduled instead. It's not functionally incorrect to schedule the release of a synchronous operation, just it may // lead to thread pool starvation issues if the synchronous operations are blocking thread pool threads (typically not // advised) and more threads are not immediately available to run work items that would release those operations. - public unsafe Interop.Sys.SocketEvents HandleSyncEventsSpeculatively(Interop.Sys.SocketEvents events) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Interop.Sys.SocketEvents HandleSyncEventsSpeculatively(Interop.Sys.SocketEvents events) { if ((events & Interop.Sys.SocketEvents.Error) != 0) { diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs index 1dd75c406c53e3..94c46dc071cc56 100644 --- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs +++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs @@ -343,8 +343,7 @@ private void EventLoop() { Debug.Assert(handle.ToInt64() < MaxHandles.ToInt64(), $"Unexpected values: handle={handle}, MaxHandles={MaxHandles}"); - Interop.Sys.SocketEvents events = socketEvent.Events; - events = context.HandleSyncEventsSpeculatively(events); + Interop.Sys.SocketEvents events = context.HandleSyncEventsSpeculatively(socketEvent.Events); if (events != Interop.Sys.SocketEvents.None) { var ev = new SocketIOEvent(context, events); From 3dd7784e85d147ca25086bb5a822182aa2c5dbaa Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Tue, 5 May 2020 16:41:23 +0200 Subject: [PATCH 06/14] use a Dictionary under a lock instead of ConcurrentDictionary, it improves the perf for ARM and for scenarios with MANY clients --- .../Net/Sockets/SocketAsyncContext.Unix.cs | 4 +- .../Net/Sockets/SocketAsyncEngine.Unix.cs | 69 +++++++++++-------- 2 files changed, 42 insertions(+), 31 deletions(-) diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs index aa1f5f462cd147..e03892beed350f 100644 --- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs +++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs @@ -2071,9 +2071,9 @@ public static void OutputTrace(string s) // struct wrapper is used in order to improve the performance of the epoll thread hot path by up to 3% of some TechEmpower benchmarks // the goal is to have a dedicated generic instantiation and using: - // System.Collections.Concurrent.ConcurrentDictionary`2[System.IntPtr,System.Net.Sockets.SocketAsyncContextWrapper]::TryGetValueInternal(!0,int32,!1&) + // System.Collections.Dictionary`2[System.IntPtr,System.Net.Sockets.SocketAsyncContextWrapper]::TryGetValueInternal(!0,int32,!1&) // instead of: - // System.Collections.Concurrent.ConcurrentDictionary`2[System.IntPtr,System.__Canon]::TryGetValueInternal(!0,int32,!1&) + // System.Collections.Dictionary`2[System.IntPtr,System.__Canon]::TryGetValueInternal(!0,int32,!1&) internal readonly struct SocketAsyncContextWrapper { public SocketAsyncContextWrapper(SocketAsyncContext context) => Context = context; diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs index 94c46dc071cc56..99aaa6b95be215 100644 --- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs +++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs @@ -2,13 +2,11 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; using System.Collections.Concurrent; +using System.Collections.Generic; using System.Diagnostics; using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; using System.Threading; -using System.Threading.Tasks; namespace System.Net.Sockets { @@ -127,9 +125,9 @@ public bool TryRegister(SafeSocketHandle socket, out Interop.Error error) private IntPtr _outstandingHandles; // - // Maps handle values to SocketAsyncContext instances. + // Maps handle values to SocketAsyncContext instances. Must be used under a lock! // - private readonly ConcurrentDictionary _handleToContextMap = new ConcurrentDictionary(); + private readonly Dictionary _handleToContextMap = new Dictionary(); // // Queue of events generated by EventLoop() that would be processed by the thread pool @@ -208,7 +206,10 @@ private IntPtr AllocateHandle(SocketAsyncContext context) Debug.Assert(!IsFull, "Expected !IsFull"); IntPtr handle = _nextHandle; - _handleToContextMap.TryAdd(handle, new SocketAsyncContextWrapper(context)); + lock (_handleToContextMap) + { + _handleToContextMap.TryAdd(handle, new SocketAsyncContextWrapper(context)); + } _nextHandle = IntPtr.Add(_nextHandle, 1); _outstandingHandles = IntPtr.Add(_outstandingHandles, 1); @@ -225,7 +226,14 @@ private void FreeHandle(IntPtr handle) lock (s_lock) { - if (_handleToContextMap.TryRemove(handle, out _)) + bool removed = false; + + lock (_handleToContextMap) + { + removed = _handleToContextMap.Remove(handle, out _); + } + + if (removed) { _outstandingHandles = IntPtr.Subtract(_outstandingHandles, 1); Debug.Assert(_outstandingHandles.ToInt64() >= 0, $"Unexpected _outstandingHandles: {_outstandingHandles}"); @@ -318,7 +326,7 @@ private void EventLoop() { bool shutdown = false; Interop.Sys.SocketEvent* buffer = _buffer; - ConcurrentDictionary handleToContextMap = _handleToContextMap; + Dictionary handleToContextMap = _handleToContextMap; ConcurrentQueue eventQueue = _eventQueue; IntPtr shutdownHandle = ShutdownHandle; SocketAsyncContext? context = null; @@ -335,32 +343,35 @@ private void EventLoop() Debug.Assert(numEvents > 0, $"Unexpected numEvents: {numEvents}"); bool enqueuedEvent = false; - foreach (var socketEvent in new ReadOnlySpan(buffer, numEvents)) + lock (_handleToContextMap) { - IntPtr handle = socketEvent.Data; - - if (handleToContextMap.TryGetValue(handle, out SocketAsyncContextWrapper contextWrapper) && (context = contextWrapper.Context) != null) + foreach (var socketEvent in new ReadOnlySpan(buffer, numEvents)) { - Debug.Assert(handle.ToInt64() < MaxHandles.ToInt64(), $"Unexpected values: handle={handle}, MaxHandles={MaxHandles}"); + IntPtr handle = socketEvent.Data; - Interop.Sys.SocketEvents events = context.HandleSyncEventsSpeculatively(socketEvent.Events); - if (events != Interop.Sys.SocketEvents.None) + if (handleToContextMap.TryGetValue(handle, out SocketAsyncContextWrapper contextWrapper) && (context = contextWrapper.Context) != null) { - var ev = new SocketIOEvent(context, events); - eventQueue.Enqueue(ev); - enqueuedEvent = true; - - // This is necessary when the JIT generates unoptimized code (debug builds, live debugging, - // quick JIT, etc.) to ensure that the context does not remain referenced by this method, as - // such code may keep the stack location live for longer than necessary - ev = default; + Debug.Assert(handle.ToInt64() < MaxHandles.ToInt64(), $"Unexpected values: handle={handle}, MaxHandles={MaxHandles}"); + + Interop.Sys.SocketEvents events = context.HandleSyncEventsSpeculatively(socketEvent.Events); + if (events != Interop.Sys.SocketEvents.None) + { + var ev = new SocketIOEvent(context, events); + eventQueue.Enqueue(ev); + enqueuedEvent = true; + + // This is necessary when the JIT generates unoptimized code (debug builds, live debugging, + // quick JIT, etc.) to ensure that the context does not remain referenced by this method, as + // such code may keep the stack location live for longer than necessary + ev = default; + } + + context = null; + } + else if (handle == shutdownHandle) + { + shutdown = true; } - - context = null; - } - else if (handle == shutdownHandle) - { - shutdown = true; } } From b4c61ae4d59c9ee0ec1c55b8e8a30c1abed290ec Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Tue, 5 May 2020 17:24:30 +0200 Subject: [PATCH 07/14] address code review feedback --- .../src/System/Net/Sockets/SocketAsyncEngine.Unix.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs index 99aaa6b95be215..9b53de374adf58 100644 --- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs +++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs @@ -208,6 +208,7 @@ private IntPtr AllocateHandle(SocketAsyncContext context) IntPtr handle = _nextHandle; lock (_handleToContextMap) { + Debug.Assert(handle != ShutdownHandle, "ShutdownHandle must not be added to the dictionary"); _handleToContextMap.TryAdd(handle, new SocketAsyncContextWrapper(context)); } @@ -366,6 +367,9 @@ private void EventLoop() ev = default; } + // This is necessary when the JIT generates unoptimized code (debug builds, live debugging, + // quick JIT, etc.) to ensure that the context does not remain referenced by this method, as + // such code may keep the stack location live for longer than necessary context = null; } else if (handle == shutdownHandle) From 240b5c4339c43c2e3a5680b019d5b67b852e7843 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Tue, 5 May 2020 17:30:04 +0200 Subject: [PATCH 08/14] swtich to a single epoll thread by default --- .../Net/Sockets/SocketAsyncEngine.Unix.cs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs index 9b53de374adf58..e283eb9fec322a 100644 --- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs +++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs @@ -54,16 +54,21 @@ public bool TryRegister(SafeSocketHandle socket, out Interop.Error error) private static readonly object s_lock = new object(); - // In debug builds, force there to be 2 engines. In release builds, use half the number of processors when - // there are at least 6. The lower bound is to avoid using multiple engines on systems which aren't servers. -#pragma warning disable CA1802 // const works for debug, but needs to be static readonly for release - private static readonly int s_engineCount = + private static readonly int s_engineCount = GetEnginesCount(); + + private static int GetEnginesCount() + { + if (uint.TryParse(Environment.GetEnvironmentVariable("SYSTEM_NET_SOCKETS_ENGINE_COUNT"), out uint count)) + { + return (int)count; + } + #if DEBUG - 2; + return 2; // use two engines to make sure that this code path is covered with tests #else - Environment.ProcessorCount >= 6 ? Environment.ProcessorCount / 2 : 1; + return 1; // having a single epoll thread is optimal #endif -#pragma warning restore CA1802 + } // // The current engines. We replace an engine when it runs out of "handle" values. From 3ad2e285dbb595c03fbda0d4b3b921c8b1fb27f5 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Tue, 5 May 2020 18:09:10 +0200 Subject: [PATCH 09/14] code review fixes --- .../src/System/Net/Sockets/SocketAsyncEngine.Unix.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs index e283eb9fec322a..db4ef54a661d9d 100644 --- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs +++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs @@ -58,7 +58,7 @@ public bool TryRegister(SafeSocketHandle socket, out Interop.Error error) private static int GetEnginesCount() { - if (uint.TryParse(Environment.GetEnvironmentVariable("SYSTEM_NET_SOCKETS_ENGINE_COUNT"), out uint count)) + if (uint.TryParse(Environment.GetEnvironmentVariable("DOTNET_SYSTEM_NET_SOCKETS_THREAD_COUNT"), out uint count)) { return (int)count; } @@ -236,7 +236,7 @@ private void FreeHandle(IntPtr handle) lock (_handleToContextMap) { - removed = _handleToContextMap.Remove(handle, out _); + removed = _handleToContextMap.Remove(handle); } if (removed) From 8fa2222e98ffa189a325a5ca5397f97b4d54330b Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Wed, 6 May 2020 16:49:24 +0200 Subject: [PATCH 10/14] Revert "use a Dictionary under a lock instead of ConcurrentDictionary, it improves the perf for ARM and for scenarios with MANY clients" This reverts commit 3dd7784e85d147ca25086bb5a822182aa2c5dbaa. # Conflicts: # src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs --- .../Net/Sockets/SocketAsyncContext.Unix.cs | 4 +- .../Net/Sockets/SocketAsyncEngine.Unix.cs | 69 ++++++++----------- 2 files changed, 31 insertions(+), 42 deletions(-) diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs index e03892beed350f..aa1f5f462cd147 100644 --- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs +++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs @@ -2071,9 +2071,9 @@ public static void OutputTrace(string s) // struct wrapper is used in order to improve the performance of the epoll thread hot path by up to 3% of some TechEmpower benchmarks // the goal is to have a dedicated generic instantiation and using: - // System.Collections.Dictionary`2[System.IntPtr,System.Net.Sockets.SocketAsyncContextWrapper]::TryGetValueInternal(!0,int32,!1&) + // System.Collections.Concurrent.ConcurrentDictionary`2[System.IntPtr,System.Net.Sockets.SocketAsyncContextWrapper]::TryGetValueInternal(!0,int32,!1&) // instead of: - // System.Collections.Dictionary`2[System.IntPtr,System.__Canon]::TryGetValueInternal(!0,int32,!1&) + // System.Collections.Concurrent.ConcurrentDictionary`2[System.IntPtr,System.__Canon]::TryGetValueInternal(!0,int32,!1&) internal readonly struct SocketAsyncContextWrapper { public SocketAsyncContextWrapper(SocketAsyncContext context) => Context = context; diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs index db4ef54a661d9d..841b60607ddcdb 100644 --- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs +++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs @@ -2,11 +2,13 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System; using System.Collections.Concurrent; -using System.Collections.Generic; using System.Diagnostics; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using System.Threading; +using System.Threading.Tasks; namespace System.Net.Sockets { @@ -130,9 +132,9 @@ private static int GetEnginesCount() private IntPtr _outstandingHandles; // - // Maps handle values to SocketAsyncContext instances. Must be used under a lock! + // Maps handle values to SocketAsyncContext instances. // - private readonly Dictionary _handleToContextMap = new Dictionary(); + private readonly ConcurrentDictionary _handleToContextMap = new ConcurrentDictionary(); // // Queue of events generated by EventLoop() that would be processed by the thread pool @@ -211,11 +213,8 @@ private IntPtr AllocateHandle(SocketAsyncContext context) Debug.Assert(!IsFull, "Expected !IsFull"); IntPtr handle = _nextHandle; - lock (_handleToContextMap) - { - Debug.Assert(handle != ShutdownHandle, "ShutdownHandle must not be added to the dictionary"); - _handleToContextMap.TryAdd(handle, new SocketAsyncContextWrapper(context)); - } + Debug.Assert(handle != ShutdownHandle, "ShutdownHandle must not be added to the dictionary"); + _handleToContextMap.TryAdd(handle, new SocketAsyncContextWrapper(context)); _nextHandle = IntPtr.Add(_nextHandle, 1); _outstandingHandles = IntPtr.Add(_outstandingHandles, 1); @@ -232,14 +231,7 @@ private void FreeHandle(IntPtr handle) lock (s_lock) { - bool removed = false; - - lock (_handleToContextMap) - { - removed = _handleToContextMap.Remove(handle); - } - - if (removed) + if (_handleToContextMap.TryRemove(handle, out _)) { _outstandingHandles = IntPtr.Subtract(_outstandingHandles, 1); Debug.Assert(_outstandingHandles.ToInt64() >= 0, $"Unexpected _outstandingHandles: {_outstandingHandles}"); @@ -332,7 +324,7 @@ private void EventLoop() { bool shutdown = false; Interop.Sys.SocketEvent* buffer = _buffer; - Dictionary handleToContextMap = _handleToContextMap; + ConcurrentDictionary handleToContextMap = _handleToContextMap; ConcurrentQueue eventQueue = _eventQueue; IntPtr shutdownHandle = ShutdownHandle; SocketAsyncContext? context = null; @@ -349,38 +341,35 @@ private void EventLoop() Debug.Assert(numEvents > 0, $"Unexpected numEvents: {numEvents}"); bool enqueuedEvent = false; - lock (_handleToContextMap) + foreach (var socketEvent in new ReadOnlySpan(buffer, numEvents)) { - foreach (var socketEvent in new ReadOnlySpan(buffer, numEvents)) + IntPtr handle = socketEvent.Data; + + if (handleToContextMap.TryGetValue(handle, out SocketAsyncContextWrapper contextWrapper) && (context = contextWrapper.Context) != null) { - IntPtr handle = socketEvent.Data; + Debug.Assert(handle.ToInt64() < MaxHandles.ToInt64(), $"Unexpected values: handle={handle}, MaxHandles={MaxHandles}"); - if (handleToContextMap.TryGetValue(handle, out SocketAsyncContextWrapper contextWrapper) && (context = contextWrapper.Context) != null) + Interop.Sys.SocketEvents events = context.HandleSyncEventsSpeculatively(socketEvent.Events); + if (events != Interop.Sys.SocketEvents.None) { - Debug.Assert(handle.ToInt64() < MaxHandles.ToInt64(), $"Unexpected values: handle={handle}, MaxHandles={MaxHandles}"); - - Interop.Sys.SocketEvents events = context.HandleSyncEventsSpeculatively(socketEvent.Events); - if (events != Interop.Sys.SocketEvents.None) - { - var ev = new SocketIOEvent(context, events); - eventQueue.Enqueue(ev); - enqueuedEvent = true; - - // This is necessary when the JIT generates unoptimized code (debug builds, live debugging, - // quick JIT, etc.) to ensure that the context does not remain referenced by this method, as - // such code may keep the stack location live for longer than necessary - ev = default; - } + var ev = new SocketIOEvent(context, events); + eventQueue.Enqueue(ev); + enqueuedEvent = true; // This is necessary when the JIT generates unoptimized code (debug builds, live debugging, // quick JIT, etc.) to ensure that the context does not remain referenced by this method, as // such code may keep the stack location live for longer than necessary - context = null; - } - else if (handle == shutdownHandle) - { - shutdown = true; + ev = default; } + + // This is necessary when the JIT generates unoptimized code (debug builds, live debugging, + // quick JIT, etc.) to ensure that the context does not remain referenced by this method, as + // such code may keep the stack location live for longer than necessary + context = null; + } + else if (handle == shutdownHandle) + { + shutdown = true; } } From fe10f44b40a54f29eeb4783f4b54b8b1a909e490 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Wed, 6 May 2020 16:52:07 +0200 Subject: [PATCH 11/14] apply code review suggestions --- .../Net/Sockets/SocketAsyncContext.Unix.cs | 12 ------------ .../System/Net/Sockets/SocketAsyncEngine.Unix.cs | 16 +++++++++++++--- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs index aa1f5f462cd147..fd6c66e3454e2f 100644 --- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs +++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs @@ -2068,16 +2068,4 @@ public static void OutputTrace(string s) public static string IdOf(object o) => o == null ? "(null)" : $"{o.GetType().Name}#{o.GetHashCode():X2}"; } - - // struct wrapper is used in order to improve the performance of the epoll thread hot path by up to 3% of some TechEmpower benchmarks - // the goal is to have a dedicated generic instantiation and using: - // System.Collections.Concurrent.ConcurrentDictionary`2[System.IntPtr,System.Net.Sockets.SocketAsyncContextWrapper]::TryGetValueInternal(!0,int32,!1&) - // instead of: - // System.Collections.Concurrent.ConcurrentDictionary`2[System.IntPtr,System.__Canon]::TryGetValueInternal(!0,int32,!1&) - internal readonly struct SocketAsyncContextWrapper - { - public SocketAsyncContextWrapper(SocketAsyncContext context) => Context = context; - - internal SocketAsyncContext Context { get; } - } } diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs index 841b60607ddcdb..acfa602b534405 100644 --- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs +++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs @@ -2,13 +2,10 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; using System.Collections.Concurrent; using System.Diagnostics; using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; using System.Threading; -using System.Threading.Tasks; namespace System.Net.Sockets { @@ -366,6 +363,7 @@ private void EventLoop() // quick JIT, etc.) to ensure that the context does not remain referenced by this method, as // such code may keep the stack location live for longer than necessary context = null; + contextWrapper = default; } else if (handle == shutdownHandle) { @@ -493,6 +491,18 @@ private bool TryRegister(SafeSocketHandle socket, IntPtr handle, out Interop.Err return error == Interop.Error.SUCCESS; } + // struct wrapper is used in order to improve the performance of the epoll thread hot path by up to 3% of some TechEmpower benchmarks + // the goal is to have a dedicated generic instantiation and using: + // System.Collections.Concurrent.ConcurrentDictionary`2[System.IntPtr,System.Net.Sockets.SocketAsyncContextWrapper]::TryGetValueInternal(!0,int32,!1&) + // instead of: + // System.Collections.Concurrent.ConcurrentDictionary`2[System.IntPtr,System.__Canon]::TryGetValueInternal(!0,int32,!1&) + private readonly struct SocketAsyncContextWrapper + { + public SocketAsyncContextWrapper(SocketAsyncContext context) => Context = context; + + internal SocketAsyncContext Context { get; } + } + private readonly struct SocketIOEvent { public SocketAsyncContext Context { get; } From d85744a063c54e048b02d969f9fb846d6486a8d1 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Wed, 6 May 2020 16:57:16 +0200 Subject: [PATCH 12/14] change the heuristic, single epoll thread is not always enough --- .../Net/Sockets/SocketAsyncEngine.Unix.cs | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs index acfa602b534405..6d52a08e13dbba 100644 --- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs +++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs @@ -5,6 +5,7 @@ using System.Collections.Concurrent; using System.Diagnostics; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using System.Threading; namespace System.Net.Sockets @@ -62,11 +63,24 @@ private static int GetEnginesCount() return (int)count; } -#if DEBUG - return 2; // use two engines to make sure that this code path is covered with tests -#else - return 1; // having a single epoll thread is optimal -#endif + // the data that stands behind this heuristic can be found at https://github.com/dotnet/runtime/pull/35800#issuecomment-624719500 + // the goal is to have a single epoll thread per every 28 cores + const int coresPerSingleEpollThread = 28; + int result = Environment.ProcessorCount / coresPerSingleEpollThread; + + // and "round" it up, in a way that 29 cores gets 2 epoll threads + if (Environment.ProcessorCount % coresPerSingleEpollThread != 0) + { + result += 1; + } + + // and double that for ARM where some operations like memory barriers are more expensive and we need more producer threads + if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64 || RuntimeInformation.ProcessArchitecture == Architecture.Arm) + { + result *= 2; + } + + return Math.Min(result, Environment.ProcessorCount / 2); } // From 6b45dda29cffa707bb4eadbd5bfccb026528d71a Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Thu, 7 May 2020 16:27:33 +0200 Subject: [PATCH 13/14] simplify the heuristic and add a comment --- .../Net/Sockets/SocketAsyncEngine.Unix.cs | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs index 6d52a08e13dbba..af17007ba0bd36 100644 --- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs +++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs @@ -58,29 +58,29 @@ public bool TryRegister(SafeSocketHandle socket, out Interop.Error error) private static int GetEnginesCount() { + // The responsibility of SocketAsyncEngine is to get notifications from epoll|kqueue + // and schedule corresponding work items to ThreadPool (socket reads and writes). + // + // Using TechEmpower benchmarks that generate a LOT of SMALL socket reads and writes under a VERY HIGH load + // we have observed that a single engine is capable of keeping busy up to thirty x64 and eight ARM64 CPU Cores. + // + // The vast majority of real-life scenarios is never going to generate such a huge load (hundreds of thousands of requests per second) + // and having a single producer should be almost always enough. + // + // We want to be sure that we can handle extreme loads and that's why we have decided to use these values. + // + // It's impossible to predict all possible scenarios so we have added a possibility to configure this value using environment variables. if (uint.TryParse(Environment.GetEnvironmentVariable("DOTNET_SYSTEM_NET_SOCKETS_THREAD_COUNT"), out uint count)) { return (int)count; } - // the data that stands behind this heuristic can be found at https://github.com/dotnet/runtime/pull/35800#issuecomment-624719500 - // the goal is to have a single epoll thread per every 28 cores - const int coresPerSingleEpollThread = 28; - int result = Environment.ProcessorCount / coresPerSingleEpollThread; - - // and "round" it up, in a way that 29 cores gets 2 epoll threads - if (Environment.ProcessorCount % coresPerSingleEpollThread != 0) - { - result += 1; - } - - // and double that for ARM where some operations like memory barriers are more expensive and we need more producer threads - if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64 || RuntimeInformation.ProcessArchitecture == Architecture.Arm) - { - result *= 2; - } + Architecture architecture = RuntimeInformation.ProcessArchitecture; + int coresPerEngine = architecture == Architecture.Arm64 || architecture == Architecture.Arm + ? 8 + : 30; - return Math.Min(result, Environment.ProcessorCount / 2); + return Math.Max(1, (int)Math.Round(Environment.ProcessorCount / (double)coresPerEngine)); } // From f79e0b68c93a4b793f733f718b1529802e96ffc5 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Thu, 7 May 2020 17:05:59 +0200 Subject: [PATCH 14/14] apply the naming suggestions --- .../src/System/Net/Sockets/SocketAsyncEngine.Unix.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs index af17007ba0bd36..badf27e85d358b 100644 --- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs +++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs @@ -54,9 +54,9 @@ public bool TryRegister(SafeSocketHandle socket, out Interop.Error error) private static readonly object s_lock = new object(); - private static readonly int s_engineCount = GetEnginesCount(); + private static readonly int s_maxEngineCount = GetEngineCount(); - private static int GetEnginesCount() + private static int GetEngineCount() { // The responsibility of SocketAsyncEngine is to get notifications from epoll|kqueue // and schedule corresponding work items to ThreadPool (socket reads and writes). @@ -87,7 +87,7 @@ private static int GetEnginesCount() // The current engines. We replace an engine when it runs out of "handle" values. // Must be accessed under s_lock. // - private static readonly SocketAsyncEngine?[] s_currentEngines = new SocketAsyncEngine?[s_engineCount]; + private static readonly SocketAsyncEngine?[] s_currentEngines = new SocketAsyncEngine?[s_maxEngineCount]; private static int s_allocateFromEngine = 0; private readonly IntPtr _port; @@ -122,7 +122,7 @@ private static int GetEnginesCount() // private static readonly IntPtr MaxHandles = IntPtr.Size == 4 ? (IntPtr)int.MaxValue : (IntPtr)long.MaxValue; #endif - private static readonly IntPtr MinHandlesForAdditionalEngine = s_engineCount == 1 ? MaxHandles : (IntPtr)32; + private static readonly IntPtr MinHandlesForAdditionalEngine = s_maxEngineCount == 1 ? MaxHandles : (IntPtr)32; // // Sentinel handle value to identify events from the "shutdown pipe," used to signal an event loop to stop @@ -213,7 +213,7 @@ private static void AllocateToken(SocketAsyncContext context, out SocketAsyncEng // Round-robin to the next engine once we have sufficient sockets on this one. if (!engine.HasLowNumberOfSockets) { - s_allocateFromEngine = (s_allocateFromEngine + 1) % s_engineCount; + s_allocateFromEngine = (s_allocateFromEngine + 1) % s_maxEngineCount; } } }