From 4924cf55ad63de4038506e30e4b0e3aa4d34c9ec Mon Sep 17 00:00:00 2001 From: Ben Adams Date: Sat, 1 Oct 2016 05:15:50 +0100 Subject: [PATCH 1/2] Use De Bruijn sequence for FindFirstEqualByte --- .../Infrastructure/MemoryPoolIterator.cs | 108 ++++++++++++++---- .../MemoryPoolIteratorExtensions.cs | 32 +++--- .../Internal/KestrelEngine.cs | 8 ++ 3 files changed, 113 insertions(+), 35 deletions(-) diff --git a/src/Microsoft.AspNetCore.Server.Kestrel/Internal/Infrastructure/MemoryPoolIterator.cs b/src/Microsoft.AspNetCore.Server.Kestrel/Internal/Infrastructure/MemoryPoolIterator.cs index ccc54cf52..f0ed7ecdf 100644 --- a/src/Microsoft.AspNetCore.Server.Kestrel/Internal/Infrastructure/MemoryPoolIterator.cs +++ b/src/Microsoft.AspNetCore.Server.Kestrel/Internal/Infrastructure/MemoryPoolIterator.cs @@ -4,13 +4,42 @@ using System; using System.Diagnostics; using System.Numerics; +using System.Runtime.CompilerServices; using System.Threading; namespace Microsoft.AspNetCore.Server.Kestrel.Internal.Infrastructure { public struct MemoryPoolIterator { + // When this variable is set in KestrelEngine.JitReadonlyConsts it will + // deterministically convert the following readonly static values to jitted consts + // and embed the init code in that start up called function. + // + // This means when they are next used they have been pre-evaluated and the jit can + // either directly embed them or use them for branch elimiation.
+ internal static bool StaticReadonlysJitted; + + // De Bruijn sequence https://github.com/aspnet/KestrelHttpServer/issues/1129 + const ulong DEBRUIJN_SEQ64 = 0x03f79d71b4cb0a89; + + // De Bruijn sequence table used for verification + private static readonly byte[] _debruijn64Xor = + { + 0, 5, 0, 7, 6, 3, 0, 7, + 7, 6, 5, 4, 3, 2, 0, 7, + 6, 7, 4, 6, 6, 5, 2, 5, + 4, 4, 3, 2, 2, 1, 0, 7, + 5, 6, 3, 7, 5, 4, 1, 6, + 4, 6, 2, 5, 3, 2, 1, 5, + 3, 4, 1, 4, 2, 3, 1, 3, + 1, 2, 1, 1, 0, 0, 0, 7, + }; + + // Convert these returns to jitted consts + private static readonly bool IsHardwareAccelerated = Vector.IsHardwareAccelerated; + private static readonly bool IsLittleEndian = BitConverter.IsLittleEndian; private static readonly int _vectorSpan = Vector.Count; + private static readonly int _vectorUlongSpan = Vector.Count; private MemoryPoolBlock _block; private int _index; @@ -122,7 +151,9 @@ public void Skip(int bytesToSkip) { if (wasLastBlock) { - throw new InvalidOperationException("Attempted to skip more bytes than available."); + // Removed throw code to reduce inline size + // https://github.com/dotnet/coreclr/pull/6103 + ThrowInvalidOperationException_SkippedMoreThanAvailable(); } else { @@ -262,7 +293,7 @@ public unsafe int Seek( // Need unit tests to test Vector path #if !DEBUG // Check will be Jitted away https://github.com/dotnet/coreclr/issues/1079 - if (Vector.IsHardwareAccelerated) + if (IsHardwareAccelerated) { #endif if (following >= _vectorSpan) @@ -370,7 +401,7 @@ public unsafe int Seek( // Need unit tests to test Vector path #if !DEBUG // Check will be Jitted away https://github.com/dotnet/coreclr/issues/1079 - if (Vector.IsHardwareAccelerated) + if (IsHardwareAccelerated) { #endif if (following >= _vectorSpan) @@ -483,7 +514,7 @@ public unsafe int Seek( // Need unit tests to test Vector path #if !DEBUG // Check will be Jitted away https://github.com/dotnet/coreclr/issues/1079 - if (Vector.IsHardwareAccelerated) + if (IsHardwareAccelerated) { #endif if 
(following >= _vectorSpan) @@ -626,7 +657,7 @@ public unsafe int Seek( // Need unit tests to test Vector path #if !DEBUG // Check will be Jitted away https://github.com/dotnet/coreclr/issues/1079 - if (Vector.IsHardwareAccelerated) + if (IsHardwareAccelerated) { #endif if (following >= _vectorSpan) @@ -748,27 +779,43 @@ public unsafe int Seek( /// /// The first index of the result vector /// byteEquals = 0 + // Force inlining (70 IL bytes, 113 bytes asm w/ inlines) Issue: https://github.com/dotnet/coreclr/issues/7386 + [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int FindFirstEqualByte(ref Vector byteEquals) { - if (!BitConverter.IsLittleEndian) return FindFirstEqualByteSlow(ref byteEquals); - - // Quasi-tree search - var vector64 = Vector.AsVectorInt64(byteEquals); - for (var i = 0; i < Vector.Count; i++) + // Jitted const eliminates branch + if (IsLittleEndian) { - var longValue = vector64[i]; - if (longValue == 0) continue; + var vector64 = Vector.AsVectorUInt64(byteEquals); + ulong longValue = 0; + var i = 0; + // Should only be called when byte in Vector. Since range check + // can't be eliminated, make loop one larger to throw if not found + // rather than doing the throw ourselves + for (; i < _vectorUlongSpan + 1; i++) + { + longValue = vector64[i]; + if (longValue != 0) + { + break; + } + } - return (i << 3) + - ((longValue & 0x00000000ffffffff) > 0 - ? (longValue & 0x000000000000ffff) > 0 - ? (longValue & 0x00000000000000ff) > 0 ? 0 : 1 - : (longValue & 0x0000000000ff0000) > 0 ? 2 : 3 - : (longValue & 0x0000ffff00000000) > 0 - ? (longValue & 0x000000ff00000000) > 0 ? 4 : 5 - : (longValue & 0x00ff000000000000) > 0 ?
6 : 7); + // Single LEA instruction with jitted const (using function result) + return i * 8 + DeBruijnFindByteXor(longValue); } - throw new InvalidOperationException(); + else + { + return FindFirstEqualByteSlow(ref byteEquals); + } + } + + // Force inlining (29 IL bytes) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int DeBruijnFindByteXor(ulong v) + { + var index = (((v ^ (v - 1)) * DEBRUIJN_SEQ64) >> 58); + return _debruijn64Xor[(int)index]; } // Internal for testing @@ -856,7 +903,9 @@ public int GetLength(MemoryPoolIterator end) } else if (block.Next == null) { - throw new InvalidOperationException("end did not follow iterator"); + // Removed throw code to reduce inline size + // https://github.com/dotnet/coreclr/pull/6103 + ThrowInvalidOperationException_EndDidNotFollow(); } else { @@ -1041,5 +1090,20 @@ public unsafe void CopyFromAscii(string data) _block = block; _index = blockIndex; } + + private static void ThrowInvalidOperationException_EndDidNotFollow() + { + // Removed throw code to reduce inline size + // https://github.com/dotnet/coreclr/pull/6103 + throw new InvalidOperationException("end did not follow iterator"); + } + + private static void ThrowInvalidOperationException_SkippedMoreThanAvailable() + { + // Removed throw code to reduce inline size + // https://github.com/dotnet/coreclr/pull/6103 + throw new InvalidOperationException("Attempted to skip more bytes than available."); + } + } } diff --git a/src/Microsoft.AspNetCore.Server.Kestrel/Internal/Infrastructure/MemoryPoolIteratorExtensions.cs b/src/Microsoft.AspNetCore.Server.Kestrel/Internal/Infrastructure/MemoryPoolIteratorExtensions.cs index 59978837f..a5e3749f5 100644 --- a/src/Microsoft.AspNetCore.Server.Kestrel/Internal/Infrastructure/MemoryPoolIteratorExtensions.cs +++ b/src/Microsoft.AspNetCore.Server.Kestrel/Internal/Infrastructure/MemoryPoolIteratorExtensions.cs @@ -16,6 +16,14 @@ public static class MemoryPoolIteratorExtensions public const string 
Http10Version = "HTTP/1.0"; public const string Http11Version = "HTTP/1.1"; + // When this variable is set in KestrelEngine.JitReadonlyConsts it will + // deterministically convert the following readonly static values to jitted consts + // and embed the init code in that start up called function. + // + // This means when they are next used they have been pre-evaluated and the jit can + // either directly embed them or use them for branch elimination. + internal static bool StaticReadonlysJitted; + // readonly primitive statics can be Jit'd to consts https://github.com/dotnet/coreclr/issues/1079 private readonly static long _httpConnectMethodLong = GetAsciiStringAsLong("CONNECT "); private readonly static long _httpDeleteMethodLong = GetAsciiStringAsLong("DELETE \0"); @@ -36,19 +44,17 @@ public static class MemoryPoolIteratorExtensions private readonly static long _mask5Chars = GetMaskAsLong(new byte[] { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00 }); private readonly static long _mask4Chars = GetMaskAsLong(new byte[] { 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }); - private readonly static Tuple[] _knownMethods = new Tuple[8]; - - static MemoryPoolIteratorExtensions() - { - _knownMethods[0] = Tuple.Create(_mask4Chars, _httpPutMethodLong, HttpMethods.Put); - _knownMethods[1] = Tuple.Create(_mask5Chars, _httpPostMethodLong, HttpMethods.Post); - _knownMethods[2] = Tuple.Create(_mask5Chars, _httpHeadMethodLong, HttpMethods.Head); - _knownMethods[3] = Tuple.Create(_mask6Chars, _httpTraceMethodLong, HttpMethods.Trace); - _knownMethods[4] = Tuple.Create(_mask6Chars, _httpPatchMethodLong, HttpMethods.Patch); - _knownMethods[5] = Tuple.Create(_mask7Chars, _httpDeleteMethodLong, HttpMethods.Delete); - _knownMethods[6] = Tuple.Create(_mask8Chars, _httpConnectMethodLong, HttpMethods.Connect); - _knownMethods[7] = Tuple.Create(_mask8Chars, _httpOptionsMethodLong, HttpMethods.Options); - } + private readonly static Tuple[] _knownMethods = new Tuple[] + {
Tuple.Create(_mask4Chars, _httpPutMethodLong, HttpMethods.Put), + Tuple.Create(_mask5Chars, _httpPostMethodLong, HttpMethods.Post), + Tuple.Create(_mask5Chars, _httpHeadMethodLong, HttpMethods.Head), + Tuple.Create(_mask6Chars, _httpTraceMethodLong, HttpMethods.Trace), + Tuple.Create(_mask6Chars, _httpPatchMethodLong, HttpMethods.Patch), + Tuple.Create(_mask7Chars, _httpDeleteMethodLong, HttpMethods.Delete), + Tuple.Create(_mask8Chars, _httpConnectMethodLong, HttpMethods.Connect), + Tuple.Create(_mask8Chars, _httpOptionsMethodLong, HttpMethods.Options) + }; private unsafe static long GetAsciiStringAsLong(string str) { diff --git a/src/Microsoft.AspNetCore.Server.Kestrel/Internal/KestrelEngine.cs b/src/Microsoft.AspNetCore.Server.Kestrel/Internal/KestrelEngine.cs index 1f7c4c7a3..9b80ed88d 100644 --- a/src/Microsoft.AspNetCore.Server.Kestrel/Internal/KestrelEngine.cs +++ b/src/Microsoft.AspNetCore.Server.Kestrel/Internal/KestrelEngine.cs @@ -38,6 +38,8 @@ internal KestrelEngine(Libuv uv, ServiceContext context) public void Start(int count) { + JitReadonlyConsts(); + for (var index = 0; index < count; index++) { Threads.Add(new KestrelThread(this)); @@ -49,6 +51,12 @@ public void Start(int count) } } + public static void JitReadonlyConsts() + { + MemoryPoolIterator.StaticReadonlysJitted = true; + MemoryPoolIteratorExtensions.StaticReadonlysJitted = true; + } + public void Dispose() { Task.WaitAll(Threads.Select(thread => thread.StopAsync(TimeSpan.FromSeconds(2.5))).ToArray()); From 311309c0449d8c9f3e61f4c3000c9c9408d7fe6e Mon Sep 17 00:00:00 2001 From: Ben Adams Date: Mon, 3 Oct 2016 07:27:09 +0100 Subject: [PATCH 2/2] Use BensMagicNumber to find byte --- .../Infrastructure/MemoryPoolIterator.cs | 32 ++++++------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/src/Microsoft.AspNetCore.Server.Kestrel/Internal/Infrastructure/MemoryPoolIterator.cs b/src/Microsoft.AspNetCore.Server.Kestrel/Internal/Infrastructure/MemoryPoolIterator.cs index 
f0ed7ecdf..204fdb6dc 100644 --- a/src/Microsoft.AspNetCore.Server.Kestrel/Internal/Infrastructure/MemoryPoolIterator.cs +++ b/src/Microsoft.AspNetCore.Server.Kestrel/Internal/Infrastructure/MemoryPoolIterator.cs @@ -19,21 +19,8 @@ public struct MemoryPoolIterator // either directly embed them or use them for branch elimiation. internal static bool StaticReadonlysJitted; - // De Bruijn sequence https://github.com/aspnet/KestrelHttpServer/issues/1129 - const ulong DEBRUIJN_SEQ64 = 0x03f79d71b4cb0a89; - - // De Bruijn sequence table used for verification - private static readonly byte[] _debruijn64Xor = - { - 0, 5, 0, 7, 6, 3, 0, 7, - 7, 6, 5, 4, 3, 2, 0, 7, - 6, 7, 4, 6, 6, 5, 2, 5, - 4, 4, 3, 2, 2, 1, 0, 7, - 5, 6, 3, 7, 5, 4, 1, 6, - 4, 6, 2, 5, 3, 2, 1, 5, - 3, 4, 1, 4, 2, 3, 1, 3, - 1, 2, 1, 1, 0, 0, 0, 7, - }; + // Ben Adams' Magic Number for finding set bytes + private const ulong BENS_MAGIC_NUMBER = 0x81018202830380; // Convert these returns to jitted consts private static readonly bool IsHardwareAccelerated = Vector.IsHardwareAccelerated; @@ -339,7 +326,6 @@ public unsafe int Seek( #if !DEBUG } #endif - var pCurrent = (block.DataFixedPtr + index); var pEnd = pCurrent + Math.Min(following, limit - bytesScanned); do @@ -779,8 +765,8 @@ public unsafe int Seek( /// /// The first index of the result vector /// byteEquals = 0 - // Force inlining (70 IL bytes, 113 bytes asm w/ inlines) Issue: https://github.com/dotnet/coreclr/issues/7386 - [MethodImpl(MethodImplOptions.AggressiveInlining)] + // Force inlining (70 IL bytes, 98 bytes asm w/ inlines) Issue: https://github.com/dotnet/coreclr/issues/7386 + //[MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int FindFirstEqualByte(ref Vector byteEquals) { // Jitted const eliminates branch @@ -802,7 +788,7 @@ internal static int FindFirstEqualByte(ref Vector byteEquals) } // Single LEA instruction with jitted const (using function result) - return i * 8 + DeBruijnFindByteXor(longValue); + return i * 8 +
BensMagicNumberFindByte(longValue); } else { @@ -810,12 +796,12 @@ internal static int FindFirstEqualByte(ref Vector byteEquals) } } - // Force inlining (29 IL bytes) + // Force inlining (23 IL bytes) [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int DeBruijnFindByteXor(ulong v) + private static int BensMagicNumberFindByte(ulong ulongValue) { - var index = (((v ^ (v - 1)) * DEBRUIJN_SEQ64) >> 58); - return _debruijn64Xor[(int)index]; + var flag = (ulongValue & ((ulong)-(long)ulongValue)) >> 8; + return (int) ((flag * BENS_MAGIC_NUMBER) >> 55) & 7; } // Internal for testing