From 09bbc3ca5f467aa24c7f0162ffcc003c1dcb3f90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Foidl?= Date: Mon, 28 Jun 2021 14:27:36 +0200 Subject: [PATCH 1/3] Used VectorXYZ.Create for constants in Base64 --- .../src/System/Buffers/Text/Base64.cs | 8 -- .../src/System/Buffers/Text/Base64Decoder.cs | 90 +++++++++++++++++-- .../src/System/Buffers/Text/Base64Encoder.cs | 60 +++++++++++-- 3 files changed, 139 insertions(+), 19 deletions(-) diff --git a/src/libraries/System.Memory/src/System/Buffers/Text/Base64.cs b/src/libraries/System.Memory/src/System/Buffers/Text/Base64.cs index 60d1558a50f50c..7506cbe1eb0140 100644 --- a/src/libraries/System.Memory/src/System/Buffers/Text/Base64.cs +++ b/src/libraries/System.Memory/src/System/Buffers/Text/Base64.cs @@ -2,20 +2,12 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; using Internal.Runtime.CompilerServices; namespace System.Buffers.Text { public static partial class Base64 { - private static TVector ReadVector(ReadOnlySpan data) - { - ref sbyte tmp = ref MemoryMarshal.GetReference(data); - return Unsafe.As(ref tmp); - } - [Conditional("DEBUG")] private static unsafe void AssertRead(byte* src, byte* srcStart, int srcLength) { diff --git a/src/libraries/System.Memory/src/System/Buffers/Text/Base64Decoder.cs b/src/libraries/System.Memory/src/System/Buffers/Text/Base64Decoder.cs index b6c639781c2c2b..b2b0b13a6f43ca 100644 --- a/src/libraries/System.Memory/src/System/Buffers/Text/Base64Decoder.cs +++ b/src/libraries/System.Memory/src/System/Buffers/Text/Base64Decoder.cs @@ -100,7 +100,7 @@ public static unsafe OperationStatus DecodeFromUtf8(ReadOnlySpan utf8, Spa maxSrcLength = (destLength / 3) * 4; } - ref sbyte decodingMap = ref MemoryMarshal.GetReference(s_decodingMap); + ref sbyte decodingMap = ref MemoryMarshal.GetReference(DecodingMap); srcMax = srcBytes + (uint)maxSrcLength; while 
(src < srcMax) @@ -275,7 +275,7 @@ public static unsafe OperationStatus DecodeFromUtf8InPlace(Span buffer, ou if (bufferLength == 0) goto DoneExit; - ref sbyte decodingMap = ref MemoryMarshal.GetReference(s_decodingMap); + ref sbyte decodingMap = ref MemoryMarshal.GetReference(DecodingMap); while (sourceIndex < bufferLength - 4) { @@ -362,14 +362,64 @@ private static unsafe void Avx2Decode(ref byte* srcBytes, ref byte* destBytes, b // See SSSE3-version below for an explanation of how the code works. // The JIT won't hoist these "constants", so help it +#if NET6_0_OR_GREATER + Vector256 lutHi = Vector256.Create( + 0x10, 0x10, 0x01, 0x02, + 0x04, 0x08, 0x04, 0x08, + 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x01, 0x02, + 0x04, 0x08, 0x04, 0x08, + 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10); + + Vector256 lutLo = Vector256.Create( + 0x15, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x13, 0x1A, + 0x1B, 0x1B, 0x1B, 0x1A, + 0x15, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x13, 0x1A, + 0x1B, 0x1B, 0x1B, 0x1A); + + Vector256 lutShift = Vector256.Create( + 0, 16, 19, 4, + -65, -65, -71, -71, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 16, 19, 4, + -65, -65, -71, -71, + 0, 0, 0, 0, + 0, 0, 0, 0); + Vector256 packBytesInLaneMask = Vector256.Create( + 2, 1, 0, 6, + 5, 4, 10, 9, + 8, 14, 13, 12, + -1, -1, -1, -1, + 2, 1, 0, 6, + 5, 4, 10, 9, + 8, 14, 13, 12, + -1, -1, -1, -1); + Vector256 packLanesControl = Vector256.Create( + 0, 0, 0, 0, + 1, 0, 0, 0, + 2, 0, 0, 0, + 4, 0, 0, 0, + 5, 0, 0, 0, + 6, 0, 0, 0, + -1, -1, -1, -1, + -1, -1, -1, -1).AsInt32(); +#else Vector256 lutHi = ReadVector>(s_avxDecodeLutHi); Vector256 lutLo = ReadVector>(s_avxDecodeLutLo); Vector256 lutShift = ReadVector>(s_avxDecodeLutShift); + Vector256 packBytesInLaneMask = ReadVector>(s_avxDecodePackBytesInLaneMask); + Vector256 packLanesControl = ReadVector>(s_avxDecodePackLanesControl).AsInt32(); +#endif Vector256 mask2F = Vector256.Create((sbyte)'/'); 
Vector256 mergeConstant0 = Vector256.Create(0x01400140).AsSByte(); Vector256 mergeConstant1 = Vector256.Create(0x00011000).AsInt16(); - Vector256 packBytesInLaneMask = ReadVector>(s_avxDecodePackBytesInLaneMask); - Vector256 packLanesControl = ReadVector>(s_avxDecodePackLanesControl).AsInt32(); byte* src = srcBytes; byte* dest = destBytes; @@ -508,13 +558,39 @@ private static unsafe void Ssse3Decode(ref byte* srcBytes, ref byte* destBytes, // 1111 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 // The JIT won't hoist these "constants", so help it +#if NET6_0_OR_GREATER + Vector128 lutHi = Vector128.Create( + 0x10, 0x10, 0x01, 0x02, + 0x04, 0x08, 0x04, 0x08, + 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10); + + Vector128 lutLo = Vector128.Create( + 0x15, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x13, 0x1A, + 0x1B, 0x1B, 0x1B, 0x1A); + + Vector128 lutShift = Vector128.Create( + 0, 16, 19, 4, + -65, -65, -71, -71, + 0, 0, 0, 0, + 0, 0, 0, 0); + + Vector128 packBytesMask = Vector128.Create( + 2, 1, 0, 6, + 5, 4, 10, 9, + 8, 14, 13, 12, + -1, -1, -1, -1); +#else Vector128 lutHi = ReadVector>(s_sseDecodeLutHi); Vector128 lutLo = ReadVector>(s_sseDecodeLutLo); Vector128 lutShift = ReadVector>(s_sseDecodeLutShift); + Vector128 packBytesMask = ReadVector>(s_sseDecodePackBytesMask); +#endif Vector128 mask2F = Vector128.Create((sbyte)'/'); Vector128 mergeConstant0 = Vector128.Create(0x01400140).AsSByte(); Vector128 mergeConstant1 = Vector128.Create(0x00011000).AsInt16(); - Vector128 packBytesMask = ReadVector>(s_sseDecodePackBytesMask); Vector128 zero = Vector128.Zero; byte* src = srcBytes; @@ -613,7 +689,7 @@ private static unsafe void WriteThreeLowOrderBytes(byte* destination, int value) } // Pre-computing this table using a custom string(s_characters) and GenerateDecodingMapAndVerify (found in tests) - private static ReadOnlySpan s_decodingMap => new sbyte[] { + private static ReadOnlySpan DecodingMap => new 
sbyte[] { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, //62 is placed at index 43 (for +), 63 at index 47 (for /) @@ -632,6 +708,7 @@ private static unsafe void WriteThreeLowOrderBytes(byte* destination, int value) -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, }; +#if !NET6_0_OR_GREATER private static ReadOnlySpan s_sseDecodePackBytesMask => new sbyte[] { 2, 1, 0, 6, 5, 4, 10, 9, @@ -714,5 +791,6 @@ private static unsafe void WriteThreeLowOrderBytes(byte* destination, int value) 0, 0, 0, 0, 0, 0, 0, 0 }; +#endif } } diff --git a/src/libraries/System.Memory/src/System/Buffers/Text/Base64Encoder.cs b/src/libraries/System.Memory/src/System/Buffers/Text/Base64Encoder.cs index 0ee99479128bc4..86cce73b3bf5be 100644 --- a/src/libraries/System.Memory/src/System/Buffers/Text/Base64Encoder.cs +++ b/src/libraries/System.Memory/src/System/Buffers/Text/Base64Encoder.cs @@ -85,7 +85,7 @@ public static unsafe OperationStatus EncodeToUtf8(ReadOnlySpan bytes, Span } } - ref byte encodingMap = ref MemoryMarshal.GetReference(s_encodingMap); + ref byte encodingMap = ref MemoryMarshal.GetReference(EncodingMap); uint result = 0; srcMax -= 2; @@ -189,7 +189,7 @@ public static unsafe OperationStatus EncodeToUtf8InPlace(Span buffer, int uint destinationIndex = (uint)(encodedLength - 4); uint sourceIndex = (uint)(dataLength - leftover); uint result = 0; - ref byte encodingMap = ref MemoryMarshal.GetReference(s_encodingMap); + ref byte encodingMap = ref MemoryMarshal.GetReference(EncodingMap); // encode last pack to avoid conditional in the main loop if (leftover != 0) @@ -241,14 +241,36 @@ private static unsafe void Avx2Encode(ref byte* srcBytes, ref byte* destBytes, b // l k j i h g f e d c b a 0 0 0 0 // The JIT won't hoist these "constants", so help it +#if NET6_0_OR_GREATER + Vector256 shuffleVec = Vector256.Create( + 5, 4, 
6, 5, + 8, 7, 9, 8, + 11, 10, 12, 11, + 14, 13, 15, 14, + 1, 0, 2, 1, + 4, 3, 5, 4, + 7, 6, 8, 7, + 10, 9, 11, 10); + + Vector256 lut = Vector256.Create( + 65, 71, -4, -4, + -4, -4, -4, -4, + -4, -4, -4, -4, + -19, -16, 0, 0, + 65, 71, -4, -4, + -4, -4, -4, -4, + -4, -4, -4, -4, + -19, -16, 0, 0); +#else Vector256 shuffleVec = ReadVector>(s_avxEncodeShuffleVec); + Vector256 lut = ReadVector>(s_avxEncodeLut); +#endif Vector256 maskAC = Vector256.Create(0x0fc0fc00).AsSByte(); Vector256 maskBB = Vector256.Create(0x003f03f0).AsSByte(); Vector256 shiftAC = Vector256.Create(0x04000040).AsUInt16(); Vector256 shiftBB = Vector256.Create(0x01000010).AsInt16(); Vector256 const51 = Vector256.Create((byte)51); Vector256 const25 = Vector256.Create((sbyte)25); - Vector256 lut = ReadVector>(s_avxEncodeLut); byte* src = srcBytes; byte* dest = destBytes; @@ -258,7 +280,19 @@ private static unsafe void Avx2Encode(ref byte* srcBytes, ref byte* destBytes, b Vector256 str = Avx.LoadVector256(src).AsSByte(); // shift by 4 bytes, as required by Reshuffle +#if NET6_0_OR_GREATER + str = Avx2.PermuteVar8x32(str.AsInt32(), Vector256.Create( + 0, 0, 0, 0, + 0, 0, 0, 0, + 1, 0, 0, 0, + 2, 0, 0, 0, + 3, 0, 0, 0, + 4, 0, 0, 0, + 5, 0, 0, 0, + 6, 0, 0, 0).AsInt32()).AsSByte(); +#else str = Avx2.PermuteVar8x32(str.AsInt32(), ReadVector>(s_avxEncodePermuteVec).AsInt32()).AsSByte(); +#endif // Next loads are done at src-4, as required by Reshuffle, so shift it once src -= 4; @@ -380,14 +414,28 @@ private static unsafe void Ssse3Encode(ref byte* srcBytes, ref byte* destBytes, // 0 0 0 0 l k j i h g f e d c b a // The JIT won't hoist these "constants", so help it +#if NET6_0_OR_GREATER + Vector128 shuffleVec = Vector128.Create( + 1, 0, 2, 1, + 4, 3, 5, 4, + 7, 6, 8, 7, + 10, 9, 11, 10); + + Vector128 lut = Vector128.Create( + 65, 71, -4, -4, + -4, -4, -4, -4, + -4, -4, -4, -4, + -19, -16, 0, 0); +#else Vector128 shuffleVec = ReadVector>(s_sseEncodeShuffleVec); + Vector128 lut = 
ReadVector>(s_sseEncodeLut); +#endif Vector128 maskAC = Vector128.Create(0x0fc0fc00).AsSByte(); Vector128 maskBB = Vector128.Create(0x003f03f0).AsSByte(); Vector128 shiftAC = Vector128.Create(0x04000040).AsUInt16(); Vector128 shiftBB = Vector128.Create(0x01000010).AsInt16(); Vector128 const51 = Vector128.Create((byte)51); Vector128 const25 = Vector128.Create((sbyte)25); - Vector128 lut = ReadVector>(s_sseEncodeLut); byte* src = srcBytes; byte* dest = destBytes; @@ -543,7 +591,7 @@ private static unsafe uint EncodeAndPadTwo(byte* oneByte, ref byte encodingMap) private const int MaximumEncodeLength = (int.MaxValue / 4) * 3; // 1610612733 // Pre-computing this table using a custom string(s_characters) and GenerateEncodingMapAndVerify (found in tests) - private static ReadOnlySpan s_encodingMap => new byte[] { + private static ReadOnlySpan EncodingMap => new byte[] { 65, 66, 67, 68, 69, 70, 71, 72, //A..H 73, 74, 75, 76, 77, 78, 79, 80, //I..P 81, 82, 83, 84, 85, 86, 87, 88, //Q..X @@ -554,6 +602,7 @@ private static unsafe uint EncodeAndPadTwo(byte* oneByte, ref byte encodingMap) 52, 53, 54, 55, 56, 57, 43, 47 //4..9, +, / }; +#if !NET6_0_OR_GREATER private static ReadOnlySpan s_sseEncodeShuffleVec => new sbyte[] { 1, 0, 2, 1, 4, 3, 5, 4, @@ -600,5 +649,6 @@ private static unsafe uint EncodeAndPadTwo(byte* oneByte, ref byte encodingMap) -4, -4, -4, -4, -19, -16, 0, 0 }; +#endif } } From eb8fded4f395201f85691fac4e27bcb4d3c84c26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Foidl?= Date: Mon, 28 Jun 2021 15:11:29 +0200 Subject: [PATCH 2/3] Used VectorXYZ.Create for constants in BitArray --- .../src/System/Collections/BitArray.cs | 58 ++++++++++++++++--- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/src/libraries/System.Collections/src/System/Collections/BitArray.cs b/src/libraries/System.Collections/src/System/Collections/BitArray.cs index 1fddd453b92009..a58ced4e1413db 100644 --- 
a/src/libraries/System.Collections/src/System/Collections/BitArray.cs +++ b/src/libraries/System.Collections/src/System/Collections/BitArray.cs @@ -120,9 +120,11 @@ public BitArray(byte[] bytes) _version = 0; } +#if !NET6_0_OR_GREATER private static readonly Vector128 s_bitMask128 = BitConverter.IsLittleEndian ? Vector128.Create(0x80402010_08040201).AsByte() : Vector128.Create(0x01020408_10204080).AsByte(); +#endif private const uint Vector128ByteCount = 16; private const uint Vector128IntCount = 4; @@ -190,6 +192,15 @@ public unsafe BitArray(bool[] values) // However comparison against zero can be replaced to cmeq against zero (vceqzq_s8) // See dotnet/runtime#33972 for details Vector128 zero = Vector128.Zero; + +#if NET6_0_OR_GREATER + Vector128 bitMask128 = BitConverter.IsLittleEndian ? + Vector128.Create(0x80402010_08040201).AsByte() : + Vector128.Create(0x01020408_10204080).AsByte(); +#else + Vector128 bitMask128 = s_bitMask128; +#endif + fixed (bool* ptr = values) { for (; (i + Vector128ByteCount * 2u) <= (uint)values.Length; i += Vector128ByteCount * 2u) @@ -199,7 +210,7 @@ public unsafe BitArray(bool[] values) // and combine by ORing all of them together (In this case, adding all of them does the same thing) Vector128 lowerVector = AdvSimd.LoadVector128((byte*)ptr + i); Vector128 lowerIsFalse = AdvSimd.CompareEqual(lowerVector, zero); - Vector128 bitsExtracted1 = AdvSimd.And(lowerIsFalse, s_bitMask128); + Vector128 bitsExtracted1 = AdvSimd.And(lowerIsFalse, bitMask128); bitsExtracted1 = AdvSimd.Arm64.AddPairwise(bitsExtracted1, bitsExtracted1); bitsExtracted1 = AdvSimd.Arm64.AddPairwise(bitsExtracted1, bitsExtracted1); bitsExtracted1 = AdvSimd.Arm64.AddPairwise(bitsExtracted1, bitsExtracted1); @@ -207,7 +218,7 @@ public unsafe BitArray(bool[] values) Vector128 upperVector = AdvSimd.LoadVector128((byte*)ptr + i + Vector128.Count); Vector128 upperIsFalse = AdvSimd.CompareEqual(upperVector, zero); - Vector128 bitsExtracted2 = AdvSimd.And(upperIsFalse, 
s_bitMask128); + Vector128 bitsExtracted2 = AdvSimd.And(upperIsFalse, bitMask128); bitsExtracted2 = AdvSimd.Arm64.AddPairwise(bitsExtracted2, bitsExtracted2); bitsExtracted2 = AdvSimd.Arm64.AddPairwise(bitsExtracted2, bitsExtracted2); bitsExtracted2 = AdvSimd.Arm64.AddPairwise(bitsExtracted2, bitsExtracted2); @@ -857,11 +868,13 @@ public int Length } } +#if !NET6_0_OR_GREATER // The mask used when shuffling a single int into Vector128/256. // On little endian machines, the lower 8 bits of int belong in the first byte, next lower 8 in the second and so on. // We place the bytes that contain the bits to its respective byte so that we can mask out only the relevant bits later. private static readonly Vector128 s_lowerShuffleMask_CopyToBoolArray = Vector128.Create(0, 0x01010101_01010101).AsByte(); private static readonly Vector128 s_upperShuffleMask_CopyToBoolArray = Vector128.Create(0x02020202_02020202, 0x03030303_03030303).AsByte(); +#endif public unsafe void CopyTo(Array array, int index) { @@ -953,9 +966,20 @@ public unsafe void CopyTo(Array array, int index) if (m_length < BitsPerInt32) goto LessThan32; +#if NET6_0_OR_GREATER + // The mask used when shuffling a single int into Vector128/256. + // On little endian machines, the lower 8 bits of int belong in the first byte, next lower 8 in the second and so on. + // We place the bytes that contain the bits to its respective byte so that we can mask out only the relevant bits later. 
+ Vector128<byte> lowerShuffleMask_CopyToBoolArray = Vector128.Create(0, 0x01010101_01010101).AsByte(); + Vector128<byte> upperShuffleMask_CopyToBoolArray = Vector128.Create(0x02020202_02020202, 0x03030303_03030303).AsByte(); +#else + Vector128<byte> lowerShuffleMask_CopyToBoolArray = s_lowerShuffleMask_CopyToBoolArray; + Vector128<byte> upperShuffleMask_CopyToBoolArray = s_upperShuffleMask_CopyToBoolArray; +#endif + if (Avx2.IsSupported) { - Vector256<byte> shuffleMask = Vector256.Create(s_lowerShuffleMask_CopyToBoolArray, s_upperShuffleMask_CopyToBoolArray); + Vector256<byte> shuffleMask = Vector256.Create(lowerShuffleMask_CopyToBoolArray, upperShuffleMask_CopyToBoolArray); Vector256<byte> bitMask = Vector256.Create(0x80402010_08040201).AsByte(); Vector256<byte> ones = Vector256.Create((byte)1); @@ -977,9 +1001,16 @@ public unsafe void CopyTo(Array array, int index) } else if (Ssse3.IsSupported) { - Vector128<byte> lowerShuffleMask = s_lowerShuffleMask_CopyToBoolArray; - Vector128<byte> upperShuffleMask = s_upperShuffleMask_CopyToBoolArray; + Vector128<byte> lowerShuffleMask = lowerShuffleMask_CopyToBoolArray; + Vector128<byte> upperShuffleMask = upperShuffleMask_CopyToBoolArray; Vector128<byte> ones = Vector128.Create((byte)1); +#if NET6_0_OR_GREATER + Vector128<byte> bitMask128 = BitConverter.IsLittleEndian ?
+ Vector128.Create(0x80402010_08040201).AsByte() : + Vector128.Create(0x01020408_10204080).AsByte(); +#else + Vector128<byte> bitMask128 = s_bitMask128; +#endif + fixed (bool* destination = &boolArray[index]) { @@ -989,12 +1020,12 @@ public unsafe void CopyTo(Array array, int index) Vector128<int> scalar = Vector128.CreateScalarUnsafe(bits); Vector128<byte> shuffledLower = Ssse3.Shuffle(scalar.AsByte(), lowerShuffleMask); - Vector128<byte> extractedLower = Sse2.And(shuffledLower, s_bitMask128); + Vector128<byte> extractedLower = Sse2.And(shuffledLower, bitMask128); Vector128<byte> normalizedLower = Sse2.Min(extractedLower, ones); Sse2.Store((byte*)destination + i, normalizedLower); Vector128<byte> shuffledHigher = Ssse3.Shuffle(scalar.AsByte(), upperShuffleMask); - Vector128<byte> extractedHigher = Sse2.And(shuffledHigher, s_bitMask128); + Vector128<byte> extractedHigher = Sse2.And(shuffledHigher, bitMask128); Vector128<byte> normalizedHigher = Sse2.Min(extractedHigher, ones); Sse2.Store((byte*)destination + i + Vector128<byte>.Count, normalizedHigher); } @@ -1003,6 +1034,15 @@ public unsafe void CopyTo(Array array, int index) else if (AdvSimd.IsSupported) { Vector128<byte> ones = Vector128.Create((byte)1); + +#if NET6_0_OR_GREATER + Vector128<byte> bitMask128 = BitConverter.IsLittleEndian ?
+ Vector128.Create(0x80402010_08040201).AsByte() : + Vector128.Create(0x01020408_10204080).AsByte(); +#else + Vector128<byte> bitMask128 = s_bitMask128; +#endif + fixed (bool* destination = &boolArray[index]) { for (; (i + Vector128ByteCount * 2u) <= (uint)m_length; i += Vector128ByteCount * 2u) @@ -1028,12 +1068,12 @@ public unsafe void CopyTo(Array array, int index) vector = AdvSimd.Arm64.ZipLow(vector, vector); Vector128<byte> shuffledLower = AdvSimd.Arm64.ZipLow(vector, vector); - Vector128<byte> extractedLower = AdvSimd.And(shuffledLower, s_bitMask128); + Vector128<byte> extractedLower = AdvSimd.And(shuffledLower, bitMask128); Vector128<byte> normalizedLower = AdvSimd.Min(extractedLower, ones); AdvSimd.Store((byte*)destination + i, normalizedLower); Vector128<byte> shuffledHigher = AdvSimd.Arm64.ZipHigh(vector, vector); - Vector128<byte> extractedHigher = AdvSimd.And(shuffledHigher, s_bitMask128); + Vector128<byte> extractedHigher = AdvSimd.And(shuffledHigher, bitMask128); Vector128<byte> normalizedHigher = AdvSimd.Min(extractedHigher, ones); AdvSimd.Store((byte*)destination + i + Vector128<byte>.Count, normalizedHigher); } From fa0dbdcc21a8f5dd519bf99d801429f53f0238ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Foidl?= Date: Mon, 28 Jun 2021 17:42:47 +0200 Subject: [PATCH 3/3] Remove conditional compilation It's only built for NetCoreAppCurrent, so no need to special case older runtimes.
--- .../src/System/Collections/BitArray.cs | 33 ------ .../src/System/Buffers/Text/Base64Decoder.cs | 104 +----------------- .../src/System/Buffers/Text/Base64Encoder.cs | 65 +---------- 3 files changed, 6 insertions(+), 196 deletions(-) diff --git a/src/libraries/System.Collections/src/System/Collections/BitArray.cs b/src/libraries/System.Collections/src/System/Collections/BitArray.cs index a58ced4e1413db..9b312acc5ac3da 100644 --- a/src/libraries/System.Collections/src/System/Collections/BitArray.cs +++ b/src/libraries/System.Collections/src/System/Collections/BitArray.cs @@ -120,12 +120,6 @@ public BitArray(byte[] bytes) _version = 0; } -#if !NET6_0_OR_GREATER - private static readonly Vector128 s_bitMask128 = BitConverter.IsLittleEndian ? - Vector128.Create(0x80402010_08040201).AsByte() : - Vector128.Create(0x01020408_10204080).AsByte(); -#endif - private const uint Vector128ByteCount = 16; private const uint Vector128IntCount = 4; private const uint Vector256ByteCount = 32; @@ -192,14 +186,9 @@ public unsafe BitArray(bool[] values) // However comparison against zero can be replaced to cmeq against zero (vceqzq_s8) // See dotnet/runtime#33972 for details Vector128 zero = Vector128.Zero; - -#if NET6_0_OR_GREATER Vector128 bitMask128 = BitConverter.IsLittleEndian ? Vector128.Create(0x80402010_08040201).AsByte() : Vector128.Create(0x01020408_10204080).AsByte(); -#else - Vector128 bitMask128 = s_bitMask128; -#endif fixed (bool* ptr = values) { @@ -868,14 +857,6 @@ public int Length } } -#if !NET6_0_OR_GREATER - // The mask used when shuffling a single int into Vector128/256. - // On little endian machines, the lower 8 bits of int belong in the first byte, next lower 8 in the second and so on. - // We place the bytes that contain the bits to its respective byte so that we can mask out only the relevant bits later. 
- private static readonly Vector128 s_lowerShuffleMask_CopyToBoolArray = Vector128.Create(0, 0x01010101_01010101).AsByte(); - private static readonly Vector128 s_upperShuffleMask_CopyToBoolArray = Vector128.Create(0x02020202_02020202, 0x03030303_03030303).AsByte(); -#endif - public unsafe void CopyTo(Array array, int index) { if (array == null) @@ -966,16 +947,11 @@ public unsafe void CopyTo(Array array, int index) if (m_length < BitsPerInt32) goto LessThan32; -#if NET6_0_OR_GREATER // The mask used when shuffling a single int into Vector128/256. // On little endian machines, the lower 8 bits of int belong in the first byte, next lower 8 in the second and so on. // We place the bytes that contain the bits to its respective byte so that we can mask out only the relevant bits later. Vector128 lowerShuffleMask_CopyToBoolArray = Vector128.Create(0, 0x01010101_01010101).AsByte(); Vector128 upperShuffleMask_CopyToBoolArray = Vector128.Create(0x02020202_02020202, 0x03030303_03030303).AsByte(); -#else - Vector128 lowerShuffleMask_CopyToBoolArray = s_lowerShuffleMask_CopyToBoolArray; - Vector128 upperShuffleMask_CopyToBoolArray = s_upperShuffleMask_CopyToBoolArray; -#endif if (Avx2.IsSupported) { @@ -1004,13 +980,9 @@ public unsafe void CopyTo(Array array, int index) Vector128 lowerShuffleMask = lowerShuffleMask_CopyToBoolArray; Vector128 upperShuffleMask = upperShuffleMask_CopyToBoolArray; Vector128 ones = Vector128.Create((byte)1); -#if NET6_0_OR_GREATER Vector128 bitMask128 = BitConverter.IsLittleEndian ? Vector128.Create(0x80402010_08040201).AsByte() : Vector128.Create(0x01020408_10204080).AsByte(); -#else - Vector128 bitMask128 = s_bitMask128; -#endif fixed (bool* destination = &boolArray[index]) { @@ -1034,14 +1006,9 @@ public unsafe void CopyTo(Array array, int index) else if (AdvSimd.IsSupported) { Vector128 ones = Vector128.Create((byte)1); - -#if NET6_0_OR_GREATER Vector128 bitMask128 = BitConverter.IsLittleEndian ? 
Vector128.Create(0x80402010_08040201).AsByte() : Vector128.Create(0x01020408_10204080).AsByte(); -#else - Vector128 bitMask128 = s_bitMask128; -#endif fixed (bool* destination = &boolArray[index]) { diff --git a/src/libraries/System.Memory/src/System/Buffers/Text/Base64Decoder.cs b/src/libraries/System.Memory/src/System/Buffers/Text/Base64Decoder.cs index b2b0b13a6f43ca..e71b3b7f2d2fd4 100644 --- a/src/libraries/System.Memory/src/System/Buffers/Text/Base64Decoder.cs +++ b/src/libraries/System.Memory/src/System/Buffers/Text/Base64Decoder.cs @@ -362,7 +362,6 @@ private static unsafe void Avx2Decode(ref byte* srcBytes, ref byte* destBytes, b // See SSSE3-version below for an explanation of how the code works. // The JIT won't hoist these "constants", so help it -#if NET6_0_OR_GREATER Vector256 lutHi = Vector256.Create( 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, @@ -392,6 +391,7 @@ private static unsafe void Avx2Decode(ref byte* srcBytes, ref byte* destBytes, b -65, -65, -71, -71, 0, 0, 0, 0, 0, 0, 0, 0); + Vector256 packBytesInLaneMask = Vector256.Create( 2, 1, 0, 6, 5, 4, 10, 9, @@ -401,6 +401,7 @@ private static unsafe void Avx2Decode(ref byte* srcBytes, ref byte* destBytes, b 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1); + Vector256 packLanesControl = Vector256.Create( 0, 0, 0, 0, 1, 0, 0, 0, @@ -410,13 +411,7 @@ private static unsafe void Avx2Decode(ref byte* srcBytes, ref byte* destBytes, b 6, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1).AsInt32(); -#else - Vector256 lutHi = ReadVector>(s_avxDecodeLutHi); - Vector256 lutLo = ReadVector>(s_avxDecodeLutLo); - Vector256 lutShift = ReadVector>(s_avxDecodeLutShift); - Vector256 packBytesInLaneMask = ReadVector>(s_avxDecodePackBytesInLaneMask); - Vector256 packLanesControl = ReadVector>(s_avxDecodePackLanesControl).AsInt32(); -#endif + Vector256 mask2F = Vector256.Create((sbyte)'/'); Vector256 mergeConstant0 = Vector256.Create(0x01400140).AsSByte(); Vector256 mergeConstant1 = Vector256.Create(0x00011000).AsInt16(); @@ 
-558,7 +553,6 @@ private static unsafe void Ssse3Decode(ref byte* srcBytes, ref byte* destBytes, // 1111 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 // The JIT won't hoist these "constants", so help it -#if NET6_0_OR_GREATER Vector128 lutHi = Vector128.Create( 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, @@ -582,12 +576,7 @@ private static unsafe void Ssse3Decode(ref byte* srcBytes, ref byte* destBytes, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1); -#else - Vector128 lutHi = ReadVector>(s_sseDecodeLutHi); - Vector128 lutLo = ReadVector>(s_sseDecodeLutLo); - Vector128 lutShift = ReadVector>(s_sseDecodeLutShift); - Vector128 packBytesMask = ReadVector>(s_sseDecodePackBytesMask); -#endif + Vector128 mask2F = Vector128.Create((sbyte)'/'); Vector128 mergeConstant0 = Vector128.Create(0x01400140).AsSByte(); Vector128 mergeConstant1 = Vector128.Create(0x00011000).AsInt16(); @@ -707,90 +696,5 @@ private static unsafe void WriteThreeLowOrderBytes(byte* destination, int value) -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, }; - -#if !NET6_0_OR_GREATER - private static ReadOnlySpan s_sseDecodePackBytesMask => new sbyte[] { - 2, 1, 0, 6, - 5, 4, 10, 9, - 8, 14, 13, 12, - -1, -1, -1, -1 - }; - - private static ReadOnlySpan s_sseDecodeLutLo => new sbyte[] { - 0x15, 0x11, 0x11, 0x11, - 0x11, 0x11, 0x11, 0x11, - 0x11, 0x11, 0x13, 0x1A, - 0x1B, 0x1B, 0x1B, 0x1A - }; - - private static ReadOnlySpan s_sseDecodeLutHi => new sbyte[] { - 0x10, 0x10, 0x01, 0x02, - 0x04, 0x08, 0x04, 0x08, - 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10 - }; - - private static ReadOnlySpan s_sseDecodeLutShift => new sbyte[] { - 0, 16, 19, 4, - -65, -65, -71, -71, - 0, 0, 0, 0, - 0, 0, 0, 0 - }; - - private static ReadOnlySpan s_avxDecodePackBytesInLaneMask => new sbyte[] { - 2, 1, 0, 6, - 5, 4, 10, 9, - 8, 14, 13, 12, - -1, -1, -1, -1, - 2, 1, 0, 6, - 5, 4, 10, 9, - 8, 14, 
13, 12, - -1, -1, -1, -1 - }; - - private static ReadOnlySpan s_avxDecodePackLanesControl => new sbyte[] { - 0, 0, 0, 0, - 1, 0, 0, 0, - 2, 0, 0, 0, - 4, 0, 0, 0, - 5, 0, 0, 0, - 6, 0, 0, 0, - -1, -1, -1, -1, - -1, -1, -1, -1 - }; - - private static ReadOnlySpan s_avxDecodeLutLo => new sbyte[] { - 0x15, 0x11, 0x11, 0x11, - 0x11, 0x11, 0x11, 0x11, - 0x11, 0x11, 0x13, 0x1A, - 0x1B, 0x1B, 0x1B, 0x1A, - 0x15, 0x11, 0x11, 0x11, - 0x11, 0x11, 0x11, 0x11, - 0x11, 0x11, 0x13, 0x1A, - 0x1B, 0x1B, 0x1B, 0x1A - }; - - private static ReadOnlySpan s_avxDecodeLutHi => new sbyte[] { - 0x10, 0x10, 0x01, 0x02, - 0x04, 0x08, 0x04, 0x08, - 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x01, 0x02, - 0x04, 0x08, 0x04, 0x08, - 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10 - }; - - private static ReadOnlySpan s_avxDecodeLutShift => new sbyte[] { - 0, 16, 19, 4, - -65, -65, -71, -71, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 16, 19, 4, - -65, -65, -71, -71, - 0, 0, 0, 0, - 0, 0, 0, 0 - }; -#endif } } diff --git a/src/libraries/System.Memory/src/System/Buffers/Text/Base64Encoder.cs b/src/libraries/System.Memory/src/System/Buffers/Text/Base64Encoder.cs index 86cce73b3bf5be..99add7b72e8a9a 100644 --- a/src/libraries/System.Memory/src/System/Buffers/Text/Base64Encoder.cs +++ b/src/libraries/System.Memory/src/System/Buffers/Text/Base64Encoder.cs @@ -241,7 +241,6 @@ private static unsafe void Avx2Encode(ref byte* srcBytes, ref byte* destBytes, b // l k j i h g f e d c b a 0 0 0 0 // The JIT won't hoist these "constants", so help it -#if NET6_0_OR_GREATER Vector256 shuffleVec = Vector256.Create( 5, 4, 6, 5, 8, 7, 9, 8, @@ -261,10 +260,7 @@ private static unsafe void Avx2Encode(ref byte* srcBytes, ref byte* destBytes, b -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0); -#else - Vector256 shuffleVec = ReadVector>(s_avxEncodeShuffleVec); - Vector256 lut = ReadVector>(s_avxEncodeLut); -#endif + Vector256 maskAC = Vector256.Create(0x0fc0fc00).AsSByte(); Vector256 maskBB = 
Vector256.Create(0x003f03f0).AsSByte(); Vector256 shiftAC = Vector256.Create(0x04000040).AsUInt16(); @@ -280,7 +276,6 @@ private static unsafe void Avx2Encode(ref byte* srcBytes, ref byte* destBytes, b Vector256 str = Avx.LoadVector256(src).AsSByte(); // shift by 4 bytes, as required by Reshuffle -#if NET6_0_OR_GREATER str = Avx2.PermuteVar8x32(str.AsInt32(), Vector256.Create( 0, 0, 0, 0, 0, 0, 0, 0, @@ -290,9 +285,6 @@ private static unsafe void Avx2Encode(ref byte* srcBytes, ref byte* destBytes, b 4, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0).AsInt32()).AsSByte(); -#else - str = Avx2.PermuteVar8x32(str.AsInt32(), ReadVector>(s_avxEncodePermuteVec).AsInt32()).AsSByte(); -#endif // Next loads are done at src-4, as required by Reshuffle, so shift it once src -= 4; @@ -414,7 +406,6 @@ private static unsafe void Ssse3Encode(ref byte* srcBytes, ref byte* destBytes, // 0 0 0 0 l k j i h g f e d c b a // The JIT won't hoist these "constants", so help it -#if NET6_0_OR_GREATER Vector128 shuffleVec = Vector128.Create( 1, 0, 2, 1, 4, 3, 5, 4, @@ -426,10 +417,7 @@ private static unsafe void Ssse3Encode(ref byte* srcBytes, ref byte* destBytes, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0); -#else - Vector128 shuffleVec = ReadVector>(s_sseEncodeShuffleVec); - Vector128 lut = ReadVector>(s_sseEncodeLut); -#endif + Vector128 maskAC = Vector128.Create(0x0fc0fc00).AsSByte(); Vector128 maskBB = Vector128.Create(0x003f03f0).AsSByte(); Vector128 shiftAC = Vector128.Create(0x04000040).AsUInt16(); @@ -601,54 +589,5 @@ private static unsafe uint EncodeAndPadTwo(byte* oneByte, ref byte encodingMap) 119, 120, 121, 122, 48, 49, 50, 51, //w..z, 0..3 52, 53, 54, 55, 56, 57, 43, 47 //4..9, +, / }; - -#if !NET6_0_OR_GREATER - private static ReadOnlySpan s_sseEncodeShuffleVec => new sbyte[] { - 1, 0, 2, 1, - 4, 3, 5, 4, - 7, 6, 8, 7, - 10, 9, 11, 10 - }; - - private static ReadOnlySpan s_sseEncodeLut => new sbyte[] { - 65, 71, -4, -4, - -4, -4, -4, -4, - -4, -4, -4, -4, - -19, -16, 0, 0 - }; - - 
private static ReadOnlySpan s_avxEncodePermuteVec => new sbyte[] { - 0, 0, 0, 0, - 0, 0, 0, 0, - 1, 0, 0, 0, - 2, 0, 0, 0, - 3, 0, 0, 0, - 4, 0, 0, 0, - 5, 0, 0, 0, - 6, 0, 0, 0 - }; - - private static ReadOnlySpan s_avxEncodeShuffleVec => new sbyte[] { - 5, 4, 6, 5, - 8, 7, 9, 8, - 11, 10, 12, 11, - 14, 13, 15, 14, - 1, 0, 2, 1, - 4, 3, 5, 4, - 7, 6, 8, 7, - 10, 9, 11, 10 - }; - - private static ReadOnlySpan s_avxEncodeLut => new sbyte[] { - 65, 71, -4, -4, - -4, -4, -4, -4, - -4, -4, -4, -4, - -19, -16, 0, 0, - 65, 71, -4, -4, - -4, -4, -4, -4, - -4, -4, -4, -4, - -19, -16, 0, 0 - }; -#endif } }