From cde8b1277114fde343f654be5c42e85b808f3680 Mon Sep 17 00:00:00 2001 From: Ben Adams Date: Thu, 24 Jan 2019 03:58:04 +0000 Subject: [PATCH 1/5] Speedup .SequenceCompareTo(byte, ...) --- .../shared/System/SpanHelpers.Byte.cs | 127 +++++++++++++++++- 1 file changed, 120 insertions(+), 7 deletions(-) diff --git a/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs b/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs index 63a564f0de7d..c8cb0439abcb 100644 --- a/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs +++ b/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs @@ -1324,18 +1324,131 @@ public static unsafe int SequenceCompareTo(ref byte first, int firstLength, ref IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations IntPtr nLength = (IntPtr)(void*)minLength; - if (Vector.IsHardwareAccelerated && (byte*)nLength > (byte*)Vector.Count) + if (Avx2.IsSupported) { - nLength -= Vector.Count; - while ((byte*)nLength > (byte*)offset) + if ((byte*)nLength >= (byte*)Vector256.Count) { - if (LoadVector(ref first, offset) != LoadVector(ref second, offset)) + nLength -= Vector256.Count; + int matches; + while ((byte*)nLength > (byte*)offset) { - goto NotEqual; + matches = Avx2.MoveMask(Avx2.CompareEqual(LoadVector256(ref first, offset), LoadVector256(ref second, offset))); + if (matches == -1) + { + // All matched + offset += Vector256.Count; + continue; + } + + goto Difference; } - offset += Vector.Count; + // Move to Vector length from end for final compare + offset = nLength; + matches = Avx2.MoveMask(Avx2.CompareEqual(LoadVector256(ref first, offset), LoadVector256(ref second, offset))); + if (matches == -1) + { + // All matched + goto Equal; + } + Difference: + // Invert matches to find differences + int differences = ~matches; + offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount(matches)); + + int result = Unsafe.AddByteOffset(ref first, 
offset).CompareTo(Unsafe.AddByteOffset(ref second, offset)); + Debug.Assert(result != 0); + + return result; + } + + if ((byte*)nLength >= (byte*)Vector128.Count) + { + nLength -= Vector128.Count; + int matches; + if ((byte*)nLength > (byte*)offset) + { + matches = Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset))); + if (matches == 0xFFFF) + { + // All matched + offset += Vector128.Count; + } + else + { + goto Difference; + } + } + // Move to Vector length from end for final compare + offset = nLength; + matches = Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset))); + if (matches == 0xFFFF) + { + // All matched + goto Equal; + } + Difference: + // Invert matches to find differences + int differences = ~matches; + offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount(matches)); + + int result = Unsafe.AddByteOffset(ref first, offset).CompareTo(Unsafe.AddByteOffset(ref second, offset)); + Debug.Assert(result != 0); + + return result; + } + } + else if (Sse2.IsSupported) + { + if ((byte*)nLength >= (byte*)Vector128.Count) + { + nLength -= Vector128.Count; + int matches; + while ((byte*)nLength > (byte*)offset) + { + matches = Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset))); + if (matches == 0xFFFF) + { + // All matched + offset += Vector128.Count; + continue; + } + + goto Difference; + } + // Move to Vector length from end for final compare + offset = nLength; + matches = Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset))); + if (matches == 0xFFFF) + { + // All matched + goto Equal; + } + Difference: + // Invert matches to find differences + int differences = ~matches; + offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount(matches)); + + int result = Unsafe.AddByteOffset(ref first, offset).CompareTo(Unsafe.AddByteOffset(ref second, offset)); + 
Debug.Assert(result != 0); + + return result; + } + } + else if (Vector.IsHardwareAccelerated) + { + if ((byte*)nLength > (byte*)Vector.Count) + { + nLength -= Vector.Count; + while ((byte*)nLength > (byte*)offset) + { + if (LoadVector(ref first, offset) != LoadVector(ref second, offset)) + { + goto NotEqual; + } + offset += Vector.Count; + } + goto NotEqual; } - goto NotEqual; } if ((byte*)nLength > (byte*)sizeof(UIntPtr)) From 9dd3f71a378da75e28e86b28fdd08abd156bcf3d Mon Sep 17 00:00:00 2001 From: Ben Adams Date: Thu, 24 Jan 2019 04:57:59 +0000 Subject: [PATCH 2/5] fix copypasta --- .../shared/System/SpanHelpers.Byte.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs b/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs index c8cb0439abcb..dd7f5d4c0ef5 100644 --- a/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs +++ b/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs @@ -1353,7 +1353,7 @@ public static unsafe int SequenceCompareTo(ref byte first, int firstLength, ref Difference: // Invert matches to find differences int differences = ~matches; - offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount(matches)); + offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount(differences)); int result = Unsafe.AddByteOffset(ref first, offset).CompareTo(Unsafe.AddByteOffset(ref second, offset)); Debug.Assert(result != 0); @@ -1389,7 +1389,7 @@ public static unsafe int SequenceCompareTo(ref byte first, int firstLength, ref Difference: // Invert matches to find differences int differences = ~matches; - offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount(matches)); + offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount(differences)); int result = Unsafe.AddByteOffset(ref first, offset).CompareTo(Unsafe.AddByteOffset(ref second, offset)); Debug.Assert(result != 0); @@ -1426,7 +1426,7 @@ public static unsafe int 
SequenceCompareTo(ref byte first, int firstLength, ref Difference: // Invert matches to find differences int differences = ~matches; - offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount(matches)); + offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount(differences)); int result = Unsafe.AddByteOffset(ref first, offset).CompareTo(Unsafe.AddByteOffset(ref second, offset)); Debug.Assert(result != 0); From 5f6dcb865830fbcb718b7ae38805247399557f99 Mon Sep 17 00:00:00 2001 From: Ben Adams Date: Thu, 24 Jan 2019 13:59:42 +0000 Subject: [PATCH 3/5] Rename jump location --- .../shared/System/SpanHelpers.Byte.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs b/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs index dd7f5d4c0ef5..d295e09baacf 100644 --- a/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs +++ b/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs @@ -1443,11 +1443,11 @@ public static unsafe int SequenceCompareTo(ref byte first, int firstLength, ref { if (LoadVector(ref first, offset) != LoadVector(ref second, offset)) { - goto NotEqual; + goto BytewiseCheck; } offset += Vector.Count; } - goto NotEqual; + goto BytewiseCheck; } } @@ -1458,13 +1458,13 @@ public static unsafe int SequenceCompareTo(ref byte first, int firstLength, ref { if (LoadUIntPtr(ref first, offset) != LoadUIntPtr(ref second, offset)) { - goto NotEqual; + goto BytewiseCheck; } offset += sizeof(UIntPtr); } } - NotEqual: // Workaround for https://github.com/dotnet/coreclr/issues/13549 + BytewiseCheck: // Workaround for https://github.com/dotnet/coreclr/issues/13549 while ((byte*)minLength > (byte*)offset) { int result = Unsafe.AddByteOffset(ref first, offset).CompareTo(Unsafe.AddByteOffset(ref second, offset)); From 80205fe83f6b7c8d10c7990aaf9ea1e486bfb1fa Mon Sep 17 00:00:00 2001 From: Ben Adams Date: Thu, 24 Jan 2019 23:13:16 +0000 Subject: [PATCH 4/5] 
Better annotations for clarity --- .../shared/System/SpanHelpers.Byte.cs | 128 ++++++++++++------ 1 file changed, 87 insertions(+), 41 deletions(-) diff --git a/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs b/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs index d295e09baacf..e1965f79a819 100644 --- a/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs +++ b/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs @@ -276,13 +276,16 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) { Vector256 search = LoadVector256(ref searchSpace, offset); int matches = Avx2.MoveMask(Avx2.CompareEqual(values, search)); + // Note that MoveMask has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. if (matches == 0) { + // Zero flags set so no matches offset += Vector256.Count; continue; } - // Find offset of first match + // Find bitflag offset of first match and add to current offset return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches); } while ((byte*)nLength > (byte*)offset); } @@ -294,13 +297,16 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) Vector128 search = LoadVector128(ref searchSpace, offset); int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search)); + // Note that MoveMask has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. 
if (matches == 0) { + // Zero flags set so no matches offset += Vector128.Count; } else { - // Find offset of first match + // Find bitflag offset of first match and add to current offset return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches); } } @@ -324,13 +330,16 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) Vector128 search = LoadVector128(ref searchSpace, offset); int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search)); + // Note that MoveMask has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. if (matches == 0) { + // Zero flags set so no matches offset += Vector128.Count; continue; } - // Find offset of first match + // Find bitflag offset of first match and add to current offset return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches); } @@ -358,7 +367,7 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) continue; } - // Find offset of first match + // Find offset of first match and add to current offset return (int)(byte*)offset + LocateFirstFoundByte(matches); } @@ -499,7 +508,7 @@ public static unsafe int LastIndexOf(ref byte searchSpace, byte value, int lengt continue; } - // Find offset of first match + // Find offset of first match and add to current offset return (int)(offset) - Vector.Count + LocateLastFoundByte(matches); } if ((byte*)offset > (byte*)0) @@ -630,13 +639,16 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu Vector256 search = LoadVector256(ref searchSpace, offset); int matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search)); matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search)); + // Note that MoveMask has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. 
if (matches == 0) { + // Zero flags set so no matches offset += Vector256.Count; continue; } - // Find offset of first match + // Find bitflag offset of first match and add to current offset return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches); } while ((byte*)nLength > (byte*)offset); } @@ -650,13 +662,16 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu Vector128 search = LoadVector128(ref searchSpace, offset); int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search)); matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search)); + // Note that MoveMask has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. if (matches == 0) { + // Zero flags set so no matches offset += Vector128.Count; } else { - // Find offset of first match + // Find bitflag offset of first match and add to current offset return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches); } } @@ -682,13 +697,16 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu Vector128 search = LoadVector128(ref searchSpace, offset); int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search)); matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search)); + // Note that MoveMask has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. 
if (matches == 0) { + // Zero flags set so no matches offset += Vector128.Count; continue; } - // Find offset of first match + // Find bitflag offset of first match and add to current offset return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches); } @@ -720,7 +738,7 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu continue; } - // Find offset of first match + // Find offset of first match and add to current offset return (int)(byte*)offset + LocateFirstFoundByte(matches); } @@ -755,8 +773,8 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu Debug.Assert(length >= 0); uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue1 = value1; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue2 = value2; // Use uint for comparisons to avoid unnecessary 8->32 extensions + uint uValue1 = value1; + uint uValue2 = value2; IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations IntPtr nLength = (IntPtr)length; @@ -856,13 +874,16 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu int matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search)); matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search)); matches |= Avx2.MoveMask(Avx2.CompareEqual(values2, search)); + // Note that MoveMask has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. 
if (matches == 0) { + // Zero flags set so no matches offset += Vector256.Count; continue; } - // Find offset of first match + // Find bitflag offset of first match and add to current offset return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches); } while ((byte*)nLength > (byte*)offset); } @@ -878,13 +899,16 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search)); matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search)); matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search)); + // Note that MoveMask has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. if (matches == 0) { + // Zero flags set so no matches offset += Vector128.Count; } else { - // Find offset of first match + // Find bitflag offset of first match and add to current offset return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches); } } @@ -912,13 +936,16 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search)); matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search)); matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search)); + // Note that MoveMask has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. 
if (matches == 0) { + // Zero flags set so no matches offset += Vector128.Count; continue; } - // Find offset of first match + // Find bitflag offset of first match and add to current offset return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches); } @@ -955,7 +982,7 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu continue; } - // Find offset of first match + // Find offset of first match and add to current offset return (int)(byte*)offset + LocateFirstFoundByte(matches); } @@ -990,7 +1017,7 @@ public static unsafe int LastIndexOfAny(ref byte searchSpace, byte value0, byte Debug.Assert(length >= 0); uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue1 = value1; // Use uint for comparisons to avoid unnecessary 8->32 extensions + uint uValue1 = value1; IntPtr offset = (IntPtr)length; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations IntPtr nLength = (IntPtr)length; @@ -1080,7 +1107,7 @@ public static unsafe int LastIndexOfAny(ref byte searchSpace, byte value0, byte continue; } - // Find offset of first match + // Find offset of first match and add to current offset return (int)(offset) - Vector.Count + LocateLastFoundByte(matches); } @@ -1114,8 +1141,8 @@ public static unsafe int LastIndexOfAny(ref byte searchSpace, byte value0, byte Debug.Assert(length >= 0); uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue1 = value1; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue2 = value2; // Use uint for comparisons to avoid unnecessary 8->32 extensions + uint uValue1 = value1; + uint uValue2 = value2; IntPtr offset = (IntPtr)length; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations IntPtr nLength = (IntPtr)length; @@ -1210,7 +1237,7 @@ public static unsafe int LastIndexOfAny(ref byte searchSpace, byte value0, byte continue; } - // Find offset of first 
match + // Find offset of first match and add to current offset return (int)(offset) - Vector.Count + LocateLastFoundByte(matches); } @@ -1329,11 +1356,15 @@ public static unsafe int SequenceCompareTo(ref byte first, int firstLength, ref if ((byte*)nLength >= (byte*)Vector256.Count) { nLength -= Vector256.Count; - int matches; + uint matches; while ((byte*)nLength > (byte*)offset) { - matches = Avx2.MoveMask(Avx2.CompareEqual(LoadVector256(ref first, offset), LoadVector256(ref second, offset))); - if (matches == -1) + matches = (uint)Avx2.MoveMask(Avx2.CompareEqual(LoadVector256(ref first, offset), LoadVector256(ref second, offset))); + // Note that MoveMask has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. + + // 32 elements in Vector256 so we compare to uint.MaxValue to check if everything matched + if (matches == uint.MaxValue) { // All matched offset += Vector256.Count; @@ -1344,16 +1375,21 @@ public static unsafe int SequenceCompareTo(ref byte first, int firstLength, ref } // Move to Vector length from end for final compare offset = nLength; - matches = Avx2.MoveMask(Avx2.CompareEqual(LoadVector256(ref first, offset), LoadVector256(ref second, offset))); - if (matches == -1) + matches = (uint)Avx2.MoveMask(Avx2.CompareEqual(LoadVector256(ref first, offset), LoadVector256(ref second, offset))); + // Note that MoveMask has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. 
+ + // 32 elements in Vector256 so we compare to uint.MaxValue to check if everything matched + if (matches == uint.MaxValue) { // All matched goto Equal; } Difference: // Invert matches to find differences - int differences = ~matches; - offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount(differences)); + uint differences = ~matches; + // Find bitflag offset of first difference and add to current offset + offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount((int)differences)); int result = Unsafe.AddByteOffset(ref first, offset).CompareTo(Unsafe.AddByteOffset(ref second, offset)); Debug.Assert(result != 0); @@ -1364,11 +1400,11 @@ public static unsafe int SequenceCompareTo(ref byte first, int firstLength, ref if ((byte*)nLength >= (byte*)Vector128.Count) { nLength -= Vector128.Count; - int matches; + uint matches; if ((byte*)nLength > (byte*)offset) { - matches = Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset))); - if (matches == 0xFFFF) + matches = (uint)Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset))); + if (matches == ushort.MaxValue) { // All matched offset += Vector128.Count; @@ -1380,16 +1416,21 @@ public static unsafe int SequenceCompareTo(ref byte first, int firstLength, ref } // Move to Vector length from end for final compare offset = nLength; - matches = Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset))); - if (matches == 0xFFFF) + matches = (uint)Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset))); + // Note that MoveMask has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. 
+ + // 16 elements in Vector128 so we compare to ushort.MaxValue to check if everything matched + if (matches == ushort.MaxValue) { // All matched goto Equal; } Difference: // Invert matches to find differences - int differences = ~matches; - offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount(differences)); + uint differences = ~matches; + // Find bitflag offset of first difference and add to current offset + offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount((int)differences)); int result = Unsafe.AddByteOffset(ref first, offset).CompareTo(Unsafe.AddByteOffset(ref second, offset)); Debug.Assert(result != 0); @@ -1402,11 +1443,11 @@ public static unsafe int SequenceCompareTo(ref byte first, int firstLength, ref if ((byte*)nLength >= (byte*)Vector128.Count) { nLength -= Vector128.Count; - int matches; + uint matches; while ((byte*)nLength > (byte*)offset) { - matches = Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset))); - if (matches == 0xFFFF) + matches = (uint)Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset))); + if (matches == ushort.MaxValue) { // All matched offset += Vector128.Count; @@ -1417,16 +1458,21 @@ public static unsafe int SequenceCompareTo(ref byte first, int firstLength, ref } // Move to Vector length from end for final compare offset = nLength; - matches = Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset))); - if (matches == 0xFFFF) + matches = (uint)Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset))); + // Note that MoveMask has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. 
+ + // 16 elements in Vector128 so we compare to ushort.MaxValue to check if everything matched + if (matches == ushort.MaxValue) { // All matched goto Equal; } Difference: // Invert matches to find differences - int differences = ~matches; - offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount(differences)); + uint differences = ~matches; + // Find bitflag offset of first difference and add to current offset + offset = (IntPtr)((int)(byte*)offset + BitOps.TrailingZeroCount((int)differences)); int result = Unsafe.AddByteOffset(ref first, offset).CompareTo(Unsafe.AddByteOffset(ref second, offset)); Debug.Assert(result != 0); From 8b852b6b25e546bafc1af243acc59f626d2c3aaa Mon Sep 17 00:00:00 2001 From: Ben Adams Date: Fri, 25 Jan 2019 00:01:27 +0000 Subject: [PATCH 5/5] More clarity, by less repeats --- .../shared/System/SpanHelpers.Byte.cs | 52 +++++++++---------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs b/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs index e1965f79a819..3062a405b519 100644 --- a/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs +++ b/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs @@ -296,9 +296,8 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) Vector128 values = Vector128.Create(value); Vector128 search = LoadVector128(ref searchSpace, offset); + // Same method as above int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search)); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. 
if (matches == 0) { // Zero flags set so no matches @@ -329,9 +328,8 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) { Vector128 search = LoadVector128(ref searchSpace, offset); + // Same method as above int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search)); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. if (matches == 0) { // Zero flags set so no matches @@ -637,10 +635,11 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu do { Vector256 search = LoadVector256(ref searchSpace, offset); - int matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search)); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search)); // Note that MoveMask has converted the equal vector elements into a set of bit flags, // So the bit position in 'matches' corresponds to the element offset. + int matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search)); + // Bitwise Or to combine the flagged matches for the second value to our match flags + matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search)); if (matches == 0) { // Zero flags set so no matches @@ -660,10 +659,9 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu Vector128 values1 = Vector128.Create(value1); Vector128 search = LoadVector128(ref searchSpace, offset); + // Same method as above int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search)); matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search)); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. 
if (matches == 0) { // Zero flags set so no matches @@ -695,10 +693,9 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu while ((byte*)nLength > (byte*)offset) { Vector128 search = LoadVector128(ref searchSpace, offset); + // Same method as above int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search)); matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search)); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. if (matches == 0) { // Zero flags set so no matches @@ -871,11 +868,13 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu do { Vector256 search = LoadVector256(ref searchSpace, offset); + // Note that MoveMask has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. int matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search)); + // Bitwise Or to combine the flagged matches for the second value to our match flags matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search)); + // Bitwise Or to combine the flagged matches for the third value to our match flags matches |= Avx2.MoveMask(Avx2.CompareEqual(values2, search)); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. 
if (matches == 0) { // Zero flags set so no matches @@ -896,11 +895,10 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu Vector128 values2 = Vector128.Create(value2); Vector128 search = LoadVector128(ref searchSpace, offset); + // Same method as above int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search)); matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search)); matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search)); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. if (matches == 0) { // Zero flags set so no matches @@ -933,11 +931,10 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu while ((byte*)nLength > (byte*)offset) { Vector128 search = LoadVector128(ref searchSpace, offset); + // Same method as above int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search)); matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search)); matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search)); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. if (matches == 0) { // Zero flags set so no matches @@ -1375,11 +1372,8 @@ public static unsafe int SequenceCompareTo(ref byte first, int firstLength, ref } // Move to Vector length from end for final compare offset = nLength; + // Same method as above matches = (uint)Avx2.MoveMask(Avx2.CompareEqual(LoadVector256(ref first, offset), LoadVector256(ref second, offset))); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. 
- - // 32 elements in Vector256 so we compare to uint.MaxValue to check if everything matched if (matches == uint.MaxValue) { // All matched @@ -1404,6 +1398,10 @@ public static unsafe int SequenceCompareTo(ref byte first, int firstLength, ref if ((byte*)nLength > (byte*)offset) { matches = (uint)Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset))); + // Note that MoveMask has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. + + // 16 elements in Vector128 so we compare to ushort.MaxValue to check if everything matched if (matches == ushort.MaxValue) { // All matched @@ -1416,11 +1414,8 @@ public static unsafe int SequenceCompareTo(ref byte first, int firstLength, ref } // Move to Vector length from end for final compare offset = nLength; + // Same method as above matches = (uint)Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset))); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - - // 16 elements in Vector128 so we compare to ushort.MaxValue to check if everything matched if (matches == ushort.MaxValue) { // All matched @@ -1447,6 +1442,10 @@ public static unsafe int SequenceCompareTo(ref byte first, int firstLength, ref while ((byte*)nLength > (byte*)offset) { matches = (uint)Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset))); + // Note that MoveMask has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. 
+ + // 16 elements in Vector128 so we compare to ushort.MaxValue to check if everything matched if (matches == ushort.MaxValue) { // All matched @@ -1458,11 +1457,8 @@ public static unsafe int SequenceCompareTo(ref byte first, int firstLength, ref } // Move to Vector length from end for final compare offset = nLength; + // Same method as above matches = (uint)Sse2.MoveMask(Sse2.CompareEqual(LoadVector128(ref first, offset), LoadVector128(ref second, offset))); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - - // 16 elements in Vector128 so we compare to ushort.MaxValue to check if everything matched if (matches == ushort.MaxValue) { // All matched