Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ static Utf16Utility()
Vector128<ushort> vectorA800 = Vector128.Create((ushort)0xA800);
Vector128<short> vector8800 = Vector128.Create(unchecked((short)0x8800));
Vector128<ushort> vectorZero = Vector128<ushort>.Zero;

Vector128<byte> bitMask128 = BitConverter.IsLittleEndian ?
Vector128.Create(0x80402010_08040201).AsByte() :
Vector128.Create(0x01020408_10204080).AsByte();
Comment on lines +91 to +93
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the JIT know that Vector128.Create is side-effect free so it can eliminate this code when the bitMask128 variable is unused? If not, this is adding unnecessary overhead to the x86 path.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure. @echesakovMSFT @tannergooding @kunalspathak do you know the answer to this?

Regardless of the answer, since the code was moved from being a static field (which was initialized for all architectures) to a local variable (which is also available for all architectures), I assume the behavior would not be worse than before.

If we think this needs to be addressed to avoid the unnecessary overhead in x86, I can create an issue so it gets fixed later.

I would like to get this merged as soon as possible to unblock net472 (prevent the PNS exception on runtime), so the backport to Prev8 gets approved.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The static would only have been initialized once. The local is created for every invocation.

The simple workaround, if indeed the JIT isn't eliminating it as dead, would just be to re-use the zero vector above so this one becomes an unused alias. i.e.:

Vector128<byte> bitMask128 = !AdvSimd.IsSupported ? vectorZero :
    BitConverter.IsLittleEndian ?
    Vector128.Create(0x80402010_08040201).AsByte() :
    Vector128.Create(0x01020408_10204080).AsByte();

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, thanks for the clarification, @saucecontrol . I'll add it to the list of TODOs to address in the next PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do eliminate dead stores. Here is a sample test on x64:

Vector128<byte> bitMask128 = BitConverter.IsLittleEndian ?
            Vector128.Create(0x80402010_08040201).AsByte() :
            Vector128.Create(0x01020408_10204080).AsByte();

if (AdvSimd.IsSupported)
{
    Console.WriteLine(bitMask128);
}
else
{
    Console.WriteLine(1);
}

Produces:

G_M58174_IG01:
       4883EC28             sub      rsp, 40
                                                ;; bbWeight=1    PerfScore 0.25
G_M58174_IG02:
       B901000000           mov      ecx, 1
       E832FDFFFF           call     System.Console:WriteLine(int)
       B864000000           mov      eax, 100
                                                ;; bbWeight=1    PerfScore 1.50
G_M58174_IG03:
       4883C428             add      rsp, 40
       C3                   ret
                                                ;; bbWeight=1    PerfScore 1.25


do
{
Vector128<ushort> utf16Data;
Expand Down Expand Up @@ -127,7 +132,7 @@ static Utf16Utility()
uint debugMask;
if (AdvSimd.Arm64.IsSupported)
{
debugMask = GetNonAsciiBytes(charIsNonAscii.AsByte());
debugMask = GetNonAsciiBytes(charIsNonAscii.AsByte(), bitMask128);
}
else
{
Expand All @@ -145,7 +150,7 @@ static Utf16Utility()
if (AdvSimd.IsSupported)
{
charIsThreeByteUtf8Encoded = AdvSimd.Subtract(vectorZero, AdvSimd.ShiftRightLogical(utf16Data, 11));
mask = GetNonAsciiBytes(AdvSimd.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte());
mask = GetNonAsciiBytes(AdvSimd.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte(), bitMask128);
}
else
{
Expand Down Expand Up @@ -185,7 +190,7 @@ static Utf16Utility()
if (AdvSimd.Arm64.IsSupported)
{
utf16Data = AdvSimd.Add(utf16Data, vectorA800);
mask = GetNonAsciiBytes(AdvSimd.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte());
mask = GetNonAsciiBytes(AdvSimd.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte(), bitMask128);
}
else
{
Expand Down Expand Up @@ -219,7 +224,7 @@ static Utf16Utility()
uint mask2;
if (AdvSimd.Arm64.IsSupported)
{
mask2 = GetNonAsciiBytes(AdvSimd.ShiftRightLogical(utf16Data, 3).AsByte());
mask2 = GetNonAsciiBytes(AdvSimd.ShiftRightLogical(utf16Data, 3).AsByte(), bitMask128);
}
else
{
Expand Down Expand Up @@ -480,17 +485,13 @@ static Utf16Utility()
return pInputBuffer;
}

private static readonly Vector128<byte> s_bitMask128 = BitConverter.IsLittleEndian ?
Vector128.Create(0x80402010_08040201).AsByte() :
Vector128.Create(0x01020408_10204080).AsByte();

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint GetNonAsciiBytes(Vector128<byte> value)
private static uint GetNonAsciiBytes(Vector128<byte> value, Vector128<byte> bitMask128)
{
Debug.Assert(AdvSimd.Arm64.IsSupported);

Vector128<byte> mostSignificantBitIsSet = AdvSimd.ShiftRightArithmetic(value.AsSByte(), 7).AsByte();
Vector128<byte> extractedBits = AdvSimd.And(mostSignificantBitIsSet, s_bitMask128);
Vector128<byte> extractedBits = AdvSimd.And(mostSignificantBitIsSet, bitMask128);

// self-pairwise add until all flags have moved to the first two bytes of the vector
extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits);
Expand Down