Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,9 @@ public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf
// Load the next 16 bytes.
Vector128<sbyte> sourceValue = Sse2.LoadVector128(startingAddress);

Vector128<sbyte> mask = Sse2Helper.CreateAsciiMask(sourceValue);
int index = Sse2.MoveMask(mask);
// Check for ASCII text. Any byte that's not in the ASCII range will already be negative when
// cast to signed byte.
int index = Sse2.MoveMask(sourceValue);

if (index != 0)
{
Expand Down Expand Up @@ -196,6 +197,7 @@ public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf
idx += utf8BytesConsumedForScalar;
}
}
Debug.Assert(idx == utf8Text.Length);
Comment thread
ahsonkhan marked this conversation as resolved.

idx = -1; // All bytes are allowed.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,17 @@ public static Vector128<short> CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder
{
Debug.Assert(Sse2.IsSupported);

// Space ' ', anything in the control characters range, and anything above short.MaxValue but less than or equal char.MaxValue
Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_mask_UInt16_0x20);
// Anything in the control characters range, and anything above short.MaxValue but less than or equal char.MaxValue
// That's because anything between 32768 and 65535 (inclusive) will overflow and become negative.
Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_spaceMaskInt16);

mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x22)); // Quotation Mark '"'
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x5C)); // Reverse Solidus '\'
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_quotationMarkMaskInt16));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_reverseSolidusMaskInt16));

// Anything above the ASCII range, and also including the leftover control character in the ASCII range - 0x7F
// When this method is called with only ASCII data, 0x7F is the only value that would meet this comparison.
// However, when called from "Default", the source could contain characters outside the ASCII range.
mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_tildeMaskInt16));
Comment thread
ahsonkhan marked this conversation as resolved.

return mask;
}
Expand All @@ -31,10 +37,16 @@ public static Vector128<sbyte> CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder
{
Debug.Assert(Sse2.IsSupported);

Vector128<sbyte> mask = Sse2.CompareLessThan(sourceValue, s_mask_SByte_0x20); // Control characters, and anything above 0x7E since sbyte.MaxValue is 0x7E
// Anything in the control characters range (except 0x7F), and anything above sbyte.MaxValue but less than or equal byte.MaxValue
// That's because anything between 128 and 255 (inclusive) will overflow and become negative.
Vector128<sbyte> mask = Sse2.CompareLessThan(sourceValue, s_spaceMaskSByte);

mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_quotationMarkMaskSByte));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_reverseSolidusMaskSByte));

mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x22)); // Quotation Mark "
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x5C)); // Reverse Solidus \
// Leftover control character in the ASCII range - 0x7F
// Since we are dealing with sbytes, 0x7F is the only value that would meet this comparison.
mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_tildeMaskSByte));

return mask;
}
Expand All @@ -46,14 +58,12 @@ public static Vector128<short> CreateEscapingMask_DefaultJavaScriptEncoderBasicL

Vector128<short> mask = CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(sourceValue);

mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x26)); // Ampersand '&'
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x27)); // Apostrophe '''
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x2B)); // Plus sign '+'
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3C)); // Less Than Sign '<'
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3E)); // Greater Than Sign '>'
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x60)); // Grave Access '`'

mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_mask_UInt16_0x7E)); // Tilde '~', anything above the ASCII range
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_ampersandMaskInt16));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_apostropheMaskInt16));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_plusSignMaskInt16));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_lessThanSignMaskInt16));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_greaterThanSignMaskInt16));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_graveAccentMaskInt16));

return mask;
}
Expand All @@ -65,12 +75,12 @@ public static Vector128<sbyte> CreateEscapingMask_DefaultJavaScriptEncoderBasicL

Vector128<sbyte> mask = CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(sourceValue);

mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x26)); // Ampersand &
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x27)); // Apostrophe '
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x2B)); // Plus sign +
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3C)); // Less Than Sign <
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3E)); // Greater Than Sign >
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x60)); // Grave Access `
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_ampersandMaskSByte));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_apostropheMaskSByte));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_plusSignMaskSByte));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_lessThanSignMaskSByte));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_greaterThanSignMaskSByte));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_graveAccentMaskSByte));

return mask;
}
Expand All @@ -80,48 +90,38 @@ public static Vector128<short> CreateAsciiMask(Vector128<short> sourceValue)
{
Debug.Assert(Sse2.IsSupported);

Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_mask_UInt16_0x00); // Null, anything above short.MaxValue but less than or equal char.MaxValue
mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_mask_UInt16_0x7E)); // Tilde '~', anything above the ASCII range
// Anything above short.MaxValue but less than or equal char.MaxValue
// That's because anything between 32768 and 65535 (inclusive) will overflow and become negative.
Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_nullMaskInt16);

return mask;
}
// Anything above the ASCII range
mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_maxAsciiCharacterMaskInt16));

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<sbyte> CreateAsciiMask(Vector128<sbyte> sourceValue)
Comment thread
ahsonkhan marked this conversation as resolved.
{
Debug.Assert(Sse2.IsSupported);

// Null, anything above sbyte.MaxValue but less than or equal byte.MaxValue (i.e. anything above the ASCII range)
Vector128<sbyte> mask = Sse2.CompareLessThan(sourceValue, s_mask_SByte_0x00);
return mask;
}

private static readonly Vector128<short> s_mask_UInt16_0x00 = Vector128<short>.Zero; // Null

private static readonly Vector128<short> s_mask_UInt16_0x20 = Vector128.Create((short)0x20); // Space ' '
Copy link
Copy Markdown
Author

@ahsonkhan ahsonkhan Oct 25, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renaming these to address some of the leftover feedback from #41845 (comment)

cc @tannergooding

Copy link
Copy Markdown
Author

@ahsonkhan ahsonkhan Oct 25, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a heads up @gfoidl - since this will likely conflict with your changes being built-on-top (specifically #42073).

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the hint -- I'll incorporate this, so no conflict will occur.


private static readonly Vector128<short> s_mask_UInt16_0x22 = Vector128.Create((short)0x22); // Quotation Mark '"'
private static readonly Vector128<short> s_mask_UInt16_0x26 = Vector128.Create((short)0x26); // Ampersand '&'
private static readonly Vector128<short> s_mask_UInt16_0x27 = Vector128.Create((short)0x27); // Apostrophe '''
private static readonly Vector128<short> s_mask_UInt16_0x2B = Vector128.Create((short)0x2B); // Plus sign '+'
private static readonly Vector128<short> s_mask_UInt16_0x3C = Vector128.Create((short)0x3C); // Less Than Sign '<'
private static readonly Vector128<short> s_mask_UInt16_0x3E = Vector128.Create((short)0x3E); // Greater Than Sign '>'
private static readonly Vector128<short> s_mask_UInt16_0x5C = Vector128.Create((short)0x5C); // Reverse Solidus '\'
private static readonly Vector128<short> s_mask_UInt16_0x60 = Vector128.Create((short)0x60); // Grave Access '`'

private static readonly Vector128<short> s_mask_UInt16_0x7E = Vector128.Create((short)0x7E); // Tilde '~'

private static readonly Vector128<sbyte> s_mask_SByte_0x00 = Vector128<sbyte>.Zero; // Null

private static readonly Vector128<sbyte> s_mask_SByte_0x20 = Vector128.Create((sbyte)0x20); // Space ' '

private static readonly Vector128<sbyte> s_mask_SByte_0x22 = Vector128.Create((sbyte)0x22); // Quotation Mark '"'
private static readonly Vector128<sbyte> s_mask_SByte_0x26 = Vector128.Create((sbyte)0x26); // Ampersand '&'
private static readonly Vector128<sbyte> s_mask_SByte_0x27 = Vector128.Create((sbyte)0x27); // Apostrophe '''
private static readonly Vector128<sbyte> s_mask_SByte_0x2B = Vector128.Create((sbyte)0x2B); // Plus sign '+'
private static readonly Vector128<sbyte> s_mask_SByte_0x3C = Vector128.Create((sbyte)0x3C); // Less Than Sign '<'
private static readonly Vector128<sbyte> s_mask_SByte_0x3E = Vector128.Create((sbyte)0x3E); // Greater Than Sign '>'
private static readonly Vector128<sbyte> s_mask_SByte_0x5C = Vector128.Create((sbyte)0x5C); // Reverse Solidus '\'
private static readonly Vector128<sbyte> s_mask_SByte_0x60 = Vector128.Create((sbyte)0x60); // Grave Access '`'
private static readonly Vector128<short> s_nullMaskInt16 = Vector128<short>.Zero;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe s_zeroMaskInt16?

Copy link
Copy Markdown
Author

@ahsonkhan ahsonkhan Oct 26, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is in the context of characters, I have been using character names. In this case, that's null (not zero, which just happens to be the value):
https://www.fileformat.info/info/unicode/char/0000/index.htm

I can see why using zero could be useful (similar to using s_maxAsciiCharacterMaskInt16 rather than delete for 0x7F), but not sure if it's worth changing.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

null reminds me of null 😉
The vector consists of 0s, so zero. Its usage is in CompareLessThan, so it's more a comparison against a value, rather than null.

But this is more or less a quibble, and so it's up to you whether it's changed or not.

private static readonly Vector128<short> s_spaceMaskInt16 = Vector128.Create((short)' ');
private static readonly Vector128<short> s_quotationMarkMaskInt16 = Vector128.Create((short)'"');
private static readonly Vector128<short> s_ampersandMaskInt16 = Vector128.Create((short)'&');
private static readonly Vector128<short> s_apostropheMaskInt16 = Vector128.Create((short)'\'');
private static readonly Vector128<short> s_plusSignMaskInt16 = Vector128.Create((short)'+');
private static readonly Vector128<short> s_lessThanSignMaskInt16 = Vector128.Create((short)'<');
private static readonly Vector128<short> s_greaterThanSignMaskInt16 = Vector128.Create((short)'>');
private static readonly Vector128<short> s_reverseSolidusMaskInt16 = Vector128.Create((short)'\\');
private static readonly Vector128<short> s_graveAccentMaskInt16 = Vector128.Create((short)'`');
private static readonly Vector128<short> s_tildeMaskInt16 = Vector128.Create((short)'~');
private static readonly Vector128<short> s_maxAsciiCharacterMaskInt16 = Vector128.Create((short)0x7F); // Delete control character

private static readonly Vector128<sbyte> s_spaceMaskSByte = Vector128.Create((sbyte)' ');
private static readonly Vector128<sbyte> s_quotationMarkMaskSByte = Vector128.Create((sbyte)'"');
private static readonly Vector128<sbyte> s_ampersandMaskSByte = Vector128.Create((sbyte)'&');
private static readonly Vector128<sbyte> s_apostropheMaskSByte = Vector128.Create((sbyte)'\'');
private static readonly Vector128<sbyte> s_plusSignMaskSByte = Vector128.Create((sbyte)'+');
private static readonly Vector128<sbyte> s_lessThanSignMaskSByte = Vector128.Create((sbyte)'<');
private static readonly Vector128<sbyte> s_greaterThanSignMaskSByte = Vector128.Create((sbyte)'>');
private static readonly Vector128<sbyte> s_reverseSolidusMaskSByte = Vector128.Create((sbyte)'\\');
private static readonly Vector128<sbyte> s_graveAccentMaskSByte = Vector128.Create((sbyte)'`');
private static readonly Vector128<sbyte> s_tildeMaskSByte = Vector128.Create((sbyte)'~');
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -722,8 +722,9 @@ public virtual unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf8
// Load the next 16 bytes.
Vector128<sbyte> sourceValue = Sse2.LoadVector128(startingAddress);

Vector128<sbyte> mask = Sse2Helper.CreateAsciiMask(sourceValue);
int index = Sse2.MoveMask(mask);
// Check for ASCII text. Any byte that's not in the ASCII range will already be negative when
// cast to signed byte.
int index = Sse2.MoveMask(sourceValue);

if (index != 0)
{
Expand Down Expand Up @@ -820,6 +821,7 @@ public virtual unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf8
idx += utf8BytesConsumedForScalar;
}
}
Debug.Assert(idx == utf8Text.Length);

idx = -1; // All bytes are allowed.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,9 @@ public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf
// Load the next 16 bytes.
Vector128<sbyte> sourceValue = Sse2.LoadVector128(startingAddress);

Vector128<sbyte> mask = Sse2Helper.CreateAsciiMask(sourceValue);
int index = Sse2.MoveMask(mask);
// Check for ASCII text. Any byte that's not in the ASCII range will already be negative when
// cast to signed byte.
int index = Sse2.MoveMask(sourceValue);

if (index != 0)
{
Expand Down Expand Up @@ -194,7 +195,7 @@ public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf
else
{
// Check if any of the 16 bytes need to be escaped.
mask = Sse2Helper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(sourceValue);
Vector128<sbyte> mask = Sse2Helper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(sourceValue);

index = Sse2.MoveMask(mask);
// If index == 0, that means none of the 16 bytes needed to be escaped.
Expand Down Expand Up @@ -245,6 +246,7 @@ public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf
idx += utf8BytesConsumedForScalar;
}
}
Debug.Assert(idx == utf8Text.Length);

idx = -1; // All bytes are allowed.

Expand Down
Loading