From 5d09005ecc396afb5eaedf78c7787654e4e7f0cf Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Thu, 7 Jul 2022 16:10:46 +0200 Subject: [PATCH 01/23] Speed up text encoding --- ...m.Private.DataContractSerialization.csproj | 1 + .../src/System/Xml/XmlStreamNodeWriter.cs | 154 ++++++++++++------ 2 files changed, 107 insertions(+), 48 deletions(-) diff --git a/src/libraries/System.Private.DataContractSerialization/src/System.Private.DataContractSerialization.csproj b/src/libraries/System.Private.DataContractSerialization/src/System.Private.DataContractSerialization.csproj index f8df7d3599de5e..ce0f12cc90aeab 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System.Private.DataContractSerialization.csproj +++ b/src/libraries/System.Private.DataContractSerialization/src/System.Private.DataContractSerialization.csproj @@ -163,6 +163,7 @@ + diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs index 6075d0a9c608e6..ef8ce3af19e56d 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs @@ -3,6 +3,8 @@ using System.IO; using System.Text; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; using System.Runtime.Serialization; using System.Threading.Tasks; @@ -16,6 +18,8 @@ internal abstract class XmlStreamNodeWriter : XmlNodeWriter private bool _ownsStream; private const int bufferLength = 512; private const int maxBytesPerChar = 3; + private const int CharsPerLong = 4; + private const ulong LongNonAsciiMask = 0xff80ff80ff80ff80; private Encoding? _encoding; private static readonly UTF8Encoding s_UTF8Encoding = new UTF8Encoding(false, true); @@ -56,18 +60,6 @@ public int Position } } - private int GetByteCount(char[] chars) - { - if (_encoding == null) - { - return s_UTF8Encoding.GetByteCount(chars); - } - else - { - return _encoding.GetByteCount(chars); - } - } - protected byte[] GetBuffer(int count, out int offset) { DiagnosticUtility.DebugAssert(count >= 0 && count <= bufferLength, ""); @@ -344,37 +336,75 @@ protected unsafe void UnsafeWriteUnicodeChars(char* chars, int charCount) protected unsafe int UnsafeGetUnicodeChars(char* chars, int charCount, byte[] buffer, int offset) { - char* charsMax = chars + charCount; - while (chars < charsMax) + if (BitConverter.IsLittleEndian) { - char value = *chars++; - buffer[offset++] = (byte)value; - value >>= 8; - buffer[offset++] = (byte)value; + new ReadOnlySpan((byte*)chars, 2 * charCount) + .CopyTo(buffer.AsSpan(offset)); } + else + { + char* charsMax = chars + charCount; + while (chars < charsMax) + { + char value = *chars++; + buffer[offset++] = (byte)value; + buffer[offset++] = (byte)(value >> 8); + } + } + return charCount * 2; } protected unsafe int UnsafeGetUTF8Length(char* chars, int charCount) { char* charsMax = chars + charCount; - while (chars < charsMax) + + // This method is only called from 2 places and will use length of at least (128/3 and 256/3) respectivly + // AVX is faster for at least 2048 chars, probably more + // for other cases the encoding path is better optimized than any fast path done here. + if (Avx.IsSupported) { - if (*chars >= 0x80) - break; + char* simdMax = charsMax - (Vector256.Count - 1); + char* longMax = charsMax - (CharsPerLong - 1); - chars++; - } + var mask = Vector256.Create((ushort)0xff80); + while (chars < simdMax) + { + var l = Vector256.Load((ushort*)chars); + if (!Avx.TestZ(l, mask)) + { + if (Sse41.TestZ(l.GetLower(), mask.GetLower())) + chars += Vector128.Count; + goto NonAscii; + } - if (chars == charsMax) - return charCount; + chars += Vector256.Count; + } - char[] chArray = new char[charsMax - chars]; - for (int i = 0; i < chArray.Length; i++) - { - chArray[i] = chars[i]; + while (chars < longMax) + { + if ((*(ulong*)chars & LongNonAsciiMask) != 0) + goto NonAscii; + + chars += CharsPerLong; + } + + while (chars < charsMax) + { + if (*chars >= 0x80) + goto NonAscii; + + chars++; + } + + return charCount; } - return (int)(chars - (charsMax - charCount)) + GetByteCount(chArray); + + NonAscii: + int numRemaining = (int)(charsMax - chars); + int numAscii = charCount - numRemaining; + + return numAscii + (_encoding ?? s_UTF8Encoding).GetByteCount(chars, numRemaining); } protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffer, int offset) @@ -386,36 +416,64 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe byte* bytes = _bytes; byte* bytesMax = &bytes[buffer.Length - offset]; char* charsMax = &chars[charCount]; + char* simdMax = &chars[charCount - (Vector128.Count - 1)]; + char* longMax = &chars[charCount - (CharsPerLong - 1)]; - while (true) + if (Sse41.IsSupported) { - while (chars < charsMax) + if (chars < simdMax) { - char t = *chars; - if (t >= 0x80) - break; - - *bytes = (byte)t; - bytes++; - chars++; + var mask = Vector128.Create(unchecked((short)0xff80)); + do + { + var v = Sse2.LoadVector128((short*)chars); + if (!Sse41.TestZ(v, mask)) + goto NonAscii; + + Sse2.StoreScalar((long*)bytes, Sse2.PackUnsignedSaturate(v, v).AsInt64()); + bytes += Vector128.Count; + chars += Vector128.Count; + } while (chars < simdMax); } + } + // Directly jump to system encoding for larger strings, since it is faster even for the all Ascii case + else if ((BitConverter.IsLittleEndian && charCount > 60) + || (!BitConverter.IsLittleEndian && charCount > 16)) + { + goto NonAscii; + } - if (chars >= charsMax) - break; - - char* charsStart = chars; - while (chars < charsMax && *chars >= 0x80) + if (BitConverter.IsLittleEndian) + { + while (chars < longMax) { - chars++; + ulong l = *(ulong*)chars; + if ((l & LongNonAsciiMask) != 0) + goto NonAscii; + + // 0x00dd00cc_00bb00aa => 0x00ddddcc_ccbbbbaa + l |= (l >> 8); + *(ushort*)bytes = (ushort)l; + *(ushort*)(bytes + 2) = (ushort)(l >> 32); + bytes += CharsPerLong; + chars += CharsPerLong; } + } - bytes += (_encoding ?? s_UTF8Encoding).GetBytes(charsStart, (int)(chars - charsStart), bytes, (int)(bytesMax - bytes)); + while (chars < charsMax) + { + char t = *chars; + if (t >= 0x80) + goto NonAscii; - if (chars >= charsMax) - break; + *bytes = (byte)t; + bytes++; + chars++; } return (int)(bytes - _bytes); + NonAscii: + return (int)(bytes - _bytes) + (_encoding ?? s_UTF8Encoding).GetBytes(chars, (int)(charsMax - chars), bytes, (int)(bytesMax - bytes)); } } return 0; From 63c760c7526beead24477a8ba7580b62743b041c Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Mon, 18 Jul 2022 14:57:27 +0200 Subject: [PATCH 02/23] Update implementation --- ...m.Private.DataContractSerialization.csproj | 1 + .../src/System/Xml/XmlStreamNodeWriter.cs | 111 ++++++------------ .../System.Runtime.Serialization.Xml.sln | 36 ++++-- 3 files changed, 66 insertions(+), 82 deletions(-) diff --git a/src/libraries/System.Private.DataContractSerialization/src/System.Private.DataContractSerialization.csproj b/src/libraries/System.Private.DataContractSerialization/src/System.Private.DataContractSerialization.csproj index ce0f12cc90aeab..f77b9ada15537d 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System.Private.DataContractSerialization.csproj +++ b/src/libraries/System.Private.DataContractSerialization/src/System.Private.DataContractSerialization.csproj @@ -159,6 +159,7 @@ + diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs index ef8ce3af19e56d..da60a78394c7f6 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.IO; +using System.Numerics; using System.Text; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; @@ -18,8 +19,6 @@ internal abstract class XmlStreamNodeWriter : XmlNodeWriter private bool _ownsStream; private const int bufferLength = 512; private const int maxBytesPerChar = 3; - private const int CharsPerLong = 4; - private const ulong LongNonAsciiMask = 0xff80ff80ff80ff80; private Encoding? _encoding; private static readonly UTF8Encoding s_UTF8Encoding = new UTF8Encoding(false, true); @@ -362,42 +361,23 @@ protected unsafe int UnsafeGetUTF8Length(char* chars, int charCount) // This method is only called from 2 places and will use length of at least (128/3 and 256/3) respectivly // AVX is faster for at least 2048 chars, probably more // for other cases the encoding path is better optimized than any fast path done here. - if (Avx.IsSupported) + if (Vector.IsHardwareAccelerated + && Vector.Count > Vector128.Count + && Vector.Count < charCount && charCount <= 2048) { - char* simdMax = charsMax - (Vector256.Count - 1); - char* longMax = charsMax - (CharsPerLong - 1); + char* lastSimd = chars + charCount - Vector.Count; + var mask = new Vector(unchecked((short)0xff80)); - var mask = Vector256.Create((ushort)0xff80); - while (chars < simdMax) + while (chars < lastSimd) { - var l = Vector256.Load((ushort*)chars); - if (!Avx.TestZ(l, mask)) - { - if (Sse41.TestZ(l.GetLower(), mask.GetLower())) - chars += Vector128.Count; - goto NonAscii; - } - - chars += Vector256.Count; - } - - while (chars < longMax) - { - if ((*(ulong*)chars & LongNonAsciiMask) != 0) + if (((*(Vector*)chars) & mask) != Vector.Zero) goto NonAscii; - chars += CharsPerLong; + chars += Vector.Count; } - while (chars < charsMax) - { - if (*chars >= 0x80) - goto NonAscii; - - chars++; - } - - return charCount; + if ((*(Vector*)lastSimd & mask) == Vector.Zero) + return charCount; } NonAscii: @@ -416,62 +396,47 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe byte* bytes = _bytes; byte* bytesMax = &bytes[buffer.Length - offset]; char* charsMax = &chars[charCount]; - char* simdMax = &chars[charCount - (Vector128.Count - 1)]; - char* longMax = &chars[charCount - (CharsPerLong - 1)]; + char* simdLast = chars + charCount - Vector128.Count; - if (Sse41.IsSupported) + if (Sse41.IsSupported && charCount >= Vector128.Count) { - if (chars < simdMax) + var mask = Vector128.Create(unchecked((short)0xff80)); + + while (chars < simdLast) { - var mask = Vector128.Create(unchecked((short)0xff80)); - do - { - var v = Sse2.LoadVector128((short*)chars); - if (!Sse41.TestZ(v, mask)) - goto NonAscii; - - Sse2.StoreScalar((long*)bytes, Sse2.PackUnsignedSaturate(v, v).AsInt64()); - bytes += Vector128.Count; - chars += Vector128.Count; - } while (chars < simdMax); + var v = Sse2.LoadVector128((short*)chars); + if (!Sse41.TestZ(v, mask)) + goto NonAscii; + + Sse2.StoreScalar((long*)bytes, Sse2.PackUnsignedSaturate(v, v).AsInt64()); + bytes += Vector128.Count; + chars += Vector128.Count; } + + var v2 = Sse2.LoadVector128((short*)simdLast); + if (!Sse41.TestZ(v2, mask)) + goto NonAscii; + + Sse2.StoreScalar((long*)(bytesMax - sizeof(long)), Sse2.PackUnsignedSaturate(v2, v2).AsInt64()); + return charCount; } // Directly jump to system encoding for larger strings, since it is faster even for the all Ascii case - else if ((BitConverter.IsLittleEndian && charCount > 60) - || (!BitConverter.IsLittleEndian && charCount > 16)) - { - goto NonAscii; - } - - if (BitConverter.IsLittleEndian) + else if (charCount < 16) { - while (chars < longMax) + while (chars < charsMax) { - ulong l = *(ulong*)chars; - if ((l & LongNonAsciiMask) != 0) + char t = *chars; + if (t >= 0x80) goto NonAscii; - // 0x00dd00cc_00bb00aa => 0x00ddddcc_ccbbbbaa - l |= (l >> 8); - *(ushort*)bytes = (ushort)l; - *(ushort*)(bytes + 2) = (ushort)(l >> 32); - bytes += CharsPerLong; - chars += CharsPerLong; + *bytes = (byte)t; + bytes++; + chars++; } - } - - while (chars < charsMax) - { - char t = *chars; - if (t >= 0x80) - goto NonAscii; - *bytes = (byte)t; - bytes++; - chars++; + return charCount; } - return (int)(bytes - _bytes); NonAscii: return (int)(bytes - _bytes) + (_encoding ?? s_UTF8Encoding).GetBytes(chars, (int)(charsMax - chars), bytes, (int)(bytesMax - bytes)); } diff --git a/src/libraries/System.Runtime.Serialization.Xml/System.Runtime.Serialization.Xml.sln b/src/libraries/System.Runtime.Serialization.Xml/System.Runtime.Serialization.Xml.sln index a31621359a3adc..90df4d52f8809e 100644 --- a/src/libraries/System.Runtime.Serialization.Xml/System.Runtime.Serialization.Xml.sln +++ b/src/libraries/System.Runtime.Serialization.Xml/System.Runtime.Serialization.Xml.sln @@ -1,4 +1,8 @@ -Microsoft Visual Studio Solution File, Format Version 12.00 + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.2.32616.157 +MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TestUtilities", "..\Common\tests\TestUtilities\TestUtilities.csproj", "{CBA80130-6773-4DF9-995C-DC6CBED89CB5}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.Win32.Primitives", "..\Microsoft.Win32.Primitives\ref\Microsoft.Win32.Primitives.csproj", "{E5DB95E1-94AA-405C-9FFE-09B1E2498EE2}" @@ -45,6 +49,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{DB29DBEF-FA4 EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "gen", "gen", "{DE71D38E-4154-477C-9C27-3FA4ADB4098F}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Numerics.Vectors", "..\System.Numerics.Vectors\ref\System.Numerics.Vectors.csproj", "{EFE0C13B-6902-4FC9-91DD-F180420B36C8}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Runtime.Intrinsics", "..\System.Runtime.Intrinsics\ref\System.Runtime.Intrinsics.csproj", "{7F8A8D87-B49A-4C7B-8474-AC83F8CFD40B}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -127,30 +135,40 @@ Global {DF2255F4-F671-4C15-9100-D8079992E19D}.Debug|Any CPU.Build.0 = Debug|Any CPU {DF2255F4-F671-4C15-9100-D8079992E19D}.Release|Any CPU.ActiveCfg = Release|Any CPU {DF2255F4-F671-4C15-9100-D8079992E19D}.Release|Any CPU.Build.0 = Release|Any CPU + {EFE0C13B-6902-4FC9-91DD-F180420B36C8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {EFE0C13B-6902-4FC9-91DD-F180420B36C8}.Debug|Any CPU.Build.0 = Debug|Any CPU + {EFE0C13B-6902-4FC9-91DD-F180420B36C8}.Release|Any CPU.ActiveCfg = Release|Any CPU + {EFE0C13B-6902-4FC9-91DD-F180420B36C8}.Release|Any CPU.Build.0 = Release|Any CPU + {7F8A8D87-B49A-4C7B-8474-AC83F8CFD40B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {7F8A8D87-B49A-4C7B-8474-AC83F8CFD40B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {7F8A8D87-B49A-4C7B-8474-AC83F8CFD40B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {7F8A8D87-B49A-4C7B-8474-AC83F8CFD40B}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection GlobalSection(NestedProjects) = preSolution {CBA80130-6773-4DF9-995C-DC6CBED89CB5} = {41101B02-36C9-476B-98D5-1A6E105BBF4A} - {8B069551-9B95-464E-BB40-C56817506FEC} = {41101B02-36C9-476B-98D5-1A6E105BBF4A} - {8FF5E841-29F6-4DB7-A4F8-9281FBDA0B9C} = {41101B02-36C9-476B-98D5-1A6E105BBF4A} {E5DB95E1-94AA-405C-9FFE-09B1E2498EE2} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} {7DF41C40-FE5D-41DF-B106-3DD77BE4D4B5} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} + {1392041A-E2CA-4553-BEAF-363974651B81} = {DB29DBEF-FA4E-4334-AFB8-BFB2DA82D1DE} {E3347E75-EAE8-4E6B-98D1-7230B1EE5450} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} {5EE18CED-28AE-4415-B5A3-C31123BF57E1} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} {E813073E-07A7-4C88-A505-484CB33C9DC4} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} + {76AC3DDD-2B38-489F-A8B0-8E43054595DB} = {DB29DBEF-FA4E-4334-AFB8-BFB2DA82D1DE} + {7D7457FD-B88C-4375-926D-7D46C71E34A7} = {DE71D38E-4154-477C-9C27-3FA4ADB4098F} + {D5FF2DBA-F304-4ACB-8F82-B8F9321E22A9} = {DE71D38E-4154-477C-9C27-3FA4ADB4098F} {DAD8EBB8-A1D6-4E8F-A334-D7F0273280D1} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} {0C045A64-AE30-47CC-A931-5B5C6C9EF06D} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} {19F785D2-F7A4-41AB-9301-A6AD7E40B238} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} - {9759BE1C-98A0-4319-AC82-D432002BD66B} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} - {DF2255F4-F671-4C15-9100-D8079992E19D} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} - {1392041A-E2CA-4553-BEAF-363974651B81} = {DB29DBEF-FA4E-4334-AFB8-BFB2DA82D1DE} - {76AC3DDD-2B38-489F-A8B0-8E43054595DB} = {DB29DBEF-FA4E-4334-AFB8-BFB2DA82D1DE} {6FD10BE0-24C8-456E-8B9A-FD101C05C961} = {DB29DBEF-FA4E-4334-AFB8-BFB2DA82D1DE} - {7D7457FD-B88C-4375-926D-7D46C71E34A7} = {DE71D38E-4154-477C-9C27-3FA4ADB4098F} - {D5FF2DBA-F304-4ACB-8F82-B8F9321E22A9} = {DE71D38E-4154-477C-9C27-3FA4ADB4098F} + {8B069551-9B95-464E-BB40-C56817506FEC} = {41101B02-36C9-476B-98D5-1A6E105BBF4A} + {8FF5E841-29F6-4DB7-A4F8-9281FBDA0B9C} = {41101B02-36C9-476B-98D5-1A6E105BBF4A} + {9759BE1C-98A0-4319-AC82-D432002BD66B} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} {6E942A4A-405E-4AAD-89A7-006358A8A004} = {DE71D38E-4154-477C-9C27-3FA4ADB4098F} + {DF2255F4-F671-4C15-9100-D8079992E19D} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} + {EFE0C13B-6902-4FC9-91DD-F180420B36C8} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} + {7F8A8D87-B49A-4C7B-8474-AC83F8CFD40B} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {EE9FB522-4B73-4E3E-B63D-C21826BB7B5D} From 196ce48755d9a8a93a6cdaeabc011f09eb90764b Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Tue, 26 Jul 2022 09:46:01 +0200 Subject: [PATCH 03/23] Add tests for binary xml strings --- .../tests/XmlDictionaryWriterTest.cs | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs b/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs index 719a06d6e48cf9..93ec5c147ed078 100644 --- a/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs +++ b/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; +using System.Diagnostics; using System.IO; using System.Linq; using System.Runtime.Serialization; @@ -320,6 +321,71 @@ public static void FragmentTest() Assert.False(FragmentHelper.CanFragment(writer)); } + [Fact] + public static void BinaryWritel_WriteString() + { + const byte Chars8Text = 152; + const byte Chars16Text = 154; + MemoryStream ms = new MemoryStream(); + XmlDictionaryWriter writer = (XmlDictionaryWriter)XmlDictionaryWriter.CreateBinaryWriter(ms); + writer.WriteStartElement("root"); + + int[] lengths = new[] { 7, 8, 9, 15, 16, 17, 31, 32, 36, 258 }; + byte[] buffer = new byte[lengths.Max() + 1]; + + foreach (var length in lengths) + { + string allAscii = string.Create(length, null, (Span chars, object _) => + { + for (int i = 0; i < chars.Length; ++i) + chars[i] = (char)(i % 128); + }); + string multiByteLast = string.Create(length, null, (Span chars, object _) => + { + for (int i = 0; i < chars.Length; ++i) + chars[i] = (char)(i % 128); + chars[^1] = 'ä'; + }); + + int numBytes = Encoding.UTF8.GetBytes(allAscii, buffer); + Debug.Assert(numBytes == length); + ValidateWriteText(ms, writer, allAscii, buffer.AsSpan(0, numBytes)); + + numBytes = Encoding.UTF8.GetBytes(multiByteLast, buffer); + Debug.Assert(numBytes == length + 1); + ValidateWriteText(ms, writer, multiByteLast, buffer.AsSpan(0, numBytes)); + } + + static void ValidateWriteText(MemoryStream ms, XmlDictionaryWriter writer, string text, ReadOnlySpan expected) + { + writer.Flush(); + ms.Seek(0, SeekOrigin.Begin); + ms.SetLength(0); + writer.WriteString(text); + writer.Flush(); + + ms.TryGetBuffer(out ArraySegment arraySegment); + ReadOnlySpan buffer = arraySegment; + + if (expected.Length <= byte.MaxValue) + { + Assert.Equal(Chars8Text, buffer[0]); + Assert.Equal(expected.Length, buffer[1]); + buffer = buffer.Slice(2); + } + else if (expected.Length <= ushort.MaxValue) + { + Assert.Equal(Chars16Text, buffer[0]); + Assert.Equal(expected.Length, (int)(buffer[1]) | ((int)buffer[2] << 8)); + buffer = buffer.Slice(3); + } + else + Assert.Fail("test use to long length"); + + AssertExtensions.SequenceEqual(expected, buffer); + } + } + private static bool ReadTest(MemoryStream ms, Encoding encoding, ReaderWriterFactory.ReaderWriterType rwType, byte[] byteArray) { ms.Position = 0; From 4d8078aaca039f0ddfcaabb173ca239b5df47c3c Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Tue, 26 Jul 2022 09:58:17 +0200 Subject: [PATCH 04/23] limit counting code to 256 bit vectors --- .../src/System/Xml/XmlStreamNodeWriter.cs | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs index da60a78394c7f6..683cf72f6147f6 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs @@ -359,24 +359,22 @@ protected unsafe int UnsafeGetUTF8Length(char* chars, int charCount) char* charsMax = chars + charCount; // This method is only called from 2 places and will use length of at least (128/3 and 256/3) respectivly - // AVX is faster for at least 2048 chars, probably more - // for other cases the encoding path is better optimized than any fast path done here. - if (Vector.IsHardwareAccelerated - && Vector.Count > Vector128.Count - && Vector.Count < charCount && charCount <= 2048) + // We avoid Vector sine it is unsure how downclocking due to AVX512 would affect total throughput + if (Vector256.IsHardwareAccelerated + && Vector256.Count < charCount && charCount <= 2048) { - char* lastSimd = chars + charCount - Vector.Count; - var mask = new Vector(unchecked((short)0xff80)); + char* lastSimd = chars + charCount - Vector256.Count; + Vector256 mask = Vector256.Create(unchecked((short)0xff80)); while (chars < lastSimd) { - if (((*(Vector*)chars) & mask) != Vector.Zero) + if (((*(Vector256*)chars) & mask) != Vector256.Zero) goto NonAscii; - chars += Vector.Count; + chars += Vector256.Count; } - if ((*(Vector*)lastSimd & mask) == Vector.Zero) + if ((*(Vector256*)lastSimd & mask) == Vector256.Zero) return charCount; } @@ -396,15 +394,15 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe byte* bytes = _bytes; byte* bytesMax = &bytes[buffer.Length - offset]; char* charsMax = &chars[charCount]; - char* simdLast = chars + charCount - Vector128.Count; if (Sse41.IsSupported && charCount >= Vector128.Count) { - var mask = Vector128.Create(unchecked((short)0xff80)); + Vector128 mask = Vector128.Create(unchecked((short)0xff80)); + char* simdLast = chars + charCount - Vector128.Count; while (chars < simdLast) { - var v = Sse2.LoadVector128((short*)chars); + Vector128 v = *(Vector128*)chars; if (!Sse41.TestZ(v, mask)) goto NonAscii; @@ -413,7 +411,7 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe chars += Vector128.Count; } - var v2 = Sse2.LoadVector128((short*)simdLast); + Vector128 v2 = Sse2.LoadVector128((short*)simdLast); if (!Sse41.TestZ(v2, mask)) goto NonAscii; From 6e5aabb8651188013524821b073ca9591d4aba93 Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Wed, 3 Aug 2022 17:05:58 +0200 Subject: [PATCH 05/23] reword comment --- .../src/System/Xml/XmlStreamNodeWriter.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs index 683cf72f6147f6..7a0e05a27f8d57 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs @@ -395,10 +395,10 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe byte* bytesMax = &bytes[buffer.Length - offset]; char* charsMax = &chars[charCount]; - if (Sse41.IsSupported && charCount >= Vector128.Count) + if (Sse41.IsSupported && charCount >= Vector128.Count) { Vector128 mask = Vector128.Create(unchecked((short)0xff80)); - char* simdLast = chars + charCount - Vector128.Count; + char* simdLast = chars + charCount - Vector128.Count; while (chars < simdLast) { @@ -407,8 +407,8 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe goto NonAscii; Sse2.StoreScalar((long*)bytes, Sse2.PackUnsignedSaturate(v, v).AsInt64()); - bytes += Vector128.Count; - chars += Vector128.Count; + bytes += Vector128.Count; + chars += Vector128.Count; } Vector128 v2 = Sse2.LoadVector128((short*)simdLast); @@ -418,7 +418,7 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe Sse2.StoreScalar((long*)(bytesMax - sizeof(long)), Sse2.PackUnsignedSaturate(v2, v2).AsInt64()); return charCount; } - // Directly jump to system encoding for larger strings, since it is faster even for the all Ascii case + // Fast path for small strings, skip and use Encoding.GetBytes for larger strings since it is faster even for the all Ascii case else if (charCount < 16) { while (chars < charsMax) From 70fa18968afe4433a902de807ab7af7e3379946b Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Wed, 3 Aug 2022 17:11:37 +0200 Subject: [PATCH 06/23] rename test --- .../tests/XmlDictionaryWriterTest.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs b/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs index 93ec5c147ed078..eec4384b8deb3b 100644 --- a/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs +++ b/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs @@ -322,7 +322,7 @@ public static void FragmentTest() } [Fact] - public static void BinaryWritel_WriteString() + public static void XmlBaseWriter_WriteString() { const byte Chars8Text = 152; const byte Chars16Text = 154; From b34d25980d6e3b7a18804c1b0c543caf5bf25787 Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Wed, 3 Aug 2022 17:19:47 +0200 Subject: [PATCH 07/23] move bytesmax --- .../src/System/Xml/XmlStreamNodeWriter.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs index 7a0e05a27f8d57..942dca77896780 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs @@ -392,7 +392,6 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe fixed (byte* _bytes = &buffer[offset]) { byte* bytes = _bytes; - byte* bytesMax = &bytes[buffer.Length - offset]; char* charsMax = &chars[charCount]; if (Sse41.IsSupported && charCount >= Vector128.Count) @@ -415,10 +414,10 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe if (!Sse41.TestZ(v2, mask)) goto NonAscii; - Sse2.StoreScalar((long*)(bytesMax - sizeof(long)), Sse2.PackUnsignedSaturate(v2, v2).AsInt64()); + Sse2.StoreScalar((long*)(_bytes + charCount - sizeof(long)), Sse2.PackUnsignedSaturate(v2, v2).AsInt64()); return charCount; } - // Fast path for small strings, skip and use Encoding.GetBytes for larger strings since it is faster even for the all Ascii case + // Fast path for small strings, skip and use Encoding.GetBytes for larger strings since it is faster even for the all-Ascii case else if (charCount < 16) { while (chars < charsMax) @@ -436,6 +435,7 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe } NonAscii: + byte* bytesMax = &bytes[buffer.Length - offset]; return (int)(bytes - _bytes) + (_encoding ?? s_UTF8Encoding).GetBytes(chars, (int)(charsMax - chars), bytes, (int)(bytesMax - bytes)); } } From 5df5ae032b5ca55716d3079308729c4789814e02 Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Thu, 4 Aug 2022 08:12:54 +0200 Subject: [PATCH 08/23] Fix bytesMax after moving variable initialization --- .../src/System/Xml/XmlStreamNodeWriter.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs index 942dca77896780..f8b5a3ba38b36a 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs @@ -435,7 +435,7 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe } NonAscii: - byte* bytesMax = &bytes[buffer.Length - offset]; + byte* bytesMax = _bytes + buffer.Length - offset; return (int)(bytes - _bytes) + (_encoding ?? s_UTF8Encoding).GetBytes(chars, (int)(charsMax - chars), bytes, (int)(bytesMax - bytes)); } } From a790fbbb7ac6b4769a7b10177c3c5c65a10bec3e Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Thu, 4 Aug 2022 12:41:36 +0200 Subject: [PATCH 09/23] use unicode escape value in test --- .../tests/XmlDictionaryWriterTest.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs b/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs index eec4384b8deb3b..50273a8eb91bc8 100644 --- a/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs +++ b/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs @@ -331,7 +331,7 @@ public static void XmlBaseWriter_WriteString() writer.WriteStartElement("root"); int[] lengths = new[] { 7, 8, 9, 15, 16, 17, 31, 32, 36, 258 }; - byte[] buffer = new byte[lengths.Max() + 1]; + byte[] buffer = new byte[lengths.Max() * 1]; foreach (var length in lengths) { @@ -344,16 +344,16 @@ public static void XmlBaseWriter_WriteString() { for (int i = 0; i < chars.Length; ++i) chars[i] = (char)(i % 128); - chars[^1] = 'ä'; + chars[^1] = '\u00E4'; // 'ä' - Latin Small Letter a with Diaeresis. Latin-1 Supplement. }); int numBytes = Encoding.UTF8.GetBytes(allAscii, buffer); - Debug.Assert(numBytes == length); - ValidateWriteText(ms, writer, allAscii, buffer.AsSpan(0, numBytes)); + Assert.True(numBytes == length, "Test setup wrong - allAscii"); + ValidateWriteText(ms, writer, allAscii, expected: buffer.AsSpan(0, numBytes)); numBytes = Encoding.UTF8.GetBytes(multiByteLast, buffer); - Debug.Assert(numBytes == length + 1); - ValidateWriteText(ms, writer, multiByteLast, buffer.AsSpan(0, numBytes)); + Assert.True(numBytes == length + 1, "Test setup wrong - multiByte"); + ValidateWriteText(ms, writer, multiByteLast, expected: buffer.AsSpan(0, numBytes)); } static void ValidateWriteText(MemoryStream ms, XmlDictionaryWriter writer, string text, ReadOnlySpan expected) From 2b82ac87004c1eb4ee4b2657f3f68b61f5d463e1 Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Thu, 4 Aug 2022 19:50:57 +0200 Subject: [PATCH 10/23] fix test typo "*" -> "+" --- .../tests/XmlDictionaryWriterTest.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs b/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs index 50273a8eb91bc8..2e2a5ddfaa69e1 100644 --- a/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs +++ b/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs @@ -331,7 +331,7 @@ public static void XmlBaseWriter_WriteString() writer.WriteStartElement("root"); int[] lengths = new[] { 7, 8, 9, 15, 16, 17, 31, 32, 36, 258 }; - byte[] buffer = new byte[lengths.Max() * 1]; + byte[] buffer = new byte[lengths.Max() + 1]; foreach (var length in lengths) { From 301e53119fd0e9ee556041af596b8fe71b747506 Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Fri, 12 Aug 2022 20:04:26 +0200 Subject: [PATCH 11/23] Update src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs Co-authored-by: Stephen Toub --- .../src/System/Xml/XmlStreamNodeWriter.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs index f8b5a3ba38b36a..ae4c333cfad4f9 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs @@ -337,7 +337,7 @@ protected unsafe int UnsafeGetUnicodeChars(char* chars, int charCount, byte[] bu { if (BitConverter.IsLittleEndian) { - new ReadOnlySpan((byte*)chars, 2 * charCount) + new ReadOnlySpan((byte*)chars, sizeof(char) * charCount) .CopyTo(buffer.AsSpan(offset)); } else From 5a2130603b28db3611f2d0d3f8692d8892ae9c8f Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Fri, 12 Aug 2022 21:16:56 +0200 Subject: [PATCH 12/23] Remvoe vectorized code from UnsafeGetUTF8Length --- .../src/System/Xml/XmlStreamNodeWriter.cs | 28 +------------------ 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs index ae4c333cfad4f9..c31b027a57437f 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs @@ -356,33 +356,7 @@ protected unsafe int UnsafeGetUnicodeChars(char* chars, int charCount, byte[] bu protected unsafe int UnsafeGetUTF8Length(char* chars, int charCount) { - char* charsMax = chars + charCount; - - // This method is only called from 2 places and will use length of at least (128/3 and 256/3) respectivly - // We avoid Vector sine it is unsure how downclocking due to AVX512 would affect total throughput - if (Vector256.IsHardwareAccelerated - && Vector256.Count < charCount && charCount <= 2048) - { - char* lastSimd = chars + charCount - Vector256.Count; - Vector256 mask = Vector256.Create(unchecked((short)0xff80)); - - while (chars < lastSimd) - { - if (((*(Vector256*)chars) & mask) != Vector256.Zero) - goto NonAscii; - - chars += Vector256.Count; - } - - if ((*(Vector256*)lastSimd & mask) == Vector256.Zero) - return charCount; - } - - NonAscii: - int numRemaining = (int)(charsMax - chars); - int numAscii = charCount - numRemaining; - - return numAscii + (_encoding ?? s_UTF8Encoding).GetByteCount(chars, numRemaining); + return (_encoding ?? s_UTF8Encoding).GetByteCount(chars, charCount); } protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffer, int offset) From 048cade0ea5f5d8f9fb0b6be0dcb897895dd1b47 Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Thu, 8 Sep 2022 21:46:21 +0200 Subject: [PATCH 13/23] Fix overfload --- .../src/System/Xml/XmlStreamNodeWriter.cs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs index 7339053cada7e5..132d5d14c06fb6 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs @@ -4,6 +4,7 @@ using System.IO; using System.Numerics; using System.Text; +using System.Runtime.InteropServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; using System.Runtime.Serialization; @@ -342,8 +343,8 @@ protected unsafe int UnsafeGetUnicodeChars(char* chars, int charCount, byte[] bu { if (BitConverter.IsLittleEndian) { - new ReadOnlySpan((byte*)chars, sizeof(char) * charCount) - .CopyTo(buffer.AsSpan(offset)); + new ReadOnlySpan(chars, charCount) + .CopyTo(MemoryMarshal.Cast(buffer.AsSpan(offset))); } else { @@ -389,7 +390,7 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe chars += Vector128.Count; } - Vector128 v2 = Sse2.LoadVector128((short*)simdLast); + Vector128 v2 = *(Vector128*)simdLast; if (!Sse41.TestZ(v2, mask)) goto NonAscii; From 287e73725d2a973f82875e1ed5384ecb49ceabe5 Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Mon, 24 Oct 2022 08:14:30 +0200 Subject: [PATCH 14/23] use for loop which seems faster --- .../src/System/Xml/XmlStreamNodeWriter.cs | 25 ++++++++----------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs index 132d5d14c06fb6..1005dbf08bca9a 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs @@ -376,21 +376,18 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe if (Sse41.IsSupported && charCount >= Vector128.Count) { - Vector128 mask = Vector128.Create(unchecked((short)0xff80)); - char* simdLast = chars + charCount - Vector128.Count; - - while (chars < simdLast) + var mask = Vector128.Create(unchecked((short)0xff80)); + uint lastSimd = (uint)(charCount - Vector128.Count); + for (uint i = 0; i < lastSimd; i += (uint)Vector128.Count) { - Vector128 v = *(Vector128*)chars; + var v = *(Vector128*)(chars + i); if (!Sse41.TestZ(v, mask)) goto NonAscii; - Sse2.StoreScalar((long*)bytes, Sse2.PackUnsignedSaturate(v, v).AsInt64()); - bytes += Vector128.Count; - chars += Vector128.Count; + Sse2.StoreScalar((long*)(bytes + i), Sse2.PackUnsignedSaturate(v, v).AsInt64()); } - Vector128 v2 = *(Vector128*)simdLast; + var v2 = *(Vector128*)(chars + charCount - Vector128.Count); if (!Sse41.TestZ(v2, mask)) goto NonAscii; @@ -398,17 +395,15 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe return charCount; } // Fast path for small strings, skip and use Encoding.GetBytes for larger strings since it is faster even for the all-Ascii case - else if (charCount < 16) + else if (Sse41.IsSupported || charCount < 16) { - while (chars < charsMax) + for (uint i = 0; i < (uint)charCount; ++i) { - char t = *chars; + char t = chars[i]; if (t >= 0x80) goto NonAscii; - *bytes = (byte)t; - bytes++; - chars++; + bytes[i] = (byte)t; } return charCount; From ab29682b659bcef0938521f701e968989da1b5d9 Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Mon, 6 Mar 2023 21:39:26 +0100 Subject: [PATCH 15/23] remove vector loop --- .../src/System/Xml/XmlStreamNodeWriter.cs | 33 ++++--------------- 1 file changed, 7 insertions(+), 26 deletions(-) diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs index bf682b3d88d5ce..af182761b3c377 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs @@ -373,38 +373,19 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe byte* bytes = _bytes; char* charsMax = &chars[charCount]; - if (Sse41.IsSupported && charCount >= Vector128.Count) + // Fast path for small strings, skip and use Encoding.GetBytes for larger strings since it is faster + if (charCount < 16) { - var mask = Vector128.Create(unchecked((short)0xff80)); - uint lastSimd = (uint)(charCount - Vector128.Count); - for (uint i = 0; i < lastSimd; i += (uint)Vector128.Count) + while (chars < charsMax) { - var v = *(Vector128*)(chars + i); - if (!Sse41.TestZ(v, mask)) - goto NonAscii; - - Sse2.StoreScalar((long*)(bytes + i), Sse2.PackUnsignedSaturate(v, v).AsInt64()); - } - - var v2 = *(Vector128*)(chars + charCount - Vector128.Count); - if (!Sse41.TestZ(v2, mask)) - goto NonAscii; - - Sse2.StoreScalar((long*)(_bytes + charCount - sizeof(long)), Sse2.PackUnsignedSaturate(v2, v2).AsInt64()); - return charCount; - } - // Fast path for small strings, skip and use Encoding.GetBytes for larger strings since it is faster even for the all-Ascii case - else if (Sse41.IsSupported || charCount < 16) - { - for (uint i = 0; i < (uint)charCount; ++i) - { - char t = chars[i]; + char t = *chars; if (t >= 0x80) goto NonAscii; - bytes[i] = (byte)t; + *bytes = (byte)t; + bytes++; + chars++; } - return charCount; } From 251391f37ffd96a3da5179f5962e59cb5b30e2bc Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Sat, 11 Mar 2023 14:26:32 +0100 Subject: [PATCH 16/23] make sealed encoding to allow devirtualisation --- .../Serialization/DataContractSerializer.cs | 26 ++++++++++++++----- .../src/System/Xml/XmlStreamNodeWriter.cs | 2 +- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Runtime/Serialization/DataContractSerializer.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Runtime/Serialization/DataContractSerializer.cs index b98bad5c8cd4c5..e0cee19a77f5a0 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Runtime/Serialization/DataContractSerializer.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Runtime/Serialization/DataContractSerializer.cs @@ -33,13 +33,27 @@ public sealed class DataContractSerializer : XmlObjectSerializer private static SerializationOption s_option = IsReflectionBackupAllowed() ? SerializationOption.ReflectionAsBackup : SerializationOption.CodeGenOnly; private static bool s_optionAlreadySet; - internal static UTF8Encoding UTF8NoBom { get; } = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: false); - internal static UTF8Encoding ValidatingUTF8 { get; } = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true); + internal sealed class SealedUTF8Encoding : UTF8Encoding + { + public SealedUTF8Encoding(bool encoderShouldEmitUTF8Identifier, bool throwOnInvalidBytes) + : base(encoderShouldEmitUTF8Identifier, throwOnInvalidBytes) + { } + } + + internal sealed class SealedUnicodeEncoding : UnicodeEncoding + { + public SealedUnicodeEncoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes) + : base(bigEndian, byteOrderMark, throwOnInvalidBytes) + { } + } + + internal static SealedUTF8Encoding UTF8NoBom { get; } = new SealedUTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: false); + internal static SealedUTF8Encoding ValidatingUTF8 { get; } = new SealedUTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true); - internal static UnicodeEncoding UTF16NoBom { get; } = new UnicodeEncoding(bigEndian: false, byteOrderMark: false, throwOnInvalidBytes: false); - internal static UnicodeEncoding BEUTF16NoBom { get; } = new UnicodeEncoding(bigEndian: true, byteOrderMark: false, throwOnInvalidBytes: false); - internal static UnicodeEncoding ValidatingUTF16 { get; } = new UnicodeEncoding(bigEndian: false, byteOrderMark: false, throwOnInvalidBytes: true); - internal static UnicodeEncoding ValidatingBEUTF16 { get; } = new UnicodeEncoding(bigEndian: true, byteOrderMark: false, throwOnInvalidBytes: true); + internal static SealedUnicodeEncoding UTF16NoBom { get; } = new SealedUnicodeEncoding(bigEndian: false, byteOrderMark: false, throwOnInvalidBytes: false); + internal static SealedUnicodeEncoding BEUTF16NoBom { get; } = new SealedUnicodeEncoding(bigEndian: true, byteOrderMark: false, throwOnInvalidBytes: false); + internal static SealedUnicodeEncoding ValidatingUTF16 { get; } = new SealedUnicodeEncoding(bigEndian: false, byteOrderMark: false, throwOnInvalidBytes: true); + internal static SealedUnicodeEncoding ValidatingBEUTF16 { get; } = new SealedUnicodeEncoding(bigEndian: true, byteOrderMark: false, throwOnInvalidBytes: true); internal static Base64Encoding Base64Encoding { get; } = new Base64Encoding(); internal static BinHexEncoding BinHexEncoding { get; } = new BinHexEncoding(); diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs index af182761b3c377..3966678217380a 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs @@ -374,7 +374,7 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe char* charsMax = &chars[charCount]; // Fast path for small strings, skip and use Encoding.GetBytes for larger strings since it is faster - if (charCount < 16) + if (charCount < 32) { while (chars < charsMax) { From a5907391f7451188c3b43526d887f4ed231d4880 Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Mon, 20 Mar 2023 23:23:09 +0100 Subject: [PATCH 17/23] back some changes --- ...m.Private.DataContractSerialization.csproj | 5 +-- .../src/System/Xml/XmlStreamNodeWriter.cs | 22 +++++++----- .../System.Runtime.Serialization.Xml.sln | 34 ++++--------------- .../tests/XmlDictionaryWriterTest.cs | 1 - 4 files changed, 21 insertions(+), 41 deletions(-) diff --git a/src/libraries/System.Private.DataContractSerialization/src/System.Private.DataContractSerialization.csproj b/src/libraries/System.Private.DataContractSerialization/src/System.Private.DataContractSerialization.csproj index c6929051c0d69a..e5751e43206791 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System.Private.DataContractSerialization.csproj +++ b/src/libraries/System.Private.DataContractSerialization/src/System.Private.DataContractSerialization.csproj @@ -5,7 +5,6 @@ true false - true @@ -26,7 +25,6 @@ - @@ -36,6 +34,7 @@ + @@ -160,12 +159,10 @@ - - diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs index 3966678217380a..43d83b0db3f2f6 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs @@ -370,12 +370,12 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe { fixed (byte* _bytes = &buffer[offset]) { - byte* bytes = _bytes; - char* charsMax = &chars[charCount]; - - // Fast path for small strings, skip and use Encoding.GetBytes for larger strings since it is faster - if (charCount < 32) + // Fast path for small strings, use Encoding.GetBytes for larger strings since it is faster when vectorization is possible + if (charCount < 8) { + byte* bytes = _bytes; + char* charsMax = &chars[charCount]; + while (chars < charsMax) { char t = *chars; @@ -387,11 +387,15 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe chars++; } return charCount; - } - NonAscii: - byte* bytesMax = _bytes + buffer.Length - offset; - return (int)(bytes - _bytes) + (_encoding ?? DataContractSerializer.ValidatingUTF8).GetBytes(chars, (int)(charsMax - chars), bytes, (int)(bytesMax - bytes)); + NonAscii: + byte* bytesMax = _bytes + buffer.Length - offset; + return (int)(bytes - _bytes) + (_encoding ?? Encoding.UTF8).GetBytes(chars, (int)(charsMax - chars), bytes, (int)(bytesMax - bytes)); + } + else + { + return (_encoding ?? Encoding.UTF8).GetBytes(chars, charCount, _bytes, buffer.Length - offset); + } } } return 0; diff --git a/src/libraries/System.Runtime.Serialization.Xml/System.Runtime.Serialization.Xml.sln b/src/libraries/System.Runtime.Serialization.Xml/System.Runtime.Serialization.Xml.sln index 8a9dc609c3f215..1175b6a261e17e 100644 --- a/src/libraries/System.Runtime.Serialization.Xml/System.Runtime.Serialization.Xml.sln +++ b/src/libraries/System.Runtime.Serialization.Xml/System.Runtime.Serialization.Xml.sln @@ -1,8 +1,4 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.2.32616.157 -MinimumVisualStudioVersion = 10.0.40219.1 +Microsoft Visual Studio Solution File, Format Version 12.00 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TestUtilities", "..\Common\tests\TestUtilities\TestUtilities.csproj", "{CBA80130-6773-4DF9-995C-DC6CBED89CB5}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.Win32.Primitives", "..\Microsoft.Win32.Primitives\ref\Microsoft.Win32.Primitives.csproj", "{E5DB95E1-94AA-405C-9FFE-09B1E2498EE2}" @@ -51,10 +47,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{DB29DBEF-FA4 EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "gen", "gen", "{DE71D38E-4154-477C-9C27-3FA4ADB4098F}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Numerics.Vectors", "..\System.Numerics.Vectors\ref\System.Numerics.Vectors.csproj", "{EFE0C13B-6902-4FC9-91DD-F180420B36C8}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Runtime.Intrinsics", "..\System.Runtime.Intrinsics\ref\System.Runtime.Intrinsics.csproj", "{7F8A8D87-B49A-4C7B-8474-AC83F8CFD40B}" -EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -141,14 +133,6 @@ Global {DF2255F4-F671-4C15-9100-D8079992E19D}.Debug|Any CPU.Build.0 = Debug|Any CPU {DF2255F4-F671-4C15-9100-D8079992E19D}.Release|Any CPU.ActiveCfg = Release|Any CPU {DF2255F4-F671-4C15-9100-D8079992E19D}.Release|Any CPU.Build.0 = Release|Any CPU - {EFE0C13B-6902-4FC9-91DD-F180420B36C8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {EFE0C13B-6902-4FC9-91DD-F180420B36C8}.Debug|Any CPU.Build.0 = Debug|Any CPU - {EFE0C13B-6902-4FC9-91DD-F180420B36C8}.Release|Any CPU.ActiveCfg = Release|Any CPU - {EFE0C13B-6902-4FC9-91DD-F180420B36C8}.Release|Any CPU.Build.0 = Release|Any CPU - {7F8A8D87-B49A-4C7B-8474-AC83F8CFD40B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {7F8A8D87-B49A-4C7B-8474-AC83F8CFD40B}.Debug|Any CPU.Build.0 = Debug|Any CPU - {7F8A8D87-B49A-4C7B-8474-AC83F8CFD40B}.Release|Any CPU.ActiveCfg = Release|Any CPU - {7F8A8D87-B49A-4C7B-8474-AC83F8CFD40B}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -160,24 +144,20 @@ Global {45263D7D-249E-4810-8F7D-1DEF25515210} = {41101B02-36C9-476B-98D5-1A6E105BBF4A} {E5DB95E1-94AA-405C-9FFE-09B1E2498EE2} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} {7DF41C40-FE5D-41DF-B106-3DD77BE4D4B5} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} - {1392041A-E2CA-4553-BEAF-363974651B81} = {DB29DBEF-FA4E-4334-AFB8-BFB2DA82D1DE} {E3347E75-EAE8-4E6B-98D1-7230B1EE5450} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} {5EE18CED-28AE-4415-B5A3-C31123BF57E1} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} {E813073E-07A7-4C88-A505-484CB33C9DC4} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} - {76AC3DDD-2B38-489F-A8B0-8E43054595DB} = {DB29DBEF-FA4E-4334-AFB8-BFB2DA82D1DE} - {7D7457FD-B88C-4375-926D-7D46C71E34A7} = {DE71D38E-4154-477C-9C27-3FA4ADB4098F} - {D5FF2DBA-F304-4ACB-8F82-B8F9321E22A9} = {DE71D38E-4154-477C-9C27-3FA4ADB4098F} {DAD8EBB8-A1D6-4E8F-A334-D7F0273280D1} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} {0C045A64-AE30-47CC-A931-5B5C6C9EF06D} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} {19F785D2-F7A4-41AB-9301-A6AD7E40B238} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} - {6FD10BE0-24C8-456E-8B9A-FD101C05C961} = {DB29DBEF-FA4E-4334-AFB8-BFB2DA82D1DE} - {8B069551-9B95-464E-BB40-C56817506FEC} = {41101B02-36C9-476B-98D5-1A6E105BBF4A} - {8FF5E841-29F6-4DB7-A4F8-9281FBDA0B9C} = {41101B02-36C9-476B-98D5-1A6E105BBF4A} {9759BE1C-98A0-4319-AC82-D432002BD66B} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} - {6E942A4A-405E-4AAD-89A7-006358A8A004} = {DE71D38E-4154-477C-9C27-3FA4ADB4098F} {DF2255F4-F671-4C15-9100-D8079992E19D} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} - {EFE0C13B-6902-4FC9-91DD-F180420B36C8} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} - {7F8A8D87-B49A-4C7B-8474-AC83F8CFD40B} = {18E62E91-73A2-48AE-BEFF-CE7C64DF759D} + {1392041A-E2CA-4553-BEAF-363974651B81} = {DB29DBEF-FA4E-4334-AFB8-BFB2DA82D1DE} + {76AC3DDD-2B38-489F-A8B0-8E43054595DB} = {DB29DBEF-FA4E-4334-AFB8-BFB2DA82D1DE} + {6FD10BE0-24C8-456E-8B9A-FD101C05C961} = {DB29DBEF-FA4E-4334-AFB8-BFB2DA82D1DE} + {7D7457FD-B88C-4375-926D-7D46C71E34A7} = {DE71D38E-4154-477C-9C27-3FA4ADB4098F} + {D5FF2DBA-F304-4ACB-8F82-B8F9321E22A9} = {DE71D38E-4154-477C-9C27-3FA4ADB4098F} + {6E942A4A-405E-4AAD-89A7-006358A8A004} = {DE71D38E-4154-477C-9C27-3FA4ADB4098F} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {EE9FB522-4B73-4E3E-B63D-C21826BB7B5D} diff --git a/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs b/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs index 5bdf747ecbd74b..b3b5a8495cf5be 100644 --- a/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs +++ b/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs @@ -4,7 +4,6 @@ using System; using System.Buffers.Binary; using System.Collections.Generic; -using System.Diagnostics; using System.IO; using System.Linq; using System.Runtime.Serialization; From 46b63142bcbb99241960dc4b80a4947f8703ac2b Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Sat, 25 Mar 2023 14:12:29 +0100 Subject: [PATCH 18/23] use uint for UnsafeGetUTF8Chars comparison --- .../src/System/Xml/XmlStreamNodeWriter.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs index 43d83b0db3f2f6..634ef006f7ceeb 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs @@ -371,7 +371,7 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe fixed (byte* _bytes = &buffer[offset]) { // Fast path for small strings, use Encoding.GetBytes for larger strings since it is faster when vectorization is possible - if (charCount < 8) + if ((uint)charCount < 16) { byte* bytes = _bytes; char* charsMax = &chars[charCount]; From 82f88808405221ffbb2f435c20ef6c1e9b257d6a Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Sun, 26 Mar 2023 15:09:09 +0200 Subject: [PATCH 19/23] revert more changes --- .../src/System.Private.DataContractSerialization.csproj | 3 ++- .../src/System/Xml/XmlStreamNodeWriter.cs | 3 --- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Private.DataContractSerialization/src/System.Private.DataContractSerialization.csproj b/src/libraries/System.Private.DataContractSerialization/src/System.Private.DataContractSerialization.csproj index e5751e43206791..5835fbeec550b3 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System.Private.DataContractSerialization.csproj +++ b/src/libraries/System.Private.DataContractSerialization/src/System.Private.DataContractSerialization.csproj @@ -5,6 +5,7 @@ true false + true @@ -25,6 +26,7 @@ + @@ -34,7 +36,6 @@ - diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs index 634ef006f7ceeb..6a8ad349d7287b 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs @@ -2,11 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.IO; -using System.Numerics; using System.Text; using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; using System.Runtime.Serialization; using System.Threading.Tasks; From d78aade65fe8ba3b809312f2a591d94a7ee8ea22 Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Sun, 26 Mar 2023 16:58:31 +0200 Subject: [PATCH 20/23] Fix cutoff based on new measurements --- .../src/System/Xml/XmlStreamNodeWriter.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs index 6a8ad349d7287b..8823345752e881 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs @@ -368,7 +368,7 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe fixed (byte* _bytes = &buffer[offset]) { // Fast path for small strings, use Encoding.GetBytes for larger strings since it is faster when vectorization is possible - if ((uint)charCount < 16) + if ((uint)charCount < 25) { byte* bytes = _bytes; char* charsMax = &chars[charCount]; @@ -387,11 +387,11 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe NonAscii: byte* bytesMax = _bytes + buffer.Length - offset; - return (int)(bytes - _bytes) + (_encoding ?? Encoding.UTF8).GetBytes(chars, (int)(charsMax - chars), bytes, (int)(bytesMax - bytes)); + return (int)(bytes - _bytes) + (_encoding ?? DataContractSerializer.ValidatingUTF8).GetBytes(chars, (int)(charsMax - chars), bytes, (int)(bytesMax - bytes)); } else { - return (_encoding ?? Encoding.UTF8).GetBytes(chars, charCount, _bytes, buffer.Length - offset); + return (_encoding ?? DataContractSerializer.ValidatingUTF8).GetBytes(chars, charCount, _bytes, buffer.Length - offset); } } } From 3b20be87c847ce38dd92e5c4e9dee6463f9cd36b Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Sun, 26 Mar 2023 20:44:22 +0200 Subject: [PATCH 21/23] use BinaryPrimitives.ReverseEndianness as suggested --- .../src/System/Xml/XmlStreamNodeWriter.cs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs index 8823345752e881..ddc3286ff7e3ac 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers.Binary; using System.IO; using System.Text; using System.Runtime.InteropServices; @@ -335,7 +336,7 @@ protected unsafe void UnsafeWriteUnicodeChars(char* chars, int charCount) } } - protected unsafe int UnsafeGetUnicodeChars(char* chars, int charCount, byte[] buffer, int offset) + protected static unsafe int UnsafeGetUnicodeChars(char* chars, int charCount, byte[] buffer, int offset) { if (BitConverter.IsLittleEndian) { @@ -344,13 +345,8 @@ protected unsafe int UnsafeGetUnicodeChars(char* chars, int charCount, byte[] bu } else { - char* charsMax = chars + charCount; - while (chars < charsMax) - { - char value = *chars++; - buffer[offset++] = (byte)value; - buffer[offset++] = (byte)(value >> 8); - } + BinaryPrimitives.ReverseEndianness(new ReadOnlySpan(chars, charCount), + MemoryMarshal.Cast(buffer.AsSpan(offset))); } return charCount * 2; @@ -358,6 +354,7 @@ protected unsafe int UnsafeGetUnicodeChars(char* chars, int charCount, byte[] bu protected unsafe int UnsafeGetUTF8Length(char* chars, int charCount) { + // Length will always be at least ( 128 / maxBytesPerChar) = 42 return (_encoding ?? DataContractSerializer.ValidatingUTF8).GetByteCount(chars, charCount); } From 9c86b05465c152a136a669a81ca1e8e77014932d Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Mon, 27 Mar 2023 08:01:59 +0200 Subject: [PATCH 22/23] Update cutoff from 24 to 32 chars before calling, due to regression for text based DataContractSerializer --- .../src/System/Xml/XmlStreamNodeWriter.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs index ddc3286ff7e3ac..f4b2b49bd63592 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs @@ -365,7 +365,7 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe fixed (byte* _bytes = &buffer[offset]) { // Fast path for small strings, use Encoding.GetBytes for larger strings since it is faster when vectorization is possible - if ((uint)charCount < 25) + if ((uint)charCount < 32) { byte* bytes = _bytes; char* charsMax = &chars[charCount]; From ccfb008413d9e0de738698eba7590c811f7e26fc Mon Sep 17 00:00:00 2001 From: Daniel Svensson Date: Sun, 2 Apr 2023 20:03:48 +0200 Subject: [PATCH 23/23] Remove sealed encoding since it only improves XmlConvert --- .../Serialization/DataContractSerializer.cs | 26 +++++-------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Runtime/Serialization/DataContractSerializer.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Runtime/Serialization/DataContractSerializer.cs index e0cee19a77f5a0..b98bad5c8cd4c5 100644 --- a/src/libraries/System.Private.DataContractSerialization/src/System/Runtime/Serialization/DataContractSerializer.cs +++ b/src/libraries/System.Private.DataContractSerialization/src/System/Runtime/Serialization/DataContractSerializer.cs @@ -33,27 +33,13 @@ public sealed class DataContractSerializer : XmlObjectSerializer private static SerializationOption s_option = IsReflectionBackupAllowed() ? SerializationOption.ReflectionAsBackup : SerializationOption.CodeGenOnly; private static bool s_optionAlreadySet; - internal sealed class SealedUTF8Encoding : UTF8Encoding - { - public SealedUTF8Encoding(bool encoderShouldEmitUTF8Identifier, bool throwOnInvalidBytes) - : base(encoderShouldEmitUTF8Identifier, throwOnInvalidBytes) - { } - } - - internal sealed class SealedUnicodeEncoding : UnicodeEncoding - { - public SealedUnicodeEncoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes) - : base(bigEndian, byteOrderMark, throwOnInvalidBytes) - { } - } - - internal static SealedUTF8Encoding UTF8NoBom { get; } = new SealedUTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: false); - internal static SealedUTF8Encoding ValidatingUTF8 { get; } = new SealedUTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true); + internal static UTF8Encoding UTF8NoBom { get; } = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: false); + internal static UTF8Encoding ValidatingUTF8 { get; } = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true); - internal static SealedUnicodeEncoding UTF16NoBom { get; } = new SealedUnicodeEncoding(bigEndian: false, byteOrderMark: false, throwOnInvalidBytes: false); - internal static SealedUnicodeEncoding BEUTF16NoBom { get; } = new SealedUnicodeEncoding(bigEndian: true, byteOrderMark: false, throwOnInvalidBytes: false); - internal static SealedUnicodeEncoding ValidatingUTF16 { get; } = new SealedUnicodeEncoding(bigEndian: false, byteOrderMark: false, throwOnInvalidBytes: true); - internal static SealedUnicodeEncoding ValidatingBEUTF16 { get; } = new SealedUnicodeEncoding(bigEndian: true, byteOrderMark: false, throwOnInvalidBytes: true); + internal static UnicodeEncoding UTF16NoBom { get; } = new UnicodeEncoding(bigEndian: false, byteOrderMark: false, throwOnInvalidBytes: false); + internal static UnicodeEncoding BEUTF16NoBom { get; } = new UnicodeEncoding(bigEndian: true, byteOrderMark: false, throwOnInvalidBytes: false); + internal static UnicodeEncoding ValidatingUTF16 { get; } = new UnicodeEncoding(bigEndian: false, byteOrderMark: false, throwOnInvalidBytes: true); + internal static UnicodeEncoding ValidatingBEUTF16 { get; } = new UnicodeEncoding(bigEndian: true, byteOrderMark: false, throwOnInvalidBytes: true); internal static Base64Encoding Base64Encoding { get; } = new Base64Encoding(); internal static BinHexEncoding BinHexEncoding { get; } = new BinHexEncoding();