From 226e188c8ac2faec96eb02669bf29062ae909345 Mon Sep 17 00:00:00 2001 From: MihaZupan Date: Thu, 9 Apr 2020 19:25:29 +0200 Subject: [PATCH 1/9] Reduce allocations in string.Normalize --- .../Interop.Normalization.cs | 6 +- .../Windows/Normaliz/Interop.Normalization.cs | 14 +-- .../Globalization/Normalization.Unix.cs | 48 +++++++--- .../Globalization/Normalization.Windows.cs | 90 +++++++++++-------- 4 files changed, 104 insertions(+), 54 deletions(-) diff --git a/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs b/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs index 09307d22545f6a..fe3181f6aff5b3 100644 --- a/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs +++ b/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System; using System.Runtime.InteropServices; using System.Text; @@ -12,7 +13,10 @@ internal static partial class Globalization [DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_IsNormalized")] internal static extern int IsNormalized(NormalizationForm normalizationForm, string src, int srcLen); + internal static int NormalizeString(NormalizationForm normalizationForm, string src, int srcLen, Span dstBuffer) => + NormalizeString(normalizationForm, src, srcLen, ref MemoryMarshal.GetReference(dstBuffer), dstBuffer.Length); + [DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_NormalizeString")] - internal static extern int NormalizeString(NormalizationForm normalizationForm, string src, int srcLen, [Out] char[] dstBuffer, int dstBufferCapacity); + private static extern int NormalizeString(NormalizationForm normalizationForm, string src, int srcLen, ref char dstBuffer, int dstBufferCapacity); } } diff --git a/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Normalization.cs b/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Normalization.cs index 3e954483d18136..77fb0a1dd7627e 100644 --- a/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Normalization.cs +++ b/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Normalization.cs @@ -2,22 +2,26 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System; using System.Runtime.InteropServices; +using System.Text; internal static partial class Interop { internal static partial class Normaliz { [DllImport("Normaliz.dll", CharSet = CharSet.Unicode, SetLastError = true)] - internal static extern bool IsNormalizedString(int normForm, string source, int length); + internal static extern bool IsNormalizedString(NormalizationForm normForm, string source, int length); + + internal static int NormalizeString(NormalizationForm normForm, string source, int sourceLength, Span destination) => + NormalizeString(normForm, source, sourceLength, ref MemoryMarshal.GetReference(destination), destination.Length); [DllImport("Normaliz.dll", CharSet = CharSet.Unicode, SetLastError = true)] - internal static extern int NormalizeString( - int normForm, + private static extern int NormalizeString( + NormalizationForm normForm, string source, int sourceLength, - [System.Runtime.InteropServices.OutAttribute] - char[]? destination, + ref char destination, int destinationLength); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs index 37c9024c33d426..89eac782cb3536 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Buffers; using System.Diagnostics; using System.Text; @@ -41,26 +42,49 @@ internal static string Normalize(string strInput, NormalizationForm normalizatio ValidateArguments(strInput, normalizationForm); - char[] buf = new char[strInput.Length]; - - for (int attempts = 2; attempts > 0; attempts--) + char[]? toReturn = null; + try { - int realLen = Interop.Globalization.NormalizeString(normalizationForm, strInput, strInput.Length, buf, buf.Length); + int realLen = strInput.Length; - if (realLen == -1) + for (int attempt = 0; attempt < 2; attempt++) { - throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput)); + if (toReturn != null) + { + // Clear toReturn first to ensure we don't return the same buffer twice + char[] temp = toReturn; + toReturn = null; + ArrayPool.Shared.Return(temp); + } + + Span buffer = realLen <= 512 + ? stackalloc char[512] + : (toReturn = ArrayPool.Shared.Rent(realLen)); + + realLen = Interop.Globalization.NormalizeString(normalizationForm, strInput, strInput.Length, buffer); + + if (realLen == -1) + { + throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput)); + } + + if (realLen <= buffer.Length) + { + return new string(buffer.Slice(0, realLen)); + } + + Debug.Assert(realLen > 512); } - if (realLen <= buf.Length) + throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput)); + } + finally + { + if (toReturn != null) { - return new string(buf, 0, realLen); + ArrayPool.Shared.Return(toReturn); } - - buf = new char[realLen]; } - - throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput)); } // ----------------------------- diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs index e96789d1695e64..8b15bf9e8f1715 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Buffers; using System.Diagnostics; using System.Runtime.InteropServices; using System.Text; @@ -24,7 +25,7 @@ internal static bool IsNormalized(string strInput, NormalizationForm normalizati // The only way to know if IsNormalizedString failed is through checking the Win32 last error // IsNormalizedString pinvoke has SetLastError attribute property which will set the last error // to 0 (ERROR_SUCCESS) before executing the calls. - bool result = Interop.Normaliz.IsNormalizedString((int)normalizationForm, strInput, strInput.Length); + bool result = Interop.Normaliz.IsNormalizedString(normalizationForm, strInput, strInput.Length); int lastError = Marshal.GetLastWin32Error(); switch (lastError) @@ -70,7 +71,7 @@ internal static string Normalize(string strInput, NormalizationForm normalizatio // to 0 (ERROR_SUCCESS) before executing the calls. // Guess our buffer size first - int iLength = Interop.Normaliz.NormalizeString((int)normalizationForm, strInput, strInput.Length, null, 0); + int iLength = Interop.Normaliz.NormalizeString(normalizationForm, strInput, strInput.Length, Span.Empty); int lastError = Marshal.GetLastWin32Error(); // Could have an error (actually it'd be quite hard to have an error here) @@ -102,47 +103,64 @@ internal static string Normalize(string strInput, NormalizationForm normalizatio // Don't break for empty strings (only possible for D & KD and not really possible at that) if (iLength == 0) return string.Empty; - // Someplace to stick our buffer - char[] cBuffer; - - while (true) + char[]? toReturn = null; + try { - // (re)allocation buffer and normalize string - cBuffer = new char[iLength]; + Span buffer = iLength <= 512 + ? stackalloc char[512] + : (toReturn = ArrayPool.Shared.Rent(iLength)); - // NormalizeString pinvoke has SetLastError attribute property which will set the last error - // to 0 (ERROR_SUCCESS) before executing the calls. - iLength = Interop.Normaliz.NormalizeString((int)normalizationForm, strInput, strInput.Length, cBuffer, cBuffer.Length); - lastError = Marshal.GetLastWin32Error(); + while (true) + { + // NormalizeString pinvoke has SetLastError attribute property which will set the last error + // to 0 (ERROR_SUCCESS) before executing the calls. + iLength = Interop.Normaliz.NormalizeString(normalizationForm, strInput, strInput.Length, buffer); + lastError = Marshal.GetLastWin32Error(); - if (lastError == Interop.Errors.ERROR_SUCCESS) - break; + if (lastError == Interop.Errors.ERROR_SUCCESS) + break; + + // Could have an error (actually it'd be quite hard to have an error here) + switch (lastError) + { + // Do appropriate stuff for the individual errors: + case Interop.Errors.ERROR_INSUFFICIENT_BUFFER: + iLength = Math.Abs(iLength); + Debug.Assert(iLength > buffer.Length, "Buffer overflow should have iLength > cBuffer.Length"); + if (toReturn != null) + { + // Clear toReturn first to ensure we don't return the same buffer twice + char[] temp = toReturn; + toReturn = null; + ArrayPool.Shared.Return(temp); + } + buffer = toReturn = ArrayPool.Shared.Rent(iLength); + continue; + + case Interop.Errors.ERROR_INVALID_PARAMETER: + case Interop.Errors.ERROR_NO_UNICODE_TRANSLATION: + // Illegal code point or order found. Ie: FFFE or D800 D800, etc. + throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput)); + + case Interop.Errors.ERROR_NOT_ENOUGH_MEMORY: + throw new OutOfMemoryException(); + + default: + // We shouldn't get here... + throw new InvalidOperationException(SR.Format(SR.UnknownError_Num, lastError)); + } + } - // Could have an error (actually it'd be quite hard to have an error here) - switch (lastError) + // Copy our buffer into our new string, which will be the appropriate size + return new string(buffer.Slice(0, iLength)); + } + finally + { + if (toReturn != null) { - // Do appropriate stuff for the individual errors: - case Interop.Errors.ERROR_INSUFFICIENT_BUFFER: - iLength = Math.Abs(iLength); - Debug.Assert(iLength > cBuffer.Length, "Buffer overflow should have iLength > cBuffer.Length"); - continue; - - case Interop.Errors.ERROR_INVALID_PARAMETER: - case Interop.Errors.ERROR_NO_UNICODE_TRANSLATION: - // Illegal code point or order found. Ie: FFFE or D800 D800, etc. - throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput)); - - case Interop.Errors.ERROR_NOT_ENOUGH_MEMORY: - throw new OutOfMemoryException(); - - default: - // We shouldn't get here... - throw new InvalidOperationException(SR.Format(SR.UnknownError_Num, lastError)); + ArrayPool.Shared.Return(toReturn); } } - - // Copy our buffer into our new string, which will be the appropriate size - return new string(cBuffer, 0, iLength); } } } From 231e647e6deb099e65a1b1a26d4b36c2c2cc6dfa Mon Sep 17 00:00:00 2001 From: MihaZupan Date: Thu, 9 Apr 2020 21:04:45 +0200 Subject: [PATCH 2/9] Move stackalloc out of the loop --- .../Globalization/Normalization.Unix.cs | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs index 89eac782cb3536..ee475ba0046f87 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs @@ -45,22 +45,12 @@ internal static string Normalize(string strInput, NormalizationForm normalizatio char[]? toReturn = null; try { - int realLen = strInput.Length; + Span buffer = strInput.Length <= 512 + ? stackalloc char[512] + : (toReturn = ArrayPool.Shared.Rent(strInput.Length)); for (int attempt = 0; attempt < 2; attempt++) { - if (toReturn != null) - { - // Clear toReturn first to ensure we don't return the same buffer twice - char[] temp = toReturn; - toReturn = null; - ArrayPool.Shared.Return(temp); - } - - Span buffer = realLen <= 512 - ? stackalloc char[512] - : (toReturn = ArrayPool.Shared.Rent(realLen)); - realLen = Interop.Globalization.NormalizeString(normalizationForm, strInput, strInput.Length, buffer); if (realLen == -1) @@ -74,6 +64,19 @@ internal static string Normalize(string strInput, NormalizationForm normalizatio } Debug.Assert(realLen > 512); + + if (attempt == 0) + { + if (toReturn != null) + { + // Clear toReturn first to ensure we don't return the same buffer twice + char[] temp = toReturn; + toReturn = null; + ArrayPool.Shared.Return(temp); + } + + buffer = toReturn = ArrayPool.Shared.Rent(realLen); + } } throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput)); From 4dce18a139d6398c2d6121034533a6e81f5552a9 Mon Sep 17 00:00:00 2001 From: MihaZupan Date: Thu, 9 Apr 2020 21:05:45 +0200 Subject: [PATCH 3/9] Use char* instead of ref char, pin manually --- .../Interop.Normalization.cs | 15 ++++++++++++--- .../Windows/Normaliz/Interop.Normalization.cs | 19 ++++++++++++++----- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs b/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs index fe3181f6aff5b3..3b7a5c0123a40f 100644 --- a/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs +++ b/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs @@ -13,10 +13,19 @@ internal static partial class Globalization [DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_IsNormalized")] internal static extern int IsNormalized(NormalizationForm normalizationForm, string src, int srcLen); - internal static int NormalizeString(NormalizationForm normalizationForm, string src, int srcLen, Span dstBuffer) => - NormalizeString(normalizationForm, src, srcLen, ref MemoryMarshal.GetReference(dstBuffer), dstBuffer.Length); + internal static int NormalizeString(NormalizationForm normalizationForm, string src, int srcLen, Span dstBuffer) + { + unsafe + { + fixed (char* pSrc = src) + fixed (char* pDest = &MemoryMarshal.GetReference(dstBuffer)) + { + return NormalizeString(normalizationForm, pSrc, srcLen, pDest, dstBuffer.Length); + } + } + } [DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_NormalizeString")] - private static extern int NormalizeString(NormalizationForm normalizationForm, string src, int srcLen, ref char dstBuffer, int dstBufferCapacity); + private static extern unsafe int NormalizeString(NormalizationForm normalizationForm, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity); } } diff --git a/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Normalization.cs b/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Normalization.cs index 77fb0a1dd7627e..2f00a63c17aab8 100644 --- a/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Normalization.cs +++ b/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Normalization.cs @@ -13,15 +13,24 @@ internal static partial class Normaliz [DllImport("Normaliz.dll", CharSet = CharSet.Unicode, SetLastError = true)] internal static extern bool IsNormalizedString(NormalizationForm normForm, string source, int length); - internal static int NormalizeString(NormalizationForm normForm, string source, int sourceLength, Span destination) => - NormalizeString(normForm, source, sourceLength, ref MemoryMarshal.GetReference(destination), destination.Length); + internal static int NormalizeString(NormalizationForm normForm, string source, int sourceLength, Span destination) + { + unsafe + { + fixed (char* pSrc = source) + fixed (char* pDest = &MemoryMarshal.GetReference(destination)) + { + return NormalizeString(normForm, pSrc, sourceLength, pDest, destination.Length); + } + } + } [DllImport("Normaliz.dll", CharSet = CharSet.Unicode, SetLastError = true)] - private static extern int NormalizeString( + private static extern unsafe int NormalizeString( NormalizationForm normForm, - string source, + char* source, int sourceLength, - ref char destination, + char* destination, int destinationLength); } } From ee37454487769d7914354de7b93a42268ad0d6be Mon Sep 17 00:00:00 2001 From: MihaZupan Date: Thu, 9 Apr 2020 22:36:12 +0200 Subject: [PATCH 4/9] Remove interop wrappers, pin in call sites, don't guess the output length on Windows --- .../Interop.Normalization.cs | 17 +--- .../Windows/Normaliz/Interop.Normalization.cs | 17 +--- .../Globalization/Normalization.Unix.cs | 20 +++- .../Globalization/Normalization.Windows.cs | 92 +++++++++---------- 4 files changed, 64 insertions(+), 82 deletions(-) diff --git a/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs b/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs index 3b7a5c0123a40f..f4d7c9b4bbcea5 100644 --- a/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs +++ b/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs @@ -2,7 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; using System.Runtime.InteropServices; using System.Text; @@ -11,21 +10,9 @@ internal static partial class Interop internal static partial class Globalization { [DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_IsNormalized")] - internal static extern int IsNormalized(NormalizationForm normalizationForm, string src, int srcLen); - - internal static int NormalizeString(NormalizationForm normalizationForm, string src, int srcLen, Span dstBuffer) - { - unsafe - { - fixed (char* pSrc = src) - fixed (char* pDest = &MemoryMarshal.GetReference(dstBuffer)) - { - return NormalizeString(normalizationForm, pSrc, srcLen, pDest, dstBuffer.Length); - } - } - } + internal static extern int IsNormalized(NormalizationForm normalizationForm, char* src, int srcLen); [DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_NormalizeString")] - private static extern unsafe int NormalizeString(NormalizationForm normalizationForm, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity); + internal static extern unsafe int NormalizeString(NormalizationForm normalizationForm, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity); } } diff --git a/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Normalization.cs b/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Normalization.cs index 2f00a63c17aab8..362b7ffb4eefec 100644 --- a/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Normalization.cs +++ b/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Normalization.cs @@ -2,7 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; using System.Runtime.InteropServices; using System.Text; @@ -11,22 +10,10 @@ internal static partial class Interop internal static partial class Normaliz { [DllImport("Normaliz.dll", CharSet = CharSet.Unicode, SetLastError = true)] - internal static extern bool IsNormalizedString(NormalizationForm normForm, string source, int length); - - internal static int NormalizeString(NormalizationForm normForm, string source, int sourceLength, Span destination) - { - unsafe - { - fixed (char* pSrc = source) - fixed (char* pDest = &MemoryMarshal.GetReference(destination)) - { - return NormalizeString(normForm, pSrc, sourceLength, pDest, destination.Length); - } - } - } + internal static extern unsafe BOOL IsNormalizedString(NormalizationForm normForm, char* source, int length); [DllImport("Normaliz.dll", CharSet = CharSet.Unicode, SetLastError = true)] - private static extern unsafe int NormalizeString( + internal static extern unsafe int NormalizeString( NormalizationForm normForm, char* source, int sourceLength, diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs index ee475ba0046f87..47cc7d618d47b9 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs @@ -4,6 +4,7 @@ using System.Buffers; using System.Diagnostics; +using System.Runtime.InteropServices; using System.Text; namespace System.Globalization @@ -21,7 +22,14 @@ internal static bool IsNormalized(string strInput, NormalizationForm normalizati ValidateArguments(strInput, normalizationForm); - int ret = Interop.Globalization.IsNormalized(normalizationForm, strInput, strInput.Length); + int ret; + unsafe + { + fixed (char* pInput = strInput) + { + ret = Interop.Globalization.IsNormalized(normalizationForm, pInput, strInput.Length); + } + } if (ret == -1) { @@ -51,7 +59,15 @@ internal static string Normalize(string strInput, NormalizationForm normalizatio for (int attempt = 0; attempt < 2; attempt++) { - realLen = Interop.Globalization.NormalizeString(normalizationForm, strInput, strInput.Length, buffer); + int realLen; + unsafe + { + fixed (char* pInput = strInput) + fixed (char* pDest = &MemoryMarshal.GetReference(buffer)) + { + realLen = Interop.Globalization.NormalizeString(normalizationForm, pInput, strInput.Length, pDest, buffer.Length); + } + } if (realLen == -1) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs index 8b15bf9e8f1715..840b84a4a3b5f1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs @@ -25,7 +25,14 @@ internal static bool IsNormalized(string strInput, NormalizationForm normalizati // The only way to know if IsNormalizedString failed is through checking the Win32 last error // IsNormalizedString pinvoke has SetLastError attribute property which will set the last error // to 0 (ERROR_SUCCESS) before executing the calls. - bool result = Interop.Normaliz.IsNormalizedString(normalizationForm, strInput, strInput.Length); + Interop.BOOL result; + unsafe + { + fixed (char* pInput = strInput) + { + result = Interop.Normaliz.IsNormalizedString(normalizationForm, pInput, strInput.Length); + } + } int lastError = Marshal.GetLastWin32Error(); switch (lastError) @@ -52,7 +59,7 @@ internal static bool IsNormalized(string strInput, NormalizationForm normalizati throw new InvalidOperationException(SR.Format(SR.UnknownError_Num, lastError)); } - return result; + return result == Interop.BOOL.TRUE; } internal static string Normalize(string strInput, NormalizationForm normalizationForm) @@ -66,67 +73,47 @@ internal static string Normalize(string strInput, NormalizationForm normalizatio Debug.Assert(strInput != null); - // we depend on Win32 last error when calling NormalizeString - // NormalizeString pinvoke has SetLastError attribute property which will set the last error - // to 0 (ERROR_SUCCESS) before executing the calls. - - // Guess our buffer size first - int iLength = Interop.Normaliz.NormalizeString(normalizationForm, strInput, strInput.Length, Span.Empty); - - int lastError = Marshal.GetLastWin32Error(); - // Could have an error (actually it'd be quite hard to have an error here) - if ((lastError != Interop.Errors.ERROR_SUCCESS) || iLength < 0) + if (strInput.Length == 0) { - if (lastError == Interop.Errors.ERROR_INVALID_PARAMETER) - { - if (normalizationForm != NormalizationForm.FormC && - normalizationForm != NormalizationForm.FormD && - normalizationForm != NormalizationForm.FormKC && - normalizationForm != NormalizationForm.FormKD) - { - throw new ArgumentException(SR.Argument_InvalidNormalizationForm, nameof(normalizationForm)); - } - - throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput)); - } - - // We shouldn't really be able to get here..., guessing length is - // a trivial math function... - // Can't really be Out of Memory, but just in case: - if (lastError == Interop.Errors.ERROR_NOT_ENOUGH_MEMORY) - throw new OutOfMemoryException(); - - // Who knows what happened? Not us! - throw new InvalidOperationException(SR.Format(SR.UnknownError_Num, lastError)); + return string.Empty; } - // Don't break for empty strings (only possible for D & KD and not really possible at that) - if (iLength == 0) return string.Empty; - char[]? toReturn = null; try { - Span buffer = iLength <= 512 + Span buffer = strInput.Length <= 512 ? stackalloc char[512] - : (toReturn = ArrayPool.Shared.Rent(iLength)); + : (toReturn = ArrayPool.Shared.Rent(strInput.Length)); while (true) { + // we depend on Win32 last error when calling NormalizeString // NormalizeString pinvoke has SetLastError attribute property which will set the last error // to 0 (ERROR_SUCCESS) before executing the calls. - iLength = Interop.Normaliz.NormalizeString(normalizationForm, strInput, strInput.Length, buffer); - lastError = Marshal.GetLastWin32Error(); - - if (lastError == Interop.Errors.ERROR_SUCCESS) - break; + int realLength; + unsafe + { + fixed (char* pInput = strInput) + fixed (char* pDest = &MemoryMarshal.GetReference(buffer)) + { + realLength = Interop.Normaliz.NormalizeString(normalizationForm, pInput, strInput.Length, pDest, buffer.Length); + } + } + int lastError = Marshal.GetLastWin32Error(); - // Could have an error (actually it'd be quite hard to have an error here) switch (lastError) { + case Interop.Errors.ERROR_SUCCESS: + if (realLength == 0) + { + return string.Empty; + } + return new string(buffer.Slice(0, realLength)); + // Do appropriate stuff for the individual errors: case Interop.Errors.ERROR_INSUFFICIENT_BUFFER: - iLength = Math.Abs(iLength); - Debug.Assert(iLength > buffer.Length, "Buffer overflow should have iLength > cBuffer.Length"); + realLength = Math.Abs(realLength); + Debug.Assert(realLength > buffer.Length, "Buffer overflow should have iLength > cBuffer.Length"); if (toReturn != null) { // Clear toReturn first to ensure we don't return the same buffer twice @@ -134,11 +121,19 @@ internal static string Normalize(string strInput, NormalizationForm normalizatio toReturn = null; ArrayPool.Shared.Return(temp); } - buffer = toReturn = ArrayPool.Shared.Rent(iLength); + buffer = toReturn = ArrayPool.Shared.Rent(realLength); continue; case Interop.Errors.ERROR_INVALID_PARAMETER: case Interop.Errors.ERROR_NO_UNICODE_TRANSLATION: + if (normalizationForm != NormalizationForm.FormC && + normalizationForm != NormalizationForm.FormD && + normalizationForm != NormalizationForm.FormKC && + normalizationForm != NormalizationForm.FormKD) + { + throw new ArgumentException(SR.Argument_InvalidNormalizationForm, nameof(normalizationForm)); + } + // Illegal code point or order found. Ie: FFFE or D800 D800, etc. throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput)); @@ -150,9 +145,6 @@ internal static string Normalize(string strInput, NormalizationForm normalizatio throw new InvalidOperationException(SR.Format(SR.UnknownError_Num, lastError)); } } - - // Copy our buffer into our new string, which will be the appropriate size - return new string(buffer.Slice(0, iLength)); } finally { From 6ffdc742deae73823dd6bb8b2872a689b00a6a11 Mon Sep 17 00:00:00 2001 From: MihaZupan Date: Thu, 9 Apr 2020 22:45:05 +0200 Subject: [PATCH 5/9] Add missing unsafe modifier --- .../Unix/System.Globalization.Native/Interop.Normalization.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs b/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs index f4d7c9b4bbcea5..f981eac6405db3 100644 --- a/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs +++ b/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs @@ -10,7 +10,7 @@ internal static partial class Interop internal static partial class Globalization { [DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_IsNormalized")] - internal static extern int IsNormalized(NormalizationForm normalizationForm, char* src, int srcLen); + internal static extern unsafe int IsNormalized(NormalizationForm normalizationForm, char* src, int srcLen); [DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_NormalizeString")] internal static extern unsafe int NormalizeString(NormalizationForm normalizationForm, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity); From 77ff0e3e2c784c20c2ba4537926afb4d33883978 Mon Sep 17 00:00:00 2001 From: MihaZupan Date: Thu, 9 Apr 2020 22:55:45 +0200 Subject: [PATCH 6/9] Remove explicit empty string check --- .../src/System/Globalization/Normalization.Windows.cs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs index 840b84a4a3b5f1..0df9f34e31d334 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs @@ -104,10 +104,6 @@ internal static string Normalize(string strInput, NormalizationForm normalizatio switch (lastError) { case Interop.Errors.ERROR_SUCCESS: - if (realLength == 0) - { - return string.Empty; - } return new string(buffer.Slice(0, realLength)); // Do appropriate stuff for the individual errors: From 970dd0cb1a1f13294192de89c52b9b378035214c Mon Sep 17 00:00:00 2001 From: MihaZupan Date: Thu, 9 Apr 2020 22:58:07 +0200 Subject: [PATCH 7/9] Mark methods unsafe instead of using unsafe blocks --- .../Globalization/Normalization.Unix.cs | 20 +++++++------------ .../Globalization/Normalization.Windows.cs | 20 +++++++------------ 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs index 47cc7d618d47b9..5d4e7bb7f56314 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs @@ -11,7 +11,7 @@ namespace System.Globalization { internal static partial class Normalization { - internal static bool IsNormalized(string strInput, NormalizationForm normalizationForm) + internal static unsafe bool IsNormalized(string strInput, NormalizationForm normalizationForm) { if (GlobalizationMode.Invariant) { @@ -23,12 +23,9 @@ internal static bool IsNormalized(string strInput, NormalizationForm normalizati ValidateArguments(strInput, normalizationForm); int ret; - unsafe + fixed (char* pInput = strInput) { - fixed (char* pInput = strInput) - { - ret = Interop.Globalization.IsNormalized(normalizationForm, pInput, strInput.Length); - } + ret = Interop.Globalization.IsNormalized(normalizationForm, pInput, strInput.Length); } if (ret == -1) @@ -39,7 +36,7 @@ internal static bool IsNormalized(string strInput, NormalizationForm normalizati return ret == 1; } - internal static string Normalize(string strInput, NormalizationForm normalizationForm) + internal static unsafe string Normalize(string strInput, NormalizationForm normalizationForm) { if (GlobalizationMode.Invariant) { @@ -60,13 +57,10 @@ internal static string Normalize(string strInput, NormalizationForm normalizatio for (int attempt = 0; attempt < 2; attempt++) { int realLen; - unsafe + fixed (char* pInput = strInput) + fixed (char* pDest = &MemoryMarshal.GetReference(buffer)) { - fixed (char* pInput = strInput) - fixed (char* pDest = &MemoryMarshal.GetReference(buffer)) - { - realLen = Interop.Globalization.NormalizeString(normalizationForm, pInput, strInput.Length, pDest, buffer.Length); - } + realLen = Interop.Globalization.NormalizeString(normalizationForm, pInput, strInput.Length, pDest, buffer.Length); } if (realLen == -1) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs index 0df9f34e31d334..03520237c2a808 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs @@ -11,7 +11,7 @@ namespace System.Globalization { internal static partial class Normalization { - internal static bool IsNormalized(string strInput, NormalizationForm normalizationForm) + internal static unsafe bool IsNormalized(string strInput, NormalizationForm normalizationForm) { if (GlobalizationMode.Invariant) { @@ -26,12 +26,9 @@ internal static bool IsNormalized(string strInput, NormalizationForm normalizati // IsNormalizedString pinvoke has SetLastError attribute property which will set the last error // to 0 (ERROR_SUCCESS) before executing the calls. Interop.BOOL result; - unsafe + fixed (char* pInput = strInput) { - fixed (char* pInput = strInput) - { - result = Interop.Normaliz.IsNormalizedString(normalizationForm, pInput, strInput.Length); - } + result = Interop.Normaliz.IsNormalizedString(normalizationForm, pInput, strInput.Length); } int lastError = Marshal.GetLastWin32Error(); @@ -62,7 +59,7 @@ internal static bool IsNormalized(string strInput, NormalizationForm normalizati return result == Interop.BOOL.TRUE; } - internal static string Normalize(string strInput, NormalizationForm normalizationForm) + internal static unsafe string Normalize(string strInput, NormalizationForm normalizationForm) { if (GlobalizationMode.Invariant) { @@ -91,13 +88,10 @@ internal static string Normalize(string strInput, NormalizationForm normalizatio // NormalizeString pinvoke has SetLastError attribute property which will set the last error // to 0 (ERROR_SUCCESS) before executing the calls. int realLength; - unsafe + fixed (char* pInput = strInput) + fixed (char* pDest = &MemoryMarshal.GetReference(buffer)) { - fixed (char* pInput = strInput) - fixed (char* pDest = &MemoryMarshal.GetReference(buffer)) - { - realLength = Interop.Normaliz.NormalizeString(normalizationForm, pInput, strInput.Length, pDest, buffer.Length); - } + realLength = Interop.Normaliz.NormalizeString(normalizationForm, pInput, strInput.Length, pDest, buffer.Length); } int lastError = Marshal.GetLastWin32Error(); From ff64c9fc3a82a9490482d737d0f23ce445ebb84e Mon Sep 17 00:00:00 2001 From: MihaZupan Date: Thu, 9 Apr 2020 23:47:01 +0200 Subject: [PATCH 8/9] Return the original string if unchanged --- .../src/System/Globalization/Normalization.Unix.cs | 5 ++++- .../src/System/Globalization/Normalization.Windows.cs | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs index 5d4e7bb7f56314..e2e3d3c3095ca9 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs @@ -70,7 +70,10 @@ internal static unsafe string Normalize(string strInput, NormalizationForm norma if (realLen <= buffer.Length) { - return new string(buffer.Slice(0, realLen)); + ReadOnlySpan result = buffer.Slice(0, realLen); + return result.SequenceEqual(strInput) + ? strInput + : new string(result); } Debug.Assert(realLen > 512); diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs index 03520237c2a808..864e8cdba0c431 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs @@ -98,7 +98,10 @@ internal static unsafe string Normalize(string strInput, NormalizationForm norma switch (lastError) { case Interop.Errors.ERROR_SUCCESS: - return new string(buffer.Slice(0, realLength)); + ReadOnlySpan result = buffer.Slice(0, realLength); + return result.SequenceEqual(strInput) + ? strInput + : new string(result); // Do appropriate stuff for the individual errors: case Interop.Errors.ERROR_INSUFFICIENT_BUFFER: From eba5544a9f5b6556d37883ceb06ebcac920dbe06 Mon Sep 17 00:00:00 2001 From: MihaZupan Date: Fri, 10 Apr 2020 07:43:32 +0200 Subject: [PATCH 9/9] Use const for StackallocThreshold --- .../src/System/Globalization/Normalization.Unix.cs | 8 +++++--- .../src/System/Globalization/Normalization.Windows.cs | 9 ++++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs index e2e3d3c3095ca9..0a95d018f0ab03 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs @@ -50,8 +50,10 @@ internal static unsafe string Normalize(string strInput, NormalizationForm norma char[]? toReturn = null; try { - Span buffer = strInput.Length <= 512 - ? stackalloc char[512] + const int StackallocThreshold = 512; + + Span buffer = strInput.Length <= StackallocThreshold + ? stackalloc char[StackallocThreshold] : (toReturn = ArrayPool.Shared.Rent(strInput.Length)); for (int attempt = 0; attempt < 2; attempt++) @@ -76,7 +78,7 @@ internal static unsafe string Normalize(string strInput, NormalizationForm norma : new string(result); } - Debug.Assert(realLen > 512); + Debug.Assert(realLen > StackallocThreshold); if (attempt == 0) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs index 864e8cdba0c431..65e459a13a3e92 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs @@ -56,7 +56,7 @@ internal static unsafe bool IsNormalized(string strInput, NormalizationForm norm throw new InvalidOperationException(SR.Format(SR.UnknownError_Num, lastError)); } - return result == Interop.BOOL.TRUE; + return result != Interop.BOOL.FALSE; } internal static unsafe string Normalize(string strInput, NormalizationForm normalizationForm) @@ -78,8 +78,10 @@ internal static unsafe string Normalize(string strInput, NormalizationForm norma char[]? toReturn = null; try { - Span buffer = strInput.Length <= 512 - ? stackalloc char[512] + const int StackallocThreshold = 512; + + Span buffer = strInput.Length <= StackallocThreshold + ? stackalloc char[StackallocThreshold] : (toReturn = ArrayPool.Shared.Rent(strInput.Length)); while (true) @@ -114,6 +116,7 @@ internal static unsafe string Normalize(string strInput, NormalizationForm norma toReturn = null; ArrayPool.Shared.Return(temp); } + Debug.Assert(realLength > StackallocThreshold); buffer = toReturn = ArrayPool.Shared.Rent(realLength); continue;