-
Notifications
You must be signed in to change notification settings - Fork 5.4k
Reduce allocations in string.Normalize #34774
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
226e188
Reduce allocations in string.Normalize
MihaZupan 231e647
Move stackalloc out of the loop
MihaZupan 4dce18a
Use char* instead of ref char, pin manually
MihaZupan ee37454
Remove interop wrappers, pin in call sites, don't guess the output le…
MihaZupan 6ffdc74
Add missing unsafe modifier
MihaZupan 77ff0e3
Remove explicit empty string check
MihaZupan 970dd0c
Mark methods unsafe instead of using unsafe blocks
MihaZupan ff64c9f
Return the original string if unchanged
MihaZupan eba5544
Use const for StackallocThreshold
MihaZupan File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,7 @@ | |
| // The .NET Foundation licenses this file to you under the MIT license. | ||
| // See the LICENSE file in the project root for more information. | ||
|
|
||
| using System.Buffers; | ||
| using System.Diagnostics; | ||
| using System.Runtime.InteropServices; | ||
| using System.Text; | ||
|
|
@@ -10,7 +11,7 @@ namespace System.Globalization | |
| { | ||
| internal static partial class Normalization | ||
| { | ||
| internal static bool IsNormalized(string strInput, NormalizationForm normalizationForm) | ||
| internal static unsafe bool IsNormalized(string strInput, NormalizationForm normalizationForm) | ||
| { | ||
| if (GlobalizationMode.Invariant) | ||
| { | ||
|
|
@@ -24,7 +25,11 @@ internal static bool IsNormalized(string strInput, NormalizationForm normalizati | |
| // The only way to know if IsNormalizedString failed is through checking the Win32 last error | ||
| // IsNormalizedString pinvoke has SetLastError attribute property which will set the last error | ||
| // to 0 (ERROR_SUCCESS) before executing the calls. | ||
| bool result = Interop.Normaliz.IsNormalizedString((int)normalizationForm, strInput, strInput.Length); | ||
| Interop.BOOL result; | ||
| fixed (char* pInput = strInput) | ||
| { | ||
| result = Interop.Normaliz.IsNormalizedString(normalizationForm, pInput, strInput.Length); | ||
| } | ||
|
|
||
| int lastError = Marshal.GetLastWin32Error(); | ||
| switch (lastError) | ||
|
|
@@ -51,10 +56,10 @@ internal static bool IsNormalized(string strInput, NormalizationForm normalizati | |
| throw new InvalidOperationException(SR.Format(SR.UnknownError_Num, lastError)); | ||
| } | ||
|
|
||
| return result; | ||
| return result != Interop.BOOL.FALSE; | ||
| } | ||
|
|
||
| internal static string Normalize(string strInput, NormalizationForm normalizationForm) | ||
| internal static unsafe string Normalize(string strInput, NormalizationForm normalizationForm) | ||
| { | ||
| if (GlobalizationMode.Invariant) | ||
| { | ||
|
|
@@ -65,84 +70,85 @@ internal static string Normalize(string strInput, NormalizationForm normalizatio | |
|
|
||
| Debug.Assert(strInput != null); | ||
|
|
||
| // we depend on Win32 last error when calling NormalizeString | ||
| // NormalizeString pinvoke has SetLastError attribute property which will set the last error | ||
| // to 0 (ERROR_SUCCESS) before executing the calls. | ||
|
|
||
| // Guess our buffer size first | ||
| int iLength = Interop.Normaliz.NormalizeString((int)normalizationForm, strInput, strInput.Length, null, 0); | ||
| if (strInput.Length == 0) | ||
| { | ||
| return string.Empty; | ||
| } | ||
|
|
||
| int lastError = Marshal.GetLastWin32Error(); | ||
| // Could have an error (actually it'd be quite hard to have an error here) | ||
| if ((lastError != Interop.Errors.ERROR_SUCCESS) || iLength < 0) | ||
| char[]? toReturn = null; | ||
| try | ||
| { | ||
| if (lastError == Interop.Errors.ERROR_INVALID_PARAMETER) | ||
| const int StackallocThreshold = 512; | ||
|
|
||
| Span<char> buffer = strInput.Length <= StackallocThreshold | ||
| ? stackalloc char[StackallocThreshold] | ||
| : (toReturn = ArrayPool<char>.Shared.Rent(strInput.Length)); | ||
|
|
||
| while (true) | ||
| { | ||
| if (normalizationForm != NormalizationForm.FormC && | ||
| normalizationForm != NormalizationForm.FormD && | ||
| normalizationForm != NormalizationForm.FormKC && | ||
| normalizationForm != NormalizationForm.FormKD) | ||
| // we depend on Win32 last error when calling NormalizeString | ||
| // NormalizeString pinvoke has SetLastError attribute property which will set the last error | ||
| // to 0 (ERROR_SUCCESS) before executing the calls. | ||
| int realLength; | ||
| fixed (char* pInput = strInput) | ||
| fixed (char* pDest = &MemoryMarshal.GetReference(buffer)) | ||
| { | ||
| throw new ArgumentException(SR.Argument_InvalidNormalizationForm, nameof(normalizationForm)); | ||
| realLength = Interop.Normaliz.NormalizeString(normalizationForm, pInput, strInput.Length, pDest, buffer.Length); | ||
| } | ||
| int lastError = Marshal.GetLastWin32Error(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this valid even if realLength > 0?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (I see the above comment now) |
||
|
|
||
| throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput)); | ||
| switch (lastError) | ||
| { | ||
| case Interop.Errors.ERROR_SUCCESS: | ||
| ReadOnlySpan<char> result = buffer.Slice(0, realLength); | ||
| return result.SequenceEqual(strInput) | ||
| ? strInput | ||
| : new string(result); | ||
|
|
||
| // Do appropriate stuff for the individual errors: | ||
| case Interop.Errors.ERROR_INSUFFICIENT_BUFFER: | ||
| realLength = Math.Abs(realLength); | ||
| Debug.Assert(realLength > buffer.Length, "Buffer overflow should have iLength > cBuffer.Length"); | ||
| if (toReturn != null) | ||
| { | ||
| // Clear toReturn first to ensure we don't return the same buffer twice | ||
| char[] temp = toReturn; | ||
| toReturn = null; | ||
| ArrayPool<char>.Shared.Return(temp); | ||
| } | ||
| Debug.Assert(realLength > StackallocThreshold); | ||
| buffer = toReturn = ArrayPool<char>.Shared.Rent(realLength); | ||
| continue; | ||
|
|
||
| case Interop.Errors.ERROR_INVALID_PARAMETER: | ||
| case Interop.Errors.ERROR_NO_UNICODE_TRANSLATION: | ||
| if (normalizationForm != NormalizationForm.FormC && | ||
| normalizationForm != NormalizationForm.FormD && | ||
| normalizationForm != NormalizationForm.FormKC && | ||
| normalizationForm != NormalizationForm.FormKD) | ||
| { | ||
| throw new ArgumentException(SR.Argument_InvalidNormalizationForm, nameof(normalizationForm)); | ||
| } | ||
|
|
||
| // Illegal code point or order found. Ie: FFFE or D800 D800, etc. | ||
| throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput)); | ||
|
|
||
| case Interop.Errors.ERROR_NOT_ENOUGH_MEMORY: | ||
| throw new OutOfMemoryException(); | ||
|
|
||
| default: | ||
| // We shouldn't get here... | ||
| throw new InvalidOperationException(SR.Format(SR.UnknownError_Num, lastError)); | ||
| } | ||
| } | ||
|
|
||
| // We shouldn't really be able to get here..., guessing length is | ||
| // a trivial math function... | ||
| // Can't really be Out of Memory, but just in case: | ||
| if (lastError == Interop.Errors.ERROR_NOT_ENOUGH_MEMORY) | ||
| throw new OutOfMemoryException(); | ||
|
|
||
| // Who knows what happened? Not us! | ||
| throw new InvalidOperationException(SR.Format(SR.UnknownError_Num, lastError)); | ||
| } | ||
|
|
||
| // Don't break for empty strings (only possible for D & KD and not really possible at that) | ||
| if (iLength == 0) return string.Empty; | ||
|
|
||
| // Someplace to stick our buffer | ||
| char[] cBuffer; | ||
|
|
||
| while (true) | ||
| finally | ||
| { | ||
| // (re)allocation buffer and normalize string | ||
| cBuffer = new char[iLength]; | ||
|
|
||
| // NormalizeString pinvoke has SetLastError attribute property which will set the last error | ||
| // to 0 (ERROR_SUCCESS) before executing the calls. | ||
| iLength = Interop.Normaliz.NormalizeString((int)normalizationForm, strInput, strInput.Length, cBuffer, cBuffer.Length); | ||
| lastError = Marshal.GetLastWin32Error(); | ||
|
|
||
| if (lastError == Interop.Errors.ERROR_SUCCESS) | ||
| break; | ||
|
|
||
| // Could have an error (actually it'd be quite hard to have an error here) | ||
| switch (lastError) | ||
| if (toReturn != null) | ||
| { | ||
| // Do appropriate stuff for the individual errors: | ||
| case Interop.Errors.ERROR_INSUFFICIENT_BUFFER: | ||
| iLength = Math.Abs(iLength); | ||
| Debug.Assert(iLength > cBuffer.Length, "Buffer overflow should have iLength > cBuffer.Length"); | ||
| continue; | ||
|
|
||
| case Interop.Errors.ERROR_INVALID_PARAMETER: | ||
| case Interop.Errors.ERROR_NO_UNICODE_TRANSLATION: | ||
| // Illegal code point or order found. Ie: FFFE or D800 D800, etc. | ||
| throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput)); | ||
|
|
||
| case Interop.Errors.ERROR_NOT_ENOUGH_MEMORY: | ||
| throw new OutOfMemoryException(); | ||
|
|
||
| default: | ||
| // We shouldn't get here... | ||
| throw new InvalidOperationException(SR.Format(SR.UnknownError_Num, lastError)); | ||
| ArrayPool<char>.Shared.Return(toReturn); | ||
| } | ||
| } | ||
|
|
||
| // Copy our buffer into our new string, which will be the appropriate size | ||
| return new string(cBuffer, 0, iLength); | ||
| } | ||
| } | ||
| } | ||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Heads up: we generally don't return buffers to the shared pool inside finally blocks. The sole exception is where we're in 100% control of every single code path that might be invoked inside the preceding try.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We're inconsistent on this, e.g.
runtime/src/libraries/System.Private.CoreLib/src/System/IO/Stream.cs
Lines 114 to 130 in 26b6e4e
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know, but we should try to get it under check for new code (like this). The GitHub UI is misbehaving for me so I can't see the rest of the code. So this usage might not be problematic at all but somebody should double check.