This repository was archived by the owner on Jan 23, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Improve perf of Encoding.GetEncoding(int) #6907
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -87,8 +87,6 @@ namespace System.Text | |
| public abstract class Encoding : ICloneable | ||
| { | ||
| private static Encoding defaultEncoding; | ||
|
|
||
| private static volatile Hashtable encodings; | ||
|
|
||
| // | ||
| // The following values are from mlang.idl. These values | ||
|
|
@@ -373,6 +371,7 @@ public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding, | |
| return dstEncoding.GetBytes(srcEncoding.GetChars(bytes, index, count)); | ||
| } | ||
|
|
||
| #if FEATURE_CODEPAGES_FILE | ||
| // Private object for locking instead of locking on a public type for SQL reliability work. | ||
| private static Object s_InternalSyncObject; | ||
| private static Object InternalSyncObject { | ||
|
|
@@ -385,6 +384,11 @@ private static Object InternalSyncObject { | |
| } | ||
| } | ||
|
|
||
| // On Desktop, encoding instances that aren't cached in a static field are cached in | ||
| // a hash table by codepage. | ||
| private static volatile Hashtable encodings; | ||
| #endif | ||
|
|
||
| #if !FEATURE_CORECLR | ||
| [System.Security.SecurityCritical] | ||
| #endif | ||
|
|
@@ -420,9 +424,34 @@ public static Encoding GetEncoding(int codepage) | |
|
|
||
| // Our Encoding | ||
|
|
||
| // See if the encoding is cached in a static field. | ||
| switch (codepage) | ||
| { | ||
| case CodePageDefault: return Default; // 0 | ||
| case CodePageUnicode: return Unicode; // 1200 | ||
| case CodePageBigEndian: return BigEndianUnicode; // 1201 | ||
| case CodePageUTF32: return UTF32; // 12000 | ||
| case CodePageUTF32BE: return BigEndianUTF32; // 12001 | ||
| case CodePageUTF7: return UTF7; // 65000 | ||
| case CodePageUTF8: return UTF8; // 65001 | ||
| case CodePageASCII: return ASCII; // 20127 | ||
| case ISO_8859_1: return Latin1; // 28591 | ||
|
|
||
| // We don't allow the following special code page values that Win32 allows. | ||
| case CodePageNoOEM: // 1 CP_OEMCP | ||
| case CodePageNoMac: // 2 CP_MACCP | ||
| case CodePageNoThread: // 3 CP_THREAD_ACP | ||
| case CodePageNoSymbol: // 42 CP_SYMBOL | ||
| throw new ArgumentException(Environment.GetResourceString( | ||
| "Argument_CodepageNotSupported", codepage), "codepage"); | ||
| } | ||
|
|
||
| #if FEATURE_CODEPAGES_FILE | ||
| object key = codepage; // Box once | ||
|
|
||
| // See if we have a hash table with our encoding in it already. | ||
| if (encodings != null) { | ||
| result = (Encoding)encodings[codepage]; | ||
| result = (Encoding)encodings[key]; | ||
| } | ||
|
|
||
| if (result == null) | ||
|
|
@@ -437,92 +466,34 @@ public static Encoding GetEncoding(int codepage) | |
| } | ||
|
|
||
| // Double check that we don't have one in the table (in case another thread beat us here) | ||
| if ((result = (Encoding)encodings[codepage]) != null) | ||
| if ((result = (Encoding)encodings[key]) != null) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note: If you decide to follow my advice and use the generic Dictionary instead, then this should be converted to a |
||
| return result; | ||
|
|
||
| // Special case the commonly used Encoding classes here, then call | ||
| // GetEncodingRare to avoid loading classes like MLangCodePageEncoding | ||
| // and ASCIIEncoding. ASP.NET uses UTF-8 & ISO-8859-1. | ||
| switch (codepage) | ||
| if (codepage == CodePageWindows1252) | ||
| { | ||
| case CodePageDefault: // 0, default code page | ||
| result = Encoding.Default; | ||
| break; | ||
| case CodePageUnicode: // 1200, Unicode | ||
| result = Unicode; | ||
| break; | ||
| case CodePageBigEndian: // 1201, big endian unicode | ||
| result = BigEndianUnicode; | ||
| break; | ||
| #if FEATURE_CODEPAGES_FILE | ||
| case CodePageWindows1252: // 1252, Windows | ||
| result = new SBCSCodePageEncoding(codepage); | ||
| break; | ||
| #else | ||
| // on desktop, UTF7 is handled by GetEncodingRare. | ||
| // On Coreclr, we handle this directly without bringing GetEncodingRare, so that we get real UTF-7 encoding. | ||
| case CodePageUTF7: // 65000, UTF7 | ||
| result = UTF7; | ||
| break; | ||
| case CodePageUTF32: // 12000 | ||
| result = UTF32; | ||
| break; | ||
| case CodePageUTF32BE: // 12001 | ||
| result = new UTF32Encoding(true, true); | ||
| break; | ||
| #endif | ||
| case CodePageUTF8: // 65001, UTF8 | ||
| result = UTF8; | ||
| break; | ||
|
|
||
| // These are (hopefully) not very common, but also shouldn't slow us down much and make default | ||
| // case able to handle more code pages by calling GetEncodingCodePage | ||
| case CodePageNoOEM: // 1 | ||
| case CodePageNoMac: // 2 | ||
| case CodePageNoThread: // 3 | ||
| case CodePageNoSymbol: // 42 | ||
| // Win32 also allows the following special code page values. We won't allow them except in the | ||
| // CP_ACP case. | ||
| // #define CP_ACP 0 // default to ANSI code page | ||
| // #define CP_OEMCP 1 // default to OEM code page | ||
| // #define CP_MACCP 2 // default to MAC code page | ||
| // #define CP_THREAD_ACP 3 // current thread's ANSI code page | ||
| // #define CP_SYMBOL 42 // SYMBOL translations | ||
| throw new ArgumentException(Environment.GetResourceString( | ||
| "Argument_CodepageNotSupported", codepage), "codepage"); | ||
| // Have to do ASCII and Latin 1 first so they don't get loaded as code pages | ||
| case CodePageASCII: // 20127 | ||
| result = ASCII; | ||
| break; | ||
| case ISO_8859_1: // 28591 | ||
| result = Latin1; | ||
| break; | ||
| default: | ||
| { | ||
| #if FEATURE_CODEPAGES_FILE | ||
| // 1st assume its a code page. | ||
| result = GetEncodingCodePage(codepage); | ||
| if (result == null) | ||
| result = GetEncodingRare(codepage); | ||
| break; | ||
| #else | ||
| // Is it a valid code page? | ||
| if (EncodingTable.GetCodePageDataItem(codepage) == null) | ||
| { | ||
| throw new NotSupportedException( | ||
| Environment.GetResourceString("NotSupported_NoCodepageData", codepage)); | ||
| } | ||
|
|
||
| result = UTF8; | ||
| break; | ||
| #endif // FEATURE_CODEPAGES_FILE | ||
| } | ||
| result = new SBCSCodePageEncoding(codepage); | ||
| } | ||
| encodings.Add(codepage, result); | ||
| } | ||
| else | ||
| { | ||
| result = GetEncodingCodePage(codepage) ?? GetEncodingRare(codepage); | ||
| } | ||
|
|
||
| Contract.Assert(result != null, "result != null"); | ||
|
|
||
| encodings.Add(key, result); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jkotas You may have to comment since I don't work with multithreaded code too much, but I believe the scope of the lock statement can be reduced to just around this |
||
| } | ||
| } | ||
| return result; | ||
| #else | ||
| // Is it a valid code page? | ||
| if (EncodingTable.GetCodePageDataItem(codepage) == null) | ||
| { | ||
| throw new NotSupportedException( | ||
| Environment.GetResourceString("NotSupported_NoCodepageData", codepage)); | ||
| } | ||
|
|
||
| return UTF8; | ||
| #endif // FEATURE_CODEPAGES_FILE | ||
| } | ||
|
|
||
| [Pure] | ||
|
|
@@ -553,15 +524,6 @@ private static Encoding GetEncodingRare(int codepage) | |
| Encoding result; | ||
| switch (codepage) | ||
| { | ||
| case CodePageUTF7: // 65000 | ||
| result = UTF7; | ||
| break; | ||
| case CodePageUTF32: // 12000 | ||
| result = UTF32; | ||
| break; | ||
| case CodePageUTF32BE: // 12001 | ||
| result = new UTF32Encoding(true, true); | ||
| break; | ||
| case ISCIIAssemese: | ||
| case ISCIIBengali: | ||
| case ISCIIDevanagari: | ||
|
|
@@ -1478,6 +1440,13 @@ public virtual String GetString(byte[] bytes, int index, int count) | |
|
|
||
| public static Encoding UTF32 => UTF32Encoding.s_default; | ||
|
|
||
| // Returns an encoding for the UTF-32 format. The returned encoding will be | ||
| // an instance of the UTF32Encoding class. | ||
| // | ||
| // It will use big endian byte order. | ||
|
|
||
| private static Encoding BigEndianUTF32 => UTF32Encoding.s_bigEndianDefault; | ||
|
|
||
| public override bool Equals(Object value) { | ||
| Encoding that = value as Encoding; | ||
| if (that != null) | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe this should be converted to a
Dictionary<int, Encoding>instead to help reduce usages of the non-generic collections.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also you should consider removing the
volatilemodifier, and instead usingInterlocked.CompareExchangeto initialize it. I believe thelockstatement is still needed, however, since a regular Dictionary does not support multiple writers (so concurrent Adds may fail).Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't really want to significantly change the desktop implementation as part of this PR (besides the parts that improve CoreCLR), hence I left the existing
Hashtable-based code mostly as-is (other than the change to only box codepage once).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree that it is best to not churn the desktop implementation as part of this PR.