Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 60 additions & 91 deletions src/mscorlib/src/System/Text/Encoding.cs
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,6 @@ namespace System.Text
public abstract class Encoding : ICloneable
{
private static Encoding defaultEncoding;

private static volatile Hashtable encodings;

//
// The following values are from mlang.idl. These values
Expand Down Expand Up @@ -373,6 +371,7 @@ public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
return dstEncoding.GetBytes(srcEncoding.GetChars(bytes, index, count));
}

#if FEATURE_CODEPAGES_FILE
// Private object for locking instead of locking on a public type for SQL reliability work.
private static Object s_InternalSyncObject;
private static Object InternalSyncObject {
Expand All @@ -385,6 +384,11 @@ private static Object InternalSyncObject {
}
}

// On Desktop, encoding instances that aren't cached in a static field are cached in
// a hash table by codepage.
private static volatile Hashtable encodings;
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this should be converted to a Dictionary<int, Encoding> instead to help reduce usages of the non-generic collections.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also you should consider removing the volatile modifier, and instead using Interlocked.CompareExchange to initialize it. I believe the lock statement is still needed, however, since a regular Dictionary does not support multiple writers (so concurrent Adds may fail).

Copy link
Copy Markdown
Author

@justinvp justinvp Aug 25, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really want to significantly change the desktop implementation as part of this PR (besides the parts that improve CoreCLR), hence I left the existing Hashtable-based code mostly as-is (other than the change to only box codepage once).

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that it is best to not churn the desktop implementation as part of this PR.

#endif

#if !FEATURE_CORECLR
[System.Security.SecurityCritical]
#endif
Expand Down Expand Up @@ -420,9 +424,34 @@ public static Encoding GetEncoding(int codepage)

// Our Encoding

// See if the encoding is cached in a static field.
switch (codepage)
{
case CodePageDefault: return Default; // 0
case CodePageUnicode: return Unicode; // 1200
case CodePageBigEndian: return BigEndianUnicode; // 1201
case CodePageUTF32: return UTF32; // 12000
case CodePageUTF32BE: return BigEndianUTF32; // 12001
case CodePageUTF7: return UTF7; // 65000
case CodePageUTF8: return UTF8; // 65001
case CodePageASCII: return ASCII; // 20127
case ISO_8859_1: return Latin1; // 28591

// We don't allow the following special code page values that Win32 allows.
case CodePageNoOEM: // 1 CP_OEMCP
case CodePageNoMac: // 2 CP_MACCP
case CodePageNoThread: // 3 CP_THREAD_ACP
case CodePageNoSymbol: // 42 CP_SYMBOL
throw new ArgumentException(Environment.GetResourceString(
"Argument_CodepageNotSupported", codepage), "codepage");
}

#if FEATURE_CODEPAGES_FILE
object key = codepage; // Box once

// See if we have a hash table with our encoding in it already.
if (encodings != null) {
result = (Encoding)encodings[codepage];
result = (Encoding)encodings[key];
}

if (result == null)
Expand All @@ -437,92 +466,34 @@ public static Encoding GetEncoding(int codepage)
}

// Double check that we don't have one in the table (in case another thread beat us here)
if ((result = (Encoding)encodings[codepage]) != null)
if ((result = (Encoding)encodings[key]) != null)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: If you decide to follow my advice and use the generic Dictionary instead, then this should be converted to a TryGetValue check.

return result;

// Special case the commonly used Encoding classes here, then call
// GetEncodingRare to avoid loading classes like MLangCodePageEncoding
// and ASCIIEncoding. ASP.NET uses UTF-8 & ISO-8859-1.
switch (codepage)
if (codepage == CodePageWindows1252)
{
case CodePageDefault: // 0, default code page
result = Encoding.Default;
break;
case CodePageUnicode: // 1200, Unicode
result = Unicode;
break;
case CodePageBigEndian: // 1201, big endian unicode
result = BigEndianUnicode;
break;
#if FEATURE_CODEPAGES_FILE
case CodePageWindows1252: // 1252, Windows
result = new SBCSCodePageEncoding(codepage);
break;
#else
// on desktop, UTF7 is handled by GetEncodingRare.
// On Coreclr, we handle this directly without bringing GetEncodingRare, so that we get real UTF-7 encoding.
case CodePageUTF7: // 65000, UTF7
result = UTF7;
break;
case CodePageUTF32: // 12000
result = UTF32;
break;
case CodePageUTF32BE: // 12001
result = new UTF32Encoding(true, true);
break;
#endif
case CodePageUTF8: // 65001, UTF8
result = UTF8;
break;

// These are (hopefully) not very common, but also shouldn't slow us down much and make default
// case able to handle more code pages by calling GetEncodingCodePage
case CodePageNoOEM: // 1
case CodePageNoMac: // 2
case CodePageNoThread: // 3
case CodePageNoSymbol: // 42
// Win32 also allows the following special code page values. We won't allow them except in the
// CP_ACP case.
// #define CP_ACP 0 // default to ANSI code page
// #define CP_OEMCP 1 // default to OEM code page
// #define CP_MACCP 2 // default to MAC code page
// #define CP_THREAD_ACP 3 // current thread's ANSI code page
// #define CP_SYMBOL 42 // SYMBOL translations
throw new ArgumentException(Environment.GetResourceString(
"Argument_CodepageNotSupported", codepage), "codepage");
// Have to do ASCII and Latin 1 first so they don't get loaded as code pages
case CodePageASCII: // 20127
result = ASCII;
break;
case ISO_8859_1: // 28591
result = Latin1;
break;
default:
{
#if FEATURE_CODEPAGES_FILE
// 1st assume its a code page.
result = GetEncodingCodePage(codepage);
if (result == null)
result = GetEncodingRare(codepage);
break;
#else
// Is it a valid code page?
if (EncodingTable.GetCodePageDataItem(codepage) == null)
{
throw new NotSupportedException(
Environment.GetResourceString("NotSupported_NoCodepageData", codepage));
}

result = UTF8;
break;
#endif // FEATURE_CODEPAGES_FILE
}
result = new SBCSCodePageEncoding(codepage);
}
encodings.Add(codepage, result);
}
else
{
result = GetEncodingCodePage(codepage) ?? GetEncodingRare(codepage);
}

Contract.Assert(result != null, "result != null");

encodings.Add(key, result);
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jkotas You may have to comment since I don't work with multithreaded code too much, but I believe the scope of the lock statement can be reduced to just around this Add.

}
}
return result;
#else
// Is it a valid code page?
if (EncodingTable.GetCodePageDataItem(codepage) == null)
{
throw new NotSupportedException(
Environment.GetResourceString("NotSupported_NoCodepageData", codepage));
}

return UTF8;
#endif // FEATURE_CODEPAGES_FILE
}

[Pure]
Expand Down Expand Up @@ -553,15 +524,6 @@ private static Encoding GetEncodingRare(int codepage)
Encoding result;
switch (codepage)
{
case CodePageUTF7: // 65000
result = UTF7;
break;
case CodePageUTF32: // 12000
result = UTF32;
break;
case CodePageUTF32BE: // 12001
result = new UTF32Encoding(true, true);
break;
case ISCIIAssemese:
case ISCIIBengali:
case ISCIIDevanagari:
Expand Down Expand Up @@ -1478,6 +1440,13 @@ public virtual String GetString(byte[] bytes, int index, int count)

public static Encoding UTF32 => UTF32Encoding.s_default;

// Returns an encoding for the UTF-32 format. The returned encoding will be
// an instance of the UTF32Encoding class.
//
// It will use big endian byte order.

private static Encoding BigEndianUTF32 => UTF32Encoding.s_bigEndianDefault;

public override bool Equals(Object value) {
Encoding that = value as Encoding;
if (that != null)
Expand Down
3 changes: 2 additions & 1 deletion src/mscorlib/src/System/Text/UTF32Encoding.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@ 2 21 00000000 000xxxxx hhhhhhll llllllll
Real Unicode value = (HighSurrogate - 0xD800) * 0x400 + (LowSurrogate - 0xDC00) + 0x10000
*/

// Used by Encoding.UTF32 for lazy initialization
// Used by Encoding.UTF32/BigEndianUTF32 for lazy initialization
// The initialization code will not be run until a static member of the class is referenced
internal static readonly UTF32Encoding s_default = new UTF32Encoding(bigEndian: false, byteOrderMark: true);
internal static readonly UTF32Encoding s_bigEndianDefault = new UTF32Encoding(bigEndian: true, byteOrderMark: true);

private bool emitUTF32ByteOrderMark = false;
private bool isThrowException = false;
Expand Down