Skip to content
This repository was archived by the owner on Nov 1, 2020. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 30 additions & 87 deletions src/System.Private.CoreLib/src/System/Text/Encoding.cs
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,6 @@ namespace System.Text
[System.Runtime.InteropServices.ComVisible(true)]
public abstract class Encoding : ICloneable
{
private static EncodingCache s_encodings;

// Special Case Code Pages
private const int CodePageDefault = 0;
private const int CodePageNoOEM = 1; // OEM Code page not supported
Expand Down Expand Up @@ -274,103 +272,41 @@ public static Encoding GetEncoding(int codepage)

Contract.EndContractBlock();

// Lazily initialize the encoding cache
if (s_encodings == null)
Interlocked.CompareExchange<EncodingCache>(ref s_encodings, new EncodingCache(), null);

#if CORERT
// CORERT-TODO: For now, always return UTF8 encoding
// https://github.com/dotnet/corert/issues/213
return UTF8;
#else
return s_encodings.GetOrAdd(codepage);
#endif
}

// Cache of Encoding instances keyed by code page number, built on ConcurrentUnifier.
private sealed class EncodingCache : ConcurrentUnifier<int, Encoding>
{
    // Produces the Encoding for the given code page by delegating to GetEncodingInternal.
    // NOTE(review): presumably called by the base class when a key has no cached value yet - confirm.
    protected sealed override Encoding Factory(int codePage) => GetEncodingInternal(codePage);
}

// Resolves a code page identifier to an Encoding instance.
//   - Well-known code pages (0, 1200, 1201, 12000, 12001, 65000, 65001, 20127, 28591) map to the
//     matching Encoding values.
//   - The Win32 pseudo code pages 1, 2, 3 and 42 are rejected with ArgumentException.
//   - Any other value is checked with EncodingTable.GetWebNameFromCodePage; a null result raises
//     NotSupportedException, otherwise UTF8 is returned.
// NOTE(review): this span appears to contain two revisions of the logic side by side (an older
// "result = ...; break;" form and a newer "return ...;" form), so several case labels occur twice
// and the trailing #endif has no #if inside the span - confirm against the real file before editing.
private static Encoding GetEncodingInternal(int codepage)
{
Encoding result;

// Special case the commonly used Encoding classes here, then call
// GetEncodingRare to avoid loading classes like MLangCodePageEncoding
// and ASCIIEncoding. ASP.NET uses UTF-8 & ISO-8859-1.
switch (codepage)
{
case CodePageDefault: // 0, default code page
result = UTF8;
break;
case CodePageUnicode: // 1200, Unicode
result = Unicode;
break;
case CodePageBigEndian: // 1201, big endian unicode
result = BigEndianUnicode;
break;

// on desktop, UTF7 is handled by GetEncodingRare.
// On Coreclr, we handle this directly without bringing GetEncodingRare, so that we get real UTF-7 encoding.
case CodePageUTF7: // 65000, UTF7
result = UTF7;
break;

case CodePageUTF32: // 12000
result = UTF32;
break;
case CodePageUTF32BE: // 12001
result = new UTF32Encoding(true, true);
break;

case CodePageUTF8: // 65001, UTF8
result = UTF8;
break;

// These are (hopefully) not very common, but also shouldn't slow us down much and make default
// case able to handle more code pages by calling GetEncodingCodePage
case CodePageNoOEM: // 1
case CodePageNoMac: // 2
case CodePageNoThread: // 3
case CodePageNoSymbol: // 42
// Win32 also allows the following special code page values. We won't allow them except in the
// CP_ACP case.
// #define CP_ACP 0 // default to ANSI code page
// #define CP_OEMCP 1 // default to OEM code page
// #define CP_MACCP 2 // default to MAC code page
// #define CP_THREAD_ACP 3 // current thread's ANSI code page
// #define CP_SYMBOL 42 // SYMBOL translations
// NOTE(review): the cases below repeat the mappings above in direct-return form; they look like
// the revised version of this switch - confirm which set is current.
case CodePageDefault: return UTF8; // 0
case CodePageUnicode: return Unicode; // 1200
case CodePageBigEndian: return BigEndianUnicode; // 1201
case CodePageUTF32: return UTF32; // 12000
case CodePageUTF32BE: return BigEndianUTF32; // 12001
case CodePageUTF7: return UTF7; // 65000
case CodePageUTF8: return UTF8; // 65001
case CodePageASCII: return ASCII; // 20127
case ISO_8859_1: return Latin1; // 28591

// We don't allow the following special code page values that Win32 allows.
case CodePageNoOEM: // 1 CP_OEMCP
case CodePageNoMac: // 2 CP_MACCP
case CodePageNoThread: // 3 CP_THREAD_ACP
case CodePageNoSymbol: // 42 CP_SYMBOL
throw new ArgumentException(SR.Format(
SR.Argument_CodepageNotSupported, codepage), "codepage");
}

// Have to do ASCII and Latin 1 first so they don't get loaded as code pages
case CodePageASCII: // 20127
result = ASCII;
break;

case ISO_8859_1: // 28591
result = Latin1;
break;

default:
{
// Is it a valid code page?
if (EncodingTable.GetWebNameFromCodePage(codepage) == null)
{
throw new NotSupportedException(
SR.Format(SR.NotSupported_NoCodepageData, codepage));
}

result = UTF8;
break;
}
// NOTE(review): the validation below duplicates the check inside the default case above; it
// looks like the revised fall-through path that runs after the switch - confirm.
// Is it a valid code page?
if (EncodingTable.GetWebNameFromCodePage(codepage) == null)
{
throw new NotSupportedException(
SR.Format(SR.NotSupported_NoCodepageData, codepage));
}

return result;
return UTF8;
#endif
}

[Pure]
Expand Down Expand Up @@ -1115,6 +1051,13 @@ public virtual String GetString(byte[] bytes, int index, int count)

// Shared UTF-32 encoding instance, read from UTF32Encoding.s_default.
public static Encoding UTF32
{
    get { return UTF32Encoding.s_default; }
}

// Provides the big endian flavour of the UTF-32 encoding. The value handed back is the
// shared UTF32Encoding instance stored in UTF32Encoding.s_bigEndianDefault.
private static Encoding BigEndianUTF32
{
    get { return UTF32Encoding.s_bigEndianDefault; }
}

public override bool Equals(Object value)
{
Encoding that = value as Encoding;
Expand Down
3 changes: 2 additions & 1 deletion src/System.Private.CoreLib/src/System/Text/UTF32Encoding.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,10 @@ 2 21 00000000 000xxxxx hhhhhhll llllllll
Real Unicode value = (HighSurrogate - 0xD800) * 0x400 + (LowSurrogate - 0xDC00) + 0x10000
*/

// Used by Encoding.UTF32/BigEndianUTF32 for lazy initialization
// The initialization code will not be run until a static member of the class is referenced
internal static readonly UTF32Encoding s_default = new UTF32Encoding(bigEndian: false, byteOrderMark: true);
internal static readonly UTF32Encoding s_bigEndianDefault = new UTF32Encoding(bigEndian: true, byteOrderMark: true);

private bool _emitUTF32ByteOrderMark = false;
private bool _isThrowException = false;
Expand Down