From bce6ef4fb20b20e54fa48b8c501fe77572b9fdec Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed Date: Fri, 5 Jun 2020 16:10:27 -0700 Subject: [PATCH 1/6] Support the Encoding enumeration in the Encoding Providers --- .../src/System/Text/Encoding.cs | 11 ++- .../src/System/Text/EncodingInfo.cs | 58 +++++++++++++++- .../src/System/Text/EncodingProvider.cs | 30 +++++++++ .../src/System/Text/EncodingTable.cs | 21 ++++++ .../System.Runtime/ref/System.Runtime.cs | 3 +- .../ref/System.Text.Encoding.CodePages.csproj | 6 +- ...stem.Text.Encoding.CodePages.netcoreapp.cs | 11 +++ .../src/System.Text.Encoding.CodePages.csproj | 7 +- .../src/System/Text/BaseCodePageEncoding.cs | 48 +++++++++++++ .../CodePagesEncodingProvider.netcoreapp.cs | 13 ++++ .../src/System/Text/EncodingNLS.cs | 39 ++++++++++- .../src/System/Text/EncodingTable.cs | 2 +- .../tests/EncodingCodePages.netcoreapp.cs | 67 +++++++++++++++++++ ...ystem.Text.Encoding.CodePages.Tests.csproj | 1 + 14 files changed, 306 insertions(+), 11 deletions(-) create mode 100644 src/libraries/System.Text.Encoding.CodePages/ref/System.Text.Encoding.CodePages.netcoreapp.cs create mode 100644 src/libraries/System.Text.Encoding.CodePages/src/System/Text/CodePagesEncodingProvider.netcoreapp.cs create mode 100644 src/libraries/System.Text.Encoding.CodePages/tests/EncodingCodePages.netcoreapp.cs diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs index 3f0d0350610bad..6b6a130b32c0e9 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System.Diagnostics; +using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; using System.IO; using System.Runtime.InteropServices; @@ -291,7 +292,15 @@ public static Encoding GetEncoding(string name, } // Return a list of all EncodingInfo objects describing all of our encodings - public static EncodingInfo[] GetEncodings() => EncodingTable.GetEncodings(); + /// + /// Get the list from the runtime and all registered encoding providers + /// + /// The list of the objects + public static EncodingInfo[] GetEncodings() + { + Dictionary? result = EncodingProvider.GetEncodingListFromProviders(); + return result == null ? EncodingTable.GetEncodings() : EncodingTable.GetEncodings(result); + } public virtual byte[] GetPreamble() => Array.Empty(); diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/EncodingInfo.cs b/src/libraries/System.Private.CoreLib/src/System/Text/EncodingInfo.cs index febecfa6651f6b..cb5be89797e5ff 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/EncodingInfo.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/EncodingInfo.cs @@ -6,6 +6,24 @@ namespace System.Text { public sealed class EncodingInfo { + /// + /// Construct an object. + /// + /// The object which created this object + /// The encoding codepage + /// The encoding name + /// The encoding display name + /// + public EncodingInfo(EncodingProvider provider, int codePage, string name, string displayName) : this(codePage, name, displayName) + { + if (name == null || displayName == null || provider == null) + { + throw new ArgumentNullException(name == null ? nameof(name) : (displayName == null ? nameof(displayName) : nameof(provider))); + } + + Provider = provider; + } + internal EncodingInfo(int codePage, string name, string displayName) { CodePage = codePage; @@ -13,27 +31,63 @@ internal EncodingInfo(int codePage, string name, string displayName) DisplayName = displayName; } + /// + /// Get the encoding codepage number + /// + /// The codepage integer number public int CodePage { get; } + + /// + /// Get the encoding name + /// + /// The encoding name string public string Name { get; } + + /// + /// Get the encoding display name + /// + /// The encoding display name string public string DisplayName { get; } + /// + /// Get the object match the information in the object + /// + /// The object public Encoding GetEncoding() { - return Encoding.GetEncoding(CodePage); + Encoding? encoding = null; + + if (Provider != null) + { + encoding = Provider.GetEncoding(CodePage); + } + + return encoding ?? Encoding.GetEncoding(CodePage); } + /// + /// Compare this object to other object. + /// + /// The other object to compare with this object + /// True if the value object is EncodingInfo object and has a codepage equals to this EncodingInfo object codepage. Othewise, it returns False public override bool Equals(object? value) { if (value is EncodingInfo that) { - return this.CodePage == that.CodePage; + return CodePage == that.CodePage; } return false; } + /// + /// Get a hashcode represent the current EncodingInfo object + /// + /// The integer value represent the hash code of the EncodingInfo object. The hashcode is basically the encoding codepage value public override int GetHashCode() { return CodePage; } + + internal EncodingProvider? Provider {get;} } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/EncodingProvider.cs b/src/libraries/System.Private.CoreLib/src/System/Text/EncodingProvider.cs index ac7fc541f60105..34543d65aad9eb 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/EncodingProvider.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/EncodingProvider.cs @@ -2,6 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Collections.Generic; + namespace System.Text { public abstract class EncodingProvider @@ -37,6 +39,8 @@ public EncodingProvider() { } return enc; } + public virtual IEnumerable GetEncodings() => Array.Empty(); + internal static void AddProvider(EncodingProvider provider) { if (provider == null) @@ -78,6 +82,32 @@ internal static void AddProvider(EncodingProvider provider) return null; } + internal static Dictionary? GetEncodingListFromProviders() + { + if (s_providers == null) + return null; + + EncodingProvider[] providers = s_providers; + Dictionary result = new Dictionary(); + + foreach (EncodingProvider provider in providers) + { + IEnumerable? encodingInfoList = provider.GetEncodings(); + if (encodingInfoList != null) + { + foreach (EncodingInfo ei in encodingInfoList) + { + if (!result.TryGetValue(ei.CodePage, out _)) + { + result[ei.CodePage] = ei; + } + } + } + } + + return result; + } + internal static Encoding? GetEncodingFromProvider(string encodingName) { if (s_providers == null) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/EncodingTable.cs b/src/libraries/System.Private.CoreLib/src/System/Text/EncodingTable.cs index cc69f793d6e8f7..b568f382c983d7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/EncodingTable.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/EncodingTable.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System.Collections; +using System.Collections.Generic; using System.Diagnostics; using System.Threading; @@ -119,6 +120,26 @@ internal static EncodingInfo[] GetEncodings() return arrayEncodingInfo; } + internal static EncodingInfo[] GetEncodings(Dictionary encodingInfoList) + { + Debug.Assert(encodingInfoList != null); + + for (int i = 0; i < s_mappedCodePages.Length; i++) + { + if (!encodingInfoList.TryGetValue(s_mappedCodePages[i], out _)) + { + encodingInfoList[s_mappedCodePages[i]] = new EncodingInfo(s_mappedCodePages[i], s_webNames[s_webNameIndices[i]..s_webNameIndices[i + 1]], + GetDisplayName(s_mappedCodePages[i], i)); + } + } + + var collection = encodingInfoList.Values; + EncodingInfo[] result = new EncodingInfo[collection.Count]; + collection.CopyTo(result, 0); + + return result; + } + internal static CodePageDataItem? GetCodePageDataItem(int codePage) { if (s_codePageToCodePageData == null) diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs b/src/libraries/System.Runtime/ref/System.Runtime.cs index 4c39802764aae4..7a2d7b4f6ce1b2 100644 --- a/src/libraries/System.Runtime/ref/System.Runtime.cs +++ b/src/libraries/System.Runtime/ref/System.Runtime.cs @@ -10262,7 +10262,7 @@ public static void RegisterProvider(System.Text.EncodingProvider provider) { } } public sealed partial class EncodingInfo { - internal EncodingInfo() { } + public EncodingInfo(System.Text.EncodingProvider provider, int codePage, string name, string displayName) {} public int CodePage { get { throw null; } } public string DisplayName { get { throw null; } } public string Name { get { throw null; } } @@ -10277,6 +10277,7 @@ public EncodingProvider() { } public virtual System.Text.Encoding? GetEncoding(int codepage, System.Text.EncoderFallback encoderFallback, System.Text.DecoderFallback decoderFallback) { throw null; } public abstract System.Text.Encoding? GetEncoding(string name); public virtual System.Text.Encoding? GetEncoding(string name, System.Text.EncoderFallback encoderFallback, System.Text.DecoderFallback decoderFallback) { throw null; } + public virtual System.Collections.Generic.IEnumerable GetEncodings() { throw null; } } public enum NormalizationForm { diff --git a/src/libraries/System.Text.Encoding.CodePages/ref/System.Text.Encoding.CodePages.csproj b/src/libraries/System.Text.Encoding.CodePages/ref/System.Text.Encoding.CodePages.csproj index 23981a702056bc..c29c17eb07596c 100644 --- a/src/libraries/System.Text.Encoding.CodePages/ref/System.Text.Encoding.CodePages.csproj +++ b/src/libraries/System.Text.Encoding.CodePages/ref/System.Text.Encoding.CodePages.csproj @@ -1,9 +1,13 @@ enable - netstandard2.0 + $(NetCoreAppCurrent);netstandard2.0 + + + + \ No newline at end of file diff --git a/src/libraries/System.Text.Encoding.CodePages/ref/System.Text.Encoding.CodePages.netcoreapp.cs b/src/libraries/System.Text.Encoding.CodePages/ref/System.Text.Encoding.CodePages.netcoreapp.cs new file mode 100644 index 00000000000000..06b46e94552231 --- /dev/null +++ b/src/libraries/System.Text.Encoding.CodePages/ref/System.Text.Encoding.CodePages.netcoreapp.cs @@ -0,0 +1,11 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Text +{ + public sealed partial class CodePagesEncodingProvider : System.Text.EncodingProvider + { + public override System.Collections.Generic.IEnumerable GetEncodings() { throw null; } + } +} diff --git a/src/libraries/System.Text.Encoding.CodePages/src/System.Text.Encoding.CodePages.csproj b/src/libraries/System.Text.Encoding.CodePages/src/System.Text.Encoding.CodePages.csproj index 778cc8effa9e53..5c0b1f3b7c9c75 100644 --- a/src/libraries/System.Text.Encoding.CodePages/src/System.Text.Encoding.CodePages.csproj +++ b/src/libraries/System.Text.Encoding.CodePages/src/System.Text.Encoding.CodePages.csproj @@ -2,13 +2,13 @@ true enable - $(NetCoreAppCurrent)-Windows_NT;netstandard2.0;netcoreapp2.0-Windows_NT;netstandard2.0-Windows_NT + $(NetCoreAppCurrent);$(NetCoreAppCurrent)-Windows_NT;netstandard2.0;netcoreapp2.0-Windows_NT;netstandard2.0-Windows_NT true - netstandard2.0;net461 + netstandard2.0;net461 @@ -48,6 +48,9 @@ + + + codepages.nlp diff --git a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.cs b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.cs index 7b4f57b5776287..eb357ca0ce1567 100644 --- a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.cs +++ b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.cs @@ -8,6 +8,7 @@ using System.Runtime.InteropServices; using Microsoft.Win32.SafeHandles; using System.Runtime.Serialization; +using System.Runtime.CompilerServices; namespace System.Text { @@ -185,6 +186,53 @@ private unsafe void LoadCodePageTables() LoadManagedCodePage(); } + internal static unsafe EncodingInfo [] GetEncodings(CodePagesEncodingProvider provider) + { + lock (s_streamLock) + { + s_codePagesEncodingDataStream.Seek(CODEPAGE_DATA_FILE_HEADER_SIZE, SeekOrigin.Begin); + + int codePagesCount; + fixed (byte* pBytes = &s_codePagesDataHeader[0]) + { + CodePageDataFileHeader* pDataHeader = (CodePageDataFileHeader*)pBytes; + codePagesCount = pDataHeader->CodePageCount; + } + + EncodingInfo [] encodingInfoList = new EncodingInfo[codePagesCount]; + + Span pCodePageIndexBytes = stackalloc byte[sizeof(CodePageIndex)]; // 40 bytes + CodePageIndex* pCodePageIndex = (CodePageIndex*) Unsafe.AsPointer(ref pCodePageIndexBytes.GetPinnableReference()); + + for (int i = 0; i < codePagesCount; i++) + { + s_codePagesEncodingDataStream.Read(pCodePageIndexBytes); + + string codePageName; + switch (pCodePageIndex->CodePage) + { + // Fixup some encoding names. + case 950: codePageName = "big5"; break; + case 10002: codePageName = "x-mac-chinesetrad"; break; + case 20833: codePageName = "x-ebcdic-koreanextended"; break; + default: codePageName = new string((char*) pCodePageIndex); break; + } + + string? resourceName = EncodingNLS.GetLocalizedEncodingNameResource(pCodePageIndex->CodePage); + string? displayName = null; + + if (resourceName != null && resourceName.StartsWith("Globalization_cp_", StringComparison.OrdinalIgnoreCase)) + { + displayName = SR.GetResourceString(resourceName); + } + + encodingInfoList[i] = new EncodingInfo(provider, pCodePageIndex->CodePage, codePageName, displayName ?? codePageName); + } + + return encodingInfoList; + } + } + // Look up the code page pointer private unsafe bool FindCodePage(int codePage) { diff --git a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/CodePagesEncodingProvider.netcoreapp.cs b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/CodePagesEncodingProvider.netcoreapp.cs new file mode 100644 index 00000000000000..2260eee7891766 --- /dev/null +++ b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/CodePagesEncodingProvider.netcoreapp.cs @@ -0,0 +1,13 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Generic; + +namespace System.Text +{ + public sealed partial class CodePagesEncodingProvider : EncodingProvider + { + public override System.Collections.Generic.IEnumerable GetEncodings() => BaseCodePageEncoding.GetEncodings(this); + } +} diff --git a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingNLS.cs b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingNLS.cs index 713c3201a220fa..caec5e57d46022 100644 --- a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingNLS.cs +++ b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingNLS.cs @@ -392,7 +392,7 @@ public override string EncodingName } } - private static string? GetLocalizedEncodingNameResource(int codePage) => + internal static string? GetLocalizedEncodingNameResource(int codePage) => codePage switch { 37 => SR.Globalization_cp_37, @@ -549,12 +549,45 @@ public override string WebName _webName = EncodingTable.GetWebNameFromCodePage(CodePage); if (_webName == null) { - throw new NotSupportedException( - SR.Format(SR.NotSupported_NoCodepageData, CodePage)); + throw new NotSupportedException(SR.Format(SR.NotSupported_NoCodepageData, CodePage)); } } return _webName; } } + + public override string HeaderName + { + get + { + switch (CodePage) + { + case 932: return "iso-2022-jp"; + case 50221: return "iso-2022-jp"; + case 50225: return "euc-kr"; + default: return WebName; + } + } + } + + public override string BodyName + { + get + { + switch (CodePage) + { + case 932: return "iso-2022-jp"; + case 1250: return "iso-8859-2"; + case 1251: return "koi8-r"; + case 1252: return "iso-8859-1"; + case 1253: return "iso-8859-7"; + case 1254: return "iso-8859-9"; + case 50221: return "iso-2022-jp"; + case 50225: return "iso-2022-kr"; + default: return WebName; + } + } + } + } } diff --git a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingTable.cs b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingTable.cs index 3b2a19cc80e0cc..d663e6273e283d 100644 --- a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingTable.cs +++ b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingTable.cs @@ -188,7 +188,7 @@ private static int CompareOrdinal(string s1, string s2, int index, int length) } } - //Nope, we didn't find it. + // Nope, we didn't find it. return null; } } diff --git a/src/libraries/System.Text.Encoding.CodePages/tests/EncodingCodePages.netcoreapp.cs b/src/libraries/System.Text.Encoding.CodePages/tests/EncodingCodePages.netcoreapp.cs new file mode 100644 index 00000000000000..34b899b8734c3d --- /dev/null +++ b/src/libraries/System.Text.Encoding.CodePages/tests/EncodingCodePages.netcoreapp.cs @@ -0,0 +1,67 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.DotNet.RemoteExecutor; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using Xunit; + +namespace System.Text.Tests +{ + public partial class EncodingTest : IClassFixture + { + private class EncodingInformation + { + public EncodingInformation(int codePage, string name) + { + CodePage = codePage; + Name = name; + } + + public int CodePage { get; } + public string Name { get; } + } + + private static EncodingInformation [] s_defaultEncoding = new EncodingInformation [] + { + new EncodingInformation(1200, "utf-16"), + new EncodingInformation(1201, "utf-16BE"), + new EncodingInformation(12000, "utf-32"), + new EncodingInformation(12001, "utf-32BE"), + new EncodingInformation(20127, "us-ascii"), + new EncodingInformation(28591, "iso-8859-1"), + new EncodingInformation(65001, "utf-8") + }; + + [Fact] + public void TestGetEncodings() + { + RemoteExecutor.Invoke(() => { + EncodingInfo [] list = Encoding.GetEncodings(); + + foreach (EncodingInformation eInfo in s_defaultEncoding) + { + Assert.NotNull(list.FirstOrDefault(o => o.CodePage == eInfo.CodePage && o.Name == eInfo.Name)); + } + }).Dispose(); + } + + [Fact] + public void TestGetEncodingsWithProvider() + { + RemoteExecutor.Invoke(() => { + Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); + + foreach (EncodingInfo ei in Encoding.GetEncodings()) + { + Encoding encoding = ei.GetEncoding(); + Assert.Equal(ei.CodePage, encoding.CodePage); + + Assert.True(ei.Name.Equals(encoding.WebName, StringComparison.OrdinalIgnoreCase), $"Encodinginfo.Name `{ei.Name}` != Encoding.WebName `{encoding.WebName}`"); + } + }).Dispose(); + } + } +} diff --git a/src/libraries/System.Text.Encoding.CodePages/tests/System.Text.Encoding.CodePages.Tests.csproj b/src/libraries/System.Text.Encoding.CodePages/tests/System.Text.Encoding.CodePages.Tests.csproj index b02a5646a7f8b3..353fe29ea301c2 100644 --- a/src/libraries/System.Text.Encoding.CodePages/tests/System.Text.Encoding.CodePages.Tests.csproj +++ b/src/libraries/System.Text.Encoding.CodePages/tests/System.Text.Encoding.CodePages.Tests.csproj @@ -6,5 +6,6 @@ + \ No newline at end of file From 6c82451a5484eff402bc1243aff9e56ad4415217 Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed Date: Fri, 5 Jun 2020 17:03:22 -0700 Subject: [PATCH 2/6] Fix build --- .../src/System.Text.Encoding.CodePages.csproj | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/libraries/System.Text.Encoding.CodePages/src/System.Text.Encoding.CodePages.csproj b/src/libraries/System.Text.Encoding.CodePages/src/System.Text.Encoding.CodePages.csproj index 5c0b1f3b7c9c75..3ec8d102ed7c00 100644 --- a/src/libraries/System.Text.Encoding.CodePages/src/System.Text.Encoding.CodePages.csproj +++ b/src/libraries/System.Text.Encoding.CodePages/src/System.Text.Encoding.CodePages.csproj @@ -48,6 +48,9 @@ + + + From 245f7f839a5b014a25a733e4a9569f743614040b Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed Date: Sun, 7 Jun 2020 19:42:12 -0700 Subject: [PATCH 3/6] Fix netstandard build --- .../src/System.Text.Encoding.CodePages.csproj | 1 + .../src/System/Text/BaseCodePageEncoding.cs | 49 +-------------- .../Text/BaseCodePageEncoding.netcoreapp.cs | 60 +++++++++++++++++++ 3 files changed, 62 insertions(+), 48 deletions(-) create mode 100644 src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.netcoreapp.cs diff --git a/src/libraries/System.Text.Encoding.CodePages/src/System.Text.Encoding.CodePages.csproj b/src/libraries/System.Text.Encoding.CodePages/src/System.Text.Encoding.CodePages.csproj index 3ec8d102ed7c00..88407f0f334d26 100644 --- a/src/libraries/System.Text.Encoding.CodePages/src/System.Text.Encoding.CodePages.csproj +++ b/src/libraries/System.Text.Encoding.CodePages/src/System.Text.Encoding.CodePages.csproj @@ -53,6 +53,7 @@ + diff --git a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.cs b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.cs index eb357ca0ce1567..5d26bfff272e9b 100644 --- a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.cs +++ b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.cs @@ -8,7 +8,6 @@ using System.Runtime.InteropServices; using Microsoft.Win32.SafeHandles; using System.Runtime.Serialization; -using System.Runtime.CompilerServices; namespace System.Text { @@ -42,7 +41,7 @@ namespace System.Text // WORD byteReplace; // 2 bytes = 48 // default replacement byte(s) // BYTE[] data; // data section // } - internal abstract class BaseCodePageEncoding : EncodingNLS, ISerializable + internal abstract partial class BaseCodePageEncoding : EncodingNLS, ISerializable { internal const string CODE_PAGE_DATA_FILE_NAME = "codepages.nlp"; @@ -186,52 +185,6 @@ private unsafe void LoadCodePageTables() LoadManagedCodePage(); } - internal static unsafe EncodingInfo [] GetEncodings(CodePagesEncodingProvider provider) - { - lock (s_streamLock) - { - s_codePagesEncodingDataStream.Seek(CODEPAGE_DATA_FILE_HEADER_SIZE, SeekOrigin.Begin); - - int codePagesCount; - fixed (byte* pBytes = &s_codePagesDataHeader[0]) - { - CodePageDataFileHeader* pDataHeader = (CodePageDataFileHeader*)pBytes; - codePagesCount = pDataHeader->CodePageCount; - } - - EncodingInfo [] encodingInfoList = new EncodingInfo[codePagesCount]; - - Span pCodePageIndexBytes = stackalloc byte[sizeof(CodePageIndex)]; // 40 bytes - CodePageIndex* pCodePageIndex = (CodePageIndex*) Unsafe.AsPointer(ref pCodePageIndexBytes.GetPinnableReference()); - - for (int i = 0; i < codePagesCount; i++) - { - s_codePagesEncodingDataStream.Read(pCodePageIndexBytes); - - string codePageName; - switch (pCodePageIndex->CodePage) - { - // Fixup some encoding names. - case 950: codePageName = "big5"; break; - case 10002: codePageName = "x-mac-chinesetrad"; break; - case 20833: codePageName = "x-ebcdic-koreanextended"; break; - default: codePageName = new string((char*) pCodePageIndex); break; - } - - string? resourceName = EncodingNLS.GetLocalizedEncodingNameResource(pCodePageIndex->CodePage); - string? displayName = null; - - if (resourceName != null && resourceName.StartsWith("Globalization_cp_", StringComparison.OrdinalIgnoreCase)) - { - displayName = SR.GetResourceString(resourceName); - } - - encodingInfoList[i] = new EncodingInfo(provider, pCodePageIndex->CodePage, codePageName, displayName ?? codePageName); - } - - return encodingInfoList; - } - } // Look up the code page pointer private unsafe bool FindCodePage(int codePage) diff --git a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.netcoreapp.cs b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.netcoreapp.cs new file mode 100644 index 00000000000000..05040c9b354b65 --- /dev/null +++ b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.netcoreapp.cs @@ -0,0 +1,60 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.IO; +using System.Runtime.Serialization; +using System.Runtime.CompilerServices; + +namespace System.Text +{ + internal abstract partial class BaseCodePageEncoding : EncodingNLS, ISerializable + { + internal static unsafe EncodingInfo [] GetEncodings(CodePagesEncodingProvider provider) + { + lock (s_streamLock) + { + s_codePagesEncodingDataStream.Seek(CODEPAGE_DATA_FILE_HEADER_SIZE, SeekOrigin.Begin); + + int codePagesCount; + fixed (byte* pBytes = &s_codePagesDataHeader[0]) + { + CodePageDataFileHeader* pDataHeader = (CodePageDataFileHeader*)pBytes; + codePagesCount = pDataHeader->CodePageCount; + } + + EncodingInfo [] encodingInfoList = new EncodingInfo[codePagesCount]; + + Span pCodePageIndexBytes = stackalloc byte[sizeof(CodePageIndex)]; // 40 bytes + CodePageIndex* pCodePageIndex = (CodePageIndex*) Unsafe.AsPointer(ref pCodePageIndexBytes.GetPinnableReference()); + + for (int i = 0; i < codePagesCount; i++) + { + s_codePagesEncodingDataStream.Read(pCodePageIndexBytes); + + string codePageName; + switch (pCodePageIndex->CodePage) + { + // Fixup some encoding names. + case 950: codePageName = "big5"; break; + case 10002: codePageName = "x-mac-chinesetrad"; break; + case 20833: codePageName = "x-ebcdic-koreanextended"; break; + default: codePageName = new string((char*) pCodePageIndex); break; + } + + string? resourceName = EncodingNLS.GetLocalizedEncodingNameResource(pCodePageIndex->CodePage); + string? displayName = null; + + if (resourceName != null && resourceName.StartsWith("Globalization_cp_", StringComparison.OrdinalIgnoreCase)) + { + displayName = SR.GetResourceString(resourceName); + } + + encodingInfoList[i] = new EncodingInfo(provider, pCodePageIndex->CodePage, codePageName, displayName ?? codePageName); + } + + return encodingInfoList; + } + } + } +} From 9b1a91aae35d89e88510919c4f8254562c79ae9c Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed Date: Sun, 7 Jun 2020 20:31:33 -0700 Subject: [PATCH 4/6] Address the feedback --- .../src/System/Text/Encoding.cs | 3 +- .../src/System/Text/EncodingInfo.cs | 14 +----- .../src/System/Text/EncodingProvider.cs | 13 ++--- .../src/System/Text/EncodingTable.cs | 33 ++++++++----- .../src/System/Text/EncodingNLS.cs | 49 +++++++------------ 5 files changed, 48 insertions(+), 64 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs index 6b6a130b32c0e9..08257200289838 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs @@ -291,9 +291,8 @@ public static Encoding GetEncoding(string name, GetEncoding(EncodingTable.GetCodePageFromName(name), encoderFallback, decoderFallback); } - // Return a list of all EncodingInfo objects describing all of our encodings /// - /// Get the list from the runtime and all registered encoding providers + /// Get the list from the runtime and all registered encoding providers /// /// The list of the objects public static EncodingInfo[] GetEncodings() diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/EncodingInfo.cs b/src/libraries/System.Private.CoreLib/src/System/Text/EncodingInfo.cs index cb5be89797e5ff..37215fe05ec066 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/EncodingInfo.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/EncodingInfo.cs @@ -53,17 +53,7 @@ internal EncodingInfo(int codePage, string name, string displayName) /// Get the object match the information in the object /// /// The object - public Encoding GetEncoding() - { - Encoding? encoding = null; - - if (Provider != null) - { - encoding = Provider.GetEncoding(CodePage); - } - - return encoding ?? Encoding.GetEncoding(CodePage); - } + public Encoding GetEncoding() => Provider?.GetEncoding(CodePage) ?? Encoding.GetEncoding(CodePage); /// /// Compare this object to other object. @@ -88,6 +78,6 @@ public override int GetHashCode() return CodePage; } - internal EncodingProvider? Provider {get;} + internal EncodingProvider? Provider { get; } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/EncodingProvider.cs b/src/libraries/System.Private.CoreLib/src/System/Text/EncodingProvider.cs index 34543d65aad9eb..340329e25316e9 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/EncodingProvider.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/EncodingProvider.cs @@ -68,10 +68,10 @@ internal static void AddProvider(EncodingProvider provider) internal static Encoding? GetEncodingFromProvider(int codepage) { - if (s_providers == null) + EncodingProvider[]? providers = s_providers; + if (providers == null) return null; - EncodingProvider[] providers = s_providers; foreach (EncodingProvider provider in providers) { Encoding? enc = provider.GetEncoding(codepage); @@ -84,10 +84,10 @@ internal static void AddProvider(EncodingProvider provider) internal static Dictionary? GetEncodingListFromProviders() { - if (s_providers == null) + EncodingProvider[]? providers = s_providers; + if (providers == null) return null; - EncodingProvider[] providers = s_providers; Dictionary result = new Dictionary(); foreach (EncodingProvider provider in providers) @@ -97,10 +97,7 @@ internal static void AddProvider(EncodingProvider provider) { foreach (EncodingInfo ei in encodingInfoList) { - if (!result.TryGetValue(ei.CodePage, out _)) - { - result[ei.CodePage] = ei; - } + result.TryAdd(ei.CodePage, ei); } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/EncodingTable.cs b/src/libraries/System.Private.CoreLib/src/System/Text/EncodingTable.cs index b568f382c983d7..5bcca1e952c3c8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/EncodingTable.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/EncodingTable.cs @@ -106,14 +106,17 @@ private static int InternalGetCodePageFromName(string name) // Return a list of all EncodingInfo objects describing all of our encodings internal static EncodingInfo[] GetEncodings() { - EncodingInfo[] arrayEncodingInfo = new EncodingInfo[s_mappedCodePages.Length]; + ushort[] mappedCodePages = s_mappedCodePages; + EncodingInfo[] arrayEncodingInfo = new EncodingInfo[mappedCodePages.Length]; + string webNames = s_webNames; + int[] webNameIndices = s_webNameIndices; - for (int i = 0; i < s_mappedCodePages.Length; i++) + for (int i = 0; i < mappedCodePages.Length; i++) { arrayEncodingInfo[i] = new EncodingInfo( - s_mappedCodePages[i], - s_webNames[s_webNameIndices[i]..s_webNameIndices[i + 1]], - GetDisplayName(s_mappedCodePages[i], i) + mappedCodePages[i], + webNames[webNameIndices[i]..webNameIndices[i + 1]], + GetDisplayName(mappedCodePages[i], i) ); } @@ -123,19 +126,25 @@ internal static EncodingInfo[] GetEncodings() internal static EncodingInfo[] GetEncodings(Dictionary encodingInfoList) { Debug.Assert(encodingInfoList != null); + ushort[] mappedCodePages = s_mappedCodePages; + string webNames = s_webNames; + int[] webNameIndices = s_webNameIndices; - for (int i = 0; i < s_mappedCodePages.Length; i++) + for (int i = 0; i < mappedCodePages.Length; i++) { - if (!encodingInfoList.TryGetValue(s_mappedCodePages[i], out _)) + if (!encodingInfoList.TryGetValue(mappedCodePages[i], out _)) { - encodingInfoList[s_mappedCodePages[i]] = new EncodingInfo(s_mappedCodePages[i], s_webNames[s_webNameIndices[i]..s_webNameIndices[i + 1]], - GetDisplayName(s_mappedCodePages[i], i)); + encodingInfoList[mappedCodePages[i]] = new EncodingInfo(mappedCodePages[i], webNames[webNameIndices[i]..webNameIndices[i + 1]], + GetDisplayName(mappedCodePages[i], i)); } } - var collection = encodingInfoList.Values; - EncodingInfo[] result = new EncodingInfo[collection.Count]; - collection.CopyTo(result, 0); + var result = new EncodingInfo[encodingInfoList.Count]; + int j = 0; + foreach (KeyValuePair pair in encodingInfoList) + { + result[j++] = pair.Value; + } return result; } diff --git a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingNLS.cs b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingNLS.cs index caec5e57d46022..0bcfef5d209101 100644 --- a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingNLS.cs +++ b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingNLS.cs @@ -556,38 +556,27 @@ public override string WebName } } - public override string HeaderName - { - get + public override string HeaderName => + CodePage switch { - switch (CodePage) - { - case 932: return "iso-2022-jp"; - case 50221: return "iso-2022-jp"; - case 50225: return "euc-kr"; - default: return WebName; - } - } - } + 932 => "iso-2022-jp", + 50221 => "iso-2022-jp", + 50225 => "euc-kr", + _ => WebName, + }; - public override string BodyName - { - get + public override string BodyName => + CodePage switch { - switch (CodePage) - { - case 932: return "iso-2022-jp"; - case 1250: return "iso-8859-2"; - case 1251: return "koi8-r"; - case 1252: return "iso-8859-1"; - case 1253: return "iso-8859-7"; - case 1254: return "iso-8859-9"; - case 50221: return "iso-2022-jp"; - case 50225: return "iso-2022-kr"; - default: return WebName; - } - } - } - + 932 => "iso-2022-jp", + 1250 => "iso-8859-2", + 1251 => "koi8-r", + 1252 => "iso-8859-1", + 1253 => "iso-8859-7", + 1254 => "iso-8859-9", + 50221 => "iso-2022-jp", + 50225 => "iso-2022-kr", + _ => WebName, + }; } } From 173de50c631f95edb38726006c8b9e192417f49b Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed Date: Mon, 8 Jun 2020 14:16:36 -0700 Subject: [PATCH 5/6] More Feedback --- .../src/System/Text/EncodingInfo.cs | 15 ++++----------- .../src/System/Text/EncodingTable.cs | 4 ++-- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/EncodingInfo.cs b/src/libraries/System.Private.CoreLib/src/System/Text/EncodingInfo.cs index 37215fe05ec066..b1ce0cd5c62672 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/EncodingInfo.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/EncodingInfo.cs @@ -59,20 +59,13 @@ internal EncodingInfo(int codePage, string name, string displayName) /// Compare this object to other object. /// /// The other object to compare with this object - /// True if the value object is EncodingInfo object and has a codepage equals to this EncodingInfo object codepage. Othewise, it returns False - public override bool Equals(object? value) - { - if (value is EncodingInfo that) - { - return CodePage == that.CodePage; - } - return false; - } + /// True if the value object is EncodingInfo object and has a codepage equals to this EncodingInfo object codepage. Otherwise, it returns False + public override bool Equals(object? value) => value is EncodingInfo that && CodePage == that.CodePage; /// - /// Get a hashcode represent the current EncodingInfo object + /// Get a hashcode representing the current EncodingInfo object. /// - /// The integer value represent the hash code of the EncodingInfo object. The hashcode is basically the encoding codepage value + /// The integer value representing the hash code of the EncodingInfo object. public override int GetHashCode() { return CodePage; diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/EncodingTable.cs b/src/libraries/System.Private.CoreLib/src/System/Text/EncodingTable.cs index 5bcca1e952c3c8..40904f1cbc2c38 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/EncodingTable.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/EncodingTable.cs @@ -132,10 +132,10 @@ internal static EncodingInfo[] GetEncodings(Dictionary encodi for (int i = 0; i < mappedCodePages.Length; i++) { - if (!encodingInfoList.TryGetValue(mappedCodePages[i], out _)) + if (!encodingInfoList.ContainsKey(mappedCodePages[i])) { encodingInfoList[mappedCodePages[i]] = new EncodingInfo(mappedCodePages[i], webNames[webNameIndices[i]..webNameIndices[i + 1]], - GetDisplayName(mappedCodePages[i], i)); + GetDisplayName(mappedCodePages[i], i)); } } From f0e05e99ee1b3ade4d67c25fed7bc129cca9dcc5 Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed Date: Mon, 8 Jun 2020 15:26:21 -0700 Subject: [PATCH 6/6] More Feedback --- .../System/Text/BaseCodePageEncoding.netcoreapp.cs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.netcoreapp.cs b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.netcoreapp.cs index 05040c9b354b65..9c416ac0ef4fba 100644 --- a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.netcoreapp.cs +++ b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.netcoreapp.cs @@ -25,24 +25,24 @@ internal static unsafe EncodingInfo [] GetEncodings(CodePagesEncodingProvider pr EncodingInfo [] encodingInfoList = new EncodingInfo[codePagesCount]; - Span pCodePageIndexBytes = stackalloc byte[sizeof(CodePageIndex)]; // 40 bytes - CodePageIndex* pCodePageIndex = (CodePageIndex*) Unsafe.AsPointer(ref pCodePageIndexBytes.GetPinnableReference()); + CodePageIndex codePageIndex = default; + Span pCodePageIndex = new Span(&codePageIndex, Unsafe.SizeOf()); for (int i = 0; i < codePagesCount; i++) { - s_codePagesEncodingDataStream.Read(pCodePageIndexBytes); + s_codePagesEncodingDataStream.Read(pCodePageIndex); string codePageName; - switch (pCodePageIndex->CodePage) + switch (codePageIndex.CodePage) { // Fixup some encoding names. case 950: codePageName = "big5"; break; case 10002: codePageName = "x-mac-chinesetrad"; break; case 20833: codePageName = "x-ebcdic-koreanextended"; break; - default: codePageName = new string((char*) pCodePageIndex); break; + default: codePageName = new string(&codePageIndex.CodePageName); break; } - string? resourceName = EncodingNLS.GetLocalizedEncodingNameResource(pCodePageIndex->CodePage); + string? resourceName = EncodingNLS.GetLocalizedEncodingNameResource(codePageIndex.CodePage); string? displayName = null; if (resourceName != null && resourceName.StartsWith("Globalization_cp_", StringComparison.OrdinalIgnoreCase)) @@ -50,7 +50,7 @@ internal static unsafe EncodingInfo [] GetEncodings(CodePagesEncodingProvider pr displayName = SR.GetResourceString(resourceName); } - encodingInfoList[i] = new EncodingInfo(provider, pCodePageIndex->CodePage, codePageName, displayName ?? codePageName); + encodingInfoList[i] = new EncodingInfo(provider, codePageIndex.CodePage, codePageName, displayName ?? codePageName); } return encodingInfoList;