From 38da3901b7a638c57607bc5ab4ce5bc28cb1bd78 Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Wed, 20 Feb 2019 16:37:44 -0800 Subject: [PATCH 1/9] Add Utf8String experimental package and skeleton ref APIs --- .../packageIndex.json | 6 + pkg/descriptions.json | 7 + src/Directory.Build.props | 9 ++ src/System.Utf8String/Directory.Build.props | 11 ++ src/System.Utf8String/System.Utf8String.sln | 50 ++++++++ .../pkg/System.Utf8String.pkgproj | 11 ++ .../ref/Configurations.props | 8 ++ .../ref/System.Utf8String.cs | 33 +++++ .../ref/System.Utf8String.csproj | 14 ++ .../src/Configurations.props | 9 ++ .../src/Resources/Strings.resx | 120 ++++++++++++++++++ .../src/System.Utf8String.csproj | 14 ++ .../tests/Configurations.props | 8 ++ .../tests/System.Utf8String.Tests.csproj | 12 ++ 14 files changed, 312 insertions(+) create mode 100644 src/System.Utf8String/Directory.Build.props create mode 100644 src/System.Utf8String/System.Utf8String.sln create mode 100644 src/System.Utf8String/pkg/System.Utf8String.pkgproj create mode 100644 src/System.Utf8String/ref/Configurations.props create mode 100644 src/System.Utf8String/ref/System.Utf8String.cs create mode 100644 src/System.Utf8String/ref/System.Utf8String.csproj create mode 100644 src/System.Utf8String/src/Configurations.props create mode 100644 src/System.Utf8String/src/Resources/Strings.resx create mode 100644 src/System.Utf8String/src/System.Utf8String.csproj create mode 100644 src/System.Utf8String/tests/Configurations.props create mode 100644 src/System.Utf8String/tests/System.Utf8String.Tests.csproj diff --git a/pkg/Microsoft.Private.PackageBaseline/packageIndex.json b/pkg/Microsoft.Private.PackageBaseline/packageIndex.json index 0f193a2b57ef..4a6e084e0fa1 100644 --- a/pkg/Microsoft.Private.PackageBaseline/packageIndex.json +++ b/pkg/Microsoft.Private.PackageBaseline/packageIndex.json @@ -5307,6 +5307,12 @@ "uap10.0.16299": "4.0.1.0" } }, + "System.Utf8String": { + "InboxOn": {}, + 
"AssemblyVersionInPackageVersion": { + "4.0.0.0": "4.6.0" + } + }, "System.ValueTuple": { "StableVersions": [ "4.3.0", diff --git a/pkg/descriptions.json b/pkg/descriptions.json index 0512d12459de..4afa30ab25ee 100644 --- a/pkg/descriptions.json +++ b/pkg/descriptions.json @@ -2131,6 +2131,13 @@ "System.Transactions.TransactionScope" ] }, + { + "Name": "System.Utf8String", + "Description": "Provides types for representation of UTF-8 string data.", + "CommonTypes": [ + "System.Utf8String" + ] + }, { "Name": "System.ValueTuple", "Description": "Provides the System.ValueTuple structs, which implement the underlying types for tuples in C# and Visual Basic.", diff --git a/src/Directory.Build.props b/src/Directory.Build.props index 5e9a4ce72efb..e8481eaba3d8 100644 --- a/src/Directory.Build.props +++ b/src/Directory.Build.props @@ -30,4 +30,13 @@ + + + + + diff --git a/src/System.Utf8String/Directory.Build.props b/src/System.Utf8String/Directory.Build.props new file mode 100644 index 000000000000..25e3ba27477f --- /dev/null +++ b/src/System.Utf8String/Directory.Build.props @@ -0,0 +1,11 @@ + + + + + 4.0.0.0 + + Open + + true + + diff --git a/src/System.Utf8String/System.Utf8String.sln b/src/System.Utf8String/System.Utf8String.sln new file mode 100644 index 000000000000..1c40f6b5fb60 --- /dev/null +++ b/src/System.Utf8String/System.Utf8String.sln @@ -0,0 +1,50 @@ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.27213.1 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Utf8String.Tests", "tests\System.Utf8String.Tests.csproj", "{72E9FB32-4692-4692-A10B-9F053F8F1A88}" + ProjectSection(ProjectDependencies) = postProject + {D4266847-6692-481B-9459-6141DB7DA339} = {D4266847-6692-481B-9459-6141DB7DA339} + EndProjectSection +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Utf8String", "src\System.Utf8String.csproj", 
"{D4266847-6692-481B-9459-6141DB7DA339}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Utf8String", "ref\System.Utf8String.csproj", "{7AF57E6B-2CED-45C9-8BCA-5BBA60D018E0}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{7EC8921F-E96F-445B-AA33-453515641D93}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ref", "ref", "{8691446A-CA54-49FD-87B9-57A0C6B48095}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{FEB087F5-EF72-429D-8A0E-7636B84A1537}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {D4266847-6692-481B-9459-6141DB7DA339}.Debug|Any CPU.ActiveCfg = netcoreapp-Debug|Any CPU + {D4266847-6692-481B-9459-6141DB7DA339}.Debug|Any CPU.Build.0 = netcoreapp-Debug|Any CPU + {D4266847-6692-481B-9459-6141DB7DA339}.Release|Any CPU.ActiveCfg = netcoreapp-Release|Any CPU + {D4266847-6692-481B-9459-6141DB7DA339}.Release|Any CPU.Build.0 = netcoreapp-Release|Any CPU + {7AF57E6B-2CED-45C9-8BCA-5BBA60D018E0}.Debug|Any CPU.ActiveCfg = netcoreapp-Debug|Any CPU + {7AF57E6B-2CED-45C9-8BCA-5BBA60D018E0}.Debug|Any CPU.Build.0 = netcoreapp-Debug|Any CPU + {7AF57E6B-2CED-45C9-8BCA-5BBA60D018E0}.Release|Any CPU.ActiveCfg = netcoreapp-Release|Any CPU + {7AF57E6B-2CED-45C9-8BCA-5BBA60D018E0}.Release|Any CPU.Build.0 = netcoreapp-Release|Any CPU + {72E9FB32-4692-4692-A10B-9F053F8F1A88}.Debug|Any CPU.ActiveCfg = netcoreapp-Debug|Any CPU + {72E9FB32-4692-4692-A10B-9F053F8F1A88}.Debug|Any CPU.Build.0 = netcoreapp-Debug|Any CPU + {72E9FB32-4692-4692-A10B-9F053F8F1A88}.Release|Any CPU.ActiveCfg = netcoreapp-Release|Any CPU + {72E9FB32-4692-4692-A10B-9F053F8F1A88}.Release|Any CPU.Build.0 = netcoreapp-Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode 
= FALSE + EndGlobalSection + GlobalSection(NestedProjects) = preSolution + {D4266847-6692-481B-9459-6141DB7DA339} = {7EC8921F-E96F-445B-AA33-453515641D93} + {7AF57E6B-2CED-45C9-8BCA-5BBA60D018E0} = {8691446A-CA54-49FD-87B9-57A0C6B48095} + {72E9FB32-4692-4692-A10B-9F053F8F1A88} = {FEB087F5-EF72-429D-8A0E-7636B84A1537} + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {7196F6AB-8F22-4E4D-B6D1-3C2CFF86229C} + EndGlobalSection +EndGlobal diff --git a/src/System.Utf8String/pkg/System.Utf8String.pkgproj b/src/System.Utf8String/pkg/System.Utf8String.pkgproj new file mode 100644 index 000000000000..d0ddd94ca65a --- /dev/null +++ b/src/System.Utf8String/pkg/System.Utf8String.pkgproj @@ -0,0 +1,11 @@ + + + + + + netcoreapp3.0; + + + + + diff --git a/src/System.Utf8String/ref/Configurations.props b/src/System.Utf8String/ref/Configurations.props new file mode 100644 index 000000000000..d3ac8a63c74a --- /dev/null +++ b/src/System.Utf8String/ref/Configurations.props @@ -0,0 +1,8 @@ + + + + + netcoreapp; + + + diff --git a/src/System.Utf8String/ref/System.Utf8String.cs b/src/System.Utf8String/ref/System.Utf8String.cs new file mode 100644 index 000000000000..eae099c5ac27 --- /dev/null +++ b/src/System.Utf8String/ref/System.Utf8String.cs @@ -0,0 +1,33 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// ------------------------------------------------------------------------------ +// Changes to this file must follow the http://aka.ms/api-review process. 
+// ------------------------------------------------------------------------------ + +namespace System +{ + public sealed partial class Utf8String : IEquatable + { + public static readonly Utf8String Empty; + public Utf8String(ReadOnlySpan value) { } + public Utf8String(byte[] value, int startIndex, int length) { } + [CLSCompliant(false)] + public unsafe Utf8String(byte* value) { } + public Utf8String(ReadOnlySpan value) { } + public Utf8String(char[] value, int startIndex, int length) { } + [CLSCompliant(false)] + public unsafe Utf8String(char* value) { } + public Utf8String(string value) { } + public static bool operator ==(Utf8String a, Utf8String b) => throw null; + public static bool operator !=(Utf8String a, Utf8String b) => throw null; + public int Length => throw null; + [ComponentModel.EditorBrowsable(ComponentModel.EditorBrowsableState.Never)] // for compiler use only + public override bool Equals(object obj) => throw null; + public bool Equals(Utf8String value) => throw null; + public static bool Equals(Utf8String a, Utf8String b) => throw null; + public override int GetHashCode() => throw null; + public ref readonly byte GetPinnableReference() => throw null; + public override string ToString() => throw null; + } +} diff --git a/src/System.Utf8String/ref/System.Utf8String.csproj b/src/System.Utf8String/ref/System.Utf8String.csproj new file mode 100644 index 000000000000..1c4f8ef6495a --- /dev/null +++ b/src/System.Utf8String/ref/System.Utf8String.csproj @@ -0,0 +1,14 @@ + + + true + {7AF57E6B-2CED-45C9-8BCA-5BBA60D018E0} + netcoreapp-Debug;netcoreapp-Release + + + + + + + + + diff --git a/src/System.Utf8String/src/Configurations.props b/src/System.Utf8String/src/Configurations.props new file mode 100644 index 000000000000..e75400d142ff --- /dev/null +++ b/src/System.Utf8String/src/Configurations.props @@ -0,0 +1,9 @@ + + + + + netcoreapp-Windows_NT; + netcoreapp-Unix; + + + diff --git a/src/System.Utf8String/src/Resources/Strings.resx 
b/src/System.Utf8String/src/Resources/Strings.resx new file mode 100644 index 000000000000..1af7de150c99 --- /dev/null +++ b/src/System.Utf8String/src/Resources/Strings.resx @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text/microsoft-resx + + + 2.0 + + + System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + \ No newline at end of file diff --git a/src/System.Utf8String/src/System.Utf8String.csproj b/src/System.Utf8String/src/System.Utf8String.csproj new file mode 100644 index 000000000000..3b2f458c1af5 --- /dev/null +++ b/src/System.Utf8String/src/System.Utf8String.csproj @@ -0,0 +1,14 @@ + + + {D4266847-6692-481B-9459-6141DB7DA339} + true + true + netcoreapp-Unix-Debug;netcoreapp-Unix-Release;netcoreapp-Windows_NT-Debug;netcoreapp-Windows_NT-Release; + + + + + + + + diff --git a/src/System.Utf8String/tests/Configurations.props b/src/System.Utf8String/tests/Configurations.props new file mode 100644 index 000000000000..d3ac8a63c74a --- /dev/null +++ b/src/System.Utf8String/tests/Configurations.props @@ -0,0 +1,8 @@ + + + + + netcoreapp; + + + diff --git a/src/System.Utf8String/tests/System.Utf8String.Tests.csproj b/src/System.Utf8String/tests/System.Utf8String.Tests.csproj new file mode 100644 index 000000000000..17349ae71775 --- /dev/null +++ b/src/System.Utf8String/tests/System.Utf8String.Tests.csproj @@ -0,0 +1,12 @@ + + + true + {72E9FB32-4692-4692-A10B-9F053F8F1A88} + true + netcoreapp-Debug;netcoreapp-Release; + true + + + + + From b1058492a82fad7e4e8f92ba12548ac81f5a315f Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Wed, 27 Feb 2019 14:36:12 -0800 Subject: [PATCH 2/9] Add some basic Utf8String tests --- .../ref/System.Utf8String.cs | 6 +- .../src/System.Utf8String.csproj | 3 +- 
.../src/Utf8StringExtensions.cs | 27 +++ .../tests/System.Utf8String.Tests.csproj | 10 +- .../tests/System/Utf8StringTests.Ctor.cs | 212 ++++++++++++++++++ .../tests/System/Utf8StringTests.cs | 28 +++ .../tests/System/Utf8TestUtilities.cs | 28 +++ .../tests/Xunit/SpanAssert.cs | 22 ++ 8 files changed, 333 insertions(+), 3 deletions(-) create mode 100644 src/System.Utf8String/src/Utf8StringExtensions.cs create mode 100644 src/System.Utf8String/tests/System/Utf8StringTests.Ctor.cs create mode 100644 src/System.Utf8String/tests/System/Utf8StringTests.cs create mode 100644 src/System.Utf8String/tests/System/Utf8TestUtilities.cs create mode 100644 src/System.Utf8String/tests/Xunit/SpanAssert.cs diff --git a/src/System.Utf8String/ref/System.Utf8String.cs b/src/System.Utf8String/ref/System.Utf8String.cs index eae099c5ac27..3673502e2ca6 100644 --- a/src/System.Utf8String/ref/System.Utf8String.cs +++ b/src/System.Utf8String/ref/System.Utf8String.cs @@ -22,12 +22,16 @@ public Utf8String(string value) { } public static bool operator ==(Utf8String a, Utf8String b) => throw null; public static bool operator !=(Utf8String a, Utf8String b) => throw null; public int Length => throw null; - [ComponentModel.EditorBrowsable(ComponentModel.EditorBrowsableState.Never)] // for compiler use only public override bool Equals(object obj) => throw null; public bool Equals(Utf8String value) => throw null; public static bool Equals(Utf8String a, Utf8String b) => throw null; public override int GetHashCode() => throw null; + [ComponentModel.EditorBrowsable(ComponentModel.EditorBrowsableState.Never)] // for compiler use only public ref readonly byte GetPinnableReference() => throw null; public override string ToString() => throw null; } + public static partial class Utf8StringExtensions + { + public static ReadOnlySpan AsBytes(this Utf8String text) => throw null; + } } diff --git a/src/System.Utf8String/src/System.Utf8String.csproj b/src/System.Utf8String/src/System.Utf8String.csproj index 
3b2f458c1af5..4d709abdbb61 100644 --- a/src/System.Utf8String/src/System.Utf8String.csproj +++ b/src/System.Utf8String/src/System.Utf8String.csproj @@ -4,9 +4,10 @@ true true netcoreapp-Unix-Debug;netcoreapp-Unix-Release;netcoreapp-Windows_NT-Debug;netcoreapp-Windows_NT-Release; + System - + diff --git a/src/System.Utf8String/src/Utf8StringExtensions.cs b/src/System.Utf8String/src/Utf8StringExtensions.cs new file mode 100644 index 000000000000..6ed44a799a60 --- /dev/null +++ b/src/System.Utf8String/src/Utf8StringExtensions.cs @@ -0,0 +1,27 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Text; +using Internal.Runtime.CompilerServices; + +namespace System +{ + public static class Utf8StringExtensions + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static ReadOnlySpan AsBytes(this Utf8String text) + { + if (text is null) + { + return default; + } + + return MemoryMarshal.CreateReadOnlySpan(ref Unsafe.AsRef(in text.GetPinnableReference()), text.Length); + } + } +} diff --git a/src/System.Utf8String/tests/System.Utf8String.Tests.csproj b/src/System.Utf8String/tests/System.Utf8String.Tests.csproj index 17349ae71775..0f3c0b403ca3 100644 --- a/src/System.Utf8String/tests/System.Utf8String.Tests.csproj +++ b/src/System.Utf8String/tests/System.Utf8String.Tests.csproj @@ -5,8 +5,16 @@ true netcoreapp-Debug;netcoreapp-Release; true + System + + + + true - + + + + diff --git a/src/System.Utf8String/tests/System/Utf8StringTests.Ctor.cs b/src/System.Utf8String/tests/System/Utf8StringTests.Ctor.cs new file mode 100644 index 000000000000..883b00841895 --- /dev/null +++ b/src/System.Utf8String/tests/System/Utf8StringTests.Ctor.cs @@ -0,0 +1,212 @@ +// Licensed to 
the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Reflection; +using Xunit; + +using static System.Tests.Utf8TestUtilities; + +namespace System.Tests +{ + public unsafe partial class Utf8StringTests + { + [Fact] + public static void Ctor_ByteArrayOffset_Empty_ReturnsEmpty() + { + byte[] inputData = new byte[] { (byte)'H', (byte)'e', (byte)'l', (byte)'l', (byte)'o' }; + Assert.Same(Utf8String.Empty, new Utf8String(inputData, 3, 0)); + } + + [Fact] + public static void Ctor_ByteArrayOffset_ValidData_ReturnsOriginalContents() + { + byte[] inputData = new byte[] { (byte)'x', (byte)'H', (byte)'e', (byte)'l', (byte)'l', (byte)'o', (byte)'x' }; + Utf8String expected = u8("Hello"); + + var actual = new Utf8String(inputData, 1, 5); + Assert.Equal(expected, actual); + } + + [Fact] + public static void Ctor_ByteArrayOffset_InvalidData_FixesUpData() + { + byte[] inputData = new byte[] { (byte)'x', (byte)'H', (byte)'e', (byte)0xFF, (byte)'l', (byte)'o', (byte)'x' }; + Utf8String expected = u8("He\uFFFDlo"); + + var actual = new Utf8String(inputData, 1, 5); + Assert.Equal(expected, actual); + } + + [Fact] + public static void Ctor_BytePointer_NullOrEmpty_ReturnsEmpty() + { + byte nullByte = 0; + + Assert.Same(Utf8String.Empty, new Utf8String((byte*)null)); + Assert.Same(Utf8String.Empty, new Utf8String(&nullByte)); + } + + [Fact] + public static void Ctor_BytePointer_ValidData_ReturnsOriginalContents() + { + byte[] inputData = new byte[] { (byte)'H', (byte)'e', (byte)'l', (byte)'l', (byte)'o', (byte)'\0' }; + Utf8String expected = u8("Hello"); + + fixed (byte* pData = inputData) + { + var actual = new Utf8String(pData); + Assert.Equal(expected, actual); + } + } + + [Fact] + public static void Ctor_BytePointer_InvalidData_FixesUpData() + { + byte[] 
inputData = new byte[] { (byte)'H', (byte)'e', (byte)0xFF, (byte)'l', (byte)'o', (byte)'\0' }; + Utf8String expected = u8("He\uFFFDlo"); + + fixed (byte* pData = inputData) + { + var actual = new Utf8String(pData); + Assert.Equal(expected, actual); + } + } + + [Fact] + public static void Ctor_ByteSpan_Empty_ReturnsEmpty() + { + Assert.Same(Utf8String.Empty, new Utf8String(ReadOnlySpan.Empty)); + } + + [Fact] + public static void Ctor_ByteSpan_ValidData_ReturnsOriginalContents() + { + byte[] inputData = new byte[] { (byte)'H', (byte)'e', (byte)'l', (byte)'l', (byte)'o' }; + Utf8String expected = u8("Hello"); + + var actual = new Utf8String(inputData.AsSpan()); + Assert.Equal(expected, actual); + } + + [Fact] + public static void Ctor_ByteSpan_InvalidData_FixesUpData() + { + byte[] inputData = new byte[] { (byte)'H', (byte)'e', (byte)0xFF, (byte)'l', (byte)'o' }; + Utf8String expected = u8("He\uFFFDlo"); + + var actual = new Utf8String(inputData.AsSpan()); + Assert.Equal(expected, actual); + } + + [Fact] + public static void Ctor_CharArrayOffset_Empty_ReturnsEmpty() + { + char[] inputData = "Hello".ToCharArray(); + Assert.Same(Utf8String.Empty, new Utf8String(inputData, 3, 0)); + } + + [Fact] + public static void Ctor_CharArrayOffset_ValidData_ReturnsOriginalContents() + { + char[] inputData = "xHellox".ToCharArray(); + Utf8String expected = u8("Hello"); + + var actual = new Utf8String(inputData, 1, 5); + Assert.Equal(expected, actual); + } + + [Fact] + public static void Ctor_CharArrayOffset_InvalidData_FixesUpData() + { + char[] inputData = new char[] { 'x', 'H', 'e', '\uD800', 'l', 'o', 'x' }; + Utf8String expected = u8("He\uFFFDlo"); + + var actual = new Utf8String(inputData, 1, 5); + Assert.Equal(expected, actual); + } + + [Fact] + public static void Ctor_CharPointer_NullOrEmpty_ReturnsEmpty() + { + char nullChar = '\0'; + + Assert.Same(Utf8String.Empty, new Utf8String((char*)null)); + Assert.Same(Utf8String.Empty, new Utf8String(&nullChar)); + } + + [Fact] + 
public static void Ctor_CharPointer_ValidData_ReturnsOriginalContents() + { + const string inputData = "Hello"; + Utf8String expected = u8("Hello"); + + fixed (char* pData = inputData) + { + var actual = new Utf8String(pData); + Assert.Equal(expected, actual); + } + } + + [Fact] + public static void Ctor_CharPointer_InvalidData_FixesUpData() + { + char[] inputData = new char[] { 'H', 'e', '\uD800', 'l', 'o', '\0' }; + Utf8String expected = u8("He\uFFFDlo"); + + fixed (char* pData = inputData) + { + var actual = new Utf8String(pData); + Assert.Equal(expected, actual); + } + } + + [Fact] + public static void Ctor_CharSpan_Empty_ReturnsEmpty() + { + Assert.Same(Utf8String.Empty, new Utf8String(ReadOnlySpan.Empty)); + } + + [Fact] + public static void Ctor_CharSpan_ValidData_ReturnsOriginalContents() + { + char[] inputData = "Hello".ToCharArray(); + Utf8String expected = u8("Hello"); + + var actual = new Utf8String(inputData.AsSpan()); + Assert.Equal(expected, actual); + } + + [Fact] + public static void Ctor_CharSpan_InvalidData_FixesUpData() + { + char[] inputData = new char[] { 'H', 'e', '\uD800', 'l', 'o' }; + Utf8String expected = u8("He\uFFFDlo"); + + var actual = new Utf8String(inputData.AsSpan()); + Assert.Equal(expected, actual); + } + + [Fact] + public static void Ctor_String_NullOrEmpty_ReturnsEmpty() + { + Assert.Same(Utf8String.Empty, new Utf8String((string)null)); + Assert.Same(Utf8String.Empty, new Utf8String(string.Empty)); + } + + [Fact] + public static void Ctor_String_ValidData_ReturnsOriginalContents() + { + Assert.Equal(u8("Hello"), new Utf8String("Hello")); + } + + [Fact] + public static void Ctor_String_InvalidData_FixesUpData() + { + Assert.Equal(u8("He\uFFFDlo"), new Utf8String("He\uD800lo")); + } + } +} diff --git a/src/System.Utf8String/tests/System/Utf8StringTests.cs b/src/System.Utf8String/tests/System/Utf8StringTests.cs new file mode 100644 index 000000000000..08edf43634e6 --- /dev/null +++ 
b/src/System.Utf8String/tests/System/Utf8StringTests.cs @@ -0,0 +1,28 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Reflection; +using Xunit; + +namespace System.Tests +{ + public unsafe partial class Utf8StringTests + { + [Fact] + public static void Empty_HasLengthZero() + { + Assert.Equal(0, Utf8String.Empty.Length); + SpanAssert.Equal(ReadOnlySpan.Empty, Utf8String.Empty.AsBytes()); + } + + [Fact] + public static void Empty_ReturnsSingleton() + { + Assert.Same(Utf8String.Empty, Utf8String.Empty); + } + } +} diff --git a/src/System.Utf8String/tests/System/Utf8TestUtilities.cs b/src/System.Utf8String/tests/System/Utf8TestUtilities.cs new file mode 100644 index 000000000000..37edbb27b3ec --- /dev/null +++ b/src/System.Utf8String/tests/System/Utf8TestUtilities.cs @@ -0,0 +1,28 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Text; + +namespace System.Tests +{ + public static class Utf8TestUtilities + { + /// + /// Mimics returning a literal instance. + /// + public static Utf8String u8(string str) + { + if (string.IsNullOrEmpty(str)) + { + return Utf8String.Empty; + } + + // TODO: Call into ctor. + + return new Utf8String(str); + } + } +} diff --git a/src/System.Utf8String/tests/Xunit/SpanAssert.cs b/src/System.Utf8String/tests/Xunit/SpanAssert.cs new file mode 100644 index 000000000000..919744945be0 --- /dev/null +++ b/src/System.Utf8String/tests/Xunit/SpanAssert.cs @@ -0,0 +1,22 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; + +namespace Xunit +{ + public static class SpanAssert + { + public static void Equal(ReadOnlySpan a, ReadOnlySpan b, IEqualityComparer comparer = null) where T : IEquatable + { + Assert.Equal(a.ToArray(), b.ToArray(), comparer); + } + + public static void Equal(Span a, Span b, IEqualityComparer comparer = null) where T : IEquatable + { + Assert.Equal(a.ToArray(), b.ToArray(), comparer); + } + } +} From 98fbbcf5f131dd996ef4d3ae66201fa23cff39de Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Sun, 10 Mar 2019 23:12:42 -0700 Subject: [PATCH 3/9] Add Utf8String basic tests --- .../ref/Configurations.props | 3 +- .../ref/CoreFx.Private.TestUtilities.csproj | 11 +- ...CoreFx.Private.TestUtilities.netcoreapp.cs | 30 ++ .../src/CoreFx.Private.TestUtilities.csproj | 7 + .../System/Buffers/BoundedMemory.Creation.cs | 67 ++++ .../src/System/Buffers/BoundedMemory.Unix.cs | 47 +++ .../System/Buffers/BoundedMemory.Windows.cs | 333 ++++++++++++++++++ .../src/System/Buffers/BoundedMemory.cs | 49 +++ .../src/System/Buffers/PoisonPagePlacement.cs | 26 ++ src/System.Runtime/ref/System.Runtime.cs | 8 + .../tests/System.Runtime.Tests.csproj | 3 + .../Unicode/Utf8Tests.ToBytes.netcoreapp.cs | 264 ++++++++++++++ .../Unicode/Utf8Tests.ToChars.netcoreapp.cs | 304 ++++++++++++++++ .../Text/Unicode/Utf8Tests.netcoreapp.cs | 141 ++++++++ .../ref/System.Utf8String.cs | 77 +++- .../src/System.Utf8String.csproj | 3 - .../src/Utf8StringExtensions.cs | 27 -- .../tests/System.Utf8String.Tests.csproj | 7 +- .../tests/System/Char8Tests.cs | 112 ++++++ .../tests/System/MemoryTests.cs | 156 ++++++++ .../tests/System/ReflectionTests.cs | 36 ++ .../tests/System/Utf8ExtensionsTests.cs | 209 +++++++++++ .../tests/System/Utf8StringTests.Ctor.cs | 53 ++- .../tests/System/Utf8StringTests.Substring.cs | 134 +++++++ 
.../tests/System/Utf8StringTests.cs | 158 ++++++++- .../tests/System/Utf8TestUtilities.cs | 61 +++- .../tests/Xunit/SpanAssert.cs | 18 +- 27 files changed, 2266 insertions(+), 78 deletions(-) create mode 100644 src/CoreFx.Private.TestUtilities/ref/CoreFx.Private.TestUtilities.netcoreapp.cs create mode 100644 src/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Creation.cs create mode 100644 src/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Unix.cs create mode 100644 src/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Windows.cs create mode 100644 src/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.cs create mode 100644 src/CoreFx.Private.TestUtilities/src/System/Buffers/PoisonPagePlacement.cs create mode 100644 src/System.Runtime/tests/System/Text/Unicode/Utf8Tests.ToBytes.netcoreapp.cs create mode 100644 src/System.Runtime/tests/System/Text/Unicode/Utf8Tests.ToChars.netcoreapp.cs create mode 100644 src/System.Runtime/tests/System/Text/Unicode/Utf8Tests.netcoreapp.cs delete mode 100644 src/System.Utf8String/src/Utf8StringExtensions.cs create mode 100644 src/System.Utf8String/tests/System/Char8Tests.cs create mode 100644 src/System.Utf8String/tests/System/MemoryTests.cs create mode 100644 src/System.Utf8String/tests/System/ReflectionTests.cs create mode 100644 src/System.Utf8String/tests/System/Utf8ExtensionsTests.cs create mode 100644 src/System.Utf8String/tests/System/Utf8StringTests.Substring.cs diff --git a/src/CoreFx.Private.TestUtilities/ref/Configurations.props b/src/CoreFx.Private.TestUtilities/ref/Configurations.props index ff0d415e4593..04ae535c9867 100644 --- a/src/CoreFx.Private.TestUtilities/ref/Configurations.props +++ b/src/CoreFx.Private.TestUtilities/ref/Configurations.props @@ -1,7 +1,8 @@  + netcoreapp; netstandard; - \ No newline at end of file + diff --git a/src/CoreFx.Private.TestUtilities/ref/CoreFx.Private.TestUtilities.csproj 
b/src/CoreFx.Private.TestUtilities/ref/CoreFx.Private.TestUtilities.csproj index 01f758e62796..33f30d61e84f 100644 --- a/src/CoreFx.Private.TestUtilities/ref/CoreFx.Private.TestUtilities.csproj +++ b/src/CoreFx.Private.TestUtilities/ref/CoreFx.Private.TestUtilities.csproj @@ -1,13 +1,20 @@ - + {E2E59C98-998F-9965-991D-99411166AF6F} false true $(RepoRoot)\external\test-runtime\XUnit.Runtime.depproj - netstandard-Debug;netstandard-Release + netcoreapp-Debug;netcoreapp-Release;netstandard-Debug;netstandard-Release + + + + + + + \ No newline at end of file diff --git a/src/CoreFx.Private.TestUtilities/ref/CoreFx.Private.TestUtilities.netcoreapp.cs b/src/CoreFx.Private.TestUtilities/ref/CoreFx.Private.TestUtilities.netcoreapp.cs new file mode 100644 index 000000000000..ed68ba164397 --- /dev/null +++ b/src/CoreFx.Private.TestUtilities/ref/CoreFx.Private.TestUtilities.netcoreapp.cs @@ -0,0 +1,30 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// ------------------------------------------------------------------------------ +// Changes to this file must follow the http://aka.ms/api-review process. 
+// ------------------------------------------------------------------------------ + +namespace System.Buffers +{ + public static partial class BoundedMemory + { + public static System.Buffers.BoundedMemory Allocate(int elementCount, System.Buffers.PoisonPagePlacement placement = System.Buffers.PoisonPagePlacement.After) where T : unmanaged { throw null; } + public static System.Buffers.BoundedMemory AllocateFromExistingData(System.ReadOnlySpan data, System.Buffers.PoisonPagePlacement placement = System.Buffers.PoisonPagePlacement.After) where T : unmanaged { throw null; } + public static System.Buffers.BoundedMemory AllocateFromExistingData(T[] data, System.Buffers.PoisonPagePlacement placement = System.Buffers.PoisonPagePlacement.After) where T : unmanaged { throw null; } + } + public abstract partial class BoundedMemory : IDisposable where T : unmanaged + { + public abstract bool IsReadonly { get; } + public abstract System.Memory Memory { get; } + public abstract System.Span Span { get; } + public abstract void Dispose(); + public abstract void MakeReadonly(); + public abstract void MakeWriteable(); + } + public enum PoisonPagePlacement + { + After = 0, + Before = 1, + } +} diff --git a/src/CoreFx.Private.TestUtilities/src/CoreFx.Private.TestUtilities.csproj b/src/CoreFx.Private.TestUtilities/src/CoreFx.Private.TestUtilities.csproj index 262fb54bd404..58b301b1e8d1 100644 --- a/src/CoreFx.Private.TestUtilities/src/CoreFx.Private.TestUtilities.csproj +++ b/src/CoreFx.Private.TestUtilities/src/CoreFx.Private.TestUtilities.csproj @@ -12,6 +12,13 @@ Test Utilities are not supported on this platform 
netcoreapp-Unix-Debug;netcoreapp-Unix-Release;netcoreapp-Windows_NT-Debug;netcoreapp-Windows_NT-Release;netcoreapp2.0-Unix-Debug;netcoreapp2.0-Unix-Release;netcoreapp2.0-Windows_NT-Debug;netcoreapp2.0-Windows_NT-Release;netcoreappaot-Windows_NT-Debug;netcoreappaot-Windows_NT-Release;netfx-Windows_NT-Debug;netfx-Windows_NT-Release;netstandard-Debug;netstandard-Release;uap-Windows_NT-Debug;uap-Windows_NT-Release;uapaot-Windows_NT-Debug;uapaot-Windows_NT-Release + + + + + + + diff --git a/src/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Creation.cs b/src/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Creation.cs new file mode 100644 index 000000000000..b77cd3c24687 --- /dev/null +++ b/src/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Creation.cs @@ -0,0 +1,67 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Runtime.InteropServices; + +namespace System.Buffers +{ + /// + /// Contains factory methods to create BoundedMemory instances. + /// + public static partial class BoundedMemory + { + /// + /// Allocates a new BoundedMemory region which is immediately preceded by + /// or immediately followed by a poison (PAGE_NOACCESS) page. If placement + /// is PoisonPagePlacement.Before, then attempting to read the memory + /// immediately before the returned BoundedMemory will result in an AV. + /// If placement is PoisonPagePlacement.After, then + /// attempting to read the memory immediately after the returned + /// BoundedMemory will result in an AV. + /// + /// + /// The newly-allocated memory will be populated with random data.
+ /// + public static BoundedMemory Allocate(int elementCount, PoisonPagePlacement placement = PoisonPagePlacement.After) where T : unmanaged + { + if (elementCount < 0) + { + throw new ArgumentOutOfRangeException(nameof(elementCount)); + } + if (placement != PoisonPagePlacement.Before && placement != PoisonPagePlacement.After) + { + throw new ArgumentOutOfRangeException(nameof(placement)); + } + + var retVal = AllocateWithoutDataPopulation(elementCount, placement); + new Random().NextBytes(MemoryMarshal.AsBytes(retVal.Span)); // doesn't need to be cryptographically strong + return retVal; + } + + /// + /// Similar to Allocate, but populates the allocated + /// native memory block from existing data rather than using random data. + /// + public static BoundedMemory AllocateFromExistingData(ReadOnlySpan data, PoisonPagePlacement placement = PoisonPagePlacement.After) where T : unmanaged + { + if (placement != PoisonPagePlacement.Before && placement != PoisonPagePlacement.After) + { + throw new ArgumentOutOfRangeException(nameof(placement)); + } + + var retVal = AllocateWithoutDataPopulation(data.Length, placement); + data.CopyTo(retVal.Span); + return retVal; + } + + /// + /// Similar to Allocate, but populates the allocated + /// native memory block from existing data rather than using random data. + /// + public static BoundedMemory AllocateFromExistingData(T[] data, PoisonPagePlacement placement = PoisonPagePlacement.After) where T : unmanaged + { + return AllocateFromExistingData(new ReadOnlySpan(data), placement); + } + } +} diff --git a/src/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Unix.cs b/src/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Unix.cs new file mode 100644 index 000000000000..aa9d87a397b2 --- /dev/null +++ b/src/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Unix.cs @@ -0,0 +1,47 @@ +// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers +{ + public static partial class BoundedMemory + { + private static UnixImplementation AllocateWithoutDataPopulation(int elementCount, PoisonPagePlacement placement) where T : unmanaged + { + // On non-Windows platforms, we don't yet have support for changing the permissions of individual pages. + + return new UnixImplementation(elementCount); + } + + private sealed class UnixImplementation : BoundedMemory where T : unmanaged + { + private readonly T[] _buffer; + + public UnixImplementation(int elementCount) + { + _buffer = new T[elementCount]; + } + + public override bool IsReadonly => false; + + public override Memory Memory => _buffer; + + public override Span Span => _buffer; + + public override void Dispose() + { + // no-op + } + + public override void MakeReadonly() + { + // no-op + } + + public override void MakeWriteable() + { + // no-op + } + } + } +} diff --git a/src/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Windows.cs b/src/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Windows.cs new file mode 100644 index 000000000000..d60df689a5e5 --- /dev/null +++ b/src/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Windows.cs @@ -0,0 +1,333 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Buffers; +using System.Runtime.ConstrainedExecution; +using System.Runtime.InteropServices; +using System.Security; + +namespace System.Buffers +{ + public static unsafe partial class BoundedMemory + { + private static readonly int SystemPageSize = Environment.SystemPageSize; + + private static WindowsImplementation AllocateWithoutDataPopulation(int elementCount, PoisonPagePlacement placement) where T : unmanaged + { + long cb, totalBytesToAllocate; + checked + { + cb = elementCount * sizeof(T); + totalBytesToAllocate = cb; + + // We only need to round the count up if it's not an exact multiple + // of the system page size. + + var leftoverBytes = totalBytesToAllocate % SystemPageSize; + if (leftoverBytes != 0) + { + totalBytesToAllocate += SystemPageSize - leftoverBytes; + } + + // Finally, account for the poison pages at the front and back. + + totalBytesToAllocate += 2 * SystemPageSize; + } + + // Reserve and commit the entire range as NOACCESS. + + var handle = UnsafeNativeMethods.VirtualAlloc( + lpAddress: IntPtr.Zero, + dwSize: (IntPtr)totalBytesToAllocate /* cast throws OverflowException if out of range */, + flAllocationType: VirtualAllocAllocationType.MEM_RESERVE | VirtualAllocAllocationType.MEM_COMMIT, + flProtect: VirtualAllocProtection.PAGE_NOACCESS); + + if (handle == null || handle.IsInvalid) + { + Marshal.ThrowExceptionForHR(Marshal.GetHRForLastWin32Error()); + throw new InvalidOperationException("VirtualAlloc failed unexpectedly."); + } + + // Done allocating! Now carve out a READWRITE section bookended by the NOACCESS + // pages and return that carved-out section to the caller. Since memory protection + // flags only apply at page-level granularity, we need to "left-align" or "right- + // align" the section we carve out so that it's guaranteed adjacent to one of + // the NOACCESS bookend pages. + + return new WindowsImplementation( + handle: handle, + byteOffsetIntoHandle: (placement == PoisonPagePlacement.Before) + ? 
SystemPageSize /* just after leading poison page */ + : checked((int)(totalBytesToAllocate - SystemPageSize - cb)) /* just before trailing poison page */, + elementCount: elementCount) + { + Protection = VirtualAllocProtection.PAGE_READWRITE + }; + } + + private sealed class WindowsImplementation : BoundedMemory where T : unmanaged + { + private readonly VirtualAllocHandle _handle; + private readonly int _byteOffsetIntoHandle; + private readonly int _elementCount; + private readonly BoundedMemoryManager _memoryManager; + + internal WindowsImplementation(VirtualAllocHandle handle, int byteOffsetIntoHandle, int elementCount) + { + _handle = handle; + _byteOffsetIntoHandle = byteOffsetIntoHandle; + _elementCount = elementCount; + _memoryManager = new BoundedMemoryManager(this); + } + + public override bool IsReadonly => (Protection != VirtualAllocProtection.PAGE_READWRITE); + + internal VirtualAllocProtection Protection + { + get + { + bool refAdded = false; + try + { + _handle.DangerousAddRef(ref refAdded); + if (UnsafeNativeMethods.VirtualQuery( + lpAddress: _handle.DangerousGetHandle() + _byteOffsetIntoHandle, + lpBuffer: out var memoryInfo, + dwLength: (IntPtr)sizeof(MEMORY_BASIC_INFORMATION)) == IntPtr.Zero) + { + Marshal.ThrowExceptionForHR(Marshal.GetHRForLastWin32Error()); + throw new InvalidOperationException("VirtualQuery failed unexpectedly."); + } + return memoryInfo.Protect; + } + finally + { + if (refAdded) + { + _handle.DangerousRelease(); + } + } + } + set + { + if (_elementCount > 0) + { + bool refAdded = false; + try + { + _handle.DangerousAddRef(ref refAdded); + if (!UnsafeNativeMethods.VirtualProtect( + lpAddress: _handle.DangerousGetHandle() + _byteOffsetIntoHandle, + dwSize: (IntPtr)(&((T*)null)[_elementCount]), + flNewProtect: value, + lpflOldProtect: out _)) + { + Marshal.ThrowExceptionForHR(Marshal.GetHRForLastWin32Error()); + throw new InvalidOperationException("VirtualProtect failed unexpectedly."); + } + } + finally + { + if (refAdded) + { 
+ _handle.DangerousRelease(); + } + } + } + } + } + + public override Memory Memory => _memoryManager.Memory; + + public override Span Span + { + get + { + bool refAdded = false; + try + { + _handle.DangerousAddRef(ref refAdded); + return new Span((void*)(_handle.DangerousGetHandle() + _byteOffsetIntoHandle), _elementCount); + } + finally + { + if (refAdded) + { + _handle.DangerousRelease(); + } + } + } + } + + public override void Dispose() + { + _handle.Dispose(); + } + + public override void MakeReadonly() + { + Protection = VirtualAllocProtection.PAGE_READONLY; + } + + public override void MakeWriteable() + { + Protection = VirtualAllocProtection.PAGE_READWRITE; + } + + private sealed class BoundedMemoryManager : MemoryManager + { + private readonly WindowsImplementation _impl; + + public BoundedMemoryManager(WindowsImplementation impl) + { + _impl = impl; + } + + public override Memory Memory => CreateMemory(_impl._elementCount); + + protected override void Dispose(bool disposing) + { + // no-op; the handle will be disposed separately + } + + public override Span GetSpan() + { + throw new NotImplementedException(); + } + + public override MemoryHandle Pin(int elementIndex) + { + if ((uint)elementIndex > (uint)_impl._elementCount) + { + throw new ArgumentOutOfRangeException(paramName: nameof(elementIndex)); + } + + bool refAdded = false; + try + { + _impl._handle.DangerousAddRef(ref refAdded); + return new MemoryHandle((T*)(_impl._handle.DangerousGetHandle() + _impl._byteOffsetIntoHandle) + elementIndex); + } + finally + { + if (refAdded) + { + _impl._handle.DangerousRelease(); + } + } + } + + public override void Unpin() + { + // no-op - we don't unpin native memory + } + } + } + + // from winnt.h + [Flags] + private enum VirtualAllocAllocationType : uint + { + MEM_COMMIT = 0x1000, + MEM_RESERVE = 0x2000, + MEM_DECOMMIT = 0x4000, + MEM_RELEASE = 0x8000, + MEM_FREE = 0x10000, + MEM_PRIVATE = 0x20000, + MEM_MAPPED = 0x40000, + MEM_RESET = 0x80000, + MEM_TOP_DOWN 
= 0x100000, + MEM_WRITE_WATCH = 0x200000, + MEM_PHYSICAL = 0x400000, + MEM_ROTATE = 0x800000, + MEM_LARGE_PAGES = 0x20000000, + MEM_4MB_PAGES = 0x80000000, + } + + // from winnt.h + [Flags] + private enum VirtualAllocProtection : uint + { + PAGE_NOACCESS = 0x01, + PAGE_READONLY = 0x02, + PAGE_READWRITE = 0x04, + PAGE_WRITECOPY = 0x08, + PAGE_EXECUTE = 0x10, + PAGE_EXECUTE_READ = 0x20, + PAGE_EXECUTE_READWRITE = 0x40, + PAGE_EXECUTE_WRITECOPY = 0x80, + PAGE_GUARD = 0x100, + PAGE_NOCACHE = 0x200, + PAGE_WRITECOMBINE = 0x400, + } + + [StructLayout(LayoutKind.Sequential)] + private struct MEMORY_BASIC_INFORMATION + { + public IntPtr BaseAddress; + public IntPtr AllocationBase; + public VirtualAllocProtection AllocationProtect; + public IntPtr RegionSize; + public VirtualAllocAllocationType State; + public VirtualAllocProtection Protect; + public VirtualAllocAllocationType Type; + }; + + private sealed class VirtualAllocHandle : SafeHandle + { + // Called by P/Invoke when returning SafeHandles + private VirtualAllocHandle() + : base(IntPtr.Zero, ownsHandle: true) + { + } + + // Do not provide a finalizer - SafeHandle's critical finalizer will + // call ReleaseHandle for you. 
+ + public override bool IsInvalid => (handle == IntPtr.Zero); + + protected override bool ReleaseHandle() => + UnsafeNativeMethods.VirtualFree(handle, IntPtr.Zero, VirtualAllocAllocationType.MEM_RELEASE); + } + + [SuppressUnmanagedCodeSecurity] + private static class UnsafeNativeMethods + { + private const string KERNEL32_LIB = "kernel32.dll"; + + // https://msdn.microsoft.com/en-us/library/windows/desktop/aa366887(v=vs.85).aspx + [DllImport(KERNEL32_LIB, CallingConvention = CallingConvention.Winapi, SetLastError = true)] + public static extern VirtualAllocHandle VirtualAlloc( + [In] IntPtr lpAddress, + [In] IntPtr dwSize, + [In] VirtualAllocAllocationType flAllocationType, + [In] VirtualAllocProtection flProtect); + + // https://msdn.microsoft.com/en-us/library/windows/desktop/aa366892(v=vs.85).aspx + [DllImport(KERNEL32_LIB, CallingConvention = CallingConvention.Winapi, SetLastError = true)] + [ReliabilityContract(Consistency.WillNotCorruptState, Cer.Success)] + [return: MarshalAs(UnmanagedType.Bool)] + public static extern bool VirtualFree( + [In] IntPtr lpAddress, + [In] IntPtr dwSize, + [In] VirtualAllocAllocationType dwFreeType); + + // https://msdn.microsoft.com/en-us/library/windows/desktop/aa366898(v=vs.85).aspx + [DllImport(KERNEL32_LIB, CallingConvention = CallingConvention.Winapi, SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + public static extern bool VirtualProtect( + [In] IntPtr lpAddress, + [In] IntPtr dwSize, + [In] VirtualAllocProtection flNewProtect, + [Out] out VirtualAllocProtection lpflOldProtect); + + // https://msdn.microsoft.com/en-us/library/windows/desktop/aa366902(v=vs.85).aspx + [DllImport(KERNEL32_LIB, CallingConvention = CallingConvention.Winapi, SetLastError = true)] + public static extern IntPtr VirtualQuery( + [In] IntPtr lpAddress, + [Out] out MEMORY_BASIC_INFORMATION lpBuffer, + [In] IntPtr dwLength); + } + } +} diff --git a/src/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.cs 
b/src/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.cs new file mode 100644 index 000000000000..cc39ff3cd804 --- /dev/null +++ b/src/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.cs @@ -0,0 +1,49 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers +{ + /// + /// Represents a region of native memory. The property can be used + /// to get a backed by this memory region. + /// + public abstract class BoundedMemory : IDisposable where T : unmanaged + { + /// + /// Returns a value stating whether this native memory block is readonly. + /// + public abstract bool IsReadonly { get; } + + /// + /// Gets the which represents this native memory. + /// This instance must be kept alive while working with the . + /// + public abstract Memory Memory { get; } + + /// + /// Gets the which represents this native memory. + /// This instance must be kept alive while working with the . + /// + public abstract Span Span { get; } + + /// + /// Disposes this instance. + /// + public abstract void Dispose(); + + /// + /// Sets this native memory block to be readonly. Writes to this block will cause an AV. + /// This method has no effect if the memory block is zero length or if the underlying + /// OS does not support marking the memory block as readonly. + /// + public abstract void MakeReadonly(); + + /// + /// Sets this native memory block to be read+write. + /// This method has no effect if the memory block is zero length or if the underlying + /// OS does not support marking the memory block as read+write. 
+ /// + public abstract void MakeWriteable(); + } +} \ No newline at end of file diff --git a/src/CoreFx.Private.TestUtilities/src/System/Buffers/PoisonPagePlacement.cs b/src/CoreFx.Private.TestUtilities/src/System/Buffers/PoisonPagePlacement.cs new file mode 100644 index 000000000000..ea2caa3136b5 --- /dev/null +++ b/src/CoreFx.Private.TestUtilities/src/System/Buffers/PoisonPagePlacement.cs @@ -0,0 +1,26 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers +{ + /// + /// Dictates where the poison page should be placed. + /// + public enum PoisonPagePlacement + { + /// + /// The poison page should be placed immediately after the memory region. + /// Attempting to access the memory page immediately following the + /// span will result in an AV. + /// + After, + + /// + /// The poison page should be placed immediately before the memory region. + /// Attempting to access the memory page immediately before the + /// span will result in an AV. 
+ /// + Before, + } +} \ No newline at end of file diff --git a/src/System.Runtime/ref/System.Runtime.cs b/src/System.Runtime/ref/System.Runtime.cs index 1b6b0e1113a0..049d1d6c37eb 100644 --- a/src/System.Runtime/ref/System.Runtime.cs +++ b/src/System.Runtime/ref/System.Runtime.cs @@ -7854,6 +7854,14 @@ void System.Collections.IEnumerator.Reset() { } void System.IDisposable.Dispose() { } } } +namespace System.Text.Unicode +{ + public static partial class Utf8 + { + public static System.Buffers.OperationStatus FromUtf16(ReadOnlySpan source, Span destination, out int numCharsRead, out int numBytesWritten, bool replaceInvalidSequences = true, bool isFinalBlock = true) { throw null; } + public static System.Buffers.OperationStatus ToUtf16(ReadOnlySpan source, Span destination, out int numBytesRead, out int numCharsWritten, bool replaceInvalidSequences = true, bool isFinalBlock = true) { throw null; } + } +} namespace System.Threading { public readonly partial struct CancellationToken diff --git a/src/System.Runtime/tests/System.Runtime.Tests.csproj b/src/System.Runtime/tests/System.Runtime.Tests.csproj index d7786d021124..c31ed0254f1f 100644 --- a/src/System.Runtime/tests/System.Runtime.Tests.csproj +++ b/src/System.Runtime/tests/System.Runtime.Tests.csproj @@ -263,6 +263,9 @@ + + + diff --git a/src/System.Runtime/tests/System/Text/Unicode/Utf8Tests.ToBytes.netcoreapp.cs b/src/System.Runtime/tests/System/Text/Unicode/Utf8Tests.ToBytes.netcoreapp.cs new file mode 100644 index 000000000000..18ceedc2f832 --- /dev/null +++ b/src/System.Runtime/tests/System/Text/Unicode/Utf8Tests.ToBytes.netcoreapp.cs @@ -0,0 +1,264 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Buffers; +using System.Linq; +using Xunit; + +namespace System.Text.Unicode.Tests +{ + public partial class Utf8Tests + { + [Theory] + [InlineData("", "")] // empty string is OK + [InlineData(X_UTF16, X_UTF8)] + [InlineData(E_ACUTE_UTF16, E_ACUTE_UTF8)] + [InlineData(EURO_SYMBOL_UTF16, EURO_SYMBOL_UTF8)] + public void ToBytes_WithSmallValidBuffers(string utf16Input, string expectedUtf8TranscodingHex) + { + // These test cases are for the "slow processing" code path at the end of TranscodeToUtf8, + // so inputs should be less than 2 chars. + + Assert.InRange(utf16Input.Length, 0, 1); + + ToBytes_Test_Core( + utf16Input: utf16Input, + destinationSize: expectedUtf8TranscodingHex.Length / 2, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.Done, + expectedNumCharsRead: utf16Input.Length, + expectedUtf8Transcoding: DecodeHex(expectedUtf8TranscodingHex)); + } + + [Theory] + [InlineData("AB")] // 2 ASCII chars, hits fast inner loop + [InlineData("ABCD")] // 4 ASCII chars, hits fast inner loop + [InlineData("ABCDEF")] // 6 ASCII chars, hits fast inner loop + [InlineData("ABCDEFGH")] // 8 ASCII chars, hits fast inner loop + [InlineData("ABCDEFGHIJ")] // 10 ASCII chars, hits fast inner loop + [InlineData("ABCDEF" + E_ACUTE_UTF16 + "HIJ")] // interrupts inner loop due to non-ASCII char in first char of first DWORD + [InlineData("ABCDEFG" + EURO_SYMBOL_UTF16 + "IJ")] // interrupts inner loop due to non-ASCII char in second char of first DWORD + [InlineData("ABCDEFGH" + E_ACUTE_UTF16 + "J")] // interrupts inner loop due to non-ASCII char in first char of second DWORD + [InlineData("ABCDEFGHI" + EURO_SYMBOL_UTF16)] // interrupts inner loop due to non-ASCII char in second char of second DWORD + [InlineData(X_UTF16 + E_ACUTE_UTF16)] // drains first ASCII char then falls down to slow path + [InlineData(X_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16)] // drains first ASCII char then consumes 2x 2-byte sequences at once + 
[InlineData(E_ACUTE_UTF16 + X_UTF16)] // no first ASCII char to drain, consumes 2-byte seq followed by ASCII char + [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16)] // stay within 2x 2-byte sequence processing loop + [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + X_UTF16)] // break out of 2x 2-byte seq loop due to ASCII data in second char of DWORD + [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + X_UTF16 + X_UTF16)] // break out of 2x 2-byte seq loop due to ASCII data in first char of DWORD + [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + EURO_SYMBOL_UTF16)] // break out of 2x 2-byte seq loop due to 3-byte data + [InlineData(E_ACUTE_UTF16 + EURO_SYMBOL_UTF16)] // 2-byte logic sees next char isn't ASCII, cannot read full DWORD from remaining input buffer, falls down to slow drain loop + [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + X_UTF16)] // 2x 3-byte logic can't read a full DWORD from next part of buffer, falls down to slow drain loop + [InlineData(EURO_SYMBOL_UTF16 + X_UTF16)] // 3-byte processing loop consumes trailing ASCII char, but can't read next DWORD, falls down to slow drain loop + [InlineData(EURO_SYMBOL_UTF16 + X_UTF16 + X_UTF16)] // 3-byte processing loop consumes trailing ASCII char, but can't read next DWORD, falls down to slow drain loop + [InlineData(EURO_SYMBOL_UTF16 + E_ACUTE_UTF16)] // 3-byte processing loop can't consume next ASCII char, can't read DWORD, falls down to slow drain loop + [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // stay within 2x 3-byte sequence processing loop + [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + X_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // consume stray ASCII char at beginning of DWORD after 2x 3-byte sequence + [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + X_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // consume stray ASCII char at end of DWORD 
after 2x 3-byte sequence + [InlineData(EURO_SYMBOL_UTF16 + E_ACUTE_UTF16 + X_UTF16)] // consume 2-byte sequence as second char in DWORD which begins with 3-byte encoded char + [InlineData(EURO_SYMBOL_UTF16 + GRINNING_FACE_UTF16)] // 3-byte sequence followed by 4-byte sequence + [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + GRINNING_FACE_UTF16)] // 2x 3-byte sequence followed by 4-byte sequence + [InlineData(GRINNING_FACE_UTF16)] // single 4-byte surrogate char pair + [InlineData(GRINNING_FACE_UTF16 + EURO_SYMBOL_UTF16)] // 4-byte surrogate char pair, cannot read next DWORD, falls down to slow drain loop + public void ToBytes_WithLargeValidBuffers(string utf16Input) + { + // These test cases are for the "fast processing" code which is the main loop of TranscodeToUtf8, + // so inputs should be at least 2 chars. + + Assert.True(utf16Input.Length >= 2); + + // We're going to run the tests with destination buffer lengths ranging from 0 all the way + // to buffers large enough to hold the full output. This allows us to test logic that + // detects whether we're about to overrun our destination buffer and instead returns DestinationTooSmall. 
+ + Rune[] enumeratedScalars = utf16Input.EnumerateRunes().ToArray(); + + // 0-length buffer test + ToBytes_Test_Core( + utf16Input: utf16Input, + destinationSize: 0, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.DestinationTooSmall, + expectedNumCharsRead: 0, + expectedUtf8Transcoding: ReadOnlySpan.Empty); + + int expectedNumCharsConsumed = 0; + byte[] concatenatedUtf8 = Array.Empty(); + + for (int i = 0; i < enumeratedScalars.Length; i++) + { + Rune thisScalar = enumeratedScalars[i]; + + // provide partial destination buffers all the way up to (but not including) enough to hold the next full scalar encoding + for (int j = 1; j < thisScalar.Utf8SequenceLength; j++) + { + ToBytes_Test_Core( + utf16Input: utf16Input, + destinationSize: concatenatedUtf8.Length + j, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.DestinationTooSmall, + expectedNumCharsRead: expectedNumCharsConsumed, + expectedUtf8Transcoding: concatenatedUtf8); + } + + // now provide a destination buffer large enough to hold the next full scalar encoding + + expectedNumCharsConsumed += thisScalar.Utf16SequenceLength; + concatenatedUtf8 = concatenatedUtf8.Concat(ToUtf8(thisScalar)).ToArray(); + + ToBytes_Test_Core( + utf16Input: utf16Input, + destinationSize: concatenatedUtf8.Length, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: (i == enumeratedScalars.Length - 1) ? 
OperationStatus.Done : OperationStatus.DestinationTooSmall, + expectedNumCharsRead: expectedNumCharsConsumed, + expectedUtf8Transcoding: concatenatedUtf8); + } + } + + [Theory] + [InlineData('\uD800', OperationStatus.NeedMoreData)] // standalone high surrogate + [InlineData('\uDFFF', OperationStatus.InvalidData)] // standalone low surrogate + public void ToBytes_WithOnlyStandaloneSurrogates(char charValue, OperationStatus expectedOperationStatus) + { + ToBytes_Test_Core( + utf16Input: new[] { charValue }, + destinationSize: 0, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: expectedOperationStatus, + expectedNumCharsRead: 0, + expectedUtf8Transcoding: Span.Empty); + } + + [Theory] + [InlineData("", 0, "")] // swapped surrogate pair characters + [InlineData("A", 1, "41")] // consume standalone ASCII char, then swapped surrogate pair characters + [InlineData("AB", 1, "41")] // consume standalone ASCII char, then standalone high surrogate char + [InlineData("AB", 1, "41")] // consume standalone ASCII char, then standalone low surrogate char + [InlineData("AB", 2, "4142")] // consume two ASCII chars, then standalone high surrogate char + [InlineData("AB", 2, "4142")] // consume two ASCII chars, then standalone low surrogate char + public void ToBytes_WithInvalidSurrogates(string utf16Input, int expectedNumCharsConsumed, string expectedUtf8TranscodingHex) + { + // xUnit can't handle ill-formed strings in [InlineData], so we replace here. + + utf16Input = utf16Input.Replace("", "\uD800").Replace("", "\uDFFF"); + + // These test cases are for the "fast processing" code which is the main loop of TranscodeToUtf8, + // so inputs should be at least 2 chars. 
+ + Assert.True(utf16Input.Length >= 2); + + ToBytes_Test_Core( + utf16Input: utf16Input, + destinationSize: expectedUtf8TranscodingHex.Length / 2, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.InvalidData, + expectedNumCharsRead: expectedNumCharsConsumed, + expectedUtf8Transcoding: DecodeHex(expectedUtf8TranscodingHex)); + } + + [Theory] + [InlineData("", REPLACEMENT_CHAR_UTF8)] // standalone low surr. and incomplete high surr. + [InlineData("", REPLACEMENT_CHAR_UTF8)] // standalone high surr. and incomplete high surr. + [InlineData("", REPLACEMENT_CHAR_UTF8 + REPLACEMENT_CHAR_UTF8)] // standalone low surr. and incomplete low surr. + [InlineData("ABCD", "41" + REPLACEMENT_CHAR_UTF8 + "42" + REPLACEMENT_CHAR_UTF8 + "43" + REPLACEMENT_CHAR_UTF8 + "44")] // standalone low, low, high surrounded by other data + public void ToBytes_WithReplacements(string utf16Input, string expectedUtf8TranscodingHex) + { + // xUnit can't handle ill-formed strings in [InlineData], so we replace here. + + utf16Input = utf16Input.Replace("", "\uD800").Replace("", "\uDFFF"); + + bool isFinalCharHighSurrogate = char.IsHighSurrogate(utf16Input.Last()); + + ToBytes_Test_Core( + utf16Input: utf16Input, + destinationSize: expectedUtf8TranscodingHex.Length / 2, + replaceInvalidSequences: true, + isFinalChunk: false, + expectedOperationStatus: (isFinalCharHighSurrogate) ? OperationStatus.NeedMoreData : OperationStatus.Done, + expectedNumCharsRead: (isFinalCharHighSurrogate) ? 
(utf16Input.Length - 1) : utf16Input.Length, + expectedUtf8Transcoding: DecodeHex(expectedUtf8TranscodingHex)); + + if (isFinalCharHighSurrogate) + { + // Also test with isFinalChunk = true + ToBytes_Test_Core( + utf16Input: utf16Input, + destinationSize: expectedUtf8TranscodingHex.Length / 2 + Rune.ReplacementChar.Utf8SequenceLength /* for replacement char */, + replaceInvalidSequences: true, + isFinalChunk: true, + expectedOperationStatus: OperationStatus.Done, + expectedNumCharsRead: utf16Input.Length, + expectedUtf8Transcoding: DecodeHex(expectedUtf8TranscodingHex + REPLACEMENT_CHAR_UTF8)); + } + } + + [Theory] + [InlineData(E_ACUTE_UTF16 + "", true, 1, OperationStatus.DestinationTooSmall, E_ACUTE_UTF8)] // not enough output buffer to hold U+FFFD + [InlineData(E_ACUTE_UTF16 + "", true, 2, OperationStatus.Done, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8)] // replace standalone low surr. at end + [InlineData(E_ACUTE_UTF16 + "", true, 1, OperationStatus.DestinationTooSmall, E_ACUTE_UTF8)] // not enough output buffer to hold U+FFFD + [InlineData(E_ACUTE_UTF16 + "", true, 2, OperationStatus.Done, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8)] // replace standalone high surr. at end + [InlineData(E_ACUTE_UTF16 + "", false, 1, OperationStatus.NeedMoreData, E_ACUTE_UTF8)] // don't replace standalone high surr. 
at end + [InlineData(E_ACUTE_UTF16 + "" + X_UTF16, true, 2, OperationStatus.DestinationTooSmall, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8)] // not enough output buffer to hold 'X' + [InlineData(E_ACUTE_UTF16 + "" + X_UTF16, false, 2, OperationStatus.DestinationTooSmall, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8)] // not enough output buffer to hold 'X' + [InlineData(E_ACUTE_UTF16 + "" + X_UTF16, true, 3, OperationStatus.Done, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8 + X_UTF8)] // replacement followed by 'X' + [InlineData(E_ACUTE_UTF16 + "" + X_UTF16, false, 3, OperationStatus.Done, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8 + X_UTF8)] // replacement followed by 'X' + public void ToBytes_WithReplacements_AndCustomBufferSizes(string utf16Input, bool isFinalChunk, int expectedNumCharsConsumed, OperationStatus expectedOperationStatus, string expectedUtf8TranscodingHex) + { + // xUnit can't handle ill-formed strings in [InlineData], so we replace here. + + utf16Input = utf16Input.Replace("", "\uD800").Replace("", "\uDFFF"); + + ToBytes_Test_Core( + utf16Input: utf16Input, + destinationSize: expectedUtf8TranscodingHex.Length / 2, + replaceInvalidSequences: true, + isFinalChunk: isFinalChunk, + expectedOperationStatus: expectedOperationStatus, + expectedNumCharsRead: expectedNumCharsConsumed, + expectedUtf8Transcoding: DecodeHex(expectedUtf8TranscodingHex)); + } + + [Fact] + public void ToBytes_AllPossibleScalarValues() + { + ToBytes_Test_Core( + utf16Input: s_allScalarsAsUtf16.Span, + destinationSize: s_allScalarsAsUtf8.Length, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.Done, + expectedNumCharsRead: s_allScalarsAsUtf16.Length, + expectedUtf8Transcoding: s_allScalarsAsUtf8.Span); + } + + private static void ToBytes_Test_Core(ReadOnlySpan utf16Input, int destinationSize, bool replaceInvalidSequences, bool isFinalChunk, OperationStatus expectedOperationStatus, int expectedNumCharsRead, ReadOnlySpan expectedUtf8Transcoding) + { + // 
Arrange + + using (BoundedMemory boundedSource = BoundedMemory.AllocateFromExistingData(utf16Input)) + using (BoundedMemory boundedDestination = BoundedMemory.Allocate(destinationSize)) + { + boundedSource.MakeReadonly(); + + // Act + + OperationStatus actualOperationStatus = Utf8.FromUtf16(boundedSource.Span, boundedDestination.Span, out int actualNumCharsRead, out int actualNumBytesWritten, replaceInvalidSequences, isFinalChunk); + + // Assert + + Assert.Equal(expectedOperationStatus, actualOperationStatus); + Assert.Equal(expectedNumCharsRead, actualNumCharsRead); + Assert.Equal(expectedUtf8Transcoding.Length, actualNumBytesWritten); + Assert.Equal(expectedUtf8Transcoding.ToArray(), boundedDestination.Span.Slice(0, actualNumBytesWritten).ToArray()); + } + } + } +} diff --git a/src/System.Runtime/tests/System/Text/Unicode/Utf8Tests.ToChars.netcoreapp.cs b/src/System.Runtime/tests/System/Text/Unicode/Utf8Tests.ToChars.netcoreapp.cs new file mode 100644 index 000000000000..6dda95dffc10 --- /dev/null +++ b/src/System.Runtime/tests/System/Text/Unicode/Utf8Tests.ToChars.netcoreapp.cs @@ -0,0 +1,304 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Buffers; +using System.Linq; +using Xunit; + +namespace System.Text.Unicode.Tests +{ + public partial class Utf8Tests + { + [Theory] + [InlineData("80", 0, "")] // sequence cannot begin with continuation character + [InlineData("8182", 0, "")] // sequence cannot begin with continuation character + [InlineData("838485", 0, "")] // sequence cannot begin with continuation character + [InlineData(X_UTF8 + "80", 1, X_UTF16)] // sequence cannot begin with continuation character + [InlineData(X_UTF8 + "8182", 1, X_UTF16)] // sequence cannot begin with continuation character + [InlineData("C0", 0, "")] // [ C0 ] is always invalid + [InlineData("C080", 0, "")] // [ C0 ] is always invalid + [InlineData("C08081", 0, "")] // [ C0 ] is always invalid + [InlineData(X_UTF8 + "C1", 1, X_UTF16)] // [ C1 ] is always invalid + [InlineData(X_UTF8 + "C180", 1, X_UTF16)] // [ C1 ] is always invalid + [InlineData(X_UTF8 + "C27F", 1, X_UTF16)] // [ C2 ] is improperly terminated + [InlineData("E2827F", 0, "")] // [ E2 82 ] is improperly terminated + [InlineData("E09F80", 0, "")] // [ E0 9F ... ] is overlong + [InlineData("E0C080", 0, "")] // [ E0 ] is improperly terminated + [InlineData("ED7F80", 0, "")] // [ ED ] is improperly terminated + [InlineData("EDA080", 0, "")] // [ ED A0 ... ] is surrogate + public void ToChars_WithSmallInvalidBuffers(string utf8HexInput, int expectedNumBytesConsumed, string expectedUtf16Transcoding) + { + // These test cases are for the "slow processing" code path at the end of TranscodeToUtf16, + // so inputs should be less than 4 bytes. 
+ + Assert.InRange(utf8HexInput.Length, 0, 6); + + ToChars_Test_Core( + utf8Input: DecodeHex(utf8HexInput), + destinationSize: expectedUtf16Transcoding.Length, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.InvalidData, + expectedNumBytesRead: expectedNumBytesConsumed, + expectedUtf16Transcoding: expectedUtf16Transcoding); + } + + [Theory] + [InlineData("C2", 0, "")] // [ C2 ] is an incomplete sequence + [InlineData("E282", 0, "")] // [ E2 82 ] is an incomplete sequence + [InlineData(X_UTF8 + "C2", 1, X_UTF16)] // [ C2 ] is an incomplete sequence + [InlineData(X_UTF8 + "E0", 1, X_UTF16)] // [ E0 ] is an incomplete sequence + [InlineData(X_UTF8 + "E0BF", 1, X_UTF16)] // [ E0 BF ] is an incomplete sequence + [InlineData(X_UTF8 + "F0", 1, X_UTF16)] // [ F0 ] is an incomplete sequence + [InlineData(X_UTF8 + "F0BF", 1, X_UTF16)] // [ F0 BF ] is an incomplete sequence + [InlineData(X_UTF8 + "F0BFA0", 1, X_UTF16)] // [ F0 BF A0 ] is an incomplete sequence + [InlineData(E_ACUTE_UTF8 + "C2", 2, E_ACUTE_UTF16)] // [ C2 ] is an incomplete sequence + [InlineData(E_ACUTE_UTF8 + "E0", 2, E_ACUTE_UTF16)] // [ E0 ] is an incomplete sequence + [InlineData(E_ACUTE_UTF8 + "F0", 2, E_ACUTE_UTF16)] // [ F0 ] is an incomplete sequence + [InlineData(E_ACUTE_UTF8 + "E0BF", 2, E_ACUTE_UTF16)] // [ E0 BF ] is an incomplete sequence + [InlineData(E_ACUTE_UTF8 + "F0BF", 2, E_ACUTE_UTF16)] // [ F0 BF ] is an incomplete sequence + [InlineData(EURO_SYMBOL_UTF8 + "C2", 3, EURO_SYMBOL_UTF16)] // [ C2 ] is an incomplete sequence + [InlineData(EURO_SYMBOL_UTF8 + "E0", 3, EURO_SYMBOL_UTF16)] // [ E0 ] is an incomplete sequence + [InlineData(EURO_SYMBOL_UTF8 + "F0", 3, EURO_SYMBOL_UTF16)] // [ F0 ] is an incomplete sequence + public void ToChars_WithVariousIncompleteBuffers(string utf8HexInput, int expectedNumBytesConsumed, string expectedUtf16Transcoding) + { + // These test cases are for the "slow processing" code path at the end of 
TranscodeToUtf16, + // so inputs should be less than 4 bytes. + + ToChars_Test_Core( + utf8Input: DecodeHex(utf8HexInput), + destinationSize: expectedUtf16Transcoding.Length, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.NeedMoreData, + expectedNumBytesRead: expectedNumBytesConsumed, + expectedUtf16Transcoding: expectedUtf16Transcoding); + } + + [Theory] + /* SMALL VALID BUFFERS - tests drain loop at end of method */ + [InlineData("")] // empty string is OK + [InlineData("X")] + [InlineData("XY")] + [InlineData("XYZ")] + [InlineData(E_ACUTE_UTF16)] + [InlineData(X_UTF16 + E_ACUTE_UTF16)] + [InlineData(E_ACUTE_UTF16 + X_UTF16)] + [InlineData(EURO_SYMBOL_UTF16)] + /* LARGE VALID BUFFERS - test main loop at beginning of method */ + [InlineData(E_ACUTE_UTF16 + "ABCD" + "0123456789:;<=>?")] // Loop unrolling at end of buffer + [InlineData(E_ACUTE_UTF16 + "ABCD" + "0123456789:;<=>?" + "01234567" + E_ACUTE_UTF16 + "89:;<=>?")] // Loop unrolling interrupted by non-ASCII + [InlineData("ABC" + E_ACUTE_UTF16 + "0123")] // 3 ASCII bytes followed by non-ASCII + [InlineData("AB" + E_ACUTE_UTF16 + "0123")] // 2 ASCII bytes followed by non-ASCII + [InlineData("A" + E_ACUTE_UTF16 + "0123")] // 1 ASCII byte followed by non-ASCII + [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16)] // 4x 2-byte sequences, exercises optimization code path in 2-byte sequence processing + [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + "PQ")] // 3x 2-byte sequences + 2 ASCII bytes, exercises optimization code path in 2-byte sequence processing + [InlineData(E_ACUTE_UTF16 + "PQ")] // single 2-byte sequence + 2 trailing ASCII bytes, exercises draining logic in 2-byte sequence processing + [InlineData(E_ACUTE_UTF16 + "P" + E_ACUTE_UTF16 + "0@P")] // single 2-byte sequences + 1 trailing ASCII byte + 2-byte sequence, exercises draining logic in 2-byte sequence processing + [InlineData(EURO_SYMBOL_UTF16 + "@")] // 
single 3-byte sequence + 1 trailing ASCII byte, exercises draining logic in 3-byte sequence processing + [InlineData(EURO_SYMBOL_UTF16 + "@P`")] // single 3-byte sequence + 3 trailing ASCII bytes, exercises draining logic and "running out of data" logic in 3-byte sequence processing + [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // 3x 3-byte sequences, exercises "stay within 3-byte loop" logic in 3-byte sequence processing + [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // 4x 3-byte sequences, exercises "consume multiple bytes at a time" logic in 3-byte sequence processing + [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + E_ACUTE_UTF16)] // 3x 3-byte sequences + single 2-byte sequence, exercises "consume multiple bytes at a time" logic in 3-byte sequence processing + [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16)] // 2x 3-byte sequences + 4x 2-byte sequences, exercises "consume multiple bytes at a time" logic in 3-byte sequence processing + [InlineData(GRINNING_FACE_UTF16 + GRINNING_FACE_UTF16)] // 2x 4-byte sequences, exercises 4-byte sequence processing + [InlineData(GRINNING_FACE_UTF16 + "@AB")] // single 4-byte sequence + 3 ASCII bytes, exercises 4-byte sequence processing and draining logic + [InlineData("\U0001F938\U0001F3FD\u200D\u2640\uFE0F")] // U+1F938 U+1F3FD U+200D U+2640 U+FE0F WOMAN CARTWHEELING: MEDIUM SKIN TONE, exercising switching between multiple sequence lengths + public void ToChars_ValidBuffers(string utf16Input) + { + // We're going to run the tests with destination buffer lengths ranging from 0 all the way + // to buffers large enough to hold the full output. This allows us to test logic that + // detects whether we're about to overrun our destination buffer and instead returns DestinationTooSmall. 
+ + Rune[] enumeratedScalars = utf16Input.EnumerateRunes().ToArray(); + + // Convert entire input to UTF-8 using our unit test reference logic. + + byte[] utf8Input = enumeratedScalars.SelectMany(ToUtf8).ToArray(); + + // 0-length buffer test + ToChars_Test_Core( + utf8Input: utf8Input, + destinationSize: 0, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: (utf8Input.Length == 0) ? OperationStatus.Done : OperationStatus.DestinationTooSmall, + expectedNumBytesRead: 0, + expectedUtf16Transcoding: ReadOnlySpan.Empty); + + int expectedNumBytesConsumed = 0; + char[] concatenatedUtf16 = Array.Empty(); + + for (int i = 0; i < enumeratedScalars.Length; i++) + { + Rune thisScalar = enumeratedScalars[i]; + + // if this is an astral scalar value, quickly test a buffer that's not large enough to contain the entire UTF-16 encoding + + if (!thisScalar.IsBmp) + { + ToChars_Test_Core( + utf8Input: utf8Input, + destinationSize: concatenatedUtf16.Length + 1, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.DestinationTooSmall, + expectedNumBytesRead: expectedNumBytesConsumed, + expectedUtf16Transcoding: concatenatedUtf16); + } + + // now provide a destination buffer large enough to hold the next full scalar encoding + + expectedNumBytesConsumed += thisScalar.Utf8SequenceLength; + concatenatedUtf16 = concatenatedUtf16.Concat(ToUtf16(thisScalar)).ToArray(); + + ToChars_Test_Core( + utf8Input: utf8Input, + destinationSize: concatenatedUtf16.Length, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: (i == enumeratedScalars.Length - 1) ? 
OperationStatus.Done : OperationStatus.DestinationTooSmall, + expectedNumBytesRead: expectedNumBytesConsumed, + expectedUtf16Transcoding: concatenatedUtf16); + } + } + + [Theory] + [InlineData("3031" + "80" + "202122232425", 2, "01")] // Continuation character at start of sequence should match no bitmask + [InlineData("3031" + "C080" + "2021222324", 2, "01")] // Overlong 2-byte sequence at start of DWORD + [InlineData("3031" + "C180" + "2021222324", 2, "01")] // Overlong 2-byte sequence at start of DWORD + [InlineData("C280" + "C180", 2, "\u0080")] // Overlong 2-byte sequence at end of DWORD + [InlineData("C27F" + "C280", 0, "")] // Improperly terminated 2-byte sequence at start of DWORD + [InlineData("C2C0" + "C280", 0, "")] // Improperly terminated 2-byte sequence at start of DWORD + [InlineData("C280" + "C27F", 2, "\u0080")] // Improperly terminated 2-byte sequence at end of DWORD + [InlineData("C280" + "C2C0", 2, "\u0080")] // Improperly terminated 2-byte sequence at end of DWORD + [InlineData("C280" + "C280" + "80203040", 4, "\u0080\u0080")] // Continuation character at start of sequence, within "stay in 2-byte processing" optimization + [InlineData("C280" + "C280" + "C180" + "C280", 4, "\u0080\u0080")] // Overlong 2-byte sequence at start of DWORD, within "stay in 2-byte processing" optimization + [InlineData("C280" + "C280" + "C280" + "C180", 6, "\u0080\u0080\u0080")] // Overlong 2-byte sequence at end of DWORD, within "stay in 2-byte processing" optimization + [InlineData("3031" + "E09F80" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Overlong 3-byte sequence at start of DWORD + [InlineData("3031" + "E07F80" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Improperly terminated 3-byte sequence at start of DWORD + [InlineData("3031" + "E0C080" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Improperly terminated 3-byte sequence at start of DWORD + [InlineData("3031" + "E17F80" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Improperly 
terminated 3-byte sequence at start of DWORD + [InlineData("3031" + "E1C080" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Improperly terminated 3-byte sequence at start of DWORD + [InlineData("3031" + "EDA080" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Surrogate 3-byte sequence at start of DWORD + [InlineData("3031" + "F5808080", 2, "01")] // [ F5 ] is always invalid + [InlineData("3031" + "F6808080", 2, "01")] // [ F6 ] is always invalid + [InlineData("3031" + "F7808080", 2, "01")] // [ F7 ] is always invalid + [InlineData("3031" + "F8808080", 2, "01")] // [ F8 ] is always invalid + [InlineData("3031" + "F9808080", 2, "01")] // [ F9 ] is always invalid + [InlineData("3031" + "FA808080", 2, "01")] // [ FA ] is always invalid + [InlineData("3031" + "FB808080", 2, "01")] // [ FB ] is always invalid + [InlineData("3031" + "FC808080", 2, "01")] // [ FC ] is always invalid + [InlineData("3031" + "FD808080", 2, "01")] // [ FD ] is always invalid + [InlineData("3031" + "FE808080", 2, "01")] // [ FE ] is always invalid + [InlineData("3031" + "FF808080", 2, "01")] // [ FF ] is always invalid + public void ToChars_WithLargeInvalidBuffers(string utf8HexInput, int expectedNumBytesConsumed, string expectedUtf16Transcoding) + { + // These test cases are for the "fast processing" code which is the main loop of TranscodeToUtf16, + // so inputs should be >= 4 bytes. 
+ + Assert.True(utf8HexInput.Length >= 8); + + ToChars_Test_Core( + utf8Input: DecodeHex(utf8HexInput), + destinationSize: expectedUtf16Transcoding.Length, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.InvalidData, + expectedNumBytesRead: expectedNumBytesConsumed, + expectedUtf16Transcoding: expectedUtf16Transcoding); + } + + [Theory] + [InlineData(X_UTF8 + "80" + X_UTF8, X_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // stray continuation byte [ 80 ] + [InlineData(X_UTF8 + "FF" + X_UTF8, X_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // invalid UTF-8 byte [ FF ] + [InlineData(X_UTF8 + "C2" + X_UTF8, X_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // 2-byte sequence starter [ C2 ] not followed by continuation byte + [InlineData(X_UTF8 + "C1C180" + X_UTF8, X_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ C1 80 ] is overlong but consists of two maximal invalid subsequences, each of length 1 byte + [InlineData(X_UTF8 + E_ACUTE_UTF8 + "E08080", X_UTF16 + E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16)] // [ E0 80 ] is overlong 2-byte sequence (1 byte maximal invalid subsequence), and following [ 80 ] is stray continuation byte + [InlineData(GRINNING_FACE_UTF8 + "F08F8080" + GRINNING_FACE_UTF8, GRINNING_FACE_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + GRINNING_FACE_UTF16)] // [ F0 8F ] is overlong 4-byte sequence (1 byte maximal invalid subsequence), and following [ 80 ] instances are stray continuation bytes + [InlineData(GRINNING_FACE_UTF8 + "F4908080" + GRINNING_FACE_UTF8, GRINNING_FACE_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + GRINNING_FACE_UTF16)] // [ F4 90 ] is out-of-range 4-byte sequence (1 byte maximal invalid subsequence), and following [ 80 ] instances are stray continuation bytes + 
[InlineData(E_ACUTE_UTF8 + "EDA0" + X_UTF8, E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ ED A0 ] is encoding of UTF-16 surrogate code point, so consists of two maximal invalid subsequences, each of length 1 byte + [InlineData(E_ACUTE_UTF8 + "ED80" + X_UTF8, E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ ED 80 ] is incomplete 3-byte sequence, so is 2-byte maximal invalid subsequence + [InlineData(E_ACUTE_UTF8 + "F380" + X_UTF8, E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ F3 80 ] is incomplete 4-byte sequence, so is 2-byte maximal invalid subsequence + [InlineData(E_ACUTE_UTF8 + "F38080" + X_UTF8, E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ F3 80 80 ] is incomplete 4-byte sequence, so is 3-byte maximal invalid subsequence + public void ToChars_WithReplacement(string utf8HexInput, string expectedUtf16Transcoding) + { + // First run the test with isFinalBlock = false, + // both with and without some bytes of incomplete trailing data. + + ToChars_Test_Core( + utf8Input: DecodeHex(utf8HexInput), + destinationSize: expectedUtf16Transcoding.Length, + replaceInvalidSequences: true, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.Done, + expectedNumBytesRead: utf8HexInput.Length / 2, + expectedUtf16Transcoding: expectedUtf16Transcoding); + + ToChars_Test_Core( + utf8Input: DecodeHex(utf8HexInput + "E0BF" /* trailing data */), + destinationSize: expectedUtf16Transcoding.Length, + replaceInvalidSequences: true, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.NeedMoreData, + expectedNumBytesRead: utf8HexInput.Length / 2, + expectedUtf16Transcoding: expectedUtf16Transcoding); + + // Then run the test with isFinalBlock = true, with incomplete trailing data. 
+ + ToChars_Test_Core( + utf8Input: DecodeHex(utf8HexInput + "E0BF" /* trailing data */), + destinationSize: expectedUtf16Transcoding.Length, + replaceInvalidSequences: true, + isFinalChunk: true, + expectedOperationStatus: OperationStatus.DestinationTooSmall, + expectedNumBytesRead: utf8HexInput.Length / 2, + expectedUtf16Transcoding: expectedUtf16Transcoding); + + ToChars_Test_Core( + utf8Input: DecodeHex(utf8HexInput + "E0BF" /* trailing data */), + destinationSize: expectedUtf16Transcoding.Length + 1, // allow room for U+FFFD + replaceInvalidSequences: true, + isFinalChunk: true, + expectedOperationStatus: OperationStatus.Done, + expectedNumBytesRead: utf8HexInput.Length / 2 + 2, + expectedUtf16Transcoding: expectedUtf16Transcoding + REPLACEMENT_CHAR_UTF16); + } + + [Fact] + public void ToChars_AllPossibleScalarValues() + { + ToChars_Test_Core( + utf8Input: s_allScalarsAsUtf8.Span, + destinationSize: s_allScalarsAsUtf16.Length, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.Done, + expectedNumBytesRead: s_allScalarsAsUtf8.Length, + expectedUtf16Transcoding: s_allScalarsAsUtf16.Span); + } + + private static void ToChars_Test_Core(ReadOnlySpan utf8Input, int destinationSize, bool replaceInvalidSequences, bool isFinalChunk, OperationStatus expectedOperationStatus, int expectedNumBytesRead, ReadOnlySpan expectedUtf16Transcoding) + { + // Arrange + + using (BoundedMemory boundedSource = BoundedMemory.AllocateFromExistingData(utf8Input)) + using (BoundedMemory boundedDestination = BoundedMemory.Allocate(destinationSize)) + { + boundedSource.MakeReadonly(); + + // Act + + OperationStatus actualOperationStatus = Utf8.ToUtf16(boundedSource.Span, boundedDestination.Span, out int actualNumBytesRead, out int actualNumCharsWritten, replaceInvalidSequences, isFinalChunk); + + // Assert + + Assert.Equal(expectedOperationStatus, actualOperationStatus); + Assert.Equal(expectedNumBytesRead, actualNumBytesRead); + 
Assert.Equal(expectedUtf16Transcoding.Length, actualNumCharsWritten); + Assert.Equal(expectedUtf16Transcoding.ToString(), boundedDestination.Span.Slice(0, actualNumCharsWritten).ToString()); + } + } + } +} diff --git a/src/System.Runtime/tests/System/Text/Unicode/Utf8Tests.netcoreapp.cs b/src/System.Runtime/tests/System/Text/Unicode/Utf8Tests.netcoreapp.cs new file mode 100644 index 000000000000..087235a81b74 --- /dev/null +++ b/src/System.Runtime/tests/System/Text/Unicode/Utf8Tests.netcoreapp.cs @@ -0,0 +1,141 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Buffers; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using System.Text.RegularExpressions; +using Xunit; + +namespace System.Text.Unicode.Tests +{ + public partial class Utf8Tests + { + private const string X_UTF8 = "58"; // U+0058 LATIN CAPITAL LETTER X, 1 byte + private const string X_UTF16 = "X"; + + private const string Y_UTF8 = "59"; // U+0059 LATIN CAPITAL LETTER Y, 1 byte + private const string Y_UTF16 = "Y"; + + private const string Z_UTF8 = "5A"; // U+005A LATIN CAPITAL LETTER Z, 1 byte + private const string Z_UTF16 = "Z"; + + private const string E_ACUTE_UTF8 = "C3A9"; // U+00E9 LATIN SMALL LETTER E WITH ACUTE, 2 bytes + private const string E_ACUTE_UTF16 = "\u00E9"; + + private const string EURO_SYMBOL_UTF8 = "E282AC"; // U+20AC EURO SIGN, 3 bytes + private const string EURO_SYMBOL_UTF16 = "\u20AC"; + + private const string REPLACEMENT_CHAR_UTF8 = "EFBFBD"; // U+FFFD REPLACEMENT CHAR, 3 bytes + private const string REPLACEMENT_CHAR_UTF16 = "\uFFFD"; + + private const string GRINNING_FACE_UTF8 = "F09F9880"; // U+1F600 GRINNING FACE, 4 bytes + private const string GRINNING_FACE_UTF16 = "\U0001F600"; + + // All valid scalars [ U+0000 .. U+D7FF ] and [ U+E000 .. U+10FFFF ]. 
+ private static readonly IEnumerable s_allValidScalars = Enumerable.Range(0x0000, 0xD800).Concat(Enumerable.Range(0xE000, 0x110000 - 0xE000)).Select(value => new Rune(value)); + + private static readonly ReadOnlyMemory s_allScalarsAsUtf16; + private static readonly ReadOnlyMemory s_allScalarsAsUtf8; + + static Utf8Tests() + { + List allScalarsAsUtf16 = new List(); + List allScalarsAsUtf8 = new List(); + + foreach (Rune rune in s_allValidScalars) + { + allScalarsAsUtf16.AddRange(ToUtf16(rune)); + allScalarsAsUtf8.AddRange(ToUtf8(rune)); + } + + s_allScalarsAsUtf16 = allScalarsAsUtf16.ToArray().AsMemory(); + s_allScalarsAsUtf8 = allScalarsAsUtf8.ToArray().AsMemory(); + } + + /* + * COMMON UTILITIES FOR UNIT TESTS + */ + + private static byte[] DecodeHex(ReadOnlySpan inputHex) + { + Assert.True(Regex.IsMatch(inputHex.ToString(), "^([0-9a-fA-F]{2})*$"), "Input must be an even number of hex characters."); + + byte[] retVal = new byte[inputHex.Length / 2]; + for (int i = 0; i < retVal.Length; i++) + { + retVal[i] = byte.Parse(inputHex.Slice(i * 2, 2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture); + } + return retVal; + } + + // !! IMPORTANT !! + // Don't delete this implementation, as we use it as a reference to make sure the framework's + // transcoding logic is correct. 
+ private static byte[] ToUtf8(Rune rune) + { + Assert.True(Rune.IsValid(rune.Value), $"Rune with value U+{(uint)rune.Value:X4} is not well-formed."); + + if (rune.Value < 0x80) + { + return new[] + { + (byte)rune.Value + }; + } + else if (rune.Value < 0x0800) + { + return new[] + { + (byte)((rune.Value >> 6) | 0xC0), + (byte)((rune.Value & 0x3F) | 0x80) + }; + } + else if (rune.Value < 0x10000) + { + return new[] + { + (byte)((rune.Value >> 12) | 0xE0), + (byte)(((rune.Value >> 6) & 0x3F) | 0x80), + (byte)((rune.Value & 0x3F) | 0x80) + }; + } + else + { + return new[] + { + (byte)((rune.Value >> 18) | 0xF0), + (byte)(((rune.Value >> 12) & 0x3F) | 0x80), + (byte)(((rune.Value >> 6) & 0x3F) | 0x80), + (byte)((rune.Value & 0x3F) | 0x80) + }; + } + } + + // !! IMPORTANT !! + // Don't delete this implementation, as we use it as a reference to make sure the framework's + // transcoding logic is correct. + private static char[] ToUtf16(Rune rune) + { + Assert.True(Rune.IsValid(rune.Value), $"Rune with value U+{(uint)rune.Value:X4} is not well-formed."); + + if (rune.IsBmp) + { + return new[] + { + (char)rune.Value + }; + } + else + { + return new[] + { + (char)((rune.Value >> 10) + 0xD800 - 0x40), + (char)((rune.Value & 0x03FF) + 0xDC00) + }; + } + } + } +} diff --git a/src/System.Utf8String/ref/System.Utf8String.cs b/src/System.Utf8String/ref/System.Utf8String.cs index 3673502e2ca6..7ec621dcacb0 100644 --- a/src/System.Utf8String/ref/System.Utf8String.cs +++ b/src/System.Utf8String/ref/System.Utf8String.cs @@ -7,6 +7,67 @@ namespace System { + public readonly partial struct Char8 : IComparable, IEquatable + { + private readonly int _dummy; + public static bool operator ==(Char8 a, Char8 b) => throw null; + public static bool operator !=(Char8 a, Char8 b) => throw null; + public static bool operator <(Char8 a, Char8 b) => throw null; + public static bool operator <=(Char8 a, Char8 b) => throw null; + public static bool operator >(Char8 a, Char8 b) => throw null; + public 
static bool operator >=(Char8 a, Char8 b) => throw null; + public static implicit operator byte(Char8 value) => throw null; + [CLSCompliant(false)] + public static explicit operator sbyte(Char8 value) => throw null; + public static explicit operator char(Char8 value) => throw null; + public static implicit operator short(Char8 value) => throw null; + [CLSCompliant(false)] + public static implicit operator ushort(Char8 value) => throw null; + public static implicit operator int(Char8 value) => throw null; + [CLSCompliant(false)] + public static implicit operator uint(Char8 value) => throw null; + public static implicit operator long(Char8 value) => throw null; + [CLSCompliant(false)] + public static implicit operator ulong(Char8 value) => throw null; + public static implicit operator Char8(byte value) => throw null; + [CLSCompliant(false)] + public static explicit operator Char8(sbyte value) => throw null; + public static explicit operator Char8(char value) => throw null; + public static explicit operator Char8(short value) => throw null; + [CLSCompliant(false)] + public static explicit operator Char8(ushort value) => throw null; + public static explicit operator Char8(int value) => throw null; + [CLSCompliant(false)] + public static explicit operator Char8(uint value) => throw null; + public static explicit operator Char8(long value) => throw null; + [CLSCompliant(false)] + public static explicit operator Char8(ulong value) => throw null; + public int CompareTo(Char8 other) => throw null; + public override bool Equals(object obj) => throw null; + public bool Equals(Char8 other) => throw null; + public override int GetHashCode() => throw null; + public override string ToString() => throw null; + } + public static partial class Utf8Extensions + { + public static ReadOnlySpan AsBytes(this ReadOnlySpan text) => throw null; + public static ReadOnlySpan AsBytes(this Utf8String text) => throw null; + public static ReadOnlySpan AsBytes(this Utf8String text, int start) => 
throw null; + public static ReadOnlySpan AsBytes(this Utf8String text, int start, int length) => throw null; + public static ReadOnlySpan AsSpan(this Utf8String text) => throw null; + public static ReadOnlySpan AsSpan(this Utf8String text, int start) => throw null; + public static ReadOnlySpan AsSpan(this Utf8String text, int start, int length) => throw null; + public static ReadOnlyMemory AsMemory(this Utf8String text) => throw null; + public static ReadOnlyMemory AsMemory(this Utf8String text, int start) => throw null; + public static ReadOnlyMemory AsMemory(this Utf8String text, Index startIndex) => throw null; + public static ReadOnlyMemory AsMemory(this Utf8String text, int start, int length) => throw null; + public static ReadOnlyMemory AsMemory(this Utf8String text, Range range) => throw null; + public static ReadOnlyMemory AsMemoryBytes(this Utf8String text) => throw null; + public static ReadOnlyMemory AsMemoryBytes(this Utf8String text, int start) => throw null; + public static ReadOnlyMemory AsMemoryBytes(this Utf8String text, Index startIndex) => throw null; + public static ReadOnlyMemory AsMemoryBytes(this Utf8String text, int start, int length) => throw null; + public static ReadOnlyMemory AsMemoryBytes(this Utf8String text, Range range) => throw null; + } public sealed partial class Utf8String : IEquatable { public static readonly Utf8String Empty; @@ -19,8 +80,13 @@ public Utf8String(char[] value, int startIndex, int length) { } [CLSCompliant(false)] public unsafe Utf8String(char* value) { } public Utf8String(string value) { } + public static explicit operator ReadOnlySpan(Utf8String value) => throw null; + public static implicit operator ReadOnlySpan(Utf8String value) => throw null; public static bool operator ==(Utf8String a, Utf8String b) => throw null; public static bool operator !=(Utf8String a, Utf8String b) => throw null; + public Char8 this[Index index] => throw null; + public Char8 this[int index] => throw null; + public Utf8String 
this[Range range] => throw null; public int Length => throw null; public override bool Equals(object obj) => throw null; public bool Equals(Utf8String value) => throw null; @@ -28,10 +94,13 @@ public Utf8String(string value) { } public override int GetHashCode() => throw null; [ComponentModel.EditorBrowsable(ComponentModel.EditorBrowsableState.Never)] // for compiler use only public ref readonly byte GetPinnableReference() => throw null; + public static bool IsNullOrEmpty(Utf8String value) => throw null; + public Utf8String Substring(Index startIndex) => throw null; + public Utf8String Substring(int startIndex) => throw null; + public Utf8String Substring(int startIndex, int length) => throw null; + public Utf8String Substring(Range range) => throw null; + public byte[] ToByteArray() => throw null; + public byte[] ToByteArray(int startIndex, int length) => throw null; public override string ToString() => throw null; } - public static partial class Utf8StringExtensions - { - public static ReadOnlySpan AsBytes(this Utf8String text) => throw null; - } } diff --git a/src/System.Utf8String/src/System.Utf8String.csproj b/src/System.Utf8String/src/System.Utf8String.csproj index 4d709abdbb61..1a00831c3b0e 100644 --- a/src/System.Utf8String/src/System.Utf8String.csproj +++ b/src/System.Utf8String/src/System.Utf8String.csproj @@ -6,9 +6,6 @@ netcoreapp-Unix-Debug;netcoreapp-Unix-Release;netcoreapp-Windows_NT-Debug;netcoreapp-Windows_NT-Release; System - - - diff --git a/src/System.Utf8String/src/Utf8StringExtensions.cs b/src/System.Utf8String/src/Utf8StringExtensions.cs deleted file mode 100644 index 6ed44a799a60..000000000000 --- a/src/System.Utf8String/src/Utf8StringExtensions.cs +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. 
- -using System; -using System.Collections.Generic; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Text; -using Internal.Runtime.CompilerServices; - -namespace System -{ - public static class Utf8StringExtensions - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static ReadOnlySpan AsBytes(this Utf8String text) - { - if (text is null) - { - return default; - } - - return MemoryMarshal.CreateReadOnlySpan(ref Unsafe.AsRef(in text.GetPinnableReference()), text.Length); - } - } -} diff --git a/src/System.Utf8String/tests/System.Utf8String.Tests.csproj b/src/System.Utf8String/tests/System.Utf8String.Tests.csproj index 0f3c0b403ca3..c08cc5cfed84 100644 --- a/src/System.Utf8String/tests/System.Utf8String.Tests.csproj +++ b/src/System.Utf8String/tests/System.Utf8String.Tests.csproj @@ -12,9 +12,14 @@ true - + + + + + + diff --git a/src/System.Utf8String/tests/System/Char8Tests.cs b/src/System.Utf8String/tests/System/Char8Tests.cs new file mode 100644 index 000000000000..7f3a132af7f6 --- /dev/null +++ b/src/System.Utf8String/tests/System/Char8Tests.cs @@ -0,0 +1,112 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Globalization; +using Xunit; + +namespace System.Tests +{ + public unsafe partial class Char8Tests + { + [Theory] + [InlineData(10, 20, -1)] + [InlineData(20, 10, 1)] + [InlineData(30, 30, 0)] + public static void CompareTo(Char8 a, Char8 b, int expectedSign) + { + Assert.Equal(expectedSign, Math.Sign(a.CompareTo(b))); + } + + [Theory] + [InlineData(-1)] + [InlineData(0xFF)] + [InlineData(0x80)] + [InlineData(0x00)] + [InlineData(0x1234)] + [InlineData(0x12345678)] + [InlineData(0x1234567812345678)] + public static void CastOperators(long value) + { + // Only the low byte is preserved when casting through Char8. 
+ + Assert.Equal((byte)value, (byte)(Char8)(byte)value); + Assert.Equal((sbyte)value, (sbyte)(Char8)(sbyte)value); + Assert.Equal((char)(value & 0xFF), (char)(Char8)(char)value); + Assert.Equal((short)(value & 0xFF), (short)(Char8)(short)value); + Assert.Equal((ushort)(value & 0xFF), (ushort)(Char8)(ushort)value); + Assert.Equal((int)(value & 0xFF), (int)(Char8)(int)value); + Assert.Equal((uint)(value & 0xFF), (uint)(Char8)(uint)value); + Assert.Equal((long)(value & 0xFF), (long)(Char8)(long)value); + Assert.Equal((ulong)(value & 0xFF), (ulong)(Char8)(ulong)value); + } + + [Fact] + public static void EqualsObject() + { + Assert.False(((Char8)42).Equals((object)null)); + Assert.False(((Char8)42).Equals((object)(int)42)); + Assert.False(((Char8)42).Equals((object)(Char8)43)); + Assert.True(((Char8)42).Equals((object)(Char8)42)); + } + + [Fact] + public static void EqualsChar8() + { + Assert.True(((Char8)42).Equals(42)); // implicit cast to Char8 + Assert.False(((Char8)42).Equals(43)); // implicit cast to Char8 + } + + [Fact] + public static void GetHashCode_ReturnsValue() + { + for (int i = 0; i <= byte.MaxValue; i++) + { + Assert.Equal(i, ((Char8)i).GetHashCode()); + } + } + + [Theory] + [InlineData(10, 20, false)] + [InlineData(20, 10, false)] + [InlineData(30, 30, true)] + public static void OperatorEquals(Char8 a, Char8 b, bool expected) + { + Assert.Equal(expected, (Char8)a == (Char8)b); + Assert.NotEqual(expected, (Char8)a != (Char8)b); + } + + [Theory] + [InlineData(10, 20, true)] + [InlineData(20, 10, false)] + [InlineData(29, 30, true)] + [InlineData(30, 30, false)] + [InlineData(31, 30, false)] + public static void OperatorLessThan(Char8 a, Char8 b, bool expected) + { + Assert.Equal(expected, (Char8)a < (Char8)b); + Assert.NotEqual(expected, (Char8)a >= (Char8)b); + } + + [Theory] + [InlineData(10, 20, false)] + [InlineData(20, 10, true)] + [InlineData(29, 30, false)] + [InlineData(30, 30, false)] + [InlineData(31, 30, true)] + public static void 
OperatorGreaterThan(Char8 a, Char8 b, bool expected) + { + Assert.Equal(expected, (Char8)a > (Char8)b); + Assert.NotEqual(expected, (Char8)a <= (Char8)b); + } + + [Fact] + public static void ToString_ReturnsHexValue() + { + for (int i = 0; i <= byte.MaxValue; i++) + { + Assert.Equal(i.ToString("X2", CultureInfo.InvariantCulture), ((Char8)i).ToString()); + } + } + } +} diff --git a/src/System.Utf8String/tests/System/MemoryTests.cs b/src/System.Utf8String/tests/System/MemoryTests.cs new file mode 100644 index 000000000000..2f168bf7c8ae --- /dev/null +++ b/src/System.Utf8String/tests/System/MemoryTests.cs @@ -0,0 +1,156 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Buffers; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using Xunit; + +using static System.Tests.Utf8TestUtilities; + +namespace System.Tests +{ + public partial class MemoryTests + { + [Fact] + public static void MemoryMarshal_TryGetArrayOfByte_Utf8String() + { + ReadOnlyMemory rom = u8("Hello").AsMemoryBytes(); + + Assert.False(MemoryMarshal.TryGetArray(rom, out ArraySegment segment)); + Assert.True(default(ArraySegment).Equals(segment)); + } + + [Fact] + public static void MemoryMarshal_TryGetArrayOfChar8_Utf8String() + { + ReadOnlyMemory rom = u8("Hello").AsMemory(); + + Assert.False(MemoryMarshal.TryGetArray(rom, out ArraySegment segment)); + Assert.True(default(ArraySegment).Equals(segment)); + } + + [Fact] + public unsafe static void MemoryOfByte_WithUtf8String_Pin() + { + Utf8String theString = u8("Hello"); + ReadOnlyMemory rom = theString.AsMemoryBytes(); + MemoryHandle memHandle = default; + try + { + memHandle = Unsafe.As, Memory>(ref rom).Pin(); + Assert.True(memHandle.Pointer == Unsafe.AsPointer(ref Unsafe.AsRef(in theString.GetPinnableReference()))); + } + finally + { + 
memHandle.Dispose(); + } + } + + [Fact] + public static void MemoryOfByte_WithUtf8String_ToString() + { + ReadOnlyMemory rom = u8("Hello").AsMemoryBytes(); + Assert.Equal("System.Memory[5]", Unsafe.As, Memory>(ref rom).ToString()); + } + + [Fact] + public unsafe static void MemoryOfChar8_WithUtf8String_Pin() + { + Utf8String theString = u8("Hello"); + ReadOnlyMemory rom = theString.AsMemory(); + MemoryHandle memHandle = default; + try + { + memHandle = Unsafe.As, Memory>(ref rom).Pin(); + Assert.True(memHandle.Pointer == Unsafe.AsPointer(ref Unsafe.AsRef(in theString.GetPinnableReference()))); + } + finally + { + memHandle.Dispose(); + } + } + + [Fact] + public static void MemoryOfChar8_WithUtf8String_ToString() + { + ReadOnlyMemory rom = u8("Hello").AsMemory(); + Assert.Equal("Hello", Unsafe.As, Memory>(ref rom).ToString()); + } + + [Fact] + public unsafe static void ReadOnlyMemoryOfByte_WithUtf8String_Pin() + { + Utf8String theString = u8("Hello"); + ReadOnlyMemory rom = theString.AsMemoryBytes(); + MemoryHandle memHandle = default; + try + { + memHandle = rom.Pin(); + Assert.True(memHandle.Pointer == Unsafe.AsPointer(ref Unsafe.AsRef(in theString.GetPinnableReference()))); + } + finally + { + memHandle.Dispose(); + } + } + + [Fact] + public static void ReadOnlyMemoryOfByte_WithUtf8String_ToString() + { + Assert.Equal("System.ReadOnlyMemory[5]", u8("Hello").AsMemoryBytes().ToString()); + } + + [Fact] + public unsafe static void ReadOnlyMemoryOfChar8_WithUtf8String_Pin() + { + Utf8String theString = u8("Hello"); + ReadOnlyMemory rom = theString.AsMemory(); + MemoryHandle memHandle = default; + try + { + memHandle = rom.Pin(); + Assert.True(memHandle.Pointer == Unsafe.AsPointer(ref Unsafe.AsRef(in theString.GetPinnableReference()))); + } + finally + { + memHandle.Dispose(); + } + } + + [Fact] + public static void ReadOnlyMemoryOfChar8_WithUtf8String_ToString() + { + Assert.Equal("Hello", u8("Hello").AsMemory().ToString()); + } + + [Fact] + public static void 
ReadOnlySpanOfByte_ToString() + { + ReadOnlySpan span = stackalloc byte[] { (byte)'H', (byte)'i' }; + Assert.Equal("System.ReadOnlySpan[2]", span.ToString()); + } + + [Fact] + public static void ReadOnlySpanOfChar8_ToString() + { + ReadOnlySpan span = stackalloc Char8[] { (Char8)'H', (Char8)'i' }; + Assert.Equal("Hi", span.ToString()); + } + + [Fact] + public static void SpanOfByte_ToString() + { + Span span = stackalloc byte[] { (byte)'H', (byte)'i' }; + Assert.Equal("System.Span[2]", span.ToString()); + } + + [Fact] + public static void SpanOfChar8_ToString() + { + Span span = stackalloc Char8[] { (Char8)'H', (Char8)'i' }; + Assert.Equal("Hi", span.ToString()); + } + } +} diff --git a/src/System.Utf8String/tests/System/ReflectionTests.cs b/src/System.Utf8String/tests/System/ReflectionTests.cs new file mode 100644 index 000000000000..4cbd4cf192d2 --- /dev/null +++ b/src/System.Utf8String/tests/System/ReflectionTests.cs @@ -0,0 +1,36 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Runtime.Serialization; +using Xunit; + +using static System.Tests.Utf8TestUtilities; + +namespace System.Tests +{ + public partial class ReflectionTests + { + [Fact] + public static void ActivatorCreateInstance_CanCallParameterfulCtor() + { + Utf8String theString = (Utf8String)Activator.CreateInstance(typeof(Utf8String), "Hello"); + Assert.Equal(u8("Hello"), theString); + } + + [Fact] + public static void ActivatorCreateInstance_CannotCallParameterlessCtor() + { + Assert.Throws(() => Activator.CreateInstance(typeof(Utf8String))); + } + + [Fact] + public static void FormatterServices_GetUninitializedObject_Throws() + { + // Like String, shouldn't be able to create an uninitialized Utf8String. 
+ + Assert.Throws(() => FormatterServices.GetSafeUninitializedObject(typeof(Utf8String))); + Assert.Throws(() => FormatterServices.GetUninitializedObject(typeof(Utf8String))); + } + } +} diff --git a/src/System.Utf8String/tests/System/Utf8ExtensionsTests.cs b/src/System.Utf8String/tests/System/Utf8ExtensionsTests.cs new file mode 100644 index 000000000000..a3c218230a3c --- /dev/null +++ b/src/System.Utf8String/tests/System/Utf8ExtensionsTests.cs @@ -0,0 +1,209 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using Xunit; + +using static System.Tests.Utf8TestUtilities; + +namespace System.Tests +{ + public partial class Utf8ExtensionsTests + { + [Fact] + public unsafe void AsBytes_FromSpan_Default() + { + // First, a default span should become a default span. + + Assert.True(default(ReadOnlySpan) == new ReadOnlySpan().AsBytes()); + + // Next, an empty but non-default span should become an empty but non-default span. + + Assert.True(new ReadOnlySpan((void*)0x12345, 0) == new ReadOnlySpan((void*)0x12345, 0).AsBytes()); + + // Finally, a span wrapping data should become a span wrapping that same data. 
+ + Utf8String theString = u8("Hello"); + Assert.True(MemoryMarshal.CreateReadOnlySpan(ref Unsafe.AsRef(in theString.GetPinnableReference()), 5) == ((ReadOnlySpan)theString).AsBytes()); + } + + [Fact] + public void AsBytes_FromUtf8String() + { + Assert.True(default(ReadOnlySpan) == ((Utf8String)null).AsBytes()); + + Utf8String theString = u8("Hello"); + Assert.True(MemoryMarshal.CreateReadOnlySpan(ref Unsafe.AsRef(in theString.GetPinnableReference()), 5) == theString.AsBytes()); + } + + [Fact] + public void AsBytes_FromUtf8String_WithStart() + { + Assert.True(default(ReadOnlySpan) == ((Utf8String)null).AsBytes(0)); + Assert.True(u8("Hello").AsBytes(5).IsEmpty); + + SpanAssert.Equal(new byte[] { (byte)'e', (byte)'l', (byte)'l', (byte)'o' }, u8("Hello").AsBytes(1)); + } + + [Fact] + public void AsBytes_FromUtf8String_WithStart_ArgOutOfRange() + { + Assert.Throws("start", () => ((Utf8String)null).AsBytes(1)); + Assert.Throws("start", () => u8("Hello").AsBytes(-1)); + Assert.Throws("start", () => u8("Hello").AsBytes(6)); + } + + [Fact] + public void AsBytes_FromUtf8String_WithStartAndLength() + { + Assert.True(default(ReadOnlySpan) == ((Utf8String)null).AsBytes(0, 0)); + Assert.True(u8("Hello").AsBytes(5, 0).IsEmpty); + + SpanAssert.Equal(new byte[] { (byte)'e', (byte)'l', (byte)'l' }, u8("Hello").AsBytes(1, 3)); + } + + [Fact] + public void AsBytes_FromUtf8String_WithStartAndLength_ArgOutOfRange() + { + Assert.Throws("start", () => ((Utf8String)null).AsBytes(0, 1)); + Assert.Throws("start", () => ((Utf8String)null).AsBytes(1, 0)); + Assert.Throws("start", () => u8("Hello").AsBytes(5, 1)); + Assert.Throws("start", () => u8("Hello").AsBytes(4, -2)); + } + + [Fact] + public void AsMemory_FromUtf8String() + { + Assert.True(default(ReadOnlyMemory).Equals(((Utf8String)null).AsMemory())); + + Utf8String theString = u8("Hello"); + Assert.True(MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As(ref Unsafe.AsRef(in theString.GetPinnableReference())), 5) == 
theString.AsMemory().Span); + } + + [Fact] + public void AsMemory_FromUtf8String_WithStart() + { + Assert.True(default(ReadOnlyMemory).Equals(((Utf8String)null).AsMemory(0))); + Assert.True(u8("Hello").AsMemory(5).IsEmpty); + + SpanAssert.Equal(new Char8[] { (Char8)'e', (Char8)'l', (Char8)'l', (Char8)'o' }, u8("Hello").AsMemory(1).Span); + } + + [Fact] + public void AsMemory_FromUtf8String_WithStart_ArgOutOfRange() + { + Assert.Throws("start", () => ((Utf8String)null).AsMemory(1)); + Assert.Throws("start", () => u8("Hello").AsMemory(-1)); + Assert.Throws("start", () => u8("Hello").AsMemory(6)); + } + + [Fact] + public void AsMemory_FromUtf8String_WithStartAndLength() + { + Assert.True(default(ReadOnlyMemory).Equals(((Utf8String)null).AsMemory(0, 0))); + Assert.True(u8("Hello").AsMemory(5, 0).IsEmpty); + + SpanAssert.Equal(new Char8[] { (Char8)'e', (Char8)'l', (Char8)'l' }, u8("Hello").AsMemory(1, 3).Span); + } + + [Fact] + public void AsMemory_FromUtf8String_WithStartAndLength_ArgOutOfRange() + { + Assert.Throws("start", () => ((Utf8String)null).AsMemory(0, 1)); + Assert.Throws("start", () => ((Utf8String)null).AsMemory(1, 0)); + Assert.Throws("start", () => u8("Hello").AsMemory(5, 1)); + Assert.Throws("start", () => u8("Hello").AsMemory(4, -2)); + } + + [Fact] + public void AsMemoryBytes_FromUtf8String() + { + Assert.True(default(ReadOnlyMemory).Equals(((Utf8String)null).AsMemoryBytes())); + + Utf8String theString = u8("Hello"); + Assert.True(MemoryMarshal.CreateReadOnlySpan(ref Unsafe.AsRef(in theString.GetPinnableReference()), 5) == theString.AsMemoryBytes().Span); + } + + [Fact] + public void AsMemoryBytes_FromUtf8String_WithStart() + { + Assert.True(default(ReadOnlyMemory).Equals(((Utf8String)null).AsMemoryBytes(0))); + Assert.True(u8("Hello").AsMemoryBytes(5).IsEmpty); + + SpanAssert.Equal(new byte[] { (byte)'e', (byte)'l', (byte)'l', (byte)'o' }, u8("Hello").AsMemoryBytes(1).Span); + } + + [Fact] + public void 
AsMemoryBytes_FromUtf8String_WithStart_ArgOutOfRange() + { + Assert.Throws("start", () => ((Utf8String)null).AsMemoryBytes(1)); + Assert.Throws("start", () => u8("Hello").AsMemoryBytes(-1)); + Assert.Throws("start", () => u8("Hello").AsMemoryBytes(6)); + } + + [Fact] + public void AsMemoryBytes_FromUtf8String_WithStartAndLength() + { + Assert.True(default(ReadOnlyMemory).Equals(((Utf8String)null).AsMemoryBytes(0, 0))); + Assert.True(u8("Hello").AsMemoryBytes(5, 0).IsEmpty); + + SpanAssert.Equal(new byte[] { (byte)'e', (byte)'l', (byte)'l' }, u8("Hello").AsMemoryBytes(1, 3).Span); + } + + [Fact] + public void AsMemoryBytes_FromUtf8String_WithStartAndLength_ArgOutOfRange() + { + Assert.Throws("start", () => ((Utf8String)null).AsMemoryBytes(0, 1)); + Assert.Throws("start", () => ((Utf8String)null).AsMemoryBytes(1, 0)); + Assert.Throws("start", () => u8("Hello").AsMemoryBytes(5, 1)); + Assert.Throws("start", () => u8("Hello").AsMemoryBytes(4, -2)); + } + + [Fact] + public void AsSpan_FromUtf8String() + { + Assert.True(default(ReadOnlySpan) == ((Utf8String)null).AsSpan()); + + Utf8String theString = u8("Hello"); + Assert.True(MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As(ref Unsafe.AsRef(in theString.GetPinnableReference())), 5) == theString.AsSpan()); + } + + [Fact] + public void AsSpan_FromUtf8String_WithStart() + { + Assert.True(default(ReadOnlySpan) == ((Utf8String)null).AsSpan(0)); + Assert.True(u8("Hello").AsSpan(5).IsEmpty); + + SpanAssert.Equal(new Char8[] { (Char8)'e', (Char8)'l', (Char8)'l', (Char8)'o' }, u8("Hello").AsSpan(1)); + } + + [Fact] + public void AsSpan_FromUtf8String_WithStart_ArgOutOfRange() + { + Assert.Throws("start", () => ((Utf8String)null).AsSpan(1)); + Assert.Throws("start", () => u8("Hello").AsSpan(-1)); + Assert.Throws("start", () => u8("Hello").AsSpan(6)); + } + + [Fact] + public void AsSpan_FromUtf8String_WithStartAndLength() + { + Assert.True(default(ReadOnlySpan) == ((Utf8String)null).AsSpan(0, 0)); + Assert.True(u8("Hello").AsSpan(5, 
0).IsEmpty); + + SpanAssert.Equal(new Char8[] { (Char8)'e', (Char8)'l', (Char8)'l' }, u8("Hello").AsSpan(1, 3)); + } + + [Fact] + public void AsSpan_FromUtf8String_WithStartAndLength_ArgOutOfRange() + { + Assert.Throws("start", () => ((Utf8String)null).AsSpan(0, 1)); + Assert.Throws("start", () => ((Utf8String)null).AsSpan(1, 0)); + Assert.Throws("start", () => u8("Hello").AsSpan(5, 1)); + Assert.Throws("start", () => u8("Hello").AsSpan(4, -2)); + } + } +} diff --git a/src/System.Utf8String/tests/System/Utf8StringTests.Ctor.cs b/src/System.Utf8String/tests/System/Utf8StringTests.Ctor.cs index 883b00841895..48628033c2c0 100644 --- a/src/System.Utf8String/tests/System/Utf8StringTests.Ctor.cs +++ b/src/System.Utf8String/tests/System/Utf8StringTests.Ctor.cs @@ -2,10 +2,9 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; -using System.Collections.Generic; -using System.Globalization; -using System.Reflection; +using System.Buffers; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using Xunit; using static System.Tests.Utf8TestUtilities; @@ -44,22 +43,23 @@ public static void Ctor_ByteArrayOffset_InvalidData_FixesUpData() [Fact] public static void Ctor_BytePointer_NullOrEmpty_ReturnsEmpty() { - byte nullByte = 0; + byte[] inputData = new byte[] { 0 }; // standalone null byte - Assert.Same(Utf8String.Empty, new Utf8String((byte*)null)); - Assert.Same(Utf8String.Empty, new Utf8String(&nullByte)); + using (BoundedMemory boundedMemory = BoundedMemory.AllocateFromExistingData(inputData)) + { + Assert.Same(Utf8String.Empty, new Utf8String((byte*)null)); + Assert.Same(Utf8String.Empty, new Utf8String((byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(boundedMemory.Span)))); + } } [Fact] public static void Ctor_BytePointer_ValidData_ReturnsOriginalContents() { byte[] inputData = new byte[] { (byte)'H', (byte)'e', (byte)'l', (byte)'l', 
(byte)'o', (byte)'\0' }; - Utf8String expected = u8("Hello"); - fixed (byte* pData = inputData) + using (BoundedMemory boundedMemory = BoundedMemory.AllocateFromExistingData(inputData)) { - var actual = new Utf8String(pData); - Assert.Equal(expected, actual); + Assert.Equal(u8("Hello"), new Utf8String((byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(boundedMemory.Span)))); } } @@ -67,12 +67,10 @@ public static void Ctor_BytePointer_ValidData_ReturnsOriginalContents() public static void Ctor_BytePointer_InvalidData_FixesUpData() { byte[] inputData = new byte[] { (byte)'H', (byte)'e', (byte)0xFF, (byte)'l', (byte)'o', (byte)'\0' }; - Utf8String expected = u8("He\uFFFDlo"); - fixed (byte* pData = inputData) + using (BoundedMemory boundedMemory = BoundedMemory.AllocateFromExistingData(inputData)) { - var actual = new Utf8String(pData); - Assert.Equal(expected, actual); + Assert.Equal(u8("He\uFFFDlo"), new Utf8String((byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(boundedMemory.Span)))); } } @@ -132,35 +130,34 @@ public static void Ctor_CharArrayOffset_InvalidData_FixesUpData() [Fact] public static void Ctor_CharPointer_NullOrEmpty_ReturnsEmpty() { - char nullChar = '\0'; + char[] inputData = new char[] { '\0' }; // standalone null char - Assert.Same(Utf8String.Empty, new Utf8String((char*)null)); - Assert.Same(Utf8String.Empty, new Utf8String(&nullChar)); + using (BoundedMemory boundedMemory = BoundedMemory.AllocateFromExistingData(inputData)) + { + Assert.Same(Utf8String.Empty, new Utf8String((char*)null)); + Assert.Same(Utf8String.Empty, new Utf8String((char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(boundedMemory.Span)))); + } } [Fact] public static void Ctor_CharPointer_ValidData_ReturnsOriginalContents() { - const string inputData = "Hello"; - Utf8String expected = u8("Hello"); + char[] inputData = new char[] { 'H', 'e', 'l', 'l', 'o', '\0' }; - fixed (char* pData = inputData) + using (BoundedMemory boundedMemory = 
BoundedMemory.AllocateFromExistingData(inputData)) { - var actual = new Utf8String(pData); - Assert.Equal(expected, actual); + Assert.Equal(u8("Hello"), new Utf8String((char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(boundedMemory.Span)))); } } [Fact] public static void Ctor_CharPointer_InvalidData_FixesUpData() { - char[] inputData = new char[] { 'H', 'e', '\uD800', 'l', 'o', '\0' }; - Utf8String expected = u8("He\uFFFDlo"); + char[] inputData = new char[] { 'H', 'e', '\uD800', 'l', 'o', '\0' }; // standalone surrogate - fixed (char* pData = inputData) + using (BoundedMemory boundedMemory = BoundedMemory.AllocateFromExistingData(inputData)) { - var actual = new Utf8String(pData); - Assert.Equal(expected, actual); + Assert.Equal(u8("He\uFFFDlo"), new Utf8String((char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(boundedMemory.Span)))); } } diff --git a/src/System.Utf8String/tests/System/Utf8StringTests.Substring.cs b/src/System.Utf8String/tests/System/Utf8StringTests.Substring.cs new file mode 100644 index 000000000000..b024dac6520e --- /dev/null +++ b/src/System.Utf8String/tests/System/Utf8StringTests.Substring.cs @@ -0,0 +1,134 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Reflection; +using Xunit; + +using static System.Tests.Utf8TestUtilities; + +namespace System.Tests +{ + public unsafe partial class Utf8StringTests + { + [Theory] + [InlineData("Hello", 0, false, "Hello")] + [InlineData("Hello", 0, true, "")] + [InlineData("Hello", 2, false, "llo")] + [InlineData("Hello", 2, true, "lo")] + [InlineData("Hello", 5, false, "")] + [InlineData("Hello", 5, true, "Hello")] + [InlineData("", 0, true, "")] + [InlineData("", 0, false, "")] + public static void Substring_Index(string sAsString, int indexValue, bool fromEnd, string expectedAsString) + { + Index index = new Index(indexValue, fromEnd); + + void Substring_IndexCore(Utf8String s, Utf8String expected) + { + Assert.Equal(expected, s.Substring(index)); + + if (index.Value == 0) + { + Assert.Same(index.IsFromEnd ? Utf8String.Empty : s, s.Substring(index)); + } + + if (index.Value == s.Length) + { + Assert.Same(index.IsFromEnd ? 
s : Utf8String.Empty, s.Substring(index)); + } + }; + + Substring_IndexCore(new Utf8String(sAsString), new Utf8String(expectedAsString)); + } + + [Theory] + [InlineData("Hello", 0, 5, "Hello")] + [InlineData("Hello", 0, 3, "Hel")] + [InlineData("Hello", 2, 3, "llo")] + [InlineData("Hello", 5, 0, "")] + [InlineData("", 0, 0, "")] + public static void Substring_Int(string sAsString, int startIndex, int length, string expectedAsString) + { + void Substring_IntCore(Utf8String s, Utf8String expected) + { + if (startIndex + length == s.Length) + { + Assert.Equal(expected, s.Substring(startIndex)); + Assert.Equal(expected, new Utf8String(s.AsBytes(startIndex))); + + if (length == 0) + { + Assert.Same(Utf8String.Empty, s.Substring(startIndex)); + } + } + Assert.Equal(expected, s.Substring(startIndex, length)); + + Assert.Equal(expected, new Utf8String(s.AsBytes(startIndex, length))); + + if (length == s.Length) + { + Assert.Same(s, s.Substring(startIndex)); + Assert.Same(s, s.Substring(startIndex, length)); + } + else if (length == 0) + { + Assert.Same(Utf8String.Empty, s.Substring(startIndex, length)); + } + }; + + Substring_IntCore(new Utf8String(sAsString), new Utf8String(expectedAsString)); + } + + [Fact] + public static void Substring_Range() + { + void Substring_RangeCore(Utf8String s, Range range, Utf8String expected) + { + Assert.Equal(expected, s.Substring(range)); + Assert.Equal(expected, s[range]); + + if (expected.Length == s.Length) + { + Assert.Same(s, s.Substring(range)); + Assert.Same(s, s[range]); + } + + if (expected.Length == 0) + { + Assert.Same(Utf8String.Empty, s.Substring(range)); + Assert.Same(Utf8String.Empty, s[range]); + } + }; + + Substring_RangeCore(u8("Hello"), .., u8("Hello")); + Substring_RangeCore(u8("Hello"), 0..3, u8("Hel")); + Substring_RangeCore(u8("Hello"), ..^4, u8("H")); + Substring_RangeCore(u8("Hello"), 1.., u8("ello")); + Substring_RangeCore(u8("Hello"), ..^5, Utf8String.Empty); + } + + [Fact] + public static void 
Substring_Invalid() + { + // Start index < 0 + AssertExtensions.Throws("startIndex", () => u8("foo").Substring(-1)); + AssertExtensions.Throws("startIndex", () => u8("foo").Substring(-1, 0)); + + // Start index > string.Length + AssertExtensions.Throws("startIndex", () => u8("foo").Substring(4)); + AssertExtensions.Throws("startIndex", () => u8("foo").Substring(4, 0)); + + // Length < 0 or length > string.Length + AssertExtensions.Throws("length", () => u8("foo").Substring(0, -1)); + AssertExtensions.Throws("length", () => u8("foo").Substring(0, 4)); + + // Start index + length > string.Length + AssertExtensions.Throws("length", () => u8("foo").Substring(3, 2)); + AssertExtensions.Throws("length", () => u8("foo").Substring(2, 2)); + } + } +} diff --git a/src/System.Utf8String/tests/System/Utf8StringTests.cs b/src/System.Utf8String/tests/System/Utf8StringTests.cs index 08edf43634e6..fb62b311111a 100644 --- a/src/System.Utf8String/tests/System/Utf8StringTests.cs +++ b/src/System.Utf8String/tests/System/Utf8StringTests.cs @@ -2,12 +2,10 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
-using System; -using System.Collections.Generic; -using System.Globalization; -using System.Reflection; using Xunit; +using static System.Tests.Utf8TestUtilities; + namespace System.Tests { public unsafe partial class Utf8StringTests @@ -24,5 +22,157 @@ public static void Empty_ReturnsSingleton() { Assert.Same(Utf8String.Empty, Utf8String.Empty); } + + [Theory] + [InlineData(null, null, true)] + [InlineData("", null, false)] + [InlineData(null, "", false)] + [InlineData("hello", null, false)] + [InlineData(null, "hello", false)] + [InlineData("hello", "hello", true)] + [InlineData("hello", "Hello", false)] + [InlineData("hello there", "hello", false)] + public static void Equality_Ordinal(string aString, string bString, bool expected) + { + Utf8String a = u8(aString); + Utf8String b = u8(bString); + + // Operators + + Assert.Equal(expected, a == b); + Assert.NotEqual(expected, a != b); + + // Static methods + + Assert.Equal(expected, Utf8String.Equals(a, b)); + + // Instance methods + + if (a != null) + { + Assert.Equal(expected, a.Equals(b)); + Assert.Equal(expected, a.Equals((object)b)); + } + } + + [Fact] + public static void GetHashCode_ReturnsRandomized() + { + Utf8String a = u8("Hello"); + Utf8String b = new Utf8String(a.AsBytes()); + + Assert.NotSame(a, b); + Assert.Equal(a.GetHashCode(), b.GetHashCode()); + + Utf8String c = u8("Goodbye"); + Utf8String d = new Utf8String(c.AsBytes()); + + Assert.NotSame(c, d); + Assert.Equal(c.GetHashCode(), d.GetHashCode()); + + Assert.NotEqual(a.GetHashCode(), c.GetHashCode()); + } + + [Fact] + public static void GetPinnableReference_CalledMultipleTimes_ReturnsSameValue() + { + var utf8 = u8("Hello!"); + + fixed (byte* pA = utf8) + fixed (byte* pB = utf8) + { + Assert.True(pA == pB); + } + } + + [Fact] + public static void GetPinnableReference_Empty() + { + fixed (byte* pStr = Utf8String.Empty) + { + Assert.True(pStr != null); + Assert.Equal((byte)0, *pStr); // should point to null terminator + } + } + + [Fact] + public 
static void GetPinnableReference_NotEmpty() + { + fixed (byte* pStr = u8("Hello!")) + { + Assert.True(pStr != null); + + Assert.Equal((byte)'H', pStr[0]); + Assert.Equal((byte)'e', pStr[1]); + Assert.Equal((byte)'l', pStr[2]); + Assert.Equal((byte)'l', pStr[3]); + Assert.Equal((byte)'o', pStr[4]); + Assert.Equal((byte)'!', pStr[5]); + Assert.Equal((byte)'\0', pStr[6]); + } + } + + [Theory] + [InlineData("", true)] + [InlineData("not empty", false)] + public static void IsNullOrEmpty(string value, bool expectedIsNullOrEmpty) + { + Assert.Equal(expectedIsNullOrEmpty, Utf8String.IsNullOrEmpty(new Utf8String(value))); + } + + [Fact] + public static void IsNullOrEmpty_Null_ReturnsTrue() + { + Assert.True(Utf8String.IsNullOrEmpty(null)); + } + + [Fact] + public static void ToByteArray_Empty() + { + Assert.Same(Array.Empty(), Utf8String.Empty.ToByteArray()); + Assert.Same(Array.Empty(), u8("Hello!").ToByteArray(0, 0)); + Assert.Same(Array.Empty(), u8("Hello!").ToByteArray(3, 0)); + Assert.Same(Array.Empty(), u8("Hello!").ToByteArray(6, 0)); + } + + [Fact] + public static void ToByteArray_NotEmpty() + { + Assert.Equal(new byte[] { (byte)'H', (byte)'i' }, u8("Hi").ToByteArray()); + Assert.Equal(new byte[] { (byte)'l', (byte)'l', (byte)'o' }, u8("Hello!").ToByteArray(2, 3)); + } + + [Theory] + [InlineData("", 1, 0, "startIndex")] + [InlineData("", 0, 1, "length")] + [InlineData("Hello", 5, 2, "length")] + [InlineData("Hello", 5, -1, "length")] + [InlineData("Hello", -1, 4, "startIndex")] + public static void ToByteArray_Invalid(string value, int startIndex, int length, string exceptionParamName) + { + Utf8String utf8String = u8(value); + Assert.Throws(exceptionParamName, () => utf8String.ToByteArray(startIndex, length)); + } + + [Theory] + [InlineData("")] + [InlineData("Hello!")] + public static void ToString_ReturnsUtf16(string value) + { + Assert.Equal(value, u8(value).ToString()); + } + + [Fact] + public static void ToString_ReturnsUtf16_WithFixups() + { + Utf8String 
newString = new Utf8String("Hello"); + + fixed (byte* pNewString = newString) + { + pNewString[2] = 0xFF; // corrupt this data + } + + Assert.Equal("He\uFFFDlo", newString.ToString()); + } } } diff --git a/src/System.Utf8String/tests/System/Utf8TestUtilities.cs b/src/System.Utf8String/tests/System/Utf8TestUtilities.cs index 37edbb27b3ec..20f9f0f63fba 100644 --- a/src/System.Utf8String/tests/System/Utf8TestUtilities.cs +++ b/src/System.Utf8String/tests/System/Utf8TestUtilities.cs @@ -2,27 +2,76 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; -using System.Collections.Generic; +using System.IO; +using System.Reflection; using System.Text; +using Xunit; namespace System.Tests { public static class Utf8TestUtilities { + private static readonly Lazy> _utf8StringFactory = CreateUtf8StringFactory(); + + private static Lazy> CreateUtf8StringFactory() + { + return new Lazy>(() => + { + MethodInfo fastAllocateMethod = typeof(Utf8String).GetMethod("FastAllocate", BindingFlags.NonPublic | BindingFlags.Static, null, new[] { typeof(int) }, null); + Assert.NotNull(fastAllocateMethod); + return (Func)fastAllocateMethod.CreateDelegate(typeof(Func)); + }); + } + /// /// Mimics returning a literal instance. /// - public static Utf8String u8(string str) + public unsafe static Utf8String u8(string str) { - if (string.IsNullOrEmpty(str)) + if (str is null) + { + return null; + } + else if (str.Length == 0) { return Utf8String.Empty; } - // TODO: Call into ctor. + // First, transcode UTF-16 to UTF-8. We use direct by-scalar transcoding here + // because we have good reference implementation tests for this and it'll help + // catch any errors we introduce to our bulk transcoding implementations. 
+ + MemoryStream memStream = new MemoryStream(); + + Span utf8Bytes = stackalloc byte[4]; // 4 UTF-8 code units is the largest any scalar value can be encoded as + + int index = 0; + while (index < str.Length) + { + if (Rune.TryGetRuneAt(str, index, out Rune value) && value.TryEncodeToUtf8Bytes(utf8Bytes, out int bytesWritten)) + { + memStream.Write(utf8Bytes.Slice(0, bytesWritten)); + index += value.Utf16SequenceLength; + } + else + { + throw new ArgumentException($"String '{str}' is not a well-formed UTF-16 string."); + } + } + + Assert.True(memStream.TryGetBuffer(out ArraySegment buffer)); + + // Now allocate a UTF-8 string instance and set this as the contents. + // We do it this way rather than go through a public ctor because we don't + // want the "control" part of our unit tests to depend on the code under test. + + Utf8String newUtf8String = _utf8StringFactory.Value(buffer.Count); + fixed (byte* pNewUtf8String = newUtf8String) + { + buffer.AsSpan().CopyTo(new Span(pNewUtf8String, newUtf8String.Length)); + } - return new Utf8String(str); + return newUtf8String; } } } diff --git a/src/System.Utf8String/tests/Xunit/SpanAssert.cs b/src/System.Utf8String/tests/Xunit/SpanAssert.cs index 919744945be0..59f21c0e6b45 100644 --- a/src/System.Utf8String/tests/Xunit/SpanAssert.cs +++ b/src/System.Utf8String/tests/Xunit/SpanAssert.cs @@ -11,12 +11,26 @@ public static class SpanAssert { public static void Equal(ReadOnlySpan a, ReadOnlySpan b, IEqualityComparer comparer = null) where T : IEquatable { - Assert.Equal(a.ToArray(), b.ToArray(), comparer); + if (comparer is null) + { + Assert.Equal(a.ToArray(), b.ToArray()); + } + else + { + Assert.Equal(a.ToArray(), b.ToArray(), comparer); + } } public static void Equal(Span a, Span b, IEqualityComparer comparer = null) where T : IEquatable { - Assert.Equal(a.ToArray(), b.ToArray(), comparer); + if (comparer is null) + { + Assert.Equal(a.ToArray(), b.ToArray()); + } + else + { + Assert.Equal(a.ToArray(), b.ToArray(), 
comparer); + } } } } From d991c08862b7ba6e08a07de292e33ede74d1017f Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Wed, 13 Mar 2019 14:15:02 -0700 Subject: [PATCH 4/9] PR naming feedback --- src/System.Utf8String/ref/System.Utf8String.cs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/System.Utf8String/ref/System.Utf8String.cs b/src/System.Utf8String/ref/System.Utf8String.cs index 7ec621dcacb0..648fc1165d4d 100644 --- a/src/System.Utf8String/ref/System.Utf8String.cs +++ b/src/System.Utf8String/ref/System.Utf8String.cs @@ -10,12 +10,12 @@ namespace System public readonly partial struct Char8 : IComparable, IEquatable { private readonly int _dummy; - public static bool operator ==(Char8 a, Char8 b) => throw null; - public static bool operator !=(Char8 a, Char8 b) => throw null; - public static bool operator <(Char8 a, Char8 b) => throw null; - public static bool operator <=(Char8 a, Char8 b) => throw null; - public static bool operator >(Char8 a, Char8 b) => throw null; - public static bool operator >=(Char8 a, Char8 b) => throw null; + public static bool operator ==(Char8 left, Char8 right) => throw null; + public static bool operator !=(Char8 left, Char8 right) => throw null; + public static bool operator <(Char8 left, Char8 right) => throw null; + public static bool operator <=(Char8 left, Char8 right) => throw null; + public static bool operator >(Char8 left, Char8 right) => throw null; + public static bool operator >=(Char8 left, Char8 right) => throw null; public static implicit operator byte(Char8 value) => throw null; [CLSCompliant(false)] public static explicit operator sbyte(Char8 value) => throw null; @@ -82,15 +82,15 @@ public unsafe Utf8String(char* value) { } public Utf8String(string value) { } public static explicit operator ReadOnlySpan(Utf8String value) => throw null; public static implicit operator ReadOnlySpan(Utf8String value) => throw null; - public static bool operator ==(Utf8String a, Utf8String b) 
=> throw null; - public static bool operator !=(Utf8String a, Utf8String b) => throw null; + public static bool operator ==(Utf8String left, Utf8String right) => throw null; + public static bool operator !=(Utf8String left, Utf8String right) => throw null; public Char8 this[Index index] => throw null; public Char8 this[int index] => throw null; public Utf8String this[Range range] => throw null; public int Length => throw null; public override bool Equals(object obj) => throw null; public bool Equals(Utf8String value) => throw null; - public static bool Equals(Utf8String a, Utf8String b) => throw null; + public static bool Equals(Utf8String left, Utf8String right) => throw null; public override int GetHashCode() => throw null; [ComponentModel.EditorBrowsable(ComponentModel.EditorBrowsableState.Never)] // for compiler use only public ref readonly byte GetPinnableReference() => throw null; From ff549d3f33006ae1d35be461486b6e35b3a1aa7a Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Wed, 13 Mar 2019 17:34:46 -0700 Subject: [PATCH 5/9] Add Utf8StringContent --- .../ref/System.Utf8String.cs | 11 ++ .../ref/System.Utf8String.csproj | 8 +- .../src/System.Utf8String.csproj | 7 + .../src/System/IO/Utf8StringStream.cs | 133 ++++++++++++++++++ .../src/System/Net/Http/Utf8StringContent.cs | 55 ++++++++ 5 files changed, 211 insertions(+), 3 deletions(-) create mode 100644 src/System.Utf8String/src/System/IO/Utf8StringStream.cs create mode 100644 src/System.Utf8String/src/System/Net/Http/Utf8StringContent.cs diff --git a/src/System.Utf8String/ref/System.Utf8String.cs b/src/System.Utf8String/ref/System.Utf8String.cs index 648fc1165d4d..f84272572246 100644 --- a/src/System.Utf8String/ref/System.Utf8String.cs +++ b/src/System.Utf8String/ref/System.Utf8String.cs @@ -104,3 +104,14 @@ public Utf8String(string value) { } public override string ToString() => throw null; } } +namespace System.Net.Http +{ + public sealed partial class Utf8StringContent : System.Net.Http.HttpContent + { 
+ public Utf8StringContent(Utf8String content) { } + public Utf8StringContent(Utf8String content, string mediaType) { } + protected override System.Threading.Tasks.Task CreateContentReadStreamAsync() => throw null; + protected override System.Threading.Tasks.Task SerializeToStreamAsync(System.IO.Stream stream, System.Net.TransportContext context) => throw null; + protected override bool TryComputeLength(out long length) => throw null; + } +} diff --git a/src/System.Utf8String/ref/System.Utf8String.csproj b/src/System.Utf8String/ref/System.Utf8String.csproj index 1c4f8ef6495a..083ec8bdeed3 100644 --- a/src/System.Utf8String/ref/System.Utf8String.csproj +++ b/src/System.Utf8String/ref/System.Utf8String.csproj @@ -1,4 +1,4 @@ - + true {7AF57E6B-2CED-45C9-8BCA-5BBA60D018E0} @@ -8,7 +8,9 @@ - - + + + + diff --git a/src/System.Utf8String/src/System.Utf8String.csproj b/src/System.Utf8String/src/System.Utf8String.csproj index 1a00831c3b0e..964097b580ae 100644 --- a/src/System.Utf8String/src/System.Utf8String.csproj +++ b/src/System.Utf8String/src/System.Utf8String.csproj @@ -6,7 +6,14 @@ netcoreapp-Unix-Debug;netcoreapp-Unix-Release;netcoreapp-Windows_NT-Debug;netcoreapp-Windows_NT-Release; System + + + + + + + diff --git a/src/System.Utf8String/src/System/IO/Utf8StringStream.cs b/src/System.Utf8String/src/System/IO/Utf8StringStream.cs new file mode 100644 index 000000000000..c2ffa238f458 --- /dev/null +++ b/src/System.Utf8String/src/System/IO/Utf8StringStream.cs @@ -0,0 +1,133 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Threading; +using System.Threading.Tasks; + +namespace System.IO +{ + internal sealed class Utf8StringStream : Stream + { + private readonly Utf8String _content; + private int _position; + + public Utf8StringStream(Utf8String content) + { + _content = content ?? 
Utf8String.Empty;
+        }
+
+        public override bool CanRead => true;
+
+        public override bool CanSeek => true;
+
+        public override bool CanTimeout => false; // in-memory stream never times out; ReadTimeout/WriteTimeout are not overridden
+
+        public override bool CanWrite => false;
+
+        public override long Length => _content.Length;
+
+        public override long Position
+        {
+            get => _position;
+            set
+            {
+                if ((ulong)value > (uint)_content.Length) // one unsigned compare rejects negative values and values past the end
+                {
+                    throw new ArgumentOutOfRangeException(nameof(value));
+                }
+
+                _position = (int)value;
+            }
+        }
+
+        public override void Flush()
+        {
+            /* no-op */
+        }
+
+        public override Task FlushAsync(CancellationToken cancellationToken)
+        {
+            /* no-op */
+            return Task.CompletedTask;
+        }
+
+        public override int Read(byte[] buffer, int offset, int count)
+        {
+            return Read(new Span<byte>(buffer, offset, count));
+        }
+
+        public override int Read(Span<byte> buffer)
+        {
+            ReadOnlySpan<byte> contentToWrite = _content.AsBytes(_position); // everything not yet consumed
+            if (buffer.Length < contentToWrite.Length)
+            {
+                contentToWrite = contentToWrite.Slice(0, buffer.Length); // keep the FIRST buffer.Length unread bytes; Slice(start) alone would skip them
+            }
+
+            contentToWrite.CopyTo(buffer);
+            _position += contentToWrite.Length;
+
+            return contentToWrite.Length; // 0 once the end of the content has been reached
+        }
+
+        public override Task<int> ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken)
+        {
+            return Task.FromResult(Read(new Span<byte>(buffer, offset, count)));
+        }
+
+        public override ValueTask<int> ReadAsync(Memory<byte> buffer, CancellationToken cancellationToken = default)
+        {
+            return new ValueTask<int>(Read(buffer.Span));
+        }
+
+        public override int ReadByte()
+        {
+            int position = _position;
+            if ((uint)position >= (uint)_content.Length) // already at the end of the content
+            {
+                return -1;
+            }
+
+            _position++;
+            return _content[position];
+        }
+
+        public override long Seek(long offset, SeekOrigin origin)
+        {
+            switch (origin) // rebase 'offset' so that it is relative to the start of the content
+            {
+                case SeekOrigin.Begin:
+                    break;
+                case SeekOrigin.Current:
+                    offset += _position;
+                    break;
+                case SeekOrigin.End:
+                    offset += _content.Length;
+                    break;
+                default:
+                    throw new ArgumentOutOfRangeException(nameof(origin));
+            }
+
+            if ((ulong)offset > (uint)_content.Length) // negative or past-the-end targets are rejected
+            {
+                throw new 
ArgumentOutOfRangeException(nameof(offset));
+            }
+
+            _position = (int)offset; // cast is safe: offset was range-checked against _content.Length just above
+            return offset;
+        }
+
+        public override void SetLength(long value) => throw new NotSupportedException();
+
+        public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException();
+
+        public override void Write(ReadOnlySpan<byte> buffer) => throw new NotSupportedException();
+
+        public override Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) => throw new NotSupportedException();
+
+        public override ValueTask WriteAsync(ReadOnlyMemory<byte> buffer, CancellationToken cancellationToken = default) => throw new NotSupportedException();
+
+        public override void WriteByte(byte value) => throw new NotSupportedException();
+    }
+}
diff --git a/src/System.Utf8String/src/System/Net/Http/Utf8StringContent.cs b/src/System.Utf8String/src/System/Net/Http/Utf8StringContent.cs
new file mode 100644
index 000000000000..18b36eed9f0e
--- /dev/null
+++ b/src/System.Utf8String/src/System/Net/Http/Utf8StringContent.cs
@@ -0,0 +1,55 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.IO;
+using System.Net.Http.Headers;
+using System.Threading.Tasks;
+
+namespace System.Net.Http
+{
+    public sealed class Utf8StringContent : HttpContent
+    {
+        private const string DefaultMediaType = "text/plain";
+
+        private readonly Utf8String _content;
+
+        public Utf8StringContent(Utf8String content)
+            : this(content, mediaType: null)
+        {
+        }
+
+        public Utf8StringContent(Utf8String content, string mediaType)
+        {
+            if (content is null)
+            {
+                throw new ArgumentNullException(nameof(content));
+            }
+
+            _content = content;
+
+            // Initialize the 'Content-Type' header with information provided by parameters.
+
+            Headers.ContentType = new MediaTypeHeaderValue(mediaType ?? 
DefaultMediaType)
+            {
+                CharSet = "utf-8" // Encoding.UTF8.WebName
+            };
+        }
+
+        protected override Task<Stream> CreateContentReadStreamAsync()
+        {
+            return Task.FromResult<Stream>(new Utf8StringStream(_content)); // explicit <Stream>: Task<Utf8StringStream> is not convertible to Task<Stream>
+        }
+
+        protected override Task SerializeToStreamAsync(Stream stream, TransportContext context)
+        {
+            return stream.WriteAsync(_content.AsMemoryBytes()).AsTask();
+        }
+
+        protected override bool TryComputeLength(out long length)
+        {
+            length = _content.Length;
+            return true;
+        }
+    }
+}
From 5be9a855b0ace52851d3ab327be4dc5646883865 Mon Sep 17 00:00:00 2001
From: Levi Broderick
Date: Wed, 13 Mar 2019 18:22:12 -0700
Subject: [PATCH 6/9] Rename System.Utf8String to System.Utf8String.Experimental

---
 pkg/Microsoft.Private.PackageBaseline/packageIndex.json | 2 +-
 pkg/descriptions.json | 3 ++-
 src/Directory.Build.props | 2 +-
 .../Directory.Build.props | 0
 .../System.Utf8String.Experimental.sln} | 6 +++---
 .../pkg/System.Utf8String.Experimental.pkgproj} | 4 ++--
 .../ref/Configurations.props | 0
 .../ref/System.Utf8String.Experimental.csproj} | 0
 .../ref/System.Utf8String.cs | 0
 .../src/Configurations.props | 0
 .../src/Resources/Strings.resx | 0
 .../src/System.Utf8String.Experimental.csproj} | 0
 .../src/System/IO/Utf8StringStream.cs | 0
 .../src/System/Net/Http/Utf8StringContent.cs | 0
 .../tests/Configurations.props | 0
 .../tests/System.Utf8String.Experimental.Tests.csproj} | 0
 .../tests/System/Char8Tests.cs | 0
 .../tests/System/MemoryTests.cs | 0
 .../tests/System/ReflectionTests.cs | 0
 .../tests/System/Utf8ExtensionsTests.cs | 0
 .../tests/System/Utf8StringTests.Ctor.cs | 0
 .../tests/System/Utf8StringTests.Substring.cs | 0
 .../tests/System/Utf8StringTests.cs | 0
 .../tests/System/Utf8TestUtilities.cs | 0
 .../tests/Xunit/SpanAssert.cs | 0
 25 files changed, 9 insertions(+), 8 deletions(-)
 rename src/{System.Utf8String => System.Utf8String.Experimental}/Directory.Build.props (100%)
 rename src/{System.Utf8String/System.Utf8String.sln => 
System.Utf8String.Experimental/System.Utf8String.Experimental.sln} (89%) rename src/{System.Utf8String/pkg/System.Utf8String.pkgproj => System.Utf8String.Experimental/pkg/System.Utf8String.Experimental.pkgproj} (78%) rename src/{System.Utf8String => System.Utf8String.Experimental}/ref/Configurations.props (100%) rename src/{System.Utf8String/ref/System.Utf8String.csproj => System.Utf8String.Experimental/ref/System.Utf8String.Experimental.csproj} (100%) rename src/{System.Utf8String => System.Utf8String.Experimental}/ref/System.Utf8String.cs (100%) rename src/{System.Utf8String => System.Utf8String.Experimental}/src/Configurations.props (100%) rename src/{System.Utf8String => System.Utf8String.Experimental}/src/Resources/Strings.resx (100%) rename src/{System.Utf8String/src/System.Utf8String.csproj => System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj} (100%) rename src/{System.Utf8String => System.Utf8String.Experimental}/src/System/IO/Utf8StringStream.cs (100%) rename src/{System.Utf8String => System.Utf8String.Experimental}/src/System/Net/Http/Utf8StringContent.cs (100%) rename src/{System.Utf8String => System.Utf8String.Experimental}/tests/Configurations.props (100%) rename src/{System.Utf8String/tests/System.Utf8String.Tests.csproj => System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj} (100%) rename src/{System.Utf8String => System.Utf8String.Experimental}/tests/System/Char8Tests.cs (100%) rename src/{System.Utf8String => System.Utf8String.Experimental}/tests/System/MemoryTests.cs (100%) rename src/{System.Utf8String => System.Utf8String.Experimental}/tests/System/ReflectionTests.cs (100%) rename src/{System.Utf8String => System.Utf8String.Experimental}/tests/System/Utf8ExtensionsTests.cs (100%) rename src/{System.Utf8String => System.Utf8String.Experimental}/tests/System/Utf8StringTests.Ctor.cs (100%) rename src/{System.Utf8String => System.Utf8String.Experimental}/tests/System/Utf8StringTests.Substring.cs 
(100%) rename src/{System.Utf8String => System.Utf8String.Experimental}/tests/System/Utf8StringTests.cs (100%) rename src/{System.Utf8String => System.Utf8String.Experimental}/tests/System/Utf8TestUtilities.cs (100%) rename src/{System.Utf8String => System.Utf8String.Experimental}/tests/Xunit/SpanAssert.cs (100%) diff --git a/pkg/Microsoft.Private.PackageBaseline/packageIndex.json b/pkg/Microsoft.Private.PackageBaseline/packageIndex.json index 4a6e084e0fa1..415e45dd7e84 100644 --- a/pkg/Microsoft.Private.PackageBaseline/packageIndex.json +++ b/pkg/Microsoft.Private.PackageBaseline/packageIndex.json @@ -5307,7 +5307,7 @@ "uap10.0.16299": "4.0.1.0" } }, - "System.Utf8String": { + "System.Utf8String.Experimental": { "InboxOn": {}, "AssemblyVersionInPackageVersion": { "4.0.0.0": "4.6.0" diff --git a/pkg/descriptions.json b/pkg/descriptions.json index 4afa30ab25ee..e3b4369e6e8f 100644 --- a/pkg/descriptions.json +++ b/pkg/descriptions.json @@ -2132,9 +2132,10 @@ ] }, { - "Name": "System.Utf8String", + "Name": "System.Utf8String.Experimental", "Description": "Provides types for representation of UTF-8 string data.", "CommonTypes": [ + "System.Char8", "System.Utf8String" ] }, diff --git a/src/Directory.Build.props b/src/Directory.Build.props index 561f4cecce8e..7063f069799d 100644 --- a/src/Directory.Build.props +++ b/src/Directory.Build.props @@ -38,6 +38,6 @@ so they can't inadvertently take a dependency on it.) 
--> - + diff --git a/src/System.Utf8String/Directory.Build.props b/src/System.Utf8String.Experimental/Directory.Build.props similarity index 100% rename from src/System.Utf8String/Directory.Build.props rename to src/System.Utf8String.Experimental/Directory.Build.props diff --git a/src/System.Utf8String/System.Utf8String.sln b/src/System.Utf8String.Experimental/System.Utf8String.Experimental.sln similarity index 89% rename from src/System.Utf8String/System.Utf8String.sln rename to src/System.Utf8String.Experimental/System.Utf8String.Experimental.sln index 1c40f6b5fb60..2d4bdcc1c43a 100644 --- a/src/System.Utf8String/System.Utf8String.sln +++ b/src/System.Utf8String.Experimental/System.Utf8String.Experimental.sln @@ -2,14 +2,14 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 15 VisualStudioVersion = 15.0.27213.1 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Utf8String.Tests", "tests\System.Utf8String.Tests.csproj", "{72E9FB32-4692-4692-A10B-9F053F8F1A88}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Utf8String.Experimental.Tests", "tests\System.Utf8String.Experimental.Tests.csproj", "{72E9FB32-4692-4692-A10B-9F053F8F1A88}" ProjectSection(ProjectDependencies) = postProject {D4266847-6692-481B-9459-6141DB7DA339} = {D4266847-6692-481B-9459-6141DB7DA339} EndProjectSection EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Utf8String", "src\System.Utf8String.csproj", "{D4266847-6692-481B-9459-6141DB7DA339}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Utf8String.Experimental", "src\System.Utf8String.Experimental.csproj", "{D4266847-6692-481B-9459-6141DB7DA339}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Utf8String", "ref\System.Utf8String.csproj", "{7AF57E6B-2CED-45C9-8BCA-5BBA60D018E0}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Utf8String.Experimental", 
"ref\System.Utf8String.Experimental.csproj", "{7AF57E6B-2CED-45C9-8BCA-5BBA60D018E0}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{7EC8921F-E96F-445B-AA33-453515641D93}" EndProject diff --git a/src/System.Utf8String/pkg/System.Utf8String.pkgproj b/src/System.Utf8String.Experimental/pkg/System.Utf8String.Experimental.pkgproj similarity index 78% rename from src/System.Utf8String/pkg/System.Utf8String.pkgproj rename to src/System.Utf8String.Experimental/pkg/System.Utf8String.Experimental.pkgproj index d0ddd94ca65a..016952009282 100644 --- a/src/System.Utf8String/pkg/System.Utf8String.pkgproj +++ b/src/System.Utf8String.Experimental/pkg/System.Utf8String.Experimental.pkgproj @@ -2,10 +2,10 @@ - + netcoreapp3.0; - + diff --git a/src/System.Utf8String/ref/Configurations.props b/src/System.Utf8String.Experimental/ref/Configurations.props similarity index 100% rename from src/System.Utf8String/ref/Configurations.props rename to src/System.Utf8String.Experimental/ref/Configurations.props diff --git a/src/System.Utf8String/ref/System.Utf8String.csproj b/src/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.csproj similarity index 100% rename from src/System.Utf8String/ref/System.Utf8String.csproj rename to src/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.csproj diff --git a/src/System.Utf8String/ref/System.Utf8String.cs b/src/System.Utf8String.Experimental/ref/System.Utf8String.cs similarity index 100% rename from src/System.Utf8String/ref/System.Utf8String.cs rename to src/System.Utf8String.Experimental/ref/System.Utf8String.cs diff --git a/src/System.Utf8String/src/Configurations.props b/src/System.Utf8String.Experimental/src/Configurations.props similarity index 100% rename from src/System.Utf8String/src/Configurations.props rename to src/System.Utf8String.Experimental/src/Configurations.props diff --git a/src/System.Utf8String/src/Resources/Strings.resx 
b/src/System.Utf8String.Experimental/src/Resources/Strings.resx similarity index 100% rename from src/System.Utf8String/src/Resources/Strings.resx rename to src/System.Utf8String.Experimental/src/Resources/Strings.resx diff --git a/src/System.Utf8String/src/System.Utf8String.csproj b/src/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj similarity index 100% rename from src/System.Utf8String/src/System.Utf8String.csproj rename to src/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj diff --git a/src/System.Utf8String/src/System/IO/Utf8StringStream.cs b/src/System.Utf8String.Experimental/src/System/IO/Utf8StringStream.cs similarity index 100% rename from src/System.Utf8String/src/System/IO/Utf8StringStream.cs rename to src/System.Utf8String.Experimental/src/System/IO/Utf8StringStream.cs diff --git a/src/System.Utf8String/src/System/Net/Http/Utf8StringContent.cs b/src/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs similarity index 100% rename from src/System.Utf8String/src/System/Net/Http/Utf8StringContent.cs rename to src/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs diff --git a/src/System.Utf8String/tests/Configurations.props b/src/System.Utf8String.Experimental/tests/Configurations.props similarity index 100% rename from src/System.Utf8String/tests/Configurations.props rename to src/System.Utf8String.Experimental/tests/Configurations.props diff --git a/src/System.Utf8String/tests/System.Utf8String.Tests.csproj b/src/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj similarity index 100% rename from src/System.Utf8String/tests/System.Utf8String.Tests.csproj rename to src/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj diff --git a/src/System.Utf8String/tests/System/Char8Tests.cs b/src/System.Utf8String.Experimental/tests/System/Char8Tests.cs similarity index 100% rename from 
src/System.Utf8String/tests/System/Char8Tests.cs rename to src/System.Utf8String.Experimental/tests/System/Char8Tests.cs diff --git a/src/System.Utf8String/tests/System/MemoryTests.cs b/src/System.Utf8String.Experimental/tests/System/MemoryTests.cs similarity index 100% rename from src/System.Utf8String/tests/System/MemoryTests.cs rename to src/System.Utf8String.Experimental/tests/System/MemoryTests.cs diff --git a/src/System.Utf8String/tests/System/ReflectionTests.cs b/src/System.Utf8String.Experimental/tests/System/ReflectionTests.cs similarity index 100% rename from src/System.Utf8String/tests/System/ReflectionTests.cs rename to src/System.Utf8String.Experimental/tests/System/ReflectionTests.cs diff --git a/src/System.Utf8String/tests/System/Utf8ExtensionsTests.cs b/src/System.Utf8String.Experimental/tests/System/Utf8ExtensionsTests.cs similarity index 100% rename from src/System.Utf8String/tests/System/Utf8ExtensionsTests.cs rename to src/System.Utf8String.Experimental/tests/System/Utf8ExtensionsTests.cs diff --git a/src/System.Utf8String/tests/System/Utf8StringTests.Ctor.cs b/src/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.cs similarity index 100% rename from src/System.Utf8String/tests/System/Utf8StringTests.Ctor.cs rename to src/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.cs diff --git a/src/System.Utf8String/tests/System/Utf8StringTests.Substring.cs b/src/System.Utf8String.Experimental/tests/System/Utf8StringTests.Substring.cs similarity index 100% rename from src/System.Utf8String/tests/System/Utf8StringTests.Substring.cs rename to src/System.Utf8String.Experimental/tests/System/Utf8StringTests.Substring.cs diff --git a/src/System.Utf8String/tests/System/Utf8StringTests.cs b/src/System.Utf8String.Experimental/tests/System/Utf8StringTests.cs similarity index 100% rename from src/System.Utf8String/tests/System/Utf8StringTests.cs rename to src/System.Utf8String.Experimental/tests/System/Utf8StringTests.cs diff --git 
a/src/System.Utf8String/tests/System/Utf8TestUtilities.cs b/src/System.Utf8String.Experimental/tests/System/Utf8TestUtilities.cs similarity index 100% rename from src/System.Utf8String/tests/System/Utf8TestUtilities.cs rename to src/System.Utf8String.Experimental/tests/System/Utf8TestUtilities.cs diff --git a/src/System.Utf8String/tests/Xunit/SpanAssert.cs b/src/System.Utf8String.Experimental/tests/Xunit/SpanAssert.cs similarity index 100% rename from src/System.Utf8String/tests/Xunit/SpanAssert.cs rename to src/System.Utf8String.Experimental/tests/Xunit/SpanAssert.cs From 1ca2b2e2d5dc3004da75ff4d2cb3e3e3ec4427ec Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Wed, 13 Mar 2019 18:41:21 -0700 Subject: [PATCH 7/9] Add unit tests for Utf8StringContent --- ...ystem.Utf8String.Experimental.Tests.csproj | 1 + .../System/Net/Http/Utf8StringContentTests.cs | 44 +++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 src/System.Utf8String.Experimental/tests/System/Net/Http/Utf8StringContentTests.cs diff --git a/src/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj b/src/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj index c08cc5cfed84..477785d44616 100644 --- a/src/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj +++ b/src/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj @@ -14,6 +14,7 @@ + diff --git a/src/System.Utf8String.Experimental/tests/System/Net/Http/Utf8StringContentTests.cs b/src/System.Utf8String.Experimental/tests/System/Net/Http/Utf8StringContentTests.cs new file mode 100644 index 000000000000..87c31c6dce39 --- /dev/null +++ b/src/System.Utf8String.Experimental/tests/System/Net/Http/Utf8StringContentTests.cs @@ -0,0 +1,44 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+// See the LICENSE file in the project root for more information.
+
+using System.IO;
+using System.Text;
+using System.Threading.Tasks;
+using Xunit;
+
+using static System.Tests.Utf8TestUtilities;
+
+namespace System.Net.Http.Tests
+{
+    public partial class Utf8StringContentTests
+    {
+        [Fact]
+        public static void Ctor_NullContent_Throws()
+        {
+            Assert.Throws<ArgumentNullException>("content", () => new Utf8StringContent(null));
+            Assert.Throws<ArgumentNullException>("content", () => new Utf8StringContent(null, "application/json"));
+        }
+
+        [Theory]
+        [InlineData(null, "text/plain")]
+        [InlineData("application/json", "application/json")]
+        public static void Ctor_SetsContentTypeHeader(string mediaTypeForCtor, string expectedMediaType)
+        {
+            HttpContent httpContent = new Utf8StringContent(u8("Hello"), mediaTypeForCtor);
+
+            Assert.Equal(expectedMediaType, httpContent.Headers.ContentType.MediaType);
+            Assert.Equal(Encoding.UTF8.WebName, httpContent.Headers.ContentType.CharSet);
+        }
+
+        [Fact]
+        public static async Task Ctor_GetStream()
+        {
+            MemoryStream memoryStream = new MemoryStream();
+
+            await new Utf8StringContent(u8("Hello")).CopyToAsync(memoryStream);
+
+            Assert.Equal(u8("Hello").ToByteArray(), memoryStream.ToArray());
+        }
+    }
+}
From ae519ceb3e1548860379290bc9d1a1f3e9fbe563 Mon Sep 17 00:00:00 2001
From: Levi Broderick
Date: Thu, 14 Mar 2019 15:07:46 -0700
Subject: [PATCH 8/9] Add ref asm and tests for Contains and friends

Add ref asm for Contains and friends
---
 .../ref/System.Utf8String.cs | 8 ++
 ...ystem.Utf8String.Experimental.Tests.csproj | 1 +
 .../tests/System/Utf8StringTests.Searching.cs | 97 +++++++++++++++++++
 3 files changed, 106 insertions(+)
 create mode 100644 src/System.Utf8String.Experimental/tests/System/Utf8StringTests.Searching.cs

diff --git a/src/System.Utf8String.Experimental/ref/System.Utf8String.cs b/src/System.Utf8String.Experimental/ref/System.Utf8String.cs
index f84272572246..7e9bc8f56e95 100644
--- a/src/System.Utf8String.Experimental/ref/System.Utf8String.cs
+++ 
b/src/System.Utf8String.Experimental/ref/System.Utf8String.cs @@ -88,13 +88,21 @@ public Utf8String(string value) { } public Char8 this[int index] => throw null; public Utf8String this[Range range] => throw null; public int Length => throw null; + public bool Contains(char value) => throw null; + public bool Contains(System.Text.Rune value) => throw null; + public bool EndsWith(char value) => throw null; + public bool EndsWith(System.Text.Rune value) => throw null; public override bool Equals(object obj) => throw null; public bool Equals(Utf8String value) => throw null; public static bool Equals(Utf8String left, Utf8String right) => throw null; public override int GetHashCode() => throw null; [ComponentModel.EditorBrowsable(ComponentModel.EditorBrowsableState.Never)] // for compiler use only public ref readonly byte GetPinnableReference() => throw null; + public int IndexOf(char value) => throw null; + public int IndexOf(System.Text.Rune value) => throw null; public static bool IsNullOrEmpty(Utf8String value) => throw null; + public bool StartsWith(char value) => throw null; + public bool StartsWith(System.Text.Rune value) => throw null; public Utf8String Substring(Index startIndex) => throw null; public Utf8String Substring(int startIndex) => throw null; public Utf8String Substring(int startIndex, int length) => throw null; diff --git a/src/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj b/src/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj index 477785d44616..5ce4935e8ebf 100644 --- a/src/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj +++ b/src/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj @@ -19,6 +19,7 @@ + diff --git a/src/System.Utf8String.Experimental/tests/System/Utf8StringTests.Searching.cs b/src/System.Utf8String.Experimental/tests/System/Utf8StringTests.Searching.cs new file mode 100644 index 000000000000..3359cef7fa1d 
--- /dev/null +++ b/src/System.Utf8String.Experimental/tests/System/Utf8StringTests.Searching.cs @@ -0,0 +1,97 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Generic; +using System.Text; +using Xunit; + +using static System.Tests.Utf8TestUtilities; + +namespace System.Tests +{ + public unsafe partial class Utf8StringTests + { + [Theory] + [MemberData(nameof(IndexOfTestData))] + public static void Contains_And_IndexOf_CharRune_Ordinal(Utf8String utf8String, Rune searchValue, int expectedIndex) + { + // Contains + + if (searchValue.IsBmp) + { + Assert.Equal(expectedIndex >= 0, utf8String.Contains((char)searchValue.Value)); + } + Assert.Equal(expectedIndex >= 0, utf8String.Contains(searchValue)); + + // IndexOf + + if (searchValue.IsBmp) + { + Assert.Equal(expectedIndex, utf8String.IndexOf((char)searchValue.Value)); + } + Assert.Equal(expectedIndex, utf8String.IndexOf(searchValue)); + } + + [Theory] + [MemberData(nameof(IndexOfTestData))] + public static void StartsWith_And_EndsWith_CharRune_Ordinal(Utf8String utf8String, Rune searchValue, int expectedIndex) + { + // StartsWith + + if (searchValue.IsBmp) + { + Assert.Equal(expectedIndex == 0, utf8String.StartsWith((char)searchValue.Value)); + } + Assert.Equal(expectedIndex == 0, utf8String.StartsWith(searchValue)); + + // EndsWith + + bool endsWithExpectedValue = (expectedIndex >= 0) && (expectedIndex + searchValue.Utf8SequenceLength) == utf8String.Length; + + if (searchValue.IsBmp) + { + Assert.Equal(endsWithExpectedValue, utf8String.EndsWith((char)searchValue.Value)); + } + Assert.Equal(endsWithExpectedValue, utf8String.EndsWith(searchValue)); + } + + [Fact] + public static void Searching_StandaloneSurrogate_Fails() + { + Utf8String utf8String = u8("\ud800\udfff"); + + Assert.False(utf8String.Contains('\ud800')); + 
Assert.False(utf8String.Contains('\udfff'));
+
+            Assert.Equal(-1, utf8String.IndexOf('\ud800'));
+            Assert.Equal(-1, utf8String.IndexOf('\udfff'));
+
+            Assert.False(utf8String.StartsWith('\ud800'));
+            Assert.False(utf8String.StartsWith('\udfff'));
+
+            Assert.False(utf8String.EndsWith('\ud800'));
+            Assert.False(utf8String.EndsWith('\udfff'));
+        }
+
+        public static IEnumerable<object[]> IndexOfTestData // each row: { Utf8String to search, Rune to look for, expected UTF-8 byte index or -1 }
+        {
+            get
+            {
+                yield return new object[] { Utf8String.Empty, default(Rune), -1 };
+                yield return new object[] { u8("Hello"), (Rune)'H', 0 };
+                yield return new object[] { u8("Hello"), (Rune)'h', -1 };
+                yield return new object[] { u8("Hello"), (Rune)'O', -1 };
+                yield return new object[] { u8("Hello"), (Rune)'o', 4 };
+                yield return new object[] { u8("Hello"), (Rune)'L', -1 };
+                yield return new object[] { u8("Hello"), (Rune)'l', 2 };
+                yield return new object[] { u8("\U00012345\U0010ABCD"), (Rune)0x00012345, 0 };
+                yield return new object[] { u8("\U00012345\U0010ABCD"), (Rune)0x0010ABCD, 4 };
+                yield return new object[] { u8("abc\ufffdef"), (Rune)'c', 2 };
+                yield return new object[] { u8("abc\ufffdef"), (Rune)'\ufffd', 3 };
+                yield return new object[] { u8("abc\ufffdef"), (Rune)'d', -1 };
+                yield return new object[] { u8("abc\ufffdef"), (Rune)'e', 6 };
+            }
+        }
+    }
+}
From fbbc8b9244a16a585c8daddf6a566dc787862c33 Mon Sep 17 00:00:00 2001
From: Levi Broderick
Date: Mon, 25 Mar 2019 18:11:13 -0700
Subject: [PATCH 9/9] Fix ref asm project references

---
 .../ref/System.Utf8String.Experimental.csproj | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.csproj b/src/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.csproj
index 083ec8bdeed3..6563d2c44bdc 100644
--- a/src/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.csproj
+++ b/src/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.csproj
@@ -8,9 +8,9 @@ - - - - + + + +