From 33225f14d8e766ec9b7b07fb81dbad2ba4204cb4 Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Wed, 15 Apr 2020 18:46:34 -0700 Subject: [PATCH 1/6] Implement TranscodingStream --- .../System/Buffers/BoundedMemory.Windows.cs | 5 +- .../System/IO/StreamExtensions.cs | 21 + .../tests/TestUtilities/TestUtilities.csproj | 1 + .../System.Private.CoreLib.Shared.projitems | 1 + .../src/System/Text/Encoding.cs | 46 + .../src/System/Text/Rune.cs | 11 +- .../src/System/Text/TranscodingStream.cs | 598 ++++++++++++ .../System.Runtime/ref/System.Runtime.cs | 1 + .../tests/Encoding/TranscodingStreamTests.cs | 894 ++++++++++++++++++ .../tests/System.Text.Encoding.Tests.csproj | 4 + 10 files changed, 1574 insertions(+), 8 deletions(-) create mode 100644 src/libraries/Common/tests/TestUtilities/System/IO/StreamExtensions.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/Text/TranscodingStream.cs create mode 100644 src/libraries/System.Text.Encoding/tests/Encoding/TranscodingStreamTests.cs diff --git a/src/libraries/Common/tests/TestUtilities/System/Buffers/BoundedMemory.Windows.cs b/src/libraries/Common/tests/TestUtilities/System/Buffers/BoundedMemory.Windows.cs index 5de8ebe55432b5..8dbcb8c122bb48 100644 --- a/src/libraries/Common/tests/TestUtilities/System/Buffers/BoundedMemory.Windows.cs +++ b/src/libraries/Common/tests/TestUtilities/System/Buffers/BoundedMemory.Windows.cs @@ -191,10 +191,7 @@ protected override void Dispose(bool disposing) // no-op; the handle will be disposed separately } - public override Span GetSpan() - { - throw new NotImplementedException(); - } + public override Span GetSpan() => _impl.Span; public override MemoryHandle Pin(int elementIndex) { diff --git a/src/libraries/Common/tests/TestUtilities/System/IO/StreamExtensions.cs b/src/libraries/Common/tests/TestUtilities/System/IO/StreamExtensions.cs new file mode 100644 index 00000000000000..05a5b7e3ef1d4c --- /dev/null +++ b/src/libraries/Common/tests/TestUtilities/System/IO/StreamExtensions.cs @@ -0,0 +1,21 @@ +using System.Threading; +using System.Threading.Tasks; + +namespace System.IO +{ + public static class StreamExtensions + { + public static async Task ReadByteAsync(this Stream stream, CancellationToken cancellationToken = default) + { + byte[] buffer = new byte[1]; + + int numBytesRead = await stream.ReadAsync(buffer, 0, 1, cancellationToken); + if (numBytesRead == 0) + { + return -1; // EOF + } + + return buffer[0]; + } + } +} diff --git a/src/libraries/Common/tests/TestUtilities/TestUtilities.csproj b/src/libraries/Common/tests/TestUtilities/TestUtilities.csproj index 559cca7f20346b..de0759ec59e228 100644 --- a/src/libraries/Common/tests/TestUtilities/TestUtilities.csproj +++ b/src/libraries/Common/tests/TestUtilities/TestUtilities.csproj @@ -11,6 +11,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 8fe6a6dca457c8..d4c251665dcf5a 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -870,6 +870,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs index bbf5d62abea0bb..0efe07f2588caa 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs @@ -4,6 +4,7 @@ using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.IO; using System.Runtime.InteropServices; using System.Runtime.Serialization; @@ -1040,6 +1041,51 @@ value is Encoding that && public override int GetHashCode() => _codePage + this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode(); + + /// + /// Creates a which serves to transcode data between an inner + /// and an outer , similar to . + /// + /// The to wrap. + /// The associated with . + /// The associated with the returned + /// by this method. + /// if disposing the returned by this method + /// should not dispose . + /// A which transcodes the contents of + /// as . + /// + /// The returned 's and properties + /// will reflect whether is readable or writable. If + /// is full-duplex, the returned will be as well. However, the returned + /// is not seekable, even if 's property returns . + /// + public static Stream CreateTranscodingStream(Stream innerStream, Encoding innerStreamEncoding, Encoding outerStreamEncoding, bool leaveOpen = false) + { + if (innerStream is null) + { + throw new ArgumentNullException(nameof(innerStream)); + } + + if (innerStreamEncoding is null) + { + throw new ArgumentNullException(nameof(innerStreamEncoding)); + } + + if (outerStreamEncoding is null) + { + throw new ArgumentNullException(nameof(outerStreamEncoding)); + } + + // We can't entirely optimize away the case where innerStreamEncoding == outerStreamEncoding. For example, + // the Encoding might perform a lossy conversion when it sees invalid data, so we still need to call it + // to perform basic validation. It's also possible that somebody subclassed one of the built-in types + // like ASCIIEncoding or UTF8Encoding and is running some non-standard logic. If this becomes a bottleneck + // we can consider targeted optimizations in a future release. + + return new TranscodingStream(innerStream, innerStreamEncoding, outerStreamEncoding, leaveOpen); + } + internal virtual char[] GetBestFitUnicodeToBytesData() => // Normally we don't have any best fit data. Array.Empty(); diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs index 3a703d661ff789..9ad17cd45fa7aa 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs @@ -20,6 +20,9 @@ namespace System.Text [DebuggerDisplay("{DebuggerDisplay,nq}")] public readonly struct Rune : IComparable, IEquatable { + internal const int MaxUtf16CharsPerRune = 2; // supplementary plane code points are encoded as 2 UTF-16 code units + internal const int MaxUtf8BytesPerRune = 4; // supplementary plane code points are encoded as 4 UTF-8 code units + private const char HighSurrogateStart = '\ud800'; private const char LowSurrogateStart = '\udc00'; private const int HighSurrogateRange = 0x3FF; @@ -185,8 +188,8 @@ private static Rune ChangeCaseCultureAware(Rune rune, TextInfo textInfo, bool to Debug.Assert(!GlobalizationMode.Invariant, "This should've been checked by the caller."); Debug.Assert(textInfo != null, "This should've been checked by the caller."); - Span original = stackalloc char[2]; // worst case scenario = 2 code units (for a surrogate pair) - Span modified = stackalloc char[2]; // case change should preserve UTF-16 code unit count + Span original = stackalloc char[MaxUtf16CharsPerRune]; + Span modified = stackalloc char[MaxUtf16CharsPerRune]; int charCount = rune.EncodeToUtf16(original); original = original.Slice(0, charCount); @@ -220,8 +223,8 @@ private static Rune ChangeCaseCultureAware(Rune rune, CultureInfo culture, bool Debug.Assert(!GlobalizationMode.Invariant, "This should've been checked by the caller."); Debug.Assert(culture != null, "This should've been checked by the caller."); - Span original = stackalloc char[2]; // worst case scenario = 2 code units (for a surrogate pair) - Span modified = stackalloc char[2]; // case change should preserve UTF-16 code unit count + Span original = stackalloc char[MaxUtf16CharsPerRune]; + Span modified = stackalloc char[MaxUtf16CharsPerRune]; int charCount = rune.EncodeToUtf16(original); original = original.Slice(0, charCount); diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/TranscodingStream.cs b/src/libraries/System.Private.CoreLib/src/System/Text/TranscodingStream.cs new file mode 100644 index 00000000000000..4110f490e25777 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Text/TranscodingStream.cs @@ -0,0 +1,598 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Buffers; +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using System.IO; +using System.Runtime.InteropServices; +using System.Threading; +using System.Threading.Tasks; + +namespace System.Text +{ + internal sealed class TranscodingStream : Stream + { + private const int DefaultReadByteBufferSize = 4 * 1024; // lifted from StreamReader.cs (FileStream) + + // We optimistically assume 1 byte ~ 1 char during transcoding. This is a good rule of thumb + // but isn't always appropriate: transcoding between single-byte and multi-byte encodings + // will violate this, as will any invalid data fixups performed by the transcoder itself. + // To account for these unknowns we have a minimum scratch buffer size we use during the + // transcoding process. This should be generous enough to account for even the largest + // fallback mechanism we're likely to see in the real world. + + private const int MinWriteRentedArraySize = 4 * 1024; + private const int MaxWriteRentedArraySize = 1024 * 1024; + + private readonly Encoding _innerEncoding; + private readonly Encoding _thisEncoding; + private Stream? _innerStream; // null if the wrapper has been disposed + private readonly bool _leaveOpen; + + /* + * Fields used for writing bytes [this] -> chars -> bytes [inner] + * Lazily initialized the first time we need to write + */ + + private Encoder? _innerEncoder; + private Decoder? _thisDecoder; + + /* + * Fields used for reading bytes [inner] -> chars -> bytes [this] + * Lazily initialized the first time we need to read + */ + + private Encoder? _thisEncoder; + private Decoder? _innerDecoder; + private int _readCharBufferMaxSize; // the maximum number of characters _innerDecoder.ReadChars can return + private ArraySegment _pendingReadData; // contains the data that Read() should return + + internal TranscodingStream(Stream innerStream, Encoding innerEncoding, Encoding thisEncoding, bool leaveOpen) + { + Debug.Assert(innerStream != null); + Debug.Assert(innerEncoding != null); + Debug.Assert(thisEncoding != null); + + _innerStream = innerStream; + _leaveOpen = leaveOpen; + + _innerEncoding = innerEncoding; + _thisEncoding = thisEncoding; + } + + /* + * Most CanXyz methods delegate to the inner stream, returning false + * if this instance has been disposed. CanSeek is always false. + */ + + public override bool CanRead => _innerStream?.CanRead ?? false; + + public override bool CanSeek => false; + + public override bool CanWrite => _innerStream?.CanWrite ?? false; + + public override long Length => throw new NotSupportedException(SR.NotSupported_UnseekableStream); + + public override long Position + { + get => throw new NotSupportedException(SR.NotSupported_UnseekableStream); + set => throw new NotSupportedException(SR.NotSupported_UnseekableStream); + } + + public override IAsyncResult BeginRead(byte[] buffer, int offset, int count, AsyncCallback? callback, object? state) + => TaskToApm.Begin(ReadAsync(buffer, offset, count, CancellationToken.None), callback, state); + + public override IAsyncResult BeginWrite(byte[] buffer, int offset, int count, AsyncCallback? callback, object? state) + => TaskToApm.Begin(WriteAsync(buffer, offset, count, CancellationToken.None), callback, state); + + protected override void Dispose(bool disposing) + { + Debug.Assert(disposing, "This type isn't finalizable."); + + if (_innerStream is null) + { + return; // dispose called multiple times, ignore + } + + // First, flush any pending data to the inner stream. + + ArraySegment pendingData = FinalFlushWriteBuffers(); + if (pendingData.Count != 0) + { + _innerStream.Write(pendingData); + } + + // Mark our object as disposed + + Stream innerStream = _innerStream; + _innerStream = null; + + // And dispose the inner stream if needed + + if (!_leaveOpen) + { + innerStream.Dispose(); + } + } + + public override ValueTask DisposeAsync() + { + if (_innerStream is null) + { + return default; // dispose called multiple times, ignore + } + + // First, get any pending data destined for the inner stream. + + ArraySegment pendingData = FinalFlushWriteBuffers(); + + if (pendingData.Count == 0) + { + // Fast path: just dispose of the object graph. + // No need to write anything to the stream first. + + Stream innerStream = _innerStream; + _innerStream = null; + + return (_leaveOpen) + ? default /* no work to do */ + : innerStream.DisposeAsync(); + } + + // Slower path; need to perform an async write followed by an async dispose. + + return DisposeAsyncCore(pendingData); + async ValueTask DisposeAsyncCore(ArraySegment pendingData) + { + Debug.Assert(pendingData.Count != 0); + + Stream innerStream = _innerStream; + + await innerStream.WriteAsync(pendingData.AsMemory()).ConfigureAwait(false); + _innerStream = null; + + if (!_leaveOpen) + { + await innerStream.DisposeAsync().ConfigureAwait(false); + } + } + } + + public override int EndRead(IAsyncResult asyncResult) + => TaskToApm.End(asyncResult); + + public override void EndWrite(IAsyncResult asyncResult) + => TaskToApm.End(asyncResult); + +#pragma warning disable CS3016 // Arrays as attribute arguments is not CLS-compliant +#pragma warning disable CS8774 // Member must have a non-null value when exiting. + + // Sets up the data structures that are necessary before any read operation takes place, + // throwing if the object is in a state where reads are not possible. + [MemberNotNull(nameof(_innerStream), nameof(_innerDecoder), nameof(_thisEncoder))] + private void EnsurePreReadConditions() + { + ThrowIfDisposed(); + if (_innerDecoder is null) + { + InitializeReadDataStructures(); + } + + void InitializeReadDataStructures() + { + if (!CanRead) + { + throw Error.GetReadNotSupported(); + } + + _innerDecoder = _innerEncoding.GetDecoder(); + _thisEncoder = _thisEncoding.GetEncoder(); + _readCharBufferMaxSize = _innerEncoding.GetMaxCharCount(DefaultReadByteBufferSize); + + // Can't use ArrayPool for the below array since it's an instance field of this object. + // But since we never expose the raw array contents to our callers we can get away + // with skipping the array zero-init during allocation. The segment points to the + // data which we haven't yet read; however, we own the entire backing array and can + // re-create the segment as needed once the array is repopulated. + + _pendingReadData = new ArraySegment(GC.AllocateUninitializedArray(_thisEncoding.GetMaxByteCount(_readCharBufferMaxSize)), 0, 0); + } + } + + // Sets up the data structures that are necessary before any write operation takes place, + // throwing if the object is in a state where writes are not possible. + [MemberNotNull(nameof(_innerStream), nameof(_thisDecoder), nameof(_innerEncoder))] + private void EnsurePreWriteConditions() + { + ThrowIfDisposed(); + if (_innerEncoder is null) + { + InitializeReadDataStructures(); + } + + void InitializeReadDataStructures() + { + if (!CanWrite) + { + throw Error.GetWriteNotSupported(); + } + + _innerEncoder = _innerEncoding.GetEncoder(); + _thisDecoder = _thisEncoding.GetDecoder(); + } + } + +#pragma warning restore CS8774 // Member must have a non-null value when exiting. +#pragma warning restore CS3016 // Arrays as attribute arguments is not CLS-compliant + + // returns any pending data that needs to be flushed to the inner stream before disposal + private ArraySegment FinalFlushWriteBuffers() + { + // If this stream was never used for writing, no-op. + + if (_thisDecoder is null || _innerEncoder is null) + { + return default; + } + + // Having leftover data in our buffers should be very rare since it should only + // occur if the end of the stream contains an incomplete multi-byte sequence. + // Let's not bother complicating this logic with array pool rentals or allocation- + // avoiding loops. + + // convert bytes [this] -> chars + + char[] chars = Array.Empty(); + int charCount = _thisDecoder.GetCharCount(Array.Empty(), 0, 0, flush: true); + if (charCount > 0) + { + chars = new char[charCount]; + charCount = _thisDecoder.GetChars(Array.Empty(), 0, 0, chars, 0, flush: true); + } + + // convert chars -> bytes [inner] + + byte[] bytes = Array.Empty(); + int byteCount = _innerEncoder.GetByteCount(chars, 0, charCount, flush: true); + if (byteCount > 0) + { + bytes = new byte[byteCount]; + byteCount = _innerEncoder.GetBytes(chars, 0, charCount, bytes, 0, flush: true); + } + + return new ArraySegment(bytes, 0, byteCount); + } + + public override void Flush() + { + // Don't pass flush: true to our inner decoder + encoder here, since it could cause data + // corruption if a flush occurs mid-stream. Wait until the stream is being closed. + + ThrowIfDisposed(); + _innerStream.Flush(); + } + + public override Task FlushAsync(CancellationToken cancellationToken) + { + // Don't pass flush: true to our inner decoder + encoder here, since it could cause data + // corruption if a flush occurs mid-stream. Wait until the stream is being closed. + + ThrowIfDisposed(); + return _innerStream.FlushAsync(cancellationToken); + } + + public override int Read(byte[] buffer, int offset, int count) + { + if (buffer is null) + { + throw new ArgumentNullException(nameof(buffer)); + } + + return Read(new Span(buffer, offset, count)); + } + + public override int Read(Span buffer) + { + EnsurePreReadConditions(); + + // If there's no data in our pending read buffer, we'll need to populate it from + // the inner stream. We read the inner stream's bytes, decode that to chars using + // the 'inner' encoding, then re-encode those chars under the 'this' encoding. + // We've already calculated the worst-case expansions for the intermediate buffers, + // so we use GetChars / GetBytes instead of Convert to simplify the below code + // and to ensure an exception is thrown if the Encoding reported an incorrect + // worst-case expansion. + + if (_pendingReadData.Count == 0) + { + byte[] rentedBytes = ArrayPool.Shared.Rent(DefaultReadByteBufferSize); + char[] rentedChars = ArrayPool.Shared.Rent(_readCharBufferMaxSize); + + try + { + // Beware: Use our constant value instead of 'rentedBytes.Length' for the count + // parameter below. The reason for this is that the array pool could've returned + // a larger-than-expected array, but our worst-case expansion calculations + // performed earlier didn't take that into account. + + int innerBytesReadJustNow = _innerStream.Read(rentedBytes, 0, DefaultReadByteBufferSize); + bool isEofReached = (innerBytesReadJustNow == 0); + + // convert bytes [inner] -> chars, then convert chars -> bytes [this] + + int charsDecodedJustNow = _innerDecoder.GetChars(rentedBytes, 0, innerBytesReadJustNow, rentedChars, 0, flush: isEofReached); + int pendingReadDataPopulatedJustNow = _thisEncoder.GetBytes(rentedChars, 0, charsDecodedJustNow, _pendingReadData.Array!, 0, flush: isEofReached); + + _pendingReadData = new ArraySegment(_pendingReadData.Array!, 0, pendingReadDataPopulatedJustNow); + } + finally + { + ArrayPool.Shared.Return(rentedBytes); + ArrayPool.Shared.Return(rentedChars); + } + } + + // At this point: (a) we've populated our pending read buffer and there's + // useful data to return to our caller; or (b) the pending read buffer is + // empty because the inner stream has reached EOF and all pending read data + // has already been flushed, and we should return 0. + + int bytesToReturn = Math.Min(_pendingReadData.Count, buffer.Length); + _pendingReadData.AsSpan(0, bytesToReturn).CopyTo(buffer); + _pendingReadData = _pendingReadData[bytesToReturn..]; + return bytesToReturn; + } + + public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) + { + if (buffer is null) + { + throw new ArgumentNullException(nameof(buffer)); + } + + return ReadAsync(new Memory(buffer, offset, count), cancellationToken).AsTask(); + } + + public override ValueTask ReadAsync(Memory buffer, CancellationToken cancellationToken) + { + EnsurePreReadConditions(); + + if (cancellationToken.IsCancellationRequested) + { + return new ValueTask(Task.FromCanceled(cancellationToken)); + } + + return ReadAsyncCore(buffer, cancellationToken); + async ValueTask ReadAsyncCore(Memory buffer, CancellationToken cancellationToken) + { + // If there's no data in our pending read buffer, we'll need to populate it from + // the inner stream. We read the inner stream's bytes, decode that to chars using + // the 'inner' encoding, then re-encode those chars under the 'this' encoding. + // We've already calculated the worst-case expansions for the intermediate buffers, + // so we use GetChars / GetBytes instead of Convert to simplify the below code + // and to ensure an exception is thrown if the Encoding reported an incorrect + // worst-case expansion. + + if (_pendingReadData.Count == 0) + { + byte[] rentedBytes = ArrayPool.Shared.Rent(DefaultReadByteBufferSize); + char[] rentedChars = ArrayPool.Shared.Rent(_readCharBufferMaxSize); + + try + { + // Beware: Use our constant value instead of 'rentedBytes.Length' when creating + // the Mem struct. The reason for this is that the array pool could've returned + // a larger-than-expected array, but our worst-case expansion calculations + // performed earlier didn't take that into account. + + int innerBytesReadJustNow = await _innerStream.ReadAsync(rentedBytes.AsMemory(0, DefaultReadByteBufferSize), cancellationToken).ConfigureAwait(false); + bool isEofReached = (innerBytesReadJustNow == 0); + + // convert bytes [inner] -> chars, then convert chars -> bytes [this] + + int charsDecodedJustNow = _innerDecoder.GetChars(rentedBytes, 0, innerBytesReadJustNow, rentedChars, 0, flush: isEofReached); + int pendingReadDataPopulatedJustNow = _thisEncoder.GetBytes(rentedChars, 0, charsDecodedJustNow, _pendingReadData.Array!, 0, flush: isEofReached); + + _pendingReadData = new ArraySegment(_pendingReadData.Array!, 0, pendingReadDataPopulatedJustNow); + } + finally + { + ArrayPool.Shared.Return(rentedBytes); + ArrayPool.Shared.Return(rentedChars); + } + } + + // At this point: (a) we've populated our pending read buffer and there's + // useful data to return to our caller; or (b) the pending read buffer is + // empty because the inner stream has reached EOF and all pending read data + // has already been flushed, and we should return 0. + + int bytesToReturn = Math.Min(_pendingReadData.Count, buffer.Length); + _pendingReadData.AsSpan(0, bytesToReturn).CopyTo(buffer.Span); + _pendingReadData = _pendingReadData[bytesToReturn..]; + return bytesToReturn; + } + } + + public override int ReadByte() + { + Span buffer = stackalloc byte[1]; + int bytesRead = Read(buffer); + return (bytesRead == 0) ? -1 /* EOF */ : buffer[0]; + } + + public override long Seek(long offset, SeekOrigin origin) + => throw new NotSupportedException(SR.NotSupported_UnseekableStream); + + public override void SetLength(long value) + => throw new NotSupportedException(SR.NotSupported_UnseekableStream); + + [StackTraceHidden] +#pragma warning disable CS3016 // Arrays as attribute arguments is not CLS-compliant + [MemberNotNull(new[] { nameof(_innerStream) })] +#pragma warning restore CS3016 // Arrays as attribute arguments is not CLS-compliant + private void ThrowIfDisposed() + { + if (_innerStream is null) + { + ThrowObjectDisposedException(); + } + } + + [DoesNotReturn] + [StackTraceHidden] + private void ThrowObjectDisposedException() + { + throw new ObjectDisposedException( + objectName: GetType().Name, + message: SR.ObjectDisposed_StreamClosed); + } + + public override void Write(byte[] buffer, int offset, int count) + { + if (buffer is null) + { + throw new ArgumentNullException(nameof(buffer)); + } + + Write(new ReadOnlySpan(buffer, offset, count)); + } + + public override void Write(ReadOnlySpan buffer) + { + EnsurePreWriteConditions(); + + int rentalLength = Math.Clamp(buffer.Length, MinWriteRentedArraySize, MaxWriteRentedArraySize); + + char[] rentedChars = ArrayPool.Shared.Rent(rentalLength); + byte[] rentedBytes = ArrayPool.Shared.Rent(rentalLength); + + try + { + WriteCore(buffer, rentedChars, rentedBytes); + } + finally + { + ArrayPool.Shared.Return(rentedChars); + ArrayPool.Shared.Return(rentedBytes); + } + + void WriteCore(ReadOnlySpan remainingOuterEncodedBytes, char[] scratchChars, byte[] scratchBytes) + { + bool decoderFinished, encoderFinished; + do + { + // convert bytes [this] -> chars + + _thisDecoder.Convert( + bytes: remainingOuterEncodedBytes, + chars: scratchChars, + flush: false, + out int bytesConsumed, + out int charsWritten, + out decoderFinished); + + remainingOuterEncodedBytes = remainingOuterEncodedBytes[bytesConsumed..]; + + // convert chars -> bytes [inner] + + Span decodedChars = scratchChars.AsSpan(..charsWritten); + + do + { + _innerEncoder.Convert( + chars: decodedChars, + bytes: scratchBytes, + flush: false, + out int charsConsumed, + out int bytesWritten, + out encoderFinished); + + decodedChars = decodedChars[charsConsumed..]; + + // It's more likely that the inner stream provides an optimized implementation of + // Write(byte[], ...) over Write(ROS), so we'll prefer the byte[]-based overloads. + + _innerStream.Write(scratchBytes, 0, bytesWritten); + } while (!encoderFinished); + } while (!decoderFinished); + } + } + + public override Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) + { + if (buffer is null) + { + throw new ArgumentNullException(nameof(buffer)); + } + + return WriteAsync(new ReadOnlyMemory(buffer, offset, count), cancellationToken).AsTask(); + } + + public override ValueTask WriteAsync(ReadOnlyMemory buffer, CancellationToken cancellationToken) + { + EnsurePreWriteConditions(); + + if (cancellationToken.IsCancellationRequested) + { + return new ValueTask(Task.FromCanceled(cancellationToken)); + } + + return WriteAsyncCore(buffer, cancellationToken); + async ValueTask WriteAsyncCore(ReadOnlyMemory remainingOuterEncodedBytes, CancellationToken cancellationToken) + { + int rentalLength = Math.Clamp(remainingOuterEncodedBytes.Length, MinWriteRentedArraySize, MaxWriteRentedArraySize); + + char[] scratchChars = ArrayPool.Shared.Rent(rentalLength); + byte[] scratchBytes = ArrayPool.Shared.Rent(rentalLength); + + try + { + bool decoderFinished, encoderFinished; + do + { + // convert bytes [this] -> chars + + _thisDecoder.Convert( + bytes: remainingOuterEncodedBytes.Span, + chars: scratchChars, + flush: false, + out int bytesConsumed, + out int charsWritten, + out decoderFinished); + + remainingOuterEncodedBytes = remainingOuterEncodedBytes[bytesConsumed..]; + + // convert chars -> bytes [inner] + + ArraySegment decodedChars = new ArraySegment(scratchChars, 0, charsWritten); + + do + { + _innerEncoder.Convert( + chars: decodedChars, + bytes: scratchBytes, + flush: false, + out int charsConsumed, + out int bytesWritten, + out encoderFinished); + + decodedChars = decodedChars[charsConsumed..]; + await _innerStream.WriteAsync(new ReadOnlyMemory(scratchBytes, 0, bytesWritten), cancellationToken).ConfigureAwait(false); + } while (!encoderFinished); + } while (!decoderFinished); + } + finally + { + ArrayPool.Shared.Return(scratchChars); + ArrayPool.Shared.Return(scratchBytes); + } + } + } + + public override void WriteByte(byte value) + => Write(MemoryMarshal.CreateReadOnlySpan(ref value, 1)); + } +} diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs b/src/libraries/System.Runtime/ref/System.Runtime.cs index 92c33cee8bb966..18b38a9dac0134 100644 --- a/src/libraries/System.Runtime/ref/System.Runtime.cs +++ b/src/libraries/System.Runtime/ref/System.Runtime.cs @@ -10104,6 +10104,7 @@ protected Encoding(int codePage, System.Text.EncoderFallback? encoderFallback, S public virtual object Clone() { throw null; } public static byte[] Convert(System.Text.Encoding srcEncoding, System.Text.Encoding dstEncoding, byte[] bytes) { throw null; } public static byte[] Convert(System.Text.Encoding srcEncoding, System.Text.Encoding dstEncoding, byte[] bytes, int index, int count) { throw null; } + public static System.IO.Stream CreateTranscodingStream(System.IO.Stream innerStream, System.Text.Encoding innerStreamEncoding, System.Text.Encoding outerStreamEncoding, bool leaveOpen = false) { throw null; } public override bool Equals(object? value) { throw null; } [System.CLSCompliantAttribute(false)] public unsafe virtual int GetByteCount(char* chars, int count) { throw null; } diff --git a/src/libraries/System.Text.Encoding/tests/Encoding/TranscodingStreamTests.cs b/src/libraries/System.Text.Encoding/tests/Encoding/TranscodingStreamTests.cs new file mode 100644 index 00000000000000..8f029499ef34ad --- /dev/null +++ b/src/libraries/System.Text.Encoding/tests/Encoding/TranscodingStreamTests.cs @@ -0,0 +1,894 @@ +using System.Buffers; +using System.Collections.Generic; +using System.IO; +using System.Threading; +using System.Threading.Tasks; +using Moq; +using Xunit; + +namespace System.Text.Tests +{ + public class TranscodingStreamTests + { + public static IEnumerable ReadWriteTestBufferLengths + { + get + { + yield return new object[] { 1 }; + yield return new object[] { 4 * 1024 }; + yield return new object[] { 128 * 1024 }; + yield return new object[] { 2 * 1024 * 1024 }; + } + } + + [Fact] + public void AsyncMethods_ReturnCanceledTaskIfCancellationTokenTripped() + { + // Arrange + + CancellationTokenSource cts = new CancellationTokenSource(); + CancellationToken expectedCancellationToken = cts.Token; + cts.Cancel(); + + var innerStreamMock = new Mock(MockBehavior.Strict); // only CanRead/CanWrite should ever be invoked + innerStreamMock.Setup(o => o.CanRead).Returns(true); + innerStreamMock.Setup(o => o.CanWrite).Returns(true); + + Stream transcodingStream = Encoding.CreateTranscodingStream(innerStreamMock.Object, Encoding.UTF8, Encoding.UTF8, leaveOpen: true); + + // Act & assert + + RunTest(() => transcodingStream.ReadAsync(new byte[0], 0, 0, expectedCancellationToken)); + RunTest(() => transcodingStream.ReadAsync(Memory.Empty, expectedCancellationToken).AsTask()); + RunTest(() => transcodingStream.WriteAsync(new byte[0], 0, 0, expectedCancellationToken)); + RunTest(() => transcodingStream.WriteAsync(ReadOnlyMemory.Empty, expectedCancellationToken).AsTask()); + + void RunTest(Func callback) + { + Task task = callback(); + Assert.True(task.IsCanceled); + Assert.Equal(expectedCancellationToken, Assert.Throws(() => task.GetAwaiter().GetResult()).CancellationToken); + } + } + + [Fact] + public void CreateTranscodingStream_InvalidArgs() + { + Assert.Throws("innerStream", () => Encoding.CreateTranscodingStream(null, Encoding.UTF8, Encoding.UTF8)); + Assert.Throws("innerStreamEncoding", () => Encoding.CreateTranscodingStream(Stream.Null, null, Encoding.UTF8)); + Assert.Throws("outerStreamEncoding", () => Encoding.CreateTranscodingStream(Stream.Null, Encoding.UTF8, null)); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public void CanRead_DelegatesToInnerStream(bool expectedCanRead) + { + // Arrange + + var innerStreamMock = new Mock(); + innerStreamMock.Setup(o => o.CanRead).Returns(expectedCanRead); + Stream transcodingStream = Encoding.CreateTranscodingStream(innerStreamMock.Object, Encoding.UTF8, Encoding.UTF8, leaveOpen: true); + + // Act + + bool actualCanReadBeforeDispose = transcodingStream.CanRead; + transcodingStream.Dispose(); + bool actualCanReadAfterDispose = transcodingStream.CanRead; + + // Assert + + Assert.Equal(expectedCanRead, actualCanReadBeforeDispose); + Assert.False(actualCanReadAfterDispose); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public void CanWrite_DelegatesToInnerStream(bool expectedCanWrite) + { + // Arrange + + var innerStreamMock = new Mock(); + innerStreamMock.Setup(o => o.CanWrite).Returns(expectedCanWrite); + Stream transcodingStream = Encoding.CreateTranscodingStream(innerStreamMock.Object, Encoding.UTF8, Encoding.UTF8, leaveOpen: true); + + // Act + + bool actualCanWriteBeforeDispose = transcodingStream.CanWrite; + transcodingStream.Dispose(); + bool actualCanWriteAfterDispose = transcodingStream.CanWrite; + + // Assert + + Assert.Equal(expectedCanWrite, actualCanWriteBeforeDispose); + Assert.False(actualCanWriteAfterDispose); + } + + [Fact] + public void Dispose_MakesMostSubsequentOperationsThrow() + { + // Arrange + + MemoryStream innerStream = new MemoryStream(); + Stream transcodingStream = Encoding.CreateTranscodingStream(innerStream, Encoding.UTF8, Encoding.UTF8, leaveOpen: true); + + // Act + + transcodingStream.Dispose(); + + // Assert + // For Task/ValueTask-returning methods, we want the exception to be thrown synchronously. + + Assert.False(transcodingStream.CanRead); + Assert.False(transcodingStream.CanSeek); + Assert.False(transcodingStream.CanWrite); + + Assert.Throws(() => (object)transcodingStream.BeginRead(new byte[0], 0, 0, null, null)); + Assert.Throws(() => (object)transcodingStream.BeginWrite(new byte[0], 0, 0, null, null)); + Assert.Throws(() => transcodingStream.Flush()); + Assert.Throws(() => (object)transcodingStream.FlushAsync()); + Assert.Throws(() => (object)transcodingStream.Read(new byte[0], 0, 0)); + Assert.Throws(() => (object)transcodingStream.Read(Span.Empty)); + Assert.Throws(() => (object)transcodingStream.ReadAsync(new byte[0], 0, 0)); + Assert.Throws(() => (object)transcodingStream.ReadAsync(Memory.Empty)); + Assert.Throws(() => (object)transcodingStream.ReadByte()); + Assert.Throws(() => transcodingStream.Write(new byte[0], 0, 0)); + Assert.Throws(() => transcodingStream.Write(ReadOnlySpan.Empty)); + Assert.Throws(() => (object)transcodingStream.WriteAsync(new byte[0], 0, 0)); + Assert.Throws(() => (object)transcodingStream.WriteAsync(ReadOnlyMemory.Empty)); + Assert.Throws(() => transcodingStream.WriteByte((byte)'x')); + } + + [Fact] + public void Dispose_WithLeaveOpenFalse_DisposesInnerStream() + { + // Sync + + MemoryStream innerStream = new MemoryStream(); + Stream transcodingStream = Encoding.CreateTranscodingStream(innerStream, Encoding.UTF8, Encoding.UTF8, leaveOpen: false); + transcodingStream.Dispose(); + transcodingStream.Dispose(); // calling it a second time should no-op + Assert.Throws(() => innerStream.Read(Span.Empty)); + + // Async + + innerStream = new MemoryStream(); + transcodingStream = Encoding.CreateTranscodingStream(innerStream, Encoding.UTF8, Encoding.UTF8, leaveOpen: false); + transcodingStream.DisposeAsync().GetAwaiter().GetResult(); + transcodingStream.DisposeAsync().GetAwaiter().GetResult(); // calling it a second time should no-op + Assert.Throws(() => innerStream.Read(Span.Empty)); + } + + [Fact] + public void Dispose_WithLeaveOpenTrue_DoesNotDisposeInnerStream() + { + // Sync + + MemoryStream innerStream = new MemoryStream(); + Stream transcodingStream = Encoding.CreateTranscodingStream(innerStream, Encoding.UTF8, Encoding.UTF8, leaveOpen: true); + transcodingStream.Dispose(); + transcodingStream.Dispose(); // calling it a second time should no-op + innerStream.Read(Span.Empty); // shouldn't throw + + // Async + + innerStream = new MemoryStream(); + transcodingStream = Encoding.CreateTranscodingStream(innerStream, Encoding.UTF8, Encoding.UTF8, leaveOpen: true); + transcodingStream.DisposeAsync().GetAwaiter().GetResult(); + transcodingStream.DisposeAsync().GetAwaiter().GetResult(); // calling it a second time should no-op + innerStream.Read(Span.Empty); // shouldn't throw + } + + [Fact] + public void Flush_FlushesInnerStreamButNotDecodedState() + { + // Arrange + + CancellationToken expectedCancellationToken = new CancellationTokenSource().Token; + Task expectedFlushAsyncTask = Task.FromResult("just some task"); + + var innerStreamMock = new Mock() { CallBase = true }; + innerStreamMock.Setup(o => o.FlushAsync(expectedCancellationToken)).Returns(expectedFlushAsyncTask); + Stream transcodingStream = Encoding.CreateTranscodingStream(innerStreamMock.Object, Encoding.UTF8, Encoding.UTF8, leaveOpen: true); + + transcodingStream.Write(new byte[] { 0x7A, 0xE0 }); + innerStreamMock.Verify(o => o.Flush(), Times.Never); + innerStreamMock.Verify(o => o.FlushAsync(It.IsAny()), Times.Never); + + // Act & assert - sync flush + + transcodingStream.Flush(); + innerStreamMock.Verify(o => o.Flush(), Times.Once); + innerStreamMock.Verify(o => o.FlushAsync(It.IsAny()), Times.Never); + + // Act & assert - async flush + // This also validates that we flowed the CancellationToken as expected + + Task actualFlushAsyncReturnedTask = transcodingStream.FlushAsync(expectedCancellationToken); + Assert.Same(expectedFlushAsyncTask, actualFlushAsyncReturnedTask); + innerStreamMock.Verify(o => o.Flush(), Times.Once); + innerStreamMock.Verify(o => o.FlushAsync(expectedCancellationToken), Times.Once); + + Assert.Equal("z", Encoding.UTF8.GetString(innerStreamMock.Object.ToArray())); // [ E0 ] shouldn't have been flushed + } + + [Fact] + public void IdenticalInnerAndOuterEncodings_DoesNotActAsPassthrough() + { + // Test read + // [ C0 ] is never a valid UTF-8 byte, should be replaced with U+FFFD + + MemoryStream innerStream = new MemoryStream(new byte[] { 0xC0 }); + Stream transcodingStream = Encoding.CreateTranscodingStream(innerStream, Encoding.UTF8, Encoding.UTF8); + + Assert.Equal(0xEF, transcodingStream.ReadByte()); + Assert.Equal(0xBF, transcodingStream.ReadByte()); + Assert.Equal(0xBD, transcodingStream.ReadByte()); + Assert.Equal(-1 /* eof */, transcodingStream.ReadByte()); + + // Test write + + innerStream = new MemoryStream(); + transcodingStream = Encoding.CreateTranscodingStream(innerStream, Encoding.UTF8, Encoding.UTF8); + transcodingStream.WriteByte(0xC0); + Assert.Equal(new byte[] { 0xEF, 0xBF, 0xBD }, innerStream.ToArray()); + } + + [Theory] + [MemberData(nameof(ReadWriteTestBufferLengths))] + public void Read_ByteArray(int bufferLength) + { + // Tests TranscodingStream.Read(byte[], int, int) + + byte[] buffer = new byte[bufferLength + 3]; + + RunReadTest((transcodingStream, sink) => + { + int numBytesRead = transcodingStream.Read(buffer, 1, bufferLength); + Assert.True(numBytesRead >= 0); + Assert.True(numBytesRead <= bufferLength); + + sink.Write(buffer, 1, numBytesRead); + return numBytesRead; + }); + } + + [Fact] + public void Read_ByteArray_WithInvalidArgs_Throws() + { + Stream transcodingStream = Encoding.CreateTranscodingStream(new MemoryStream(), Encoding.UTF8, Encoding.UTF8); + + Assert.Throws("buffer", () => transcodingStream.Read(null, 0, 0)); + Assert.Throws(() => transcodingStream.Read(new byte[5], -1, -1)); + Assert.Throws(() => transcodingStream.Read(new byte[5], 3, -1)); + Assert.Throws(() => transcodingStream.Read(new byte[5], 5, 1)); + Assert.Throws(() => transcodingStream.Read(new byte[5], 6, -1)); + Assert.Throws(() => transcodingStream.Read(new byte[5], 6, 0)); + } + + [Fact] + public void Read_ByteByByte() + { + // Tests TranscodingStream.ReadByte + + RunReadTest((transcodingStream, sink) => + { + int value = transcodingStream.ReadByte(); + if (value < 0) + { + return 0; + } + + sink.WriteByte(checked((byte)value)); + return 1; + }); + } + + [Theory] + [MemberData(nameof(ReadWriteTestBufferLengths))] + public void Read_Span(int bufferLength) + { + // Tests TranscodingStream.Read(Span) + + using BoundedMemory boundedMemoryBuffer = BoundedMemory.Allocate(bufferLength); + + RunReadTest((transcodingStream, sink) => + { + int numBytesRead = transcodingStream.Read(boundedMemoryBuffer.Span); + Assert.True(numBytesRead >= 0); + Assert.True(numBytesRead <= bufferLength); + + sink.Write(boundedMemoryBuffer.Span[0..numBytesRead]); + return numBytesRead; + }); + } + + private void RunReadTest(Func callback) + { + MemoryStream sink = new MemoryStream(); + + MemoryStream innerStream = new MemoryStream(); + Stream transcodingStream = Encoding.CreateTranscodingStream(innerStream, + innerStreamEncoding: Encoding.UTF8, + outerStreamEncoding: CustomAsciiEncoding); + + // Test with a small string, then test with a large string + + RunOneTestIteration(128); + RunOneTestIteration(10 * 1024 * 1024); + + Assert.Equal(-1, transcodingStream.ReadByte()); // should've reached EOF + + // Now put some invalid data into the inner stream as EOF. + + innerStream.SetLength(0); // reset + innerStream.WriteByte(0xC0); + innerStream.Position = 0; + + sink.SetLength(0); // reset + int numBytesReadJustNow; + do + { + numBytesReadJustNow = callback(transcodingStream, sink); + Assert.True(numBytesReadJustNow >= 0); + } while (numBytesReadJustNow > 0); + + Assert.Equal("[FFFD]", ErrorCheckingAsciiEncoding.GetString(sink.ToArray())); + Assert.Equal(-1, transcodingStream.ReadByte()); // should've reached EOF + + void RunOneTestIteration(int stringLength) + { + sink.SetLength(0); // reset + + string expectedStringContents = GetVeryLongAsciiString(stringLength); + innerStream.SetLength(0); // reset + innerStream.Write(Encoding.UTF8.GetBytes(expectedStringContents)); + innerStream.Position = 0; + + int numBytesReadJustNow; + do + { + numBytesReadJustNow = callback(transcodingStream, sink); + Assert.True(numBytesReadJustNow >= 0); + } while (numBytesReadJustNow > 0); + + Assert.Equal(expectedStringContents, ErrorCheckingAsciiEncoding.GetString(sink.ToArray())); + } + } + + [Fact] + public Task ReadApm() + { + // Tests TranscodingStream.BeginRead / EndRead + + byte[] buffer = new byte[1024 * 1024]; + + return RunReadTestAsync((transcodingStream, cancellationToken, sink) => + { + TaskCompletionSource tcs = new TaskCompletionSource(); + object expectedState = new object(); + + try + { + IAsyncResult asyncResult = transcodingStream.BeginRead(buffer, 1, buffer.Length - 2, (asyncResult) => + { + try + { + int numBytesReadJustNow = transcodingStream.EndRead(asyncResult); + Assert.True(numBytesReadJustNow >= 0); + Assert.True(numBytesReadJustNow < buffer.Length - 3); + sink.Write(buffer, 1, numBytesReadJustNow); + tcs.SetResult(numBytesReadJustNow); + } + catch (Exception ex) + { + tcs.SetException(ex); + } + }, expectedState); + Assert.Same(expectedState, asyncResult.AsyncState); + } + catch (Exception ex) + { + tcs.SetException(ex); + } + + return new ValueTask(tcs.Task); + }, + suppressExpectedCancellationTokenAsserts: true); // APM pattern doesn't allow flowing CancellationToken + } + + [Theory] + [MemberData(nameof(ReadWriteTestBufferLengths))] + public Task ReadAsync_ByteArray(int bufferLength) + { + // Tests TranscodingStream.ReadAsync(byte[], int, int, CancellationToken) + + byte[] buffer = new byte[bufferLength + 3]; + + return RunReadTestAsync(async (transcodingStream, cancellationToken, sink) => + { + int numBytesRead = await transcodingStream.ReadAsync(buffer, 1, bufferLength, cancellationToken); + Assert.True(numBytesRead >= 0); + Assert.True(numBytesRead <= bufferLength); + + sink.Write(buffer, 1, numBytesRead); + return numBytesRead; + }); + } + + [Theory] + [MemberData(nameof(ReadWriteTestBufferLengths))] + public async Task ReadAsync_Memory(int bufferLength) + { + // Tests TranscodingStream.ReadAsync(Memory, CancellationToken) + + using BoundedMemory boundedMemoryBuffer = BoundedMemory.Allocate(bufferLength); + + await RunReadTestAsync(async (transcodingStream, cancellationToken, sink) => + { + int numBytesRead = await transcodingStream.ReadAsync(boundedMemoryBuffer.Memory, cancellationToken); + Assert.True(numBytesRead >= 0); + Assert.True(numBytesRead <= bufferLength); + + sink.Write(boundedMemoryBuffer.Span[0..numBytesRead]); + return numBytesRead; + }); + } + + [Fact] + public void ReadAsync_WithInvalidArgs_Throws() + { + Stream transcodingStream = Encoding.CreateTranscodingStream(new MemoryStream(), Encoding.UTF8, Encoding.UTF8); + + Assert.Throws("buffer", () => (object)transcodingStream.ReadAsync(null, 0, 0)); + Assert.Throws(() => (object)transcodingStream.ReadAsync(new byte[5], -1, -1)); + Assert.Throws(() => (object)transcodingStream.ReadAsync(new byte[5], 3, -1)); + Assert.Throws(() => (object)transcodingStream.ReadAsync(new byte[5], 5, 1)); + Assert.Throws(() => (object)transcodingStream.ReadAsync(new byte[5], 6, -1)); + Assert.Throws(() => (object)transcodingStream.ReadAsync(new byte[5], 6, 0)); + } + + [Fact] + public void ReadApm_WithInvalidArgs_Throws() + { + Stream transcodingStream = Encoding.CreateTranscodingStream(new MemoryStream(), Encoding.UTF8, Encoding.UTF8); + + Assert.Throws("buffer", () => transcodingStream.BeginRead(null, 0, 0, null, null)); + Assert.Throws(() => transcodingStream.BeginRead(new byte[5], -1, -1, null, null)); + Assert.Throws(() => transcodingStream.BeginRead(new byte[5], 3, -1, null, null)); + Assert.Throws(() => transcodingStream.BeginRead(new byte[5], 5, 1, null, null)); + Assert.Throws(() => transcodingStream.BeginRead(new byte[5], 6, -1, null, null)); + Assert.Throws(() => transcodingStream.BeginRead(new byte[5], 6, 0, null, null)); + } + + private async Task RunReadTestAsync(Func> callback, bool suppressExpectedCancellationTokenAsserts = false) + { + CancellationToken expectedCancellationToken = new CancellationTokenSource().Token; + MemoryStream sink = new MemoryStream(); + MemoryStream innerStream = new MemoryStream(); + + var delegatingInnerStreamMock = new Mock(MockBehavior.Strict); + delegatingInnerStreamMock.Setup(o => o.CanRead).Returns(true); + + if (suppressExpectedCancellationTokenAsserts) + { + delegatingInnerStreamMock.Setup(o => o.ReadAsync(It.IsAny>(), It.IsAny())) + .Returns, CancellationToken>(innerStream.ReadAsync); + } + else + { + delegatingInnerStreamMock.Setup(o => o.ReadAsync(It.IsAny>(), expectedCancellationToken)) + .Returns, CancellationToken>(innerStream.ReadAsync); + } + + Stream transcodingStream = Encoding.CreateTranscodingStream( + innerStream: delegatingInnerStreamMock.Object, + innerStreamEncoding: Encoding.UTF8, + outerStreamEncoding: CustomAsciiEncoding); + + // Test with a small string, then test with a large string + + await RunOneTestIteration(128); + await RunOneTestIteration(10 * 1024 * 1024); + + Assert.Equal(-1, await transcodingStream.ReadByteAsync(expectedCancellationToken)); // should've reached EOF + + // Now put some invalid data into the inner stream as EOF. + + innerStream.SetLength(0); // reset + innerStream.WriteByte(0xC0); + innerStream.Position = 0; + + sink.SetLength(0); // reset + int numBytesReadJustNow; + do + { + numBytesReadJustNow = await callback(transcodingStream, expectedCancellationToken, sink); + Assert.True(numBytesReadJustNow >= 0); + } while (numBytesReadJustNow > 0); + + Assert.Equal("[FFFD]", ErrorCheckingAsciiEncoding.GetString(sink.ToArray())); + Assert.Equal(-1, await transcodingStream.ReadByteAsync(expectedCancellationToken)); // should've reached EOF + + async Task RunOneTestIteration(int stringLength) + { + sink.SetLength(0); // reset + + string expectedStringContents = GetVeryLongAsciiString(stringLength); + innerStream.SetLength(0); // reset + innerStream.Write(Encoding.UTF8.GetBytes(expectedStringContents)); + innerStream.Position = 0; + + int numBytesReadJustNow; + do + { + numBytesReadJustNow = await callback(transcodingStream, expectedCancellationToken, sink); + Assert.True(numBytesReadJustNow >= 0); + } while (numBytesReadJustNow > 0); + + Assert.Equal(expectedStringContents, ErrorCheckingAsciiEncoding.GetString(sink.ToArray())); + } + } + + [Fact] + public void ReadTimeout_WriteTimeout_NotSupported() + { + // Arrange - allow inner stream to support ReadTimeout + WriteTimeout + + var innerStreamMock = new Mock(); + innerStreamMock.SetupProperty(o => o.ReadTimeout); + innerStreamMock.SetupProperty(o => o.WriteTimeout); + Stream transcodingStream = Encoding.CreateTranscodingStream(Stream.Null, Encoding.UTF8, Encoding.UTF8, leaveOpen: true); + + // Act & assert - TranscodingStream shouldn't support ReadTimeout + WriteTimeout + + Assert.False(transcodingStream.CanTimeout); + Assert.Throws(() => transcodingStream.ReadTimeout); + Assert.Throws(() => transcodingStream.ReadTimeout = 42); + Assert.Throws(() => transcodingStream.WriteTimeout); + Assert.Throws(() => transcodingStream.WriteTimeout = 42); + } + + [Fact] + public void Seek_AlwaysThrows() + { + // MemoryStream is seekable, but we're not + Stream transcodingStream = Encoding.CreateTranscodingStream(new MemoryStream(), Encoding.UTF8, Encoding.UTF8); + + Assert.False(transcodingStream.CanSeek); + Assert.Throws(() => transcodingStream.Length); + Assert.Throws(() => transcodingStream.Position); + Assert.Throws(() => transcodingStream.Position = 0); + Assert.Throws(() => transcodingStream.Seek(0, SeekOrigin.Current)); + Assert.Throws(() => transcodingStream.SetLength(0)); + } + + [Fact] + public void Write() + { + MemoryStream innerStream = new MemoryStream(); + Stream transcodingStream = Encoding.CreateTranscodingStream( + innerStream, + innerStreamEncoding: ErrorCheckingUnicodeEncoding /* throws on error */, + outerStreamEncoding: Encoding.UTF8 /* performs substition */, + leaveOpen: true); + + // First, test Write(byte[], int, int) + + transcodingStream.Write(Encoding.UTF8.GetBytes("abcdefg"), 2, 3); + Assert.Equal("cde", ErrorCheckingUnicodeEncoding.GetString(innerStream.ToArray())); + + // Then test WriteByte(byte) + + transcodingStream.WriteByte((byte)'z'); + Assert.Equal("cdez", ErrorCheckingUnicodeEncoding.GetString(innerStream.ToArray())); + + // We'll write U+00E0 (utf-8: [C3 A0]) byte-by-byte. + // We shouldn't flush any intermediate bytes. + + transcodingStream.WriteByte((byte)0xC3); + Assert.Equal("cdez", ErrorCheckingUnicodeEncoding.GetString(innerStream.ToArray())); + + transcodingStream.WriteByte((byte)0xA0); + Assert.Equal("cdez\u00E0", ErrorCheckingUnicodeEncoding.GetString(innerStream.ToArray())); + + innerStream.SetLength(0); // reset inner stream + + // Then test Write(ROS), once with a short string and once with a long string + + string asciiString = GetVeryLongAsciiString(128); + byte[] asciiBytesAsUtf8 = Encoding.UTF8.GetBytes(asciiString); + transcodingStream.Write(asciiBytesAsUtf8.AsSpan()); + Assert.Equal(asciiString, ErrorCheckingUnicodeEncoding.GetString(innerStream.ToArray())); + + innerStream.SetLength(0); // reset inner stream + + asciiString = GetVeryLongAsciiString(16 * 1024 * 1024); + asciiBytesAsUtf8 = Encoding.UTF8.GetBytes(asciiString); + transcodingStream.Write(asciiBytesAsUtf8.AsSpan()); + Assert.Equal(asciiString, ErrorCheckingUnicodeEncoding.GetString(innerStream.ToArray())); + + innerStream.SetLength(0); // reset inner stream + + // Close the outer stream and ensure no leftover data was written to the inner stream + + transcodingStream.Close(); + Assert.Equal(0, innerStream.Position); + } + + [Fact] + public void Write_WithPartialData() + { + MemoryStream innerStream = new MemoryStream(); + Stream transcodingStream = Encoding.CreateTranscodingStream( + innerStream, + innerStreamEncoding: CustomAsciiEncoding /* performs custom substitution */, + outerStreamEncoding: Encoding.UTF8 /* performs U+FFFD substition */, + leaveOpen: true); + + // First, write some incomplete data + + transcodingStream.Write(new byte[] { 0x78, 0x79, 0x7A, 0xC3 }); // [C3] shouldn't be flushed yet + Assert.Equal("xyz", ErrorCheckingAsciiEncoding.GetString(innerStream.ToArray())); + + // Flushing should have no effect + + transcodingStream.Flush(); + Assert.Equal("xyz", ErrorCheckingAsciiEncoding.GetString(innerStream.ToArray())); + + // Provide the second byte of the multi-byte sequence + + transcodingStream.WriteByte(0xA0); // [C3 A0] = U+00E0 + Assert.Equal("xyz[00E0]", ErrorCheckingAsciiEncoding.GetString(innerStream.ToArray())); + + // Provide an incomplete sequence, then close the stream. + // Closing the stream should flush the underlying buffers and write the replacement char. + + transcodingStream.Write(new byte[] { 0xE0, 0xBF }); // first 2 bytes of incomplete 3-byte sequence + Assert.Equal("xyz[00E0]", ErrorCheckingAsciiEncoding.GetString(innerStream.ToArray())); // wasn't flushed yet + + transcodingStream.Close(); + Assert.Equal("xyz[00E0][FFFD]", ErrorCheckingAsciiEncoding.GetString(innerStream.ToArray())); + } + + [Fact] + public void Write_WithInvalidArgs_Throws() + { + Stream transcodingStream = Encoding.CreateTranscodingStream(new MemoryStream(), Encoding.UTF8, Encoding.UTF8); + + Assert.Throws("buffer", () => transcodingStream.Write(null, 0, 0)); + Assert.Throws(() => transcodingStream.Write(new byte[5], -1, -1)); + Assert.Throws(() => transcodingStream.Write(new byte[5], 3, -1)); + Assert.Throws(() => transcodingStream.Write(new byte[5], 5, 1)); + Assert.Throws(() => transcodingStream.Write(new byte[5], 6, -1)); + Assert.Throws(() => transcodingStream.Write(new byte[5], 6, 0)); + } + + [Fact] + public async Task WriteAsync() + { + MemoryStream sink = new MemoryStream(); + CancellationToken expectedFlushAsyncCancellationToken = new CancellationTokenSource().Token; + CancellationToken expectedWriteAsyncCancellationToken = new CancellationTokenSource().Token; + + var innerStreamMock = new Mock(MockBehavior.Strict); + innerStreamMock.Setup(o => o.CanWrite).Returns(true); + innerStreamMock.Setup(o => o.WriteAsync(It.IsAny>(), expectedWriteAsyncCancellationToken)) + .Returns, CancellationToken>(sink.WriteAsync); + innerStreamMock.Setup(o => o.FlushAsync(expectedFlushAsyncCancellationToken)).Returns(Task.CompletedTask); + + Stream transcodingStream = Encoding.CreateTranscodingStream( + innerStreamMock.Object, + innerStreamEncoding: ErrorCheckingUnicodeEncoding, + outerStreamEncoding: Encoding.UTF8 /* performs U+FFFD substition */, + leaveOpen: true); + + // First, test WriteAsync(byte[], int, int, CancellationToken) + + await transcodingStream.WriteAsync(Encoding.UTF8.GetBytes("abcdefg"), 2, 3, expectedWriteAsyncCancellationToken); + Assert.Equal("cde", ErrorCheckingUnicodeEncoding.GetString(sink.ToArray())); + + // We'll write U+00E0 (utf-8: [C3 A0]) byte-by-byte. + // We shouldn't flush any intermediate bytes. + + await transcodingStream.WriteAsync(new byte[] { 0xC3, 0xA0 }, 0, 1, expectedWriteAsyncCancellationToken); + await transcodingStream.FlushAsync(expectedFlushAsyncCancellationToken); + Assert.Equal("cde", ErrorCheckingUnicodeEncoding.GetString(sink.ToArray())); + + await transcodingStream.WriteAsync(new byte[] { 0xC3, 0xA0 }, 1, 1, expectedWriteAsyncCancellationToken); + Assert.Equal("cde\u00E0", ErrorCheckingUnicodeEncoding.GetString(sink.ToArray())); + + sink.SetLength(0); // reset sink + + // Then test WriteAsync(ROM, CancellationToken), once with a short string and once with a long string + + string asciiString = GetVeryLongAsciiString(128); + byte[] asciiBytesAsUtf8 = Encoding.UTF8.GetBytes(asciiString); + await transcodingStream.WriteAsync(asciiBytesAsUtf8.AsMemory(), expectedWriteAsyncCancellationToken); + Assert.Equal(asciiString, ErrorCheckingUnicodeEncoding.GetString(sink.ToArray())); + + sink.SetLength(0); // reset sink + + asciiString = GetVeryLongAsciiString(16 * 1024 * 1024); + asciiBytesAsUtf8 = Encoding.UTF8.GetBytes(asciiString); + await transcodingStream.WriteAsync(asciiBytesAsUtf8.AsMemory(), expectedWriteAsyncCancellationToken); + Assert.Equal(asciiString, ErrorCheckingUnicodeEncoding.GetString(sink.ToArray())); + + sink.SetLength(0); // reset sink + + // Close the outer stream and ensure no leftover data was written to the inner stream + + ValueTask actualDisposeTask = transcodingStream.DisposeAsync(); + Assert.Equal(default(ValueTask), actualDisposeTask); // should've completed synchronously + Assert.Equal(0, sink.Position); + } + + [Fact] + public async Task WriteAsync_WithPartialData() + { + MemoryStream sink = new MemoryStream(); + CancellationToken expectedCancellationToken = new CancellationTokenSource().Token; + + var innerStreamMock = new Mock(MockBehavior.Strict); + innerStreamMock.Setup(o => o.CanWrite).Returns(true); + innerStreamMock.Setup(o => o.WriteAsync(It.IsAny>(), expectedCancellationToken)) + .Returns, CancellationToken>(sink.WriteAsync); + + Stream transcodingStream = Encoding.CreateTranscodingStream( + innerStreamMock.Object, + innerStreamEncoding: CustomAsciiEncoding /* performs custom substitution */, + outerStreamEncoding: Encoding.UTF8 /* performs U+FFFD substition */, + leaveOpen: true); + + // First, write some incomplete data + + await transcodingStream.WriteAsync(new byte[] { 0x78, 0x79, 0x7A, 0xC3 }, expectedCancellationToken); // [C3] shouldn't be flushed yet + Assert.Equal("xyz", ErrorCheckingAsciiEncoding.GetString(sink.ToArray())); + + // Provide the second byte of the multi-byte sequence + + await transcodingStream.WriteAsync(new byte[] { 0xA0 }, expectedCancellationToken); // [C3 A0] = U+00E0 + Assert.Equal("xyz[00E0]", ErrorCheckingAsciiEncoding.GetString(sink.ToArray())); + + // Provide an incomplete sequence, then close the stream. + // Closing the stream should flush the underlying buffers and write the replacement char. + + await transcodingStream.WriteAsync(new byte[] { 0xE0, 0xBF }, expectedCancellationToken); // first 2 bytes of incomplete 3-byte sequence + Assert.Equal("xyz[00E0]", ErrorCheckingAsciiEncoding.GetString(sink.ToArray())); // wasn't flushed yet + + // The call to DisposeAsync() will call innerStream.WriteAsync without a CancellationToken. + + innerStreamMock.Setup(o => o.WriteAsync(It.IsAny>(), CancellationToken.None)) + .Returns, CancellationToken>(sink.WriteAsync); + + await transcodingStream.DisposeAsync(); + Assert.Equal("xyz[00E0][FFFD]", ErrorCheckingAsciiEncoding.GetString(sink.ToArray())); + } + + [Fact] + public void WriteAsync_WithInvalidArgs_Throws() + { + Stream transcodingStream = Encoding.CreateTranscodingStream(new MemoryStream(), Encoding.UTF8, Encoding.UTF8); + + Assert.Throws("buffer", () => (object)transcodingStream.WriteAsync(null, 0, 0)); + Assert.Throws(() => (object)transcodingStream.WriteAsync(new byte[5], -1, -1)); + Assert.Throws(() => (object)transcodingStream.WriteAsync(new byte[5], 3, -1)); + Assert.Throws(() => (object)transcodingStream.WriteAsync(new byte[5], 5, 1)); + Assert.Throws(() => (object)transcodingStream.WriteAsync(new byte[5], 6, -1)); + Assert.Throws(() => (object)transcodingStream.WriteAsync(new byte[5], 6, 0)); + } + + [Fact] + public void WriteApm() + { + // Arrange + + MemoryStream sink = new MemoryStream(); + object expectedState = new object(); + + var innerStreamMock = new Mock(MockBehavior.Strict); + innerStreamMock.Setup(o => o.CanWrite).Returns(true); + innerStreamMock.Setup(o => o.WriteAsync(It.IsAny>(), CancellationToken.None)) + .Returns, CancellationToken>(sink.WriteAsync); + + Stream transcodingStream = Encoding.CreateTranscodingStream(innerStreamMock.Object, Encoding.UTF8, Encoding.UTF8); + + // Act + + IAsyncResult asyncResult = transcodingStream.BeginWrite(Encoding.UTF8.GetBytes("abcdefg"), 1, 3, null, expectedState); + transcodingStream.EndWrite(asyncResult); + + // Assert + + Assert.Equal(expectedState, asyncResult.AsyncState); + Assert.Equal("bcd", Encoding.UTF8.GetString(sink.ToArray())); + } + + [Fact] + public void WriteApm_WithInvalidArgs_Throws() + { + Stream transcodingStream = Encoding.CreateTranscodingStream(new MemoryStream(), Encoding.UTF8, Encoding.UTF8); + + Assert.Throws("buffer", () => transcodingStream.BeginWrite(null, 0, 0, null, null)); + Assert.Throws(() => transcodingStream.BeginWrite(new byte[5], -1, -1, null, null)); + Assert.Throws(() => transcodingStream.BeginWrite(new byte[5], 3, -1, null, null)); + Assert.Throws(() => transcodingStream.BeginWrite(new byte[5], 5, 1, null, null)); + Assert.Throws(() => transcodingStream.BeginWrite(new byte[5], 6, -1, null, null)); + Assert.Throws(() => transcodingStream.BeginWrite(new byte[5], 6, 0, null, null)); + } + + // returns "abc...xyzabc...xyzabc..." + private static string GetVeryLongAsciiString(int length) + { + return string.Create(length, (object)null, (buffer, _) => + { + for (int i = 0; i < buffer.Length; i++) + { + buffer[i] = (char)('a' + (i % 26)); + } + }); + } + + // A custom ASCIIEncoding where both encoder + decoder fallbacks have been specified + private static readonly Encoding CustomAsciiEncoding = Encoding.GetEncoding( + "ascii", new CustomEncoderFallback(), new DecoderReplacementFallback("\uFFFD")); + + private static readonly Encoding ErrorCheckingAsciiEncoding + = Encoding.GetEncoding("ascii", EncoderFallback.ExceptionFallback, DecoderFallback.ExceptionFallback); + + private static readonly UnicodeEncoding ErrorCheckingUnicodeEncoding + = new UnicodeEncoding(bigEndian: false, byteOrderMark: false, throwOnInvalidBytes: true); + + // A custom encoder fallback which substitutes unknown chars with "[xxxx]" (the code point as hex) + private sealed class CustomEncoderFallback : EncoderFallback + { + public override int MaxCharCount => 8; // = "[10FFFF]".Length + + public override EncoderFallbackBuffer CreateFallbackBuffer() + { + return new CustomEncoderFallbackBuffer(); + } + + private sealed class CustomEncoderFallbackBuffer : EncoderFallbackBuffer + { + private string _remaining = string.Empty; + private int _remainingIdx = 0; + + public override int Remaining => _remaining.Length - _remainingIdx; + + public override bool Fallback(char charUnknownHigh, char charUnknownLow, int index) + => FallbackCommon((uint)char.ConvertToUtf32(charUnknownHigh, charUnknownLow)); + + public override bool Fallback(char charUnknown, int index) + => FallbackCommon(charUnknown); + + private bool FallbackCommon(uint codePoint) + { + Assert.True(codePoint <= 0x10FFFF); + _remaining = FormattableString.Invariant($"[{codePoint:X4}]"); + _remainingIdx = 0; + return true; + } + + public override char GetNextChar() + { + return (_remainingIdx < _remaining.Length) + ? _remaining[_remainingIdx++] + : '\0' /* end of string reached */; + } + + public override bool MovePrevious() + { + if (_remainingIdx == 0) + { + return false; + } + + _remainingIdx--; + return true; + } + } + } + } +} diff --git a/src/libraries/System.Text.Encoding/tests/System.Text.Encoding.Tests.csproj b/src/libraries/System.Text.Encoding/tests/System.Text.Encoding.Tests.csproj index e30fdcac586139..3cae369b090f79 100644 --- a/src/libraries/System.Text.Encoding/tests/System.Text.Encoding.Tests.csproj +++ b/src/libraries/System.Text.Encoding/tests/System.Text.Encoding.Tests.csproj @@ -33,6 +33,7 @@ + @@ -76,4 +77,7 @@ + + + \ No newline at end of file From 115528657af58dd7be62a78253c015229a83ff62 Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Fri, 17 Apr 2020 20:35:27 -0700 Subject: [PATCH 2/6] PR cleanup --- .../System/Buffers/BoundedMemory.Windows.cs | 5 ++++- .../src/System/Text/Rune.cs | 11 ++++------- .../tests/Encoding/TranscodingStreamTests.cs | 15 +++++++-------- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/libraries/Common/tests/TestUtilities/System/Buffers/BoundedMemory.Windows.cs b/src/libraries/Common/tests/TestUtilities/System/Buffers/BoundedMemory.Windows.cs index 8dbcb8c122bb48..5de8ebe55432b5 100644 --- a/src/libraries/Common/tests/TestUtilities/System/Buffers/BoundedMemory.Windows.cs +++ b/src/libraries/Common/tests/TestUtilities/System/Buffers/BoundedMemory.Windows.cs @@ -191,7 +191,10 @@ protected override void Dispose(bool disposing) // no-op; the handle will be disposed separately } - public override Span GetSpan() => _impl.Span; + public override Span GetSpan() + { + throw new NotImplementedException(); + } public override MemoryHandle Pin(int elementIndex) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs index 9ad17cd45fa7aa..3a703d661ff789 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs @@ -20,9 +20,6 @@ namespace System.Text [DebuggerDisplay("{DebuggerDisplay,nq}")] public readonly struct Rune : IComparable, IEquatable { - internal const int MaxUtf16CharsPerRune = 2; // supplementary plane code points are encoded as 2 UTF-16 code units - internal const int MaxUtf8BytesPerRune = 4; // supplementary plane code points are encoded as 4 UTF-8 code units - private const char HighSurrogateStart = '\ud800'; private const char LowSurrogateStart = '\udc00'; private const int HighSurrogateRange = 0x3FF; @@ -188,8 +185,8 @@ private static Rune ChangeCaseCultureAware(Rune rune, TextInfo textInfo, bool to Debug.Assert(!GlobalizationMode.Invariant, "This should've been checked by the caller."); Debug.Assert(textInfo != null, "This should've been checked by the caller."); - Span original = stackalloc char[MaxUtf16CharsPerRune]; - Span modified = stackalloc char[MaxUtf16CharsPerRune]; + Span original = stackalloc char[2]; // worst case scenario = 2 code units (for a surrogate pair) + Span modified = stackalloc char[2]; // case change should preserve UTF-16 code unit count int charCount = rune.EncodeToUtf16(original); original = original.Slice(0, charCount); @@ -223,8 +220,8 @@ private static Rune ChangeCaseCultureAware(Rune rune, CultureInfo culture, bool Debug.Assert(!GlobalizationMode.Invariant, "This should've been checked by the caller."); Debug.Assert(culture != null, "This should've been checked by the caller."); - Span original = stackalloc char[MaxUtf16CharsPerRune]; - Span modified = stackalloc char[MaxUtf16CharsPerRune]; + Span original = stackalloc char[2]; // worst case scenario = 2 code units (for a surrogate pair) + Span modified = stackalloc char[2]; // case change should preserve UTF-16 code unit count int charCount = rune.EncodeToUtf16(original); original = original.Slice(0, charCount); diff --git a/src/libraries/System.Text.Encoding/tests/Encoding/TranscodingStreamTests.cs b/src/libraries/System.Text.Encoding/tests/Encoding/TranscodingStreamTests.cs index 8f029499ef34ad..2490f26bffd27a 100644 --- a/src/libraries/System.Text.Encoding/tests/Encoding/TranscodingStreamTests.cs +++ b/src/libraries/System.Text.Encoding/tests/Encoding/TranscodingStreamTests.cs @@ -1,5 +1,4 @@ -using System.Buffers; -using System.Collections.Generic; +using System.Collections.Generic; using System.IO; using System.Threading; using System.Threading.Tasks; @@ -291,15 +290,15 @@ public void Read_Span(int bufferLength) { // Tests TranscodingStream.Read(Span) - using BoundedMemory boundedMemoryBuffer = BoundedMemory.Allocate(bufferLength); + byte[] buffer = new byte[bufferLength]; RunReadTest((transcodingStream, sink) => { - int numBytesRead = transcodingStream.Read(boundedMemoryBuffer.Span); + int numBytesRead = transcodingStream.Read(buffer.AsSpan()); Assert.True(numBytesRead >= 0); Assert.True(numBytesRead <= bufferLength); - sink.Write(boundedMemoryBuffer.Span[0..numBytesRead]); + sink.Write(buffer.AsSpan(0..numBytesRead)); return numBytesRead; }); } @@ -423,15 +422,15 @@ public async Task ReadAsync_Memory(int bufferLength) { // Tests TranscodingStream.ReadAsync(Memory, CancellationToken) - using BoundedMemory boundedMemoryBuffer = BoundedMemory.Allocate(bufferLength); + byte[] buffer = new byte[bufferLength]; await RunReadTestAsync(async (transcodingStream, cancellationToken, sink) => { - int numBytesRead = await transcodingStream.ReadAsync(boundedMemoryBuffer.Memory, cancellationToken); + int numBytesRead = await transcodingStream.ReadAsync(buffer.AsMemory(), cancellationToken); Assert.True(numBytesRead >= 0); Assert.True(numBytesRead <= bufferLength); - sink.Write(boundedMemoryBuffer.Span[0..numBytesRead]); + sink.Write(buffer.AsSpan(0..numBytesRead)); return numBytesRead; }); } From 0daeb6ff9e9c9e0879be4454a0f4ab95c21637f9 Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Mon, 20 Apr 2020 15:00:53 -0700 Subject: [PATCH 3/6] Wire TranscodingStream through System.Text.Json --- .../src/System.Net.Http.Json.csproj | 4 ++-- .../Net/Http/Json/HttpContentJsonExtensions.cs | 8 ++++++++ .../src/System/Net/Http/Json/JsonContent.cs | 14 ++++++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Net.Http.Json/src/System.Net.Http.Json.csproj b/src/libraries/System.Net.Http.Json/src/System.Net.Http.Json.csproj index 04dd510bb1c79f..c7a463449ef8f1 100644 --- a/src/libraries/System.Net.Http.Json/src/System.Net.Http.Json.csproj +++ b/src/libraries/System.Net.Http.Json/src/System.Net.Http.Json.csproj @@ -9,8 +9,6 @@ - - @@ -21,6 +19,8 @@ + + diff --git a/src/libraries/System.Net.Http.Json/src/System/Net/Http/Json/HttpContentJsonExtensions.cs b/src/libraries/System.Net.Http.Json/src/System/Net/Http/Json/HttpContentJsonExtensions.cs index 6d1d309760f3a9..a5645cf0081e1a 100644 --- a/src/libraries/System.Net.Http.Json/src/System/Net/Http/Json/HttpContentJsonExtensions.cs +++ b/src/libraries/System.Net.Http.Json/src/System/Net/Http/Json/HttpContentJsonExtensions.cs @@ -38,7 +38,11 @@ public static Task ReadFromJsonAsync(this HttpContent content, JsonSeriali // Wrap content stream into a transcoding stream that buffers the data transcoded from the sourceEncoding to utf-8. if (sourceEncoding != null && sourceEncoding != Encoding.UTF8) { +#if NETCOREAPP + contentStream = Encoding.CreateTranscodingStream(contentStream, innerStreamEncoding: sourceEncoding, outerStreamEncoding: Encoding.UTF8); +#else contentStream = new TranscodingReadStream(contentStream, sourceEncoding); +#endif } using (contentStream) @@ -54,7 +58,11 @@ private static async Task ReadFromJsonAsyncCore(HttpContent content, Encod // Wrap content stream into a transcoding stream that buffers the data transcoded from the sourceEncoding to utf-8. if (sourceEncoding != null && sourceEncoding != Encoding.UTF8) { +#if NETCOREAPP + contentStream = Encoding.CreateTranscodingStream(contentStream, innerStreamEncoding: sourceEncoding, outerStreamEncoding: Encoding.UTF8); +#else contentStream = new TranscodingReadStream(contentStream, sourceEncoding); +#endif } using (contentStream) diff --git a/src/libraries/System.Net.Http.Json/src/System/Net/Http/Json/JsonContent.cs b/src/libraries/System.Net.Http.Json/src/System/Net/Http/Json/JsonContent.cs index a84d375b137c1f..62e9d92b1deb14 100644 --- a/src/libraries/System.Net.Http.Json/src/System/Net/Http/Json/JsonContent.cs +++ b/src/libraries/System.Net.Http.Json/src/System/Net/Http/Json/JsonContent.cs @@ -67,6 +67,19 @@ private async Task SerializeToStreamAsyncCore(Stream targetStream, CancellationT // Wrap provided stream into a transcoding stream that buffers the data transcoded from utf-8 to the targetEncoding. if (targetEncoding != null && targetEncoding != Encoding.UTF8) { +#if NETCOREAPP + Stream transcodingStream = Encoding.CreateTranscodingStream(targetStream, targetEncoding, Encoding.UTF8, leaveOpen: true); + try + { + await JsonSerializer.SerializeAsync(transcodingStream, Value, ObjectType, _jsonSerializerOptions, cancellationToken).ConfigureAwait(false); + } + finally + { + // DisposeAsync will flush any partial write buffers. In practice our partial write + // buffers should be empty as we expect JsonSerializer to emit only well-formed UTF-8 data. + await transcodingStream.DisposeAsync().ConfigureAwait(false); + } +#else using (TranscodingWriteStream transcodingStream = new TranscodingWriteStream(targetStream, targetEncoding)) { await JsonSerializer.SerializeAsync(transcodingStream, Value, ObjectType, _jsonSerializerOptions, cancellationToken).ConfigureAwait(false); @@ -75,6 +88,7 @@ private async Task SerializeToStreamAsyncCore(Stream targetStream, CancellationT // acceptable to Flush a Stream (multiple times) prior to completion. await transcodingStream.FinalWriteAsync(cancellationToken).ConfigureAwait(false); } +#endif } else { From 744489f744dd0cc2eb0217be800d89ce842f57fa Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Mon, 20 Apr 2020 15:07:54 -0700 Subject: [PATCH 4/6] Add missing license headers --- .../tests/TestUtilities/System/IO/StreamExtensions.cs | 6 +++++- .../tests/Encoding/TranscodingStreamTests.cs | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/libraries/Common/tests/TestUtilities/System/IO/StreamExtensions.cs b/src/libraries/Common/tests/TestUtilities/System/IO/StreamExtensions.cs index 05a5b7e3ef1d4c..30ed13c80a2521 100644 --- a/src/libraries/Common/tests/TestUtilities/System/IO/StreamExtensions.cs +++ b/src/libraries/Common/tests/TestUtilities/System/IO/StreamExtensions.cs @@ -1,4 +1,8 @@ -using System.Threading; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Threading; using System.Threading.Tasks; namespace System.IO diff --git a/src/libraries/System.Text.Encoding/tests/Encoding/TranscodingStreamTests.cs b/src/libraries/System.Text.Encoding/tests/Encoding/TranscodingStreamTests.cs index 2490f26bffd27a..ae121d4c1d9232 100644 --- a/src/libraries/System.Text.Encoding/tests/Encoding/TranscodingStreamTests.cs +++ b/src/libraries/System.Text.Encoding/tests/Encoding/TranscodingStreamTests.cs @@ -1,4 +1,8 @@ -using System.Collections.Generic; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Generic; using System.IO; using System.Threading; using System.Threading.Tasks; From 9c6f7d69378b1fe4121bfb67908fbb7ca0ff3407 Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Mon, 20 Apr 2020 15:10:39 -0700 Subject: [PATCH 5/6] Address PR feedback --- .../src/System/Text/Encoding.cs | 1 - .../src/System/Text/TranscodingStream.cs | 53 ++++++++++--------- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs index 0efe07f2588caa..203fd784f1ee70 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs @@ -1041,7 +1041,6 @@ value is Encoding that && public override int GetHashCode() => _codePage + this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode(); - /// /// Creates a which serves to transcode data between an inner /// and an outer , similar to . diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/TranscodingStream.cs b/src/libraries/System.Private.CoreLib/src/System/Text/TranscodingStream.cs index 4110f490e25777..575cc248dd1bba 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/TranscodingStream.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/TranscodingStream.cs @@ -28,7 +28,7 @@ internal sealed class TranscodingStream : Stream private readonly Encoding _innerEncoding; private readonly Encoding _thisEncoding; - private Stream? _innerStream; // null if the wrapper has been disposed + private Stream _innerStream; // null if the wrapper has been disposed private readonly bool _leaveOpen; /* @@ -47,7 +47,9 @@ internal sealed class TranscodingStream : Stream private Encoder? _thisEncoder; private Decoder? _innerDecoder; private int _readCharBufferMaxSize; // the maximum number of characters _innerDecoder.ReadChars can return - private ArraySegment _pendingReadData; // contains the data that Read() should return + private byte[]? _readBuffer; // contains the data that Read() should return + private int _readBufferOffset; + private int _readBufferCount; internal TranscodingStream(Stream innerStream, Encoding innerEncoding, Encoding thisEncoding, bool leaveOpen) { @@ -107,7 +109,7 @@ protected override void Dispose(bool disposing) // Mark our object as disposed Stream innerStream = _innerStream; - _innerStream = null; + _innerStream = null!; // And dispose the inner stream if needed @@ -134,7 +136,7 @@ public override ValueTask DisposeAsync() // No need to write anything to the stream first. Stream innerStream = _innerStream; - _innerStream = null; + _innerStream = null!; return (_leaveOpen) ? default /* no work to do */ @@ -151,7 +153,7 @@ async ValueTask DisposeAsyncCore(ArraySegment pendingData) Stream innerStream = _innerStream; await innerStream.WriteAsync(pendingData.AsMemory()).ConfigureAwait(false); - _innerStream = null; + _innerStream = null!; if (!_leaveOpen) { @@ -171,7 +173,7 @@ public override void EndWrite(IAsyncResult asyncResult) // Sets up the data structures that are necessary before any read operation takes place, // throwing if the object is in a state where reads are not possible. - [MemberNotNull(nameof(_innerStream), nameof(_innerDecoder), nameof(_thisEncoder))] + [MemberNotNull(nameof(_innerDecoder), nameof(_thisEncoder), nameof(_readBuffer))] private void EnsurePreReadConditions() { ThrowIfDisposed(); @@ -197,13 +199,13 @@ void InitializeReadDataStructures() // data which we haven't yet read; however, we own the entire backing array and can // re-create the segment as needed once the array is repopulated. - _pendingReadData = new ArraySegment(GC.AllocateUninitializedArray(_thisEncoding.GetMaxByteCount(_readCharBufferMaxSize)), 0, 0); + _readBuffer = GC.AllocateUninitializedArray(_thisEncoding.GetMaxByteCount(_readCharBufferMaxSize)); } } // Sets up the data structures that are necessary before any write operation takes place, // throwing if the object is in a state where writes are not possible. - [MemberNotNull(nameof(_innerStream), nameof(_thisDecoder), nameof(_innerEncoder))] + [MemberNotNull(nameof(_thisDecoder), nameof(_innerEncoder))] private void EnsurePreWriteConditions() { ThrowIfDisposed(); @@ -237,12 +239,12 @@ private ArraySegment FinalFlushWriteBuffers() return default; } + // convert bytes [this] -> chars // Having leftover data in our buffers should be very rare since it should only // occur if the end of the stream contains an incomplete multi-byte sequence. // Let's not bother complicating this logic with array pool rentals or allocation- // avoiding loops. - // convert bytes [this] -> chars char[] chars = Array.Empty(); int charCount = _thisDecoder.GetCharCount(Array.Empty(), 0, 0, flush: true); @@ -253,6 +255,8 @@ private ArraySegment FinalFlushWriteBuffers() } // convert chars -> bytes [inner] + // It's possible that _innerEncoder might need to perform some end-of-text fixup + // (due to flush: true), even if _thisDecoder didn't need to do so. byte[] bytes = Array.Empty(); int byteCount = _innerEncoder.GetByteCount(chars, 0, charCount, flush: true); @@ -305,7 +309,7 @@ public override int Read(Span buffer) // and to ensure an exception is thrown if the Encoding reported an incorrect // worst-case expansion. - if (_pendingReadData.Count == 0) + if (_readBufferCount == 0) { byte[] rentedBytes = ArrayPool.Shared.Rent(DefaultReadByteBufferSize); char[] rentedChars = ArrayPool.Shared.Rent(_readCharBufferMaxSize); @@ -323,9 +327,10 @@ public override int Read(Span buffer) // convert bytes [inner] -> chars, then convert chars -> bytes [this] int charsDecodedJustNow = _innerDecoder.GetChars(rentedBytes, 0, innerBytesReadJustNow, rentedChars, 0, flush: isEofReached); - int pendingReadDataPopulatedJustNow = _thisEncoder.GetBytes(rentedChars, 0, charsDecodedJustNow, _pendingReadData.Array!, 0, flush: isEofReached); + int pendingReadDataPopulatedJustNow = _thisEncoder.GetBytes(rentedChars, 0, charsDecodedJustNow, _readBuffer, 0, flush: isEofReached); - _pendingReadData = new ArraySegment(_pendingReadData.Array!, 0, pendingReadDataPopulatedJustNow); + _readBufferOffset = 0; + _readBufferCount = pendingReadDataPopulatedJustNow; } finally { @@ -339,9 +344,10 @@ public override int Read(Span buffer) // empty because the inner stream has reached EOF and all pending read data // has already been flushed, and we should return 0. - int bytesToReturn = Math.Min(_pendingReadData.Count, buffer.Length); - _pendingReadData.AsSpan(0, bytesToReturn).CopyTo(buffer); - _pendingReadData = _pendingReadData[bytesToReturn..]; + int bytesToReturn = Math.Min(_readBufferCount, buffer.Length); + _readBuffer.AsSpan(_readBufferOffset, bytesToReturn).CopyTo(buffer); + _readBufferOffset += bytesToReturn; + _readBufferCount -= bytesToReturn; return bytesToReturn; } @@ -375,7 +381,7 @@ async ValueTask ReadAsyncCore(Memory buffer, CancellationToken cancel // and to ensure an exception is thrown if the Encoding reported an incorrect // worst-case expansion. - if (_pendingReadData.Count == 0) + if (_readBufferCount == 0) { byte[] rentedBytes = ArrayPool.Shared.Rent(DefaultReadByteBufferSize); char[] rentedChars = ArrayPool.Shared.Rent(_readCharBufferMaxSize); @@ -393,9 +399,10 @@ async ValueTask ReadAsyncCore(Memory buffer, CancellationToken cancel // convert bytes [inner] -> chars, then convert chars -> bytes [this] int charsDecodedJustNow = _innerDecoder.GetChars(rentedBytes, 0, innerBytesReadJustNow, rentedChars, 0, flush: isEofReached); - int pendingReadDataPopulatedJustNow = _thisEncoder.GetBytes(rentedChars, 0, charsDecodedJustNow, _pendingReadData.Array!, 0, flush: isEofReached); + int pendingReadDataPopulatedJustNow = _thisEncoder.GetBytes(rentedChars, 0, charsDecodedJustNow, _readBuffer, 0, flush: isEofReached); - _pendingReadData = new ArraySegment(_pendingReadData.Array!, 0, pendingReadDataPopulatedJustNow); + _readBufferOffset = 0; + _readBufferCount = pendingReadDataPopulatedJustNow; } finally { @@ -409,9 +416,10 @@ async ValueTask ReadAsyncCore(Memory buffer, CancellationToken cancel // empty because the inner stream has reached EOF and all pending read data // has already been flushed, and we should return 0. - int bytesToReturn = Math.Min(_pendingReadData.Count, buffer.Length); - _pendingReadData.AsSpan(0, bytesToReturn).CopyTo(buffer.Span); - _pendingReadData = _pendingReadData[bytesToReturn..]; + int bytesToReturn = Math.Min(_readBufferCount, buffer.Length); + _readBuffer.AsSpan(_readBufferOffset, bytesToReturn).CopyTo(buffer.Span); + _readBufferOffset += bytesToReturn; + _readBufferCount -= bytesToReturn; return bytesToReturn; } } @@ -430,9 +438,6 @@ public override void SetLength(long value) => throw new NotSupportedException(SR.NotSupported_UnseekableStream); [StackTraceHidden] -#pragma warning disable CS3016 // Arrays as attribute arguments is not CLS-compliant - [MemberNotNull(new[] { nameof(_innerStream) })] -#pragma warning restore CS3016 // Arrays as attribute arguments is not CLS-compliant private void ThrowIfDisposed() { if (_innerStream is null) From 29107b26b8365cbb49a8f870d51ab815afd22e18 Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Wed, 29 Apr 2020 14:30:10 -0700 Subject: [PATCH 6/6] PR feedback --- .../src/System/Text/TranscodingStream.cs | 25 ++++++++----------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/TranscodingStream.cs b/src/libraries/System.Private.CoreLib/src/System/Text/TranscodingStream.cs index 575cc248dd1bba..44f620635c131a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/TranscodingStream.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/TranscodingStream.cs @@ -151,9 +151,9 @@ async ValueTask DisposeAsyncCore(ArraySegment pendingData) Debug.Assert(pendingData.Count != 0); Stream innerStream = _innerStream; + _innerStream = null!; await innerStream.WriteAsync(pendingData.AsMemory()).ConfigureAwait(false); - _innerStream = null!; if (!_leaveOpen) { @@ -471,20 +471,10 @@ public override void Write(ReadOnlySpan buffer) int rentalLength = Math.Clamp(buffer.Length, MinWriteRentedArraySize, MaxWriteRentedArraySize); - char[] rentedChars = ArrayPool.Shared.Rent(rentalLength); - byte[] rentedBytes = ArrayPool.Shared.Rent(rentalLength); + char[] scratchChars = ArrayPool.Shared.Rent(rentalLength); + byte[] scratchBytes = ArrayPool.Shared.Rent(rentalLength); try - { - WriteCore(buffer, rentedChars, rentedBytes); - } - finally - { - ArrayPool.Shared.Return(rentedChars); - ArrayPool.Shared.Return(rentedBytes); - } - - void WriteCore(ReadOnlySpan remainingOuterEncodedBytes, char[] scratchChars, byte[] scratchBytes) { bool decoderFinished, encoderFinished; do @@ -492,14 +482,14 @@ void WriteCore(ReadOnlySpan remainingOuterEncodedBytes, char[] scratchChar // convert bytes [this] -> chars _thisDecoder.Convert( - bytes: remainingOuterEncodedBytes, + bytes: buffer, chars: scratchChars, flush: false, out int bytesConsumed, out int charsWritten, out decoderFinished); - remainingOuterEncodedBytes = remainingOuterEncodedBytes[bytesConsumed..]; + buffer = buffer[bytesConsumed..]; // convert chars -> bytes [inner] @@ -524,6 +514,11 @@ void WriteCore(ReadOnlySpan remainingOuterEncodedBytes, char[] scratchChar } while (!encoderFinished); } while (!decoderFinished); } + finally + { + ArrayPool.Shared.Return(scratchChars); + ArrayPool.Shared.Return(scratchBytes); + } } public override Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken)