From 43a04b05cc0d0455a9d414eac2fd8e77fc8eedd6 Mon Sep 17 00:00:00 2001 From: Jan Kotas Date: Thu, 22 Aug 2019 14:58:47 -0700 Subject: [PATCH 1/2] Fix BinaryReader.ReadChars for fragmented Streams BinaryReader.ReadChars incorrectly read more than necessary from the underlying Stream when multi-byte characters straddled the read chunks. Fixes https://github.com/dotnet/corefx/issues/40455 --- .../shared/System/IO/BinaryReader.cs | 19 +++++++++++++++++++ .../shared/System/Text/DecoderNLS.cs | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/System.Private.CoreLib/shared/System/IO/BinaryReader.cs b/src/System.Private.CoreLib/shared/System/IO/BinaryReader.cs index fc2ab5fd0ff7..4f01c7ec9522 100644 --- a/src/System.Private.CoreLib/shared/System/IO/BinaryReader.cs +++ b/src/System.Private.CoreLib/shared/System/IO/BinaryReader.cs @@ -384,6 +384,25 @@ private int InternalReadChars(Span buffer) { numBytes <<= 1; } + + // We do not want to read even a single byte more than necessary. + // + // Subtract pending bytes that the decoder may be holding onto. This assumes that each + // decoded char corresponds to one or more bytes. Note that custom encodings or encodings with + // a custom replacement sequence may violate this assumption. + if (numBytes > 1) + { + DecoderNLS? decoder = _decoder as DecoderNLS; + // For internal decoders, we can check whether the decoder has any pending state. + // For custom decoders, assume that the decoder has pending state. + if (decoder == null || decoder.HasState) + { + numBytes -= 1; + if (_2BytesPerChar && numBytes > 2) + numBytes -= 2; + } + } + if (numBytes > MaxCharBytesSize) { numBytes = MaxCharBytesSize; diff --git a/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs b/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs index 7d453f73daa9..499c0bad6a78 100644 --- a/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs +++ b/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs @@ -217,7 +217,7 @@ public override unsafe void Convert(byte* bytes, int byteCount, public bool MustFlush => _mustFlush; // Anything left in our decoder? - internal virtual bool HasState => false; + internal virtual bool HasState => _leftoverByteCount != 0; // Allow encoding to clear our must flush instead of throwing (in ThrowCharsOverflow) internal void ClearMustFlush() From 5e7a50ea93aab46492e536bb9a6457778369bf04 Mon Sep 17 00:00:00 2001 From: Jan Kotas Date: Fri, 23 Aug 2019 12:28:10 -0700 Subject: [PATCH 2/2] Add comment --- src/System.Private.CoreLib/shared/System/IO/BinaryReader.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/System.Private.CoreLib/shared/System/IO/BinaryReader.cs b/src/System.Private.CoreLib/shared/System/IO/BinaryReader.cs index 4f01c7ec9522..98e853b6190c 100644 --- a/src/System.Private.CoreLib/shared/System/IO/BinaryReader.cs +++ b/src/System.Private.CoreLib/shared/System/IO/BinaryReader.cs @@ -398,6 +398,9 @@ private int InternalReadChars(Span buffer) if (decoder == null || decoder.HasState) { numBytes -= 1; + + // The worst case is charsRemaining = 2 and UTF32Decoder holding onto 3 pending bytes. We need to read just + // one byte in this case. if (_2BytesPerChar && numBytes > 2) numBytes -= 2; }