diff --git a/.gitignore b/.gitignore index 6b6eb255ba51d6..7ccc53c98ddd2c 100644 --- a/.gitignore +++ b/.gitignore @@ -373,3 +373,4 @@ test:.cs *.tempLog.xml *.testResults.xml *.testStats.csv +*.md diff --git a/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs b/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs index 564bbc97eb7511..e2a0b46ca17a8d 100644 --- a/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs +++ b/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs @@ -127,9 +127,9 @@ internal ZipArchiveEntry() { } public string Name { get { throw null; } } public void Delete() { } public System.IO.Stream Open() { throw null; } - public System.IO.Stream Open(FileAccess access) { throw null; } - public System.Threading.Tasks.Task OpenAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public System.IO.Stream Open(System.IO.FileAccess access) { throw null; } public System.Threading.Tasks.Task OpenAsync(System.IO.FileAccess access, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public System.Threading.Tasks.Task OpenAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } public override string ToString() { throw null; } } public enum ZipArchiveMode @@ -144,6 +144,36 @@ public enum ZipCompressionMethod Deflate = 8, Deflate64 = 9, } + public sealed partial class ZipForwardReadEntry + { + internal ZipForwardReadEntry() { } + public long CompressedLength { get { throw null; } } + public System.IO.Compression.ZipCompressionMethod CompressionMethod { get { throw null; } } + [System.CLSCompliantAttribute(false)] + public uint Crc32 { get { throw null; } } + public System.IO.Stream? 
DataStream { get { throw null; } } + public string FullName { get { throw null; } } + [System.CLSCompliantAttribute(false)] + public ushort GeneralPurposeBitFlags { get { throw null; } } + public bool IsDirectory { get { throw null; } } + public bool IsEncrypted { get { throw null; } } + public System.DateTimeOffset LastModified { get { throw null; } } + public long Length { get { throw null; } } + public string Name { get { throw null; } } + [System.CLSCompliantAttribute(false)] + public ushort VersionNeeded { get { throw null; } } + public void ExtractToFile(string destinationFileName, bool overwrite) { } + public System.Threading.Tasks.Task ExtractToFileAsync(string destinationFileName, bool overwrite, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + } + public sealed partial class ZipStreamReader : System.IAsyncDisposable, System.IDisposable + { + public ZipStreamReader(System.IO.Stream stream, bool leaveOpen = false) { } + public ZipStreamReader(System.IO.Stream stream, System.Text.Encoding? entryNameEncoding, bool leaveOpen = false) { } + public void Dispose() { } + public System.Threading.Tasks.ValueTask DisposeAsync() { throw null; } + public System.IO.Compression.ZipForwardReadEntry? 
GetNextEntry(bool copyData = false) { throw null; } + public System.Threading.Tasks.ValueTask GetNextEntryAsync(bool copyData = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + } public sealed partial class ZLibCompressionOptions { public ZLibCompressionOptions() { } diff --git a/src/libraries/System.IO.Compression/src/Resources/Strings.resx b/src/libraries/System.IO.Compression/src/Resources/Strings.resx index bbb10afbcf342a..090c8bccff69d6 100644 --- a/src/libraries/System.IO.Compression/src/Resources/Strings.resx +++ b/src/libraries/System.IO.Compression/src/Resources/Strings.resx @@ -371,10 +371,22 @@ An attempt was made to move the position before the beginning of the stream. + + Stored compression entries with data descriptors cannot be read in forward-only mode because the compressed size is unknown. + The CRC32 checksum of the extracted data does not match the expected value from the archive. The decompressed data length does not match the expected value from the archive. + + Encrypted entries with data descriptors cannot be read in forward-only mode because the compressed size is unknown. + + + Cannot extract a directory entry or an entry with no data to a file. Check IsDirectory and DataStream before calling ExtractToFile. + + + The ZIP archive contains an invalid local file header signature. 
+ diff --git a/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj b/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj index bcdcf1b3417f43..b0ae33ad312bc2 100644 --- a/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj +++ b/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj @@ -1,4 +1,4 @@ - + $(NetCoreAppCurrent)-windows;$(NetCoreAppCurrent)-unix;$(NetCoreAppCurrent)-browser;$(NetCoreAppCurrent)-wasi;$(NetCoreAppCurrent) @@ -41,12 +41,9 @@ - - - + + + @@ -55,28 +52,25 @@ - + + + - + - - - + + + diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs index ea2fc10ec55699..9dc4d73a4c496d 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs @@ -2,7 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. 
using System.Diagnostics; -using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; @@ -96,10 +95,8 @@ private void ThrowIfCantSeek() public override int Read(byte[] buffer, int offset, int count) { - ThrowIfDisposed(); - ThrowIfCantRead(); - - return _baseStream.Read(buffer, offset, count); + ValidateBufferArguments(buffer, offset, count); + return Read(buffer.AsSpan(offset, count)); } public override int Read(Span buffer) @@ -120,10 +117,8 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { - ThrowIfDisposed(); - ThrowIfCantRead(); - - return _baseStream.ReadAsync(buffer, offset, count, cancellationToken); + ValidateBufferArguments(buffer, offset, count); + return ReadAsync(buffer.AsMemory(offset, count), cancellationToken).AsTask(); } public override ValueTask ReadAsync(Memory buffer, CancellationToken cancellationToken = default) @@ -210,7 +205,6 @@ public override void Flush() { ThrowIfDisposed(); ThrowIfCantWrite(); - _baseStream.Flush(); } @@ -218,7 +212,6 @@ public override Task FlushAsync(CancellationToken cancellationToken) { ThrowIfDisposed(); ThrowIfCantWrite(); - return _baseStream.FlushAsync(cancellationToken); } @@ -451,6 +444,12 @@ public override void Flush() throw new NotSupportedException(SR.WritingNotSupported); } + public override Task FlushAsync(CancellationToken cancellationToken) + { + ThrowIfDisposed(); + throw new NotSupportedException(SR.WritingNotSupported); + } + // Close the stream for reading. Note that this does NOT close the superStream (since // the substream is just 'a chunk' of the super-stream protected override void Dispose(bool disposing) @@ -713,6 +712,106 @@ public override async ValueTask DisposeAsync() } } + /// + /// A read-only, forward-only stream that limits the number of bytes + /// that can be read from an underlying stream without closing it. + /// Used by to bound compressed entry data. 
+    ///
+    internal sealed class BoundedReadOnlyStream : Stream
+    {
+        private readonly Stream _baseStream;
+        // Number of bytes that may still be handed out from _baseStream.
+        private long _remaining;
+        private bool _isDisposed;
+
+        /// <summary>
+        /// Creates a wrapper that exposes at most <paramref name="length"/> bytes of
+        /// <paramref name="baseStream"/>.
+        /// </summary>
+        /// <param name="baseStream">The stream the bytes are read from.</param>
+        /// <param name="length">The maximum number of bytes this wrapper will return.</param>
+        public BoundedReadOnlyStream(Stream baseStream, long length)
+        {
+            _baseStream = baseStream;
+            _remaining = length;
+        }
+
+        public override bool CanRead => !_isDisposed && _baseStream.CanRead;
+        public override bool CanSeek => false;
+        public override bool CanWrite => false;
+        public override long Length => throw new NotSupportedException();
+
+        public override long Position
+        {
+            get => throw new NotSupportedException();
+            set => throw new NotSupportedException();
+        }
+
+        private void ThrowIfDisposed()
+        {
+            if (_isDisposed)
+            {
+                throw new ObjectDisposedException(GetType().ToString(), SR.HiddenStreamName);
+            }
+        }
+
+        /// <summary>
+        /// Reads up to <paramref name="count"/> bytes, never exceeding the remaining bound.
+        /// </summary>
+        public override int Read(byte[] buffer, int offset, int count)
+        {
+            // Validate up front so a null buffer or a bad range is reported the same way
+            // as by the other stream wrappers in this file.
+            ValidateBufferArguments(buffer, offset, count);
+            return Read(buffer.AsSpan(offset, count));
+        }
+
+        /// <summary>
+        /// Reads into <paramref name="buffer"/>, returning 0 once the bound has been reached.
+        /// </summary>
+        public override int Read(Span<byte> buffer)
+        {
+            ThrowIfDisposed();
+
+            if (_remaining <= 0)
+            {
+                return 0;
+            }
+
+            // Never ask the base stream for more than this wrapper is allowed to expose.
+            if (buffer.Length > _remaining)
+            {
+                buffer = buffer.Slice(0, (int)_remaining);
+            }
+
+            int bytesRead = _baseStream.Read(buffer);
+            _remaining -= bytesRead;
+
+            return bytesRead;
+        }
+
+        /// <summary>
+        /// Asynchronously reads up to <paramref name="count"/> bytes, never exceeding the remaining bound.
+        /// </summary>
+        public override Task<int> ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken)
+        {
+            ValidateBufferArguments(buffer, offset, count);
+            return ReadAsync(buffer.AsMemory(offset, count), cancellationToken).AsTask();
+        }
+
+        /// <summary>
+        /// Asynchronously reads into <paramref name="buffer"/>, returning 0 once the bound has been reached.
+        /// </summary>
+        public override async ValueTask<int> ReadAsync(Memory<byte> buffer, CancellationToken cancellationToken = default)
+        {
+            ThrowIfDisposed();
+
+            if (_remaining <= 0)
+            {
+                return 0;
+            }
+
+            // Never ask the base stream for more than this wrapper is allowed to expose.
+            if (buffer.Length > _remaining)
+            {
+                buffer = buffer.Slice(0, (int)_remaining);
+            }
+
+            int bytesRead = await _baseStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false);
+            _remaining -= bytesRead;
+
+            return bytesRead;
+        }
+
+        // Read-only stream: flushing is a no-op.
+        public override void Flush() { }
+        public override Task FlushAsync(CancellationToken cancellationToken) => Task.CompletedTask;
+        public override long Seek(long offset, SeekOrigin origin) => throw new
NotSupportedException(); + public override void SetLength(long value) => throw new NotSupportedException(); + public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException(); + + protected override void Dispose(bool disposing) + { + _isDisposed = true; + base.Dispose(disposing); + } + + public override ValueTask DisposeAsync() + { + _isDisposed = true; + + return base.DisposeAsync(); + } + } + internal sealed class CrcValidatingReadStream : Stream { private readonly Stream _baseStream; @@ -735,6 +834,9 @@ public CrcValidatingReadStream(Stream baseStream, uint expectedCrc, long expecte _runningCrc = 0; } + internal uint RunningCrc => _runningCrc; + internal long TotalBytesRead => _totalBytesRead; + public override bool CanRead => !_isDisposed && _baseStream.CanRead; public override bool CanSeek => !_isDisposed && _baseStream.CanSeek; public override bool CanWrite => false; @@ -967,4 +1069,246 @@ public override async ValueTask DisposeAsync() await base.DisposeAsync().ConfigureAwait(false); } } + + /// + /// Wraps a non-seekable stream and reports as + /// so that can rewind + /// unconsumed input via Seek(-n, SeekOrigin.Current) after + /// decompression finishes. Maintains a rolling history buffer of recently + /// read bytes to satisfy that backward seek. + /// + internal sealed class ReadAheadStream : Stream + { + private readonly Stream _baseStream; + private readonly byte[] _history; + private int _historyCount; + private byte[]? 
_pushback;
+        // Window [_pushbackOffset, _pushbackOffset + _pushbackCount) of _pushback still to be re-delivered.
+        private int _pushbackOffset;
+        private int _pushbackCount;
+        // Logical position reported to callers; moves back when Seek rewinds.
+        private long _position;
+        private bool _isDisposed;
+
+        /// <summary>
+        /// Wraps <paramref name="baseStream"/> and remembers up to
+        /// <paramref name="historyCapacity"/> of the most recently read bytes.
+        /// </summary>
+        public ReadAheadStream(Stream baseStream, int historyCapacity = 8192)
+        {
+            _baseStream = baseStream;
+            _history = new byte[historyCapacity];
+        }
+
+        public override bool CanRead => !_isDisposed && _baseStream.CanRead;
+        public override bool CanSeek => !_isDisposed;
+        public override bool CanWrite => false;
+
+        public override long Length
+        {
+            get
+            {
+                ThrowIfDisposed();
+                throw new NotSupportedException();
+            }
+        }
+
+        public override long Position
+        {
+            get
+            {
+                ThrowIfDisposed();
+                return _position;
+            }
+            set
+            {
+                ThrowIfDisposed();
+                throw new NotSupportedException();
+            }
+        }
+
+        public override int Read(byte[] buffer, int offset, int count)
+        {
+            ValidateBufferArguments(buffer, offset, count);
+            return Read(buffer.AsSpan(offset, count));
+        }
+
+        /// <summary>
+        /// Reads from the pushback buffer when it holds data, otherwise from the base stream.
+        /// </summary>
+        /// <remarks>
+        /// A single call never mixes the two sources. Returning as soon as pushback bytes
+        /// are available means a failing base-stream read can no longer discard bytes that
+        /// were already taken out of the pushback buffer.
+        /// </remarks>
+        public override int Read(Span<byte> buffer)
+        {
+            ThrowIfDisposed();
+
+            int fromPushback = ReadFromPushback(buffer);
+            if (fromPushback > 0 || buffer.IsEmpty)
+            {
+                return fromPushback;
+            }
+
+            int fromBase = _baseStream.Read(buffer);
+            if (fromBase > 0)
+            {
+                RecordHistory(buffer.Slice(0, fromBase));
+                _position += fromBase;
+            }
+
+            return fromBase;
+        }
+
+        public override Task<int> ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken)
+        {
+            ValidateBufferArguments(buffer, offset, count);
+            return ReadAsync(buffer.AsMemory(offset, count), cancellationToken).AsTask();
+        }
+
+        /// <summary>
+        /// Asynchronous counterpart of <see cref="Read(Span{byte})"/>; the same
+        /// "pushback first, never mixed" rule applies, so a cancelled or faulted
+        /// base-stream read cannot lose pushback bytes.
+        /// </summary>
+        public override async ValueTask<int> ReadAsync(Memory<byte> buffer, CancellationToken cancellationToken = default)
+        {
+            ThrowIfDisposed();
+
+            int fromPushback = ReadFromPushback(buffer.Span);
+            if (fromPushback > 0 || buffer.IsEmpty)
+            {
+                return fromPushback;
+            }
+
+            int fromBase = await _baseStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false);
+            if (fromBase > 0)
+            {
+                RecordHistory(buffer.Span.Slice(0, fromBase));
+                _position += fromBase;
+            }
+
+            return fromBase;
+        }
+
+        /// <summary>
+        /// Copies as many pending pushback bytes as fit into <paramref name="destination"/>,
+        /// records them in the history buffer and advances the logical position.
+        /// </summary>
+        /// <returns>The number of bytes copied; 0 when there is no pending pushback.</returns>
+        private int ReadFromPushback(Span<byte> destination)
+        {
+            if (_pushbackCount == 0)
+            {
+                return 0;
+            }
+
+            int count = Math.Min(destination.Length, _pushbackCount);
+            _pushback.AsSpan(_pushbackOffset, count).CopyTo(destination);
+            RecordHistory(destination.Slice(0, count));
+            _pushbackOffset += count;
+            _pushbackCount -= count;
+            _position += count;
+
+            if (_pushbackCount == 0)
+            {
+                // Fully drained: drop the array so it can be collected.
+                _pushback = null;
+            }
+
+            return count;
+        }
+
+        /// <summary>
+        /// Rewinds by a negative offset relative to the current position, using the
+        /// recorded history. Any other combination of arguments is not supported.
+        /// </summary>
+        public override long Seek(long offset, SeekOrigin origin)
+        {
+            ThrowIfDisposed();
+
+            if (origin is SeekOrigin.Current && offset < 0)
+            {
+                int rewindBytes = checked((int)(-offset));
+
+                if (rewindBytes > _historyCount)
+                {
+                    throw new IOException(SR.IO_SeekBeforeBegin);
+                }
+
+                // Create new pushback by prepending rewound history bytes to any
+                // existing unread pushback. This preserves bytes that haven't been
+                // consumed yet (from a previous seek) so they are not lost.
+ int existingPushback = _pushbackCount; + byte[] newPushback = new byte[rewindBytes + existingPushback]; + Array.Copy(_history, _historyCount - rewindBytes, newPushback, 0, rewindBytes); + + if (existingPushback > 0) + { + Array.Copy(_pushback!, _pushbackOffset, newPushback, rewindBytes, existingPushback); + } + + _pushback = newPushback; + _pushbackOffset = 0; + _pushbackCount = newPushback.Length; + _historyCount -= rewindBytes; + _position -= rewindBytes; + + return _position; + } + + throw new NotSupportedException(); + } + + private void RecordHistory(ReadOnlySpan data) + { + if (data.Length >= _history.Length) + { + data.Slice(data.Length - _history.Length).CopyTo(_history); + _historyCount = _history.Length; + } + else if (_historyCount + data.Length <= _history.Length) + { + data.CopyTo(_history.AsSpan(_historyCount)); + _historyCount += data.Length; + } + else + { + int toKeep = _history.Length - data.Length; + Array.Copy(_history, _historyCount - toKeep, _history, 0, toKeep); + data.CopyTo(_history.AsSpan(toKeep)); + _historyCount = _history.Length; + } + } + + public override void Flush() + { + ThrowIfDisposed(); + } + + public override Task FlushAsync(CancellationToken cancellationToken) + { + ThrowIfDisposed(); + return Task.CompletedTask; + } + + public override void SetLength(long value) + { + ThrowIfDisposed(); + throw new NotSupportedException(); + } + + public override void Write(byte[] buffer, int offset, int count) + { + ThrowIfDisposed(); + throw new NotSupportedException(); + } + + private void ThrowIfDisposed() + { + ObjectDisposedException.ThrowIf(_isDisposed, this); + } + + protected override void Dispose(bool disposing) + { + if (disposing && !_isDisposed) + { + _baseStream.Dispose(); + _isDisposed = true; + } + base.Dispose(disposing); + } + + public override async ValueTask DisposeAsync() + { + if (!_isDisposed) + { + await _baseStream.DisposeAsync().ConfigureAwait(false); + _isDisposed = true; + } + await 
base.DisposeAsync().ConfigureAwait(false); + } + } } diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipForwardReadEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipForwardReadEntry.cs new file mode 100644 index 00000000000000..03fdf117b89d42 --- /dev/null +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipForwardReadEntry.cs @@ -0,0 +1,240 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Threading; +using System.Threading.Tasks; + +namespace System.IO.Compression; + +/// +/// Represents a single entry read from a ZIP archive by . +/// Provides metadata from the local file header and a for +/// reading the decompressed entry data. +/// +/// +/// +/// When copyData is (the default), the +/// reads directly from the underlying archive stream. It is invalidated when the reader +/// advances to the next entry via . Any unread +/// data is automatically drained at that point. +/// +/// +/// When copyData is , the decompressed data is copied into a +/// and the entry remains valid after the reader advances. +/// +/// +public sealed class ZipForwardReadEntry +{ + private uint _crc32; + private long _compressedLength; + private long _length; + + internal ZipForwardReadEntry( + string fullName, + ZipCompressionMethod compressionMethod, + DateTimeOffset lastModified, + uint crc32, + long compressedLength, + long length, + ushort generalPurposeBitFlags, + ushort versionNeeded, + bool hasDataDescriptor, + Stream? 
dataStream) + { + FullName = fullName; + CompressionMethod = compressionMethod; + LastModified = lastModified; + _crc32 = crc32; + _compressedLength = compressedLength; + _length = length; + GeneralPurposeBitFlags = generalPurposeBitFlags; + VersionNeeded = versionNeeded; + HasDataDescriptor = hasDataDescriptor; + DataStream = dataStream; + } + + /// + /// Gets the full name (relative path) of the entry, including any directory path. + /// + public string FullName { get; } + + /// + /// Gets the file name portion of the entry (the part after the last directory separator). + /// + public string Name => Path.GetFileName(FullName); + + /// + /// Gets the compression method used for this entry. + /// + public ZipCompressionMethod CompressionMethod { get; } + + /// + /// Gets the last modification date and time of the entry. + /// + public DateTimeOffset LastModified { get; } + + /// + /// Gets the CRC-32 checksum of the uncompressed data. + /// + /// + /// When bit 3 (data descriptor) is set in the local header, this value is initially + /// zero and is populated after the compressed data has been fully read. + /// + [CLSCompliant(false)] + public uint Crc32 => _crc32; + + /// + /// Gets the compressed size of the entry in bytes. + /// + /// + /// When bit 3 (data descriptor) is set in the local header, this value is initially + /// zero and is populated after the compressed data has been fully read. + /// + public long CompressedLength => _compressedLength; + + /// + /// Gets the uncompressed size of the entry in bytes. + /// + /// + /// When bit 3 (data descriptor) is set in the local header, this value is initially + /// zero and is populated after the compressed data has been fully read. + /// + public long Length => _length; + + /// + /// Gets the raw general purpose bit flags from the local file header. + /// + [CLSCompliant(false)] + public ushort GeneralPurposeBitFlags { get; } + + /// + /// Gets a value indicating whether the entry is encrypted. 
+    ///
+    public bool IsEncrypted => (GeneralPurposeBitFlags & 1) != 0;
+
+    /// <summary>
+    /// Gets a value indicating whether the entry represents a directory.
+    /// </summary>
+    public bool IsDirectory => FullName.Length > 0 && (FullName[^1] is '/' or '\\');
+
+    /// <summary>
+    /// Gets the minimum ZIP specification version needed to extract this entry.
+    /// </summary>
+    [CLSCompliant(false)]
+    public ushort VersionNeeded { get; }
+
+    /// <summary>
+    /// Gets the decompressed data stream for this entry, or <see langword="null"/>
+    /// if the entry has no data (e.g. a directory entry).
+    /// </summary>
+    /// <remarks>
+    /// When <c>copyData</c> was <see langword="false"/> on the
+    /// <see cref="ZipStreamReader.GetNextEntry"/> call that produced this entry,
+    /// the stream reads directly from the archive and is invalidated when the reader
+    /// advances to the next entry. When <c>copyData</c> was <see langword="true"/>,
+    /// the data has been copied into a <see cref="MemoryStream"/> that remains valid
+    /// independently.
+    /// </remarks>
+    public Stream? DataStream { get; }
+
+    /// <summary>
+    /// Extracts the entry to a file on disk.
+    /// </summary>
+    /// <param name="destinationFileName">The path of the file to create.</param>
+    /// <param name="overwrite">
+    /// <see langword="true"/> to overwrite an existing file; otherwise <see langword="false"/>.
+    /// </param>
+    /// <exception cref="ArgumentNullException"><paramref name="destinationFileName"/> is <see langword="null"/>.</exception>
+    /// <exception cref="ArgumentException"><paramref name="destinationFileName"/> is empty.</exception>
+    /// <exception cref="InvalidOperationException">
+    /// The entry is a directory (<see cref="IsDirectory"/> is <see langword="true"/>).
+    /// </exception>
+    /// <remarks>
+    /// When <see cref="DataStream"/> is <see langword="null"/> an empty file is created.
+    /// Setting the last write time is best-effort; a failure to do so is ignored.
+    /// </remarks>
+    public void ExtractToFile(string destinationFileName, bool overwrite)
+    {
+        ArgumentException.ThrowIfNullOrEmpty(destinationFileName);
+
+        if (IsDirectory)
+        {
+            throw new InvalidOperationException(SR.ZipStreamEntryNoDataToExtract);
+        }
+
+        FileMode mode = overwrite ? FileMode.Create : FileMode.CreateNew;
+
+        // Close the file before stamping it. The stream is opened with FileShare.None,
+        // so setting the timestamp while it is still open can fail (and the failure is
+        // swallowed below), and closing the stream afterwards may update the timestamp
+        // again. This matches the ordering used by ExtractToFileAsync.
+        using (FileStream fs = new(destinationFileName, mode, FileAccess.Write, FileShare.None))
+        {
+            DataStream?.CopyTo(fs);
+        }
+
+        try
+        {
+            File.SetLastWriteTime(destinationFileName, LastModified.DateTime);
+        }
+        catch
+        {
+            // Some platforms (e.g. Android) may not support setting the last write time.
+            // Extraction should not fail because of that.
+        }
+    }
+
+    ///
+    /// Asynchronously extracts the entry to a file on disk.
+    ///
+    /// The path of the file to create.
+    ///
+    /// to overwrite an existing file; otherwise .
+    ///
+    /// A token to monitor for cancellation requests.
+    /// is .
+ /// is empty. + /// + /// The entry is a directory ( is ). + /// + public async Task ExtractToFileAsync(string destinationFileName, bool overwrite, + CancellationToken cancellationToken = default) + { + ArgumentException.ThrowIfNullOrEmpty(destinationFileName); + + if (IsDirectory) + { + throw new InvalidOperationException(SR.ZipStreamEntryNoDataToExtract); + } + + FileMode mode = overwrite ? FileMode.Create : FileMode.CreateNew; + FileStream fs = new(destinationFileName, mode, FileAccess.Write, FileShare.None, + bufferSize: 0x1000, useAsync: true); + await using (fs.ConfigureAwait(false)) + { + if (DataStream is not null) + { + await DataStream.CopyToAsync(fs, cancellationToken).ConfigureAwait(false); + } + } + + try + { + File.SetLastWriteTime(destinationFileName, LastModified.DateTime); + } + catch + { + // Some platforms (e.g. Android) may not support setting the last write time. + // Extraction should not fail because of that. + } + } + + internal bool HasDataDescriptor { get; } + + internal void UpdateDataDescriptor(uint crc32, long compressedLength, long length, + uint runningCrc, long totalBytesRead) + { + if (runningCrc != crc32) + { + throw new InvalidDataException(SR.CrcMismatch); + } + + if (totalBytesRead != length) + { + throw new InvalidDataException(SR.UnexpectedStreamLength); + } + + _crc32 = crc32; + _compressedLength = compressedLength; + _length = length; + } +} diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs new file mode 100644 index 00000000000000..287eb0971625a7 --- /dev/null +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs @@ -0,0 +1,604 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System.Buffers.Binary; +using System.Text; +using System.Threading; +using System.Threading.Tasks; + +namespace System.IO.Compression; + +/// +/// Provides a forward-only reader for ZIP archives that reads entries sequentially +/// from a stream without requiring the stream to be seekable. +/// +/// +/// +/// Unlike , which reads the central directory at the end +/// of the archive, walks local file headers in order +/// and decompresses data on the fly. This makes it suitable for network streams, +/// pipes, and other non-seekable sources. +/// +/// +/// This mirrors the TarReader / TarEntry pattern in +/// System.Formats.Tar. +/// +/// +public sealed class ZipStreamReader : IDisposable, IAsyncDisposable +{ + private const ushort DataDescriptorBitFlag = 0x8; + private const ushort UnicodeFileNameBitFlag = 0x800; + + private bool _isDisposed; + private readonly bool _leaveOpen; + private readonly Encoding? _entryNameEncoding; + private ZipForwardReadEntry? _previousEntry; + private readonly Stream _archiveStream; + private bool _reachedEnd; + + /// + /// Initializes a new that reads from the specified stream. + /// + /// The archive stream to read from. + /// + /// to leave the stream open after the reader is disposed; + /// otherwise, . + /// + public ZipStreamReader(Stream stream, bool leaveOpen = false) + : this(stream, entryNameEncoding: null, leaveOpen) + { + } + + /// + /// Initializes a new that reads from the specified stream + /// using the given encoding for entry names. + /// + /// The archive stream to read from. + /// + /// The encoding to use when reading entry names that do not have the UTF-8 bit flag set, + /// or to use UTF-8. + /// + /// + /// to leave the stream open after the reader is disposed; + /// otherwise, . + /// + public ZipStreamReader(Stream stream, Encoding? 
entryNameEncoding, bool leaveOpen = false) + { + ArgumentNullException.ThrowIfNull(stream); + + if (!stream.CanRead) + { + throw new ArgumentException(SR.NotSupported_UnreadableStream, nameof(stream)); + } + + // ReadAheadStream makes non-seekable streams appear seekable so that + // DeflateStream.TryRewindStream can push back unconsumed input after + // decompression finishes. Already-seekable streams need no wrapper. + _archiveStream = stream.CanSeek ? stream : new ReadAheadStream(stream); + _leaveOpen = leaveOpen; + _entryNameEncoding = entryNameEncoding; + } + + /// + /// Reads the next entry from the ZIP archive stream by parsing the local file header. + /// + /// + /// to copy the entry's decompressed data into a + /// that remains valid after the reader advances; to read directly + /// from the archive stream (invalidated on the next call). + /// + /// + /// The next , or if there are no more entries. + /// + /// The reader has been disposed. + /// The archive stream contains invalid data. + public ZipForwardReadEntry? 
GetNextEntry(bool copyData = false) + { + ObjectDisposedException.ThrowIf(_isDisposed, this); + + if (_reachedEnd) + { + return null; + } + + AdvanceDataStreamIfNeeded(); + + byte[] headerBytes = new byte[ZipLocalFileHeader.SizeOfLocalHeader]; + int bytesRead = _archiveStream.ReadAtLeast(headerBytes, headerBytes.Length, throwOnEndOfStream: false); + + if (bytesRead < ZipLocalFileHeader.SizeOfLocalHeader) + { + _reachedEnd = true; + return null; + } + + if (!headerBytes.AsSpan().StartsWith(ZipLocalFileHeader.SignatureConstantBytes)) + { + if (IsKnownEndOfEntriesSignature(headerBytes)) + { + _reachedEnd = true; + return null; + } + + throw new InvalidDataException(SR.ZipStreamInvalidLocalFileHeader); + } + + int dynamicLength = GetDynamicHeaderLength(headerBytes); + byte[] dynamicBuffer = new byte[dynamicLength]; + _archiveStream.ReadExactly(dynamicBuffer); + + ParseLocalFileHeader(headerBytes, dynamicBuffer, + out string fullName, out ushort versionNeeded, out ushort generalPurposeBitFlags, + out ushort compressionMethod, out DateTimeOffset lastModified, out uint crc32, + out long compressedSize, out long uncompressedSize, out bool hasDataDescriptor); + + bool isEncrypted = (generalPurposeBitFlags & 1) != 0; + + ZipCompressionMethod method = (ZipCompressionMethod)compressionMethod; + + Stream? dataStream = CreateDataStream( + method, compressedSize, uncompressedSize, + crc32, hasDataDescriptor, isEncrypted, out CrcValidatingReadStream? crcStream); + + Stream? 
originalDataStream = null; + if (copyData && dataStream is not null) + { + originalDataStream = dataStream; + MemoryStream ms = new(); + dataStream.CopyTo(ms); + ms.Position = 0; + dataStream = ms; + } + + ZipForwardReadEntry entry = new( + fullName, method, lastModified, crc32, + compressedSize, uncompressedSize, generalPurposeBitFlags, versionNeeded, + hasDataDescriptor, dataStream); + + if (copyData && hasDataDescriptor && crcStream is not null) + { + ReadDataDescriptor(entry, crcStream); + } + + // Dispose the original decompression/CRC stream after copying (and after + // reading the data descriptor when applicable) to release inflater resources. + originalDataStream?.Dispose(); + + if (!copyData) + { + _previousEntry = entry; + } + + return entry; + } + + /// + /// Asynchronously reads the next entry from the ZIP archive stream. + /// + /// + /// to copy the entry's decompressed data into a + /// that remains valid after the reader advances; to read directly + /// from the archive stream (invalidated on the next call). + /// + /// A token to monitor for cancellation requests. + /// + /// The next , or if there are no more entries. 
///
    public async ValueTask<ZipForwardReadEntry?> GetNextEntryAsync(
        bool copyData = false, CancellationToken cancellationToken = default)
    {
        ObjectDisposedException.ThrowIf(_isDisposed, this);

        if (_reachedEnd)
        {
            return null;
        }

        // Consume whatever is left of the previously returned entry (and its data
        // descriptor, if any) so the archive stream is positioned at the next header.
        await AdvanceDataStreamIfNeededAsync(cancellationToken).ConfigureAwait(false);

        byte[] headerBytes = new byte[ZipLocalFileHeader.SizeOfLocalHeader];
        int bytesRead = await _archiveStream.ReadAtLeastAsync(
            headerBytes.AsMemory(0, ZipLocalFileHeader.SizeOfLocalHeader),
            ZipLocalFileHeader.SizeOfLocalHeader,
            throwOnEndOfStream: false,
            cancellationToken).ConfigureAwait(false);

        // Fewer bytes than a fixed local header: treat as end of the entry sequence.
        if (bytesRead < ZipLocalFileHeader.SizeOfLocalHeader)
        {
            _reachedEnd = true;
            return null;
        }

        if (!headerBytes.AsSpan().StartsWith(ZipLocalFileHeader.SignatureConstantBytes))
        {
            // A central-directory / EOCD signature marks the normal end of the entries.
            if (IsKnownEndOfEntriesSignature(headerBytes))
            {
                _reachedEnd = true;
                return null;
            }

            throw new InvalidDataException(SR.ZipStreamInvalidLocalFileHeader);
        }

        // The variable-length part of the header is the file name followed by the extra field.
        int dynamicLength = GetDynamicHeaderLength(headerBytes);
        byte[] dynamicBuffer = new byte[dynamicLength];
        await _archiveStream.ReadExactlyAsync(dynamicBuffer.AsMemory(0, dynamicLength), cancellationToken).ConfigureAwait(false);

        ParseLocalFileHeader(headerBytes, dynamicBuffer,
            out string fullName, out ushort versionNeeded, out ushort generalPurposeBitFlags,
            out ushort compressionMethod, out DateTimeOffset lastModified, out uint crc32,
            out long compressedSize, out long uncompressedSize, out bool hasDataDescriptor);

        ZipCompressionMethod method = (ZipCompressionMethod)compressionMethod;
        bool isEncrypted = (generalPurposeBitFlags & 1) != 0; // bit 0 of the general purpose flags

        Stream? dataStream = CreateDataStream(
            method, compressedSize, uncompressedSize, crc32,
            hasDataDescriptor, isEncrypted, out CrcValidatingReadStream? crcStream);

        Stream? originalDataStream = null;
        ZipForwardReadEntry entry;
        try
        {
            if (copyData && dataStream is not null)
            {
                // Eagerly read the whole entry into memory so it stays readable
                // after the reader has advanced past it.
                originalDataStream = dataStream;
                MemoryStream ms = new();
                await dataStream.CopyToAsync(ms, cancellationToken).ConfigureAwait(false);
                ms.Position = 0;
                dataStream = ms;
            }

            entry = new(
                fullName, method, lastModified, crc32,
                compressedSize, uncompressedSize, generalPurposeBitFlags, versionNeeded,
                hasDataDescriptor, dataStream);

            if (copyData && hasDataDescriptor && crcStream is not null)
            {
                await ReadDataDescriptorAsync(entry, crcStream, cancellationToken).ConfigureAwait(false);
            }
        }
        finally
        {
            // Dispose the original decompression/CRC stream after copying (and after
            // reading the data descriptor when applicable) to release inflater resources.
            // Doing this in a finally block also releases them when the copy or the
            // descriptor read throws (corrupt data, cancellation, truncated stream).
            if (originalDataStream is not null)
            {
                await originalDataStream.DisposeAsync().ConfigureAwait(false);
            }
        }

        // Only a non-copied entry still depends on the archive stream position,
        // so only that case needs draining on the next call.
        if (!copyData)
        {
            _previousEntry = entry;
        }

        return entry;
    }

    /// <summary>
    /// Builds the stream through which an entry's data is exposed, or returns
    /// <see langword="null"/> when the entry has no data descriptor and a compressed size of zero.
    /// </summary>
    /// <param name="crcStream">
    /// Receives the CRC-tracking wrapper when one is created; <see langword="null"/> for
    /// empty and encrypted entries.
    /// </param>
    /// <exception cref="NotSupportedException">
    /// The entry is encrypted and uses a data descriptor, or the compression method is not supported.
    /// </exception>
    private Stream? CreateDataStream(
        ZipCompressionMethod compressionMethod,
        long compressedSize,
        long uncompressedSize,
        uint crc32,
        bool hasDataDescriptor,
        bool isEncrypted,
        out CrcValidatingReadStream? crcStream)
    {
        crcStream = null;

        if (!hasDataDescriptor && compressedSize == 0)
        {
            return null;
        }

        // Encrypted entries cannot be decompressed without decryption.
        // When the compressed size is known (no data descriptor), return a bounded
        // stream so the reader can drain past the encrypted bytes and find the next
        // local file header. When a data descriptor is present the compressed size
        // is unknown, so we cannot determine the entry boundary.
        if (isEncrypted)
        {
            if (hasDataDescriptor)
            {
                throw new NotSupportedException(SR.ZipStreamEncryptedDataDescriptorNotSupported);
            }

            return new BoundedReadOnlyStream(_archiveStream, compressedSize);
        }

        Stream source = hasDataDescriptor
            ? _archiveStream
            : new BoundedReadOnlyStream(_archiveStream, compressedSize);

        Stream decompressed = CreateDecompressionStream(source, compressionMethod, uncompressedSize, leaveOpen: hasDataDescriptor);

        crcStream = hasDataDescriptor
            // Data-descriptor entries: CRC and length are unknown until after the data is read.
            // Use sentinel values to disable validation while still tracking RunningCrc and TotalBytesRead
            // for later verification against the data descriptor.
            ? new CrcValidatingReadStream(decompressed, expectedCrc: 0, expectedLength: long.MaxValue)
            : new CrcValidatingReadStream(decompressed, crc32, uncompressedSize);

        return crcStream;
    }

    /// <summary>
    /// Creates the appropriate decompression stream for the given compression method.
    /// </summary>
    /// <param name="leaveOpen">
    /// <see langword="true"/> for data-descriptor entries, where <paramref name="source"/> is the
    /// archive stream itself and the uncompressed size is not yet known.
    /// </param>
    /// <exception cref="NotSupportedException">
    /// The method is unknown, or the entry is Stored with a data descriptor.
    /// </exception>
    private static Stream CreateDecompressionStream(
        Stream source, ZipCompressionMethod compressionMethod, long uncompressedSize, bool leaveOpen)
    {
        return compressionMethod switch
        {
            ZipCompressionMethod.Deflate when leaveOpen =>
                new DeflateStream(source, CompressionMode.Decompress, leaveOpen: true),
            ZipCompressionMethod.Deflate =>
                new DeflateStream(source, CompressionMode.Decompress, uncompressedSize),
            ZipCompressionMethod.Deflate64 =>
                new DeflateManagedStream(source, ZipCompressionMethod.Deflate64, leaveOpen ? -1 : uncompressedSize),
            ZipCompressionMethod.Stored when leaveOpen =>
                throw new NotSupportedException(SR.ZipStreamStoredDataDescriptorNotSupported),
            ZipCompressionMethod.Stored => source,
            _ => throw new NotSupportedException(SR.UnsupportedCompression)
        };
    }

    /// <summary>
    /// Drains the data of the previously returned entry, if one is pending, and reads its
    /// data descriptor when the entry has one.
    /// </summary>
    private void AdvanceDataStreamIfNeeded()
    {
        if (_previousEntry is null)
        {
            return;
        }

        // Clear the field first so a failure below does not retry the same entry.
        ZipForwardReadEntry entry = _previousEntry;
        _previousEntry = null;

        DrainStream(entry.DataStream);

        if (entry.HasDataDescriptor && entry.DataStream is CrcValidatingReadStream crcStream)
        {
            ReadDataDescriptor(entry, crcStream);
        }
    }

    /// <summary>
    /// Asynchronous counterpart of <see cref="AdvanceDataStreamIfNeeded"/>.
    /// </summary>
    private async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancellationToken)
    {
        if (_previousEntry is null)
        {
            return;
        }

        // Clear the field first so a failure below does not retry the same entry.
        ZipForwardReadEntry entry = _previousEntry;
        _previousEntry = null;

        await DrainStreamAsync(entry.DataStream, cancellationToken).ConfigureAwait(false);

        if (entry.HasDataDescriptor && entry.DataStream is CrcValidatingReadStream crcStream)
        {
            await ReadDataDescriptorAsync(entry, crcStream, cancellationToken).ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Reads <paramref name="stream"/> to its end, discarding the data. Does nothing for <see langword="null"/>.
    /// </summary>
    private static void DrainStream(Stream? stream)
    {
        if (stream is not null)
        {
            stream.CopyTo(Stream.Null);
        }
    }

    /// <summary>
    /// Asynchronously reads <paramref name="stream"/> to its end, discarding the data.
    /// Does nothing for <see langword="null"/>.
    /// </summary>
    private static async ValueTask DrainStreamAsync(Stream? stream, CancellationToken cancellationToken)
    {
        if (stream is not null)
        {
            await stream.CopyToAsync(Stream.Null, cancellationToken).ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Returns the combined length of the filename and extra field from the fixed local file header,
    /// so the caller can read exactly that many bytes via sync or async I/O.
    /// </summary>
    private static int GetDynamicHeaderLength(ReadOnlySpan<byte> headerBytes)
    {
        ushort filenameLength = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.FilenameLength..]);
        ushort extraFieldLength = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.ExtraFieldLength..]);
        return filenameLength + extraFieldLength;
    }

    /// <summary>
    /// Parses all local file header fields from the fixed header bytes and the already-read
    /// dynamic buffer (filename + extra field). This method performs no I/O.
    /// </summary>
    private void ParseLocalFileHeader(
        ReadOnlySpan<byte> headerBytes,
        ReadOnlySpan<byte> dynamicBuffer,
        out string fullName,
        out ushort versionNeeded,
        out ushort generalPurposeBitFlags,
        out ushort compressionMethod,
        out DateTimeOffset lastModified,
        out uint crc32,
        out long compressedSize,
        out long uncompressedSize,
        out bool hasDataDescriptor)
    {
        versionNeeded = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.VersionNeededToExtract..]);
        generalPurposeBitFlags = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.GeneralPurposeBitFlags..]);
        compressionMethod = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.CompressionMethod..]);
        uint lastModifiedRaw = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.LastModified..]);
        crc32 = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.Crc32..]);
        uint compressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.CompressedSize..]);
        uint uncompressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.UncompressedSize..]);
        ushort filenameLength = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.FilenameLength..]);
        ushort extraFieldLength = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.ExtraFieldLength..]);

        lastModified = new DateTimeOffset(ZipHelper.DosTimeToDateTime(lastModifiedRaw));
        hasDataDescriptor = (generalPurposeBitFlags & DataDescriptorBitFlag) != 0;

        // The Unicode flag forces UTF-8; otherwise use the caller-supplied encoding,
        // falling back to UTF-8 when none was given.
        Encoding encoding = (generalPurposeBitFlags & UnicodeFileNameBitFlag) != 0
            ? Encoding.UTF8
            : _entryNameEncoding ?? Encoding.UTF8;

        fullName = encoding.GetString(dynamicBuffer[..filenameLength]);

        // A 32-bit size of 0xFFFFFFFF means the real value lives in the ZIP64 extra field.
        bool compressedSizeInZip64 = compressedSizeSmall == ZipHelper.Mask32Bit;
        bool uncompressedSizeInZip64 = uncompressedSizeSmall == ZipHelper.Mask32Bit;

        if (compressedSizeInZip64 || uncompressedSizeInZip64)
        {
            Zip64ExtraField zip64 = Zip64ExtraField.GetJustZip64Block(
                dynamicBuffer.Slice(filenameLength, extraFieldLength),
                readUncompressedSize: uncompressedSizeInZip64,
                readCompressedSize: compressedSizeInZip64,
                readLocalHeaderOffset: false,
                readStartDiskNumber: false);

            // Fall back to the 32-bit value when the ZIP64 block did not carry the field.
            compressedSize = zip64.CompressedSize ?? compressedSizeSmall;
            uncompressedSize = zip64.UncompressedSize ?? uncompressedSizeSmall;
        }
        else
        {
            compressedSize = compressedSizeSmall;
            uncompressedSize = uncompressedSizeSmall;
        }
    }

    /// <summary>
    /// Reads the data descriptor that follows an entry's data and applies it to <paramref name="entry"/>.
    /// The optional signature and the 32-bit vs. 64-bit size layout are detected by probing.
    /// </summary>
    private void ReadDataDescriptor(ZipForwardReadEntry entry, CrcValidatingReadStream crcStream)
    {
        byte[] buffer = new byte[28]; // Max: sig(4) + crc(4) + sizes64(16) + peek(4)

        _archiveStream.ReadExactly(buffer.AsSpan(0, 4));
        int offset = 0;
        int totalRead = 4;

        // The descriptor signature is optional: if present, the CRC is the next 4 bytes;
        // otherwise the 4 bytes just read already are the CRC.
        if (buffer.AsSpan(0, 4).SequenceEqual(ZipLocalFileHeader.DataDescriptorSignatureConstantBytes))
        {
            offset = 4;
            _archiveStream.ReadExactly(buffer.AsSpan(4, 4));
            totalRead = 8;
        }

        // Read 20 bytes: up to 16 for sizes (64-bit) + 4 to peek at the next signature.
        _archiveStream.ReadExactly(buffer.AsSpan(totalRead, 20));

        // Probe: if 4 bytes after 32-bit sizes form a known ZIP signature,
        // the descriptor uses 32-bit sizes; otherwise assume 64-bit.
        bool isZip64 = !IsKnownZipSignature(buffer.AsSpan(totalRead + 8, 4));
        int sizesBytes = isZip64 ? 16 : 8;

        // Seek back over the bytes we read past the actual sizes.
        // NOTE(review): this requires _archiveStream to support Seek; confirm that
        // non-seekable inputs are wrapped accordingly where the field is assigned.
        int overRead = 20 - sizesBytes;
        _archiveStream.Seek(-overRead, SeekOrigin.Current);

        ParseDataDescriptor(buffer, offset, isZip64, entry, crcStream);
    }

    /// <summary>
    /// Asynchronous counterpart of <see cref="ReadDataDescriptor"/>.
    /// </summary>
    private async ValueTask ReadDataDescriptorAsync(
        ZipForwardReadEntry entry, CrcValidatingReadStream crcStream, CancellationToken cancellationToken)
    {
        byte[] buffer = new byte[28]; // Max: sig(4) + crc(4) + sizes64(16) + peek(4)

        await _archiveStream.ReadExactlyAsync(buffer.AsMemory(0, 4), cancellationToken).ConfigureAwait(false);
        int offset = 0;
        int totalRead = 4;

        // The descriptor signature is optional: if present, the CRC is the next 4 bytes;
        // otherwise the 4 bytes just read already are the CRC.
        if (buffer.AsSpan(0, 4).SequenceEqual(ZipLocalFileHeader.DataDescriptorSignatureConstantBytes))
        {
            offset = 4;
            await _archiveStream.ReadExactlyAsync(buffer.AsMemory(4, 4), cancellationToken).ConfigureAwait(false);
            totalRead = 8;
        }

        // Read 20 bytes: up to 16 for sizes (64-bit) + 4 to peek at the next signature.
        await _archiveStream.ReadExactlyAsync(buffer.AsMemory(totalRead, 20), cancellationToken).ConfigureAwait(false);

        // Probe: if 4 bytes after 32-bit sizes form a known ZIP signature,
        // the descriptor uses 32-bit sizes; otherwise assume 64-bit.
        bool isZip64 = !IsKnownZipSignature(buffer.AsSpan(totalRead + 8, 4));
        int sizesBytes = isZip64 ? 16 : 8;

        // Seek back over the bytes we read past the actual sizes.
        // NOTE(review): this requires _archiveStream to support Seek; confirm that
        // non-seekable inputs are wrapped accordingly where the field is assigned.
        int overRead = 20 - sizesBytes;
        _archiveStream.Seek(-overRead, SeekOrigin.Current);

        ParseDataDescriptor(buffer, offset, isZip64, entry, crcStream);
    }

    /// <summary>
    /// Parses the data descriptor fields from an already-read buffer and updates
    /// the entry with the CRC-32, compressed size, and uncompressed size. No I/O.
    /// </summary>
    /// <param name="buffer">Bytes read from the archive, starting at the optional descriptor signature.</param>
    /// <param name="offset">Index of the CRC-32 field in <paramref name="buffer"/> (4 when a signature precedes it, else 0).</param>
    /// <param name="isZip64"><see langword="true"/> when the two size fields are 8 bytes each rather than 4.</param>
    private static void ParseDataDescriptor(
        ReadOnlySpan<byte> buffer, int offset, bool isZip64,
        ZipForwardReadEntry entry, CrcValidatingReadStream crcStream)
    {
        uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(buffer[offset..]);
        int sizesOffset = offset + 4;

        if (isZip64)
        {
            entry.UpdateDataDescriptor(
                crc32,
                compressedLength: BinaryPrimitives.ReadInt64LittleEndian(buffer[sizesOffset..]),
                length: BinaryPrimitives.ReadInt64LittleEndian(buffer[(sizesOffset + 8)..]),
                crcStream.RunningCrc, crcStream.TotalBytesRead);
        }
        else
        {
            entry.UpdateDataDescriptor(
                crc32,
                compressedLength: BinaryPrimitives.ReadUInt32LittleEndian(buffer[sizesOffset..]),
                length: BinaryPrimitives.ReadUInt32LittleEndian(buffer[(sizesOffset + 4)..]),
                crcStream.RunningCrc, crcStream.TotalBytesRead);
        }
    }

    /// <summary>
    /// Returns <see langword="true"/> when the first four bytes of <paramref name="headerBytes"/>
    /// match a known end-of-entries signature
    /// (central directory header, end-of-central-directory, or ZIP64 end-of-central-directory record).
    /// </summary>
    private static bool IsKnownEndOfEntriesSignature(ReadOnlySpan<byte> headerBytes)
    {
        ReadOnlySpan<byte> sig = headerBytes[..4];
        return sig.SequenceEqual(ZipCentralDirectoryFileHeader.SignatureConstantBytes)
            || sig.SequenceEqual(ZipEndOfCentralDirectoryBlock.SignatureConstantBytes)
            || sig.SequenceEqual(Zip64EndOfCentralDirectoryRecord.SignatureConstantBytes);
    }

    /// <summary>
    /// Returns <see langword="true"/> when <paramref name="bytes"/> starts with any
    /// recognized ZIP structure signature (local header, central directory, EOCD, or ZIP64 EOCD).
    /// Used to probe the data descriptor format by peeking at the bytes that follow.
    /// </summary>
    private static bool IsKnownZipSignature(ReadOnlySpan<byte> bytes)
    {
        return bytes.StartsWith(ZipLocalFileHeader.SignatureConstantBytes)
            || bytes.StartsWith(ZipCentralDirectoryFileHeader.SignatureConstantBytes)
            || bytes.StartsWith(ZipEndOfCentralDirectoryBlock.SignatureConstantBytes)
            || bytes.StartsWith(Zip64EndOfCentralDirectoryRecord.SignatureConstantBytes);
    }

    /// <summary>
    /// Marks the reader as disposed and disposes the archive stream unless the reader
    /// was created with leaveOpen. Calling it more than once has no further effect.
    /// </summary>
    public void Dispose()
    {
        if (!_isDisposed)
        {
            _isDisposed = true;

            if (!_leaveOpen)
            {
                _archiveStream.Dispose();
            }
        }
    }

    /// <summary>
    /// Asynchronous counterpart of <see cref="Dispose"/>.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        if (!_isDisposed)
        {
            _isDisposed = true;

            if (!_leaveOpen)
            {
                await _archiveStream.DisposeAsync().ConfigureAwait(false);
            }
        }
    }
}
diff --git a/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj b/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj
index 284ccb348c15fb..dbeb797609f5bc 100644
--- a/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj
+++ b/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj
@@ -27,6 +27,7 @@
+
diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs
new file mode 100644
index 00000000000000..63785e922c4398
--- /dev/null
+++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs
@@ -0,0 +1,612 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Xunit;

namespace System.IO.Compression.Tests
{
    /// <summary>
    /// Tests for forward-only reading of ZIP entries through <see cref="ZipStreamReader"/>,
    /// each run against both the synchronous and the asynchronous API.
    /// </summary>
    public partial class zip_StreamEntryReadTests : ZipFileTestBase
    {
        private static readonly byte[] s_smallContent = "Hello, small world!"u8.ToArray();
        private static readonly byte[] s_mediumContent = new byte[8192];
        private static readonly byte[] s_largeContent = new byte[65536];

        static zip_StreamEntryReadTests()
        {
            // Fixed seed keeps the payloads identical from run to run.
            Random rng = new(42);
            rng.NextBytes(s_mediumContent);
            rng.NextBytes(s_largeContent);
        }

        [Theory]
        [MemberData(nameof(Get_Booleans_Data))]
        public async Task Read_DeflateWithKnownSize_ReturnsDecompressedData(bool async)
        {
            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true);
            byte[][] expectedContents = [s_smallContent, s_mediumContent, s_largeContent];

            using MemoryStream archiveStream = new(zipBytes);
            using ZipStreamReader reader = new(archiveStream);

            for (int i = 0; i < expectedContents.Length; i++)
            {
                ZipForwardReadEntry? entry = await GetNextEntry(reader, async);

                Assert.NotNull(entry);
                Assert.NotNull(entry.DataStream);
                Assert.False(entry.IsDirectory);
                Assert.Equal(ZipCompressionMethod.Deflate, entry.CompressionMethod);

                byte[] decompressed = await ReadStreamFully(entry.DataStream, async);
                Assert.Equal(expectedContents[i], decompressed);
            }

            ZipForwardReadEntry? end = await GetNextEntry(reader, async);
            Assert.Null(end);
        }

        [Theory]
        [MemberData(nameof(Get_Booleans_Data))]
        public async Task Read_StoredWithKnownSize_ReturnsUncompressedData(bool async)
        {
            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.NoCompression, seekable: true);
            byte[][] expectedContents = [s_smallContent, s_mediumContent, s_largeContent];

            using MemoryStream archiveStream = new(zipBytes);
            using ZipStreamReader reader = new(archiveStream);

            for (int i = 0; i < expectedContents.Length; i++)
            {
                ZipForwardReadEntry? entry = await GetNextEntry(reader, async);

                Assert.NotNull(entry);
                Assert.NotNull(entry.DataStream);
                Assert.Equal(ZipCompressionMethod.Stored, entry.CompressionMethod);

                byte[] decompressed = await ReadStreamFully(entry.DataStream, async);
                Assert.Equal(expectedContents[i], decompressed);
            }
        }

        [Theory]
        [MemberData(nameof(Get_Booleans_Data))]
        public async Task Read_DeflateWithDataDescriptor_ReturnsDecompressedData(bool async)
        {
            // Writing to a non-seekable stream makes ZipArchive emit data descriptors.
            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: false);
            byte[][] expectedContents = [s_smallContent, s_mediumContent, s_largeContent];

            using MemoryStream archiveStream = new(zipBytes);
            using WrappedStream nonSeekableStream = new(archiveStream, canRead: true, canWrite: false, canSeek: false);
            using ZipStreamReader reader = new(nonSeekableStream);

            for (int i = 0; i < expectedContents.Length; i++)
            {
                ZipForwardReadEntry? entry = await GetNextEntry(reader, async);

                Assert.NotNull(entry);
                Assert.NotNull(entry.DataStream);
                Assert.Equal(ZipCompressionMethod.Deflate, entry.CompressionMethod);

                byte[] decompressed = await ReadStreamFully(entry.DataStream, async);
                Assert.Equal(expectedContents[i], decompressed);
            }
        }

        [Theory]
        [MemberData(nameof(Get_Booleans_Data))]
        public async Task Read_StoredWithDataDescriptor_ThrowsNotSupported(bool async)
        {
            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.NoCompression, seekable: false);

            using MemoryStream archiveStream = new(zipBytes);
            using WrappedStream nonSeekableStream = new(archiveStream, canRead: true, canWrite: false, canSeek: false);
            using ZipStreamReader reader = new(nonSeekableStream);

            if (async)
            {
                await Assert.ThrowsAsync<NotSupportedException>(() => reader.GetNextEntryAsync().AsTask());
            }
            else
            {
                Assert.Throws<NotSupportedException>(() => reader.GetNextEntry());
            }
        }

        [Theory]
        [MemberData(nameof(Get_Booleans_Data))]
        public async Task CopyData_PreservesEntryAfterAdvancing(bool async)
        {
            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true);

            using MemoryStream archiveStream = new(zipBytes);
            using WrappedStream nonSeekableStream = new(archiveStream, canRead: true, canWrite: false, canSeek: false);
            using ZipStreamReader reader = new(nonSeekableStream);

            ZipForwardReadEntry? entry = await GetNextEntry(reader, async, copyData: true);

            Assert.NotNull(entry);
            Assert.NotNull(entry.DataStream);

            ZipForwardReadEntry? next = await GetNextEntry(reader, async);
            Assert.NotNull(next);

            // The copied data must remain readable after the reader has moved on.
            entry.DataStream.Position = 0;
            byte[] decompressed = await ReadStreamFully(entry.DataStream, async);
            Assert.Equal(s_smallContent, decompressed);
        }

        [Theory]
        [MemberData(nameof(Get_Booleans_Data))]
        public async Task PartialRead_ThenGetNextEntry_AdvancesCorrectly(bool async)
        {
            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true);

            using MemoryStream archiveStream = new(zipBytes);
            using ZipStreamReader reader = new(archiveStream);

            ZipForwardReadEntry? first = await GetNextEntry(reader, async);
            Assert.NotNull(first);

            byte[] partial = new byte[5];
            await ReadStream(first.DataStream!, partial, async);

            ZipForwardReadEntry? second = await GetNextEntry(reader, async);

            Assert.NotNull(second);
            Assert.Equal("medium.bin", second.FullName);

            byte[] decompressed = await ReadStreamFully(second.DataStream!, async);
            Assert.Equal(s_mediumContent, decompressed);
        }

        [Theory]
        [MemberData(nameof(Get_Booleans_Data))]
        public async Task PartialRead_DataDescriptor_ThenGetNextEntry_AdvancesCorrectly(bool async)
        {
            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: false);

            using MemoryStream archiveStream = new(zipBytes);
            using ZipStreamReader reader = new(archiveStream);

            ZipForwardReadEntry? first = await GetNextEntry(reader, async);
            Assert.NotNull(first);

            byte[] partial = new byte[3];
            await ReadStream(first.DataStream!, partial, async);

            ZipForwardReadEntry? second = await GetNextEntry(reader, async);

            Assert.NotNull(second);
            Assert.Equal("medium.bin", second.FullName);

            byte[] decompressed = await ReadStreamFully(second.DataStream!, async);
            Assert.Equal(s_mediumContent, decompressed);
        }

        [Theory]
        [MemberData(nameof(Get_Booleans_Data))]
        public async Task Deflate64Entry_ReturnsDecompressedData(bool async)
        {
            MemoryStream ms = await StreamHelpers.CreateTempCopyStream(compat("deflate64.zip"));

            using ZipStreamReader reader = new(ms);

            ZipForwardReadEntry? entry = await GetNextEntry(reader, async);

            Assert.NotNull(entry);
            Assert.Equal(ZipCompressionMethod.Deflate64, entry.CompressionMethod);
            Assert.NotNull(entry.DataStream);

            byte[] data = await ReadStreamFully(entry.DataStream, async);
            Assert.True(data.Length > 0);
        }

        [Theory]
        [InlineData("empty.txt", false, true)]
        [InlineData("empty.txt", false, false)]
        [InlineData("mydir/", true, true)]
        [InlineData("mydir/", true, false)]
        public async Task ZeroLengthEntry_HasNullDataStream(string entryName, bool expectedIsDirectory, bool async)
        {
            using MemoryStream ms = new();
            using (ZipArchive archive = new(ms, ZipArchiveMode.Create, leaveOpen: true))
            {
                archive.CreateEntry(entryName);
            }

            ms.Position = 0;
            using ZipStreamReader reader = new(ms);

            ZipForwardReadEntry? entry = await GetNextEntry(reader, async);

            Assert.NotNull(entry);
            Assert.Equal(entryName, entry.FullName);
            Assert.Equal(expectedIsDirectory, entry.IsDirectory);
            Assert.Null(entry.DataStream);
            Assert.Equal(0, entry.CompressedLength);
        }

        [Theory]
        [MemberData(nameof(Get_Booleans_Data))]
        public async Task EncryptedEntry_ReportsIsEncrypted(bool async)
        {
            MemoryStream ms = await StreamHelpers.CreateTempCopyStream(zfile("encrypted_entries_weak.zip"));

            using ZipStreamReader reader = new(ms);

            bool foundEncrypted = false;
            bool foundUnencrypted = false;

            ZipForwardReadEntry? entry;
            while ((entry = await GetNextEntry(reader, async)) is not null)
            {
                if (entry.IsEncrypted)
                {
                    foundEncrypted = true;
                }
                else
                {
                    foundUnencrypted = true;
                }
            }

            Assert.True(foundEncrypted);
            Assert.True(foundUnencrypted);
        }

        [Fact]
        public async Task AsyncCancellation_ThrowsOperationCanceled()
        {
            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true);

            using MemoryStream archiveStream = new(zipBytes);
            using ZipStreamReader reader = new(archiveStream);

            using CancellationTokenSource cts = new();
            cts.Cancel();

            // ThrowsAny also accepts derived types such as TaskCanceledException.
            await Assert.ThrowsAnyAsync<OperationCanceledException>(
                () => reader.GetNextEntryAsync(cancellationToken: cts.Token).AsTask());
        }

        [Theory]
        [MemberData(nameof(Get_Booleans_Data))]
        public async Task Dispose_WhileEntryPartiallyRead_DoesNotThrow(bool async)
        {
            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true);

            using MemoryStream archiveStream = new(zipBytes);
            ZipStreamReader reader = new(archiveStream);

            ZipForwardReadEntry? entry = await GetNextEntry(reader, async);

            Assert.NotNull(entry);

            byte[] partial = new byte[5];
            await ReadStream(entry.DataStream!, partial, async);

            await DisposeReader(reader, async);
        }

        [Theory]
        [MemberData(nameof(Get_Booleans_Data))]
        public async Task EmptyArchive_ReturnsNull(bool async)
        {
            using MemoryStream ms = new();
            using (new ZipArchive(ms, ZipArchiveMode.Create, leaveOpen: true)) { }

            ms.Position = 0;
            using ZipStreamReader reader = new(ms);

            ZipForwardReadEntry? entry = await GetNextEntry(reader, async);

            Assert.Null(entry);
        }

        [Theory]
        [MemberData(nameof(Get_Booleans_Data))]
        public async Task LeaveOpen_DoesNotDisposeStream(bool async)
        {
            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true);

            using MemoryStream archiveStream = new(zipBytes);

            ZipStreamReader reader = new(archiveStream, leaveOpen: true);
            await DisposeReader(reader, async);

            Assert.True(archiveStream.CanRead);
        }

        [Theory]
        [MemberData(nameof(Get_Booleans_Data))]
        public async Task Constructor_WithEncoding_ReadsEntryNames(bool async)
        {
            using MemoryStream ms = new();
            using (ZipArchive archive = new(ms, ZipArchiveMode.Create, leaveOpen: true))
            {
                AddEntry(archive, "hello.txt", s_smallContent, CompressionLevel.Optimal);
            }

            ms.Position = 0;
            using ZipStreamReader reader = new(ms, entryNameEncoding: Encoding.UTF8, leaveOpen: true);

            ZipForwardReadEntry? entry = await GetNextEntry(reader, async);

            Assert.NotNull(entry);
            Assert.Equal("hello.txt", entry.FullName);
        }

        [Theory]
        [MemberData(nameof(Get_Booleans_Data))]
        public async Task MultipleEntries_MixedSkipAndRead(bool async)
        {
            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true);

            using MemoryStream archiveStream = new(zipBytes);
            using ZipStreamReader reader = new(archiveStream);

            // Skip first entry
            ZipForwardReadEntry? first = await GetNextEntry(reader, async);
            Assert.NotNull(first);

            // Read second entry fully
            ZipForwardReadEntry? second = await GetNextEntry(reader, async);
            Assert.NotNull(second);
            byte[] data = await ReadStreamFully(second.DataStream!, async);
            Assert.Equal(s_mediumContent, data);

            // Skip third entry
            ZipForwardReadEntry? third = await GetNextEntry(reader, async);
            Assert.NotNull(third);

            // Confirm end
            ZipForwardReadEntry? end = await GetNextEntry(reader, async);
            Assert.Null(end);
        }

        [Theory]
        [MemberData(nameof(Get_Booleans_Data))]
        public async Task GetNextEntry_AfterDispose_ThrowsObjectDisposedException(bool async)
        {
            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true);

            using MemoryStream archiveStream = new(zipBytes);
            ZipStreamReader reader = new(archiveStream);

            // Read one entry to ensure the reader was functional.
            ZipForwardReadEntry? entry = await GetNextEntry(reader, async);
            Assert.NotNull(entry);

            await DisposeReader(reader, async);

            if (async)
            {
                await Assert.ThrowsAsync<ObjectDisposedException>(() => reader.GetNextEntryAsync().AsTask());
            }
            else
            {
                Assert.Throws<ObjectDisposedException>(() => reader.GetNextEntry());
            }
        }

        [Theory]
        [MemberData(nameof(Get_Booleans_Data))]
        public async Task CopyData_WithDataDescriptor_PreservesEntryAfterAdvancing(bool async)
        {
            // seekable: false triggers data descriptors for Deflate entries.
            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: false);

            using MemoryStream archiveStream = new(zipBytes);
            using ZipStreamReader reader = new(archiveStream);

            // Read first entry with copyData: true — exercises the path that
            // eagerly decompresses, copies into a MemoryStream, then reads the
            // data descriptor to validate CRC.
            ZipForwardReadEntry? first = await GetNextEntry(reader, async, copyData: true);

            Assert.NotNull(first);
            Assert.NotNull(first.DataStream);

            // Advance to the next entry to confirm the stream position is correct
            // after consuming the data descriptor.
            ZipForwardReadEntry? second = await GetNextEntry(reader, async);
            Assert.NotNull(second);
            Assert.Equal("medium.bin", second.FullName);

            // The copied first entry's data should still be fully readable.
            first.DataStream.Position = 0;
            byte[] decompressed = await ReadStreamFully(first.DataStream, async);
            Assert.Equal(s_smallContent, decompressed);

            // Also verify the second entry's data is correct.
            byte[] secondData = await ReadStreamFully(second.DataStream!, async);
            Assert.Equal(s_mediumContent, secondData);
        }

        [Theory]
        [MemberData(nameof(Get_Booleans_Data))]
        public async Task ExtractToFile_CreatesFileWithExpectedContent(bool async)
        {
            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true);

            using MemoryStream archiveStream = new(zipBytes);
            using ZipStreamReader reader = new(archiveStream);

            ZipForwardReadEntry? entry = await GetNextEntry(reader, async);

            Assert.NotNull(entry);
            Assert.Equal("small.txt", entry.FullName);

            string tempPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());
            try
            {
                await ExtractEntryToFile(entry, tempPath, overwrite: true, async);

                byte[] written = File.ReadAllBytes(tempPath);
                Assert.Equal(s_smallContent, written);
            }
            finally
            {
                File.Delete(tempPath);
            }
        }

        [Theory]
        [MemberData(nameof(Get_Booleans_Data))]
        public async Task ExtractToFile_OverwriteTrue_ReplacesExistingFile(bool async)
        {
            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true);

            using MemoryStream archiveStream = new(zipBytes);
            using ZipStreamReader reader = new(archiveStream);

            ZipForwardReadEntry? entry = await GetNextEntry(reader, async);

            Assert.NotNull(entry);

            string tempPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());
            try
            {
                // Create a pre-existing file with different content.
                File.WriteAllText(tempPath, "old content");

                await ExtractEntryToFile(entry, tempPath, overwrite: true, async);

                byte[] written = File.ReadAllBytes(tempPath);
                Assert.Equal(s_smallContent, written);
            }
            finally
            {
                File.Delete(tempPath);
            }
        }

        [Theory]
        [MemberData(nameof(Get_Booleans_Data))]
        public async Task ExtractToFile_OverwriteFalse_ThrowsWhenFileExists(bool async)
        {
            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true);

            using MemoryStream archiveStream = new(zipBytes);
            using ZipStreamReader reader = new(archiveStream);

            ZipForwardReadEntry? entry = await GetNextEntry(reader, async);

            Assert.NotNull(entry);

            string tempPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());
            try
            {
                File.WriteAllText(tempPath, "existing");

                // NOTE(review): IOException is assumed here, matching what creating an
                // already-existing file with FileMode.CreateNew throws; confirm against ExtractToFile.
                if (async)
                {
                    await Assert.ThrowsAsync<IOException>(() => entry.ExtractToFileAsync(tempPath, overwrite: false));
                }
                else
                {
                    Assert.Throws<IOException>(() => entry.ExtractToFile(tempPath, overwrite: false));
                }
            }
            finally
            {
                File.Delete(tempPath);
            }
        }

        [Fact]
        public void Constructor_NullStream_ThrowsArgumentNullException()
        {
            Assert.Throws<ArgumentNullException>("stream", () => new ZipStreamReader(null!));
        }

        [Fact]
        public void Constructor_UnreadableStream_ThrowsArgumentException()
        {
            using MemoryStream ms = new();
            using WrappedStream unreadable = new(ms, canRead: false, canWrite: true, canSeek: true);

            Assert.Throws<ArgumentException>("stream", () => new ZipStreamReader(unreadable));
        }

        // ── Sync/async dispatch helpers ──────────────────────────────────────

        // Calls GetNextEntryAsync or GetNextEntry depending on the theory parameter.
        private static async ValueTask<ZipForwardReadEntry?> GetNextEntry(
            ZipStreamReader reader, bool async, bool copyData = false)
        {
            return async
                ? await reader.GetNextEntryAsync(copyData: copyData)
                : reader.GetNextEntry(copyData: copyData);
        }

        // Calls ExtractToFileAsync or ExtractToFile depending on the theory parameter.
        private static async Task ExtractEntryToFile(
            ZipForwardReadEntry entry, string destinationFileName, bool overwrite, bool async)
        {
            if (async)
            {
                await entry.ExtractToFileAsync(destinationFileName, overwrite);
            }
            else
            {
                entry.ExtractToFile(destinationFileName, overwrite);
            }
        }

        // Calls DisposeAsync or Dispose depending on the theory parameter.
        private static async Task DisposeReader(ZipStreamReader reader, bool async)
        {
            if (async)
            {
                await reader.DisposeAsync();
            }
            else
            {
                reader.Dispose();
            }
        }

        // Performs a single read into the buffer and returns the byte count.
        private static async ValueTask<int> ReadStream(Stream stream, byte[] buffer, bool async)
        {
            return async
                ? await stream.ReadAsync(buffer)
                : stream.Read(buffer);
        }

        // ── Test data helpers ────────────────────────────────────────────────

        // Builds an archive with small.txt, medium.bin and large.bin. When seekable is false
        // the archive is written through a non-seekable wrapper around the MemoryStream.
        private static byte[] CreateZipWithEntries(CompressionLevel compressionLevel, bool seekable)
        {
            MemoryStream ms = new();

            Stream writeStream = seekable
                ? ms
                : new WrappedStream(ms, canRead: true, canWrite: true, canSeek: false);

            using (ZipArchive archive = new(writeStream, ZipArchiveMode.Create, leaveOpen: true))
            {
                AddEntry(archive, "small.txt", s_smallContent, compressionLevel);
                AddEntry(archive, "medium.bin", s_mediumContent, compressionLevel);
                AddEntry(archive, "large.bin", s_largeContent, compressionLevel);
            }

            return ms.ToArray();
        }

        // Creates one entry and writes the given bytes into it.
        private static void AddEntry(ZipArchive archive, string name, byte[] contents, CompressionLevel level)
        {
            ZipArchiveEntry entry = archive.CreateEntry(name, level);
            using Stream stream = entry.Open();
            stream.Write(contents);
        }

        // Reads the stream until a read returns 0 and returns everything that was read.
        private static async Task<byte[]> ReadStreamFully(Stream stream, bool async)
        {
            using MemoryStream result = new();
            byte[] buffer = new byte[4096];

            int bytesRead;
            if (async)
            {
                while ((bytesRead = await stream.ReadAsync(buffer)) > 0)
                {
                    result.Write(buffer, 0, bytesRead);
                }
            }
            else
            {
                while ((bytesRead = stream.Read(buffer)) > 0)
                {
                    result.Write(buffer, 0, bytesRead);
                }
            }

            return result.ToArray();
        }
    }
}