From 7430121fa656c284429a4bf162f403d08ed1ef25 Mon Sep 17 00:00:00 2001 From: alinpahontu2912 Date: Thu, 19 Mar 2026 12:48:45 +0100 Subject: [PATCH 1/8] initial implementation for streaming zip entries --- .gitignore | 1 + .../ref/System.IO.Compression.cs | 28 ++ .../src/Resources/Strings.resx | 3 + .../src/System.IO.Compression.csproj | 28 +- .../System/IO/Compression/ZipStreamEntry.cs | 357 +++++++++++++++ .../System/IO/Compression/ZipStreamReader.cs | 408 ++++++++++++++++++ .../tests/System.IO.Compression.Tests.csproj | 1 + .../ZipArchive/zip_StreamEntryReadTests.cs | 216 ++++++++++ 8 files changed, 1025 insertions(+), 17 deletions(-) create mode 100644 src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamEntry.cs create mode 100644 src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs create mode 100644 src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs diff --git a/.gitignore b/.gitignore index 6b6eb255ba51d6..7ccc53c98ddd2c 100644 --- a/.gitignore +++ b/.gitignore @@ -373,3 +373,4 @@ test:.cs *.tempLog.xml *.testResults.xml *.testStats.csv +*.md diff --git a/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs b/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs index 412fa9f2a25535..43b9c7765900fa 100644 --- a/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs +++ b/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs @@ -144,6 +144,34 @@ public enum ZipCompressionMethod Deflate = 8, Deflate64 = 9, } + public sealed partial class ZipStreamEntry + { + internal ZipStreamEntry() { } + public long CompressedLength { get { throw null; } } + public System.IO.Compression.ZipCompressionMethod CompressionMethod { get { throw null; } } + [System.CLSCompliantAttribute(false)] + public uint Crc32 { get { throw null; } } + public string FullName { get { throw null; } } + [System.CLSCompliantAttribute(false)] + public ushort GeneralPurposeBitFlags 
{ get { throw null; } } + public bool IsDirectory { get { throw null; } } + public bool IsEncrypted { get { throw null; } } + public System.DateTimeOffset LastModified { get { throw null; } } + public long Length { get { throw null; } } + public string Name { get { throw null; } } + [System.CLSCompliantAttribute(false)] + public ushort VersionNeeded { get { throw null; } } + public int Read(System.Span buffer) { throw null; } + public System.Threading.Tasks.ValueTask ReadAsync(System.Memory buffer, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + } + public sealed partial class ZipStreamReader : System.IAsyncDisposable, System.IDisposable + { + public ZipStreamReader(System.IO.Stream archiveStream, bool leaveOpen = false, System.Text.Encoding? entryNameEncoding = null) { } + public void Dispose() { } + public System.Threading.Tasks.ValueTask DisposeAsync() { throw null; } + public System.IO.Compression.ZipStreamEntry? GetNextEntry() { throw null; } + public System.Threading.Tasks.ValueTask GetNextEntryAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + } public sealed partial class ZLibCompressionOptions { public ZLibCompressionOptions() { } diff --git a/src/libraries/System.IO.Compression/src/Resources/Strings.resx b/src/libraries/System.IO.Compression/src/Resources/Strings.resx index 5fd5e9e3cedc88..12dc182c12fbb3 100644 --- a/src/libraries/System.IO.Compression/src/Resources/Strings.resx +++ b/src/libraries/System.IO.Compression/src/Resources/Strings.resx @@ -308,4 +308,7 @@ An attempt was made to move the position before the beginning of the stream. + + Stored compression entries with data descriptors cannot be read in forward-only mode because the compressed size is unknown. 
+ diff --git a/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj b/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj index 91ad2914646cd3..4b6fe2fc148a11 100644 --- a/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj +++ b/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj @@ -1,4 +1,4 @@ - + $(NetCoreAppCurrent)-windows;$(NetCoreAppCurrent)-unix;$(NetCoreAppCurrent)-browser;$(NetCoreAppCurrent)-wasi;$(NetCoreAppCurrent) @@ -41,12 +41,9 @@ - - - + + + @@ -55,28 +52,25 @@ - + + + - + - - - + + + diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamEntry.cs new file mode 100644 index 00000000000000..7f59dc2a76b0b4 --- /dev/null +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamEntry.cs @@ -0,0 +1,357 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Buffers; +using System.Diagnostics; +using System.Threading; +using System.Threading.Tasks; + +namespace System.IO.Compression; + +public sealed class ZipStreamEntry +{ + private readonly Stream? _archiveStream; + private readonly bool _hasDataDescriptor; + private readonly BoundedReadOnlyStream? _boundedStream; + private Stream? _decompressionStream; + private uint _crc32; + private long _compressedLength; + private long _length; + + internal ZipStreamEntry( + string fullName, + ZipCompressionMethod compressionMethod, + DateTimeOffset lastModified, + uint crc32, + long compressedLength, + long length, + ushort generalPurposeBitFlags, + ushort versionNeeded, + Stream? 
archiveStream, + bool hasDataDescriptor) + { + FullName = fullName; + CompressionMethod = compressionMethod; + LastModified = lastModified; + _crc32 = crc32; + _compressedLength = compressedLength; + _length = length; + GeneralPurposeBitFlags = generalPurposeBitFlags; + VersionNeeded = versionNeeded; + _hasDataDescriptor = hasDataDescriptor; + + if (archiveStream is not null) + { + if (hasDataDescriptor) + { + _archiveStream = archiveStream; + } + else + { + _boundedStream = new BoundedReadOnlyStream(archiveStream, compressedLength); + } + } + } + + /// + /// Gets the full name (relative path) of the entry, including any directory path. + /// + public string FullName { get; } + + /// + /// Gets the file name portion of the entry (the part after the last directory separator). + /// + public string Name => Path.GetFileName(FullName); + + /// + /// Gets the compression method used for this entry. + /// + public ZipCompressionMethod CompressionMethod { get; } + + /// + /// Gets the last modification date and time of the entry. + /// + public DateTimeOffset LastModified { get; } + + /// + /// Gets the CRC-32 checksum of the uncompressed data. + /// + /// + /// When bit 3 (data descriptor) is set in the local header, this value is initially + /// zero and is populated after the compressed data has been fully read. + /// + [CLSCompliant(false)] + public uint Crc32 => _crc32; + + /// + /// Gets the compressed size of the entry in bytes. + /// + /// + /// When bit 3 (data descriptor) is set in the local header, this value is initially + /// zero and is populated after the compressed data has been fully read. + /// + public long CompressedLength => _compressedLength; + + /// + /// Gets the uncompressed size of the entry in bytes. + /// + /// + /// When bit 3 (data descriptor) is set in the local header, this value is initially + /// zero and is populated after the compressed data has been fully read. 
+ /// + public long Length => _length; + + /// + /// Gets the raw general purpose bit flags from the local file header. + /// + [CLSCompliant(false)] + public ushort GeneralPurposeBitFlags { get; } + + /// + /// Gets a value indicating whether the entry is encrypted. + /// + public bool IsEncrypted => (GeneralPurposeBitFlags & 1) != 0; + + /// + /// Gets a value indicating whether the entry represents a directory. + /// + public bool IsDirectory => FullName.Length > 0 && (FullName[^1] is '/' or '\\'); + + /// + /// Gets the minimum ZIP specification version needed to extract this entry. + /// + [CLSCompliant(false)] + public ushort VersionNeeded { get; } + + /// + /// Reads decompressed data from this entry into the provided buffer. + /// The data is transparently decompressed based on the entry's compression method. + /// + /// The buffer to read decompressed data into. + /// The number of bytes read, or 0 if all data has been consumed. + /// + /// The entry uses an unsupported compression method, or is a Stored entry with a data descriptor. + /// + public int Read(Span buffer) + { + Stream stream = GetOrCreateDecompressionStream(); + + return stream.Read(buffer); + } + + /// + /// Asynchronously reads decompressed data from this entry into the provided buffer. + /// The data is transparently decompressed based on the entry's compression method. + /// + /// The buffer to read decompressed data into. + /// A token to monitor for cancellation requests. + /// The number of bytes read, or 0 if all data has been consumed. + /// + /// The entry uses an unsupported compression method, or is a Stored entry with a data descriptor. 
+ /// + public async ValueTask ReadAsync(Memory buffer, CancellationToken cancellationToken = default) + { + Stream stream = GetOrCreateDecompressionStream(); + + return await stream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false); + } + + private Stream GetOrCreateDecompressionStream() + { + if (_decompressionStream is not null) + { + return _decompressionStream; + } + + if (_hasDataDescriptor) + { + // Data descriptor entries have unknown compressed size in the local header. + // Deflate/Deflate64 streams are self-terminating, so they can be decompressed + // without knowing the compressed size. Stored data has no termination marker, + // so it cannot be decompressed without the size. + Debug.Assert(_archiveStream is not null); + + _decompressionStream = CompressionMethod switch + { + ZipCompressionMethod.Deflate => new DeflateStream(_archiveStream, CompressionMode.Decompress, leaveOpen: true), + ZipCompressionMethod.Deflate64 => new DeflateManagedStream(_archiveStream, ZipCompressionMethod.Deflate64, uncompressedSize: -1), + ZipCompressionMethod.Stored => throw new NotSupportedException(SR.ZipStreamStoredDataDescriptorNotSupported), + _ => throw new NotSupportedException(SR.UnsupportedCompression) + }; + } + else if (_boundedStream is not null) + { + _decompressionStream = CompressionMethod switch + { + ZipCompressionMethod.Deflate => new DeflateStream(_boundedStream, CompressionMode.Decompress, _length), + ZipCompressionMethod.Deflate64 => new DeflateManagedStream(_boundedStream, ZipCompressionMethod.Deflate64, _length), + ZipCompressionMethod.Stored => _boundedStream, + _ => throw new NotSupportedException(SR.UnsupportedCompression) + }; + } + else + { + // Entry has no data (e.g. empty file or directory). 
+ _decompressionStream = Stream.Null; + } + + return _decompressionStream; + } + + internal bool HasDataDescriptor => _hasDataDescriptor; + + internal void SkipCompressedData() + { + // For known-size entries, drain the bounded stream to advance the archive + // past remaining compressed bytes. For data descriptor entries, drain the + // decompression stream which detects the end of the self-terminating format. + Stream? streamToDrain = _boundedStream; + + if (streamToDrain is null && _hasDataDescriptor && _archiveStream is not null) + { + streamToDrain = GetOrCreateDecompressionStream(); + } + + if (streamToDrain is null) + { + return; + } + + byte[] skipBuffer = ArrayPool.Shared.Rent(4096); + try + { + while (streamToDrain.Read(skipBuffer) > 0) { } + } + finally + { + ArrayPool.Shared.Return(skipBuffer); + } + } + + internal async ValueTask SkipCompressedDataAsync(CancellationToken cancellationToken) + { + Stream? streamToDrain = _boundedStream; + + if (streamToDrain is null && _hasDataDescriptor && _archiveStream is not null) + { + streamToDrain = GetOrCreateDecompressionStream(); + } + + if (streamToDrain is null) + { + return; + } + + byte[] skipBuffer = ArrayPool.Shared.Rent(4096); + try + { + while (await streamToDrain.ReadAsync(skipBuffer.AsMemory(), cancellationToken).ConfigureAwait(false) > 0) { } + } + finally + { + ArrayPool.Shared.Return(skipBuffer); + } + } + + internal void UpdateDataDescriptor(uint crc32, long compressedLength, long length) + { + _crc32 = crc32; + _compressedLength = compressedLength; + _length = length; + } + + /// + /// A read-only, forward-only stream that limits the number of bytes + /// that can be read from an underlying stream without closing it. 
+    ///
+    private sealed class BoundedReadOnlyStream : Stream
+    {
+        private readonly Stream _baseStream;
+
+        // Number of bytes of the underlying stream that may still be handed out.
+        private long _remaining;
+
+        /// <summary>
+        /// Creates a view over <paramref name="baseStream"/> that yields at most
+        /// <paramref name="length"/> bytes.
+        /// </summary>
+        /// <param name="baseStream">The stream to read from; it is never closed by this wrapper.</param>
+        /// <param name="length">The maximum number of bytes that may be read through this wrapper.</param>
+        public BoundedReadOnlyStream(Stream baseStream, long length)
+        {
+            _baseStream = baseStream;
+            _remaining = length;
+        }
+
+        public override bool CanRead => true;
+        public override bool CanSeek => false;
+        public override bool CanWrite => false;
+        public override long Length => throw new NotSupportedException();
+
+        public override long Position
+        {
+            get => throw new NotSupportedException();
+            set => throw new NotSupportedException();
+        }
+
+        public override int Read(byte[] buffer, int offset, int count)
+            => Read(buffer.AsSpan(offset, count));
+
+        /// <summary>
+        /// Reads from the underlying stream, never returning more than the bytes left in the window.
+        /// </summary>
+        /// <returns>The number of bytes read, or 0 once the window is exhausted.</returns>
+        public override int Read(Span<byte> buffer)
+        {
+            if (_remaining <= 0)
+            {
+                return 0;
+            }
+
+            if (buffer.Length > _remaining)
+            {
+                // The cast cannot overflow: _remaining is smaller than buffer.Length, an int.
+                buffer = buffer.Slice(0, (int)_remaining);
+            }
+
+            int bytesRead = _baseStream.Read(buffer);
+            _remaining -= bytesRead;
+
+            return bytesRead;
+        }
+
+        public override Task<int> ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken)
+            => ReadAsync(buffer.AsMemory(offset, count), cancellationToken).AsTask();
+
+        /// <summary>
+        /// Asynchronous counterpart of <see cref="Read(Span{byte})"/>; completes synchronously
+        /// with 0 once the window is exhausted.
+        /// </summary>
+        public override ValueTask<int> ReadAsync(Memory<byte> buffer, CancellationToken cancellationToken = default)
+        {
+            if (_remaining <= 0)
+            {
+                return new ValueTask<int>(0);
+            }
+
+            if (buffer.Length > _remaining)
+            {
+                // The cast cannot overflow: _remaining is smaller than buffer.Length, an int.
+                buffer = buffer.Slice(0, (int)_remaining);
+            }
+
+            return ReadAsyncCore(buffer, cancellationToken);
+        }
+
+        // Kept separate so the exhausted case above does not go through the async state machine.
+        private async ValueTask<int> ReadAsyncCore(Memory<byte> buffer, CancellationToken cancellationToken)
+        {
+            int bytesRead = await _baseStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false);
+            _remaining -= bytesRead;
+
+            return bytesRead;
+        }
+
+        public override void Flush() { }
+        public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();
+        public override void SetLength(long value) => throw new NotSupportedException();
+        public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException();
+
+        // Dispose/DisposeAsync are intentionally NOT overridden to dispose _baseStream.
+        // The base stream is the archive stream that ZipStreamReader hands to every entry and
+        // closes itself (or leaves open) according to its leaveOpen setting. Closing it here
+        // would end the whole archive as soon as a decompression stream wrapping this window
+        // is disposed, and would contradict the "without closing it" contract of this class.
+    }
+}
diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs
new file mode 100644
index 00000000000000..aebce74ac9ba39
--- /dev/null
+++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs
@@ -0,0 +1,408 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Buffers;
+using System.Buffers.Binary;
+using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace System.IO.Compression;
+
+public sealed class ZipStreamReader : IDisposable, IAsyncDisposable
+{
+    private const ushort DataDescriptorBitFlag = 0x8;
+    private const ushort UnicodeFileNameBitFlag = 0x800;
+
+    private bool _isDisposed;
+    private readonly bool _leaveOpen;
+    private readonly Encoding? _entryNameEncoding;
+    private ZipStreamEntry? _currentEntry;
+    private readonly Stream _archiveStream;
+    private bool _reachedEnd;
+
+    /// <summary>
+    /// Initializes a reader that walks the entries of <paramref name="archiveStream"/> front to back.
+    /// </summary>
+    /// <param name="archiveStream">The readable stream that contains the ZIP archive.</param>
+    /// <param name="leaveOpen"><see langword="true"/> to leave <paramref name="archiveStream"/> open when the reader is disposed.</param>
+    /// <param name="entryNameEncoding">The encoding for entry names whose header does not set the UTF-8 flag; UTF-8 is used when this is <see langword="null"/>.</param>
+    /// <exception cref="ArgumentNullException"><paramref name="archiveStream"/> is <see langword="null"/>.</exception>
+    /// <exception cref="ArgumentException"><paramref name="archiveStream"/> does not support reading.</exception>
+    public ZipStreamReader(Stream archiveStream, bool leaveOpen = false, Encoding? entryNameEncoding = null)
+    {
+        ArgumentNullException.ThrowIfNull(archiveStream);
+
+        if (!archiveStream.CanRead)
+        {
+            throw new ArgumentException(SR.NotSupported_UnreadableStream, nameof(archiveStream));
+        }
+
+        _archiveStream = archiveStream;
+        _leaveOpen = leaveOpen;
+        _entryNameEncoding = entryNameEncoding;
+    }
+
+    ///
+    /// Reads the next entry from the ZIP archive stream by parsing the local file header.
+    ///
+    ///
+    /// The next , or if there are no more entries.
+    ///
+    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
+    /// <exception cref="InvalidDataException">The archive stream contains invalid data.</exception>
+    public ZipStreamEntry? GetNextEntry()
+    {
+        ObjectDisposedException.ThrowIf(_isDisposed, this);
+
+        if (_reachedEnd)
+        {
+            return null;
+        }
+
+        // Consume whatever is left of the previous entry so the stream sits on the next header.
+        AdvancePastCurrentEntry();
+
+        Span<byte> headerBytes = stackalloc byte[ZipLocalFileHeader.SizeOfLocalHeader];
+        int bytesRead = _archiveStream.ReadAtLeast(headerBytes, headerBytes.Length, throwOnEndOfStream: false);
+
+        // A short read, or any record that does not start with the local file header signature,
+        // ends the enumeration.
+        bool isLocalFileHeader =
+            bytesRead >= ZipLocalFileHeader.SizeOfLocalHeader &&
+            headerBytes.StartsWith(ZipLocalFileHeader.SignatureConstantBytes);
+
+        if (!isLocalFileHeader)
+        {
+            _reachedEnd = true;
+            return null;
+        }
+
+        ReadLocalFileHeader(
+            headerBytes,
+            out string fullName,
+            out ushort versionNeeded,
+            out ushort generalPurposeBitFlags,
+            out ushort compressionMethod,
+            out DateTimeOffset lastModified,
+            out uint crc32,
+            out long compressedSize,
+            out long uncompressedSize,
+            out bool hasDataDescriptor);
+
+        _currentEntry = new ZipStreamEntry(
+            fullName, (ZipCompressionMethod)compressionMethod, lastModified, crc32,
+            compressedSize, uncompressedSize, generalPurposeBitFlags, versionNeeded,
+            _archiveStream, hasDataDescriptor);
+
+        return _currentEntry;
+    }
+
+    /// <summary>
+    /// Asynchronously reads the next entry from the ZIP archive stream by parsing the local file header.
+    /// </summary>
+    /// <param name="cancellationToken">A token to monitor for cancellation requests.</param>
+    /// <returns>
+    /// The next <see cref="ZipStreamEntry"/>, or <see langword="null"/> if there are no more entries.
+    /// </returns>
+    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
+    /// <exception cref="InvalidDataException">The archive stream contains invalid data.</exception>
+ public async ValueTask GetNextEntryAsync(CancellationToken cancellationToken = default) + { + ObjectDisposedException.ThrowIf(_isDisposed, this); + + if (_reachedEnd) + { + return null; + } + + await AdvancePastCurrentEntryAsync(cancellationToken).ConfigureAwait(false); + + byte[] headerBytes = ArrayPool.Shared.Rent(ZipLocalFileHeader.SizeOfLocalHeader); + try + { + int bytesRead = await _archiveStream.ReadAtLeastAsync( + headerBytes.AsMemory(0, ZipLocalFileHeader.SizeOfLocalHeader), + ZipLocalFileHeader.SizeOfLocalHeader, + throwOnEndOfStream: false, + cancellationToken).ConfigureAwait(false); + + if (bytesRead < ZipLocalFileHeader.SizeOfLocalHeader) + { + _reachedEnd = true; + return null; + } + + if (!headerBytes.AsSpan().StartsWith(ZipLocalFileHeader.SignatureConstantBytes)) + { + _reachedEnd = true; + return null; + } + + await ReadLocalFileHeaderAsync(headerBytes, cancellationToken).ConfigureAwait(false); + } + finally + { + ArrayPool.Shared.Return(headerBytes); + } + + return _currentEntry; + } + + private void AdvancePastCurrentEntry() + { + if (_currentEntry is null) + { + return; + } + + _currentEntry.SkipCompressedData(); + + if (_currentEntry.HasDataDescriptor) + { + ReadDataDescriptor(_currentEntry); + } + + _currentEntry = null; + } + + private async ValueTask AdvancePastCurrentEntryAsync(CancellationToken cancellationToken) + { + if (_currentEntry is null) + { + return; + } + + await _currentEntry.SkipCompressedDataAsync(cancellationToken).ConfigureAwait(false); + + if (_currentEntry.HasDataDescriptor) + { + await ReadDataDescriptorAsync(_currentEntry, cancellationToken).ConfigureAwait(false); + } + + _currentEntry = null; + } + + private void ReadLocalFileHeader( + ReadOnlySpan headerBytes, + out string fullName, + out ushort versionNeeded, + out ushort generalPurposeBitFlags, + out ushort compressionMethod, + out DateTimeOffset lastModified, + out uint crc32, + out long compressedSize, + out long uncompressedSize, + out bool hasDataDescriptor) + 
{ + versionNeeded = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.VersionNeededToExtract..]); + generalPurposeBitFlags = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.GeneralPurposeBitFlags..]); + compressionMethod = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.CompressionMethod..]); + uint lastModifiedRaw = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.LastModified..]); + crc32 = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.Crc32..]); + uint compressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.CompressedSize..]); + uint uncompressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.UncompressedSize..]); + ushort filenameLength = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.FilenameLength..]); + ushort extraFieldLength = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.ExtraFieldLength..]); + + lastModified = new DateTimeOffset(ZipHelper.DosTimeToDateTime(lastModifiedRaw)); + hasDataDescriptor = (generalPurposeBitFlags & DataDescriptorBitFlag) != 0; + + int dynamicLength = filenameLength + extraFieldLength; + byte[]? rentedBuffer = null; + Span dynamicBuffer = dynamicLength <= 512 + ? stackalloc byte[512].Slice(0, dynamicLength) + : (rentedBuffer = ArrayPool.Shared.Rent(dynamicLength)).AsSpan(0, dynamicLength); + + try + { + _archiveStream.ReadExactly(dynamicBuffer); + + Encoding encoding = (generalPurposeBitFlags & UnicodeFileNameBitFlag) != 0 + ? Encoding.UTF8 + : _entryNameEncoding ?? 
Encoding.UTF8; + + fullName = encoding.GetString(dynamicBuffer[..filenameLength]); + + // Handle Zip64 extra field for sizes + bool compressedSizeInZip64 = compressedSizeSmall == ZipHelper.Mask32Bit; + bool uncompressedSizeInZip64 = uncompressedSizeSmall == ZipHelper.Mask32Bit; + + if (compressedSizeInZip64 || uncompressedSizeInZip64) + { + Zip64ExtraField zip64 = Zip64ExtraField.GetJustZip64Block( + dynamicBuffer.Slice(filenameLength, extraFieldLength), + readUncompressedSize: uncompressedSizeInZip64, + readCompressedSize: compressedSizeInZip64, + readLocalHeaderOffset: false, + readStartDiskNumber: false); + + compressedSize = zip64.CompressedSize ?? compressedSizeSmall; + uncompressedSize = zip64.UncompressedSize ?? uncompressedSizeSmall; + } + else + { + compressedSize = compressedSizeSmall; + uncompressedSize = uncompressedSizeSmall; + } + } + finally + { + if (rentedBuffer is not null) + { + ArrayPool.Shared.Return(rentedBuffer); + } + } + } + + private async ValueTask ReadLocalFileHeaderAsync(byte[] headerBytes, CancellationToken cancellationToken) + { + ushort versionNeeded = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.VersionNeededToExtract)); + ushort generalPurposeBitFlags = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.GeneralPurposeBitFlags)); + ushort compressionMethod = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.CompressionMethod)); + uint lastModifiedRaw = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.LastModified)); + uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.Crc32)); + uint compressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.CompressedSize)); + uint uncompressedSizeSmall = 
BinaryPrimitives.ReadUInt32LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.UncompressedSize)); + ushort filenameLength = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.FilenameLength)); + ushort extraFieldLength = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.ExtraFieldLength)); + + DateTimeOffset lastModified = new DateTimeOffset(ZipHelper.DosTimeToDateTime(lastModifiedRaw)); + bool hasDataDescriptor = (generalPurposeBitFlags & DataDescriptorBitFlag) != 0; + + int dynamicLength = filenameLength + extraFieldLength; + byte[] dynamicBuffer = ArrayPool.Shared.Rent(dynamicLength); + + try + { + await _archiveStream.ReadExactlyAsync(dynamicBuffer.AsMemory(0, dynamicLength), cancellationToken).ConfigureAwait(false); + + Encoding encoding = (generalPurposeBitFlags & UnicodeFileNameBitFlag) != 0 + ? Encoding.UTF8 + : _entryNameEncoding ?? Encoding.UTF8; + + string fullName = encoding.GetString(dynamicBuffer.AsSpan(0, filenameLength)); + + bool compressedSizeInZip64 = compressedSizeSmall == ZipHelper.Mask32Bit; + bool uncompressedSizeInZip64 = uncompressedSizeSmall == ZipHelper.Mask32Bit; + long compressedSize; + long uncompressedSize; + + if (compressedSizeInZip64 || uncompressedSizeInZip64) + { + Zip64ExtraField zip64 = Zip64ExtraField.GetJustZip64Block( + dynamicBuffer.AsSpan(filenameLength, extraFieldLength), + readUncompressedSize: uncompressedSizeInZip64, + readCompressedSize: compressedSizeInZip64, + readLocalHeaderOffset: false, + readStartDiskNumber: false); + + compressedSize = zip64.CompressedSize ?? compressedSizeSmall; + uncompressedSize = zip64.UncompressedSize ?? 
uncompressedSizeSmall; + } + else + { + compressedSize = compressedSizeSmall; + uncompressedSize = uncompressedSizeSmall; + } + + _currentEntry = new ZipStreamEntry( + fullName, (ZipCompressionMethod)compressionMethod, lastModified, crc32, + compressedSize, uncompressedSize, generalPurposeBitFlags, versionNeeded, + _archiveStream, hasDataDescriptor); + } + finally + { + ArrayPool.Shared.Return(dynamicBuffer); + } + } + + private void ReadDataDescriptor(ZipStreamEntry entry) + { + // Data descriptor layout (signature is optional): + // [signature 4B] + CRC-32 4B + compressed size (4B or 8B) + uncompressed size (4B or 8B) + // Read incrementally to avoid consuming bytes from the next entry. + Span buffer = stackalloc byte[24]; + + _archiveStream.ReadExactly(buffer[..4]); + int offset = 0; + int totalRead = 4; + + if (buffer[..4].SequenceEqual(ZipLocalFileHeader.DataDescriptorSignatureConstantBytes)) + { + offset = 4; + _archiveStream.ReadExactly(buffer.Slice(4, 4)); + totalRead = 8; + } + + bool isZip64 = entry.VersionNeeded >= (ushort)ZipVersionNeededValues.Zip64; + int sizesBytes = isZip64 ? 
16 : 8; + _archiveStream.ReadExactly(buffer.Slice(totalRead, sizesBytes)); + + uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(buffer[offset..]); + int sizesOffset = offset + 4; + + if (isZip64) + { + entry.UpdateDataDescriptor( + crc32, + compressedLength: BinaryPrimitives.ReadInt64LittleEndian(buffer[sizesOffset..]), + length: BinaryPrimitives.ReadInt64LittleEndian(buffer[(sizesOffset + 8)..])); + } + else + { + entry.UpdateDataDescriptor( + crc32, + compressedLength: BinaryPrimitives.ReadUInt32LittleEndian(buffer[sizesOffset..]), + length: BinaryPrimitives.ReadUInt32LittleEndian(buffer[(sizesOffset + 4)..])); + } + } + + private async ValueTask ReadDataDescriptorAsync(ZipStreamEntry entry, CancellationToken cancellationToken) + { + byte[] buffer = new byte[24]; + + await _archiveStream.ReadExactlyAsync(buffer.AsMemory(0, 4), cancellationToken).ConfigureAwait(false); + int offset = 0; + int totalRead = 4; + + if (buffer.AsSpan(0, 4).SequenceEqual(ZipLocalFileHeader.DataDescriptorSignatureConstantBytes)) + { + offset = 4; + await _archiveStream.ReadExactlyAsync(buffer.AsMemory(4, 4), cancellationToken).ConfigureAwait(false); + totalRead = 8; + } + + bool isZip64 = entry.VersionNeeded >= (ushort)ZipVersionNeededValues.Zip64; + int sizesBytes = isZip64 ? 
16 : 8; + await _archiveStream.ReadExactlyAsync(buffer.AsMemory(totalRead, sizesBytes), cancellationToken).ConfigureAwait(false); + + uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(buffer.AsSpan(offset)); + int sizesOffset = offset + 4; + + if (isZip64) + { + entry.UpdateDataDescriptor( + crc32, + compressedLength: BinaryPrimitives.ReadInt64LittleEndian(buffer.AsSpan(sizesOffset)), + length: BinaryPrimitives.ReadInt64LittleEndian(buffer.AsSpan(sizesOffset + 8))); + } + else + { + entry.UpdateDataDescriptor( + crc32, + compressedLength: BinaryPrimitives.ReadUInt32LittleEndian(buffer.AsSpan(sizesOffset)), + length: BinaryPrimitives.ReadUInt32LittleEndian(buffer.AsSpan(sizesOffset + 4))); + } + } + + public void Dispose() + { + if (!_isDisposed) + { + _isDisposed = true; + + if (!_leaveOpen) + { + _archiveStream.Dispose(); + } + } + } + + public async ValueTask DisposeAsync() + { + if (!_isDisposed) + { + _isDisposed = true; + + if (!_leaveOpen) + { + await _archiveStream.DisposeAsync().ConfigureAwait(false); + } + } + } + +} diff --git a/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj b/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj index fa2d85fc0656da..feeef5318db132 100644 --- a/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj +++ b/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj @@ -27,6 +27,7 @@ + diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs new file mode 100644 index 00000000000000..1aeae8905250c1 --- /dev/null +++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs @@ -0,0 +1,216 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+
+using System.Collections.Generic;
+using System.Threading.Tasks;
+using Xunit;
+
+namespace System.IO.Compression.Tests
+{
+    /// <summary>
+    /// Round-trip tests for <see cref="ZipStreamReader"/> and <see cref="ZipStreamEntry"/>:
+    /// archives are produced with <see cref="ZipArchive"/> and read back entry by entry,
+    /// through both the synchronous and the asynchronous API.
+    /// </summary>
+    public partial class zip_StreamEntryReadTests : ZipFileTestBase
+    {
+        private static readonly byte[] s_smallContent = "Hello, small world!"u8.ToArray();
+        private static readonly byte[] s_mediumContent = new byte[8192];
+        private static readonly byte[] s_largeContent = new byte[65536];
+
+        // Payloads in the order CreateZipWithEntries writes them.
+        private static readonly byte[][] s_expectedContents = [s_smallContent, s_mediumContent, s_largeContent];
+
+        static zip_StreamEntryReadTests()
+        {
+            // A fixed seed keeps the generated payloads identical from run to run.
+            Random rng = new(42);
+            rng.NextBytes(s_mediumContent);
+            rng.NextBytes(s_largeContent);
+        }
+
+        // The four theory-data members are kept under their original names; they all
+        // enumerate the same sync/async switch.
+        public static IEnumerable<object[]> DeflateWithKnownSize_Data() => SyncAndAsync();
+
+        public static IEnumerable<object[]> StoredWithKnownSize_Data() => SyncAndAsync();
+
+        public static IEnumerable<object[]> DeflateWithDataDescriptor_Data() => SyncAndAsync();
+
+        public static IEnumerable<object[]> StoredWithDataDescriptor_Data() => SyncAndAsync();
+
+        private static IEnumerable<object[]> SyncAndAsync()
+        {
+            foreach (bool async in _bools)
+            {
+                yield return new object[] { async };
+            }
+        }
+
+        [Theory]
+        [MemberData(nameof(DeflateWithKnownSize_Data))]
+        public async Task Read_DeflateWithKnownSize_ReturnsDecompressedData(bool async)
+        {
+            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true);
+
+            using MemoryStream archiveStream = new(zipBytes);
+            using ZipStreamReader reader = new(archiveStream);
+
+            foreach (byte[] expected in s_expectedContents)
+            {
+                ZipStreamEntry entry = await NextEntry(reader, async);
+
+                Assert.NotNull(entry);
+                Assert.False(entry.IsDirectory);
+                Assert.Equal(ZipCompressionMethod.Deflate, entry.CompressionMethod);
+
+                byte[] decompressed = await ReadEntryFully(entry, async);
+                Assert.Equal(expected, decompressed);
+            }
+
+            ZipStreamEntry end = await NextEntry(reader, async);
+            Assert.Null(end);
+        }
+
+        [Theory]
+        [MemberData(nameof(StoredWithKnownSize_Data))]
+        public async Task Read_StoredWithKnownSize_ReturnsUncompressedData(bool async)
+        {
+            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.NoCompression, seekable: true);
+
+            using MemoryStream archiveStream = new(zipBytes);
+            using ZipStreamReader reader = new(archiveStream);
+
+            foreach (byte[] expected in s_expectedContents)
+            {
+                ZipStreamEntry entry = await NextEntry(reader, async);
+
+                Assert.NotNull(entry);
+                Assert.Equal(ZipCompressionMethod.Stored, entry.CompressionMethod);
+
+                byte[] decompressed = await ReadEntryFully(entry, async);
+                Assert.Equal(expected, decompressed);
+            }
+        }
+
+        [Theory]
+        [MemberData(nameof(DeflateWithDataDescriptor_Data))]
+        public async Task Read_DeflateWithDataDescriptor_ReturnsDecompressedData(bool async)
+        {
+            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: false);
+
+            using MemoryStream archiveStream = new(zipBytes);
+            using ZipStreamReader reader = new(archiveStream);
+
+            foreach (byte[] expected in s_expectedContents)
+            {
+                ZipStreamEntry entry = await NextEntry(reader, async);
+
+                Assert.NotNull(entry);
+                Assert.Equal(ZipCompressionMethod.Deflate, entry.CompressionMethod);
+
+                byte[] decompressed = await ReadEntryFully(entry, async);
+                Assert.Equal(expected, decompressed);
+            }
+        }
+
+        [Theory]
+        [MemberData(nameof(StoredWithDataDescriptor_Data))]
+        public async Task Read_StoredWithDataDescriptor_ThrowsNotSupported(bool async)
+        {
+            byte[] zipBytes = CreateZipWithEntries(CompressionLevel.NoCompression, seekable: false);
+
+            using MemoryStream archiveStream = new(zipBytes);
+            using ZipStreamReader reader = new(archiveStream);
+
+            ZipStreamEntry entry = await NextEntry(reader, async);
+
+            Assert.NotNull(entry);
+            Assert.Equal(ZipCompressionMethod.Stored, entry.CompressionMethod);
+
+            byte[] buffer = new byte[256];
+
+            if (async)
+            {
+                await Assert.ThrowsAsync<NotSupportedException>(() => entry.ReadAsync(buffer).AsTask());
+            }
+            else
+            {
+                Assert.Throws<NotSupportedException>(() => entry.Read(buffer));
+            }
+        }
+
+        /// <summary>
+        /// Creates a ZIP archive with three entries of different sizes (small, medium, large).
+        /// When <paramref name="seekable"/> is true, the archive is written to a seekable stream
+        /// so entries have known sizes. When false, a non-seekable wrapper is used so the
+        /// archive writer sets the data descriptor bit.
+        /// </summary>
+        private static byte[] CreateZipWithEntries(CompressionLevel compressionLevel, bool seekable)
+        {
+            MemoryStream ms = new();
+
+            Stream writeStream = seekable
+                ? ms
+                : new WrappedStream(ms, canRead: true, canWrite: true, canSeek: false);
+
+            // leaveOpen keeps the MemoryStream usable after the archive is finalized on dispose.
+            using (ZipArchive archive = new(writeStream, ZipArchiveMode.Create, leaveOpen: true))
+            {
+                AddEntry(archive, "small.txt", s_smallContent, compressionLevel);
+                AddEntry(archive, "medium.bin", s_mediumContent, compressionLevel);
+                AddEntry(archive, "large.bin", s_largeContent, compressionLevel);
+            }
+
+            return ms.ToArray();
+        }
+
+        // Writes one entry with the given payload and compression level.
+        private static void AddEntry(ZipArchive archive, string name, byte[] contents, CompressionLevel level)
+        {
+            ZipArchiveEntry entry = archive.CreateEntry(name, level);
+            using Stream stream = entry.Open();
+            stream.Write(contents);
+        }
+
+        // Fetches the next entry through the API flavor under test.
+        private static async Task<ZipStreamEntry> NextEntry(ZipStreamReader reader, bool async)
+        {
+            return async
+                ? await reader.GetNextEntryAsync()
+                : reader.GetNextEntry();
+        }
+
+        // Reads an entry until Read/ReadAsync reports 0 bytes and returns everything that was read.
+        private static async Task<byte[]> ReadEntryFully(ZipStreamEntry entry, bool async)
+        {
+            using MemoryStream result = new();
+            byte[] buffer = new byte[4096];
+
+            while (true)
+            {
+                int bytesRead = async
+                    ? await entry.ReadAsync(buffer)
+                    : entry.Read(buffer);
+
+                if (bytesRead <= 0)
+                {
+                    break;
+                }
+
+                result.Write(buffer, 0, bytesRead);
+            }
+
+            return result.ToArray();
+        }
+    }
+}
From eeed5c3d49ef13f81391ff917f43cdc542d68df8 Mon Sep 17 00:00:00 2001 From: alinpahontu2912 Date: Thu, 19 Mar 2026 12:48:45 +0100 Subject: [PATCH 2/8] initial implementation for streaming zip entries --- .gitignore | 1 + .../ref/System.IO.Compression.cs | 28 ++ .../src/Resources/Strings.resx | 3 + .../src/System.IO.Compression.csproj | 28 +- .../System/IO/Compression/ZipStreamEntry.cs | 357 +++++++++++++++ .../System/IO/Compression/ZipStreamReader.cs | 408 ++++++++++++++++++ .../tests/System.IO.Compression.Tests.csproj | 1 + .../ZipArchive/zip_StreamEntryReadTests.cs | 216 ++++++++++ 8 files changed, 1025 insertions(+), 17 deletions(-) create mode 100644 src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamEntry.cs create mode 100644 src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs create mode 100644 
src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs diff --git a/.gitignore b/.gitignore index 6b6eb255ba51d6..7ccc53c98ddd2c 100644 --- a/.gitignore +++ b/.gitignore @@ -373,3 +373,4 @@ test:.cs *.tempLog.xml *.testResults.xml *.testStats.csv +*.md diff --git a/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs b/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs index 564bbc97eb7511..39d425e41cbfbe 100644 --- a/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs +++ b/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs @@ -144,6 +144,34 @@ public enum ZipCompressionMethod Deflate = 8, Deflate64 = 9, } + public sealed partial class ZipStreamEntry + { + internal ZipStreamEntry() { } + public long CompressedLength { get { throw null; } } + public System.IO.Compression.ZipCompressionMethod CompressionMethod { get { throw null; } } + [System.CLSCompliantAttribute(false)] + public uint Crc32 { get { throw null; } } + public string FullName { get { throw null; } } + [System.CLSCompliantAttribute(false)] + public ushort GeneralPurposeBitFlags { get { throw null; } } + public bool IsDirectory { get { throw null; } } + public bool IsEncrypted { get { throw null; } } + public System.DateTimeOffset LastModified { get { throw null; } } + public long Length { get { throw null; } } + public string Name { get { throw null; } } + [System.CLSCompliantAttribute(false)] + public ushort VersionNeeded { get { throw null; } } + public int Read(System.Span buffer) { throw null; } + public System.Threading.Tasks.ValueTask ReadAsync(System.Memory buffer, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + } + public sealed partial class ZipStreamReader : System.IAsyncDisposable, System.IDisposable + { + public ZipStreamReader(System.IO.Stream archiveStream, bool leaveOpen = false, System.Text.Encoding? 
entryNameEncoding = null) { } + public void Dispose() { } + public System.Threading.Tasks.ValueTask DisposeAsync() { throw null; } + public System.IO.Compression.ZipStreamEntry? GetNextEntry() { throw null; } + public System.Threading.Tasks.ValueTask GetNextEntryAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + } public sealed partial class ZLibCompressionOptions { public ZLibCompressionOptions() { } diff --git a/src/libraries/System.IO.Compression/src/Resources/Strings.resx b/src/libraries/System.IO.Compression/src/Resources/Strings.resx index bbb10afbcf342a..4aaf4d9b78d92d 100644 --- a/src/libraries/System.IO.Compression/src/Resources/Strings.resx +++ b/src/libraries/System.IO.Compression/src/Resources/Strings.resx @@ -371,6 +371,9 @@ An attempt was made to move the position before the beginning of the stream. + + Stored compression entries with data descriptors cannot be read in forward-only mode because the compressed size is unknown. + The CRC32 checksum of the extracted data does not match the expected value from the archive. 
diff --git a/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj b/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj index bcdcf1b3417f43..14956148e62d4b 100644 --- a/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj +++ b/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj @@ -1,4 +1,4 @@ - + $(NetCoreAppCurrent)-windows;$(NetCoreAppCurrent)-unix;$(NetCoreAppCurrent)-browser;$(NetCoreAppCurrent)-wasi;$(NetCoreAppCurrent) @@ -41,12 +41,9 @@ - - - + + + @@ -55,28 +52,25 @@ - + + + - + - - - + + + diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamEntry.cs new file mode 100644 index 00000000000000..7f59dc2a76b0b4 --- /dev/null +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamEntry.cs @@ -0,0 +1,357 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Buffers; +using System.Diagnostics; +using System.Threading; +using System.Threading.Tasks; + +namespace System.IO.Compression; + +public sealed class ZipStreamEntry +{ + private readonly Stream? _archiveStream; + private readonly bool _hasDataDescriptor; + private readonly BoundedReadOnlyStream? _boundedStream; + private Stream? _decompressionStream; + private uint _crc32; + private long _compressedLength; + private long _length; + + internal ZipStreamEntry( + string fullName, + ZipCompressionMethod compressionMethod, + DateTimeOffset lastModified, + uint crc32, + long compressedLength, + long length, + ushort generalPurposeBitFlags, + ushort versionNeeded, + Stream? 
archiveStream, + bool hasDataDescriptor) + { + FullName = fullName; + CompressionMethod = compressionMethod; + LastModified = lastModified; + _crc32 = crc32; + _compressedLength = compressedLength; + _length = length; + GeneralPurposeBitFlags = generalPurposeBitFlags; + VersionNeeded = versionNeeded; + _hasDataDescriptor = hasDataDescriptor; + + if (archiveStream is not null) + { + if (hasDataDescriptor) + { + _archiveStream = archiveStream; + } + else + { + _boundedStream = new BoundedReadOnlyStream(archiveStream, compressedLength); + } + } + } + + /// + /// Gets the full name (relative path) of the entry, including any directory path. + /// + public string FullName { get; } + + /// + /// Gets the file name portion of the entry (the part after the last directory separator). + /// + public string Name => Path.GetFileName(FullName); + + /// + /// Gets the compression method used for this entry. + /// + public ZipCompressionMethod CompressionMethod { get; } + + /// + /// Gets the last modification date and time of the entry. + /// + public DateTimeOffset LastModified { get; } + + /// + /// Gets the CRC-32 checksum of the uncompressed data. + /// + /// + /// When bit 3 (data descriptor) is set in the local header, this value is initially + /// zero and is populated after the compressed data has been fully read. + /// + [CLSCompliant(false)] + public uint Crc32 => _crc32; + + /// + /// Gets the compressed size of the entry in bytes. + /// + /// + /// When bit 3 (data descriptor) is set in the local header, this value is initially + /// zero and is populated after the compressed data has been fully read. + /// + public long CompressedLength => _compressedLength; + + /// + /// Gets the uncompressed size of the entry in bytes. + /// + /// + /// When bit 3 (data descriptor) is set in the local header, this value is initially + /// zero and is populated after the compressed data has been fully read. 
+ /// + public long Length => _length; + + /// + /// Gets the raw general purpose bit flags from the local file header. + /// + [CLSCompliant(false)] + public ushort GeneralPurposeBitFlags { get; } + + /// + /// Gets a value indicating whether the entry is encrypted. + /// + public bool IsEncrypted => (GeneralPurposeBitFlags & 1) != 0; + + /// + /// Gets a value indicating whether the entry represents a directory. + /// + public bool IsDirectory => FullName.Length > 0 && (FullName[^1] is '/' or '\\'); + + /// + /// Gets the minimum ZIP specification version needed to extract this entry. + /// + [CLSCompliant(false)] + public ushort VersionNeeded { get; } + + /// + /// Reads decompressed data from this entry into the provided buffer. + /// The data is transparently decompressed based on the entry's compression method. + /// + /// The buffer to read decompressed data into. + /// The number of bytes read, or 0 if all data has been consumed. + /// + /// The entry uses an unsupported compression method, or is a Stored entry with a data descriptor. + /// + public int Read(Span buffer) + { + Stream stream = GetOrCreateDecompressionStream(); + + return stream.Read(buffer); + } + + /// + /// Asynchronously reads decompressed data from this entry into the provided buffer. + /// The data is transparently decompressed based on the entry's compression method. + /// + /// The buffer to read decompressed data into. + /// A token to monitor for cancellation requests. + /// The number of bytes read, or 0 if all data has been consumed. + /// + /// The entry uses an unsupported compression method, or is a Stored entry with a data descriptor. 
+ /// + public async ValueTask ReadAsync(Memory buffer, CancellationToken cancellationToken = default) + { + Stream stream = GetOrCreateDecompressionStream(); + + return await stream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false); + } + + private Stream GetOrCreateDecompressionStream() + { + if (_decompressionStream is not null) + { + return _decompressionStream; + } + + if (_hasDataDescriptor) + { + // Data descriptor entries have unknown compressed size in the local header. + // Deflate/Deflate64 streams are self-terminating, so they can be decompressed + // without knowing the compressed size. Stored data has no termination marker, + // so it cannot be decompressed without the size. + Debug.Assert(_archiveStream is not null); + + _decompressionStream = CompressionMethod switch + { + ZipCompressionMethod.Deflate => new DeflateStream(_archiveStream, CompressionMode.Decompress, leaveOpen: true), + ZipCompressionMethod.Deflate64 => new DeflateManagedStream(_archiveStream, ZipCompressionMethod.Deflate64, uncompressedSize: -1), + ZipCompressionMethod.Stored => throw new NotSupportedException(SR.ZipStreamStoredDataDescriptorNotSupported), + _ => throw new NotSupportedException(SR.UnsupportedCompression) + }; + } + else if (_boundedStream is not null) + { + _decompressionStream = CompressionMethod switch + { + ZipCompressionMethod.Deflate => new DeflateStream(_boundedStream, CompressionMode.Decompress, _length), + ZipCompressionMethod.Deflate64 => new DeflateManagedStream(_boundedStream, ZipCompressionMethod.Deflate64, _length), + ZipCompressionMethod.Stored => _boundedStream, + _ => throw new NotSupportedException(SR.UnsupportedCompression) + }; + } + else + { + // Entry has no data (e.g. empty file or directory). 
+ _decompressionStream = Stream.Null; + } + + return _decompressionStream; + } + + internal bool HasDataDescriptor => _hasDataDescriptor; + + internal void SkipCompressedData() + { + // For known-size entries, drain the bounded stream to advance the archive + // past remaining compressed bytes. For data descriptor entries, drain the + // decompression stream which detects the end of the self-terminating format. + Stream? streamToDrain = _boundedStream; + + if (streamToDrain is null && _hasDataDescriptor && _archiveStream is not null) + { + streamToDrain = GetOrCreateDecompressionStream(); + } + + if (streamToDrain is null) + { + return; + } + + byte[] skipBuffer = ArrayPool.Shared.Rent(4096); + try + { + while (streamToDrain.Read(skipBuffer) > 0) { } + } + finally + { + ArrayPool.Shared.Return(skipBuffer); + } + } + + internal async ValueTask SkipCompressedDataAsync(CancellationToken cancellationToken) + { + Stream? streamToDrain = _boundedStream; + + if (streamToDrain is null && _hasDataDescriptor && _archiveStream is not null) + { + streamToDrain = GetOrCreateDecompressionStream(); + } + + if (streamToDrain is null) + { + return; + } + + byte[] skipBuffer = ArrayPool.Shared.Rent(4096); + try + { + while (await streamToDrain.ReadAsync(skipBuffer.AsMemory(), cancellationToken).ConfigureAwait(false) > 0) { } + } + finally + { + ArrayPool.Shared.Return(skipBuffer); + } + } + + internal void UpdateDataDescriptor(uint crc32, long compressedLength, long length) + { + _crc32 = crc32; + _compressedLength = compressedLength; + _length = length; + } + + /// + /// A read-only, forward-only stream that limits the number of bytes + /// that can be read from an underlying stream without closing it. 
+ /// + private sealed class BoundedReadOnlyStream : Stream + { + private readonly Stream _baseStream; + private long _remaining; + + public BoundedReadOnlyStream(Stream baseStream, long length) + { + _baseStream = baseStream; + _remaining = length; + } + + public override bool CanRead => true; + public override bool CanSeek => false; + public override bool CanWrite => false; + public override long Length => throw new NotSupportedException(); + + public override long Position + { + get => throw new NotSupportedException(); + set => throw new NotSupportedException(); + } + + public override int Read(byte[] buffer, int offset, int count) + => Read(buffer.AsSpan(offset, count)); + + public override int Read(Span buffer) + { + if (_remaining <= 0) + { + return 0; + } + + if (buffer.Length > _remaining) + { + buffer = buffer.Slice(0, (int)_remaining); + } + + int bytesRead = _baseStream.Read(buffer); + _remaining -= bytesRead; + + return bytesRead; + } + + public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) + => ReadAsync(buffer.AsMemory(offset, count), cancellationToken).AsTask(); + + public override ValueTask ReadAsync(Memory buffer, CancellationToken cancellationToken = default) + { + if (_remaining <= 0) + { + return new ValueTask(0); + } + + if (buffer.Length > _remaining) + { + buffer = buffer.Slice(0, (int)_remaining); + } + + return ReadAsyncCore(buffer, cancellationToken); + } + + private async ValueTask ReadAsyncCore(Memory buffer, CancellationToken cancellationToken) + { + int bytesRead = await _baseStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false); + _remaining -= bytesRead; + + return bytesRead; + } + + public override void Flush() { } + public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException(); + public override void SetLength(long value) => throw new NotSupportedException(); + public override void Write(byte[] buffer, int offset, int count) => throw 
new NotSupportedException(); + + protected override void Dispose(bool disposing) + { + if (disposing) + { + _baseStream.Dispose(); + } + + base.Dispose(disposing); + } + + public override async ValueTask DisposeAsync() + { + await _baseStream.DisposeAsync().ConfigureAwait(false); + await base.DisposeAsync().ConfigureAwait(false); + } + } +} diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs new file mode 100644 index 00000000000000..aebce74ac9ba39 --- /dev/null +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs @@ -0,0 +1,408 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Buffers; +using System.Buffers.Binary; +using System.Text; +using System.Threading; +using System.Threading.Tasks; + +namespace System.IO.Compression; + +public sealed class ZipStreamReader : IDisposable, IAsyncDisposable +{ + private const ushort DataDescriptorBitFlag = 0x8; + private const ushort UnicodeFileNameBitFlag = 0x800; + + private bool _isDisposed; + private readonly bool _leaveOpen; + private readonly Encoding? _entryNameEncoding; + private ZipStreamEntry? _currentEntry; + private readonly Stream _archiveStream; + private bool _reachedEnd; + + public ZipStreamReader(Stream archiveStream, bool leaveOpen = false, Encoding? entryNameEncoding = null) + { + ArgumentNullException.ThrowIfNull(archiveStream); + + if (!archiveStream.CanRead) + { + throw new ArgumentException(SR.NotSupported_UnreadableStream, nameof(archiveStream)); + } + + _archiveStream = archiveStream; + _leaveOpen = leaveOpen; + _entryNameEncoding = entryNameEncoding; + } + + /// + /// Reads the next entry from the ZIP archive stream by parsing the local file header. + /// + /// + /// The next , or if there are no more entries. 
+ /// + /// The reader has been disposed. + /// The archive stream contains invalid data. + public ZipStreamEntry? GetNextEntry() + { + ObjectDisposedException.ThrowIf(_isDisposed, this); + + if (_reachedEnd) + { + return null; + } + + AdvancePastCurrentEntry(); + + Span headerBytes = stackalloc byte[ZipLocalFileHeader.SizeOfLocalHeader]; + int bytesRead = _archiveStream.ReadAtLeast(headerBytes, headerBytes.Length, throwOnEndOfStream: false); + + if (bytesRead < ZipLocalFileHeader.SizeOfLocalHeader) + { + _reachedEnd = true; + return null; + } + + if (!headerBytes.StartsWith(ZipLocalFileHeader.SignatureConstantBytes)) + { + _reachedEnd = true; + return null; + } + + ReadLocalFileHeader(headerBytes, out string fullName, out ushort versionNeeded, out ushort generalPurposeBitFlags, + out ushort compressionMethod, out DateTimeOffset lastModified, out uint crc32, + out long compressedSize, out long uncompressedSize, out bool hasDataDescriptor); + + _currentEntry = new ZipStreamEntry( + fullName, (ZipCompressionMethod)compressionMethod, lastModified, crc32, + compressedSize, uncompressedSize, generalPurposeBitFlags, versionNeeded, + _archiveStream, hasDataDescriptor); + + return _currentEntry; + } + + /// + /// Asynchronously reads the next entry from the ZIP archive stream by parsing the local file header. + /// + /// A token to monitor for cancellation requests. + /// + /// The next , or if there are no more entries. + /// + /// The reader has been disposed. + /// The archive stream contains invalid data. 
+ public async ValueTask GetNextEntryAsync(CancellationToken cancellationToken = default) + { + ObjectDisposedException.ThrowIf(_isDisposed, this); + + if (_reachedEnd) + { + return null; + } + + await AdvancePastCurrentEntryAsync(cancellationToken).ConfigureAwait(false); + + byte[] headerBytes = ArrayPool.Shared.Rent(ZipLocalFileHeader.SizeOfLocalHeader); + try + { + int bytesRead = await _archiveStream.ReadAtLeastAsync( + headerBytes.AsMemory(0, ZipLocalFileHeader.SizeOfLocalHeader), + ZipLocalFileHeader.SizeOfLocalHeader, + throwOnEndOfStream: false, + cancellationToken).ConfigureAwait(false); + + if (bytesRead < ZipLocalFileHeader.SizeOfLocalHeader) + { + _reachedEnd = true; + return null; + } + + if (!headerBytes.AsSpan().StartsWith(ZipLocalFileHeader.SignatureConstantBytes)) + { + _reachedEnd = true; + return null; + } + + await ReadLocalFileHeaderAsync(headerBytes, cancellationToken).ConfigureAwait(false); + } + finally + { + ArrayPool.Shared.Return(headerBytes); + } + + return _currentEntry; + } + + private void AdvancePastCurrentEntry() + { + if (_currentEntry is null) + { + return; + } + + _currentEntry.SkipCompressedData(); + + if (_currentEntry.HasDataDescriptor) + { + ReadDataDescriptor(_currentEntry); + } + + _currentEntry = null; + } + + private async ValueTask AdvancePastCurrentEntryAsync(CancellationToken cancellationToken) + { + if (_currentEntry is null) + { + return; + } + + await _currentEntry.SkipCompressedDataAsync(cancellationToken).ConfigureAwait(false); + + if (_currentEntry.HasDataDescriptor) + { + await ReadDataDescriptorAsync(_currentEntry, cancellationToken).ConfigureAwait(false); + } + + _currentEntry = null; + } + + private void ReadLocalFileHeader( + ReadOnlySpan headerBytes, + out string fullName, + out ushort versionNeeded, + out ushort generalPurposeBitFlags, + out ushort compressionMethod, + out DateTimeOffset lastModified, + out uint crc32, + out long compressedSize, + out long uncompressedSize, + out bool hasDataDescriptor) + 
{ + versionNeeded = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.VersionNeededToExtract..]); + generalPurposeBitFlags = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.GeneralPurposeBitFlags..]); + compressionMethod = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.CompressionMethod..]); + uint lastModifiedRaw = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.LastModified..]); + crc32 = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.Crc32..]); + uint compressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.CompressedSize..]); + uint uncompressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.UncompressedSize..]); + ushort filenameLength = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.FilenameLength..]); + ushort extraFieldLength = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.ExtraFieldLength..]); + + lastModified = new DateTimeOffset(ZipHelper.DosTimeToDateTime(lastModifiedRaw)); + hasDataDescriptor = (generalPurposeBitFlags & DataDescriptorBitFlag) != 0; + + int dynamicLength = filenameLength + extraFieldLength; + byte[]? rentedBuffer = null; + Span dynamicBuffer = dynamicLength <= 512 + ? stackalloc byte[512].Slice(0, dynamicLength) + : (rentedBuffer = ArrayPool.Shared.Rent(dynamicLength)).AsSpan(0, dynamicLength); + + try + { + _archiveStream.ReadExactly(dynamicBuffer); + + Encoding encoding = (generalPurposeBitFlags & UnicodeFileNameBitFlag) != 0 + ? Encoding.UTF8 + : _entryNameEncoding ?? 
Encoding.UTF8; + + fullName = encoding.GetString(dynamicBuffer[..filenameLength]); + + // Handle Zip64 extra field for sizes + bool compressedSizeInZip64 = compressedSizeSmall == ZipHelper.Mask32Bit; + bool uncompressedSizeInZip64 = uncompressedSizeSmall == ZipHelper.Mask32Bit; + + if (compressedSizeInZip64 || uncompressedSizeInZip64) + { + Zip64ExtraField zip64 = Zip64ExtraField.GetJustZip64Block( + dynamicBuffer.Slice(filenameLength, extraFieldLength), + readUncompressedSize: uncompressedSizeInZip64, + readCompressedSize: compressedSizeInZip64, + readLocalHeaderOffset: false, + readStartDiskNumber: false); + + compressedSize = zip64.CompressedSize ?? compressedSizeSmall; + uncompressedSize = zip64.UncompressedSize ?? uncompressedSizeSmall; + } + else + { + compressedSize = compressedSizeSmall; + uncompressedSize = uncompressedSizeSmall; + } + } + finally + { + if (rentedBuffer is not null) + { + ArrayPool.Shared.Return(rentedBuffer); + } + } + } + + private async ValueTask ReadLocalFileHeaderAsync(byte[] headerBytes, CancellationToken cancellationToken) + { + ushort versionNeeded = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.VersionNeededToExtract)); + ushort generalPurposeBitFlags = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.GeneralPurposeBitFlags)); + ushort compressionMethod = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.CompressionMethod)); + uint lastModifiedRaw = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.LastModified)); + uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.Crc32)); + uint compressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.CompressedSize)); + uint uncompressedSizeSmall = 
BinaryPrimitives.ReadUInt32LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.UncompressedSize)); + ushort filenameLength = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.FilenameLength)); + ushort extraFieldLength = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.ExtraFieldLength)); + + DateTimeOffset lastModified = new DateTimeOffset(ZipHelper.DosTimeToDateTime(lastModifiedRaw)); + bool hasDataDescriptor = (generalPurposeBitFlags & DataDescriptorBitFlag) != 0; + + int dynamicLength = filenameLength + extraFieldLength; + byte[] dynamicBuffer = ArrayPool.Shared.Rent(dynamicLength); + + try + { + await _archiveStream.ReadExactlyAsync(dynamicBuffer.AsMemory(0, dynamicLength), cancellationToken).ConfigureAwait(false); + + Encoding encoding = (generalPurposeBitFlags & UnicodeFileNameBitFlag) != 0 + ? Encoding.UTF8 + : _entryNameEncoding ?? Encoding.UTF8; + + string fullName = encoding.GetString(dynamicBuffer.AsSpan(0, filenameLength)); + + bool compressedSizeInZip64 = compressedSizeSmall == ZipHelper.Mask32Bit; + bool uncompressedSizeInZip64 = uncompressedSizeSmall == ZipHelper.Mask32Bit; + long compressedSize; + long uncompressedSize; + + if (compressedSizeInZip64 || uncompressedSizeInZip64) + { + Zip64ExtraField zip64 = Zip64ExtraField.GetJustZip64Block( + dynamicBuffer.AsSpan(filenameLength, extraFieldLength), + readUncompressedSize: uncompressedSizeInZip64, + readCompressedSize: compressedSizeInZip64, + readLocalHeaderOffset: false, + readStartDiskNumber: false); + + compressedSize = zip64.CompressedSize ?? compressedSizeSmall; + uncompressedSize = zip64.UncompressedSize ?? 
uncompressedSizeSmall; + } + else + { + compressedSize = compressedSizeSmall; + uncompressedSize = uncompressedSizeSmall; + } + + _currentEntry = new ZipStreamEntry( + fullName, (ZipCompressionMethod)compressionMethod, lastModified, crc32, + compressedSize, uncompressedSize, generalPurposeBitFlags, versionNeeded, + _archiveStream, hasDataDescriptor); + } + finally + { + ArrayPool.Shared.Return(dynamicBuffer); + } + } + + private void ReadDataDescriptor(ZipStreamEntry entry) + { + // Data descriptor layout (signature is optional): + // [signature 4B] + CRC-32 4B + compressed size (4B or 8B) + uncompressed size (4B or 8B) + // Read incrementally to avoid consuming bytes from the next entry. + Span buffer = stackalloc byte[24]; + + _archiveStream.ReadExactly(buffer[..4]); + int offset = 0; + int totalRead = 4; + + if (buffer[..4].SequenceEqual(ZipLocalFileHeader.DataDescriptorSignatureConstantBytes)) + { + offset = 4; + _archiveStream.ReadExactly(buffer.Slice(4, 4)); + totalRead = 8; + } + + bool isZip64 = entry.VersionNeeded >= (ushort)ZipVersionNeededValues.Zip64; + int sizesBytes = isZip64 ? 
16 : 8; + _archiveStream.ReadExactly(buffer.Slice(totalRead, sizesBytes)); + + uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(buffer[offset..]); + int sizesOffset = offset + 4; + + if (isZip64) + { + entry.UpdateDataDescriptor( + crc32, + compressedLength: BinaryPrimitives.ReadInt64LittleEndian(buffer[sizesOffset..]), + length: BinaryPrimitives.ReadInt64LittleEndian(buffer[(sizesOffset + 8)..])); + } + else + { + entry.UpdateDataDescriptor( + crc32, + compressedLength: BinaryPrimitives.ReadUInt32LittleEndian(buffer[sizesOffset..]), + length: BinaryPrimitives.ReadUInt32LittleEndian(buffer[(sizesOffset + 4)..])); + } + } + + private async ValueTask ReadDataDescriptorAsync(ZipStreamEntry entry, CancellationToken cancellationToken) + { + byte[] buffer = new byte[24]; + + await _archiveStream.ReadExactlyAsync(buffer.AsMemory(0, 4), cancellationToken).ConfigureAwait(false); + int offset = 0; + int totalRead = 4; + + if (buffer.AsSpan(0, 4).SequenceEqual(ZipLocalFileHeader.DataDescriptorSignatureConstantBytes)) + { + offset = 4; + await _archiveStream.ReadExactlyAsync(buffer.AsMemory(4, 4), cancellationToken).ConfigureAwait(false); + totalRead = 8; + } + + bool isZip64 = entry.VersionNeeded >= (ushort)ZipVersionNeededValues.Zip64; + int sizesBytes = isZip64 ? 
16 : 8; + await _archiveStream.ReadExactlyAsync(buffer.AsMemory(totalRead, sizesBytes), cancellationToken).ConfigureAwait(false); + + uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(buffer.AsSpan(offset)); + int sizesOffset = offset + 4; + + if (isZip64) + { + entry.UpdateDataDescriptor( + crc32, + compressedLength: BinaryPrimitives.ReadInt64LittleEndian(buffer.AsSpan(sizesOffset)), + length: BinaryPrimitives.ReadInt64LittleEndian(buffer.AsSpan(sizesOffset + 8))); + } + else + { + entry.UpdateDataDescriptor( + crc32, + compressedLength: BinaryPrimitives.ReadUInt32LittleEndian(buffer.AsSpan(sizesOffset)), + length: BinaryPrimitives.ReadUInt32LittleEndian(buffer.AsSpan(sizesOffset + 4))); + } + } + + public void Dispose() + { + if (!_isDisposed) + { + _isDisposed = true; + + if (!_leaveOpen) + { + _archiveStream.Dispose(); + } + } + } + + public async ValueTask DisposeAsync() + { + if (!_isDisposed) + { + _isDisposed = true; + + if (!_leaveOpen) + { + await _archiveStream.DisposeAsync().ConfigureAwait(false); + } + } + } + +} diff --git a/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj b/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj index 284ccb348c15fb..dbeb797609f5bc 100644 --- a/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj +++ b/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj @@ -27,6 +27,7 @@ + diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs new file mode 100644 index 00000000000000..1aeae8905250c1 --- /dev/null +++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs @@ -0,0 +1,216 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System.Collections.Generic; +using System.Threading.Tasks; +using Xunit; + +namespace System.IO.Compression.Tests +{ + public partial class zip_StreamEntryReadTests : ZipFileTestBase + { + private static readonly byte[] s_smallContent = "Hello, small world!"u8.ToArray(); + private static readonly byte[] s_mediumContent = new byte[8192]; + private static readonly byte[] s_largeContent = new byte[65536]; + + static zip_StreamEntryReadTests() + { + Random rng = new(42); + rng.NextBytes(s_mediumContent); + rng.NextBytes(s_largeContent); + } + + public static IEnumerable DeflateWithKnownSize_Data() + { + foreach (bool async in _bools) + { + yield return new object[] { async }; + } + } + + public static IEnumerable StoredWithKnownSize_Data() + { + foreach (bool async in _bools) + { + yield return new object[] { async }; + } + } + + public static IEnumerable DeflateWithDataDescriptor_Data() + { + foreach (bool async in _bools) + { + yield return new object[] { async }; + } + } + + public static IEnumerable StoredWithDataDescriptor_Data() + { + foreach (bool async in _bools) + { + yield return new object[] { async }; + } + } + + [Theory] + [MemberData(nameof(DeflateWithKnownSize_Data))] + public async Task Read_DeflateWithKnownSize_ReturnsDecompressedData(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + byte[][] expectedContents = [s_smallContent, s_mediumContent, s_largeContent]; + + using MemoryStream archiveStream = new(zipBytes); + using ZipStreamReader reader = new(archiveStream); + + for (int i = 0; i < expectedContents.Length; i++) + { + ZipStreamEntry entry = async + ? 
await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + + Assert.NotNull(entry); + Assert.False(entry.IsDirectory); + Assert.Equal(ZipCompressionMethod.Deflate, entry.CompressionMethod); + + byte[] decompressed = await ReadEntryFully(entry, async); + Assert.Equal(expectedContents[i], decompressed); + } + + ZipStreamEntry end = async + ? await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + Assert.Null(end); + } + + [Theory] + [MemberData(nameof(StoredWithKnownSize_Data))] + public async Task Read_StoredWithKnownSize_ReturnsUncompressedData(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.NoCompression, seekable: true); + byte[][] expectedContents = [s_smallContent, s_mediumContent, s_largeContent]; + + using MemoryStream archiveStream = new(zipBytes); + using ZipStreamReader reader = new(archiveStream); + + for (int i = 0; i < expectedContents.Length; i++) + { + ZipStreamEntry entry = async + ? await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + + Assert.NotNull(entry); + Assert.Equal(ZipCompressionMethod.Stored, entry.CompressionMethod); + + byte[] decompressed = await ReadEntryFully(entry, async); + Assert.Equal(expectedContents[i], decompressed); + } + } + + [Theory] + [MemberData(nameof(DeflateWithDataDescriptor_Data))] + public async Task Read_DeflateWithDataDescriptor_ReturnsDecompressedData(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: false); + byte[][] expectedContents = [s_smallContent, s_mediumContent, s_largeContent]; + + using MemoryStream archiveStream = new(zipBytes); + using ZipStreamReader reader = new(archiveStream); + + for (int i = 0; i < expectedContents.Length; i++) + { + ZipStreamEntry entry = async + ? 
await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + + Assert.NotNull(entry); + Assert.Equal(ZipCompressionMethod.Deflate, entry.CompressionMethod); + + byte[] decompressed = await ReadEntryFully(entry, async); + Assert.Equal(expectedContents[i], decompressed); + } + } + + [Theory] + [MemberData(nameof(StoredWithDataDescriptor_Data))] + public async Task Read_StoredWithDataDescriptor_ThrowsNotSupported(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.NoCompression, seekable: false); + + using MemoryStream archiveStream = new(zipBytes); + using ZipStreamReader reader = new(archiveStream); + + ZipStreamEntry entry = async + ? await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + + Assert.NotNull(entry); + Assert.Equal(ZipCompressionMethod.Stored, entry.CompressionMethod); + + byte[] buffer = new byte[256]; + + if (async) + { + await Assert.ThrowsAsync(() => entry.ReadAsync(buffer).AsTask()); + } + else + { + Assert.Throws(() => entry.Read(buffer)); + } + } + + /// <summary> + /// Creates a ZIP archive with three entries of different sizes (small, medium, large). + /// When <paramref name="seekable"/> is true, the archive is written to a seekable stream + /// so entries have known sizes. When false, a non-seekable wrapper is used so the + /// archive writer sets the data descriptor bit. + /// </summary> + private static byte[] CreateZipWithEntries(CompressionLevel compressionLevel, bool seekable) + { + MemoryStream ms = new(); + + Stream writeStream = seekable + ?
ms + : new WrappedStream(ms, canRead: true, canWrite: true, canSeek: false); + + using (ZipArchive archive = new(writeStream, ZipArchiveMode.Create, leaveOpen: true)) + { + AddEntry(archive, "small.txt", s_smallContent, compressionLevel); + AddEntry(archive, "medium.bin", s_mediumContent, compressionLevel); + AddEntry(archive, "large.bin", s_largeContent, compressionLevel); + } + + return ms.ToArray(); + } + + private static void AddEntry(ZipArchive archive, string name, byte[] contents, CompressionLevel level) + { + ZipArchiveEntry entry = archive.CreateEntry(name, level); + using Stream stream = entry.Open(); + stream.Write(contents); + } + + private static async Task ReadEntryFully(ZipStreamEntry entry, bool async) + { + using MemoryStream result = new(); + byte[] buffer = new byte[4096]; + + int bytesRead; + if (async) + { + while ((bytesRead = await entry.ReadAsync(buffer)) > 0) + { + result.Write(buffer, 0, bytesRead); + } + } + else + { + while ((bytesRead = entry.Read(buffer)) > 0) + { + result.Write(buffer, 0, bytesRead); + } + } + + return result.ToArray(); + } + } +} From 1c6bffd9cfc02d1aa5059fd7b6aa98a2c8e7c05d Mon Sep 17 00:00:00 2001 From: alinpahontu2912 Date: Tue, 24 Mar 2026 14:04:54 +0100 Subject: [PATCH 3/8] add crc validation to entries --- .../src/Resources/Strings.resx | 3 - .../System/IO/Compression/ZipStreamEntry.cs | 65 +++++++++++++++++-- 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/src/libraries/System.IO.Compression/src/Resources/Strings.resx b/src/libraries/System.IO.Compression/src/Resources/Strings.resx index d8735943732612..4aaf4d9b78d92d 100644 --- a/src/libraries/System.IO.Compression/src/Resources/Strings.resx +++ b/src/libraries/System.IO.Compression/src/Resources/Strings.resx @@ -374,9 +374,6 @@ Stored compression entries with data descriptors cannot be read in forward-only mode because the compressed size is unknown. 
- - Stored compression entries with data descriptors cannot be read in forward-only mode because the compressed size is unknown. - The CRC32 checksum of the extracted data does not match the expected value from the archive. diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamEntry.cs index 7f59dc2a76b0b4..ccfe08d58fa140 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamEntry.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamEntry.cs @@ -18,6 +18,10 @@ public sealed class ZipStreamEntry private long _compressedLength; private long _length; + // Running CRC32 computed over decompressed bytes for data-descriptor entries + private uint _runningCrc; + private long _totalDecompressedBytesRead; + internal ZipStreamEntry( string fullName, ZipCompressionMethod compressionMethod, @@ -132,11 +136,21 @@ internal ZipStreamEntry( /// /// The entry uses an unsupported compression method, or is a Stored entry with a data descriptor. /// + /// + /// The decompressed data does not match the expected CRC-32 checksum or length. + /// public int Read(Span buffer) { Stream stream = GetOrCreateDecompressionStream(); + int bytesRead = stream.Read(buffer); + + if (_hasDataDescriptor && bytesRead > 0) + { + _runningCrc = Crc32Helper.UpdateCrc32(_runningCrc, buffer.Slice(0, bytesRead)); + _totalDecompressedBytesRead += bytesRead; + } - return stream.Read(buffer); + return bytesRead; } /// @@ -149,11 +163,21 @@ public int Read(Span buffer) /// /// The entry uses an unsupported compression method, or is a Stored entry with a data descriptor. /// + /// + /// The decompressed data does not match the expected CRC-32 checksum or length. 
+ /// public async ValueTask ReadAsync(Memory buffer, CancellationToken cancellationToken = default) { Stream stream = GetOrCreateDecompressionStream(); + int bytesRead = await stream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false); - return await stream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false); + if (_hasDataDescriptor && bytesRead > 0) + { + _runningCrc = Crc32Helper.UpdateCrc32(_runningCrc, buffer.Span.Slice(0, bytesRead)); + _totalDecompressedBytesRead += bytesRead; + } + + return bytesRead; } private Stream GetOrCreateDecompressionStream() @@ -188,6 +212,12 @@ private Stream GetOrCreateDecompressionStream() ZipCompressionMethod.Stored => _boundedStream, _ => throw new NotSupportedException(SR.UnsupportedCompression) }; + + // Wrap with CRC validation for entries with known CRC and uncompressed length. + // CrcValidatingReadStream computes a running CRC over decompressed bytes and + // throws InvalidDataException if the checksum does not match once all expected + // bytes have been read. 
+ _decompressionStream = new CrcValidatingReadStream(_decompressionStream, _crc32, _length); } else { @@ -220,7 +250,15 @@ internal void SkipCompressedData() byte[] skipBuffer = ArrayPool.Shared.Rent(4096); try { - while (streamToDrain.Read(skipBuffer) > 0) { } + int bytesRead; + while ((bytesRead = streamToDrain.Read(skipBuffer)) > 0) + { + if (_hasDataDescriptor) + { + _runningCrc = Crc32Helper.UpdateCrc32(_runningCrc, skipBuffer.AsSpan(0, bytesRead)); + _totalDecompressedBytesRead += bytesRead; + } + } } finally { @@ -245,7 +283,15 @@ internal async ValueTask SkipCompressedDataAsync(CancellationToken cancellationT byte[] skipBuffer = ArrayPool.Shared.Rent(4096); try { - while (await streamToDrain.ReadAsync(skipBuffer.AsMemory(), cancellationToken).ConfigureAwait(false) > 0) { } + int bytesRead; + while ((bytesRead = await streamToDrain.ReadAsync(skipBuffer.AsMemory(), cancellationToken).ConfigureAwait(false)) > 0) + { + if (_hasDataDescriptor) + { + _runningCrc = Crc32Helper.UpdateCrc32(_runningCrc, skipBuffer.AsSpan(0, bytesRead)); + _totalDecompressedBytesRead += bytesRead; + } + } } finally { @@ -258,6 +304,17 @@ internal void UpdateDataDescriptor(uint crc32, long compressedLength, long lengt _crc32 = crc32; _compressedLength = compressedLength; _length = length; + + // Validate that the decompressed data matches the data descriptor values. 
+ if (_runningCrc != crc32) + { + throw new InvalidDataException(SR.CrcMismatch); + } + + if (_totalDecompressedBytesRead != length) + { + throw new InvalidDataException(SR.UnexpectedStreamLength); + } } /// From bb924b80f8ffb897c375b040ffe53a88485f2ded Mon Sep 17 00:00:00 2001 From: alinpahontu2912 Date: Wed, 25 Mar 2026 12:03:06 +0100 Subject: [PATCH 4/8] add more tests, update name and simplify logic to follow tarreader more closely --- .../ref/System.IO.Compression.cs | 16 +- .../src/Resources/Strings.resx | 3 + .../src/System.IO.Compression.csproj | 5 +- .../System/IO/Compression/ZipCustomStreams.cs | 104 +++- .../IO/Compression/ZipForwardReadEntry.cs | 202 +++++++ .../System/IO/Compression/ZipStreamEntry.cs | 414 ------------- .../System/IO/Compression/ZipStreamReader.cs | 504 ++++++++++------ .../ZipArchive/zip_StreamEntryReadTests.cs | 547 ++++++++++++++++-- 8 files changed, 1113 insertions(+), 682 deletions(-) create mode 100644 src/libraries/System.IO.Compression/src/System/IO/Compression/ZipForwardReadEntry.cs delete mode 100644 src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamEntry.cs diff --git a/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs b/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs index 39d425e41cbfbe..ab2e7eb6ad878f 100644 --- a/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs +++ b/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs @@ -144,13 +144,14 @@ public enum ZipCompressionMethod Deflate = 8, Deflate64 = 9, } - public sealed partial class ZipStreamEntry + public sealed partial class ZipForwardReadEntry { - internal ZipStreamEntry() { } + internal ZipForwardReadEntry() { } public long CompressedLength { get { throw null; } } public System.IO.Compression.ZipCompressionMethod CompressionMethod { get { throw null; } } [System.CLSCompliantAttribute(false)] public uint Crc32 { get { throw null; } } + public System.IO.Stream? 
DataStream { get { throw null; } } public string FullName { get { throw null; } } [System.CLSCompliantAttribute(false)] public ushort GeneralPurposeBitFlags { get { throw null; } } @@ -161,16 +162,17 @@ internal ZipStreamEntry() { } public string Name { get { throw null; } } [System.CLSCompliantAttribute(false)] public ushort VersionNeeded { get { throw null; } } - public int Read(System.Span buffer) { throw null; } - public System.Threading.Tasks.ValueTask ReadAsync(System.Memory buffer, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public void ExtractToFile(string destinationFileName, bool overwrite) { } + public System.Threading.Tasks.Task ExtractToFileAsync(string destinationFileName, bool overwrite, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } } public sealed partial class ZipStreamReader : System.IAsyncDisposable, System.IDisposable { - public ZipStreamReader(System.IO.Stream archiveStream, bool leaveOpen = false, System.Text.Encoding? entryNameEncoding = null) { } + public ZipStreamReader(System.IO.Stream stream, bool leaveOpen = false) { } + public ZipStreamReader(System.IO.Stream stream, System.Text.Encoding? entryNameEncoding, bool leaveOpen = false) { } public void Dispose() { } public System.Threading.Tasks.ValueTask DisposeAsync() { throw null; } - public System.IO.Compression.ZipStreamEntry? GetNextEntry() { throw null; } - public System.Threading.Tasks.ValueTask GetNextEntryAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public System.IO.Compression.ZipForwardReadEntry? 
GetNextEntry(bool copyData = false) { throw null; } + public System.Threading.Tasks.ValueTask GetNextEntryAsync(bool copyData = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } } public sealed partial class ZLibCompressionOptions { diff --git a/src/libraries/System.IO.Compression/src/Resources/Strings.resx b/src/libraries/System.IO.Compression/src/Resources/Strings.resx index 4aaf4d9b78d92d..458d6bdd77c969 100644 --- a/src/libraries/System.IO.Compression/src/Resources/Strings.resx +++ b/src/libraries/System.IO.Compression/src/Resources/Strings.resx @@ -380,4 +380,7 @@ The decompressed data length does not match the expected value from the archive. + + Encrypted entries with data descriptors cannot be read in forward-only mode because the compressed size is unknown. + diff --git a/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj b/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj index 515eacaeb46c6b..b0ae33ad312bc2 100644 --- a/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj +++ b/src/libraries/System.IO.Compression/src/System.IO.Compression.csproj @@ -53,7 +53,7 @@ - + @@ -71,9 +71,6 @@ - - - diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs index ea2fc10ec55699..4c019aac0615a2 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs @@ -2,7 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. 
using System.Diagnostics; -using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; @@ -209,17 +208,12 @@ private void NotifyWrite() public override void Flush() { ThrowIfDisposed(); - ThrowIfCantWrite(); - - _baseStream.Flush(); } public override Task FlushAsync(CancellationToken cancellationToken) { ThrowIfDisposed(); - ThrowIfCantWrite(); - - return _baseStream.FlushAsync(cancellationToken); + return Task.CompletedTask; } protected override void Dispose(bool disposing) @@ -713,6 +707,96 @@ public override async ValueTask DisposeAsync() } } + /// + /// A read-only, forward-only stream that limits the number of bytes + /// that can be read from an underlying stream without closing it. + /// Used by to bound compressed entry data. + /// + internal sealed class BoundedReadOnlyStream : Stream + { + private readonly Stream _baseStream; + private long _remaining; + private bool _isDisposed; + + public BoundedReadOnlyStream(Stream baseStream, long length) + { + _baseStream = baseStream; + _remaining = length; + } + + public override bool CanRead => !_isDisposed; + public override bool CanSeek => false; + public override bool CanWrite => false; + public override long Length => throw new NotSupportedException(); + + public override long Position + { + get => throw new NotSupportedException(); + set => throw new NotSupportedException(); + } + + public override int Read(byte[] buffer, int offset, int count) + => Read(buffer.AsSpan(offset, count)); + + public override int Read(Span buffer) + { + if (_remaining <= 0) + { + return 0; + } + + if (buffer.Length > _remaining) + { + buffer = buffer.Slice(0, (int)_remaining); + } + + int bytesRead = _baseStream.Read(buffer); + _remaining -= bytesRead; + + return bytesRead; + } + + public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) + => ReadAsync(buffer.AsMemory(offset, count), cancellationToken).AsTask(); + + public override async ValueTask 
ReadAsync(Memory buffer, CancellationToken cancellationToken = default) + { + if (_remaining <= 0) + { + return 0; + } + + if (buffer.Length > _remaining) + { + buffer = buffer.Slice(0, (int)_remaining); + } + + int bytesRead = await _baseStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false); + _remaining -= bytesRead; + + return bytesRead; + } + + public override void Flush() { } + public override Task FlushAsync(CancellationToken cancellationToken) => Task.CompletedTask; + public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException(); + public override void SetLength(long value) => throw new NotSupportedException(); + public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException(); + + protected override void Dispose(bool disposing) + { + _isDisposed = true; + base.Dispose(disposing); + } + + public override ValueTask DisposeAsync() + { + _isDisposed = true; + + return base.DisposeAsync(); + } + } + internal sealed class CrcValidatingReadStream : Stream { private readonly Stream _baseStream; @@ -735,6 +819,9 @@ public CrcValidatingReadStream(Stream baseStream, uint expectedCrc, long expecte _runningCrc = 0; } + internal uint RunningCrc => _runningCrc; + internal long TotalBytesRead => _totalBytesRead; + public override bool CanRead => !_isDisposed && _baseStream.CanRead; public override bool CanSeek => !_isDisposed && _baseStream.CanSeek; public override bool CanWrite => false; @@ -895,13 +982,12 @@ public override void Write(byte[] buffer, int offset, int count) public override void Flush() { ThrowIfDisposed(); - throw new NotSupportedException(SR.WritingNotSupported); } public override Task FlushAsync(CancellationToken cancellationToken) { ThrowIfDisposed(); - throw new NotSupportedException(SR.WritingNotSupported); + return Task.CompletedTask; } public override long Seek(long offset, SeekOrigin origin) diff --git 
a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipForwardReadEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipForwardReadEntry.cs new file mode 100644 index 00000000000000..35b230ecffdd9c --- /dev/null +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipForwardReadEntry.cs @@ -0,0 +1,202 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Threading; +using System.Threading.Tasks; + +namespace System.IO.Compression; + +/// <summary> +/// Represents a single entry read from a ZIP archive by <see cref="ZipStreamReader"/>. +/// Provides metadata from the local file header and a <see cref="DataStream"/> for +/// reading the decompressed entry data. +/// </summary> +/// <remarks> +/// <para> +/// When copyData is <see langword="false"/> (the default), the <see cref="DataStream"/> +/// reads directly from the underlying archive stream. It is invalidated when the reader +/// advances to the next entry via <see cref="ZipStreamReader.GetNextEntry"/>. Any unread +/// data is automatically drained at that point. +/// </para> +/// <para> +/// When copyData is <see langword="true"/>, the decompressed data is copied into a +/// separate stream and the entry remains valid after the reader advances. +/// </para> +/// </remarks> +public sealed class ZipForwardReadEntry +{ + private uint _crc32; + private long _compressedLength; + private long _length; + + internal ZipForwardReadEntry( + string fullName, + ZipCompressionMethod compressionMethod, + DateTimeOffset lastModified, + uint crc32, + long compressedLength, + long length, + ushort generalPurposeBitFlags, + ushort versionNeeded, + bool hasDataDescriptor, + Stream? dataStream) + { + FullName = fullName; + CompressionMethod = compressionMethod; + LastModified = lastModified; + _crc32 = crc32; + _compressedLength = compressedLength; + _length = length; + GeneralPurposeBitFlags = generalPurposeBitFlags; + VersionNeeded = versionNeeded; + HasDataDescriptor = hasDataDescriptor; + DataStream = dataStream; + } + + /// + /// Gets the full name (relative path) of the entry, including any directory path.
+ /// + public string FullName { get; } + + /// + /// Gets the file name portion of the entry (the part after the last directory separator). + /// + public string Name => Path.GetFileName(FullName); + + /// + /// Gets the compression method used for this entry. + /// + public ZipCompressionMethod CompressionMethod { get; } + + /// + /// Gets the last modification date and time of the entry. + /// + public DateTimeOffset LastModified { get; } + + /// + /// Gets the CRC-32 checksum of the uncompressed data. + /// + /// + /// When bit 3 (data descriptor) is set in the local header, this value is initially + /// zero and is populated after the compressed data has been fully read. + /// + [CLSCompliant(false)] + public uint Crc32 => _crc32; + + /// + /// Gets the compressed size of the entry in bytes. + /// + /// + /// When bit 3 (data descriptor) is set in the local header, this value is initially + /// zero and is populated after the compressed data has been fully read. + /// + public long CompressedLength => _compressedLength; + + /// + /// Gets the uncompressed size of the entry in bytes. + /// + /// + /// When bit 3 (data descriptor) is set in the local header, this value is initially + /// zero and is populated after the compressed data has been fully read. + /// + public long Length => _length; + + /// + /// Gets the raw general purpose bit flags from the local file header. + /// + [CLSCompliant(false)] + public ushort GeneralPurposeBitFlags { get; } + + /// + /// Gets a value indicating whether the entry is encrypted. + /// + public bool IsEncrypted => (GeneralPurposeBitFlags & 1) != 0; + + /// + /// Gets a value indicating whether the entry represents a directory. + /// + public bool IsDirectory => FullName.Length > 0 && (FullName[^1] is '/' or '\\'); + + /// + /// Gets the minimum ZIP specification version needed to extract this entry. 
+ /// + [CLSCompliant(false)] + public ushort VersionNeeded { get; } + + /// <summary> + /// Gets the decompressed data stream for this entry, or <see langword="null"/> + /// if the entry has no data (e.g. a directory entry). + /// </summary> + /// <remarks> + /// When copyData was <see langword="false"/> on the <see cref="ZipStreamReader.GetNextEntry"/> + /// call that produced this entry, + /// the stream reads directly from the archive and is invalidated when the reader + /// advances to the next entry. When copyData was <see langword="true"/>, + /// the data has been copied into a separate stream that remains valid + /// independently. + /// </remarks> + public Stream? DataStream { get; internal set; } + + /// <summary> + /// Extracts the entry to a file on disk. + /// </summary> + /// <param name="destinationFileName">The path of the file to create.</param> + /// <param name="overwrite"> + /// <see langword="true"/> to overwrite an existing file; otherwise <see langword="false"/>. + /// </param> + /// <exception cref="ArgumentException"><paramref name="destinationFileName"/> is null or empty.</exception> + public void ExtractToFile(string destinationFileName, bool overwrite) + { + ArgumentException.ThrowIfNullOrEmpty(destinationFileName); + + FileMode mode = overwrite ? FileMode.Create : FileMode.CreateNew; + using FileStream fs = new(destinationFileName, mode, FileAccess.Write, FileShare.None); + DataStream?.CopyTo(fs); + } + + /// <summary> + /// Asynchronously extracts the entry to a file on disk. + /// </summary> + /// <param name="destinationFileName">The path of the file to create.</param> + /// <param name="overwrite"> + /// <see langword="true"/> to overwrite an existing file; otherwise <see langword="false"/>. + /// </param> + /// <param name="cancellationToken">A token to monitor for cancellation requests.</param> + /// <exception cref="ArgumentException"><paramref name="destinationFileName"/> is null or empty.</exception> + public async Task ExtractToFileAsync(string destinationFileName, bool overwrite, + CancellationToken cancellationToken = default) + { + ArgumentException.ThrowIfNullOrEmpty(destinationFileName); + + FileMode mode = overwrite ?
FileMode.Create : FileMode.CreateNew; + FileStream fs = new(destinationFileName, mode, FileAccess.Write, FileShare.None, + bufferSize: 0x1000, useAsync: true); + await using (fs.ConfigureAwait(false)) + { + if (DataStream is not null) + { + await DataStream.CopyToAsync(fs, cancellationToken).ConfigureAwait(false); + } + } + } + + internal bool HasDataDescriptor { get; } + + internal void UpdateDataDescriptor(uint crc32, long compressedLength, long length, + uint runningCrc, long totalBytesRead) + { + if (runningCrc != crc32) + { + throw new InvalidDataException(SR.CrcMismatch); + } + + if (totalBytesRead != length) + { + throw new InvalidDataException(SR.UnexpectedStreamLength); + } + + _crc32 = crc32; + _compressedLength = compressedLength; + _length = length; + } +} diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamEntry.cs deleted file mode 100644 index ccfe08d58fa140..00000000000000 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamEntry.cs +++ /dev/null @@ -1,414 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Buffers; -using System.Diagnostics; -using System.Threading; -using System.Threading.Tasks; - -namespace System.IO.Compression; - -public sealed class ZipStreamEntry -{ - private readonly Stream? _archiveStream; - private readonly bool _hasDataDescriptor; - private readonly BoundedReadOnlyStream? _boundedStream; - private Stream? 
_decompressionStream; - private uint _crc32; - private long _compressedLength; - private long _length; - - // Running CRC32 computed over decompressed bytes for data-descriptor entries - private uint _runningCrc; - private long _totalDecompressedBytesRead; - - internal ZipStreamEntry( - string fullName, - ZipCompressionMethod compressionMethod, - DateTimeOffset lastModified, - uint crc32, - long compressedLength, - long length, - ushort generalPurposeBitFlags, - ushort versionNeeded, - Stream? archiveStream, - bool hasDataDescriptor) - { - FullName = fullName; - CompressionMethod = compressionMethod; - LastModified = lastModified; - _crc32 = crc32; - _compressedLength = compressedLength; - _length = length; - GeneralPurposeBitFlags = generalPurposeBitFlags; - VersionNeeded = versionNeeded; - _hasDataDescriptor = hasDataDescriptor; - - if (archiveStream is not null) - { - if (hasDataDescriptor) - { - _archiveStream = archiveStream; - } - else - { - _boundedStream = new BoundedReadOnlyStream(archiveStream, compressedLength); - } - } - } - - /// - /// Gets the full name (relative path) of the entry, including any directory path. - /// - public string FullName { get; } - - /// - /// Gets the file name portion of the entry (the part after the last directory separator). - /// - public string Name => Path.GetFileName(FullName); - - /// - /// Gets the compression method used for this entry. - /// - public ZipCompressionMethod CompressionMethod { get; } - - /// - /// Gets the last modification date and time of the entry. - /// - public DateTimeOffset LastModified { get; } - - /// - /// Gets the CRC-32 checksum of the uncompressed data. - /// - /// - /// When bit 3 (data descriptor) is set in the local header, this value is initially - /// zero and is populated after the compressed data has been fully read. - /// - [CLSCompliant(false)] - public uint Crc32 => _crc32; - - /// - /// Gets the compressed size of the entry in bytes. 
- /// - /// - /// When bit 3 (data descriptor) is set in the local header, this value is initially - /// zero and is populated after the compressed data has been fully read. - /// - public long CompressedLength => _compressedLength; - - /// - /// Gets the uncompressed size of the entry in bytes. - /// - /// - /// When bit 3 (data descriptor) is set in the local header, this value is initially - /// zero and is populated after the compressed data has been fully read. - /// - public long Length => _length; - - /// - /// Gets the raw general purpose bit flags from the local file header. - /// - [CLSCompliant(false)] - public ushort GeneralPurposeBitFlags { get; } - - /// - /// Gets a value indicating whether the entry is encrypted. - /// - public bool IsEncrypted => (GeneralPurposeBitFlags & 1) != 0; - - /// - /// Gets a value indicating whether the entry represents a directory. - /// - public bool IsDirectory => FullName.Length > 0 && (FullName[^1] is '/' or '\\'); - - /// - /// Gets the minimum ZIP specification version needed to extract this entry. - /// - [CLSCompliant(false)] - public ushort VersionNeeded { get; } - - /// - /// Reads decompressed data from this entry into the provided buffer. - /// The data is transparently decompressed based on the entry's compression method. - /// - /// The buffer to read decompressed data into. - /// The number of bytes read, or 0 if all data has been consumed. - /// - /// The entry uses an unsupported compression method, or is a Stored entry with a data descriptor. - /// - /// - /// The decompressed data does not match the expected CRC-32 checksum or length. 
- /// - public int Read(Span buffer) - { - Stream stream = GetOrCreateDecompressionStream(); - int bytesRead = stream.Read(buffer); - - if (_hasDataDescriptor && bytesRead > 0) - { - _runningCrc = Crc32Helper.UpdateCrc32(_runningCrc, buffer.Slice(0, bytesRead)); - _totalDecompressedBytesRead += bytesRead; - } - - return bytesRead; - } - - /// - /// Asynchronously reads decompressed data from this entry into the provided buffer. - /// The data is transparently decompressed based on the entry's compression method. - /// - /// The buffer to read decompressed data into. - /// A token to monitor for cancellation requests. - /// The number of bytes read, or 0 if all data has been consumed. - /// - /// The entry uses an unsupported compression method, or is a Stored entry with a data descriptor. - /// - /// - /// The decompressed data does not match the expected CRC-32 checksum or length. - /// - public async ValueTask ReadAsync(Memory buffer, CancellationToken cancellationToken = default) - { - Stream stream = GetOrCreateDecompressionStream(); - int bytesRead = await stream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false); - - if (_hasDataDescriptor && bytesRead > 0) - { - _runningCrc = Crc32Helper.UpdateCrc32(_runningCrc, buffer.Span.Slice(0, bytesRead)); - _totalDecompressedBytesRead += bytesRead; - } - - return bytesRead; - } - - private Stream GetOrCreateDecompressionStream() - { - if (_decompressionStream is not null) - { - return _decompressionStream; - } - - if (_hasDataDescriptor) - { - // Data descriptor entries have unknown compressed size in the local header. - // Deflate/Deflate64 streams are self-terminating, so they can be decompressed - // without knowing the compressed size. Stored data has no termination marker, - // so it cannot be decompressed without the size. 
- Debug.Assert(_archiveStream is not null); - - _decompressionStream = CompressionMethod switch - { - ZipCompressionMethod.Deflate => new DeflateStream(_archiveStream, CompressionMode.Decompress, leaveOpen: true), - ZipCompressionMethod.Deflate64 => new DeflateManagedStream(_archiveStream, ZipCompressionMethod.Deflate64, uncompressedSize: -1), - ZipCompressionMethod.Stored => throw new NotSupportedException(SR.ZipStreamStoredDataDescriptorNotSupported), - _ => throw new NotSupportedException(SR.UnsupportedCompression) - }; - } - else if (_boundedStream is not null) - { - _decompressionStream = CompressionMethod switch - { - ZipCompressionMethod.Deflate => new DeflateStream(_boundedStream, CompressionMode.Decompress, _length), - ZipCompressionMethod.Deflate64 => new DeflateManagedStream(_boundedStream, ZipCompressionMethod.Deflate64, _length), - ZipCompressionMethod.Stored => _boundedStream, - _ => throw new NotSupportedException(SR.UnsupportedCompression) - }; - - // Wrap with CRC validation for entries with known CRC and uncompressed length. - // CrcValidatingReadStream computes a running CRC over decompressed bytes and - // throws InvalidDataException if the checksum does not match once all expected - // bytes have been read. - _decompressionStream = new CrcValidatingReadStream(_decompressionStream, _crc32, _length); - } - else - { - // Entry has no data (e.g. empty file or directory). - _decompressionStream = Stream.Null; - } - - return _decompressionStream; - } - - internal bool HasDataDescriptor => _hasDataDescriptor; - - internal void SkipCompressedData() - { - // For known-size entries, drain the bounded stream to advance the archive - // past remaining compressed bytes. For data descriptor entries, drain the - // decompression stream which detects the end of the self-terminating format. - Stream? 
streamToDrain = _boundedStream; - - if (streamToDrain is null && _hasDataDescriptor && _archiveStream is not null) - { - streamToDrain = GetOrCreateDecompressionStream(); - } - - if (streamToDrain is null) - { - return; - } - - byte[] skipBuffer = ArrayPool.Shared.Rent(4096); - try - { - int bytesRead; - while ((bytesRead = streamToDrain.Read(skipBuffer)) > 0) - { - if (_hasDataDescriptor) - { - _runningCrc = Crc32Helper.UpdateCrc32(_runningCrc, skipBuffer.AsSpan(0, bytesRead)); - _totalDecompressedBytesRead += bytesRead; - } - } - } - finally - { - ArrayPool.Shared.Return(skipBuffer); - } - } - - internal async ValueTask SkipCompressedDataAsync(CancellationToken cancellationToken) - { - Stream? streamToDrain = _boundedStream; - - if (streamToDrain is null && _hasDataDescriptor && _archiveStream is not null) - { - streamToDrain = GetOrCreateDecompressionStream(); - } - - if (streamToDrain is null) - { - return; - } - - byte[] skipBuffer = ArrayPool.Shared.Rent(4096); - try - { - int bytesRead; - while ((bytesRead = await streamToDrain.ReadAsync(skipBuffer.AsMemory(), cancellationToken).ConfigureAwait(false)) > 0) - { - if (_hasDataDescriptor) - { - _runningCrc = Crc32Helper.UpdateCrc32(_runningCrc, skipBuffer.AsSpan(0, bytesRead)); - _totalDecompressedBytesRead += bytesRead; - } - } - } - finally - { - ArrayPool.Shared.Return(skipBuffer); - } - } - - internal void UpdateDataDescriptor(uint crc32, long compressedLength, long length) - { - _crc32 = crc32; - _compressedLength = compressedLength; - _length = length; - - // Validate that the decompressed data matches the data descriptor values. - if (_runningCrc != crc32) - { - throw new InvalidDataException(SR.CrcMismatch); - } - - if (_totalDecompressedBytesRead != length) - { - throw new InvalidDataException(SR.UnexpectedStreamLength); - } - } - - /// - /// A read-only, forward-only stream that limits the number of bytes - /// that can be read from an underlying stream without closing it. 
- /// - private sealed class BoundedReadOnlyStream : Stream - { - private readonly Stream _baseStream; - private long _remaining; - - public BoundedReadOnlyStream(Stream baseStream, long length) - { - _baseStream = baseStream; - _remaining = length; - } - - public override bool CanRead => true; - public override bool CanSeek => false; - public override bool CanWrite => false; - public override long Length => throw new NotSupportedException(); - - public override long Position - { - get => throw new NotSupportedException(); - set => throw new NotSupportedException(); - } - - public override int Read(byte[] buffer, int offset, int count) - => Read(buffer.AsSpan(offset, count)); - - public override int Read(Span buffer) - { - if (_remaining <= 0) - { - return 0; - } - - if (buffer.Length > _remaining) - { - buffer = buffer.Slice(0, (int)_remaining); - } - - int bytesRead = _baseStream.Read(buffer); - _remaining -= bytesRead; - - return bytesRead; - } - - public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) - => ReadAsync(buffer.AsMemory(offset, count), cancellationToken).AsTask(); - - public override ValueTask ReadAsync(Memory buffer, CancellationToken cancellationToken = default) - { - if (_remaining <= 0) - { - return new ValueTask(0); - } - - if (buffer.Length > _remaining) - { - buffer = buffer.Slice(0, (int)_remaining); - } - - return ReadAsyncCore(buffer, cancellationToken); - } - - private async ValueTask ReadAsyncCore(Memory buffer, CancellationToken cancellationToken) - { - int bytesRead = await _baseStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false); - _remaining -= bytesRead; - - return bytesRead; - } - - public override void Flush() { } - public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException(); - public override void SetLength(long value) => throw new NotSupportedException(); - public override void Write(byte[] buffer, int offset, int count) => throw 
new NotSupportedException(); - - protected override void Dispose(bool disposing) - { - if (disposing) - { - _baseStream.Dispose(); - } - - base.Dispose(disposing); - } - - public override async ValueTask DisposeAsync() - { - await _baseStream.DisposeAsync().ConfigureAwait(false); - await base.DisposeAsync().ConfigureAwait(false); - } - } -} diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs index aebce74ac9ba39..4ceaf899aae495 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs @@ -1,7 +1,6 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using System.Buffers; using System.Buffers.Binary; using System.Text; using System.Threading; @@ -9,6 +8,22 @@ namespace System.IO.Compression; +/// +/// Provides a forward-only reader for ZIP archives that reads entries sequentially +/// from a stream without requiring the stream to be seekable. +/// +/// +/// +/// Unlike , which reads the central directory at the end +/// of the archive, walks local file headers in order +/// and decompresses data on the fly. This makes it suitable for network streams, +/// pipes, and other non-seekable sources. +/// +/// +/// This mirrors the TarReader / TarEntry pattern in +/// System.Formats.Tar. +/// +/// public sealed class ZipStreamReader : IDisposable, IAsyncDisposable { private const ushort DataDescriptorBitFlag = 0x8; @@ -17,20 +32,46 @@ public sealed class ZipStreamReader : IDisposable, IAsyncDisposable private bool _isDisposed; private readonly bool _leaveOpen; private readonly Encoding? _entryNameEncoding; - private ZipStreamEntry? _currentEntry; + private ZipForwardReadEntry? 
_previousEntry; private readonly Stream _archiveStream; private bool _reachedEnd; - public ZipStreamReader(Stream archiveStream, bool leaveOpen = false, Encoding? entryNameEncoding = null) + /// + /// Initializes a new that reads from the specified stream. + /// + /// The archive stream to read from. + /// + /// to leave the stream open after the reader is disposed; + /// otherwise, . + /// + public ZipStreamReader(Stream stream, bool leaveOpen = false) + : this(stream, entryNameEncoding: null, leaveOpen) { - ArgumentNullException.ThrowIfNull(archiveStream); + } - if (!archiveStream.CanRead) + /// + /// Initializes a new that reads from the specified stream + /// using the given encoding for entry names. + /// + /// The archive stream to read from. + /// + /// The encoding to use when reading entry names that do not have the UTF-8 bit flag set, + /// or to use UTF-8. + /// + /// + /// to leave the stream open after the reader is disposed; + /// otherwise, . + /// + public ZipStreamReader(Stream stream, Encoding? entryNameEncoding, bool leaveOpen = false) + { + ArgumentNullException.ThrowIfNull(stream); + + if (!stream.CanRead) { - throw new ArgumentException(SR.NotSupported_UnreadableStream, nameof(archiveStream)); + throw new ArgumentException(SR.NotSupported_UnreadableStream, nameof(stream)); } - _archiveStream = archiveStream; + _archiveStream = stream; _leaveOpen = leaveOpen; _entryNameEncoding = entryNameEncoding; } @@ -38,12 +79,17 @@ public ZipStreamReader(Stream archiveStream, bool leaveOpen = false, Encoding? e /// /// Reads the next entry from the ZIP archive stream by parsing the local file header. /// + /// + /// to copy the entry's decompressed data into a + /// that remains valid after the reader advances; to read directly + /// from the archive stream (invalidated on the next call). + /// /// - /// The next , or if there are no more entries. + /// The next , or if there are no more entries. /// /// The reader has been disposed. 
/// The archive stream contains invalid data. - public ZipStreamEntry? GetNextEntry() + public ZipForwardReadEntry? GetNextEntry(bool copyData = false) { ObjectDisposedException.ThrowIf(_isDisposed, this); @@ -52,9 +98,9 @@ public ZipStreamReader(Stream archiveStream, bool leaveOpen = false, Encoding? e return null; } - AdvancePastCurrentEntry(); + AdvanceDataStreamIfNeeded(); - Span headerBytes = stackalloc byte[ZipLocalFileHeader.SizeOfLocalHeader]; + byte[] headerBytes = new byte[ZipLocalFileHeader.SizeOfLocalHeader]; int bytesRead = _archiveStream.ReadAtLeast(headerBytes, headerBytes.Length, throwOnEndOfStream: false); if (bytesRead < ZipLocalFileHeader.SizeOfLocalHeader) @@ -69,28 +115,61 @@ public ZipStreamReader(Stream archiveStream, bool leaveOpen = false, Encoding? e return null; } - ReadLocalFileHeader(headerBytes, out string fullName, out ushort versionNeeded, out ushort generalPurposeBitFlags, + int dynamicLength = GetDynamicHeaderLength(headerBytes); + byte[] dynamicBuffer = new byte[dynamicLength]; + _archiveStream.ReadExactly(dynamicBuffer); + + ParseLocalFileHeader(headerBytes, dynamicBuffer, + out string fullName, out ushort versionNeeded, out ushort generalPurposeBitFlags, out ushort compressionMethod, out DateTimeOffset lastModified, out uint crc32, out long compressedSize, out long uncompressedSize, out bool hasDataDescriptor); - _currentEntry = new ZipStreamEntry( + bool isEncrypted = (generalPurposeBitFlags & 1) != 0; + + Stream? dataStream = CreateDataStream( + (ZipCompressionMethod)compressionMethod, compressedSize, uncompressedSize, + crc32, hasDataDescriptor, isEncrypted, out CrcValidatingReadStream? 
crcStream); + + if (copyData && dataStream is not null) + { + MemoryStream ms = new(); + dataStream.CopyTo(ms); + ms.Position = 0; + dataStream = ms; + } + + ZipForwardReadEntry entry = new( fullName, (ZipCompressionMethod)compressionMethod, lastModified, crc32, compressedSize, uncompressedSize, generalPurposeBitFlags, versionNeeded, - _archiveStream, hasDataDescriptor); + hasDataDescriptor, dataStream); + + if (copyData && hasDataDescriptor && crcStream is not null) + { + ReadDataDescriptor(entry, crcStream); + } + + if (!copyData) + { + _previousEntry = entry; + } - return _currentEntry; + return entry; } /// - /// Asynchronously reads the next entry from the ZIP archive stream by parsing the local file header. + /// Asynchronously reads the next entry from the ZIP archive stream. /// + /// + /// to copy the entry's decompressed data into a + /// that remains valid after the reader advances; to read directly + /// from the archive stream (invalidated on the next call). + /// /// A token to monitor for cancellation requests. /// - /// The next , or if there are no more entries. + /// The next , or if there are no more entries. /// - /// The reader has been disposed. - /// The archive stream contains invalid data. - public async ValueTask GetNextEntryAsync(CancellationToken cancellationToken = default) + public async ValueTask GetNextEntryAsync( + bool copyData = false, CancellationToken cancellationToken = default) { ObjectDisposedException.ThrowIf(_isDisposed, this); @@ -99,75 +178,207 @@ public ZipStreamReader(Stream archiveStream, bool leaveOpen = false, Encoding? 
e return null; } - await AdvancePastCurrentEntryAsync(cancellationToken).ConfigureAwait(false); + await AdvanceDataStreamIfNeededAsync(cancellationToken).ConfigureAwait(false); - byte[] headerBytes = ArrayPool.Shared.Rent(ZipLocalFileHeader.SizeOfLocalHeader); - try + byte[] headerBytes = new byte[ZipLocalFileHeader.SizeOfLocalHeader]; + int bytesRead = await _archiveStream.ReadAtLeastAsync( + headerBytes.AsMemory(0, ZipLocalFileHeader.SizeOfLocalHeader), + ZipLocalFileHeader.SizeOfLocalHeader, + throwOnEndOfStream: false, + cancellationToken).ConfigureAwait(false); + + if (bytesRead < ZipLocalFileHeader.SizeOfLocalHeader) { - int bytesRead = await _archiveStream.ReadAtLeastAsync( - headerBytes.AsMemory(0, ZipLocalFileHeader.SizeOfLocalHeader), - ZipLocalFileHeader.SizeOfLocalHeader, - throwOnEndOfStream: false, - cancellationToken).ConfigureAwait(false); + _reachedEnd = true; + return null; + } - if (bytesRead < ZipLocalFileHeader.SizeOfLocalHeader) - { - _reachedEnd = true; - return null; - } + if (!headerBytes.AsSpan().StartsWith(ZipLocalFileHeader.SignatureConstantBytes)) + { + _reachedEnd = true; + return null; + } - if (!headerBytes.AsSpan().StartsWith(ZipLocalFileHeader.SignatureConstantBytes)) - { - _reachedEnd = true; - return null; - } + int dynamicLength = GetDynamicHeaderLength(headerBytes); + byte[] dynamicBuffer = new byte[dynamicLength]; + await _archiveStream.ReadExactlyAsync(dynamicBuffer.AsMemory(0, dynamicLength), cancellationToken).ConfigureAwait(false); - await ReadLocalFileHeaderAsync(headerBytes, cancellationToken).ConfigureAwait(false); + ParseLocalFileHeader(headerBytes, dynamicBuffer, + out string fullName, out ushort versionNeeded, out ushort generalPurposeBitFlags, + out ushort compressionMethod, out DateTimeOffset lastModified, out uint crc32, + out long compressedSize, out long uncompressedSize, out bool hasDataDescriptor); + + ZipCompressionMethod method = (ZipCompressionMethod)compressionMethod; + bool isEncrypted = 
(generalPurposeBitFlags & 1) != 0; + + Stream? dataStream = CreateDataStream( + method, compressedSize, uncompressedSize, crc32, + hasDataDescriptor, isEncrypted, out CrcValidatingReadStream? crcStream); + + if (copyData && dataStream is not null) + { + MemoryStream ms = new(); + await dataStream.CopyToAsync(ms, cancellationToken).ConfigureAwait(false); + ms.Position = 0; + dataStream = ms; + } + + ZipForwardReadEntry entry = new( + fullName, method, lastModified, crc32, + compressedSize, uncompressedSize, generalPurposeBitFlags, versionNeeded, + hasDataDescriptor, dataStream); + + if (copyData && hasDataDescriptor && crcStream is not null) + { + await ReadDataDescriptorAsync(entry, crcStream, cancellationToken).ConfigureAwait(false); } - finally + + if (!copyData) { - ArrayPool.Shared.Return(headerBytes); + _previousEntry = entry; } - return _currentEntry; + return entry; } - private void AdvancePastCurrentEntry() + private Stream? CreateDataStream( + ZipCompressionMethod compressionMethod, + long compressedSize, + long uncompressedSize, + uint crc32, + bool hasDataDescriptor, + bool isEncrypted, + out CrcValidatingReadStream? crcStream) { - if (_currentEntry is null) + crcStream = null; + + if (!hasDataDescriptor && compressedSize == 0) { - return; + return null; } - _currentEntry.SkipCompressedData(); + // Encrypted entries cannot be decompressed without decryption. + // When the compressed size is known (no data descriptor), return a bounded + // stream so the reader can drain past the encrypted bytes and find the next + // local file header. When a data descriptor is present the compressed size + // is unknown, so we cannot determine the entry boundary. + if (isEncrypted) + { + if (hasDataDescriptor) + { + throw new NotSupportedException(SR.ZipStreamEncryptedDataDescriptorNotSupported); + } + + return new BoundedReadOnlyStream(_archiveStream, compressedSize); + } - if (_currentEntry.HasDataDescriptor) + Stream source = hasDataDescriptor + ? 
_archiveStream + : new BoundedReadOnlyStream(_archiveStream, compressedSize); + + Stream decompressed = CreateDecompressionStream(source, compressionMethod, uncompressedSize, leaveOpen: hasDataDescriptor); + + crcStream = hasDataDescriptor + // Data-descriptor entries: CRC and length are unknown until after the data is read. + // Use sentinel values to disable validation while still tracking RunningCrc and TotalBytesRead + // for later verification against the data descriptor. + ? new CrcValidatingReadStream(decompressed, expectedCrc: 0, expectedLength: long.MaxValue) + : new CrcValidatingReadStream(decompressed, crc32, uncompressedSize); + + return crcStream; + } + + /// + /// Creates the appropriate decompression stream for the given compression method. + /// + private static Stream CreateDecompressionStream( + Stream source, ZipCompressionMethod compressionMethod, long uncompressedSize, bool leaveOpen) + { + return compressionMethod switch { - ReadDataDescriptor(_currentEntry); + ZipCompressionMethod.Deflate when leaveOpen => + new DeflateStream(source, CompressionMode.Decompress, leaveOpen: true), + ZipCompressionMethod.Deflate => + new DeflateStream(source, CompressionMode.Decompress, uncompressedSize), + ZipCompressionMethod.Deflate64 => + new DeflateManagedStream(source, ZipCompressionMethod.Deflate64, leaveOpen ? 
-1 : uncompressedSize), + ZipCompressionMethod.Stored when leaveOpen => + throw new NotSupportedException(SR.ZipStreamStoredDataDescriptorNotSupported), + ZipCompressionMethod.Stored => source, + _ => throw new NotSupportedException(SR.UnsupportedCompression) + }; + } + + private void AdvanceDataStreamIfNeeded() + { + if (_previousEntry is null) + { + return; } - _currentEntry = null; + ZipForwardReadEntry entry = _previousEntry; + _previousEntry = null; + + DrainStream(entry.DataStream); + + if (entry.HasDataDescriptor && entry.DataStream is CrcValidatingReadStream crcStream) + { + ReadDataDescriptor(entry, crcStream); + } } - private async ValueTask AdvancePastCurrentEntryAsync(CancellationToken cancellationToken) + private async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancellationToken) { - if (_currentEntry is null) + if (_previousEntry is null) { return; } - await _currentEntry.SkipCompressedDataAsync(cancellationToken).ConfigureAwait(false); + ZipForwardReadEntry entry = _previousEntry; + _previousEntry = null; - if (_currentEntry.HasDataDescriptor) + await DrainStreamAsync(entry.DataStream, cancellationToken).ConfigureAwait(false); + + if (entry.HasDataDescriptor && entry.DataStream is CrcValidatingReadStream crcStream) + { + await ReadDataDescriptorAsync(entry, crcStream, cancellationToken).ConfigureAwait(false); + } + } + + private static void DrainStream(Stream? stream) + { + if (stream is not null) + { + stream.CopyTo(Stream.Null); + } + } + + private static async ValueTask DrainStreamAsync(Stream? 
stream, CancellationToken cancellationToken) + { + if (stream is not null) { - await ReadDataDescriptorAsync(_currentEntry, cancellationToken).ConfigureAwait(false); + await stream.CopyToAsync(Stream.Null, cancellationToken).ConfigureAwait(false); } + } - _currentEntry = null; + /// + /// Returns the combined length of the filename and extra field from the fixed local file header, + /// so the caller can read exactly that many bytes via sync or async I/O. + /// + private static int GetDynamicHeaderLength(ReadOnlySpan headerBytes) + { + ushort filenameLength = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.FilenameLength..]); + ushort extraFieldLength = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes[ZipLocalFileHeader.FieldLocations.ExtraFieldLength..]); + return filenameLength + extraFieldLength; } - private void ReadLocalFileHeader( + /// + /// Parses all local file header fields from the fixed header bytes and the already-read + /// dynamic buffer (filename + extra field). This method performs no I/O. + /// + private void ParseLocalFileHeader( ReadOnlySpan headerBytes, + ReadOnlySpan dynamicBuffer, out string fullName, out ushort versionNeeded, out ushort generalPurposeBitFlags, @@ -191,157 +402,58 @@ private void ReadLocalFileHeader( lastModified = new DateTimeOffset(ZipHelper.DosTimeToDateTime(lastModifiedRaw)); hasDataDescriptor = (generalPurposeBitFlags & DataDescriptorBitFlag) != 0; - int dynamicLength = filenameLength + extraFieldLength; - byte[]? rentedBuffer = null; - Span dynamicBuffer = dynamicLength <= 512 - ? stackalloc byte[512].Slice(0, dynamicLength) - : (rentedBuffer = ArrayPool.Shared.Rent(dynamicLength)).AsSpan(0, dynamicLength); - - try - { - _archiveStream.ReadExactly(dynamicBuffer); + Encoding encoding = (generalPurposeBitFlags & UnicodeFileNameBitFlag) != 0 + ? Encoding.UTF8 + : _entryNameEncoding ?? Encoding.UTF8; - Encoding encoding = (generalPurposeBitFlags & UnicodeFileNameBitFlag) != 0 - ? 
Encoding.UTF8 - : _entryNameEncoding ?? Encoding.UTF8; + fullName = encoding.GetString(dynamicBuffer[..filenameLength]); - fullName = encoding.GetString(dynamicBuffer[..filenameLength]); + bool compressedSizeInZip64 = compressedSizeSmall == ZipHelper.Mask32Bit; + bool uncompressedSizeInZip64 = uncompressedSizeSmall == ZipHelper.Mask32Bit; - // Handle Zip64 extra field for sizes - bool compressedSizeInZip64 = compressedSizeSmall == ZipHelper.Mask32Bit; - bool uncompressedSizeInZip64 = uncompressedSizeSmall == ZipHelper.Mask32Bit; - - if (compressedSizeInZip64 || uncompressedSizeInZip64) - { - Zip64ExtraField zip64 = Zip64ExtraField.GetJustZip64Block( - dynamicBuffer.Slice(filenameLength, extraFieldLength), - readUncompressedSize: uncompressedSizeInZip64, - readCompressedSize: compressedSizeInZip64, - readLocalHeaderOffset: false, - readStartDiskNumber: false); - - compressedSize = zip64.CompressedSize ?? compressedSizeSmall; - uncompressedSize = zip64.UncompressedSize ?? uncompressedSizeSmall; - } - else - { - compressedSize = compressedSizeSmall; - uncompressedSize = uncompressedSizeSmall; - } - } - finally + if (compressedSizeInZip64 || uncompressedSizeInZip64) { - if (rentedBuffer is not null) - { - ArrayPool.Shared.Return(rentedBuffer); - } - } - } - - private async ValueTask ReadLocalFileHeaderAsync(byte[] headerBytes, CancellationToken cancellationToken) - { - ushort versionNeeded = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.VersionNeededToExtract)); - ushort generalPurposeBitFlags = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.GeneralPurposeBitFlags)); - ushort compressionMethod = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.CompressionMethod)); - uint lastModifiedRaw = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.LastModified)); - uint crc32 = 
BinaryPrimitives.ReadUInt32LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.Crc32)); - uint compressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.CompressedSize)); - uint uncompressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.UncompressedSize)); - ushort filenameLength = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.FilenameLength)); - ushort extraFieldLength = BinaryPrimitives.ReadUInt16LittleEndian(headerBytes.AsSpan(ZipLocalFileHeader.FieldLocations.ExtraFieldLength)); - - DateTimeOffset lastModified = new DateTimeOffset(ZipHelper.DosTimeToDateTime(lastModifiedRaw)); - bool hasDataDescriptor = (generalPurposeBitFlags & DataDescriptorBitFlag) != 0; - - int dynamicLength = filenameLength + extraFieldLength; - byte[] dynamicBuffer = ArrayPool.Shared.Rent(dynamicLength); - - try - { - await _archiveStream.ReadExactlyAsync(dynamicBuffer.AsMemory(0, dynamicLength), cancellationToken).ConfigureAwait(false); - - Encoding encoding = (generalPurposeBitFlags & UnicodeFileNameBitFlag) != 0 - ? Encoding.UTF8 - : _entryNameEncoding ?? Encoding.UTF8; - - string fullName = encoding.GetString(dynamicBuffer.AsSpan(0, filenameLength)); - - bool compressedSizeInZip64 = compressedSizeSmall == ZipHelper.Mask32Bit; - bool uncompressedSizeInZip64 = uncompressedSizeSmall == ZipHelper.Mask32Bit; - long compressedSize; - long uncompressedSize; - - if (compressedSizeInZip64 || uncompressedSizeInZip64) - { - Zip64ExtraField zip64 = Zip64ExtraField.GetJustZip64Block( - dynamicBuffer.AsSpan(filenameLength, extraFieldLength), - readUncompressedSize: uncompressedSizeInZip64, - readCompressedSize: compressedSizeInZip64, - readLocalHeaderOffset: false, - readStartDiskNumber: false); - - compressedSize = zip64.CompressedSize ?? compressedSizeSmall; - uncompressedSize = zip64.UncompressedSize ?? 
uncompressedSizeSmall; - } - else - { - compressedSize = compressedSizeSmall; - uncompressedSize = uncompressedSizeSmall; - } - - _currentEntry = new ZipStreamEntry( - fullName, (ZipCompressionMethod)compressionMethod, lastModified, crc32, - compressedSize, uncompressedSize, generalPurposeBitFlags, versionNeeded, - _archiveStream, hasDataDescriptor); + Zip64ExtraField zip64 = Zip64ExtraField.GetJustZip64Block( + dynamicBuffer.Slice(filenameLength, extraFieldLength), + readUncompressedSize: uncompressedSizeInZip64, + readCompressedSize: compressedSizeInZip64, + readLocalHeaderOffset: false, + readStartDiskNumber: false); + + compressedSize = zip64.CompressedSize ?? compressedSizeSmall; + uncompressedSize = zip64.UncompressedSize ?? uncompressedSizeSmall; } - finally + else { - ArrayPool.Shared.Return(dynamicBuffer); + compressedSize = compressedSizeSmall; + uncompressedSize = uncompressedSizeSmall; } } - private void ReadDataDescriptor(ZipStreamEntry entry) + private void ReadDataDescriptor(ZipForwardReadEntry entry, CrcValidatingReadStream crcStream) { - // Data descriptor layout (signature is optional): - // [signature 4B] + CRC-32 4B + compressed size (4B or 8B) + uncompressed size (4B or 8B) - // Read incrementally to avoid consuming bytes from the next entry. - Span buffer = stackalloc byte[24]; + byte[] buffer = new byte[24]; - _archiveStream.ReadExactly(buffer[..4]); + _archiveStream.ReadExactly(buffer.AsSpan(0, 4)); int offset = 0; int totalRead = 4; - if (buffer[..4].SequenceEqual(ZipLocalFileHeader.DataDescriptorSignatureConstantBytes)) + if (buffer.AsSpan(0, 4).SequenceEqual(ZipLocalFileHeader.DataDescriptorSignatureConstantBytes)) { offset = 4; - _archiveStream.ReadExactly(buffer.Slice(4, 4)); + _archiveStream.ReadExactly(buffer.AsSpan(4, 4)); totalRead = 8; } bool isZip64 = entry.VersionNeeded >= (ushort)ZipVersionNeededValues.Zip64; int sizesBytes = isZip64 ? 
16 : 8; - _archiveStream.ReadExactly(buffer.Slice(totalRead, sizesBytes)); - - uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(buffer[offset..]); - int sizesOffset = offset + 4; + _archiveStream.ReadExactly(buffer.AsSpan(totalRead, sizesBytes)); - if (isZip64) - { - entry.UpdateDataDescriptor( - crc32, - compressedLength: BinaryPrimitives.ReadInt64LittleEndian(buffer[sizesOffset..]), - length: BinaryPrimitives.ReadInt64LittleEndian(buffer[(sizesOffset + 8)..])); - } - else - { - entry.UpdateDataDescriptor( - crc32, - compressedLength: BinaryPrimitives.ReadUInt32LittleEndian(buffer[sizesOffset..]), - length: BinaryPrimitives.ReadUInt32LittleEndian(buffer[(sizesOffset + 4)..])); - } + ParseDataDescriptor(buffer, offset, isZip64, entry, crcStream); } - private async ValueTask ReadDataDescriptorAsync(ZipStreamEntry entry, CancellationToken cancellationToken) + private async ValueTask ReadDataDescriptorAsync( + ZipForwardReadEntry entry, CrcValidatingReadStream crcStream, CancellationToken cancellationToken) { byte[] buffer = new byte[24]; @@ -360,22 +472,35 @@ private async ValueTask ReadDataDescriptorAsync(ZipStreamEntry entry, Cancellati int sizesBytes = isZip64 ? 16 : 8; await _archiveStream.ReadExactlyAsync(buffer.AsMemory(totalRead, sizesBytes), cancellationToken).ConfigureAwait(false); - uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(buffer.AsSpan(offset)); + ParseDataDescriptor(buffer, offset, isZip64, entry, crcStream); + } + + /// + /// Parses the data descriptor fields from an already-read buffer and updates + /// the entry with the CRC-32, compressed size, and uncompressed size. No I/O. 
+ /// + private static void ParseDataDescriptor( + ReadOnlySpan buffer, int offset, bool isZip64, + ZipForwardReadEntry entry, CrcValidatingReadStream crcStream) + { + uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(buffer[offset..]); int sizesOffset = offset + 4; if (isZip64) { entry.UpdateDataDescriptor( crc32, - compressedLength: BinaryPrimitives.ReadInt64LittleEndian(buffer.AsSpan(sizesOffset)), - length: BinaryPrimitives.ReadInt64LittleEndian(buffer.AsSpan(sizesOffset + 8))); + compressedLength: BinaryPrimitives.ReadInt64LittleEndian(buffer[sizesOffset..]), + length: BinaryPrimitives.ReadInt64LittleEndian(buffer[(sizesOffset + 8)..]), + crcStream.RunningCrc, crcStream.TotalBytesRead); } else { entry.UpdateDataDescriptor( crc32, - compressedLength: BinaryPrimitives.ReadUInt32LittleEndian(buffer.AsSpan(sizesOffset)), - length: BinaryPrimitives.ReadUInt32LittleEndian(buffer.AsSpan(sizesOffset + 4))); + compressedLength: BinaryPrimitives.ReadUInt32LittleEndian(buffer[sizesOffset..]), + length: BinaryPrimitives.ReadUInt32LittleEndian(buffer[(sizesOffset + 4)..]), + crcStream.RunningCrc, crcStream.TotalBytesRead); } } @@ -404,5 +529,4 @@ public async ValueTask DisposeAsync() } } } - } diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs index 1aeae8905250c1..cd1d7fd515ec6b 100644 --- a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs +++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs @@ -1,7 +1,8 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-using System.Collections.Generic; +using System.Text; +using System.Threading; using System.Threading.Tasks; using Xunit; @@ -20,40 +21,8 @@ static zip_StreamEntryReadTests() rng.NextBytes(s_largeContent); } - public static IEnumerable DeflateWithKnownSize_Data() - { - foreach (bool async in _bools) - { - yield return new object[] { async }; - } - } - - public static IEnumerable StoredWithKnownSize_Data() - { - foreach (bool async in _bools) - { - yield return new object[] { async }; - } - } - - public static IEnumerable DeflateWithDataDescriptor_Data() - { - foreach (bool async in _bools) - { - yield return new object[] { async }; - } - } - - public static IEnumerable StoredWithDataDescriptor_Data() - { - foreach (bool async in _bools) - { - yield return new object[] { async }; - } - } - [Theory] - [MemberData(nameof(DeflateWithKnownSize_Data))] + [MemberData(nameof(Get_Booleans_Data))] public async Task Read_DeflateWithKnownSize_ReturnsDecompressedData(bool async) { byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); @@ -64,26 +33,27 @@ public async Task Read_DeflateWithKnownSize_ReturnsDecompressedData(bool async) for (int i = 0; i < expectedContents.Length; i++) { - ZipStreamEntry entry = async + ZipForwardReadEntry entry = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); Assert.NotNull(entry); + Assert.NotNull(entry.DataStream); Assert.False(entry.IsDirectory); Assert.Equal(ZipCompressionMethod.Deflate, entry.CompressionMethod); - byte[] decompressed = await ReadEntryFully(entry, async); + byte[] decompressed = await ReadStreamFully(entry.DataStream, async); Assert.Equal(expectedContents[i], decompressed); } - ZipStreamEntry end = async + ZipForwardReadEntry end = async ? 
await reader.GetNextEntryAsync() : reader.GetNextEntry(); Assert.Null(end); } [Theory] - [MemberData(nameof(StoredWithKnownSize_Data))] + [MemberData(nameof(Get_Booleans_Data))] public async Task Read_StoredWithKnownSize_ReturnsUncompressedData(bool async) { byte[] zipBytes = CreateZipWithEntries(CompressionLevel.NoCompression, seekable: true); @@ -94,20 +64,21 @@ public async Task Read_StoredWithKnownSize_ReturnsUncompressedData(bool async) for (int i = 0; i < expectedContents.Length; i++) { - ZipStreamEntry entry = async + ZipForwardReadEntry entry = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); Assert.NotNull(entry); + Assert.NotNull(entry.DataStream); Assert.Equal(ZipCompressionMethod.Stored, entry.CompressionMethod); - byte[] decompressed = await ReadEntryFully(entry, async); + byte[] decompressed = await ReadStreamFully(entry.DataStream, async); Assert.Equal(expectedContents[i], decompressed); } } [Theory] - [MemberData(nameof(DeflateWithDataDescriptor_Data))] + [MemberData(nameof(Get_Booleans_Data))] public async Task Read_DeflateWithDataDescriptor_ReturnsDecompressedData(bool async) { byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: false); @@ -118,20 +89,21 @@ public async Task Read_DeflateWithDataDescriptor_ReturnsDecompressedData(bool as for (int i = 0; i < expectedContents.Length; i++) { - ZipStreamEntry entry = async + ZipForwardReadEntry entry = async ? 
await reader.GetNextEntryAsync() : reader.GetNextEntry(); Assert.NotNull(entry); + Assert.NotNull(entry.DataStream); Assert.Equal(ZipCompressionMethod.Deflate, entry.CompressionMethod); - byte[] decompressed = await ReadEntryFully(entry, async); + byte[] decompressed = await ReadStreamFully(entry.DataStream, async); Assert.Equal(expectedContents[i], decompressed); } } [Theory] - [MemberData(nameof(StoredWithDataDescriptor_Data))] + [MemberData(nameof(Get_Booleans_Data))] public async Task Read_StoredWithDataDescriptor_ThrowsNotSupported(bool async) { byte[] zipBytes = CreateZipWithEntries(CompressionLevel.NoCompression, seekable: false); @@ -139,31 +111,376 @@ public async Task Read_StoredWithDataDescriptor_ThrowsNotSupported(bool async) using MemoryStream archiveStream = new(zipBytes); using ZipStreamReader reader = new(archiveStream); - ZipStreamEntry entry = async + if (async) + { + await Assert.ThrowsAsync(() => reader.GetNextEntryAsync().AsTask()); + } + else + { + Assert.Throws(() => reader.GetNextEntry()); + } + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task CopyData_PreservesEntryAfterAdvancing(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + using ZipStreamReader reader = new(archiveStream); + + ZipForwardReadEntry entry = async + ? await reader.GetNextEntryAsync(copyData: true) + : reader.GetNextEntry(copyData: true); + + Assert.NotNull(entry); + Assert.NotNull(entry.DataStream); + + ZipForwardReadEntry next = async + ? 
await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + Assert.NotNull(next); + + entry.DataStream.Position = 0; + byte[] decompressed = await ReadStreamFully(entry.DataStream, async); + Assert.Equal(s_smallContent, decompressed); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task PartialRead_ThenGetNextEntry_AdvancesCorrectly(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + using ZipStreamReader reader = new(archiveStream); + + ZipForwardReadEntry first = async + ? await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + Assert.NotNull(first); + + byte[] partial = new byte[5]; + if (async) + await first.DataStream!.ReadAsync(partial); + else + first.DataStream!.Read(partial); + + ZipForwardReadEntry second = async + ? await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + + Assert.NotNull(second); + Assert.Equal("medium.bin", second.FullName); + + byte[] decompressed = await ReadStreamFully(second.DataStream!, async); + Assert.Equal(s_mediumContent, decompressed); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task PartialRead_DataDescriptor_ThenGetNextEntry_AdvancesCorrectly(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: false); + + using MemoryStream archiveStream = new(zipBytes); + using ZipStreamReader reader = new(archiveStream); + + ZipForwardReadEntry first = async + ? await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + Assert.NotNull(first); + + byte[] partial = new byte[3]; + if (async) + await first.DataStream!.ReadAsync(partial); + else + first.DataStream!.Read(partial); + + ZipForwardReadEntry second = async + ? 
await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + + Assert.NotNull(second); + Assert.Equal("medium.bin", second.FullName); + + byte[] decompressed = await ReadStreamFully(second.DataStream!, async); + Assert.Equal(s_mediumContent, decompressed); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task Deflate64Entry_ReturnsDecompressedData(bool async) + { + MemoryStream ms = await StreamHelpers.CreateTempCopyStream(compat("deflate64.zip")); + + using ZipStreamReader reader = new(ms); + + ZipForwardReadEntry entry = async + ? await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + + Assert.NotNull(entry); + Assert.Equal(ZipCompressionMethod.Deflate64, entry.CompressionMethod); + Assert.NotNull(entry.DataStream); + + byte[] data = await ReadStreamFully(entry.DataStream, async); + Assert.True(data.Length > 0); + } + + [Theory] + [InlineData("empty.txt", false, true)] + [InlineData("empty.txt", false, false)] + [InlineData("mydir/", true, true)] + [InlineData("mydir/", true, false)] + public async Task ZeroLengthEntry_HasNullDataStream(string entryName, bool expectedIsDirectory, bool async) + { + using MemoryStream ms = new(); + using (ZipArchive archive = new(ms, ZipArchiveMode.Create, leaveOpen: true)) + { + archive.CreateEntry(entryName); + } + + ms.Position = 0; + using ZipStreamReader reader = new(ms); + + ZipForwardReadEntry entry = async + ? 
await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + + Assert.NotNull(entry); + Assert.Equal(entryName, entry.FullName); + Assert.Equal(expectedIsDirectory, entry.IsDirectory); + Assert.Null(entry.DataStream); + Assert.Equal(0, entry.CompressedLength); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task EncryptedEntry_ReportsIsEncrypted(bool async) + { + MemoryStream ms = await StreamHelpers.CreateTempCopyStream(zfile("encrypted_entries_weak.zip")); + + using ZipStreamReader reader = new(ms); + + bool foundEncrypted = false; + bool foundUnencrypted = false; + + ZipForwardReadEntry entry; + while ((entry = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry()) is not null) + { + if (entry.IsEncrypted) + foundEncrypted = true; + else + foundUnencrypted = true; + } + + Assert.True(foundEncrypted); + Assert.True(foundUnencrypted); + } + + [Fact] + public async Task AsyncCancellation_ThrowsOperationCanceled() + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + using ZipStreamReader reader = new(archiveStream); + + using CancellationTokenSource cts = new(); + cts.Cancel(); + + await Assert.ThrowsAnyAsync( + () => reader.GetNextEntryAsync(cancellationToken: cts.Token).AsTask()); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task Dispose_WhileEntryPartiallyRead_DoesNotThrow(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + ZipStreamReader reader = new(archiveStream); + + ZipForwardReadEntry entry = async ? 
await reader.GetNextEntryAsync() : reader.GetNextEntry(); Assert.NotNull(entry); - Assert.Equal(ZipCompressionMethod.Stored, entry.CompressionMethod); - byte[] buffer = new byte[256]; + byte[] partial = new byte[5]; + if (async) + await entry.DataStream!.ReadAsync(partial); + else + entry.DataStream!.Read(partial); + + if (async) + await reader.DisposeAsync(); + else + reader.Dispose(); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task EmptyArchive_ReturnsNull(bool async) + { + using MemoryStream ms = new(); + using (new ZipArchive(ms, ZipArchiveMode.Create, leaveOpen: true)) { } + + ms.Position = 0; + using ZipStreamReader reader = new(ms); + + ZipForwardReadEntry entry = async + ? await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + + Assert.Null(entry); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task LeaveOpen_DoesNotDisposeStream(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + + ZipStreamReader reader = new(archiveStream, leaveOpen: true); + if (async) + await reader.DisposeAsync(); + else + reader.Dispose(); + + Assert.True(archiveStream.CanRead); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task Constructor_WithEncoding_ReadsEntryNames(bool async) + { + using MemoryStream ms = new(); + using (ZipArchive archive = new(ms, ZipArchiveMode.Create, leaveOpen: true)) + { + AddEntry(archive, "hello.txt", s_smallContent, CompressionLevel.Optimal); + } + + ms.Position = 0; + using ZipStreamReader reader = new(ms, entryNameEncoding: Encoding.UTF8, leaveOpen: true); + + ZipForwardReadEntry entry = async + ? 
await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + + Assert.NotNull(entry); + Assert.Equal("hello.txt", entry.FullName); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task MultipleEntries_MixedSkipAndRead(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + using ZipStreamReader reader = new(archiveStream); + + // Skip first entry + ZipForwardReadEntry first = async + ? await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + Assert.NotNull(first); + + // Read second entry fully + ZipForwardReadEntry second = async + ? await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + Assert.NotNull(second); + byte[] data = await ReadStreamFully(second.DataStream!, async); + Assert.Equal(s_mediumContent, data); + + // Skip third entry + ZipForwardReadEntry third = async + ? await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + Assert.NotNull(third); + + // Confirm end + ZipForwardReadEntry end = async + ? await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + Assert.Null(end); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task GetNextEntry_AfterDispose_ThrowsObjectDisposedException(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + ZipStreamReader reader = new(archiveStream); + + // Read one entry to ensure the reader was functional. + ZipForwardReadEntry entry = async + ? 
await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + Assert.NotNull(entry); + + if (async) + await reader.DisposeAsync(); + else + reader.Dispose(); if (async) { - await Assert.ThrowsAsync(() => entry.ReadAsync(buffer).AsTask()); + await Assert.ThrowsAsync(() => reader.GetNextEntryAsync().AsTask()); } else { - Assert.Throws(() => entry.Read(buffer)); + Assert.Throws(() => reader.GetNextEntry()); } } - /// - /// Creates a ZIP archive with three entries of different sizes (small, medium, large). - /// When is true, the archive is written to a seekable stream - /// so entries have known sizes. When false, a non-seekable wrapper is used so the - /// archive writer sets the data descriptor bit. - /// + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task CopyData_WithDataDescriptor_PreservesEntryAfterAdvancing(bool async) + { + // seekable: false triggers data descriptors for Deflate entries. + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: false); + + using MemoryStream archiveStream = new(zipBytes); + using ZipStreamReader reader = new(archiveStream); + + // Read first entry with copyData: true — exercises the path that + // eagerly decompresses, copies into a MemoryStream, then reads the + // data descriptor to validate CRC. + ZipForwardReadEntry first = async + ? await reader.GetNextEntryAsync(copyData: true) + : reader.GetNextEntry(copyData: true); + + Assert.NotNull(first); + Assert.NotNull(first.DataStream); + + // Advance to the next entry to confirm the stream position is correct + // after consuming the data descriptor. + ZipForwardReadEntry second = async + ? await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + Assert.NotNull(second); + Assert.Equal("medium.bin", second.FullName); + + // The copied first entry's data should still be fully readable. 
+ first.DataStream.Position = 0; + byte[] decompressed = await ReadStreamFully(first.DataStream, async); + Assert.Equal(s_smallContent, decompressed); + + // Also verify the second entry's data is correct. + byte[] secondData = await ReadStreamFully(second.DataStream!, async); + Assert.Equal(s_mediumContent, secondData); + } + private static byte[] CreateZipWithEntries(CompressionLevel compressionLevel, bool seekable) { MemoryStream ms = new(); @@ -189,7 +506,7 @@ private static void AddEntry(ZipArchive archive, string name, byte[] contents, C stream.Write(contents); } - private static async Task ReadEntryFully(ZipStreamEntry entry, bool async) + private static async Task ReadStreamFully(Stream stream, bool async) { using MemoryStream result = new(); byte[] buffer = new byte[4096]; @@ -197,14 +514,14 @@ private static async Task ReadEntryFully(ZipStreamEntry entry, bool asyn int bytesRead; if (async) { - while ((bytesRead = await entry.ReadAsync(buffer)) > 0) + while ((bytesRead = await stream.ReadAsync(buffer)) > 0) { result.Write(buffer, 0, bytesRead); } } else { - while ((bytesRead = entry.Read(buffer)) > 0) + while ((bytesRead = stream.Read(buffer)) > 0) { result.Write(buffer, 0, bytesRead); } @@ -212,5 +529,119 @@ private static async Task ReadEntryFully(ZipStreamEntry entry, bool asyn return result.ToArray(); } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task ExtractToFile_CreatesFileWithExpectedContent(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + using ZipStreamReader reader = new(archiveStream); + + ZipForwardReadEntry entry = async + ? 
await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + + Assert.NotNull(entry); + Assert.Equal("small.txt", entry.FullName); + + string tempPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName()); + try + { + if (async) + await entry.ExtractToFileAsync(tempPath, overwrite: true); + else + entry.ExtractToFile(tempPath, overwrite: true); + + byte[] written = File.ReadAllBytes(tempPath); + Assert.Equal(s_smallContent, written); + } + finally + { + File.Delete(tempPath); + } + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task ExtractToFile_OverwriteTrue_ReplacesExistingFile(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + using ZipStreamReader reader = new(archiveStream); + + ZipForwardReadEntry entry = async + ? await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + + Assert.NotNull(entry); + + string tempPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName()); + try + { + // Create a pre-existing file with different content. + File.WriteAllText(tempPath, "old content"); + + if (async) + await entry.ExtractToFileAsync(tempPath, overwrite: true); + else + entry.ExtractToFile(tempPath, overwrite: true); + + byte[] written = File.ReadAllBytes(tempPath); + Assert.Equal(s_smallContent, written); + } + finally + { + File.Delete(tempPath); + } + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task ExtractToFile_OverwriteFalse_ThrowsWhenFileExists(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + using ZipStreamReader reader = new(archiveStream); + + ZipForwardReadEntry entry = async + ? 
await reader.GetNextEntryAsync() + : reader.GetNextEntry(); + + Assert.NotNull(entry); + + string tempPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName()); + try + { + File.WriteAllText(tempPath, "existing"); + + if (async) + await Assert.ThrowsAsync(() => entry.ExtractToFileAsync(tempPath, overwrite: false)); + else + Assert.Throws(() => entry.ExtractToFile(tempPath, overwrite: false)); + } + finally + { + File.Delete(tempPath); + } + } + + [Fact] + public void Constructor_NullStream_ThrowsArgumentNullException() + { + Assert.Throws("stream", () => new ZipStreamReader(null!)); + } + + [Fact] + public void Constructor_UnreadableStream_ThrowsArgumentException() + { + using MemoryStream ms = new(); + using WrappedStream unreadable = new(ms, canRead: false, canWrite: true, canSeek: true); + + Assert.Throws("stream", () => new ZipStreamReader(unreadable)); + } } } From 20684bf508d1f0d18598587860c6485c265eef92 Mon Sep 17 00:00:00 2001 From: alinpahontu2912 Date: Fri, 27 Mar 2026 11:58:58 +0100 Subject: [PATCH 5/8] fix copilot comments --- .../System/IO/Compression/ZipCustomStreams.cs | 15 +++++- .../System/IO/Compression/ZipStreamReader.cs | 8 ++- .../ZipArchive/zip_StreamEntryReadTests.cs | 52 +++++++++---------- 3 files changed, 46 insertions(+), 29 deletions(-) diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs index 4c019aac0615a2..c24f244de9f2cf 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs @@ -208,12 +208,13 @@ private void NotifyWrite() public override void Flush() { ThrowIfDisposed(); + _baseStream.Flush(); } public override Task FlushAsync(CancellationToken cancellationToken) { ThrowIfDisposed(); - return Task.CompletedTask; + return _baseStream.FlushAsync(cancellationToken); } protected 
override void Dispose(bool disposing) @@ -724,7 +725,7 @@ public BoundedReadOnlyStream(Stream baseStream, long length) _remaining = length; } - public override bool CanRead => !_isDisposed; + public override bool CanRead => !_isDisposed && _baseStream.CanRead; public override bool CanSeek => false; public override bool CanWrite => false; public override long Length => throw new NotSupportedException(); @@ -735,11 +736,19 @@ public override long Position set => throw new NotSupportedException(); } + private void ThrowIfDisposed() + { + if (_isDisposed) + throw new ObjectDisposedException(GetType().ToString(), SR.HiddenStreamName); + } + public override int Read(byte[] buffer, int offset, int count) => Read(buffer.AsSpan(offset, count)); public override int Read(Span buffer) { + ThrowIfDisposed(); + if (_remaining <= 0) { return 0; @@ -761,6 +770,8 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel public override async ValueTask ReadAsync(Memory buffer, CancellationToken cancellationToken = default) { + ThrowIfDisposed(); + if (_remaining <= 0) { return 0; diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs index 4ceaf899aae495..bddab5e14f6f46 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs @@ -109,7 +109,7 @@ public ZipStreamReader(Stream stream, Encoding? entryNameEncoding, bool leaveOpe return null; } - if (!headerBytes.StartsWith(ZipLocalFileHeader.SignatureConstantBytes)) + if (!headerBytes.AsSpan().StartsWith(ZipLocalFileHeader.SignatureConstantBytes)) { _reachedEnd = true; return null; @@ -130,8 +130,10 @@ public ZipStreamReader(Stream stream, Encoding? 
entryNameEncoding, bool leaveOpe (ZipCompressionMethod)compressionMethod, compressedSize, uncompressedSize, crc32, hasDataDescriptor, isEncrypted, out CrcValidatingReadStream? crcStream); + Stream? originalDataStream = null; if (copyData && dataStream is not null) { + originalDataStream = dataStream; MemoryStream ms = new(); dataStream.CopyTo(ms); ms.Position = 0; @@ -148,6 +150,10 @@ public ZipStreamReader(Stream stream, Encoding? entryNameEncoding, bool leaveOpe ReadDataDescriptor(entry, crcStream); } + // Dispose the original decompression/CRC stream after copying (and after + // reading the data descriptor when applicable) to release inflater resources. + originalDataStream?.Dispose(); + if (!copyData) { _previousEntry = entry; diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs index cd1d7fd515ec6b..ee2e1d6561d30c 100644 --- a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs +++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs @@ -33,7 +33,7 @@ public async Task Read_DeflateWithKnownSize_ReturnsDecompressedData(bool async) for (int i = 0; i < expectedContents.Length; i++) { - ZipForwardReadEntry entry = async + ZipForwardReadEntry? entry = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); @@ -46,7 +46,7 @@ public async Task Read_DeflateWithKnownSize_ReturnsDecompressedData(bool async) Assert.Equal(expectedContents[i], decompressed); } - ZipForwardReadEntry end = async + ZipForwardReadEntry? end = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); Assert.Null(end); @@ -64,7 +64,7 @@ public async Task Read_StoredWithKnownSize_ReturnsUncompressedData(bool async) for (int i = 0; i < expectedContents.Length; i++) { - ZipForwardReadEntry entry = async + ZipForwardReadEntry? entry = async ? 
await reader.GetNextEntryAsync() : reader.GetNextEntry(); @@ -89,7 +89,7 @@ public async Task Read_DeflateWithDataDescriptor_ReturnsDecompressedData(bool as for (int i = 0; i < expectedContents.Length; i++) { - ZipForwardReadEntry entry = async + ZipForwardReadEntry? entry = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); @@ -130,14 +130,14 @@ public async Task CopyData_PreservesEntryAfterAdvancing(bool async) using MemoryStream archiveStream = new(zipBytes); using ZipStreamReader reader = new(archiveStream); - ZipForwardReadEntry entry = async + ZipForwardReadEntry? entry = async ? await reader.GetNextEntryAsync(copyData: true) : reader.GetNextEntry(copyData: true); Assert.NotNull(entry); Assert.NotNull(entry.DataStream); - ZipForwardReadEntry next = async + ZipForwardReadEntry? next = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); Assert.NotNull(next); @@ -156,7 +156,7 @@ public async Task PartialRead_ThenGetNextEntry_AdvancesCorrectly(bool async) using MemoryStream archiveStream = new(zipBytes); using ZipStreamReader reader = new(archiveStream); - ZipForwardReadEntry first = async + ZipForwardReadEntry? first = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); Assert.NotNull(first); @@ -167,7 +167,7 @@ public async Task PartialRead_ThenGetNextEntry_AdvancesCorrectly(bool async) else first.DataStream!.Read(partial); - ZipForwardReadEntry second = async + ZipForwardReadEntry? second = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); @@ -187,7 +187,7 @@ public async Task PartialRead_DataDescriptor_ThenGetNextEntry_AdvancesCorrectly( using MemoryStream archiveStream = new(zipBytes); using ZipStreamReader reader = new(archiveStream); - ZipForwardReadEntry first = async + ZipForwardReadEntry? first = async ? 
await reader.GetNextEntryAsync() : reader.GetNextEntry(); Assert.NotNull(first); @@ -198,7 +198,7 @@ public async Task PartialRead_DataDescriptor_ThenGetNextEntry_AdvancesCorrectly( else first.DataStream!.Read(partial); - ZipForwardReadEntry second = async + ZipForwardReadEntry? second = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); @@ -217,7 +217,7 @@ public async Task Deflate64Entry_ReturnsDecompressedData(bool async) using ZipStreamReader reader = new(ms); - ZipForwardReadEntry entry = async + ZipForwardReadEntry? entry = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); @@ -245,7 +245,7 @@ public async Task ZeroLengthEntry_HasNullDataStream(string entryName, bool expec ms.Position = 0; using ZipStreamReader reader = new(ms); - ZipForwardReadEntry entry = async + ZipForwardReadEntry? entry = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); @@ -267,7 +267,7 @@ public async Task EncryptedEntry_ReportsIsEncrypted(bool async) bool foundEncrypted = false; bool foundUnencrypted = false; - ZipForwardReadEntry entry; + ZipForwardReadEntry? entry; while ((entry = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry()) is not null) { if (entry.IsEncrypted) @@ -304,7 +304,7 @@ public async Task Dispose_WhileEntryPartiallyRead_DoesNotThrow(bool async) using MemoryStream archiveStream = new(zipBytes); ZipStreamReader reader = new(archiveStream); - ZipForwardReadEntry entry = async + ZipForwardReadEntry? entry = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); @@ -332,7 +332,7 @@ public async Task EmptyArchive_ReturnsNull(bool async) ms.Position = 0; using ZipStreamReader reader = new(ms); - ZipForwardReadEntry entry = async + ZipForwardReadEntry? entry = async ? 
await reader.GetNextEntryAsync() : reader.GetNextEntry(); @@ -369,7 +369,7 @@ public async Task Constructor_WithEncoding_ReadsEntryNames(bool async) ms.Position = 0; using ZipStreamReader reader = new(ms, entryNameEncoding: Encoding.UTF8, leaveOpen: true); - ZipForwardReadEntry entry = async + ZipForwardReadEntry? entry = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); @@ -387,13 +387,13 @@ public async Task MultipleEntries_MixedSkipAndRead(bool async) using ZipStreamReader reader = new(archiveStream); // Skip first entry - ZipForwardReadEntry first = async + ZipForwardReadEntry? first = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); Assert.NotNull(first); // Read second entry fully - ZipForwardReadEntry second = async + ZipForwardReadEntry? second = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); Assert.NotNull(second); @@ -401,13 +401,13 @@ public async Task MultipleEntries_MixedSkipAndRead(bool async) Assert.Equal(s_mediumContent, data); // Skip third entry - ZipForwardReadEntry third = async + ZipForwardReadEntry? third = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); Assert.NotNull(third); // Confirm end - ZipForwardReadEntry end = async + ZipForwardReadEntry? end = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); Assert.Null(end); @@ -423,7 +423,7 @@ public async Task GetNextEntry_AfterDispose_ThrowsObjectDisposedException(bool a ZipStreamReader reader = new(archiveStream); // Read one entry to ensure the reader was functional. - ZipForwardReadEntry entry = async + ZipForwardReadEntry? entry = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); Assert.NotNull(entry); @@ -456,7 +456,7 @@ public async Task CopyData_WithDataDescriptor_PreservesEntryAfterAdvancing(bool // Read first entry with copyData: true — exercises the path that // eagerly decompresses, copies into a MemoryStream, then reads the // data descriptor to validate CRC. 
- ZipForwardReadEntry first = async + ZipForwardReadEntry? first = async ? await reader.GetNextEntryAsync(copyData: true) : reader.GetNextEntry(copyData: true); @@ -465,7 +465,7 @@ public async Task CopyData_WithDataDescriptor_PreservesEntryAfterAdvancing(bool // Advance to the next entry to confirm the stream position is correct // after consuming the data descriptor. - ZipForwardReadEntry second = async + ZipForwardReadEntry? second = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); Assert.NotNull(second); @@ -539,7 +539,7 @@ public async Task ExtractToFile_CreatesFileWithExpectedContent(bool async) using MemoryStream archiveStream = new(zipBytes); using ZipStreamReader reader = new(archiveStream); - ZipForwardReadEntry entry = async + ZipForwardReadEntry? entry = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); @@ -572,7 +572,7 @@ public async Task ExtractToFile_OverwriteTrue_ReplacesExistingFile(bool async) using MemoryStream archiveStream = new(zipBytes); using ZipStreamReader reader = new(archiveStream); - ZipForwardReadEntry entry = async + ZipForwardReadEntry? entry = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry(); @@ -607,7 +607,7 @@ public async Task ExtractToFile_OverwriteFalse_ThrowsWhenFileExists(bool async) using MemoryStream archiveStream = new(zipBytes); using ZipStreamReader reader = new(archiveStream); - ZipForwardReadEntry entry = async + ZipForwardReadEntry? entry = async ? 
await reader.GetNextEntryAsync() : reader.GetNextEntry(); From 3507cc678880374b34d6474ba1ab697a6863426f Mon Sep 17 00:00:00 2001 From: alinpahontu2912 Date: Mon, 30 Mar 2026 15:11:43 +0200 Subject: [PATCH 6/8] added new stream to avoid deflatestream seekback error and address comments --- .../ref/System.IO.Compression.cs | 4 +- .../src/Resources/Strings.resx | 3 + .../System/IO/Compression/ZipCustomStreams.cs | 264 +++++++++++++++- .../IO/Compression/ZipForwardReadEntry.cs | 45 ++- .../System/IO/Compression/ZipStreamReader.cs | 20 +- .../ZipArchive/zip_StreamEntryReadTests.cs | 285 ++++++++---------- 6 files changed, 442 insertions(+), 179 deletions(-) diff --git a/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs b/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs index ab2e7eb6ad878f..e2a0b46ca17a8d 100644 --- a/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs +++ b/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs @@ -127,9 +127,9 @@ internal ZipArchiveEntry() { } public string Name { get { throw null; } } public void Delete() { } public System.IO.Stream Open() { throw null; } - public System.IO.Stream Open(FileAccess access) { throw null; } - public System.Threading.Tasks.Task OpenAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public System.IO.Stream Open(System.IO.FileAccess access) { throw null; } public System.Threading.Tasks.Task OpenAsync(System.IO.FileAccess access, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public System.Threading.Tasks.Task OpenAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } public override string ToString() { throw null; } } public enum ZipArchiveMode diff --git a/src/libraries/System.IO.Compression/src/Resources/Strings.resx 
b/src/libraries/System.IO.Compression/src/Resources/Strings.resx index 458d6bdd77c969..aebc0c92634465 100644 --- a/src/libraries/System.IO.Compression/src/Resources/Strings.resx +++ b/src/libraries/System.IO.Compression/src/Resources/Strings.resx @@ -383,4 +383,7 @@ Encrypted entries with data descriptors cannot be read in forward-only mode because the compressed size is unknown. + + Cannot extract a directory entry or an entry with no data to a file. Check IsDirectory and DataStream before calling ExtractToFile. + diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs index c24f244de9f2cf..172a18b6ba467c 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs @@ -95,10 +95,8 @@ private void ThrowIfCantSeek() public override int Read(byte[] buffer, int offset, int count) { - ThrowIfDisposed(); - ThrowIfCantRead(); - - return _baseStream.Read(buffer, offset, count); + ValidateBufferArguments(buffer, offset, count); + return Read(buffer.AsSpan(offset, count)); } public override int Read(Span buffer) @@ -119,10 +117,8 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { - ThrowIfDisposed(); - ThrowIfCantRead(); - - return _baseStream.ReadAsync(buffer, offset, count, cancellationToken); + ValidateBufferArguments(buffer, offset, count); + return ReadAsync(buffer.AsMemory(offset, count), cancellationToken).AsTask(); } public override ValueTask ReadAsync(Memory buffer, CancellationToken cancellationToken = default) @@ -1064,4 +1060,256 @@ public override async ValueTask DisposeAsync() await base.DisposeAsync().ConfigureAwait(false); } } + + /// + /// Wraps a non-seekable stream and reports as + /// so that can rewind + /// unconsumed 
input via Seek(-n, SeekOrigin.Current) after + /// decompression finishes. Maintains a rolling history buffer of recently + /// read bytes to satisfy that backward seek. + /// + internal sealed class ReadAheadStream : Stream + { + private readonly Stream _baseStream; + private readonly byte[] _history; + private int _historyCount; + private byte[]? _pushback; + private int _pushbackOffset; + private int _pushbackCount; + private long _position; + private bool _isDisposed; + + public ReadAheadStream(Stream baseStream, int historyCapacity = 8192) + { + _baseStream = baseStream; + _history = new byte[historyCapacity]; + } + + public override bool CanRead => !_isDisposed && _baseStream.CanRead; + public override bool CanSeek => !_isDisposed; + public override bool CanWrite => false; + + public override long Length + { + get + { + ThrowIfDisposed(); + throw new NotSupportedException(); + } + } + + public override long Position + { + get + { + ThrowIfDisposed(); + return _position; + } + set + { + ThrowIfDisposed(); + throw new NotSupportedException(); + } + } + + public override int Read(byte[] buffer, int offset, int count) + { + ValidateBufferArguments(buffer, offset, count); + return Read(buffer.AsSpan(offset, count)); + } + + public override int Read(Span buffer) + { + ThrowIfDisposed(); + + int totalRead = 0; + + if (_pushbackCount > 0) + { + int fromPushback = Math.Min(buffer.Length, _pushbackCount); + _pushback.AsSpan(_pushbackOffset, fromPushback).CopyTo(buffer); + RecordHistory(buffer.Slice(0, fromPushback)); + _pushbackOffset += fromPushback; + _pushbackCount -= fromPushback; + totalRead += fromPushback; + buffer = buffer.Slice(fromPushback); + + if (_pushbackCount == 0) + { + _pushback = null; + } + } + + if (buffer.Length > 0) + { + int fromBase = _baseStream.Read(buffer); + if (fromBase > 0) + { + RecordHistory(buffer.Slice(0, fromBase)); + totalRead += fromBase; + } + } + + _position += totalRead; + return totalRead; + } + + public override Task 
ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) + { + ValidateBufferArguments(buffer, offset, count); + return ReadAsync(buffer.AsMemory(offset, count), cancellationToken).AsTask(); + } + + public override async ValueTask ReadAsync(Memory buffer, CancellationToken cancellationToken = default) + { + ThrowIfDisposed(); + + int totalRead = 0; + + if (_pushbackCount > 0) + { + int fromPushback = Math.Min(buffer.Length, _pushbackCount); + _pushback.AsSpan(_pushbackOffset, fromPushback).CopyTo(buffer.Span); + RecordHistory(buffer.Span.Slice(0, fromPushback)); + _pushbackOffset += fromPushback; + _pushbackCount -= fromPushback; + totalRead += fromPushback; + buffer = buffer.Slice(fromPushback); + + if (_pushbackCount == 0) + { + _pushback = null; + } + } + + if (buffer.Length > 0) + { + int fromBase = await _baseStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false); + if (fromBase > 0) + { + RecordHistory(buffer.Span.Slice(0, fromBase)); + totalRead += fromBase; + } + } + + _position += totalRead; + return totalRead; + } + + public override long Seek(long offset, SeekOrigin origin) + { + ThrowIfDisposed(); + + if (origin is SeekOrigin.Current && offset < 0) + { + int rewindBytes = checked((int)(-offset)); + + // Merge any unread pushback bytes back into history before + // processing a new seek, so they are not silently lost. + if (_pushbackCount > 0) + { + RestorePushbackToHistory(); + } + + if (rewindBytes > _historyCount) + { + throw new IOException(SR.IO_SeekBeforeBegin); + } + + _pushback = new byte[rewindBytes]; + Array.Copy(_history, _historyCount - rewindBytes, _pushback, 0, rewindBytes); + _pushbackOffset = 0; + _pushbackCount = rewindBytes; + _historyCount -= rewindBytes; + _position -= rewindBytes; + + return _position; + } + + throw new NotSupportedException(); + } + + /// + /// Merges any remaining unread pushback bytes back into history so they + /// are not lost when a new seek is requested. 
+ /// + private void RestorePushbackToHistory() + { + if (_pushbackCount > 0) + { + RecordHistory(_pushback.AsSpan(_pushbackOffset, _pushbackCount)); + _pushbackCount = 0; + _pushback = null; + } + } + + private void RecordHistory(ReadOnlySpan data) + { + if (data.Length >= _history.Length) + { + data.Slice(data.Length - _history.Length).CopyTo(_history); + _historyCount = _history.Length; + } + else if (_historyCount + data.Length <= _history.Length) + { + data.CopyTo(_history.AsSpan(_historyCount)); + _historyCount += data.Length; + } + else + { + int toKeep = _history.Length - data.Length; + Array.Copy(_history, _historyCount - toKeep, _history, 0, toKeep); + data.CopyTo(_history.AsSpan(toKeep)); + _historyCount = _history.Length; + } + } + + public override void Flush() + { + ThrowIfDisposed(); + } + + public override Task FlushAsync(CancellationToken cancellationToken) + { + ThrowIfDisposed(); + return Task.CompletedTask; + } + + public override void SetLength(long value) + { + ThrowIfDisposed(); + throw new NotSupportedException(); + } + + public override void Write(byte[] buffer, int offset, int count) + { + ThrowIfDisposed(); + throw new NotSupportedException(); + } + + private void ThrowIfDisposed() + { + ObjectDisposedException.ThrowIf(_isDisposed, this); + } + + protected override void Dispose(bool disposing) + { + if (disposing && !_isDisposed) + { + _baseStream.Dispose(); + _isDisposed = true; + } + base.Dispose(disposing); + } + + public override async ValueTask DisposeAsync() + { + if (!_isDisposed) + { + await _baseStream.DisposeAsync().ConfigureAwait(false); + _isDisposed = true; + } + await base.DisposeAsync().ConfigureAwait(false); + } + } } diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipForwardReadEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipForwardReadEntry.cs index 35b230ecffdd9c..0ba6c8d808d335 100644 --- 
a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipForwardReadEntry.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipForwardReadEntry.cs @@ -135,7 +135,7 @@ internal ZipForwardReadEntry( /// the data has been copied into a that remains valid /// independently. /// - public Stream? DataStream { get; internal set; } + public Stream? DataStream { get; } /// /// Extracts the entry to a file on disk. @@ -145,13 +145,31 @@ internal ZipForwardReadEntry( /// to overwrite an existing file; otherwise . /// /// is null or empty. + /// + /// The entry is a directory or has no data ( is ). + /// public void ExtractToFile(string destinationFileName, bool overwrite) { ArgumentException.ThrowIfNullOrEmpty(destinationFileName); + if (DataStream is null) + { + throw new InvalidOperationException(SR.ZipStreamEntryNoDataToExtract); + } + FileMode mode = overwrite ? FileMode.Create : FileMode.CreateNew; using FileStream fs = new(destinationFileName, mode, FileAccess.Write, FileShare.None); - DataStream?.CopyTo(fs); + DataStream.CopyTo(fs); + + try + { + File.SetLastWriteTime(destinationFileName, LastModified.DateTime); + } + catch + { + // Some platforms (e.g. Android) may not support setting the last write time. + // Extraction should not fail because of that. + } } /// @@ -163,20 +181,35 @@ public void ExtractToFile(string destinationFileName, bool overwrite) /// /// A token to monitor for cancellation requests. /// is null or empty. + /// + /// The entry is a directory or has no data ( is ). + /// public async Task ExtractToFileAsync(string destinationFileName, bool overwrite, CancellationToken cancellationToken = default) { ArgumentException.ThrowIfNullOrEmpty(destinationFileName); + if (DataStream is null) + { + throw new InvalidOperationException(SR.ZipStreamEntryNoDataToExtract); + } + FileMode mode = overwrite ? 
FileMode.Create : FileMode.CreateNew; FileStream fs = new(destinationFileName, mode, FileAccess.Write, FileShare.None, bufferSize: 0x1000, useAsync: true); await using (fs.ConfigureAwait(false)) { - if (DataStream is not null) - { - await DataStream.CopyToAsync(fs, cancellationToken).ConfigureAwait(false); - } + await DataStream.CopyToAsync(fs, cancellationToken).ConfigureAwait(false); + } + + try + { + File.SetLastWriteTime(destinationFileName, LastModified.DateTime); + } + catch + { + // Some platforms (e.g. Android) may not support setting the last write time. + // Extraction should not fail because of that. } } diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs index bddab5e14f6f46..33262d21ab3986 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs @@ -71,7 +71,10 @@ public ZipStreamReader(Stream stream, Encoding? entryNameEncoding, bool leaveOpe throw new ArgumentException(SR.NotSupported_UnreadableStream, nameof(stream)); } - _archiveStream = stream; + // ReadAheadStream makes non-seekable streams appear seekable so that + // DeflateStream.TryRewindStream can push back unconsumed input after + // decompression finishes. Already-seekable streams need no wrapper. + _archiveStream = stream.CanSeek ? stream : new ReadAheadStream(stream); _leaveOpen = leaveOpen; _entryNameEncoding = entryNameEncoding; } @@ -126,8 +129,10 @@ public ZipStreamReader(Stream stream, Encoding? entryNameEncoding, bool leaveOpe bool isEncrypted = (generalPurposeBitFlags & 1) != 0; + ZipCompressionMethod method = (ZipCompressionMethod)compressionMethod; + Stream? 
dataStream = CreateDataStream( - (ZipCompressionMethod)compressionMethod, compressedSize, uncompressedSize, + method, compressedSize, uncompressedSize, crc32, hasDataDescriptor, isEncrypted, out CrcValidatingReadStream? crcStream); Stream? originalDataStream = null; @@ -141,7 +146,7 @@ public ZipStreamReader(Stream stream, Encoding? entryNameEncoding, bool leaveOpe } ZipForwardReadEntry entry = new( - fullName, (ZipCompressionMethod)compressionMethod, lastModified, crc32, + fullName, method, lastModified, crc32, compressedSize, uncompressedSize, generalPurposeBitFlags, versionNeeded, hasDataDescriptor, dataStream); @@ -221,8 +226,10 @@ public ZipStreamReader(Stream stream, Encoding? entryNameEncoding, bool leaveOpe method, compressedSize, uncompressedSize, crc32, hasDataDescriptor, isEncrypted, out CrcValidatingReadStream? crcStream); + Stream? originalDataStream = null; if (copyData && dataStream is not null) { + originalDataStream = dataStream; MemoryStream ms = new(); await dataStream.CopyToAsync(ms, cancellationToken).ConfigureAwait(false); ms.Position = 0; @@ -239,6 +246,13 @@ public ZipStreamReader(Stream stream, Encoding? entryNameEncoding, bool leaveOpe await ReadDataDescriptorAsync(entry, crcStream, cancellationToken).ConfigureAwait(false); } + // Dispose the original decompression/CRC stream after copying (and after + // reading the data descriptor when applicable) to release inflater resources. 
+ if (originalDataStream is not null) + { + await originalDataStream.DisposeAsync().ConfigureAwait(false); + } + if (!copyData) { _previousEntry = entry; diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs index ee2e1d6561d30c..63785e922c4398 100644 --- a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs +++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_StreamEntryReadTests.cs @@ -33,9 +33,7 @@ public async Task Read_DeflateWithKnownSize_ReturnsDecompressedData(bool async) for (int i = 0; i < expectedContents.Length; i++) { - ZipForwardReadEntry? entry = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? entry = await GetNextEntry(reader, async); Assert.NotNull(entry); Assert.NotNull(entry.DataStream); @@ -46,9 +44,7 @@ public async Task Read_DeflateWithKnownSize_ReturnsDecompressedData(bool async) Assert.Equal(expectedContents[i], decompressed); } - ZipForwardReadEntry? end = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? end = await GetNextEntry(reader, async); Assert.Null(end); } @@ -64,9 +60,7 @@ public async Task Read_StoredWithKnownSize_ReturnsUncompressedData(bool async) for (int i = 0; i < expectedContents.Length; i++) { - ZipForwardReadEntry? entry = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? 
entry = await GetNextEntry(reader, async); Assert.NotNull(entry); Assert.NotNull(entry.DataStream); @@ -85,13 +79,12 @@ public async Task Read_DeflateWithDataDescriptor_ReturnsDecompressedData(bool as byte[][] expectedContents = [s_smallContent, s_mediumContent, s_largeContent]; using MemoryStream archiveStream = new(zipBytes); - using ZipStreamReader reader = new(archiveStream); + using WrappedStream nonSeekableStream = new(archiveStream, canRead: true, canWrite: false, canSeek: false); + using ZipStreamReader reader = new(nonSeekableStream); for (int i = 0; i < expectedContents.Length; i++) { - ZipForwardReadEntry? entry = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? entry = await GetNextEntry(reader, async); Assert.NotNull(entry); Assert.NotNull(entry.DataStream); @@ -109,7 +102,8 @@ public async Task Read_StoredWithDataDescriptor_ThrowsNotSupported(bool async) byte[] zipBytes = CreateZipWithEntries(CompressionLevel.NoCompression, seekable: false); using MemoryStream archiveStream = new(zipBytes); - using ZipStreamReader reader = new(archiveStream); + using WrappedStream nonSeekableStream = new(archiveStream, canRead: true, canWrite: false, canSeek: false); + using ZipStreamReader reader = new(nonSeekableStream); if (async) { @@ -128,18 +122,15 @@ public async Task CopyData_PreservesEntryAfterAdvancing(bool async) byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); using MemoryStream archiveStream = new(zipBytes); - using ZipStreamReader reader = new(archiveStream); + using WrappedStream nonSeekableStream = new(archiveStream, canRead: true, canWrite: false, canSeek: false); + using ZipStreamReader reader = new(nonSeekableStream); - ZipForwardReadEntry? entry = async - ? await reader.GetNextEntryAsync(copyData: true) - : reader.GetNextEntry(copyData: true); + ZipForwardReadEntry? 
entry = await GetNextEntry(reader, async, copyData: true); Assert.NotNull(entry); Assert.NotNull(entry.DataStream); - ZipForwardReadEntry? next = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? next = await GetNextEntry(reader, async); Assert.NotNull(next); entry.DataStream.Position = 0; @@ -156,20 +147,13 @@ public async Task PartialRead_ThenGetNextEntry_AdvancesCorrectly(bool async) using MemoryStream archiveStream = new(zipBytes); using ZipStreamReader reader = new(archiveStream); - ZipForwardReadEntry? first = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? first = await GetNextEntry(reader, async); Assert.NotNull(first); byte[] partial = new byte[5]; - if (async) - await first.DataStream!.ReadAsync(partial); - else - first.DataStream!.Read(partial); + await ReadStream(first.DataStream!, partial, async); - ZipForwardReadEntry? second = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? second = await GetNextEntry(reader, async); Assert.NotNull(second); Assert.Equal("medium.bin", second.FullName); @@ -187,20 +171,13 @@ public async Task PartialRead_DataDescriptor_ThenGetNextEntry_AdvancesCorrectly( using MemoryStream archiveStream = new(zipBytes); using ZipStreamReader reader = new(archiveStream); - ZipForwardReadEntry? first = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? first = await GetNextEntry(reader, async); Assert.NotNull(first); byte[] partial = new byte[3]; - if (async) - await first.DataStream!.ReadAsync(partial); - else - first.DataStream!.Read(partial); + await ReadStream(first.DataStream!, partial, async); - ZipForwardReadEntry? second = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? 
second = await GetNextEntry(reader, async); Assert.NotNull(second); Assert.Equal("medium.bin", second.FullName); @@ -217,9 +194,7 @@ public async Task Deflate64Entry_ReturnsDecompressedData(bool async) using ZipStreamReader reader = new(ms); - ZipForwardReadEntry? entry = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? entry = await GetNextEntry(reader, async); Assert.NotNull(entry); Assert.Equal(ZipCompressionMethod.Deflate64, entry.CompressionMethod); @@ -245,9 +220,7 @@ public async Task ZeroLengthEntry_HasNullDataStream(string entryName, bool expec ms.Position = 0; using ZipStreamReader reader = new(ms); - ZipForwardReadEntry? entry = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? entry = await GetNextEntry(reader, async); Assert.NotNull(entry); Assert.Equal(entryName, entry.FullName); @@ -268,7 +241,7 @@ public async Task EncryptedEntry_ReportsIsEncrypted(bool async) bool foundUnencrypted = false; ZipForwardReadEntry? entry; - while ((entry = async ? await reader.GetNextEntryAsync() : reader.GetNextEntry()) is not null) + while ((entry = await GetNextEntry(reader, async)) is not null) { if (entry.IsEncrypted) foundEncrypted = true; @@ -304,22 +277,14 @@ public async Task Dispose_WhileEntryPartiallyRead_DoesNotThrow(bool async) using MemoryStream archiveStream = new(zipBytes); ZipStreamReader reader = new(archiveStream); - ZipForwardReadEntry? entry = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? 
entry = await GetNextEntry(reader, async); Assert.NotNull(entry); byte[] partial = new byte[5]; - if (async) - await entry.DataStream!.ReadAsync(partial); - else - entry.DataStream!.Read(partial); + await ReadStream(entry.DataStream!, partial, async); - if (async) - await reader.DisposeAsync(); - else - reader.Dispose(); + await DisposeReader(reader, async); } [Theory] @@ -332,9 +297,7 @@ public async Task EmptyArchive_ReturnsNull(bool async) ms.Position = 0; using ZipStreamReader reader = new(ms); - ZipForwardReadEntry? entry = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? entry = await GetNextEntry(reader, async); Assert.Null(entry); } @@ -348,10 +311,7 @@ public async Task LeaveOpen_DoesNotDisposeStream(bool async) using MemoryStream archiveStream = new(zipBytes); ZipStreamReader reader = new(archiveStream, leaveOpen: true); - if (async) - await reader.DisposeAsync(); - else - reader.Dispose(); + await DisposeReader(reader, async); Assert.True(archiveStream.CanRead); } @@ -369,9 +329,7 @@ public async Task Constructor_WithEncoding_ReadsEntryNames(bool async) ms.Position = 0; using ZipStreamReader reader = new(ms, entryNameEncoding: Encoding.UTF8, leaveOpen: true); - ZipForwardReadEntry? entry = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? entry = await GetNextEntry(reader, async); Assert.NotNull(entry); Assert.Equal("hello.txt", entry.FullName); @@ -387,29 +345,21 @@ public async Task MultipleEntries_MixedSkipAndRead(bool async) using ZipStreamReader reader = new(archiveStream); // Skip first entry - ZipForwardReadEntry? first = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? first = await GetNextEntry(reader, async); Assert.NotNull(first); // Read second entry fully - ZipForwardReadEntry? second = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? 
second = await GetNextEntry(reader, async); Assert.NotNull(second); byte[] data = await ReadStreamFully(second.DataStream!, async); Assert.Equal(s_mediumContent, data); // Skip third entry - ZipForwardReadEntry? third = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? third = await GetNextEntry(reader, async); Assert.NotNull(third); // Confirm end - ZipForwardReadEntry? end = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? end = await GetNextEntry(reader, async); Assert.Null(end); } @@ -423,15 +373,10 @@ public async Task GetNextEntry_AfterDispose_ThrowsObjectDisposedException(bool a ZipStreamReader reader = new(archiveStream); // Read one entry to ensure the reader was functional. - ZipForwardReadEntry? entry = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? entry = await GetNextEntry(reader, async); Assert.NotNull(entry); - if (async) - await reader.DisposeAsync(); - else - reader.Dispose(); + await DisposeReader(reader, async); if (async) { @@ -456,18 +401,14 @@ public async Task CopyData_WithDataDescriptor_PreservesEntryAfterAdvancing(bool // Read first entry with copyData: true — exercises the path that // eagerly decompresses, copies into a MemoryStream, then reads the // data descriptor to validate CRC. - ZipForwardReadEntry? first = async - ? await reader.GetNextEntryAsync(copyData: true) - : reader.GetNextEntry(copyData: true); + ZipForwardReadEntry? first = await GetNextEntry(reader, async, copyData: true); Assert.NotNull(first); Assert.NotNull(first.DataStream); // Advance to the next entry to confirm the stream position is correct // after consuming the data descriptor. - ZipForwardReadEntry? second = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? 
second = await GetNextEntry(reader, async); Assert.NotNull(second); Assert.Equal("medium.bin", second.FullName); @@ -481,55 +422,6 @@ public async Task CopyData_WithDataDescriptor_PreservesEntryAfterAdvancing(bool Assert.Equal(s_mediumContent, secondData); } - private static byte[] CreateZipWithEntries(CompressionLevel compressionLevel, bool seekable) - { - MemoryStream ms = new(); - - Stream writeStream = seekable - ? ms - : new WrappedStream(ms, canRead: true, canWrite: true, canSeek: false); - - using (ZipArchive archive = new(writeStream, ZipArchiveMode.Create, leaveOpen: true)) - { - AddEntry(archive, "small.txt", s_smallContent, compressionLevel); - AddEntry(archive, "medium.bin", s_mediumContent, compressionLevel); - AddEntry(archive, "large.bin", s_largeContent, compressionLevel); - } - - return ms.ToArray(); - } - - private static void AddEntry(ZipArchive archive, string name, byte[] contents, CompressionLevel level) - { - ZipArchiveEntry entry = archive.CreateEntry(name, level); - using Stream stream = entry.Open(); - stream.Write(contents); - } - - private static async Task ReadStreamFully(Stream stream, bool async) - { - using MemoryStream result = new(); - byte[] buffer = new byte[4096]; - - int bytesRead; - if (async) - { - while ((bytesRead = await stream.ReadAsync(buffer)) > 0) - { - result.Write(buffer, 0, bytesRead); - } - } - else - { - while ((bytesRead = stream.Read(buffer)) > 0) - { - result.Write(buffer, 0, bytesRead); - } - } - - return result.ToArray(); - } - [Theory] [MemberData(nameof(Get_Booleans_Data))] public async Task ExtractToFile_CreatesFileWithExpectedContent(bool async) @@ -539,9 +431,7 @@ public async Task ExtractToFile_CreatesFileWithExpectedContent(bool async) using MemoryStream archiveStream = new(zipBytes); using ZipStreamReader reader = new(archiveStream); - ZipForwardReadEntry? entry = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? 
entry = await GetNextEntry(reader, async); Assert.NotNull(entry); Assert.Equal("small.txt", entry.FullName); @@ -549,10 +439,7 @@ public async Task ExtractToFile_CreatesFileWithExpectedContent(bool async) string tempPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName()); try { - if (async) - await entry.ExtractToFileAsync(tempPath, overwrite: true); - else - entry.ExtractToFile(tempPath, overwrite: true); + await ExtractEntryToFile(entry, tempPath, overwrite: true, async); byte[] written = File.ReadAllBytes(tempPath); Assert.Equal(s_smallContent, written); @@ -572,9 +459,7 @@ public async Task ExtractToFile_OverwriteTrue_ReplacesExistingFile(bool async) using MemoryStream archiveStream = new(zipBytes); using ZipStreamReader reader = new(archiveStream); - ZipForwardReadEntry? entry = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? entry = await GetNextEntry(reader, async); Assert.NotNull(entry); @@ -584,10 +469,7 @@ public async Task ExtractToFile_OverwriteTrue_ReplacesExistingFile(bool async) // Create a pre-existing file with different content. File.WriteAllText(tempPath, "old content"); - if (async) - await entry.ExtractToFileAsync(tempPath, overwrite: true); - else - entry.ExtractToFile(tempPath, overwrite: true); + await ExtractEntryToFile(entry, tempPath, overwrite: true, async); byte[] written = File.ReadAllBytes(tempPath); Assert.Equal(s_smallContent, written); @@ -607,9 +489,7 @@ public async Task ExtractToFile_OverwriteFalse_ThrowsWhenFileExists(bool async) using MemoryStream archiveStream = new(zipBytes); using ZipStreamReader reader = new(archiveStream); - ZipForwardReadEntry? entry = async - ? await reader.GetNextEntryAsync() - : reader.GetNextEntry(); + ZipForwardReadEntry? 
entry = await GetNextEntry(reader, async); Assert.NotNull(entry); @@ -643,5 +523,90 @@ public void Constructor_UnreadableStream_ThrowsArgumentException() Assert.Throws("stream", () => new ZipStreamReader(unreadable)); } + + // ── Sync/async dispatch helpers ────────────────────────────────────── + + private static async ValueTask GetNextEntry( + ZipStreamReader reader, bool async, bool copyData = false) + { + return async + ? await reader.GetNextEntryAsync(copyData: copyData) + : reader.GetNextEntry(copyData: copyData); + } + + private static async Task ExtractEntryToFile( + ZipForwardReadEntry entry, string destinationFileName, bool overwrite, bool async) + { + if (async) + await entry.ExtractToFileAsync(destinationFileName, overwrite); + else + entry.ExtractToFile(destinationFileName, overwrite); + } + + private static async Task DisposeReader(ZipStreamReader reader, bool async) + { + if (async) + await reader.DisposeAsync(); + else + reader.Dispose(); + } + + private static async ValueTask ReadStream(Stream stream, byte[] buffer, bool async) + { + return async + ? await stream.ReadAsync(buffer) + : stream.Read(buffer); + } + + // ── Test data helpers ──────────────────────────────────────────────── + + private static byte[] CreateZipWithEntries(CompressionLevel compressionLevel, bool seekable) + { + MemoryStream ms = new(); + + Stream writeStream = seekable + ? 
ms + : new WrappedStream(ms, canRead: true, canWrite: true, canSeek: false); + + using (ZipArchive archive = new(writeStream, ZipArchiveMode.Create, leaveOpen: true)) + { + AddEntry(archive, "small.txt", s_smallContent, compressionLevel); + AddEntry(archive, "medium.bin", s_mediumContent, compressionLevel); + AddEntry(archive, "large.bin", s_largeContent, compressionLevel); + } + + return ms.ToArray(); + } + + private static void AddEntry(ZipArchive archive, string name, byte[] contents, CompressionLevel level) + { + ZipArchiveEntry entry = archive.CreateEntry(name, level); + using Stream stream = entry.Open(); + stream.Write(contents); + } + + private static async Task ReadStreamFully(Stream stream, bool async) + { + using MemoryStream result = new(); + byte[] buffer = new byte[4096]; + + int bytesRead; + if (async) + { + while ((bytesRead = await stream.ReadAsync(buffer)) > 0) + { + result.Write(buffer, 0, bytesRead); + } + } + else + { + while ((bytesRead = stream.Read(buffer)) > 0) + { + result.Write(buffer, 0, bytesRead); + } + } + + return result.ToArray(); + } } } From 8a33d4652605e89b65a96535180b0eb83165477e Mon Sep 17 00:00:00 2001 From: alinpahontu2912 Date: Tue, 31 Mar 2026 14:29:51 +0200 Subject: [PATCH 7/8] revert custom streams ops to fix failing tests --- .../src/System/IO/Compression/ZipCustomStreams.cs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs index 172a18b6ba467c..fb653fdb3f4cce 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs @@ -204,12 +204,14 @@ private void NotifyWrite() public override void Flush() { ThrowIfDisposed(); + ThrowIfCantWrite(); _baseStream.Flush(); } public override Task 
FlushAsync(CancellationToken cancellationToken) { ThrowIfDisposed(); + ThrowIfCantWrite(); return _baseStream.FlushAsync(cancellationToken); } @@ -442,6 +444,12 @@ public override void Flush() throw new NotSupportedException(SR.WritingNotSupported); } + public override Task FlushAsync(CancellationToken cancellationToken) + { + ThrowIfDisposed(); + throw new NotSupportedException(SR.WritingNotSupported); + } + // Close the stream for reading. Note that this does NOT close the superStream (since // the substream is just 'a chunk' of the super-stream protected override void Dispose(bool disposing) @@ -477,7 +485,7 @@ internal sealed class CheckSumAndSizeWriteStream : Stream // parameters are initialPosition, currentPosition, checkSum, baseBaseStream, zipArchiveEntry and onClose handler private readonly Action _saveCrcAndSizes; - // parameters to saveCrcAndSizes are + // parameters to saveCrcAndSize are // initialPosition (initialPosition in baseBaseStream), // currentPosition (in this CheckSumAndSizeWriteStream), // checkSum (of data passed into this CheckSumAndSizeWriteStream), @@ -989,12 +997,13 @@ public override void Write(byte[] buffer, int offset, int count) public override void Flush() { ThrowIfDisposed(); + throw new NotSupportedException(SR.WritingNotSupported); } public override Task FlushAsync(CancellationToken cancellationToken) { ThrowIfDisposed(); - return Task.CompletedTask; + throw new NotSupportedException(SR.WritingNotSupported); } public override long Seek(long offset, SeekOrigin origin) From 384add2cc2e521f8e2e8edf5da893b843fd0c858 Mon Sep 17 00:00:00 2001 From: alinpahontu2912 Date: Tue, 7 Apr 2026 14:27:14 +0200 Subject: [PATCH 8/8] address copilot feedback --- .../src/Resources/Strings.resx | 3 + .../System/IO/Compression/ZipCustomStreams.cs | 38 ++++------ .../IO/Compression/ZipForwardReadEntry.cs | 21 +++--- .../System/IO/Compression/ZipStreamReader.cs | 72 ++++++++++++++++--- 4 files changed, 92 insertions(+), 42 deletions(-) diff --git 
a/src/libraries/System.IO.Compression/src/Resources/Strings.resx b/src/libraries/System.IO.Compression/src/Resources/Strings.resx index aebc0c92634465..090c8bccff69d6 100644 --- a/src/libraries/System.IO.Compression/src/Resources/Strings.resx +++ b/src/libraries/System.IO.Compression/src/Resources/Strings.resx @@ -386,4 +386,7 @@ Cannot extract a directory entry or an entry with no data to a file. Check IsDirectory and DataStream before calling ExtractToFile. + + The ZIP archive contains an invalid local file header signature. + diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs index fb653fdb3f4cce..9dc4d73a4c496d 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs @@ -485,7 +485,7 @@ internal sealed class CheckSumAndSizeWriteStream : Stream // parameters are initialPosition, currentPosition, checkSum, baseBaseStream, zipArchiveEntry and onClose handler private readonly Action _saveCrcAndSizes; - // parameters to saveCrcAndSize are + // parameters to saveCrcAndSizes are // initialPosition (initialPosition in baseBaseStream), // currentPosition (in this CheckSumAndSizeWriteStream), // checkSum (of data passed into this CheckSumAndSizeWriteStream), @@ -1213,22 +1213,26 @@ public override long Seek(long offset, SeekOrigin origin) { int rewindBytes = checked((int)(-offset)); - // Merge any unread pushback bytes back into history before - // processing a new seek, so they are not silently lost. - if (_pushbackCount > 0) + if (rewindBytes > _historyCount) { - RestorePushbackToHistory(); + throw new IOException(SR.IO_SeekBeforeBegin); } - if (rewindBytes > _historyCount) + // Create new pushback by prepending rewound history bytes to any + // existing unread pushback. 
This preserves bytes that haven't been + // consumed yet (from a previous seek) so they are not lost. + int existingPushback = _pushbackCount; + byte[] newPushback = new byte[rewindBytes + existingPushback]; + Array.Copy(_history, _historyCount - rewindBytes, newPushback, 0, rewindBytes); + + if (existingPushback > 0) { - throw new IOException(SR.IO_SeekBeforeBegin); + Array.Copy(_pushback!, _pushbackOffset, newPushback, rewindBytes, existingPushback); } - _pushback = new byte[rewindBytes]; - Array.Copy(_history, _historyCount - rewindBytes, _pushback, 0, rewindBytes); + _pushback = newPushback; _pushbackOffset = 0; - _pushbackCount = rewindBytes; + _pushbackCount = newPushback.Length; _historyCount -= rewindBytes; _position -= rewindBytes; @@ -1238,20 +1242,6 @@ public override long Seek(long offset, SeekOrigin origin) throw new NotSupportedException(); } - /// - /// Merges any remaining unread pushback bytes back into history so they - /// are not lost when a new seek is requested. - /// - private void RestorePushbackToHistory() - { - if (_pushbackCount > 0) - { - RecordHistory(_pushback.AsSpan(_pushbackOffset, _pushbackCount)); - _pushbackCount = 0; - _pushback = null; - } - } - private void RecordHistory(ReadOnlySpan data) { if (data.Length >= _history.Length) diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipForwardReadEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipForwardReadEntry.cs index 0ba6c8d808d335..03fdf117b89d42 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipForwardReadEntry.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipForwardReadEntry.cs @@ -144,22 +144,23 @@ internal ZipForwardReadEntry( /// /// to overwrite an existing file; otherwise . /// - /// is null or empty. + /// is . + /// is empty. /// - /// The entry is a directory or has no data ( is ). + /// The entry is a directory ( is ). 
/// public void ExtractToFile(string destinationFileName, bool overwrite) { ArgumentException.ThrowIfNullOrEmpty(destinationFileName); - if (DataStream is null) + if (IsDirectory) { throw new InvalidOperationException(SR.ZipStreamEntryNoDataToExtract); } FileMode mode = overwrite ? FileMode.Create : FileMode.CreateNew; using FileStream fs = new(destinationFileName, mode, FileAccess.Write, FileShare.None); - DataStream.CopyTo(fs); + DataStream?.CopyTo(fs); try { @@ -180,16 +181,17 @@ public void ExtractToFile(string destinationFileName, bool overwrite) /// to overwrite an existing file; otherwise . /// /// A token to monitor for cancellation requests. - /// is null or empty. + /// is . + /// is empty. /// - /// The entry is a directory or has no data ( is ). + /// The entry is a directory ( is ). /// public async Task ExtractToFileAsync(string destinationFileName, bool overwrite, CancellationToken cancellationToken = default) { ArgumentException.ThrowIfNullOrEmpty(destinationFileName); - if (DataStream is null) + if (IsDirectory) { throw new InvalidOperationException(SR.ZipStreamEntryNoDataToExtract); } @@ -199,7 +201,10 @@ public async Task ExtractToFileAsync(string destinationFileName, bool overwrite, bufferSize: 0x1000, useAsync: true); await using (fs.ConfigureAwait(false)) { - await DataStream.CopyToAsync(fs, cancellationToken).ConfigureAwait(false); + if (DataStream is not null) + { + await DataStream.CopyToAsync(fs, cancellationToken).ConfigureAwait(false); + } } try diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs index 33262d21ab3986..287eb0971625a7 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipStreamReader.cs @@ -114,8 +114,13 @@ public ZipStreamReader(Stream stream, Encoding? 
entryNameEncoding, bool leaveOpe if (!headerBytes.AsSpan().StartsWith(ZipLocalFileHeader.SignatureConstantBytes)) { - _reachedEnd = true; - return null; + if (IsKnownEndOfEntriesSignature(headerBytes)) + { + _reachedEnd = true; + return null; + } + + throw new InvalidDataException(SR.ZipStreamInvalidLocalFileHeader); } int dynamicLength = GetDynamicHeaderLength(headerBytes); @@ -206,8 +211,13 @@ public ZipStreamReader(Stream stream, Encoding? entryNameEncoding, bool leaveOpe if (!headerBytes.AsSpan().StartsWith(ZipLocalFileHeader.SignatureConstantBytes)) { - _reachedEnd = true; - return null; + if (IsKnownEndOfEntriesSignature(headerBytes)) + { + _reachedEnd = true; + return null; + } + + throw new InvalidDataException(SR.ZipStreamInvalidLocalFileHeader); } int dynamicLength = GetDynamicHeaderLength(headerBytes); @@ -452,7 +462,7 @@ private void ParseLocalFileHeader( private void ReadDataDescriptor(ZipForwardReadEntry entry, CrcValidatingReadStream crcStream) { - byte[] buffer = new byte[24]; + byte[] buffer = new byte[28]; // Max: sig(4) + crc(4) + sizes64(16) + peek(4) _archiveStream.ReadExactly(buffer.AsSpan(0, 4)); int offset = 0; @@ -465,9 +475,17 @@ private void ReadDataDescriptor(ZipForwardReadEntry entry, CrcValidatingReadStre totalRead = 8; } - bool isZip64 = entry.VersionNeeded >= (ushort)ZipVersionNeededValues.Zip64; + // Read 20 bytes: up to 16 for sizes (64-bit) + 4 to peek at the next signature. + _archiveStream.ReadExactly(buffer.AsSpan(totalRead, 20)); + + // Probe: if 4 bytes after 32-bit sizes form a known ZIP signature, + // the descriptor uses 32-bit sizes; otherwise assume 64-bit. + bool isZip64 = !IsKnownZipSignature(buffer.AsSpan(totalRead + 8, 4)); int sizesBytes = isZip64 ? 16 : 8; - _archiveStream.ReadExactly(buffer.AsSpan(totalRead, sizesBytes)); + + // Seek back over the bytes we read past the actual sizes. 
+ int overRead = 20 - sizesBytes; + _archiveStream.Seek(-overRead, SeekOrigin.Current); ParseDataDescriptor(buffer, offset, isZip64, entry, crcStream); } @@ -475,7 +493,7 @@ private void ReadDataDescriptor(ZipForwardReadEntry entry, CrcValidatingReadStre private async ValueTask ReadDataDescriptorAsync( ZipForwardReadEntry entry, CrcValidatingReadStream crcStream, CancellationToken cancellationToken) { - byte[] buffer = new byte[24]; + byte[] buffer = new byte[28]; // Max: sig(4) + crc(4) + sizes64(16) + peek(4) await _archiveStream.ReadExactlyAsync(buffer.AsMemory(0, 4), cancellationToken).ConfigureAwait(false); int offset = 0; @@ -488,9 +506,17 @@ private async ValueTask ReadDataDescriptorAsync( totalRead = 8; } - bool isZip64 = entry.VersionNeeded >= (ushort)ZipVersionNeededValues.Zip64; + // Read 20 bytes: up to 16 for sizes (64-bit) + 4 to peek at the next signature. + await _archiveStream.ReadExactlyAsync(buffer.AsMemory(totalRead, 20), cancellationToken).ConfigureAwait(false); + + // Probe: if 4 bytes after 32-bit sizes form a known ZIP signature, + // the descriptor uses 32-bit sizes; otherwise assume 64-bit. + bool isZip64 = !IsKnownZipSignature(buffer.AsSpan(totalRead + 8, 4)); int sizesBytes = isZip64 ? 16 : 8; - await _archiveStream.ReadExactlyAsync(buffer.AsMemory(totalRead, sizesBytes), cancellationToken).ConfigureAwait(false); + + // Seek back over the bytes we read past the actual sizes. + int overRead = 20 - sizesBytes; + _archiveStream.Seek(-overRead, SeekOrigin.Current); ParseDataDescriptor(buffer, offset, isZip64, entry, crcStream); } @@ -524,6 +550,32 @@ private static void ParseDataDescriptor( } } + /// + /// Returns when the first four bytes of + /// match a known end-of-entries signature + /// (central directory header or end-of-central-directory). 
+ /// + private static bool IsKnownEndOfEntriesSignature(ReadOnlySpan headerBytes) + { + ReadOnlySpan sig = headerBytes[..4]; + return sig.SequenceEqual(ZipCentralDirectoryFileHeader.SignatureConstantBytes) + || sig.SequenceEqual(ZipEndOfCentralDirectoryBlock.SignatureConstantBytes) + || sig.SequenceEqual(Zip64EndOfCentralDirectoryRecord.SignatureConstantBytes); + } + + /// + /// Returns when starts with any + /// recognized ZIP structure signature (local header, central directory, EOCD, or ZIP64 EOCD). + /// Used to probe the data descriptor format by peeking at the bytes that follow. + /// + private static bool IsKnownZipSignature(ReadOnlySpan bytes) + { + return bytes.StartsWith(ZipLocalFileHeader.SignatureConstantBytes) + || bytes.StartsWith(ZipCentralDirectoryFileHeader.SignatureConstantBytes) + || bytes.StartsWith(ZipEndOfCentralDirectoryBlock.SignatureConstantBytes) + || bytes.StartsWith(Zip64EndOfCentralDirectoryRecord.SignatureConstantBytes); + } + public void Dispose() { if (!_isDisposed)