From 60deae6ed600dc6efc24eb6d6f1acadbd8b7d169 Mon Sep 17 00:00:00 2001 From: alinpahontu2912 Date: Tue, 7 Apr 2026 16:01:24 +0200 Subject: [PATCH 1/6] Add ZipArchiveMode.ForwardRead for forward-only sequential ZIP reading Adds a new ForwardRead mode to ZipArchive that enables forward-only sequential reading of ZIP entries from non-seekable streams, using local file headers instead of the central directory. Changes: - ZipArchiveMode.cs: Add ForwardRead = 3 enum value - ZipCustomStreams.cs: Add BoundedReadOnlyStream and ReadAheadStream helper stream classes - ZipArchive.cs: Add GetNextEntry()/GetNextEntryAsync() methods, ForwardRead constructor case, ValidateMode/DecideArchiveStream support, data descriptor parsing, and property guards - ZipArchive.Async.cs: Add ForwardRead cases to CreateAsync and DisposeAsyncCore - ZipArchiveEntry.cs: Add forward-read constructor, ForwardReadDataStream property, UpdateFromDataDescriptor method, OpenInForwardReadMode, and property setter guards - Strings.resx: Add ForwardRead error message strings - ref/System.IO.Compression.cs: Add public API surface - Tests: Add comprehensive zip_ForwardReadTests covering deflate, stored, data descriptors, non-seekable streams, empty archives, partial reads, error cases, and async operations Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../ref/System.IO.Compression.cs | 3 + .../src/Resources/Strings.resx | 15 + .../System/IO/Compression/ZipArchive.Async.cs | 5 + .../src/System/IO/Compression/ZipArchive.cs | 404 ++++++++++++++- .../System/IO/Compression/ZipArchiveEntry.cs | 81 +++ .../System/IO/Compression/ZipArchiveMode.cs | 8 +- .../System/IO/Compression/ZipCustomStreams.cs | 327 ++++++++++++ .../tests/System.IO.Compression.Tests.csproj | 1 + .../tests/ZipArchive/zip_ForwardReadTests.cs | 482 ++++++++++++++++++ 9 files changed, 1322 insertions(+), 4 deletions(-) create mode 100644 src/libraries/System.IO.Compression/tests/ZipArchive/zip_ForwardReadTests.cs diff --git 
a/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs b/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs index 564bbc97eb7511..437d1109de5e37 100644 --- a/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs +++ b/src/libraries/System.IO.Compression/ref/System.IO.Compression.cs @@ -108,6 +108,8 @@ protected virtual void Dispose(bool disposing) { } public System.Threading.Tasks.ValueTask DisposeAsync() { throw null; } protected virtual System.Threading.Tasks.ValueTask DisposeAsyncCore() { throw null; } public System.IO.Compression.ZipArchiveEntry? GetEntry(string entryName) { throw null; } + public System.IO.Compression.ZipArchiveEntry? GetNextEntry() { throw null; } + public System.Threading.Tasks.ValueTask GetNextEntryAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } } public partial class ZipArchiveEntry { @@ -137,6 +139,7 @@ public enum ZipArchiveMode Read = 0, Create = 1, Update = 2, + ForwardRead = 3, } public enum ZipCompressionMethod { diff --git a/src/libraries/System.IO.Compression/src/Resources/Strings.resx b/src/libraries/System.IO.Compression/src/Resources/Strings.resx index bbb10afbcf342a..5fc55777154b61 100644 --- a/src/libraries/System.IO.Compression/src/Resources/Strings.resx +++ b/src/libraries/System.IO.Compression/src/Resources/Strings.resx @@ -377,4 +377,19 @@ The decompressed data length does not match the expected value from the archive. + + This operation is not supported in ForwardRead mode. + + + GetNextEntry is only supported when the archive is opened in ForwardRead mode. + + + Stored entries with data descriptors cannot be read in ForwardRead mode because the entry boundary cannot be determined. + + + Encrypted entries with data descriptors cannot be read in ForwardRead mode. + + + The archive stream contains an invalid local file header. 
+ diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.Async.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.Async.cs index 20d5f40735cbc2..cae68745faa1f5 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.Async.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.Async.cs @@ -98,6 +98,9 @@ public static async Task CreateAsync(Stream stream, ZipArchiveMode m // directory up-front await zipArchive.EnsureCentralDirectoryReadAsync(cancellationToken).ConfigureAwait(false); break; + case ZipArchiveMode.ForwardRead: + zipArchive._readEntries = true; + break; case ZipArchiveMode.Update: default: Debug.Assert(mode == ZipArchiveMode.Update); @@ -147,6 +150,8 @@ protected virtual async ValueTask DisposeAsyncCore() { case ZipArchiveMode.Read: break; + case ZipArchiveMode.ForwardRead: + break; case ZipArchiveMode.Create: await WriteFileAsync().ConfigureAwait(false); break; diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs index 70cd68b810af37..fb81ab46f6d951 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs @@ -5,12 +5,15 @@ // Zip Spec here: http://www.pkware.com/documents/casestudies/APPNOTE.TXT using System.Buffers; +using System.Buffers.Binary; using System.Collections.Generic; using System.Collections.ObjectModel; using System.ComponentModel; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Text; +using System.Threading; +using System.Threading.Tasks; namespace System.IO.Compression { @@ -34,6 +37,8 @@ public partial class ZipArchive : IDisposable, IAsyncDisposable private byte[] _archiveComment; private Encoding? 
_entryNameAndCommentEncoding;
         private long _firstDeletedEntryOffset;
+        // Entry most recently returned by GetNextEntry; it is drained before the next header is read.
+        private ZipArchiveEntry? _forwardReadPreviousEntry;
+        // Set once a non-entry record (or end of stream) has been seen; later calls return null.
+        private bool _forwardReadReachedEnd;
 
 #if DEBUG_FORCE_ZIP64
         public bool _forceZip64;
@@ -150,6 +155,9 @@ public ZipArchive(Stream stream, ZipArchiveMode mode, bool leaveOpen, Encoding?
                     case ZipArchiveMode.Read:
                         ReadEndOfCentralDirectory();
                         break;
+                    case ZipArchiveMode.ForwardRead:
+                        _readEntries = true;
+                        break;
                     case ZipArchiveMode.Update:
                     default:
                         Debug.Assert(mode == ZipArchiveMode.Update);
@@ -231,6 +239,8 @@ public ReadOnlyCollection<ZipArchiveEntry> Entries
             {
                 if (_mode == ZipArchiveMode.Create)
                     throw new NotSupportedException(SR.EntriesInCreateMode);
+                if (_mode == ZipArchiveMode.ForwardRead)
+                    throw new NotSupportedException(SR.ForwardReadOnly);
 
                 ThrowIfDisposed();
 
@@ -299,6 +309,8 @@ protected virtual void Dispose(bool disposing)
                         {
                             case ZipArchiveMode.Read:
                                 break;
+                            case ZipArchiveMode.ForwardRead:
+                                break;
                             case ZipArchiveMode.Create:
                                 WriteFile();
                                 break;
@@ -349,12 +361,388 @@ protected virtual void Dispose(bool disposing)
 
             if (_mode == ZipArchiveMode.Create)
                 throw new NotSupportedException(SR.EntriesInCreateMode);
+            if (_mode == ZipArchiveMode.ForwardRead)
+                throw new NotSupportedException(SR.ForwardReadOnly);
 
             EnsureCentralDirectoryRead();
             _entriesDictionary.TryGetValue(entryName, out ZipArchiveEntry? result);
             return result;
         }
 
+        /// <summary>
+        /// Reads the next entry from the archive when opened in <see cref="ZipArchiveMode.ForwardRead"/> mode.
+        /// </summary>
+        /// <returns>The next <see cref="ZipArchiveEntry"/> in the archive, or <see langword="null"/> if no more entries exist.</returns>
+        /// <exception cref="NotSupportedException">The archive was not opened in <see cref="ZipArchiveMode.ForwardRead"/> mode.</exception>
+        /// <exception cref="ObjectDisposedException">The archive has been disposed.</exception>
+        /// <exception cref="InvalidDataException">The archive contains invalid data.</exception>
+        public ZipArchiveEntry? GetNextEntry()
+        {
+            ThrowIfDisposed();
+            if (_mode != ZipArchiveMode.ForwardRead)
+                throw new NotSupportedException(SR.GetNextEntryNotInForwardRead);
+
+            if (_forwardReadReachedEnd)
+                return null;
+
+            // The previous entry's payload must be consumed before the next header can be located.
+            DrainPreviousEntry();
+            return ReadNextLocalFileHeader();
+        }
+
+        /// <summary>
+        /// Asynchronously reads the next entry from the archive when opened in <see cref="ZipArchiveMode.ForwardRead"/> mode.
+        /// </summary>
+        /// <param name="cancellationToken">A cancellation token to observe.</param>
+        /// <returns>A <see cref="ValueTask{TResult}"/> representing the next entry, or <see langword="null"/> if no more entries exist.</returns>
+        /// <exception cref="NotSupportedException">The archive was not opened in <see cref="ZipArchiveMode.ForwardRead"/> mode.</exception>
+        /// <exception cref="ObjectDisposedException">The archive has been disposed.</exception>
+        /// <exception cref="InvalidDataException">The archive contains invalid data.</exception>
+        public ValueTask<ZipArchiveEntry?> GetNextEntryAsync(CancellationToken cancellationToken = default)
+        {
+            ThrowIfDisposed();
+            if (_mode != ZipArchiveMode.ForwardRead)
+                throw new NotSupportedException(SR.GetNextEntryNotInForwardRead);
+
+            cancellationToken.ThrowIfCancellationRequested();
+
+            // Completed synchronously once the end has been reached, so no state machine is allocated.
+            if (_forwardReadReachedEnd)
+                return new ValueTask<ZipArchiveEntry?>((ZipArchiveEntry?)null);
+
+            return GetNextEntryAsyncCore(cancellationToken);
+        }
+
+        // Asynchronous counterpart of the drain-then-read sequence in GetNextEntry.
+        private async ValueTask<ZipArchiveEntry?> GetNextEntryAsyncCore(CancellationToken cancellationToken)
+        {
+            await DrainPreviousEntryAsync(cancellationToken).ConfigureAwait(false);
+            return await ReadNextLocalFileHeaderAsync(cancellationToken).ConfigureAwait(false);
+        }
+
+        // Reads the previously returned entry's data stream to its end, disposes it, and then
+        // reads the trailing data descriptor when the entry declared one.
+        private void DrainPreviousEntry()
+        {
+            if (_forwardReadPreviousEntry is not { } prev)
+                return;
+
+            Stream? dataStream = prev.ForwardReadDataStream;
+            if (dataStream != null)
+            {
+                dataStream.CopyTo(Stream.Null);
+                dataStream.Dispose();
+            }
+
+            if (prev.HasDataDescriptor)
+            {
+                ReadDataDescriptor(prev);
+            }
+
+            _forwardReadPreviousEntry = null;
+        }
+
+        private async ValueTask DrainPreviousEntryAsync(CancellationToken cancellationToken)
+        {
+            if (_forwardReadPreviousEntry is not { } prev)
+                return;
+
+            Stream?
dataStream = prev.ForwardReadDataStream;
+            if (dataStream != null)
+            {
+                await dataStream.CopyToAsync(Stream.Null, cancellationToken).ConfigureAwait(false);
+                await dataStream.DisposeAsync().ConfigureAwait(false);
+            }
+
+            if (prev.HasDataDescriptor)
+            {
+                // Stay asynchronous here: the archive stream may not support synchronous reads.
+                await ReadDataDescriptorAsync(prev, cancellationToken).ConfigureAwait(false);
+            }
+
+            _forwardReadPreviousEntry = null;
+        }
+
+        // Optional signature that may precede a data descriptor.
+        private const uint DataDescriptorSignature = 0x08074B50;
+
+        // Scratch size for descriptor parsing: CRC32(4) followed by 16 bytes that are either two
+        // 64-bit sizes, or two 32-bit sizes plus 8 bytes of look-ahead into the next record.
+        private const int DataDescriptorProbeSize = 20;
+
+        /// <summary>
+        /// Reads the data descriptor that follows an entry's compressed data and copies its
+        /// CRC-32 and sizes into <paramref name="entry"/>.
+        /// </summary>
+        /// <remarks>
+        /// Accepted layouts (all little-endian):
+        ///   CRC32(4) + CompressedSize(4) + UncompressedSize(4)              = 12 bytes
+        ///   Sig(4) + CRC32(4) + CompressedSize(4) + UncompressedSize(4)     = 16 bytes
+        ///   CRC32(4) + CompressedSize(8) + UncompressedSize(8)              = 20 bytes
+        ///   Sig(4) + CRC32(4) + CompressedSize(8) + UncompressedSize(8)     = 24 bytes
+        /// Bytes read beyond a 32-bit descriptor are handed back with a relative backwards seek.
+        /// </remarks>
+        /// <param name="entry">The entry whose descriptor is positioned next in the archive stream.</param>
+        /// <exception cref="EndOfStreamException">The stream ended inside the descriptor.</exception>
+        private void ReadDataDescriptor(ZipArchiveEntry entry)
+        {
+            Span<byte> buffer = stackalloc byte[DataDescriptorProbeSize];
+
+            // The first word is either the optional signature or the CRC32 itself.
+            // An unsigned descriptor whose CRC32 equals the signature value is indistinguishable here.
+            _archiveStream.ReadExactly(buffer.Slice(0, sizeof(uint)));
+            bool hasSignature = BinaryPrimitives.ReadUInt32LittleEndian(buffer) == DataDescriptorSignature;
+
+            // With a signature the whole CRC+sizes block is still to come and overwrites the
+            // signature word; without one the CRC32 is already in place at offset 0.
+            _archiveStream.ReadExactly(hasSignature ? buffer : buffer.Slice(sizeof(uint)));
+
+            int overRead = ApplyDataDescriptor(entry, buffer);
+            if (overRead > 0)
+            {
+                _archiveStream.Seek(-overRead, SeekOrigin.Current);
+            }
+        }
+
+        // Asynchronous counterpart of ReadDataDescriptor; uses a heap buffer because spans cannot
+        // live across awaits.
+        private async ValueTask ReadDataDescriptorAsync(ZipArchiveEntry entry, CancellationToken cancellationToken)
+        {
+            byte[] buffer = new byte[DataDescriptorProbeSize];
+
+            await _archiveStream.ReadExactlyAsync(buffer.AsMemory(0, sizeof(uint)), cancellationToken).ConfigureAwait(false);
+            bool hasSignature = BinaryPrimitives.ReadUInt32LittleEndian(buffer) == DataDescriptorSignature;
+
+            Memory<byte> remainder = hasSignature ? buffer.AsMemory() : buffer.AsMemory(sizeof(uint));
+            await _archiveStream.ReadExactlyAsync(remainder, cancellationToken).ConfigureAwait(false);
+
+            int overRead = ApplyDataDescriptor(entry, buffer);
+            if (overRead > 0)
+            {
+                _archiveStream.Seek(-overRead, SeekOrigin.Current);
+            }
+        }
+
+        // Interprets CRC32(4) + 16 following bytes, updates the entry, and returns how many of the
+        // supplied bytes belong to the next record (8 for a 32-bit descriptor, 0 for a 64-bit one).
+        private static int ApplyDataDescriptor(ZipArchiveEntry entry, ReadOnlySpan<byte> crcAndSizes)
+        {
+            uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(crcAndSizes);
+            ReadOnlySpan<byte> sizes = crcAndSizes.Slice(sizeof(uint));
+
+            // If the word right after two 32-bit sizes is a record signature, the descriptor is the
+            // 32-bit form. This is a heuristic: a 64-bit uncompressed size whose low word happens
+            // to equal a signature would be misread.
+            if (IsKnownZipSignature(BinaryPrimitives.ReadUInt32LittleEndian(sizes.Slice(8))))
+            {
+                uint compressedSize32 = BinaryPrimitives.ReadUInt32LittleEndian(sizes);
+                uint uncompressedSize32 = BinaryPrimitives.ReadUInt32LittleEndian(sizes.Slice(4));
+                entry.UpdateFromDataDescriptor(crc32, compressedSize32, uncompressedSize32);
+                return 8;
+            }
+
+            // Zip64 form: the 16 bytes are exactly the two 64-bit sizes, nothing was over-read.
+            long compressedSize64 = BinaryPrimitives.ReadInt64LittleEndian(sizes);
+            long uncompressedSize64 = BinaryPrimitives.ReadInt64LittleEndian(sizes.Slice(8));
+            entry.UpdateFromDataDescriptor(crc32, compressedSize64, uncompressedSize64);
+            return 0;
+        }
+
+        // Returns true when the value is the signature of a record that can follow entry data.
+        private static bool IsKnownZipSignature(uint sig)
+        {
+            return sig == 0x04034B50 // Local file header
+                || sig == 0x02014B50 // Central directory
+                || sig == 0x06054B50 // EOCD
+                || sig == 0x06064B50; // Zip64 EOCD
+        }
+
+        private ZipArchiveEntry?
ReadNextLocalFileHeader()
+        {
+            Span<byte> header = stackalloc byte[ZipLocalFileHeader.SizeOfLocalHeader];
+
+            int bytesRead = _archiveStream.ReadAtLeast(header, header.Length, throwOnEndOfStream: false);
+            if (!IsLocalFileHeader(header.Slice(0, bytesRead)))
+                return null;
+
+            (byte[] fileName, byte[] extraField) = AllocateVariableFields(header);
+            _archiveStream.ReadExactly(fileName);
+            _archiveStream.ReadExactly(extraField);
+
+            return ParseLocalFileHeader(header, fileName, extraField);
+        }
+
+        // Asynchronous counterpart of ReadNextLocalFileHeader; every read from the archive stream,
+        // including the variable-length name and extra field, is awaited.
+        private async ValueTask<ZipArchiveEntry?> ReadNextLocalFileHeaderAsync(CancellationToken cancellationToken)
+        {
+            byte[] header = new byte[ZipLocalFileHeader.SizeOfLocalHeader];
+
+            int bytesRead = await _archiveStream.ReadAtLeastAsync(header, header.Length, throwOnEndOfStream: false, cancellationToken).ConfigureAwait(false);
+            if (!IsLocalFileHeader(header.AsSpan(0, bytesRead)))
+                return null;
+
+            (byte[] fileName, byte[] extraField) = AllocateVariableFields(header);
+            await _archiveStream.ReadExactlyAsync(fileName, cancellationToken).ConfigureAwait(false);
+            await _archiveStream.ReadExactlyAsync(extraField, cancellationToken).ConfigureAwait(false);
+
+            return ParseLocalFileHeader(header, fileName, extraField);
+        }
+
+        // Decides whether the bytes just read start another entry. A short read or any other known
+        // record signature marks the end of the entries; anything else is corrupt data.
+        private bool IsLocalFileHeader(ReadOnlySpan<byte> header)
+        {
+            if (header.Length == ZipLocalFileHeader.SizeOfLocalHeader)
+            {
+                if (header.Slice(0, 4).SequenceEqual(ZipLocalFileHeader.SignatureConstantBytes))
+                    return true;
+
+                if (!IsKnownZipSignature(BinaryPrimitives.ReadUInt32LittleEndian(header)))
+                    throw new InvalidDataException(SR.ForwardReadInvalidLocalFileHeader);
+            }
+
+            _forwardReadReachedEnd = true;
+            return false;
+        }
+
+        // Allocates buffers sized from the header's file-name-length and extra-field-length fields.
+        private static (byte[] FileName, byte[] ExtraField) AllocateVariableFields(ReadOnlySpan<byte> header)
+        {
+            ushort filenameLength = BinaryPrimitives.ReadUInt16LittleEndian(header.Slice(ZipLocalFileHeader.FieldLocations.FilenameLength));
+            ushort extraFieldLength = BinaryPrimitives.ReadUInt16LittleEndian(header.Slice(ZipLocalFileHeader.FieldLocations.ExtraFieldLength));
+            return (new byte[filenameLength], new byte[extraFieldLength]);
+        }
+
+        // Builds the entry described by an already-read local file header and wires up the stream
+        // that exposes (and later drains) its payload. Performs no I/O itself.
+        //   header          - the fixed-size part of the local file header
+        //   filenameBytes   - raw file name bytes that followed the header
+        //   extraFieldBytes - raw extra field bytes that followed the file name
+        // Throws NotSupportedException for data-descriptor entries whose end cannot be detected.
+        private ZipArchiveEntry ParseLocalFileHeader(ReadOnlySpan<byte> header, byte[] filenameBytes, byte[] extraFieldBytes)
+        {
+            ushort versionNeeded = BinaryPrimitives.ReadUInt16LittleEndian(header.Slice(ZipLocalFileHeader.FieldLocations.VersionNeededToExtract));
+            ushort generalPurposeBitFlags = BinaryPrimitives.ReadUInt16LittleEndian(header.Slice(ZipLocalFileHeader.FieldLocations.GeneralPurposeBitFlags));
+            ushort compressionMethodValue = BinaryPrimitives.ReadUInt16LittleEndian(header.Slice(ZipLocalFileHeader.FieldLocations.CompressionMethod));
+            uint lastModified = BinaryPrimitives.ReadUInt32LittleEndian(header.Slice(ZipLocalFileHeader.FieldLocations.LastModified));
+            uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(header.Slice(ZipLocalFileHeader.FieldLocations.Crc32));
+            uint compressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(header.Slice(ZipLocalFileHeader.FieldLocations.CompressedSize));
+            uint uncompressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(header.Slice(ZipLocalFileHeader.FieldLocations.UncompressedSize));
+
+            long compressedSize = compressedSizeSmall;
+            long uncompressedSize = uncompressedSizeSmall;
+
+            // A saturated 32-bit size means the real value lives in the Zip64 extra field.
+            if (compressedSizeSmall == ZipHelper.Mask32Bit || uncompressedSizeSmall == ZipHelper.Mask32Bit)
+            {
+                Zip64ExtraField zip64 = Zip64ExtraField.GetJustZip64Block(extraFieldBytes,
+                    readUncompressedSize: uncompressedSizeSmall == ZipHelper.Mask32Bit,
+                    readCompressedSize: compressedSizeSmall == ZipHelper.Mask32Bit,
+                    readLocalHeaderOffset: false,
+                    readStartDiskNumber: false);
+
+                if (zip64.UncompressedSize.HasValue)
+                    uncompressedSize = zip64.UncompressedSize.Value;
+                if (zip64.CompressedSize.HasValue)
+                    compressedSize = zip64.CompressedSize.Value;
+            }
+
+            bool hasDataDescriptor = (generalPurposeBitFlags & (ushort)ZipArchiveEntry.BitFlagValues.DataDescriptor) != 0;
+            bool isEncrypted = (generalPurposeBitFlags & (ushort)ZipArchiveEntry.BitFlagValues.IsEncrypted) != 0;
+            ZipCompressionMethod compressionMethod = (ZipCompressionMethod)compressionMethodValue;
+
+            // Decode entry name
+            bool isUtf8 = (generalPurposeBitFlags & (ushort)ZipArchiveEntry.BitFlagValues.UnicodeFileNameAndComment) != 0;
+            Encoding nameEncoding = isUtf8 ? Encoding.UTF8 : (EntryNameAndCommentEncoding ?? Encoding.UTF8);
+            string fullName = nameEncoding.GetString(filenameBytes);
+
+            DateTimeOffset lastModifiedDto = new DateTimeOffset(ZipHelper.DosTimeToDateTime(lastModified));
+
+            // With a data descriptor the compressed length is unknown up front, so the payload can
+            // only be delimited by a format that signals its own end.
+            if (hasDataDescriptor)
+            {
+                if (compressionMethod == ZipCompressionMethod.Stored)
+                    throw new NotSupportedException(SR.ForwardReadStoredDataDescriptorNotSupported);
+                if (isEncrypted)
+                    throw new NotSupportedException(SR.ForwardReadEncryptedDataDescriptorNotSupported);
+                // Any other method would otherwise be handed the raw archive stream and draining
+                // the entry would consume the remainder of the archive.
+                if (compressionMethod is not (ZipCompressionMethod.Deflate or ZipCompressionMethod.Deflate64))
+                    throw new NotSupportedException(SR.ForwardReadOnly);
+            }
+
+            // Build the data stream. It is created whenever payload bytes may follow the header,
+            // regardless of the entry name, so that those bytes are always skipped when draining.
+            Stream? dataStream = null;
+            bool isEmptyEntry = !hasDataDescriptor && compressedSize == 0 && uncompressedSize == 0;
+
+            if (!isEmptyEntry)
+            {
+                if (hasDataDescriptor)
+                {
+                    // Unknown size: rely on the decompressor to detect the end of the data.
+                    // NOTE(review): if the decompressor buffers reads from _archiveStream it may
+                    // consume bytes past the end of the compressed data - confirm the stream is
+                    // positioned at the data descriptor once decompression completes.
+                    Stream decompressor;
+                    if (compressionMethod == ZipCompressionMethod.Deflate)
+                    {
+                        decompressor = new DeflateStream(_archiveStream, CompressionMode.Decompress, leaveOpen: true);
+                    }
+                    else
+                    {
+                        decompressor = new DeflateManagedStream(_archiveStream, ZipCompressionMethod.Deflate64, -1);
+                    }
+                    // NOTE(review): CRC and length are unknown until the descriptor is read; confirm
+                    // CrcValidatingReadStream tolerates these placeholder values at end of stream.
+                    dataStream = new CrcValidatingReadStream(decompressor, expectedCrc: 0, expectedLength: long.MaxValue);
+                }
+                else if (isEncrypted)
+                {
+                    // Encrypted without data descriptor: return bounded raw stream (no decryption)
+                    dataStream = new BoundedReadOnlyStream(_archiveStream, compressedSize);
+                }
+                else
+                {
+                    // Known size, not encrypted
+                    Stream bounded = new BoundedReadOnlyStream(_archiveStream, compressedSize);
+                    Stream decompressor;
+                    if (compressionMethod == ZipCompressionMethod.Deflate)
+                    {
+                        decompressor = new DeflateStream(bounded, CompressionMode.Decompress, uncompressedSize);
+                    }
+                    else if (compressionMethod == ZipCompressionMethod.Deflate64)
+                    {
+                        decompressor = new DeflateManagedStream(bounded, ZipCompressionMethod.Deflate64, uncompressedSize);
+                    }
+                    else
+                    {
+                        // Stored (any other method is passed through as raw bytes)
+                        decompressor = bounded;
+                    }
+                    dataStream = new CrcValidatingReadStream(decompressor, crc32, uncompressedSize);
+                }
+            }
+
+            var entry = new ZipArchiveEntry(this, fullName, compressionMethod, lastModifiedDto,
+                crc32, compressedSize, uncompressedSize, generalPurposeBitFlags, versionNeeded,
+                hasDataDescriptor, dataStream);
+
+            // Remember the entry so the next GetNextEntry call can drain it first.
+            _forwardReadPreviousEntry = entry;
+            return entry;
+        }
+
         internal Stream ArchiveStream => _archiveStream;
 
         internal uint NumberOfThisDisk => _numberOfThisDisk;
@@ -434,6 +822,8 @@ private ZipArchiveEntry DoCreateEntry(string entryName, CompressionLevel? compre
 
             if (_mode == ZipArchiveMode.Read)
                 throw new NotSupportedException(SR.CreateInReadMode);
+            if (_mode == ZipArchiveMode.ForwardRead)
+                throw new NotSupportedException(SR.ForwardReadOnly);
 
             ThrowIfDisposed();
 
@@ -959,6 +1349,10 @@ private static bool ValidateMode(ZipArchiveMode mode, Stream stream)
                         isReadModeAndUnseekable = true;
                     }
                     break;
+                case ZipArchiveMode.ForwardRead:
+                    if (!stream.CanRead)
+                        throw new ArgumentException(SR.ReadModeCapabilities);
+                    break;
                 case ZipArchiveMode.Update:
                     if (!stream.CanRead || !stream.CanWrite || !stream.CanSeek)
                         throw new ArgumentException(SR.UpdateModeCapabilities);
@@ -977,9 +1371,13 @@ private static Stream DecideArchiveStream(ZipArchiveMode mode, Stream stream)
         {
             ArgumentNullException.ThrowIfNull(stream);
 
-            return mode == ZipArchiveMode.Create && !stream.CanSeek ?
-                new PositionPreservingWriteOnlyStreamWrapper(stream) :
-                stream;
+            if (mode == ZipArchiveMode.Create && !stream.CanSeek)
+                return new PositionPreservingWriteOnlyStreamWrapper(stream);
+
+            // Forward reading needs short backwards seeks (data descriptor probing), which a
+            // non-seekable stream only offers through the read-ahead wrapper.
+            if (mode == ZipArchiveMode.ForwardRead && !stream.CanSeek)
+                return new ReadAheadStream(stream);
+
+            return stream;
         }
 
diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs
index c3ee984e673f12..5dee765db9d7fa 100644
--- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs
+++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs
@@ -47,6 +47,8 @@ public partial class ZipArchiveEntry
         private byte[]? _lhTrailingExtraFieldData;
         private byte[] _fileComment;
         private readonly CompressionLevel _compressionLevel;
+        // Forward-read state; assigned only by the forward-read constructor.
+        private readonly Stream? _forwardReadDataStream;
+        private readonly bool _hasDataDescriptor;
 
         // Initializes a ZipArchiveEntry instance for an existing archive entry.
         internal ZipArchiveEntry(ZipArchive archive, ZipCentralDirectoryFileHeader cd)
@@ -160,6 +162,51 @@ internal ZipArchiveEntry(ZipArchive archive, string entryName)
             Changes = ZipArchive.ChangeState.Unchanged;
         }
 
+        // Initializes a ZipArchiveEntry instance for forward-read mode from local file header data.
+        // Fields that only the central directory provides (header offset, external attributes,
+        // comment) are set to zero/empty. dataStream is null when the entry has no payload.
+        internal ZipArchiveEntry(ZipArchive archive, string fullName, ZipCompressionMethod compressionMethod,
+            DateTimeOffset lastModified, uint crc32, long compressedSize, long uncompressedSize,
+            ushort generalPurposeBitFlags, ushort versionNeeded, bool hasDataDescriptor, Stream? dataStream)
+        {
+            _archive = archive;
+            _originallyInArchive = true;
+            _hasDataDescriptor = hasDataDescriptor;
+
+            _diskNumberStart = 0;
+            _versionMadeByPlatform = CurrentZipPlatform;
+            _versionMadeBySpecification = (ZipVersionNeededValues)versionNeeded;
+            _versionToExtract = (ZipVersionNeededValues)versionNeeded;
+            _generalPurposeBitFlag = (BitFlagValues)generalPurposeBitFlags;
+            _isEncrypted = (_generalPurposeBitFlag & BitFlagValues.IsEncrypted) != 0;
+            _storedCompressionMethod = compressionMethod;
+            _lastModified = lastModified;
+            _compressedSize = compressedSize;
+            _uncompressedSize = uncompressedSize;
+            _crc32 = crc32;
+            _offsetOfLocalHeader = 0;
+            _storedOffsetOfCompressedData = null;
+            _externalFileAttr = 0;
+
+            _compressedBytes = null;
+            _storedUncompressedData = null;
+            _currentlyOpenForWrite = false;
+            _everOpenedForWrite = false;
+            _outstandingWriteStream = null;
+
+            // The name is re-encoded with the same encoding choice that was used to decode it.
+            _storedEntryNameBytes = (_generalPurposeBitFlag & BitFlagValues.UnicodeFileNameAndComment) != 0
+                ? Encoding.UTF8.GetBytes(fullName)
+                : (archive.EntryNameAndCommentEncoding ?? Encoding.UTF8).GetBytes(fullName);
+            _storedEntryName = fullName;
+
+            _cdUnknownExtraFields = null;
+            _lhUnknownExtraFields = null;
+
+            _fileComment = Array.Empty<byte>();
+            _compressionLevel = MapCompressionLevel(_generalPurposeBitFlag, compressionMethod);
+            _forwardReadDataStream = dataStream;
+
+            Changes = ZipArchive.ChangeState.Unchanged;
+        }
+
         /// <summary>
         /// The ZipArchive that this entry belongs to. If this entry has been deleted, this will return null.
         /// </summary>
@@ -173,6 +220,17 @@ internal ZipArchiveEntry(ZipArchive archive, string entryName)
         /// </summary>
         public bool IsEncrypted => _isEncrypted;
 
+        // Payload stream created by the archive in forward-read mode; null for entries without data.
+        internal Stream? ForwardReadDataStream => _forwardReadDataStream;
+
+        // True when the local header flagged a trailing data descriptor.
+        internal bool HasDataDescriptor => _hasDataDescriptor;
+
+        // Overwrites CRC-32 and sizes with the values read from the entry's data descriptor.
+        internal void UpdateFromDataDescriptor(uint crc32, long compressedSize, long uncompressedSize)
+        {
+            _crc32 = crc32;
+            _compressedSize = compressedSize;
+            _uncompressedSize = uncompressedSize;
+        }
+
         /// <summary>
         /// Gets the compression method used to compress the entry.
         /// </summary>
@@ -212,6 +270,8 @@ public int ExternalAttributes
             set
             {
                 ThrowIfInvalidArchive();
+                if (_archive.Mode == ZipArchiveMode.ForwardRead)
+                    throw new NotSupportedException(SR.ForwardReadOnly);
                 _externalFileAttr = (uint)value;
                 Changes |= ZipArchive.ChangeState.FixedLengthMetadata;
             }
@@ -230,6 +290,8 @@ public string Comment
             get => DecodeEntryString(_fileComment);
             set
             {
+                if (_archive.Mode == ZipArchiveMode.ForwardRead)
+                    throw new NotSupportedException(SR.ForwardReadOnly);
                 _fileComment = ZipHelper.GetEncodedTruncatedBytesFromString(value, _archive.EntryNameAndCommentEncoding, ushort.MaxValue, out bool isUTF8);
 
                 if (isUTF8)
@@ -295,6 +357,8 @@ public DateTimeOffset LastWriteTime
                 ThrowIfInvalidArchive();
                 if (_archive.Mode == ZipArchiveMode.Read)
                     throw new NotSupportedException(SR.ReadOnlyArchive);
+                if (_archive.Mode == ZipArchiveMode.ForwardRead)
+                    throw new NotSupportedException(SR.ForwardReadOnly);
                 if (_archive.Mode == ZipArchiveMode.Create && _everOpenedForWrite)
                     throw new IOException(SR.FrozenAfterWrite);
                 if (value.DateTime.Year < ZipHelper.ValidZipDate_YearMin || value.DateTime.Year > ZipHelper.ValidZipDate_YearMax)
@@ -371,6 +435,8 @@ public Stream Open()
                     return OpenInReadMode(checkOpenable: true);
                 case ZipArchiveMode.Create:
                     return OpenInWriteMode();
+                case ZipArchiveMode.ForwardRead:
+                    return OpenInForwardReadMode();
                 case ZipArchiveMode.Update:
                 default:
                     Debug.Assert(_archive.Mode == ZipArchiveMode.Update);
@@ -416,6 +482,11 @@ public Stream Open(FileAccess access)
                         throw new InvalidOperationException(SR.CannotBeReadInCreateMode);
                     return OpenInWriteMode();
 
+                case ZipArchiveMode.ForwardRead:
+                    if (access != FileAccess.Read)
+                        throw new InvalidOperationException(SR.CannotBeWrittenInReadMode);
+                    return OpenInForwardReadMode();
+
                 case ZipArchiveMode.Update:
                 default:
                     Debug.Assert(_archive.Mode == ZipArchiveMode.Update);
@@ -846,6 +917,16 @@ private CrcValidatingReadStream OpenInReadModeGetDataCompressor(long offsetOfCom
             return new CrcValidatingReadStream(decompressedStream, _crc32, _uncompressedSize);
         }
 
+        // Returns a non-owning view over the payload stream created when the header was parsed.
+        private WrappedStream OpenInForwardReadMode()
+        {
+            // NOTE(review): entries without payload have no data stream, so opening them reports a
+            // corrupt header instead of yielding an empty stream - confirm this is intended.
+            if (_forwardReadDataStream == null)
+                throw new InvalidDataException(SR.LocalFileHeaderCorrupt);
+
+            // Wrap so user disposal does not close our internal data stream.
+            // DrainPreviousEntry will drain and dispose _forwardReadDataStream itself.
+            return new WrappedStream(_forwardReadDataStream, closeBaseStream: false);
+        }
+
         private WrappedStream OpenInWriteMode()
         {
             if (_everOpenedForWrite)
diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveMode.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveMode.cs
index 9119fc1a9aba90..eac6d5972dd813 100644
--- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveMode.cs
+++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveMode.cs
@@ -26,6 +26,12 @@ public enum ZipArchiveMode
         /// The underlying file or stream must be readable, writable and seekable.
         /// No data will be written to the underlying file or stream until the archive is disposed.
         /// </summary>
-        Update
+        Update,
+        /// <summary>
+        /// Only forward-only sequential reading of entries is permitted using <see cref="ZipArchive.GetNextEntry"/>.
+        /// Entries are read from local file headers instead of the central directory, enabling reading from non-seekable streams
+        /// without buffering the entire archive. The underlying stream must be readable but need not be seekable.
+        /// </summary>
+        ForwardRead = 3
     }
 }
diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs
index ea2fc10ec55699..80579731910495 100644
--- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs
+++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs
@@ -967,4 +967,331 @@ public override async ValueTask DisposeAsync()
             await base.DisposeAsync().ConfigureAwait(false);
         }
     }
+
+    /// <summary>
+    /// Read-only, non-seekable view that exposes at most a fixed number of bytes of a base stream.
+    /// The base stream is not disposed when this stream is disposed.
+    /// </summary>
+    internal sealed class BoundedReadOnlyStream : Stream
+    {
+        private readonly Stream _baseStream;
+        private long _remaining; // bytes that may still be read from the base stream
+        private bool _isDisposed;
+
+        public BoundedReadOnlyStream(Stream baseStream, long length)
+        {
+            _baseStream = baseStream;
+            _remaining = length;
+        }
+
+        public override bool CanRead => !_isDisposed && _baseStream.CanRead;
+        public override bool CanSeek => false;
+        public override bool CanWrite => false;
+        public override long Length => throw new NotSupportedException();
+
+        public override long Position
+        {
+            get => throw new NotSupportedException();
+            set => throw new NotSupportedException();
+        }
+
+        private void ThrowIfDisposed()
+        {
+            if (_isDisposed)
+                throw new ObjectDisposedException(GetType().ToString(), SR.HiddenStreamName);
+        }
+
+        public override int Read(byte[] buffer, int offset, int count)
+        {
+            // Same argument validation as ReadAheadStream.
+            ValidateBufferArguments(buffer, offset, count);
+            return Read(buffer.AsSpan(offset, count));
+        }
+
+        public override int Read(Span<byte> buffer)
+        {
+            ThrowIfDisposed();
+
+            if (_remaining <= 0)
+            {
+                return 0;
+            }
+
+            // Never ask the base stream for more than this view is allowed to expose.
+            if (buffer.Length > _remaining)
+            {
+                buffer = buffer.Slice(0, (int)_remaining);
+            }
+
+            int bytesRead = _baseStream.Read(buffer);
+            _remaining -= bytesRead;
+
+            return bytesRead;
+        }
+
+        public override Task<int> ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken)
+        {
+            ValidateBufferArguments(buffer, offset, count);
+            return ReadAsync(buffer.AsMemory(offset, count), cancellationToken).AsTask();
+        }
+
+        public override async ValueTask<int> ReadAsync(Memory<byte> buffer, CancellationToken cancellationToken = default)
+        {
+            ThrowIfDisposed();
+
+            if (_remaining <= 0)
+            {
+                return 0;
+            }
+
+            if (buffer.Length > _remaining)
+            {
+                buffer = buffer.Slice(0, (int)_remaining);
+            }
+
+            int bytesRead = await _baseStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false);
+            _remaining -= bytesRead;
+
+            return bytesRead;
+        }
+
+        public override void Flush() { }
+        public override Task FlushAsync(CancellationToken cancellationToken) => Task.CompletedTask;
+        public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();
+        public override void SetLength(long value) => throw new NotSupportedException();
+        public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException();
+
+        protected override void Dispose(bool disposing)
+        {
+            _isDisposed = true;
+            base.Dispose(disposing);
+        }
+
+        public override ValueTask DisposeAsync()
+        {
+            _isDisposed = true;
+
+            return base.DisposeAsync();
+        }
+    }
+
+    internal sealed class ReadAheadStream : Stream
+    {
+        private readonly Stream _baseStream;
+        private readonly byte[] _history;
+        private int _historyCount;
+        private byte[]?
_pushback;
+        private int _pushbackOffset;
+        private int _pushbackCount;
+        private long _position;
+        private bool _isDisposed;
+
+        // Wraps a forward-only stream and remembers the most recently read bytes (up to
+        // historyCapacity) so that short backwards seeks relative to the current position can be
+        // served by replaying them.
+        public ReadAheadStream(Stream baseStream, int historyCapacity = 8192)
+        {
+            _baseStream = baseStream;
+            _history = new byte[historyCapacity];
+        }
+
+        public override bool CanRead => !_isDisposed && _baseStream.CanRead;
+        // Only Seek with SeekOrigin.Current and a non-positive offset is actually supported.
+        public override bool CanSeek => !_isDisposed;
+        public override bool CanWrite => false;
+
+        public override long Length
+        {
+            get
+            {
+                ThrowIfDisposed();
+                throw new NotSupportedException();
+            }
+        }
+
+        public override long Position
+        {
+            get
+            {
+                ThrowIfDisposed();
+                return _position;
+            }
+            set
+            {
+                ThrowIfDisposed();
+                throw new NotSupportedException();
+            }
+        }
+
+        public override int Read(byte[] buffer, int offset, int count)
+        {
+            ValidateBufferArguments(buffer, offset, count);
+            return Read(buffer.AsSpan(offset, count));
+        }
+
+        public override int Read(Span<byte> buffer)
+        {
+            ThrowIfDisposed();
+
+            // Replayed bytes are returned on their own: following up with a base read could block
+            // even though data has already been delivered to the caller.
+            int totalRead = ReadFromPushback(buffer);
+            if (totalRead == 0 && buffer.Length > 0)
+            {
+                totalRead = _baseStream.Read(buffer);
+                if (totalRead > 0)
+                {
+                    RecordHistory(buffer.Slice(0, totalRead));
+                }
+            }
+
+            _position += totalRead;
+            return totalRead;
+        }
+
+        public override Task<int> ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken)
+        {
+            ValidateBufferArguments(buffer, offset, count);
+            return ReadAsync(buffer.AsMemory(offset, count), cancellationToken).AsTask();
+        }
+
+        public override async ValueTask<int> ReadAsync(Memory<byte> buffer, CancellationToken cancellationToken = default)
+        {
+            ThrowIfDisposed();
+
+            int totalRead = ReadFromPushback(buffer.Span);
+            if (totalRead == 0 && buffer.Length > 0)
+            {
+                totalRead = await _baseStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false);
+                if (totalRead > 0)
+                {
+                    RecordHistory(buffer.Span.Slice(0, totalRead));
+                }
+            }
+
+            _position += totalRead;
+            return totalRead;
+        }
+
+        // Copies pending pushed-back bytes into destination, re-records them as history, and
+        // returns how many bytes were supplied (0 when nothing is pending).
+        private int ReadFromPushback(Span<byte> destination)
+        {
+            if (_pushbackCount == 0)
+            {
+                return 0;
+            }
+
+            int fromPushback = Math.Min(destination.Length, _pushbackCount);
+            _pushback.AsSpan(_pushbackOffset, fromPushback).CopyTo(destination);
+            RecordHistory(destination.Slice(0, fromPushback));
+            _pushbackOffset += fromPushback;
+            _pushbackCount -= fromPushback;
+
+            if (_pushbackCount == 0)
+            {
+                _pushback = null;
+            }
+
+            return fromPushback;
+        }
+
+        // Supports rewinding by up to the number of bytes currently held in the history buffer,
+        // and a zero-offset seek that just reports the position. Everything else is unsupported.
+        public override long Seek(long offset, SeekOrigin origin)
+        {
+            ThrowIfDisposed();
+
+            if (origin is SeekOrigin.Current && offset == 0)
+            {
+                return _position;
+            }
+
+            if (origin is SeekOrigin.Current && offset < 0)
+            {
+                int rewindBytes = checked((int)(-offset));
+
+                if (rewindBytes > _historyCount)
+                {
+                    throw new IOException(SR.IO_SeekBeforeBegin);
+                }
+
+                // New push-back = rewound history bytes followed by whatever was still pending.
+                int existingPushback = _pushbackCount;
+                byte[] newPushback = new byte[rewindBytes + existingPushback];
+                Array.Copy(_history, _historyCount - rewindBytes, newPushback, 0, rewindBytes);
+
+                if (existingPushback > 0)
+                {
+                    Array.Copy(_pushback!, _pushbackOffset, newPushback, rewindBytes, existingPushback);
+                }
+
+                _pushback = newPushback;
+                _pushbackOffset = 0;
+                _pushbackCount = newPushback.Length;
+                _historyCount -= rewindBytes;
+                _position -= rewindBytes;
+
+                return _position;
+            }
+
+            throw new NotSupportedException();
+        }
+
+        // Appends data to the history buffer, discarding the oldest bytes when it would overflow.
+        private void RecordHistory(ReadOnlySpan<byte> data)
+        {
+            if (data.Length >= _history.Length)
+            {
+                // Only the newest bytes fit.
+                data.Slice(data.Length - _history.Length).CopyTo(_history);
+                _historyCount = _history.Length;
+            }
+            else if (_historyCount + data.Length <= _history.Length)
+            {
+                data.CopyTo(_history.AsSpan(_historyCount));
+                _historyCount += data.Length;
+            }
+            else
+            {
+                // Slide the newest retained bytes to the front, then append the new data.
+                int toKeep = _history.Length - data.Length;
+                Array.Copy(_history, _historyCount - toKeep, _history, 0, toKeep);
+                data.CopyTo(_history.AsSpan(toKeep));
+                _historyCount = _history.Length;
+            }
+        }
+
+        public override void Flush()
+        {
+            ThrowIfDisposed();
+        }
+
+        public override Task FlushAsync(CancellationToken cancellationToken)
+        {
+            ThrowIfDisposed();
+            return Task.CompletedTask;
+        }
+
+        public override void SetLength(long value)
+        {
+            ThrowIfDisposed();
+            throw new NotSupportedException();
+        }
+
+        public override void Write(byte[] buffer, int offset, int count)
+        {
+            ThrowIfDisposed();
+            throw new NotSupportedException();
+        }
+
+        private void ThrowIfDisposed()
+        {
+            ObjectDisposedException.ThrowIf(_isDisposed, this);
+        }
+
+        // Disposing this wrapper also disposes the wrapped base stream.
+        protected override void Dispose(bool disposing)
+        {
+            if (disposing && !_isDisposed)
+            {
+                _baseStream.Dispose();
+                _isDisposed = true;
+            }
+            base.Dispose(disposing);
+        }
+
+        public override async ValueTask DisposeAsync()
+        {
+            if (!_isDisposed)
+            {
+                await _baseStream.DisposeAsync().ConfigureAwait(false);
+                _isDisposed = true;
+            }
+            await base.DisposeAsync().ConfigureAwait(false);
+        }
+    }
 }
diff --git a/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj b/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj
index 284ccb348c15fb..896df4da141c23 100644
--- a/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj
+++ b/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj
@@ -29,6 +29,7 @@
+
diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ForwardReadTests.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ForwardReadTests.cs
new file mode 100644
index 00000000000000..8da52f1dc4c43b
--- /dev/null
+++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ForwardReadTests.cs
@@ -0,0 +1,482 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+ +using System.Collections.Generic; +using System.Text; +using System.Threading; +using System.Threading.Tasks; +using Xunit; + +namespace System.IO.Compression.Tests +{ + public class zip_ForwardReadTests : ZipFileTestBase + { + private static readonly byte[] s_smallContent = "Hello, small world!"u8.ToArray(); + private static readonly byte[] s_mediumContent = new byte[8192]; + private static readonly byte[] s_largeContent = new byte[65536]; + + static zip_ForwardReadTests() + { + Random rng = new(42); + rng.NextBytes(s_mediumContent); + rng.NextBytes(s_largeContent); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task Read_DeflateWithKnownSize_ReturnsDecompressedData(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + byte[][] expectedContents = [s_smallContent, s_mediumContent, s_largeContent]; + + using MemoryStream archiveStream = new(zipBytes); + using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); + + for (int i = 0; i < expectedContents.Length; i++) + { + ZipArchiveEntry? entry = await GetNextEntry(archive, async); + + Assert.NotNull(entry); + Assert.Equal(ZipCompressionMethod.Deflate, entry.CompressionMethod); + + using Stream dataStream = entry.Open(); + byte[] decompressed = await ReadStreamFully(dataStream, async); + Assert.Equal(expectedContents[i], decompressed); + } + + ZipArchiveEntry? 
end = await GetNextEntry(archive, async); + Assert.Null(end); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task Read_StoredWithKnownSize_ReturnsUncompressedData(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.NoCompression, seekable: true); + byte[][] expectedContents = [s_smallContent, s_mediumContent, s_largeContent]; + + using MemoryStream archiveStream = new(zipBytes); + using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); + + for (int i = 0; i < expectedContents.Length; i++) + { + ZipArchiveEntry? entry = await GetNextEntry(archive, async); + + Assert.NotNull(entry); + Assert.Equal(ZipCompressionMethod.Stored, entry.CompressionMethod); + + using Stream dataStream = entry.Open(); + byte[] decompressed = await ReadStreamFully(dataStream, async); + Assert.Equal(expectedContents[i], decompressed); + } + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task Read_DeflateWithDataDescriptor_ReturnsDecompressedData(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: false); + byte[][] expectedContents = [s_smallContent, s_mediumContent, s_largeContent]; + + using MemoryStream archiveStream = new(zipBytes); + using WrappedStream nonSeekableStream = new(archiveStream, canRead: true, canWrite: false, canSeek: false, null); + using ZipArchive archive = new(nonSeekableStream, ZipArchiveMode.ForwardRead); + + for (int i = 0; i < expectedContents.Length; i++) + { + ZipArchiveEntry? 
entry = await GetNextEntry(archive, async); + + Assert.NotNull(entry); + Assert.Equal(ZipCompressionMethod.Deflate, entry.CompressionMethod); + + using Stream dataStream = entry.Open(); + byte[] decompressed = await ReadStreamFully(dataStream, async); + Assert.Equal(expectedContents[i], decompressed); + } + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task Read_FromNonSeekableStream(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + using WrappedStream nonSeekableStream = new(archiveStream, canRead: true, canWrite: false, canSeek: false, null); + using ZipArchive archive = new(nonSeekableStream, ZipArchiveMode.ForwardRead); + + ZipArchiveEntry? entry = await GetNextEntry(archive, async); + Assert.NotNull(entry); + Assert.Equal("small.txt", entry.FullName); + + using Stream dataStream = entry.Open(); + byte[] decompressed = await ReadStreamFully(dataStream, async); + Assert.Equal(s_smallContent, decompressed); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task EmptyArchive_ReturnsNull(bool async) + { + using MemoryStream ms = new(); + using (new ZipArchive(ms, ZipArchiveMode.Create, leaveOpen: true)) { } + + ms.Position = 0; + using ZipArchive archive = new(ms, ZipArchiveMode.ForwardRead); + + ZipArchiveEntry? entry = await GetNextEntry(archive, async); + Assert.Null(entry); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task PartialRead_ThenGetNextEntry_AdvancesCorrectly(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); + + ZipArchiveEntry? 
first = await GetNextEntry(archive, async); + Assert.NotNull(first); + + // Read only a few bytes + using (Stream ds = first.Open()) + { + byte[] partial = new byte[5]; + await ReadStream(ds, partial, async); + } + + ZipArchiveEntry? second = await GetNextEntry(archive, async); + + Assert.NotNull(second); + Assert.Equal("medium.bin", second.FullName); + + using Stream dataStream = second.Open(); + byte[] decompressed = await ReadStreamFully(dataStream, async); + Assert.Equal(s_mediumContent, decompressed); + } + + [Fact] + public void Entries_ThrowsNotSupportedException() + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); + + Assert.Throws(() => archive.Entries); + } + + [Fact] + public void GetEntry_ThrowsNotSupportedException() + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); + + Assert.Throws(() => archive.GetEntry("small.txt")); + } + + [Fact] + public void CreateEntry_ThrowsNotSupportedException() + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); + + Assert.Throws(() => archive.CreateEntry("new.txt")); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task GetNextEntry_AfterDispose_ThrowsObjectDisposedException(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead, leaveOpen: true); + + ZipArchiveEntry? 
entry = await GetNextEntry(archive, async); + Assert.NotNull(entry); + + archive.Dispose(); + + if (async) + { + await Assert.ThrowsAsync(() => archive.GetNextEntryAsync().AsTask()); + } + else + { + Assert.Throws(() => archive.GetNextEntry()); + } + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task AsyncGetNextEntryAsync_Works(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); + + ZipArchiveEntry? entry = await GetNextEntry(archive, async); + Assert.NotNull(entry); + Assert.Equal("small.txt", entry.FullName); + } + + [Fact] + public async Task AsyncCancellation_ThrowsOperationCanceled() + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); + + using CancellationTokenSource cts = new(); + cts.Cancel(); + + await Assert.ThrowsAnyAsync( + () => archive.GetNextEntryAsync(cancellationToken: cts.Token).AsTask()); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task MultipleEntries_MixedSkipAndRead(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); + + // Skip first entry (don't read data) + ZipArchiveEntry? first = await GetNextEntry(archive, async); + Assert.NotNull(first); + + // Read second entry fully + ZipArchiveEntry? second = await GetNextEntry(archive, async); + Assert.NotNull(second); + using (Stream ds = second.Open()) + { + byte[] data = await ReadStreamFully(ds, async); + Assert.Equal(s_mediumContent, data); + } + + // Skip third entry + ZipArchiveEntry? 
third = await GetNextEntry(archive, async); + Assert.NotNull(third); + + // Confirm end + ZipArchiveEntry? end = await GetNextEntry(archive, async); + Assert.Null(end); + } + + [Fact] + public void GetNextEntry_NotInForwardReadMode_ThrowsNotSupportedException() + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + using ZipArchive archive = new(archiveStream, ZipArchiveMode.Read); + + Assert.Throws(() => archive.GetNextEntry()); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task StoredWithDataDescriptor_ThrowsNotSupported(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.NoCompression, seekable: false); + + using MemoryStream archiveStream = new(zipBytes); + using WrappedStream nonSeekableStream = new(archiveStream, canRead: true, canWrite: false, canSeek: false, null); + using ZipArchive archive = new(nonSeekableStream, ZipArchiveMode.ForwardRead); + + if (async) + { + await Assert.ThrowsAsync(() => archive.GetNextEntryAsync().AsTask()); + } + else + { + Assert.Throws(() => archive.GetNextEntry()); + } + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task PartialRead_DataDescriptor_ThenGetNextEntry_AdvancesCorrectly(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: false); + + using MemoryStream archiveStream = new(zipBytes); + using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); + + ZipArchiveEntry? first = await GetNextEntry(archive, async); + Assert.NotNull(first); + + // Read only a few bytes via Open() + using (Stream ds = first.Open()) + { + byte[] partial = new byte[3]; + await ReadStream(ds, partial, async); + } + + ZipArchiveEntry? 
second = await GetNextEntry(archive, async); + + Assert.NotNull(second); + Assert.Equal("medium.bin", second.FullName); + + using Stream dataStream2 = second.Open(); + byte[] decompressed = await ReadStreamFully(dataStream2, async); + Assert.Equal(s_mediumContent, decompressed); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task ZeroLengthEntry_ReturnsEntryWithEmptyStream(bool async) + { + using MemoryStream ms = new(); + using (ZipArchive create = new(ms, ZipArchiveMode.Create, leaveOpen: true)) + { + create.CreateEntry("empty.txt"); + } + + ms.Position = 0; + using ZipArchive archive = new(ms, ZipArchiveMode.ForwardRead); + + ZipArchiveEntry? entry = await GetNextEntry(archive, async); + + Assert.NotNull(entry); + Assert.Equal("empty.txt", entry.FullName); + Assert.Equal(0, entry.CompressedLength); + + // Confirm end + ZipArchiveEntry? end = await GetNextEntry(archive, async); + Assert.Null(end); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task DirectoryEntry_ReturnsEntryWithNoDataStream(bool async) + { + using MemoryStream ms = new(); + using (ZipArchive create = new(ms, ZipArchiveMode.Create, leaveOpen: true)) + { + create.CreateEntry("mydir/"); + } + + ms.Position = 0; + using ZipArchive archive = new(ms, ZipArchiveMode.ForwardRead); + + ZipArchiveEntry? entry = await GetNextEntry(archive, async); + + Assert.NotNull(entry); + Assert.Equal("mydir/", entry.FullName); + } + + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task Dispose_WhileEntryPartiallyRead_DoesNotThrow(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead, leaveOpen: true); + + ZipArchiveEntry? 
entry = await GetNextEntry(archive, async); + Assert.NotNull(entry); + + // Partially read via Open() + Stream ds = entry.Open(); + byte[] partial = new byte[5]; + await ReadStream(ds, partial, async); + + // Dispose should not throw + archive.Dispose(); + } + + [Fact] + public void LeaveOpen_DoesNotDisposeStream() + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + + ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead, leaveOpen: true); + archive.Dispose(); + + Assert.True(archiveStream.CanRead); + } + + // ── Sync/async dispatch helpers ────────────────────────────────────── + + private static async ValueTask GetNextEntry( + ZipArchive archive, bool async) + { + return async + ? await archive.GetNextEntryAsync() + : archive.GetNextEntry(); + } + + private static async ValueTask ReadStream(Stream stream, byte[] buffer, bool async) + { + return async + ? await stream.ReadAsync(buffer) + : stream.Read(buffer); + } + + // ── Test data helpers ──────────────────────────────────────────────── + + private static byte[] CreateZipWithEntries(CompressionLevel compressionLevel, bool seekable) + { + MemoryStream ms = new(); + + Stream writeStream = seekable + ? 
ms + : new WrappedStream(ms, canRead: true, canWrite: true, canSeek: false, null); + + using (ZipArchive archive = new(writeStream, ZipArchiveMode.Create, leaveOpen: true)) + { + AddEntry(archive, "small.txt", s_smallContent, compressionLevel); + AddEntry(archive, "medium.bin", s_mediumContent, compressionLevel); + AddEntry(archive, "large.bin", s_largeContent, compressionLevel); + } + + return ms.ToArray(); + } + + private static void AddEntry(ZipArchive archive, string name, byte[] contents, CompressionLevel level) + { + ZipArchiveEntry entry = archive.CreateEntry(name, level); + using Stream stream = entry.Open(); + stream.Write(contents); + } + + private static async Task ReadStreamFully(Stream stream, bool async) + { + using MemoryStream result = new(); + byte[] buffer = new byte[4096]; + + int bytesRead; + if (async) + { + while ((bytesRead = await stream.ReadAsync(buffer)) > 0) + { + result.Write(buffer, 0, bytesRead); + } + } + else + { + while ((bytesRead = stream.Read(buffer)) > 0) + { + result.Write(buffer, 0, bytesRead); + } + } + + return result.ToArray(); + } + } +} From 3986bcd4dceabdc3a8331f4ab13d8d91c2c2c4f4 Mon Sep 17 00:00:00 2001 From: alinpahontu2912 Date: Wed, 8 Apr 2026 13:26:41 +0200 Subject: [PATCH 2/6] fix bugs --- .../src/System/IO/Compression/ZipArchive.cs | 206 ++++++++---------- .../System/IO/Compression/ZipArchiveEntry.cs | 11 +- .../System/IO/Compression/ZipCustomStreams.cs | 7 +- .../tests/ZipArchive/zip_ForwardReadTests.cs | 29 ++- 4 files changed, 115 insertions(+), 138 deletions(-) diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs index fb81ab46f6d951..51ea1a9779a069 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs @@ -451,7 +451,7 @@ private async ValueTask 
DrainPreviousEntryAsync(CancellationToken cancellationTo if (prev.HasDataDescriptor) { - ReadDataDescriptor(prev); + await ReadDataDescriptorAsync(prev, cancellationToken).ConfigureAwait(false); } _forwardReadPreviousEntry = null; @@ -459,95 +459,79 @@ private async ValueTask DrainPreviousEntryAsync(CancellationToken cancellationTo private void ReadDataDescriptor(ZipArchiveEntry entry) { - // Data descriptor can be: - // Without signature: CRC32(4) + CompressedSize(4) + UncompressedSize(4) = 12 bytes - // With signature: Sig(4) + CRC32(4) + CompressedSize(4) + UncompressedSize(4) = 16 bytes - // Or Zip64: Sig(4) + CRC32(4) + CompressedSize(8) + UncompressedSize(8) = 24 bytes - - // Read first 4 bytes to check for signature - Span sigBuf = stackalloc byte[4]; - _archiveStream.ReadExactly(sigBuf); - - uint possibleSig = BinaryPrimitives.ReadUInt32LittleEndian(sigBuf); - bool hasSignature = possibleSig == 0x08074B50; - - if (hasSignature) + // Data descriptor formats (all start after entry data): + // 32-bit with sig: sig(4) + crc(4) + comp(4) + uncomp(4) = 16 bytes + // 32-bit no sig: crc(4) + comp(4) + uncomp(4) = 12 bytes + // 64-bit with sig: sig(4) + crc(4) + comp(8) + uncomp(8) = 24 bytes + // 64-bit no sig: crc(4) + comp(8) + uncomp(8) = 20 bytes + // Read the maximum (24 bytes), determine format, seek back the unused portion. + // + // Note: When the archive stream is a ReadAheadStream (non-seekable ForwardRead), + // Seek(negative, Current) uses a limited history buffer (default 8KB). + // The small rewinds here (at most 12 bytes) are well within that limit. + + Span buf = stackalloc byte[24]; + _archiveStream.ReadExactly(buf); + + int pos = 0; + uint firstWord = BinaryPrimitives.ReadUInt32LittleEndian(buf); + if (firstWord == 0x08074B50) + pos = 4; // skip signature + + uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(buf.Slice(pos)); + pos += 4; + + // Probe 32-bit: if the 4 bytes after comp(4)+uncomp(4) are a known signature, it's 32-bit. 
+ uint afterThirtyTwo = BinaryPrimitives.ReadUInt32LittleEndian(buf.Slice(pos + 8)); + if (IsKnownZipSignature(afterThirtyTwo)) { - // Probe whether this is a 32-bit or 64-bit descriptor. - // Read CRC32(4) + field1(4) + field2(4) + field3(4) = 16 more bytes - Span probe = stackalloc byte[20]; - _archiveStream.ReadExactly(probe); - - uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(probe); - - // Try 32-bit interpretation first: sizes are 4 bytes each - // After 32-bit descriptor, next 4 bytes would be at probe[12..16] - uint nextSig32 = BinaryPrimitives.ReadUInt32LittleEndian(probe.Slice(12)); - - if (IsKnownZipSignature(nextSig32)) - { - // 32-bit descriptor - uint compressedSize32 = BinaryPrimitives.ReadUInt32LittleEndian(probe.Slice(4)); - uint uncompressedSize32 = BinaryPrimitives.ReadUInt32LittleEndian(probe.Slice(8)); - entry.UpdateFromDataDescriptor(crc32, compressedSize32, uncompressedSize32); - // Seek back the 8 over-read bytes (nextSig32 + 4 more bytes) - _archiveStream.Seek(-8, SeekOrigin.Current); - } - else - { - // 64-bit descriptor: read 4 more bytes to complete it - Span extra = stackalloc byte[4]; - _archiveStream.ReadExactly(extra); - - long compressedSize64 = BinaryPrimitives.ReadInt64LittleEndian(probe.Slice(4)); - long uncompressedSize64 = BinaryPrimitives.ReadInt64LittleEndian(probe.Slice(12)); - // extra contains the 4 bytes we read that aren't part of sizes - // Actually: with signature, zip64 is: sig(4) + crc(4) + compsize(8) + uncompsize(8) = 24 - // We read sig(4) already, then probe(20) = crc(4) + comp(8) + uncomp first 4 - // Then extra(4) = uncomp last 4 - // Reconstruct uncompressedSize64 from probe[12..16] and extra[0..4] - Span uncomp64Buf = stackalloc byte[8]; - probe.Slice(12, 4).CopyTo(uncomp64Buf); - extra.CopyTo(uncomp64Buf.Slice(4)); - uncompressedSize64 = BinaryPrimitives.ReadInt64LittleEndian(uncomp64Buf); - - entry.UpdateFromDataDescriptor(crc32, compressedSize64, uncompressedSize64); - } + uint compressedSize = 
BinaryPrimitives.ReadUInt32LittleEndian(buf.Slice(pos)); + uint uncompressedSize = BinaryPrimitives.ReadUInt32LittleEndian(buf.Slice(pos + 4)); + entry.UpdateFromDataDescriptor(crc32, compressedSize, uncompressedSize); + int consumed = pos + 8; + _archiveStream.Seek(consumed - 24, SeekOrigin.Current); } else { - // No signature - first 4 bytes are CRC32 - // Read the remaining 8 bytes (compressedSize + uncompressedSize as 32-bit) - // But could be 64-bit: try 32-bit first, probe next 4 bytes - Span rest = stackalloc byte[12]; - _archiveStream.ReadExactly(rest); + long compressedSize = BinaryPrimitives.ReadInt64LittleEndian(buf.Slice(pos)); + long uncompressedSize = BinaryPrimitives.ReadInt64LittleEndian(buf.Slice(pos + 8)); + entry.UpdateFromDataDescriptor(crc32, compressedSize, uncompressedSize); + int consumed = pos + 16; + if (consumed < 24) + _archiveStream.Seek(consumed - 24, SeekOrigin.Current); + } + } - uint crc32 = possibleSig; - uint nextSig = BinaryPrimitives.ReadUInt32LittleEndian(rest.Slice(8)); + private async ValueTask ReadDataDescriptorAsync(ZipArchiveEntry entry, CancellationToken cancellationToken) + { + byte[] buf = new byte[24]; + await _archiveStream.ReadExactlyAsync(buf, cancellationToken).ConfigureAwait(false); - if (IsKnownZipSignature(nextSig)) - { - // 32-bit, no signature - uint compressedSize32 = BinaryPrimitives.ReadUInt32LittleEndian(rest); - uint uncompressedSize32 = BinaryPrimitives.ReadUInt32LittleEndian(rest.Slice(4)); - entry.UpdateFromDataDescriptor(crc32, compressedSize32, uncompressedSize32); - // Seek back the 4 over-read bytes - _archiveStream.Seek(-4, SeekOrigin.Current); - } - else - { - // 64-bit, no signature - Span extra = stackalloc byte[4]; - _archiveStream.ReadExactly(extra); + int pos = 0; + uint firstWord = BinaryPrimitives.ReadUInt32LittleEndian(buf); + if (firstWord == 0x08074B50) + pos = 4; - long compressedSize64 = BinaryPrimitives.ReadInt64LittleEndian(rest); - Span uncomp64Buf = stackalloc byte[8]; - 
rest.Slice(8, 4).CopyTo(uncomp64Buf); - extra.CopyTo(uncomp64Buf.Slice(4)); - long uncompressedSize64 = BinaryPrimitives.ReadInt64LittleEndian(uncomp64Buf); + uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(buf.AsSpan(pos)); + pos += 4; - entry.UpdateFromDataDescriptor(crc32, compressedSize64, uncompressedSize64); - } + uint afterThirtyTwo = BinaryPrimitives.ReadUInt32LittleEndian(buf.AsSpan(pos + 8)); + if (IsKnownZipSignature(afterThirtyTwo)) + { + uint compressedSize = BinaryPrimitives.ReadUInt32LittleEndian(buf.AsSpan(pos)); + uint uncompressedSize = BinaryPrimitives.ReadUInt32LittleEndian(buf.AsSpan(pos + 4)); + entry.UpdateFromDataDescriptor(crc32, compressedSize, uncompressedSize); + int consumed = pos + 8; + _archiveStream.Seek(consumed - 24, SeekOrigin.Current); + } + else + { + long compressedSize = BinaryPrimitives.ReadInt64LittleEndian(buf.AsSpan(pos)); + long uncompressedSize = BinaryPrimitives.ReadInt64LittleEndian(buf.AsSpan(pos + 8)); + entry.UpdateFromDataDescriptor(crc32, compressedSize, uncompressedSize); + int consumed = pos + 16; + if (consumed < 24) + _archiveStream.Seek(consumed - 24, SeekOrigin.Current); } } @@ -559,6 +543,17 @@ private static bool IsKnownZipSignature(uint sig) || sig == 0x06064B50; // Zip64 EOCD } + private static Stream CreateForwardReadDecompressor(Stream source, ZipCompressionMethod compressionMethod, long uncompressedSize, bool leaveOpen) + { + return compressionMethod switch + { + ZipCompressionMethod.Deflate when leaveOpen => new DeflateStream(source, CompressionMode.Decompress, leaveOpen: true), + ZipCompressionMethod.Deflate => new DeflateStream(source, CompressionMode.Decompress, uncompressedSize), + ZipCompressionMethod.Deflate64 => new DeflateManagedStream(source, ZipCompressionMethod.Deflate64, uncompressedSize), + _ => source, + }; + } + private ZipArchiveEntry? 
ReadNextLocalFileHeader() { const int HeaderSize = ZipLocalFileHeader.SizeOfLocalHeader; @@ -591,16 +586,8 @@ private static bool IsKnownZipSignature(uint sig) const int HeaderSize = ZipLocalFileHeader.SizeOfLocalHeader; byte[] header = new byte[HeaderSize]; - int totalRead = 0; - while (totalRead < HeaderSize) - { - int read = await _archiveStream.ReadAsync(header.AsMemory(totalRead, HeaderSize - totalRead), cancellationToken).ConfigureAwait(false); - if (read == 0) - break; - totalRead += read; - } - - if (totalRead < HeaderSize) + int bytesRead = await _archiveStream.ReadAtLeastAsync(header, HeaderSize, throwOnEndOfStream: false, cancellationToken).ConfigureAwait(false); + if (bytesRead < HeaderSize) { _forwardReadReachedEnd = true; return null; @@ -691,21 +678,11 @@ private ZipArchiveEntry ParseLocalFileHeader(ReadOnlySpan header) { if (hasDataDescriptor) { - // Data descriptor: unknown size, let DeflateStream detect end - Stream decompressor; - if (compressionMethod == ZipCompressionMethod.Deflate) - { - decompressor = new DeflateStream(_archiveStream, CompressionMode.Decompress, leaveOpen: true); - } - else if (compressionMethod == ZipCompressionMethod.Deflate64) - { - decompressor = new DeflateManagedStream(_archiveStream, ZipCompressionMethod.Deflate64, -1); - } - else - { - // Should not reach here (stored with DD is thrown above) - decompressor = _archiveStream; - } + // Data descriptor: unknown size, let DeflateStream detect end. + // Because ReadAheadStream.CanSeek returns true, DeflateStream will + // automatically rewind unconsumed bytes after decompression finishes, + // leaving the archive stream positioned right after the compressed data. 
+ Stream decompressor = CreateForwardReadDecompressor(_archiveStream, compressionMethod, -1, leaveOpen: true); dataStream = new CrcValidatingReadStream(decompressor, expectedCrc: 0, expectedLength: long.MaxValue); } else if (isEncrypted) @@ -717,20 +694,7 @@ private ZipArchiveEntry ParseLocalFileHeader(ReadOnlySpan header) { // Known size, not encrypted Stream bounded = new BoundedReadOnlyStream(_archiveStream, compressedSize); - Stream decompressor; - if (compressionMethod == ZipCompressionMethod.Deflate) - { - decompressor = new DeflateStream(bounded, CompressionMode.Decompress, uncompressedSize); - } - else if (compressionMethod == ZipCompressionMethod.Deflate64) - { - decompressor = new DeflateManagedStream(bounded, ZipCompressionMethod.Deflate64, uncompressedSize); - } - else - { - // Stored - decompressor = bounded; - } + Stream decompressor = CreateForwardReadDecompressor(bounded, compressionMethod, uncompressedSize, leaveOpen: false); dataStream = new CrcValidatingReadStream(decompressor, crc32, uncompressedSize); } } diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs index 5dee765db9d7fa..e44acd6ee3e4f2 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs @@ -48,6 +48,7 @@ public partial class ZipArchiveEntry private byte[] _fileComment; private readonly CompressionLevel _compressionLevel; private Stream? _forwardReadDataStream; + private bool _forwardReadStreamOpened; private bool _hasDataDescriptor; // Initializes a ZipArchiveEntry instance for an existing archive entry. 
@@ -355,10 +356,8 @@ public DateTimeOffset LastWriteTime set { ThrowIfInvalidArchive(); - if (_archive.Mode == ZipArchiveMode.Read) + if (_archive.Mode is ZipArchiveMode.Read or ZipArchiveMode.ForwardRead) throw new NotSupportedException(SR.ReadOnlyArchive); - if (_archive.Mode == ZipArchiveMode.ForwardRead) - throw new NotSupportedException(SR.ForwardReadOnly); if (_archive.Mode == ZipArchiveMode.Create && _everOpenedForWrite) throw new IOException(SR.FrozenAfterWrite); if (value.DateTime.Year < ZipHelper.ValidZipDate_YearMin || value.DateTime.Year > ZipHelper.ValidZipDate_YearMax) @@ -919,8 +918,12 @@ private CrcValidatingReadStream OpenInReadModeGetDataCompressor(long offsetOfCom private WrappedStream OpenInForwardReadMode() { - if (_forwardReadDataStream == null) + if (_forwardReadDataStream is null) throw new InvalidDataException(SR.LocalFileHeaderCorrupt); + if (_forwardReadStreamOpened) + throw new IOException(SR.ForwardReadOnly); + + _forwardReadStreamOpened = true; // Wrap so user disposal does not close our internal data stream. // DrainPreviousEntry will drain and dispose _forwardReadDataStream itself. 
diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs index 80579731910495..ddd3af7a9f9c36 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs @@ -993,8 +993,7 @@ public override long Position private void ThrowIfDisposed() { - if (_isDisposed) - throw new ObjectDisposedException(GetType().ToString(), SR.HiddenStreamName); + ObjectDisposedException.ThrowIf(_isDisposed, this); } public override int Read(byte[] buffer, int offset, int count) @@ -1049,6 +1048,8 @@ public override void Flush() { } public override void SetLength(long value) => throw new NotSupportedException(); public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException(); + // Does not dispose the base stream — BoundedReadOnlyStream is a window + // into the shared archive stream, which outlives individual entries. protected override void Dispose(bool disposing) { _isDisposed = true; @@ -1081,6 +1082,8 @@ public ReadAheadStream(Stream baseStream, int historyCapacity = 8192) } public override bool CanRead => !_isDisposed && _baseStream.CanRead; + // Must report true: DeflateStream checks CanSeek to rewind unconsumed bytes + // after decompression finishes, which is critical for data descriptor entries. 
public override bool CanSeek => !_isDisposed; public override bool CanWrite => false; diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ForwardReadTests.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ForwardReadTests.cs index 8da52f1dc4c43b..6758aac84b2f13 100644 --- a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ForwardReadTests.cs +++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ForwardReadTests.cs @@ -411,6 +411,22 @@ public void LeaveOpen_DoesNotDisposeStream() Assert.True(archiveStream.CanRead); } + [Theory] + [MemberData(nameof(Get_Booleans_Data))] + public async Task Open_CalledTwice_Throws(bool async) + { + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + + using MemoryStream archiveStream = new(zipBytes); + using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); + + ZipArchiveEntry? entry = await GetNextEntry(archive, async); + Assert.NotNull(entry); + + using Stream first = entry.Open(); + Assert.Throws(() => entry.Open()); + } + // ── Sync/async dispatch helpers ────────────────────────────────────── private static async ValueTask GetNextEntry( @@ -458,22 +474,13 @@ private static void AddEntry(ZipArchive archive, string name, byte[] contents, C private static async Task ReadStreamFully(Stream stream, bool async) { using MemoryStream result = new(); - byte[] buffer = new byte[4096]; - - int bytesRead; if (async) { - while ((bytesRead = await stream.ReadAsync(buffer)) > 0) - { - result.Write(buffer, 0, bytesRead); - } + await stream.CopyToAsync(result); } else { - while ((bytesRead = stream.Read(buffer)) > 0) - { - result.Write(buffer, 0, bytesRead); - } + stream.CopyTo(result); } return result.ToArray(); From 66707618d205ced51e37a2f05ceb0b9e39bc4cb5 Mon Sep 17 00:00:00 2001 From: alinpahontu2912 Date: Tue, 21 Apr 2026 10:49:39 +0200 Subject: [PATCH 3/6] refactor header parsing and testing + remove boundedreadstream --- 
.../src/Resources/Strings.resx | 9 + .../System/IO/Compression/ZipArchive.Async.cs | 1 + .../src/System/IO/Compression/ZipArchive.cs | 358 +++++---------- .../System/IO/Compression/ZipArchiveEntry.cs | 67 +-- .../System/IO/Compression/ZipBlocks.Async.cs | 123 ++++++ .../src/System/IO/Compression/ZipBlocks.cs | 155 +++++++ .../System/IO/Compression/ZipCustomStreams.cs | 103 +---- .../tests/ZipArchive/zip_ForwardReadTests.cs | 411 ++++++------------ 8 files changed, 586 insertions(+), 641 deletions(-) diff --git a/src/libraries/System.IO.Compression/src/Resources/Strings.resx b/src/libraries/System.IO.Compression/src/Resources/Strings.resx index 5fc55777154b61..8cdfe82e76ec92 100644 --- a/src/libraries/System.IO.Compression/src/Resources/Strings.resx +++ b/src/libraries/System.IO.Compression/src/Resources/Strings.resx @@ -392,4 +392,13 @@ The archive stream contains an invalid local file header. + + Encrypted entries are not supported in ForwardRead mode. + + + This property is not available because the entry uses a data descriptor and the metadata cannot be determined in ForwardRead mode. + + + This entry has no data to read. It may be a directory entry or an empty entry. 
+ diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.Async.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.Async.cs index cae68745faa1f5..13c1cc76d855c5 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.Async.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.Async.cs @@ -151,6 +151,7 @@ protected virtual async ValueTask DisposeAsyncCore() case ZipArchiveMode.Read: break; case ZipArchiveMode.ForwardRead: + await DrainPreviousEntryAsync(default).ConfigureAwait(false); break; case ZipArchiveMode.Create: await WriteFileAsync().ConfigureAwait(false); diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs index 51ea1a9779a069..391caceeb23877 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs @@ -310,6 +310,7 @@ protected virtual void Dispose(bool disposing) case ZipArchiveMode.Read: break; case ZipArchiveMode.ForwardRead: + DrainPreviousEntry(); break; case ZipArchiveMode.Create: WriteFile(); @@ -386,7 +387,31 @@ protected virtual void Dispose(bool disposing) return null; DrainPreviousEntry(); - return ReadNextLocalFileHeader(); + + ZipLocalFileHeader.ForwardReadHeaderData? headerData = + ZipLocalFileHeader.TryReadForForwardRead(_archiveStream, EntryNameAndCommentEncoding); + + if (headerData is null) + { + _forwardReadReachedEnd = true; + return null; + } + + var data = headerData.Value; + + if (data.HasDataDescriptor) + { + if (data.CompressionMethod == ZipCompressionMethod.Stored) + throw new NotSupportedException(SR.ForwardReadStoredDataDescriptorNotSupported); + if (data.IsEncrypted) + throw new NotSupportedException(SR.ForwardReadEncryptedDataDescriptorNotSupported); + } + + Stream? 
dataStream = BuildForwardReadDataStream(data); + var entry = new ZipArchiveEntry(this, data, dataStream); + _forwardReadPreviousEntry = entry; + + return entry; } /// @@ -414,133 +439,122 @@ protected virtual void Dispose(bool disposing) private async ValueTask GetNextEntryAsyncCore(CancellationToken cancellationToken) { await DrainPreviousEntryAsync(cancellationToken).ConfigureAwait(false); - return await ReadNextLocalFileHeaderAsync(cancellationToken).ConfigureAwait(false); - } - private void DrainPreviousEntry() - { - if (_forwardReadPreviousEntry is not { } prev) - return; + ZipLocalFileHeader.ForwardReadHeaderData? headerData = + await ZipLocalFileHeader.TryReadForForwardReadAsync(_archiveStream, EntryNameAndCommentEncoding, cancellationToken).ConfigureAwait(false); - Stream? dataStream = prev.ForwardReadDataStream; - if (dataStream != null) + if (headerData is null) { - dataStream.CopyTo(Stream.Null); - dataStream.Dispose(); + _forwardReadReachedEnd = true; + return null; } - if (prev.HasDataDescriptor) + var data = headerData.Value; + + if (data.HasDataDescriptor) { - ReadDataDescriptor(prev); + if (data.CompressionMethod == ZipCompressionMethod.Stored) + throw new NotSupportedException(SR.ForwardReadStoredDataDescriptorNotSupported); + if (data.IsEncrypted) + throw new NotSupportedException(SR.ForwardReadEncryptedDataDescriptorNotSupported); } - _forwardReadPreviousEntry = null; + Stream? 
dataStream = BuildForwardReadDataStream(data); + var entry = new ZipArchiveEntry(this, data, dataStream); + _forwardReadPreviousEntry = entry; + + return entry; } - private async ValueTask DrainPreviousEntryAsync(CancellationToken cancellationToken) + private void DrainPreviousEntry() => + DrainPreviousEntryCore(useAsync: false, cancellationToken: default).GetAwaiter().GetResult(); + + private ValueTask DrainPreviousEntryAsync(CancellationToken cancellationToken) => + new ValueTask(DrainPreviousEntryCore(useAsync: true, cancellationToken)); + + private async Task DrainPreviousEntryCore(bool useAsync, CancellationToken cancellationToken) { if (_forwardReadPreviousEntry is not { } prev) return; Stream? dataStream = prev.ForwardReadDataStream; - if (dataStream != null) + if (dataStream is not null) { - await dataStream.CopyToAsync(Stream.Null, cancellationToken).ConfigureAwait(false); - await dataStream.DisposeAsync().ConfigureAwait(false); - } + byte[] buffer = new byte[4096]; + if (useAsync) + { + while (await dataStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false) > 0) { } + } + else + { + while (dataStream.Read(buffer) > 0) { } + } - if (prev.HasDataDescriptor) - { - await ReadDataDescriptorAsync(prev, cancellationToken).ConfigureAwait(false); - } + var crcResult = (dataStream as CrcValidatingReadStream)?.GetFinalCrcResult(); - _forwardReadPreviousEntry = null; - } + if (useAsync) + await dataStream.DisposeAsync().ConfigureAwait(false); + else + dataStream.Dispose(); - private void ReadDataDescriptor(ZipArchiveEntry entry) - { - // Data descriptor formats (all start after entry data): - // 32-bit with sig: sig(4) + crc(4) + comp(4) + uncomp(4) = 16 bytes - // 32-bit no sig: crc(4) + comp(4) + uncomp(4) = 12 bytes - // 64-bit with sig: sig(4) + crc(4) + comp(8) + uncomp(8) = 24 bytes - // 64-bit no sig: crc(4) + comp(8) + uncomp(8) = 20 bytes - // Read the maximum (24 bytes), determine format, seek back the unused portion. 
- // - // Note: When the archive stream is a ReadAheadStream (non-seekable ForwardRead), - // Seek(negative, Current) uses a limited history buffer (default 8KB). - // The small rewinds here (at most 12 bytes) are well within that limit. - - Span buf = stackalloc byte[24]; - _archiveStream.ReadExactly(buf); - - int pos = 0; - uint firstWord = BinaryPrimitives.ReadUInt32LittleEndian(buf); - if (firstWord == 0x08074B50) - pos = 4; // skip signature - - uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(buf.Slice(pos)); - pos += 4; - - // Probe 32-bit: if the 4 bytes after comp(4)+uncomp(4) are a known signature, it's 32-bit. - uint afterThirtyTwo = BinaryPrimitives.ReadUInt32LittleEndian(buf.Slice(pos + 8)); - if (IsKnownZipSignature(afterThirtyTwo)) - { - uint compressedSize = BinaryPrimitives.ReadUInt32LittleEndian(buf.Slice(pos)); - uint uncompressedSize = BinaryPrimitives.ReadUInt32LittleEndian(buf.Slice(pos + 4)); - entry.UpdateFromDataDescriptor(crc32, compressedSize, uncompressedSize); - int consumed = pos + 8; - _archiveStream.Seek(consumed - 24, SeekOrigin.Current); + if (prev.HasDataDescriptor) + { + var (crc32, _, uncompressedSize) = useAsync + ? 
await ZipLocalFileHeader.ReadDataDescriptorAsync(_archiveStream, prev.IsZip64SizeFields, cancellationToken).ConfigureAwait(false) + : ZipLocalFileHeader.ReadDataDescriptor(_archiveStream, prev.IsZip64SizeFields); + + if (crcResult is { } actual) + { + if (actual.Crc32 != crc32) + throw new InvalidDataException(SR.CrcMismatch); + if (actual.BytesRead != uncompressedSize) + throw new InvalidDataException(SR.UnexpectedStreamLength); + } + } } - else + else if (prev.HasDataDescriptor) { - long compressedSize = BinaryPrimitives.ReadInt64LittleEndian(buf.Slice(pos)); - long uncompressedSize = BinaryPrimitives.ReadInt64LittleEndian(buf.Slice(pos + 8)); - entry.UpdateFromDataDescriptor(crc32, compressedSize, uncompressedSize); - int consumed = pos + 16; - if (consumed < 24) - _archiveStream.Seek(consumed - 24, SeekOrigin.Current); + if (useAsync) + await ZipLocalFileHeader.ReadDataDescriptorAsync(_archiveStream, prev.IsZip64SizeFields, cancellationToken).ConfigureAwait(false); + else + ZipLocalFileHeader.ReadDataDescriptor(_archiveStream, prev.IsZip64SizeFields); } + + _forwardReadPreviousEntry = null; } - private async ValueTask ReadDataDescriptorAsync(ZipArchiveEntry entry, CancellationToken cancellationToken) + private Stream? 
BuildForwardReadDataStream(ZipLocalFileHeader.ForwardReadHeaderData data) { - byte[] buf = new byte[24]; - await _archiveStream.ReadExactlyAsync(buf, cancellationToken).ConfigureAwait(false); + bool isDirectory = data.FullName.Length > 0 && + (data.FullName[^1] == '/' || data.FullName[^1] == '\\'); + bool isEmptyEntry = !data.HasDataDescriptor && data.CompressedSize == 0 && data.UncompressedSize == 0; - int pos = 0; - uint firstWord = BinaryPrimitives.ReadUInt32LittleEndian(buf); - if (firstWord == 0x08074B50) - pos = 4; + if (isDirectory || isEmptyEntry) + return null; - uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(buf.AsSpan(pos)); - pos += 4; + if (data.CompressionMethod != ZipCompressionMethod.Stored && + data.CompressionMethod != ZipCompressionMethod.Deflate && + data.CompressionMethod != ZipCompressionMethod.Deflate64) + { + throw new InvalidDataException(SR.UnsupportedCompression); + } - uint afterThirtyTwo = BinaryPrimitives.ReadUInt32LittleEndian(buf.AsSpan(pos + 8)); - if (IsKnownZipSignature(afterThirtyTwo)) + if (data.HasDataDescriptor) { - uint compressedSize = BinaryPrimitives.ReadUInt32LittleEndian(buf.AsSpan(pos)); - uint uncompressedSize = BinaryPrimitives.ReadUInt32LittleEndian(buf.AsSpan(pos + 4)); - entry.UpdateFromDataDescriptor(crc32, compressedSize, uncompressedSize); - int consumed = pos + 8; - _archiveStream.Seek(consumed - 24, SeekOrigin.Current); + Stream decompressor = CreateForwardReadDecompressor(_archiveStream, data.CompressionMethod, -1, leaveOpen: true); + + return new CrcValidatingReadStream(decompressor, expectedCrc: 0, expectedLength: long.MaxValue); } - else + + if (data.IsEncrypted) { - long compressedSize = BinaryPrimitives.ReadInt64LittleEndian(buf.AsSpan(pos)); - long uncompressedSize = BinaryPrimitives.ReadInt64LittleEndian(buf.AsSpan(pos + 8)); - entry.UpdateFromDataDescriptor(crc32, compressedSize, uncompressedSize); - int consumed = pos + 16; - if (consumed < 24) - _archiveStream.Seek(consumed - 24, 
SeekOrigin.Current); + return new SubReadStream(_archiveStream, _archiveStream.Position, data.CompressedSize); } - } - private static bool IsKnownZipSignature(uint sig) - { - return sig == 0x04034B50 // Local file header - || sig == 0x02014B50 // Central directory - || sig == 0x06054B50 // EOCD - || sig == 0x06064B50; // Zip64 EOCD + Stream bounded = new SubReadStream(_archiveStream, _archiveStream.Position, data.CompressedSize); + Stream decompressor2 = CreateForwardReadDecompressor(bounded, data.CompressionMethod, data.UncompressedSize, leaveOpen: false); + + return new CrcValidatingReadStream(decompressor2, data.Crc32, data.UncompressedSize); } private static Stream CreateForwardReadDecompressor(Stream source, ZipCompressionMethod compressionMethod, long uncompressedSize, bool leaveOpen) @@ -549,164 +563,12 @@ private static Stream CreateForwardReadDecompressor(Stream source, ZipCompressio { ZipCompressionMethod.Deflate when leaveOpen => new DeflateStream(source, CompressionMode.Decompress, leaveOpen: true), ZipCompressionMethod.Deflate => new DeflateStream(source, CompressionMode.Decompress, uncompressedSize), + ZipCompressionMethod.Deflate64 when leaveOpen => new DeflateManagedStream(source, ZipCompressionMethod.Deflate64, -1), ZipCompressionMethod.Deflate64 => new DeflateManagedStream(source, ZipCompressionMethod.Deflate64, uncompressedSize), _ => source, }; } - private ZipArchiveEntry? 
ReadNextLocalFileHeader() - { - const int HeaderSize = ZipLocalFileHeader.SizeOfLocalHeader; - Span header = stackalloc byte[HeaderSize]; - - int bytesRead = _archiveStream.ReadAtLeast(header, HeaderSize, throwOnEndOfStream: false); - if (bytesRead < HeaderSize) - { - _forwardReadReachedEnd = true; - return null; - } - - // Check signature - if (!header.Slice(0, 4).SequenceEqual(ZipLocalFileHeader.SignatureConstantBytes)) - { - uint sig = BinaryPrimitives.ReadUInt32LittleEndian(header); - if (IsKnownZipSignature(sig)) - { - _forwardReadReachedEnd = true; - return null; - } - throw new InvalidDataException(SR.ForwardReadInvalidLocalFileHeader); - } - - return ParseLocalFileHeader(header); - } - - private async ValueTask ReadNextLocalFileHeaderAsync(CancellationToken cancellationToken) - { - const int HeaderSize = ZipLocalFileHeader.SizeOfLocalHeader; - byte[] header = new byte[HeaderSize]; - - int bytesRead = await _archiveStream.ReadAtLeastAsync(header, HeaderSize, throwOnEndOfStream: false, cancellationToken).ConfigureAwait(false); - if (bytesRead < HeaderSize) - { - _forwardReadReachedEnd = true; - return null; - } - - if (!header.AsSpan(0, 4).SequenceEqual(ZipLocalFileHeader.SignatureConstantBytes)) - { - uint sig = BinaryPrimitives.ReadUInt32LittleEndian(header); - if (IsKnownZipSignature(sig)) - { - _forwardReadReachedEnd = true; - return null; - } - throw new InvalidDataException(SR.ForwardReadInvalidLocalFileHeader); - } - - return ParseLocalFileHeader(header); - } - - private ZipArchiveEntry ParseLocalFileHeader(ReadOnlySpan header) - { - ushort versionNeeded = BinaryPrimitives.ReadUInt16LittleEndian(header.Slice(ZipLocalFileHeader.FieldLocations.VersionNeededToExtract)); - ushort generalPurposeBitFlags = BinaryPrimitives.ReadUInt16LittleEndian(header.Slice(ZipLocalFileHeader.FieldLocations.GeneralPurposeBitFlags)); - ushort compressionMethodValue = BinaryPrimitives.ReadUInt16LittleEndian(header.Slice(ZipLocalFileHeader.FieldLocations.CompressionMethod)); - 
uint lastModified = BinaryPrimitives.ReadUInt32LittleEndian(header.Slice(ZipLocalFileHeader.FieldLocations.LastModified)); - uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(header.Slice(ZipLocalFileHeader.FieldLocations.Crc32)); - uint compressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(header.Slice(ZipLocalFileHeader.FieldLocations.CompressedSize)); - uint uncompressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(header.Slice(ZipLocalFileHeader.FieldLocations.UncompressedSize)); - ushort filenameLength = BinaryPrimitives.ReadUInt16LittleEndian(header.Slice(ZipLocalFileHeader.FieldLocations.FilenameLength)); - ushort extraFieldLength = BinaryPrimitives.ReadUInt16LittleEndian(header.Slice(ZipLocalFileHeader.FieldLocations.ExtraFieldLength)); - - // Read filename - byte[] filenameBytes = new byte[filenameLength]; - if (filenameLength > 0) - _archiveStream.ReadExactly(filenameBytes); - - // Read extra field - byte[] extraFieldBytes = new byte[extraFieldLength]; - if (extraFieldLength > 0) - _archiveStream.ReadExactly(extraFieldBytes); - - long compressedSize = compressedSizeSmall; - long uncompressedSize = uncompressedSizeSmall; - - // Handle Zip64 extra field - if (compressedSizeSmall == ZipHelper.Mask32Bit || uncompressedSizeSmall == ZipHelper.Mask32Bit) - { - Zip64ExtraField zip64 = Zip64ExtraField.GetJustZip64Block(extraFieldBytes, - readUncompressedSize: uncompressedSizeSmall == ZipHelper.Mask32Bit, - readCompressedSize: compressedSizeSmall == ZipHelper.Mask32Bit, - readLocalHeaderOffset: false, - readStartDiskNumber: false); - - if (zip64.UncompressedSize.HasValue) - uncompressedSize = zip64.UncompressedSize.Value; - if (zip64.CompressedSize.HasValue) - compressedSize = zip64.CompressedSize.Value; - } - - bool hasDataDescriptor = (generalPurposeBitFlags & (ushort)ZipArchiveEntry.BitFlagValues.DataDescriptor) != 0; - bool isEncrypted = (generalPurposeBitFlags & (ushort)ZipArchiveEntry.BitFlagValues.IsEncrypted) != 0; - ZipCompressionMethod 
compressionMethod = (ZipCompressionMethod)compressionMethodValue; - - // Decode entry name - bool isUtf8 = (generalPurposeBitFlags & (ushort)ZipArchiveEntry.BitFlagValues.UnicodeFileNameAndComment) != 0; - Encoding nameEncoding = isUtf8 ? Encoding.UTF8 : (EntryNameAndCommentEncoding ?? Encoding.UTF8); - string fullName = nameEncoding.GetString(filenameBytes); - - DateTimeOffset lastModifiedDto = new DateTimeOffset(ZipHelper.DosTimeToDateTime(lastModified)); - - // Handle unsupported combinations - if (hasDataDescriptor) - { - if (compressionMethod == ZipCompressionMethod.Stored) - throw new NotSupportedException(SR.ForwardReadStoredDataDescriptorNotSupported); - if (isEncrypted) - throw new NotSupportedException(SR.ForwardReadEncryptedDataDescriptorNotSupported); - } - - // Build the data stream - Stream? dataStream = null; - - bool isDirectory = fullName.Length > 0 && - (fullName[^1] == '/' || fullName[^1] == '\\'); - bool isEmptyEntry = !hasDataDescriptor && compressedSize == 0 && uncompressedSize == 0; - - if (!isDirectory && !isEmptyEntry) - { - if (hasDataDescriptor) - { - // Data descriptor: unknown size, let DeflateStream detect end. - // Because ReadAheadStream.CanSeek returns true, DeflateStream will - // automatically rewind unconsumed bytes after decompression finishes, - // leaving the archive stream positioned right after the compressed data. 
- Stream decompressor = CreateForwardReadDecompressor(_archiveStream, compressionMethod, -1, leaveOpen: true); - dataStream = new CrcValidatingReadStream(decompressor, expectedCrc: 0, expectedLength: long.MaxValue); - } - else if (isEncrypted) - { - // Encrypted without data descriptor: return bounded raw stream (no decryption) - dataStream = new BoundedReadOnlyStream(_archiveStream, compressedSize); - } - else - { - // Known size, not encrypted - Stream bounded = new BoundedReadOnlyStream(_archiveStream, compressedSize); - Stream decompressor = CreateForwardReadDecompressor(bounded, compressionMethod, uncompressedSize, leaveOpen: false); - dataStream = new CrcValidatingReadStream(decompressor, crc32, uncompressedSize); - } - } - - var entry = new ZipArchiveEntry(this, fullName, compressionMethod, lastModifiedDto, - crc32, compressedSize, uncompressedSize, generalPurposeBitFlags, versionNeeded, - hasDataDescriptor, dataStream); - - _forwardReadPreviousEntry = entry; - return entry; - } - internal Stream ArchiveStream => _archiveStream; internal uint NumberOfThisDisk => _numberOfThisDisk; diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs index e44acd6ee3e4f2..21909f88a923c4 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs @@ -47,9 +47,16 @@ public partial class ZipArchiveEntry private byte[]? _lhTrailingExtraFieldData; private byte[] _fileComment; private readonly CompressionLevel _compressionLevel; + + // Forward-read fields: + // _hasDataDescriptor: permanent flag from the local file header indicating sizes/CRC + // are in a trailing data descriptor. When true, Crc32/CompressedLength/Length always throw. 
+ // _isZip64SizeFields: whether the data descriptor uses 64-bit sizes, + // determined by whether the local header had 0xFFFFFFFF size markers. private Stream? _forwardReadDataStream; private bool _forwardReadStreamOpened; private bool _hasDataDescriptor; + private bool _isZip64SizeFields; // Initializes a ZipArchiveEntry instance for an existing archive entry. internal ZipArchiveEntry(ZipArchive archive, ZipCentralDirectoryFileHeader cd) @@ -163,26 +170,25 @@ internal ZipArchiveEntry(ZipArchive archive, string entryName) Changes = ZipArchive.ChangeState.Unchanged; } - // Initializes a ZipArchiveEntry instance for forward-read mode from local file header data. - internal ZipArchiveEntry(ZipArchive archive, string fullName, ZipCompressionMethod compressionMethod, - DateTimeOffset lastModified, uint crc32, long compressedSize, long uncompressedSize, - ushort generalPurposeBitFlags, ushort versionNeeded, bool hasDataDescriptor, Stream? dataStream) + // Initializes a ZipArchiveEntry instance for forward-read mode from parsed local file header data. + internal ZipArchiveEntry(ZipArchive archive, ZipLocalFileHeader.ForwardReadHeaderData headerData, Stream? 
dataStream) { _archive = archive; _originallyInArchive = true; - _hasDataDescriptor = hasDataDescriptor; + _hasDataDescriptor = headerData.HasDataDescriptor; + _isZip64SizeFields = headerData.IsZip64SizeFields; _diskNumberStart = 0; _versionMadeByPlatform = CurrentZipPlatform; - _versionMadeBySpecification = (ZipVersionNeededValues)versionNeeded; - _versionToExtract = (ZipVersionNeededValues)versionNeeded; - _generalPurposeBitFlag = (BitFlagValues)generalPurposeBitFlags; - _isEncrypted = (_generalPurposeBitFlag & BitFlagValues.IsEncrypted) != 0; - _storedCompressionMethod = compressionMethod; - _lastModified = lastModified; - _compressedSize = compressedSize; - _uncompressedSize = uncompressedSize; - _crc32 = crc32; + _versionMadeBySpecification = (ZipVersionNeededValues)headerData.VersionNeeded; + _versionToExtract = (ZipVersionNeededValues)headerData.VersionNeeded; + _generalPurposeBitFlag = (BitFlagValues)headerData.GeneralPurposeBitFlags; + _isEncrypted = headerData.IsEncrypted; + _storedCompressionMethod = headerData.CompressionMethod; + _lastModified = headerData.LastModified; + _compressedSize = headerData.CompressedSize; + _uncompressedSize = headerData.UncompressedSize; + _crc32 = headerData.Crc32; _offsetOfLocalHeader = 0; _storedOffsetOfCompressedData = null; _externalFileAttr = 0; @@ -193,16 +199,14 @@ internal ZipArchiveEntry(ZipArchive archive, string fullName, ZipCompressionMeth _everOpenedForWrite = false; _outstandingWriteStream = null; - _storedEntryNameBytes = (_generalPurposeBitFlag & BitFlagValues.UnicodeFileNameAndComment) != 0 - ? Encoding.UTF8.GetBytes(fullName) - : (archive.EntryNameAndCommentEncoding ?? 
Encoding.UTF8).GetBytes(fullName); - _storedEntryName = fullName; + _storedEntryNameBytes = headerData.FilenameBytes.ToArray(); + _storedEntryName = headerData.FullName; _cdUnknownExtraFields = null; _lhUnknownExtraFields = null; _fileComment = Array.Empty(); - _compressionLevel = MapCompressionLevel(_generalPurposeBitFlag, compressionMethod); + _compressionLevel = MapCompressionLevel(_generalPurposeBitFlag, headerData.CompressionMethod); _forwardReadDataStream = dataStream; Changes = ZipArchive.ChangeState.Unchanged; @@ -214,7 +218,15 @@ internal ZipArchiveEntry(ZipArchive archive, string fullName, ZipCompressionMeth public ZipArchive Archive => _archive; [CLSCompliant(false)] - public uint Crc32 => _crc32; + public uint Crc32 + { + get + { + if (_hasDataDescriptor && _archive.Mode == ZipArchiveMode.ForwardRead) + throw new InvalidOperationException(SR.ForwardReadMetadataNotYetAvailable); + return _crc32; + } + } /// /// Gets a value that indicates whether the entry is encrypted. @@ -225,12 +237,7 @@ internal ZipArchiveEntry(ZipArchive archive, string fullName, ZipCompressionMeth internal bool HasDataDescriptor => _hasDataDescriptor; - internal void UpdateFromDataDescriptor(uint crc32, long compressedSize, long uncompressedSize) - { - _crc32 = crc32; - _compressedSize = compressedSize; - _uncompressedSize = uncompressedSize; - } + internal bool IsZip64SizeFields => _isZip64SizeFields; /// /// Gets the compression method used to compress the entry. 
@@ -256,6 +263,8 @@ public long CompressedLength { get { + if (_hasDataDescriptor && _archive.Mode == ZipArchiveMode.ForwardRead) + throw new InvalidOperationException(SR.ForwardReadMetadataNotYetAvailable); if (_everOpenedForWrite) throw new InvalidOperationException(SR.LengthAfterWrite); return _compressedSize; @@ -376,6 +385,8 @@ public long Length { get { + if (_hasDataDescriptor && _archive.Mode == ZipArchiveMode.ForwardRead) + throw new InvalidOperationException(SR.ForwardReadMetadataNotYetAvailable); if (_everOpenedForWrite) throw new InvalidOperationException(SR.LengthAfterWrite); return _uncompressedSize; @@ -918,8 +929,10 @@ private CrcValidatingReadStream OpenInReadModeGetDataCompressor(long offsetOfCom private WrappedStream OpenInForwardReadMode() { + if (_isEncrypted) + throw new NotSupportedException(SR.ForwardReadEncryptedNotSupported); if (_forwardReadDataStream is null) - throw new InvalidDataException(SR.LocalFileHeaderCorrupt); + throw new InvalidOperationException(SR.ForwardReadNoDataStream); if (_forwardReadStreamOpened) throw new IOException(SR.ForwardReadOnly); diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.Async.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.Async.cs index fea75ba16db328..a67db42fc743d8 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.Async.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.Async.cs @@ -2,8 +2,10 @@ // The .NET Foundation licenses this file to you under the MIT license. 
using System.Buffers; +using System.Buffers.Binary; using System.Collections.Generic; using System.Diagnostics; +using System.Text; using System.Threading; using System.Threading.Tasks; @@ -156,6 +158,127 @@ public static async Task TrySkipBlockAsync(Stream stream, CancellationToke bytesRead = await stream.ReadAtLeastAsync(blockBytes, blockBytes.Length, throwOnEndOfStream: false, cancellationToken).ConfigureAwait(false); return TrySkipBlockFinalize(stream, blockBytes, bytesRead); } + + /// + /// Async variant of . + /// + internal static async ValueTask TryReadForForwardReadAsync(Stream stream, Encoding? entryNameEncoding, CancellationToken cancellationToken) + { + byte[] header = new byte[SizeOfLocalHeader]; + int bytesRead = await stream.ReadAtLeastAsync(header, SizeOfLocalHeader, throwOnEndOfStream: false, cancellationToken).ConfigureAwait(false); + + if (bytesRead == 0) + return null; + if (bytesRead < SizeOfLocalHeader) + { + if (bytesRead >= FieldLengths.Signature && IsEndOfEntriesSignature(BinaryPrimitives.ReadUInt32LittleEndian(header))) + return null; + throw new InvalidDataException(SR.ForwardReadInvalidLocalFileHeader); + } + + if (!header.AsSpan(0, FieldLengths.Signature).SequenceEqual(SignatureConstantBytes)) + { + uint sig = BinaryPrimitives.ReadUInt32LittleEndian(header); + if (IsEndOfEntriesSignature(sig)) + return null; + throw new InvalidDataException(SR.ForwardReadInvalidLocalFileHeader); + } + + return await ParseForwardReadHeaderAsync(header, stream, entryNameEncoding, cancellationToken).ConfigureAwait(false); + + static async ValueTask ParseForwardReadHeaderAsync(byte[] header, Stream stream, Encoding? 
entryNameEncoding, CancellationToken cancellationToken) + { + ushort versionNeeded = BinaryPrimitives.ReadUInt16LittleEndian(header.AsSpan(FieldLocations.VersionNeededToExtract)); + ushort generalPurposeBitFlags = BinaryPrimitives.ReadUInt16LittleEndian(header.AsSpan(FieldLocations.GeneralPurposeBitFlags)); + ushort compressionMethodValue = BinaryPrimitives.ReadUInt16LittleEndian(header.AsSpan(FieldLocations.CompressionMethod)); + uint lastModified = BinaryPrimitives.ReadUInt32LittleEndian(header.AsSpan(FieldLocations.LastModified)); + uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(header.AsSpan(FieldLocations.Crc32)); + uint compressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(header.AsSpan(FieldLocations.CompressedSize)); + uint uncompressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(header.AsSpan(FieldLocations.UncompressedSize)); + ushort filenameLength = BinaryPrimitives.ReadUInt16LittleEndian(header.AsSpan(FieldLocations.FilenameLength)); + ushort extraFieldLength = BinaryPrimitives.ReadUInt16LittleEndian(header.AsSpan(FieldLocations.ExtraFieldLength)); + + byte[] filenameBytes = new byte[filenameLength]; + if (filenameLength > 0) + await stream.ReadExactlyAsync(filenameBytes, cancellationToken).ConfigureAwait(false); + + byte[] extraFieldBytes = new byte[extraFieldLength]; + if (extraFieldLength > 0) + await stream.ReadExactlyAsync(extraFieldBytes, cancellationToken).ConfigureAwait(false); + + long compressedSize = compressedSizeSmall; + long uncompressedSize = uncompressedSizeSmall; + bool isZip64SizeFields = false; + + if (compressedSizeSmall == ZipHelper.Mask32Bit || uncompressedSizeSmall == ZipHelper.Mask32Bit) + { + isZip64SizeFields = true; + Zip64ExtraField zip64 = Zip64ExtraField.GetJustZip64Block(extraFieldBytes, + readUncompressedSize: uncompressedSizeSmall == ZipHelper.Mask32Bit, + readCompressedSize: compressedSizeSmall == ZipHelper.Mask32Bit, + readLocalHeaderOffset: false, + readStartDiskNumber: false); + + if 
(zip64.UncompressedSize.HasValue) + uncompressedSize = zip64.UncompressedSize.Value; + if (zip64.CompressedSize.HasValue) + compressedSize = zip64.CompressedSize.Value; + } + + bool isUtf8 = (generalPurposeBitFlags & (ushort)ZipArchiveEntry.BitFlagValues.UnicodeFileNameAndComment) != 0; + Encoding nameEncoding = isUtf8 ? Encoding.UTF8 : (entryNameEncoding ?? Encoding.UTF8); + string fullName = nameEncoding.GetString(filenameBytes); + + DateTimeOffset lastModifiedDto = new DateTimeOffset(ZipHelper.DosTimeToDateTime(lastModified)); + + return new ForwardReadHeaderData( + versionNeeded, generalPurposeBitFlags, (ZipCompressionMethod)compressionMethodValue, + lastModifiedDto, crc32, compressedSize, uncompressedSize, + fullName, filenameBytes, isZip64SizeFields); + } + } + + /// + /// Async variant of . + /// + internal static async ValueTask<(uint Crc32, long CompressedSize, long UncompressedSize)> ReadDataDescriptorAsync(Stream stream, bool isZip64, CancellationToken cancellationToken) + { + byte[] firstFour = new byte[4]; + await stream.ReadExactlyAsync(firstFour, cancellationToken).ConfigureAwait(false); + + uint firstWord = BinaryPrimitives.ReadUInt32LittleEndian(firstFour); + bool hasSignature = firstWord == 0x08074B50; + + int remainingSize = (hasSignature ? 4 : 0) + (isZip64 ? 
16 : 8); + byte[] remaining = new byte[remainingSize]; + await stream.ReadExactlyAsync(remaining, cancellationToken).ConfigureAwait(false); + + int pos = 0; + uint crc32; + if (hasSignature) + { + crc32 = BinaryPrimitives.ReadUInt32LittleEndian(remaining.AsSpan(pos)); + pos += 4; + } + else + { + crc32 = firstWord; + } + + long compressedSize, uncompressedSize; + if (isZip64) + { + compressedSize = BinaryPrimitives.ReadInt64LittleEndian(remaining.AsSpan(pos)); + uncompressedSize = BinaryPrimitives.ReadInt64LittleEndian(remaining.AsSpan(pos + 8)); + } + else + { + compressedSize = BinaryPrimitives.ReadUInt32LittleEndian(remaining.AsSpan(pos)); + uncompressedSize = BinaryPrimitives.ReadUInt32LittleEndian(remaining.AsSpan(pos + 4)); + } + + return (crc32, compressedSize, uncompressedSize); + } } internal sealed partial class ZipCentralDirectoryFileHeader diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs index 5e70cf29fc5eaa..4f8a704b47dd5c 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs @@ -7,6 +7,7 @@ using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Runtime.InteropServices; +using System.Text; using System.Threading; using System.Threading.Tasks; @@ -670,6 +671,160 @@ public static bool TrySkipBlock(Stream stream) bytesRead = stream.ReadAtLeast(blockBytes, blockBytes.Length, throwOnEndOfStream: false); return TrySkipBlockFinalize(stream, blockBytes, bytesRead); } + + /// + /// Parsed data from a local file header for forward-read mode. 
+ /// + internal readonly struct ForwardReadHeaderData( + ushort versionNeeded, ushort generalPurposeBitFlags, ZipCompressionMethod compressionMethod, + DateTimeOffset lastModified, uint crc32, long compressedSize, long uncompressedSize, + string fullName, ReadOnlyMemory filenameBytes, bool isZip64SizeFields) + { + internal readonly ushort VersionNeeded = versionNeeded; + internal readonly ushort GeneralPurposeBitFlags = generalPurposeBitFlags; + internal readonly ZipCompressionMethod CompressionMethod = compressionMethod; + internal readonly DateTimeOffset LastModified = lastModified; + internal readonly uint Crc32 = crc32; + internal readonly long CompressedSize = compressedSize; + internal readonly long UncompressedSize = uncompressedSize; + internal readonly string FullName = fullName; + internal readonly ReadOnlyMemory FilenameBytes = filenameBytes; + internal readonly bool IsZip64SizeFields = isZip64SizeFields; + + internal bool HasDataDescriptor => (GeneralPurposeBitFlags & 0x0008) != 0; + internal bool IsEncrypted => (GeneralPurposeBitFlags & 0x0001) != 0; + } + + /// + /// Returns true if the given signature indicates the end of local file entries + /// (central directory, EOCD, or Zip64 EOCD). + /// + internal static bool IsEndOfEntriesSignature(uint sig) => + sig == 0x02014B50 // Central directory + || sig == 0x06054B50 // EOCD + || sig == 0x06064B50; // Zip64 EOCD + + /// + /// Tries to read a local file header for forward-read mode. + /// Returns null if EOF is reached or a non-local-header signature is encountered. + /// + internal static ForwardReadHeaderData? TryReadForForwardRead(Stream stream, Encoding? 
entryNameEncoding) + { + Span header = stackalloc byte[SizeOfLocalHeader]; + int bytesRead = stream.ReadAtLeast(header, SizeOfLocalHeader, throwOnEndOfStream: false); + + if (bytesRead == 0) + return null; + if (bytesRead < SizeOfLocalHeader) + { + if (bytesRead >= FieldLengths.Signature && IsEndOfEntriesSignature(BinaryPrimitives.ReadUInt32LittleEndian(header))) + return null; + throw new InvalidDataException(SR.ForwardReadInvalidLocalFileHeader); + } + + if (!header[..FieldLengths.Signature].SequenceEqual(SignatureConstantBytes)) + { + uint sig = BinaryPrimitives.ReadUInt32LittleEndian(header); + if (IsEndOfEntriesSignature(sig)) + return null; + throw new InvalidDataException(SR.ForwardReadInvalidLocalFileHeader); + } + + return ParseForwardReadHeader(header, stream, entryNameEncoding); + + static ForwardReadHeaderData ParseForwardReadHeader(ReadOnlySpan header, Stream stream, Encoding? entryNameEncoding) + { + ushort versionNeeded = BinaryPrimitives.ReadUInt16LittleEndian(header[FieldLocations.VersionNeededToExtract..]); + ushort generalPurposeBitFlags = BinaryPrimitives.ReadUInt16LittleEndian(header[FieldLocations.GeneralPurposeBitFlags..]); + ushort compressionMethodValue = BinaryPrimitives.ReadUInt16LittleEndian(header[FieldLocations.CompressionMethod..]); + uint lastModified = BinaryPrimitives.ReadUInt32LittleEndian(header[FieldLocations.LastModified..]); + uint crc32 = BinaryPrimitives.ReadUInt32LittleEndian(header[FieldLocations.Crc32..]); + uint compressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(header[FieldLocations.CompressedSize..]); + uint uncompressedSizeSmall = BinaryPrimitives.ReadUInt32LittleEndian(header[FieldLocations.UncompressedSize..]); + ushort filenameLength = BinaryPrimitives.ReadUInt16LittleEndian(header[FieldLocations.FilenameLength..]); + ushort extraFieldLength = BinaryPrimitives.ReadUInt16LittleEndian(header[FieldLocations.ExtraFieldLength..]); + + byte[] filenameBytes = new byte[filenameLength]; + if (filenameLength > 0) 
+ stream.ReadExactly(filenameBytes); + + byte[] extraFieldBytes = new byte[extraFieldLength]; + if (extraFieldLength > 0) + stream.ReadExactly(extraFieldBytes); + + long compressedSize = compressedSizeSmall; + long uncompressedSize = uncompressedSizeSmall; + bool isZip64SizeFields = false; + + if (compressedSizeSmall == ZipHelper.Mask32Bit || uncompressedSizeSmall == ZipHelper.Mask32Bit) + { + isZip64SizeFields = true; + Zip64ExtraField zip64 = Zip64ExtraField.GetJustZip64Block(extraFieldBytes, + readUncompressedSize: uncompressedSizeSmall == ZipHelper.Mask32Bit, + readCompressedSize: compressedSizeSmall == ZipHelper.Mask32Bit, + readLocalHeaderOffset: false, + readStartDiskNumber: false); + + if (zip64.UncompressedSize.HasValue) + uncompressedSize = zip64.UncompressedSize.Value; + if (zip64.CompressedSize.HasValue) + compressedSize = zip64.CompressedSize.Value; + } + + bool isUtf8 = (generalPurposeBitFlags & (ushort)ZipArchiveEntry.BitFlagValues.UnicodeFileNameAndComment) != 0; + Encoding nameEncoding = isUtf8 ? Encoding.UTF8 : (entryNameEncoding ?? Encoding.UTF8); + string fullName = nameEncoding.GetString(filenameBytes); + + DateTimeOffset lastModifiedDto = new DateTimeOffset(ZipHelper.DosTimeToDateTime(lastModified)); + + return new ForwardReadHeaderData( + versionNeeded, generalPurposeBitFlags, (ZipCompressionMethod)compressionMethodValue, + lastModifiedDto, crc32, compressedSize, uncompressedSize, + fullName, filenameBytes, isZip64SizeFields); + } + } + + /// + /// Reads a data descriptor using signature-first parsing. No seek operations. + /// + internal static (uint Crc32, long CompressedSize, long UncompressedSize) ReadDataDescriptor(Stream stream, bool isZip64) + { + Span firstFour = stackalloc byte[4]; + stream.ReadExactly(firstFour); + + uint firstWord = BinaryPrimitives.ReadUInt32LittleEndian(firstFour); + bool hasSignature = firstWord == 0x08074B50; + + int remainingSize = (hasSignature ? 4 : 0) + (isZip64 ? 
16 : 8); + Span remaining = stackalloc byte[20]; // max: sig-CRC(4) + comp(8) + uncomp(8) = 20 + stream.ReadExactly(remaining[..remainingSize]); + + int pos = 0; + uint crc32; + if (hasSignature) + { + crc32 = BinaryPrimitives.ReadUInt32LittleEndian(remaining[pos..]); + pos += 4; + } + else + { + crc32 = firstWord; + } + + long compressedSize, uncompressedSize; + if (isZip64) + { + compressedSize = BinaryPrimitives.ReadInt64LittleEndian(remaining[pos..]); + uncompressedSize = BinaryPrimitives.ReadInt64LittleEndian(remaining[(pos + 8)..]); + } + else + { + compressedSize = BinaryPrimitives.ReadUInt32LittleEndian(remaining[pos..]); + uncompressedSize = BinaryPrimitives.ReadUInt32LittleEndian(remaining[(pos + 4)..]); + } + + return (crc32, compressedSize, uncompressedSize); + } } internal sealed partial class ZipCentralDirectoryFileHeader diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs index ddd3af7a9f9c36..13a340f1cc5929 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipCustomStreams.cs @@ -735,6 +735,13 @@ public CrcValidatingReadStream(Stream baseStream, uint expectedCrc, long expecte _runningCrc = 0; } + internal (uint Crc32, long BytesRead)? 
GetFinalCrcResult() + { + if (_crcAbandoned) + return null; + return (_runningCrc, _totalBytesRead); + } + public override bool CanRead => !_isDisposed && _baseStream.CanRead; public override bool CanSeek => !_isDisposed && _baseStream.CanSeek; public override bool CanWrite => false; @@ -968,102 +975,6 @@ public override async ValueTask DisposeAsync() } } - internal sealed class BoundedReadOnlyStream : Stream - { - private readonly Stream _baseStream; - private long _remaining; - private bool _isDisposed; - - public BoundedReadOnlyStream(Stream baseStream, long length) - { - _baseStream = baseStream; - _remaining = length; - } - - public override bool CanRead => !_isDisposed && _baseStream.CanRead; - public override bool CanSeek => false; - public override bool CanWrite => false; - public override long Length => throw new NotSupportedException(); - - public override long Position - { - get => throw new NotSupportedException(); - set => throw new NotSupportedException(); - } - - private void ThrowIfDisposed() - { - ObjectDisposedException.ThrowIf(_isDisposed, this); - } - - public override int Read(byte[] buffer, int offset, int count) - => Read(buffer.AsSpan(offset, count)); - - public override int Read(Span buffer) - { - ThrowIfDisposed(); - - if (_remaining <= 0) - { - return 0; - } - - if (buffer.Length > _remaining) - { - buffer = buffer.Slice(0, (int)_remaining); - } - - int bytesRead = _baseStream.Read(buffer); - _remaining -= bytesRead; - - return bytesRead; - } - - public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) - => ReadAsync(buffer.AsMemory(offset, count), cancellationToken).AsTask(); - - public override async ValueTask ReadAsync(Memory buffer, CancellationToken cancellationToken = default) - { - ThrowIfDisposed(); - - if (_remaining <= 0) - { - return 0; - } - - if (buffer.Length > _remaining) - { - buffer = buffer.Slice(0, (int)_remaining); - } - - int bytesRead = await 
_baseStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false); - _remaining -= bytesRead; - - return bytesRead; - } - - public override void Flush() { } - public override Task FlushAsync(CancellationToken cancellationToken) => Task.CompletedTask; - public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException(); - public override void SetLength(long value) => throw new NotSupportedException(); - public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException(); - - // Does not dispose the base stream — BoundedReadOnlyStream is a window - // into the shared archive stream, which outlives individual entries. - protected override void Dispose(bool disposing) - { - _isDisposed = true; - base.Dispose(disposing); - } - - public override ValueTask DisposeAsync() - { - _isDisposed = true; - - return base.DisposeAsync(); - } - } - internal sealed class ReadAheadStream : Stream { private readonly Stream _baseStream; diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ForwardReadTests.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ForwardReadTests.cs index 6758aac84b2f13..d01834285afd77 100644 --- a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ForwardReadTests.cs +++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ForwardReadTests.cs @@ -22,96 +22,119 @@ static zip_ForwardReadTests() rng.NextBytes(s_largeContent); } + // ── Core reading scenarios ────────────────────────────────────────── + [Theory] [MemberData(nameof(Get_Booleans_Data))] - public async Task Read_DeflateWithKnownSize_ReturnsDecompressedData(bool async) + public async Task NonSeekableStream_ConsumeSkipConsume_ReadsCorrectly(bool async) { - byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); - byte[][] expectedContents = [s_smallContent, s_mediumContent, s_largeContent]; + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: 
false); + byte[][] expected = [s_smallContent, s_mediumContent, s_largeContent]; using MemoryStream archiveStream = new(zipBytes); - using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); + using WrappedStream nonSeekable = new(archiveStream, canRead: true, canWrite: false, canSeek: false, null); + using ZipArchive archive = new(nonSeekable, ZipArchiveMode.ForwardRead); - for (int i = 0; i < expectedContents.Length; i++) + // Consume first entry fully + ZipArchiveEntry? first = await GetNextEntry(archive, async); + Assert.NotNull(first); + using (Stream ds = first.Open()) { - ZipArchiveEntry? entry = await GetNextEntry(archive, async); + byte[] data = await ReadStreamFully(ds, async); + Assert.Equal(expected[0], data); + } - Assert.NotNull(entry); - Assert.Equal(ZipCompressionMethod.Deflate, entry.CompressionMethod); + // Skip second entry (don't open/read) + ZipArchiveEntry? second = await GetNextEntry(archive, async); + Assert.NotNull(second); + Assert.Equal("medium.bin", second.FullName); - using Stream dataStream = entry.Open(); - byte[] decompressed = await ReadStreamFully(dataStream, async); - Assert.Equal(expectedContents[i], decompressed); + // Consume third entry fully + ZipArchiveEntry? third = await GetNextEntry(archive, async); + Assert.NotNull(third); + using (Stream ds = third.Open()) + { + byte[] data = await ReadStreamFully(ds, async); + Assert.Equal(expected[2], data); } - ZipArchiveEntry? 
end = await GetNextEntry(archive, async); - Assert.Null(end); + // End of archive + Assert.Null(await GetNextEntry(archive, async)); } [Theory] [MemberData(nameof(Get_Booleans_Data))] - public async Task Read_StoredWithKnownSize_ReturnsUncompressedData(bool async) + public async Task SeekableStream_StoredEntries_ReadsCorrectly(bool async) { byte[] zipBytes = CreateZipWithEntries(CompressionLevel.NoCompression, seekable: true); - byte[][] expectedContents = [s_smallContent, s_mediumContent, s_largeContent]; + byte[][] expected = [s_smallContent, s_mediumContent, s_largeContent]; using MemoryStream archiveStream = new(zipBytes); using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); - for (int i = 0; i < expectedContents.Length; i++) + for (int i = 0; i < expected.Length; i++) { ZipArchiveEntry? entry = await GetNextEntry(archive, async); - Assert.NotNull(entry); Assert.Equal(ZipCompressionMethod.Stored, entry.CompressionMethod); - using Stream dataStream = entry.Open(); - byte[] decompressed = await ReadStreamFully(dataStream, async); - Assert.Equal(expectedContents[i], decompressed); + using Stream ds = entry.Open(); + Assert.Equal(expected[i], await ReadStreamFully(ds, async)); } } [Theory] [MemberData(nameof(Get_Booleans_Data))] - public async Task Read_DeflateWithDataDescriptor_ReturnsDecompressedData(bool async) + public async Task PartialRead_ThenAdvance_ReadsNextEntryCorrectly(bool async) { byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: false); - byte[][] expectedContents = [s_smallContent, s_mediumContent, s_largeContent]; using MemoryStream archiveStream = new(zipBytes); - using WrappedStream nonSeekableStream = new(archiveStream, canRead: true, canWrite: false, canSeek: false, null); - using ZipArchive archive = new(nonSeekableStream, ZipArchiveMode.ForwardRead); + using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); + + ZipArchiveEntry? 
first = await GetNextEntry(archive, async); + Assert.NotNull(first); - for (int i = 0; i < expectedContents.Length; i++) + // Only read a few bytes, don't finish + using (Stream ds = first.Open()) { - ZipArchiveEntry? entry = await GetNextEntry(archive, async); + byte[] partial = new byte[3]; + await ReadStream(ds, partial, async); + } - Assert.NotNull(entry); - Assert.Equal(ZipCompressionMethod.Deflate, entry.CompressionMethod); + // Next entry should still be readable + ZipArchiveEntry? second = await GetNextEntry(archive, async); + Assert.NotNull(second); + Assert.Equal("medium.bin", second.FullName); - using Stream dataStream = entry.Open(); - byte[] decompressed = await ReadStreamFully(dataStream, async); - Assert.Equal(expectedContents[i], decompressed); - } + using Stream ds2 = second.Open(); + Assert.Equal(s_mediumContent, await ReadStreamFully(ds2, async)); } [Theory] [MemberData(nameof(Get_Booleans_Data))] - public async Task Read_FromNonSeekableStream(bool async) + public async Task EmptyAndDirectoryEntries_HandleCorrectly(bool async) { - byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + using MemoryStream ms = new(); + using (ZipArchive create = new(ms, ZipArchiveMode.Create, leaveOpen: true)) + { + create.CreateEntry("mydir/"); + create.CreateEntry("empty.txt"); + } - using MemoryStream archiveStream = new(zipBytes); - using WrappedStream nonSeekableStream = new(archiveStream, canRead: true, canWrite: false, canSeek: false, null); - using ZipArchive archive = new(nonSeekableStream, ZipArchiveMode.ForwardRead); + ms.Position = 0; + using ZipArchive archive = new(ms, ZipArchiveMode.ForwardRead); - ZipArchiveEntry? entry = await GetNextEntry(archive, async); - Assert.NotNull(entry); - Assert.Equal("small.txt", entry.FullName); + ZipArchiveEntry? 
dir = await GetNextEntry(archive, async); + Assert.NotNull(dir); + Assert.Equal("mydir/", dir.FullName); - using Stream dataStream = entry.Open(); - byte[] decompressed = await ReadStreamFully(dataStream, async); - Assert.Equal(s_smallContent, decompressed); + ZipArchiveEntry? empty = await GetNextEntry(archive, async); + Assert.NotNull(empty); + Assert.Equal("empty.txt", empty.FullName); + Assert.Equal(0, empty.CompressedLength); + + Assert.Null(await GetNextEntry(archive, async)); } [Theory] @@ -124,41 +147,13 @@ public async Task EmptyArchive_ReturnsNull(bool async) ms.Position = 0; using ZipArchive archive = new(ms, ZipArchiveMode.ForwardRead); - ZipArchiveEntry? entry = await GetNextEntry(archive, async); - Assert.Null(entry); + Assert.Null(await GetNextEntry(archive, async)); } - [Theory] - [MemberData(nameof(Get_Booleans_Data))] - public async Task PartialRead_ThenGetNextEntry_AdvancesCorrectly(bool async) - { - byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); - - using MemoryStream archiveStream = new(zipBytes); - using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); - - ZipArchiveEntry? first = await GetNextEntry(archive, async); - Assert.NotNull(first); - - // Read only a few bytes - using (Stream ds = first.Open()) - { - byte[] partial = new byte[5]; - await ReadStream(ds, partial, async); - } - - ZipArchiveEntry? 
second = await GetNextEntry(archive, async); - - Assert.NotNull(second); - Assert.Equal("medium.bin", second.FullName); - - using Stream dataStream = second.Open(); - byte[] decompressed = await ReadStreamFully(dataStream, async); - Assert.Equal(s_mediumContent, decompressed); - } + // ── Unsupported feature guards ────────────────────────────────────── [Fact] - public void Entries_ThrowsNotSupportedException() + public void UnsupportedOperations_Throw() { byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); @@ -166,236 +161,122 @@ public void Entries_ThrowsNotSupportedException() using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); Assert.Throws(() => archive.Entries); - } - - [Fact] - public void GetEntry_ThrowsNotSupportedException() - { - byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); - - using MemoryStream archiveStream = new(zipBytes); - using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); - Assert.Throws(() => archive.GetEntry("small.txt")); + Assert.Throws(() => archive.CreateEntry("new.txt")); } [Fact] - public void CreateEntry_ThrowsNotSupportedException() + public void GetNextEntry_NotInForwardReadMode_Throws() { byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); using MemoryStream archiveStream = new(zipBytes); - using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); + using ZipArchive archive = new(archiveStream, ZipArchiveMode.Read); - Assert.Throws(() => archive.CreateEntry("new.txt")); + Assert.Throws(() => archive.GetNextEntry()); } [Theory] [MemberData(nameof(Get_Booleans_Data))] - public async Task GetNextEntry_AfterDispose_ThrowsObjectDisposedException(bool async) + public async Task StoredWithDataDescriptor_Throws(bool async) { - byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.NoCompression, 
seekable: false); using MemoryStream archiveStream = new(zipBytes); - ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead, leaveOpen: true); - - ZipArchiveEntry? entry = await GetNextEntry(archive, async); - Assert.NotNull(entry); - - archive.Dispose(); + using WrappedStream nonSeekable = new(archiveStream, canRead: true, canWrite: false, canSeek: false, null); + using ZipArchive archive = new(nonSeekable, ZipArchiveMode.ForwardRead); if (async) - { - await Assert.ThrowsAsync(() => archive.GetNextEntryAsync().AsTask()); - } + await Assert.ThrowsAsync(() => archive.GetNextEntryAsync().AsTask()); else - { - Assert.Throws(() => archive.GetNextEntry()); - } + Assert.Throws(() => archive.GetNextEntry()); } [Theory] [MemberData(nameof(Get_Booleans_Data))] - public async Task AsyncGetNextEntryAsync_Works(bool async) + public async Task EncryptedEntry_MetadataAccessible_OpenThrows(bool async) { byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + // Set encryption bit in first entry's local file header (offset 6) + zipBytes[6] |= 0x01; + using MemoryStream archiveStream = new(zipBytes); using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); ZipArchiveEntry? 
entry = await GetNextEntry(archive, async); Assert.NotNull(entry); + Assert.True(entry.IsEncrypted); Assert.Equal("small.txt", entry.FullName); - } - [Fact] - public async Task AsyncCancellation_ThrowsOperationCanceled() - { - byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); - - using MemoryStream archiveStream = new(zipBytes); - using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); - - using CancellationTokenSource cts = new(); - cts.Cancel(); - - await Assert.ThrowsAnyAsync( - () => archive.GetNextEntryAsync(cancellationToken: cts.Token).AsTask()); + Assert.Throws(() => entry.Open()); } [Theory] [MemberData(nameof(Get_Booleans_Data))] - public async Task MultipleEntries_MixedSkipAndRead(bool async) + public async Task DataDescriptorEntry_SizeAndCrcProperties_AlwaysThrow(bool async) { - byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: false); using MemoryStream archiveStream = new(zipBytes); using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); - // Skip first entry (don't read data) - ZipArchiveEntry? first = await GetNextEntry(archive, async); - Assert.NotNull(first); - - // Read second entry fully - ZipArchiveEntry? second = await GetNextEntry(archive, async); - Assert.NotNull(second); - using (Stream ds = second.Open()) - { - byte[] data = await ReadStreamFully(ds, async); - Assert.Equal(s_mediumContent, data); - } - - // Skip third entry - ZipArchiveEntry? third = await GetNextEntry(archive, async); - Assert.NotNull(third); - - // Confirm end - ZipArchiveEntry? end = await GetNextEntry(archive, async); - Assert.Null(end); - } - - [Fact] - public void GetNextEntry_NotInForwardReadMode_ThrowsNotSupportedException() - { - byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + ZipArchiveEntry? 
entry = await GetNextEntry(archive, async); + Assert.NotNull(entry); - using MemoryStream archiveStream = new(zipBytes); - using ZipArchive archive = new(archiveStream, ZipArchiveMode.Read); + // Non-size properties work + Assert.Equal("small.txt", entry.FullName); + _ = entry.LastWriteTime; + Assert.Equal(ZipCompressionMethod.Deflate, entry.CompressionMethod); - Assert.Throws(() => archive.GetNextEntry()); - } + // Size/CRC properties throw — permanently, even after reading + Assert.Throws(() => entry.Crc32); + Assert.Throws(() => entry.CompressedLength); + Assert.Throws(() => entry.Length); - [Theory] - [MemberData(nameof(Get_Booleans_Data))] - public async Task StoredWithDataDescriptor_ThrowsNotSupported(bool async) - { - byte[] zipBytes = CreateZipWithEntries(CompressionLevel.NoCompression, seekable: false); + using (Stream ds = entry.Open()) + await ReadStreamFully(ds, async); - using MemoryStream archiveStream = new(zipBytes); - using WrappedStream nonSeekableStream = new(archiveStream, canRead: true, canWrite: false, canSeek: false, null); - using ZipArchive archive = new(nonSeekableStream, ZipArchiveMode.ForwardRead); + await GetNextEntry(archive, async); // drains data descriptor - if (async) - { - await Assert.ThrowsAsync(() => archive.GetNextEntryAsync().AsTask()); - } - else - { - Assert.Throws(() => archive.GetNextEntry()); - } + Assert.Throws(() => entry.Crc32); + Assert.Throws(() => entry.CompressedLength); + Assert.Throws(() => entry.Length); } [Theory] [MemberData(nameof(Get_Booleans_Data))] - public async Task PartialRead_DataDescriptor_ThenGetNextEntry_AdvancesCorrectly(bool async) + public async Task KnownSizeEntry_SizeAndCrcProperties_Accessible(bool async) { - byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: false); + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); using MemoryStream archiveStream = new(zipBytes); using ZipArchive archive = new(archiveStream, 
ZipArchiveMode.ForwardRead); - ZipArchiveEntry? first = await GetNextEntry(archive, async); - Assert.NotNull(first); - - // Read only a few bytes via Open() - using (Stream ds = first.Open()) - { - byte[] partial = new byte[3]; - await ReadStream(ds, partial, async); - } - - ZipArchiveEntry? second = await GetNextEntry(archive, async); - - Assert.NotNull(second); - Assert.Equal("medium.bin", second.FullName); - - using Stream dataStream2 = second.Open(); - byte[] decompressed = await ReadStreamFully(dataStream2, async); - Assert.Equal(s_mediumContent, decompressed); - } - - [Theory] - [MemberData(nameof(Get_Booleans_Data))] - public async Task ZeroLengthEntry_ReturnsEntryWithEmptyStream(bool async) - { - using MemoryStream ms = new(); - using (ZipArchive create = new(ms, ZipArchiveMode.Create, leaveOpen: true)) - { - create.CreateEntry("empty.txt"); - } - - ms.Position = 0; - using ZipArchive archive = new(ms, ZipArchiveMode.ForwardRead); - ZipArchiveEntry? entry = await GetNextEntry(archive, async); - Assert.NotNull(entry); - Assert.Equal("empty.txt", entry.FullName); - Assert.Equal(0, entry.CompressedLength); - // Confirm end - ZipArchiveEntry? end = await GetNextEntry(archive, async); - Assert.Null(end); + _ = entry.Crc32; + Assert.True(entry.CompressedLength > 0); + Assert.Equal(s_smallContent.Length, entry.Length); } - [Theory] - [MemberData(nameof(Get_Booleans_Data))] - public async Task DirectoryEntry_ReturnsEntryWithNoDataStream(bool async) - { - using MemoryStream ms = new(); - using (ZipArchive create = new(ms, ZipArchiveMode.Create, leaveOpen: true)) - { - create.CreateEntry("mydir/"); - } - - ms.Position = 0; - using ZipArchive archive = new(ms, ZipArchiveMode.ForwardRead); - - ZipArchiveEntry? 
entry = await GetNextEntry(archive, async); - - Assert.NotNull(entry); - Assert.Equal("mydir/", entry.FullName); - } + // ── Dispose / lifecycle ───────────────────────────────────────────── [Theory] [MemberData(nameof(Get_Booleans_Data))] - public async Task Dispose_WhileEntryPartiallyRead_DoesNotThrow(bool async) + public async Task GetNextEntry_AfterDispose_Throws(bool async) { byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); using MemoryStream archiveStream = new(zipBytes); ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead, leaveOpen: true); - - ZipArchiveEntry? entry = await GetNextEntry(archive, async); - Assert.NotNull(entry); - - // Partially read via Open() - Stream ds = entry.Open(); - byte[] partial = new byte[5]; - await ReadStream(ds, partial, async); - - // Dispose should not throw archive.Dispose(); + + if (async) + await Assert.ThrowsAsync(() => archive.GetNextEntryAsync().AsTask()); + else + Assert.Throws(() => archive.GetNextEntry()); } [Fact] @@ -404,47 +285,42 @@ public void LeaveOpen_DoesNotDisposeStream() byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); using MemoryStream archiveStream = new(zipBytes); - - ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead, leaveOpen: true); - archive.Dispose(); + new ZipArchive(archiveStream, ZipArchiveMode.ForwardRead, leaveOpen: true).Dispose(); Assert.True(archiveStream.CanRead); } [Theory] [MemberData(nameof(Get_Booleans_Data))] - public async Task Open_CalledTwice_Throws(bool async) + public async Task Dispose_WithPendingDataDescriptor_DoesNotThrow(bool async) { - byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: true); + byte[] zipBytes = CreateZipWithEntries(CompressionLevel.Optimal, seekable: false); using MemoryStream archiveStream = new(zipBytes); - using ZipArchive archive = new(archiveStream, ZipArchiveMode.ForwardRead); + ZipArchive archive = new(archiveStream, 
ZipArchiveMode.ForwardRead, leaveOpen: true); + + // Read last entry, then dispose — Dispose must drain data descriptor + ZipArchiveEntry? entry; + do { entry = await GetNextEntry(archive, async); } + while (entry is not null && entry.FullName != "large.bin"); - ZipArchiveEntry? entry = await GetNextEntry(archive, async); Assert.NotNull(entry); + using (Stream ds = entry.Open()) + await ReadStreamFully(ds, async); - using Stream first = entry.Open(); - Assert.Throws(() => entry.Open()); + if (async) + await archive.DisposeAsync(); + else + archive.Dispose(); } - // ── Sync/async dispatch helpers ────────────────────────────────────── + // ── Helpers ───────────────────────────────────────────────────────── - private static async ValueTask GetNextEntry( - ZipArchive archive, bool async) - { - return async - ? await archive.GetNextEntryAsync() - : archive.GetNextEntry(); - } + private static async ValueTask GetNextEntry(ZipArchive archive, bool async) => + async ? await archive.GetNextEntryAsync() : archive.GetNextEntry(); - private static async ValueTask ReadStream(Stream stream, byte[] buffer, bool async) - { - return async - ? await stream.ReadAsync(buffer) - : stream.Read(buffer); - } - - // ── Test data helpers ──────────────────────────────────────────────── + private static async ValueTask ReadStream(Stream stream, byte[] buffer, bool async) => + async ? 
await stream.ReadAsync(buffer) : stream.Read(buffer); private static byte[] CreateZipWithEntries(CompressionLevel compressionLevel, bool seekable) { @@ -462,27 +338,22 @@ private static byte[] CreateZipWithEntries(CompressionLevel compressionLevel, bo } return ms.ToArray(); - } - private static void AddEntry(ZipArchive archive, string name, byte[] contents, CompressionLevel level) - { - ZipArchiveEntry entry = archive.CreateEntry(name, level); - using Stream stream = entry.Open(); - stream.Write(contents); + static void AddEntry(ZipArchive archive, string name, byte[] contents, CompressionLevel level) + { + ZipArchiveEntry entry = archive.CreateEntry(name, level); + using Stream stream = entry.Open(); + stream.Write(contents); + } } private static async Task ReadStreamFully(Stream stream, bool async) { using MemoryStream result = new(); if (async) - { await stream.CopyToAsync(result); - } else - { stream.CopyTo(result); - } - return result.ToArray(); } } From 3b3d5facb30433a29c08f97fc009ddeaef9910c0 Mon Sep 17 00:00:00 2001 From: alinpahontu2912 Date: Tue, 21 Apr 2026 12:23:30 +0200 Subject: [PATCH 4/6] defer datastream creation for entries with known size --- .../src/System/IO/Compression/ZipArchive.Async.cs | 1 - .../src/System/IO/Compression/ZipArchive.cs | 10 ++++------ .../src/System/IO/Compression/ZipArchiveEntry.cs | 13 ++++++++++++- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.Async.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.Async.cs index 13c1cc76d855c5..0b71c1b330b91d 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.Async.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.Async.cs @@ -149,7 +149,6 @@ protected virtual async ValueTask DisposeAsyncCore() switch (_mode) { case ZipArchiveMode.Read: - break; case ZipArchiveMode.ForwardRead: await 
DrainPreviousEntryAsync(default).ConfigureAwait(false); break; diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs index 391caceeb23877..559c393bec0a73 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs @@ -308,7 +308,6 @@ protected virtual void Dispose(bool disposing) switch (_mode) { case ZipArchiveMode.Read: - break; case ZipArchiveMode.ForwardRead: DrainPreviousEntry(); break; @@ -551,13 +550,12 @@ private async Task DrainPreviousEntryCore(bool useAsync, CancellationToken cance return new SubReadStream(_archiveStream, _archiveStream.Position, data.CompressedSize); } - Stream bounded = new SubReadStream(_archiveStream, _archiveStream.Position, data.CompressedSize); - Stream decompressor2 = CreateForwardReadDecompressor(bounded, data.CompressionMethod, data.UncompressedSize, leaveOpen: false); - - return new CrcValidatingReadStream(decompressor2, data.Crc32, data.UncompressedSize); + // Known size, not encrypted — store lightweight SubReadStream as a bookmark; + // decompressor + CRC wrapper are created lazily in OpenInForwardReadMode. 
+ return new SubReadStream(_archiveStream, _archiveStream.Position, data.CompressedSize); } - private static Stream CreateForwardReadDecompressor(Stream source, ZipCompressionMethod compressionMethod, long uncompressedSize, bool leaveOpen) + internal static Stream CreateForwardReadDecompressor(Stream source, ZipCompressionMethod compressionMethod, long uncompressedSize, bool leaveOpen) { return compressionMethod switch { diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs index 21909f88a923c4..547df6cceff82a 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs @@ -55,6 +55,7 @@ public partial class ZipArchiveEntry // determined by whether the local header had 0xFFFFFFFF size markers. private Stream? _forwardReadDataStream; private bool _forwardReadStreamOpened; + private bool _forwardReadNeedsWrapping; private bool _hasDataDescriptor; private bool _isZip64SizeFields; @@ -208,7 +209,7 @@ internal ZipArchiveEntry(ZipArchive archive, ZipLocalFileHeader.ForwardReadHeade _fileComment = Array.Empty(); _compressionLevel = MapCompressionLevel(_generalPurposeBitFlag, headerData.CompressionMethod); _forwardReadDataStream = dataStream; - + _forwardReadNeedsWrapping = dataStream is not null && !headerData.HasDataDescriptor && !headerData.IsEncrypted; Changes = ZipArchive.ChangeState.Unchanged; } @@ -936,6 +937,16 @@ private WrappedStream OpenInForwardReadMode() if (_forwardReadStreamOpened) throw new IOException(SR.ForwardReadOnly); + // For known-size entries, the data stream is a raw bounded stream — + // lazily wrap it with decompressor + CRC validation on first Open(). 
+ if (_forwardReadNeedsWrapping) + { + Stream decompressor = ZipArchive.CreateForwardReadDecompressor( + _forwardReadDataStream, _storedCompressionMethod, _uncompressedSize, leaveOpen: false); + _forwardReadDataStream = new CrcValidatingReadStream(decompressor, _crc32, _uncompressedSize); + _forwardReadNeedsWrapping = false; + } + _forwardReadStreamOpened = true; // Wrap so user disposal does not close our internal data stream. From f4d497d45cf41e3dc334ac60715a915bd5167648 Mon Sep 17 00:00:00 2001 From: alinpahontu2912 Date: Tue, 21 Apr 2026 14:24:11 +0200 Subject: [PATCH 5/6] fix zip64 header bug --- .../src/System/IO/Compression/ZipArchive.cs | 2 -- .../src/System/IO/Compression/ZipBlocks.Async.cs | 9 +++++++++ .../src/System/IO/Compression/ZipBlocks.cs | 9 +++++++++ .../tests/ZipArchive/zip_ForwardReadTests.cs | 3 --- 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs index 559c393bec0a73..7aae7fa32c312d 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs @@ -5,10 +5,8 @@ // Zip Spec here: http://www.pkware.com/documents/casestudies/APPNOTE.TXT using System.Buffers; -using System.Buffers.Binary; using System.Collections.Generic; using System.Collections.ObjectModel; -using System.ComponentModel; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Text; diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.Async.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.Async.cs index a67db42fc743d8..b19346935fe731 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.Async.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.Async.cs @@ -225,6 +225,15 
@@ static async ValueTask ParseForwardReadHeaderAsync(byte[] compressedSize = zip64.CompressedSize.Value; } + // For data descriptor entries written to non-seekable streams, sizes in the + // local header are typically 0 rather than 0xFFFFFFFF, but the data descriptor + // still uses 8-byte fields when the entry requires Zip64. + bool hasDataDescriptor = (generalPurposeBitFlags & (ushort)ZipArchiveEntry.BitFlagValues.DataDescriptor) != 0; + if (!isZip64SizeFields && hasDataDescriptor && versionNeeded >= (ushort)ZipVersionNeededValues.Zip64) + { + isZip64SizeFields = true; + } + bool isUtf8 = (generalPurposeBitFlags & (ushort)ZipArchiveEntry.BitFlagValues.UnicodeFileNameAndComment) != 0; Encoding nameEncoding = isUtf8 ? Encoding.UTF8 : (entryNameEncoding ?? Encoding.UTF8); string fullName = nameEncoding.GetString(filenameBytes); diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs index 4f8a704b47dd5c..a6763a6f42f2d7 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs @@ -771,6 +771,15 @@ static ForwardReadHeaderData ParseForwardReadHeader(ReadOnlySpan header, S compressedSize = zip64.CompressedSize.Value; } + // For data descriptor entries written to non-seekable streams, sizes in the + // local header are typically 0 rather than 0xFFFFFFFF, but the data descriptor + // still uses 8-byte fields when the entry requires Zip64. + bool hasDataDescriptor = (generalPurposeBitFlags & (ushort)ZipArchiveEntry.BitFlagValues.DataDescriptor) != 0; + if (!isZip64SizeFields && hasDataDescriptor && versionNeeded >= (ushort)ZipVersionNeededValues.Zip64) + { + isZip64SizeFields = true; + } + bool isUtf8 = (generalPurposeBitFlags & (ushort)ZipArchiveEntry.BitFlagValues.UnicodeFileNameAndComment) != 0; Encoding nameEncoding = isUtf8 ? 
Encoding.UTF8 : (entryNameEncoding ?? Encoding.UTF8); string fullName = nameEncoding.GetString(filenameBytes); diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ForwardReadTests.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ForwardReadTests.cs index d01834285afd77..71fff8019f9eb3 100644 --- a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ForwardReadTests.cs +++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ForwardReadTests.cs @@ -1,9 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using System.Collections.Generic; -using System.Text; -using System.Threading; using System.Threading.Tasks; using Xunit; From 40a2d0ee81c3e9b23c1c637b8ac9950d795266fc Mon Sep 17 00:00:00 2001 From: alinpahontu2912 Date: Wed, 22 Apr 2026 11:41:59 +0200 Subject: [PATCH 6/6] use adaptive parsing to fix zip64 file reading --- .../src/System/IO/Compression/ZipArchive.cs | 25 +++++---- .../System/IO/Compression/ZipBlocks.Async.cs | 35 +++++++++--- .../src/System/IO/Compression/ZipBlocks.cs | 55 +++++++++++++++---- 3 files changed, 86 insertions(+), 29 deletions(-) diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs index 7aae7fa32c312d..58133ec5cab46e 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs @@ -496,17 +496,22 @@ private async Task DrainPreviousEntryCore(bool useAsync, CancellationToken cance if (prev.HasDataDescriptor) { - var (crc32, _, uncompressedSize) = useAsync - ? 
await ZipLocalFileHeader.ReadDataDescriptorAsync(_archiveStream, prev.IsZip64SizeFields, cancellationToken).ConfigureAwait(false) - : ZipLocalFileHeader.ReadDataDescriptor(_archiveStream, prev.IsZip64SizeFields); + if (crcResult is not { } actual) + throw new InvalidDataException(SR.LocalFileHeaderCorrupt); - if (crcResult is { } actual) - { - if (actual.Crc32 != crc32) - throw new InvalidDataException(SR.CrcMismatch); - if (actual.BytesRead != uncompressedSize) - throw new InvalidDataException(SR.UnexpectedStreamLength); - } + // Use adaptive parsing: try 32-bit DD first, fall back to Zip64 if + // the parsed values don't match. This handles archives where the writer + // couldn't signal Zip64 in the local header (non-seekable stream writes). + var (crc32, _, uncompressedSize) = useAsync + ? await ZipLocalFileHeader.ReadDataDescriptorAdaptiveAsync( + _archiveStream, actual.Crc32, actual.BytesRead, cancellationToken).ConfigureAwait(false) + : ZipLocalFileHeader.ReadDataDescriptorAdaptive( + _archiveStream, actual.Crc32, actual.BytesRead); + + if (actual.Crc32 != crc32) + throw new InvalidDataException(SR.CrcMismatch); + if (actual.BytesRead != uncompressedSize) + throw new InvalidDataException(SR.UnexpectedStreamLength); } } else if (prev.HasDataDescriptor) diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.Async.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.Async.cs index b19346935fe731..f23fb23c2c7cb1 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.Async.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.Async.cs @@ -225,15 +225,6 @@ static async ValueTask ParseForwardReadHeaderAsync(byte[] compressedSize = zip64.CompressedSize.Value; } - // For data descriptor entries written to non-seekable streams, sizes in the - // local header are typically 0 rather than 0xFFFFFFFF, but the data descriptor - // still uses 8-byte fields when the 
entry requires Zip64. - bool hasDataDescriptor = (generalPurposeBitFlags & (ushort)ZipArchiveEntry.BitFlagValues.DataDescriptor) != 0; - if (!isZip64SizeFields && hasDataDescriptor && versionNeeded >= (ushort)ZipVersionNeededValues.Zip64) - { - isZip64SizeFields = true; - } - bool isUtf8 = (generalPurposeBitFlags & (ushort)ZipArchiveEntry.BitFlagValues.UnicodeFileNameAndComment) != 0; Encoding nameEncoding = isUtf8 ? Encoding.UTF8 : (entryNameEncoding ?? Encoding.UTF8); string fullName = nameEncoding.GetString(filenameBytes); @@ -288,6 +279,32 @@ static async ValueTask ParseForwardReadHeaderAsync(byte[] return (crc32, compressedSize, uncompressedSize); } + + /// <summary> + /// Async variant of <see cref="ReadDataDescriptorAdaptive"/>. + /// </summary> + internal static async ValueTask<(uint Crc32, long CompressedSize, long UncompressedSize)> ReadDataDescriptorAdaptiveAsync( + Stream stream, uint knownCrc32, long knownUncompressedSize, CancellationToken cancellationToken) + { + byte[] firstFour = new byte[4]; + await stream.ReadExactlyAsync(firstFour, cancellationToken).ConfigureAwait(false); + + uint firstWord = BinaryPrimitives.ReadUInt32LittleEndian(firstFour); + bool hasSignature = firstWord == 0x08074B50; + + int smallSize = (hasSignature ?
4 : 0) + 8; + byte[] buf = new byte[20]; + await stream.ReadExactlyAsync(buf.AsMemory(0, smallSize), cancellationToken).ConfigureAwait(false); + + var small = ParseDataDescriptor(buf.AsSpan(0, smallSize), hasSignature, isZip64: false, firstWord); + if (small.Crc32 == knownCrc32 && small.UncompressedSize == knownUncompressedSize) + return small; + + await stream.ReadExactlyAsync(buf.AsMemory(smallSize, 8), cancellationToken).ConfigureAwait(false); + int fullSize = smallSize + 8; + + return ParseDataDescriptor(buf.AsSpan(0, fullSize), hasSignature, isZip64: true, firstWord); + } } internal sealed partial class ZipCentralDirectoryFileHeader diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs index a6763a6f42f2d7..2d653aa317a745 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs @@ -771,15 +771,6 @@ static ForwardReadHeaderData ParseForwardReadHeader(ReadOnlySpan header, S compressedSize = zip64.CompressedSize.Value; } - // For data descriptor entries written to non-seekable streams, sizes in the - // local header are typically 0 rather than 0xFFFFFFFF, but the data descriptor - // still uses 8-byte fields when the entry requires Zip64. - bool hasDataDescriptor = (generalPurposeBitFlags & (ushort)ZipArchiveEntry.BitFlagValues.DataDescriptor) != 0; - if (!isZip64SizeFields && hasDataDescriptor && versionNeeded >= (ushort)ZipVersionNeededValues.Zip64) - { - isZip64SizeFields = true; - } - bool isUtf8 = (generalPurposeBitFlags & (ushort)ZipArchiveEntry.BitFlagValues.UnicodeFileNameAndComment) != 0; Encoding nameEncoding = isUtf8 ? Encoding.UTF8 : (entryNameEncoding ?? 
Encoding.UTF8); string fullName = nameEncoding.GetString(filenameBytes); @@ -805,9 +796,53 @@ internal static (uint Crc32, long CompressedSize, long UncompressedSize) ReadDat bool hasSignature = firstWord == 0x08074B50; int remainingSize = (hasSignature ? 4 : 0) + (isZip64 ? 16 : 8); - Span remaining = stackalloc byte[20]; // max: sig-CRC(4) + comp(8) + uncomp(8) = 20 + Span remaining = stackalloc byte[20]; stream.ReadExactly(remaining[..remainingSize]); + return ParseDataDescriptor(remaining[..remainingSize], hasSignature, isZip64, firstWord); + } + + /// <summary> + /// Reads a data descriptor whose size (32-bit or Zip64) is unknown. + /// Parses as 32-bit first; if the CRC and uncompressed size don't match + /// the expected values, reads additional bytes and re-parses as Zip64. + /// </summary> + /// <remarks> + /// Some writers (including .NET on non-seekable streams) emit a Zip64 + /// data descriptor without setting any Zip64 indicator in the local + /// file header, because the final sizes are not known at header-write + /// time. The known values from the decompressor let us detect the + /// correct layout without relying on header signals. + /// </remarks> + internal static (uint Crc32, long CompressedSize, long UncompressedSize) ReadDataDescriptorAdaptive( + Stream stream, uint knownCrc32, long knownUncompressedSize) + { + Span firstFour = stackalloc byte[4]; + stream.ReadExactly(firstFour); + + uint firstWord = BinaryPrimitives.ReadUInt32LittleEndian(firstFour); + bool hasSignature = firstWord == 0x08074B50; + + // Read enough for the 32-bit layout: CRC(4) + CompSize(4) + UncompSize(4), + // plus CRC(4) again if the signature consumed firstWord. + int smallSize = (hasSignature ?
4 : 0) + 8; + Span buf = stackalloc byte[20]; + stream.ReadExactly(buf[..smallSize]); + + var small = ParseDataDescriptor(buf[..smallSize], hasSignature, isZip64: false, firstWord); + if (small.Crc32 == knownCrc32 && small.UncompressedSize == knownUncompressedSize) + return small; + + // 32-bit interpretation didn't match — read 8 more bytes for the Zip64 layout. + stream.ReadExactly(buf.Slice(smallSize, 8)); + int fullSize = smallSize + 8; + + return ParseDataDescriptor(buf[..fullSize], hasSignature, isZip64: true, firstWord); + } + + private static (uint Crc32, long CompressedSize, long UncompressedSize) ParseDataDescriptor( + ReadOnlySpan remaining, bool hasSignature, bool isZip64, uint firstWord) + { int pos = 0; uint crc32; if (hasSignature)