-
Notifications
You must be signed in to change notification settings - Fork 5.4k
Feature/zip archive forward read #126646
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Feature/zip archive forward read #126646
Changes from all commits
60deae6
3986bcd
9d2a7b1
6670761
3b3d5fa
f4d497d
40a2d0e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,10 +7,11 @@ | |
| using System.Buffers; | ||
| using System.Collections.Generic; | ||
| using System.Collections.ObjectModel; | ||
| using System.ComponentModel; | ||
| using System.Diagnostics; | ||
| using System.Diagnostics.CodeAnalysis; | ||
| using System.Text; | ||
| using System.Threading; | ||
| using System.Threading.Tasks; | ||
|
|
||
| namespace System.IO.Compression | ||
| { | ||
|
|
@@ -34,6 +35,8 @@ public partial class ZipArchive : IDisposable, IAsyncDisposable | |
| private byte[] _archiveComment; | ||
| private Encoding? _entryNameAndCommentEncoding; | ||
| private long _firstDeletedEntryOffset; | ||
| private ZipArchiveEntry? _forwardReadPreviousEntry; | ||
| private bool _forwardReadReachedEnd; | ||
|
|
||
| #if DEBUG_FORCE_ZIP64 | ||
| public bool _forceZip64; | ||
|
|
@@ -150,6 +153,9 @@ public ZipArchive(Stream stream, ZipArchiveMode mode, bool leaveOpen, Encoding? | |
| case ZipArchiveMode.Read: | ||
| ReadEndOfCentralDirectory(); | ||
| break; | ||
| case ZipArchiveMode.ForwardRead: | ||
| _readEntries = true; | ||
| break; | ||
| case ZipArchiveMode.Update: | ||
| default: | ||
| Debug.Assert(mode == ZipArchiveMode.Update); | ||
|
|
@@ -231,6 +237,8 @@ public ReadOnlyCollection<ZipArchiveEntry> Entries | |
| { | ||
| if (_mode == ZipArchiveMode.Create) | ||
| throw new NotSupportedException(SR.EntriesInCreateMode); | ||
| if (_mode == ZipArchiveMode.ForwardRead) | ||
| throw new NotSupportedException(SR.ForwardReadOnly); | ||
|
|
||
| ThrowIfDisposed(); | ||
|
|
||
|
|
@@ -298,6 +306,8 @@ protected virtual void Dispose(bool disposing) | |
| switch (_mode) | ||
| { | ||
| case ZipArchiveMode.Read: | ||
| case ZipArchiveMode.ForwardRead: | ||
| DrainPreviousEntry(); | ||
| break; | ||
| case ZipArchiveMode.Create: | ||
| WriteFile(); | ||
|
|
@@ -349,12 +359,217 @@ protected virtual void Dispose(bool disposing) | |
|
|
||
| if (_mode == ZipArchiveMode.Create) | ||
| throw new NotSupportedException(SR.EntriesInCreateMode); | ||
| if (_mode == ZipArchiveMode.ForwardRead) | ||
| throw new NotSupportedException(SR.ForwardReadOnly); | ||
|
|
||
| EnsureCentralDirectoryRead(); | ||
| _entriesDictionary.TryGetValue(entryName, out ZipArchiveEntry? result); | ||
| return result; | ||
| } | ||
|
|
||
/// <summary>
/// Advances to and returns the next entry of an archive opened in <see cref="ZipArchiveMode.ForwardRead"/> mode.
/// </summary>
/// <returns>The next <see cref="ZipArchiveEntry"/> in the archive, or <see langword="null"/> once no further entries are available.</returns>
/// <exception cref="NotSupportedException">
/// The archive was not opened in <see cref="ZipArchiveMode.ForwardRead"/> mode, or the entry combines a
/// data descriptor with stored (uncompressed) data or with encryption.
/// </exception>
/// <exception cref="ObjectDisposedException">The archive has been disposed.</exception>
/// <exception cref="InvalidDataException">The archive contains invalid data.</exception>
public ZipArchiveEntry? GetNextEntry()
{
    ThrowIfDisposed();

    if (_mode != ZipArchiveMode.ForwardRead)
    {
        throw new NotSupportedException(SR.GetNextEntryNotInForwardRead);
    }

    // The end-of-entries state is sticky: once reached, the stream is not touched again.
    if (_forwardReadReachedEnd)
    {
        return null;
    }

    // Consume whatever is left of the entry handed out by the previous call before
    // attempting to read the next local file header.
    DrainPreviousEntry();

    ZipLocalFileHeader.ForwardReadHeaderData? maybeHeader =
        ZipLocalFileHeader.TryReadForForwardRead(_archiveStream, EntryNameAndCommentEncoding);

    if (maybeHeader is not { } header)
    {
        _forwardReadReachedEnd = true;
        return null;
    }

    // Entries that rely on a trailing data descriptor are rejected when they are
    // stored or encrypted.
    bool usesDescriptor = header.HasDataDescriptor;

    if (usesDescriptor && header.CompressionMethod == ZipCompressionMethod.Stored)
    {
        throw new NotSupportedException(SR.ForwardReadStoredDataDescriptorNotSupported);
    }

    if (usesDescriptor && header.IsEncrypted)
    {
        throw new NotSupportedException(SR.ForwardReadEncryptedDataDescriptorNotSupported);
    }

    ZipArchiveEntry next = new ZipArchiveEntry(this, header, BuildForwardReadDataStream(header));

    // Remember the entry so the following call can drain it.
    _forwardReadPreviousEntry = next;
    return next;
}
|
|
||
/// <summary>
/// Asynchronously reads the next entry from the archive when opened in <see cref="ZipArchiveMode.ForwardRead"/> mode.
/// </summary>
/// <param name="cancellationToken">A cancellation token to observe.</param>
/// <returns>A <see cref="ValueTask{TResult}"/> representing the next entry, or <see langword="null"/> if no more entries exist.</returns>
/// <exception cref="NotSupportedException">The archive was not opened in <see cref="ZipArchiveMode.ForwardRead"/> mode.</exception>
/// <exception cref="ObjectDisposedException">The archive has been disposed.</exception>
/// <exception cref="InvalidDataException">The archive contains invalid data.</exception>
/// <exception cref="OperationCanceledException">The <paramref name="cancellationToken"/> was canceled; reported through the returned task.</exception>
public ValueTask<ZipArchiveEntry?> GetNextEntryAsync(CancellationToken cancellationToken = default)
{
    // Usage errors (disposed archive, wrong mode) are raised synchronously at the call site.
    ThrowIfDisposed();
    if (_mode != ZipArchiveMode.ForwardRead)
        throw new NotSupportedException(SR.GetNextEntryNotInForwardRead);

    // Cancellation is not a usage error: surface it through the returned task rather than
    // throwing from this non-async wrapper, so callers observe it when awaiting.
    if (cancellationToken.IsCancellationRequested)
        return ValueTask.FromCanceled<ZipArchiveEntry?>(cancellationToken);

    // Fast path: once the end has been reached, complete synchronously without
    // entering the async state machine.
    if (_forwardReadReachedEnd)
        return ValueTask.FromResult<ZipArchiveEntry?>(null);

    return GetNextEntryAsyncCore(cancellationToken);
}
|
|
||
// Asynchronous worker behind GetNextEntryAsync: drains the previously returned entry,
// reads the next local file header and wraps it in a new ZipArchiveEntry.
// Returns null (and records that the end was reached) when no header could be read.
private async ValueTask<ZipArchiveEntry?> GetNextEntryAsyncCore(CancellationToken cancellationToken)
{
    // Consume whatever is left of the entry handed out by the previous call.
    await DrainPreviousEntryAsync(cancellationToken).ConfigureAwait(false);

    ZipLocalFileHeader.ForwardReadHeaderData? maybeHeader = await ZipLocalFileHeader
        .TryReadForForwardReadAsync(_archiveStream, EntryNameAndCommentEncoding, cancellationToken)
        .ConfigureAwait(false);

    if (maybeHeader is not { } header)
    {
        _forwardReadReachedEnd = true;
        return null;
    }

    // Entries that rely on a trailing data descriptor are rejected when they are
    // stored or encrypted.
    bool usesDescriptor = header.HasDataDescriptor;

    if (usesDescriptor && header.CompressionMethod == ZipCompressionMethod.Stored)
    {
        throw new NotSupportedException(SR.ForwardReadStoredDataDescriptorNotSupported);
    }

    if (usesDescriptor && header.IsEncrypted)
    {
        throw new NotSupportedException(SR.ForwardReadEncryptedDataDescriptorNotSupported);
    }

    ZipArchiveEntry next = new ZipArchiveEntry(this, header, BuildForwardReadDataStream(header));

    // Remember the entry so the following call can drain it.
    _forwardReadPreviousEntry = next;
    return next;
}
|
|
||
// Synchronous entry point for draining the previous forward-read entry. The shared core is
// invoked with useAsync: false and its task is waited on in place.
private void DrainPreviousEntry()
{
    Task drain = DrainPreviousEntryCore(useAsync: false, cancellationToken: default);
    drain.GetAwaiter().GetResult();
}
|
|
||
// Asynchronous entry point for draining the previous forward-read entry; wraps the task
// produced by the shared core (invoked with useAsync: true) in a ValueTask.
private ValueTask DrainPreviousEntryAsync(CancellationToken cancellationToken)
{
    Task drain = DrainPreviousEntryCore(useAsync: true, cancellationToken);
    return new ValueTask(drain);
}
|
|
||
// Consumes the remainder of the entry returned by the previous GetNextEntry/GetNextEntryAsync
// call so the archive stream ends up past that entry's data (and past its data descriptor,
// when it has one).
//
// useAsync selects between the synchronous and asynchronous stream APIs; cancellationToken is
// only passed to the asynchronous calls.
//
// Throws InvalidDataException when a data-descriptor entry yields no CRC result, or when the
// CRC / length computed while reading disagree with the values parsed from the descriptor.
private async Task DrainPreviousEntryCore(bool useAsync, CancellationToken cancellationToken)
{
    const int DrainBufferSize = 4096;

    if (_forwardReadPreviousEntry is not { } prev)
        return;

    Stream? dataStream = prev.ForwardReadDataStream;
    if (dataStream is not null)
    {
        // The drained bytes are discarded, so use a pooled scratch buffer instead of
        // allocating a new array for every entry.
        byte[] buffer = ArrayPool<byte>.Shared.Rent(DrainBufferSize);
        try
        {
            if (useAsync)
            {
                while (await dataStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false) > 0) { }
            }
            else
            {
                while (dataStream.Read(buffer) > 0) { }
            }
        }
        finally
        {
            ArrayPool<byte>.Shared.Return(buffer);
        }

        // Capture the CRC/length totals before the stream is disposed.
        var crcResult = (dataStream as CrcValidatingReadStream)?.GetFinalCrcResult();

        if (useAsync)
            await dataStream.DisposeAsync().ConfigureAwait(false);
        else
            dataStream.Dispose();

        if (prev.HasDataDescriptor)
        {
            if (crcResult is not { } actual)
                throw new InvalidDataException(SR.LocalFileHeaderCorrupt);

            // Use adaptive parsing: try 32-bit DD first, fall back to Zip64 if
            // the parsed values don't match. This handles archives where the writer
            // couldn't signal Zip64 in the local header (non-seekable stream writes).
            var (crc32, _, uncompressedSize) = useAsync
                ? await ZipLocalFileHeader.ReadDataDescriptorAdaptiveAsync(
                    _archiveStream, actual.Crc32, actual.BytesRead, cancellationToken).ConfigureAwait(false)
                : ZipLocalFileHeader.ReadDataDescriptorAdaptive(
                    _archiveStream, actual.Crc32, actual.BytesRead);

            if (actual.Crc32 != crc32)
                throw new InvalidDataException(SR.CrcMismatch);
            if (actual.BytesRead != uncompressedSize)
                throw new InvalidDataException(SR.UnexpectedStreamLength);
        }
    }
    else if (prev.HasDataDescriptor)
    {
        // No data stream was created for the entry, but a descriptor still follows it in
        // the archive stream and has to be read past.
        if (useAsync)
            await ZipLocalFileHeader.ReadDataDescriptorAsync(_archiveStream, prev.IsZip64SizeFields, cancellationToken).ConfigureAwait(false);
        else
            ZipLocalFileHeader.ReadDataDescriptor(_archiveStream, prev.IsZip64SizeFields);
    }

    // Only reached when draining succeeded; a failed or cancelled drain leaves the entry recorded.
    _forwardReadPreviousEntry = null;
}
|
|
||
// Creates the stream that exposes (or bookmarks) the data of a forward-read entry.
//
// Returns null only for directory entries (names ending in '/' or '\'). Empty file entries
// get a zero-length stream so they can be opened like any other file entry.
//
// Throws InvalidDataException when a non-empty entry uses a compression method other than
// Stored, Deflate or Deflate64.
private Stream? BuildForwardReadDataStream(ZipLocalFileHeader.ForwardReadHeaderData data)
{
    bool isDirectory = data.FullName.Length > 0 &&
        (data.FullName[^1] == '/' || data.FullName[^1] == '\\');

    if (isDirectory)
        return null;

    bool isEmptyEntry = !data.HasDataDescriptor && data.CompressedSize == 0 && data.UncompressedSize == 0;

    if (isEmptyEntry)
    {
        // An empty file is a valid entry: hand out a zero-length window over the archive
        // stream instead of null so that opening it yields an empty stream. As before,
        // empty entries are not subjected to the compression-method check below.
        return new SubReadStream(_archiveStream, _archiveStream.Position, 0);
    }

    if (data.CompressionMethod != ZipCompressionMethod.Stored &&
        data.CompressionMethod != ZipCompressionMethod.Deflate &&
        data.CompressionMethod != ZipCompressionMethod.Deflate64)
    {
        throw new InvalidDataException(SR.UnsupportedCompression);
    }

    if (data.HasDataDescriptor)
    {
        // The sizes and CRC are only available from the trailing descriptor, so the
        // decompressor is created eagerly with an unknown size (-1) and wrapped in a
        // CRC-tracking stream using placeholder expectations; the totals are compared
        // against the descriptor when the entry is drained.
        Stream decompressor = CreateForwardReadDecompressor(_archiveStream, data.CompressionMethod, -1, leaveOpen: true);

        return new CrcValidatingReadStream(decompressor, expectedCrc: 0, expectedLength: long.MaxValue);
    }

    // Known size (encrypted or not) — store lightweight SubReadStream as a bookmark;
    // decompressor + CRC wrapper are created lazily in OpenInForwardReadMode.
    return new SubReadStream(_archiveStream, _archiveStream.Position, data.CompressedSize);
}
|
Comment on lines
+556
to
+559
|
||
|
|
||
// Selects the decompression stream for a forward-read entry.
//
// Deflate:   a DeflateStream; constructed with leaveOpen: true when leaveOpen is set,
//            otherwise constructed with uncompressedSize.
// Deflate64: a DeflateManagedStream; receives -1 instead of uncompressedSize when
//            leaveOpen is set.
// Anything else: the source stream is returned unchanged.
internal static Stream CreateForwardReadDecompressor(Stream source, ZipCompressionMethod compressionMethod, long uncompressedSize, bool leaveOpen)
{
    switch (compressionMethod)
    {
        case ZipCompressionMethod.Deflate:
            if (leaveOpen)
            {
                return new DeflateStream(source, CompressionMode.Decompress, leaveOpen: true);
            }

            return new DeflateStream(source, CompressionMode.Decompress, uncompressedSize);

        case ZipCompressionMethod.Deflate64:
            if (leaveOpen)
            {
                return new DeflateManagedStream(source, ZipCompressionMethod.Deflate64, -1);
            }

            return new DeflateManagedStream(source, ZipCompressionMethod.Deflate64, uncompressedSize);

        default:
            return source;
    }
}
|
|
||
| internal Stream ArchiveStream => _archiveStream; | ||
|
|
||
| internal uint NumberOfThisDisk => _numberOfThisDisk; | ||
|
|
@@ -434,6 +649,8 @@ private ZipArchiveEntry DoCreateEntry(string entryName, CompressionLevel? compre | |
|
|
||
| if (_mode == ZipArchiveMode.Read) | ||
| throw new NotSupportedException(SR.CreateInReadMode); | ||
| if (_mode == ZipArchiveMode.ForwardRead) | ||
| throw new NotSupportedException(SR.ForwardReadOnly); | ||
|
|
||
| ThrowIfDisposed(); | ||
|
|
||
|
|
@@ -959,6 +1176,10 @@ private static bool ValidateMode(ZipArchiveMode mode, Stream stream) | |
| isReadModeAndUnseekable = true; | ||
| } | ||
| break; | ||
| case ZipArchiveMode.ForwardRead: | ||
| if (!stream.CanRead) | ||
| throw new ArgumentException(SR.ReadModeCapabilities); | ||
| break; | ||
| case ZipArchiveMode.Update: | ||
| if (!stream.CanRead || !stream.CanWrite || !stream.CanSeek) | ||
| throw new ArgumentException(SR.UpdateModeCapabilities); | ||
|
|
@@ -977,9 +1198,13 @@ private static Stream DecideArchiveStream(ZipArchiveMode mode, Stream stream) | |
| { | ||
| ArgumentNullException.ThrowIfNull(stream); | ||
|
|
||
| return mode == ZipArchiveMode.Create && !stream.CanSeek ? | ||
| new PositionPreservingWriteOnlyStreamWrapper(stream) : | ||
| stream; | ||
| if (mode == ZipArchiveMode.Create && !stream.CanSeek) | ||
| return new PositionPreservingWriteOnlyStreamWrapper(stream); | ||
|
|
||
| if (mode == ZipArchiveMode.ForwardRead && !stream.CanSeek) | ||
| return new ReadAheadStream(stream); | ||
|
|
||
| return stream; | ||
| } | ||
|
|
||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In ForwardRead mode on a non-seekable input, empty file entries currently return null from BuildForwardReadDataStream, which makes ZipArchiveEntry.Open() throw (ForwardReadNoDataStream). Empty files are valid entries and should be openable (returning an empty stream) so callers can treat them the same as in Read mode; only directory entries should be non-openable.