Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
e609249
Initial plan
Copilot Mar 6, 2026
543c8f2
Fix TarReader to handle GNU sparse format 1.0 (PAX) - resolve GNU.spa…
Copilot Mar 6, 2026
351d885
Fix copy constructor to propagate _gnuSparseRealSize; improve test as…
Copilot Mar 6, 2026
cd7c720
Merge branch 'main' into copilot/fix-gnu-sparse-format-handling
lewing Mar 9, 2026
5db74bb
Implement full GNU sparse format 1.0 PAX DataStream expansion via Gnu…
Copilot Mar 9, 2026
1015043
Fix GnuSparseStream: add MaxSparseSegments DoS limit, precompute pack…
Copilot Mar 9, 2026
c34240f
Address reviewer feedback: simplify error strings, minor=0 check, typ…
Copilot Mar 10, 2026
583239c
Address remaining reviewer feedback: group sparse attrs, IndexOf-base…
Copilot Mar 10, 2026
498e3e9
Move sparse tests to TarReader.SparseFile.Tests.cs, add corrupted for…
Copilot Mar 10, 2026
fc05df3
Address review: merge sync/async ParseSparseMap, 1024-byte buffer, Ut…
Copilot Mar 10, 2026
24f0fb3
Fix TarReader: implement full GNU sparse format 1.0 (PAX) reading wit…
Copilot Mar 10, 2026
91213c7
Merge remote-tracking branch 'upstream/main' into copilot/fix-gnu-spa…
rzikm Mar 10, 2026
d656d45
Move async sparse tests to TarReader.SparseFile.Tests.cs
rzikm Mar 10, 2026
24e7e62
Fix buffer overflow in ParseSparseMap for long malformed lines
rzikm Mar 10, 2026
f78a16e
Add test: copy sparse entry to new archive preserves expanded content
rzikm Mar 10, 2026
87212d5
Refactor: separate _gnuSparseDataStream from _dataStream, simplify Re…
rzikm Mar 10, 2026
35a72b9
Fix ReadFromPackedData positioning and add copy round-trip test
rzikm Mar 10, 2026
e1fed30
Merge remote-tracking branch 'origin/main' into copilot/fix-gnu-spars…
Copilot Mar 10, 2026
48132ab
Merge latest main, add archive size assert in test, fix FindSegmentFr…
Copilot Mar 10, 2026
e4a2b76
Defer sparse map parsing to first Read for non-seekable stream support
rzikm Mar 11, 2026
451f9e0
Add non-seekable source tests for sparse entry copy round-trip
rzikm Mar 11, 2026
df76255
Verify archive size when copying
rzikm Mar 11, 2026
26cb2ca
Consolidate sparse layout tests into single parameterized test
rzikm Mar 11, 2026
feed417
Deduplicate some tests
rzikm Mar 11, 2026
482b139
Deduplicate remaining sparse tests
rzikm Mar 11, 2026
3e41073
Address code review: remove unused _dataStart, add overflow check, fi…
rzikm Mar 11, 2026
6cdfd74
Use ArrayPool for ParseSparseMap buffer
rzikm Mar 11, 2026
313d905
Validate sparse map segment ordering and bounds
rzikm Mar 11, 2026
2d25c96
Use MemberData for corrupted sparse map tests, exercise all cases syn…
rzikm Mar 11, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
<Compile Include="System\Formats\Tar\TarWriter.cs" />
<Compile Include="System\Formats\Tar\SubReadStream.cs" />
<Compile Include="System\Formats\Tar\SeekableSubReadStream.cs" />
<Compile Include="System\Formats\Tar\GnuSparseStream.cs" />
<Compile Include="$(CommonPath)DisableRuntimeMarshalling.cs" Link="Common\DisableRuntimeMarshalling.cs" />
<Compile Include="$(CommonPath)System\IO\Archiving.Utils.cs" Link="Common\System\IO\Archiving.Utils.cs" />
<Compile Include="$(CommonPath)System\IO\PathInternal.cs" Link="Common\System\IO\PathInternal.cs" />
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ public DateTimeOffset ModificationTime
/// When the <see cref="EntryType"/> indicates an entry that can contain data, this property returns the length in bytes of such data.
/// </summary>
/// <remarks>The entry type that commonly contains data is <see cref="TarEntryType.RegularFile"/> (or <see cref="TarEntryType.V7RegularFile"/> in the <see cref="TarEntryFormat.V7"/> format). Other uncommon entry types that can also contain data are: <see cref="TarEntryType.ContiguousFile"/>, <see cref="TarEntryType.DirectoryList"/>, <see cref="TarEntryType.MultiVolume"/> and <see cref="TarEntryType.SparseFile"/>.</remarks>
public long Length => _header._dataStream != null ? _header._dataStream.Length : _header._size;
public long Length => _header._gnuSparseDataStream?.Length ?? (_header._dataStream is not null ? _header._dataStream.Length : _header._size);

/// <summary>
/// When the <see cref="EntryType"/> indicates a <see cref="TarEntryType.SymbolicLink"/> or a <see cref="TarEntryType.HardLink"/>, this property returns the link target path of such link.
Expand Down Expand Up @@ -252,7 +252,7 @@ public Task ExtractToFileAsync(string destinationFileName, bool overwrite, Cance
/// <exception cref="IOException">An I/O problem occurred.</exception>
public Stream? DataStream
{
get => _header._dataStream;
get => (Stream?)_header._gnuSparseDataStream ?? _header._dataStream;
set
{
if (!IsDataStreamSetterSupported())
Expand All @@ -275,6 +275,8 @@ public Stream? DataStream
_readerOfOrigin = null;
}

_header._gnuSparseDataStream?.Dispose();
_header._gnuSparseDataStream = null;
_header._dataStream?.Dispose();

_header._dataStream = value;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,32 @@ internal void ReplaceNormalAttributesWithExtended(Dictionary<string, string>? di
_size = size;
}

// GNU sparse format 1.0 (encoded via PAX) uses RegularFile type flag ('0') and stores sparse metadata in
// PAX extended attributes. Process all GNU sparse 1.0 attributes together in this block.
if (_typeFlag is TarEntryType.RegularFile or TarEntryType.V7RegularFile)
{
// 'GNU.sparse.name' overrides the placeholder path (e.g. 'GNUSparseFile.0/...') in the header's 'path' field.
if (ExtendedAttributes.TryGetValue(PaxEaGnuSparseName, out string? gnuSparseName))
{
_name = gnuSparseName;
}

// 'GNU.sparse.realsize' is the expanded (virtual) file size; stored separately from _size so that
// _size retains the archive data section length needed for correct stream positioning.
if (TarHelpers.TryGetStringAsBaseTenLong(ExtendedAttributes, PaxEaGnuSparseRealSize, out long gnuSparseRealSize))
{
_gnuSparseRealSize = gnuSparseRealSize;
}

// 'GNU.sparse.major=1' and 'GNU.sparse.minor=0' identify format 1.0, where the data section begins
// with an embedded text-format sparse map followed by the packed non-zero data segments.
if (ExtendedAttributes.TryGetValue(PaxEaGnuSparseMajor, out string? gnuSparseMajor) && gnuSparseMajor == "1" &&
ExtendedAttributes.TryGetValue(PaxEaGnuSparseMinor, out string? gnuSparseMinor) && gnuSparseMinor == "0")
{
_isGnuSparse10 = true;
}
}

// The 'uid' header field only fits 8 bytes, or the user could've stored an override in the extended attributes
if (TarHelpers.TryGetStringAsBaseTenInteger(ExtendedAttributes, PaxEaUid, out int uid))
{
Expand Down Expand Up @@ -220,6 +246,17 @@ internal void ProcessDataBlock(Stream archiveStream, bool copyData)
case TarEntryType.TapeVolume: // Might contain data
default: // Unrecognized entry types could potentially have a data section
_dataStream = GetDataStream(archiveStream, copyData);

// GNU sparse format 1.0 PAX entries embed a sparse map at the start of the
// data section. Create a GnuSparseStream wrapper that presents the expanded
// virtual file content. The sparse map is parsed lazily on first Read, so
// _dataStream remains unconsumed here — TarWriter can copy the raw condensed
// data, and AdvanceDataStreamIfNeeded can advance past it normally.
if (_isGnuSparse10 && _gnuSparseRealSize > 0 && _dataStream is not null)
{
_gnuSparseDataStream = new GnuSparseStream(_dataStream, _gnuSparseRealSize);
}

if (_dataStream is SeekableSubReadStream)
{
TarHelpers.AdvanceStream(archiveStream, _size);
Expand Down Expand Up @@ -282,9 +319,15 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
case TarEntryType.TapeVolume: // Might contain data
default: // Unrecognized entry types could potentially have a data section
_dataStream = await GetDataStreamAsync(archiveStream, copyData, _size, cancellationToken).ConfigureAwait(false);

if (_isGnuSparse10 && _gnuSparseRealSize > 0 && _dataStream is not null)
{
_gnuSparseDataStream = new GnuSparseStream(_dataStream, _gnuSparseRealSize);
}

if (_dataStream is SeekableSubReadStream)
{
await TarHelpers.AdvanceStreamAsync(archiveStream, _size, cancellationToken).ConfigureAwait(false);
TarHelpers.AdvanceStream(archiveStream, _size);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This replaced an async block with a sync one in an async method

}
else if (_dataStream is SubReadStream)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ internal sealed partial class TarHeader
private const string PaxEaDevMajor = "devmajor";
private const string PaxEaDevMinor = "devminor";

// Names of GNU sparse extended attributes (used with GNU sparse format 1.0 encoded via PAX)
private const string PaxEaGnuSparseName = "GNU.sparse.name";
private const string PaxEaGnuSparseRealSize = "GNU.sparse.realsize";
private const string PaxEaGnuSparseMajor = "GNU.sparse.major";
private const string PaxEaGnuSparseMinor = "GNU.sparse.minor";

internal Stream? _dataStream;
internal long _dataOffset;

Expand Down Expand Up @@ -77,6 +83,21 @@ internal sealed partial class TarHeader
private Dictionary<string, string>? _ea;
internal Dictionary<string, string> ExtendedAttributes => _ea ??= new Dictionary<string, string>();

// When a GNU sparse 1.0 PAX entry is read, the real (expanded) file size is stored here.
// This is separate from _size which holds the archive data size and is used for data stream reading.
internal long _gnuSparseRealSize;

// Set to true when both GNU.sparse.major=1 and GNU.sparse.minor=0 are present in the PAX
// extended attributes, indicating this is a GNU sparse format 1.0 entry whose data section
// contains an embedded sparse map followed by the packed data segments.
internal bool _isGnuSparse10;

// Created only when _isGnuSparse10 is true, _gnuSparseRealSize is positive, and a data
// stream exists; it wraps _dataStream and presents the expanded virtual file content.
// _dataStream remains the raw (condensed) stream so that TarWriter can round-trip the
// original sparse data and AdvanceDataStreamIfNeeded works without special-casing.
internal GnuSparseStream? _gnuSparseDataStream;

// GNU attributes

internal DateTimeOffset _aTime;
Expand Down Expand Up @@ -106,6 +127,9 @@ internal TarHeader(TarEntryFormat format, TarEntryType typeFlag, TarHeader other
_checksum = other._checksum;
_linkName = other._linkName;
_dataStream = other._dataStream;
_gnuSparseRealSize = other._gnuSparseRealSize;
_isGnuSparse10 = other._isGnuSparse10;
_gnuSparseDataStream = other._gnuSparseDataStream;
}

internal void AddExtendedAttributes(IEnumerable<KeyValuePair<string, string>> existing)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
<Compile Include="TarReader\TarReader.TarEntry.ExtractToFile.Tests.cs" />
<Compile Include="TarReader\TarReader.File.Tests.cs" />
<Compile Include="TarReader\TarReader.GetNextEntry.Tests.cs" />
<Compile Include="TarReader\TarReader.SparseFile.Tests.cs" />
<Compile Include="TarReader\TarReader.Tests.cs" />
<Compile Include="TarTestsBase.cs" />
<Compile Include="TarTestsBase.Gnu.cs" />
Expand Down
Loading
Loading