From 987d39f88f687d9158925708174225e25d0521b4 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Sun, 21 Aug 2022 01:29:20 -0400 Subject: [PATCH 01/10] Improve performance of Tar library (#74281) * Avoid unnecessary byte[] allocations * Remove unnecessary use of FileStreamOptions * Clean up Dispose{Async} implementations * Clean up unnecessary consts Not a perf thing, just readability. * Remove MemoryStream/Encoding.UTF8.GetBytes allocations, unnecessary async variants, and overhaul GenerateExtendedAttributesDataStream * Avoid string allocations in ReadMagicAttribute * Avoid allocation in WriteAsOctal * Improve handling of octal * Avoid allocation for version string * Removing boxing and char string allocation in GenerateExtendedAttributeName * Fix a couple unnecessary dictionary lookups * Replace Enum.HasFlag usage * Remove allocations from Write{Posix}Name * Replace ArrayPool use with string.Create * Replace more superfluous ArrayPool usage * Remove ArrayPool use from System.IO.Compression.ZipFile * Fix inverted condition * Use generic math to parse octal * Remove allocations from StringReader and string.Split * Remove magic string allocation for Ustar when not V7 * Remove file name and directory name allocation in GenerateExtendedAttributeName --- .../Common/src/System/IO/Archiving.Utils.cs | 41 +- .../src/Resources/Strings.resx | 3 + .../src/System/Formats/Tar/SubReadStream.cs | 5 +- .../src/System/Formats/Tar/TarFile.cs | 53 +-- .../src/System/Formats/Tar/TarHeader.Read.cs | 229 ++++++----- .../src/System/Formats/Tar/TarHeader.Write.cs | 357 +++++++++--------- .../src/System/Formats/Tar/TarHelpers.cs | 88 ++--- .../src/System/Formats/Tar/TarReader.cs | 74 ++-- .../src/System/Formats/Tar/TarWriter.Unix.cs | 11 +- .../System/Formats/Tar/TarWriter.Windows.cs | 18 +- .../src/System/Formats/Tar/TarWriter.cs | 150 ++++---- .../System/IO/Compression/ZipFile.Create.cs | 58 ++- 12 files changed, 511 insertions(+), 576 deletions(-) diff --git a/src/libraries/Common/src/System/IO/Archiving.Utils.cs b/src/libraries/Common/src/System/IO/Archiving.Utils.cs index 8335dbd26edbc3..80d633228cc0c1 100644 --- a/src/libraries/Common/src/System/IO/Archiving.Utils.cs +++ b/src/libraries/Common/src/System/IO/Archiving.Utils.cs @@ -15,10 +15,9 @@ internal static partial class ArchivingUtils private const char PathSeparatorChar = '/'; private const string PathSeparatorString = "/"; - public static string EntryFromPath(string entry, int offset, int length, ref char[] buffer, bool appendPathSeparator = false) + public static string EntryFromPath(string entry, int offset, int length, bool appendPathSeparator = false) { Debug.Assert(length <= entry.Length - offset); - Debug.Assert(buffer != null); // Remove any leading slashes from the entry name: while (length > 0) @@ -32,26 +31,36 @@ public static string EntryFromPath(string entry, int offset, int length, ref cha } if (length == 0) + { return appendPathSeparator ? PathSeparatorString : string.Empty; + } - int resultLength = appendPathSeparator ? length + 1 : length; - EnsureCapacity(ref buffer, resultLength); - entry.CopyTo(offset, buffer, 0, length); - - // '/' is a more broadly recognized directory separator on all platforms (eg: mac, linux) - // We don't use Path.DirectorySeparatorChar or AltDirectorySeparatorChar because this is - // explicitly trying to standardize to '/' - for (int i = 0; i < length; i++) + if (appendPathSeparator) { - char ch = buffer[i]; - if (ch == Path.DirectorySeparatorChar || ch == Path.AltDirectorySeparatorChar) - buffer[i] = PathSeparatorChar; + length++; } - if (appendPathSeparator) - buffer[length] = PathSeparatorChar; + return string.Create(length, (appendPathSeparator, offset, entry), static (dest, state) => + { + state.entry.AsSpan(state.offset).CopyTo(dest); + + // '/' is a more broadly recognized directory separator on all platforms (eg: mac, linux) + // We don't use Path.DirectorySeparatorChar or AltDirectorySeparatorChar because this is + // explicitly trying to standardize to '/' + for (int i = 0; i < dest.Length; i++) + { + char ch = dest[i]; + if (ch == Path.DirectorySeparatorChar || ch == Path.AltDirectorySeparatorChar) + { + dest[i] = PathSeparatorChar; + } + } - return new string(buffer, 0, resultLength); + if (state.appendPathSeparator) + { + dest[^1] = PathSeparatorChar; + } + }); } public static void EnsureCapacity(ref char[] buffer, int min) diff --git a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx index 79e3188410c3c9..001d85b764bcaf 100644 --- a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx +++ b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx @@ -255,4 +255,7 @@ An attempt was made to move the position before the beginning of the stream. + + Unable to parse number. + diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs index c6b9b6afc8d6db..014165939ac1c7 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs @@ -188,10 +188,7 @@ public override Task FlushAsync(CancellationToken cancellationToken) => // the substream is just 'a chunk' of the super-stream protected override void Dispose(bool disposing) { - if (disposing && !_isDisposed) - { - _isDisposed = true; - } + _isDisposed = true; base.Dispose(disposing); } } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs index a77cb2c36a65ea..9953045a5840df 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs @@ -15,11 +15,6 @@ namespace System.Formats.Tar /// public static class TarFile { - // Windows' MaxPath (260) is used as an arbitrary default capacity, as it is likely - // to be greater than the length of typical entry names from the file system, even - // on non-Windows platforms. The capacity will be increased, if needed. - private const int DefaultCapacity = 260; - /// /// Creates a tar stream that contains all the filesystem entries from the specified directory. /// @@ -283,23 +278,14 @@ private static void CreateFromDirectoryInternal(string sourceDirectoryName, Stre DirectoryInfo di = new(sourceDirectoryName); string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory); - char[] entryNameBuffer = ArrayPool.Shared.Rent(DefaultCapacity); - - try + if (includeBaseDirectory) { - if (includeBaseDirectory) - { - writer.WriteEntry(di.FullName, GetEntryNameForBaseDirectory(di.Name, ref entryNameBuffer)); - } - - foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) - { - writer.WriteEntry(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length, ref entryNameBuffer)); - } + writer.WriteEntry(di.FullName, GetEntryNameForBaseDirectory(di.Name)); } - finally + + foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) { - ArrayPool.Shared.Return(entryNameBuffer); + writer.WriteEntry(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length)); } } } @@ -339,23 +325,14 @@ private static async Task CreateFromDirectoryInternalAsync(string sourceDirector DirectoryInfo di = new(sourceDirectoryName); string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory); - char[] entryNameBuffer = ArrayPool.Shared.Rent(DefaultCapacity); - - try + if (includeBaseDirectory) { - if (includeBaseDirectory) - { - await writer.WriteEntryAsync(di.FullName, GetEntryNameForBaseDirectory(di.Name, ref entryNameBuffer), cancellationToken).ConfigureAwait(false); - } - - foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) - { - await writer.WriteEntryAsync(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length, ref entryNameBuffer), cancellationToken).ConfigureAwait(false); - } + await writer.WriteEntryAsync(di.FullName, GetEntryNameForBaseDirectory(di.Name), cancellationToken).ConfigureAwait(false); } - finally + + foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) { - ArrayPool.Shared.Return(entryNameBuffer); + await writer.WriteEntryAsync(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length), cancellationToken).ConfigureAwait(false); } } } @@ -365,18 +342,18 @@ private static string GetBasePathForCreateFromDirectory(DirectoryInfo di, bool i includeBaseDirectory && di.Parent != null ? di.Parent.FullName : di.FullName; // Constructs the entry name used for a filesystem entry when creating an archive. - private static string GetEntryNameForFileSystemInfo(FileSystemInfo file, int basePathLength, ref char[] entryNameBuffer) + private static string GetEntryNameForFileSystemInfo(FileSystemInfo file, int basePathLength) { int entryNameLength = file.FullName.Length - basePathLength; Debug.Assert(entryNameLength > 0); - bool isDirectory = file.Attributes.HasFlag(FileAttributes.Directory); - return ArchivingUtils.EntryFromPath(file.FullName, basePathLength, entryNameLength, ref entryNameBuffer, appendPathSeparator: isDirectory); + bool isDirectory = (file.Attributes & FileAttributes.Directory) != 0; + return ArchivingUtils.EntryFromPath(file.FullName, basePathLength, entryNameLength, appendPathSeparator: isDirectory); } - private static string GetEntryNameForBaseDirectory(string name, ref char[] entryNameBuffer) + private static string GetEntryNameForBaseDirectory(string name) { - return ArchivingUtils.EntryFromPath(name, 0, name.Length, ref entryNameBuffer, appendPathSeparator: true); + return ArchivingUtils.EntryFromPath(name, 0, name.Length, appendPathSeparator: true); } // Extracts an archive into the specified directory. diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index e89c9d4579dd38..f826a2c1e1b099 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -357,14 +357,14 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca { return null; } - int checksum = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(spanChecksum); + int checksum = (int)TarHelpers.ParseOctal(spanChecksum); // Zero checksum means the whole header is empty if (checksum == 0) { return null; } - long size = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Size, FieldLengths.Size)); + long size = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.Size, FieldLengths.Size)); if (size < 0) { throw new FormatException(string.Format(SR.TarSizeFieldNegative)); @@ -373,14 +373,14 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca // Continue with the rest of the fields that require no special checks TarHeader header = new(initialFormat, name: TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.Name, FieldLengths.Name)), - mode: TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)), - mTime: TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(TarHelpers.GetTenBaseLongFromOctalAsciiChars(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime))), + mode: (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)), + mTime: TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch((long)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime))), typeFlag: (TarEntryType)buffer[FieldLocations.TypeFlag]) { _checksum = checksum, _size = size, - _uid = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)), - _gid = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)), + _uid = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)), + _gid = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)), _linkName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName)) }; @@ -425,16 +425,23 @@ private void ReadMagicAttribute(Span buffer) } // When the magic field is set, the archive is newer than v7. - _magic = Encoding.ASCII.GetString(magic); - - if (_magic == GnuMagic) + if (magic.SequenceEqual(GnuMagicBytes)) { + _magic = GnuMagic; _format = TarEntryFormat.Gnu; } - else if (_format == TarEntryFormat.V7 && _magic == UstarMagic) + else if (magic.SequenceEqual(UstarMagicBytes)) + { + _magic = UstarMagic; + if (_format == TarEntryFormat.V7) + { + // Important: Only change to ustar if we had not changed the format to pax already + _format = TarEntryFormat.Ustar; + } + } + else { - // Important: Only change to ustar if we had not changed the format to pax already - _format = TarEntryFormat.Ustar; + _magic = Encoding.ASCII.GetString(magic); } } @@ -448,19 +455,29 @@ private void ReadVersionAttribute(Span buffer) } Span version = buffer.Slice(FieldLocations.Version, FieldLengths.Version); - - _version = Encoding.ASCII.GetString(version); - - // The POSIX formats have a 6 byte Magic "ustar\0", followed by a 2 byte Version "00" - if ((_format is TarEntryFormat.Ustar or TarEntryFormat.Pax) && _version != UstarVersion) + switch (_format) { - throw new FormatException(string.Format(SR.TarPosixFormatExpected, _name)); - } + case TarEntryFormat.Ustar or TarEntryFormat.Pax: + // The POSIX formats have a 6 byte Magic "ustar\0", followed by a 2 byte Version "00" + if (!version.SequenceEqual(UstarVersionBytes)) + { + throw new FormatException(string.Format(SR.TarPosixFormatExpected, _name)); + } + _version = UstarVersion; + break; - // The GNU format has a Magic+Version 8 byte string "ustar \0" - if (_format == TarEntryFormat.Gnu && _version != GnuVersion) - { - throw new FormatException(string.Format(SR.TarGnuFormatExpected, _name)); + case TarEntryFormat.Gnu: + // The GNU format has a Magic+Version 8 byte string "ustar \0" + if (!version.SequenceEqual(GnuVersionBytes)) + { + throw new FormatException(string.Format(SR.TarGnuFormatExpected, _name)); + } + _version = GnuVersion; + break; + + default: + _version = Encoding.ASCII.GetString(version); + break; } } @@ -477,10 +494,10 @@ private void ReadPosixAndGnuSharedAttributes(Span buffer) if (_typeFlag is TarEntryType.CharacterDevice or TarEntryType.BlockDevice) { // Major number for a character device or block device entry. - _devMajor = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor)); + _devMajor = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor)); // Minor number for a character device or block device entry. - _devMinor = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor)); + _devMinor = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor)); } } @@ -489,10 +506,10 @@ private void ReadPosixAndGnuSharedAttributes(Span buffer) private void ReadGnuAttributes(Span buffer) { // Convert byte arrays - long aTime = TarHelpers.GetTenBaseLongFromOctalAsciiChars(buffer.Slice(FieldLocations.ATime, FieldLengths.ATime)); + long aTime = (long)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.ATime, FieldLengths.ATime)); _aTime = TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(aTime); - long cTime = TarHelpers.GetTenBaseLongFromOctalAsciiChars(buffer.Slice(FieldLocations.CTime, FieldLengths.CTime)); + long cTime = (long)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.CTime, FieldLengths.CTime)); _cTime = TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(cTime); // TODO: Read the bytes of the currently unsupported GNU fields, in case user wants to write this entry into another GNU archive, they need to be preserved. https://github.com/dotnet/runtime/issues/68230 @@ -518,11 +535,23 @@ private void ReadUstarAttributes(Span buffer) // Throws if end of stream is reached or if an attribute is malformed. private void ReadExtendedAttributesBlock(Stream archiveStream) { - byte[]? buffer = CreateExtendedAttributesBufferIfSizeIsValid(); - if (buffer != null) + if (_size != 0) { - archiveStream.ReadExactly(buffer); - ReadExtendedAttributesFromBuffer(buffer, _name); + ValidateSize(); + + byte[]? buffer = null; + Span span = _size <= 256 ? + stackalloc byte[256] : + (buffer = ArrayPool.Shared.Rent((int)_size)); + span = span.Slice(0, (int)_size); + + archiveStream.ReadExactly(span); + ReadExtendedAttributesFromBuffer(span, _name); + + if (buffer is not null) + { + ArrayPool.Shared.Return(buffer); + } } } @@ -531,48 +560,43 @@ private void ReadExtendedAttributesBlock(Stream archiveStream) private async ValueTask ReadExtendedAttributesBlockAsync(Stream archiveStream, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); - byte[]? buffer = CreateExtendedAttributesBufferIfSizeIsValid(); - if (buffer != null) + + if (_size != 0) { - await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false); - ReadExtendedAttributesFromBuffer(buffer, _name); - } - } + ValidateSize(); + byte[] buffer = ArrayPool.Shared.Rent((int)_size); + Memory memory = buffer.AsMemory(0, (int)_size); - // Return a byte array if the size field has a valid value for extended attributes. Otherwise, return null, or throw. - private byte[]? CreateExtendedAttributesBufferIfSizeIsValid() - { - Debug.Assert(_typeFlag is TarEntryType.ExtendedAttributes or TarEntryType.GlobalExtendedAttributes); + await archiveStream.ReadExactlyAsync(memory, cancellationToken).ConfigureAwait(false); + ReadExtendedAttributesFromBuffer(memory.Span, _name); - // It is not expected that the extended attributes data section will be longer than Array.MaxLength, considering - // the size field is 12 bytes long, which fits a number with a value under int.MaxValue. - if (_size > Array.MaxLength) - { - throw new InvalidOperationException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag.ToString())); + ArrayPool.Shared.Return(buffer); } + } - if (_size == 0) + private void ValidateSize() + { + if ((uint)_size > (uint)Array.MaxLength) { - return null; + ThrowSizeFieldTooLarge(); } - return new byte[(int)_size]; + [DoesNotReturn] + void ThrowSizeFieldTooLarge() => + throw new InvalidOperationException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag.ToString())); } // Returns a dictionary containing the extended attributes collected from the provided byte buffer. private void ReadExtendedAttributesFromBuffer(ReadOnlySpan buffer, string name) { - string dataAsString = TarHelpers.GetTrimmedUtf8String(buffer); - - using StringReader reader = new(dataAsString); + buffer = TarHelpers.TrimEndingNullsAndSpaces(buffer); - while (TryGetNextExtendedAttribute(reader, out string? key, out string? value)) + while (TryGetNextExtendedAttribute(ref buffer, out string? key, out string? value)) { - if (ExtendedAttributes.ContainsKey(key)) + if (!ExtendedAttributes.TryAdd(key, value)) { throw new FormatException(string.Format(SR.TarDuplicateExtendedAttribute, name)); } - ExtendedAttributes.Add(key, value); } } @@ -581,11 +605,23 @@ private void ReadExtendedAttributesFromBuffer(ReadOnlySpan buffer, string // Throws if end of stream is reached. private void ReadGnuLongPathDataBlock(Stream archiveStream) { - byte[]? buffer = CreateGnuLongDataBufferIfSizeIsValid(); - if (buffer != null) + if (_size != 0) { - archiveStream.ReadExactly(buffer); - ReadGnuLongPathDataFromBuffer(buffer); + ValidateSize(); + + byte[]? buffer = null; + Span span = _size <= 256 ? + stackalloc byte[256] : + (buffer = ArrayPool.Shared.Rent((int)_size)); + span = span.Slice(0, (int)_size); + + archiveStream.ReadExactly(span); + ReadGnuLongPathDataFromBuffer(span); + + if (buffer is not null) + { + ArrayPool.Shared.Return(buffer); + } } } @@ -595,11 +631,17 @@ private void ReadGnuLongPathDataBlock(Stream archiveStream) private async ValueTask ReadGnuLongPathDataBlockAsync(Stream archiveStream, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); - byte[]? buffer = CreateGnuLongDataBufferIfSizeIsValid(); - if (buffer != null) + + if (_size != 0) { - await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false); - ReadGnuLongPathDataFromBuffer(buffer); + ValidateSize(); + byte[] buffer = ArrayPool.Shared.Rent((int)_size); + Memory memory = buffer.AsMemory(0, (int)_size); + + await archiveStream.ReadExactlyAsync(memory, cancellationToken).ConfigureAwait(false); + ReadGnuLongPathDataFromBuffer(memory.Span); + + ArrayPool.Shared.Return(buffer); } } @@ -618,61 +660,64 @@ private void ReadGnuLongPathDataFromBuffer(ReadOnlySpan buffer) } } - // Return a byte array if the size field has a valid value for GNU long metadata entry data. Otherwise, return null, or throw. - private byte[]? CreateGnuLongDataBufferIfSizeIsValid() - { - Debug.Assert(_typeFlag is TarEntryType.LongLink or TarEntryType.LongPath); - - if (_size > Array.MaxLength) - { - throw new InvalidOperationException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag.ToString())); - } - - if (_size == 0) - { - return null; - } - - return new byte[(int)_size]; - } - - // Tries to collect the next extended attribute from the string wrapped by the specified reader. + // Tries to collect the next extended attribute from the string. // Extended attributes are saved in the ISO/IEC 10646-1:2000 standard UTF-8 encoding format. // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html // "LENGTH KEY=VALUE\n" // Where LENGTH is the total number of bytes of that line, from LENGTH itself to the endline, inclusive. // Throws if end of stream is reached or if an attribute is malformed. private static bool TryGetNextExtendedAttribute( - StringReader reader, + ref ReadOnlySpan buffer, [NotNullWhen(returnValue: true)] out string? key, [NotNullWhen(returnValue: true)] out string? value) { key = null; value = null; - string? nextLine = reader.ReadLine(); - if (string.IsNullOrWhiteSpace(nextLine)) + // Slice off the next line. + int newlinePos = buffer.IndexOf((byte)'\n'); + if (newlinePos < 0) { return false; } + ReadOnlySpan line = buffer.Slice(0, newlinePos); - StringSplitOptions splitOptions = StringSplitOptions.RemoveEmptyEntries; + // Update buffer to point to the next line for the next call + buffer = buffer.Slice(newlinePos + 1); - string[] attributeArray = nextLine.Split(' ', 2, splitOptions); - if (attributeArray.Length != 2) + // Find the end of the length and remove everything up through it. + int spacePos = line.IndexOf((byte)' '); + if (spacePos < 0) { return false; } + line = line.Slice(spacePos + 1).TrimStart((byte)' '); - string[] keyAndValueArray = attributeArray[1].Split('=', 2, splitOptions); - if (keyAndValueArray.Length != 2) + // If there are any more spaces, it's malformed. + if (line.IndexOf((byte)' ') >= 0) { return false; } - key = keyAndValueArray[0]; - value = keyAndValueArray[1]; + // Find the equal separator. + int equalPos = line.IndexOf((byte)'='); + if (equalPos < 0) + { + return false; + } + + ReadOnlySpan keySlice = line.Slice(0, equalPos); + ReadOnlySpan valueSlice = line.Slice(equalPos + 1); + + // If the value contains an =, it's malformed. + if (valueSlice.IndexOf((byte)'=') >= 0) + { + return false; + } + // Return the parsed key and value. + key = Encoding.UTF8.GetString(keySlice); + value = Encoding.UTF8.GetString(valueSlice); return true; } } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index ad5cdae22d0315..724274d1689d92 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -1,10 +1,13 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers; +using System.Buffers.Text; using System.Collections.Generic; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.IO; +using System.Numerics; using System.Text; using System.Threading; using System.Threading.Tasks; @@ -14,16 +17,12 @@ namespace System.Formats.Tar // Writes header attributes of a tar archive entry. internal sealed partial class TarHeader { - private static ReadOnlySpan PaxMagicBytes => "ustar\0"u8; - private static ReadOnlySpan PaxVersionBytes => "00"u8; + private static ReadOnlySpan UstarMagicBytes => "ustar\0"u8; + private static ReadOnlySpan UstarVersionBytes => "00"u8; private static ReadOnlySpan GnuMagicBytes => "ustar "u8; private static ReadOnlySpan GnuVersionBytes => " \0"u8; - // Extended Attribute entries have a special format in the Name field: - // "{dirName}/PaxHeaders.{processId}/{fileName}{trailingSeparator}" - private const string PaxHeadersFormat = "{0}/PaxHeaders.{1}/{2}{3}"; - // Predefined text for the Name field of a GNU long metadata entry. Applies for both LongPath ('L') and LongLink ('K'). private const string GnuLongMetadataName = "././@LongLink"; @@ -61,7 +60,7 @@ private long WriteV7FieldsToBuffer(Span buffer) long actualLength = GetTotalDataBytesToWrite(); TarEntryType actualEntryType = TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.V7, _typeFlag); - int tmpChecksum = WriteName(buffer, out _); + int tmpChecksum = WriteName(buffer); tmpChecksum += WriteCommonFields(buffer, actualLength, actualEntryType); _checksum = WriteChecksum(tmpChecksum, buffer); @@ -215,7 +214,7 @@ internal async Task WriteAsGnuAsync(Stream archiveStream, Memory buffer, C // Second, we determine if we need a preceding LongPath, and write it if needed if (_name.Length > FieldLengths.Name) { - TarHeader longPathHeader = await GetGnuLongMetadataHeaderAsync(TarEntryType.LongPath, _name, cancellationToken).ConfigureAwait(false); + TarHeader longPathHeader = GetGnuLongMetadataHeader(TarEntryType.LongPath, _name); await longPathHeader.WriteAsGnuInternalAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); buffer.Span.Clear(); // Reset it to reuse it } @@ -227,34 +226,8 @@ internal async Task WriteAsGnuAsync(Stream archiveStream, Memory buffer, C // Creates and returns a GNU long metadata header, with the specified long text written into its data stream. private static TarHeader GetGnuLongMetadataHeader(TarEntryType entryType, string longText) { - TarHeader longMetadataHeader = GetDefaultGnuLongMetadataHeader(longText.Length, entryType); - Debug.Assert(longMetadataHeader._dataStream != null); - - longMetadataHeader._dataStream.Write(Encoding.UTF8.GetBytes(longText)); - longMetadataHeader._dataStream.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning - - return longMetadataHeader; - } - - // Asynchronously creates and returns a GNU long metadata header, with the specified long text written into its data stream. - private static async Task GetGnuLongMetadataHeaderAsync(TarEntryType entryType, string longText, CancellationToken cancellationToken) - { - cancellationToken.ThrowIfCancellationRequested(); - - TarHeader longMetadataHeader = GetDefaultGnuLongMetadataHeader(longText.Length, entryType); - Debug.Assert(longMetadataHeader._dataStream != null); - - await longMetadataHeader._dataStream.WriteAsync(Encoding.UTF8.GetBytes(longText), cancellationToken).ConfigureAwait(false); - longMetadataHeader._dataStream.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning - - return longMetadataHeader; - } - - // Constructs a GNU metadata header with default values for the specified entry type. - private static TarHeader GetDefaultGnuLongMetadataHeader(int longTextLength, TarEntryType entryType) - { - Debug.Assert((entryType is TarEntryType.LongPath && longTextLength > FieldLengths.Name) || - (entryType is TarEntryType.LongLink && longTextLength > FieldLengths.LinkName)); + Debug.Assert((entryType is TarEntryType.LongPath && longText.Length > FieldLengths.Name) || + (entryType is TarEntryType.LongLink && longText.Length > FieldLengths.LinkName)); TarHeader longMetadataHeader = new(TarEntryFormat.Gnu); @@ -264,7 +237,7 @@ private static TarHeader GetDefaultGnuLongMetadataHeader(int longTextLength, Tar longMetadataHeader._gid = 0; longMetadataHeader._mTime = DateTimeOffset.MinValue; // 0 longMetadataHeader._typeFlag = entryType; - longMetadataHeader._dataStream = new MemoryStream(); + longMetadataHeader._dataStream = new MemoryStream(Encoding.UTF8.GetBytes(longText)); return longMetadataHeader; } @@ -302,7 +275,7 @@ private void WriteAsGnuSharedInternal(Span buffer, out long actualLength) { actualLength = GetTotalDataBytesToWrite(); - int tmpChecksum = WriteName(buffer, out _); + int tmpChecksum = WriteName(buffer); tmpChecksum += WriteCommonFields(buffer, actualLength, TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.Gnu, _typeFlag)); tmpChecksum += WriteGnuMagicAndVersion(buffer); tmpChecksum += WritePosixAndGnuSharedFields(buffer); @@ -320,13 +293,13 @@ private void WriteAsPaxExtendedAttributes(Stream archiveStream, Span buffe } // Asynchronously writes the current header as a PAX Extended Attributes entry into the archive stream and returns the value of the final checksum. - private async Task WriteAsPaxExtendedAttributesAsync(Stream archiveStream, Memory buffer, Dictionary extendedAttributes, bool isGea, int globalExtendedAttributesEntryNumber, CancellationToken cancellationToken) + private Task WriteAsPaxExtendedAttributesAsync(Stream archiveStream, Memory buffer, Dictionary extendedAttributes, bool isGea, int globalExtendedAttributesEntryNumber, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); WriteAsPaxExtendedAttributesShared(isGea, globalExtendedAttributesEntryNumber); - _dataStream = await GenerateExtendedAttributesDataStreamAsync(extendedAttributes, cancellationToken).ConfigureAwait(false); - await WriteAsPaxInternalAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); + _dataStream = GenerateExtendedAttributesDataStream(extendedAttributes); + return WriteAsPaxInternalAsync(archiveStream, buffer, cancellationToken); } // Initializes the name, mode and type flag of a PAX extended attributes entry. @@ -385,24 +358,33 @@ private void WriteAsPaxSharedInternal(Span buffer, out long actualLength) _checksum = WriteChecksum(tmpChecksum, buffer); } - // All formats save in the name byte array only the ASCII bytes that fit. The full string is returned in the out byte array. - private int WriteName(Span buffer, out byte[] fullNameBytes) + // All formats save in the name byte array only the ASCII bytes that fit. + private int WriteName(Span buffer) { - fullNameBytes = Encoding.ASCII.GetBytes(_name); - int nameBytesLength = Math.Min(fullNameBytes.Length, FieldLengths.Name); - int checksum = WriteLeftAlignedBytesAndGetChecksum(fullNameBytes.AsSpan(0, nameBytesLength), buffer.Slice(FieldLocations.Name, FieldLengths.Name)); - return checksum; + ReadOnlySpan src = _name.AsSpan(0, Math.Min(_name.Length, FieldLengths.Name)); + Span dest = buffer.Slice(FieldLocations.Name, FieldLengths.Name); + int encoded = Encoding.ASCII.GetBytes(src, dest); + return Checksum(dest.Slice(0, encoded)); } // Ustar and PAX save in the name byte array only the ASCII bytes that fit, and the rest of that string is saved in the prefix field. private int WritePosixName(Span buffer) { - int checksum = WriteName(buffer, out byte[] fullNameBytes); - if (fullNameBytes.Length > FieldLengths.Name) + int checksum = WriteName(buffer); + + if (_name.Length > FieldLengths.Name) { - int prefixBytesLength = Math.Min(fullNameBytes.Length - FieldLengths.Name, FieldLengths.Name); - checksum += WriteLeftAlignedBytesAndGetChecksum(fullNameBytes.AsSpan(FieldLengths.Name, prefixBytesLength), buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix)); + int prefixBytesLength = Math.Min(_name.Length - FieldLengths.Name, FieldLengths.Name); + Span remaining = prefixBytesLength <= 256 ? + stackalloc byte[prefixBytesLength] : + new byte[prefixBytesLength]; + + int encoded = Encoding.ASCII.GetBytes(_name.AsSpan(FieldLengths.Name), remaining); + Debug.Assert(encoded == remaining.Length); + + checksum += WriteLeftAlignedBytesAndGetChecksum(remaining, buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix)); } + return checksum; } @@ -413,27 +395,27 @@ private int WriteCommonFields(Span buffer, long actualLength, TarEntryType if (_mode > 0) { - checksum += WriteAsOctal(_mode, buffer, FieldLocations.Mode, FieldLengths.Mode); + checksum += FormatOctal(_mode, buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)); } if (_uid > 0) { - checksum += WriteAsOctal(_uid, buffer, FieldLocations.Uid, FieldLengths.Uid); + checksum += FormatOctal(_uid, buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)); } if (_gid > 0) { - checksum += WriteAsOctal(_gid, buffer, FieldLocations.Gid, FieldLengths.Gid); + checksum += FormatOctal(_gid, buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)); } _size = actualLength; if (_size > 0) { - checksum += WriteAsOctal(_size, buffer, FieldLocations.Size, FieldLengths.Size); + checksum += FormatOctal(_size, buffer.Slice(FieldLocations.Size, FieldLengths.Size)); } - checksum += WriteAsTimestamp(_mTime, buffer, FieldLocations.MTime, FieldLengths.MTime); + checksum += WriteAsTimestamp(_mTime, buffer.Slice(FieldLocations.MTime, FieldLengths.MTime)); char typeFlagChar = (char)actualEntryType; buffer[FieldLocations.TypeFlag] = (byte)typeFlagChar; @@ -441,7 +423,7 @@ private int WriteCommonFields(Span buffer, long actualLength, TarEntryType if (!string.IsNullOrEmpty(_linkName)) { - checksum += WriteAsAsciiString(_linkName, buffer, FieldLocations.LinkName, FieldLengths.LinkName); + checksum += WriteAsAsciiString(_linkName, buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName)); } return checksum; @@ -465,8 +447,8 @@ private long GetTotalDataBytesToWrite() // Writes the magic and version fields of a ustar or pax entry into the specified spans. private static int WritePosixMagicAndVersion(Span buffer) { - int checksum = WriteLeftAlignedBytesAndGetChecksum(PaxMagicBytes, buffer.Slice(FieldLocations.Magic, FieldLengths.Magic)); - checksum += WriteLeftAlignedBytesAndGetChecksum(PaxVersionBytes, buffer.Slice(FieldLocations.Version, FieldLengths.Version)); + int checksum = WriteLeftAlignedBytesAndGetChecksum(UstarMagicBytes, buffer.Slice(FieldLocations.Magic, FieldLengths.Magic)); + checksum += WriteLeftAlignedBytesAndGetChecksum(UstarVersionBytes, buffer.Slice(FieldLocations.Version, FieldLengths.Version)); return checksum; } @@ -485,22 +467,22 @@ private int WritePosixAndGnuSharedFields(Span buffer) if (!string.IsNullOrEmpty(_uName)) { - checksum += WriteAsAsciiString(_uName, buffer, FieldLocations.UName, FieldLengths.UName); + checksum += WriteAsAsciiString(_uName, buffer.Slice(FieldLocations.UName, FieldLengths.UName)); } if (!string.IsNullOrEmpty(_gName)) { - checksum += WriteAsAsciiString(_gName, buffer, FieldLocations.GName, FieldLengths.GName); + checksum += WriteAsAsciiString(_gName, buffer.Slice(FieldLocations.GName, FieldLengths.GName)); } if (_devMajor > 0) { - checksum += WriteAsOctal(_devMajor, buffer, FieldLocations.DevMajor, FieldLengths.DevMajor); + checksum += FormatOctal(_devMajor, buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor)); } if (_devMinor > 0) { - checksum += WriteAsOctal(_devMinor, buffer, FieldLocations.DevMinor, FieldLengths.DevMinor); + checksum += FormatOctal(_devMinor, buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor)); } return checksum; @@ -509,8 +491,8 @@ private int WritePosixAndGnuSharedFields(Span buffer) // Saves the gnu-specific fields into the specified spans. private int WriteGnuFields(Span buffer) { - int checksum = WriteAsTimestamp(_aTime, buffer, FieldLocations.ATime, FieldLengths.ATime); - checksum += WriteAsTimestamp(_cTime, buffer, FieldLocations.CTime, FieldLengths.CTime); + int checksum = WriteAsTimestamp(_aTime, buffer.Slice(FieldLocations.ATime, FieldLengths.ATime)); + checksum += WriteAsTimestamp(_cTime, buffer.Slice(FieldLocations.CTime, FieldLengths.CTime)); if (_gnuUnusedBytes != null) { @@ -524,8 +506,18 @@ private int WriteGnuFields(Span buffer) private static void WriteData(Stream archiveStream, Stream dataStream, long actualLength) { dataStream.CopyTo(archiveStream); // The data gets copied from the current position + int paddingAfterData = TarHelpers.CalculatePadding(actualLength); - archiveStream.Write(new byte[paddingAfterData]); + if (paddingAfterData != 0) + { + Debug.Assert(paddingAfterData <= TarHelpers.RecordSize); + + Span padding = stackalloc byte[TarHelpers.RecordSize]; + padding = padding.Slice(0, paddingAfterData); + padding.Clear(); + + archiveStream.Write(padding); + } } // Asynchronously writes the current header's data stream into the archive stream. @@ -534,44 +526,88 @@ private static async Task WriteDataAsync(Stream archiveStream, Stream dataStream cancellationToken.ThrowIfCancellationRequested(); await dataStream.CopyToAsync(archiveStream, cancellationToken).ConfigureAwait(false); // The data gets copied from the current position + int paddingAfterData = TarHelpers.CalculatePadding(actualLength); - await archiveStream.WriteAsync(new byte[paddingAfterData], cancellationToken).ConfigureAwait(false); + if (paddingAfterData != 0) + { + byte[] buffer = ArrayPool.Shared.Rent(paddingAfterData); + Array.Clear(buffer, 0, paddingAfterData); + + await archiveStream.WriteAsync(buffer.AsMemory(0, paddingAfterData), cancellationToken).ConfigureAwait(false); + + ArrayPool.Shared.Return(buffer); + } } // Dumps into the archive stream an extended attribute entry containing metadata of the entry it precedes. private static Stream? GenerateExtendedAttributesDataStream(Dictionary extendedAttributes) { MemoryStream? dataStream = null; + + byte[]? buffer = null; + Span span = stackalloc byte[512]; + if (extendedAttributes.Count > 0) { dataStream = new MemoryStream(); + foreach ((string attribute, string value) in extendedAttributes) { - byte[] entryBytes = GenerateExtendedAttributeKeyValuePairAsByteArray(Encoding.UTF8.GetBytes(attribute), Encoding.UTF8.GetBytes(value)); - dataStream.Write(entryBytes); + // Generates an extended attribute key value pair string saved into a byte array, following the ISO/IEC 10646-1:2000 standard UTF-8 encoding format. + // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html + + // The format is: + // "XX attribute=value\n" + // where "XX" is the number of characters in the entry, including those required for the count itself. + int length = 3 + Encoding.UTF8.GetByteCount(attribute) + Encoding.UTF8.GetByteCount(value); + length += CountDigits(length); + + // Get a large enough buffer if we don't already have one. + if (span.Length < length) + { + if (buffer is not null) + { + ArrayPool.Shared.Return(buffer); + } + buffer = ArrayPool.Shared.Rent(length); + span = buffer; + } + + // Format the contents. + bool formatted = Utf8Formatter.TryFormat(length, span, out int bytesWritten); + Debug.Assert(formatted); + span[bytesWritten++] = (byte)' '; + bytesWritten += Encoding.UTF8.GetBytes(attribute, span.Slice(bytesWritten)); + span[bytesWritten++] = (byte)'='; + bytesWritten += Encoding.UTF8.GetBytes(value, span.Slice(bytesWritten)); + span[bytesWritten++] = (byte)'\n'; + + // Write it to the stream. + dataStream.Write(span.Slice(0, bytesWritten)); } - dataStream?.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning + + dataStream.Position = 0; // Ensure it gets written into the archive from the beginning } - return dataStream; - } - // Asynchronously dumps into the archive stream an extended attribute entry containing metadata of the entry it precedes. - private static async Task GenerateExtendedAttributesDataStreamAsync(Dictionary extendedAttributes, CancellationToken cancellationToken) - { - cancellationToken.ThrowIfCancellationRequested(); + if (buffer is not null) + { + ArrayPool.Shared.Return(buffer); + } - MemoryStream? dataStream = null; - if (extendedAttributes.Count > 0) + return dataStream; + + static int CountDigits(int value) { - dataStream = new MemoryStream(); - foreach ((string attribute, string value) in extendedAttributes) + Debug.Assert(value >= 0); + int digits = 1; + while (true) { - byte[] entryBytes = GenerateExtendedAttributeKeyValuePairAsByteArray(Encoding.UTF8.GetBytes(attribute), Encoding.UTF8.GetBytes(value)); - await dataStream.WriteAsync(entryBytes, cancellationToken).ConfigureAwait(false); + value /= 10; + if (value == 0) break; + digits++; } - dataStream?.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning + return digits; } - return dataStream; } // Some fields that have a reserved spot in the header, may not fit in such field anymore, but they can fit in the @@ -584,10 +620,12 @@ private void CollectExtendedAttributesFromStandardFieldsIfNeeded() { ExtendedAttributes.Add(PaxEaMTime, TarHelpers.GetTimestampStringFromDateTimeOffset(_mTime)); } + if (!string.IsNullOrEmpty(_gName)) { TryAddStringField(ExtendedAttributes, PaxEaGName, _gName, FieldLengths.GName); } + if (!string.IsNullOrEmpty(_uName)) { TryAddStringField(ExtendedAttributes, PaxEaUName, _uName, FieldLengths.UName); @@ -603,7 +641,6 @@ private void CollectExtendedAttributesFromStandardFieldsIfNeeded() ExtendedAttributes.Add(PaxEaSize, _size.ToString()); } - // Adds the specified string to the dictionary if it's longer than the specified max byte length. static void TryAddStringField(Dictionary extendedAttributes, string key, string value, int maxLength) { @@ -614,63 +651,24 @@ static void TryAddStringField(Dictionary extendedAttributes, str } } - // Generates an extended attribute key value pair string saved into a byte array, following the ISO/IEC 10646-1:2000 standard UTF-8 encoding format. - // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html - private static byte[] GenerateExtendedAttributeKeyValuePairAsByteArray(byte[] keyBytes, byte[] valueBytes) - { - // Assuming key="ab" and value="cdef" - - // The " ab=cdef\n" attribute string has a length of 9 chars - int suffixByteCount = 3 + // leading space, equals sign and trailing newline - keyBytes.Length + valueBytes.Length; - - // The count string "9" has a length of 1 char - string suffixByteCountString = suffixByteCount.ToString(); - int firstTotalByteCount = Encoding.ASCII.GetByteCount(suffixByteCountString); - - // If we prepend the count string length to the attribute string, - // the total length increases to 10, which has one more digit - // "9 abc=def\n" - int firstPrefixAndSuffixByteCount = firstTotalByteCount + suffixByteCount; - - // The new count string "10" has an increased length of 2 chars - string prefixAndSuffixByteCountString = firstPrefixAndSuffixByteCount.ToString(); - int realTotalCharCount = Encoding.ASCII.GetByteCount(prefixAndSuffixByteCountString); - - byte[] finalTotalCharCountBytes = Encoding.ASCII.GetBytes(prefixAndSuffixByteCountString); - - // The final string should contain the correct total length now - List bytesList = new(); - - bytesList.AddRange(finalTotalCharCountBytes); - bytesList.Add(TarHelpers.SpaceChar); - bytesList.AddRange(keyBytes); - bytesList.Add(TarHelpers.EqualsChar); - bytesList.AddRange(valueBytes); - bytesList.Add(TarHelpers.NewLineChar); - - Debug.Assert(bytesList.Count == (realTotalCharCount + suffixByteCount)); - - return bytesList.ToArray(); - } - // The checksum accumulator first adds up the byte values of eight space chars, then the final number // is written on top of those spaces on the specified span as ascii. // At the end, it's saved in the header field and the final value returned. - internal int WriteChecksum(int checksum, Span buffer) + internal static int WriteChecksum(int checksum, Span buffer) { // The checksum field is also counted towards the total sum // but as an array filled with spaces - checksum += TarHelpers.SpaceChar * 8; + checksum += (byte)' ' * 8; Span converted = stackalloc byte[FieldLengths.Checksum]; - WriteAsOctal(checksum, converted, 0, converted.Length); + converted.Clear(); + FormatOctal(checksum, converted); Span destination = buffer.Slice(FieldLocations.Checksum, FieldLengths.Checksum); // Checksum field ends with a null and a space - destination[^1] = TarHelpers.SpaceChar; // ' ' - destination[^2] = 0; // '\0' + destination[^1] = (byte)' '; + destination[^2] = (byte)'\0'; int i = destination.Length - 3; int j = converted.Length - 1; @@ -684,7 +682,7 @@ internal int WriteChecksum(int checksum, Span buffer) } else { - destination[i] = TarHelpers.ZeroChar; // Leading zero chars '0' + destination[i] = (byte)'0'; // Leading zero chars } i--; } @@ -697,67 +695,75 @@ private static int WriteLeftAlignedBytesAndGetChecksum(ReadOnlySpan bytesT { Debug.Assert(destination.Length > 1); - int checksum = 0; - - for (int i = 0, j = 0; i < destination.Length && j < bytesToWrite.Length; i++, j++) - { - destination[i] = bytesToWrite[j]; - checksum += destination[i]; - } + // Copy as many bytes as will fit + int numToCopy = Math.Min(bytesToWrite.Length, destination.Length); + bytesToWrite = bytesToWrite.Slice(0, numToCopy); + bytesToWrite.CopyTo(destination); - return checksum; + return Checksum(bytesToWrite); } // Writes the specified bytes aligned to the right, filling all the leading bytes with the zero char 0x30, // ensuring a null terminator is included at the end of the specified span. private static int WriteRightAlignedBytesAndGetChecksum(ReadOnlySpan bytesToWrite, Span destination) { - int checksum = 0; - int i = destination.Length - 1; - int j = bytesToWrite.Length - 1; + Debug.Assert(destination.Length > 1); - while (i >= 0) + // Null terminated + destination[^1] = (byte)'\0'; + + // Copy as many input bytes as will fit + int numToCopy = Math.Min(bytesToWrite.Length, destination.Length - 1); + bytesToWrite = bytesToWrite.Slice(0, numToCopy); + int copyPos = destination.Length - 1 - bytesToWrite.Length; + bytesToWrite.CopyTo(destination.Slice(copyPos)); + + // Fill all leading bytes with zeros + destination.Slice(0, copyPos).Fill((byte)'0'); + + return Checksum(destination); + } + + private static int Checksum(ReadOnlySpan bytes) + { + int checksum = 0; + foreach (byte b in bytes) { - if (i == destination.Length - 1) - { - destination[i] = 0; // null terminated - } - else if (j >= 0) - { - destination[i] = bytesToWrite[j]; - j--; - } - else - { - destination[i] = TarHelpers.ZeroChar; // leading zeros - } - checksum += destination[i]; - i--; + checksum += b; } - return checksum; } // Writes the specified decimal number as a right-aligned octal number and returns its checksum. - internal static int WriteAsOctal(long tenBaseNumber, Span destination, int location, int length) + internal static int FormatOctal(long value, Span destination) { - long octal = TarHelpers.ConvertDecimalToOctal(tenBaseNumber); - byte[] bytes = Encoding.ASCII.GetBytes(octal.ToString()); - return WriteRightAlignedBytesAndGetChecksum(bytes.AsSpan(), destination.Slice(location, length)); + ulong remaining = (ulong)value; + Span digits = stackalloc byte[32]; // longer than any possible octal formatting of a ulong + + int i = digits.Length - 1; + while (true) + { + digits[i] = (byte)('0' + (remaining % 8)); + remaining /= 8; + if (remaining == 0) break; + i--; + } + + return WriteRightAlignedBytesAndGetChecksum(digits.Slice(i), destination); } // Writes the specified DateTimeOffset's Unix time seconds as a right-aligned octal number, and returns its checksum. - private static int WriteAsTimestamp(DateTimeOffset timestamp, Span destination, int location, int length) + private static int WriteAsTimestamp(DateTimeOffset timestamp, Span destination) { long unixTimeSeconds = timestamp.ToUnixTimeSeconds(); - return WriteAsOctal(unixTimeSeconds, destination, location, length); + return FormatOctal(unixTimeSeconds, destination); } // Writes the specified text as an ASCII string aligned to the left, and returns its checksum. - private static int WriteAsAsciiString(string str, Span buffer, int location, int length) + private static int WriteAsAsciiString(string str, Span buffer) { byte[] bytes = Encoding.ASCII.GetBytes(str); - return WriteLeftAlignedBytesAndGetChecksum(bytes.AsSpan(), buffer.Slice(location, length)); + return WriteLeftAlignedBytesAndGetChecksum(bytes.AsSpan(), buffer); } // Gets the special name for the 'name' field in an extended attribute entry. @@ -767,18 +773,15 @@ private static int WriteAsAsciiString(string str, Span buffer, int locatio // - %f: The filename of the file, equivalent to the result of the basename utility on the translated pathname. private string GenerateExtendedAttributeName() { - string? dirName = Path.GetDirectoryName(_name); - dirName = string.IsNullOrEmpty(dirName) ? "." : dirName; - - int processId = Environment.ProcessId; - - string? fileName = Path.GetFileName(_name); - fileName = string.IsNullOrEmpty(fileName) ? "." : fileName; + ReadOnlySpan dirName = Path.GetDirectoryName(_name.AsSpan()); + dirName = dirName.IsEmpty ? "." : dirName; - string trailingSeparator = (_typeFlag is TarEntryType.Directory or TarEntryType.DirectoryList) ? - $"{Path.DirectorySeparatorChar}" : string.Empty; + ReadOnlySpan fileName = Path.GetFileName(_name.AsSpan()); + fileName = fileName.IsEmpty ? "." : fileName; - return string.Format(PaxHeadersFormat, dirName, processId, fileName, trailingSeparator); + return _typeFlag is TarEntryType.Directory or TarEntryType.DirectoryList ? + $"{dirName}/PaxHeaders.{Environment.ProcessId}/{fileName}{Path.DirectorySeparatorChar}" : + $"{dirName}/PaxHeaders.{Environment.ProcessId}/{fileName}"; } // Gets the special name for the 'name' field in a global extended attribute entry. diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs index d4592eb85d4b5f..898daf1e6c3ed7 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs @@ -3,8 +3,10 @@ using System.Buffers; using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; using System.Globalization; using System.IO; +using System.Numerics; using System.Text; using System.Threading; using System.Threading.Tasks; @@ -17,11 +19,6 @@ internal static partial class TarHelpers internal const short RecordSize = 512; internal const int MaxBufferLength = 4096; - internal const int ZeroChar = 0x30; - internal const byte SpaceChar = 0x20; - internal const byte EqualsChar = 0x3d; - internal const byte NewLineChar = 0xa; - // Default mode for TarEntry created for a file-type. private const UnixFileMode DefaultFileMode = UnixFileMode.UserRead | UnixFileMode.UserWrite | @@ -117,36 +114,6 @@ internal static int CalculatePadding(long size) return padding; } - // Returns the specified 8-base number as a 10-base number. - internal static int ConvertDecimalToOctal(int value) - { - int multiplier = 1; - int accum = value; - int actual = 0; - while (accum != 0) - { - actual += (accum % 8) * multiplier; - accum /= 8; - multiplier *= 10; - } - return actual; - } - - // Returns the specified 10-base number as an 8-base number. - internal static long ConvertDecimalToOctal(long value) - { - long multiplier = 1; - long accum = value; - long actual = 0; - while (accum != 0) - { - actual += (accum % 8) * multiplier; - accum /= 8; - multiplier *= 10; - } - return actual; - } - // Returns true if all the bytes in the specified array are nulls, false otherwise. internal static bool IsAllNullBytes(Span buffer) => buffer.IndexOfAnyExcept((byte)0) < 0; @@ -191,9 +158,10 @@ internal static bool TryGetStringAsBaseTenInteger(IReadOnlyDictionary buffer) + /// Parses a byte span that represents an ASCII string containing a number in octal base. + internal static T ParseOctal(ReadOnlySpan buffer) where T : struct, INumber { - string str = GetTrimmedAsciiString(buffer); - return string.IsNullOrEmpty(str) ? 0 : Convert.ToInt32(str, fromBase: 8); - } + buffer = TrimEndingNullsAndSpaces(buffer); - // Receives a byte array that represents an ASCII string containing a number in octal base. - // Converts the array to an octal base number, then transforms it to ten base and returns it. - internal static long GetTenBaseLongFromOctalAsciiChars(Span buffer) - { - string str = GetTrimmedAsciiString(buffer); - return string.IsNullOrEmpty(str) ? 0 : Convert.ToInt64(str, fromBase: 8); + T octalFactor = T.CreateTruncating(8u); + T value = T.Zero; + foreach (byte b in buffer) + { + uint digit = (uint)(b - '0'); + if (digit >= 8) + { + ThrowInvalidNumber(); + } + + value = checked((value * octalFactor) + T.CreateTruncating(digit)); + } + + return value; } + [DoesNotReturn] + private static void ThrowInvalidNumber() => + throw new FormatException(SR.Format(SR.TarInvalidNumber)); + // Returns the string contained in the specified buffer of bytes, // in the specified encoding, removing the trailing null or space chars. private static string GetTrimmedString(ReadOnlySpan buffer, Encoding encoding) + { + buffer = TrimEndingNullsAndSpaces(buffer); + return buffer.IsEmpty ? string.Empty : encoding.GetString(buffer); + } + + internal static ReadOnlySpan TrimEndingNullsAndSpaces(ReadOnlySpan buffer) { int trimmedLength = buffer.Length; - while (trimmedLength > 0 && IsByteNullOrSpace(buffer[trimmedLength - 1])) + while (trimmedLength > 0 && buffer[trimmedLength - 1] is 0 or 32) { trimmedLength--; } - return trimmedLength == 0 ? string.Empty : encoding.GetString(buffer.Slice(0, trimmedLength)); - - static bool IsByteNullOrSpace(byte c) => c is 0 or 32; + return buffer.Slice(0, trimmedLength); } // Returns the ASCII string contained in the specified buffer of bytes, diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs index 7e951c5243e35c..137acbcfbd2d01 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs @@ -54,8 +54,18 @@ public TarReader(Stream archiveStream, bool leaveOpen = false) /// The property of any entry can be replaced with a new stream. If the user decides to replace it on a instance that was obtained using a , the underlying stream gets disposed immediately, freeing the of origin from the responsibility of having to dispose it. public void Dispose() { - Dispose(disposing: true); - GC.SuppressFinalize(this); + if (!_isDisposed) + { + _isDisposed = true; + + if (!_leaveOpen && _dataStreamsToDispose?.Count > 0) + { + foreach (Stream s in _dataStreamsToDispose) + { + s.Dispose(); + } + } + } } /// @@ -64,8 +74,18 @@ public void Dispose() /// The property of any entry can be replaced with a new stream. If the user decides to replace it on a instance that was obtained using a , the underlying stream gets disposed immediately, freeing the of origin from the responsibility of having to dispose it. public async ValueTask DisposeAsync() { - await DisposeAsync(disposing: true).ConfigureAwait(false); - GC.SuppressFinalize(this); + if (!_isDisposed) + { + _isDisposed = true; + + if (!_leaveOpen && _dataStreamsToDispose?.Count > 0) + { + foreach (Stream s in _dataStreamsToDispose) + { + await s.DisposeAsync().ConfigureAwait(false); + } + } + } } /// @@ -249,52 +269,6 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel } } - // Disposes the current instance. - // If 'disposing' is 'false', the method was called from the finalizer. - private void Dispose(bool disposing) - { - if (disposing && !_isDisposed) - { - try - { - if (!_leaveOpen && _dataStreamsToDispose?.Count > 0) - { - foreach (Stream s in _dataStreamsToDispose) - { - s.Dispose(); - } - } - } - finally - { - _isDisposed = true; - } - } - } - - // Asynchronously disposes the current instance. - // If 'disposing' is 'false', the method was called from the finalizer. - private async ValueTask DisposeAsync(bool disposing) - { - if (disposing && !_isDisposed) - { - try - { - if (!_leaveOpen && _dataStreamsToDispose?.Count > 0) - { - foreach (Stream s in _dataStreamsToDispose) - { - await s.DisposeAsync().ConfigureAwait(false); - } - } - } - finally - { - _isDisposed = true; - } - } - } - // Asynchronously retrieves the next entry if one is found. private async ValueTask GetNextEntryInternalAsync(bool copyData, CancellationToken cancellationToken) { diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Unix.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Unix.cs index 7e678bcd918711..28eaf1e3323237 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Unix.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Unix.cs @@ -94,16 +94,7 @@ private TarEntry ConstructEntryForWriting(string fullPath, string entryName, Fil if (entry.EntryType is TarEntryType.RegularFile or TarEntryType.V7RegularFile) { Debug.Assert(entry._header._dataStream == null); - - FileStreamOptions options = new() - { - Mode = FileMode.Open, - Access = FileAccess.Read, - Share = FileShare.Read, - Options = fileOptions - }; - - entry._header._dataStream = new FileStream(fullPath, options); + entry._header._dataStream = new FileStream(fullPath, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, fileOptions); } return entry; diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs index bfb6cf17f11076..d3fa19a5b12eac 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs @@ -22,15 +22,15 @@ private TarEntry ConstructEntryForWriting(string fullPath, string entryName, Fil FileAttributes attributes = File.GetAttributes(fullPath); TarEntryType entryType; - if (attributes.HasFlag(FileAttributes.ReparsePoint)) + if ((attributes & FileAttributes.ReparsePoint) != 0) { entryType = TarEntryType.SymbolicLink; } - else if (attributes.HasFlag(FileAttributes.Directory)) + else if ((attributes & FileAttributes.Directory) != 0) { entryType = TarEntryType.Directory; } - else if (attributes.HasFlag(FileAttributes.Normal) || attributes.HasFlag(FileAttributes.Archive)) + else if ((attributes & (FileAttributes.Normal | FileAttributes.Archive)) != 0) { entryType = Format is TarEntryFormat.V7 ? TarEntryType.V7RegularFile : TarEntryType.RegularFile; } @@ -48,7 +48,7 @@ private TarEntry ConstructEntryForWriting(string fullPath, string entryName, Fil _ => throw new FormatException(string.Format(SR.TarInvalidFormat, Format)), }; - FileSystemInfo info = attributes.HasFlag(FileAttributes.Directory) ? new DirectoryInfo(fullPath) : new FileInfo(fullPath); + FileSystemInfo info = (attributes & FileAttributes.Directory) != 0 ? new DirectoryInfo(fullPath) : new FileInfo(fullPath); entry._header._mTime = info.LastWriteTimeUtc; entry._header._aTime = info.LastAccessTimeUtc; @@ -63,16 +63,8 @@ private TarEntry ConstructEntryForWriting(string fullPath, string entryName, Fil if (entry.EntryType is TarEntryType.RegularFile or TarEntryType.V7RegularFile) { - FileStreamOptions options = new() - { - Mode = FileMode.Open, - Access = FileAccess.Read, - Share = FileShare.Read, - Options = fileOptions - }; - Debug.Assert(entry._header._dataStream == null); - entry._header._dataStream = new FileStream(fullPath, options); + entry._header._dataStream = new FileStream(fullPath, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, fileOptions); } return entry; diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs index 94692316840235..7d3977eda68df4 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs @@ -84,8 +84,20 @@ public TarWriter(Stream archiveStream, TarEntryFormat format = TarEntryFormat.Pa /// public void Dispose() { - Dispose(disposing: true); - GC.SuppressFinalize(this); + if (!_isDisposed) + { + _isDisposed = true; + + if (_wroteEntries) + { + WriteFinalRecords(); + } + + if (!_leaveOpen) + { + _archiveStream.Dispose(); + } + } } /// @@ -93,8 +105,20 @@ public void Dispose() /// public async ValueTask DisposeAsync() { - await DisposeAsync(disposing: true).ConfigureAwait(false); - GC.SuppressFinalize(this); + if (!_isDisposed) + { + _isDisposed = true; + + if (_wroteEntries) + { + await WriteFinalRecordsAsync().ConfigureAwait(false); + } + + if (!_leaveOpen) + { + await _archiveStream.DisposeAsync().ConfigureAwait(false); + } + } } /// @@ -241,95 +265,40 @@ public Task WriteEntryAsync(TarEntry entry, CancellationToken cancellationToken return WriteEntryAsyncInternal(entry, cancellationToken); } - // Disposes the current instance. - // If 'disposing' is 'false', the method was called from the finalizer. - private void Dispose(bool disposing) + // Portion of the WriteEntry(entry) method that rents a buffer and writes to the archive. + private void WriteEntryInternal(TarEntry entry) { - if (disposing && !_isDisposed) + Span buffer = stackalloc byte[TarHelpers.RecordSize]; + buffer.Clear(); + + switch (entry.Format) { - try - { - if (_wroteEntries) - { - WriteFinalRecords(); - } + case TarEntryFormat.V7: + entry._header.WriteAsV7(_archiveStream, buffer); + break; + case TarEntryFormat.Ustar: + entry._header.WriteAsUstar(_archiveStream, buffer); + break; - if (!_leaveOpen) + case TarEntryFormat.Pax: + if (entry._header._typeFlag is TarEntryType.GlobalExtendedAttributes) { - _archiveStream.Dispose(); + entry._header.WriteAsPaxGlobalExtendedAttributes(_archiveStream, buffer, _nextGlobalExtendedAttributesEntryNumber++); } - } - finally - { - _isDisposed = true; - } - } - } - - // Asynchronously disposes the current instance. - // If 'disposing' is 'false', the method was called from the finalizer. - private async ValueTask DisposeAsync(bool disposing) - { - if (disposing && !_isDisposed) - { - try - { - if (_wroteEntries) + else { - await WriteFinalRecordsAsync().ConfigureAwait(false); + entry._header.WriteAsPax(_archiveStream, buffer); } + break; + case TarEntryFormat.Gnu: + entry._header.WriteAsGnu(_archiveStream, buffer); + break; - if (!_leaveOpen) - { - await _archiveStream.DisposeAsync().ConfigureAwait(false); - } - } - finally - { - _isDisposed = true; - } - } - } - - // Portion of the WriteEntry(entry) method that rents a buffer and writes to the archive. - private void WriteEntryInternal(TarEntry entry) - { - byte[] rented = ArrayPool.Shared.Rent(minimumLength: TarHelpers.RecordSize); - Span buffer = rented.AsSpan(0, TarHelpers.RecordSize); // minimumLength means the array could've been larger - buffer.Clear(); // Rented arrays aren't clean - try - { - switch (entry.Format) - { - case TarEntryFormat.V7: - entry._header.WriteAsV7(_archiveStream, buffer); - break; - case TarEntryFormat.Ustar: - entry._header.WriteAsUstar(_archiveStream, buffer); - break; - case TarEntryFormat.Pax: - if (entry._header._typeFlag is TarEntryType.GlobalExtendedAttributes) - { - entry._header.WriteAsPaxGlobalExtendedAttributes(_archiveStream, buffer, _nextGlobalExtendedAttributesEntryNumber++); - } - else - { - entry._header.WriteAsPax(_archiveStream, buffer); - } - break; - case TarEntryFormat.Gnu: - entry._header.WriteAsGnu(_archiveStream, buffer); - break; - default: - Debug.Assert(entry.Format == TarEntryFormat.Unknown, "Missing format handler"); - throw new FormatException(string.Format(SR.TarInvalidFormat, Format)); - } - } - finally - { - ArrayPool.Shared.Return(rented); + default: + Debug.Assert(entry.Format == TarEntryFormat.Unknown, "Missing format handler"); + throw new FormatException(string.Format(SR.TarInvalidFormat, Format)); } _wroteEntries = true; @@ -364,7 +333,9 @@ private async Task WriteEntryAsyncInternal(TarEntry entry, CancellationToken can // by two records consisting entirely of zero bytes. private void WriteFinalRecords() { - byte[] emptyRecord = new byte[TarHelpers.RecordSize]; + Span emptyRecord = stackalloc byte[TarHelpers.RecordSize]; + emptyRecord.Clear(); + _archiveStream.Write(emptyRecord); _archiveStream.Write(emptyRecord); } @@ -374,9 +345,14 @@ private void WriteFinalRecords() // This method is called from DisposeAsync, so we don't want to propagate a cancelled CancellationToken. private async ValueTask WriteFinalRecordsAsync() { - byte[] emptyRecord = new byte[TarHelpers.RecordSize]; - await _archiveStream.WriteAsync(emptyRecord, cancellationToken: default).ConfigureAwait(false); - await _archiveStream.WriteAsync(emptyRecord, cancellationToken: default).ConfigureAwait(false); + const int TwoRecordSize = TarHelpers.RecordSize * 2; + + byte[] twoEmptyRecords = ArrayPool.Shared.Rent(TwoRecordSize); + Array.Clear(twoEmptyRecords, 0, TwoRecordSize); + + await _archiveStream.WriteAsync(twoEmptyRecords.AsMemory(0, TwoRecordSize), cancellationToken: default).ConfigureAwait(false); + + ArrayPool.Shared.Return(twoEmptyRecords); } private (string, string) ValidateWriteEntryArguments(string fileName, string? entryName) diff --git a/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs b/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs index 4370cd16878b17..33923d91b9b8f1 100644 --- a/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs +++ b/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs @@ -375,49 +375,35 @@ private static void DoCreateFromDirectory(string sourceDirectoryName, string des if (includeBaseDirectory && di.Parent != null) basePath = di.Parent.FullName; - // Windows' MaxPath (260) is used as an arbitrary default capacity, as it is likely - // to be greater than the length of typical entry names from the file system, even - // on non-Windows platforms. The capacity will be increased, if needed. - const int DefaultCapacity = 260; - char[] entryNameBuffer = ArrayPool.Shared.Rent(DefaultCapacity); - - try + foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) { - foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) - { - directoryIsEmpty = false; + directoryIsEmpty = false; - int entryNameLength = file.FullName.Length - basePath.Length; - Debug.Assert(entryNameLength > 0); + int entryNameLength = file.FullName.Length - basePath.Length; + Debug.Assert(entryNameLength > 0); - if (file is FileInfo) - { - // Create entry for file: - string entryName = ArchivingUtils.EntryFromPath(file.FullName, basePath.Length, entryNameLength, ref entryNameBuffer); - ZipFileExtensions.DoCreateEntryFromFile(archive, file.FullName, entryName, compressionLevel); - } - else + if (file is FileInfo) + { + // Create entry for file: + string entryName = ArchivingUtils.EntryFromPath(file.FullName, basePath.Length, entryNameLength); + ZipFileExtensions.DoCreateEntryFromFile(archive, file.FullName, entryName, compressionLevel); + } + else + { + // Entry marking an empty dir: + if (file is DirectoryInfo possiblyEmpty && ArchivingUtils.IsDirEmpty(possiblyEmpty)) { - // Entry marking an empty dir: - if (file is DirectoryInfo possiblyEmpty && ArchivingUtils.IsDirEmpty(possiblyEmpty)) - { - // FullName never returns a directory separator character on the end, - // but Zip archives require it to specify an explicit directory: - string entryName = ArchivingUtils.EntryFromPath(file.FullName, basePath.Length, entryNameLength, ref entryNameBuffer, appendPathSeparator: true); - archive.CreateEntry(entryName); - } + // FullName never returns a directory separator character on the end, + // but Zip archives require it to specify an explicit directory: + string entryName = ArchivingUtils.EntryFromPath(file.FullName, basePath.Length, entryNameLength, appendPathSeparator: true); + archive.CreateEntry(entryName); } - } // foreach - - // If no entries create an empty root directory entry: - if (includeBaseDirectory && directoryIsEmpty) - archive.CreateEntry(ArchivingUtils.EntryFromPath(di.Name, 0, di.Name.Length, ref entryNameBuffer, appendPathSeparator: true)); - } - finally - { - ArrayPool.Shared.Return(entryNameBuffer); + } } + // If no entries create an empty root directory entry: + if (includeBaseDirectory && directoryIsEmpty) + archive.CreateEntry(ArchivingUtils.EntryFromPath(di.Name, 0, di.Name.Length, appendPathSeparator: true)); } } } From 2a3bb360ae4dfc687875af9ff525b3113f251e41 Mon Sep 17 00:00:00 2001 From: Dan Moseley Date: Sun, 21 Aug 2022 22:50:29 -0600 Subject: [PATCH 02/10] fix tar strings (#74321) --- .../System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs | 2 +- .../System.Formats.Tar/src/System/Formats/Tar/TarFile.cs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs index 27fe4af8701a92..7263af221ad657 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs @@ -465,7 +465,7 @@ private void VerifyPathsForEntryType(string filePath, string? linkTargetPath, bo // If the destination contains a directory segment, need to check that it exists if (!string.IsNullOrEmpty(directoryPath) && !Path.Exists(directoryPath)) { - throw new IOException(string.Format(SR.IO_PathNotFound_NoPathName, filePath)); + throw new IOException(string.Format(SR.IO_PathNotFound_Path, filePath)); } if (!Path.Exists(filePath)) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs index 9953045a5840df..33d971bfc56053 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs @@ -217,7 +217,7 @@ public static void ExtractToDirectory(string sourceFileName, string destinationD if (!File.Exists(sourceFileName)) { - throw new FileNotFoundException(string.Format(SR.IO_FileNotFound, sourceFileName)); + throw new FileNotFoundException(string.Format(SR.IO_FileNotFound_FileName, sourceFileName)); } if (!Directory.Exists(destinationDirectoryName)) @@ -256,7 +256,7 @@ public static Task ExtractToDirectoryAsync(string sourceFileName, string destina if (!File.Exists(sourceFileName)) { - return Task.FromException(new FileNotFoundException(string.Format(SR.IO_FileNotFound, sourceFileName))); + return Task.FromException(new FileNotFoundException(string.Format(SR.IO_FileNotFound_FileName, sourceFileName))); } if (!Directory.Exists(destinationDirectoryName)) From 2427e68149920b079452a19d8d8ad245fead5190 Mon Sep 17 00:00:00 2001 From: Adeel Mujahid <3840695+am11@users.noreply.github.com> Date: Mon, 22 Aug 2022 17:50:42 +0300 Subject: [PATCH 03/10] Fix some corner cases in TarReader (#74329) --- .../src/System/Formats/Tar/TarHeader.Read.cs | 26 ++++++++++++++++--- .../src/System/Formats/Tar/TarReader.cs | 10 +++++++ .../tests/TarReader/TarReader.File.Tests.cs | 26 +++++++++++++++++++ 3 files changed, 58 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index f826a2c1e1b099..e43ae6973607dc 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -461,18 +461,36 @@ private void ReadVersionAttribute(Span buffer) // The POSIX formats have a 6 byte Magic "ustar\0", followed by a 2 byte Version "00" if (!version.SequenceEqual(UstarVersionBytes)) { - throw new FormatException(string.Format(SR.TarPosixFormatExpected, _name)); + // Check for gnu version header for mixed case + if (!version.SequenceEqual(GnuVersionBytes)) + { + throw new FormatException(string.Format(SR.TarPosixFormatExpected, _name)); + } + + _version = GnuVersion; + } + else + { + _version = UstarVersion; } - _version = UstarVersion; break; case TarEntryFormat.Gnu: // The GNU format has a Magic+Version 8 byte string "ustar \0" if (!version.SequenceEqual(GnuVersionBytes)) { - throw new FormatException(string.Format(SR.TarGnuFormatExpected, _name)); + // Check for ustar or pax version header for mixed case + if (!version.SequenceEqual(UstarVersionBytes)) + { + throw new FormatException(string.Format(SR.TarGnuFormatExpected, _name)); + } + + _version = UstarVersion; + } + else + { + _version = GnuVersion; } - _version = GnuVersion; break; default: diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs index 137acbcfbd2d01..10f3f2524d1ca5 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs @@ -138,6 +138,11 @@ public async ValueTask DisposeAsync() TarEntryFormat.V7 or TarEntryFormat.Unknown or _ => new V7TarEntry(header, this), }; + if (_archiveStream.CanSeek && _archiveStream.Length == _archiveStream.Position) + { + _reachedEndMarkers = true; + } + _previouslyReadEntry = entry; PreserveDataStreamForDisposalIfNeeded(entry); return entry; @@ -291,6 +296,11 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel TarEntryFormat.V7 or TarEntryFormat.Unknown or _ => new V7TarEntry(header, this), }; + if (_archiveStream.CanSeek && _archiveStream.Length == _archiveStream.Position) + { + _reachedEndMarkers = true; + } + _previouslyReadEntry = entry; PreserveDataStreamForDisposalIfNeeded(entry); return entry; diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs index ffc537917fc7af..5e35c12c3aaac9 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs @@ -124,5 +124,31 @@ public void Read_Archive_LongFileName_Over100_Under255(TarEntryFormat format, Te [InlineData(TarEntryFormat.Gnu, TestTarFormat.oldgnu)] public void Read_Archive_LongPath_Over255(TarEntryFormat format, TestTarFormat testFormat) => Read_Archive_LongPath_Over255_Internal(format, testFormat); + + [Fact] + public void Read_NodeTarArchives_Successfully() + { + string nodeTarPath = Path.Join(Directory.GetCurrentDirectory(), "tar", "node-tar"); + foreach (string file in Directory.EnumerateFiles(nodeTarPath, "*.tar", SearchOption.AllDirectories)) + { + using FileStream sourceStream = File.Open(file, FileMode.Open, FileAccess.Read, FileShare.Read); + using var reader = new TarReader(sourceStream); + + TarEntry? entry = null; + while (true) + { + Exception ex = Record.Exception(() => entry = reader.GetNextEntry()); + Assert.Null(ex); + + if (entry is null) break; + + ex = Record.Exception(() => entry.Name); + Assert.Null(ex); + + ex = Record.Exception(() => entry.Length); + Assert.Null(ex); + } + } + } } } From c3ad6b65fd5153a5e39064d65f5ed26f04bbb7d7 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Mon, 22 Aug 2022 18:58:38 -0400 Subject: [PATCH 04/10] Fix a few Tar issues post perf improvements (#74338) * Fix a few Tar issues post perf improvements * Update src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs --- .../src/System/IO/Archiving.Utils.Unix.cs | 20 +++++++ .../src/System/IO/Archiving.Utils.Windows.cs | 43 +++++++++++++++ .../Common/src/System/IO/Archiving.Utils.cs | 54 ------------------- .../src/System/Formats/Tar/TarFile.cs | 7 +-- .../src/System/Formats/Tar/TarHeader.Write.cs | 13 +++-- .../System/IO/Compression/ZipFile.Create.cs | 9 ++-- 6 files changed, 78 insertions(+), 68 deletions(-) diff --git a/src/libraries/Common/src/System/IO/Archiving.Utils.Unix.cs b/src/libraries/Common/src/System/IO/Archiving.Utils.Unix.cs index ca9b37759a31cf..18ab525f919b39 100644 --- a/src/libraries/Common/src/System/IO/Archiving.Utils.Unix.cs +++ b/src/libraries/Common/src/System/IO/Archiving.Utils.Unix.cs @@ -6,5 +6,25 @@ namespace System.IO internal static partial class ArchivingUtils { internal static string SanitizeEntryFilePath(string entryPath) => entryPath.Replace('\0', '_'); + + public static unsafe string EntryFromPath(ReadOnlySpan path, bool appendPathSeparator = false) + { + // Remove leading separators. + int nonSlash = path.IndexOfAnyExcept('/'); + if (nonSlash == -1) + { + nonSlash = path.Length; + } + path = path.Slice(nonSlash); + + // Append a separator if necessary. + return (path.IsEmpty, appendPathSeparator) switch + { + (false, false) => path.ToString(), + (false, true) => string.Concat(path, "/"), + (true, false) => string.Empty, + (true, true) => "/", + }; + } } } diff --git a/src/libraries/Common/src/System/IO/Archiving.Utils.Windows.cs b/src/libraries/Common/src/System/IO/Archiving.Utils.Windows.cs index 88f7ffb14f4d29..412563966fc722 100644 --- a/src/libraries/Common/src/System/IO/Archiving.Utils.Windows.cs +++ b/src/libraries/Common/src/System/IO/Archiving.Utils.Windows.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Runtime.InteropServices; using System.Text; namespace System.IO @@ -42,5 +43,47 @@ internal static string SanitizeEntryFilePath(string entryPath) // There weren't any characters to sanitize. Just return the original string. return entryPath; } + + public static unsafe string EntryFromPath(ReadOnlySpan path, bool appendPathSeparator = false) + { + // Remove leading separators. + int nonSlash = path.IndexOfAnyExcept('/', '\\'); + if (nonSlash == -1) + { + nonSlash = path.Length; + } + path = path.Slice(nonSlash); + + // Replace \ with /, and append a separator if necessary. + + if (path.IsEmpty) + { + return appendPathSeparator ? + "/" : + string.Empty; + } + + fixed (char* pathPtr = &MemoryMarshal.GetReference(path)) + { + return string.Create(appendPathSeparator ? path.Length + 1 : path.Length, (appendPathSeparator, (IntPtr)pathPtr, path.Length), static (dest, state) => + { + ReadOnlySpan path = new ReadOnlySpan((char*)state.Item2, state.Length); + path.CopyTo(dest); + if (state.appendPathSeparator) + { + dest[^1] = '/'; + } + + // To ensure tar files remain compatible with Unix, and per the ZIP File Format Specification 4.4.17.1, + // all slashes should be forward slashes. + int pos; + while ((pos = dest.IndexOf('\\')) >= 0) + { + dest[pos] = '/'; + dest = dest.Slice(pos + 1); + } + }); + } + } } } diff --git a/src/libraries/Common/src/System/IO/Archiving.Utils.cs b/src/libraries/Common/src/System/IO/Archiving.Utils.cs index 80d633228cc0c1..23cc77a30c75f1 100644 --- a/src/libraries/Common/src/System/IO/Archiving.Utils.cs +++ b/src/libraries/Common/src/System/IO/Archiving.Utils.cs @@ -9,60 +9,6 @@ namespace System.IO { internal static partial class ArchivingUtils { - // To ensure tar files remain compatible with Unix, - // and per the ZIP File Format Specification 4.4.17.1, - // all slashes should be forward slashes. - private const char PathSeparatorChar = '/'; - private const string PathSeparatorString = "/"; - - public static string EntryFromPath(string entry, int offset, int length, bool appendPathSeparator = false) - { - Debug.Assert(length <= entry.Length - offset); - - // Remove any leading slashes from the entry name: - while (length > 0) - { - if (entry[offset] != Path.DirectorySeparatorChar && - entry[offset] != Path.AltDirectorySeparatorChar) - break; - - offset++; - length--; - } - - if (length == 0) - { - return appendPathSeparator ? PathSeparatorString : string.Empty; - } - - if (appendPathSeparator) - { - length++; - } - - return string.Create(length, (appendPathSeparator, offset, entry), static (dest, state) => - { - state.entry.AsSpan(state.offset).CopyTo(dest); - - // '/' is a more broadly recognized directory separator on all platforms (eg: mac, linux) - // We don't use Path.DirectorySeparatorChar or AltDirectorySeparatorChar because this is - // explicitly trying to standardize to '/' - for (int i = 0; i < dest.Length; i++) - { - char ch = dest[i]; - if (ch == Path.DirectorySeparatorChar || ch == Path.AltDirectorySeparatorChar) - { - dest[i] = PathSeparatorChar; - } - } - - if (state.appendPathSeparator) - { - dest[^1] = PathSeparatorChar; - } - }); - } - public static void EnsureCapacity(ref char[] buffer, int min) { Debug.Assert(buffer != null); diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs index 33d971bfc56053..127b00f13fb557 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs @@ -344,16 +344,13 @@ private static string GetBasePathForCreateFromDirectory(DirectoryInfo di, bool i // Constructs the entry name used for a filesystem entry when creating an archive. private static string GetEntryNameForFileSystemInfo(FileSystemInfo file, int basePathLength) { - int entryNameLength = file.FullName.Length - basePathLength; - Debug.Assert(entryNameLength > 0); - bool isDirectory = (file.Attributes & FileAttributes.Directory) != 0; - return ArchivingUtils.EntryFromPath(file.FullName, basePathLength, entryNameLength, appendPathSeparator: isDirectory); + return ArchivingUtils.EntryFromPath(file.FullName.AsSpan(basePathLength), appendPathSeparator: isDirectory); } private static string GetEntryNameForBaseDirectory(string name) { - return ArchivingUtils.EntryFromPath(name, 0, name.Length, appendPathSeparator: true); + return ArchivingUtils.EntryFromPath(name, appendPathSeparator: true); } // Extracts an archive into the specified directory. diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index 724274d1689d92..2afcfb8a9f23df 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -559,8 +559,16 @@ private static async Task WriteDataAsync(Stream archiveStream, Stream dataStream // The format is: // "XX attribute=value\n" // where "XX" is the number of characters in the entry, including those required for the count itself. + // If prepending the length digits increases the number of digits, we need to expand. int length = 3 + Encoding.UTF8.GetByteCount(attribute) + Encoding.UTF8.GetByteCount(value); - length += CountDigits(length); + int originalDigitCount = CountDigits(length), newDigitCount; + length += originalDigitCount; + while ((newDigitCount = CountDigits(length)) != originalDigitCount) + { + length += newDigitCount - originalDigitCount; + originalDigitCount = newDigitCount; + } + Debug.Assert(length == CountDigits(length) + 3 + Encoding.UTF8.GetByteCount(attribute) + Encoding.UTF8.GetByteCount(value)); // Get a large enough buffer if we don't already have one. if (span.Length < length) @@ -569,8 +577,7 @@ private static async Task WriteDataAsync(Stream archiveStream, Stream dataStream { ArrayPool.Shared.Return(buffer); } - buffer = ArrayPool.Shared.Rent(length); - span = buffer; + span = buffer = ArrayPool.Shared.Rent(length); } // Format the contents. diff --git a/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs b/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs index 33923d91b9b8f1..70894d2b74f7ef 100644 --- a/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs +++ b/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs @@ -379,13 +379,10 @@ private static void DoCreateFromDirectory(string sourceDirectoryName, string des { directoryIsEmpty = false; - int entryNameLength = file.FullName.Length - basePath.Length; - Debug.Assert(entryNameLength > 0); - if (file is FileInfo) { // Create entry for file: - string entryName = ArchivingUtils.EntryFromPath(file.FullName, basePath.Length, entryNameLength); + string entryName = ArchivingUtils.EntryFromPath(file.FullName.AsSpan(basePath.Length)); ZipFileExtensions.DoCreateEntryFromFile(archive, file.FullName, entryName, compressionLevel); } else @@ -395,7 +392,7 @@ private static void DoCreateFromDirectory(string sourceDirectoryName, string des { // FullName never returns a directory separator character on the end, // but Zip archives require it to specify an explicit directory: - string entryName = ArchivingUtils.EntryFromPath(file.FullName, basePath.Length, entryNameLength, appendPathSeparator: true); + string entryName = ArchivingUtils.EntryFromPath(file.FullName.AsSpan(basePath.Length), appendPathSeparator: true); archive.CreateEntry(entryName); } } @@ -403,7 +400,7 @@ private static void DoCreateFromDirectory(string sourceDirectoryName, string des // If no entries create an empty root directory entry: if (includeBaseDirectory && directoryIsEmpty) - archive.CreateEntry(ArchivingUtils.EntryFromPath(di.Name, 0, di.Name.Length, appendPathSeparator: true)); + archive.CreateEntry(ArchivingUtils.EntryFromPath(di.Name, appendPathSeparator: true)); } } } From cfc40cab55f7828084351896330b653767f508e6 Mon Sep 17 00:00:00 2001 From: Carlos Sanchez <1175054+carlossanlop@users.noreply.github.com> Date: Mon, 22 Aug 2022 18:08:32 -0700 Subject: [PATCH 05/10] Skip directory symlink recursion on TarFile archive creation (#74376) * Skip directory symlink recursion on TarFile archive creation * Add symlink verification * Address suggestions by danmoseley Co-authored-by: carlossanlop --- .../src/System/Formats/Tar/TarFile.cs | 39 +++++++++++- .../TarFile.CreateFromDirectory.File.Tests.cs | 60 +++++++++++++++++++ ...ile.CreateFromDirectoryAsync.File.Tests.cs | 60 +++++++++++++++++++ 3 files changed, 157 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs index 127b00f13fb557..2e490747df80d0 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs @@ -5,6 +5,7 @@ using System.Collections.Generic; using System.Diagnostics; using System.IO; +using System.IO.Enumeration; using System.Threading; using System.Threading.Tasks; @@ -278,12 +279,20 @@ private static void CreateFromDirectoryInternal(string sourceDirectoryName, Stre DirectoryInfo di = new(sourceDirectoryName); string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory); + bool skipBaseDirRecursion = false; if (includeBaseDirectory) { writer.WriteEntry(di.FullName, GetEntryNameForBaseDirectory(di.Name)); + skipBaseDirRecursion = (di.Attributes & FileAttributes.ReparsePoint) != 0; } - foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) + if (skipBaseDirRecursion) + { + // The base directory is a symlink, do not recurse into it + return; + } + + foreach (FileSystemInfo file in GetFileSystemEnumerationForCreation(sourceDirectoryName)) { writer.WriteEntry(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length)); } @@ -325,18 +334,44 @@ private static async Task CreateFromDirectoryInternalAsync(string sourceDirector DirectoryInfo di = new(sourceDirectoryName); string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory); + bool skipBaseDirRecursion = false; if (includeBaseDirectory) { await writer.WriteEntryAsync(di.FullName, GetEntryNameForBaseDirectory(di.Name), cancellationToken).ConfigureAwait(false); + skipBaseDirRecursion = (di.Attributes & FileAttributes.ReparsePoint) != 0; } - foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) + if (skipBaseDirRecursion) + { + // The base directory is a symlink, do not recurse into it + return; + } + + foreach (FileSystemInfo file in GetFileSystemEnumerationForCreation(sourceDirectoryName)) { await writer.WriteEntryAsync(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length), cancellationToken).ConfigureAwait(false); } } } + // Generates a recursive enumeration of the filesystem entries inside the specified source directory, while + // making sure that directory symlinks do not get recursed. + private static IEnumerable GetFileSystemEnumerationForCreation(string sourceDirectoryName) + { + return new FileSystemEnumerable( + directory: sourceDirectoryName, + transform: (ref FileSystemEntry entry) => entry.ToFileSystemInfo(), + options: new EnumerationOptions() + { + RecurseSubdirectories = true + }) + { + ShouldRecursePredicate = IsNotADirectorySymlink + }; + + static bool IsNotADirectorySymlink(ref FileSystemEntry entry) => entry.IsDirectory && (entry.Attributes & FileAttributes.ReparsePoint) == 0; + } + // Determines what should be the base path for all the entries when creating an archive. private static string GetBasePathForCreateFromDirectory(DirectoryInfo di, bool includeBaseDirectory) => includeBaseDirectory && di.Parent != null ? di.Parent.FullName : di.FullName; diff --git a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectory.File.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectory.File.Tests.cs index 75a3495d94e5e5..388e9639bd2956 100644 --- a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectory.File.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectory.File.Tests.cs @@ -193,5 +193,65 @@ public void IncludeAllSegmentsOfPath(bool includeBaseDirectory) Assert.Null(reader.GetNextEntry()); } + + [Fact] + public void SkipRecursionIntoDirectorySymlinks() + { + using TempDirectory root = new TempDirectory(); + + string destinationArchive = Path.Join(root.Path, "destination.tar"); + + string externalDirectory = Path.Join(root.Path, "externalDirectory"); + Directory.CreateDirectory(externalDirectory); + + File.Create(Path.Join(externalDirectory, "file.txt")).Dispose(); + + string sourceDirectoryName = Path.Join(root.Path, "baseDirectory"); + Directory.CreateDirectory(sourceDirectoryName); + + string subDirectory = Path.Join(sourceDirectoryName, "subDirectory"); + Directory.CreateSymbolicLink(subDirectory, externalDirectory); // Should not recurse here + + TarFile.CreateFromDirectory(sourceDirectoryName, destinationArchive, includeBaseDirectory: false); + + using FileStream archiveStream = File.OpenRead(destinationArchive); + using TarReader reader = new(archiveStream, leaveOpen: false); + + TarEntry entry = reader.GetNextEntry(); + Assert.NotNull(entry); + Assert.Equal("subDirectory/", entry.Name); + Assert.Equal(TarEntryType.SymbolicLink, entry.EntryType); + + Assert.Null(reader.GetNextEntry()); // file.txt should not be found + } + + [Fact] + public void SkipRecursionIntoBaseDirectorySymlink() + { + using TempDirectory root = new TempDirectory(); + + string destinationArchive = Path.Join(root.Path, "destination.tar"); + + string externalDirectory = Path.Join(root.Path, "externalDirectory"); + Directory.CreateDirectory(externalDirectory); + + string subDirectory = Path.Join(externalDirectory, "subDirectory"); + Directory.CreateDirectory(subDirectory); + + string sourceDirectoryName = Path.Join(root.Path, "baseDirectory"); + Directory.CreateSymbolicLink(sourceDirectoryName, externalDirectory); + + TarFile.CreateFromDirectory(sourceDirectoryName, destinationArchive, includeBaseDirectory: true); // Base directory is a symlink, do not recurse + + using FileStream archiveStream = File.OpenRead(destinationArchive); + using TarReader reader = new(archiveStream, leaveOpen: false); + + TarEntry entry = reader.GetNextEntry(); + Assert.NotNull(entry); + Assert.Equal("baseDirectory/", entry.Name); + Assert.Equal(TarEntryType.SymbolicLink, entry.EntryType); + + Assert.Null(reader.GetNextEntry()); + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectoryAsync.File.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectoryAsync.File.Tests.cs index c9b4377cd03285..bb1d8ff7cc0cc3 100644 --- a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectoryAsync.File.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectoryAsync.File.Tests.cs @@ -237,5 +237,65 @@ public async Task IncludeAllSegmentsOfPath_Async(bool includeBaseDirectory) } } } + + [Fact] + public async Task SkipRecursionIntoDirectorySymlinksAsync() + { + using TempDirectory root = new TempDirectory(); + + string destinationArchive = Path.Join(root.Path, "destination.tar"); + + string externalDirectory = Path.Join(root.Path, "externalDirectory"); + Directory.CreateDirectory(externalDirectory); + + File.Create(Path.Join(externalDirectory, "file.txt")).Dispose(); + + string sourceDirectoryName = Path.Join(root.Path, "baseDirectory"); + Directory.CreateDirectory(sourceDirectoryName); + + string subDirectory = Path.Join(sourceDirectoryName, "subDirectory"); + Directory.CreateSymbolicLink(subDirectory, externalDirectory); // Should not recurse here + + await TarFile.CreateFromDirectoryAsync(sourceDirectoryName, destinationArchive, includeBaseDirectory: false); + + await using FileStream archiveStream = File.OpenRead(destinationArchive); + await using TarReader reader = new(archiveStream, leaveOpen: false); + + TarEntry entry = await reader.GetNextEntryAsync(); + Assert.NotNull(entry); + Assert.Equal("subDirectory/", entry.Name); + Assert.Equal(TarEntryType.SymbolicLink, entry.EntryType); + + Assert.Null(await reader.GetNextEntryAsync()); // file.txt should not be found + } + + [Fact] + public async Task SkipRecursionIntoBaseDirectorySymlinkAsync() + { + using TempDirectory root = new TempDirectory(); + + string destinationArchive = Path.Join(root.Path, "destination.tar"); + + string externalDirectory = Path.Join(root.Path, "externalDirectory"); + Directory.CreateDirectory(externalDirectory); + + string subDirectory = Path.Join(externalDirectory, "subDirectory"); + Directory.CreateDirectory(subDirectory); + + string sourceDirectoryName = Path.Join(root.Path, "baseDirectory"); + Directory.CreateSymbolicLink(sourceDirectoryName, externalDirectory); + + await TarFile.CreateFromDirectoryAsync(sourceDirectoryName, destinationArchive, includeBaseDirectory: true); // Base directory is a symlink, do not recurse + + await using FileStream archiveStream = File.OpenRead(destinationArchive); + await using TarReader reader = new(archiveStream, leaveOpen: false); + + TarEntry entry = await reader.GetNextEntryAsync(); + Assert.NotNull(entry); + Assert.Equal("baseDirectory/", entry.Name); + Assert.Equal(TarEntryType.SymbolicLink, entry.EntryType); + + Assert.Null(await reader.GetNextEntryAsync()); // subDirectory should not be found + } } } From ce05de7c9ed535122e2e58aefd74b145142ea971 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Cant=C3=BA?= Date: Tue, 23 Aug 2022 01:39:23 -0500 Subject: [PATCH 06/10] SkipBlockAlignmentPadding must be executed for all entries (#74396) --- .../src/System/Formats/Tar/TarReader.cs | 4 +- ...TarFile.ExtractToDirectory.Stream.Tests.cs | 32 ++++++++++++++++ ...le.ExtractToDirectoryAsync.Stream.Tests.cs | 32 ++++++++++++++++ .../TarReader/TarReader.GetNextEntry.Tests.cs | 38 +++++++++++++++++++ .../TarReader.GetNextEntryAsync.Tests.cs | 38 +++++++++++++++++++ 5 files changed, 142 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs index 10f3f2524d1ca5..915cb81caab664 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs @@ -225,10 +225,10 @@ internal void AdvanceDataStreamIfNeeded() { long bytesToSkip = _previouslyReadEntry._header._size - dataStream.Position; TarHelpers.AdvanceStream(_archiveStream, bytesToSkip); - TarHelpers.SkipBlockAlignmentPadding(_archiveStream, _previouslyReadEntry._header._size); dataStream.HasReachedEnd = true; // Now the pointer is beyond the limit, so any read attempts should throw } } + TarHelpers.SkipBlockAlignmentPadding(_archiveStream, _previouslyReadEntry._header._size); } } @@ -267,10 +267,10 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel { long bytesToSkip = _previouslyReadEntry._header._size - dataStream.Position; await TarHelpers.AdvanceStreamAsync(_archiveStream, bytesToSkip, cancellationToken).ConfigureAwait(false); - await TarHelpers.SkipBlockAlignmentPaddingAsync(_archiveStream, _previouslyReadEntry._header._size, cancellationToken).ConfigureAwait(false); dataStream.HasReachedEnd = true; // Now the pointer is beyond the limit, so any read attempts should throw } } + await TarHelpers.SkipBlockAlignmentPaddingAsync(_archiveStream, _previouslyReadEntry._header._size, cancellationToken).ConfigureAwait(false); } } diff --git a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.Stream.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.Stream.Tests.cs index 5a68d5a95d5d26..d53f354ee408d1 100644 --- a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.Stream.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.Stream.Tests.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.IO; +using System.IO.Compression; using System.Linq; using Xunit; @@ -145,5 +146,36 @@ private void Extract_LinkEntry_TargetInsideDirectory_Internal(TarEntryType entry Assert.Equal(2, Directory.GetFileSystemEntries(baseDir).Count()); } + + [Theory] + [InlineData(512)] + [InlineData(512 + 1)] + [InlineData(512 + 512 - 1)] + public void Extract_UnseekableStream_BlockAlignmentPadding_DoesNotAffectNextEntries(int contentSize) + { + byte[] fileContents = new byte[contentSize]; + Array.Fill(fileContents, 0x1); + + using var archive = new MemoryStream(); + using (var compressor = new GZipStream(archive, CompressionMode.Compress, leaveOpen: true)) + { + using var writer = new TarWriter(compressor); + var entry1 = new PaxTarEntry(TarEntryType.RegularFile, "file"); + entry1.DataStream = new MemoryStream(fileContents); + writer.WriteEntry(entry1); + + var entry2 = new PaxTarEntry(TarEntryType.RegularFile, "next-file"); + writer.WriteEntry(entry2); + } + + archive.Position = 0; + using var decompressor = new GZipStream(archive, CompressionMode.Decompress); + using var reader = new TarReader(decompressor); + + using TempDirectory destination = new TempDirectory(); + TarFile.ExtractToDirectory(decompressor, destination.Path, overwriteFiles: true); + + Assert.Equal(2, Directory.GetFileSystemEntries(destination.Path, "*", SearchOption.AllDirectories).Count()); + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.Stream.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.Stream.Tests.cs index a35a22c1050f56..741ce8e102deb4 100644 --- a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.Stream.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.Stream.Tests.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.IO; +using System.IO.Compression; using System.Linq; using System.Threading; using System.Threading.Tasks; @@ -174,5 +175,36 @@ private async Task Extract_LinkEntry_TargetInsideDirectory_Internal_Async(TarEnt } } } + + [Theory] + [InlineData(512)] + [InlineData(512 + 1)] + [InlineData(512 + 512 - 1)] + public async Task Extract_UnseekableStream_BlockAlignmentPadding_DoesNotAffectNextEntries_Async(int contentSize) + { + byte[] fileContents = new byte[contentSize]; + Array.Fill(fileContents, 0x1); + + using var archive = new MemoryStream(); + using (var compressor = new GZipStream(archive, CompressionMode.Compress, leaveOpen: true)) + { + using var writer = new TarWriter(compressor); + var entry1 = new PaxTarEntry(TarEntryType.RegularFile, "file"); + entry1.DataStream = new MemoryStream(fileContents); + await writer.WriteEntryAsync(entry1); + + var entry2 = new PaxTarEntry(TarEntryType.RegularFile, "next-file"); + await writer.WriteEntryAsync(entry2); + } + + archive.Position = 0; + using var decompressor = new GZipStream(archive, CompressionMode.Decompress); + using var reader = new TarReader(decompressor); + + using TempDirectory destination = new TempDirectory(); + await TarFile.ExtractToDirectoryAsync(decompressor, destination.Path, overwriteFiles: true); + + Assert.Equal(2, Directory.GetFileSystemEntries(destination.Path, "*", SearchOption.AllDirectories).Count()); + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntry.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntry.Tests.cs index bad9bf8fa179bf..1c8dfeea19fce5 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntry.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntry.Tests.cs @@ -250,5 +250,43 @@ public void GetNextEntry_UnseekableArchive_ReplaceDataStream_ExcludeFromDisposin Assert.Equal("Substituted", streamReader.ReadLine()); } } + + [Theory] + [InlineData(512)] + [InlineData(512 + 1)] + [InlineData(512 + 512 - 1)] + public void BlockAlignmentPadding_DoesNotAffectNextEntries(int contentSize) + { + byte[] fileContents = new byte[contentSize]; + Array.Fill(fileContents, 0x1); + + using var archive = new MemoryStream(); + using (var writer = new TarWriter(archive, leaveOpen: true)) + { + var entry1 = new PaxTarEntry(TarEntryType.RegularFile, "file"); + entry1.DataStream = new MemoryStream(fileContents); + writer.WriteEntry(entry1); + + var entry2 = new PaxTarEntry(TarEntryType.RegularFile, "next-file"); + writer.WriteEntry(entry2); + } + + archive.Position = 0; + using var unseekable = new WrappedStream(archive, archive.CanRead, archive.CanWrite, canSeek: false); + using var reader = new TarReader(unseekable); + + TarEntry e = reader.GetNextEntry(); + Assert.Equal(contentSize, e.Length); + + byte[] buffer = new byte[contentSize]; + while (e.DataStream.Read(buffer) > 0) ; + AssertExtensions.SequenceEqual(fileContents, buffer); + + e = reader.GetNextEntry(); + Assert.Equal(0, e.Length); + + e = reader.GetNextEntry(); + Assert.Null(e); + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntryAsync.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntryAsync.Tests.cs index 77ddf1f1320621..a2e14598557c95 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntryAsync.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntryAsync.Tests.cs @@ -288,5 +288,43 @@ public async Task GetNextEntry_UnseekableArchive_ReplaceDataStream_ExcludeFromDi } } } + + [Theory] + [InlineData(512)] + [InlineData(512 + 1)] + [InlineData(512 + 512 - 1)] + public async Task BlockAlignmentPadding_DoesNotAffectNextEntries_Async(int contentSize) + { + byte[] fileContents = new byte[contentSize]; + Array.Fill(fileContents, 0x1); + + using var archive = new MemoryStream(); + using (var writer = new TarWriter(archive, leaveOpen: true)) + { + var entry1 = new PaxTarEntry(TarEntryType.RegularFile, "file"); + entry1.DataStream = new MemoryStream(fileContents); + await writer.WriteEntryAsync(entry1); + + var entry2 = new PaxTarEntry(TarEntryType.RegularFile, "next-file"); + await writer.WriteEntryAsync(entry2); + } + + archive.Position = 0; + using var unseekable = new WrappedStream(archive, archive.CanRead, archive.CanWrite, canSeek: false); + using var reader = new TarReader(unseekable); + + TarEntry e = await reader.GetNextEntryAsync(); + Assert.Equal(contentSize, e.Length); + + byte[] buffer = new byte[contentSize]; + while (e.DataStream.Read(buffer) > 0) ; + AssertExtensions.SequenceEqual(fileContents, buffer); + + e = await reader.GetNextEntryAsync(); + Assert.Equal(0, e.Length); + + e = await reader.GetNextEntryAsync(); + Assert.Null(e); + } } } From 149337c62885c586721508fbebafdc159c8b9493 Mon Sep 17 00:00:00 2001 From: Dan Moseley Date: Tue, 23 Aug 2022 04:21:02 -0600 Subject: [PATCH 07/10] Set modified timestamps on files being extracted from tar archives (#74400) --- .../src/System/Formats/Tar/TarEntry.cs | 16 +++++++++-- .../TarFile.CreateFromDirectory.File.Tests.cs | 4 +-- ...ile.CreateFromDirectoryAsync.File.Tests.cs | 4 +-- .../TarFile.ExtractToDirectory.File.Tests.cs | 27 +++++++++++++++++++ ...File.ExtractToDirectoryAsync.File.Tests.cs | 27 +++++++++++++++++++ 5 files changed, 72 insertions(+), 6 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs index 7263af221ad657..9c2f0553eb39bf 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs @@ -529,7 +529,7 @@ private void ExtractAsRegularFile(string destinationFileName) DataStream?.CopyTo(fs); } - ArchivingUtils.AttemptSetLastWriteTime(destinationFileName, ModificationTime); + AttemptSetLastWriteTime(destinationFileName, ModificationTime); } // Asynchronously extracts the current entry as a regular file into the specified destination. @@ -551,7 +551,19 @@ private async Task ExtractAsRegularFileAsync(string destinationFileName, Cancell } } - ArchivingUtils.AttemptSetLastWriteTime(destinationFileName, ModificationTime); + AttemptSetLastWriteTime(destinationFileName, ModificationTime); + } + + private static void AttemptSetLastWriteTime(string destinationFileName, DateTimeOffset lastWriteTime) + { + try + { + File.SetLastWriteTime(destinationFileName, lastWriteTime.LocalDateTime); // SetLastWriteTime expects local time + } + catch + { + // Some OSes like Android might not support setting the last write time, the extraction should not fail because of that + } } private FileStreamOptions CreateFileStreamOptions(bool isAsync) diff --git a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectory.File.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectory.File.Tests.cs index 388e9639bd2956..308a9b68ba4def 100644 --- a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectory.File.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectory.File.Tests.cs @@ -194,7 +194,7 @@ public void IncludeAllSegmentsOfPath(bool includeBaseDirectory) Assert.Null(reader.GetNextEntry()); } - [Fact] + [ConditionalFact(typeof(MountHelper), nameof(MountHelper.CanCreateSymbolicLinks))] public void SkipRecursionIntoDirectorySymlinks() { using TempDirectory root = new TempDirectory(); @@ -225,7 +225,7 @@ public void SkipRecursionIntoDirectorySymlinks() Assert.Null(reader.GetNextEntry()); // file.txt should not be found } - [Fact] + [ConditionalFact(typeof(MountHelper), nameof(MountHelper.CanCreateSymbolicLinks))] public void SkipRecursionIntoBaseDirectorySymlink() { using TempDirectory root = new TempDirectory(); diff --git a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectoryAsync.File.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectoryAsync.File.Tests.cs index bb1d8ff7cc0cc3..78b051ae33b08e 100644 --- a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectoryAsync.File.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectoryAsync.File.Tests.cs @@ -238,7 +238,7 @@ public async Task IncludeAllSegmentsOfPath_Async(bool includeBaseDirectory) } } - [Fact] + [ConditionalFact(typeof(MountHelper), nameof(MountHelper.CanCreateSymbolicLinks))] public async Task SkipRecursionIntoDirectorySymlinksAsync() { using TempDirectory root = new TempDirectory(); @@ -269,7 +269,7 @@ public async Task SkipRecursionIntoDirectorySymlinksAsync() Assert.Null(await reader.GetNextEntryAsync()); // file.txt should not be found } - [Fact] + [ConditionalFact(typeof(MountHelper), nameof(MountHelper.CanCreateSymbolicLinks))] public async Task SkipRecursionIntoBaseDirectorySymlinkAsync() { using TempDirectory root = new TempDirectory(); diff --git a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.File.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.File.Tests.cs index ee00ccfaf6ccb8..90c5eba6b9ff97 100644 --- a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.File.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.File.Tests.cs @@ -44,6 +44,33 @@ public void NonExistentDirectory_Throws() Assert.Throws(() => TarFile.ExtractToDirectory(sourceFileName: filePath, destinationDirectoryName: dirPath, overwriteFiles: false)); } + [Fact] + public void SetsLastModifiedTimeOnExtractedFiles() + { + using TempDirectory root = new TempDirectory(); + + string inDir = Path.Join(root.Path, "indir"); + string inFile = Path.Join(inDir, "file"); + + string tarFile = Path.Join(root.Path, "file.tar"); + + string outDir = Path.Join(root.Path, "outdir"); + string outFile = Path.Join(outDir, "file"); + + Directory.CreateDirectory(inDir); + File.Create(inFile).Dispose(); + var dt = new DateTime(2001, 1, 2, 3, 4, 5, DateTimeKind.Local); + File.SetLastWriteTime(inFile, dt); + + TarFile.CreateFromDirectory(sourceDirectoryName: inDir, destinationFileName: tarFile, includeBaseDirectory: false); + + Directory.CreateDirectory(outDir); + TarFile.ExtractToDirectory(sourceFileName: tarFile, destinationDirectoryName: outDir, overwriteFiles: false); + + Assert.True(File.Exists(outFile)); + Assert.InRange(File.GetLastWriteTime(outFile).Ticks, dt.AddSeconds(-3).Ticks, dt.AddSeconds(3).Ticks); // include some slop for filesystem granularity + } + [Theory] [InlineData(TestTarFormat.v7)] [InlineData(TestTarFormat.ustar)] diff --git a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.File.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.File.Tests.cs index 905e9b61bfb8e2..19a28d7f59ce39 100644 --- a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.File.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.File.Tests.cs @@ -56,6 +56,33 @@ public async Task NonExistentDirectory_Throws_Async() } } + [Fact] + public async Task SetsLastModifiedTimeOnExtractedFiles() + { + using TempDirectory root = new TempDirectory(); + + string inDir = Path.Join(root.Path, "indir"); + string inFile = Path.Join(inDir, "file"); + + string tarFile = Path.Join(root.Path, "file.tar"); + + string outDir = Path.Join(root.Path, "outdir"); + string outFile = Path.Join(outDir, "file"); + + Directory.CreateDirectory(inDir); + File.Create(inFile).Dispose(); + var dt = new DateTime(2001, 1, 2, 3, 4, 5, DateTimeKind.Local); + File.SetLastWriteTime(inFile, dt); + + await TarFile.CreateFromDirectoryAsync(sourceDirectoryName: inDir, destinationFileName: tarFile, includeBaseDirectory: false); + + Directory.CreateDirectory(outDir); + await TarFile.ExtractToDirectoryAsync(sourceFileName: tarFile, destinationDirectoryName: outDir, overwriteFiles: false); + + Assert.True(File.Exists(outFile)); + Assert.InRange(File.GetLastWriteTime(outFile).Ticks, dt.AddSeconds(-3).Ticks, dt.AddSeconds(3).Ticks); // include some slop for filesystem granularity + } + [Theory] [InlineData(TestTarFormat.v7)] [InlineData(TestTarFormat.ustar)] From d37016142ece439c7d6fd6a79cb568701c6fe029 Mon Sep 17 00:00:00 2001 From: Carlos Sanchez <1175054+carlossanlop@users.noreply.github.com> Date: Tue, 23 Aug 2022 11:13:58 -0700 Subject: [PATCH 08/10] Add tests for exotic external tar asset archives, fix some more corner case bugs (#74412) * Remove unused _readFirstEntry. Remnant from before we created PaxGlobalExtendedAttributesEntry. * Set the position of the freshly copied data stream to 0, so the first user access of the DataStream property gives them a stream ready to read from the beginning. * Process a PAX actual entry's data block only after the extended attributes are analyzed, in case the size is found as an extended attribute and needs to be overriden. * Add tests to verify the entries of the new external tar assets can be read. Verify their DataStream if available. * Add copyData argument to recent alignment padding tests. * Throw an exception sooner and with a clearer message when a data section is unexpected for the entry type. * Allow trailing nulls and spaces in octal fields. Co-authored-by: @am11 Adeel Mujahid <3840695+am11@users.noreply.github.com> * Throw a clearer exception if the unsupported sparse file entry type is encountered. These entries have additional data that indicates the locations of sparse bytes, which cannot be read with just the size field. So to avoid accidentally offseting the reader, we throw. * Tests. * Rename to TrimLeadingNullsAndSpaces Co-authored-by: carlossanlop --- .../src/Resources/Strings.resx | 3 + .../src/System/Formats/Tar/TarHeader.Read.cs | 25 +- .../src/System/Formats/Tar/TarHelpers.cs | 19 +- .../src/System/Formats/Tar/TarReader.cs | 40 ++- .../TarReader/TarReader.File.Async.Tests.cs | 241 +++++++++++++++++ .../tests/TarReader/TarReader.File.Tests.cs | 245 ++++++++++++++++-- .../TarReader/TarReader.GetNextEntry.Tests.cs | 17 +- .../TarReader.GetNextEntryAsync.Tests.cs | 17 +- .../System.Formats.Tar/tests/TarTestsBase.cs | 163 +++++++++++- 9 files changed, 705 insertions(+), 65 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx index 001d85b764bcaf..5308e9153c9791 100644 --- a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx +++ b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx @@ -205,6 +205,9 @@ The entry is a symbolic link or a hard link but the LinkName field is null or empty. + Entry type '{0}' not supported. + + Entry type '{0}' not supported in format '{1}'. diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index e43ae6973607dc..5578707d64eb18 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -20,7 +20,7 @@ internal sealed partial class TarHeader // Attempts to retrieve the next header from the specified tar archive stream. // Throws if end of stream is reached or if any data type conversion fails. // Returns a valid TarHeader object if the attributes were read successfully, null otherwise. - internal static TarHeader? TryGetNextHeader(Stream archiveStream, bool copyData, TarEntryFormat initialFormat) + internal static TarHeader? TryGetNextHeader(Stream archiveStream, bool copyData, TarEntryFormat initialFormat, bool processDataBlock) { // The four supported formats have a header that fits in the default record size Span buffer = stackalloc byte[TarHelpers.RecordSize]; @@ -28,7 +28,7 @@ internal sealed partial class TarHeader archiveStream.ReadExactly(buffer); TarHeader? header = TryReadAttributes(initialFormat, buffer); - if (header != null) + if (header != null && processDataBlock) { header.ProcessDataBlock(archiveStream, copyData); } @@ -39,7 +39,7 @@ internal sealed partial class TarHeader // Asynchronously attempts read all the fields of the next header. // Throws if end of stream is reached or if any data type conversion fails. // Returns true if all the attributes were read successfully, false otherwise. - internal static async ValueTask TryGetNextHeaderAsync(Stream archiveStream, bool copyData, TarEntryFormat initialFormat, CancellationToken cancellationToken) + internal static async ValueTask TryGetNextHeaderAsync(Stream archiveStream, bool copyData, TarEntryFormat initialFormat, bool processDataBlock, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); @@ -50,7 +50,7 @@ internal sealed partial class TarHeader await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false); TarHeader? header = TryReadAttributes(initialFormat, buffer.Span); - if (header != null) + if (header != null && processDataBlock) { await header.ProcessDataBlockAsync(archiveStream, copyData, cancellationToken).ConfigureAwait(false); } @@ -180,7 +180,7 @@ internal void ReplaceNormalAttributesWithExtended(Dictionary? di // will get all the data section read and the stream pointer positioned at the beginning of the next header. // - Block, Character, Directory, Fifo, HardLink and SymbolicLink typeflag entries have no data section so the archive stream pointer will be positioned at the beginning of the next header. // - All other typeflag entries with a data section will generate a stream wrapping the data section: SeekableSubReadStream for seekable archive streams, and SubReadStream for unseekable archive streams. - private void ProcessDataBlock(Stream archiveStream, bool copyData) + internal void ProcessDataBlock(Stream archiveStream, bool copyData) { bool skipBlockAlignmentPadding = true; @@ -199,6 +199,10 @@ private void ProcessDataBlock(Stream archiveStream, bool copyData) case TarEntryType.HardLink: case TarEntryType.SymbolicLink: // No data section + if (_size > 0) + { + throw new FormatException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag)); + } break; case TarEntryType.RegularFile: case TarEntryType.V7RegularFile: // Treated as regular file @@ -257,6 +261,10 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca case TarEntryType.HardLink: case TarEntryType.SymbolicLink: // No data section + if (_size > 0) + { + throw new FormatException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag)); + } break; case TarEntryType.RegularFile: case TarEntryType.V7RegularFile: // Treated as regular file @@ -311,6 +319,8 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca { MemoryStream copiedData = new MemoryStream(); TarHelpers.CopyBytes(archiveStream, copiedData, _size); + // Reset position pointer so the user can do the first DataStream read from the beginning + copiedData.Position = 0; return copiedData; } @@ -336,6 +346,8 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca { MemoryStream copiedData = new MemoryStream(); await TarHelpers.CopyBytesAsync(archiveStream, copiedData, size, cancellationToken).ConfigureAwait(false); + // Reset position pointer so the user can do the first DataStream read from the beginning + copiedData.Position = 0; return copiedData; } @@ -396,12 +408,13 @@ TarEntryType.LongLink or TarEntryType.LongPath or TarEntryType.MultiVolume or TarEntryType.RenamedOrSymlinked or - TarEntryType.SparseFile or TarEntryType.TapeVolume => TarEntryFormat.Gnu, // V7 is the only one that uses 'V7RegularFile'. TarEntryType.V7RegularFile => TarEntryFormat.V7, + TarEntryType.SparseFile => throw new NotSupportedException(string.Format(SR.TarEntryTypeNotSupported, header._typeFlag)), + // We can quickly determine the *minimum* possible format if the entry type // is the POSIX 'RegularFile', although later we could upgrade it to PAX or GNU _ => (header._typeFlag == TarEntryType.RegularFile) ? TarEntryFormat.Ustar : TarEntryFormat.V7 diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs index 898daf1e6c3ed7..c2a1b3b854c14b 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs @@ -203,6 +203,12 @@ internal static TarEntryType GetCorrectTypeFlagForFormat(TarEntryFormat format, internal static T ParseOctal(ReadOnlySpan buffer) where T : struct, INumber { buffer = TrimEndingNullsAndSpaces(buffer); + buffer = TrimLeadingNullsAndSpaces(buffer); + + if (buffer.Length == 0) + { + return T.Zero; + } T octalFactor = T.CreateTruncating(8u); T value = T.Zero; @@ -243,6 +249,17 @@ internal static ReadOnlySpan TrimEndingNullsAndSpaces(ReadOnlySpan b return buffer.Slice(0, trimmedLength); } + private static ReadOnlySpan TrimLeadingNullsAndSpaces(ReadOnlySpan buffer) + { + int newStart = 0; + while (newStart < buffer.Length && buffer[newStart] is 0 or 32) + { + newStart++; + } + + return buffer.Slice(newStart); + } + // Returns the ASCII string contained in the specified buffer of bytes, // removing the trailing null or space chars. internal static string GetTrimmedAsciiString(ReadOnlySpan buffer) => GetTrimmedString(buffer, Encoding.ASCII); @@ -351,7 +368,7 @@ TarEntryType.RegularFile or throw new FormatException(string.Format(SR.TarInvalidFormat, archiveFormat)); } - throw new InvalidOperationException(string.Format(SR.TarEntryTypeNotSupported, entryType, archiveFormat)); + throw new InvalidOperationException(string.Format(SR.TarEntryTypeNotSupportedInFormat, entryType, archiveFormat)); } } } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs index 915cb81caab664..5328d61190280b 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs @@ -19,7 +19,6 @@ public sealed class TarReader : IDisposable, IAsyncDisposable private readonly bool _leaveOpen; private TarEntry? _previouslyReadEntry; private List? _dataStreamsToDispose; - private bool _readFirstEntry; private bool _reachedEndMarkers; internal Stream _archiveStream; @@ -44,7 +43,6 @@ public TarReader(Stream archiveStream, bool leaveOpen = false) _previouslyReadEntry = null; _isDisposed = false; - _readFirstEntry = false; _reachedEndMarkers = false; } @@ -124,11 +122,6 @@ public async ValueTask DisposeAsync() TarHeader? header = TryGetNextEntryHeader(copyData); if (header != null) { - if (!_readFirstEntry) - { - _readFirstEntry = true; - } - TarEntry entry = header._format switch { TarEntryFormat.Pax => header._typeFlag is TarEntryType.GlobalExtendedAttributes ? @@ -282,11 +275,6 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel TarHeader? header = await TryGetNextEntryHeaderAsync(copyData, cancellationToken).ConfigureAwait(false); if (header != null) { - if (!_readFirstEntry) - { - _readFirstEntry = true; - } - TarEntry entry = header._format switch { TarEntryFormat.Pax => header._typeFlag is TarEntryType.GlobalExtendedAttributes ? @@ -319,7 +307,7 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel { Debug.Assert(!_reachedEndMarkers); - TarHeader? header = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Unknown); + TarHeader? header = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Unknown, processDataBlock: true); if (header == null) { @@ -361,7 +349,7 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel Debug.Assert(!_reachedEndMarkers); - TarHeader? header = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Unknown, cancellationToken).ConfigureAwait(false); + TarHeader? header = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Unknown, processDataBlock: true, cancellationToken).ConfigureAwait(false); if (header == null) { return null; @@ -397,9 +385,10 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel // and returns the actual entry with the processed extended attributes saved in the _extendedAttributes dictionary. private bool TryProcessExtendedAttributesHeader(TarHeader extendedAttributesHeader, bool copyData, [NotNullWhen(returnValue: true)] out TarHeader? actualHeader) { - actualHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Pax); + // Don't process the data block of the actual entry just yet, because there's a slim chance + // that the extended attributes contain a size that we need to override in the header + actualHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Pax, processDataBlock: false); - // Now get the actual entry if (actualHeader == null) { return false; @@ -417,6 +406,9 @@ TarEntryType.LongLink or // Replace all the attributes representing standard fields with the extended ones, if any actualHeader.ReplaceNormalAttributesWithExtended(extendedAttributesHeader.ExtendedAttributes); + // We retrieved the extended attributes, now we can read the data, and always with the right size + actualHeader.ProcessDataBlock(_archiveStream, copyData); + return true; } @@ -426,8 +418,9 @@ TarEntryType.LongLink or { cancellationToken.ThrowIfCancellationRequested(); - // Now get the actual entry - TarHeader? actualHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Pax, cancellationToken).ConfigureAwait(false); + // Don't process the data block of the actual entry just yet, because there's a slim chance + // that the extended attributes contain a size that we need to override in the header + TarHeader? actualHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Pax, processDataBlock: false, cancellationToken).ConfigureAwait(false); if (actualHeader == null) { return null; @@ -451,6 +444,9 @@ TarEntryType.LongLink or // Replace all the attributes representing standard fields with the extended ones, if any actualHeader.ReplaceNormalAttributesWithExtended(extendedAttributesHeader.ExtendedAttributes); + // We retrieved the extended attributes, now we can read the data, and always with the right size + actualHeader.ProcessDataBlock(_archiveStream, copyData); + return actualHeader; } @@ -460,7 +456,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta { finalHeader = new(TarEntryFormat.Gnu); - TarHeader? secondHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu); + TarHeader? secondHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true); // Get the second entry, which is the actual entry if (secondHeader == null) @@ -478,7 +474,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta if ((header._typeFlag is TarEntryType.LongLink && secondHeader._typeFlag is TarEntryType.LongPath) || (header._typeFlag is TarEntryType.LongPath && secondHeader._typeFlag is TarEntryType.LongLink)) { - TarHeader? thirdHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu); + TarHeader? thirdHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true); // Get the third entry, which is the actual entry if (thirdHeader == null) @@ -537,7 +533,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta cancellationToken.ThrowIfCancellationRequested(); // Get the second entry, which is the actual entry - TarHeader? secondHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, cancellationToken).ConfigureAwait(false); + TarHeader? secondHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true, cancellationToken).ConfigureAwait(false); if (secondHeader == null) { return null; @@ -556,7 +552,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta (header._typeFlag is TarEntryType.LongPath && secondHeader._typeFlag is TarEntryType.LongLink)) { // Get the third entry, which is the actual entry - TarHeader? thirdHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, cancellationToken).ConfigureAwait(false); + TarHeader? thirdHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true, cancellationToken).ConfigureAwait(false); if (thirdHeader == null) { return null; diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs index 14087fb030d020..69a543a206d4b9 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs @@ -4,6 +4,7 @@ using System.Collections.Generic; using System.Globalization; using System.IO; +using System.IO.Compression; using System.Linq; using System.Threading.Tasks; using Xunit; @@ -125,5 +126,245 @@ public Task Read_Archive_LongFileName_Over100_Under255_Async(TarEntryFormat form [InlineData(TarEntryFormat.Gnu, TestTarFormat.oldgnu)] public Task Read_Archive_LongPath_Over255_Async(TarEntryFormat format, TestTarFormat testFormat) => Read_Archive_LongPath_Over255_Async_Internal(format, testFormat); + + [Theory] + [MemberData(nameof(GetV7TestCaseNames))] + public Task ReadDataStreamOfTarGzV7Async(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.v7, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetUstarTestCaseNames))] + public Task ReadDataStreamOfTarGzUstarAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.ustar, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public Task ReadDataStreamOfTarGzPaxAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.pax, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public Task ReadDataStreamOfTarGzPaxGeaAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.pax_gea, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public Task ReadDataStreamOfTarGzOldGnuAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.oldgnu, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public Task ReadDataStreamOfTarGzGnuAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.gnu, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetV7TestCaseNames))] + public Task ReadCopiedDataStreamOfTarGzV7Async(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.v7, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetUstarTestCaseNames))] + public Task ReadCopiedDataStreamOfTarGzUstarAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.ustar, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public Task ReadCopiedDataStreamOfTarGzPaxAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.pax, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public Task ReadCopiedDataStreamOfTarGzPaxGeaAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.pax_gea, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public Task ReadCopiedDataStreamOfTarGzOldGnuAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.oldgnu, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public Task ReadCopiedDataStreamOfTarGzGnuAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.gnu, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetGoLangTarTestCaseNames))] + public Task ReadDataStreamOfExternalAssetsGoLangAsync(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternalAsync("golang_tar", testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetNodeTarTestCaseNames))] + public Task ReadDataStreamOfExternalAssetsNodeAsync(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternalAsync("node-tar", testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetRsTarTestCaseNames))] + public Task ReadDataStreamOfExternalAssetsRsAsync(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternalAsync("tar-rs", testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetGoLangTarTestCaseNames))] + public Task ReadCopiedDataStreamOfExternalAssetsGoLangAsync(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternalAsync("golang_tar", testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetNodeTarTestCaseNames))] + public Task ReadCopiedDataStreamOfExternalAssetsNodeAsync(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternalAsync("node-tar", testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetRsTarTestCaseNames))] + public Task ReadCopiedDataStreamOfExternalAssetsRsAsync(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternalAsync("tar-rs", testCaseName, copyData: true); + + [Fact] + public async Task Throw_FifoContainsNonZeroDataSectionAsync() + { + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "hdr-only"); + await using TarReader reader = new TarReader(archiveStream); + Assert.NotNull(await reader.GetNextEntryAsync()); + Assert.NotNull(await reader.GetNextEntryAsync()); + Assert.NotNull(await reader.GetNextEntryAsync()); + Assert.NotNull(await reader.GetNextEntryAsync()); + Assert.NotNull(await reader.GetNextEntryAsync()); + Assert.NotNull(await reader.GetNextEntryAsync()); + Assert.NotNull(await reader.GetNextEntryAsync()); + Assert.NotNull(await reader.GetNextEntryAsync()); + await Assert.ThrowsAsync(async () => await reader.GetNextEntryAsync()); + } + + [Fact] + public async Task Throw_SingleExtendedAttributesEntryWithNoActualEntryAsync() + { + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "pax-path-hdr"); + await using TarReader reader = new TarReader(archiveStream); + await Assert.ThrowsAsync(async () => await reader.GetNextEntryAsync()); + } + + [Theory] + [InlineData("tar-rs", "spaces")] + [InlineData("golang_tar", "v7")] + public async Task AllowSpacesInOctalFieldsAsync(string folderName, string testCaseName) + { + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, folderName, testCaseName); + await using TarReader reader = new TarReader(archiveStream); + TarEntry entry; + while ((entry = await reader.GetNextEntryAsync()) != null) + { + AssertExtensions.GreaterThan(entry.Checksum, 0); + AssertExtensions.GreaterThan((int)entry.Mode, 0); + } + } + + [Theory] + [InlineData("pax-multi-hdrs")] // Multiple consecutive PAX metadata entries + [InlineData("gnu-multi-hdrs")] // Multiple consecutive GNU metadata entries + [InlineData("neg-size")] // Garbage chars + [InlineData("invalid-go17")] // Many octal fields are all zero chars + [InlineData("issue11169")] // Checksum with null in the middle + [InlineData("issue10968")] // Garbage chars + [InlineData("writer-big")] // The size field contains an euro char + public async Task Throw_ArchivesWithRandomCharsAsync(string testCaseName) + { + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", testCaseName); + await using TarReader reader = new TarReader(archiveStream); + await Assert.ThrowsAsync(async () => await reader.GetNextEntryAsync()); + } + + [Fact] + public async Task GarbageEntryChecksumZeroReturnNullAsync() + { + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "issue12435"); + await using TarReader reader = new TarReader(archiveStream); + Assert.Null(await reader.GetNextEntryAsync()); + } + + [Theory] + [InlineData("golang_tar", "gnu-nil-sparse-data")] + [InlineData("golang_tar", "gnu-nil-sparse-hole")] + [InlineData("golang_tar", "gnu-sparse-big")] + [InlineData("golang_tar", "sparse-formats")] + [InlineData("tar-rs", "sparse-1")] + [InlineData("tar-rs", "sparse")] + public async Task SparseEntryNotSupportedAsync(string testFolderName, string testCaseName) + { + // Currently sparse entries are not supported. + + // There are PAX archives archives in the golang folder that have extended attributes for treating a regular file as a sparse file. + // Sparse entries were created for the GNU format, so they are very rare entry types which are excluded from this test method: + // pax-nil-sparse-data, pax-nil-sparse-hole, pax-sparse-big + + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, testFolderName, testCaseName); + await using TarReader reader = new TarReader(archiveStream); + await Assert.ThrowsAsync(async () => await reader.GetNextEntryAsync()); + } + + [Fact] + public async Task DirectoryListRegularFileAndSparseAsync() + { + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "gnu-incremental"); + await using TarReader reader = new TarReader(archiveStream); + TarEntry directoryList = await reader.GetNextEntryAsync(); + + Assert.Equal(TarEntryType.DirectoryList, directoryList.EntryType); + Assert.NotNull(directoryList.DataStream); + Assert.Equal(14, directoryList.Length); + + Assert.NotNull(await reader.GetNextEntryAsync()); // Just a regular file + + await Assert.ThrowsAsync(async () => await reader.GetNextEntryAsync()); // Sparse + } + + [Fact] + public async Task PaxSizeLargerThanMaxAllowedByStreamAsync() + { + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "writer-big-long"); + await using TarReader reader = new TarReader(archiveStream); + // The extended attribute 'size' has the value 17179869184 + // Exception message: Stream length must be non-negative and less than 2^31 - 1 - origin + await Assert.ThrowsAsync(async () => await reader.GetNextEntryAsync()); + } + + private static async Task VerifyDataStreamOfTarUncompressedInternalAsync(string testFolderName, string testCaseName, bool copyData) + { + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, testFolderName, testCaseName); + await VerifyDataStreamOfTarInternalAsync(archiveStream, copyData); + } + + private static async Task VerifyDataStreamOfTarGzInternalAsync(TestTarFormat testTarFormat, string testCaseName, bool copyData) + { + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.GZip, testTarFormat, testCaseName); + await using GZipStream decompressor = new GZipStream(archiveStream, CompressionMode.Decompress); + await VerifyDataStreamOfTarInternalAsync(decompressor, copyData); + } + + private static async Task VerifyDataStreamOfTarInternalAsync(Stream archiveStream, bool copyData) + { + await using TarReader reader = new TarReader(archiveStream); + + TarEntry entry; + + await using MemoryStream ms = new MemoryStream(); + while ((entry = await reader.GetNextEntryAsync(copyData)) != null) + { + if (entry.EntryType is TarEntryType.V7RegularFile or TarEntryType.RegularFile) + { + if (entry.Length == 0) + { + Assert.Null(entry.DataStream); + } + else + { + Assert.NotNull(entry.DataStream); + Assert.Equal(entry.DataStream.Length, entry.Length); + if (copyData) + { + Assert.True(entry.DataStream.CanSeek); + Assert.Equal(0, entry.DataStream.Position); + } + } + } + } + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs index 5e35c12c3aaac9..298e3e8be47947 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs @@ -4,8 +4,10 @@ using System.Collections.Generic; using System.Globalization; using System.IO; +using System.IO.Compression; using System.Linq; using Xunit; +using static System.Formats.Tar.Tests.TarTestsBase; namespace System.Formats.Tar.Tests { @@ -125,28 +127,241 @@ public void Read_Archive_LongFileName_Over100_Under255(TarEntryFormat format, Te public void Read_Archive_LongPath_Over255(TarEntryFormat format, TestTarFormat testFormat) => Read_Archive_LongPath_Over255_Internal(format, testFormat); + [Theory] + [MemberData(nameof(GetV7TestCaseNames))] + public void ReadDataStreamOfTarGzV7(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.v7, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetUstarTestCaseNames))] + public void ReadDataStreamOfTarGzUstar(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.ustar, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public void ReadDataStreamOfTarGzPax(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.pax, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public void ReadDataStreamOfTarGzPaxGea(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.pax_gea, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public void ReadDataStreamOfTarGzOldGnu(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.oldgnu, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public void ReadDataStreamOfTarGzGnu(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.gnu, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetV7TestCaseNames))] + public void ReadCopiedDataStreamOfTarGzV7(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.v7, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetUstarTestCaseNames))] + public void ReadCopiedDataStreamOfTarGzUstar(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.ustar, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public void ReadCopiedDataStreamOfTarGzPax(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.pax, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public void ReadCopiedDataStreamOfTarGzPaxGea(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.pax_gea, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public void ReadCopiedDataStreamOfTarGzOldGnu(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.oldgnu, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public void ReadCopiedDataStreamOfTarGzGnu(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.gnu, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetGoLangTarTestCaseNames))] + public void ReadDataStreamOfExternalAssetsGoLang(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternal("golang_tar", testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetNodeTarTestCaseNames))] + public void ReadDataStreamOfExternalAssetsNode(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternal("node-tar", testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetRsTarTestCaseNames))] + public void ReadDataStreamOfExternalAssetsRs(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternal("tar-rs", testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetGoLangTarTestCaseNames))] + public void ReadCopiedDataStreamOfExternalAssetsGoLang(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternal("golang_tar", testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetNodeTarTestCaseNames))] + public void ReadCopiedDataStreamOfExternalAssetsNode(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternal("node-tar", testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetRsTarTestCaseNames))] + public void ReadCopiedDataStreamOfExternalAssetsRs(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternal("tar-rs", testCaseName, copyData: true); + + [Fact] + public void Throw_FifoContainsNonZeroDataSection() + { + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "hdr-only"); + using TarReader reader = new TarReader(archiveStream); + Assert.NotNull(reader.GetNextEntry()); + Assert.NotNull(reader.GetNextEntry()); + Assert.NotNull(reader.GetNextEntry()); + Assert.NotNull(reader.GetNextEntry()); + Assert.NotNull(reader.GetNextEntry()); + Assert.NotNull(reader.GetNextEntry()); + Assert.NotNull(reader.GetNextEntry()); + Assert.NotNull(reader.GetNextEntry()); + Assert.Throws(() => reader.GetNextEntry()); + } + [Fact] - public void Read_NodeTarArchives_Successfully() + public void Throw_SingleExtendedAttributesEntryWithNoActualEntry() { - string nodeTarPath = Path.Join(Directory.GetCurrentDirectory(), "tar", "node-tar"); - foreach (string file in Directory.EnumerateFiles(nodeTarPath, "*.tar", SearchOption.AllDirectories)) + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "pax-path-hdr"); + using TarReader reader = new TarReader(archiveStream); + Assert.Throws(() => reader.GetNextEntry()); + } + + [Theory] + [InlineData("tar-rs", "spaces")] + [InlineData("golang_tar", "v7")] + public void AllowSpacesInOctalFields(string folderName, string testCaseName) + { + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, folderName, testCaseName); + using TarReader reader = new TarReader(archiveStream); + TarEntry entry; + while ((entry = reader.GetNextEntry()) != null) { - using FileStream sourceStream = File.Open(file, FileMode.Open, FileAccess.Read, FileShare.Read); - using var reader = new TarReader(sourceStream); + AssertExtensions.GreaterThan(entry.Checksum, 0); + AssertExtensions.GreaterThan((int)entry.Mode, 0); + } + } - TarEntry? entry = null; - while (true) - { - Exception ex = Record.Exception(() => entry = reader.GetNextEntry()); - Assert.Null(ex); + [Theory] + [InlineData("pax-multi-hdrs")] // Multiple consecutive PAX metadata entries + [InlineData("gnu-multi-hdrs")] // Multiple consecutive GNU metadata entries + [InlineData("neg-size")] // Garbage chars + [InlineData("invalid-go17")] // Many octal fields are all zero chars + [InlineData("issue11169")] // Checksum with null in the middle + [InlineData("issue10968")] // Garbage chars + [InlineData("writer-big")] // The size field contains an euro char + public void Throw_ArchivesWithRandomChars(string testCaseName) + { + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", testCaseName); + using TarReader reader = new TarReader(archiveStream); + Assert.Throws(() => reader.GetNextEntry()); + } + + [Fact] + public void GarbageEntryChecksumZeroReturnNull() + { + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "issue12435"); + using TarReader reader = new TarReader(archiveStream); + Assert.Null(reader.GetNextEntry()); + } + + [Theory] + [InlineData("golang_tar", "gnu-nil-sparse-data")] + [InlineData("golang_tar", "gnu-nil-sparse-hole")] + [InlineData("golang_tar", "gnu-sparse-big")] + [InlineData("golang_tar", "sparse-formats")] + [InlineData("tar-rs", "sparse-1")] + [InlineData("tar-rs", "sparse")] + public void SparseEntryNotSupported(string testFolderName, string testCaseName) + { + // Currently sparse entries are not supported. + + // There are PAX archives archives in the golang folder that have extended attributes for treating a regular file as a sparse file. + // Sparse entries were created for the GNU format, so they are very rare entry types which are excluded from this test method: + // pax-nil-sparse-data, pax-nil-sparse-hole, pax-sparse-big + + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, testFolderName, testCaseName); + using TarReader reader = new TarReader(archiveStream); + Assert.Throws(() => reader.GetNextEntry()); + } - if (entry is null) break; + [Fact] + public void DirectoryListRegularFileAndSparse() + { + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "gnu-incremental"); + using TarReader reader = new TarReader(archiveStream); + TarEntry directoryList = reader.GetNextEntry(); + + Assert.Equal(TarEntryType.DirectoryList, directoryList.EntryType); + Assert.NotNull(directoryList.DataStream); + Assert.Equal(14, directoryList.Length); + + Assert.NotNull(reader.GetNextEntry()); // Just a regular file + + Assert.Throws(() => reader.GetNextEntry()); // Sparse + } + + [Fact] + public void PaxSizeLargerThanMaxAllowedByStream() + { + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "writer-big-long"); + using TarReader reader = new TarReader(archiveStream); + // The extended attribute 'size' has the value 17179869184 + // Exception message: Stream length must be non-negative and less than 2^31 - 1 - origin + Assert.Throws(() => reader.GetNextEntry()); + } - ex = Record.Exception(() => entry.Name); - Assert.Null(ex); + private static void VerifyDataStreamOfTarUncompressedInternal(string testFolderName, string testCaseName, bool copyData) + { + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, testFolderName, testCaseName); + VerifyDataStreamOfTarInternal(archiveStream, copyData); + } + + private static void VerifyDataStreamOfTarGzInternal(TestTarFormat testTarFormat, string testCaseName, bool copyData) + { + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.GZip, testTarFormat, testCaseName); + using GZipStream decompressor = new GZipStream(archiveStream, CompressionMode.Decompress); + VerifyDataStreamOfTarInternal(decompressor, copyData); + } - ex = Record.Exception(() => entry.Length); - Assert.Null(ex); + private static void VerifyDataStreamOfTarInternal(Stream archiveStream, bool copyData) + { + using TarReader reader = new TarReader(archiveStream); + + TarEntry entry; + + while ((entry = reader.GetNextEntry(copyData)) != null) + { + if (entry.EntryType is TarEntryType.V7RegularFile or TarEntryType.RegularFile) + { + if (entry.Length == 0) + { + Assert.Null(entry.DataStream); + } + else + { + Assert.NotNull(entry.DataStream); + Assert.Equal(entry.DataStream.Length, entry.Length); + if (copyData) + { + Assert.True(entry.DataStream.CanSeek); + Assert.Equal(0, entry.DataStream.Position); + } + } } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntry.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntry.Tests.cs index 1c8dfeea19fce5..dda0cae56b69f8 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntry.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntry.Tests.cs @@ -252,10 +252,13 @@ public void GetNextEntry_UnseekableArchive_ReplaceDataStream_ExcludeFromDisposin } [Theory] - [InlineData(512)] - [InlineData(512 + 1)] - [InlineData(512 + 512 - 1)] - public void BlockAlignmentPadding_DoesNotAffectNextEntries(int contentSize) + [InlineData(512, false)] + [InlineData(512, true)] + [InlineData(512 + 1, false)] + [InlineData(512 + 1, true)] + [InlineData(512 + 512 - 1, false)] + [InlineData(512 + 512 - 1, true)] + public void BlockAlignmentPadding_DoesNotAffectNextEntries(int contentSize, bool copyData) { byte[] fileContents = new byte[contentSize]; Array.Fill(fileContents, 0x1); @@ -275,17 +278,17 @@ public void BlockAlignmentPadding_DoesNotAffectNextEntries(int contentSize) using var unseekable = new WrappedStream(archive, archive.CanRead, archive.CanWrite, canSeek: false); using var reader = new TarReader(unseekable); - TarEntry e = reader.GetNextEntry(); + TarEntry e = reader.GetNextEntry(copyData); Assert.Equal(contentSize, e.Length); byte[] buffer = new byte[contentSize]; while (e.DataStream.Read(buffer) > 0) ; AssertExtensions.SequenceEqual(fileContents, buffer); - e = reader.GetNextEntry(); + e = reader.GetNextEntry(copyData); Assert.Equal(0, e.Length); - e = reader.GetNextEntry(); + e = reader.GetNextEntry(copyData); Assert.Null(e); } } diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntryAsync.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntryAsync.Tests.cs index a2e14598557c95..1dcd9326ee81ee 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntryAsync.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntryAsync.Tests.cs @@ -290,10 +290,13 @@ public async Task GetNextEntry_UnseekableArchive_ReplaceDataStream_ExcludeFromDi } [Theory] - [InlineData(512)] - [InlineData(512 + 1)] - [InlineData(512 + 512 - 1)] - public async Task BlockAlignmentPadding_DoesNotAffectNextEntries_Async(int contentSize) + [InlineData(512, false)] + [InlineData(512, true)] + [InlineData(512 + 1, false)] + [InlineData(512 + 1, true)] + [InlineData(512 + 512 - 1, false)] + [InlineData(512 + 512 - 1, true)] + public async Task BlockAlignmentPadding_DoesNotAffectNextEntries_Async(int contentSize, bool copyData) { byte[] fileContents = new byte[contentSize]; Array.Fill(fileContents, 0x1); @@ -313,17 +316,17 @@ public async Task BlockAlignmentPadding_DoesNotAffectNextEntries_Async(int conte using var unseekable = new WrappedStream(archive, archive.CanRead, archive.CanWrite, canSeek: false); using var reader = new TarReader(unseekable); - TarEntry e = await reader.GetNextEntryAsync(); + TarEntry e = await reader.GetNextEntryAsync(copyData); Assert.Equal(contentSize, e.Length); byte[] buffer = new byte[contentSize]; while (e.DataStream.Read(buffer) > 0) ; AssertExtensions.SequenceEqual(fileContents, buffer); - e = await reader.GetNextEntryAsync(); + e = await reader.GetNextEntryAsync(copyData); Assert.Equal(0, e.Length); - e = await reader.GetNextEntryAsync(); + e = await reader.GetNextEntryAsync(copyData); Assert.Null(e); } } diff --git a/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs b/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs index 04d034ce8764f8..23dc653c83a165 100644 --- a/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs +++ b/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs @@ -3,6 +3,8 @@ using System.Collections.Generic; using System.IO; +using System.Linq; +using System.Runtime.CompilerServices; using Microsoft.DotNet.RemoteExecutor; using Xunit; @@ -88,6 +90,94 @@ public abstract partial class TarTestsBase : FileCleanupTestBase protected const string PaxEaDevMajor = "devmajor"; protected const string PaxEaDevMinor = "devminor"; + private static readonly string[] V7TestCaseNames = new[] + { + "file", + "file_hardlink", + "file_symlink", + "folder_file", + "folder_file_utf8", + "folder_subfolder_file", + "foldersymlink_folder_subfolder_file", + "many_small_files" + }; + + private static readonly string[] UstarTestCaseNames = new[] + { + "longpath_splitable_under255", + "specialfiles" }; + + private static readonly string[] PaxAndGnuTestCaseNames = new[] + { + "file_longsymlink", + "longfilename_over100_under255", + "longpath_over255" + }; + + private static readonly string[] GoLangTestCaseNames = new[] + { + "empty", + "file-and-dir", + "gnu-long-nul", + "gnu-not-utf8", + "gnu-utf8", + "gnu", + "hardlink", + "nil-uid", + "pax-bad-hdr-file", + "pax-bad-mtime-file", + "pax-global-records", + "pax-nul-path", + "pax-nul-xattrs", + "pax-pos-size-file", + "pax-records", + "pax", + "star", + "trailing-slash", + "ustar-file-devs", + "ustar-file-reg", + "ustar", + "writer", + "xattrs" + }; + + private static readonly string[] NodeTarTestCaseNames = new[] + { + "bad-cksum", + "body-byte-counts", + "dir", + "emptypax", + "file", + "global-header", + "links-invalid", + "links-strip", + "links", + "long-paths", + "long-pax", + "next-file-has-long", + "null-byte", + "path-missing", + "trailing-slash-corner-case", + "utf8" + }; + + private static readonly string[] RsTarTestCaseNames = new[] + { + "7z_long_path", + "directory", + "duplicate_dirs", + "empty_filename", + "file_times", + "link", + "pax_size", + "pax", + "pax2", + "reading_files", + "simple_missing_last_header", + "simple", + "xattrs" + }; + protected enum CompressionMethod { // Archiving only, no compression @@ -121,30 +211,41 @@ public enum TestTarFormat protected TarTestsBase() { CreateDirectoryDefaultMode = Directory.CreateDirectory(GetRandomDirPath()).UnixFileMode; // '0777 & ~umask' - UMask = ~CreateDirectoryDefaultMode & (UnixFileMode)Convert.ToInt32("777", 8); + UMask = ~CreateDirectoryDefaultMode & (UnixFileMode)Convert.ToInt32("777", + 8); } protected static string GetTestCaseUnarchivedFolderPath(string testCaseName) => - Path.Join(Directory.GetCurrentDirectory(), "unarchived", testCaseName); + Path.Join(Directory.GetCurrentDirectory(), "unarchived", + testCaseName); protected static string GetTarFilePath(CompressionMethod compressionMethod, TestTarFormat format, string testCaseName) + => GetTarFilePath(compressionMethod, format.ToString(), testCaseName); + + protected static string GetTarFilePath(CompressionMethod compressionMethod, string testFolderName, string testCaseName) { (string compressionMethodFolder, string fileExtension) = compressionMethod switch { - CompressionMethod.Uncompressed => ("tar", ".tar"), - CompressionMethod.GZip => ("targz", ".tar.gz"), + CompressionMethod.Uncompressed => ("tar", + ".tar"), + CompressionMethod.GZip => ("targz", + ".tar.gz"), _ => throw new InvalidOperationException($"Unexpected compression method: {compressionMethod}"), }; - return Path.Join(Directory.GetCurrentDirectory(), compressionMethodFolder, format.ToString(), testCaseName + fileExtension); + return Path.Join(Directory.GetCurrentDirectory(), compressionMethodFolder, testFolderName, testCaseName + fileExtension); } // MemoryStream containing the copied contents of the specified file. Meant for reading and writing. protected static MemoryStream GetTarMemoryStream(CompressionMethod compressionMethod, TestTarFormat format, string testCaseName) => - GetMemoryStream(GetTarFilePath(compressionMethod, format, testCaseName)); + GetTarMemoryStream(compressionMethod, format.ToString(), testCaseName); + + protected static MemoryStream GetTarMemoryStream(CompressionMethod compressionMethod, string testFolderName, string testCaseName) => + GetMemoryStream(GetTarFilePath(compressionMethod, testFolderName, testCaseName)); protected static string GetStrangeTarFilePath(string testCaseName) => - Path.Join(Directory.GetCurrentDirectory(), "strange", testCaseName + ".tar"); + Path.Join(Directory.GetCurrentDirectory(), "strange", + testCaseName + ".tar"); protected static MemoryStream GetStrangeTarMemoryStream(string testCaseName) => GetMemoryStream(GetStrangeTarFilePath(testCaseName)); @@ -462,5 +563,53 @@ protected void Verify_Extract(string destination, TarEntry entry, TarEntryType e AssertFileModeEquals(destination, TestPermission1); } + + public static IEnumerable GetNodeTarTestCaseNames() + { + foreach (string name in NodeTarTestCaseNames) + { + yield return new object[] { name }; + } + } + + public static IEnumerable GetGoLangTarTestCaseNames() + { + foreach (string name in GoLangTestCaseNames) + { + yield return new object[] { name }; + } + } + + public static IEnumerable GetRsTarTestCaseNames() + { + foreach (string name in RsTarTestCaseNames) + { + yield return new object[] { name }; + } + } + + public static IEnumerable GetV7TestCaseNames() + { + foreach (string name in V7TestCaseNames) + { + yield return new object[] { name }; + } + } + + public static IEnumerable GetUstarTestCaseNames() + { + foreach (string name in UstarTestCaseNames.Concat(V7TestCaseNames)) + { + yield return new object[] { name }; + } + } + + public static IEnumerable GetPaxAndGnuTestCaseNames() + { + foreach (string name in UstarTestCaseNames.Concat(V7TestCaseNames).Concat(PaxAndGnuTestCaseNames)) + { + yield return new object[] { name }; + } + } } } From 6945d4e3d789acd637eca94d8941a5078971ea84 Mon Sep 17 00:00:00 2001 From: carlossanlop Date: Tue, 23 Aug 2022 11:39:26 -0700 Subject: [PATCH 09/10] Remove Compression changes, keep changes confined to Tar. --- .../src/System/IO/Archiving.Utils.Unix.cs | 20 ------- .../src/System/IO/Archiving.Utils.Windows.cs | 43 -------------- .../Common/src/System/IO/Archiving.Utils.cs | 45 +++++++++++++++ .../src/System/Formats/Tar/TarFile.cs | 4 +- .../src/System/Formats/Tar/TarHelpers.Unix.cs | 20 +++++++ .../System/Formats/Tar/TarHelpers.Windows.cs | 42 ++++++++++++++ .../System/IO/Compression/ZipFile.Create.cs | 57 ++++++++++++------- 7 files changed, 146 insertions(+), 85 deletions(-) diff --git a/src/libraries/Common/src/System/IO/Archiving.Utils.Unix.cs b/src/libraries/Common/src/System/IO/Archiving.Utils.Unix.cs index 18ab525f919b39..ca9b37759a31cf 100644 --- a/src/libraries/Common/src/System/IO/Archiving.Utils.Unix.cs +++ b/src/libraries/Common/src/System/IO/Archiving.Utils.Unix.cs @@ -6,25 +6,5 @@ namespace System.IO internal static partial class ArchivingUtils { internal static string SanitizeEntryFilePath(string entryPath) => entryPath.Replace('\0', '_'); - - public static unsafe string EntryFromPath(ReadOnlySpan path, bool appendPathSeparator = false) - { - // Remove leading separators. - int nonSlash = path.IndexOfAnyExcept('/'); - if (nonSlash == -1) - { - nonSlash = path.Length; - } - path = path.Slice(nonSlash); - - // Append a separator if necessary. - return (path.IsEmpty, appendPathSeparator) switch - { - (false, false) => path.ToString(), - (false, true) => string.Concat(path, "/"), - (true, false) => string.Empty, - (true, true) => "/", - }; - } } } diff --git a/src/libraries/Common/src/System/IO/Archiving.Utils.Windows.cs b/src/libraries/Common/src/System/IO/Archiving.Utils.Windows.cs index 412563966fc722..88f7ffb14f4d29 100644 --- a/src/libraries/Common/src/System/IO/Archiving.Utils.Windows.cs +++ b/src/libraries/Common/src/System/IO/Archiving.Utils.Windows.cs @@ -1,7 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using System.Runtime.InteropServices; using System.Text; namespace System.IO @@ -43,47 +42,5 @@ internal static string SanitizeEntryFilePath(string entryPath) // There weren't any characters to sanitize. Just return the original string. return entryPath; } - - public static unsafe string EntryFromPath(ReadOnlySpan path, bool appendPathSeparator = false) - { - // Remove leading separators. - int nonSlash = path.IndexOfAnyExcept('/', '\\'); - if (nonSlash == -1) - { - nonSlash = path.Length; - } - path = path.Slice(nonSlash); - - // Replace \ with /, and append a separator if necessary. - - if (path.IsEmpty) - { - return appendPathSeparator ? - "/" : - string.Empty; - } - - fixed (char* pathPtr = &MemoryMarshal.GetReference(path)) - { - return string.Create(appendPathSeparator ? path.Length + 1 : path.Length, (appendPathSeparator, (IntPtr)pathPtr, path.Length), static (dest, state) => - { - ReadOnlySpan path = new ReadOnlySpan((char*)state.Item2, state.Length); - path.CopyTo(dest); - if (state.appendPathSeparator) - { - dest[^1] = '/'; - } - - // To ensure tar files remain compatible with Unix, and per the ZIP File Format Specification 4.4.17.1, - // all slashes should be forward slashes. - int pos; - while ((pos = dest.IndexOf('\\')) >= 0) - { - dest[pos] = '/'; - dest = dest.Slice(pos + 1); - } - }); - } - } } } diff --git a/src/libraries/Common/src/System/IO/Archiving.Utils.cs b/src/libraries/Common/src/System/IO/Archiving.Utils.cs index 23cc77a30c75f1..8335dbd26edbc3 100644 --- a/src/libraries/Common/src/System/IO/Archiving.Utils.cs +++ b/src/libraries/Common/src/System/IO/Archiving.Utils.cs @@ -9,6 +9,51 @@ namespace System.IO { internal static partial class ArchivingUtils { + // To ensure tar files remain compatible with Unix, + // and per the ZIP File Format Specification 4.4.17.1, + // all slashes should be forward slashes. + private const char PathSeparatorChar = '/'; + private const string PathSeparatorString = "/"; + + public static string EntryFromPath(string entry, int offset, int length, ref char[] buffer, bool appendPathSeparator = false) + { + Debug.Assert(length <= entry.Length - offset); + Debug.Assert(buffer != null); + + // Remove any leading slashes from the entry name: + while (length > 0) + { + if (entry[offset] != Path.DirectorySeparatorChar && + entry[offset] != Path.AltDirectorySeparatorChar) + break; + + offset++; + length--; + } + + if (length == 0) + return appendPathSeparator ? PathSeparatorString : string.Empty; + + int resultLength = appendPathSeparator ? length + 1 : length; + EnsureCapacity(ref buffer, resultLength); + entry.CopyTo(offset, buffer, 0, length); + + // '/' is a more broadly recognized directory separator on all platforms (eg: mac, linux) + // We don't use Path.DirectorySeparatorChar or AltDirectorySeparatorChar because this is + // explicitly trying to standardize to '/' + for (int i = 0; i < length; i++) + { + char ch = buffer[i]; + if (ch == Path.DirectorySeparatorChar || ch == Path.AltDirectorySeparatorChar) + buffer[i] = PathSeparatorChar; + } + + if (appendPathSeparator) + buffer[length] = PathSeparatorChar; + + return new string(buffer, 0, resultLength); + } + public static void EnsureCapacity(ref char[] buffer, int min) { Debug.Assert(buffer != null); diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs index 2e490747df80d0..8d8902f5806d59 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs @@ -380,12 +380,12 @@ private static string GetBasePathForCreateFromDirectory(DirectoryInfo di, bool i private static string GetEntryNameForFileSystemInfo(FileSystemInfo file, int basePathLength) { bool isDirectory = (file.Attributes & FileAttributes.Directory) != 0; - return ArchivingUtils.EntryFromPath(file.FullName.AsSpan(basePathLength), appendPathSeparator: isDirectory); + return TarHelpers.EntryFromPath(file.FullName.AsSpan(basePathLength), appendPathSeparator: isDirectory); } private static string GetEntryNameForBaseDirectory(string name) { - return ArchivingUtils.EntryFromPath(name, appendPathSeparator: true); + return TarHelpers.EntryFromPath(name, appendPathSeparator: true); } // Extracts an archive into the specified directory. diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Unix.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Unix.cs index 3e0a0b814484d4..613816904c9c4a 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Unix.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Unix.cs @@ -114,5 +114,25 @@ internal static void SetPendingModes(SortedDictionary? pen File.SetUnixFileMode(dir.Key, dir.Value & ~umask); } } + + internal static unsafe string EntryFromPath(ReadOnlySpan path, bool appendPathSeparator = false) + { + // Remove leading separators. + int nonSlash = path.IndexOfAnyExcept('/'); + if (nonSlash == -1) + { + nonSlash = path.Length; + } + path = path.Slice(nonSlash); + + // Append a separator if necessary. + return (path.IsEmpty, appendPathSeparator) switch + { + (false, false) => path.ToString(), + (false, true) => string.Concat(path, "/"), + (true, false) => string.Empty, + (true, true) => "/", + }; + } } } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Windows.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Windows.cs index 6569ff237dbf97..8354e11917ca25 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Windows.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Windows.cs @@ -18,5 +18,47 @@ internal static void CreateDirectory(string fullPath, UnixFileMode? mode, Sorted internal static void SetPendingModes(SortedDictionary? pendingModes) => Debug.Assert(pendingModes is null); + + internal static unsafe string EntryFromPath(ReadOnlySpan path, bool appendPathSeparator = false) + { + // Remove leading separators. + int nonSlash = path.IndexOfAnyExcept('/', '\\'); + if (nonSlash == -1) + { + nonSlash = path.Length; + } + path = path.Slice(nonSlash); + + // Replace \ with /, and append a separator if necessary. + + if (path.IsEmpty) + { + return appendPathSeparator ? + "/" : + string.Empty; + } + + fixed (char* pathPtr = &MemoryMarshal.GetReference(path)) + { + return string.Create(appendPathSeparator ? path.Length + 1 : path.Length, (appendPathSeparator, (IntPtr)pathPtr, path.Length), static (dest, state) => + { + ReadOnlySpan path = new ReadOnlySpan((char*)state.Item2, state.Length); + path.CopyTo(dest); + if (state.appendPathSeparator) + { + dest[^1] = '/'; + } + + // To ensure tar files remain compatible with Unix, and per the ZIP File Format Specification 4.4.17.1, + // all slashes should be forward slashes. + int pos; + while ((pos = dest.IndexOf('\\')) >= 0) + { + dest[pos] = '/'; + dest = dest.Slice(pos + 1); + } + }); + } + } } } diff --git a/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs b/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs index 70894d2b74f7ef..4370cd16878b17 100644 --- a/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs +++ b/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs @@ -375,32 +375,49 @@ private static void DoCreateFromDirectory(string sourceDirectoryName, string des if (includeBaseDirectory && di.Parent != null) basePath = di.Parent.FullName; - foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) - { - directoryIsEmpty = false; + // Windows' MaxPath (260) is used as an arbitrary default capacity, as it is likely + // to be greater than the length of typical entry names from the file system, even + // on non-Windows platforms. The capacity will be increased, if needed. + const int DefaultCapacity = 260; + char[] entryNameBuffer = ArrayPool.Shared.Rent(DefaultCapacity); - if (file is FileInfo) - { - // Create entry for file: - string entryName = ArchivingUtils.EntryFromPath(file.FullName.AsSpan(basePath.Length)); - ZipFileExtensions.DoCreateEntryFromFile(archive, file.FullName, entryName, compressionLevel); - } - else + try + { + foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) { - // Entry marking an empty dir: - if (file is DirectoryInfo possiblyEmpty && ArchivingUtils.IsDirEmpty(possiblyEmpty)) + directoryIsEmpty = false; + + int entryNameLength = file.FullName.Length - basePath.Length; + Debug.Assert(entryNameLength > 0); + + if (file is FileInfo) { - // FullName never returns a directory separator character on the end, - // but Zip archives require it to specify an explicit directory: - string entryName = ArchivingUtils.EntryFromPath(file.FullName.AsSpan(basePath.Length), appendPathSeparator: true); - archive.CreateEntry(entryName); + // Create entry for file: + string entryName = ArchivingUtils.EntryFromPath(file.FullName, basePath.Length, entryNameLength, ref entryNameBuffer); + ZipFileExtensions.DoCreateEntryFromFile(archive, file.FullName, entryName, compressionLevel); } - } + else + { + // Entry marking an empty dir: + if (file is DirectoryInfo possiblyEmpty && ArchivingUtils.IsDirEmpty(possiblyEmpty)) + { + // FullName never returns a directory separator character on the end, + // but Zip archives require it to specify an explicit directory: + string entryName = ArchivingUtils.EntryFromPath(file.FullName, basePath.Length, entryNameLength, ref entryNameBuffer, appendPathSeparator: true); + archive.CreateEntry(entryName); + } + } + } // foreach + + // If no entries create an empty root directory entry: + if (includeBaseDirectory && directoryIsEmpty) + archive.CreateEntry(ArchivingUtils.EntryFromPath(di.Name, 0, di.Name.Length, ref entryNameBuffer, appendPathSeparator: true)); + } + finally + { + ArrayPool.Shared.Return(entryNameBuffer); } - // If no entries create an empty root directory entry: - if (includeBaseDirectory && directoryIsEmpty) - archive.CreateEntry(ArchivingUtils.EntryFromPath(di.Name, appendPathSeparator: true)); } } } From 550c0ae125d1d38a515ebdd9d92366194f0147e2 Mon Sep 17 00:00:00 2001 From: carlossanlop Date: Tue, 23 Aug 2022 13:30:32 -0700 Subject: [PATCH 10/10] Fix build failure due to missing using in TarHelpers.Windows --- .../src/System/Formats/Tar/TarHelpers.Windows.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Windows.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Windows.cs index 8354e11917ca25..e1025b7bdece68 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Windows.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Windows.cs @@ -3,8 +3,8 @@ using System.Collections.Generic; using System.IO; -using System.Text; using System.Diagnostics; +using System.Runtime.InteropServices; namespace System.Formats.Tar {