From 9a97261653695c7cfdef2f79000f787b22b4402d Mon Sep 17 00:00:00 2001 From: Travis Staloch Date: Sun, 23 Apr 2023 16:16:47 -0700 Subject: [PATCH 1/6] std.tar: support pax headers and gnulong_{name,link} -- 4/20/23 -- tar.zig: * add HeaderIterator() type and convert pipeToFileSystem() to use it. * add initial support for options.executable_bit_only. * initial windows support: * skip symlinks which require admin rights * workaround file.updateTimes() panic by truncating file times * add tests parseNumeric and parsePaxTime ported from https://go.dev/src/archive/tar/strconv_test.go. lib/std/compress/tar/testdata/ * copy a subset of tar files from https://go.dev/src/archive/tar/testdata * gzip them all with -9. results in around 95% file size reduction reader_test.zig: * validate headers against files from testdata/. a port of https://go.dev/src/archive/tar/reader_test.go. test_decompress.zig: * runs tar.pipeToFileSystem() on valid testdata/ files. ------------- std.tar: cleanups and fixes -- 4/22/23 -- tar.zig: * convert V7Header, UstarHeader, StarHeader, GnuHeader to have fields of byte arrays instead of accessor methods. * Header.getFormat() - optimize by using 64 bit compares rather than mem.eql(). * gnu_long{name,link} and pax headers - allow them to be longer than 512 bytes by adding an allocator param to pipeToFileSystem(). * symlinks: cleanup logic and skip if wasi * remove custom toPosixPath(), which wasn't necessary. * workaround header.name corruption issue after makeOpenPath() call and add notes about it. * skip setDirProperties() entirely. leave as a TODO. src/Package.zig: pass gpa to pipeToFileSystem() tests: minor cosmetic changes ------------- std.tar: fix memory errors -- 4/25/23 -- tar.zig: * previously HeaderIterator.header()'s v7 param was by value, causing it to return invalid pointers to locals for various string fields. now the param along with a few others have been made *const. this solves the perceived 'stack corruption' i thought i was seeing. 
because v7 is 512 bytes, it wasn't clobbered until a fn w/ a larger stack frame, dir.makeOpenPath -> dir.openDir -> os.toPosixPath, was called. * symlinks - check for and set flags.is_directory param ------------- std.tar: hardlinks, more cleanup -- 4/25/23 -- tar.zig: * properly handle hardlinks by copying files. * improve symlink is_directory check - replace dir.access() call with dir.openFile() + handle error.FileNotFound. * isValidPax(): optimize: replace mem.eql()s with ComptimeStringMap. tests: * add testdata/hardlink.tar.gz ------------- std.tar: more cleanup -- 4/25/23 -- tests: add testdata/dir-symlink.tar ------------- --- lib/std/compress.zig | 1 + lib/std/compress/tar.zig | 4 + lib/std/compress/tar/reader_test.zig | 763 ++++++++++ lib/std/compress/tar/test_common.zig | 13 + lib/std/compress/tar/test_decompress.zig | 50 + .../compress/tar/testdata/dir-symlink.tar.gz | Bin 0 -> 164 bytes .../compress/tar/testdata/gnu-long-nul.tar.gz | Bin 0 -> 175 bytes .../tar/testdata/gnu-multi-hdrs.tar.gz | Bin 0 -> 208 bytes .../tar/testdata/gnu-nil-sparse-data.tar.gz | Bin 0 -> 125 bytes .../compress/tar/testdata/gnu-not-utf8.tar.gz | Bin 0 -> 109 bytes lib/std/compress/tar/testdata/gnu-utf8.tar.gz | Bin 0 -> 148 bytes lib/std/compress/tar/testdata/gnu.tar.gz | Bin 0 -> 163 bytes lib/std/compress/tar/testdata/hardlink.tar.gz | Bin 0 -> 164 bytes .../compress/tar/testdata/issue10968.tar.gz | Bin 0 -> 220 bytes .../compress/tar/testdata/issue11169.tar.gz | Bin 0 -> 119 bytes .../compress/tar/testdata/issue12435.tar.gz | Bin 0 -> 61 bytes lib/std/compress/tar/testdata/neg-size.tar.gz | Bin 0 -> 219 bytes lib/std/compress/tar/testdata/nil-uid.tar.gz | Bin 0 -> 134 bytes .../tar/testdata/pax-bad-hdr-file.tar.gz | Bin 0 -> 746 bytes .../tar/testdata/pax-bad-mtime-file.tar.gz | Bin 0 -> 750 bytes .../tar/testdata/pax-global-records.tar.gz | Bin 0 -> 290 bytes .../tar/testdata/pax-multi-hdrs.tar.gz | Bin 0 -> 218 bytes .../compress/tar/testdata/pax-nul-path.tar.gz | Bin 0 -> 
148 bytes .../tar/testdata/pax-nul-xattrs.tar.gz | Bin 0 -> 163 bytes .../tar/testdata/pax-pos-size-file.tar.gz | Bin 0 -> 736 bytes .../compress/tar/testdata/pax-records.tar.gz | Bin 0 -> 186 bytes lib/std/compress/tar/testdata/pax.tar.gz | Bin 0 -> 412 bytes lib/std/compress/tar/testdata/star.tar.gz | Bin 0 -> 179 bytes .../tar/testdata/trailing-slash.tar.gz | Bin 0 -> 134 bytes .../tar/testdata/ustar-file-devs.tar.gz | Bin 0 -> 93 bytes lib/std/compress/tar/testdata/v7.tar.gz | Bin 0 -> 551 bytes lib/std/compress/tar/testdata/xattrs.tar.gz | Bin 0 -> 376 bytes lib/std/tar.zig | 1282 +++++++++++++++-- src/Package.zig | 2 +- 34 files changed, 1973 insertions(+), 142 deletions(-) create mode 100644 lib/std/compress/tar.zig create mode 100644 lib/std/compress/tar/reader_test.zig create mode 100644 lib/std/compress/tar/test_common.zig create mode 100644 lib/std/compress/tar/test_decompress.zig create mode 100644 lib/std/compress/tar/testdata/dir-symlink.tar.gz create mode 100644 lib/std/compress/tar/testdata/gnu-long-nul.tar.gz create mode 100644 lib/std/compress/tar/testdata/gnu-multi-hdrs.tar.gz create mode 100644 lib/std/compress/tar/testdata/gnu-nil-sparse-data.tar.gz create mode 100644 lib/std/compress/tar/testdata/gnu-not-utf8.tar.gz create mode 100644 lib/std/compress/tar/testdata/gnu-utf8.tar.gz create mode 100644 lib/std/compress/tar/testdata/gnu.tar.gz create mode 100644 lib/std/compress/tar/testdata/hardlink.tar.gz create mode 100644 lib/std/compress/tar/testdata/issue10968.tar.gz create mode 100644 lib/std/compress/tar/testdata/issue11169.tar.gz create mode 100644 lib/std/compress/tar/testdata/issue12435.tar.gz create mode 100644 lib/std/compress/tar/testdata/neg-size.tar.gz create mode 100644 lib/std/compress/tar/testdata/nil-uid.tar.gz create mode 100644 lib/std/compress/tar/testdata/pax-bad-hdr-file.tar.gz create mode 100644 lib/std/compress/tar/testdata/pax-bad-mtime-file.tar.gz create mode 100644 
lib/std/compress/tar/testdata/pax-global-records.tar.gz create mode 100644 lib/std/compress/tar/testdata/pax-multi-hdrs.tar.gz create mode 100644 lib/std/compress/tar/testdata/pax-nul-path.tar.gz create mode 100644 lib/std/compress/tar/testdata/pax-nul-xattrs.tar.gz create mode 100644 lib/std/compress/tar/testdata/pax-pos-size-file.tar.gz create mode 100644 lib/std/compress/tar/testdata/pax-records.tar.gz create mode 100644 lib/std/compress/tar/testdata/pax.tar.gz create mode 100644 lib/std/compress/tar/testdata/star.tar.gz create mode 100644 lib/std/compress/tar/testdata/trailing-slash.tar.gz create mode 100644 lib/std/compress/tar/testdata/ustar-file-devs.tar.gz create mode 100644 lib/std/compress/tar/testdata/v7.tar.gz create mode 100644 lib/std/compress/tar/testdata/xattrs.tar.gz diff --git a/lib/std/compress.zig b/lib/std/compress.zig index 7e81d9deba26..0a4cffd65bbf 100644 --- a/lib/std/compress.zig +++ b/lib/std/compress.zig @@ -46,4 +46,5 @@ test { _ = xz; _ = zlib; _ = zstd; + _ = @import("compress/tar.zig"); } diff --git a/lib/std/compress/tar.zig b/lib/std/compress/tar.zig new file mode 100644 index 000000000000..8baa9e589805 --- /dev/null +++ b/lib/std/compress/tar.zig @@ -0,0 +1,4 @@ +test { + _ = @import("tar/reader_test.zig"); + _ = @import("tar/test_decompress.zig"); +} diff --git a/lib/std/compress/tar/reader_test.zig b/lib/std/compress/tar/reader_test.zig new file mode 100644 index 000000000000..27d14cef9505 --- /dev/null +++ b/lib/std/compress/tar/reader_test.zig @@ -0,0 +1,763 @@ +//! +//! this is a port of https://go.dev/src/archive/tar/reader_test.go +//! 
+const std = @import("std"); +const testing = std.testing; +const mem = std.mem; +const tar = std.tar; +const FormatSet = tar.FormatSet; +const unixTime = tar.unixTime; +const FileType = tar.FileType; +const Header = tar.Header; +const builtin = @import("builtin"); +const test_common = @import("test_common.zig"); + +const str_long_x10 = "long" ** 10; +const one_to_nine_slash_x30 = "123456789/" ** 30; +const talloc = std.testing.allocator; + +const TestCase = struct { + file: []const u8, // Test input file + headers: []const Header = &.{}, // Expected headers + chksums: []const []const u8 = &.{}, // MD5 checksum of files, empty if not checked + err: ?anyerror = null, // Expected error to occur + // TODO remove this field when no more test cases are skipped + skip: bool = false, // Wether to skip test case +}; + +test "std.tar validate testdata headers" { + // skip due to 'incorrect alignment', maybe the same as + // https://github.com/ziglang/zig/issues/14036 + if (builtin.os.tag == .windows and builtin.mode == .Debug) + return error.SkipZigTest; + + const test_cases = comptime [_]TestCase{ + .{ + .file = "gnu.tar", + .headers = &.{ .{ + .name = "small.txt", + .mode = 0o640, + .uid = 73025, + .gid = 5000, + .size = 5, + .mtime = unixTime(1244428340, 0), + .type = .normal, + .uname = "dsymonds", + .gname = "eng", + .fmt = FormatSet.initOne(.gnu), + }, .{ + .name = "small2.txt", + .mode = 0o640, + .uid = 73025, + .gid = 5000, + .size = 11, + .mtime = unixTime(1244436044, 0), + .type = .normal, + .uname = "dsymonds", + .gname = "eng", + .fmt = FormatSet.initOne(.gnu), + } }, + .chksums = &.{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .skip = true, + .file = "sparse-formats.tar", + .headers = &.{ .{ + .name = "sparse-gnu", + .mode = 420, + .uid = 1000, + .gid = 1000, + .size = 200, + .mtime = unixTime(1392395740, 0), + .type = @intToEnum(FileType, 0x53), + .linkname = "", + .uname = "david", + .gname = "david", + 
.dev_major = 0, + .dev_minor = 0, + .fmt = FormatSet.initOne(.gnu), + }, .{ + .name = "sparse-posix-0.0", + .mode = 420, + .uid = 1000, + .gid = 1000, + .size = 200, + .mtime = unixTime(1392342187, 0), + .type = @intToEnum(FileType, 0x30), + .linkname = "", + .uname = "david", + .gname = "david", + .dev_major = 0, + .dev_minor = 0, + .pax_recs = &.{ + "GNU.sparse.size", "200", + "GNU.sparse.numblocks", "95", + "GNU.sparse.map", "1,1,3,1,5,1,7,1,9,1,11,1,13,1,15,1,17,1,19,1,21,1,23,1,25,1,27,1,29,1,31,1,33,1,35,1,37,1,39,1,41,1,43,1,45,1,47,1,49,1,51,1,53,1,55,1,57,1,59,1,61,1,63,1,65,1,67,1,69,1,71,1,73,1,75,1,77,1,79,1,81,1,83,1,85,1,87,1,89,1,91,1,93,1,95,1,97,1,99,1,101,1,103,1,105,1,107,1,109,1,111,1,113,1,115,1,117,1,119,1,121,1,123,1,125,1,127,1,129,1,131,1,133,1,135,1,137,1,139,1,141,1,143,1,145,1,147,1,149,1,151,1,153,1,155,1,157,1,159,1,161,1,163,1,165,1,167,1,169,1,171,1,173,1,175,1,177,1,179,1,181,1,183,1,185,1,187,1,189,1", + }, + .fmt = FormatSet.initOne(.pax), + }, .{ + .name = "sparse-posix-0.1", + .mode = 420, + .uid = 1000, + .gid = 1000, + .size = 200, + .mtime = unixTime(1392340456, 0), + .type = @intToEnum(FileType, 0x30), + .linkname = "", + .uname = "david", + .gname = "david", + .dev_major = 0, + .dev_minor = 0, + .pax_recs = &.{ + "GNU.sparse.size", "200", + "GNU.sparse.numblocks", "95", + "GNU.sparse.map", "1,1,3,1,5,1,7,1,9,1,11,1,13,1,15,1,17,1,19,1,21,1,23,1,25,1,27,1,29,1,31,1,33,1,35,1,37,1,39,1,41,1,43,1,45,1,47,1,49,1,51,1,53,1,55,1,57,1,59,1,61,1,63,1,65,1,67,1,69,1,71,1,73,1,75,1,77,1,79,1,81,1,83,1,85,1,87,1,89,1,91,1,93,1,95,1,97,1,99,1,101,1,103,1,105,1,107,1,109,1,111,1,113,1,115,1,117,1,119,1,121,1,123,1,125,1,127,1,129,1,131,1,133,1,135,1,137,1,139,1,141,1,143,1,145,1,147,1,149,1,151,1,153,1,155,1,157,1,159,1,161,1,163,1,165,1,167,1,169,1,171,1,173,1,175,1,177,1,179,1,181,1,183,1,185,1,187,1,189,1", + "GNU.sparse.name", "sparse-posix-0.1", + }, + .fmt = FormatSet.initOne(.pax), + }, .{ + .name = "sparse-posix-1.0", + .mode = 
420, + .uid = 1000, + .gid = 1000, + .size = 200, + .mtime = unixTime(1392337404, 0), + .type = @intToEnum(FileType, 0x30), + .linkname = "", + .uname = "david", + .gname = "david", + .dev_major = 0, + .dev_minor = 0, + .pax_recs = &.{ + "GNU.sparse.major", "1", + "GNU.sparse.minor", "0", + "GNU.sparse.realsize", "200", + "GNU.sparse.name", "sparse-posix-1.0", + }, + .fmt = FormatSet.initOne(.pax), + }, .{ + .name = "end", + .mode = 420, + .uid = 1000, + .gid = 1000, + .size = 4, + .mtime = unixTime(1392398319, 0), + .type = @intToEnum(FileType, 0x30), + .linkname = "", + .uname = "david", + .gname = "david", + .dev_major = 0, + .dev_minor = 0, + .fmt = FormatSet.initOne(.gnu), + } }, + .chksums = &.{ + "6f53234398c2449fe67c1812d993012f", + "6f53234398c2449fe67c1812d993012f", + "6f53234398c2449fe67c1812d993012f", + "6f53234398c2449fe67c1812d993012f", + "b0061974914468de549a2af8ced10316", + }, + }, + .{ + .file = "star.tar", + .headers = &.{ .{ + .name = "small.txt", + .mode = 0o640, + .uid = 73025, + .gid = 5000, + .size = 5, + .mtime = unixTime(1244592783, 0), + .type = .normal, + .uname = "dsymonds", + .gname = "eng", + .atime = unixTime(1244592783, 0), + .ctime = unixTime(1244592783, 0), + }, .{ + .name = "small2.txt", + .mode = 0o640, + .uid = 73025, + .gid = 5000, + .size = 11, + .mtime = unixTime(1244592783, 0), + .type = .normal, + .uname = "dsymonds", + .gname = "eng", + .atime = unixTime(1244592783, 0), + .ctime = unixTime(1244592783, 0), + } }, + }, + .{ + .file = "v7.tar", + .headers = &.{ .{ + .name = "small.txt", + .mode = 0o444, + .uid = 73025, + .gid = 5000, + .size = 5, + .mtime = unixTime(1244593104, 0), + .type = .normal, + }, .{ + .name = "small2.txt", + .mode = 0o444, + .uid = 73025, + .gid = 5000, + .size = 11, + .mtime = unixTime(1244593104, 0), + .type = .normal, + } }, + }, + .{ + .file = "pax.tar", + .headers = &.{ .{ + .name = 
"a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + .mode = 0o664, + .uid = 1000, + .gid = 1000, + .uname = "shane", + .gname = "shane", + .size = 7, + .mtime = unixTime(1350244992, 23960108), + .ctime = unixTime(1350244992, 23960108), + .atime = unixTime(1350244992, 23960108), + .type = .normal, + .pax_recs = &.{ + "path", "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + "mtime", "1350244992.023960108", + "atime", "1350244992.023960108", + "ctime", "1350244992.023960108", + }, + .fmt = FormatSet.initOne(.pax), + }, .{ + .name = "a/b", + .mode = 0o777, + .uid = 1000, + .gid = 1000, + .uname = "shane", + .gname = "shane", + .size = 0, + .mtime = unixTime(1350266320, 910238425), + .ctime = unixTime(1350266320, 910238425), + .atime = unixTime(1350266320, 910238425), + .type = .symbolic_link, + .linkname = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + .pax_recs = &.{ + "linkpath", "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + "mtime", "1350266320.910238425", + "atime", "1350266320.910238425", + "ctime", "1350266320.910238425", + }, + .fmt = FormatSet.initOne(.pax), + } }, + }, + .{ + .file = "pax-bad-hdr-file.tar", + .err = error.InvalidCharacter, + }, + .{ + .file = "pax-bad-mtime-file.tar", + .err = error.InvalidCharacter, + }, + .{ + .file = "pax-pos-size-file.tar", + .headers = &.{.{ + .name = "foo", + .mode = 0o640, + .uid = 319973, + .gid = 5000, + .size = 999, + .mtime = 
unixTime(1442282516, 0), + .type = .normal, + .uname = "joetsai", + .gname = "eng", + .pax_recs = &.{ + "size", "000000000000000000000999", + }, + .fmt = FormatSet.initOne(.pax), + }}, + .chksums = &.{ + "0afb597b283fe61b5d4879669a350556", + }, + }, + .{ + .file = "pax-records.tar", + .headers = &.{.{ + .type = .normal, + .name = "file", + .uname = str_long_x10, + .mtime = unixTime(0, 0), + .pax_recs = &.{ + "GOLANG.pkg", "tar", + "comment", "Hello, 世界", + "uname", str_long_x10, + }, + .fmt = FormatSet.initOne(.pax), + }}, + }, + .{ + .file = "pax-global-records.tar", + .headers = &.{ .{ + .type = .global_extended_header, + .name = "global1", + .pax_recs = &.{ "path", "global1", "mtime", "1500000000.0" }, + .fmt = FormatSet.initOne(.pax), + }, .{ + .type = .normal, + .name = "file1", + .mtime = unixTime(0, 0), + .fmt = FormatSet.initOne(.ustar), + }, .{ + .type = .normal, + .name = "file2", + .pax_recs = &.{ "path", "file2" }, + .mtime = unixTime(0, 0), + .fmt = FormatSet.initOne(.pax), + }, .{ + .type = .global_extended_header, + .name = "GlobalHead.0.0", + .pax_recs = &.{ "path", "" }, + .fmt = FormatSet.initOne(.pax), + }, .{ + .type = .normal, + .name = "file3", + .mtime = unixTime(0, 0), + .fmt = FormatSet.initOne(.ustar), + }, .{ + .type = .normal, + .name = "file4", + .mtime = unixTime(1400000000, 0), + .pax_recs = &.{ "mtime", "1400000000" }, + .fmt = FormatSet.initOne(.pax), + } }, + }, + .{ + .file = "nil-uid.tar", // golang.org/issue/5290 + .headers = &.{.{ + .name = "P1050238.JPG.log", + .mode = 0o664, + .uid = 0, + .gid = 0, + .size = 14, + .mtime = unixTime(1365454838, 0), + .type = .normal, + .linkname = "", + .uname = "eyefi", + .gname = "eyefi", + .dev_major = 0, + .dev_minor = 0, + .fmt = FormatSet.initOne(.gnu), + }}, + }, + .{ + .file = "xattrs.tar", + .headers = &.{ + .{ + .name = "small.txt", + .mode = 0o644, + .uid = 1000, + .gid = 10, + .size = 5, + .mtime = unixTime(1386065770, 448252320), + .type = .normal, + .uname = "alex", + .gname = 
"wheel", + .atime = unixTime(1389782991, 419875220), + .ctime = unixTime(1389782956, 794414986), + .pax_recs = &.{ + "user.key", "value", + "user.key2", "value2", + "security.selinux", ".unconfined_u=.object_r=.default_t=s0\x00", + // Interestingly, selinux encodes the terminating null inside the xattr + "mtime", "1386065770.44825232", + "atime", "1389782991.41987522", + "ctime", "1389782956.794414986", + "SCHILY.xattr.user.key", "value", + "SCHILY.xattr.user.key2", "value2", + "SCHILY.xattr.security.selinux", ".unconfined_u=.object_r=.default_t=s0\x00", + }, + .fmt = FormatSet.initOne(.pax), + }, + .{ + .name = "small2.txt", + .mode = 0o644, + .uid = 1000, + .gid = 10, + .size = 11, + .mtime = unixTime(1386065770, 449252304), + .type = .normal, + .uname = "alex", + .gname = "wheel", + .atime = unixTime(1389782991, 419875220), + .ctime = unixTime(1386065770, 449252304), + .pax_recs = &.{ + "security.selinux", ".unconfined_u=.object_r=.default_t=s0\x00", + "mtime", "1386065770.449252304", + "atime", "1389782991.41987522", + "ctime", "1386065770.449252304", + "SCHILY.xattr.security.selinux", ".unconfined_u=.object_r=.default_t=s0\x00", + }, + .fmt = FormatSet.initOne(.pax), + }, + }, + }, + .{ + // Matches the behavior of GNU, BSD, and STAR tar utilities. + .file = "gnu-multi-hdrs.tar", + .headers = &.{.{ + .name = "GNU2/GNU2/long-path-name", + .linkname = "GNU4/GNU4/long-linkpath-name", + .mtime = unixTime(0, 0), + .type = .symbolic_link, + .fmt = FormatSet.initOne(.gnu), + }}, + }, + .{ + .skip = true, + // GNU tar file with atime and ctime fields set. + // Created with the GNU tar v1.27.1. 
+ // tar --incremental -S -cvf gnu-incremental.tar test2 + .file = "gnu-incremental.tar", + .headers = &.{ .{ + .name = "test2/", + .mode = 16877, + .uid = 1000, + .gid = 1000, + .size = 14, + .mtime = unixTime(1441973427, 0), + .type = @intToEnum(FileType, 'D'), + .uname = "rawr", + .gname = "dsnet", + .atime = unixTime(1441974501, 0), + .ctime = unixTime(1441973436, 0), + .fmt = FormatSet.initOne(.gnu), + }, .{ + .name = "test2/foo", + .mode = 33188, + .uid = 1000, + .gid = 1000, + .size = 64, + .mtime = unixTime(1441973363, 0), + .type = .normal, + .uname = "rawr", + .gname = "dsnet", + .atime = unixTime(1441974501, 0), + .ctime = unixTime(1441973436, 0), + .fmt = FormatSet.initOne(.gnu), + }, .{ + .name = "test2/sparse", + .mode = 33188, + .uid = 1000, + .gid = 1000, + .size = 536870912, + .mtime = unixTime(1441973427, 0), + .type = @intToEnum(FileType, 'S'), + .uname = "rawr", + .gname = "dsnet", + .atime = unixTime(1441991948, 0), + .ctime = unixTime(1441973436, 0), + .fmt = FormatSet.initOne(.gnu), + } }, + }, + .{ + // Matches the behavior of GNU and BSD tar utilities. + .file = "pax-multi-hdrs.tar", + .headers = &.{.{ + .name = "bar", + .linkname = "PAX4/PAX4/long-linkpath-name", + .mtime = unixTime(0, 0), + .type = @intToEnum(tar.FileType, '2'), + .pax_recs = &.{ + "linkpath", "PAX4/PAX4/long-linkpath-name", + }, + .fmt = FormatSet.initOne(.pax), + }}, + }, + .{ + // Both BSD and GNU tar truncate long names at first NUL even + // if there is data following that NUL character. + // This is reasonable as GNU long names are C-strings. + .file = "gnu-long-nul.tar", + .headers = &.{.{ + .name = "0123456789", + .mode = 0o644, + .uid = 1000, + .gid = 1000, + .mtime = unixTime(1486082191, 0), + .type = .normal, + .uname = "rawr", + .gname = "dsnet", + .fmt = FormatSet.initOne(.gnu), + }}, + }, + .{ + // This archive was generated by Writer but is readable by both + // GNU and BSD tar utilities. 
+ // The archive generated by GNU is nearly byte-for-byte identical + // to the Go version except the Go version sets a negative dev_minor + // just to force the GNU format. + .file = "gnu-utf8.tar", + .headers = &.{.{ + .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", + .mode = 0o644, + .uid = 1000, + .gid = 1000, + .mtime = unixTime(0, 0), + .type = .normal, + .uname = "☺", + .gname = "⚹", + .fmt = FormatSet.initOne(.gnu), + }}, + }, + .{ + // This archive was generated by Writer but is readable by both + // GNU and BSD tar utilities. + // The archive generated by GNU is nearly byte-for-byte identical + // to the Go version except the Go version sets a negative dev_minor + // just to force the GNU format. + .file = "gnu-not-utf8.tar", + .headers = &.{.{ + .name = "hi\x80\x81\x82\x83bye", + .mode = 0o644, + .uid = 1000, + .gid = 1000, + .mtime = unixTime(0, 0), + .type = .normal, + .uname = "rawr", + .gname = "dsnet", + .fmt = FormatSet.initOne(.gnu), + }}, + }, + .{ + // BSD tar v3.1.2 and GNU tar v1.27.1 both rejects PAX records + // with NULs in the key. + .file = "pax-nul-xattrs.tar", + .err = error.Header, + }, + .{ + // BSD tar v3.1.2 rejects a PAX path with NUL in the value, while + // GNU tar v1.27.1 simply truncates at first NUL. + // We emulate the behavior of BSD since it is strange doing NUL + // truncations since PAX records are length-prefix strings instead + // of NUL-terminated C-strings. + .file = "pax-nul-path.tar", + .err = error.Header, + }, + .{ + .file = "neg-size.tar", + .err = error.Overflow, + }, + .{ + .file = "issue10968.tar", + .err = error.InvalidCharacter, + }, + .{ + .file = "issue11169.tar", + .err = error.UnexpectedEndOfStream, + }, + .{ + .file = "issue12435.tar", + .err = error.Overflow, + }, + .{ + .skip = true, + // Ensure that we can read back the original Header as written with + // a buggy pre-Go1.8 tar.Writer. 
+ .file = "invalid-go17.tar", + .headers = &.{.{ + .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", + .uid = 0o10000000, + .mtime = unixTime(0, 0), + .type = .normal, + }}, + }, + .{ + // USTAR archive with a regular entry with non-zero device numbers. + .file = "ustar-file-devs.tar", + .headers = &.{.{ + .name = "file", + .mode = 0o644, + .type = .normal, + .mtime = unixTime(0, 0), + .dev_major = 1, + .dev_minor = 1, + .fmt = FormatSet.initOne(.ustar), + }}, + }, + .{ + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + .file = "gnu-nil-sparse-data.tar", + .headers = &.{.{ + .name = "sparse.db", + .type = .gnu_sparse, + .size = 1000, + .mtime = unixTime(0, 0), + .fmt = FormatSet.initOne(.gnu), + }}, + }, + .{ + .skip = true, + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + .file = "gnu-nil-sparse-hole.tar", + .headers = &.{.{ + .name = "sparse.db", + .type = .gnu_sparse, + .size = 1000, + .mtime = unixTime(0, 0), + .fmt = FormatSet.initOne(.gnu), + }}, + }, + .{ + .skip = true, + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + .file = "pax-nil-sparse-data.tar", + .headers = &.{.{ + .name = "sparse.db", + .type = .normal, + .size = 1000, + .mtime = unixTime(0, 0), + .pax_recs = &.{ + "size", "1512", + "GNU.sparse.major", "1", + "GNU.sparse.minor", "0", + "GNU.sparse.realsize", "1000", + "GNU.sparse.name", "sparse.db", + }, + .fmt = FormatSet.initOne(.pax), + }}, + }, + .{ + .skip = true, + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. 
+ .file = "pax-nil-sparse-hole.tar", + .headers = &.{.{ + .name = "sparse.db", + .type = .normal, + .size = 1000, + .mtime = unixTime(0, 0), + .pax_recs = &.{ + "size", "512", + "GNU.sparse.major", "1", + "GNU.sparse.minor", "0", + "GNU.sparse.realsize", "1000", + "GNU.sparse.name", "sparse.db", + }, + .fmt = FormatSet.initOne(.pax), + }}, + }, + .{ + .file = "trailing-slash.tar", + .headers = &.{.{ + .type = .directory, + .name = one_to_nine_slash_x30, + .mtime = unixTime(0, 0), + .pax_recs = &.{ "path", one_to_nine_slash_x30 }, + .fmt = FormatSet.initOne(.pax), + }}, + }, + }; + + var headers_tested: usize = 0; + var cases_skipped: usize = 0; + var errors: usize = 0; + + inline for (test_cases) |test_case| { + if (test_case.skip) { + cases_skipped += 1; + continue; + } + std.log.info("\n--- test_case.file={s} ---", .{test_case.file}); + var fbs = try test_common.decompressGz("testdata/" ++ test_case.file ++ ".gz", talloc); + defer talloc.free(fbs.buffer); + const reader = fbs.reader(); + var buf: [tar.block_len]u8 = undefined; + var iter = tar.headerIterator(reader, &buf, talloc); + defer iter.deinit(); + for (test_case.headers, 0..) |header_, i| { + // since we don't maintain a hash map of pax records, merge pax_recs + // into this header record + var expected = header_; + var j: usize = 0; + while (j < expected.pax_recs.len) : (j += 2) { + const k = expected.pax_recs[j]; + const val = expected.pax_recs[j + 1]; + try tar.mergePax(.{ k, val }, &expected); + } + std.log.debug("expected: {}", .{expected}); + const mhdr = try iter.next(); + try testing.expect(mhdr != null); + const actual = mhdr.?; + std.log.debug("actual : {}", .{actual}); + + // test Header fields by their type. + // can't use testing.expectEqualDeep() because pax_recs field won't match. 
+ inline for (std.meta.fields(Header)) |fd| { + _ = switch (fd.type) { + []const u8 => testing.expectEqualStrings(@field(expected, fd.name), @field(actual, fd.name)), + []const []const u8 => testing.expect(true), // dummy so types match + i64, + i32, + i128, + => if (@field(expected, fd.name) != -1) + testing.expectEqual(@field(expected, fd.name), @field(actual, fd.name)) + else + testing.expect(true), + FileType, + std.time.Instant, + FormatSet, + => testing.expectEqual(@field(expected, fd.name), @field(actual, fd.name)), + else => return @compileLog(comptime std.fmt.comptimePrint("todo {s}", .{@typeName(fd.type)})), + } catch |e| { + std.log.err("field '{s}' not equal", .{fd.name}); + return e; + }; + } + + if (actual.size == -1) continue; + const block_size = std.mem.alignForwardGeneric(usize, @intCast(usize, actual.size), 512); + // validate checksums if exist or skip over file contents + if (test_case.chksums.len > i) { + var h = std.crypto.hash.Md5.init(.{}); + const content = try talloc.alloc(u8, block_size); + defer talloc.free(content); + _ = try reader.read(content); + h.update(content[0..@intCast(usize, actual.size)]); + var hbuf: [16]u8 = undefined; + h.final(&hbuf); + const hex = std.fmt.bytesToHex(hbuf, .lower); + try testing.expectEqualStrings(test_case.chksums[i], &hex); + } else { // skip over file contents + switch (actual.type) { + .normal, .normal2 => try reader.skipBytes(block_size, .{}), + else => {}, + } + } + } + + // check any remaining headers for errors + var merr: ?anyerror = null; + while (true) { + const next = iter.next() catch |e| { + merr = e; + break; + }; + if (next == null) break; + } + + if (test_case.err) |e| { + if (e != merr) { + errors += 1; + std.log.err("errors don't match. 
expecting {!} found {?!}", .{ e, merr }); + } + try testing.expect(merr != null); + try testing.expectEqual(e, merr.?); + } + + headers_tested += test_case.headers.len; + } + std.log.info( + "test Reader: tar test cases: {} total, {} passed, {} errored, {} skipped, {} total headers checked.", + .{ test_cases.len, test_cases.len - cases_skipped - errors, errors, cases_skipped, headers_tested }, + ); +} diff --git a/lib/std/compress/tar/test_common.zig b/lib/std/compress/tar/test_common.zig new file mode 100644 index 000000000000..5a5abd412db4 --- /dev/null +++ b/lib/std/compress/tar/test_common.zig @@ -0,0 +1,13 @@ +const std = @import("std"); +/// testing helper for decompressing a .gz file. returns an io.fixedBufferStream +/// with the decompressed data. caller owns the returned FixedBufferStream.buffer +pub fn decompressGz( + comptime file_name: []const u8, + alloc: std.mem.Allocator, +) !std.io.FixedBufferStream([]u8) { + var fbs = std.io.fixedBufferStream(@embedFile(file_name)); + var decompressor = try std.compress.gzip.decompress(alloc, fbs.reader()); + defer decompressor.deinit(); + const decompressed = try decompressor.reader().readAllAlloc(alloc, 1024 * 32); + return std.io.fixedBufferStream(decompressed); +} diff --git a/lib/std/compress/tar/test_decompress.zig b/lib/std/compress/tar/test_decompress.zig new file mode 100644 index 000000000000..b1c862fed814 --- /dev/null +++ b/lib/std/compress/tar/test_decompress.zig @@ -0,0 +1,50 @@ +const std = @import("std"); +const mem = std.mem; +const tar = std.tar; +const testing = std.testing; +const talloc = testing.allocator; +const builtin = @import("builtin"); +const test_common = @import("test_common.zig"); + +test "std.tar decompress testdata" { + // skip due to 'incorrect alignment', maybe the same as + // https://github.com/ziglang/zig/issues/14036 + if (builtin.os.tag == .windows and builtin.mode == .Debug) + return error.SkipZigTest; + + const test_files = [_][]const u8{ + "xattrs.tar", + 
"gnu-long-nul.tar", + "v7.tar", + "pax-bad-hdr-file.tar", + "pax-global-records.tar", + "star.tar", + "pax-multi-hdrs.tar", + "gnu.tar", + "gnu-utf8.tar", + "trailing-slash.tar", + "pax.tar", + "nil-uid.tar", + "ustar-file-devs.tar", + "pax-pos-size-file.tar", + "hardlink.tar", + "pax-records.tar", + "gnu-multi-hdrs.tar", + "hardlink.tar", + "dir-symlink.tar", + }; + + inline for (test_files) |test_file| { + var fbs = try test_common.decompressGz("testdata/" ++ test_file ++ ".gz", talloc); + defer talloc.free(fbs.buffer); + try testDecompressTarToTmp(&fbs, .{ .mode_mode = .ignore }); + fbs.reset(); + try testDecompressTarToTmp(&fbs, .{ .mode_mode = .executable_bit_only }); + } +} + +fn testDecompressTarToTmp(fbs: *std.io.FixedBufferStream([]u8), options: tar.Options) !void { + var tmpdir = testing.tmpDir(.{}); + defer tmpdir.cleanup(); + try tar.pipeToFileSystem(talloc, tmpdir.dir, fbs.reader(), options); +} diff --git a/lib/std/compress/tar/testdata/dir-symlink.tar.gz b/lib/std/compress/tar/testdata/dir-symlink.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..72db21622fc3339f9f95bf7e4a3c94cd7ba218a3 GIT binary patch literal 164 zcmb2|=3oE==C@aTxehA`v?caGVs$ze__~tmu1CG$w&&|LMK*CNa*MZqzj@{mBfI^v zs_MRF#+gAq-ldwScKwqOZ(22dvUvIt%ieo8Zd7kLn!PS~)^)!5SKv61M~Pz LDs|!+G#D5F4yQ|a literal 0 HcmV?d00001 diff --git a/lib/std/compress/tar/testdata/gnu-long-nul.tar.gz b/lib/std/compress/tar/testdata/gnu-long-nul.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b8be3c6f230674f7013e4e1583172e7eb8eb7a8 GIT binary patch literal 175 zcmV;g08sxQiwFpk`9Wj?17~h^Eo^UYXDx1ZY%X+RascgGdHbqI)|OpPeYTmIL#H~V*e*PmSQle_vGYy)PD$fB7~MHVW4$%#xB z<)8xhIgL>%h#{o4y>IiKg2aPpV-H6t4u8l&qtg91bg%xC)%dQZ^E9AH4w)<05AU^iwFoY??Ge&17~h^Ep2sdbZISUWO8#ZbYXG;?bJ&S!XOZU;W-KyC~tA= z&V|IC2f)Nvti>SSUos1mny9N14f8Js(l%kkS1!0%_&6N=X*k0)$&#}c*V5}MjdIUD zK3B>~1I$B(?zbEgV~}FZpT7I{{!d%3H%j2>%O&WpAs@RTTy{V;5hwqx`_}>o`6vHB z?uIz(M|7Y#Y+#XS{JgHb2_W<*fME`xvYFZ?p%A 
K=r&yd5&!^IzGN=| literal 0 HcmV?d00001 diff --git a/lib/std/compress/tar/testdata/gnu-nil-sparse-data.tar.gz b/lib/std/compress/tar/testdata/gnu-nil-sparse-data.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..be75c8a29e321390116e5649eb035e230b4b567d GIT binary patch literal 125 zcmV-@0D}J?iwFp2mO^9#17~h^EpBOSEpu>Ta&u)ZWMOn+E_7jX04pv?EGkabOG#p& z9Wa0ZT(qINDVz-vH#9LcG-gl;W}pKoEiOqcQc$3}CrQG=qhJ(_0yF>s9R^K{015yAEs!Z9 literal 0 HcmV?d00001 diff --git a/lib/std/compress/tar/testdata/gnu-not-utf8.tar.gz b/lib/std/compress/tar/testdata/gnu-not-utf8.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..002777c1e011089b154bafc0e40ff6be5ce7c46a GIT binary patch literal 109 zcmV-z0FwV7iwFoY??Ge&17~h^EpBggEp>EeI4*QyasbQ7Y-nt1PO400pdBy(0y7g6 z5N&8~YJg0`04m$i*bpeKV8B2JP+DA)Sfrr9P?T6+gil>cab9W(0Z$@%gkcnnf>AIE P1~~u#^Ds$J00sa6HX|gl literal 0 HcmV?d00001 diff --git a/lib/std/compress/tar/testdata/gnu-utf8.tar.gz b/lib/std/compress/tar/testdata/gnu-utf8.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3cd0c864c5aa9a87afbc2344b7dc5cf611e3db58 GIT binary patch literal 148 zcmV;F0BipriwFoY??Ge&17~h^Ep>EeI4*QyasboQ*VA|K$?z zk%=*!4G}jtH#K2U@L`|>C@n5YEK*Ql7@^?Nj9rgr?0z(3=b&XUjDjH^|Da55W?}-$ zy)|blnjlZ7{K!S;nc_2mS=(epa>2DEd`GxZ=yi>nO7c&EOmHsvIr=585x8F zJh=3CV_@W0%InDA_NH})KXhASVNhqqTmN?758!A2)YblJJGFoBwErVqbDrbW4mmX% RgHlSJ$Qw6LPx}B2008!;O3DBL literal 0 HcmV?d00001 diff --git a/lib/std/compress/tar/testdata/hardlink.tar.gz b/lib/std/compress/tar/testdata/hardlink.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c6a21153aac62f55c1e26c601d9e141661649f2 GIT binary patch literal 164 zcmV;V09*ebiwFq4VK!s}188A#WNc|}Yc6zQasW%q%t_TNsVHHfAus>}GZPaKZD?+4 z0OlJ)_z+-d&R}R{Vs2_?WC~&%7#bUynKLLDFwg;%7MCOz0i9Bolvq+yj89!^Q858e z0s)*JA|N;?v8W_72}qPQJENK`8tL1tp*9#AL>&A~av3L}IHKAxR?kA!;G82zvF=C1 zAp{qxozWvfTF54YUrCX%iTh@qBKaJbR&g=MwVh;K0!32vP9W{~X~`+?_!3YJ^h-^@ Wm58KeQa+7Bf2bFIG}~7I0ssK+Y+)Dx literal 0 
HcmV?d00001 diff --git a/lib/std/compress/tar/testdata/issue11169.tar.gz b/lib/std/compress/tar/testdata/issue11169.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e2fa753c4a40d8a98aba52306a7d7b3f573e0839 GIT binary patch literal 119 zcmV--0Eqt|iwFq5l|p0!18H+}b!9OzF*Z3abYXG;)6)+~tnf%pOi3*&)-yCQF*DXr zOiW~;Dlh;7GZPaq4Fu@4fuXSxgQ1bBv7w=fsj;~sg8`6X1eC5|paUo^E=eo`I)!0m Z0%HROa2(hgU;~uM0stn~sA^gQ006QxDBJ)5 literal 0 HcmV?d00001 diff --git a/lib/std/compress/tar/testdata/issue12435.tar.gz b/lib/std/compress/tar/testdata/issue12435.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b9efe3933112f22a7f6902a3c02ea74e99b55ae7 GIT binary patch literal 61 zcmb2|=HRfH=9I$3oLO94nrdidVr;5cl32u$oUp+5$N7%lXLsyn+pCy0KG^Mh%w>F7 RV}U9IgWBExN(Lqd1^|n46<`1W literal 0 HcmV?d00001 diff --git a/lib/std/compress/tar/testdata/neg-size.tar.gz b/lib/std/compress/tar/testdata/neg-size.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e23b632ff45b1860929e69d023254d3c23e61c5 GIT binary patch literal 219 zcmV<103`n(iwFoY??Ge&18!w!EpuslWiE7KasZ8x%?*Sg6og4^z=Q^DV3T>E1naPe zy&FvQum=*$+BR%pd*JROifeHHE^wIlzL|ME19*1Pg6ayQRvqV~xs8ScZU4C5y$r@! 
z4kB(Gfq*z-7i%SxV6ciXzD^t@l@u(vtKL`Oh oOic_-j0_A=!WS8snCJjefPc7akiQ?pC>Ut~0B&7~;s68y07UmL2><{9 literal 0 HcmV?d00001 diff --git a/lib/std/compress/tar/testdata/pax-bad-hdr-file.tar.gz b/lib/std/compress/tar/testdata/pax-bad-hdr-file.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b8b416fe3f59395d06b7f865986c963f9a4f4438 GIT binary patch literal 746 zcmVt_VNO9cwDw0Ig-R#b%-qy-PZmcQ`7 zQ~$R*Wj)D$T9;PWKk@!E|Fw*+v=`U51_bME^lwB8X1ug|f0~oyd%;oO@^ecYTv`h& zv+PWn@MB~3ZY4yjTZv+L2V9r$-*nLo5e8A_mM%wCF@PPviMN{_X6X7JZA03(p~27L z4EF{>;2~ICq^BS*BBt?`b(wp z?CLPaT>YjnsVhJ{=7%_Tq$85iXg74p7(JU}eQIzxK{$^}>8X{HKTxvYtPih<<>!ie zs}QVA!&b^ksso=u>#RX83B%T-3Vj||y2$|(>`jD>l%o3;wU?u*3gK`_YhgT2{U0+Ex^{;owFntnu%34T#n?v`MkvsL;6q+E6rb{Lh%^h}Yak^}SH^Oh}s%9EB cbyJj&$3)uqzVZ3*Z~fo%1+;!f1^@~G0I#KV2LJ#7 literal 0 HcmV?d00001 diff --git a/lib/std/compress/tar/testdata/pax-bad-mtime-file.tar.gz b/lib/std/compress/tar/testdata/pax-bad-mtime-file.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dded7e108c194133d6e56ee519e53159b04f3191 GIT binary patch literal 750 zcmV4UKbYXG;?UYNen>rB3v+k!j zd!&M~c`dpi2?r7q3>*j?b|xV)A%vHCgiqhJm0IaaUESMCx9U%$nXxtU$YcFo6GnMf z@oh#Ke*%OrCCkg~lcs6Y zG-*&oAJNl2?JFM^fBBO+SJf}?@kI|Z5*7f+f`X*y`k8_6QURib7Eh;0vLXv2S_t54 z`HS#7^?$80){*?Db=m#$C%%8~zn0O3c5r2DylA}*{*|C$j#aDk$2mQ`-#98*eqm|7 zODl0{maQofVW_Rny^E3J-XRPpo@mg^eo z?RKqtmZRL@_H}>0_nq74t;X3ZrgCbW9Wk|Nd`%EYiZNSRNfg?F>U1Y7!>MC1i0hIEbhT14I0tT>R4 zRPS`&k6j%kF;~6G47E2t8H#<9IOu>SJlHi=8KWl?R>wLHM*!wQE^;!Oi(#Dq`60vr62M zhhy_~7`MkT?jK{L*#VbE-gMe_@jMpABT(@$qblFbmQDc5_)V(D)66Zcv36j!vFvP literal 0 HcmV?d00001 diff --git a/lib/std/compress/tar/testdata/pax-global-records.tar.gz b/lib/std/compress/tar/testdata/pax-global-records.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..11100f5a86692f1f80091c5d95ea3bfeffe966ee GIT binary patch literal 290 zcmV+-0p0!|iwFqZltN?z18`w@EoW?RVqt78a%E$0a%6KZbYXG;?U+jn!Y~kqv+gN! 
zf;ux9TQ{zC=@lZz2UHYW!QAyR2d;TxFJ*Z(PKfleI2_#^r+)ooh%nJ-TkPk%dUs%?gg<3_$Q%Z) z@$&q)$5=j~H2l)}Z-3m6u(G#G{_c{;FsU{J4bXpV`yY`>>6`dJILM9Z>(-0sN9lhG zT;n7EqxioM7T=KnvD1HoMDl+h%o_cR`A6)3^Ns!|@&6$Cp9knaaX0@P$^X6F|A4>$ oVZZ+JFBqBsIdF}4{EuV|GpFAFCKdk+f*^$C4*d&=8vq;t0NpWec}V#)tzqxqj6z{F4? zCo?Y_Rt6aBg9&gM023cA2S)S%X#O7=`QJnzOpNCLkphx{iJ#VhVE)Hon;RM$F(??N U<>wEX{69ni04T}R*8ma#03ZNa8~^|S literal 0 HcmV?d00001 diff --git a/lib/std/compress/tar/testdata/pax-nul-path.tar.gz b/lib/std/compress/tar/testdata/pax-nul-path.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..68929edd12051f92c94e445ddd89916bde368464 GIT binary patch literal 148 zcmV;F0BipriwFoY??Ge&18`w@EpByeEpTCUXfAYNasUfRtnf%pOi3*&)-%vIFf=kY zF*P%{u%s%>00J0r&<4gv#&9-R+{nn-+>Ak?f`Ja8w74X(2xvW}=Ae6ShyWu)LxqCG zk_=lK#Xa3w3|tJOfP`dAkH{gA|1sD`hQ?-w3AIEXa)ddiUAD(3IG70 CV>smi literal 0 HcmV?d00001 diff --git a/lib/std/compress/tar/testdata/pax-nul-xattrs.tar.gz b/lib/std/compress/tar/testdata/pax-nul-xattrs.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3715d4283d200ca470519cb0117603f62b483544 GIT binary patch literal 163 zcmV;U09^kciwFoY??Ge&18`w@EpByeEqGycbaHbpbYXG;3rMW+NKH&hEh^SC&`(NC z(akH($5XC22O!-IfGjv`5F#FGY z=2+K%J;vuzQY0{gPywnWKkg3=ep?kN$kOa_sIsCeK#*nteldRnZ@K^HI%gCYzqHG* zr}xg8jMr6d%bW3*g+cZdIKnV=y!}^{zs! 
zdRI}5ZamkOn-@d0B9!oP;Z+_-R564dSf;DxI$`Ma8m}Tk+sFh{G{KFD5q}qKE;e#L zy~IohuHe`u23eWlb8Q+26m_m7c+Q0vt6n`9i&&CL=+2joXmsnP=G(4%5!cV#Xox78d31%$CdoOKU|Np(!cODJ2{o4<>>OqOH8Ji(8Obd~BtM#f>xYu=GAyL- zorJ_Ntn^g0CJWJey{WY6v|q`1NbK!yyaYk4tmwsw#d%+h8#-DB z9aY9Uz6mg2FG9(V>Uq3>6-Qm+LMu@EgbmpeZS|OViB)qj8;L;;K1e(RrU_TSC`{@u zd^(idGLY;`B( zve>^J)-;*Mvb={H8YZbOR+DXDDCPVr(~{f7&F!&q#gL(d+EdZD8*4kYgQlp96TXi0 z`R1BdfJ0wd2wC88=$12lBYvAhi;v>*SjbrS`t?E_kE`$ngB6+AOedCMiSlljNgEoN SAOHW>f1XcUdDO4~3IG6(+hE}U literal 0 HcmV?d00001 diff --git a/lib/std/compress/tar/testdata/pax-records.tar.gz b/lib/std/compress/tar/testdata/pax-records.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..22d204a2f3629289c2bc07b5e1f6cbe9a0b8924c GIT binary patch literal 186 zcmV;r07d^FiwFoY??Ge&18`w@EplaJZ*pXFE_7jX01HU0@JLNeNi8bYGtf`V%t>XS zJ}`g)T(p6qu_>Gl7B@6BFf(FMs9>N2C@n5YECO23FfxImg@U`kkE5TvUO{%cEifp! zj7$}h^K)}k^Ga+zQgd?hbrhcLnD%^X50|NdLTO%NZmMlgeqK5;n2TW)40b@HEKgGY o$6yYQ){}W2gq?^G>{z=-Wd8v#-0+fd+8oFT|xKQ&!{o?bp}r~B=0zp)6vMq*L(NCZSAWE501 zbPP-aLLy=kQZjN1N(M$IW)@a9b`DMfK_OugQGvolhHB6iUbizMHXs3;p%PR-kr)o+ z%di07_lId$rTpzYzt{4o{U8v?_-#5o+)VGV@g^=Isi3v7$||aA>Kd8`hDOFFre@|A zmJW_i&MvNQ?jD{2fkD9`p<&?>kqKD^WhKj+C~Htw&2YsqJ{=yPwl~NMB$7!RAg){p z@iRJ|jozJ(4y$E}ZJ+gTr}YouNBXadWEf#d{qG;9=k1wsm*O8(|MRkTD?Z@O1^plU z|Ls4|anb+l`Ir972&nq8KETlazl{Ie`1SGm%X|k|*$u#q2{sj=dYSa-2JlAo?sRm$ z`oDem9J9l_pYi`A{Zsuil>gHb=kY)H|E6i0rfHg{X_}^Knx<)*rfHgHUHkxw7`*ua GC;$NBX2{L} literal 0 HcmV?d00001 diff --git a/lib/std/compress/tar/testdata/star.tar.gz b/lib/std/compress/tar/testdata/star.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c8d5e18ad123d1187ff64bcaf60614c8d38dea85 GIT binary patch literal 179 zcmV;k08IZMiwFoY??Ge&19Nm?axQdXaschr!3x753TZ zSR8C`zj){2f+r7=#ewK;Gy+bzA@p004h~RM-Fj literal 0 HcmV?d00001 diff --git a/lib/std/compress/tar/testdata/trailing-slash.tar.gz b/lib/std/compress/tar/testdata/trailing-slash.tar.gz new file mode 100644 index 
0000000000000000000000000000000000000000..f0c2e0682b619fc5477591d22db6e60f6dc02229 GIT binary patch literal 134 zcmV;10D1o(iwFoY??Ge&19Wm>X>4h3XDxGVVRL9MbYXG;Gc+k+~^@g6UAo|LC3@1*2dTjDkT90D2tP=>Q4<0NS)R{r~^~ literal 0 HcmV?d00001 diff --git a/lib/std/compress/tar/testdata/ustar-file-devs.tar.gz b/lib/std/compress/tar/testdata/ustar-file-devs.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f459694f7666c35f926c359e3dc948dc0ee9958 GIT binary patch literal 93 zcmb2|=HM`V@0h~GTv}X`SfrblnUkuUl3G@*2jnq$pFZQ6y1>wYfyr1{*npSig^Akj x3kMF|nB#b`p^K$kKFfueVH850Q zP%tz!Gc-^Di9>-YP{_#8*wozA+{ggPH#0CZVo+ch1vCfVnK>oJ2rlCR+urFajycoW z$TZJI=~4g7_0tzNDn9IgqV%x;g*!KpaN-7uO$5?Y8W>r7PLT{S$Gx>-}4ZxXC_ep4WM~%qL!k0 z_y7NrjH1-Ul=zI)oPyM%Vvq<5U~p)cXLyjn#K*8)njt}$l@BBcvR4LZ{;c_nCl-V9 zzY%Q<0D}47&=@`c8=4u9=6`ws_x$|yoK(H!{9G<_@;_L;h<(mS1F5{|f~p6J zM;uuSFE8NvA6Q;gU@0#^YC-xzYG$C77Yu0S#SNtLq6=DHpyz+|^5TysqP*}xDlhsQ p;N<{V-Q(Pa6E%SH{XiO4aEzw>Q7{Td!6+En0051w4^{vU003Lm3-$m2 literal 0 HcmV?d00001 diff --git a/lib/std/compress/tar/testdata/xattrs.tar.gz b/lib/std/compress/tar/testdata/xattrs.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..072976d0ef7efa10c60d366a67aaf328af324b07 GIT binary patch literal 376 zcmV-;0f+t{iwFo!`9Wj?19)L{baHbpbYXG;?UuVv!!Q^|Gv_J#1g`!27FX)TKoJZ` zOiYzYT?jadl-Q8IJ?1KUp=Ah>3O7p7?uD8PK&mfBk6I+yeW z4hY6x$H3-@DdhIId_psA_PABkMU;zDQ2Pnc*y>1Yxcf3Pj|-Hzt{is!n!ur;NOD2@jvwV zCLHoVP_gBI0nz_M5G=mK|LVp6nEm_%u)Q@3{CNBizxx3==zl^t{ZD{I{|~`wUEf-R W^16&=i$o%k|HL!KqskHh6aWC?v9(kH literal 0 HcmV?d00001 diff --git a/lib/std/tar.zig b/lib/std/tar.zig index bc9a22fb7c48..8a67b0cb89c1 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -1,177 +1,1170 @@ -pub const Options = struct { - /// Number of directory levels to skip when extracting files. - strip_components: u32 = 0, - /// How to handle the "mode" property of files from within the tar file. 
- mode_mode: ModeMode = .executable_bit_only, +pub const Error = error{ + Overflow, + InvalidCharacter, + UnexpectedEndOfStream, + Header, + TarUnexpectedFileType, + TarComponentsOutsideStrippedPrefix, +}; - const ModeMode = enum { - /// The mode from the tar file is completely ignored. Files are created - /// with the default mode when creating files. - ignore, - /// The mode from the tar file is inspected for the owner executable bit - /// only. This bit is copied to the group and other executable bits. - /// Other bits of the mode are left as the default when creating files. - executable_bit_only, +pub const FileType = enum(u8) { + normal = '0', + normal2 = 0, + hard_link = '1', + symbolic_link = '2', + character_special = '3', + block_special = '4', + directory = '5', + fifo = '6', + contiguous = '7', + global_extended_header = 'g', + extended_header = 'x', + gnu_sparse = 'S', + gnu_long_name = 'L', + gnu_long_link = 'K', + _, + + pub const sentinel = @intToEnum(FileType, 0xff); + + pub const named_types_bitset = blk: { + var result = std.StaticBitSet(128).initEmpty(); + for ([_]FileType{ + .directory, .normal, .normal2, .hard_link, + .symbolic_link, .character_special, .block_special, .fifo, + .contiguous, + }) |ft| + result.set(@enumToInt(ft)); + break :blk result; }; }; -pub const Header = struct { - bytes: *const [512]u8, - - pub const FileType = enum(u8) { - normal = '0', - hard_link = '1', - symbolic_link = '2', - character_special = '3', - block_special = '4', - directory = '5', - fifo = '6', - contiguous = '7', - global_extended_header = 'g', - extended_header = 'x', - _, +fn parseOctal(raw: []const u8) !i64 { + // don't need to trim '0's as parseInt() accepts them + const trimmed = mem.trim(u8, raw, " \x00"); + if (trimmed.len == 0) return 0; + return fmt.parseInt(i64, mem.sliceTo(trimmed, 0), 8); +} + +/// Parses the input as being encoded in either base-256 or octal. +/// This function may return negative numbers. 
+/// Returns errors if parsing fails or an integer overflow occurs. +fn parseNumeric(b: []const u8) !i64 { + // Check for base-256 (binary) format first. + // If the first bit is set, then all following bits constitute a two's + // complement encoded number in big-endian byte order. + if (b.len > 0 and b[0] & 0x80 != 0) { + // Handling negative numbers relies on the following identity: + // -a-1 == ^a + // + // If the number is negative, we use an inversion mask to invert the + // data bytes and treat the value as an unsigned number. + + // inv = 0xff if negative else 0 + const inv = @as(u8, @boolToInt(b[0] & 0x40 != 0)) * 0xff; + + var x: u64 = 0; + for (0..b.len) |i| { + // ignore the signal bit in first byte + const mask = @as(u8, 0xff) >> @boolToInt(i == 0); + const c = b[i] ^ inv & mask; + if (x > 0x00ff_ffff_ffff_ffff) return error.Overflow; + x = x << 8 | c; + } + if (x >= 0x8000_0000_0000_0000) return error.Overflow; + + return if (inv == 0) + @bitCast(i64, x) + else + ~@bitCast(i64, x); + } + + return try parseOctal(b); +} + +test parseNumeric { + const TestCase = struct { + []const u8, + Error!i128, + }; + + const cases = [_]TestCase{ + .{ "", 0 }, + .{ "\x80", 0 }, + .{ "\x80\x00", 0 }, + .{ "\x80\x00\x00", 0 }, + .{ "\xbf", (1 << 6) - 1 }, + .{ "\xbf\xff", (1 << 14) - 1 }, + .{ "\xbf\xff\xff", (1 << 22) - 1 }, + .{ "\xff", -1 }, + .{ "\xff\xff", -1 }, + .{ "\xff\xff\xff", -1 }, + .{ "\xc0", -1 * (1 << 6) }, + .{ "\xc0\x00", -1 * (1 << 14) }, + .{ "\xc0\x00\x00", -1 * (1 << 22) }, + .{ "\x87\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745 }, + .{ "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745 }, + .{ "\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231 }, + .{ "\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231 }, + .{ "\x80\x7f\xff\xff\xff\xff\xff\xff\xff", std.math.maxInt(i64) }, + .{ "\x80\x80\x00\x00\x00\x00\x00\x00\x00", error.Overflow }, + .{ "\xff\x80\x00\x00\x00\x00\x00\x00\x00", 
std.math.minInt(i64) }, + .{ "\xff\x7f\xff\xff\xff\xff\xff\xff\xff", error.Overflow }, + .{ "\xf5\xec\xd1\xc7\x7e\x5f\x26\x48\x81\x9f\x8f\x9b", error.Overflow }, + + // Test base-8 (octal) encoded values. + .{ "0000000\x00", 0 }, + .{ " \x0000000\x00", 0 }, + .{ " \x0000003\x00", 3 }, + .{ "00000000227\x00", 0o227 }, + .{ "032033\x00 ", 0o32033 }, + .{ "320330\x00 ", 0o320330 }, + .{ "0000660\x00 ", 0o660 }, + .{ "\x00 0000660\x00 ", 0o660 }, + .{ "0123456789abcdef", error.InvalidCharacter }, + .{ "0123456789\x00abcdef", error.InvalidCharacter }, + .{ "01234567\x0089abcdef", 342391 }, + .{ "0123\x7e\x5f\x264123", error.InvalidCharacter }, }; - pub fn fileSize(header: Header) !u64 { - const raw = header.bytes[124..][0..12]; - const ltrimmed = std.mem.trimLeft(u8, raw, "0"); - const rtrimmed = std.mem.trimRight(u8, ltrimmed, " \x00"); - if (rtrimmed.len == 0) return 0; - return std.fmt.parseInt(u64, rtrimmed, 8); + for (cases) |case| { + const input = case[0]; + const expected_or_err = case[1]; + const err_or_void = if (expected_or_err) |expected| + if (parseNumeric(input)) |actual| + std.testing.expectEqual(expected, actual) + else |err| + err + else |err| + std.testing.expectError(err, parseNumeric(input)); + + err_or_void catch |e| { + log.err("parseNumeric failed on {s}:{any}. expected {!} got {!}", .{ input, input, expected_or_err, e }); + return e; + }; } +} + +// Takes a string of the form %d.%d as described in the PAX +// specification. Note that this implementation allows for negative timestamps, +// which is allowed for by the PAX specification, but not always portable. +fn parsePaxTime(s: []const u8) !i128 { + // split into seconds and sub-seconds parts + const parts: [2][]const u8 = if (mem.indexOfScalar(u8, s, '.')) |pos| + .{ s[0..pos], s[pos + 1 ..] 
} + else + .{ s, "" }; + const ss = parts[0]; + const secs = try fmt.parseInt(i64, ss, 10); - pub fn is_ustar(header: Header) bool { - return std.mem.eql(u8, header.bytes[257..][0..6], "ustar\x00"); + const sn = parts[1]; + if (sn.len == 0) return unixTime(secs, 0); + + const all_digits = for (sn) |c| { + if (!std.ascii.isDigit(c)) break false; + } else true; + if (!all_digits) return error.InvalidCharacter; + + const max_digits = 9; + // add trailing zeroes + var buf = [1]u8{'0'} ** max_digits; + const len = @min(sn.len, max_digits); + mem.copy(u8, &buf, sn[0..len]); + const nsecs = try fmt.parseInt(i64, &buf, 10); + + log.debug("parsePaxTime secs={} nsecs={} sn.len={}", .{ secs, nsecs, sn.len }); + return if (ss.len > 0 and ss[0] == '-') + unixTime(secs, -nsecs) + else + unixTime(secs, nsecs); +} + +test parsePaxTime { + const TestCase = struct { + []const u8, + Error!i128, + }; + + const cases = [_]TestCase{ + .{ "1350244992.023960108", unixTime(1350244992, 23960108) }, + .{ "1350244992.02396010", unixTime(1350244992, 23960100) }, + .{ "1350244992.0239601089", unixTime(1350244992, 23960108) }, + .{ "1350244992.3", unixTime(1350244992, 300000000) }, + .{ "1350244992", unixTime(1350244992, 0) }, + .{ "-1.000000001", unixTime(-1, -1e0 + 0e0) }, + .{ "-1.000001", unixTime(-1, -1e3 + 0e0) }, + .{ "-1.001000", unixTime(-1, -1e6 + 0e0) }, + .{ "-1", unixTime(-1, -0e0 + 0e0) }, + .{ "-1.999000", unixTime(-1, -1e9 + 1e6) }, + .{ "-1.999999", unixTime(-1, -1e9 + 1e3) }, + .{ "-1.999999999", unixTime(-1, -1e9 + 1e0) }, + .{ "0.000000001", unixTime(0, 1e0 + 0e0) }, + .{ "0.000001", unixTime(0, 1e3 + 0e0) }, + .{ "0.001000", unixTime(0, 1e6 + 0e0) }, + .{ "0", unixTime(0, 0e0) }, + .{ "0.999000", unixTime(0, 1e9 - 1e6) }, + .{ "0.999999", unixTime(0, 1e9 - 1e3) }, + .{ "0.999999999", unixTime(0, 1e9 - 1e0) }, + .{ "1.000000001", unixTime(1, 1e0 - 0e0) }, + .{ "1.000001", unixTime(1, 1e3 - 0e0) }, + .{ "1.001000", unixTime(1, 1e6 - 0e0) }, + .{ "1", unixTime(1, 0e0 - 0e0) }, 
+ .{ "1.999000", unixTime(1, 1e9 - 1e6) }, + .{ "1.999999", unixTime(1, 1e9 - 1e3) }, + .{ "1.999999999", unixTime(1, 1e9 - 1e0) }, + .{ "-1350244992.023960108", unixTime(-1350244992, -23960108) }, + .{ "-1350244992.02396010", unixTime(-1350244992, -23960100) }, + .{ "-1350244992.0239601089", unixTime(-1350244992, -23960108) }, + .{ "-1350244992.3", unixTime(-1350244992, -300000000) }, + .{ "-1350244992", unixTime(-1350244992, 0) }, + .{ "", error.InvalidCharacter }, + .{ "0", unixTime(0, 0) }, + .{ "1.", unixTime(1, 0) }, + .{ "0.0", unixTime(0, 0) }, + .{ ".5", error.InvalidCharacter }, + .{ "-1.3", unixTime(-1, -3e8) }, + .{ "-1.0", unixTime(-1, -0e0) }, + .{ "-0.0", unixTime(-0, -0e0) }, + .{ "-0.1", unixTime(-0, -1e8) }, + .{ "-0.01", unixTime(-0, -1e7) }, + .{ "-0.99", unixTime(-0, -99e7) }, + .{ "-0.98", unixTime(-0, -98e7) }, + .{ "-1.1", unixTime(-1, -1e8) }, + .{ "-1.01", unixTime(-1, -1e7) }, + .{ "-2.99", unixTime(-2, -99e7) }, + .{ "-5.98", unixTime(-5, -98e7) }, + .{ "-", error.InvalidCharacter }, + .{ "+", error.InvalidCharacter }, + .{ "-1.-1", error.InvalidCharacter }, + .{ "99999999999999999999999999999999999999999999999", error.Overflow }, + .{ "0.123456789abcdef", error.InvalidCharacter }, + .{ "foo", error.InvalidCharacter }, + .{ "\x00", error.InvalidCharacter }, + .{ "𝟵𝟴𝟳𝟲𝟱.𝟰𝟯𝟮𝟭𝟬", error.InvalidCharacter }, // Unicode numbers (U+1D7EC to U+1D7} + .{ "98765﹒43210", error.InvalidCharacter }, // Unicode period (U+FE} + }; + + for (cases) |case| { + const input = case[0]; + const expected_or_err = case[1]; + const err_or_void = if (expected_or_err) |expected| + if (parsePaxTime(input)) |actual| + std.testing.expectEqual(expected, actual) + else |err| + err + else |err| + std.testing.expectError(err, parsePaxTime(input)); + + err_or_void catch |e| { + log.err("parsePaxTime failed on {s}. expected {!} got {!}", .{ input, expected_or_err, e }); + return e; + }; } +} + +/// merges key-value pair `kv` into hdr if its a valid PAX field. 
+/// TODO merge PAX schilly xattrs +pub fn mergePax(kv: [2][]const u8, hdr: *Header) !void { + const k = kv[0]; + const v = kv[1]; + log.debug("mergePax k={s} v={s}", .{ k, v }); + if (v.len == 0) return; - /// Includes prefix concatenated, if any. - /// Return value may point into Header buffer, or might point into the - /// argument buffer. - /// TODO: check against "../" and other nefarious things - pub fn fullFileName(header: Header, buffer: *[255]u8) ![]const u8 { - const n = name(header); - if (!is_ustar(header)) - return n; - const p = prefix(header); - if (p.len == 0) - return n; - @memcpy(buffer[0..p.len], p); - buffer[p.len] = '/'; - @memcpy(buffer[p.len + 1 ..][0..n.len], n); - return buffer[0 .. p.len + 1 + n.len]; + const map = std.ComptimeStringMap(std.meta.FieldEnum(Header), .{ + .{ Pax.path, .name }, + .{ Pax.linkpath, .linkname }, + .{ Pax.uname, .uname }, + .{ Pax.gname, .gname }, + .{ Pax.uid, .uid }, + .{ Pax.gid, .gid }, + .{ Pax.atime, .atime }, + .{ Pax.mtime, .mtime }, + .{ Pax.ctime, .ctime }, + .{ Pax.size, .size }, + }); + + if (map.get(k)) |field_enum| switch (field_enum) { + .name => hdr.name = v, + .linkname => hdr.linkname = v, + .uname => hdr.uname = v, + .gname => hdr.gname = v, + .uid => hdr.uid = @truncate(i32, try fmt.parseInt(i64, v, 10)), + .gid => hdr.gid = @truncate(i32, try fmt.parseInt(i64, v, 10)), + .atime => hdr.atime = try parsePaxTime(v), + .ctime => hdr.ctime = try parsePaxTime(v), + .mtime => hdr.mtime = try parsePaxTime(v), + .size => hdr.size = try fmt.parseInt(i64, v, 10), + else => unreachable, + } else { + // TODO merge PAX schilly xattrs + // log.debug("TODO handle pax header key={s}", .{k}); } +} + +// Constants to identify various tar formats. +pub const Format = enum { + unknown, + + // The format of the original Unix V7 tar tool prior to standardization. + v7, + + // ustar represents the USTAR header format defined in POSIX.1-1988. 
+ // + // While this format is compatible with most tar readers, + // the format has several limitations making it unsuitable for some usages. + // Most notably, it cannot support sparse files, files larger than 8GiB, + // filenames larger than 256 characters, and non-ASCII filenames. + // + // Reference: + // http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06 + ustar, + + // pax represents the PAX header format defined in POSIX.1-2001. + // + // PAX extends USTAR by writing a special file with Typeflag TypeXHeader + // preceding the original header. This file contains a set of key-value + // records, which are used to overcome USTAR's shortcomings, in addition to + // providing the ability to have sub-second resolution for timestamps. + // + // Some newer formats add their own extensions to PAX by defining their + // own keys and assigning certain semantic meaning to the associated values. + // For example, sparse file support in PAX is implemented using keys + // defined by the GNU manual (e.g., "GNU.sparse.map"). + // + // Reference: + // http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html + pax, - pub fn name(header: Header) []const u8 { - return str(header, 0, 0 + 100); + // gnu represents the GNU header format. + // + // The GNU header format is older than the USTAR and PAX standards and + // is not compatible with them. The GNU format supports + // arbitrary file sizes, filenames of arbitrary encoding and length, + // sparse files, and other features. + // + // It is recommended that PAX be chosen over GNU unless the target + // application can only parse GNU formatted archives. + // + // Reference: + // https://www.gnu.org/software/tar/manual/html_node/Standard.html + gnu, + + // Schily's tar format, which is incompatible with USTAR. + // This does not cover STAR extensions to the PAX format; these fall under + // the PAX format. 
+ star, +}; + +pub const FormatSet = std.enums.EnumSet(Format); +const fmt_unknown = FormatSet.initOne(.unknown); +const fmt_v7 = FormatSet.initOne(.v7); +const fmt_ustar = FormatSet.initOne(.ustar); +const fmt_pax = FormatSet.initOne(.pax); +const fmt_gnu = FormatSet.initOne(.gnu); +const fmt_star = FormatSet.initOne(.star); +const fmt_ustar_pax = FormatSet.initMany(&.{ .ustar, .pax }); +const fmt_ustar_pax_gnu = FormatSet.initMany(&.{ .ustar, .pax, .gnu }); + +pub const Header = struct { + /// The type of header + type: FileType = FileType.sentinel, + + /// Name of file + name: []const u8 = "", + /// Target name of link (valid for hard_link or symbolic_link) + linkname: []const u8 = "", + + /// Logical file size in bytes + size: i64 = -1, + /// Permission and mode bits + mode: i64 = -1, + /// User ID of owner + uid: i32 = -1, + /// Group ID of owner + gid: i32 = -1, + /// User name of owner + uname: []const u8 = "", + /// Group name of owner + gname: []const u8 = "", + + /// To use atime or ctime, specify the format as PAX or GNU. + /// To use sub-second resolution, specify the format as PAX. + /// Modification time + mtime: i128 = -1, + /// Access time (requires either PAX or GNU support) + atime: i128 = -1, + /// Change time (requires either PAX or GNU support) + ctime: i128 = -1, + + /// Major device number (valid for character_special or block_special) + dev_major: i64 = -1, + /// Minor device number (valid for character_special or block_special) + dev_minor: i64 = -1, + + /// pax_recs is a sequence of key, value PAX extended header records. + /// only used in tests. + pax_recs: if (builtin.is_test) []const []const u8 else void = + if (builtin.is_test) &.{} else {}, + + // fmt specifies the format of the tar header. + // + // This is a best-effort guess at the format. + // Due to liberally reading some non-compliant files, + // it is possible for this to be unknown. 
+ fmt: FormatSet = FormatSet.initEmpty(), + + // TODO remove when unused + fn debugFormatSet(format_set: FormatSet, writer: anytype) !void { + try writer.print(" format_set=", .{}); + var iter = format_set.iterator(); + var i: u8 = 0; + while (iter.next()) |f| : (i += 1) { + if (i != 0) try writer.writeByte('|'); + try writer.print("{s}", .{@tagName(f)}); + } } - pub fn prefix(header: Header) []const u8 { - return str(header, 345, 345 + 155); + // TODO remove when unused + pub fn format(h: Header, comptime _: []const u8, _: fmt.FormatOptions, writer: anytype) !void { + const tagname = inline for (std.meta.fields(FileType)) |field| { + if (@enumToInt(h.type) == field.value) break field.name; + } else "null"; + try writer.print("type={s} size={} name={s} mtime={} mode=0o{o}", .{ tagname, h.size, h.name, h.mtime, h.mode }); + try debugFormatSet(h.fmt, writer); } - pub fn fileType(header: Header) FileType { - const result = @as(FileType, @enumFromInt(header.bytes[156])); - return if (result == @as(FileType, @enumFromInt(0))) .normal else result; + fn structField(comptime field_enum: std.meta.FieldEnum(Header)) std.builtin.Type.StructField { + return @typeInfo(Header).Struct.fields[@enumToInt(field_enum)]; } - fn str(header: Header, start: usize, end: usize) []const u8 { - var i: usize = start; - while (i < end) : (i += 1) { - if (header.bytes[i] == 0) break; + fn fieldDefault(comptime field: std.builtin.Type.StructField) field.type { + return @ptrCast( + *const field.type, + @alignCast(@alignOf(field.type), field.default_value), + ).*; + } + + /// copy all fields from `new_hdr` to `hdr`, but skipping any fields that + /// have default values (as defined in Header). + fn merge(hdr: *Header, new_hdr: Header) void { + // only assign a `hdr` field value if its not equal to the field's default + // value. 
includes comptime checks that field default values match expectations + inline for (std.meta.fields(Header)) |f| { + switch (f.type) { + FileType => { + const default = comptime fieldDefault(f); + comptime assert(default == FileType.sentinel); + if (@field(new_hdr, f.name) != default) { + @field(hdr, f.name) = @field(new_hdr, f.name); + } + }, + i64, i32, i128 => { + // verify all integer field defaults == -1 + const default = comptime fieldDefault(f); + comptime assert(default == -1); + if (@field(new_hdr, f.name) != default) + @field(hdr, f.name) = @field(new_hdr, f.name); + }, + []const u8 => { + // verify all []const u8 field defaults == "" + const default = comptime fieldDefault(f); + comptime assert(default.len == 0); + if (@field(new_hdr, f.name).len != 0) + @field(hdr, f.name) = @field(new_hdr, f.name); + }, + // skip pax_recs which is only used for testing. + // NOTE: don't try to get fieldDefault() of this field which + // is void in non-testing modes. it will error with + // 'error: alignment must be >= 1'. + []const []const u8, void => assert(mem.eql(u8, f.name, "pax_recs")), + FormatSet => { + const default = comptime fieldDefault(f); + comptime assert(default.eql(FormatSet.initEmpty())); + if (!@field(new_hdr, f.name).eql(default)) + @field(hdr, f.name) = @field(new_hdr, f.name); + }, + else => @compileLog(comptime fmt.comptimePrint("todo {s}", .{@typeName(f.type)})), + } } - return header.bytes[start..i]; } }; -pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void { - switch (options.mode_mode) { - .ignore => {}, - .executable_bit_only => { - // This code does not look at the mode bits yet. To implement this feature, - // the implementation must be adjusted to look at the mode, and check the - // user executable bit, then call fchmod on newly created files when - // the executable bit is supposed to be set. - // It also needs to properly deal with ACLs on Windows. 
- @panic("TODO: unimplemented: tar ModeMode.executable_bit_only"); - }, - } - var file_name_buffer: [255]u8 = undefined; - var buffer: [512 * 8]u8 = undefined; - var start: usize = 0; - var end: usize = 0; - header: while (true) { - if (buffer.len - start < 1024) { - const dest_end = end - start; - @memcpy(buffer[0..dest_end], buffer[start..end]); - end = dest_end; - start = 0; +pub fn unixTime(tv_sec: i64, tv_nsec: i64) i128 { + const result = @bitCast(i128, [_]i64{ tv_sec * time.ns_per_s, tv_nsec }); + return result; +} + +pub const block_len = 512; +pub const Block = *[block_len]u8; + +const V7Header = extern struct { + file_name: [100]u8, // 0..100 + mode: [8]u8, // 100..108 + uid: [8]u8, // 108..116 + gid: [8]u8, // 116..124 + size: [12]u8, // 124..136 + mod_time: [12]u8, // 136..148 + checksum: [8]u8, // 148..156 + type: FileType, // 156..157 + linked_file_name: [100]u8, // 157..257 + __padding: [255]u8, + + comptime { + assert(@sizeOf(V7Header) == block_len); + } + + /// Returns an (unsigned, signed) pair of checksums for the header block. + /// POSIX specifies a sum of the unsigned byte values, but the Sun tar used + /// signed byte values. + /// We compute and return both. + fn computeChecksum(h: *const V7Header) [2]i64 { + var unsigned: i64 = 0; + var signed: i64 = 0; + const bytes_ = h.bytes(); + for (bytes_, 0..) |_, i| { + const c = if (148 <= i and i < 156) + ' ' // Treat the checksum field itself as all spaces. 
+ else + bytes_[i]; + unsigned += c; + signed += @bitCast(i8, c); } - const ask_header = @min(buffer.len - end, 1024 -| (end - start)); - end += try reader.readAtLeast(buffer[end..], ask_header); - switch (end - start) { - 0 => return, - 1...511 => return error.UnexpectedEndOfStream, - else => {}, + return .{ unsigned, signed }; + } + + inline fn ustar(h: *const V7Header) *const UstarHeader { + return @ptrCast(*const UstarHeader, h); + } + inline fn star(h: *const V7Header) *const StarHeader { + return @ptrCast(*const StarHeader, h); + } + inline fn gnu(h: *const V7Header) *const GnuHeader { + return @ptrCast(*const GnuHeader, h); + } + inline fn bytes(h: *const V7Header) *const [block_len]u8 { + return @ptrCast(*const [block_len]u8, h); + } + + // Magics used to identify various formats. + const magic_gnu = "ustar "; + const version_gnu = " \x00"; + const magic_version_gnu = mem.readIntBig(u64, magic_gnu ++ version_gnu); + const magic_ustar = @truncate(u48, mem.readIntBig(u64, "ustar\x00\x00\x00") >> 16); + const version_ustar = "00"; // unused. left only for documentation + const trailer_star = mem.readIntBig(u32, "tar\x00"); + + fn getFormat(h: *const V7Header) !FormatSet { + const value = try parseOctal(&h.checksum); + const checksums = h.computeChecksum(); + if (value != checksums[0] and value != checksums[1]) + return fmt_unknown; + + const magic_version = h.ustar().magicVersion(); + const magic = @truncate(u48, magic_version >> 16); + + return if (magic == magic_ustar and + mem.readIntBig(u32, &h.star().trailer) == trailer_star) + fmt_star + else if (magic == magic_ustar) + // either ustar or pax is enough info. 
don't need to check version + fmt_ustar_pax + else if (magic_version == magic_version_gnu) + fmt_gnu + else + fmt_v7; + } +}; + +const V7HeaderDummy = [257]u8; + +const UstarHeader = extern struct { + v7_header: V7HeaderDummy, + magic: [6]u8, // 257..263 + version: [2]u8, // 263..265 + user_name: [32]u8, // 265..297 + group_name: [32]u8, // 297..329 + dev_major: [8]u8, // 329..337 + dev_minor: [8]u8, // 337..345 + filename_prefix: [155]u8, // 345..500 + __padding: [12]u8, + + pub fn magicVersion(ustar: *const UstarHeader) u64 { + return mem.readIntBig(u64, @ptrCast([*]const u8, &ustar.magic)[0..8]); + } + + comptime { + assert(@sizeOf(UstarHeader) == block_len); + } +}; + +const StarHeader = extern struct { + v7_header: V7HeaderDummy, + magic: [6]u8, // 257..263 + version: [2]u8, // 263..265 + user_name: [32]u8, // 265..297 + group_name: [32]u8, // 297..329 + dev_major: [8]u8, // 329..337 + dev_minor: [8]u8, // 337..345 + filename_prefix: [131]u8, // 345..476 + access_time: [12]u8, // 476..488 + change_time: [12]u8, // 488..500 + __padding: [8]u8, + trailer: [4]u8, // 508..512 + + comptime { + assert(@sizeOf(StarHeader) == block_len); + } +}; + +const GnuHeader = extern struct { + v7_header: V7HeaderDummy, + magic: [6]u8, // 257..263 + version: [2]u8, // 263..265 + user_name: [32]u8, // 265..297 + group_name: [32]u8, // 297..329 + dev_major: [8]u8, // 329..337 + dev_minor: [8]u8, // 337..345 + access_time: [12]u8, // 345..357 + change_time: [12]u8, // 357..369 + __padding: [17]u8, + sparse: [24 * 4 + 1]u8, // 386..483 + real_size: [12]u8, // 483..495 + __padding2: [17]u8, + + comptime { + assert(@sizeOf(GnuHeader) == block_len); + } +}; + +pub fn headerIterator( + reader: anytype, + buf: Block, + allocator: mem.Allocator, +) HeaderIterator(@TypeOf(reader)) { + return HeaderIterator(@TypeOf(reader)){ + .reader = reader, + .buf = buf, + .allocator = allocator, + }; +} + +pub fn HeaderIterator(comptime Reader: type) type { + return struct { + reader: Reader, + buf: 
Block, + pax_buf: std.ArrayListUnmanaged(u8) = .{}, + name_buf: std.ArrayListUnmanaged(u8) = .{}, + linkname_buf: std.ArrayListUnmanaged(u8) = .{}, + allocator: mem.Allocator, + + const Self = @This(); + + pub fn deinit(self: *Self) void { + self.pax_buf.deinit(self.allocator); + self.name_buf.deinit(self.allocator); + self.linkname_buf.deinit(self.allocator); } - const header: Header = .{ .bytes = buffer[start..][0..512] }; - start += 512; - const file_size = try header.fileSize(); - const rounded_file_size = std.mem.alignForward(u64, file_size, 512); - const pad_len = @as(usize, @intCast(rounded_file_size - file_size)); - const unstripped_file_name = try header.fullFileName(&file_name_buffer); - switch (header.fileType()) { - .directory => { - const file_name = try stripComponents(unstripped_file_name, options.strip_components); - if (file_name.len != 0) { - try dir.makePath(file_name); + + /// iterates through the tar archive as if it is a series of + /// files. Internally, the tar format often uses fake "files" to add meta + /// data that describes the next file. These meta data "files" should not + /// normally be visible to the outside. As such, this iterates through + /// one or more "header files" until it finds a "normal file". 
/// Returns null once `nextV7Header()` or `header()` report the end of the
/// archive. A global extended header is handed back to the caller as its
/// own entry (carrying the merged pax records); per-file extended headers
/// and gnu long name/link records are folded into the entry that follows.
/// Returns error.Header when a gnu long name/link record declares a
/// negative size.
pub fn next(self: *Self) !?Header {
    // NOTE(review): both slices point into self.name_buf / self.linkname_buf.
    // header() also writes into name_buf when a ustar prefix is present;
    // confirm that a prefixed header can never follow a gnu long name record.
    var gnu_long_name: []const u8 = "";
    var gnu_long_link: []const u8 = "";
    var format = fmt_ustar_pax_gnu;
    var hdr = Header{};
    var pax_hdr = Header{};

    while (true) {
        const v7 = try self.nextV7Header() orelse return null;
        hdr = try self.header(v7) orelse return null;

        format.setIntersection(hdr.fmt);
        log.debug("hdr={}", .{hdr});
        switch (hdr.type) {
            .extended_header, .global_extended_header => {
                format.setIntersection(fmt_pax);
                // nextV7Header() already loaded the records into pax_buf.
                var paxiter = self.paxIterator();
                pax_hdr = .{};
                while (try paxiter.next()) |kv|
                    try mergePax(kv, &pax_hdr);

                if (hdr.type == .global_extended_header) {
                    var res = Header{
                        .name = hdr.name,
                        .type = hdr.type,
                        .fmt = format,
                    };
                    res.merge(pax_hdr);
                    return res;
                }
            },
            .gnu_long_name => {
                format.setIntersection(fmt_gnu);
                // hdr.size is read from the archive and may be negative;
                // reject it instead of tripping an unchecked @intCast.
                const size = std.math.cast(u64, hdr.size) orelse
                    return error.Header;
                gnu_long_name = mem.sliceTo(
                    try self.readBlocks(size, &self.name_buf),
                    0,
                );
            },
            .gnu_long_link => {
                format.setIntersection(fmt_gnu);
                const size = std.math.cast(u64, hdr.size) orelse
                    return error.Header;
                gnu_long_link = mem.sliceTo(
                    try self.readBlocks(size, &self.linkname_buf),
                    0,
                );
            },

            else => {
                // A "real" entry: apply everything collected from the
                // meta-data records that preceded it.
                hdr.merge(pax_hdr);
                if (gnu_long_name.len > 0) hdr.name = gnu_long_name;
                if (gnu_long_link.len > 0) hdr.linkname = gnu_long_link;
                if (hdr.type == .normal2) {
                    hdr.type = if (mem.endsWith(u8, hdr.name, "/"))
                        .directory
                    else
                        .normal;
                }

                // Set the final guess at the format.
                if (format.contains(.ustar) and format.contains(.pax))
                    format.setIntersection(fmt_ustar);

                hdr.fmt = format;
                return hdr;
            },
        }
    }
    unreachable;
}

/// resets `outbuf` and then reads from `self.reader` into `outbuf`
/// `size` (aligned forward to 512) bytes.
/// returns error.UnexpectedEndOfStream if the stream ends before every
/// requested block has been read in full, and error.Header if `size`
/// cannot be represented as a usize. otherwise returns
/// `outbuf.items[0..size]`; that slice points into `outbuf` and is
/// overwritten by the next call that uses the same `outbuf`.
fn readBlocks(
    self: *Self,
    size: u64,
    outbuf: *std.ArrayListUnmanaged(u8),
) ![]u8 {
    const result_len = std.math.cast(usize, size) orelse return error.Header;
    var remaining = mem.alignForwardGeneric(u64, size, block_len);
    outbuf.clearRetainingCapacity();

    var block: [block_len]u8 = undefined;
    while (remaining > 0) : (remaining -= block_len) {
        // readAll() keeps reading across short reads, so anything less than
        // a full block here means the stream really ended early. Bailing out
        // (rather than breaking) keeps the slice below within bounds.
        const amt = try self.reader.readAll(&block);
        if (amt != block_len) return error.UnexpectedEndOfStream;
        try outbuf.appendSlice(self.allocator, &block);
    }
    return outbuf.items[0..result_len];
}

/// reinterprets the current block (`self.buf`) as a V7Header.
inline fn v7Header(self: Self) *const V7Header {
    return @ptrCast(*const V7Header, self.buf);
}

/// Reads the next 512 byte block from reader into `self.buf`. Returns:
///   null: the stream ended before any byte of the block was read
///   V7Header: a full block was read. also, if v7.type is an extended
///     header, its contents are read into pax_buf
///   error.UnexpectedEndOfStream: the stream ended inside the block
///   error.Header: an extended header declares a negative size
pub fn nextV7Header(self: *Self) !?*const V7Header {
    const amt = try self.reader.readAll(self.buf);
    if (amt == 0) return null;
    if (amt != block_len) return error.UnexpectedEndOfStream;

    const v7 = self.v7Header();
    switch (v7.type) {
        .global_extended_header, .extended_header => {
            // the size field comes from the archive; parseOctal() yields a
            // signed value, so check it before widening to u64.
            const size = std.math.cast(u64, try parseOctal(&v7.size)) orelse
                return error.Header;
            // readBlocks() stores whole blocks; keep only the `size` bytes
            // of pax records and drop the block padding.
            self.pax_buf.items = try self.readBlocks(size, &self.pax_buf);
        },
        else => {},
    }
    return v7;
}

/// returns an iterator over the pax records currently held in pax_buf.
pub fn paxIterator(self: *Self) PaxIterator {
    return .{ .bytes = self.pax_buf.items };
}

/// provides a forward only iterator over pax records. pax records have
/// the format: 'len key=value'. calling next() yields single
/// (key, value) entry.
pub const PaxIterator = struct {
    /// the pax records that have not been consumed yet.
    bytes: []const u8,

    /// Returns the next (key, value) pair, or null when no complete record
    /// is left. Both slices point into `bytes`.
    ///
    /// Each record is "<len> <key>=<value>\n" where <len> is the decimal
    /// size of the whole record, including the length digits, the space
    /// and the trailing newline. The declared length decides where the
    /// record ends, so a value may itself contain newlines.
    ///
    /// Returns error.Header when the record is malformed (no space, the
    /// length is out of bounds, the record does not end in a newline, no
    /// '=', or the pair fails isValidPax()). Errors from parsing the
    /// length digits are passed through unchanged.
    pub fn next(self: *PaxIterator) !?[2][]const u8 {
        if (self.bytes.len == 0) return null;

        // as before, leftover bytes without any newline end the iteration
        const nl = mem.indexOfScalar(u8, self.bytes, '\n') orelse
            return null;
        // the length field has to sit on the first line of the record
        const sp = mem.indexOfScalar(u8, self.bytes[0..nl], ' ') orelse
            return error.Header;
        const len: usize = try fmt.parseUnsigned(u32, self.bytes[0..sp], 10);

        // `len` is untrusted: it must leave room for the space and the
        // newline, stay inside the buffer, and land on a newline.
        if (len < sp + 2 or len > self.bytes.len) return error.Header;
        if (self.bytes[len - 1] != '\n') return error.Header;

        const rec = self.bytes[sp + 1 .. len - 1];
        self.bytes = self.bytes[len..];

        const eqidx = mem.indexOfScalar(u8, rec, '=') orelse
            return error.Header;
        const kv: [2][]const u8 = .{ rec[0..eqidx], rec[eqidx + 1 ..] };
        if (!isValidPax(kv)) return error.Header;
        return kv;
    }

    /// reports whether `s` contains a NUL byte.
    fn hasNul(s: []const u8) bool {
        return mem.indexOfScalar(u8, s, 0) != null;
    }

    // reports whether the key-value pair is valid where each
    // record is formatted as:
    //     "%d %s=%s\n" % (size, key, value)
    //
    // Keys and values should be UTF-8, but the number of bad writers out there
    // forces us to be more liberal.
    // Thus, we only reject string keys with NUL, and only reject NULs in values
    // for the PAX version of the USTAR string fields.
    // The key must not contain an '=' character.
    fn isValidPax(kv: [2][]const u8) bool {
        const map = std.ComptimeStringMap(void, .{
            .{ Pax.path, {} },
            .{ Pax.linkpath, {} },
            .{ Pax.uname, {} },
            .{ Pax.gname, {} },
        });

        const k = kv[0];
        const v = kv[1];
        return if (k.len == 0 or mem.indexOfScalar(u8, k, '=') != null)
            false
        else if (map.get(k) != null)
            !hasNul(v)
        else
            !hasNul(k);
    }
};

/// reports whether every byte of the current block (`self.buf`) is zero.
fn allZeroes(self: Self) bool {
    return mem.allEqual(u8, self.buf, 0);
}

/// Populates and returns a validated header record
/// Returns null when one of the following occurs:
///  * Exactly 0 bytes are read and EOF is hit.
///  * Exactly 1 block of zeros is read and EOF is hit.
///  * At least 2 blocks of zeros are read.
+ pub fn header( + self: *Self, + v7: *const V7Header, + ) !?Header { + if (self.allZeroes()) { + _ = try self.nextV7Header() orelse return null; + if (self.allZeroes()) return null; + return error.Header; + } + // Verify the header matches a known format. + const format = try v7.getFormat(); + + if (format.eql(fmt_unknown)) return error.Header; + + var hdr = Header{ + .type = v7.type, + .name = mem.sliceTo(&v7.file_name, 0), + .linkname = mem.sliceTo(&v7.linked_file_name, 0), + .size = try parseNumeric(&v7.size), + .mode = try parseNumeric(&v7.mode), + .uid = @intCast(i32, try parseNumeric(&v7.uid)), + .gid = @intCast(i32, try parseNumeric(&v7.gid)), + .mtime = unixTime(try parseNumeric(&v7.mod_time), 0), + }; + + var prefix: []const u8 = ""; + + // Unpack format specific fields. + if (format.bits.mask > fmt_v7.bits.mask) { + const ustar = v7.ustar(); + hdr.uname = mem.sliceTo(&ustar.user_name, 0); + hdr.gname = mem.sliceTo(&ustar.group_name, 0); + hdr.dev_major = try parseNumeric(&ustar.dev_major); + hdr.dev_minor = try parseNumeric(&ustar.dev_minor); + + if (format.intersectWith(fmt_ustar_pax).bits.mask != 0) { + hdr.fmt = format; + + prefix = mem.sliceTo(&ustar.filename_prefix, 0); + + // set format = unknown if self.buf has any non-ascii chars + for (self.buf) |c| { + if (!std.ascii.isASCII(c)) { + hdr.fmt = fmt_unknown; + break; + } + } + + // Numeric fields must end in NUL + // set format = unknown if any numeric field isn't 0 terminated + const hasNull = struct { + fn func(s: []const u8) bool { + return s[s.len - 1] == 0; + } + }.func; + if (!(hasNull(&v7.size) and hasNull(&v7.mode) and + hasNull(&v7.uid) and hasNull(&v7.gid) and + hasNull(&v7.mod_time) and hasNull(&ustar.dev_major) and + hasNull(&ustar.dev_minor))) + { + hdr.fmt = fmt_unknown; + } + } else if (format.contains(.star)) { + const star = v7.star(); + prefix = mem.sliceTo(&star.filename_prefix, 0); + hdr.atime = unixTime(try parseNumeric(&star.access_time), 0); + hdr.ctime = unixTime(try 
parseNumeric(&star.change_time), 0); + } else if (format.contains(.gnu)) { + hdr.fmt = format; + const gnu = v7.gnu(); + + if (gnu.access_time[0] != 0) + hdr.atime = unixTime(try parseNumeric(&gnu.access_time), 0); + + if (gnu.change_time[0] != 0) + hdr.ctime = unixTime(try parseNumeric(&gnu.change_time), 0); + } + if (prefix.len > 0) { + self.name_buf.items.len = 0; + const w = self.name_buf.writer(self.allocator); + _ = try w.write(prefix); + try w.writeByte(fs.path.sep); + _ = try w.write(hdr.name); + // add null terminator after end + const len = self.name_buf.items.len; + try w.writeByte(0); + hdr.name = self.name_buf.items[0..len]; + } + } + return hdr; + } + }; +} - var file_off: usize = 0; +const Pax = struct { + const path = "path"; + const linkpath = "linkpath"; + const size = "size"; + const uid = "uid"; + const gid = "gid"; + const uname = "uname"; + const gname = "gname"; + const mtime = "mtime"; + const atime = "atime"; + const ctime = "ctime"; + + const schily_xattr = "SCHILY.xattr."; + + // Keywords for GNU sparse files in a PAX extended header. 
+ const Gnu = struct { + const sparse = "GNU.sparse."; + const sparse_num_blocks = "GNU.sparse.numblocks"; + const sparse_offset = "GNU.sparse.offset"; + const sparse_num_bytes = "GNU.sparse.numbytes"; + const sparse_map = "GNU.sparse.map"; + const sparse_name = "GNU.sparse.name"; + const sparse_major = "GNU.sparse.major"; + const sparse_minor = "GNU.sparse.minor"; + const sparse_size = "GNU.sparse.size"; + const sparse_real_size = "GNU.sparse.realsize"; + }; +}; + +/// return the most significant, 'top' half of the time as an i64 +fn truncateTime(t: i128) i64 { + return @truncate(i64, t >> 64); +} + +const is_windows = builtin.os.tag == .windows; + +fn setFileProperties(file: fs.File, header: Header, options: Options) !void { + comptime assert(Header.fieldDefault(Header.structField(.atime)) == -1); + comptime assert(Header.fieldDefault(Header.structField(.ctime)) == -1); + // TODO not sure 'now' is correct if time is set to its default value. and + // also, maybe this logic should be moved elsewhere. 
maybe 'fn header()' + const atime = if (header.atime == -1) time.nanoTimestamp() else header.atime; + const mtime = if (header.mtime == -1) time.nanoTimestamp() else header.mtime; + if (is_windows) + // workaround for 'panic: integer cast truncated bits' from + // file.updateTimes() + try file.updateTimes(truncateTime(atime), truncateTime(mtime)) + else + try file.updateTimes(atime, mtime); + + if (options.mode_mode == .executable_bit_only) { + if (std.fs.has_executable_bit) { + // TODO - not sure using file.mode() is correct but it seems to + // match gnu tar behavior on linux while using + // header.mode does not + const mode = try file.mode(); // header.mode + var modebits = std.StaticBitSet(32){ .mask = @intCast(u32, mode) }; + // copy the user exe bit to the group and other exe bits + // these bit indices count from the right: + // u g o + // rwx rwx rwx + // 876_543_210 + // 0b000_000_000 + const has_owner_exe_bit = modebits.isSet(6); + modebits.setValue(3, has_owner_exe_bit); + modebits.setValue(0, has_owner_exe_bit); + log.debug("mode old={o} new={o}", .{ mode, modebits.mask }); + try file.chmod(modebits.mask); + } + } +} + +fn setDirProperties(dir: fs.Dir, header: Header, options: Options) !void { + // FIXME: creating a File from a Dir.fs is incorrect on non-posix systems. 
+ // this is an attempt to re-use setFileProperties() that doesn't work on windows: + // return setFileProperties(fs.File{ .handle = dir.fd }, header, options); + + // TODO implement once https://github.com/ziglang/zig/issues/12377 is solved + // see https://github.com/ziglang/zig/pull/15382#issuecomment-1519136452 + _ = options; + _ = header; + _ = dir; +} + +fn makeOpenPath(dir: fs.Dir, sub_path: []const u8, header: Header, options: Options) !fs.Dir { + var subdir = try dir.makeOpenPath(sub_path, .{}); + try setDirProperties(subdir, header, options); + return subdir; +} + +fn makeSymLink(dir: fs.Dir, target_path: []const u8, symlink_path: []const u8) !void { + // handle dangling symlinks (where target_path doesn't yet exist) by setting + // is_directory = false; + const is_directory = blk: { + const file = dir.openFile(target_path, .{}) catch |e| switch (e) { + error.FileNotFound => break :blk false, + else => return e, + }; + defer file.close(); + const stat = try file.stat(); + break :blk stat.kind == .Directory; + }; + try dir.symLink(target_path, symlink_path, .{ .is_directory = is_directory }); +} + +pub const Options = struct { + /// Number of directory levels to skip when extracting files. + strip_components: u32 = 0, + /// How to handle the "mode" property of files from within the tar file. + mode_mode: ModeMode = .executable_bit_only, + + const ModeMode = enum { + /// The mode from the tar file is completely ignored. Files are created + /// with the default mode when creating files. + ignore, + /// The mode from the tar file is inspected for the owner executable bit + /// only. This bit is copied to the group and other executable bits. + /// Other bits of the mode are left as the default when creating files. + executable_bit_only, + }; +}; + +/// reads tar file contents from `reader` and writes files to `dir`. `allocator` +/// used for potentially long file names when: +/// 1. gnu_long_name or gnu_long_link are present +/// 2. 
a prefixed filename is used +pub fn pipeToFileSystem( + allocator: mem.Allocator, + dir: fs.Dir, + reader: anytype, + options: Options, +) !void { + var format = FormatSet.initMany(&.{ .ustar, .pax, .gnu }); + var buf: [block_len]u8 = undefined; + var iter = headerIterator(reader, &buf, allocator); + defer iter.deinit(); + + while (try iter.next()) |header| { + const file_name = try stripComponents(header.name, options.strip_components); + log.info("pipeToFileSystem() header.type={s} stripped file_name={s}", .{ @tagName(header.type), file_name }); + if (file_name.len == 0 and + FileType.named_types_bitset.isSet(@enumToInt(header.type))) + continue; + + switch (header.type) { + .directory => { + var subdir = try makeOpenPath(dir, file_name, header, options); + subdir.close(); + }, + .normal, .normal2 => { + var file = try if (fs.path.dirname(file_name)) |sub_path| blk: { + var subdir = try makeOpenPath(dir, sub_path, header, options); + defer subdir.close(); + const basename = file_name[sub_path.len + 1 ..]; + break :blk subdir.createFile(basename, .{}); + } else dir.createFile(file_name, .{}); + defer file.close(); + const want = mem.alignForwardGeneric(u64, @intCast(u64, header.size), block_len); + var lim_reader = std.io.limitedReader(reader, want); + var bytes_left = @intCast(usize, header.size); while (true) { - if (buffer.len - start < 1024) { - const dest_end = end - start; - @memcpy(buffer[0..dest_end], buffer[start..end]); - end = dest_end; - start = 0; + const amt = try lim_reader.read(iter.buf); + switch (amt) { + 0 => break, + block_len => {}, + else => return error.UnexpectedEndOfStream, + } + _ = try file.write(iter.buf[0..std.math.min(bytes_left, block_len)]); + bytes_left -|= block_len; + } + + try setFileProperties(file, header, options); + }, + .symbolic_link => { + if (fs.path.dirname(file_name)) |sub_path| { + const basename = file_name[sub_path.len + 1 ..]; + var subdir = try makeOpenPath(dir, sub_path, header, options); + defer subdir.close(); 
+ log.debug("sub_path={s} basename={s}", .{ sub_path, basename }); + if (is_windows or builtin.os.tag == .wasi) { + // TODO - symlinks on windows / wasi. windows will fail unless run as admin + } else { + try makeSymLink(subdir, header.linkname, basename); } - // Ask for the rounded up file size + 512 for the next header. - // TODO: https://github.com/ziglang/zig/issues/14039 - const ask = @as(usize, @intCast(@min( - buffer.len - end, - rounded_file_size + 512 - file_off -| (end - start), - ))); - end += try reader.readAtLeast(buffer[end..], ask); - if (end - start < ask) return error.UnexpectedEndOfStream; - // TODO: https://github.com/ziglang/zig/issues/14039 - const slice = buffer[start..@as(usize, @intCast(@min(file_size - file_off + start, end)))]; - try file.writeAll(slice); - file_off += slice.len; - start += slice.len; - if (file_off >= file_size) { - start += pad_len; - // Guaranteed since we use a buffer divisible by 512. - assert(start <= end); - continue :header; + } else { + if (is_windows or builtin.os.tag == .wasi) { + // TODO - symlinks on windows / wasi. 
windows will fail unless run as admin + } else { + try makeSymLink(dir, header.linkname, file_name); } } }, + .hard_link => { + if (fs.path.dirname(file_name)) |sub_path| { + const basename = file_name[sub_path.len + 1 ..]; + var subdir = try makeOpenPath(dir, sub_path, header, options); + defer subdir.close(); + try subdir.copyFile(header.linkname, subdir, basename, .{}); + } else { + try dir.copyFile(header.linkname, dir, file_name, .{}); + } + }, .global_extended_header, .extended_header => { - if (start + rounded_file_size > end) return error.TarHeadersTooBig; - start = @as(usize, @intCast(start + rounded_file_size)); + format.setIntersection(fmt_pax); + }, + else => { + log.err("unsupported type '{s}'", .{@tagName(header.type)}); + return error.TarUnexpectedFileType; }, - .hard_link => return error.TarUnsupportedFileType, - .symbolic_link => return error.TarUnsupportedFileType, - else => return error.TarUnsupportedFileType, } } } @@ -180,9 +1173,10 @@ fn stripComponents(path: []const u8, count: u32) ![]const u8 { var i: usize = 0; var c = count; while (c > 0) : (c -= 1) { - if (std.mem.indexOfScalarPos(u8, path, i, '/')) |pos| { + if (mem.indexOfScalarPos(u8, path, i, '/')) |pos| { i = pos + 1; } else { + log.err("stripComponents() invalid path={s} with count={}\n", .{ path, count }); return error.TarComponentsOutsideStrippedPrefix; } } @@ -196,5 +1190,11 @@ test stripComponents { try expectEqualStrings("c", try stripComponents("a/b/c", 2)); } -const std = @import("std.zig"); +const std = @import("std"); const assert = std.debug.assert; +const mem = std.mem; +const fs = std.fs; +const fmt = std.fmt; +const log = std.log; +const time = std.time; +const builtin = @import("builtin"); diff --git a/src/Package.zig b/src/Package.zig index 2e1dd4e14f1e..13e1469d2ecd 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -600,7 +600,7 @@ fn unpackTarball( var decompress = try compression.decompress(gpa, br.reader()); defer decompress.deinit(); - try 
std.tar.pipeToFileSystem(out_dir, decompress.reader(), .{ + try std.tar.pipeToFileSystem(gpa, out_dir, decompress.reader(), .{ .strip_components = 1, // TODO: we would like to set this to executable_bit_only, but two // things need to happen before that: From df0829d7348a5e46b81e75ebcaca740c8828d81b Mon Sep 17 00:00:00 2001 From: Travis Staloch Date: Tue, 2 May 2023 03:47:51 -0700 Subject: [PATCH 2/6] std.tar: fix fuzzing crashes part 1 this patch converts all the crashes submitted by @squeek502 (in https://github.com/ziglang/zig/pull/15382#issuecomment-1531070661) to errors. HeaderIterator: * add bounds checks to PaxIterator.next() * convert several unsafe int casts to safe ones misc: * added FileType.tagName() for debugging which returns null for unnamed enum values * make unixTime() fallible --- lib/std/compress/tar/reader_test.zig | 110 +++++++++++----------- lib/std/compress/tar/test_common.zig | 1 + lib/std/tar.zig | 136 +++++++++++++++------------ 3 files changed, 131 insertions(+), 116 deletions(-) diff --git a/lib/std/compress/tar/reader_test.zig b/lib/std/compress/tar/reader_test.zig index 27d14cef9505..8432f2ab26ff 100644 --- a/lib/std/compress/tar/reader_test.zig +++ b/lib/std/compress/tar/reader_test.zig @@ -40,7 +40,7 @@ test "std.tar validate testdata headers" { .uid = 73025, .gid = 5000, .size = 5, - .mtime = unixTime(1244428340, 0), + .mtime = try unixTime(1244428340, 0), .type = .normal, .uname = "dsymonds", .gname = "eng", @@ -51,7 +51,7 @@ test "std.tar validate testdata headers" { .uid = 73025, .gid = 5000, .size = 11, - .mtime = unixTime(1244436044, 0), + .mtime = try unixTime(1244436044, 0), .type = .normal, .uname = "dsymonds", .gname = "eng", @@ -71,7 +71,7 @@ test "std.tar validate testdata headers" { .uid = 1000, .gid = 1000, .size = 200, - .mtime = unixTime(1392395740, 0), + .mtime = try unixTime(1392395740, 0), .type = @intToEnum(FileType, 0x53), .linkname = "", .uname = "david", @@ -85,7 +85,7 @@ test "std.tar validate testdata 
headers" { .uid = 1000, .gid = 1000, .size = 200, - .mtime = unixTime(1392342187, 0), + .mtime = try unixTime(1392342187, 0), .type = @intToEnum(FileType, 0x30), .linkname = "", .uname = "david", @@ -104,7 +104,7 @@ test "std.tar validate testdata headers" { .uid = 1000, .gid = 1000, .size = 200, - .mtime = unixTime(1392340456, 0), + .mtime = try unixTime(1392340456, 0), .type = @intToEnum(FileType, 0x30), .linkname = "", .uname = "david", @@ -124,7 +124,7 @@ test "std.tar validate testdata headers" { .uid = 1000, .gid = 1000, .size = 200, - .mtime = unixTime(1392337404, 0), + .mtime = try unixTime(1392337404, 0), .type = @intToEnum(FileType, 0x30), .linkname = "", .uname = "david", @@ -144,7 +144,7 @@ test "std.tar validate testdata headers" { .uid = 1000, .gid = 1000, .size = 4, - .mtime = unixTime(1392398319, 0), + .mtime = try unixTime(1392398319, 0), .type = @intToEnum(FileType, 0x30), .linkname = "", .uname = "david", @@ -169,24 +169,24 @@ test "std.tar validate testdata headers" { .uid = 73025, .gid = 5000, .size = 5, - .mtime = unixTime(1244592783, 0), + .mtime = try unixTime(1244592783, 0), .type = .normal, .uname = "dsymonds", .gname = "eng", - .atime = unixTime(1244592783, 0), - .ctime = unixTime(1244592783, 0), + .atime = try unixTime(1244592783, 0), + .ctime = try unixTime(1244592783, 0), }, .{ .name = "small2.txt", .mode = 0o640, .uid = 73025, .gid = 5000, .size = 11, - .mtime = unixTime(1244592783, 0), + .mtime = try unixTime(1244592783, 0), .type = .normal, .uname = "dsymonds", .gname = "eng", - .atime = unixTime(1244592783, 0), - .ctime = unixTime(1244592783, 0), + .atime = try unixTime(1244592783, 0), + .ctime = try unixTime(1244592783, 0), } }, }, .{ @@ -197,7 +197,7 @@ test "std.tar validate testdata headers" { .uid = 73025, .gid = 5000, .size = 5, - .mtime = unixTime(1244593104, 0), + .mtime = try unixTime(1244593104, 0), .type = .normal, }, .{ .name = "small2.txt", @@ -205,7 +205,7 @@ test "std.tar validate testdata headers" { .uid = 73025, 
.gid = 5000, .size = 11, - .mtime = unixTime(1244593104, 0), + .mtime = try unixTime(1244593104, 0), .type = .normal, } }, }, @@ -219,9 +219,9 @@ test "std.tar validate testdata headers" { .uname = "shane", .gname = "shane", .size = 7, - .mtime = unixTime(1350244992, 23960108), - .ctime = unixTime(1350244992, 23960108), - .atime = unixTime(1350244992, 23960108), + .mtime = try unixTime(1350244992, 23960108), + .ctime = try unixTime(1350244992, 23960108), + .atime = try unixTime(1350244992, 23960108), .type = .normal, .pax_recs = &.{ "path", "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", @@ -238,9 +238,9 @@ test "std.tar validate testdata headers" { .uname = "shane", .gname = "shane", .size = 0, - .mtime = unixTime(1350266320, 910238425), - .ctime = unixTime(1350266320, 910238425), - .atime = unixTime(1350266320, 910238425), + .mtime = try unixTime(1350266320, 910238425), + .ctime = try unixTime(1350266320, 910238425), + .atime = try unixTime(1350266320, 910238425), .type = .symbolic_link, .linkname = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", .pax_recs = &.{ @@ -268,7 +268,7 @@ test "std.tar validate testdata headers" { .uid = 319973, .gid = 5000, .size = 999, - .mtime = unixTime(1442282516, 0), + .mtime = try unixTime(1442282516, 0), .type = .normal, .uname = "joetsai", .gname = "eng", @@ -287,7 +287,7 @@ test "std.tar validate testdata headers" { .type = .normal, .name = "file", .uname = str_long_x10, - .mtime = unixTime(0, 0), + .mtime = try unixTime(0, 0), .pax_recs = &.{ "GOLANG.pkg", "tar", "comment", "Hello, 世界", @@ -306,13 +306,13 @@ test "std.tar validate testdata headers" { }, .{ .type = .normal, .name = "file1", - .mtime = unixTime(0, 0), + .mtime = try 
unixTime(0, 0), .fmt = FormatSet.initOne(.ustar), }, .{ .type = .normal, .name = "file2", .pax_recs = &.{ "path", "file2" }, - .mtime = unixTime(0, 0), + .mtime = try unixTime(0, 0), .fmt = FormatSet.initOne(.pax), }, .{ .type = .global_extended_header, @@ -322,12 +322,12 @@ test "std.tar validate testdata headers" { }, .{ .type = .normal, .name = "file3", - .mtime = unixTime(0, 0), + .mtime = try unixTime(0, 0), .fmt = FormatSet.initOne(.ustar), }, .{ .type = .normal, .name = "file4", - .mtime = unixTime(1400000000, 0), + .mtime = try unixTime(1400000000, 0), .pax_recs = &.{ "mtime", "1400000000" }, .fmt = FormatSet.initOne(.pax), } }, @@ -340,7 +340,7 @@ test "std.tar validate testdata headers" { .uid = 0, .gid = 0, .size = 14, - .mtime = unixTime(1365454838, 0), + .mtime = try unixTime(1365454838, 0), .type = .normal, .linkname = "", .uname = "eyefi", @@ -359,12 +359,12 @@ test "std.tar validate testdata headers" { .uid = 1000, .gid = 10, .size = 5, - .mtime = unixTime(1386065770, 448252320), + .mtime = try unixTime(1386065770, 448252320), .type = .normal, .uname = "alex", .gname = "wheel", - .atime = unixTime(1389782991, 419875220), - .ctime = unixTime(1389782956, 794414986), + .atime = try unixTime(1389782991, 419875220), + .ctime = try unixTime(1389782956, 794414986), .pax_recs = &.{ "user.key", "value", "user.key2", "value2", @@ -385,12 +385,12 @@ test "std.tar validate testdata headers" { .uid = 1000, .gid = 10, .size = 11, - .mtime = unixTime(1386065770, 449252304), + .mtime = try unixTime(1386065770, 449252304), .type = .normal, .uname = "alex", .gname = "wheel", - .atime = unixTime(1389782991, 419875220), - .ctime = unixTime(1386065770, 449252304), + .atime = try unixTime(1389782991, 419875220), + .ctime = try unixTime(1386065770, 449252304), .pax_recs = &.{ "security.selinux", ".unconfined_u=.object_r=.default_t=s0\x00", "mtime", "1386065770.449252304", @@ -408,7 +408,7 @@ test "std.tar validate testdata headers" { .headers = &.{.{ .name = 
"GNU2/GNU2/long-path-name", .linkname = "GNU4/GNU4/long-linkpath-name", - .mtime = unixTime(0, 0), + .mtime = try unixTime(0, 0), .type = .symbolic_link, .fmt = FormatSet.initOne(.gnu), }}, @@ -425,12 +425,12 @@ test "std.tar validate testdata headers" { .uid = 1000, .gid = 1000, .size = 14, - .mtime = unixTime(1441973427, 0), + .mtime = try unixTime(1441973427, 0), .type = @intToEnum(FileType, 'D'), .uname = "rawr", .gname = "dsnet", - .atime = unixTime(1441974501, 0), - .ctime = unixTime(1441973436, 0), + .atime = try unixTime(1441974501, 0), + .ctime = try unixTime(1441973436, 0), .fmt = FormatSet.initOne(.gnu), }, .{ .name = "test2/foo", @@ -438,12 +438,12 @@ test "std.tar validate testdata headers" { .uid = 1000, .gid = 1000, .size = 64, - .mtime = unixTime(1441973363, 0), + .mtime = try unixTime(1441973363, 0), .type = .normal, .uname = "rawr", .gname = "dsnet", - .atime = unixTime(1441974501, 0), - .ctime = unixTime(1441973436, 0), + .atime = try unixTime(1441974501, 0), + .ctime = try unixTime(1441973436, 0), .fmt = FormatSet.initOne(.gnu), }, .{ .name = "test2/sparse", @@ -451,12 +451,12 @@ test "std.tar validate testdata headers" { .uid = 1000, .gid = 1000, .size = 536870912, - .mtime = unixTime(1441973427, 0), + .mtime = try unixTime(1441973427, 0), .type = @intToEnum(FileType, 'S'), .uname = "rawr", .gname = "dsnet", - .atime = unixTime(1441991948, 0), - .ctime = unixTime(1441973436, 0), + .atime = try unixTime(1441991948, 0), + .ctime = try unixTime(1441973436, 0), .fmt = FormatSet.initOne(.gnu), } }, }, @@ -466,7 +466,7 @@ test "std.tar validate testdata headers" { .headers = &.{.{ .name = "bar", .linkname = "PAX4/PAX4/long-linkpath-name", - .mtime = unixTime(0, 0), + .mtime = try unixTime(0, 0), .type = @intToEnum(tar.FileType, '2'), .pax_recs = &.{ "linkpath", "PAX4/PAX4/long-linkpath-name", @@ -484,7 +484,7 @@ test "std.tar validate testdata headers" { .mode = 0o644, .uid = 1000, .gid = 1000, - .mtime = unixTime(1486082191, 0), + .mtime = try 
unixTime(1486082191, 0), .type = .normal, .uname = "rawr", .gname = "dsnet", @@ -503,7 +503,7 @@ test "std.tar validate testdata headers" { .mode = 0o644, .uid = 1000, .gid = 1000, - .mtime = unixTime(0, 0), + .mtime = try unixTime(0, 0), .type = .normal, .uname = "☺", .gname = "⚹", @@ -522,7 +522,7 @@ test "std.tar validate testdata headers" { .mode = 0o644, .uid = 1000, .gid = 1000, - .mtime = unixTime(0, 0), + .mtime = try unixTime(0, 0), .type = .normal, .uname = "rawr", .gname = "dsnet", @@ -568,7 +568,7 @@ test "std.tar validate testdata headers" { .headers = &.{.{ .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", .uid = 0o10000000, - .mtime = unixTime(0, 0), + .mtime = try unixTime(0, 0), .type = .normal, }}, }, @@ -579,7 +579,7 @@ test "std.tar validate testdata headers" { .name = "file", .mode = 0o644, .type = .normal, - .mtime = unixTime(0, 0), + .mtime = try unixTime(0, 0), .dev_major = 1, .dev_minor = 1, .fmt = FormatSet.initOne(.ustar), @@ -592,7 +592,7 @@ test "std.tar validate testdata headers" { .name = "sparse.db", .type = .gnu_sparse, .size = 1000, - .mtime = unixTime(0, 0), + .mtime = try unixTime(0, 0), .fmt = FormatSet.initOne(.gnu), }}, }, @@ -604,7 +604,7 @@ test "std.tar validate testdata headers" { .name = "sparse.db", .type = .gnu_sparse, .size = 1000, - .mtime = unixTime(0, 0), + .mtime = try unixTime(0, 0), .fmt = FormatSet.initOne(.gnu), }}, }, @@ -616,7 +616,7 @@ test "std.tar validate testdata headers" { .name = "sparse.db", .type = .normal, .size = 1000, - .mtime = unixTime(0, 0), + .mtime = try unixTime(0, 0), .pax_recs = &.{ "size", "1512", "GNU.sparse.major", "1", @@ -635,7 +635,7 @@ test "std.tar validate testdata headers" { .name = "sparse.db", .type = .normal, .size = 1000, - .mtime = unixTime(0, 0), + .mtime = try unixTime(0, 0), .pax_recs = &.{ "size", "512", "GNU.sparse.major", "1", @@ -651,7 +651,7 @@ test "std.tar validate testdata headers" { .headers = &.{.{ 
.type = .directory, .name = one_to_nine_slash_x30, - .mtime = unixTime(0, 0), + .mtime = try unixTime(0, 0), .pax_recs = &.{ "path", one_to_nine_slash_x30 }, .fmt = FormatSet.initOne(.pax), }}, diff --git a/lib/std/compress/tar/test_common.zig b/lib/std/compress/tar/test_common.zig index 5a5abd412db4..3b3eabe6b6e6 100644 --- a/lib/std/compress/tar/test_common.zig +++ b/lib/std/compress/tar/test_common.zig @@ -1,4 +1,5 @@ const std = @import("std"); + /// testing helper for decompressing a .gz file. returns an io.fixedBufferStream /// with the decompressed data. caller owns the returned FixedBufferStream.buffer pub fn decompressGz( diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 8a67b0cb89c1..2963b42e846c 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -36,6 +36,12 @@ pub const FileType = enum(u8) { result.set(@enumToInt(ft)); break :blk result; }; + + pub fn tagName(ft: FileType) ?[]const u8 { + return inline for (std.meta.fields(FileType)) |f| { + if (@enumToInt(ft) == f.value) break f.name; + } else null; + } }; fn parseOctal(raw: []const u8) !i64 { @@ -157,7 +163,7 @@ fn parsePaxTime(s: []const u8) !i128 { const secs = try fmt.parseInt(i64, ss, 10); const sn = parts[1]; - if (sn.len == 0) return unixTime(secs, 0); + if (sn.len == 0) return try unixTime(secs, 0); const all_digits = for (sn) |c| { if (!std.ascii.isDigit(c)) break false; @@ -173,9 +179,9 @@ fn parsePaxTime(s: []const u8) !i128 { log.debug("parsePaxTime secs={} nsecs={} sn.len={}", .{ secs, nsecs, sn.len }); return if (ss.len > 0 and ss[0] == '-') - unixTime(secs, -nsecs) + try unixTime(secs, -nsecs) else - unixTime(secs, nsecs); + try unixTime(secs, nsecs); } test parsePaxTime { @@ -185,53 +191,53 @@ test parsePaxTime { }; const cases = [_]TestCase{ - .{ "1350244992.023960108", unixTime(1350244992, 23960108) }, - .{ "1350244992.02396010", unixTime(1350244992, 23960100) }, - .{ "1350244992.0239601089", unixTime(1350244992, 23960108) }, - .{ "1350244992.3", unixTime(1350244992, 300000000) 
}, - .{ "1350244992", unixTime(1350244992, 0) }, - .{ "-1.000000001", unixTime(-1, -1e0 + 0e0) }, - .{ "-1.000001", unixTime(-1, -1e3 + 0e0) }, - .{ "-1.001000", unixTime(-1, -1e6 + 0e0) }, - .{ "-1", unixTime(-1, -0e0 + 0e0) }, - .{ "-1.999000", unixTime(-1, -1e9 + 1e6) }, - .{ "-1.999999", unixTime(-1, -1e9 + 1e3) }, - .{ "-1.999999999", unixTime(-1, -1e9 + 1e0) }, - .{ "0.000000001", unixTime(0, 1e0 + 0e0) }, - .{ "0.000001", unixTime(0, 1e3 + 0e0) }, - .{ "0.001000", unixTime(0, 1e6 + 0e0) }, - .{ "0", unixTime(0, 0e0) }, - .{ "0.999000", unixTime(0, 1e9 - 1e6) }, - .{ "0.999999", unixTime(0, 1e9 - 1e3) }, - .{ "0.999999999", unixTime(0, 1e9 - 1e0) }, - .{ "1.000000001", unixTime(1, 1e0 - 0e0) }, - .{ "1.000001", unixTime(1, 1e3 - 0e0) }, - .{ "1.001000", unixTime(1, 1e6 - 0e0) }, - .{ "1", unixTime(1, 0e0 - 0e0) }, - .{ "1.999000", unixTime(1, 1e9 - 1e6) }, - .{ "1.999999", unixTime(1, 1e9 - 1e3) }, - .{ "1.999999999", unixTime(1, 1e9 - 1e0) }, - .{ "-1350244992.023960108", unixTime(-1350244992, -23960108) }, - .{ "-1350244992.02396010", unixTime(-1350244992, -23960100) }, - .{ "-1350244992.0239601089", unixTime(-1350244992, -23960108) }, - .{ "-1350244992.3", unixTime(-1350244992, -300000000) }, - .{ "-1350244992", unixTime(-1350244992, 0) }, + .{ "1350244992.023960108", try unixTime(1350244992, 23960108) }, + .{ "1350244992.02396010", try unixTime(1350244992, 23960100) }, + .{ "1350244992.0239601089", try unixTime(1350244992, 23960108) }, + .{ "1350244992.3", try unixTime(1350244992, 300000000) }, + .{ "1350244992", try unixTime(1350244992, 0) }, + .{ "-1.000000001", try unixTime(-1, -1e0 + 0e0) }, + .{ "-1.000001", try unixTime(-1, -1e3 + 0e0) }, + .{ "-1.001000", try unixTime(-1, -1e6 + 0e0) }, + .{ "-1", try unixTime(-1, -0e0 + 0e0) }, + .{ "-1.999000", try unixTime(-1, -1e9 + 1e6) }, + .{ "-1.999999", try unixTime(-1, -1e9 + 1e3) }, + .{ "-1.999999999", try unixTime(-1, -1e9 + 1e0) }, + .{ "0.000000001", try unixTime(0, 1e0 + 0e0) }, + .{ "0.000001", try 
unixTime(0, 1e3 + 0e0) }, + .{ "0.001000", try unixTime(0, 1e6 + 0e0) }, + .{ "0", try unixTime(0, 0e0) }, + .{ "0.999000", try unixTime(0, 1e9 - 1e6) }, + .{ "0.999999", try unixTime(0, 1e9 - 1e3) }, + .{ "0.999999999", try unixTime(0, 1e9 - 1e0) }, + .{ "1.000000001", try unixTime(1, 1e0 - 0e0) }, + .{ "1.000001", try unixTime(1, 1e3 - 0e0) }, + .{ "1.001000", try unixTime(1, 1e6 - 0e0) }, + .{ "1", try unixTime(1, 0e0 - 0e0) }, + .{ "1.999000", try unixTime(1, 1e9 - 1e6) }, + .{ "1.999999", try unixTime(1, 1e9 - 1e3) }, + .{ "1.999999999", try unixTime(1, 1e9 - 1e0) }, + .{ "-1350244992.023960108", try unixTime(-1350244992, -23960108) }, + .{ "-1350244992.02396010", try unixTime(-1350244992, -23960100) }, + .{ "-1350244992.0239601089", try unixTime(-1350244992, -23960108) }, + .{ "-1350244992.3", try unixTime(-1350244992, -300000000) }, + .{ "-1350244992", try unixTime(-1350244992, 0) }, .{ "", error.InvalidCharacter }, - .{ "0", unixTime(0, 0) }, - .{ "1.", unixTime(1, 0) }, - .{ "0.0", unixTime(0, 0) }, + .{ "0", try unixTime(0, 0) }, + .{ "1.", try unixTime(1, 0) }, + .{ "0.0", try unixTime(0, 0) }, .{ ".5", error.InvalidCharacter }, - .{ "-1.3", unixTime(-1, -3e8) }, - .{ "-1.0", unixTime(-1, -0e0) }, - .{ "-0.0", unixTime(-0, -0e0) }, - .{ "-0.1", unixTime(-0, -1e8) }, - .{ "-0.01", unixTime(-0, -1e7) }, - .{ "-0.99", unixTime(-0, -99e7) }, - .{ "-0.98", unixTime(-0, -98e7) }, - .{ "-1.1", unixTime(-1, -1e8) }, - .{ "-1.01", unixTime(-1, -1e7) }, - .{ "-2.99", unixTime(-2, -99e7) }, - .{ "-5.98", unixTime(-5, -98e7) }, + .{ "-1.3", try unixTime(-1, -3e8) }, + .{ "-1.0", try unixTime(-1, -0e0) }, + .{ "-0.0", try unixTime(-0, -0e0) }, + .{ "-0.1", try unixTime(-0, -1e8) }, + .{ "-0.01", try unixTime(-0, -1e7) }, + .{ "-0.99", try unixTime(-0, -99e7) }, + .{ "-0.98", try unixTime(-0, -98e7) }, + .{ "-1.1", try unixTime(-1, -1e8) }, + .{ "-1.01", try unixTime(-1, -1e7) }, + .{ "-2.99", try unixTime(-2, -99e7) }, + .{ "-5.98", try unixTime(-5, -98e7) }, .{ "-", 
error.InvalidCharacter }, .{ "+", error.InvalidCharacter }, .{ "-1.-1", error.InvalidCharacter }, @@ -488,8 +494,11 @@ pub const Header = struct { } }; -pub fn unixTime(tv_sec: i64, tv_nsec: i64) i128 { - const result = @bitCast(i128, [_]i64{ tv_sec * time.ns_per_s, tv_nsec }); +pub fn unixTime(tv_sec: i64, tv_nsec: i64) !i128 { + const result = @bitCast(i128, [_]i64{ + try std.math.mul(i64, tv_sec, time.ns_per_s), + tv_nsec, + }); return result; } @@ -759,6 +768,7 @@ pub fn HeaderIterator(comptime Reader: type) type { } want -= block_len; } + if (want != 0) return error.UnexpectedEndOfStream; return outbuf.items[0..@intCast(usize, size)]; } @@ -779,7 +789,8 @@ pub fn HeaderIterator(comptime Reader: type) type { const v7 = self.v7Header(); switch (v7.type) { .global_extended_header, .extended_header => { - const size = @intCast(u64, try parseOctal(&v7.size)); + const size = std.math.cast(u64, try parseOctal(&v7.size)) orelse + return error.Header; self.pax_buf.items = try self.readBlocks(size, &self.pax_buf); }, else => {}, @@ -807,10 +818,11 @@ pub fn HeaderIterator(comptime Reader: type) type { return null; const pax_record = self.bytes[0..nl]; self.bytes = self.bytes[nl + 1 ..]; - assert(pax_record.len != 0); + if (pax_record.len == 0) return error.Header; const sp = mem.indexOfScalar(u8, pax_record, ' ') orelse return error.Header; const len = try fmt.parseUnsigned(u32, pax_record[0..sp], 10); + if (len > pax_record.len + 1 or sp + 2 > len) return error.Header; const rec = pax_record[sp + 1 .. 
len - 1]; const eqidx = mem.indexOfScalar(u8, rec, '=') orelse return error.Header; @@ -882,9 +894,11 @@ pub fn HeaderIterator(comptime Reader: type) type { .linkname = mem.sliceTo(&v7.linked_file_name, 0), .size = try parseNumeric(&v7.size), .mode = try parseNumeric(&v7.mode), - .uid = @intCast(i32, try parseNumeric(&v7.uid)), - .gid = @intCast(i32, try parseNumeric(&v7.gid)), - .mtime = unixTime(try parseNumeric(&v7.mod_time), 0), + .uid = std.math.cast(i32, try parseNumeric(&v7.uid)) orelse + return error.Header, + .gid = std.math.cast(i32, try parseNumeric(&v7.gid)) orelse + return error.Header, + .mtime = try unixTime(try parseNumeric(&v7.mod_time), 0), }; var prefix: []const u8 = ""; @@ -927,17 +941,17 @@ pub fn HeaderIterator(comptime Reader: type) type { } else if (format.contains(.star)) { const star = v7.star(); prefix = mem.sliceTo(&star.filename_prefix, 0); - hdr.atime = unixTime(try parseNumeric(&star.access_time), 0); - hdr.ctime = unixTime(try parseNumeric(&star.change_time), 0); + hdr.atime = try unixTime(try parseNumeric(&star.access_time), 0); + hdr.ctime = try unixTime(try parseNumeric(&star.change_time), 0); } else if (format.contains(.gnu)) { hdr.fmt = format; const gnu = v7.gnu(); if (gnu.access_time[0] != 0) - hdr.atime = unixTime(try parseNumeric(&gnu.access_time), 0); + hdr.atime = try unixTime(try parseNumeric(&gnu.access_time), 0); if (gnu.change_time[0] != 0) - hdr.ctime = unixTime(try parseNumeric(&gnu.change_time), 0); + hdr.ctime = try unixTime(try parseNumeric(&gnu.change_time), 0); } if (prefix.len > 0) { self.name_buf.items.len = 0; @@ -1095,7 +1109,7 @@ pub fn pipeToFileSystem( while (try iter.next()) |header| { const file_name = try stripComponents(header.name, options.strip_components); - log.info("pipeToFileSystem() header.type={s} stripped file_name={s}", .{ @tagName(header.type), file_name }); + log.info("pipeToFileSystem() header.type={?s} stripped file_name={s}", .{ header.type.tagName(), file_name }); if (file_name.len == 
0 and FileType.named_types_bitset.isSet(@enumToInt(header.type))) continue; @@ -1162,7 +1176,7 @@ pub fn pipeToFileSystem( format.setIntersection(fmt_pax); }, else => { - log.err("unsupported type '{s}'", .{@tagName(header.type)}); + log.err("unsupported type '{?s}':{}", .{ header.type.tagName(), @enumToInt(header.type) }); return error.TarUnexpectedFileType; }, } From 52ef4ce331ba6fda2d760011e307ac7d7a4d93fe Mon Sep 17 00:00:00 2001 From: Travis Staloch Date: Tue, 2 May 2023 18:32:52 -0700 Subject: [PATCH 3/6] std.tar: fix fuzzing crashes part 2 prevent crashes and return errors when: * any 'named type' header's file path contains a `NUL` character. * prevent std.bit_set assertion failure when `header.type` is outside the bounds of FileType.named_types_bitset. --- lib/std/tar.zig | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 2963b42e846c..9aa90895275e 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -26,8 +26,10 @@ pub const FileType = enum(u8) { pub const sentinel = @intToEnum(FileType, 0xff); + pub const NamedTypesBitset = std.StaticBitSet(128); + pub const named_types_bitset = blk: { - var result = std.StaticBitSet(128).initEmpty(); + var result = NamedTypesBitset.initEmpty(); for ([_]FileType{ .directory, .normal, .normal2, .hard_link, .symbolic_link, .character_special, .block_special, .fifo, @@ -37,6 +39,14 @@ pub const FileType = enum(u8) { break :blk result; }; + pub fn isNamedType(ft: FileType) bool { + return + // verify not beyond NamedTypesBitset.bit_length to avoid assertion + // failure in std.bit_set + @enumToInt(ft) < NamedTypesBitset.bit_length and + named_types_bitset.isSet(@enumToInt(ft)); + } + pub fn tagName(ft: FileType) ?[]const u8 { return inline for (std.meta.fields(FileType)) |f| { if (@enumToInt(ft) == f.value) break f.name; @@ -1110,9 +1120,16 @@ pub fn pipeToFileSystem( while (try iter.next()) |header| { const file_name = try 
stripComponents(header.name, options.strip_components); log.info("pipeToFileSystem() header.type={?s} stripped file_name={s}", .{ header.type.tagName(), file_name }); - if (file_name.len == 0 and - FileType.named_types_bitset.isSet(@enumToInt(header.type))) + + const must_validate_path = header.type.isNamedType(); + if (must_validate_path and file_name.len == 0) continue; + // verify that the path doesn't contain NUL characters + // TODO check for other types of invalid paths + // see https://github.com/ziglang/zig/pull/15382#issuecomment-1532255834 + // and https://github.com/ziglang/zig/pull/14533#issuecomment-1416888193 + if (must_validate_path and mem.indexOfScalar(u8, file_name, 0) != null) + return error.InvalidCharacter; switch (header.type) { .directory => { From 33d1f338659a5292a6bdbf78d9018cca74d16490 Mon Sep 17 00:00:00 2001 From: Travis Staloch Date: Wed, 3 May 2023 12:24:52 -0700 Subject: [PATCH 4/6] std.tar: fix fuzzing crashes part 3 * return error when header.size is negative via math.cast * readBlocks(): change 'size' param from u64 to usize to avoid unnecessary @intCast * std.math -> math --- lib/std/tar.zig | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 9aa90895275e..4c78f62b501c 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -121,9 +121,9 @@ test parseNumeric { .{ "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745 }, .{ "\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231 }, .{ "\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231 }, - .{ "\x80\x7f\xff\xff\xff\xff\xff\xff\xff", std.math.maxInt(i64) }, + .{ "\x80\x7f\xff\xff\xff\xff\xff\xff\xff", math.maxInt(i64) }, .{ "\x80\x80\x00\x00\x00\x00\x00\x00\x00", error.Overflow }, - .{ "\xff\x80\x00\x00\x00\x00\x00\x00\x00", std.math.minInt(i64) }, + .{ "\xff\x80\x00\x00\x00\x00\x00\x00\x00", math.minInt(i64) }, .{ "\xff\x7f\xff\xff\xff\xff\xff\xff\xff",
error.Overflow }, .{ "\xf5\xec\xd1\xc7\x7e\x5f\x26\x48\x81\x9f\x8f\x9b", error.Overflow }, @@ -506,7 +506,7 @@ pub const Header = struct { pub fn unixTime(tv_sec: i64, tv_nsec: i64) !i128 { const result = @bitCast(i128, [_]i64{ - try std.math.mul(i64, tv_sec, time.ns_per_s), + try math.mul(i64, tv_sec, time.ns_per_s), tv_nsec, }); return result; @@ -722,14 +722,14 @@ pub fn HeaderIterator(comptime Reader: type) type { .gnu_long_name => { format.setIntersection(fmt_gnu); gnu_long_name = mem.sliceTo(try self.readBlocks( - @intCast(u64, hdr.size), + @intCast(usize, hdr.size), &self.name_buf, ), 0); }, .gnu_long_link => { format.setIntersection(fmt_gnu); gnu_long_link = mem.sliceTo(try self.readBlocks( - @intCast(u64, hdr.size), + @intCast(usize, hdr.size), &self.linkname_buf, ), 0); }, @@ -763,10 +763,10 @@ pub fn HeaderIterator(comptime Reader: type) type { // else returns `outbuf.items[0..size]`. fn readBlocks( self: *Self, - size: u64, + size: usize, outbuf: *std.ArrayListUnmanaged(u8), ) ![]u8 { - var want = mem.alignForwardGeneric(u64, size, block_len); + var want = mem.alignForwardGeneric(usize, size, block_len); outbuf.items.len = 0; var w = outbuf.writer(self.allocator); var buf: [block_len]u8 = undefined; @@ -799,7 +799,7 @@ pub fn HeaderIterator(comptime Reader: type) type { const v7 = self.v7Header(); switch (v7.type) { .global_extended_header, .extended_header => { - const size = std.math.cast(u64, try parseOctal(&v7.size)) orelse + const size = math.cast(usize, try parseOctal(&v7.size)) orelse return error.Header; self.pax_buf.items = try self.readBlocks(size, &self.pax_buf); }, @@ -904,9 +904,9 @@ pub fn HeaderIterator(comptime Reader: type) type { .linkname = mem.sliceTo(&v7.linked_file_name, 0), .size = try parseNumeric(&v7.size), .mode = try parseNumeric(&v7.mode), - .uid = std.math.cast(i32, try parseNumeric(&v7.uid)) orelse + .uid = math.cast(i32, try parseNumeric(&v7.uid)) orelse return error.Header, - .gid = std.math.cast(i32, try 
parseNumeric(&v7.gid)) orelse + .gid = math.cast(i32, try parseNumeric(&v7.gid)) orelse return error.Header, .mtime = try unixTime(try parseNumeric(&v7.mod_time), 0), }; @@ -1144,9 +1144,11 @@ pub fn pipeToFileSystem( break :blk subdir.createFile(basename, .{}); } else dir.createFile(file_name, .{}); defer file.close(); - const want = mem.alignForwardGeneric(u64, @intCast(u64, header.size), block_len); + const size = math.cast(usize, header.size) orelse + return error.Header; + const want = mem.alignForwardGeneric(usize, size, block_len); var lim_reader = std.io.limitedReader(reader, want); - var bytes_left = @intCast(usize, header.size); + var bytes_left = size; while (true) { const amt = try lim_reader.read(iter.buf); switch (amt) { @@ -1154,7 +1156,7 @@ pub fn pipeToFileSystem( block_len => {}, else => return error.UnexpectedEndOfStream, } - _ = try file.write(iter.buf[0..std.math.min(bytes_left, block_len)]); + _ = try file.write(iter.buf[0..math.min(bytes_left, block_len)]); bytes_left -|= block_len; } @@ -1228,4 +1230,5 @@ const fs = std.fs; const fmt = std.fmt; const log = std.log; const time = std.time; +const math = std.math; const builtin = @import("builtin"); From dd21d6aea49f35a5d715bc6a3fca0c0c556614a4 Mon Sep 17 00:00:00 2001 From: Travis Staloch Date: Thu, 22 Jun 2023 04:38:46 -0700 Subject: [PATCH 5/6] std.tar: chore - update to latest zig * builtins * mem.alignForward * fs.file.File.Kind --- lib/std/compress/tar/reader_test.zig | 20 ++++++++++---------- lib/std/tar.zig | 28 ++++++++++++++-------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/lib/std/compress/tar/reader_test.zig b/lib/std/compress/tar/reader_test.zig index 8432f2ab26ff..e765a728d796 100644 --- a/lib/std/compress/tar/reader_test.zig +++ b/lib/std/compress/tar/reader_test.zig @@ -72,7 +72,7 @@ test "std.tar validate testdata headers" { .gid = 1000, .size = 200, .mtime = try unixTime(1392395740, 0), - .type = @intToEnum(FileType, 0x53), + .type = 
@enumFromInt(FileType, 0x53), .linkname = "", .uname = "david", .gname = "david", @@ -86,7 +86,7 @@ test "std.tar validate testdata headers" { .gid = 1000, .size = 200, .mtime = try unixTime(1392342187, 0), - .type = @intToEnum(FileType, 0x30), + .type = @enumFromInt(FileType, 0x30), .linkname = "", .uname = "david", .gname = "david", @@ -105,7 +105,7 @@ test "std.tar validate testdata headers" { .gid = 1000, .size = 200, .mtime = try unixTime(1392340456, 0), - .type = @intToEnum(FileType, 0x30), + .type = @enumFromInt(FileType, 0x30), .linkname = "", .uname = "david", .gname = "david", @@ -125,7 +125,7 @@ test "std.tar validate testdata headers" { .gid = 1000, .size = 200, .mtime = try unixTime(1392337404, 0), - .type = @intToEnum(FileType, 0x30), + .type = @enumFromInt(FileType, 0x30), .linkname = "", .uname = "david", .gname = "david", @@ -145,7 +145,7 @@ test "std.tar validate testdata headers" { .gid = 1000, .size = 4, .mtime = try unixTime(1392398319, 0), - .type = @intToEnum(FileType, 0x30), + .type = @enumFromInt(FileType, 0x30), .linkname = "", .uname = "david", .gname = "david", @@ -426,7 +426,7 @@ test "std.tar validate testdata headers" { .gid = 1000, .size = 14, .mtime = try unixTime(1441973427, 0), - .type = @intToEnum(FileType, 'D'), + .type = @enumFromInt(FileType, 'D'), .uname = "rawr", .gname = "dsnet", .atime = try unixTime(1441974501, 0), @@ -452,7 +452,7 @@ test "std.tar validate testdata headers" { .gid = 1000, .size = 536870912, .mtime = try unixTime(1441973427, 0), - .type = @intToEnum(FileType, 'S'), + .type = @enumFromInt(FileType, 'S'), .uname = "rawr", .gname = "dsnet", .atime = try unixTime(1441991948, 0), @@ -467,7 +467,7 @@ test "std.tar validate testdata headers" { .name = "bar", .linkname = "PAX4/PAX4/long-linkpath-name", .mtime = try unixTime(0, 0), - .type = @intToEnum(tar.FileType, '2'), + .type = @enumFromInt(tar.FileType, '2'), .pax_recs = &.{ "linkpath", "PAX4/PAX4/long-linkpath-name", }, @@ -715,7 +715,7 @@ test "std.tar 
validate testdata headers" { } if (actual.size == -1) continue; - const block_size = std.mem.alignForwardGeneric(usize, @intCast(usize, actual.size), 512); + const block_size = std.mem.alignForward(usize, @intCast(usize, actual.size), 512); // validate checksums if exist or skip over file contents if (test_case.chksums.len > i) { var h = std.crypto.hash.Md5.init(.{}); @@ -746,7 +746,7 @@ test "std.tar validate testdata headers" { } if (test_case.err) |e| { - if (e != merr) { + if (merr == null or e != merr.?) { errors += 1; std.log.err("errors don't match. expecting {!} found {?!}", .{ e, merr }); } diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 4c78f62b501c..bfb213a001ff 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -24,7 +24,7 @@ pub const FileType = enum(u8) { gnu_long_link = 'K', _, - pub const sentinel = @intToEnum(FileType, 0xff); + pub const sentinel = @enumFromInt(FileType, 0xff); pub const NamedTypesBitset = std.StaticBitSet(128); @@ -35,7 +35,7 @@ pub const FileType = enum(u8) { .symbolic_link, .character_special, .block_special, .fifo, .contiguous, }) |ft| - result.set(@enumToInt(ft)); + result.set(@intFromEnum(ft)); break :blk result; }; @@ -43,13 +43,13 @@ pub const FileType = enum(u8) { return // verify not beyond NamedTypesBitset.bit_length to avoid assertion // failure in std.bit_set - @enumToInt(ft) < NamedTypesBitset.bit_length and - named_types_bitset.isSet(@enumToInt(ft)); + @intFromEnum(ft) < NamedTypesBitset.bit_length and + named_types_bitset.isSet(@intFromEnum(ft)); } pub fn tagName(ft: FileType) ?[]const u8 { return inline for (std.meta.fields(FileType)) |f| { - if (@enumToInt(ft) == f.value) break f.name; + if (@intFromEnum(ft) == f.value) break f.name; } else null; } }; @@ -76,12 +76,12 @@ fn parseNumeric(b: []const u8) !i64 { // data bytes and treat the value as an unsigned number. 
// inv = 0xff if negative else 0 - const inv = @as(u8, @boolToInt(b[0] & 0x40 != 0)) * 0xff; + const inv = @as(u8, @intFromBool(b[0] & 0x40 != 0)) * 0xff; var x: u64 = 0; for (0..b.len) |i| { // ignore the signal bit in first byte - const mask = @as(u8, 0xff) >> @boolToInt(i == 0); + const mask = @as(u8, 0xff) >> @intFromBool(i == 0); const c = b[i] ^ inv & mask; if (x > 0x00ff_ffff_ffff_ffff) return error.Overflow; x = x << 8 | c; @@ -442,14 +442,14 @@ pub const Header = struct { // TODO remove when unused pub fn format(h: Header, comptime _: []const u8, _: fmt.FormatOptions, writer: anytype) !void { const tagname = inline for (std.meta.fields(FileType)) |field| { - if (@enumToInt(h.type) == field.value) break field.name; + if (@intFromEnum(h.type) == field.value) break field.name; } else "null"; try writer.print("type={s} size={} name={s} mtime={} mode=0o{o}", .{ tagname, h.size, h.name, h.mtime, h.mode }); try debugFormatSet(h.fmt, writer); } fn structField(comptime field_enum: std.meta.FieldEnum(Header)) std.builtin.Type.StructField { - return @typeInfo(Header).Struct.fields[@enumToInt(field_enum)]; + return @typeInfo(Header).Struct.fields[@intFromEnum(field_enum)]; } fn fieldDefault(comptime field: std.builtin.Type.StructField) field.type { @@ -766,7 +766,7 @@ pub fn HeaderIterator(comptime Reader: type) type { size: usize, outbuf: *std.ArrayListUnmanaged(u8), ) ![]u8 { - var want = mem.alignForwardGeneric(usize, size, block_len); + var want = mem.alignForward(usize, size, block_len); outbuf.items.len = 0; var w = outbuf.writer(self.allocator); var buf: [block_len]u8 = undefined; @@ -1080,7 +1080,7 @@ fn makeSymLink(dir: fs.Dir, target_path: []const u8, symlink_path: []const u8) ! 
}; defer file.close(); const stat = try file.stat(); - break :blk stat.kind == .Directory; + break :blk stat.kind == .directory; }; try dir.symLink(target_path, symlink_path, .{ .is_directory = is_directory }); } @@ -1146,7 +1146,7 @@ pub fn pipeToFileSystem( defer file.close(); const size = math.cast(usize, header.size) orelse return error.Header; - const want = mem.alignForwardGeneric(usize, size, block_len); + const want = mem.alignForward(usize, size, block_len); var lim_reader = std.io.limitedReader(reader, want); var bytes_left = size; while (true) { @@ -1156,7 +1156,7 @@ pub fn pipeToFileSystem( block_len => {}, else => return error.UnexpectedEndOfStream, } - _ = try file.write(iter.buf[0..math.min(bytes_left, block_len)]); + _ = try file.write(iter.buf[0..@min(bytes_left, block_len)]); bytes_left -|= block_len; } @@ -1195,7 +1195,7 @@ pub fn pipeToFileSystem( format.setIntersection(fmt_pax); }, else => { - log.err("unsupported type '{?s}':{}", .{ header.type.tagName(), @enumToInt(header.type) }); + log.err("unsupported type '{?s}':{}", .{ header.type.tagName(), @intFromEnum(header.type) }); return error.TarUnexpectedFileType; }, } From 0289688ecc8524d430e0e1ec0cfdc719408a7e55 Mon Sep 17 00:00:00 2001 From: Travis Staloch Date: Mon, 3 Jul 2023 18:11:15 -0700 Subject: [PATCH 6/6] std.tar: chore - update to zig 0.11.0-dev.3910+689f3163a --- lib/std/compress/tar/reader_test.zig | 20 +++++------ lib/std/tar.zig | 51 +++++++++++++--------------- 2 files changed, 33 insertions(+), 38 deletions(-) diff --git a/lib/std/compress/tar/reader_test.zig b/lib/std/compress/tar/reader_test.zig index e765a728d796..706226a31aa4 100644 --- a/lib/std/compress/tar/reader_test.zig +++ b/lib/std/compress/tar/reader_test.zig @@ -72,7 +72,7 @@ test "std.tar validate testdata headers" { .gid = 1000, .size = 200, .mtime = try unixTime(1392395740, 0), - .type = @enumFromInt(FileType, 0x53), + .type = @as(FileType, @enumFromInt(0x53)), .linkname = "", .uname = "david", .gname = "david", 
@@ -86,7 +86,7 @@ test "std.tar validate testdata headers" { .gid = 1000, .size = 200, .mtime = try unixTime(1392342187, 0), - .type = @enumFromInt(FileType, 0x30), + .type = @as(FileType, @enumFromInt(0x30)), .linkname = "", .uname = "david", .gname = "david", @@ -105,7 +105,7 @@ test "std.tar validate testdata headers" { .gid = 1000, .size = 200, .mtime = try unixTime(1392340456, 0), - .type = @enumFromInt(FileType, 0x30), + .type = @as(FileType, @enumFromInt(0x30)), .linkname = "", .uname = "david", .gname = "david", @@ -125,7 +125,7 @@ test "std.tar validate testdata headers" { .gid = 1000, .size = 200, .mtime = try unixTime(1392337404, 0), - .type = @enumFromInt(FileType, 0x30), + .type = @as(FileType, @enumFromInt(0x30)), .linkname = "", .uname = "david", .gname = "david", @@ -145,7 +145,7 @@ test "std.tar validate testdata headers" { .gid = 1000, .size = 4, .mtime = try unixTime(1392398319, 0), - .type = @enumFromInt(FileType, 0x30), + .type = @as(FileType, @enumFromInt(0x30)), .linkname = "", .uname = "david", .gname = "david", @@ -426,7 +426,7 @@ test "std.tar validate testdata headers" { .gid = 1000, .size = 14, .mtime = try unixTime(1441973427, 0), - .type = @enumFromInt(FileType, 'D'), + .type = @as(FileType, @enumFromInt('D')), .uname = "rawr", .gname = "dsnet", .atime = try unixTime(1441974501, 0), @@ -452,7 +452,7 @@ test "std.tar validate testdata headers" { .gid = 1000, .size = 536870912, .mtime = try unixTime(1441973427, 0), - .type = @enumFromInt(FileType, 'S'), + .type = @as(FileType, @enumFromInt('S')), .uname = "rawr", .gname = "dsnet", .atime = try unixTime(1441991948, 0), @@ -467,7 +467,7 @@ test "std.tar validate testdata headers" { .name = "bar", .linkname = "PAX4/PAX4/long-linkpath-name", .mtime = try unixTime(0, 0), - .type = @enumFromInt(tar.FileType, '2'), + .type = @as(tar.FileType, @enumFromInt('2')), .pax_recs = &.{ "linkpath", "PAX4/PAX4/long-linkpath-name", }, @@ -715,14 +715,14 @@ test "std.tar validate testdata headers" { } if 
(actual.size == -1) continue; - const block_size = std.mem.alignForward(usize, @intCast(usize, actual.size), 512); + const block_size = std.mem.alignForward(usize, @as(usize, @intCast(actual.size)), 512); // validate checksums if exist or skip over file contents if (test_case.chksums.len > i) { var h = std.crypto.hash.Md5.init(.{}); const content = try talloc.alloc(u8, block_size); defer talloc.free(content); _ = try reader.read(content); - h.update(content[0..@intCast(usize, actual.size)]); + h.update(content[0..@intCast(actual.size)]); var hbuf: [16]u8 = undefined; h.final(&hbuf); const hex = std.fmt.bytesToHex(hbuf, .lower); diff --git a/lib/std/tar.zig b/lib/std/tar.zig index bfb213a001ff..e6f3b45e8cb5 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -24,7 +24,7 @@ pub const FileType = enum(u8) { gnu_long_link = 'K', _, - pub const sentinel = @enumFromInt(FileType, 0xff); + pub const sentinel: FileType = @enumFromInt(0xff); pub const NamedTypesBitset = std.StaticBitSet(128); @@ -89,9 +89,9 @@ fn parseNumeric(b: []const u8) !i64 { if (x >= 0x8000_0000_0000_0000) return error.Overflow; return if (inv == 0) - @bitCast(i64, x) + @as(i64, @bitCast(x)) else - ~@bitCast(i64, x); + ~@as(i64, @bitCast(x)); } return try parseOctal(b); @@ -303,8 +303,8 @@ pub fn mergePax(kv: [2][]const u8, hdr: *Header) !void { .linkname => hdr.linkname = v, .uname => hdr.uname = v, .gname => hdr.gname = v, - .uid => hdr.uid = @truncate(i32, try fmt.parseInt(i64, v, 10)), - .gid => hdr.gid = @truncate(i32, try fmt.parseInt(i64, v, 10)), + .uid => hdr.uid = @truncate(try fmt.parseInt(i64, v, 10)), + .gid => hdr.gid = @truncate(try fmt.parseInt(i64, v, 10)), .atime => hdr.atime = try parsePaxTime(v), .ctime => hdr.ctime = try parsePaxTime(v), .mtime => hdr.mtime = try parsePaxTime(v), @@ -453,10 +453,9 @@ pub const Header = struct { } fn fieldDefault(comptime field: std.builtin.Type.StructField) field.type { - return @ptrCast( - *const field.type, - @alignCast(@alignOf(field.type), 
field.default_value), - ).*; + return @as(*const field.type, @ptrCast( + @alignCast(field.default_value), + )).*; } /// copy all fields from `new_hdr` to `hdr`, but skipping any fields that @@ -505,11 +504,7 @@ pub const Header = struct { }; pub fn unixTime(tv_sec: i64, tv_nsec: i64) !i128 { - const result = @bitCast(i128, [_]i64{ - try math.mul(i64, tv_sec, time.ns_per_s), - tv_nsec, - }); - return result; + return @bitCast([_]i64{ try math.mul(i64, tv_sec, time.ns_per_s), tv_nsec }); } pub const block_len = 512; @@ -545,29 +540,29 @@ const V7Header = extern struct { else bytes_[i]; unsigned += c; - signed += @bitCast(i8, c); + signed += @as(i8, @bitCast(c)); } return .{ unsigned, signed }; } inline fn ustar(h: *const V7Header) *const UstarHeader { - return @ptrCast(*const UstarHeader, h); + return @ptrCast(h); } inline fn star(h: *const V7Header) *const StarHeader { - return @ptrCast(*const StarHeader, h); + return @ptrCast(h); } inline fn gnu(h: *const V7Header) *const GnuHeader { - return @ptrCast(*const GnuHeader, h); + return @ptrCast(h); } inline fn bytes(h: *const V7Header) *const [block_len]u8 { - return @ptrCast(*const [block_len]u8, h); + return @ptrCast(h); } // Magics used to identify various formats. const magic_gnu = "ustar "; const version_gnu = " \x00"; const magic_version_gnu = mem.readIntBig(u64, magic_gnu ++ version_gnu); - const magic_ustar = @truncate(u48, mem.readIntBig(u64, "ustar\x00\x00\x00") >> 16); + const magic_ustar: u48 = @truncate(mem.readIntBig(u64, "ustar\x00\x00\x00") >> 16); const version_ustar = "00"; // unused. 
left only for documentation const trailer_star = mem.readIntBig(u32, "tar\x00"); @@ -578,7 +573,7 @@ const V7Header = extern struct { return fmt_unknown; const magic_version = h.ustar().magicVersion(); - const magic = @truncate(u48, magic_version >> 16); + const magic: u48 = @truncate(magic_version >> 16); return if (magic == magic_ustar and mem.readIntBig(u32, &h.star().trailer) == trailer_star) @@ -607,7 +602,7 @@ const UstarHeader = extern struct { __padding: [12]u8, pub fn magicVersion(ustar: *const UstarHeader) u64 { - return mem.readIntBig(u64, @ptrCast([*]const u8, &ustar.magic)[0..8]); + return mem.readIntBig(u64, @as([*]const u8, @ptrCast(&ustar.magic))[0..8]); } comptime { @@ -722,14 +717,14 @@ pub fn HeaderIterator(comptime Reader: type) type { .gnu_long_name => { format.setIntersection(fmt_gnu); gnu_long_name = mem.sliceTo(try self.readBlocks( - @intCast(usize, hdr.size), + @intCast(hdr.size), &self.name_buf, ), 0); }, .gnu_long_link => { format.setIntersection(fmt_gnu); gnu_long_link = mem.sliceTo(try self.readBlocks( - @intCast(usize, hdr.size), + @intCast(hdr.size), &self.linkname_buf, ), 0); }, @@ -779,11 +774,11 @@ pub fn HeaderIterator(comptime Reader: type) type { want -= block_len; } if (want != 0) return error.UnexpectedEndOfStream; - return outbuf.items[0..@intCast(usize, size)]; + return outbuf.items[0..@intCast(size)]; } inline fn v7Header(self: Self) *const V7Header { - return @ptrCast(*const V7Header, self.buf); + return @ptrCast(self.buf); } /// Reads n bytes from reader. 
Returns the following depending on n: @@ -1011,7 +1006,7 @@ const Pax = struct { /// return the most significant, 'top' half of the time as an i64 fn truncateTime(t: i128) i64 { - return @truncate(i64, t >> 64); + return @truncate(t >> 64); } const is_windows = builtin.os.tag == .windows; @@ -1036,7 +1031,7 @@ fn setFileProperties(file: fs.File, header: Header, options: Options) !void { // match gnu tar behavior on linux while using // header.mode does not const mode = try file.mode(); // header.mode - var modebits = std.StaticBitSet(32){ .mask = @intCast(u32, mode) }; + var modebits = std.StaticBitSet(32){ .mask = @intCast(mode) }; // copy the user exe bit to the group and other exe bits // these bit indices count from the right: // u g o