diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 8ab7f0d09a1e..74458b0f193b 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -21,50 +21,6 @@ const testing = std.testing; pub const output = @import("tar/output.zig"); -/// Provide this to receive detailed error messages. -/// When this is provided, some errors which would otherwise be returned -/// immediately will instead be added to this structure. The API user must check -/// the errors in diagnostics to know whether the operation succeeded or failed. -pub const Diagnostics = struct { - allocator: std.mem.Allocator, - errors: std.ArrayListUnmanaged(Error) = .{}, - - pub const Error = union(enum) { - unable_to_create_sym_link: struct { - code: anyerror, - file_name: []const u8, - link_name: []const u8, - }, - unable_to_create_file: struct { - code: anyerror, - file_name: []const u8, - }, - unsupported_file_type: struct { - file_name: []const u8, - file_type: Header.Kind, - }, - }; - - pub fn deinit(d: *Diagnostics) void { - for (d.errors.items) |item| { - switch (item) { - .unable_to_create_sym_link => |info| { - d.allocator.free(info.file_name); - d.allocator.free(info.link_name); - }, - .unable_to_create_file => |info| { - d.allocator.free(info.file_name); - }, - .unsupported_file_type => |info| { - d.allocator.free(info.file_name); - }, - } - } - d.errors.deinit(d.allocator); - d.* = undefined; - } -}; - /// pipeToFileSystem options pub const PipeOptions = struct { /// Number of directory levels to skip when extracting files. @@ -73,8 +29,6 @@ pub const PipeOptions = struct { mode_mode: ModeMode = .executable_bit_only, /// Prevents creation of empty directories. exclude_empty_directories: bool = false, - /// Collects error messages during unpacking - diagnostics: ?*Diagnostics = null, pub const ModeMode = enum { /// The mode from the tar file is completely ignored. 
Files are created @@ -247,8 +201,6 @@ pub const IteratorOptions = struct { file_name_buffer: []u8, /// Use a buffer with length `std.fs.MAX_PATH_BYTES` to match file system capabilities. link_name_buffer: []u8, - /// Collects error messages during unpacking - diagnostics: ?*Diagnostics = null, }; /// Iterates over files in tar archive. @@ -256,7 +208,6 @@ pub const IteratorOptions = struct { pub fn iterator(reader: anytype, options: IteratorOptions) Iterator(@TypeOf(reader)) { return .{ .reader = reader, - .diagnostics = options.diagnostics, .file_name_buffer = options.file_name_buffer, .link_name_buffer = options.link_name_buffer, }; @@ -273,7 +224,6 @@ pub const FileKind = enum { pub fn Iterator(comptime ReaderType: type) type { return struct { reader: ReaderType, - diagnostics: ?*Diagnostics = null, // buffers for heeader and file attributes header_buffer: [Header.SIZE]u8 = undefined, @@ -435,15 +385,11 @@ pub fn Iterator(comptime ReaderType: type) type { }, // All other are unsupported header types else => { - const d = self.diagnostics orelse return error.TarUnsupportedHeader; - try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ - .file_name = try d.allocator.dupe(u8, header.name()), - .file_type = kind, - } }); if (kind == .gnu_sparse) { try self.skipGnuSparseExtendedHeaders(header); } self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig; + return error.TarUnsupportedHeader; }, } } @@ -573,24 +519,11 @@ fn PaxIterator(comptime ReaderType: type) type { /// Saves tar file content to the file systems. pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) !void { - switch (options.mode_mode) { - .ignore => {}, - .executable_bit_only => { - // This code does not look at the mode bits yet. To implement this feature, - // the implementation must be adjusted to look at the mode, and check the - // user executable bit, then call fchmod on newly created files when - // the executable bit is supposed to be set. 
- // It also needs to properly deal with ACLs on Windows. - @panic("TODO: unimplemented: tar ModeMode.executable_bit_only"); - }, - } - var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; var iter = iterator(reader, .{ .file_name_buffer = &file_name_buffer, .link_name_buffer = &link_name_buffer, - .diagnostics = options.diagnostics, }); while (try iter.next()) |file| { switch (file.kind) { @@ -605,16 +538,9 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) const file_name = stripComponents(file.name, options.strip_components); if (file_name.len == 0) return error.BadFileName; - if (createDirAndFile(dir, file_name)) |fs_file| { - defer fs_file.close(); - try file.writeAll(fs_file); - } else |err| { - const d = options.diagnostics orelse return err; - try d.errors.append(d.allocator, .{ .unable_to_create_file = .{ - .code = err, - .file_name = try d.allocator.dupe(u8, file_name), - } }); - } + var fs_file = try createDirAndFile(dir, file_name, fileMode(file.mode, options)); + defer fs_file.close(); + try file.writeAll(fs_file); }, .sym_link => { // The file system path of the symbolic link. @@ -623,25 +549,37 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) // The data inside the symbolic link. 
const link_name = file.link_name; - createDirAndSymlink(dir, link_name, file_name) catch |err| { - const d = options.diagnostics orelse return error.UnableToCreateSymLink; - try d.errors.append(d.allocator, .{ .unable_to_create_sym_link = .{ - .code = err, - .file_name = try d.allocator.dupe(u8, file_name), - .link_name = try d.allocator.dupe(u8, link_name), - } }); - }; + createDirAndSymlink(dir, link_name, file_name) catch return error.UnableToCreateSymLink; }, } } } -fn createDirAndFile(dir: std.fs.Dir, file_name: []const u8) !std.fs.File { - const fs_file = dir.createFile(file_name, .{ .exclusive = true }) catch |err| { +const default_mode = std.fs.File.default_mode; + +fn fileMode(mode: u32, options: PipeOptions) std.fs.File.Mode { + if (!std.fs.has_executable_bit or options.mode_mode == .ignore) + return default_mode; + const S = std.posix.S; + if (mode & S.IXUSR == 0) + return default_mode; + return default_mode | S.IXUSR | S.IXGRP | S.IXOTH; +} + +test fileMode { + if (!std.fs.has_executable_bit) return error.SkipZigTest; + try testing.expectEqual(default_mode, fileMode(0o744, PipeOptions{ .mode_mode = .ignore })); + try testing.expectEqual(0o777, fileMode(0o744, PipeOptions{})); + try testing.expectEqual(0o666, fileMode(0o644, PipeOptions{})); + try testing.expectEqual(0o666, fileMode(0o655, PipeOptions{})); +} + +fn createDirAndFile(dir: std.fs.Dir, file_name: []const u8, mode: std.fs.File.Mode) !std.fs.File { + const fs_file = dir.createFile(file_name, .{ .exclusive = true, .mode = mode }) catch |err| { if (err == error.FileNotFound) { if (std.fs.path.dirname(file_name)) |dir_name| { try dir.makePath(dir_name); - return try dir.createFile(file_name, .{ .exclusive = true }); + return try dir.createFile(file_name, .{ .exclusive = true, .mode = mode }); } } return err; @@ -877,9 +815,9 @@ test "create file and symlink" { var root = testing.tmpDir(.{}); defer root.cleanup(); - var file = try createDirAndFile(root.dir, "file1"); + var file = try 
createDirAndFile(root.dir, "file1", default_mode); file.close(); - file = try createDirAndFile(root.dir, "a/b/c/file2"); + file = try createDirAndFile(root.dir, "a/b/c/file2", default_mode); file.close(); createDirAndSymlink(root.dir, "a/b/c/file2", "symlink1") catch |err| { @@ -891,7 +829,7 @@ test "create file and symlink" { // Danglink symlnik, file created later try createDirAndSymlink(root.dir, "../../../g/h/i/file4", "j/k/l/symlink3"); - file = try createDirAndFile(root.dir, "g/h/i/file4"); + file = try createDirAndFile(root.dir, "g/h/i/file4", default_mode); file.close(); } @@ -984,9 +922,9 @@ test pipeToFileSystem { // Save tar from `reader` to the file system `dir` pipeToFileSystem(dir, reader, .{ - .mode_mode = .ignore, .strip_components = 1, .exclude_empty_directories = true, + .mode_mode = .ignore, }) catch |err| { // Skip on platform which don't support symlinks if (err == error.UnableToCreateSymLink) return error.SkipZigTest; @@ -1005,6 +943,45 @@ test pipeToFileSystem { ); } +test "executable bit" { + if (!std.fs.has_executable_bit) return error.SkipZigTest; + const S = std.posix.S; + + const data = @embedFile("tar/testdata/example.tar"); + + for ([_]PipeOptions.ModeMode{ .ignore, .executable_bit_only }) |opt| { + var fbs = std.io.fixedBufferStream(data); + const reader = fbs.reader(); + + var tmp = testing.tmpDir(.{ .no_follow = true }); + defer tmp.cleanup(); + + pipeToFileSystem(tmp.dir, reader, .{ + .strip_components = 1, + .exclude_empty_directories = true, + .mode_mode = opt, + }) catch |err| { + // Skip on platform which don't support symlinks + if (err == error.UnableToCreateSymLink) return error.SkipZigTest; + return err; + }; + + const fs = try tmp.dir.statFile("a/file"); + try testing.expect(fs.kind == .file); + if (opt == .executable_bit_only) { + // Executable bit is set for user, group and others + try testing.expect(fs.mode & S.IXUSR > 0); + try testing.expect(fs.mode & S.IXGRP > 0); + try testing.expect(fs.mode & S.IXOTH > 0); + } + 
if (opt == .ignore) { + try testing.expect(fs.mode & S.IXUSR == 0); + try testing.expect(fs.mode & S.IXGRP == 0); + try testing.expect(fs.mode & S.IXOTH == 0); + } + } +} + fn normalizePath(bytes: []u8) []u8 { const canonical_sep = std.fs.path.sep_posix; if (std.fs.path.sep == canonical_sep) return bytes; diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig index abb7d3cbe020..0ad529ad4236 100644 --- a/lib/std/tar/test.zig +++ b/lib/std/tar/test.zig @@ -473,14 +473,14 @@ test "should not overwrite existing file" { defer root.cleanup(); try testing.expectError( error.PathAlreadyExists, - tar.pipeToFileSystem(root.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 1 }), + tar.pipeToFileSystem(root.dir, fsb.reader(), .{ .strip_components = 1 }), ); // Unpack with strip_components = 0 should pass fsb.reset(); var root2 = std.testing.tmpDir(.{}); defer root2.cleanup(); - try tar.pipeToFileSystem(root2.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 0 }); + try tar.pipeToFileSystem(root2.dir, fsb.reader(), .{ .strip_components = 0 }); } test "case sensitivity" { @@ -499,7 +499,7 @@ test "case sensitivity" { var root = std.testing.tmpDir(.{}); defer root.cleanup(); - tar.pipeToFileSystem(root.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 1 }) catch |err| { + tar.pipeToFileSystem(root.dir, fsb.reader(), .{ .strip_components = 1 }) catch |err| { // on case insensitive fs we fail on overwrite existing file try testing.expectEqual(error.PathAlreadyExists, err); return; diff --git a/lib/std/tar/testdata/example.tar b/lib/std/tar/testdata/example.tar index d66c407eaaf0..deeeb65c0090 100644 Binary files a/lib/std/tar/testdata/example.tar and b/lib/std/tar/testdata/example.tar differ diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index d0cfd5ab9491..8fdd25bcf136 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -435,61 +435,62 @@ fn runResource( ) RunError!void { defer resource.deinit(); const arena = 
f.arena.allocator(); + const gpa = f.arena.child_allocator; const eb = &f.error_bundle; const s = fs.path.sep_str; const cache_root = f.job_queue.global_cache; const rand_int = std.crypto.random.int(u64); + // temporary directory for unpacking; sub path of cache_root const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int); + // sub path of cache_root, inside temporary directory, if null package root + // is tmp_dir_sub_path + var package_sub_path: ?[]const u8 = null; { - const tmp_directory_path = try cache_root.join(arena, &.{tmp_dir_sub_path}); - var tmp_directory: Cache.Directory = .{ - .path = tmp_directory_path, - .handle = handle: { - const dir = cache_root.handle.makeOpenPath(tmp_dir_sub_path, .{ - .iterate = true, - }) catch |err| { - try eb.addRootErrorMessage(.{ - .msg = try eb.printString("unable to create temporary directory '{s}': {s}", .{ - tmp_directory_path, @errorName(err), - }), - }); - return error.FetchFailed; - }; - break :handle dir; - }, - }; + var tmp_directory = try f.makeOpenCacheDirectory(tmp_dir_sub_path); defer tmp_directory.handle.close(); - try unpackResource(f, resource, uri_path, tmp_directory); + // Fetch and unpack the resource; `unpack` is released on every exit path, including a failed unpack. + var unpack = Unpack.init(gpa, tmp_directory.handle); + defer unpack.deinit(); + try unpackResource(f, resource, &unpack, uri_path); + + // Position tmp_directory to sub_path if package root is deeper inside + // temporary directory. 
+ if (unpack.package_sub_path) |sub_path| { + package_sub_path = try fs.path.join(arena, &.{ tmp_dir_sub_path, sub_path }); + tmp_directory.handle.close(); + tmp_directory = try f.makeOpenCacheDirectory(package_sub_path.?); + } else { + // btrfs workaround; reopen tmp_directory + if (native_os == .linux and f.job_queue.work_around_btrfs_bug) { + // https://github.com/ziglang/zig/issues/17095 + tmp_directory.handle.close(); + tmp_directory = try f.makeOpenCacheDirectory(tmp_dir_sub_path); + } + } // Load, parse, and validate the unpacked build.zig.zon file. It is allowed // for the file to be missing, in which case this fetched package is // considered to be a "naked" package. try loadManifest(f, .{ .root_dir = tmp_directory }); - - // Apply the manifest's inclusion rules to the temporary directory by - // deleting excluded files. If any error occurred for files that were - // ultimately excluded, those errors should be ignored, such as failure to - // create symlinks that weren't supposed to be included anyway. - - // Empty directories have already been omitted by `unpackResource`. - + // Manifest's inclusion rules. const filter: Filter = .{ .include_paths = if (f.manifest) |m| m.paths else .{}, }; + // Apply the manifest's inclusion rules to the errors collected during + // unpacking resource. If any error occurred for files that were + // ultimately excluded, those errors will be ignored, such as failure to + // create symlinks that weren't supposed to be included anyway. + try unpack.filterErrors(filter); + try f.bundleUnpackErrors(&unpack); + // Compute the package hash based on the remaining files in the temporary // directory. 
- - if (native_os == .linux and f.job_queue.work_around_btrfs_bug) { - // https://github.com/ziglang/zig/issues/17095 - tmp_directory.handle.close(); - tmp_directory.handle = cache_root.handle.makeOpenPath(tmp_dir_sub_path, .{ - .iterate = true, - }) catch @panic("btrfs workaround failed"); - } - + // It will also apply the manifest's inclusion rules to the temporary + // directory by deleting excluded files. + // Empty directories have already been omitted by `unpackResource`. f.actual_hash = try computeHash(f, tmp_directory, filter); } @@ -503,7 +504,11 @@ fn runResource( .root_dir = cache_root, .sub_path = try arena.dupe(u8, "p" ++ s ++ Manifest.hexDigest(f.actual_hash)), }; - renameTmpIntoCache(cache_root.handle, tmp_dir_sub_path, f.package_root.sub_path) catch |err| { + renameTmpIntoCache( + cache_root.handle, + package_sub_path orelse tmp_dir_sub_path, + f.package_root.sub_path, + ) catch |err| { - const src = try cache_root.join(arena, &.{tmp_dir_sub_path}); + const src = try cache_root.join(arena, &.{package_sub_path orelse tmp_dir_sub_path}); const dest = try cache_root.join(arena, &.{f.package_root.sub_path}); try eb.addRootErrorMessage(.{ .msg = try eb.printString( @@ -512,6 +517,9 @@ fn runResource( ) }); return error.FetchFailed; }; + if (package_sub_path) |_| { + cache_root.handle.deleteTree(tmp_dir_sub_path) catch {}; + } // Validate the computed hash against the expected hash. If invalid, this // job is done. 
@@ -544,6 +552,21 @@ fn runResource( return queueJobsForDeps(f); } +fn makeOpenCacheDirectory(f: *Fetch, sub_path: []const u8) RunError!Cache.Directory { + const arena = f.arena.allocator(); + const cache_root = f.job_queue.global_cache; + + const path = try cache_root.join(arena, &.{sub_path}); + return .{ + .path = path, + .handle = cache_root.handle.makeOpenPath(sub_path, .{ .iterate = true }) catch |err| { + return f.failMsg(err, "unable to create temporary directory '{s}': {s}", .{ + path, @errorName(err), + }); + }, + }; +} + /// `computeHash` gets a free check for the existence of `build.zig`, but when /// not computing a hash, we need to do a syscall to check for it. fn checkBuildFileExistence(f: *Fetch) RunError!void { @@ -867,9 +890,9 @@ const FileType = enum { try std.testing.expectEqual(@as(?FileType, .@"tar.xz"), fromContentDisposition("ATTACHMENT; filename=\"stuff.tar.xz\"")); try std.testing.expectEqual(@as(?FileType, .@"tar.xz"), fromContentDisposition("attachment; FileName=\"stuff.tar.xz\"")); try std.testing.expectEqual(@as(?FileType, .@"tar.gz"), fromContentDisposition("attachment; FileName*=UTF-8\'\'xyz%2Fstuff.tar.gz")); + try std.testing.expectEqual(@as(?FileType, .tar), fromContentDisposition("attachment; FileName=\"stuff.tar\"")); try std.testing.expect(fromContentDisposition("attachment FileName=\"stuff.tar.gz\"") == null); - try std.testing.expect(fromContentDisposition("attachment; FileName=\"stuff.tar\"") == null); try std.testing.expect(fromContentDisposition("attachment; FileName\"stuff.gz\"") == null); try std.testing.expect(fromContentDisposition("attachment; size=42") == null); try std.testing.expect(fromContentDisposition("inline; size=42") == null); @@ -1030,8 +1053,8 @@ fn initResource(f: *Fetch, uri: std.Uri, server_header_buffer: []u8) RunError!Re fn unpackResource( f: *Fetch, resource: *Resource, + unpack: *Unpack, uri_path: []const u8, - tmp_directory: Cache.Directory, ) RunError!void { const eb = &f.error_bundle; const 
file_type = switch (resource.*) { @@ -1093,21 +1116,20 @@ fn unpackResource( .git => .git_pack, - .dir => |dir| return f.recursiveDirectoryCopy(dir, tmp_directory.handle) catch |err| { - return f.fail(f.location_tok, try eb.printString( - "unable to copy directory '{s}': {s}", - .{ uri_path, @errorName(err) }, - )); - }, + .dir => |dir| return unpack.directory(dir) catch |err| return f.failMsg( + err, + "unable to copy directory '{s}': {s}", + .{ uri_path, @errorName(err) }, + ), }; switch (file_type) { - .tar => try unpackTarball(f, tmp_directory.handle, resource.reader()), + .tar => unpack.tarball(resource.reader()) catch |err| return f.unpackTarballError(err), .@"tar.gz" => { const reader = resource.reader(); var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader); var dcp = std.compress.gzip.decompressor(br.reader()); - try unpackTarball(f, tmp_directory.handle, dcp.reader()); + unpack.tarball(dcp.reader()) catch |err| return f.unpackTarballError(err); }, .@"tar.xz" => { const gpa = f.arena.child_allocator; @@ -1120,7 +1142,7 @@ fn unpackResource( )); }; defer dcp.deinit(); - try unpackTarball(f, tmp_directory.handle, dcp.reader()); + unpack.tarball(dcp.reader()) catch |err| return f.unpackTarballError(err); }, .@"tar.zst" => { const window_size = std.compress.zstd.DecompressorOptions.default_window_buffer_len; @@ -1130,178 +1152,65 @@ fn unpackResource( var dcp = std.compress.zstd.decompressor(br.reader(), .{ .window_buffer = window_buffer, }); - return unpackTarball(f, tmp_directory.handle, dcp.reader()); + unpack.tarball(dcp.reader()) catch |err| return f.unpackTarballError(err); }, - .git_pack => unpackGitPack(f, tmp_directory.handle, resource) catch |err| switch (err) { - error.FetchFailed => return error.FetchFailed, - error.OutOfMemory => return error.OutOfMemory, - else => |e| return f.fail(f.location_tok, try eb.printString( + .git_pack => { + const want_oid = resource.git.want_oid; + const reader = 
resource.git.fetch_stream.reader(); + unpack.gitPack(want_oid, reader) catch |err| return f.failMsg( + err, "unable to unpack git files: {s}", - .{@errorName(e)}, - )), + .{@errorName(err)}, + ); }, } } -fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!void { - const eb = &f.error_bundle; - const gpa = f.arena.child_allocator; - - var diagnostics: std.tar.Diagnostics = .{ .allocator = gpa }; - defer diagnostics.deinit(); - - std.tar.pipeToFileSystem(out_dir, reader, .{ - .diagnostics = &diagnostics, - .strip_components = 1, - // https://github.com/ziglang/zig/issues/17463 - .mode_mode = .ignore, - .exclude_empty_directories = true, - }) catch |err| return f.fail(f.location_tok, try eb.printString( - "unable to unpack tarball to temporary directory: {s}", - .{@errorName(err)}, - )); - - if (diagnostics.errors.items.len > 0) { - const notes_len: u32 = @intCast(diagnostics.errors.items.len); - try eb.addRootErrorMessage(.{ - .msg = try eb.addString("unable to unpack tarball"), - .src_loc = try f.srcLoc(f.location_tok), - .notes_len = notes_len, - }); - const notes_start = try eb.reserveNotes(notes_len); - for (diagnostics.errors.items, notes_start..) 
|item, note_i| { - switch (item) { - .unable_to_create_sym_link => |info| { - eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("unable to create symlink from '{s}' to '{s}': {s}", .{ - info.file_name, info.link_name, @errorName(info.code), - }), - })); - }, - .unable_to_create_file => |info| { - eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("unable to create file '{s}': {s}", .{ - info.file_name, @errorName(info.code), - }), - })); - }, - .unsupported_file_type => |info| { - eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("file '{s}' has unsupported type '{c}'", .{ - info.file_name, @intFromEnum(info.file_type), - }), - })); - }, - } - } - return error.FetchFailed; - } +fn unpackTarballError(f: *Fetch, err: anyerror) RunError { + return f.failMsg(err, "unable to unpack tarball to temporary directory: {s}", .{@errorName(err)}); } -fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) anyerror!void { +fn failMsg(f: *Fetch, err: anyerror, comptime fmt: []const u8, args: anytype) RunError { const eb = &f.error_bundle; - const gpa = f.arena.child_allocator; - const want_oid = resource.git.want_oid; - const reader = resource.git.fetch_stream.reader(); - // The .git directory is used to store the packfile and associated index, but - // we do not attempt to replicate the exact structure of a real .git - // directory, since that isn't relevant for fetching a package. 
- { - var pack_dir = try out_dir.makeOpenPath(".git", .{}); - defer pack_dir.close(); - var pack_file = try pack_dir.createFile("pkg.pack", .{ .read = true }); - defer pack_file.close(); - var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init(); - try fifo.pump(reader, pack_file.writer()); - try pack_file.sync(); - - var index_file = try pack_dir.createFile("pkg.idx", .{ .read = true }); - defer index_file.close(); - { - var index_prog_node = f.prog_node.start("Index pack", 0); - defer index_prog_node.end(); - index_prog_node.activate(); - var index_buffered_writer = std.io.bufferedWriter(index_file.writer()); - try git.indexPack(gpa, pack_file, index_buffered_writer.writer()); - try index_buffered_writer.flush(); - try index_file.sync(); - } - - { - var checkout_prog_node = f.prog_node.start("Checkout", 0); - defer checkout_prog_node.end(); - checkout_prog_node.activate(); - var repository = try git.Repository.init(gpa, pack_file, index_file); - defer repository.deinit(); - var diagnostics: git.Diagnostics = .{ .allocator = gpa }; - defer diagnostics.deinit(); - try repository.checkout(out_dir, want_oid, &diagnostics); - - if (diagnostics.errors.items.len > 0) { - const notes_len: u32 = @intCast(diagnostics.errors.items.len); - try eb.addRootErrorMessage(.{ - .msg = try eb.addString("unable to unpack packfile"), - .src_loc = try f.srcLoc(f.location_tok), - .notes_len = notes_len, - }); - const notes_start = try eb.reserveNotes(notes_len); - for (diagnostics.errors.items, notes_start..) 
|item, note_i| { - switch (item) { - .unable_to_create_sym_link => |info| { - eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("unable to create symlink from '{s}' to '{s}': {s}", .{ - info.file_name, info.link_name, @errorName(info.code), - }), - })); - }, - } - } - return error.InvalidGitPack; - } - } + switch (err) { + error.OutOfMemory => return error.OutOfMemory, + else => return f.fail(f.location_tok, try eb.printString(fmt, args)), } - - try out_dir.deleteTree(".git"); } -fn recursiveDirectoryCopy(f: *Fetch, dir: fs.Dir, tmp_dir: fs.Dir) anyerror!void { - const gpa = f.arena.child_allocator; - // Recursive directory copy. - var it = try dir.walk(gpa); - defer it.deinit(); - while (try it.next()) |entry| { - switch (entry.kind) { - .directory => {}, // omit empty directories - .file => { - dir.copyFile( - entry.path, - tmp_dir, - entry.path, - .{}, - ) catch |err| switch (err) { - error.FileNotFound => { - if (fs.path.dirname(entry.path)) |dirname| try tmp_dir.makePath(dirname); - try dir.copyFile(entry.path, tmp_dir, entry.path, .{}); - }, - else => |e| return e, - }; +fn bundleUnpackErrors(f: *Fetch, unpack: *Unpack) RunError!void { + if (!unpack.hasErrors()) return; + + var errors = unpack.errors; + const eb = &f.error_bundle; + const notes_len: u32 = @intCast(errors.count()); + + try eb.addRootErrorMessage(.{ + .msg = try eb.addString("unable to unpack"), + .src_loc = try f.srcLoc(f.location_tok), + .notes_len = notes_len, + }); + const notes_start = try eb.reserveNotes(notes_len); + for (errors.list.items, notes_start..) 
|item, note_i| { + switch (item) { + .unable_to_create_sym_link => |info| { + eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ + .msg = try eb.printString("unable to create symlink from '{s}' to '{s}': {s}", .{ + info.sym_link_path, info.target_path, @errorName(info.code), + }), + })); }, - .sym_link => { - var buf: [fs.MAX_PATH_BYTES]u8 = undefined; - const link_name = try dir.readLink(entry.path, &buf); - // TODO: if this would create a symlink to outside - // the destination directory, fail with an error instead. - tmp_dir.symLink(link_name, entry.path, .{}) catch |err| switch (err) { - error.FileNotFound => { - if (fs.path.dirname(entry.path)) |dirname| try tmp_dir.makePath(dirname); - try tmp_dir.symLink(link_name, entry.path, .{}); - }, - else => |e| return e, - }; + .unable_to_create_file => |info| { + eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ + .msg = try eb.printString("unable to create file '{s}': {s}", .{ + info.file_name, @errorName(info.code), + }), + })); }, - else => return error.IllegalFileTypeInPackage, } } + + return error.FetchFailed; } pub fn renameTmpIntoCache( @@ -1626,7 +1535,7 @@ fn normalizePath(bytes: []u8) void { std.mem.replaceScalar(u8, bytes, fs.path.sep, canonical_sep); } -const Filter = struct { +pub const Filter = struct { include_paths: std.StringArrayHashMapUnmanaged(void) = .{}, /// sub_path is relative to the package root. 
@@ -1695,8 +1604,10 @@ const Package = @import("../Package.zig"); const Manifest = Package.Manifest; const ErrorBundle = std.zig.ErrorBundle; const native_os = builtin.os.tag; +const Unpack = @import("Unpack.zig"); test { _ = Filter; _ = FileType; + _ = Unpack; } diff --git a/src/Package/Fetch/git.zig b/src/Package/Fetch/git.zig index 36652bd88c55..dfe364ddca65 100644 --- a/src/Package/Fetch/git.zig +++ b/src/Package/Fetch/git.zig @@ -36,32 +36,6 @@ test parseOid { try testing.expectError(error.InvalidOid, parseOid("HEAD")); } -pub const Diagnostics = struct { - allocator: Allocator, - errors: std.ArrayListUnmanaged(Error) = .{}, - - pub const Error = union(enum) { - unable_to_create_sym_link: struct { - code: anyerror, - file_name: []const u8, - link_name: []const u8, - }, - }; - - pub fn deinit(d: *Diagnostics) void { - for (d.errors.items) |item| { - switch (item) { - .unable_to_create_sym_link => |info| { - d.allocator.free(info.file_name); - d.allocator.free(info.link_name); - }, - } - } - d.errors.deinit(d.allocator); - d.* = undefined; - } -}; - pub const Repository = struct { odb: Odb, @@ -79,7 +53,6 @@ pub const Repository = struct { repository: *Repository, worktree: std.fs.Dir, commit_oid: Oid, - diagnostics: *Diagnostics, ) !void { try repository.odb.seekOid(commit_oid); const tree_oid = tree_oid: { @@ -87,16 +60,15 @@ pub const Repository = struct { if (commit_object.type != .commit) return error.NotACommit; break :tree_oid try getCommitTree(commit_object.data); }; - try repository.checkoutTree(worktree, tree_oid, "", diagnostics); + try repository.checkoutTree(worktree, tree_oid, ""); } - /// Checks out the tree at `tree_oid` to `worktree`. + /// Checks out the tree at `tree_oid` to `worktree`. 
fn checkoutTree( repository: *Repository, dir: std.fs.Dir, tree_oid: Oid, current_path: []const u8, - diagnostics: *Diagnostics, ) !void { try repository.odb.seekOid(tree_oid); const tree_object = try repository.odb.readObject(); @@ -104,26 +76,29 @@ pub const Repository = struct { // The tree object may be evicted from the object cache while we're // iterating over it, so we can make a defensive copy here to make sure // it remains valid until we're done with it - const tree_data = try repository.odb.allocator.dupe(u8, tree_object.data); + const allocator = repository.odb.allocator; + const tree_data = try allocator.dupe(u8, tree_object.data); defer repository.odb.allocator.free(tree_data); var tree_iter: TreeIterator = .{ .data = tree_data }; while (try tree_iter.next()) |entry| { + const sub_path = try std.fs.path.join(allocator, &.{ current_path, entry.name }); + defer allocator.free(sub_path); switch (entry.type) { .directory => { - try dir.makeDir(entry.name); - var subdir = try dir.openDir(entry.name, .{}); - defer subdir.close(); - const sub_path = try std.fs.path.join(repository.odb.allocator, &.{ current_path, entry.name }); - defer repository.odb.allocator.free(sub_path); - try repository.checkoutTree(subdir, entry.oid, sub_path, diagnostics); + try dir.makePath(sub_path); + try repository.checkoutTree(dir, entry.oid, sub_path); }, .file => { - var file = try dir.createFile(entry.name, .{}); - defer file.close(); try repository.odb.seekOid(entry.oid); const file_object = try repository.odb.readObject(); if (file_object.type != .blob) return error.InvalidFile; + + var file = dir.createFile(sub_path, .{}) catch |err| { + if (err == error.Skip) continue; + return err; + }; + defer file.close(); try file.writeAll(file_object.data); try file.sync(); }, @@ -131,23 +106,11 @@ pub const Repository = struct { try repository.odb.seekOid(entry.oid); const symlink_object = try repository.odb.readObject(); if (symlink_object.type != .blob) return error.InvalidFile; - 
const link_name = symlink_object.data; - dir.symLink(link_name, entry.name, .{}) catch |e| { - const file_name = try std.fs.path.join(diagnostics.allocator, &.{ current_path, entry.name }); - errdefer diagnostics.allocator.free(file_name); - const link_name_dup = try diagnostics.allocator.dupe(u8, link_name); - errdefer diagnostics.allocator.free(link_name_dup); - try diagnostics.errors.append(diagnostics.allocator, .{ .unable_to_create_sym_link = .{ - .code = e, - .file_name = file_name, - .link_name = link_name_dup, - } }); - }; + + try dir.symLink(symlink_object.data, sub_path, .{}); }, .gitlink => { - // Consistent with git archive behavior, create the directory but - // do nothing else - try dir.makeDir(entry.name); + try dir.makePath(sub_path); }, } } @@ -183,15 +146,15 @@ pub const Repository = struct { }; }; - fn next(iterator: *TreeIterator) !?Entry { - if (iterator.pos == iterator.data.len) return null; + fn next(iter: *TreeIterator) !?Entry { + if (iter.pos == iter.data.len) return null; - const mode_end = mem.indexOfScalarPos(u8, iterator.data, iterator.pos, ' ') orelse return error.InvalidTree; + const mode_end = mem.indexOfScalarPos(u8, iter.data, iter.pos, ' ') orelse return error.InvalidTree; const mode: packed struct { permission: u9, unused: u3, type: u4, - } = @bitCast(std.fmt.parseUnsigned(u16, iterator.data[iterator.pos..mode_end], 8) catch return error.InvalidTree); + } = @bitCast(std.fmt.parseUnsigned(u16, iter.data[iter.pos..mode_end], 8) catch return error.InvalidTree); const @"type" = std.meta.intToEnum(Entry.Type, mode.type) catch return error.InvalidTree; const executable = switch (mode.permission) { 0 => if (@"type" == .file) return error.InvalidTree else false, @@ -199,19 +162,131 @@ pub const Repository = struct { 0o755 => if (@"type" != .file) return error.InvalidTree else true, else => return error.InvalidTree, }; - iterator.pos = mode_end + 1; + iter.pos = mode_end + 1; - const name_end = mem.indexOfScalarPos(u8, iterator.data, 
iterator.pos, 0) orelse return error.InvalidTree; - const name = iterator.data[iterator.pos..name_end :0]; - iterator.pos = name_end + 1; + const name_end = mem.indexOfScalarPos(u8, iter.data, iter.pos, 0) orelse return error.InvalidTree; + const name = iter.data[iter.pos..name_end :0]; + iter.pos = name_end + 1; - if (iterator.pos + oid_length > iterator.data.len) return error.InvalidTree; - const oid = iterator.data[iterator.pos..][0..oid_length].*; - iterator.pos += oid_length; + if (iter.pos + oid_length > iter.data.len) return error.InvalidTree; + const oid = iter.data[iter.pos..][0..oid_length].*; + iter.pos += oid_length; return .{ .type = @"type", .executable = executable, .name = name, .oid = oid }; } }; + + /// Iterator over all repository entries at `commit_id`. + pub fn iterator(repository: *Repository, commit_oid: Oid) !Iterator { + const allocator = repository.odb.allocator; + try repository.odb.seekOid(commit_oid); + const tree_oid = tree_oid: { + const commit_object = try repository.odb.readObject(); + if (commit_object.type != .commit) return error.NotACommit; + break :tree_oid try getCommitTree(commit_object.data); + }; + var dirs = std.ArrayList(Iterator.Directory).init(allocator); + try dirs.append(.{ .oid = tree_oid, .path = "" }); + return .{ + .repository = repository, + .dirs = dirs, + }; + } + + pub const Iterator = struct { + pub const Entry = struct { + type: TreeIterator.Entry.Type, + executable: bool = false, + path: []const u8, + name: [:0]const u8, + data: []const u8, + + pub fn reader(entry: Entry) std.io.FixedBufferStream([]const u8) { + return std.io.fixedBufferStream(entry.data); + } + }; + const Directory = struct { + oid: Oid, + path: []const u8, + }; + + repository: *Repository, + tree_iter: ?TreeIterator = null, + tree_path: ?[]const u8 = null, + dirs: std.ArrayList(Directory), + + pub fn next(iter: *Iterator) !?Entry { + var odb = &iter.repository.odb; + const allocator = odb.allocator; + + while (true) { + if 
(iter.tree_iter) |*tree_iter| { + const tree_path = iter.tree_path.?; + if (try tree_iter.next()) |entry| { + switch (entry.type) { + .directory => { + try iter.dirs.append(.{ + .oid = entry.oid, + .path = try std.fs.path.join(allocator, &.{ tree_path, entry.name }), + }); + return .{ + .type = .directory, + .path = tree_path, + .name = entry.name, + .data = "", + }; + }, + .file, .symlink => { + try odb.seekOid(entry.oid); + const object = try odb.readObject(); + if (object.type != .blob) return error.InvalidFile; + + return .{ + .type = entry.type, + .executable = entry.executable, + .path = tree_path, + .name = entry.name, + .data = object.data, + }; + }, + .gitlink => {}, + } + } else { + allocator.free(tree_iter.data); + allocator.free(tree_path); + iter.tree_path = null; + iter.tree_iter = null; + } + } else { + if (iter.dirs.items.len == 0) return null; + const dir = iter.dirs.pop(); + + try odb.seekOid(dir.oid); + const object = try odb.readObject(); + if (object.type != .tree) return error.NotATree; + + iter.tree_iter = .{ .data = try allocator.dupe(u8, object.data) }; + iter.tree_path = dir.path; + } + } + } + + pub fn deinit(iter: *Iterator) void { + const allocator = iter.repository.odb.allocator; + + if (iter.tree_iter) |*tree_iter| { + allocator.free(tree_iter.data); + iter.tree_iter = null; + } + if (iter.tree_path) |tree_path| { + allocator.free(tree_path); + iter.tree_path = null; + } + for (iter.dirs.items) |dir| allocator.free(dir.path); + iter.dirs.deinit(); + iter.* = undefined; + } + }; }; /// A Git object database backed by a packfile. A packfile index is also used @@ -1364,23 +1439,14 @@ test "packfile indexing and checkout" { // 3. `git fsck` -> note the "dangling commit" ID (which matches the commit // checked out below) // 4. 
`git checkout dd582c0720819ab7130b103635bd7271b9fd4feb` - const testrepo_pack = @embedFile("git/testdata/testrepo.pack"); - - var git_dir = testing.tmpDir(.{}); - defer git_dir.cleanup(); - var pack_file = try git_dir.dir.createFile("testrepo.pack", .{ .read = true }); - defer pack_file.close(); - try pack_file.writeAll(testrepo_pack); - - var index_file = try git_dir.dir.createFile("testrepo.idx", .{ .read = true }); - defer index_file.close(); - try indexPack(testing.allocator, pack_file, index_file.writer()); + var repo = try TestRepo.open(); + defer repo.close(); // Arbitrary size limit on files read while checking the repository contents // (all files in the test repo are known to be much smaller than this) const max_file_size = 4096; - const index_file_data = try git_dir.dir.readFileAlloc(testing.allocator, "testrepo.idx", max_file_size); + const index_file_data = try repo.git_dir.dir.readFileAlloc(testing.allocator, "testrepo.idx", max_file_size); defer testing.allocator.free(index_file_data); // testrepo.idx is generated by Git. The index created by this file should // match it exactly. 
Running `git verify-pack -v testrepo.pack` can verify @@ -1388,36 +1454,14 @@ test "packfile indexing and checkout" { const testrepo_idx = @embedFile("git/testdata/testrepo.idx"); try testing.expectEqualSlices(u8, testrepo_idx, index_file_data); - var repository = try Repository.init(testing.allocator, pack_file, index_file); + var repository = try Repository.init(testing.allocator, repo.pack_file, repo.index_file); defer repository.deinit(); var worktree = testing.tmpDir(.{ .iterate = true }); defer worktree.cleanup(); - const commit_id = try parseOid("dd582c0720819ab7130b103635bd7271b9fd4feb"); + try repository.checkout(worktree.dir, try TestRepo.commitID()); - var diagnostics: Diagnostics = .{ .allocator = testing.allocator }; - defer diagnostics.deinit(); - try repository.checkout(worktree.dir, commit_id, &diagnostics); - try testing.expect(diagnostics.errors.items.len == 0); - - const expected_files: []const []const u8 = &.{ - "dir/file", - "dir/subdir/file", - "dir/subdir/file2", - "dir2/file", - "dir3/file", - "dir3/file2", - "file", - "file2", - "file3", - "file4", - "file5", - "file6", - "file7", - "file8", - "file9", - }; var actual_files: std.ArrayListUnmanaged([]u8) = .{}; defer actual_files.deinit(testing.allocator); defer for (actual_files.items) |file| testing.allocator.free(file); @@ -1435,7 +1479,7 @@ test "packfile indexing and checkout" { return mem.lessThan(u8, a, b); } }.lessThan); - try testing.expectEqualDeep(expected_files, actual_files.items); + try testing.expectEqualDeep(TestRepo.expected_files, actual_files.items); const expected_file_contents = \\revision 1 @@ -1458,6 +1502,97 @@ test "packfile indexing and checkout" { try testing.expectEqualStrings(expected_file_contents, actual_file_contents); } +pub const TestRepo = struct { + git_dir: testing.TmpDir, + pack_file: std.fs.File, + index_file: std.fs.File, + + pub const expected_files: []const []const u8 = &.{ + "dir/file", + "dir/subdir/file", + "dir/subdir/file2", + "dir2/file", + 
"dir3/file", + "dir3/file2", + "file", + "file2", + "file3", + "file4", + "file5", + "file6", + "file7", + "file8", + "file9", + }; + + const testrepo_pack = @embedFile("git/testdata/testrepo.pack"); + + pub fn open() !TestRepo { + var git_dir = testing.tmpDir(.{}); + errdefer git_dir.cleanup(); + + var pack_file = try git_dir.dir.createFile("testrepo.pack", .{ .read = true }); + errdefer pack_file.close(); + try pack_file.writeAll(testrepo_pack); + + var index_file = try git_dir.dir.createFile("testrepo.idx", .{ .read = true }); + errdefer index_file.close(); + try indexPack(testing.allocator, pack_file, index_file.writer()); + + var repository = try Repository.init(testing.allocator, pack_file, index_file); + errdefer repository.deinit(); + + return .{ + .git_dir = git_dir, + .pack_file = pack_file, + .index_file = index_file, + }; + } + + pub fn stream() std.io.FixedBufferStream([]const u8) { + return .{ .buffer = testrepo_pack, .pos = 0 }; + } + + pub fn commitID() !Oid { + return try parseOid("dd582c0720819ab7130b103635bd7271b9fd4feb"); + } + + pub fn close(tr: *TestRepo) void { + tr.index_file.close(); + tr.pack_file.close(); + tr.git_dir.cleanup(); + } +}; + +test "packfile iterator" { + var repo = try TestRepo.open(); + defer repo.close(); + + var repository = try Repository.init(testing.allocator, repo.pack_file, repo.index_file); + defer repository.deinit(); + + var iter = try repository.iterator(try TestRepo.commitID()); + defer iter.deinit(); + + var actual_files = std.ArrayList([]u8).init(testing.allocator); + defer actual_files.deinit(); + defer for (actual_files.items) |file| testing.allocator.free(file); + while (try iter.next()) |entry| { + if (entry.type != .file) continue; + const path = try std.fs.path.join(testing.allocator, &.{ entry.path, entry.name }); + errdefer testing.allocator.free(path); + mem.replaceScalar(u8, path, std.fs.path.sep, '/'); + try actual_files.append(path); + } + + mem.sortUnstable([]u8, actual_files.items, {}, struct { + 
fn lessThan(_: void, a: []u8, b: []u8) bool { + return mem.lessThan(u8, a, b); + } + }.lessThan); + try testing.expectEqualDeep(TestRepo.expected_files, actual_files.items); +} + /// Checks out a commit of a packfile. Intended for experimenting with and /// benchmarking possible optimizations to the indexing and checkout behavior. pub fn main() !void { @@ -1489,11 +1624,5 @@ pub fn main() !void { std.debug.print("Starting checkout...\n", .{}); var repository = try Repository.init(allocator, pack_file, index_file); defer repository.deinit(); - var diagnostics: Diagnostics = .{ .allocator = allocator }; - defer diagnostics.deinit(); - try repository.checkout(worktree, commit, &diagnostics); - - for (diagnostics.errors.items) |err| { - std.debug.print("Diagnostic: {}\n", .{err}); - } + try repository.checkout(worktree, commit); } diff --git a/src/Package/Unpack.zig b/src/Package/Unpack.zig new file mode 100644 index 000000000000..b58624491bbe --- /dev/null +++ b/src/Package/Unpack.zig @@ -0,0 +1,530 @@ +const std = @import("std"); +const fs = std.fs; +const git = @import("Fetch/git.zig"); +const Filter = @import("Fetch.zig").Filter; + +allocator: std.mem.Allocator, +root: fs.Dir, +package_sub_path: ?[]const u8 = null, +errors: Errors, + +pub const Error = union(enum) { + unable_to_create_sym_link: struct { + code: anyerror, + target_path: []const u8, + sym_link_path: []const u8, + }, + unable_to_create_file: struct { + code: anyerror, + file_name: []const u8, + }, + + pub fn excluded(self: Error, filter: Filter) bool { + switch (self) { + .unable_to_create_file => |info| return !filter.includePath(info.file_name), + .unable_to_create_sym_link => |info| return !filter.includePath(info.target_path), + } + } +}; + +pub const Errors = struct { + allocator: std.mem.Allocator, + list: std.ArrayListUnmanaged(Error) = .{}, + + pub fn deinit(self: *Errors) void { + for (self.list.items) |item| { + self.free(item); + } + self.list.deinit(self.allocator); + self.* = undefined; + 
} + + fn free(self: *Errors, item: Error) void { + switch (item) { + .unable_to_create_sym_link => |info| { + self.allocator.free(info.target_path); + self.allocator.free(info.sym_link_path); + }, + .unable_to_create_file => |info| { + self.allocator.free(info.file_name); + }, + } + } + + pub fn count(self: *Errors) usize { + return self.list.items.len; + } + + fn createFile(self: *Errors, subdir_path: []const u8, file_path: []const u8, err: anyerror) !void { + try self.list.append(self.allocator, .{ .unable_to_create_file = .{ + .code = err, + .file_name = try std.fs.path.join(self.allocator, &.{ subdir_path, file_path }), + } }); + } + + fn symLink(self: *Errors, subdir_path: []const u8, target_path: []const u8, sym_link_path: []const u8, err: anyerror) !void { + try self.list.append(self.allocator, .{ .unable_to_create_sym_link = .{ + .code = err, + .target_path = try self.allocator.dupe(u8, target_path), + .sym_link_path = try std.fs.path.join(self.allocator, &.{ subdir_path, sym_link_path }), + } }); + } + + fn filterWith(self: *Errors, filter: Filter) !void { + var i = self.list.items.len; + while (i > 0) { + i -= 1; + const item = self.list.items[i]; + if (item.excluded(filter)) { + _ = self.list.swapRemove(i); + self.free(item); + } + } + } + + fn stripRoot(self: *Errors) !void { + if (self.count() == 0) return; + + var old_list = self.list; + self.list = .{}; + for (old_list.items) |item| { + switch (item) { + .unable_to_create_sym_link => |info| { + try self.symLink("", stripComponents(info.target_path, 1), info.sym_link_path, info.code); + }, + .unable_to_create_file => |info| { + try self.createFile("", stripComponents(info.file_name, 1), info.code); + }, + } + self.free(item); + } + old_list.deinit(self.allocator); + } +}; + +pub fn init(allocator: std.mem.Allocator, root: fs.Dir) Self { + return .{ + .allocator = allocator, + .errors = Errors{ .allocator = allocator }, + .root = root, + }; +} + +pub fn deinit(self: *Self) void { + 
self.errors.deinit(); + if (self.package_sub_path) |package_sub_path| { + self.allocator.free(package_sub_path); + } +} + +const Self = @This(); + +pub fn tarball(self: *Self, reader: anytype) !void { + var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; + var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; + var iter = std.tar.iterator(reader, .{ + .file_name_buffer = &file_name_buffer, + .link_name_buffer = &link_name_buffer, + }); + while (true) { + if (iter.next() catch |err| switch (err) { + error.TarUnsupportedHeader => continue, + else => return err, + }) |entry| { + switch (entry.kind) { + .directory => {}, // skip empty + .file => { + if (entry.size == 0 and entry.name.len == 0) continue; + if (try self.createFile("", entry.name)) |file| { + defer file.close(); + try entry.writeAll(file); + } + }, + .sym_link => { + try self.symLink("", entry.link_name, entry.name); + }, + } + } else break; + } + try self.findPackageSubPath(); +} + +fn findPackageSubPath(self: *Self) !void { + var iter = self.root.iterate(); + if (try iter.next()) |entry| { + if (try iter.next() != null) return; + if (entry.kind == .directory) { // single directory below root + self.package_sub_path = try self.allocator.dupe(u8, entry.name); + try self.errors.stripRoot(); + } + } +} + +test findPackageSubPath { + var tmp = testing.tmpDir(.{ .iterate = true }); + defer tmp.cleanup(); + + // folder1 + // ├── folder2 + // ├── file1 + // + try tmp.dir.makePath("folder1/folder2"); + (try tmp.dir.createFile("folder1/file1", .{})).close(); + + var unpack = init(testing.allocator, tmp.dir); + try unpack.findPackageSubPath(); + // start at root returns folder1 as package root + try testing.expectEqualStrings("folder1", unpack.package_sub_path.?); + unpack.deinit(); + + // start at folder1 returns null + unpack = init(testing.allocator, try tmp.dir.openDir("folder1", .{ .iterate = true })); + try unpack.findPackageSubPath(); + try testing.expect(unpack.package_sub_path == null); + 
unpack.deinit(); + + // start at folder1/folder2 returns null + unpack = init(testing.allocator, try tmp.dir.openDir("folder1/folder2", .{ .iterate = true })); + try unpack.findPackageSubPath(); + try testing.expect(unpack.package_sub_path == null); + unpack.deinit(); +} + +pub fn gitPack(self: *Self, commit_oid: git.Oid, reader: anytype) !void { + var pack_dir = try self.root.makeOpenPath(".git", .{}); + defer pack_dir.close(); + var pack_file = try pack_dir.createFile("pkg.pack", .{ .read = true }); + defer pack_file.close(); + var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init(); + try fifo.pump(reader, pack_file.writer()); + try pack_file.sync(); + + var index_file = try pack_dir.createFile("pkg.idx", .{ .read = true }); + defer index_file.close(); + { + var index_buffered_writer = std.io.bufferedWriter(index_file.writer()); + try git.indexPack(self.allocator, pack_file, index_buffered_writer.writer()); + try index_buffered_writer.flush(); + try index_file.sync(); + } + + { + var repository = try git.Repository.init(self.allocator, pack_file, index_file); + defer repository.deinit(); + var iter = try repository.iterator(commit_oid); + defer iter.deinit(); + while (try iter.next()) |entry| { + switch (entry.type) { + .file => { + if (try self.createFile(entry.path, entry.name)) |file| { + defer file.close(); + try file.writeAll(entry.data); + } + }, + .symlink => { + try self.symLink(entry.path, entry.data, entry.name); + }, + else => {}, // skip empty directory + } + } + } + + try self.root.deleteTree(".git"); +} + +pub fn directory(self: *Self, source: fs.Dir) !void { + var it = try source.walk(self.allocator); + defer it.deinit(); + while (try it.next()) |entry| { + switch (entry.kind) { + .directory => {}, // omit empty directories + .file => { + try copyFile(source, entry.path, self.root, entry.path); + }, + .sym_link => { + var buf: [fs.MAX_PATH_BYTES]u8 = undefined; + const link_name = try source.readLink(entry.path, &buf); + try 
self.symLink("", link_name, entry.path); + }, + else => return error.IllegalFileTypeInPackage, + } + } +} + +pub fn hasErrors(self: *Self) bool { + return self.errors.count() > 0; +} + +pub fn filterErrors(self: *Self, filter: Filter) !void { + try self.errors.filterWith(filter); +} + +fn makePath(self: *Self, sub_path: []const u8) !fs.Dir { + if (sub_path.len == 0) return self.root; + try self.root.makePath(sub_path); + return try self.root.openDir(sub_path, .{}); +} + +fn copyFile(source_dir: fs.Dir, source_path: []const u8, dest_dir: fs.Dir, dest_path: []const u8) !void { + source_dir.copyFile(source_path, dest_dir, dest_path, .{}) catch |err| switch (err) { + error.FileNotFound => { + if (fs.path.dirname(dest_path)) |dirname| try dest_dir.makePath(dirname); + try source_dir.copyFile(source_path, dest_dir, dest_path, .{}); + }, + else => |e| return e, + }; +} + +/// Returns fs.File on success, null on failure. +/// Errors are collected in errors list. +fn createFile(self: *Self, subdir_path: []const u8, file_path: []const u8) !?fs.File { + return createFilePath(self.root, subdir_path, file_path) catch |err| { + try self.errors.createFile(subdir_path, file_path, err); + return null; + }; +} + +fn symLink(self: *Self, subdir_path: []const u8, target_path: []const u8, sym_link_path: []const u8) !void { + symLinkPath(self.root, subdir_path, target_path, sym_link_path) catch |err| { + try self.errors.symLink(subdir_path, target_path, sym_link_path, err); + }; +} + +fn createFilePath(root: fs.Dir, subdir_path: []const u8, file_path: []const u8) !fs.File { + var dir = root; + if (subdir_path.len > 0) { + try dir.makePath(subdir_path); + dir = try dir.openDir(subdir_path, .{}); + } + + return dir.createFile(file_path, .{ .exclusive = true }) catch |err| switch (err) { + error.FileNotFound => { + if (std.fs.path.dirname(file_path)) |dirname| try dir.makePath(dirname); + return try dir.createFile(file_path, .{ .exclusive = true }); + }, + else => |e| return e, + }; +} + 
/// Creates a symbolic link named `sym_link_path` that points at `target_path`.
///
/// When `subdir_path` is non-empty it is created below `root` (including
/// missing parents) and `sym_link_path` is resolved relative to it; otherwise
/// `sym_link_path` is resolved relative to `root` itself.
///
/// If the first attempt fails with `error.FileNotFound`, the parent directory
/// of `sym_link_path` is created and the link is attempted once more. Every
/// other error is returned to the caller unchanged.
fn symLinkPath(root: fs.Dir, subdir_path: []const u8, target_path: []const u8, sym_link_path: []const u8) !void {
    // TODO: if this would create a symlink to outside
    // the destination directory, fail with an error instead.

    // Only a handle opened here is owned by this function; `root` belongs to
    // the caller and must stay open. Closing the opened handle on every exit
    // path avoids leaking one descriptor per symlink.
    var opened_subdir: ?fs.Dir = null;
    defer if (opened_subdir) |*d| d.close();

    if (subdir_path.len > 0) {
        try root.makePath(subdir_path);
        opened_subdir = try root.openDir(subdir_path, .{});
    }
    const dir = opened_subdir orelse root;

    dir.symLink(target_path, sym_link_path, .{}) catch |err| switch (err) {
        error.FileNotFound => {
            // The directory that should contain the link does not exist yet.
            if (fs.path.dirname(sym_link_path)) |dirname| try dir.makePath(dirname);
            try dir.symLink(target_path, sym_link_path, .{});
        },
        else => |e| return e,
    };
}

/// Returns the part of `path` that remains after dropping its first `count`
/// '/'-separated components.
///
/// When `path` has `count` or fewer components the result is the empty slice
/// at the end of `path`. The returned slice always aliases `path`; nothing is
/// allocated. Only '/' is treated as a separator.
fn stripComponents(path: []const u8, count: u32) []const u8 {
    var i: usize = 0;
    var c = count;
    while (c > 0) : (c -= 1) {
        if (std.mem.indexOfScalarPos(u8, path, i, '/')) |pos| {
            i = pos + 1;
        } else {
            // Fewer separators than requested: nothing is left of the path.
            i = path.len;
            break;
        }
    }
    return path[i..];
}

const testing = std.testing;
const Unpack = @This();

test "tar stripComponents" {
    const expectEqualStrings = std.testing.expectEqualStrings;
    try expectEqualStrings("a/b/c", stripComponents("a/b/c", 0));
    try expectEqualStrings("b/c", stripComponents("a/b/c", 1));
    try expectEqualStrings("c", stripComponents("a/b/c", 2));
    try expectEqualStrings("", stripComponents("a/b/c", 3));
    try expectEqualStrings("", stripComponents("a/b/c", 4));
}

// Unpacks the embedded test repository pack at its known commit and checks
// that exactly the expected regular files end up in the destination.
test gitPack {
    var tmp = testing.tmpDir(.{ .iterate = true });
    defer tmp.cleanup();

    const repo = git.TestRepo;
    var stream = repo.stream();
    const reader = stream.reader();

    // Unpack git repo at commitID from reader
    {
        var unpack = Unpack.init(testing.allocator, tmp.dir);
        defer unpack.deinit();
        try unpack.gitPack(try repo.commitID(), reader);
    }

    try expectDirFiles(tmp.dir, repo.expected_files);
}

const TarHeader = std.tar.output.Header;

// Unpacks header-only tarballs with and without a single leading root folder
// and checks both the detected `package_sub_path` and the created files.
test tarball {
    const paths: []const []const u8 = &.{
        "dir/file",
        "dir1/dir2/file2",
        "dir3/dir4/dir5/file3",
        "file",
        "file2",
    };
    var buf: [paths.len * @sizeOf(TarHeader)]u8 = undefined;

    // tarball with leading root folder
    {
        try createTarball("package_root", paths, &buf);
        var tmp = testing.tmpDir(.{ .iterate = true });
        defer tmp.cleanup();

        var fbs = std.io.fixedBufferStream(&buf);

        var unpack = Unpack.init(testing.allocator, tmp.dir);
        defer unpack.deinit();
        try unpack.tarball(fbs.reader());
        try testing.expectEqualStrings("package_root", unpack.package_sub_path.?);

        // Close the handle before `tmp.cleanup()` runs; `expectDirFiles` does
        // not take ownership of the directory it is given.
        var package_root = try tmp.dir.openDir("package_root", .{ .iterate = true });
        defer package_root.close();
        try expectDirFiles(package_root, paths);
    }
    // tarball without root
    {
        try createTarball("", paths, &buf);
        var tmp = testing.tmpDir(.{ .iterate = true });
        defer tmp.cleanup();

        var fbs = std.io.fixedBufferStream(&buf);

        var unpack = Unpack.init(testing.allocator, tmp.dir);
        defer unpack.deinit();
        try unpack.tarball(fbs.reader());
        try testing.expect(unpack.package_sub_path == null);

        try expectDirFiles(tmp.dir, paths);
    }
}

// Copies a directory tree of empty files into a fresh destination and checks
// that every file path is reproduced.
test directory {
    const paths: []const []const u8 = &.{
        "dir/file",
        "dir1/dir2/file2",
        "dir3/dir4/dir5/file3",
        "file",
        "file2",
    };

    var source = testing.tmpDir(.{ .iterate = true });
    defer source.cleanup();

    for (paths) |path| {
        const f = try createFilePath(source.dir, "", path);
        f.close();
    }

    var tmp = testing.tmpDir(.{ .iterate = true });
    defer tmp.cleanup();

    var unpack = Unpack.init(testing.allocator, tmp.dir);
    defer unpack.deinit();
    try unpack.directory(source.dir);

    try expectDirFiles(tmp.dir, paths);
}

test "collect/filter errors" {
    const gpa = std.testing.allocator;

    // Tarball with duplicated file paths: the second occurrence of each path
    // fails to be created, which simulates a file system write failure.
    const paths: []const []const u8 = &.{
        "dir/file",
        "dir1/file1",
        "dir/file",
        "dir1/file1",
    };
    var buf: [paths.len * @sizeOf(TarHeader)]u8 = undefined;
    try createTarball("package_root", paths, &buf);

    var tmp = testing.tmpDir(.{ .iterate = true });
    defer tmp.cleanup();

    var fbs = std.io.fixedBufferStream(&buf);
    var unpack = Unpack.init(gpa, tmp.dir);
    defer unpack.deinit();
    try unpack.tarball(fbs.reader());
    try testing.expect(unpack.hasErrors());
    try testing.expectEqualStrings("package_root", unpack.package_sub_path.?);
    {
        // Scoped so the directory handle is closed as soon as the check is done.
        var package_root = try tmp.dir.openDir("package_root", .{ .iterate = true });
        defer package_root.close();
        try expectDirFiles(package_root, paths[0..2]);
    }

    try testing.expectEqual(2, unpack.errors.count());
    try testing.expectEqualStrings("dir/file", unpack.errors.list.items[0].unable_to_create_file.file_name);
    try testing.expectEqualStrings("dir1/file1", unpack.errors.list.items[1].unable_to_create_file.file_name);

    {
        var filter: Filter = .{};
        defer filter.include_paths.deinit(gpa);

        // no filter all paths are included
        try unpack.filterErrors(filter);
        try testing.expectEqual(2, unpack.errors.count());

        // dir1 is included, dir excluded
        try filter.include_paths.put(gpa, "dir1", {});
        try unpack.filterErrors(filter);
        try testing.expectEqual(1, unpack.errors.count());
        try testing.expectEqualStrings("dir1/file1", unpack.errors.list.items[0].unable_to_create_file.file_name);
    }
    {
        var filter: Filter = .{};
        defer filter.include_paths.deinit(gpa);

        // only src included that filters all error paths
        try filter.include_paths.put(gpa, "src", {});
        try unpack.filterErrors(filter);
        try testing.expectEqual(0, unpack.errors.count());
    }
}

/// Test helper: writes one tar header per entry of `paths` into `buf`.
///
/// Each header is marked as a regular file. With a non-empty `prefix` the
/// header path is set from `prefix` and the entry path via `setPath`;
/// otherwise only the name is set via `setName`. Only headers are written:
/// no file contents and no trailing blocks. The write fails if `buf` is too
/// small to hold `paths.len` headers.
fn createTarball(prefix: []const u8, paths: []const []const u8, buf: []u8) !void {
    var fbs = std.io.fixedBufferStream(buf);
    const writer = fbs.writer();
    for (paths) |path| {
        var hdr = TarHeader.init();
        hdr.typeflag = .regular;
        if (prefix.len > 0) {
            try hdr.setPath(prefix, path);
        } else {
            try hdr.setName(path);
        }
        try hdr.updateChecksum();
        try writer.writeAll(std.mem.asBytes(&hdr));
    }
}

/// Test helper: asserts that the regular files found by walking `dir`
/// recursively are exactly `expected_files`.
///
/// Found paths are normalized to '/' separators and sorted bytewise before
/// the comparison, so `expected_files` must be given in that order.
/// Directories and other entry kinds are ignored. `dir` must have been opened
/// with iteration enabled; it is not closed here.
fn expectDirFiles(dir: fs.Dir, expected_files: []const []const u8) !void {
    var actual_files: std.ArrayListUnmanaged([]u8) = .{};
    defer actual_files.deinit(testing.allocator);
    defer for (actual_files.items) |file| testing.allocator.free(file);
    var walker = try dir.walk(testing.allocator);
    defer walker.deinit();
    while (try walker.next()) |entry| {
        if (entry.kind != .file) continue;
        // `entry.path` is only valid until the next walker step, so copy it.
        const path = try testing.allocator.dupe(u8, entry.path);
        errdefer testing.allocator.free(path);
        std.mem.replaceScalar(u8, path, std.fs.path.sep, '/');
        try actual_files.append(testing.allocator, path);
    }
    std.mem.sortUnstable([]u8, actual_files.items, {}, struct {
        fn lessThan(_: void, a: []u8, b: []u8) bool {
            return std.mem.lessThan(u8, a, b);
        }
    }.lessThan);
    try testing.expectEqualDeep(expected_files, actual_files.items);
}