From 867e3575f70fe5bc640fd587e4a82ac2ff90d108 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Fri, 15 Mar 2024 19:16:42 +0100 Subject: [PATCH 01/16] package: move unpack logic to a single place Tar pipeToFileSystem and git checkout have repeating logic: collecting diagnostics on errors and writing those errors to the error bundle. Recursive directory copy and pipeToFileSystem both create the dir when writing a file while the dir doesn't exist yet. Implementing new logic, such as thinking about the file executable bit, requires changes in a few places. This collects file handling logic in a single place and leaves the other places with their job: unpacking a tarball or a git pack. --- src/Package/Fetch.zig | 166 +++------------- src/Package/Fetch/git.zig | 63 ++++++ src/Package/Unpack.zig | 401 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 491 insertions(+), 139 deletions(-) create mode 100644 src/Package/Unpack.zig diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index d0cfd5ab9491..6ab7796f3d3d 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -1143,57 +1143,23 @@ fn unpackResource( } } +const Unpack = @import("Unpack.zig"); + fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!void { const eb = &f.error_bundle; const gpa = f.arena.child_allocator; - var diagnostics: std.tar.Diagnostics = .{ .allocator = gpa }; - defer diagnostics.deinit(); - - std.tar.pipeToFileSystem(out_dir, reader, .{ - .diagnostics = &diagnostics, - .strip_components = 1, - // https://github.com/ziglang/zig/issues/17463 - .mode_mode = .ignore, - .exclude_empty_directories = true, - }) catch |err| return f.fail(f.location_tok, try eb.printString( - "unable to unpack tarball to temporary directory: {s}", - .{@errorName(err)}, - )); - - if (diagnostics.errors.items.len > 0) { - const notes_len: u32 = @intCast(diagnostics.errors.items.len); - try eb.addRootErrorMessage(.{ - .msg = try eb.addString("unable to unpack tarball"), - .src_loc = try 
f.srcLoc(f.location_tok), - .notes_len = notes_len, - }); - const notes_start = try eb.reserveNotes(notes_len); - for (diagnostics.errors.items, notes_start..) |item, note_i| { - switch (item) { - .unable_to_create_sym_link => |info| { - eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("unable to create symlink from '{s}' to '{s}': {s}", .{ - info.file_name, info.link_name, @errorName(info.code), - }), - })); - }, - .unable_to_create_file => |info| { - eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("unable to create file '{s}': {s}", .{ - info.file_name, @errorName(info.code), - }), - })); - }, - .unsupported_file_type => |info| { - eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("file '{s}' has unsupported type '{c}'", .{ - info.file_name, @intFromEnum(info.file_type), - }), - })); - }, - } - } + var unpack = Unpack{ .allocator = gpa, .root = out_dir }; + defer unpack.deinit(); + unpack.tarball(reader) catch |err| return f.fail( + f.location_tok, + try eb.printString( + "unable to unpack tarball to temporary directory: {s}", + .{@errorName(err)}, + ), + ); + if (unpack.hasErrors()) { + try unpack.bundleErrors(eb, try f.srcLoc(f.location_tok)); return error.FetchFailed; } } @@ -1203,104 +1169,26 @@ fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) anyerror!void const gpa = f.arena.child_allocator; const want_oid = resource.git.want_oid; const reader = resource.git.fetch_stream.reader(); - // The .git directory is used to store the packfile and associated index, but - // we do not attempt to replicate the exact structure of a real .git - // directory, since that isn't relevant for fetching a package. 
- { - var pack_dir = try out_dir.makeOpenPath(".git", .{}); - defer pack_dir.close(); - var pack_file = try pack_dir.createFile("pkg.pack", .{ .read = true }); - defer pack_file.close(); - var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init(); - try fifo.pump(reader, pack_file.writer()); - try pack_file.sync(); - - var index_file = try pack_dir.createFile("pkg.idx", .{ .read = true }); - defer index_file.close(); - { - var index_prog_node = f.prog_node.start("Index pack", 0); - defer index_prog_node.end(); - index_prog_node.activate(); - var index_buffered_writer = std.io.bufferedWriter(index_file.writer()); - try git.indexPack(gpa, pack_file, index_buffered_writer.writer()); - try index_buffered_writer.flush(); - try index_file.sync(); - } - { - var checkout_prog_node = f.prog_node.start("Checkout", 0); - defer checkout_prog_node.end(); - checkout_prog_node.activate(); - var repository = try git.Repository.init(gpa, pack_file, index_file); - defer repository.deinit(); - var diagnostics: git.Diagnostics = .{ .allocator = gpa }; - defer diagnostics.deinit(); - try repository.checkout(out_dir, want_oid, &diagnostics); - - if (diagnostics.errors.items.len > 0) { - const notes_len: u32 = @intCast(diagnostics.errors.items.len); - try eb.addRootErrorMessage(.{ - .msg = try eb.addString("unable to unpack packfile"), - .src_loc = try f.srcLoc(f.location_tok), - .notes_len = notes_len, - }); - const notes_start = try eb.reserveNotes(notes_len); - for (diagnostics.errors.items, notes_start..) 
|item, note_i| { - switch (item) { - .unable_to_create_sym_link => |info| { - eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("unable to create symlink from '{s}' to '{s}': {s}", .{ - info.file_name, info.link_name, @errorName(info.code), - }), - })); - }, - } - } - return error.InvalidGitPack; - } - } + var unpack = Unpack{ .allocator = gpa, .root = out_dir }; + defer unpack.deinit(); + try unpack.gitPack(want_oid, reader); + if (unpack.hasErrors()) { + try unpack.bundleErrors(eb, try f.srcLoc(f.location_tok)); + return error.FetchFailed; } - - try out_dir.deleteTree(".git"); } fn recursiveDirectoryCopy(f: *Fetch, dir: fs.Dir, tmp_dir: fs.Dir) anyerror!void { + const eb = &f.error_bundle; const gpa = f.arena.child_allocator; - // Recursive directory copy. - var it = try dir.walk(gpa); - defer it.deinit(); - while (try it.next()) |entry| { - switch (entry.kind) { - .directory => {}, // omit empty directories - .file => { - dir.copyFile( - entry.path, - tmp_dir, - entry.path, - .{}, - ) catch |err| switch (err) { - error.FileNotFound => { - if (fs.path.dirname(entry.path)) |dirname| try tmp_dir.makePath(dirname); - try dir.copyFile(entry.path, tmp_dir, entry.path, .{}); - }, - else => |e| return e, - }; - }, - .sym_link => { - var buf: [fs.MAX_PATH_BYTES]u8 = undefined; - const link_name = try dir.readLink(entry.path, &buf); - // TODO: if this would create a symlink to outside - // the destination directory, fail with an error instead. 
- tmp_dir.symLink(link_name, entry.path, .{}) catch |err| switch (err) { - error.FileNotFound => { - if (fs.path.dirname(entry.path)) |dirname| try tmp_dir.makePath(dirname); - try tmp_dir.symLink(link_name, entry.path, .{}); - }, - else => |e| return e, - }; - }, - else => return error.IllegalFileTypeInPackage, - } + + var unpack = Unpack{ .allocator = gpa, .root = tmp_dir }; + defer unpack.deinit(); + try unpack.directory(dir); + if (unpack.hasErrors()) { + try unpack.bundleErrors(eb, try f.srcLoc(f.location_tok)); + return error.FetchFailed; } } diff --git a/src/Package/Fetch/git.zig b/src/Package/Fetch/git.zig index 36652bd88c55..367bef3f5a86 100644 --- a/src/Package/Fetch/git.zig +++ b/src/Package/Fetch/git.zig @@ -153,6 +153,69 @@ pub const Repository = struct { } } + /// Checks out the repository at `commit_oid` to `worktree`. + pub fn checkout2( + repository: *Repository, + worktree: anytype, + commit_oid: Oid, + ) !void { + try repository.odb.seekOid(commit_oid); + const tree_oid = tree_oid: { + const commit_object = try repository.odb.readObject(); + if (commit_object.type != .commit) return error.NotACommit; + break :tree_oid try getCommitTree(commit_object.data); + }; + try repository.checkoutTree2(worktree, tree_oid, ""); + } + + /// Checks out the tree at `tree_oid` to `worktree`. 
+ fn checkoutTree2( + repository: *Repository, + dir: anytype, + tree_oid: Oid, + current_path: []const u8, + ) !void { + try repository.odb.seekOid(tree_oid); + const tree_object = try repository.odb.readObject(); + if (tree_object.type != .tree) return error.NotATree; + // The tree object may be evicted from the object cache while we're + // iterating over it, so we can make a defensive copy here to make sure + // it remains valid until we're done with it + const allocator = repository.odb.allocator; + const tree_data = try allocator.dupe(u8, tree_object.data); + defer repository.odb.allocator.free(tree_data); + + var tree_iter: TreeIterator = .{ .data = tree_data }; + while (try tree_iter.next()) |entry| { + const sub_path = try std.fs.path.join(allocator, &.{ current_path, entry.name }); + defer allocator.free(sub_path); + switch (entry.type) { + .directory => { + try repository.checkoutTree2(dir, entry.oid, sub_path); + }, + .file => { + try repository.odb.seekOid(entry.oid); + const file_object = try repository.odb.readObject(); + if (file_object.type != .blob) return error.InvalidFile; + + if (try dir.createFile(sub_path)) |file| { + defer file.close(); + try file.writeAll(file_object.data); + try file.sync(); + } + }, + .symlink => { + try repository.odb.seekOid(entry.oid); + const symlink_object = try repository.odb.readObject(); + if (symlink_object.type != .blob) return error.InvalidFile; + + try dir.symLink(symlink_object.data, sub_path); + }, + .gitlink => {}, + } + } + } + /// Returns the ID of the tree associated with the given commit (provided as /// raw object data). 
fn getCommitTree(commit_data: []const u8) !Oid { diff --git a/src/Package/Unpack.zig b/src/Package/Unpack.zig new file mode 100644 index 000000000000..8a70812db31d --- /dev/null +++ b/src/Package/Unpack.zig @@ -0,0 +1,401 @@ +const std = @import("std"); +const fs = std.fs; +const git = @import("Fetch/git.zig"); +const ErrorBundle = std.zig.ErrorBundle; + +allocator: std.mem.Allocator, +root: fs.Dir, + +errors: std.ArrayListUnmanaged(Error) = .{}, + +pub const Error = union(enum) { + unable_to_create_sym_link: struct { + code: anyerror, + target_path: []const u8, + sym_link_path: []const u8, + }, + unable_to_create_file: struct { + code: anyerror, + file_name: []const u8, + }, + unsupported_file_type: struct { + file_name: []const u8, + file_type: u8, + }, +}; + +pub fn deinit(self: *Self) void { + for (self.errors.items) |item| { + switch (item) { + .unable_to_create_sym_link => |info| { + self.allocator.free(info.target_path); + self.allocator.free(info.sym_link_path); + }, + .unable_to_create_file => |info| { + self.allocator.free(info.file_name); + }, + .unsupported_file_type => |info| { + self.allocator.free(info.file_name); + }, + } + } + self.errors.deinit(self.allocator); + self.* = undefined; +} + +const Self = @This(); + +pub fn tarball(self: *Self, reader: anytype) !void { + const strip_components = 1; + // TODO: replace with switch ignore unsupported tar file types in iterator + var diagnostics: std.tar.Diagnostics = .{ .allocator = self.allocator }; + defer diagnostics.deinit(); + + var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; + var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; + var iter = std.tar.iterator(reader, .{ + .file_name_buffer = &file_name_buffer, + .link_name_buffer = &link_name_buffer, + .diagnostics = &diagnostics, + }); + while (try iter.next()) |entry| { + switch (entry.kind) { + .directory => {}, // skip empty + .file => { + if (entry.size == 0 and entry.name.len == 0) continue; + const file_name = 
stripComponents(entry.name, strip_components); + if (file_name.len == 0) return error.BadFileName; + if (try self.createFile(file_name)) |file| { + defer file.close(); + try entry.writeAll(file); + } + }, + .sym_link => { + const file_name = stripComponents(entry.name, strip_components); + if (file_name.len == 0) return error.BadFileName; + const link_name = entry.link_name; + try self.symLink(link_name, file_name); + }, + } + } +} + +pub fn gitPack(self: *Self, commit_oid: git.Oid, reader: anytype) !void { + var pack_dir = try self.root.makeOpenPath(".git", .{}); + defer pack_dir.close(); + var pack_file = try pack_dir.createFile("pkg.pack", .{ .read = true }); + defer pack_file.close(); + var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init(); + try fifo.pump(reader, pack_file.writer()); + try pack_file.sync(); + + var index_file = try pack_dir.createFile("pkg.idx", .{ .read = true }); + defer index_file.close(); + { + var index_buffered_writer = std.io.bufferedWriter(index_file.writer()); + try git.indexPack(self.allocator, pack_file, index_buffered_writer.writer()); + try index_buffered_writer.flush(); + try index_file.sync(); + } + + { + var repository = try git.Repository.init(self.allocator, pack_file, index_file); + defer repository.deinit(); + try repository.checkout2(self, commit_oid); + } + + try self.root.deleteTree(".git"); +} + +pub fn directory(self: *Self, source: fs.Dir) !void { + var it = try source.walk(self.allocator); + defer it.deinit(); + while (try it.next()) |entry| { + switch (entry.kind) { + .directory => {}, // omit empty directories + .file => { + try copyFile(source, entry.path, self.root, entry.path); + }, + .sym_link => { + var buf: [fs.MAX_PATH_BYTES]u8 = undefined; + const link_name = try source.readLink(entry.path, &buf); + try self.symLink(link_name, entry.path); + }, + else => return error.IllegalFileTypeInPackage, + } + } +} + +pub fn hasErrors(self: *Self) bool { + return self.errors.items.len > 0; +} + +pub fn 
bundleErrors(self: *Self, eb: *ErrorBundle.Wip, src_loc: ErrorBundle.SourceLocationIndex) !void { + const notes_len: u32 = @intCast(self.errors.items.len); + try eb.addRootErrorMessage(.{ + .msg = try eb.addString("unable to unpack"), + .src_loc = src_loc, + .notes_len = notes_len, + }); + const notes_start = try eb.reserveNotes(notes_len); + for (self.errors.items, notes_start..) |item, note_i| { + switch (item) { + .unable_to_create_sym_link => |info| { + eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ + .msg = try eb.printString("unable to create symlink from '{s}' to '{s}': {s}", .{ + info.sym_link_path, info.target_path, @errorName(info.code), + }), + })); + }, + .unable_to_create_file => |info| { + eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ + .msg = try eb.printString("unable to create file '{s}': {s}", .{ + info.file_name, @errorName(info.code), + }), + })); + }, + .unsupported_file_type => |info| { + eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ + .msg = try eb.printString("file '{s}' has unsupported type '{c}'", .{ + info.file_name, info.file_type, + }), + })); + }, + } + } +} + +fn copyFile(source_dir: fs.Dir, source_path: []const u8, dest_dir: fs.Dir, dest_path: []const u8) !void { + source_dir.copyFile(source_path, dest_dir, dest_path, .{}) catch |err| switch (err) { + error.FileNotFound => { + if (fs.path.dirname(dest_path)) |dirname| try dest_dir.makePath(dirname); + try source_dir.copyFile(source_path, dest_dir, dest_path, .{}); + }, + else => |e| return e, + }; +} + +/// Returns fs.File on success, null on failure. +/// Errors are collected in errors list. 
+pub fn createFile(self: *Self, sub_path: []const u8) !?fs.File { + return createFilePath(self.root, sub_path) catch |err| { + try self.errors.append(self.allocator, .{ .unable_to_create_file = .{ + .code = err, + .file_name = try self.allocator.dupe(u8, sub_path), + } }); + return null; + }; +} + +pub fn symLink(self: *Self, target_path: []const u8, sym_link_path: []const u8) !void { + symLinkPath(self.root, target_path, sym_link_path) catch |err| { + try self.errors.append(self.allocator, .{ .unable_to_create_sym_link = .{ + .code = err, + .target_path = try self.allocator.dupe(u8, target_path), + .sym_link_path = try self.allocator.dupe(u8, sym_link_path), + } }); + }; +} + +fn createFilePath(dir: fs.Dir, sub_path: []const u8) !fs.File { + return dir.createFile(sub_path, .{ .exclusive = true }) catch |err| switch (err) { + error.FileNotFound => { + if (std.fs.path.dirname(sub_path)) |dirname| try dir.makePath(dirname); + return try dir.createFile(sub_path, .{ .exclusive = true }); + }, + else => |e| return e, + }; +} + +fn symLinkPath(dir: fs.Dir, target_path: []const u8, sym_link_path: []const u8) !void { + // TODO: if this would create a symlink to outside + // the destination directory, fail with an error instead. 
+ dir.symLink(target_path, sym_link_path, .{}) catch |err| switch (err) { + error.FileNotFound => { + if (fs.path.dirname(sym_link_path)) |dirname| try dir.makePath(dirname); + try dir.symLink(target_path, sym_link_path, .{}); + }, + else => |e| return e, + }; +} + +fn stripComponents(path: []const u8, count: u32) []const u8 { + var i: usize = 0; + var c = count; + while (c > 0) : (c -= 1) { + if (std.mem.indexOfScalarPos(u8, path, i, '/')) |pos| { + i = pos + 1; + } else { + i = path.len; + break; + } + } + return path[i..]; +} + +const testing = std.testing; +const Unpack = @This(); + +test "tar stripComponents" { + const expectEqualStrings = std.testing.expectEqualStrings; + try expectEqualStrings("a/b/c", stripComponents("a/b/c", 0)); + try expectEqualStrings("b/c", stripComponents("a/b/c", 1)); + try expectEqualStrings("c", stripComponents("a/b/c", 2)); + try expectEqualStrings("", stripComponents("a/b/c", 3)); + try expectEqualStrings("", stripComponents("a/b/c", 4)); +} + +test gitPack { + const paths: []const []const u8 = &.{ + "dir/file", + "dir/subdir/file", + "dir/subdir/file2", + "dir2/file", + "dir3/file", + "dir3/file2", + "file", + "file2", + "file3", + "file4", + "file5", + "file6", + "file7", + "file8", + "file9", + }; + + // load git pack with expected files + const data = @embedFile("Fetch/git/testdata/testrepo.pack"); + var fbs = std.io.fixedBufferStream(data); + + var tmp = testing.tmpDir(.{ .iterate = true }); + defer tmp.cleanup(); + + // unpack git pack + { + var unpack = Unpack{ .allocator = testing.allocator, .root = tmp.dir }; + defer unpack.deinit(); + const commit_id = try git.parseOid("dd582c0720819ab7130b103635bd7271b9fd4feb"); + try unpack.gitPack(commit_id, fbs.reader()); + } + + try expectDirFiles(tmp.dir, paths); +} + +const TarHeader = std.tar.output.Header; + +test tarball { + const paths: []const []const u8 = &.{ + "dir/file", + "dir1/dir2/file2", + "dir3/dir4/dir5/file3", + "file", + "file2", + }; + var buf: [paths.len * 
@sizeOf(TarHeader)]u8 = undefined; + + // create tarball + try createTarball(paths, &buf); + + var tmp = testing.tmpDir(.{ .iterate = true }); + defer tmp.cleanup(); + + // unpack tarball to tmp dir, will strip root dir + { + var fbs = std.io.fixedBufferStream(&buf); + + var unpack = Unpack{ .allocator = testing.allocator, .root = tmp.dir }; + defer unpack.deinit(); + try unpack.tarball(fbs.reader()); + } + + try expectDirFiles(tmp.dir, paths); +} + +test directory { + const paths: []const []const u8 = &.{ + "dir/file", + "dir1/dir2/file2", + "dir3/dir4/dir5/file3", + "file", + "file2", + }; + + var source = testing.tmpDir(.{ .iterate = true }); + defer source.cleanup(); + + for (paths) |path| { + const f = try createFilePath(source.dir, path); + f.close(); + } + + var dest = testing.tmpDir(.{ .iterate = true }); + defer dest.cleanup(); + + var unpack = Unpack{ .allocator = testing.allocator, .root = dest.dir }; + defer unpack.deinit(); + try unpack.directory(source.dir); + + try expectDirFiles(dest.dir, paths); +} + +test "collect errors" { + // Tarball with two files with same path. + // Unpack will have 1 error in errors list. 
+ + const paths: []const []const u8 = &.{ + "dir/file", + "dir/file", + }; + var buf: [paths.len * @sizeOf(TarHeader)]u8 = undefined; + try createTarball(paths, &buf); + + var tmp = testing.tmpDir(.{ .iterate = true }); + defer tmp.cleanup(); + + var fbs = std.io.fixedBufferStream(&buf); + var unpack = Unpack{ .allocator = testing.allocator, .root = tmp.dir }; + defer unpack.deinit(); + try unpack.tarball(fbs.reader()); + + try expectDirFiles(tmp.dir, paths[0..1]); + + try testing.expectEqual(1, unpack.errors.items.len); + try testing.expectEqualStrings(paths[1], unpack.errors.items[0].unable_to_create_file.file_name); +} + +fn createTarball(paths: []const []const u8, buf: []u8) !void { + var fbs = std.io.fixedBufferStream(buf); + const writer = fbs.writer(); + for (paths) |path| { + var hdr = TarHeader.init(); + hdr.typeflag = .regular; + try hdr.setPath("stripped_root", path); + try hdr.updateChecksum(); + try writer.writeAll(std.mem.asBytes(&hdr)); + } +} + +fn expectDirFiles(dir: fs.Dir, expected_files: []const []const u8) !void { + var actual_files: std.ArrayListUnmanaged([]u8) = .{}; + defer actual_files.deinit(testing.allocator); + defer for (actual_files.items) |file| testing.allocator.free(file); + var walker = try dir.walk(testing.allocator); + defer walker.deinit(); + while (try walker.next()) |entry| { + if (entry.kind != .file) continue; + const path = try testing.allocator.dupe(u8, entry.path); + errdefer testing.allocator.free(path); + std.mem.replaceScalar(u8, path, std.fs.path.sep, '/'); + try actual_files.append(testing.allocator, path); + } + std.mem.sortUnstable([]u8, actual_files.items, {}, struct { + fn lessThan(_: void, a: []u8, b: []u8) bool { + return std.mem.lessThan(u8, a, b); + } + }.lessThan); + try testing.expectEqualDeep(expected_files, actual_files.items); +} + +// var buf: [256]u8 = undefined; +// std.debug.print("tmp dir: {s}\n", .{try tmp.dir.realpath(".", &buf)}); From a33578290cbf596a34aa5509d50b41e2355b52c3 Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Sat, 16 Mar 2024 11:13:32 +0100 Subject: [PATCH 02/16] package: refactor unpack errors reporting Just refactoring to put similar code closer. --- src/Package/Fetch.zig | 115 +++++++++++++++++++++++++---------------- src/Package/Unpack.zig | 63 +++++++++------------- 2 files changed, 95 insertions(+), 83 deletions(-) diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index 6ab7796f3d3d..e68b177c7b16 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -1093,12 +1093,7 @@ fn unpackResource( .git => .git_pack, - .dir => |dir| return f.recursiveDirectoryCopy(dir, tmp_directory.handle) catch |err| { - return f.fail(f.location_tok, try eb.printString( - "unable to copy directory '{s}': {s}", - .{ uri_path, @errorName(err) }, - )); - }, + .dir => |dir| return try f.recursiveDirectoryCopy(dir, tmp_directory.handle, uri_path), }; switch (file_type) { @@ -1132,64 +1127,94 @@ fn unpackResource( }); return unpackTarball(f, tmp_directory.handle, dcp.reader()); }, - .git_pack => unpackGitPack(f, tmp_directory.handle, resource) catch |err| switch (err) { - error.FetchFailed => return error.FetchFailed, - error.OutOfMemory => return error.OutOfMemory, - else => |e| return f.fail(f.location_tok, try eb.printString( - "unable to unpack git files: {s}", - .{@errorName(e)}, - )), - }, + .git_pack => try unpackGitPack(f, tmp_directory.handle, resource), } } const Unpack = @import("Unpack.zig"); fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!void { - const eb = &f.error_bundle; - const gpa = f.arena.child_allocator; - - var unpack = Unpack{ .allocator = gpa, .root = out_dir }; + var unpack = Unpack.init(f.arena.child_allocator, out_dir); defer unpack.deinit(); - unpack.tarball(reader) catch |err| return f.fail( - f.location_tok, - try eb.printString( - "unable to unpack tarball to temporary directory: {s}", - .{@errorName(err)}, - ), + unpack.tarball(reader) catch |err| return f.failMsg( + err, + 
"unable to unpack tarball to temporary directory: {s}", + .{@errorName(err)}, ); - if (unpack.hasErrors()) { - try unpack.bundleErrors(eb, try f.srcLoc(f.location_tok)); - return error.FetchFailed; - } + try f.checkUnpackErrors(&unpack); } -fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) anyerror!void { - const eb = &f.error_bundle; - const gpa = f.arena.child_allocator; +fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) RunError!void { + var unpack = Unpack.init(f.arena.child_allocator, out_dir); + defer unpack.deinit(); + const want_oid = resource.git.want_oid; const reader = resource.git.fetch_stream.reader(); + unpack.gitPack(want_oid, reader) catch |err| return f.failMsg( + err, + "unable to unpack git files: {s}", + .{@errorName(err)}, + ); + try f.checkUnpackErrors(&unpack); +} - var unpack = Unpack{ .allocator = gpa, .root = out_dir }; +fn recursiveDirectoryCopy(f: *Fetch, dir: fs.Dir, out_dir: fs.Dir, uri_path: []const u8) RunError!void { + var unpack = Unpack.init(f.arena.child_allocator, out_dir); defer unpack.deinit(); - try unpack.gitPack(want_oid, reader); - if (unpack.hasErrors()) { - try unpack.bundleErrors(eb, try f.srcLoc(f.location_tok)); - return error.FetchFailed; - } + unpack.directory(dir) catch |err| return f.failMsg( + err, + "unable to copy directory '{s}': {s}", + .{ uri_path, @errorName(err) }, + ); + try f.checkUnpackErrors(&unpack); } -fn recursiveDirectoryCopy(f: *Fetch, dir: fs.Dir, tmp_dir: fs.Dir) anyerror!void { +fn failMsg(f: *Fetch, err: anyerror, comptime fmt: []const u8, args: anytype) RunError { const eb = &f.error_bundle; - const gpa = f.arena.child_allocator; + switch (err) { + error.OutOfMemory => return error.OutOfMemory, + else => return f.fail(f.location_tok, try eb.printString(fmt, args)), + } +} - var unpack = Unpack{ .allocator = gpa, .root = tmp_dir }; - defer unpack.deinit(); - try unpack.directory(dir); - if (unpack.hasErrors()) { - try unpack.bundleErrors(eb, try 
f.srcLoc(f.location_tok)); - return error.FetchFailed; +fn checkUnpackErrors(f: *Fetch, unpack: *Unpack) RunError!void { + if (!unpack.hasErrors()) return; + + const eb = &f.error_bundle; + const notes_len: u32 = @intCast(unpack.errors.items.len); + try eb.addRootErrorMessage(.{ + .msg = try eb.addString("unable to unpack"), + .src_loc = try f.srcLoc(f.location_tok), + .notes_len = notes_len, + }); + const notes_start = try eb.reserveNotes(notes_len); + for (unpack.errors.items, notes_start..) |item, note_i| { + switch (item) { + .unable_to_create_sym_link => |info| { + eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ + .msg = try eb.printString("unable to create symlink from '{s}' to '{s}': {s}", .{ + info.sym_link_path, info.target_path, @errorName(info.code), + }), + })); + }, + .unable_to_create_file => |info| { + eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ + .msg = try eb.printString("unable to create file '{s}': {s}", .{ + info.file_name, @errorName(info.code), + }), + })); + }, + .unsupported_file_type => |info| { + eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ + .msg = try eb.printString("file '{s}' has unsupported type '{c}'", .{ + info.file_name, info.file_type, + }), + })); + }, + } } + + return error.FetchFailed; } pub fn renameTmpIntoCache( diff --git a/src/Package/Unpack.zig b/src/Package/Unpack.zig index 8a70812db31d..28d0fe14a952 100644 --- a/src/Package/Unpack.zig +++ b/src/Package/Unpack.zig @@ -24,6 +24,13 @@ pub const Error = union(enum) { }, }; +pub fn init(allocator: std.mem.Allocator, root: fs.Dir) Self { + return .{ + .allocator = allocator, + .root = root, + }; +} + pub fn deinit(self: *Self) void { for (self.errors.items) |item| { switch (item) { @@ -81,6 +88,21 @@ pub fn tarball(self: *Self, reader: anytype) !void { } pub fn gitPack(self: *Self, commit_oid: git.Oid, reader: anytype) !void { + // Same interface as std.fs.Dir.createFile, symLink + const inf = struct { + parent: *Self, + + 
pub fn makePath(_: @This(), _: []const u8) !void {} + + pub fn createFile(t: @This(), sub_path: []const u8, _: fs.File.CreateFlags) !fs.File { + return (try t.parent.createFile(sub_path)) orelse error.Skip; + } + + pub fn symLink(t: @This(), target_path: []const u8, sym_link_path: []const u8, _: fs.Dir.SymLinkFlags) !void { + try t.parent.symLink(target_path, sym_link_path); + } + }{ .parent = self }; + var pack_dir = try self.root.makeOpenPath(".git", .{}); defer pack_dir.close(); var pack_file = try pack_dir.createFile("pkg.pack", .{ .read = true }); @@ -101,7 +123,7 @@ pub fn gitPack(self: *Self, commit_oid: git.Oid, reader: anytype) !void { { var repository = try git.Repository.init(self.allocator, pack_file, index_file); defer repository.deinit(); - try repository.checkout2(self, commit_oid); + try repository.checkout2(inf, commit_oid); } try self.root.deleteTree(".git"); @@ -130,41 +152,6 @@ pub fn hasErrors(self: *Self) bool { return self.errors.items.len > 0; } -pub fn bundleErrors(self: *Self, eb: *ErrorBundle.Wip, src_loc: ErrorBundle.SourceLocationIndex) !void { - const notes_len: u32 = @intCast(self.errors.items.len); - try eb.addRootErrorMessage(.{ - .msg = try eb.addString("unable to unpack"), - .src_loc = src_loc, - .notes_len = notes_len, - }); - const notes_start = try eb.reserveNotes(notes_len); - for (self.errors.items, notes_start..) 
|item, note_i| { - switch (item) { - .unable_to_create_sym_link => |info| { - eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("unable to create symlink from '{s}' to '{s}': {s}", .{ - info.sym_link_path, info.target_path, @errorName(info.code), - }), - })); - }, - .unable_to_create_file => |info| { - eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("unable to create file '{s}': {s}", .{ - info.file_name, @errorName(info.code), - }), - })); - }, - .unsupported_file_type => |info| { - eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("file '{s}' has unsupported type '{c}'", .{ - info.file_name, info.file_type, - }), - })); - }, - } - } -} - fn copyFile(source_dir: fs.Dir, source_path: []const u8, dest_dir: fs.Dir, dest_path: []const u8) !void { source_dir.copyFile(source_path, dest_dir, dest_path, .{}) catch |err| switch (err) { error.FileNotFound => { @@ -177,7 +164,7 @@ fn copyFile(source_dir: fs.Dir, source_path: []const u8, dest_dir: fs.Dir, dest_ /// Returns fs.File on success, null on failure. /// Errors are collected in errors list. 
-pub fn createFile(self: *Self, sub_path: []const u8) !?fs.File { +fn createFile(self: *Self, sub_path: []const u8) !?fs.File { return createFilePath(self.root, sub_path) catch |err| { try self.errors.append(self.allocator, .{ .unable_to_create_file = .{ .code = err, @@ -187,7 +174,7 @@ pub fn createFile(self: *Self, sub_path: []const u8) !?fs.File { }; } -pub fn symLink(self: *Self, target_path: []const u8, sym_link_path: []const u8) !void { +fn symLink(self: *Self, target_path: []const u8, sym_link_path: []const u8) !void { symLinkPath(self.root, target_path, sym_link_path) catch |err| { try self.errors.append(self.allocator, .{ .unable_to_create_sym_link = .{ .code = err, From 5177943fbf48f691093faa1f9f76230cabc18d5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Sat, 16 Mar 2024 11:24:39 +0100 Subject: [PATCH 03/16] package.git: remove Diagnostic Logic moved to Unpack. --- src/Package/Fetch/git.zig | 144 +++++--------------------------------- src/Package/Unpack.zig | 2 +- 2 files changed, 18 insertions(+), 128 deletions(-) diff --git a/src/Package/Fetch/git.zig b/src/Package/Fetch/git.zig index 367bef3f5a86..7b019d489909 100644 --- a/src/Package/Fetch/git.zig +++ b/src/Package/Fetch/git.zig @@ -36,32 +36,6 @@ test parseOid { try testing.expectError(error.InvalidOid, parseOid("HEAD")); } -pub const Diagnostics = struct { - allocator: Allocator, - errors: std.ArrayListUnmanaged(Error) = .{}, - - pub const Error = union(enum) { - unable_to_create_sym_link: struct { - code: anyerror, - file_name: []const u8, - link_name: []const u8, - }, - }; - - pub fn deinit(d: *Diagnostics) void { - for (d.errors.items) |item| { - switch (item) { - .unable_to_create_sym_link => |info| { - d.allocator.free(info.file_name); - d.allocator.free(info.link_name); - }, - } - } - d.errors.deinit(d.allocator); - d.* = undefined; - } -}; - pub const Repository = struct { odb: Odb, @@ -76,85 +50,6 @@ pub const Repository = struct { /// Checks out the repository at 
`commit_oid` to `worktree`. pub fn checkout( - repository: *Repository, - worktree: std.fs.Dir, - commit_oid: Oid, - diagnostics: *Diagnostics, - ) !void { - try repository.odb.seekOid(commit_oid); - const tree_oid = tree_oid: { - const commit_object = try repository.odb.readObject(); - if (commit_object.type != .commit) return error.NotACommit; - break :tree_oid try getCommitTree(commit_object.data); - }; - try repository.checkoutTree(worktree, tree_oid, "", diagnostics); - } - - /// Checks out the tree at `tree_oid` to `worktree`. - fn checkoutTree( - repository: *Repository, - dir: std.fs.Dir, - tree_oid: Oid, - current_path: []const u8, - diagnostics: *Diagnostics, - ) !void { - try repository.odb.seekOid(tree_oid); - const tree_object = try repository.odb.readObject(); - if (tree_object.type != .tree) return error.NotATree; - // The tree object may be evicted from the object cache while we're - // iterating over it, so we can make a defensive copy here to make sure - // it remains valid until we're done with it - const tree_data = try repository.odb.allocator.dupe(u8, tree_object.data); - defer repository.odb.allocator.free(tree_data); - - var tree_iter: TreeIterator = .{ .data = tree_data }; - while (try tree_iter.next()) |entry| { - switch (entry.type) { - .directory => { - try dir.makeDir(entry.name); - var subdir = try dir.openDir(entry.name, .{}); - defer subdir.close(); - const sub_path = try std.fs.path.join(repository.odb.allocator, &.{ current_path, entry.name }); - defer repository.odb.allocator.free(sub_path); - try repository.checkoutTree(subdir, entry.oid, sub_path, diagnostics); - }, - .file => { - var file = try dir.createFile(entry.name, .{}); - defer file.close(); - try repository.odb.seekOid(entry.oid); - const file_object = try repository.odb.readObject(); - if (file_object.type != .blob) return error.InvalidFile; - try file.writeAll(file_object.data); - try file.sync(); - }, - .symlink => { - try repository.odb.seekOid(entry.oid); - const 
symlink_object = try repository.odb.readObject(); - if (symlink_object.type != .blob) return error.InvalidFile; - const link_name = symlink_object.data; - dir.symLink(link_name, entry.name, .{}) catch |e| { - const file_name = try std.fs.path.join(diagnostics.allocator, &.{ current_path, entry.name }); - errdefer diagnostics.allocator.free(file_name); - const link_name_dup = try diagnostics.allocator.dupe(u8, link_name); - errdefer diagnostics.allocator.free(link_name_dup); - try diagnostics.errors.append(diagnostics.allocator, .{ .unable_to_create_sym_link = .{ - .code = e, - .file_name = file_name, - .link_name = link_name_dup, - } }); - }; - }, - .gitlink => { - // Consistent with git archive behavior, create the directory but - // do nothing else - try dir.makeDir(entry.name); - }, - } - } - } - - /// Checks out the repository at `commit_oid` to `worktree`. - pub fn checkout2( repository: *Repository, worktree: anytype, commit_oid: Oid, @@ -165,11 +60,11 @@ pub const Repository = struct { if (commit_object.type != .commit) return error.NotACommit; break :tree_oid try getCommitTree(commit_object.data); }; - try repository.checkoutTree2(worktree, tree_oid, ""); + try repository.checkoutTree(worktree, tree_oid, ""); } /// Checks out the tree at `tree_oid` to `worktree`. 
- fn checkoutTree2( + fn checkoutTree( repository: *Repository, dir: anytype, tree_oid: Oid, @@ -191,27 +86,32 @@ pub const Repository = struct { defer allocator.free(sub_path); switch (entry.type) { .directory => { - try repository.checkoutTree2(dir, entry.oid, sub_path); + try dir.makePath(sub_path); + try repository.checkoutTree(dir, entry.oid, sub_path); }, .file => { try repository.odb.seekOid(entry.oid); const file_object = try repository.odb.readObject(); if (file_object.type != .blob) return error.InvalidFile; - if (try dir.createFile(sub_path)) |file| { - defer file.close(); - try file.writeAll(file_object.data); - try file.sync(); - } + var file = dir.createFile(sub_path, .{}) catch |err| { + if (err == error.Skip) continue; + return err; + }; + defer file.close(); + try file.writeAll(file_object.data); + try file.sync(); }, .symlink => { try repository.odb.seekOid(entry.oid); const symlink_object = try repository.odb.readObject(); if (symlink_object.type != .blob) return error.InvalidFile; - try dir.symLink(symlink_object.data, sub_path); + try dir.symLink(symlink_object.data, sub_path, .{}); + }, + .gitlink => { + try dir.makePath(sub_path); }, - .gitlink => {}, } } } @@ -1458,11 +1358,7 @@ test "packfile indexing and checkout" { defer worktree.cleanup(); const commit_id = try parseOid("dd582c0720819ab7130b103635bd7271b9fd4feb"); - - var diagnostics: Diagnostics = .{ .allocator = testing.allocator }; - defer diagnostics.deinit(); - try repository.checkout(worktree.dir, commit_id, &diagnostics); - try testing.expect(diagnostics.errors.items.len == 0); + try repository.checkout(worktree.dir, commit_id); const expected_files: []const []const u8 = &.{ "dir/file", @@ -1552,11 +1448,5 @@ pub fn main() !void { std.debug.print("Starting checkout...\n", .{}); var repository = try Repository.init(allocator, pack_file, index_file); defer repository.deinit(); - var diagnostics: Diagnostics = .{ .allocator = allocator }; - defer diagnostics.deinit(); - try 
repository.checkout(worktree, commit, &diagnostics); - - for (diagnostics.errors.items) |err| { - std.debug.print("Diagnostic: {}\n", .{err}); - } + try repository.checkout(worktree, commit); } diff --git a/src/Package/Unpack.zig b/src/Package/Unpack.zig index 28d0fe14a952..dc52b79a8353 100644 --- a/src/Package/Unpack.zig +++ b/src/Package/Unpack.zig @@ -123,7 +123,7 @@ pub fn gitPack(self: *Self, commit_oid: git.Oid, reader: anytype) !void { { var repository = try git.Repository.init(self.allocator, pack_file, index_file); defer repository.deinit(); - try repository.checkout2(inf, commit_oid); + try repository.checkout(inf, commit_oid); } try self.root.deleteTree(".git"); From b5278846e6353b0e06976822a0ac416d82fcbbcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Sat, 16 Mar 2024 17:49:19 +0100 Subject: [PATCH 04/16] std.tar: remove diagnostic and mode options Diagnostic error collection has been moved to Unpack. Mode does not have implementation, and after this changes implementation will be in Unpack. --- lib/std/tar.zig | 104 ++--------------------------------------- lib/std/tar/test.zig | 6 +-- src/Package/Unpack.zig | 47 ++++++++++--------- 3 files changed, 32 insertions(+), 125 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 8ab7f0d09a1e..bf11caa1fb43 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -21,70 +21,12 @@ const testing = std.testing; pub const output = @import("tar/output.zig"); -/// Provide this to receive detailed error messages. -/// When this is provided, some errors which would otherwise be returned -/// immediately will instead be added to this structure. The API user must check -/// the errors in diagnostics to know whether the operation succeeded or failed. 
-pub const Diagnostics = struct { - allocator: std.mem.Allocator, - errors: std.ArrayListUnmanaged(Error) = .{}, - - pub const Error = union(enum) { - unable_to_create_sym_link: struct { - code: anyerror, - file_name: []const u8, - link_name: []const u8, - }, - unable_to_create_file: struct { - code: anyerror, - file_name: []const u8, - }, - unsupported_file_type: struct { - file_name: []const u8, - file_type: Header.Kind, - }, - }; - - pub fn deinit(d: *Diagnostics) void { - for (d.errors.items) |item| { - switch (item) { - .unable_to_create_sym_link => |info| { - d.allocator.free(info.file_name); - d.allocator.free(info.link_name); - }, - .unable_to_create_file => |info| { - d.allocator.free(info.file_name); - }, - .unsupported_file_type => |info| { - d.allocator.free(info.file_name); - }, - } - } - d.errors.deinit(d.allocator); - d.* = undefined; - } -}; - /// pipeToFileSystem options pub const PipeOptions = struct { /// Number of directory levels to skip when extracting files. strip_components: u32 = 0, - /// How to handle the "mode" property of files from within the tar file. - mode_mode: ModeMode = .executable_bit_only, /// Prevents creation of empty directories. exclude_empty_directories: bool = false, - /// Collects error messages during unpacking - diagnostics: ?*Diagnostics = null, - - pub const ModeMode = enum { - /// The mode from the tar file is completely ignored. Files are created - /// with the default mode when creating files. - ignore, - /// The mode from the tar file is inspected for the owner executable bit - /// only. This bit is copied to the group and other executable bits. - /// Other bits of the mode are left as the default when creating files. - executable_bit_only, - }; }; const Header = struct { @@ -247,8 +189,6 @@ pub const IteratorOptions = struct { file_name_buffer: []u8, /// Use a buffer with length `std.fs.MAX_PATH_BYTES` to match file system capabilities. 
link_name_buffer: []u8, - /// Collects error messages during unpacking - diagnostics: ?*Diagnostics = null, }; /// Iterates over files in tar archive. @@ -256,7 +196,6 @@ pub const IteratorOptions = struct { pub fn iterator(reader: anytype, options: IteratorOptions) Iterator(@TypeOf(reader)) { return .{ .reader = reader, - .diagnostics = options.diagnostics, .file_name_buffer = options.file_name_buffer, .link_name_buffer = options.link_name_buffer, }; @@ -273,7 +212,6 @@ pub const FileKind = enum { pub fn Iterator(comptime ReaderType: type) type { return struct { reader: ReaderType, - diagnostics: ?*Diagnostics = null, // buffers for heeader and file attributes header_buffer: [Header.SIZE]u8 = undefined, @@ -435,15 +373,11 @@ pub fn Iterator(comptime ReaderType: type) type { }, // All other are unsupported header types else => { - const d = self.diagnostics orelse return error.TarUnsupportedHeader; - try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ - .file_name = try d.allocator.dupe(u8, header.name()), - .file_type = kind, - } }); if (kind == .gnu_sparse) { try self.skipGnuSparseExtendedHeaders(header); } self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig; + return error.TarUnsupportedHeader; }, } } @@ -573,24 +507,11 @@ fn PaxIterator(comptime ReaderType: type) type { /// Saves tar file content to the file systems. pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) !void { - switch (options.mode_mode) { - .ignore => {}, - .executable_bit_only => { - // This code does not look at the mode bits yet. To implement this feature, - // the implementation must be adjusted to look at the mode, and check the - // user executable bit, then call fchmod on newly created files when - // the executable bit is supposed to be set. - // It also needs to properly deal with ACLs on Windows. 
- @panic("TODO: unimplemented: tar ModeMode.executable_bit_only"); - }, - } - var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; var iter = iterator(reader, .{ .file_name_buffer = &file_name_buffer, .link_name_buffer = &link_name_buffer, - .diagnostics = options.diagnostics, }); while (try iter.next()) |file| { switch (file.kind) { @@ -605,16 +526,9 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) const file_name = stripComponents(file.name, options.strip_components); if (file_name.len == 0) return error.BadFileName; - if (createDirAndFile(dir, file_name)) |fs_file| { - defer fs_file.close(); - try file.writeAll(fs_file); - } else |err| { - const d = options.diagnostics orelse return err; - try d.errors.append(d.allocator, .{ .unable_to_create_file = .{ - .code = err, - .file_name = try d.allocator.dupe(u8, file_name), - } }); - } + var fs_file = try createDirAndFile(dir, file_name); + defer fs_file.close(); + try file.writeAll(fs_file); }, .sym_link => { // The file system path of the symbolic link. @@ -623,14 +537,7 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) // The data inside the symbolic link. 
const link_name = file.link_name; - createDirAndSymlink(dir, link_name, file_name) catch |err| { - const d = options.diagnostics orelse return error.UnableToCreateSymLink; - try d.errors.append(d.allocator, .{ .unable_to_create_sym_link = .{ - .code = err, - .file_name = try d.allocator.dupe(u8, file_name), - .link_name = try d.allocator.dupe(u8, link_name), - } }); - }; + try createDirAndSymlink(dir, link_name, file_name); }, } } @@ -984,7 +891,6 @@ test pipeToFileSystem { // Save tar from `reader` to the file system `dir` pipeToFileSystem(dir, reader, .{ - .mode_mode = .ignore, .strip_components = 1, .exclude_empty_directories = true, }) catch |err| { diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig index abb7d3cbe020..0ad529ad4236 100644 --- a/lib/std/tar/test.zig +++ b/lib/std/tar/test.zig @@ -473,14 +473,14 @@ test "should not overwrite existing file" { defer root.cleanup(); try testing.expectError( error.PathAlreadyExists, - tar.pipeToFileSystem(root.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 1 }), + tar.pipeToFileSystem(root.dir, fsb.reader(), .{ .strip_components = 1 }), ); // Unpack with strip_components = 0 should pass fsb.reset(); var root2 = std.testing.tmpDir(.{}); defer root2.cleanup(); - try tar.pipeToFileSystem(root2.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 0 }); + try tar.pipeToFileSystem(root2.dir, fsb.reader(), .{ .strip_components = 0 }); } test "case sensitivity" { @@ -499,7 +499,7 @@ test "case sensitivity" { var root = std.testing.tmpDir(.{}); defer root.cleanup(); - tar.pipeToFileSystem(root.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 1 }) catch |err| { + tar.pipeToFileSystem(root.dir, fsb.reader(), .{ .strip_components = 1 }) catch |err| { // on case insensitive fs we fail on overwrite existing file try testing.expectEqual(error.PathAlreadyExists, err); return; diff --git a/src/Package/Unpack.zig b/src/Package/Unpack.zig index dc52b79a8353..e703dda8545d 100644 --- 
a/src/Package/Unpack.zig +++ b/src/Package/Unpack.zig @@ -54,36 +54,37 @@ const Self = @This(); pub fn tarball(self: *Self, reader: anytype) !void { const strip_components = 1; - // TODO: replace with switch ignore unsupported tar file types in iterator - var diagnostics: std.tar.Diagnostics = .{ .allocator = self.allocator }; - defer diagnostics.deinit(); var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; var iter = std.tar.iterator(reader, .{ .file_name_buffer = &file_name_buffer, .link_name_buffer = &link_name_buffer, - .diagnostics = &diagnostics, }); - while (try iter.next()) |entry| { - switch (entry.kind) { - .directory => {}, // skip empty - .file => { - if (entry.size == 0 and entry.name.len == 0) continue; - const file_name = stripComponents(entry.name, strip_components); - if (file_name.len == 0) return error.BadFileName; - if (try self.createFile(file_name)) |file| { - defer file.close(); - try entry.writeAll(file); - } - }, - .sym_link => { - const file_name = stripComponents(entry.name, strip_components); - if (file_name.len == 0) return error.BadFileName; - const link_name = entry.link_name; - try self.symLink(link_name, file_name); - }, - } + while (true) { + if (iter.next() catch |err| switch (err) { + error.TarUnsupportedHeader => continue, + else => return err, + }) |entry| { + switch (entry.kind) { + .directory => {}, // skip empty + .file => { + if (entry.size == 0 and entry.name.len == 0) continue; + const file_name = stripComponents(entry.name, strip_components); + if (file_name.len == 0) return error.BadFileName; + if (try self.createFile(file_name)) |file| { + defer file.close(); + try entry.writeAll(file); + } + }, + .sym_link => { + const file_name = stripComponents(entry.name, strip_components); + if (file_name.len == 0) return error.BadFileName; + const link_name = entry.link_name; + try self.symLink(link_name, file_name); + }, + } + } else break; } } From 
fa81a18e26af8e1cb94f48ca0f83eacb3d8b88f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Sat, 16 Mar 2024 20:20:38 +0100 Subject: [PATCH 05/16] package: remove ignored error This error type is ignored during tar unpacking. --- src/Package/Fetch.zig | 7 ------- src/Package/Unpack.zig | 7 ------- 2 files changed, 14 deletions(-) diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index e68b177c7b16..c9126b221e41 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -1204,13 +1204,6 @@ fn checkUnpackErrors(f: *Fetch, unpack: *Unpack) RunError!void { }), })); }, - .unsupported_file_type => |info| { - eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("file '{s}' has unsupported type '{c}'", .{ - info.file_name, info.file_type, - }), - })); - }, } } diff --git a/src/Package/Unpack.zig b/src/Package/Unpack.zig index e703dda8545d..878479ca72b7 100644 --- a/src/Package/Unpack.zig +++ b/src/Package/Unpack.zig @@ -18,10 +18,6 @@ pub const Error = union(enum) { code: anyerror, file_name: []const u8, }, - unsupported_file_type: struct { - file_name: []const u8, - file_type: u8, - }, }; pub fn init(allocator: std.mem.Allocator, root: fs.Dir) Self { @@ -41,9 +37,6 @@ pub fn deinit(self: *Self) void { .unable_to_create_file => |info| { self.allocator.free(info.file_name); }, - .unsupported_file_type => |info| { - self.allocator.free(info.file_name); - }, } } self.errors.deinit(self.allocator); From 1db3647a78060271b4b5c19fd8e4a10ced0de872 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 18 Mar 2024 16:37:00 +0100 Subject: [PATCH 06/16] package: filter errors by include path Option to remove errors from paths not included in package. 
--- src/Package/Fetch.zig | 2 +- src/Package/Unpack.zig | 138 +++++++++++++++++++++++++++++++---------- 2 files changed, 107 insertions(+), 33 deletions(-) diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index c9126b221e41..1a58292e6cba 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -1532,7 +1532,7 @@ fn normalizePath(bytes: []u8) void { std.mem.replaceScalar(u8, bytes, fs.path.sep, canonical_sep); } -const Filter = struct { +pub const Filter = struct { include_paths: std.StringArrayHashMapUnmanaged(void) = .{}, /// sub_path is relative to the package root. diff --git a/src/Package/Unpack.zig b/src/Package/Unpack.zig index 878479ca72b7..148769a2124a 100644 --- a/src/Package/Unpack.zig +++ b/src/Package/Unpack.zig @@ -5,8 +5,7 @@ const ErrorBundle = std.zig.ErrorBundle; allocator: std.mem.Allocator, root: fs.Dir, - -errors: std.ArrayListUnmanaged(Error) = .{}, +errors: Errors, pub const Error = union(enum) { unable_to_create_sym_link: struct { @@ -18,17 +17,28 @@ pub const Error = union(enum) { code: anyerror, file_name: []const u8, }, + + pub fn filtered(self: Error, filter: Filter) bool { + switch (self) { + .unable_to_create_file => |info| return !filter.includePath(info.file_name), + .unable_to_create_sym_link => |info| return !filter.includePath(info.target_path), + } + } }; -pub fn init(allocator: std.mem.Allocator, root: fs.Dir) Self { - return .{ - .allocator = allocator, - .root = root, - }; -} +pub const Errors = struct { + allocator: std.mem.Allocator, + list: std.ArrayListUnmanaged(Error) = .{}, -pub fn deinit(self: *Self) void { - for (self.errors.items) |item| { + pub fn deinit(self: *Errors) void { + for (self.list.items) |item| { + self.free(item); + } + self.list.deinit(self.allocator); + self.* = undefined; + } + + fn free(self: *Errors, item: Error) void { switch (item) { .unable_to_create_sym_link => |info| { self.allocator.free(info.target_path); @@ -39,8 +49,49 @@ pub fn deinit(self: *Self) void { }, } } - 
self.errors.deinit(self.allocator); - self.* = undefined; + + pub fn count(self: *Errors) usize { + return self.list.items.len; + } + + fn createFile(self: *Errors, sub_path: []const u8, err: anyerror) !void { + try self.list.append(self.allocator, .{ .unable_to_create_file = .{ + .code = err, + .file_name = try self.allocator.dupe(u8, sub_path), + } }); + } + + fn symLink(self: *Errors, target_path: []const u8, sym_link_path: []const u8, err: anyerror) !void { + try self.list.append(self.allocator, .{ .unable_to_create_sym_link = .{ + .code = err, + .target_path = try self.allocator.dupe(u8, target_path), + .sym_link_path = try self.allocator.dupe(u8, sym_link_path), + } }); + } + + pub fn remove(self: *Errors, filter: Filter) !void { + var i = self.list.items.len; + while (i > 0) { + i -= 1; + const item = self.list.items[i]; + if (item.filtered(filter)) { + _ = self.list.swapRemove(i); + self.free(item); + } + } + } +}; + +pub fn init(allocator: std.mem.Allocator, root: fs.Dir) Self { + return .{ + .allocator = allocator, + .errors = Errors{ .allocator = allocator }, + .root = root, + }; +} + +pub fn deinit(self: *Self) void { + self.errors.deinit(); } const Self = @This(); @@ -143,7 +194,7 @@ pub fn directory(self: *Self, source: fs.Dir) !void { } pub fn hasErrors(self: *Self) bool { - return self.errors.items.len > 0; + return self.errors.count() > 0; } fn copyFile(source_dir: fs.Dir, source_path: []const u8, dest_dir: fs.Dir, dest_path: []const u8) !void { @@ -160,21 +211,14 @@ fn copyFile(source_dir: fs.Dir, source_path: []const u8, dest_dir: fs.Dir, dest_ /// Errors are collected in errors list. 
fn createFile(self: *Self, sub_path: []const u8) !?fs.File { return createFilePath(self.root, sub_path) catch |err| { - try self.errors.append(self.allocator, .{ .unable_to_create_file = .{ - .code = err, - .file_name = try self.allocator.dupe(u8, sub_path), - } }); + try self.errors.createFile(sub_path, err); return null; }; } fn symLink(self: *Self, target_path: []const u8, sym_link_path: []const u8) !void { symLinkPath(self.root, target_path, sym_link_path) catch |err| { - try self.errors.append(self.allocator, .{ .unable_to_create_sym_link = .{ - .code = err, - .target_path = try self.allocator.dupe(u8, target_path), - .sym_link_path = try self.allocator.dupe(u8, sym_link_path), - } }); + try self.errors.symLink(target_path, sym_link_path, err); }; } @@ -254,7 +298,7 @@ test gitPack { // unpack git pack { - var unpack = Unpack{ .allocator = testing.allocator, .root = tmp.dir }; + var unpack = Unpack.init(testing.allocator, tmp.dir); defer unpack.deinit(); const commit_id = try git.parseOid("dd582c0720819ab7130b103635bd7271b9fd4feb"); try unpack.gitPack(commit_id, fbs.reader()); @@ -285,7 +329,7 @@ test tarball { { var fbs = std.io.fixedBufferStream(&buf); - var unpack = Unpack{ .allocator = testing.allocator, .root = tmp.dir }; + var unpack = Unpack.init(testing.allocator, tmp.dir); defer unpack.deinit(); try unpack.tarball(fbs.reader()); } @@ -313,20 +357,22 @@ test directory { var dest = testing.tmpDir(.{ .iterate = true }); defer dest.cleanup(); - var unpack = Unpack{ .allocator = testing.allocator, .root = dest.dir }; + var unpack = Unpack.init(testing.allocator, dest.dir); defer unpack.deinit(); try unpack.directory(source.dir); try expectDirFiles(dest.dir, paths); } -test "collect errors" { - // Tarball with two files with same path. - // Unpack will have 1 error in errors list. +test "collect/filter errors" { + const gpa = std.testing.allocator; + // Tarball with duplicating files path to simulate fs write fail. 
const paths: []const []const u8 = &.{ "dir/file", + "dir1/file1", "dir/file", + "dir1/file1", }; var buf: [paths.len * @sizeOf(TarHeader)]u8 = undefined; try createTarball(paths, &buf); @@ -335,16 +381,44 @@ test "collect errors" { defer tmp.cleanup(); var fbs = std.io.fixedBufferStream(&buf); - var unpack = Unpack{ .allocator = testing.allocator, .root = tmp.dir }; + var unpack = Unpack.init(gpa, tmp.dir); defer unpack.deinit(); try unpack.tarball(fbs.reader()); + try testing.expect(unpack.hasErrors()); + + try expectDirFiles(tmp.dir, paths[0..2]); - try expectDirFiles(tmp.dir, paths[0..1]); + try testing.expectEqual(2, unpack.errors.count()); + try testing.expectEqualStrings(paths[2], unpack.errors.list.items[0].unable_to_create_file.file_name); + try testing.expectEqualStrings(paths[3], unpack.errors.list.items[1].unable_to_create_file.file_name); - try testing.expectEqual(1, unpack.errors.items.len); - try testing.expectEqualStrings(paths[1], unpack.errors.items[0].unable_to_create_file.file_name); + { + var filter: Filter = .{}; + defer filter.include_paths.deinit(gpa); + + // no filter all paths are included + try unpack.errors.remove(filter); + try testing.expectEqual(2, unpack.errors.count()); + + // dir1 is included, dir excluded + try filter.include_paths.put(gpa, "dir1", {}); + try unpack.errors.remove(filter); + try testing.expectEqual(1, unpack.errors.count()); + try testing.expectEqualStrings("dir1/file1", unpack.errors.list.items[0].unable_to_create_file.file_name); + } + { + var filter: Filter = .{}; + defer filter.include_paths.deinit(gpa); + + // only src included that filters all error paths + try filter.include_paths.put(gpa, "src", {}); + try unpack.errors.remove(filter); + try testing.expectEqual(0, unpack.errors.count()); + } } +const Filter = @import("Fetch.zig").Filter; + fn createTarball(paths: []const []const u8, buf: []u8) !void { var fbs = std.io.fixedBufferStream(buf); const writer = fbs.writer(); From 
8ab45829d5b4e0cb3dcff5507cd997926775f569 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 26 Feb 2024 00:26:38 +0100 Subject: [PATCH 07/16] fetch: fix failing test Prior to [this](https://github.com/ziglang/zig/commit/ef9966c9855dd855afda767f212abec6e5a36307#diff-08c935ef8c633bb630641d44230597f1cff5afb0e736d451e2ba5569fa53d915R805) commit tar was not a valid extension. After that this one is valid case. --- src/Package/Fetch.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index 1a58292e6cba..e581681cefeb 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -867,9 +867,9 @@ const FileType = enum { try std.testing.expectEqual(@as(?FileType, .@"tar.xz"), fromContentDisposition("ATTACHMENT; filename=\"stuff.tar.xz\"")); try std.testing.expectEqual(@as(?FileType, .@"tar.xz"), fromContentDisposition("attachment; FileName=\"stuff.tar.xz\"")); try std.testing.expectEqual(@as(?FileType, .@"tar.gz"), fromContentDisposition("attachment; FileName*=UTF-8\'\'xyz%2Fstuff.tar.gz")); + try std.testing.expectEqual(@as(?FileType, .tar), fromContentDisposition("attachment; FileName=\"stuff.tar\"")); try std.testing.expect(fromContentDisposition("attachment FileName=\"stuff.tar.gz\"") == null); - try std.testing.expect(fromContentDisposition("attachment; FileName=\"stuff.tar\"") == null); try std.testing.expect(fromContentDisposition("attachment; FileName\"stuff.gz\"") == null); try std.testing.expect(fromContentDisposition("attachment; size=42") == null); try std.testing.expect(fromContentDisposition("inline; size=42") == null); From e136ddf38f7b79c1712bcb4d05a1181c387eaf68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 18 Mar 2024 20:59:22 +0100 Subject: [PATCH 08/16] package: filter unpack errors for excluded files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before this any unpack error will be raised after the 
unpack step. Now this is postponed until we get manifest, and if then use included folders from manifest to filter errors which are not for the files from included folders. I made a folder which has symlinks and file names with same names in different casing. ``` sample ├── build.zig ├── build.zig.zon ├── dir1 │   └── dir2 │   └── dir3 │   ├── file1 │   ├── file2 │   └── file3 ├── dir4 │   └── dir5 │   └── dir6 │   ├── file1 -> ../../../dir1/dir2/dir3/file1 │   └── file2_symlink -> ../../../dir1/dir2/dir3/file2 ├── dir7 │   └── dir8 │   └── dir9 │   ├── file4 │   └── File4 └── src ├── main.zig └── root.zig ``` Then pack this into sample.tar and another one sample_src.tar where build.zig.zon has only src in paths (excluded all problematic folders): ```Zig .paths = .{ "src", }, ``` On Linux both tars get unpacked. On Windows and macOS both fail before this changes and second succeds after this change. Windows (with disabled symlinks): ``` C:\Users\ianic\code\zig-bin\zig.exe build --global-cache-dir zig-global-cache-release C:\Users\ianic\code\issues\fetch\build.zig.zon:78:20: error: unable to unpack tarball .url = "http://10.211.55.26:8000/sample.tar", ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ note: unable to create file 'dir7/dir8/dir9/file4': PathAlreadyExists note: unable to create symlink from 'dir4/dir5/dir6/file1' to '../../../dir1/dir2/dir3/file1': AccessDenied note: unable to create symlink from 'dir4/dir5/dir6/file2_symlink' to '../../../dir1/dir2/dir3/file2': AccessDenied C:\Users\ianic\code\issues\fetch\build.zig.zon:85:20: error: unable to unpack tarball .url = "http://10.211.55.26:8000/sample_src.tar", ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ note: unable to create file 'dir7/dir8/dir9/file4': PathAlreadyExists note: unable to create symlink from 'dir4/dir5/dir6/file1' to '../../../dir1/dir2/dir3/file1': AccessDenied note: unable to create symlink from 'dir4/dir5/dir6/file2_symlink' to '../../../dir1/dir2/dir3/file2': AccessDenied PS 
C:\Users\ianic\code\issues\fetch> C:\Users\ianic\code\zig-bin\bin\zig.exe build --global-cache-dir zig-global-cache-release C:\Users\ianic\code\zig-io\build\stage3\bin\zig.exe build --global-cache-dir zig-global-cache-master C:\Users\ianic\code\issues\fetch\build.zig.zon:78:20: error: unable to unpack .url = "http://10.211.55.26:8000/sample.tar", ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ note: unable to create file 'dir7/dir8/dir9/file4': PathAlreadyExists note: unable to create symlink from 'dir4/dir5/dir6/file1' to '../../../dir1/dir2/dir3/file1': AccessDenied note: unable to create symlink from 'dir4/dir5/dir6/file2_symlink' to '../../../dir1/dir2/dir3/file2': AccessDenied ``` macOS ``` Fetch Packages [45/40] /Users/ianic/code/zig/issues/fetch/build.zig.zon:75:20: error: unable to unpack tarball .url = "file:///Users/ianic/code/zig/issues/fetch/sample.tar", ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ note: unable to create file 'dir7/dir8/dir9/file4': PathAlreadyExists /Users/ianic/code/zig/issues/fetch/build.zig.zon:80:20: error: unable to unpack tarball .url = "file:///Users/ianic/code/zig/issues/fetch/sample_src.tar", ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fetch Packages [41/40] /Users/ianic/code/zig/issues/fetch/build.zig.zon:75:20: error: unable to unpack .url = "file:///Users/ianic/code/zig/issues/fetch/sample.tar", ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ note: unable to create file 'dir7/dir8/dir9/file4': PathAlreadyExists ``` --- src/Package/Fetch.zig | 47 ++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index e581681cefeb..24cdd8644f57 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -461,7 +461,8 @@ fn runResource( }; defer tmp_directory.handle.close(); - try unpackResource(f, resource, uri_path, tmp_directory); + var errors = try unpackResource(f, resource, uri_path, tmp_directory); + defer 
errors.deinit(); // Load, parse, and validate the unpacked build.zig.zon file. It is allowed // for the file to be missing, in which case this fetched package is @@ -479,6 +480,9 @@ fn runResource( .include_paths = if (f.manifest) |m| m.paths else .{}, }; + try errors.remove(filter); + try f.emitUnpackErrors(&errors); + // Compute the package hash based on the remaining files in the temporary // directory. @@ -1032,7 +1036,7 @@ fn unpackResource( resource: *Resource, uri_path: []const u8, tmp_directory: Cache.Directory, -) RunError!void { +) RunError!Unpack.Errors { const eb = &f.error_bundle; const file_type = switch (resource.*) { .file => FileType.fromPath(uri_path) orelse @@ -1096,15 +1100,15 @@ fn unpackResource( .dir => |dir| return try f.recursiveDirectoryCopy(dir, tmp_directory.handle, uri_path), }; - switch (file_type) { + return switch (file_type) { .tar => try unpackTarball(f, tmp_directory.handle, resource.reader()), - .@"tar.gz" => { + .@"tar.gz" => brk: { const reader = resource.reader(); var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader); var dcp = std.compress.gzip.decompressor(br.reader()); - try unpackTarball(f, tmp_directory.handle, dcp.reader()); + break :brk try unpackTarball(f, tmp_directory.handle, dcp.reader()); }, - .@"tar.xz" => { + .@"tar.xz" => brk: { const gpa = f.arena.child_allocator; const reader = resource.reader(); var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader); @@ -1115,9 +1119,9 @@ fn unpackResource( )); }; defer dcp.deinit(); - try unpackTarball(f, tmp_directory.handle, dcp.reader()); + break :brk try unpackTarball(f, tmp_directory.handle, dcp.reader()); }, - .@"tar.zst" => { + .@"tar.zst" => brk: { const window_size = std.compress.zstd.DecompressorOptions.default_window_buffer_len; const window_buffer = try f.arena.allocator().create([window_size]u8); const reader = resource.reader(); @@ -1125,28 +1129,26 @@ fn unpackResource( var dcp = 
std.compress.zstd.decompressor(br.reader(), .{ .window_buffer = window_buffer, }); - return unpackTarball(f, tmp_directory.handle, dcp.reader()); + break :brk try unpackTarball(f, tmp_directory.handle, dcp.reader()); }, .git_pack => try unpackGitPack(f, tmp_directory.handle, resource), - } + }; } const Unpack = @import("Unpack.zig"); -fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!void { +fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!Unpack.Errors { var unpack = Unpack.init(f.arena.child_allocator, out_dir); - defer unpack.deinit(); unpack.tarball(reader) catch |err| return f.failMsg( err, "unable to unpack tarball to temporary directory: {s}", .{@errorName(err)}, ); - try f.checkUnpackErrors(&unpack); + return unpack.errors; } -fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) RunError!void { +fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) RunError!Unpack.Errors { var unpack = Unpack.init(f.arena.child_allocator, out_dir); - defer unpack.deinit(); const want_oid = resource.git.want_oid; const reader = resource.git.fetch_stream.reader(); @@ -1155,18 +1157,17 @@ fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) RunError!void "unable to unpack git files: {s}", .{@errorName(err)}, ); - try f.checkUnpackErrors(&unpack); + return unpack.errors; } -fn recursiveDirectoryCopy(f: *Fetch, dir: fs.Dir, out_dir: fs.Dir, uri_path: []const u8) RunError!void { +fn recursiveDirectoryCopy(f: *Fetch, dir: fs.Dir, out_dir: fs.Dir, uri_path: []const u8) RunError!Unpack.Errors { var unpack = Unpack.init(f.arena.child_allocator, out_dir); - defer unpack.deinit(); unpack.directory(dir) catch |err| return f.failMsg( err, "unable to copy directory '{s}': {s}", .{ uri_path, @errorName(err) }, ); - try f.checkUnpackErrors(&unpack); + return unpack.errors; } fn failMsg(f: *Fetch, err: anyerror, comptime fmt: []const u8, args: anytype) RunError { @@ -1177,18 +1178,18 @@ fn failMsg(f: *Fetch, 
err: anyerror, comptime fmt: []const u8, args: anytype) Ru } } -fn checkUnpackErrors(f: *Fetch, unpack: *Unpack) RunError!void { - if (!unpack.hasErrors()) return; +fn emitUnpackErrors(f: *Fetch, errors: *Unpack.Errors) RunError!void { + if (errors.count() == 0) return; const eb = &f.error_bundle; - const notes_len: u32 = @intCast(unpack.errors.items.len); + const notes_len: u32 = @intCast(errors.count()); try eb.addRootErrorMessage(.{ .msg = try eb.addString("unable to unpack"), .src_loc = try f.srcLoc(f.location_tok), .notes_len = notes_len, }); const notes_start = try eb.reserveNotes(notes_len); - for (unpack.errors.items, notes_start..) |item, note_i| { + for (errors.list.items, notes_start..) |item, note_i| { switch (item) { .unable_to_create_sym_link => |info| { eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ From 03e5c66633fa79cb9cf8dc96453c4cd800e6d650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Tue, 19 Mar 2024 15:50:23 +0100 Subject: [PATCH 09/16] pacakge.fetch: update unpack resource comments --- src/Package/Fetch.zig | 108 ++++++++++++++++++----------------------- src/Package/Unpack.zig | 20 ++++---- 2 files changed, 59 insertions(+), 69 deletions(-) diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index 24cdd8644f57..e1ad188540d9 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -435,6 +435,7 @@ fn runResource( ) RunError!void { defer resource.deinit(); const arena = f.arena.allocator(); + const gpa = f.arena.child_allocator; const eb = &f.error_bundle; const s = fs.path.sep_str; const cache_root = f.job_queue.global_cache; @@ -461,30 +462,32 @@ fn runResource( }; defer tmp_directory.handle.close(); - var errors = try unpackResource(f, resource, uri_path, tmp_directory); - defer errors.deinit(); + // Fetch and unpack a URL into a temporary directory. 
+ var unpack = Unpack.init(gpa, tmp_directory.handle); + try unpackResource(f, resource, &unpack, uri_path); + defer unpack.deinit(); // Load, parse, and validate the unpacked build.zig.zon file. It is allowed // for the file to be missing, in which case this fetched package is // considered to be a "naked" package. try loadManifest(f, .{ .root_dir = tmp_directory }); - - // Apply the manifest's inclusion rules to the temporary directory by - // deleting excluded files. If any error occurred for files that were - // ultimately excluded, those errors should be ignored, such as failure to - // create symlinks that weren't supposed to be included anyway. - - // Empty directories have already been omitted by `unpackResource`. - + // Manifest's inclusion rules. const filter: Filter = .{ .include_paths = if (f.manifest) |m| m.paths else .{}, }; - try errors.remove(filter); - try f.emitUnpackErrors(&errors); + // Apply the manifest's inclusion rules to the errors collected during + // unpacking resource. If any error occurred for files that were + // ultimately excluded, those errors will be ignored, such as failure to + // create symlinks that weren't supposed to be included anyway. + try unpack.filterErrors(filter); + try f.bundleUnpackErrors(&unpack); // Compute the package hash based on the remaining files in the temporary // directory. + // It will also apply the manifest's inclusion rules to the temporary + // directory by deleting excluded files. + // Empty directories have already been omitted by `unpackResource`. 
if (native_os == .linux and f.job_queue.work_around_btrfs_bug) { // https://github.com/ziglang/zig/issues/17095 @@ -1034,9 +1037,9 @@ fn initResource(f: *Fetch, uri: std.Uri, server_header_buffer: []u8) RunError!Re fn unpackResource( f: *Fetch, resource: *Resource, + unpack: *Unpack, uri_path: []const u8, - tmp_directory: Cache.Directory, -) RunError!Unpack.Errors { +) RunError!void { const eb = &f.error_bundle; const file_type = switch (resource.*) { .file => FileType.fromPath(uri_path) orelse @@ -1097,18 +1100,22 @@ fn unpackResource( .git => .git_pack, - .dir => |dir| return try f.recursiveDirectoryCopy(dir, tmp_directory.handle, uri_path), + .dir => |dir| return unpack.directory(dir) catch |err| return f.failMsg( + err, + "unable to copy directory '{s}': {s}", + .{ uri_path, @errorName(err) }, + ), }; - return switch (file_type) { - .tar => try unpackTarball(f, tmp_directory.handle, resource.reader()), - .@"tar.gz" => brk: { + switch (file_type) { + .tar => unpack.tarball(resource.reader()) catch |err| return f.unpackTarballError(err), + .@"tar.gz" => { const reader = resource.reader(); var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader); var dcp = std.compress.gzip.decompressor(br.reader()); - break :brk try unpackTarball(f, tmp_directory.handle, dcp.reader()); + unpack.tarball(dcp.reader()) catch |err| return f.unpackTarballError(err); }, - .@"tar.xz" => brk: { + .@"tar.xz" => { const gpa = f.arena.child_allocator; const reader = resource.reader(); var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader); @@ -1119,9 +1126,9 @@ fn unpackResource( )); }; defer dcp.deinit(); - break :brk try unpackTarball(f, tmp_directory.handle, dcp.reader()); + unpack.tarball(dcp.reader()) catch |err| return f.unpackTarballError(err); }, - .@"tar.zst" => brk: { + .@"tar.zst" => { const window_size = std.compress.zstd.DecompressorOptions.default_window_buffer_len; const window_buffer = try 
f.arena.allocator().create([window_size]u8); const reader = resource.reader(); @@ -1129,45 +1136,22 @@ fn unpackResource( var dcp = std.compress.zstd.decompressor(br.reader(), .{ .window_buffer = window_buffer, }); - break :brk try unpackTarball(f, tmp_directory.handle, dcp.reader()); + unpack.tarball(dcp.reader()) catch |err| return f.unpackTarballError(err); }, - .git_pack => try unpackGitPack(f, tmp_directory.handle, resource), - }; -} - -const Unpack = @import("Unpack.zig"); - -fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!Unpack.Errors { - var unpack = Unpack.init(f.arena.child_allocator, out_dir); - unpack.tarball(reader) catch |err| return f.failMsg( - err, - "unable to unpack tarball to temporary directory: {s}", - .{@errorName(err)}, - ); - return unpack.errors; -} - -fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) RunError!Unpack.Errors { - var unpack = Unpack.init(f.arena.child_allocator, out_dir); - - const want_oid = resource.git.want_oid; - const reader = resource.git.fetch_stream.reader(); - unpack.gitPack(want_oid, reader) catch |err| return f.failMsg( - err, - "unable to unpack git files: {s}", - .{@errorName(err)}, - ); - return unpack.errors; + .git_pack => { + const want_oid = resource.git.want_oid; + const reader = resource.git.fetch_stream.reader(); + unpack.gitPack(want_oid, reader) catch |err| return f.failMsg( + err, + "unable to unpack git files: {s}", + .{@errorName(err)}, + ); + }, + } } -fn recursiveDirectoryCopy(f: *Fetch, dir: fs.Dir, out_dir: fs.Dir, uri_path: []const u8) RunError!Unpack.Errors { - var unpack = Unpack.init(f.arena.child_allocator, out_dir); - unpack.directory(dir) catch |err| return f.failMsg( - err, - "unable to copy directory '{s}': {s}", - .{ uri_path, @errorName(err) }, - ); - return unpack.errors; +fn unpackTarballError(f: *Fetch, err: anyerror) RunError { + return f.failMsg(err, "unable to unpack tarball to temporary directory: {s}", .{@errorName(err)}); } fn 
failMsg(f: *Fetch, err: anyerror, comptime fmt: []const u8, args: anytype) RunError { @@ -1178,11 +1162,13 @@ fn failMsg(f: *Fetch, err: anyerror, comptime fmt: []const u8, args: anytype) Ru } } -fn emitUnpackErrors(f: *Fetch, errors: *Unpack.Errors) RunError!void { - if (errors.count() == 0) return; +fn bundleUnpackErrors(f: *Fetch, unpack: *Unpack) RunError!void { + if (!unpack.hasErrors()) return; + var errors = unpack.errors; const eb = &f.error_bundle; const notes_len: u32 = @intCast(errors.count()); + try eb.addRootErrorMessage(.{ .msg = try eb.addString("unable to unpack"), .src_loc = try f.srcLoc(f.location_tok), @@ -1602,8 +1588,10 @@ const Package = @import("../Package.zig"); const Manifest = Package.Manifest; const ErrorBundle = std.zig.ErrorBundle; const native_os = builtin.os.tag; +const Unpack = @import("Unpack.zig"); test { _ = Filter; _ = FileType; + _ = Unpack; } diff --git a/src/Package/Unpack.zig b/src/Package/Unpack.zig index 148769a2124a..b3e344bc80aa 100644 --- a/src/Package/Unpack.zig +++ b/src/Package/Unpack.zig @@ -1,7 +1,7 @@ const std = @import("std"); const fs = std.fs; const git = @import("Fetch/git.zig"); -const ErrorBundle = std.zig.ErrorBundle; +const Filter = @import("Fetch.zig").Filter; allocator: std.mem.Allocator, root: fs.Dir, @@ -18,7 +18,7 @@ pub const Error = union(enum) { file_name: []const u8, }, - pub fn filtered(self: Error, filter: Filter) bool { + pub fn excluded(self: Error, filter: Filter) bool { switch (self) { .unable_to_create_file => |info| return !filter.includePath(info.file_name), .unable_to_create_sym_link => |info| return !filter.includePath(info.target_path), @@ -69,12 +69,12 @@ pub const Errors = struct { } }); } - pub fn remove(self: *Errors, filter: Filter) !void { + pub fn filterWith(self: *Errors, filter: Filter) !void { var i = self.list.items.len; while (i > 0) { i -= 1; const item = self.list.items[i]; - if (item.filtered(filter)) { + if (item.excluded(filter)) { _ = self.list.swapRemove(i); 
self.free(item); } @@ -197,6 +197,10 @@ pub fn hasErrors(self: *Self) bool { return self.errors.count() > 0; } +pub fn filterErrors(self: *Self, filter: Filter) !void { + try self.errors.filterWith(filter); +} + fn copyFile(source_dir: fs.Dir, source_path: []const u8, dest_dir: fs.Dir, dest_path: []const u8) !void { source_dir.copyFile(source_path, dest_dir, dest_path, .{}) catch |err| switch (err) { error.FileNotFound => { @@ -397,12 +401,12 @@ test "collect/filter errors" { defer filter.include_paths.deinit(gpa); // no filter all paths are included - try unpack.errors.remove(filter); + try unpack.filterErrors(filter); try testing.expectEqual(2, unpack.errors.count()); // dir1 is included, dir excluded try filter.include_paths.put(gpa, "dir1", {}); - try unpack.errors.remove(filter); + try unpack.filterErrors(filter); try testing.expectEqual(1, unpack.errors.count()); try testing.expectEqualStrings("dir1/file1", unpack.errors.list.items[0].unable_to_create_file.file_name); } @@ -412,13 +416,11 @@ test "collect/filter errors" { // only src included that filters all error paths try filter.include_paths.put(gpa, "src", {}); - try unpack.errors.remove(filter); + try unpack.filterErrors(filter); try testing.expectEqual(0, unpack.errors.count()); } } -const Filter = @import("Fetch.zig").Filter; - fn createTarball(paths: []const []const u8, buf: []u8) !void { var fbs = std.io.fixedBufferStream(buf); const writer = fbs.writer(); From c0f0e2fe65b0210e0ca2d61af3f550ad0e2d87b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Tue, 19 Mar 2024 17:24:12 +0100 Subject: [PATCH 10/16] std.tar: enable setting executable bit In pipeToFileSystem when option mode_mode option is set on posix file systems. 
--- lib/std/tar.zig | 85 ++++++++++++++++++++++++++++--- lib/std/tar/testdata/example.tar | Bin 10240 -> 10240 bytes 2 files changed, 78 insertions(+), 7 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index bf11caa1fb43..813d12622c11 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -25,8 +25,20 @@ pub const output = @import("tar/output.zig"); pub const PipeOptions = struct { /// Number of directory levels to skip when extracting files. strip_components: u32 = 0, + /// How to handle the "mode" property of files from within the tar file. + mode_mode: ModeMode = .executable_bit_only, /// Prevents creation of empty directories. exclude_empty_directories: bool = false, + + pub const ModeMode = enum { + /// The mode from the tar file is completely ignored. Files are created + /// with the default mode when creating files. + ignore, + /// The mode from the tar file is inspected for the owner executable bit + /// only. This bit is copied to the group and other executable bits. + /// Other bits of the mode are left as the default when creating files. 
+ executable_bit_only, + }; }; const Header = struct { @@ -526,7 +538,7 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) const file_name = stripComponents(file.name, options.strip_components); if (file_name.len == 0) return error.BadFileName; - var fs_file = try createDirAndFile(dir, file_name); + var fs_file = try createDirAndFile(dir, file_name, fileMode(file.mode, options)); defer fs_file.close(); try file.writeAll(fs_file); }, @@ -543,12 +555,31 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) } } -fn createDirAndFile(dir: std.fs.Dir, file_name: []const u8) !std.fs.File { - const fs_file = dir.createFile(file_name, .{ .exclusive = true }) catch |err| { +const default_mode = std.fs.File.default_mode; + +fn fileMode(mode: u32, options: PipeOptions) std.fs.File.Mode { + const S = std.posix.S; + if (!std.fs.has_executable_bit or + options.mode_mode == .ignore or + mode & S.IXUSR == 0) + return default_mode; + return default_mode | S.IXUSR | S.IXGRP | S.IXOTH; +} + +test fileMode { + if (!std.fs.has_executable_bit) return error.SkipZigTest; + try testing.expectEqual(default_mode, fileMode(0o744, PipeOptions{ .mode_mode = .ignore })); + try testing.expectEqual(0o777, fileMode(0o744, PipeOptions{})); + try testing.expectEqual(0o666, fileMode(0o644, PipeOptions{})); + try testing.expectEqual(0o666, fileMode(0o655, PipeOptions{})); +} + +fn createDirAndFile(dir: std.fs.Dir, file_name: []const u8, mode: std.fs.File.Mode) !std.fs.File { + const fs_file = dir.createFile(file_name, .{ .exclusive = true, .mode = mode }) catch |err| { if (err == error.FileNotFound) { if (std.fs.path.dirname(file_name)) |dir_name| { try dir.makePath(dir_name); - return try dir.createFile(file_name, .{ .exclusive = true }); + return try dir.createFile(file_name, .{ .exclusive = true, .mode = mode }); } } return err; @@ -784,9 +815,9 @@ test "create file and symlink" { var root = testing.tmpDir(.{}); defer root.cleanup(); - var 
file = try createDirAndFile(root.dir, "file1"); + var file = try createDirAndFile(root.dir, "file1", default_mode); file.close(); - file = try createDirAndFile(root.dir, "a/b/c/file2"); + file = try createDirAndFile(root.dir, "a/b/c/file2", default_mode); file.close(); createDirAndSymlink(root.dir, "a/b/c/file2", "symlink1") catch |err| { @@ -798,7 +829,7 @@ test "create file and symlink" { // Danglink symlnik, file created later try createDirAndSymlink(root.dir, "../../../g/h/i/file4", "j/k/l/symlink3"); - file = try createDirAndFile(root.dir, "g/h/i/file4"); + file = try createDirAndFile(root.dir, "g/h/i/file4", default_mode); file.close(); } @@ -893,6 +924,7 @@ test pipeToFileSystem { pipeToFileSystem(dir, reader, .{ .strip_components = 1, .exclude_empty_directories = true, + .mode_mode = .ignore, }) catch |err| { // Skip on platform which don't support symlinks if (err == error.UnableToCreateSymLink) return error.SkipZigTest; @@ -911,6 +943,45 @@ test pipeToFileSystem { ); } +test "executable bit" { + if (!std.fs.has_executable_bit) return error.SkipZigTest; + const S = std.posix.S; + + const data = @embedFile("tar/testdata/example.tar"); + + for ([_]PipeOptions.ModeMode{ .ignore, .executable_bit_only }) |opt| { + var fbs = std.io.fixedBufferStream(data); + const reader = fbs.reader(); + + var tmp = testing.tmpDir(.{ .no_follow = true }); + defer tmp.cleanup(); + + pipeToFileSystem(tmp.dir, reader, .{ + .strip_components = 1, + .exclude_empty_directories = true, + .mode_mode = opt, + }) catch |err| { + // Skip on platform which don't support symlinks + if (err == error.UnableToCreateSymLink) return error.SkipZigTest; + return err; + }; + + const fs = try tmp.dir.statFile("a/file"); + try testing.expect(fs.kind == .file); + if (opt == .executable_bit_only) { + // Executable bit is set for user, group and others + try testing.expect(fs.mode & S.IXUSR > 0); + try testing.expect(fs.mode & S.IXGRP > 0); + try testing.expect(fs.mode & S.IXOTH > 0); + } + if (opt == 
.ignore) { + try testing.expect(fs.mode & S.IXUSR == 0); + try testing.expect(fs.mode & S.IXGRP == 0); + try testing.expect(fs.mode & S.IXOTH == 0); + } + } +} + fn normalizePath(bytes: []u8) []u8 { const canonical_sep = std.fs.path.sep_posix; if (std.fs.path.sep == canonical_sep) return bytes; diff --git a/lib/std/tar/testdata/example.tar b/lib/std/tar/testdata/example.tar index d66c407eaaf012d3b4ca11bece236b5a56b9eb67..deeeb65c0090df13f0b4a3cd5fcd99b84046a18f 100644 GIT binary patch delta 57 zcmZn&Xb9NQ%_d}SW^8P1U}(-@U}$J&Zo;5oI@yphWiumtI^)C)*~zUO!mP$d3 Date: Wed, 20 Mar 2024 00:52:03 +0100 Subject: [PATCH 11/16] fix Windows build and test --- lib/std/tar.zig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 813d12622c11..74458b0f193b 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -549,7 +549,7 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) // The data inside the symbolic link. const link_name = file.link_name; - try createDirAndSymlink(dir, link_name, file_name); + createDirAndSymlink(dir, link_name, file_name) catch return error.UnableToCreateSymLink; }, } } @@ -558,10 +558,10 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) const default_mode = std.fs.File.default_mode; fn fileMode(mode: u32, options: PipeOptions) std.fs.File.Mode { + if (!std.fs.has_executable_bit or options.mode_mode == .ignore) + return default_mode; const S = std.posix.S; - if (!std.fs.has_executable_bit or - options.mode_mode == .ignore or - mode & S.IXUSR == 0) + if (mode & S.IXUSR == 0) return default_mode; return default_mode | S.IXUSR | S.IXGRP | S.IXOTH; } From 593b87179ffd0a877e31e0e39f3fff393548a7db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 20 Mar 2024 16:05:50 +0100 Subject: [PATCH 12/16] fetch.git: add repository iterator --- src/Package/Fetch/git.zig | 257 +++++++++++++++++++++++++++++++------- 1 file changed, 
213 insertions(+), 44 deletions(-) diff --git a/src/Package/Fetch/git.zig b/src/Package/Fetch/git.zig index 7b019d489909..56f763f206af 100644 --- a/src/Package/Fetch/git.zig +++ b/src/Package/Fetch/git.zig @@ -146,15 +146,15 @@ pub const Repository = struct { }; }; - fn next(iterator: *TreeIterator) !?Entry { - if (iterator.pos == iterator.data.len) return null; + fn next(iter: *TreeIterator) !?Entry { + if (iter.pos == iter.data.len) return null; - const mode_end = mem.indexOfScalarPos(u8, iterator.data, iterator.pos, ' ') orelse return error.InvalidTree; + const mode_end = mem.indexOfScalarPos(u8, iter.data, iter.pos, ' ') orelse return error.InvalidTree; const mode: packed struct { permission: u9, unused: u3, type: u4, - } = @bitCast(std.fmt.parseUnsigned(u16, iterator.data[iterator.pos..mode_end], 8) catch return error.InvalidTree); + } = @bitCast(std.fmt.parseUnsigned(u16, iter.data[iter.pos..mode_end], 8) catch return error.InvalidTree); const @"type" = std.meta.intToEnum(Entry.Type, mode.type) catch return error.InvalidTree; const executable = switch (mode.permission) { 0 => if (@"type" == .file) return error.InvalidTree else false, @@ -162,19 +162,131 @@ pub const Repository = struct { 0o755 => if (@"type" != .file) return error.InvalidTree else true, else => return error.InvalidTree, }; - iterator.pos = mode_end + 1; + iter.pos = mode_end + 1; - const name_end = mem.indexOfScalarPos(u8, iterator.data, iterator.pos, 0) orelse return error.InvalidTree; - const name = iterator.data[iterator.pos..name_end :0]; - iterator.pos = name_end + 1; + const name_end = mem.indexOfScalarPos(u8, iter.data, iter.pos, 0) orelse return error.InvalidTree; + const name = iter.data[iter.pos..name_end :0]; + iter.pos = name_end + 1; - if (iterator.pos + oid_length > iterator.data.len) return error.InvalidTree; - const oid = iterator.data[iterator.pos..][0..oid_length].*; - iterator.pos += oid_length; + if (iter.pos + oid_length > iter.data.len) return error.InvalidTree; + const 
oid = iter.data[iter.pos..][0..oid_length].*; + iter.pos += oid_length; return .{ .type = @"type", .executable = executable, .name = name, .oid = oid }; } }; + + /// Iterator over all repository entries at `commit_id`. + pub fn iterator(repository: *Repository, commit_oid: Oid) !Iterator { + const allocator = repository.odb.allocator; + try repository.odb.seekOid(commit_oid); + const tree_oid = tree_oid: { + const commit_object = try repository.odb.readObject(); + if (commit_object.type != .commit) return error.NotACommit; + break :tree_oid try getCommitTree(commit_object.data); + }; + var dirs = std.ArrayList(Iterator.Directory).init(allocator); + try dirs.append(.{ .oid = tree_oid, .path = "" }); + return .{ + .repository = repository, + .dirs = dirs, + }; + } + + pub const Iterator = struct { + pub const Entry = struct { + type: TreeIterator.Entry.Type, + executable: bool = false, + path: []const u8, + name: [:0]const u8, + data: []const u8, + + pub fn reader(entry: Entry) std.io.FixedBufferStream([]const u8) { + return std.io.fixedBufferStream(entry.data); + } + }; + const Directory = struct { + oid: Oid, + path: []const u8, + }; + + repository: *Repository, + tree_iter: ?TreeIterator = null, + tree_path: ?[]const u8 = null, + dirs: std.ArrayList(Directory), + + pub fn next(iter: *Iterator) !?Entry { + var odb = &iter.repository.odb; + const allocator = odb.allocator; + + while (true) { + if (iter.tree_iter) |*tree_iter| { + const tree_path = iter.tree_path.?; + if (try tree_iter.next()) |entry| { + switch (entry.type) { + .directory => { + try iter.dirs.append(.{ + .oid = entry.oid, + .path = try std.fs.path.join(allocator, &.{ tree_path, entry.name }), + }); + return .{ + .type = .directory, + .path = tree_path, + .name = entry.name, + .data = "", + }; + }, + .file, .symlink => { + try odb.seekOid(entry.oid); + const object = try odb.readObject(); + if (object.type != .blob) return error.InvalidFile; + + return .{ + .type = entry.type, + .executable = 
entry.executable, + .path = tree_path, + .name = entry.name, + .data = object.data, + }; + }, + .gitlink => {}, + } + } else { + allocator.free(tree_iter.data); + allocator.free(tree_path); + iter.tree_path = null; + iter.tree_iter = null; + } + } else { + if (iter.dirs.items.len == 0) return null; + const dir = iter.dirs.pop(); + + try odb.seekOid(dir.oid); + const object = try odb.readObject(); + if (object.type != .tree) return error.NotATree; + + iter.tree_iter = .{ .data = try allocator.dupe(u8, object.data) }; + iter.tree_path = dir.path; + } + } + } + + pub fn deinit(iter: *Iterator) void { + const allocator = iter.repository.odb.allocator; + + if (iter.tree_iter) |*tree_iter| { + allocator.free(tree_iter.data); + iter.tree_iter = null; + } + if (iter.tree_path) |tree_path| { + allocator.free(tree_path); + iter.tree_path = null; + } + for (iter.dirs.items) |dir| allocator.free(dir.path); + iter.dirs.deinit(); + iter.* = undefined; + } + }; }; /// A Git object database backed by a packfile. A packfile index is also used @@ -1327,23 +1439,14 @@ test "packfile indexing and checkout" { // 3. `git fsck` -> note the "dangling commit" ID (which matches the commit // checked out below) // 4. 
`git checkout dd582c0720819ab7130b103635bd7271b9fd4feb` - const testrepo_pack = @embedFile("git/testdata/testrepo.pack"); - - var git_dir = testing.tmpDir(.{}); - defer git_dir.cleanup(); - var pack_file = try git_dir.dir.createFile("testrepo.pack", .{ .read = true }); - defer pack_file.close(); - try pack_file.writeAll(testrepo_pack); - - var index_file = try git_dir.dir.createFile("testrepo.idx", .{ .read = true }); - defer index_file.close(); - try indexPack(testing.allocator, pack_file, index_file.writer()); + var repo = try TestRepo.open(); + defer repo.close(); // Arbitrary size limit on files read while checking the repository contents // (all files in the test repo are known to be much smaller than this) const max_file_size = 4096; - const index_file_data = try git_dir.dir.readFileAlloc(testing.allocator, "testrepo.idx", max_file_size); + const index_file_data = try repo.git_dir.dir.readFileAlloc(testing.allocator, "testrepo.idx", max_file_size); defer testing.allocator.free(index_file_data); // testrepo.idx is generated by Git. The index created by this file should // match it exactly. 
Running `git verify-pack -v testrepo.pack` can verify @@ -1351,32 +1454,14 @@ test "packfile indexing and checkout" { const testrepo_idx = @embedFile("git/testdata/testrepo.idx"); try testing.expectEqualSlices(u8, testrepo_idx, index_file_data); - var repository = try Repository.init(testing.allocator, pack_file, index_file); + var repository = try Repository.init(testing.allocator, repo.pack_file, repo.index_file); defer repository.deinit(); var worktree = testing.tmpDir(.{ .iterate = true }); defer worktree.cleanup(); - const commit_id = try parseOid("dd582c0720819ab7130b103635bd7271b9fd4feb"); - try repository.checkout(worktree.dir, commit_id); + try repository.checkout(worktree.dir, repo.commit_id); - const expected_files: []const []const u8 = &.{ - "dir/file", - "dir/subdir/file", - "dir/subdir/file2", - "dir2/file", - "dir3/file", - "dir3/file2", - "file", - "file2", - "file3", - "file4", - "file5", - "file6", - "file7", - "file8", - "file9", - }; var actual_files: std.ArrayListUnmanaged([]u8) = .{}; defer actual_files.deinit(testing.allocator); defer for (actual_files.items) |file| testing.allocator.free(file); @@ -1394,7 +1479,7 @@ test "packfile indexing and checkout" { return mem.lessThan(u8, a, b); } }.lessThan); - try testing.expectEqualDeep(expected_files, actual_files.items); + try testing.expectEqualDeep(repo.expected_files, actual_files.items); const expected_file_contents = \\revision 1 @@ -1417,6 +1502,90 @@ test "packfile indexing and checkout" { try testing.expectEqualStrings(expected_file_contents, actual_file_contents); } +const TestRepo = struct { + git_dir: testing.TmpDir, + pack_file: std.fs.File, + index_file: std.fs.File, + commit_id: Oid, + expected_files: []const []const u8 = &.{ + "dir/file", + "dir/subdir/file", + "dir/subdir/file2", + "dir2/file", + "dir3/file", + "dir3/file2", + "file", + "file2", + "file3", + "file4", + "file5", + "file6", + "file7", + "file8", + "file9", + }, + + fn open() !TestRepo { + const testrepo_pack = 
@embedFile("git/testdata/testrepo.pack"); + + var git_dir = testing.tmpDir(.{}); + errdefer git_dir.cleanup(); + + var pack_file = try git_dir.dir.createFile("testrepo.pack", .{ .read = true }); + errdefer pack_file.close(); + try pack_file.writeAll(testrepo_pack); + + var index_file = try git_dir.dir.createFile("testrepo.idx", .{ .read = true }); + errdefer index_file.close(); + try indexPack(testing.allocator, pack_file, index_file.writer()); + + var repository = try Repository.init(testing.allocator, pack_file, index_file); + errdefer repository.deinit(); + + return .{ + .git_dir = git_dir, + .pack_file = pack_file, + .index_file = index_file, + .commit_id = try parseOid("dd582c0720819ab7130b103635bd7271b9fd4feb"), + }; + } + + fn close(tr: *TestRepo) void { + tr.index_file.close(); + tr.pack_file.close(); + tr.git_dir.cleanup(); + } +}; + +test "packfile iterator" { + var repo = try TestRepo.open(); + defer repo.close(); + + var repository = try Repository.init(testing.allocator, repo.pack_file, repo.index_file); + defer repository.deinit(); + + var iter = try repository.iterator(repo.commit_id); + defer iter.deinit(); + + var actual_files = std.ArrayList([]u8).init(testing.allocator); + defer actual_files.deinit(); + defer for (actual_files.items) |file| testing.allocator.free(file); + while (try iter.next()) |entry| { + if (entry.type != .file) continue; + const path = try std.fs.path.join(testing.allocator, &.{ entry.path, entry.name }); + errdefer testing.allocator.free(path); + mem.replaceScalar(u8, path, std.fs.path.sep, '/'); + try actual_files.append(path); + } + + mem.sortUnstable([]u8, actual_files.items, {}, struct { + fn lessThan(_: void, a: []u8, b: []u8) bool { + return mem.lessThan(u8, a, b); + } + }.lessThan); + try testing.expectEqualDeep(repo.expected_files, actual_files.items); +} + /// Checks out a commit of a packfile. Intended for experimenting with and /// benchmarking possible optimizations to the indexing and checkout behavior. 
pub fn main() !void { From ebd6acfee8ce11ab6b4c1614d81275fef23a8233 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 20 Mar 2024 20:13:07 +0100 Subject: [PATCH 13/16] package.fetch: use git.iterator in unpack --- src/Package/Fetch/git.zig | 25 +++++--- src/Package/Unpack.zig | 124 ++++++++++++++++++-------------------- 2 files changed, 77 insertions(+), 72 deletions(-) diff --git a/src/Package/Fetch/git.zig b/src/Package/Fetch/git.zig index 56f763f206af..a420dc5cb36b 100644 --- a/src/Package/Fetch/git.zig +++ b/src/Package/Fetch/git.zig @@ -1479,7 +1479,7 @@ test "packfile indexing and checkout" { return mem.lessThan(u8, a, b); } }.lessThan); - try testing.expectEqualDeep(repo.expected_files, actual_files.items); + try testing.expectEqualDeep(TestRepo.expected_files, actual_files.items); const expected_file_contents = \\revision 1 @@ -1502,12 +1502,13 @@ test "packfile indexing and checkout" { try testing.expectEqualStrings(expected_file_contents, actual_file_contents); } -const TestRepo = struct { +pub const TestRepo = struct { git_dir: testing.TmpDir, pack_file: std.fs.File, index_file: std.fs.File, commit_id: Oid, - expected_files: []const []const u8 = &.{ + + pub const expected_files: []const []const u8 = &.{ "dir/file", "dir/subdir/file", "dir/subdir/file2", @@ -1523,11 +1524,11 @@ const TestRepo = struct { "file7", "file8", "file9", - }, + }; - fn open() !TestRepo { - const testrepo_pack = @embedFile("git/testdata/testrepo.pack"); + const testrepo_pack = @embedFile("git/testdata/testrepo.pack"); + pub fn open() !TestRepo { var git_dir = testing.tmpDir(.{}); errdefer git_dir.cleanup(); @@ -1550,7 +1551,15 @@ const TestRepo = struct { }; } - fn close(tr: *TestRepo) void { + pub fn stream() std.io.FixedBufferStream([]const u8) { + return .{ .buffer = testrepo_pack, .pos = 0 }; + } + + pub fn commitID() !Oid { + return try parseOid("dd582c0720819ab7130b103635bd7271b9fd4feb"); + } + + pub fn close(tr: *TestRepo) void { tr.index_file.close(); 
tr.pack_file.close(); tr.git_dir.cleanup(); @@ -1583,7 +1592,7 @@ test "packfile iterator" { return mem.lessThan(u8, a, b); } }.lessThan); - try testing.expectEqualDeep(repo.expected_files, actual_files.items); + try testing.expectEqualDeep(TestRepo.expected_files, actual_files.items); } /// Checks out a commit of a packfile. Intended for experimenting with and diff --git a/src/Package/Unpack.zig b/src/Package/Unpack.zig index b3e344bc80aa..3c9579d58601 100644 --- a/src/Package/Unpack.zig +++ b/src/Package/Unpack.zig @@ -54,18 +54,18 @@ pub const Errors = struct { return self.list.items.len; } - fn createFile(self: *Errors, sub_path: []const u8, err: anyerror) !void { + fn createFile(self: *Errors, subdir_path: []const u8, file_path: []const u8, err: anyerror) !void { try self.list.append(self.allocator, .{ .unable_to_create_file = .{ .code = err, - .file_name = try self.allocator.dupe(u8, sub_path), + .file_name = try std.fs.path.join(testing.allocator, &.{ subdir_path, file_path }), } }); } - fn symLink(self: *Errors, target_path: []const u8, sym_link_path: []const u8, err: anyerror) !void { + fn symLink(self: *Errors, subdir_path: []const u8, target_path: []const u8, sym_link_path: []const u8, err: anyerror) !void { try self.list.append(self.allocator, .{ .unable_to_create_sym_link = .{ .code = err, .target_path = try self.allocator.dupe(u8, target_path), - .sym_link_path = try self.allocator.dupe(u8, sym_link_path), + .sym_link_path = try std.fs.path.join(testing.allocator, &.{ subdir_path, sym_link_path }), } }); } @@ -116,7 +116,7 @@ pub fn tarball(self: *Self, reader: anytype) !void { if (entry.size == 0 and entry.name.len == 0) continue; const file_name = stripComponents(entry.name, strip_components); if (file_name.len == 0) return error.BadFileName; - if (try self.createFile(file_name)) |file| { + if (try self.createFile("", file_name)) |file| { defer file.close(); try entry.writeAll(file); } @@ -125,7 +125,7 @@ pub fn tarball(self: *Self, reader: anytype) 
!void { const file_name = stripComponents(entry.name, strip_components); if (file_name.len == 0) return error.BadFileName; const link_name = entry.link_name; - try self.symLink(link_name, file_name); + try self.symLink("", link_name, file_name); }, } } else break; @@ -133,21 +133,6 @@ pub fn tarball(self: *Self, reader: anytype) !void { } pub fn gitPack(self: *Self, commit_oid: git.Oid, reader: anytype) !void { - // Same interface as std.fs.Dir.createFile, symLink - const inf = struct { - parent: *Self, - - pub fn makePath(_: @This(), _: []const u8) !void {} - - pub fn createFile(t: @This(), sub_path: []const u8, _: fs.File.CreateFlags) !fs.File { - return (try t.parent.createFile(sub_path)) orelse error.Skip; - } - - pub fn symLink(t: @This(), target_path: []const u8, sym_link_path: []const u8, _: fs.Dir.SymLinkFlags) !void { - try t.parent.symLink(target_path, sym_link_path); - } - }{ .parent = self }; - var pack_dir = try self.root.makeOpenPath(".git", .{}); defer pack_dir.close(); var pack_file = try pack_dir.createFile("pkg.pack", .{ .read = true }); @@ -168,7 +153,22 @@ pub fn gitPack(self: *Self, commit_oid: git.Oid, reader: anytype) !void { { var repository = try git.Repository.init(self.allocator, pack_file, index_file); defer repository.deinit(); - try repository.checkout(inf, commit_oid); + var iter = try repository.iterator(commit_oid); + defer iter.deinit(); + while (try iter.next()) |entry| { + switch (entry.type) { + .file => { + if (try self.createFile(entry.path, entry.name)) |file| { + defer file.close(); + try file.writeAll(entry.data); + } + }, + .symlink => { + try self.symLink(entry.path, entry.data, entry.name); + }, + else => {}, // skip empty directory + } + } } try self.root.deleteTree(".git"); @@ -186,7 +186,7 @@ pub fn directory(self: *Self, source: fs.Dir) !void { .sym_link => { var buf: [fs.MAX_PATH_BYTES]u8 = undefined; const link_name = try source.readLink(entry.path, &buf); - try self.symLink(link_name, entry.path); + try 
self.symLink("", link_name, entry.path); }, else => return error.IllegalFileTypeInPackage, } @@ -201,6 +201,12 @@ pub fn filterErrors(self: *Self, filter: Filter) !void { try self.errors.filterWith(filter); } +fn makePath(self: *Self, sub_path: []const u8) !fs.Dir { + if (sub_path.len == 0) return self.root; + try self.root.makePath(sub_path); + return try self.root.openDir(sub_path, .{}); +} + fn copyFile(source_dir: fs.Dir, source_path: []const u8, dest_dir: fs.Dir, dest_path: []const u8) !void { source_dir.copyFile(source_path, dest_dir, dest_path, .{}) catch |err| switch (err) { error.FileNotFound => { @@ -213,32 +219,44 @@ fn copyFile(source_dir: fs.Dir, source_path: []const u8, dest_dir: fs.Dir, dest_ /// Returns fs.File on success, null on failure. /// Errors are collected in errors list. -fn createFile(self: *Self, sub_path: []const u8) !?fs.File { - return createFilePath(self.root, sub_path) catch |err| { - try self.errors.createFile(sub_path, err); +fn createFile(self: *Self, subdir_path: []const u8, file_path: []const u8) !?fs.File { + return createFilePath(self.root, subdir_path, file_path) catch |err| { + try self.errors.createFile(subdir_path, file_path, err); return null; }; } -fn symLink(self: *Self, target_path: []const u8, sym_link_path: []const u8) !void { - symLinkPath(self.root, target_path, sym_link_path) catch |err| { - try self.errors.symLink(target_path, sym_link_path, err); +fn symLink(self: *Self, subdir_path: []const u8, target_path: []const u8, sym_link_path: []const u8) !void { + symLinkPath(self.root, subdir_path, target_path, sym_link_path) catch |err| { + try self.errors.symLink(subdir_path, target_path, sym_link_path, err); }; } -fn createFilePath(dir: fs.Dir, sub_path: []const u8) !fs.File { - return dir.createFile(sub_path, .{ .exclusive = true }) catch |err| switch (err) { +fn createFilePath(root: fs.Dir, subdir_path: []const u8, file_path: []const u8) !fs.File { + var dir = root; + if (subdir_path.len > 0) { + try 
dir.makePath(subdir_path); + dir = try dir.openDir(subdir_path, .{}); + } + + return dir.createFile(file_path, .{ .exclusive = true }) catch |err| switch (err) { error.FileNotFound => { - if (std.fs.path.dirname(sub_path)) |dirname| try dir.makePath(dirname); - return try dir.createFile(sub_path, .{ .exclusive = true }); + if (std.fs.path.dirname(file_path)) |dirname| try dir.makePath(dirname); + return try dir.createFile(file_path, .{ .exclusive = true }); }, else => |e| return e, }; } -fn symLinkPath(dir: fs.Dir, target_path: []const u8, sym_link_path: []const u8) !void { +fn symLinkPath(root: fs.Dir, subdir_path: []const u8, target_path: []const u8, sym_link_path: []const u8) !void { // TODO: if this would create a symlink to outside // the destination directory, fail with an error instead. + var dir = root; + if (subdir_path.len > 0) { + try dir.makePath(subdir_path); + dir = try dir.openDir(subdir_path, .{}); + } + dir.symLink(target_path, sym_link_path, .{}) catch |err| switch (err) { error.FileNotFound => { if (fs.path.dirname(sym_link_path)) |dirname| try dir.makePath(dirname); @@ -275,40 +293,21 @@ test "tar stripComponents" { } test gitPack { - const paths: []const []const u8 = &.{ - "dir/file", - "dir/subdir/file", - "dir/subdir/file2", - "dir2/file", - "dir3/file", - "dir3/file2", - "file", - "file2", - "file3", - "file4", - "file5", - "file6", - "file7", - "file8", - "file9", - }; - - // load git pack with expected files - const data = @embedFile("Fetch/git/testdata/testrepo.pack"); - var fbs = std.io.fixedBufferStream(data); - var tmp = testing.tmpDir(.{ .iterate = true }); defer tmp.cleanup(); - // unpack git pack + const repo = git.TestRepo; + var stream = repo.stream(); + const reader = stream.reader(); + + // Unpack git repo at commitID from reader { var unpack = Unpack.init(testing.allocator, tmp.dir); defer unpack.deinit(); - const commit_id = try git.parseOid("dd582c0720819ab7130b103635bd7271b9fd4feb"); - try unpack.gitPack(commit_id, 
fbs.reader()); + try unpack.gitPack(try repo.commitID(), reader); } - try expectDirFiles(tmp.dir, paths); + try expectDirFiles(tmp.dir, repo.expected_files); } const TarHeader = std.tar.output.Header; @@ -354,7 +353,7 @@ test directory { defer source.cleanup(); for (paths) |path| { - const f = try createFilePath(source.dir, path); + const f = try createFilePath(source.dir, "", path); f.close(); } @@ -453,6 +452,3 @@ fn expectDirFiles(dir: fs.Dir, expected_files: []const []const u8) !void { }.lessThan); try testing.expectEqualDeep(expected_files, actual_files.items); } - -// var buf: [256]u8 = undefined; -// std.debug.print("tmp dir: {s}\n", .{try tmp.dir.realpath(".", &buf)}); From 124c19cf4d2ca21147cbb2824c82af71be9925fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 20 Mar 2024 20:15:58 +0100 Subject: [PATCH 14/16] package.git: cleanup anytype Not used from Unpack any more. --- src/Package/Fetch/git.zig | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/Package/Fetch/git.zig b/src/Package/Fetch/git.zig index a420dc5cb36b..dfe364ddca65 100644 --- a/src/Package/Fetch/git.zig +++ b/src/Package/Fetch/git.zig @@ -51,7 +51,7 @@ pub const Repository = struct { /// Checks out the repository at `commit_oid` to `worktree`. pub fn checkout( repository: *Repository, - worktree: anytype, + worktree: std.fs.Dir, commit_oid: Oid, ) !void { try repository.odb.seekOid(commit_oid); @@ -66,7 +66,7 @@ pub const Repository = struct { /// Checks out the tree at `tree_oid` to `worktree`. 
fn checkoutTree( repository: *Repository, - dir: anytype, + dir: std.fs.Dir, tree_oid: Oid, current_path: []const u8, ) !void { @@ -1460,7 +1460,7 @@ test "packfile indexing and checkout" { var worktree = testing.tmpDir(.{ .iterate = true }); defer worktree.cleanup(); - try repository.checkout(worktree.dir, repo.commit_id); + try repository.checkout(worktree.dir, try TestRepo.commitID()); var actual_files: std.ArrayListUnmanaged([]u8) = .{}; defer actual_files.deinit(testing.allocator); @@ -1506,7 +1506,6 @@ pub const TestRepo = struct { git_dir: testing.TmpDir, pack_file: std.fs.File, index_file: std.fs.File, - commit_id: Oid, pub const expected_files: []const []const u8 = &.{ "dir/file", @@ -1547,7 +1546,6 @@ pub const TestRepo = struct { .git_dir = git_dir, .pack_file = pack_file, .index_file = index_file, - .commit_id = try parseOid("dd582c0720819ab7130b103635bd7271b9fd4feb"), }; } @@ -1573,7 +1571,7 @@ test "packfile iterator" { var repository = try Repository.init(testing.allocator, repo.pack_file, repo.index_file); defer repository.deinit(); - var iter = try repository.iterator(repo.commit_id); + var iter = try repository.iterator(try TestRepo.commitID()); defer iter.deinit(); var actual_files = std.ArrayList([]u8).init(testing.allocator); From 7310a81cd574b571c729e800946687d86b5919a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 20 Mar 2024 21:43:54 +0100 Subject: [PATCH 15/16] fix build --- src/Package/Unpack.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Package/Unpack.zig b/src/Package/Unpack.zig index 3c9579d58601..2659484f990b 100644 --- a/src/Package/Unpack.zig +++ b/src/Package/Unpack.zig @@ -57,7 +57,7 @@ pub const Errors = struct { fn createFile(self: *Errors, subdir_path: []const u8, file_path: []const u8, err: anyerror) !void { try self.list.append(self.allocator, .{ .unable_to_create_file = .{ .code = err, - .file_name = try std.fs.path.join(testing.allocator, &.{ subdir_path, file_path 
}), + .file_name = try std.fs.path.join(self.allocator, &.{ subdir_path, file_path }), } }); } @@ -65,7 +65,7 @@ pub const Errors = struct { try self.list.append(self.allocator, .{ .unable_to_create_sym_link = .{ .code = err, .target_path = try self.allocator.dupe(u8, target_path), - .sym_link_path = try std.fs.path.join(testing.allocator, &.{ subdir_path, sym_link_path }), + .sym_link_path = try std.fs.path.join(self.allocator, &.{ subdir_path, sym_link_path }), } }); } From 1f1c8e996590004628918de9933ba573510cebe3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Fri, 22 Mar 2024 21:12:30 +0100 Subject: [PATCH 16/16] package manager: don't strip components in tar Unpack tar without removing leading root folder. Then find package root in unpacked tmp folder. Fixes #17779 --- src/Package/Fetch.zig | 70 +++++++++++++--------- src/Package/Unpack.zig | 132 ++++++++++++++++++++++++++++++++--------- 2 files changed, 147 insertions(+), 55 deletions(-) diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index e1ad188540d9..8fdd25bcf136 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -440,26 +440,14 @@ fn runResource( const s = fs.path.sep_str; const cache_root = f.job_queue.global_cache; const rand_int = std.crypto.random.int(u64); + // temporary directory for unpacking; sub path of cache_root const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int); + // sub path of cache_root, inside temporary directory, if null package root + // is tmp_dir_sub_path + var package_sub_path: ?[]const u8 = null; { - const tmp_directory_path = try cache_root.join(arena, &.{tmp_dir_sub_path}); - var tmp_directory: Cache.Directory = .{ - .path = tmp_directory_path, - .handle = handle: { - const dir = cache_root.handle.makeOpenPath(tmp_dir_sub_path, .{ - .iterate = true, - }) catch |err| { - try eb.addRootErrorMessage(.{ - .msg = try eb.printString("unable to create temporary directory '{s}': {s}", .{ - tmp_directory_path, @errorName(err), - }), 
- }); - return error.FetchFailed; - }; - break :handle dir; - }, - }; + var tmp_directory = try f.makeOpenCacheDirectory(tmp_dir_sub_path); defer tmp_directory.handle.close(); // Fetch and unpack a URL into a temporary directory. @@ -467,6 +455,21 @@ fn runResource( try unpackResource(f, resource, &unpack, uri_path); defer unpack.deinit(); + // Position tmp_directory to sub_path if package root is deeper inside + // temporary directory. + if (unpack.package_sub_path) |sub_path| { + package_sub_path = try fs.path.join(arena, &.{ tmp_dir_sub_path, sub_path }); + tmp_directory.handle.close(); + tmp_directory = try f.makeOpenCacheDirectory(package_sub_path.?); + } else { + // btrfs workaround; reopen tmp_directory + if (native_os == .linux and f.job_queue.work_around_btrfs_bug) { + // https://github.com/ziglang/zig/issues/17095 + tmp_directory.handle.close(); + tmp_directory = try f.makeOpenCacheDirectory(tmp_dir_sub_path); + } + } + // Load, parse, and validate the unpacked build.zig.zon file. It is allowed // for the file to be missing, in which case this fetched package is // considered to be a "naked" package. @@ -488,15 +491,6 @@ fn runResource( // It will also apply the manifest's inclusion rules to the temporary // directory by deleting excluded files. // Empty directories have already been omitted by `unpackResource`. 
- - if (native_os == .linux and f.job_queue.work_around_btrfs_bug) { - // https://github.com/ziglang/zig/issues/17095 - tmp_directory.handle.close(); - tmp_directory.handle = cache_root.handle.makeOpenPath(tmp_dir_sub_path, .{ - .iterate = true, - }) catch @panic("btrfs workaround failed"); - } - f.actual_hash = try computeHash(f, tmp_directory, filter); } @@ -510,7 +504,11 @@ fn runResource( .root_dir = cache_root, .sub_path = try arena.dupe(u8, "p" ++ s ++ Manifest.hexDigest(f.actual_hash)), }; - renameTmpIntoCache(cache_root.handle, tmp_dir_sub_path, f.package_root.sub_path) catch |err| { + renameTmpIntoCache( + cache_root.handle, + if (package_sub_path) |p| p else tmp_dir_sub_path, + f.package_root.sub_path, + ) catch |err| { const src = try cache_root.join(arena, &.{tmp_dir_sub_path}); const dest = try cache_root.join(arena, &.{f.package_root.sub_path}); try eb.addRootErrorMessage(.{ .msg = try eb.printString( @@ -519,6 +517,9 @@ fn runResource( ) }); return error.FetchFailed; }; + if (package_sub_path) |_| { + cache_root.handle.deleteTree(tmp_dir_sub_path) catch {}; + } // Validate the computed hash against the expected hash. If invalid, this // job is done. @@ -551,6 +552,21 @@ fn runResource( return queueJobsForDeps(f); } +fn makeOpenCacheDirectory(f: *Fetch, sub_path: []const u8) RunError!Cache.Directory { + const arena = f.arena.allocator(); + const cache_root = f.job_queue.global_cache; + + const path = try cache_root.join(arena, &.{sub_path}); + return .{ + .path = path, + .handle = cache_root.handle.makeOpenPath(sub_path, .{ .iterate = true }) catch |err| { + return f.failMsg(err, "unable to create temporary directory '{s}': {s}", .{ + path, @errorName(err), + }); + }, + }; +} + /// `computeHash` gets a free check for the existence of `build.zig`, but when /// not computing a hash, we need to do a syscall to check for it. 
fn checkBuildFileExistence(f: *Fetch) RunError!void { diff --git a/src/Package/Unpack.zig b/src/Package/Unpack.zig index 2659484f990b..b58624491bbe 100644 --- a/src/Package/Unpack.zig +++ b/src/Package/Unpack.zig @@ -5,6 +5,7 @@ const Filter = @import("Fetch.zig").Filter; allocator: std.mem.Allocator, root: fs.Dir, +package_sub_path: ?[]const u8 = null, errors: Errors, pub const Error = union(enum) { @@ -69,7 +70,7 @@ pub const Errors = struct { } }); } - pub fn filterWith(self: *Errors, filter: Filter) !void { + fn filterWith(self: *Errors, filter: Filter) !void { var i = self.list.items.len; while (i > 0) { i -= 1; @@ -80,6 +81,25 @@ pub const Errors = struct { } } } + + fn stripRoot(self: *Errors) !void { + if (self.count() == 0) return; + + var old_list = self.list; + self.list = .{}; + for (old_list.items) |item| { + switch (item) { + .unable_to_create_sym_link => |info| { + try self.symLink("", stripComponents(info.target_path, 1), info.sym_link_path, info.code); + }, + .unable_to_create_file => |info| { + try self.createFile("", stripComponents(info.file_name, 1), info.code); + }, + } + self.free(item); + } + old_list.deinit(self.allocator); + } }; pub fn init(allocator: std.mem.Allocator, root: fs.Dir) Self { @@ -92,13 +112,14 @@ pub fn init(allocator: std.mem.Allocator, root: fs.Dir) Self { pub fn deinit(self: *Self) void { self.errors.deinit(); + if (self.package_sub_path) |package_sub_path| { + self.allocator.free(package_sub_path); + } } const Self = @This(); pub fn tarball(self: *Self, reader: anytype) !void { - const strip_components = 1; - var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; var iter = std.tar.iterator(reader, .{ @@ -114,22 +135,59 @@ pub fn tarball(self: *Self, reader: anytype) !void { .directory => {}, // skip empty .file => { if (entry.size == 0 and entry.name.len == 0) continue; - const file_name = stripComponents(entry.name, strip_components); - if 
(file_name.len == 0) return error.BadFileName; - if (try self.createFile("", file_name)) |file| { + if (try self.createFile("", entry.name)) |file| { defer file.close(); try entry.writeAll(file); } }, .sym_link => { - const file_name = stripComponents(entry.name, strip_components); - if (file_name.len == 0) return error.BadFileName; - const link_name = entry.link_name; - try self.symLink("", link_name, file_name); + try self.symLink("", entry.link_name, entry.name); }, } } else break; } + try self.findPackageSubPath(); +} + +fn findPackageSubPath(self: *Self) !void { + var iter = self.root.iterate(); + if (try iter.next()) |entry| { + if (try iter.next() != null) return; + if (entry.kind == .directory) { // single directory below root + self.package_sub_path = try self.allocator.dupe(u8, entry.name); + try self.errors.stripRoot(); + } + } +} + +test findPackageSubPath { + var tmp = testing.tmpDir(.{ .iterate = true }); + defer tmp.cleanup(); + + // folder1 + // ├── folder2 + // ├── file1 + // + try tmp.dir.makePath("folder1/folder2"); + (try tmp.dir.createFile("folder1/file1", .{})).close(); + + var unpack = init(testing.allocator, tmp.dir); + try unpack.findPackageSubPath(); + // start at root returns folder1 as package root + try testing.expectEqualStrings("folder1", unpack.package_sub_path.?); + unpack.deinit(); + + // start at folder1 returns null + unpack = init(testing.allocator, try tmp.dir.openDir("folder1", .{ .iterate = true })); + try unpack.findPackageSubPath(); + try testing.expect(unpack.package_sub_path == null); + unpack.deinit(); + + // start at folder1/folder2 returns null + unpack = init(testing.allocator, try tmp.dir.openDir("folder1/folder2", .{ .iterate = true })); + try unpack.findPackageSubPath(); + try testing.expect(unpack.package_sub_path == null); + unpack.deinit(); } pub fn gitPack(self: *Self, commit_oid: git.Oid, reader: anytype) !void { @@ -322,22 +380,36 @@ test tarball { }; var buf: [paths.len * @sizeOf(TarHeader)]u8 = undefined; - 
// create tarball - try createTarball(paths, &buf); + // tarball with leading root folder + { + try createTarball("package_root", paths, &buf); + var tmp = testing.tmpDir(.{ .iterate = true }); + defer tmp.cleanup(); - var tmp = testing.tmpDir(.{ .iterate = true }); - defer tmp.cleanup(); + var fbs = std.io.fixedBufferStream(&buf); + + var unpack = Unpack.init(testing.allocator, tmp.dir); + defer unpack.deinit(); + try unpack.tarball(fbs.reader()); + try testing.expectEqualStrings("package_root", unpack.package_sub_path.?); - // unpack tarball to tmp dir, will strip root dir + try expectDirFiles(try tmp.dir.openDir("package_root", .{ .iterate = true }), paths); + } + // tarball without root { + try createTarball("", paths, &buf); + var tmp = testing.tmpDir(.{ .iterate = true }); + defer tmp.cleanup(); + var fbs = std.io.fixedBufferStream(&buf); var unpack = Unpack.init(testing.allocator, tmp.dir); defer unpack.deinit(); try unpack.tarball(fbs.reader()); - } + try testing.expect(unpack.package_sub_path == null); - try expectDirFiles(tmp.dir, paths); + try expectDirFiles(tmp.dir, paths); + } } test directory { @@ -357,14 +429,14 @@ test directory { f.close(); } - var dest = testing.tmpDir(.{ .iterate = true }); - defer dest.cleanup(); + var tmp = testing.tmpDir(.{ .iterate = true }); + defer tmp.cleanup(); - var unpack = Unpack.init(testing.allocator, dest.dir); + var unpack = Unpack.init(testing.allocator, tmp.dir); defer unpack.deinit(); try unpack.directory(source.dir); - try expectDirFiles(dest.dir, paths); + try expectDirFiles(tmp.dir, paths); } test "collect/filter errors" { @@ -378,7 +450,7 @@ test "collect/filter errors" { "dir1/file1", }; var buf: [paths.len * @sizeOf(TarHeader)]u8 = undefined; - try createTarball(paths, &buf); + try createTarball("package_root", paths, &buf); var tmp = testing.tmpDir(.{ .iterate = true }); defer tmp.cleanup(); @@ -388,12 +460,12 @@ test "collect/filter errors" { defer unpack.deinit(); try unpack.tarball(fbs.reader()); try 
testing.expect(unpack.hasErrors()); - - try expectDirFiles(tmp.dir, paths[0..2]); + try testing.expectEqualStrings("package_root", unpack.package_sub_path.?); + try expectDirFiles(try tmp.dir.openDir("package_root", .{ .iterate = true }), paths[0..2]); try testing.expectEqual(2, unpack.errors.count()); - try testing.expectEqualStrings(paths[2], unpack.errors.list.items[0].unable_to_create_file.file_name); - try testing.expectEqualStrings(paths[3], unpack.errors.list.items[1].unable_to_create_file.file_name); + try testing.expectEqualStrings("dir/file", unpack.errors.list.items[0].unable_to_create_file.file_name); + try testing.expectEqualStrings("dir1/file1", unpack.errors.list.items[1].unable_to_create_file.file_name); { var filter: Filter = .{}; @@ -420,13 +492,17 @@ test "collect/filter errors" { } } -fn createTarball(paths: []const []const u8, buf: []u8) !void { +fn createTarball(prefix: []const u8, paths: []const []const u8, buf: []u8) !void { var fbs = std.io.fixedBufferStream(buf); const writer = fbs.writer(); for (paths) |path| { var hdr = TarHeader.init(); hdr.typeflag = .regular; - try hdr.setPath("stripped_root", path); + if (prefix.len > 0) { + try hdr.setPath(prefix, path); + } else { + hdr.setName(path); + } try hdr.updateChecksum(); try writer.writeAll(std.mem.asBytes(&hdr)); }