From 333a699797b9befbd50e6a2ad8000e8d28f5f539 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 24 Jun 2020 20:52:09 +0200 Subject: [PATCH 01/14] Add std.os.realpathat and std.fs.Dir.realpath `std.os.realpathat` is similar to `std.os.realpath`, however, it accepts a pair `fd_t, []const u8` of args and thus works out the realpath of a relative path with respect to some opened file descriptor. If the input pathname argument turns out to be an absolute path, this function reverts to calling `realpath` on that pathname, completely ignoring the input file descriptor. This behaviour is standard in POSIX and IMHO a good rule of thumb to follow. If the input file descriptor was obtained using `std.fs.cwd()` call, this function reverts to `std.os.getcwd()` to obtain the file descriptor's path. `std.fs.Dir.realpath` integrates `std.os.realpathat` with `std.fs.Dir` but with a dynamic memory allocator. --- lib/std/fs.zig | 20 +++++++++- lib/std/fs/test.zig | 20 ++++++++++ lib/std/os.zig | 97 +++++++++++++++++++++++++++++++++++++++------ lib/std/os/test.zig | 24 +++++++++++ 4 files changed, 147 insertions(+), 14 deletions(-) diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 0feaf69d6780..02f245d8107a 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -931,6 +931,24 @@ pub const Dir = struct { return self.openDir(sub_path, open_dir_options); } + /// This function returns the canonicalized absolute pathname of `pathname` + /// relative to this `Dir`. If `pathname` is absolute, ignores this `Dir` handle + /// and returns the canonicalized absolute pathname of `pathname` argument. + /// Caller must free the returned memory. + pub fn realpath(self: Dir, allocator: *Allocator, pathname: []const u8) ![]u8 { + if (builtin.os.tag == .wasi) { + @compileError("Dir.realpath is unsupported in WASI"); + } + // Use of MAX_PATH_BYTES here is valid as the realpath function does not + // have a variant that takes an arbitrary-size buffer. 
+ // TODO(#4812): Consider reimplementing realpath or using the POSIX.1-2008 + // NULL out parameter (GNU's canonicalize_file_name) to handle overelong + // paths. musl supports passing NULL but restricts the output to PATH_MAX + // anyway. + var buf: [MAX_PATH_BYTES]u8 = undefined; + return mem.dupe(allocator, u8, try os.realpathat(self.fd, pathname, &buf)); + } + /// Changes the current working directory to the open directory handle. /// This modifies global state and can have surprising effects in multi- /// threaded applications. Most applications and especially libraries should @@ -1903,7 +1921,7 @@ pub fn selfExeDirPath(out_buffer: []u8) SelfExePathError![]const u8 { } /// `realpath`, except caller must free the returned memory. -/// TODO integrate with `Dir` +/// See also `Dir.realpath`. pub fn realpathAlloc(allocator: *Allocator, pathname: []const u8) ![]u8 { // Use of MAX_PATH_BYTES here is valid as the realpath function does not // have a variant that takes an arbitrary-size buffer. 
diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig index a3cf2e800268..98c3cc7a0d11 100644 --- a/lib/std/fs/test.zig +++ b/lib/std/fs/test.zig @@ -50,6 +50,26 @@ fn contains(entries: *const std.ArrayList(Dir.Entry), el: Dir.Entry) bool { return false; } +test "Dir.realpath" { + if (builtin.os.tag == .wasi) return error.SkipZigTest; + + var tmp_dir = tmpDir(.{}); + defer tmp_dir.cleanup(); + + var file = try tmp_dir.dir.createFile("test_file", .{}); + defer file.close(); + + const abs_file_path = try tmp_dir.dir.realpath(testing.allocator, "test_file"); + defer testing.allocator.free(abs_file_path); + + const dir_path = try tmp_dir.dir.realpath(testing.allocator, ""); + defer testing.allocator.free(dir_path); + const expected_path = try fs.path.join(testing.allocator, &[_][]const u8{ dir_path, "test_file" }); + defer testing.allocator.free(expected_path); + + testing.expect(mem.eql(u8, abs_file_path, expected_path)); +} + test "readAllAlloc" { var tmp_dir = tmpDir(.{}); defer tmp_dir.cleanup(); diff --git a/lib/std/os.zig b/lib/std/os.zig index 1e1049ae51ce..dae97e5cdf9b 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -3981,6 +3981,45 @@ pub fn realpath(pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathE pub const realpathC = @compileError("deprecated: renamed realpathZ"); +fn fdPath(fd: fd_t, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { + switch (builtin.os.tag) { + .wasi => @compileError("getting absolute path of an fd is unsupported in WASI"), + .windows => { + var wide_buf: [windows.PATH_MAX_WIDE]u16 = undefined; + const wide_slice = try windows.GetFinalPathNameByHandleW(fd, &wide_buf, wide_buf.len, windows.VOLUME_NAME_DOS); + + // Windows returns \\?\ prepended to the path. + // We strip it to make this function consistent across platforms. + const prefix = [_]u16{ '\\', '\\', '?', '\\' }; + const start_index = if (mem.startsWith(u16, wide_slice, &prefix)) prefix.len else 0; + + // Trust that Windows gives us valid UTF-16LE. 
+ const end_index = std.unicode.utf16leToUtf8(out_buffer, wide_slice[start_index..]) catch unreachable; + return out_buffer[0..end_index]; + }, + .macosx, .ios, .watchos, .tvos => { + // On macOS, we can use F_GETPATH fcntl command to query the OS for + // the path to the file descriptor. + @memset(out_buffer, 0, MAX_PATH_BYTES); + switch (errno(system.fcntl(fd, F_GETPATH, out_buffer))) { + 0 => {}, + EBADF => return error.FileNotFound, + // TODO man pages for fcntl on macOS don't really tell you what + // errno values to expect when command is F_GETPATH... + else => |err| return unexpectedErrno(err), + } + const len = mem.indexOfScalar(u8, out_buffer[0..], @as(u8, 0)) orelse MAX_PATH_BYTES; + return out_buffer[0..len]; + }, + else => { + var procfs_buf: ["/proc/self/fd/-2147483648".len:0]u8 = undefined; + const proc_path = std.fmt.bufPrint(procfs_buf[0..], "/proc/self/fd/{}\x00", .{fd}) catch unreachable; + + return readlinkZ(@ptrCast([*:0]const u8, proc_path.ptr), out_buffer); + }, + } +} + /// Same as `realpath` except `pathname` is null-terminated. 
pub fn realpathZ(pathname: [*:0]const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { if (builtin.os.tag == .windows) { @@ -3994,10 +4033,7 @@ pub fn realpathZ(pathname: [*:0]const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealP }; defer close(fd); - var procfs_buf: ["/proc/self/fd/-2147483648".len:0]u8 = undefined; - const proc_path = std.fmt.bufPrint(procfs_buf[0..], "/proc/self/fd/{}\x00", .{fd}) catch unreachable; - - return readlinkZ(@ptrCast([*:0]const u8, proc_path.ptr), out_buffer); + return fdPath(fd, out_buffer); } const result_path = std.c.realpath(pathname, out_buffer) orelse switch (std.c._errno().*) { EINVAL => unreachable, @@ -4029,17 +4065,52 @@ pub fn realpathW(pathname: [*:0]const u16, out_buffer: *[MAX_PATH_BYTES]u8) Real ); defer windows.CloseHandle(h_file); - var wide_buf: [windows.PATH_MAX_WIDE]u16 = undefined; - const wide_slice = try windows.GetFinalPathNameByHandleW(h_file, &wide_buf, wide_buf.len, windows.VOLUME_NAME_DOS); + return fdPath(h_file, out_buffer); +} + +/// Similar to `realpath`, however, returns the canonicalized absolute pathname of +/// a `pathname` relative to a file descriptor `fd`. +/// If `pathname` is an absolute path, ignores `fd` and reverts to calling +/// `realpath` on the `pathname` argument. +/// In Unix, if `fd` was obtained using `std.fs.cwd()` call, reverts to calling +/// `std.os.getcwd()` to obtain file descriptor's path. 
+pub fn realpathat(fd: fd_t, pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { + if (builtin.os.tag == .wasi) { + @compileError("realpathat is unsupported in WASI"); + } + if (std.fs.path.isAbsolute(pathname)) { + return realpath(pathname, out_buffer); + } + + var buffer: [MAX_PATH_BYTES]u8 = undefined; + var fd_path: []const u8 = undefined; + if (@hasDecl(@This(), "AT_FDCWD")) { + if (fd == @field(@This(), "AT_FDCWD")) { + fd_path = getcwd(buffer[0..]) catch |err| { + return switch (err) { + GetCwdError.NameTooLong => error.NameTooLong, + GetCwdError.CurrentWorkingDirectoryUnlinked => error.FileNotFound, + else => |e| e, + }; + }; + } else { + fd_path = try fdPath(fd, &buffer); + } + } else { + fd_path = try fdPath(fd, &buffer); + } + + const total_len = fd_path.len + pathname.len + 1; // +1 to account for path separator + if (total_len >= MAX_PATH_BYTES) { + return error.NameTooLong; + } - // Windows returns \\?\ prepended to the path. - // We strip it to make this function consistent across platforms. - const prefix = [_]u16{ '\\', '\\', '?', '\\' }; - const start_index = if (mem.startsWith(u16, wide_slice, &prefix)) prefix.len else 0; + var unnormalized: [MAX_PATH_BYTES]u8 = undefined; + mem.copy(u8, unnormalized[0..], fd_path); + unnormalized[fd_path.len] = std.fs.path.sep; + mem.copy(u8, unnormalized[fd_path.len + 1 ..], pathname); - // Trust that Windows gives us valid UTF-16LE. - const end_index = std.unicode.utf16leToUtf8(out_buffer, wide_slice[start_index..]) catch unreachable; - return out_buffer[0..end_index]; + return realpath(unnormalized[0..total_len], out_buffer); } /// Spurious wakeups are possible and no precision of timing is guaranteed. 
diff --git a/lib/std/os/test.zig b/lib/std/os/test.zig index e508f5ae20f3..7266d0449af6 100644 --- a/lib/std/os/test.zig +++ b/lib/std/os/test.zig @@ -40,6 +40,30 @@ test "fstatat" { expectEqual(stat, statat); } +test "realpathat" { + if (builtin.os.tag == .wasi) return error.SkipZigTest; + + const cwd = fs.cwd(); + + var buf1: [fs.MAX_PATH_BYTES]u8 = undefined; + const cwd_path = try os.getcwd(&buf1); + + var buf2: [fs.MAX_PATH_BYTES]u8 = undefined; + const cwd_realpathat = try os.realpathat(cwd.fd, "", &buf2); + testing.expect(mem.eql(u8, cwd_path, cwd_realpathat)); + + // Now, open an actual Dir{"."} since on Unix `realpathat` behaves + // in a special way when `fd` equals `cwd.fd` + var dir = try cwd.openDir(".", .{}); + defer dir.close(); + + const cwd_realpathat2 = try os.realpathat(dir.fd, "", &buf2); + testing.expect(mem.eql(u8, cwd_path, cwd_realpathat2)); + + // Finally, try getting a path for something that doesn't exist + testing.expectError(error.FileNotFound, os.realpathat(dir.fd, "definitely_bogus_does_not_exist1234", &buf2)); +} + test "readlinkat" { // enable when `readlinkat` and `symlinkat` are implemented on Windows if (builtin.os.tag == .windows) return error.SkipZigTest; From c833cecc2500175ab31e5000bc8e07d72f43ca43 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 24 Jun 2020 23:39:38 +0200 Subject: [PATCH 02/14] Create file with sharing enabled on Windows --- lib/std/fs/test.zig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig index 98c3cc7a0d11..7b7212e4588f 100644 --- a/lib/std/fs/test.zig +++ b/lib/std/fs/test.zig @@ -56,7 +56,10 @@ test "Dir.realpath" { var tmp_dir = tmpDir(.{}); defer tmp_dir.cleanup(); - var file = try tmp_dir.dir.createFile("test_file", .{}); + // We need to open with File.Lock.Shared as otherwise `std.os.realpath` will + // error out with sharing violation on Windows. + // TODO is this a bug on Windows? 
+ var file = try tmp_dir.dir.createFile("test_file", .{ .lock = File.Lock.Shared }); defer file.close(); const abs_file_path = try tmp_dir.dir.realpath(testing.allocator, "test_file"); From a3857cb394afed60f02789dc68d23894f9d74ce9 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 25 Jun 2020 09:03:20 +0200 Subject: [PATCH 03/14] Close the handle after creating the file --- lib/std/fs/test.zig | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig index 7b7212e4588f..5f9551d3e477 100644 --- a/lib/std/fs/test.zig +++ b/lib/std/fs/test.zig @@ -56,11 +56,10 @@ test "Dir.realpath" { var tmp_dir = tmpDir(.{}); defer tmp_dir.cleanup(); - // We need to open with File.Lock.Shared as otherwise `std.os.realpath` will - // error out with sharing violation on Windows. - // TODO is this a bug on Windows? var file = try tmp_dir.dir.createFile("test_file", .{ .lock = File.Lock.Shared }); - defer file.close(); + // We need to close the file immediately as otherwise on Windows we'll end up + // with a sharing violation. 
+ file.close(); const abs_file_path = try tmp_dir.dir.realpath(testing.allocator, "test_file"); defer testing.allocator.free(abs_file_path); From 48a767c3a514240bc35d85a968c95e808451463c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 26 Jun 2020 06:51:39 +0200 Subject: [PATCH 04/14] Use Dir.realpath in 'directories operations on file' testcase --- lib/std/fs/test.zig | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig index 5f9551d3e477..399b622023ac 100644 --- a/lib/std/fs/test.zig +++ b/lib/std/fs/test.zig @@ -119,12 +119,7 @@ test "directory operations on files" { testing.expectError(error.NotDir, tmp_dir.dir.deleteDir(test_file_name)); if (builtin.os.tag != .wasi) { - // TODO: use Dir's realpath function once that exists - const absolute_path = blk: { - const relative_path = try fs.path.join(testing.allocator, &[_][]const u8{ "zig-cache", "tmp", tmp_dir.sub_path[0..], test_file_name }); - defer testing.allocator.free(relative_path); - break :blk try fs.realpathAlloc(testing.allocator, relative_path); - }; + const absolute_path = try tmp_dir.dir.realpath(testing.allocator, test_file_name); defer testing.allocator.free(absolute_path); testing.expectError(error.PathAlreadyExists, fs.makeDirAbsolute(absolute_path)); From 71b3e043a2043b9d9dfc1ec5cfe6751b1558de50 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jun 2020 12:44:31 +0200 Subject: [PATCH 05/14] Add null-terminated and WTF16 versions This commit adds null-terminated and WTF16 versions of `std.fs.Dir.realpath` and `std.os.realpathat`. Alloc version has been renamed to `std.fs.Dir.realpathAlloc` to be compatible with the naming convention used across Zig's `libstd`. 
--- lib/std/fs.zig | 61 ++++++++++++++++++++++++++++++++++-- lib/std/fs/test.zig | 32 ++++++++++++++----- lib/std/os.zig | 76 +++++++++++++++++++++++++++++++++++---------- 3 files changed, 143 insertions(+), 26 deletions(-) diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 02f245d8107a..4a4a39c8b48d 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -934,8 +934,65 @@ pub const Dir = struct { /// This function returns the canonicalized absolute pathname of `pathname` /// relative to this `Dir`. If `pathname` is absolute, ignores this `Dir` handle /// and returns the canonicalized absolute pathname of `pathname` argument. - /// Caller must free the returned memory. - pub fn realpath(self: Dir, allocator: *Allocator, pathname: []const u8) ![]u8 { + /// See also `realpath`. + pub fn realpath(self: Dir, pathname: []const u8, out_buffer: []u8) ![]u8 { + if (builtin.os.tag == .wasi) { + @compileError("Dir.realpath is unsupported in WASI"); + } + if (builtin.os.tag == .windows) { + const pathname_w = try os.windows.sliceToPrefixedFileW(pathname); + return self.realpathW(pathname_w.span().ptr, out_buffer); + } + const pathname_c = try os.toPosixPath(pathname); + return self.realpathZ(&pathname_c, out_buffer); + } + + /// Same as `Dir.realpath` except `pathname` is null-terminated. + /// See also `Dir.realpath`, `realpathZ`. + pub fn realpathZ(self: Dir, pathname: [*:0]const u8, out_buffer: []u8) ![]u8 { + // Use of MAX_PATH_BYTES here is valid as the realpath function does not + // have a variant that takes an arbitrary-size buffer. + // TODO(#4812): Consider reimplementing realpath or using the POSIX.1-2008 + // NULL out parameter (GNU's canonicalize_file_name) to handle overelong + // paths. musl supports passing NULL but restricts the output to PATH_MAX + // anyway. 
+ var buf: [MAX_PATH_BYTES]u8 = undefined; + const out_path = try os.realpathatZ(self.fd, pathname, &buf); + + if (out_path.len > out_buffer.len) { + return error.NameTooLong; + } + + mem.copy(u8, out_buffer, out_path); + + return out_buffer[0..out_path.len]; + } + + /// Windows-only. Same as `Dir.realpath` except `pathname` is null-terminated, + /// WTF16 encoded. + /// See also `Dir.realpath`, `realpathW`. + pub fn realpathW(self: Dir, pathname: [*:0]const u16, out_buffer: []u8) ![]u8 { + // Use of MAX_PATH_BYTES here is valid as the realpath function does not + // have a variant that takes an arbitrary-size buffer. + // TODO(#4812): Consider reimplementing realpath or using the POSIX.1-2008 + // NULL out parameter (GNU's canonicalize_file_name) to handle overelong + // paths. musl supports passing NULL but restricts the output to PATH_MAX + // anyway. + var buf: [MAX_PATH_BYTES]u8 = undefined; + const out_path = try os.realpathatW(self.fd, pathname, &buf); + + if (out_path.len > out_buffer.len) { + return error.NameTooLong; + } + + mem.copy(u8, out_buffer, out_path); + + return out_buffer[0..out_path.len]; + } + + /// Same as `Dir.realpath` except caller must free the returned memory. + /// See also `Dir.realpath`, `realpathAlloc`. + pub fn realpathAlloc(self: Dir, allocator: *Allocator, pathname: []const u8) ![]u8 { if (builtin.os.tag == .wasi) { @compileError("Dir.realpath is unsupported in WASI"); } diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig index 399b622023ac..7d8885675097 100644 --- a/lib/std/fs/test.zig +++ b/lib/std/fs/test.zig @@ -61,15 +61,31 @@ test "Dir.realpath" { // with a sharing violation. 
file.close(); - const abs_file_path = try tmp_dir.dir.realpath(testing.allocator, "test_file"); - defer testing.allocator.free(abs_file_path); + // First, test non-alloc version + { + var buf1: [fs.MAX_PATH_BYTES]u8 = undefined; + const abs_file_path = try tmp_dir.dir.realpath("test_file", buf1[0..]); + + var buf2: [fs.MAX_PATH_BYTES]u8 = undefined; + const dir_path = try tmp_dir.dir.realpath("", buf2[0..]); + const expected_path = try fs.path.join(testing.allocator, &[_][]const u8{ dir_path, "test_file" }); + defer testing.allocator.free(expected_path); - const dir_path = try tmp_dir.dir.realpath(testing.allocator, ""); - defer testing.allocator.free(dir_path); - const expected_path = try fs.path.join(testing.allocator, &[_][]const u8{ dir_path, "test_file" }); - defer testing.allocator.free(expected_path); + testing.expect(mem.eql(u8, abs_file_path, expected_path)); + } - testing.expect(mem.eql(u8, abs_file_path, expected_path)); + // Next, test alloc version + { + const abs_file_path = try tmp_dir.dir.realpathAlloc(testing.allocator, "test_file"); + defer testing.allocator.free(abs_file_path); + + const dir_path = try tmp_dir.dir.realpathAlloc(testing.allocator, ""); + defer testing.allocator.free(dir_path); + const expected_path = try fs.path.join(testing.allocator, &[_][]const u8{ dir_path, "test_file" }); + defer testing.allocator.free(expected_path); + + testing.expect(mem.eql(u8, abs_file_path, expected_path)); + } } test "readAllAlloc" { @@ -119,7 +135,7 @@ test "directory operations on files" { testing.expectError(error.NotDir, tmp_dir.dir.deleteDir(test_file_name)); if (builtin.os.tag != .wasi) { - const absolute_path = try tmp_dir.dir.realpath(testing.allocator, test_file_name); + const absolute_path = try tmp_dir.dir.realpathAlloc(testing.allocator, test_file_name); defer testing.allocator.free(absolute_path); testing.expectError(error.PathAlreadyExists, fs.makeDirAbsolute(absolute_path)); diff --git a/lib/std/os.zig b/lib/std/os.zig index 
dae97e5cdf9b..5617a90f46d0 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -4074,33 +4074,75 @@ pub fn realpathW(pathname: [*:0]const u16, out_buffer: *[MAX_PATH_BYTES]u8) Real /// `realpath` on the `pathname` argument. /// In Unix, if `fd` was obtained using `std.fs.cwd()` call, reverts to calling /// `std.os.getcwd()` to obtain file descriptor's path. +/// See also `realpath`, `realpathatZ`, and `realpathatW`. pub fn realpathat(fd: fd_t, pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { if (builtin.os.tag == .wasi) { @compileError("realpathat is unsupported in WASI"); } - if (std.fs.path.isAbsolute(pathname)) { - return realpath(pathname, out_buffer); + if (builtin.os.tag == .windows) { + const pathname_w = try windows.sliceToPrefixedFileW(pathname); + return realpathatW(fd, pathname_w.span().ptr, out_buffer); + } + const pathname_c = try toPosixPath(pathname); + return realpathatZ(fd, &pathname_c, out_buffer); +} + +/// Same as `realpathat` except `pathname` is null-terminated. +/// If `fd` was obtained using `std.fs.cwd()` call, reverts to calling +/// `std.os.getcwd()` to obtain file descriptor's path. +/// See also `realpathat`. 
+pub fn realpathatZ(fd: fd_t, pathname: [*:0]const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { + if (builtin.os.tag == .windows) { + const pathname_w = try windows.cStrToPrefixedFileW(pathname); + return realpathatW(fd, pathname_w.span().ptr, out_buffer); + } + if (std.fs.path.isAbsolutePosixZ(pathname)) { + return realpathZ(pathname, out_buffer); } var buffer: [MAX_PATH_BYTES]u8 = undefined; var fd_path: []const u8 = undefined; - if (@hasDecl(@This(), "AT_FDCWD")) { - if (fd == @field(@This(), "AT_FDCWD")) { - fd_path = getcwd(buffer[0..]) catch |err| { - return switch (err) { - GetCwdError.NameTooLong => error.NameTooLong, - GetCwdError.CurrentWorkingDirectoryUnlinked => error.FileNotFound, - else => |e| e, - }; + if (fd == @field(@This(), "AT_FDCWD")) { + fd_path = getcwd(buffer[0..]) catch |err| { + return switch (err) { + GetCwdError.NameTooLong => error.NameTooLong, + GetCwdError.CurrentWorkingDirectoryUnlinked => error.FileNotFound, + else => |e| e, }; - } else { - fd_path = try fdPath(fd, &buffer); - } + }; } else { fd_path = try fdPath(fd, &buffer); } - const total_len = fd_path.len + pathname.len + 1; // +1 to account for path separator + const total_len = fd_path.len + mem.lenZ(pathname) + 1; // +1 to account for path separator + if (total_len >= MAX_PATH_BYTES) { + return error.NameTooLong; + } + + var unnormalized: [MAX_PATH_BYTES]u8 = undefined; + mem.copy(u8, unnormalized[0..], fd_path); + unnormalized[fd_path.len] = std.fs.path.sep; + mem.copy(u8, unnormalized[fd_path.len + 1 ..], mem.spanZ(pathname)); + + const unnormalized_c = try toPosixPath(unnormalized[0..total_len]); + + return realpathZ(&unnormalized_c, out_buffer); +} + +/// Windows-only. Same as `realpathat` except `pathname` is null-terminated, WTF16 encoded. +/// See also `realpathat`. 
+pub fn realpathatW(fd: fd_t, pathname: [*:0]const u16, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { + if (std.fs.path.isAbsoluteWindowsW(pathname)) { + return realpathW(pathname, out_buffer); + } + + var buffer: [MAX_PATH_BYTES]u8 = undefined; + const fd_path = try fdPath(fd, &buffer); + + // Convert UTF16LE to UTF8 + var pathname_u8: [MAX_PATH_BYTES]u8 = undefined; + const end_index = std.unicode.utf16leToUtf8(pathname_u8[0..], wide_slice[start_index..]) catch unreachable; + const total_len = fd_path.len + end_index + 1; // +1 to account for path separator if (total_len >= MAX_PATH_BYTES) { return error.NameTooLong; } @@ -4108,9 +4150,11 @@ pub fn realpathat(fd: fd_t, pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u var unnormalized: [MAX_PATH_BYTES]u8 = undefined; mem.copy(u8, unnormalized[0..], fd_path); unnormalized[fd_path.len] = std.fs.path.sep; - mem.copy(u8, unnormalized[fd_path.len + 1 ..], pathname); + mem.copy(u8, unnormalized[fd_path.len + 1 ..], pathname_u8[0..end_index]); + + const unnormalized_w = try windows.sliceToPrefixedFileW(unnormalized[0..total_len]); - return realpath(unnormalized[0..total_len], out_buffer); + return realpathW(unnormalized_w.span().ptr, out_buffer); } /// Spurious wakeups are possible and no precision of timing is guaranteed. 
From 4f2f19aba6f95d7e6cdda72f32eb1052c758c06d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jun 2020 13:55:49 +0200 Subject: [PATCH 06/14] Fix compilation error in realpathatW --- lib/std/os.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/os.zig b/lib/std/os.zig index 5617a90f46d0..b44a1b2987fb 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -4141,7 +4141,7 @@ pub fn realpathatW(fd: fd_t, pathname: [*:0]const u16, out_buffer: *[MAX_PATH_BY // Convert UTF16LE to UTF8 var pathname_u8: [MAX_PATH_BYTES]u8 = undefined; - const end_index = std.unicode.utf16leToUtf8(pathname_u8[0..], wide_slice[start_index..]) catch unreachable; + const end_index = std.unicode.utf16leToUtf8(pathname_u8[0..], mem.spanZ(pathname)) catch unreachable; const total_len = fd_path.len + end_index + 1; // +1 to account for path separator if (total_len >= MAX_PATH_BYTES) { return error.NameTooLong; From 5b53746abec03f65f09fe24291e16411e807a2e5 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 2 Jul 2020 21:31:03 +0200 Subject: [PATCH 07/14] Update lib/std/fs.zig Co-authored-by: Joachim Schmidt --- lib/std/fs.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 4a4a39c8b48d..37b57d27128d 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -1003,7 +1003,7 @@ pub const Dir = struct { // paths. musl supports passing NULL but restricts the output to PATH_MAX // anyway. var buf: [MAX_PATH_BYTES]u8 = undefined; - return mem.dupe(allocator, u8, try os.realpathat(self.fd, pathname, &buf)); + return allocator.dupe(u8, try os.realpathat(self.fd, pathname, &buf)); } /// Changes the current working directory to the open directory handle. 
From 198810836fec642fda28c6148fa2ac9ef793275f Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 2 Jul 2020 20:09:43 +0200 Subject: [PATCH 08/14] Use Dir.realpathAlloc instead of fs.path.resolve On non-capability-oriented hosts (i.e., excluding WASI), use `Dir.realpathAlloc` to resolve cached path. Previously, we relied on `std.fs.path.resolve` to do the path resolution which could yield erroneous results since `std.fs.path.resolve` would not actually issue any syscalls, and hence, would not correctly resolve any symlinks. --- lib/std/cache_hash.zig | 87 +++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 53 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 257f407826a9..96ef648673f8 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -8,6 +8,7 @@ const testing = std.testing; const mem = std.mem; const fmt = std.fmt; const Allocator = std.mem.Allocator; +const tmpDir = testing.tmpDir; const base64_encoder = fs.base64_encoder; const base64_decoder = fs.base64_decoder; @@ -41,6 +42,7 @@ pub const File = struct { pub const CacheHash = struct { allocator: *Allocator, blake3: Blake3, + work_dir: fs.Dir, manifest_dir: fs.Dir, manifest_file: ?fs.File, manifest_dirty: bool, @@ -48,11 +50,12 @@ pub const CacheHash = struct { b64_digest: [BASE64_DIGEST_LEN]u8, /// Be sure to call release after successful initialization. 
- pub fn init(allocator: *Allocator, dir: fs.Dir, manifest_dir_path: []const u8) !CacheHash { + pub fn init(allocator: *Allocator, work_dir: fs.Dir, manifest_dir_path: []const u8) !CacheHash { return CacheHash{ .allocator = allocator, .blake3 = Blake3.init(), - .manifest_dir = try dir.makeOpenPath(manifest_dir_path, .{}), + .work_dir = work_dir, + .manifest_dir = try work_dir.makeOpenPath(manifest_dir_path, .{}), .manifest_file = null, .manifest_dirty = false, .files = ArrayList(File).init(allocator), @@ -100,7 +103,7 @@ pub const CacheHash = struct { assert(self.manifest_file == null); try self.files.ensureCapacity(self.files.items.len + 1); - const resolved_path = try fs.path.resolve(self.allocator, &[_][]const u8{file_path}); + const resolved_path = try self.work_dir.realpathAlloc(self.allocator, file_path); const idx = self.files.items.len; self.files.addOneAssumeCapacity().* = .{ @@ -210,7 +213,7 @@ pub const CacheHash = struct { cache_hash_file.path = try self.allocator.dupe(u8, file_path); } - const this_file = fs.cwd().openFile(cache_hash_file.path.?, .{ .read = true }) catch { + const this_file = self.work_dir.openFile(cache_hash_file.path.?, .{ .read = true }) catch { return error.CacheUnavailable; }; defer this_file.close(); @@ -276,7 +279,7 @@ pub const CacheHash = struct { } fn populateFileHash(self: *CacheHash, ch_file: *File) !void { - const file = try fs.cwd().openFile(ch_file.path.?, .{}); + const file = try self.work_dir.openFile(ch_file.path.?, .{}); defer file.close(); ch_file.stat = try file.stat(); @@ -322,7 +325,7 @@ pub const CacheHash = struct { pub fn addFilePostFetch(self: *CacheHash, file_path: []const u8, max_file_size: usize) ![]u8 { assert(self.manifest_file != null); - const resolved_path = try fs.path.resolve(self.allocator, &[_][]const u8{file_path}); + const resolved_path = try self.work_dir.realpathAlloc(self.allocator, file_path); errdefer self.allocator.free(resolved_path); const new_ch_file = try self.files.addOne(); @@ -347,7 
+350,7 @@ pub const CacheHash = struct { pub fn addFilePost(self: *CacheHash, file_path: []const u8) !void { assert(self.manifest_file != null); - const resolved_path = try fs.path.resolve(self.allocator, &[_][]const u8{file_path}); + const resolved_path = try self.work_dir.realpathAlloc(self.allocator, file_path); errdefer self.allocator.free(resolved_path); const new_ch_file = try self.files.addOne(); @@ -470,16 +473,13 @@ fn isProblematicTimestamp(fs_clock: i128) bool { } test "cache file and then recall it" { - if (std.Target.current.os.tag == .wasi) { - // https://github.com/ziglang/zig/issues/5437 - return error.SkipZigTest; - } - const cwd = fs.cwd(); + var tmp = tmpDir(.{}); + defer tmp.cleanup(); const temp_file = "test.txt"; const temp_manifest_dir = "temp_manifest_dir"; - try cwd.writeFile(temp_file, "Hello, world!\n"); + try tmp.dir.writeFile(temp_file, "Hello, world!\n"); while (isProblematicTimestamp(std.time.nanoTimestamp())) { std.time.sleep(1); @@ -489,7 +489,7 @@ test "cache file and then recall it" { var digest2: [BASE64_DIGEST_LEN]u8 = undefined; { - var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + var ch = try CacheHash.init(testing.allocator, tmp.dir, temp_manifest_dir); defer ch.release(); ch.add(true); @@ -503,7 +503,7 @@ test "cache file and then recall it" { digest1 = ch.final(); } { - var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + var ch = try CacheHash.init(testing.allocator, tmp.dir, temp_manifest_dir); defer ch.release(); ch.add(true); @@ -516,9 +516,6 @@ test "cache file and then recall it" { } testing.expectEqual(digest1, digest2); - - try cwd.deleteTree(temp_manifest_dir); - try cwd.deleteFile(temp_file); } test "give problematic timestamp" { @@ -534,18 +531,15 @@ test "give nonproblematic timestamp" { } test "check that changing a file makes cache fail" { - if (std.Target.current.os.tag == .wasi) { - // https://github.com/ziglang/zig/issues/5437 - return error.SkipZigTest; - } - 
const cwd = fs.cwd(); + var tmp = tmpDir(.{}); + defer tmp.cleanup(); const temp_file = "cache_hash_change_file_test.txt"; const temp_manifest_dir = "cache_hash_change_file_manifest_dir"; const original_temp_file_contents = "Hello, world!\n"; const updated_temp_file_contents = "Hello, world; but updated!\n"; - try cwd.writeFile(temp_file, original_temp_file_contents); + try tmp.dir.writeFile(temp_file, original_temp_file_contents); while (isProblematicTimestamp(std.time.nanoTimestamp())) { std.time.sleep(1); @@ -555,7 +549,7 @@ test "check that changing a file makes cache fail" { var digest2: [BASE64_DIGEST_LEN]u8 = undefined; { - var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + var ch = try CacheHash.init(testing.allocator, tmp.dir, temp_manifest_dir); defer ch.release(); ch.add("1234"); @@ -569,14 +563,14 @@ test "check that changing a file makes cache fail" { digest1 = ch.final(); } - try cwd.writeFile(temp_file, updated_temp_file_contents); + try tmp.dir.writeFile(temp_file, updated_temp_file_contents); while (isProblematicTimestamp(std.time.nanoTimestamp())) { std.time.sleep(1); } { - var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + var ch = try CacheHash.init(testing.allocator, tmp.dir, temp_manifest_dir); defer ch.release(); ch.add("1234"); @@ -592,25 +586,19 @@ test "check that changing a file makes cache fail" { } testing.expect(!mem.eql(u8, digest1[0..], digest2[0..])); - - try cwd.deleteTree(temp_manifest_dir); - try cwd.deleteFile(temp_file); } test "no file inputs" { - if (std.Target.current.os.tag == .wasi) { - // https://github.com/ziglang/zig/issues/5437 - return error.SkipZigTest; - } - const cwd = fs.cwd(); + var tmp = tmpDir(.{}); + defer tmp.cleanup(); + const temp_manifest_dir = "no_file_inputs_manifest_dir"; - defer cwd.deleteTree(temp_manifest_dir) catch unreachable; var digest1: [BASE64_DIGEST_LEN]u8 = undefined; var digest2: [BASE64_DIGEST_LEN]u8 = undefined; { - var ch = try 
CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + var ch = try CacheHash.init(testing.allocator, tmp.dir, temp_manifest_dir); defer ch.release(); ch.add("1234"); @@ -621,7 +609,7 @@ test "no file inputs" { digest1 = ch.final(); } { - var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + var ch = try CacheHash.init(testing.allocator, tmp.dir, temp_manifest_dir); defer ch.release(); ch.add("1234"); @@ -633,18 +621,15 @@ test "no file inputs" { } test "CacheHashes with files added after initial hash work" { - if (std.Target.current.os.tag == .wasi) { - // https://github.com/ziglang/zig/issues/5437 - return error.SkipZigTest; - } - const cwd = fs.cwd(); + var tmp = tmpDir(.{}); + defer tmp.cleanup(); const temp_file1 = "cache_hash_post_file_test1.txt"; const temp_file2 = "cache_hash_post_file_test2.txt"; const temp_manifest_dir = "cache_hash_post_file_manifest_dir"; - try cwd.writeFile(temp_file1, "Hello, world!\n"); - try cwd.writeFile(temp_file2, "Hello world the second!\n"); + try tmp.dir.writeFile(temp_file1, "Hello, world!\n"); + try tmp.dir.writeFile(temp_file2, "Hello world the second!\n"); while (isProblematicTimestamp(std.time.nanoTimestamp())) { std.time.sleep(1); @@ -655,7 +640,7 @@ test "CacheHashes with files added after initial hash work" { var digest3: [BASE64_DIGEST_LEN]u8 = undefined; { - var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + var ch = try CacheHash.init(testing.allocator, tmp.dir, temp_manifest_dir); defer ch.release(); ch.add("1234"); @@ -669,7 +654,7 @@ test "CacheHashes with files added after initial hash work" { digest1 = ch.final(); } { - var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + var ch = try CacheHash.init(testing.allocator, tmp.dir, temp_manifest_dir); defer ch.release(); ch.add("1234"); @@ -680,14 +665,14 @@ test "CacheHashes with files added after initial hash work" { testing.expect(mem.eql(u8, &digest1, &digest2)); // Modify the file added after 
initial hash - try cwd.writeFile(temp_file2, "Hello world the second, updated\n"); + try tmp.dir.writeFile(temp_file2, "Hello world the second, updated\n"); while (isProblematicTimestamp(std.time.nanoTimestamp())) { std.time.sleep(1); } { - var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + var ch = try CacheHash.init(testing.allocator, tmp.dir, temp_manifest_dir); defer ch.release(); ch.add("1234"); @@ -702,8 +687,4 @@ test "CacheHashes with files added after initial hash work" { } testing.expect(!mem.eql(u8, &digest1, &digest3)); - - try cwd.deleteTree(temp_manifest_dir); - try cwd.deleteFile(temp_file1); - try cwd.deleteFile(temp_file2); } From 76d3b83914316c47316a7f31ae304ffc290675c6 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 2 Jul 2020 21:54:17 +0200 Subject: [PATCH 09/14] Implement Dir.realpathWasi and os.realpathatWasi This commit adds `Dir.realpathWasi` and `os.realpathatWasi`. The latter resolves the path, but only up to the `Dir` it is relative to, ensuring the sandboxing rules are observed. In fact, it proceeds in two steps: 1. Try opening the path as-is, and check if that works OK. Then, this will imply the path was valid under WASI's capability-oriented security model, and no path traversal attack was attempted. 2. Since we've verified we're OK wrt sandboxing rules, we can analyze the path component-by-component working out the canonicalized path in the process. --- lib/std/fs.zig | 20 ++++++++++++++++---- lib/std/os.zig | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 37b57d27128d..bc84f2f6d5f6 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -937,7 +937,7 @@ pub const Dir = struct { /// See also `realpath`.
pub fn realpath(self: Dir, pathname: []const u8, out_buffer: []u8) ![]u8 { if (builtin.os.tag == .wasi) { - @compileError("Dir.realpath is unsupported in WASI"); + return self.realpathWasi(pathname, out_buffer); } if (builtin.os.tag == .windows) { const pathname_w = try os.windows.sliceToPrefixedFileW(pathname); @@ -947,6 +947,21 @@ pub const Dir = struct { return self.realpathZ(&pathname_c, out_buffer); } + pub fn realpathWasi(self: Dir, pathname: []const u8, out_buffer: []u8) ![]u8 { + // Use of MAX_PATH_BYTES here is valid as the realpath function does not + // have a variant that takes an arbitrary-size buffer. + var buf: [MAX_PATH_BYTES]u8 = undefined; + const out_path = try os.realpathatWasi(self.fd, pathname, &buf); + + if (out_path.len > out_buffer.len) { + return error.NameTooLong; + } + + mem.copy(u8, out_buffer, out_path); + + return out_buffer[0..out_path.len]; + } + /// Same as `Dir.realpath` except `pathname` is null-terminated. /// See also `Dir.realpath`, `realpathZ`. pub fn realpathZ(self: Dir, pathname: [*:0]const u8, out_buffer: []u8) ![]u8 { @@ -993,9 +1008,6 @@ pub const Dir = struct { /// Same as `Dir.realpath` except caller must free the returned memory. /// See also `Dir.realpath`, `realpathAlloc`. pub fn realpathAlloc(self: Dir, allocator: *Allocator, pathname: []const u8) ![]u8 { - if (builtin.os.tag == .wasi) { - @compileError("Dir.realpath is unsupported in WASI"); - } // Use of MAX_PATH_BYTES here is valid as the realpath function does not // have a variant that takes an arbitrary-size buffer. // TODO(#4812): Consider reimplementing realpath or using the POSIX.1-2008 diff --git a/lib/std/os.zig b/lib/std/os.zig index b44a1b2987fb..2256957ebe87 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -4077,7 +4077,7 @@ pub fn realpathW(pathname: [*:0]const u16, out_buffer: *[MAX_PATH_BYTES]u8) Real /// See also `realpath`, `realpathatZ`, and `realpathatW`. 
pub fn realpathat(fd: fd_t, pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { if (builtin.os.tag == .wasi) { - @compileError("realpathat is unsupported in WASI"); + return realpathatWasi(fd, pathname, out_buffer); } if (builtin.os.tag == .windows) { const pathname_w = try windows.sliceToPrefixedFileW(pathname); @@ -4087,6 +4087,51 @@ pub fn realpathat(fd: fd_t, pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u return realpathatZ(fd, &pathname_c, out_buffer); } +pub fn realpathatWasi(fd: fd_t, pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { + // We'll proceed in two steps. + // First, we'll try opening the path as-is, and check if that works OK. + // Then, this will imply the path was valid under WASI's capability-oriented + // security model, and no path traversal attack was attempted. + // Second, since we've verified we're OK wrt sandboxing rules, we can + // analyze the path component-by-component working out the canonicalized path + // in the process. + _ = openatWasi(fd, pathname, 0x0, 0x0, 0x0, 0x0) catch |err| switch (err) { + error.FileLocksNotSupported => unreachable, + else => |e| return e, + }; + + var result_index: usize = 0; + var path_it = mem.tokenize(pathname, "/"); + while (path_it.next()) |component| { + if (mem.eql(u8, component, ".")) { + continue; + } else if (mem.eql(u8, component, "..")) { + while (true) { + if (result_index == 0) + break; + result_index -= 1; + if (out_buffer[result_index] == '/') + break; + } + } else { + if (result_index > 0) { + out_buffer[result_index] = '/'; + result_index += 1; + } + // TODO handle symlinks + mem.copy(u8, out_buffer[result_index..], component); + result_index += component.len; + } + } + + if (result_index == 0) { + out_buffer[result_index] = '.'; + result_index += 1; + } + + return out_buffer[0..result_index]; +} + /// Same as `realpathat` except `pathname` is null-terminated. 
/// If `fd` was obtained using `std.fs.cwd()` call, reverts to calling /// `std.os.getcwd()` to obtain file descriptor's path. From 381bba246718de0648c0183ea4e42ef207ae40ca Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 2 Jul 2020 23:29:21 +0200 Subject: [PATCH 10/14] Handle symlinks in os.realpathatWasi and add docs --- lib/std/fs.zig | 50 ++++++++++++-------- lib/std/os.zig | 124 ++++++++++++++++++++++++++++--------------------- 2 files changed, 102 insertions(+), 72 deletions(-) diff --git a/lib/std/fs.zig b/lib/std/fs.zig index bc84f2f6d5f6..c0bb804ffb47 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -931,10 +931,16 @@ pub const Dir = struct { return self.openDir(sub_path, open_dir_options); } - /// This function returns the canonicalized absolute pathname of `pathname` - /// relative to this `Dir`. If `pathname` is absolute, ignores this `Dir` handle - /// and returns the canonicalized absolute pathname of `pathname` argument. - /// See also `realpath`. + /// This function behaves differently on different hosts. + /// In WASI, returns the canonicalized relative pathname of `pathname` relative + /// to this `Dir`. If `pathname` is absolute, or an attempt is made at + /// escaping beyond this `Dir`, return `error.AccessDenied`. + /// On other hosts, this function returns the canonicalized absolute pathname of + /// `pathname` relative to this `Dir`. If `pathname` is absolute, ignores this + /// `Dir` handle and returns the canonicalized absolute pathname of `pathname` + /// argument. + /// See also `Dir.realpathZ`, `Dir.realpathW`, `Dir.realpathWasi`, + /// and `Dir.realpathAlloc`. 
pub fn realpath(self: Dir, pathname: []const u8, out_buffer: []u8) ![]u8 { if (builtin.os.tag == .wasi) { return self.realpathWasi(pathname, out_buffer); @@ -947,21 +953,6 @@ pub const Dir = struct { return self.realpathZ(&pathname_c, out_buffer); } - pub fn realpathWasi(self: Dir, pathname: []const u8, out_buffer: []u8) ![]u8 { - // Use of MAX_PATH_BYTES here is valid as the realpath function does not - // have a variant that takes an arbitrary-size buffer. - var buf: [MAX_PATH_BYTES]u8 = undefined; - const out_path = try os.realpathatWasi(self.fd, pathname, &buf); - - if (out_path.len > out_buffer.len) { - return error.NameTooLong; - } - - mem.copy(u8, out_buffer, out_path); - - return out_buffer[0..out_path.len]; - } - /// Same as `Dir.realpath` except `pathname` is null-terminated. /// See also `Dir.realpath`, `realpathZ`. pub fn realpathZ(self: Dir, pathname: [*:0]const u8, out_buffer: []u8) ![]u8 { @@ -1005,8 +996,27 @@ pub const Dir = struct { return out_buffer[0..out_path.len]; } + /// WASI-only. Returns the canonicalized relative pathname of `pathname` relative + /// to this `Dir`. If `pathname` is absolute, or an attempt is made at + /// escaping beyond this `Dir`, return `error.AccessDenied`. + /// See also `Dir.realpath`, and `Dir.realpathAlloc`. + pub fn realpathWasi(self: Dir, pathname: []const u8, out_buffer: []u8) ![]u8 { + // Use of MAX_PATH_BYTES here is valid as the realpath function does not + // have a variant that takes an arbitrary-size buffer. + var buf: [MAX_PATH_BYTES]u8 = undefined; + const out_path = try os.realpathatWasi(self.fd, pathname, &buf); + + if (out_path.len > out_buffer.len) { + return error.NameTooLong; + } + + mem.copy(u8, out_buffer, out_path); + + return out_buffer[0..out_path.len]; + } + /// Same as `Dir.realpath` except caller must free the returned memory. - /// See also `Dir.realpath`, `realpathAlloc`. + /// See also `Dir.realpath`. 
pub fn realpathAlloc(self: Dir, allocator: *Allocator, pathname: []const u8) ![]u8 { // Use of MAX_PATH_BYTES here is valid as the realpath function does not // have a variant that takes an arbitrary-size buffer. diff --git a/lib/std/os.zig b/lib/std/os.zig index 2256957ebe87..c7349de84d5c 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -4068,13 +4068,17 @@ pub fn realpathW(pathname: [*:0]const u16, out_buffer: *[MAX_PATH_BYTES]u8) Real return fdPath(h_file, out_buffer); } -/// Similar to `realpath`, however, returns the canonicalized absolute pathname of -/// a `pathname` relative to a file descriptor `fd`. -/// If `pathname` is an absolute path, ignores `fd` and reverts to calling -/// `realpath` on the `pathname` argument. -/// In Unix, if `fd` was obtained using `std.fs.cwd()` call, reverts to calling -/// `std.os.getcwd()` to obtain file descriptor's path. -/// See also `realpath`, `realpathatZ`, and `realpathatW`. +/// This function will depend differently on different hosts. +/// In WASI, it calls `realpathatWasi` which observes WASI capability-oriented +/// security model and returns `error.AccessDenied` if the `pathname` is absolute +/// or an attempt is made at escaping beyond the input `fd` file descriptor. +/// On all other hosts, this function is similar to `realpath`, however, returns +/// the canonicalized absolute pathname of a `pathname` relative to a file descriptor +/// `fd`. If `pathname` is an absolute path, ignores `fd` and reverts to calling +/// `realpath` on the `pathname` argument. In particular, on Unix, if `fd` was +/// obtained using `std.fs.cwd()` call, reverts to calling `std.os.getcwd()` to +/// obtain file descriptor's path. +/// See also `realpath`, `realpathatZ`, `realpathatW`, and `realpathatWasi`. 
pub fn realpathat(fd: fd_t, pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { if (builtin.os.tag == .wasi) { return realpathatWasi(fd, pathname, out_buffer); @@ -4087,51 +4091,6 @@ pub fn realpathat(fd: fd_t, pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u return realpathatZ(fd, &pathname_c, out_buffer); } -pub fn realpathatWasi(fd: fd_t, pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { - // We'll proceed in two steps. - // First, we'll try opening the path as-is, and check if that works OK. - // Then, this will imply the path was valid under WASI's capability-oriented - // security model, and no path traversal attack was attempted. - // Second, since we've verified we're OK wrt sandboxing rules, we can - // analyze the path component-by-component working out the canonicalized path - // in the process. - _ = openatWasi(fd, pathname, 0x0, 0x0, 0x0, 0x0) catch |err| switch (err) { - error.FileLocksNotSupported => unreachable, - else => |e| return e, - }; - - var result_index: usize = 0; - var path_it = mem.tokenize(pathname, "/"); - while (path_it.next()) |component| { - if (mem.eql(u8, component, ".")) { - continue; - } else if (mem.eql(u8, component, "..")) { - while (true) { - if (result_index == 0) - break; - result_index -= 1; - if (out_buffer[result_index] == '/') - break; - } - } else { - if (result_index > 0) { - out_buffer[result_index] = '/'; - result_index += 1; - } - // TODO handle symlinks - mem.copy(u8, out_buffer[result_index..], component); - result_index += component.len; - } - } - - if (result_index == 0) { - out_buffer[result_index] = '.'; - result_index += 1; - } - - return out_buffer[0..result_index]; -} - /// Same as `realpathat` except `pathname` is null-terminated. /// If `fd` was obtained using `std.fs.cwd()` call, reverts to calling /// `std.os.getcwd()` to obtain file descriptor's path. 
@@ -4202,6 +4161,67 @@ pub fn realpathatW(fd: fd_t, pathname: [*:0]const u16, out_buffer: *[MAX_PATH_BY return realpathW(unnormalized_w.span().ptr, out_buffer); } +/// WASI-only. Similar to `realpathat` except it returns the canonicalized relative pathname +/// of a `pathname` relative to a file descriptor `fd`. +/// Since this is targeting WASI, it observes WASI capability-oriented security model and +/// returns `error.AccessDenied` if the `pathname` is absolute or an attempt is made at +/// escaping beyond the input `fd` file descriptor. +pub fn realpathatWasi(fd: fd_t, pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { + // We'll proceed in two steps. + // First, we'll try opening the path as-is, and check if that works OK. + // Then, this will imply the path was valid under WASI's capability-oriented + // security model, and no path traversal attack was attempted. + // Second, since we've verified we're OK wrt sandboxing rules, we can + // analyze the path component-by-component working out the canonicalized path + // in the process. 
+ _ = openatWasi(fd, pathname, 0x0, 0x0, 0x0, 0x0) catch |err| switch (err) { + error.FileLocksNotSupported => unreachable, + else => |e| return e, + }; + + var result_index: usize = 0; + var path_it = mem.tokenize(pathname, "/"); + while (path_it.next()) |component| { + if (mem.eql(u8, component, ".")) { + continue; + } else if (mem.eql(u8, component, "..")) { + while (true) { + if (result_index == 0) + break; + result_index -= 1; + if (out_buffer[result_index] == '/') + break; + } + } else { + if (result_index > 0) { + out_buffer[result_index] = '/'; + result_index += 1; + } + + mem.copy(u8, out_buffer[result_index..], component); + const tmp_result_index = result_index + component.len; + + const stat = try fstatatWasi(fd, out_buffer[0..tmp_result_index], 0x0); + if (stat.filetype == wasi.FILETYPE_SYMBOLIC_LINK) { + var buf: [MAX_PATH_BYTES]u8 = undefined; + const target = try readlinkatWasi(fd, out_buffer[0..tmp_result_index], buf[0..]); + + mem.copy(u8, out_buffer[0..], target); + result_index = target.len; + } else { + result_index = tmp_result_index; + } + } + } + + if (result_index == 0) { + out_buffer[result_index] = '.'; + result_index += 1; + } + + return out_buffer[0..result_index]; +} + /// Spurious wakeups are possible and no precision of timing is guaranteed. 
pub fn nanosleep(seconds: u64, nanoseconds: u64) void { var req = timespec{ From 171a7550b86fe2c236db29752a5208975b6d57bd Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 3 Jul 2020 00:10:50 +0200 Subject: [PATCH 11/14] Handle too long a path in link resolution --- lib/std/os.zig | 1 + lib/std/os/test.zig | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/lib/std/os.zig b/lib/std/os.zig index c7349de84d5c..10f59cd938c9 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -4193,6 +4193,7 @@ pub fn realpathatWasi(fd: fd_t, pathname: []const u8, out_buffer: *[MAX_PATH_BYT break; } } else { + if (result_index >= MAX_PATH_BYTES) return error.NameTooLong; if (result_index > 0) { out_buffer[result_index] = '/'; result_index += 1; diff --git a/lib/std/os/test.zig b/lib/std/os/test.zig index 7266d0449af6..58ea76be517f 100644 --- a/lib/std/os/test.zig +++ b/lib/std/os/test.zig @@ -64,6 +64,29 @@ test "realpathat" { testing.expectError(error.FileNotFound, os.realpathat(dir.fd, "definitely_bogus_does_not_exist1234", &buf2)); } +test "realpathatWasi" { + if (builtin.os.tag != .wasi) return error.SkipZigTest; + + var tmp = tmpDir(.{}); + defer tmp.cleanup(); + + // create some dirs + try tmp.dir.makeDir("a"); + try tmp.dir.makeDir("b"); + + // create file in "a/" + try tmp.dir.writeFile("a/file.txt", "nonsense"); + + // create a symlink "b/c" -> "a" + try os.symlinkat("a", tmp.dir.fd, "b/c"); + + // get realpath of "b/c/file.txt" + var buf: [fs.MAX_PATH_BYTES]u8 = undefined; + const realpath = try os.realpathatWasi(tmp.dir.fd, "b/c/file.txt", &buf); + + testing.expect(mem.eql(u8, "a/file.txt", realpath)); +} + test "readlinkat" { // enable when `readlinkat` and `symlinkat` are implemented on Windows if (builtin.os.tag == .windows) return error.SkipZigTest; From 9f1a93026ab18b1a8bebfc66cffd646bf42b940b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 3 Jul 2020 00:57:29 +0200 Subject: [PATCH 12/14] Fix handling symlinks in realpathatWasi --- 
lib/std/os.zig | 13 +++++++++++-- lib/std/os/test.zig | 20 +++++++++++--------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/lib/std/os.zig b/lib/std/os.zig index 10f59cd938c9..6ac9fb8f8064 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -4179,6 +4179,10 @@ pub fn realpathatWasi(fd: fd_t, pathname: []const u8, out_buffer: *[MAX_PATH_BYT else => |e| return e, }; + return realpathatWasiUnchecked(fd, pathname, out_buffer); +} + +fn realpathatWasiUnchecked(fd: fd_t, pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { var result_index: usize = 0; var path_it = mem.tokenize(pathname, "/"); while (path_it.next()) |component| { @@ -4206,9 +4210,14 @@ pub fn realpathatWasi(fd: fd_t, pathname: []const u8, out_buffer: *[MAX_PATH_BYT if (stat.filetype == wasi.FILETYPE_SYMBOLIC_LINK) { var buf: [MAX_PATH_BYTES]u8 = undefined; const target = try readlinkatWasi(fd, out_buffer[0..tmp_result_index], buf[0..]); + mem.copy(u8, out_buffer[result_index..], target); + + // OK, we've dereferenced the symlink, now, call `realpathatWasi` recursively + // to work out the target's canonical form. + const target_canon = try realpathatWasiUnchecked(fd, out_buffer[0 .. 
result_index + target.len], &buf); + mem.copy(u8, out_buffer[0..], target_canon); - mem.copy(u8, out_buffer[0..], target); - result_index = target.len; + result_index = target_canon.len; } else { result_index = tmp_result_index; } diff --git a/lib/std/os/test.zig b/lib/std/os/test.zig index 58ea76be517f..057e645a1e21 100644 --- a/lib/std/os/test.zig +++ b/lib/std/os/test.zig @@ -71,20 +71,22 @@ test "realpathatWasi" { defer tmp.cleanup(); // create some dirs - try tmp.dir.makeDir("a"); - try tmp.dir.makeDir("b"); + try tmp.dir.makePath("a/b"); - // create file in "a/" - try tmp.dir.writeFile("a/file.txt", "nonsense"); + // create file in "a/b" + try tmp.dir.writeFile("a/b/file.txt", "nonsense"); - // create a symlink "b/c" -> "a" - try os.symlinkat("a", tmp.dir.fd, "b/c"); + // create a symlink "a/c" -> "a/b" + var subdir = try tmp.dir.openDir("a", .{}); + defer subdir.close(); - // get realpath of "b/c/file.txt" + try os.symlinkat("b", subdir.fd, "c"); + + // get realpath of "a/c/file.txt" var buf: [fs.MAX_PATH_BYTES]u8 = undefined; - const realpath = try os.realpathatWasi(tmp.dir.fd, "b/c/file.txt", &buf); + const realpath = try os.realpathatWasi(tmp.dir.fd, "a/c/file.txt", &buf); - testing.expect(mem.eql(u8, "a/file.txt", realpath)); + testing.expect(mem.eql(u8, "a/b/file.txt", realpath)); } test "readlinkat" { From 9c9062f0808eacfc81e0e68b86bd3aa848d4631e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 3 Jul 2020 17:11:28 +0200 Subject: [PATCH 13/14] Tweak std.os and std.fs tests --- lib/std/fs/test.zig | 36 ++++++++++++++++------------- lib/std/os/test.zig | 55 ++++++++++++++++++++++++++++----------------- 2 files changed, 55 insertions(+), 36 deletions(-) diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig index 7d8885675097..0c385a4c7a59 100644 --- a/lib/std/fs/test.zig +++ b/lib/std/fs/test.zig @@ -51,8 +51,6 @@ fn contains(entries: *const std.ArrayList(Dir.Entry), el: Dir.Entry) bool { } test "Dir.realpath" { - if (builtin.os.tag == .wasi) return 
error.SkipZigTest; - var tmp_dir = tmpDir(.{}); defer tmp_dir.cleanup(); @@ -64,27 +62,35 @@ test "Dir.realpath" { // First, test non-alloc version { var buf1: [fs.MAX_PATH_BYTES]u8 = undefined; - const abs_file_path = try tmp_dir.dir.realpath("test_file", buf1[0..]); + const file_path = try tmp_dir.dir.realpath("test_file", buf1[0..]); - var buf2: [fs.MAX_PATH_BYTES]u8 = undefined; - const dir_path = try tmp_dir.dir.realpath("", buf2[0..]); - const expected_path = try fs.path.join(testing.allocator, &[_][]const u8{ dir_path, "test_file" }); - defer testing.allocator.free(expected_path); + if (builtin.os.tag == .wasi) { + testing.expect(mem.eql(u8, file_path, "test_file")); + } else { + var buf2: [fs.MAX_PATH_BYTES]u8 = undefined; + const dir_path = try tmp_dir.dir.realpath("", buf2[0..]); + const expected_path = try fs.path.join(testing.allocator, &[_][]const u8{ dir_path, "test_file" }); + defer testing.allocator.free(expected_path); - testing.expect(mem.eql(u8, abs_file_path, expected_path)); + testing.expect(mem.eql(u8, file_path, expected_path)); + } } // Next, test alloc version { - const abs_file_path = try tmp_dir.dir.realpathAlloc(testing.allocator, "test_file"); - defer testing.allocator.free(abs_file_path); + const file_path = try tmp_dir.dir.realpathAlloc(testing.allocator, "test_file"); + defer testing.allocator.free(file_path); - const dir_path = try tmp_dir.dir.realpathAlloc(testing.allocator, ""); - defer testing.allocator.free(dir_path); - const expected_path = try fs.path.join(testing.allocator, &[_][]const u8{ dir_path, "test_file" }); - defer testing.allocator.free(expected_path); + if (builtin.os.tag == .wasi) { + testing.expect(mem.eql(u8, file_path, "test_file")); + } else { + const dir_path = try tmp_dir.dir.realpathAlloc(testing.allocator, ""); + defer testing.allocator.free(dir_path); + const expected_path = try fs.path.join(testing.allocator, &[_][]const u8{ dir_path, "test_file" }); + defer testing.allocator.free(expected_path); - 
testing.expect(mem.eql(u8, abs_file_path, expected_path)); + testing.expect(mem.eql(u8, file_path, expected_path)); + } } } diff --git a/lib/std/os/test.zig b/lib/std/os/test.zig index 057e645a1e21..1b1944a2f66f 100644 --- a/lib/std/os/test.zig +++ b/lib/std/os/test.zig @@ -16,6 +16,7 @@ const builtin = @import("builtin"); const AtomicRmwOp = builtin.AtomicRmwOp; const AtomicOrder = builtin.AtomicOrder; const tmpDir = std.testing.tmpDir; +const TmpDir = std.testing.TmpDir; const Dir = std.fs.Dir; test "fstatat" { @@ -41,31 +42,46 @@ test "fstatat" { } test "realpathat" { - if (builtin.os.tag == .wasi) return error.SkipZigTest; + var tmp = tmpDir(.{}); + defer tmp.cleanup(); - const cwd = fs.cwd(); + // create some dirs + try tmp.dir.makePath("a/b"); - var buf1: [fs.MAX_PATH_BYTES]u8 = undefined; - const cwd_path = try os.getcwd(&buf1); + try testRealpathat(tmp, "a", "a"); + try testRealpathat(tmp, "a/b/..", "a"); + try testRealpathat(tmp, "a/b", "a/b"); - var buf2: [fs.MAX_PATH_BYTES]u8 = undefined; - const cwd_realpathat = try os.realpathat(cwd.fd, "", &buf2); - testing.expect(mem.eql(u8, cwd_path, cwd_realpathat)); + if (builtin.os.tag == .wasi) { + try testRealpathat(tmp, "a/..", "."); + } else { + try testRealpathat(tmp, "a/..", ""); + } - // Now, open an actual Dir{"."} since on Unix `realpathat` behaves - // in a special way when `fd` equals `cwd.fd` - var dir = try cwd.openDir(".", .{}); - defer dir.close(); + var buf: [fs.MAX_PATH_BYTES]u8 = undefined; + testing.expectError(error.FileNotFound, os.realpathat(tmp.dir.fd, "definitely_bogus_does_not_exist1234", &buf)); +} - const cwd_realpathat2 = try os.realpathat(dir.fd, "", &buf2); - testing.expect(mem.eql(u8, cwd_path, cwd_realpathat2)); +fn testRealpathat(tmp: TmpDir, pathname: []const u8, expected: []const u8) !void { + var buf: [fs.MAX_PATH_BYTES]u8 = undefined; + const given = try os.realpathat(tmp.dir.fd, pathname, &buf); - // Finally, try getting a path for something that doesn't exist - 
testing.expectError(error.FileNotFound, os.realpathat(dir.fd, "definitely_bogus_does_not_exist1234", &buf2)); + if (builtin.os.tag == .wasi) { + testing.expect(mem.eql(u8, given, expected)); + } else { + const absolute_path = blk: { + const relative_path = try fs.path.join(testing.allocator, &[_][]const u8{ "zig-cache", "tmp", tmp.sub_path[0..], expected }); + defer testing.allocator.free(relative_path); + break :blk try fs.realpathAlloc(testing.allocator, relative_path); + }; + defer testing.allocator.free(absolute_path); + testing.expect(mem.eql(u8, given, absolute_path)); + } } -test "realpathatWasi" { - if (builtin.os.tag != .wasi) return error.SkipZigTest; +test "realpathat with symlinks" { + // TODO enable when `symlinkat` is implemented on Windows + if (builtin.os.tag == .windows) return error.SkipZigTest; var tmp = tmpDir(.{}); defer tmp.cleanup(); @@ -83,10 +99,7 @@ test "realpathatWasi" { try os.symlinkat("b", subdir.fd, "c"); // get realpath of "a/c/file.txt" - var buf: [fs.MAX_PATH_BYTES]u8 = undefined; - const realpath = try os.realpathatWasi(tmp.dir.fd, "a/c/file.txt", &buf); - - testing.expect(mem.eql(u8, "a/b/file.txt", realpath)); + try testRealpathat(tmp, "a/c/file.txt", "a/b/file.txt"); } test "readlinkat" { From 7cc613c09dd86bb6490d0d1d84244093d8b45acf Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 4 Jul 2020 11:05:45 +0200 Subject: [PATCH 14/14] Fix std.os.realpathatW to properly canonicalize relative paths This commit fixes `std.os.realpathatW` to properly canonicalize `pathname` if it contains '.' or '..'.
--- lib/std/os.zig | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/lib/std/os.zig b/lib/std/os.zig index 6ac9fb8f8064..81bb71c598a6 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -4145,7 +4145,18 @@ pub fn realpathatW(fd: fd_t, pathname: [*:0]const u16, out_buffer: *[MAX_PATH_BY // Convert UTF16LE to UTF8 var pathname_u8: [MAX_PATH_BYTES]u8 = undefined; - const end_index = std.unicode.utf16leToUtf8(pathname_u8[0..], mem.spanZ(pathname)) catch unreachable; + const end_index = std.unicode.utf16leToUtf8(pathname_u8[0..], mem.spanZ(pathname)) catch |_| { + return error.BadPathName; + }; + + // Check for illegal path characters + for (pathname_u8[0..end_index]) |byte| { + switch (byte) { + '*', '?', '"', '<', '>', '|' => return error.BadPathName, + else => {}, + } + } + const total_len = fd_path.len + end_index + 1; // +1 to account for path separator if (total_len >= MAX_PATH_BYTES) { return error.NameTooLong; @@ -4156,9 +4167,16 @@ pub fn realpathatW(fd: fd_t, pathname: [*:0]const u16, out_buffer: *[MAX_PATH_BY unnormalized[fd_path.len] = std.fs.path.sep; mem.copy(u8, unnormalized[fd_path.len + 1 ..], pathname_u8[0..end_index]); - const unnormalized_w = try windows.sliceToPrefixedFileW(unnormalized[0..total_len]); + // Since we are resolving path relative to some fd, if `pathname` is relative, it may well + // consist of relative components '.' and '..'. Hence, we don't want to prepend '\\?\' which + // forces a fully-qualified path only in all Windows syscalls. Instead, we simply convert `u8` + // to `u16`, and feed that into `realpathW`. + var unnormalized_w: [windows.PATH_MAX_WIDE:0]u16 = undefined; + const len = try std.unicode.utf8ToUtf16Le(unnormalized_w[0..], unnormalized[0..total_len]); + if (len > unnormalized_w.len) return error.NameTooLong; + unnormalized_w[len] = 0; - return realpathW(unnormalized_w.span().ptr, out_buffer); + return realpathW(unnormalized_w[0..len :0].ptr, out_buffer); } /// WASI-only. 
Similar to `realpathat` except it returns the canonicalized relative pathname