diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 257f407826a9..96ef648673f8 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -8,6 +8,7 @@ const testing = std.testing; const mem = std.mem; const fmt = std.fmt; const Allocator = std.mem.Allocator; +const tmpDir = testing.tmpDir; const base64_encoder = fs.base64_encoder; const base64_decoder = fs.base64_decoder; @@ -41,6 +42,7 @@ pub const File = struct { pub const CacheHash = struct { allocator: *Allocator, blake3: Blake3, + work_dir: fs.Dir, manifest_dir: fs.Dir, manifest_file: ?fs.File, manifest_dirty: bool, @@ -48,11 +50,12 @@ pub const CacheHash = struct { b64_digest: [BASE64_DIGEST_LEN]u8, /// Be sure to call release after successful initialization. - pub fn init(allocator: *Allocator, dir: fs.Dir, manifest_dir_path: []const u8) !CacheHash { + pub fn init(allocator: *Allocator, work_dir: fs.Dir, manifest_dir_path: []const u8) !CacheHash { return CacheHash{ .allocator = allocator, .blake3 = Blake3.init(), - .manifest_dir = try dir.makeOpenPath(manifest_dir_path, .{}), + .work_dir = work_dir, + .manifest_dir = try work_dir.makeOpenPath(manifest_dir_path, .{}), .manifest_file = null, .manifest_dirty = false, .files = ArrayList(File).init(allocator), @@ -100,7 +103,7 @@ pub const CacheHash = struct { assert(self.manifest_file == null); try self.files.ensureCapacity(self.files.items.len + 1); - const resolved_path = try fs.path.resolve(self.allocator, &[_][]const u8{file_path}); + const resolved_path = try self.work_dir.realpathAlloc(self.allocator, file_path); const idx = self.files.items.len; self.files.addOneAssumeCapacity().* = .{ @@ -210,7 +213,7 @@ pub const CacheHash = struct { cache_hash_file.path = try self.allocator.dupe(u8, file_path); } - const this_file = fs.cwd().openFile(cache_hash_file.path.?, .{ .read = true }) catch { + const this_file = self.work_dir.openFile(cache_hash_file.path.?, .{ .read = true }) catch { return error.CacheUnavailable; }; defer 
this_file.close(); @@ -276,7 +279,7 @@ pub const CacheHash = struct { } fn populateFileHash(self: *CacheHash, ch_file: *File) !void { - const file = try fs.cwd().openFile(ch_file.path.?, .{}); + const file = try self.work_dir.openFile(ch_file.path.?, .{}); defer file.close(); ch_file.stat = try file.stat(); @@ -322,7 +325,7 @@ pub const CacheHash = struct { pub fn addFilePostFetch(self: *CacheHash, file_path: []const u8, max_file_size: usize) ![]u8 { assert(self.manifest_file != null); - const resolved_path = try fs.path.resolve(self.allocator, &[_][]const u8{file_path}); + const resolved_path = try self.work_dir.realpathAlloc(self.allocator, file_path); errdefer self.allocator.free(resolved_path); const new_ch_file = try self.files.addOne(); @@ -347,7 +350,7 @@ pub const CacheHash = struct { pub fn addFilePost(self: *CacheHash, file_path: []const u8) !void { assert(self.manifest_file != null); - const resolved_path = try fs.path.resolve(self.allocator, &[_][]const u8{file_path}); + const resolved_path = try self.work_dir.realpathAlloc(self.allocator, file_path); errdefer self.allocator.free(resolved_path); const new_ch_file = try self.files.addOne(); @@ -470,16 +473,13 @@ fn isProblematicTimestamp(fs_clock: i128) bool { } test "cache file and then recall it" { - if (std.Target.current.os.tag == .wasi) { - // https://github.com/ziglang/zig/issues/5437 - return error.SkipZigTest; - } - const cwd = fs.cwd(); + var tmp = tmpDir(.{}); + defer tmp.cleanup(); const temp_file = "test.txt"; const temp_manifest_dir = "temp_manifest_dir"; - try cwd.writeFile(temp_file, "Hello, world!\n"); + try tmp.dir.writeFile(temp_file, "Hello, world!\n"); while (isProblematicTimestamp(std.time.nanoTimestamp())) { std.time.sleep(1); @@ -489,7 +489,7 @@ test "cache file and then recall it" { var digest2: [BASE64_DIGEST_LEN]u8 = undefined; { - var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + var ch = try CacheHash.init(testing.allocator, tmp.dir, temp_manifest_dir); 
defer ch.release(); ch.add(true); @@ -503,7 +503,7 @@ test "cache file and then recall it" { digest1 = ch.final(); } { - var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + var ch = try CacheHash.init(testing.allocator, tmp.dir, temp_manifest_dir); defer ch.release(); ch.add(true); @@ -516,9 +516,6 @@ test "cache file and then recall it" { } testing.expectEqual(digest1, digest2); - - try cwd.deleteTree(temp_manifest_dir); - try cwd.deleteFile(temp_file); } test "give problematic timestamp" { @@ -534,18 +531,15 @@ test "give nonproblematic timestamp" { } test "check that changing a file makes cache fail" { - if (std.Target.current.os.tag == .wasi) { - // https://github.com/ziglang/zig/issues/5437 - return error.SkipZigTest; - } - const cwd = fs.cwd(); + var tmp = tmpDir(.{}); + defer tmp.cleanup(); const temp_file = "cache_hash_change_file_test.txt"; const temp_manifest_dir = "cache_hash_change_file_manifest_dir"; const original_temp_file_contents = "Hello, world!\n"; const updated_temp_file_contents = "Hello, world; but updated!\n"; - try cwd.writeFile(temp_file, original_temp_file_contents); + try tmp.dir.writeFile(temp_file, original_temp_file_contents); while (isProblematicTimestamp(std.time.nanoTimestamp())) { std.time.sleep(1); @@ -555,7 +549,7 @@ test "check that changing a file makes cache fail" { var digest2: [BASE64_DIGEST_LEN]u8 = undefined; { - var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + var ch = try CacheHash.init(testing.allocator, tmp.dir, temp_manifest_dir); defer ch.release(); ch.add("1234"); @@ -569,14 +563,14 @@ test "check that changing a file makes cache fail" { digest1 = ch.final(); } - try cwd.writeFile(temp_file, updated_temp_file_contents); + try tmp.dir.writeFile(temp_file, updated_temp_file_contents); while (isProblematicTimestamp(std.time.nanoTimestamp())) { std.time.sleep(1); } { - var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + var ch = try 
CacheHash.init(testing.allocator, tmp.dir, temp_manifest_dir); defer ch.release(); ch.add("1234"); @@ -592,25 +586,19 @@ test "check that changing a file makes cache fail" { } testing.expect(!mem.eql(u8, digest1[0..], digest2[0..])); - - try cwd.deleteTree(temp_manifest_dir); - try cwd.deleteFile(temp_file); } test "no file inputs" { - if (std.Target.current.os.tag == .wasi) { - // https://github.com/ziglang/zig/issues/5437 - return error.SkipZigTest; - } - const cwd = fs.cwd(); + var tmp = tmpDir(.{}); + defer tmp.cleanup(); + const temp_manifest_dir = "no_file_inputs_manifest_dir"; - defer cwd.deleteTree(temp_manifest_dir) catch unreachable; var digest1: [BASE64_DIGEST_LEN]u8 = undefined; var digest2: [BASE64_DIGEST_LEN]u8 = undefined; { - var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + var ch = try CacheHash.init(testing.allocator, tmp.dir, temp_manifest_dir); defer ch.release(); ch.add("1234"); @@ -621,7 +609,7 @@ test "no file inputs" { digest1 = ch.final(); } { - var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + var ch = try CacheHash.init(testing.allocator, tmp.dir, temp_manifest_dir); defer ch.release(); ch.add("1234"); @@ -633,18 +621,15 @@ test "no file inputs" { } test "CacheHashes with files added after initial hash work" { - if (std.Target.current.os.tag == .wasi) { - // https://github.com/ziglang/zig/issues/5437 - return error.SkipZigTest; - } - const cwd = fs.cwd(); + var tmp = tmpDir(.{}); + defer tmp.cleanup(); const temp_file1 = "cache_hash_post_file_test1.txt"; const temp_file2 = "cache_hash_post_file_test2.txt"; const temp_manifest_dir = "cache_hash_post_file_manifest_dir"; - try cwd.writeFile(temp_file1, "Hello, world!\n"); - try cwd.writeFile(temp_file2, "Hello world the second!\n"); + try tmp.dir.writeFile(temp_file1, "Hello, world!\n"); + try tmp.dir.writeFile(temp_file2, "Hello world the second!\n"); while (isProblematicTimestamp(std.time.nanoTimestamp())) { std.time.sleep(1); @@ -655,7 
+640,7 @@ test "CacheHashes with files added after initial hash work" { var digest3: [BASE64_DIGEST_LEN]u8 = undefined; { - var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + var ch = try CacheHash.init(testing.allocator, tmp.dir, temp_manifest_dir); defer ch.release(); ch.add("1234"); @@ -669,7 +654,7 @@ test "CacheHashes with files added after initial hash work" { digest1 = ch.final(); } { - var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + var ch = try CacheHash.init(testing.allocator, tmp.dir, temp_manifest_dir); defer ch.release(); ch.add("1234"); @@ -680,14 +665,14 @@ test "CacheHashes with files added after initial hash work" { testing.expect(mem.eql(u8, &digest1, &digest2)); // Modify the file added after initial hash - try cwd.writeFile(temp_file2, "Hello world the second, updated\n"); + try tmp.dir.writeFile(temp_file2, "Hello world the second, updated\n"); while (isProblematicTimestamp(std.time.nanoTimestamp())) { std.time.sleep(1); } { - var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + var ch = try CacheHash.init(testing.allocator, tmp.dir, temp_manifest_dir); defer ch.release(); ch.add("1234"); @@ -702,8 +687,4 @@ test "CacheHashes with files added after initial hash work" { } testing.expect(!mem.eql(u8, &digest1, &digest3)); - - try cwd.deleteTree(temp_manifest_dir); - try cwd.deleteFile(temp_file1); - try cwd.deleteFile(temp_file2); } diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 0feaf69d6780..c0bb804ffb47 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -931,6 +931,103 @@ pub const Dir = struct { return self.openDir(sub_path, open_dir_options); } + /// This function behaves differently on different hosts. + /// In WASI, returns the canonicalized relative pathname of `pathname` relative + /// to this `Dir`. If `pathname` is absolute, or an attempt is made at + /// escaping beyond this `Dir`, return `error.AccessDenied`. 
+ /// On other hosts, this function returns the canonicalized absolute pathname of + /// `pathname` relative to this `Dir`. If `pathname` is absolute, ignores this + /// `Dir` handle and returns the canonicalized absolute pathname of `pathname` + /// argument. + /// See also `Dir.realpathZ`, `Dir.realpathW`, `Dir.realpathWasi`, + /// and `Dir.realpathAlloc`. + pub fn realpath(self: Dir, pathname: []const u8, out_buffer: []u8) ![]u8 { + if (builtin.os.tag == .wasi) { + return self.realpathWasi(pathname, out_buffer); + } + if (builtin.os.tag == .windows) { + const pathname_w = try os.windows.sliceToPrefixedFileW(pathname); + return self.realpathW(pathname_w.span().ptr, out_buffer); + } + const pathname_c = try os.toPosixPath(pathname); + return self.realpathZ(&pathname_c, out_buffer); + } + + /// Same as `Dir.realpath` except `pathname` is null-terminated. + /// See also `Dir.realpath`, `realpathZ`. + pub fn realpathZ(self: Dir, pathname: [*:0]const u8, out_buffer: []u8) ![]u8 { + // Use of MAX_PATH_BYTES here is valid as the realpath function does not + // have a variant that takes an arbitrary-size buffer. + // TODO(#4812): Consider reimplementing realpath or using the POSIX.1-2008 + // NULL out parameter (GNU's canonicalize_file_name) to handle overelong + // paths. musl supports passing NULL but restricts the output to PATH_MAX + // anyway. + var buf: [MAX_PATH_BYTES]u8 = undefined; + const out_path = try os.realpathatZ(self.fd, pathname, &buf); + + if (out_path.len > out_buffer.len) { + return error.NameTooLong; + } + + mem.copy(u8, out_buffer, out_path); + + return out_buffer[0..out_path.len]; + } + + /// Windows-only. Same as `Dir.realpath` except `pathname` is null-terminated, + /// WTF16 encoded. + /// See also `Dir.realpath`, `realpathW`. + pub fn realpathW(self: Dir, pathname: [*:0]const u16, out_buffer: []u8) ![]u8 { + // Use of MAX_PATH_BYTES here is valid as the realpath function does not + // have a variant that takes an arbitrary-size buffer. 
+ // TODO(#4812): Consider reimplementing realpath or using the POSIX.1-2008 + // NULL out parameter (GNU's canonicalize_file_name) to handle overelong + // paths. musl supports passing NULL but restricts the output to PATH_MAX + // anyway. + var buf: [MAX_PATH_BYTES]u8 = undefined; + const out_path = try os.realpathatW(self.fd, pathname, &buf); + + if (out_path.len > out_buffer.len) { + return error.NameTooLong; + } + + mem.copy(u8, out_buffer, out_path); + + return out_buffer[0..out_path.len]; + } + + /// WASI-only. Returns the canonicalized relative pathname of `pathname` relative + /// to this `Dir`. If `pathname` is absolute, or an attempt is made at + /// escaping beyond this `Dir`, return `error.AccessDenied`. + /// See also `Dir.realpath`, and `Dir.realpathAlloc`. + pub fn realpathWasi(self: Dir, pathname: []const u8, out_buffer: []u8) ![]u8 { + // Use of MAX_PATH_BYTES here is valid as the realpath function does not + // have a variant that takes an arbitrary-size buffer. + var buf: [MAX_PATH_BYTES]u8 = undefined; + const out_path = try os.realpathatWasi(self.fd, pathname, &buf); + + if (out_path.len > out_buffer.len) { + return error.NameTooLong; + } + + mem.copy(u8, out_buffer, out_path); + + return out_buffer[0..out_path.len]; + } + + /// Same as `Dir.realpath` except caller must free the returned memory. + /// See also `Dir.realpath`. + pub fn realpathAlloc(self: Dir, allocator: *Allocator, pathname: []const u8) ![]u8 { + // Use of MAX_PATH_BYTES here is valid as the realpath function does not + // have a variant that takes an arbitrary-size buffer. + // TODO(#4812): Consider reimplementing realpath or using the POSIX.1-2008 + // NULL out parameter (GNU's canonicalize_file_name) to handle overelong + // paths. musl supports passing NULL but restricts the output to PATH_MAX + // anyway. 
+ var buf: [MAX_PATH_BYTES]u8 = undefined; + return allocator.dupe(u8, try os.realpathat(self.fd, pathname, &buf)); + } + /// Changes the current working directory to the open directory handle. /// This modifies global state and can have surprising effects in multi- /// threaded applications. Most applications and especially libraries should @@ -1903,7 +2000,7 @@ pub fn selfExeDirPath(out_buffer: []u8) SelfExePathError![]const u8 { } /// `realpath`, except caller must free the returned memory. -/// TODO integrate with `Dir` +/// See also `Dir.realpath`. pub fn realpathAlloc(allocator: *Allocator, pathname: []const u8) ![]u8 { // Use of MAX_PATH_BYTES here is valid as the realpath function does not // have a variant that takes an arbitrary-size buffer. diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig index a3cf2e800268..0c385a4c7a59 100644 --- a/lib/std/fs/test.zig +++ b/lib/std/fs/test.zig @@ -50,6 +50,50 @@ fn contains(entries: *const std.ArrayList(Dir.Entry), el: Dir.Entry) bool { return false; } +test "Dir.realpath" { + var tmp_dir = tmpDir(.{}); + defer tmp_dir.cleanup(); + + var file = try tmp_dir.dir.createFile("test_file", .{ .lock = File.Lock.Shared }); + // We need to close the file immediately as otherwise on Windows we'll end up + // with a sharing violation. 
+ file.close(); + + // First, test non-alloc version + { + var buf1: [fs.MAX_PATH_BYTES]u8 = undefined; + const file_path = try tmp_dir.dir.realpath("test_file", buf1[0..]); + + if (builtin.os.tag == .wasi) { + testing.expect(mem.eql(u8, file_path, "test_file")); + } else { + var buf2: [fs.MAX_PATH_BYTES]u8 = undefined; + const dir_path = try tmp_dir.dir.realpath("", buf2[0..]); + const expected_path = try fs.path.join(testing.allocator, &[_][]const u8{ dir_path, "test_file" }); + defer testing.allocator.free(expected_path); + + testing.expect(mem.eql(u8, file_path, expected_path)); + } + } + + // Next, test alloc version + { + const file_path = try tmp_dir.dir.realpathAlloc(testing.allocator, "test_file"); + defer testing.allocator.free(file_path); + + if (builtin.os.tag == .wasi) { + testing.expect(mem.eql(u8, file_path, "test_file")); + } else { + const dir_path = try tmp_dir.dir.realpathAlloc(testing.allocator, ""); + defer testing.allocator.free(dir_path); + const expected_path = try fs.path.join(testing.allocator, &[_][]const u8{ dir_path, "test_file" }); + defer testing.allocator.free(expected_path); + + testing.expect(mem.eql(u8, file_path, expected_path)); + } + } +} + test "readAllAlloc" { var tmp_dir = tmpDir(.{}); defer tmp_dir.cleanup(); @@ -97,12 +141,7 @@ test "directory operations on files" { testing.expectError(error.NotDir, tmp_dir.dir.deleteDir(test_file_name)); if (builtin.os.tag != .wasi) { - // TODO: use Dir's realpath function once that exists - const absolute_path = blk: { - const relative_path = try fs.path.join(testing.allocator, &[_][]const u8{ "zig-cache", "tmp", tmp_dir.sub_path[0..], test_file_name }); - defer testing.allocator.free(relative_path); - break :blk try fs.realpathAlloc(testing.allocator, relative_path); - }; + const absolute_path = try tmp_dir.dir.realpathAlloc(testing.allocator, test_file_name); defer testing.allocator.free(absolute_path); testing.expectError(error.PathAlreadyExists, fs.makeDirAbsolute(absolute_path)); 
diff --git a/lib/std/os.zig b/lib/std/os.zig index 1e1049ae51ce..81bb71c598a6 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -3981,6 +3981,45 @@ pub fn realpath(pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathE pub const realpathC = @compileError("deprecated: renamed realpathZ"); +fn fdPath(fd: fd_t, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { + switch (builtin.os.tag) { + .wasi => @compileError("getting absolute path of an fd is unsupported in WASI"), + .windows => { + var wide_buf: [windows.PATH_MAX_WIDE]u16 = undefined; + const wide_slice = try windows.GetFinalPathNameByHandleW(fd, &wide_buf, wide_buf.len, windows.VOLUME_NAME_DOS); + + // Windows returns \\?\ prepended to the path. + // We strip it to make this function consistent across platforms. + const prefix = [_]u16{ '\\', '\\', '?', '\\' }; + const start_index = if (mem.startsWith(u16, wide_slice, &prefix)) prefix.len else 0; + + // Trust that Windows gives us valid UTF-16LE. + const end_index = std.unicode.utf16leToUtf8(out_buffer, wide_slice[start_index..]) catch unreachable; + return out_buffer[0..end_index]; + }, + .macosx, .ios, .watchos, .tvos => { + // On macOS, we can use F_GETPATH fcntl command to query the OS for + // the path to the file descriptor. + @memset(out_buffer, 0, MAX_PATH_BYTES); + switch (errno(system.fcntl(fd, F_GETPATH, out_buffer))) { + 0 => {}, + EBADF => return error.FileNotFound, + // TODO man pages for fcntl on macOS don't really tell you what + // errno values to expect when command is F_GETPATH... 
+ else => |err| return unexpectedErrno(err), + } + const len = mem.indexOfScalar(u8, out_buffer[0..], @as(u8, 0)) orelse MAX_PATH_BYTES; + return out_buffer[0..len]; + }, + else => { + var procfs_buf: ["/proc/self/fd/-2147483648".len:0]u8 = undefined; + const proc_path = std.fmt.bufPrint(procfs_buf[0..], "/proc/self/fd/{}\x00", .{fd}) catch unreachable; + + return readlinkZ(@ptrCast([*:0]const u8, proc_path.ptr), out_buffer); + }, + } +} + /// Same as `realpath` except `pathname` is null-terminated. pub fn realpathZ(pathname: [*:0]const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { if (builtin.os.tag == .windows) { @@ -3994,10 +4033,7 @@ pub fn realpathZ(pathname: [*:0]const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealP }; defer close(fd); - var procfs_buf: ["/proc/self/fd/-2147483648".len:0]u8 = undefined; - const proc_path = std.fmt.bufPrint(procfs_buf[0..], "/proc/self/fd/{}\x00", .{fd}) catch unreachable; - - return readlinkZ(@ptrCast([*:0]const u8, proc_path.ptr), out_buffer); + return fdPath(fd, out_buffer); } const result_path = std.c.realpath(pathname, out_buffer) orelse switch (std.c._errno().*) { EINVAL => unreachable, @@ -4029,17 +4065,189 @@ pub fn realpathW(pathname: [*:0]const u16, out_buffer: *[MAX_PATH_BYTES]u8) Real ); defer windows.CloseHandle(h_file); - var wide_buf: [windows.PATH_MAX_WIDE]u16 = undefined; - const wide_slice = try windows.GetFinalPathNameByHandleW(h_file, &wide_buf, wide_buf.len, windows.VOLUME_NAME_DOS); + return fdPath(h_file, out_buffer); +} + +/// This function will depend differently on different hosts. +/// In WASI, it calls `realpathatWasi` which observes WASI capability-oriented +/// security model and returns `error.AccessDenied` if the `pathname` is absolute +/// or an attempt is made at escaping beyond the input `fd` file descriptor. +/// On all other hosts, this function is similar to `realpath`, however, returns +/// the canonicalized absolute pathname of a `pathname` relative to a file descriptor +/// `fd`. 
If `pathname` is an absolute path, ignores `fd` and reverts to calling +/// `realpath` on the `pathname` argument. In particular, on Unix, if `fd` was +/// obtained using `std.fs.cwd()` call, reverts to calling `std.os.getcwd()` to +/// obtain file descriptor's path. +/// See also `realpath`, `realpathatZ`, `realpathatW`, and `realpathatWasi`. +pub fn realpathat(fd: fd_t, pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { + if (builtin.os.tag == .wasi) { + return realpathatWasi(fd, pathname, out_buffer); + } + if (builtin.os.tag == .windows) { + const pathname_w = try windows.sliceToPrefixedFileW(pathname); + return realpathatW(fd, pathname_w.span().ptr, out_buffer); + } + const pathname_c = try toPosixPath(pathname); + return realpathatZ(fd, &pathname_c, out_buffer); +} + +/// Same as `realpathat` except `pathname` is null-terminated. +/// If `fd` was obtained using `std.fs.cwd()` call, reverts to calling +/// `std.os.getcwd()` to obtain file descriptor's path. +/// See also `realpathat`. 
+pub fn realpathatZ(fd: fd_t, pathname: [*:0]const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { + if (builtin.os.tag == .windows) { + const pathname_w = try windows.cStrToPrefixedFileW(pathname); + return realpathatW(fd, pathname_w.span().ptr, out_buffer); + } + if (std.fs.path.isAbsolutePosixZ(pathname)) { + return realpathZ(pathname, out_buffer); + } + + var buffer: [MAX_PATH_BYTES]u8 = undefined; + var fd_path: []const u8 = undefined; + if (fd == @field(@This(), "AT_FDCWD")) { + fd_path = getcwd(buffer[0..]) catch |err| { + return switch (err) { + GetCwdError.NameTooLong => error.NameTooLong, + GetCwdError.CurrentWorkingDirectoryUnlinked => error.FileNotFound, + else => |e| e, + }; + }; + } else { + fd_path = try fdPath(fd, &buffer); + } + + const total_len = fd_path.len + mem.lenZ(pathname) + 1; // +1 to account for path separator + if (total_len >= MAX_PATH_BYTES) { + return error.NameTooLong; + } + + var unnormalized: [MAX_PATH_BYTES]u8 = undefined; + mem.copy(u8, unnormalized[0..], fd_path); + unnormalized[fd_path.len] = std.fs.path.sep; + mem.copy(u8, unnormalized[fd_path.len + 1 ..], mem.spanZ(pathname)); + + const unnormalized_c = try toPosixPath(unnormalized[0..total_len]); - // Windows returns \\?\ prepended to the path. - // We strip it to make this function consistent across platforms. - const prefix = [_]u16{ '\\', '\\', '?', '\\' }; - const start_index = if (mem.startsWith(u16, wide_slice, &prefix)) prefix.len else 0; + return realpathZ(&unnormalized_c, out_buffer); +} + +/// Windows-only. Same as `realpathat` except `pathname` is null-terminated, WTF16 encoded. +/// See also `realpathat`. 
+pub fn realpathatW(fd: fd_t, pathname: [*:0]const u16, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {
+    if (std.fs.path.isAbsoluteWindowsW(pathname)) {
+        return realpathW(pathname, out_buffer);
+    }
+
+    var buffer: [MAX_PATH_BYTES]u8 = undefined;
+    const fd_path = try fdPath(fd, &buffer);
+
+    // Convert UTF16LE to UTF8
+    var pathname_u8: [MAX_PATH_BYTES]u8 = undefined;
+    const end_index = std.unicode.utf16leToUtf8(pathname_u8[0..], mem.spanZ(pathname)) catch |_| {
+        return error.BadPathName;
+    };
+
+    // Check for illegal path characters
+    for (pathname_u8[0..end_index]) |byte| {
+        switch (byte) {
+            '*', '?', '"', '<', '>', '|' => return error.BadPathName,
+            else => {},
+        }
+    }
+
+    const total_len = fd_path.len + end_index + 1; // +1 to account for path separator
+    if (total_len >= MAX_PATH_BYTES) {
+        return error.NameTooLong;
+    }
+
+    var unnormalized: [MAX_PATH_BYTES]u8 = undefined;
+    mem.copy(u8, unnormalized[0..], fd_path);
+    unnormalized[fd_path.len] = std.fs.path.sep;
+    mem.copy(u8, unnormalized[fd_path.len + 1 ..], pathname_u8[0..end_index]);
+
+    // Since we are resolving path relative to some fd, if `pathname` is relative, it may well
+    // consist of relative components '.' and '..'. Hence, we don't want to prepend '\\?\' which
+    // forces a fully-qualified path only in all Windows syscalls. Instead, we simply convert `u8`
+    // to `u16`, and feed that into `realpathW`.
+    var unnormalized_w: [windows.PATH_MAX_WIDE:0]u16 = undefined;
+    if (total_len > unnormalized_w.len) return error.NameTooLong; // N UTF-8 bytes yield at most N UTF-16 units, so check before converting
+    const len = try std.unicode.utf8ToUtf16Le(unnormalized_w[0..], unnormalized[0..total_len]);
+    unnormalized_w[len] = 0;
+
+    return realpathW(unnormalized_w[0..len :0].ptr, out_buffer);
+}
+
+/// WASI-only. Similar to `realpathat` except it returns the canonicalized relative pathname
+/// of a `pathname` relative to a file descriptor `fd`.
+/// Since this is targeting WASI, it observes WASI capability-oriented security model and
+/// returns `error.AccessDenied` if the `pathname` is absolute or an attempt is made at
+/// escaping beyond the input `fd` file descriptor.
+pub fn realpathatWasi(fd: fd_t, pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {
+    // We'll proceed in two steps.
+    // First, we'll try opening the path as-is, and check if that works OK.
+    // Then, this will imply the path was valid under WASI's capability-oriented
+    // security model, and no path traversal attack was attempted.
+    // Second, since we've verified we're OK wrt sandboxing rules, we can
+    // analyze the path component-by-component working out the canonicalized path
+    // in the process.
+    const probe_fd = openatWasi(fd, pathname, 0x0, 0x0, 0x0, 0x0) catch |err| switch (err) {
+        error.FileLocksNotSupported => unreachable,
+        else => |e| return e,
+    };
+    close(probe_fd); // opened only to validate access; release it so each call does not leak a descriptor
+    return realpathatWasiUnchecked(fd, pathname, out_buffer);
+}
+
+fn realpathatWasiUnchecked(fd: fd_t, pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {
+    var result_index: usize = 0;
+    var path_it = mem.tokenize(pathname, "/");
+    while (path_it.next()) |component| {
+        if (mem.eql(u8, component, ".")) {
+            continue;
+        } else if (mem.eql(u8, component, "..")) {
+            while (true) {
+                if (result_index == 0)
+                    break;
+                result_index -= 1;
+                if (out_buffer[result_index] == '/')
+                    break;
+            }
+        } else {
+            if (result_index + 1 + component.len > MAX_PATH_BYTES) return error.NameTooLong; // room for '/' plus the whole component
+            if (result_index > 0) {
+                out_buffer[result_index] = '/';
+                result_index += 1;
+            }
+
+            mem.copy(u8, out_buffer[result_index..], component);
+            const tmp_result_index = result_index + component.len;
+
+            const stat = try fstatatWasi(fd, out_buffer[0..tmp_result_index], 0x0);
+            if (stat.filetype == wasi.FILETYPE_SYMBOLIC_LINK) {
+                var buf: [MAX_PATH_BYTES]u8 = undefined;
+                const target = try readlinkatWasi(fd, out_buffer[0..tmp_result_index], buf[0..]);
+                if (result_index + target.len > MAX_PATH_BYTES) return error.NameTooLong; // target replaces the component in place
+                mem.copy(u8, out_buffer[result_index..], target);
+                // OK, we've dereferenced the symlink, now, call `realpathatWasiUnchecked` recursively
+                // to work out the target's canonical form.
+                const target_canon = try realpathatWasiUnchecked(fd, out_buffer[0 .. result_index + target.len], &buf);
+                mem.copy(u8, out_buffer[0..], target_canon);
+
+                result_index = target_canon.len;
+            } else {
+                result_index = tmp_result_index;
+            }
+        }
+    }
+
+    if (result_index == 0) {
+        out_buffer[result_index] = '.';
+        result_index += 1;
+    }
 
-    // Trust that Windows gives us valid UTF-16LE.
-    const end_index = std.unicode.utf16leToUtf8(out_buffer, wide_slice[start_index..]) catch unreachable;
-    return out_buffer[0..end_index];
+    return out_buffer[0..result_index];
 }
 
 /// Spurious wakeups are possible and no precision of timing is guaranteed.
diff --git a/lib/std/os/test.zig b/lib/std/os/test.zig
index e508f5ae20f3..1b1944a2f66f 100644
--- a/lib/std/os/test.zig
+++ b/lib/std/os/test.zig
@@ -16,6 +16,7 @@ const builtin = @import("builtin");
 const AtomicRmwOp = builtin.AtomicRmwOp;
 const AtomicOrder = builtin.AtomicOrder;
 const tmpDir = std.testing.tmpDir;
+const TmpDir = std.testing.TmpDir;
 const Dir = std.fs.Dir;
 
 test "fstatat" {
@@ -40,6 +41,67 @@ test "fstatat" {
     expectEqual(stat, statat);
 }
 
+test "realpathat" {
+    var tmp = tmpDir(.{});
+    defer tmp.cleanup();
+
+    // create some dirs
+    try tmp.dir.makePath("a/b");
+
+    try testRealpathat(tmp, "a", "a");
+    try testRealpathat(tmp, "a/b/..", "a");
+    try testRealpathat(tmp, "a/b", "a/b");
+
+    if (builtin.os.tag == .wasi) {
+        try testRealpathat(tmp, "a/..", ".");
+    } else {
+        try testRealpathat(tmp, "a/..", "");
+    }
+
+    var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
+    testing.expectError(error.FileNotFound, os.realpathat(tmp.dir.fd, "definitely_bogus_does_not_exist1234", &buf));
+}
+
+fn testRealpathat(tmp: TmpDir, pathname: []const u8, expected: []const u8) !void {
+    var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
+    const given = try os.realpathat(tmp.dir.fd, pathname, &buf);
+
+    if (builtin.os.tag == .wasi) {
+        testing.expect(mem.eql(u8, given, expected));
+    } else {
+        const absolute_path = blk: {
+            const relative_path = try fs.path.join(testing.allocator, &[_][]const u8{ "zig-cache", "tmp", tmp.sub_path[0..], expected });
+            defer testing.allocator.free(relative_path);
+            break :blk try fs.realpathAlloc(testing.allocator, relative_path);
+        };
+        defer testing.allocator.free(absolute_path);
+        testing.expect(mem.eql(u8, given, absolute_path));
+    }
+}
+
+test "realpathat with symlinks" {
+    // TODO enable when `symlinkat` is implemented on Windows
+    if (builtin.os.tag == .windows) return error.SkipZigTest;
+
+    var tmp = tmpDir(.{});
+    defer tmp.cleanup();
+
+    // create some dirs
+    try tmp.dir.makePath("a/b");
+
+    // create file in "a/b"
+    try tmp.dir.writeFile("a/b/file.txt", "nonsense");
+
+    // create a symlink "a/c" -> "a/b"
+    var subdir = try tmp.dir.openDir("a", .{});
+    defer subdir.close();
+
+    try os.symlinkat("b", subdir.fd, "c");
+
+    // get realpath of "a/c/file.txt"
+    try testRealpathat(tmp, "a/c/file.txt", "a/b/file.txt");
+}
+
 test "readlinkat" {
     // enable when `readlinkat` and `symlinkat` are implemented on Windows
     if (builtin.os.tag == .windows) return error.SkipZigTest;