From eb04d5203b804a27701589d616f5450bf6866290 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Sun, 8 Dec 2024 20:43:15 +1100 Subject: [PATCH 1/4] error: map SafetyViolation to EXDEV It would be nice if there was an EUNSAFE we could use, but alas there is no such errno, so we will have to reuse the same errno that openat2() uses when a walk ends up outside of the rootfs. Signed-off-by: Aleksa Sarai --- src/capi/error.rs | 25 ++++++++++++++++++++++--- src/error.rs | 7 ++++++- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/capi/error.rs b/src/capi/error.rs index 2895aacf..0cb6df4b 100644 --- a/src/capi/error.rs +++ b/src/capi/error.rs @@ -256,21 +256,40 @@ mod tests { } #[test] - fn cerror_no_errno() { + fn cerror_exdev_errno() { let err = Error::from(ErrorImpl::SafetyViolation { description: "fake safety violation".into(), }); + assert_eq!( + err.kind().errno(), + Some(libc::EXDEV), + "SafetyViolation kind().errno() should return the right error" + ); + + let cerr = CError::from(&err); + assert_eq!( + cerr.saved_errno, + libc::EXDEV as u64, + "cerror should contain EXDEV errno for SafetyViolation" + ); + } + + #[test] + fn cerror_no_errno() { + let parse_err = "a123".parse::().unwrap_err(); + let err = Error::from(parse_err); + assert_eq!( err.kind().errno(), None, - "SafetyViolation kind().errno() should return the no errno" + "ParseIntError kind().errno() should return no errno" ); let cerr = CError::from(&err); assert_eq!( cerr.saved_errno, 0, - "cerror should contain zero errno for SafetyViolation" + "cerror should contain zero errno for ParseIntError" ); } } diff --git a/src/error.rs b/src/error.rs index f4096a56..819e94a6 100644 --- a/src/error.rs +++ b/src/error.rs @@ -163,8 +163,8 @@ impl ErrorKind { match self { ErrorKind::NotImplemented => Some(libc::ENOSYS), ErrorKind::InvalidArgument => Some(libc::EINVAL), + ErrorKind::SafetyViolation => Some(libc::EXDEV), ErrorKind::OsError(errno) => *errno, - // TODO: Should we remap SafetyViolation? _ => None, } } @@ -232,6 +232,11 @@ mod tests { Some(libc::ENOSYS), "ErrorKind::NotImplemented is equivalent to ENOSYS" ); + assert_eq!( + ErrorKind::SafetyViolation.errno(), + Some(libc::EXDEV), + "ErrorKind::SafetyViolation is equivalent to EXDEV" + ); assert_eq!( ErrorKind::OsError(Some(libc::ENOANO)).errno(), Some(libc::ENOANO), From 719fecfecdfde7efb832300c29c240ca5810916f Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Mon, 9 Dec 2024 00:31:14 +1100 Subject: [PATCH 2/4] Root::mkdir_all: switch mkdir loop to use basic openat There is no reason to use the resolver to look a path component under a directory we have a handle to already. The O_NOFOLLOW behaviour was also somewhat hidden behind the way the resolver API works. Signed-off-by: Aleksa Sarai --- src/root.rs | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/root.rs b/src/root.rs index b60a22cd..5f1704cc 100644 --- a/src/root.rs +++ b/src/root.rs @@ -995,6 +995,12 @@ impl RootRef<'_> { // For the remaining components, create a each component one-by-one. for part in remaining_parts { + if part.as_bytes().contains(&b'/') { + Err(ErrorImpl::SafetyViolation { + description: "remaining component for mkdir contains '/'".into(), + })?; + } + // NOTE: mkdirat(2) does not follow trailing symlinks (even if it is // a dangling symlink with only a trailing component missing), so we // can safely create the final component without worrying about @@ -1009,11 +1015,16 @@ impl RootRef<'_> { // Get a handle to the directory we just created. Unfortunately we // can't do an atomic create+open (a-la O_CREAT) with mkdirat(), so // a separate O_NOFOLLOW is the best we can do. - let next = self - .resolver - .resolve(¤t, &part, true) - .and_then(|handle| handle.reopen(OpenFlags::O_DIRECTORY)) - .wrap("failed to open newly-created directory with O_DIRECTORY")?; + let next = syscalls::openat( + ¤t, + &part, + OpenFlags::O_NOFOLLOW | OpenFlags::O_DIRECTORY, + 0, + ) + .map_err(|err| ErrorImpl::RawOsError { + operation: "open newly created directory".into(), + source: err, + })?; // Unfortunately, we cannot create a directory and open it // atomically (a-la O_CREAT). This means an attacker could swap our @@ -1035,7 +1046,7 @@ impl RootRef<'_> { // doesn't seem to provide any practical benefit. // Keep walking. - current = next; + current = next.into(); } Ok(Handle::from_fd(current)) From 012f871b0e9e8f21eb641bb79d8666944463244a Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Mon, 9 Dec 2024 00:10:00 +1100 Subject: [PATCH 3/4] Root: do not error out with EEXIST for racing Root::mkdir_all()s If two programs are doing Root::mkdir_all, the previous logic would return an error if a directory already existed once we got into the "mkdir" portion of the creation. Since we already have to accept that an attacker can swap the inode with a different directory, returning -EEXIST from mkdirat(2) just causes spurious errors. All we care about is that we open a directory. Signed-off-by: Aleksa Sarai --- CHANGELOG.md | 5 +++++ src/root.rs | 20 ++++++++++++++------ src/syscalls.rs | 4 ++-- src/tests/test_root_ops.rs | 20 ++++++++++---------- 4 files changed, 31 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c93e187d..da7d3234 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/). it would return `-ELOOP` in most cases and in other cases it would return unexpected results because the `O_NOFOLLOW` would have an effect on the magic-link used internally by `Handle::reopen`. +- `Root::mkdir_all` will no longer return `-EEXIST` if another process tried to + do `Root::mkdir_all` at the same time, instead the race winner's directory + will be used by both processes. See [opencontainers/runc#4543][] for more + details. ### Changed ### - syscalls: switch to rustix for most of our syscall wrappers to simplify how @@ -68,6 +72,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). [rustix#1186]: https://github.com/bytecodealliance/rustix/issues/1186 [rustix#1187]: https://github.com/bytecodealliance/rustix/issues/1187 +[opencontainers/runc#4543]: https://github.com/opencontainers/runc/issues/4543 ## [0.1.3] - 2024-10-10 ## diff --git a/src/root.rs b/src/root.rs index 5f1704cc..322af159 100644 --- a/src/root.rs +++ b/src/root.rs @@ -39,7 +39,10 @@ use std::{ path::{Path, PathBuf}, }; -use rustix::fs::{self as rustix_fs, AtFlags}; +use rustix::{ + fs::{self as rustix_fs, AtFlags}, + io::Errno, +}; /// An inode type to be created with [`Root::create`]. #[derive(Clone, Debug)] @@ -1005,12 +1008,17 @@ impl RootRef<'_> { // a dangling symlink with only a trailing component missing), so we // can safely create the final component without worrying about // symlink-exchange attacks. - syscalls::mkdirat(¤t, &part, perm.mode()).map_err(|err| { - ErrorImpl::RawOsError { - operation: "create next directory component".into(), - source: err, + if let Err(err) = syscalls::mkdirat(¤t, &part, perm.mode()) { + // If we got EEXIST then it's possible a racing Root::mkdir_all + // created the directory before us. We can safely continue + // because the following openat() will + if err.errno() != Errno::EXIST { + Err(ErrorImpl::RawOsError { + operation: "create next directory component".into(), + source: err, + })?; } - })?; + } // Get a handle to the directory we just created. Unfortunately we // can't do an atomic create+open (a-la O_CREAT) with mkdirat(), so diff --git a/src/syscalls.rs b/src/syscalls.rs index 9d83c6d6..b305d652 100644 --- a/src/syscalls.rs +++ b/src/syscalls.rs @@ -254,9 +254,9 @@ pub(crate) enum Error { } impl Error { - fn errno(&self) -> &Errno { + pub(crate) fn errno(&self) -> Errno { // XXX: This should probably be a macro... - match self { + *match self { Error::InvalidFd { source, .. } => source, Error::Openat { source, .. } => source, Error::Openat2 { source, .. } => source, diff --git a/src/tests/test_root_ops.rs b/src/tests/test_root_ops.rs index 470a87ac..e90ad194 100644 --- a/src/tests/test_root_ops.rs +++ b/src/tests/test_root_ops.rs @@ -401,14 +401,14 @@ root_op_tests! { nondir_symlink_dotdot: mkdir_all("b-file/../d", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); nondir_symlink_subdir: mkdir_all("b-file/subdir", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); // Dangling symlinks are not followed. - dangling1_trailing: mkdir_all("a-fake1", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); - dangling1_basic: mkdir_all("a-fake1/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); + dangling1_trailing: mkdir_all("a-fake1", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); + dangling1_basic: mkdir_all("a-fake1/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); dangling1_dotdot: mkdir_all("a-fake1/../bar/baz", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOENT))); - dangling2_trailing: mkdir_all("a-fake2", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); - dangling2_basic: mkdir_all("a-fake2/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); + dangling2_trailing: mkdir_all("a-fake2", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); + dangling2_basic: mkdir_all("a-fake2/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); dangling2_dotdot: mkdir_all("a-fake2/../bar/baz", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOENT))); - dangling3_trailing: mkdir_all("a-fake3", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); - dangling3_basic: mkdir_all("a-fake3/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); + dangling3_trailing: mkdir_all("a-fake3", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); + dangling3_basic: mkdir_all("a-fake3/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); dangling3_dotdot: mkdir_all("a-fake3/../bar/baz", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOENT))); // Non-lexical symlinks should work. nonlexical_basic: mkdir_all("target/foo", 0o711) => Ok(()); @@ -423,11 +423,11 @@ root_op_tests! { nonlexical_level3_abs: mkdir_all("link3/target_abs/foo", 0o711) => Ok(()); nonlexical_level3_rel: mkdir_all("link3/target_rel/foo", 0o711) => Ok(()); // But really tricky dangling symlinks should fail. - dangling_tricky1_trailing: mkdir_all("link3/deep_dangling1", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); - dangling_tricky1_basic: mkdir_all("link3/deep_dangling1/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); + dangling_tricky1_trailing: mkdir_all("link3/deep_dangling1", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); + dangling_tricky1_basic: mkdir_all("link3/deep_dangling1/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); dangling_tricky1_dotdot: mkdir_all("link3/deep_dangling1/../bar", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOENT))); - dangling_tricky2_trailing: mkdir_all("link3/deep_dangling2", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); - dangling_tricky2_basic: mkdir_all("link3/deep_dangling2/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); + dangling_tricky2_trailing: mkdir_all("link3/deep_dangling2", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); + dangling_tricky2_basic: mkdir_all("link3/deep_dangling2/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); dangling_tricky2_dotdot: mkdir_all("link3/deep_dangling2/../bar", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOENT))); // And trying to mkdir inside a loop should fail. loop_trailing: mkdir_all("loop/link", 0o711) => Err(ErrorKind::OsError(Some(libc::ELOOP))); From 69bb306e8ad6fe229f80c8765d771fe7d2a4d556 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Mon, 9 Dec 2024 00:14:44 +1100 Subject: [PATCH 4/4] tests: ensure Root::mkdir_all races don't return EEXIST Signed-off-by: Aleksa Sarai --- src/tests/test_root_ops.rs | 85 +++++++++++++++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 2 deletions(-) diff --git a/src/tests/test_root_ops.rs b/src/tests/test_root_ops.rs index e90ad194..c63edffe 100644 --- a/src/tests/test_root_ops.rs +++ b/src/tests/test_root_ops.rs @@ -259,15 +259,58 @@ macro_rules! root_op_tests { } }; + + ($(#[cfg($ignore_meta:meta)])* @impl mkdir_all_racing [#$num_threads:expr] $test_name:ident ($path:expr, $mode:expr) => $expected_result:expr) => { + paste::paste! { + root_op_tests! { + $(#[cfg_attr(not($ignore_meta), ignore)])* + fn [<$test_name _ $num_threads threads>](root) { + utils::check_root_mkdir_all_racing($num_threads, &root, $path, Permissions::from_mode($mode), $expected_result) + } + } + } + }; + + ($(#[cfg($ignore_meta:meta)])* @impl mkdir_all_racing $test_name:ident ( $($args:tt)* ) => $expected_result:expr) => { + root_op_tests! { + $(#[cfg_attr(not($ignore_meta), ignore)])* + @impl mkdir_all_racing [#2] $test_name( $($args)* ) => $expected_result + } + root_op_tests! { + $(#[cfg_attr(not($ignore_meta), ignore)])* + @impl mkdir_all_racing [#4] $test_name( $($args)* ) => $expected_result + } + root_op_tests! { + $(#[cfg_attr(not($ignore_meta), ignore)])* + @impl mkdir_all_racing [#8] $test_name( $($args)* ) => $expected_result + } + root_op_tests! { + $(#[cfg_attr(not($ignore_meta), ignore)])* + @impl mkdir_all_racing [#16] $test_name( $($args)* ) => $expected_result + } + root_op_tests! { + $(#[cfg_attr(not($ignore_meta), ignore)])* + @impl mkdir_all_racing [#32] $test_name( $($args)* ) => $expected_result + } + root_op_tests! { + $(#[cfg_attr(not($ignore_meta), ignore)])* + @impl mkdir_all_racing [#64] $test_name( $($args)* ) => $expected_result + } + root_op_tests! { + $(#[cfg_attr(not($ignore_meta), ignore)])* + @impl mkdir_all_racing [#128] $test_name( $($args)* ) => $expected_result + } + }; + // root_tests!{ // ... // } - ($($(#[cfg($ignore_meta:meta)])* $test_name:ident: $file_type:ident ( $($args:expr),* ) => $expected_result:expr );+ $(;)?) => { + ($($(#[cfg($ignore_meta:meta)])* $test_name:ident: $file_type:ident ( $($args:tt)* ) => $expected_result:expr );+ $(;)?) => { paste::paste! { $( root_op_tests!{ $(#[cfg($ignore_meta)])* - @impl $file_type [<$file_type _ $test_name>]( $($args),* ) => $expected_result + @impl $file_type [<$file_type _ $test_name>]( $($args)* ) => $expected_result } )* } @@ -437,6 +480,10 @@ root_op_tests! { setgid_selfdir: mkdir_all("setgid-self/a/b/c/d", 0o711) => Ok(()); #[cfg(feature = "_test_as_root")] setgid_otherdir: mkdir_all("setgid-other/a/b/c/d", 0o711) => Ok(()); + + // Check that multiple mkdir_alls racing against each other will not result + // in a spurious error. + plain: mkdir_all_racing("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z", 0o711) => Ok(()); } mod utils { @@ -460,6 +507,8 @@ mod utils { io::{AsFd, OwnedFd}, }, path::{Path, PathBuf}, + sync::{Arc, Barrier}, + thread, }; use anyhow::{Context, Error}; @@ -920,4 +969,36 @@ mod utils { Ok(()) } + + pub(super) fn check_root_mkdir_all_racing>( + num_threads: usize, + root: R, + unsafe_path: P, + perm: Permissions, + expected_result: Result<(), ErrorKind>, + ) -> Result<(), Error> { + let root = &root; + let unsafe_path = unsafe_path.as_ref(); + + // Do lots of runs to try to catch any possible races. + let num_retries = 100 + 1_000 / (1 + (num_threads >> 5)); + for _ in 0..num_retries { + thread::scope(|s| { + let start_barrier = Arc::new(Barrier::new(num_threads)); + for _ in 0..num_threads { + let barrier = Arc::clone(&start_barrier); + let perm = perm.clone(); + s.spawn(move || { + barrier.wait(); + let res = root + .mkdir_all(unsafe_path, &perm) + .with_wrap(|| format!("mkdir_all {unsafe_path:?}")); + tests_common::check_err(&res, &expected_result).expect("unexpected result"); + }); + } + }); + } + + Ok(()) + } }