diff --git a/CHANGELOG.md b/CHANGELOG.md index c93e187d..da7d3234 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/). it would return `-ELOOP` in most cases and in other cases it would return unexpected results because the `O_NOFOLLOW` would have an effect on the magic-link used internally by `Handle::reopen`. +- `Root::mkdir_all` will no longer return `-EEXIST` if another process tried to + do `Root::mkdir_all` at the same time, instead the race winner's directory + will be used by both processes. See [opencontainers/runc#4543][] for more + details. ### Changed ### - syscalls: switch to rustix for most of our syscall wrappers to simplify how @@ -68,6 +72,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). [rustix#1186]: https://github.com/bytecodealliance/rustix/issues/1186 [rustix#1187]: https://github.com/bytecodealliance/rustix/issues/1187 +[opencontainers/runc#4543]: https://github.com/opencontainers/runc/issues/4543 ## [0.1.3] - 2024-10-10 ## diff --git a/src/capi/error.rs b/src/capi/error.rs index 2895aacf..0cb6df4b 100644 --- a/src/capi/error.rs +++ b/src/capi/error.rs @@ -256,21 +256,40 @@ mod tests { } #[test] - fn cerror_no_errno() { + fn cerror_exdev_errno() { let err = Error::from(ErrorImpl::SafetyViolation { description: "fake safety violation".into(), }); + assert_eq!( + err.kind().errno(), + Some(libc::EXDEV), + "SafetyViolation kind().errno() should return the right error" + ); + + let cerr = CError::from(&err); + assert_eq!( + cerr.saved_errno, + libc::EXDEV as u64, + "cerror should contain EXDEV errno for SafetyViolation" + ); + } + + #[test] + fn cerror_no_errno() { + let parse_err = "a123".parse::().unwrap_err(); + let err = Error::from(parse_err); + assert_eq!( err.kind().errno(), None, - "SafetyViolation kind().errno() should return the no errno" + "ParseIntError kind().errno() should return no errno" ); let cerr = CError::from(&err); assert_eq!( cerr.saved_errno, 0, - "cerror should contain zero errno for SafetyViolation" + "cerror should contain zero errno for ParseIntError" ); } } diff --git a/src/error.rs b/src/error.rs index f4096a56..819e94a6 100644 --- a/src/error.rs +++ b/src/error.rs @@ -163,8 +163,8 @@ impl ErrorKind { match self { ErrorKind::NotImplemented => Some(libc::ENOSYS), ErrorKind::InvalidArgument => Some(libc::EINVAL), + ErrorKind::SafetyViolation => Some(libc::EXDEV), ErrorKind::OsError(errno) => *errno, - // TODO: Should we remap SafetyViolation? _ => None, } } @@ -232,6 +232,11 @@ mod tests { Some(libc::ENOSYS), "ErrorKind::NotImplemented is equivalent to ENOSYS" ); + assert_eq!( + ErrorKind::SafetyViolation.errno(), + Some(libc::EXDEV), + "ErrorKind::SafetyViolation is equivalent to EXDEV" + ); assert_eq!( ErrorKind::OsError(Some(libc::ENOANO)).errno(), Some(libc::ENOANO), diff --git a/src/root.rs b/src/root.rs index b60a22cd..322af159 100644 --- a/src/root.rs +++ b/src/root.rs @@ -39,7 +39,10 @@ use std::{ path::{Path, PathBuf}, }; -use rustix::fs::{self as rustix_fs, AtFlags}; +use rustix::{ + fs::{self as rustix_fs, AtFlags}, + io::Errno, +}; /// An inode type to be created with [`Root::create`]. #[derive(Clone, Debug)] @@ -995,25 +998,41 @@ impl RootRef<'_> { // For the remaining components, create a each component one-by-one. for part in remaining_parts { + if part.as_bytes().contains(&b'/') { + Err(ErrorImpl::SafetyViolation { + description: "remaining component for mkdir contains '/'".into(), + })?; + } + // NOTE: mkdirat(2) does not follow trailing symlinks (even if it is // a dangling symlink with only a trailing component missing), so we // can safely create the final component without worrying about // symlink-exchange attacks. - syscalls::mkdirat(¤t, &part, perm.mode()).map_err(|err| { - ErrorImpl::RawOsError { - operation: "create next directory component".into(), - source: err, + if let Err(err) = syscalls::mkdirat(¤t, &part, perm.mode()) { + // If we got EEXIST then it's possible a racing Root::mkdir_all + // created the directory before us. We can safely continue + // because the following openat() will + if err.errno() != Errno::EXIST { + Err(ErrorImpl::RawOsError { + operation: "create next directory component".into(), + source: err, + })?; } - })?; + } // Get a handle to the directory we just created. Unfortunately we // can't do an atomic create+open (a-la O_CREAT) with mkdirat(), so // a separate O_NOFOLLOW is the best we can do. - let next = self - .resolver - .resolve(¤t, &part, true) - .and_then(|handle| handle.reopen(OpenFlags::O_DIRECTORY)) - .wrap("failed to open newly-created directory with O_DIRECTORY")?; + let next = syscalls::openat( + ¤t, + &part, + OpenFlags::O_NOFOLLOW | OpenFlags::O_DIRECTORY, + 0, + ) + .map_err(|err| ErrorImpl::RawOsError { + operation: "open newly created directory".into(), + source: err, + })?; // Unfortunately, we cannot create a directory and open it // atomically (a-la O_CREAT). This means an attacker could swap our @@ -1035,7 +1054,7 @@ impl RootRef<'_> { // doesn't seem to provide any practical benefit. // Keep walking. - current = next; + current = next.into(); } Ok(Handle::from_fd(current)) diff --git a/src/syscalls.rs b/src/syscalls.rs index 9d83c6d6..b305d652 100644 --- a/src/syscalls.rs +++ b/src/syscalls.rs @@ -254,9 +254,9 @@ pub(crate) enum Error { } impl Error { - fn errno(&self) -> &Errno { + pub(crate) fn errno(&self) -> Errno { // XXX: This should probably be a macro... - match self { + *match self { Error::InvalidFd { source, .. } => source, Error::Openat { source, .. } => source, Error::Openat2 { source, .. } => source, diff --git a/src/tests/test_root_ops.rs b/src/tests/test_root_ops.rs index 470a87ac..c63edffe 100644 --- a/src/tests/test_root_ops.rs +++ b/src/tests/test_root_ops.rs @@ -259,15 +259,58 @@ macro_rules! root_op_tests { } }; + + ($(#[cfg($ignore_meta:meta)])* @impl mkdir_all_racing [#$num_threads:expr] $test_name:ident ($path:expr, $mode:expr) => $expected_result:expr) => { + paste::paste! { + root_op_tests! { + $(#[cfg_attr(not($ignore_meta), ignore)])* + fn [<$test_name _ $num_threads threads>](root) { + utils::check_root_mkdir_all_racing($num_threads, &root, $path, Permissions::from_mode($mode), $expected_result) + } + } + } + }; + + ($(#[cfg($ignore_meta:meta)])* @impl mkdir_all_racing $test_name:ident ( $($args:tt)* ) => $expected_result:expr) => { + root_op_tests! { + $(#[cfg_attr(not($ignore_meta), ignore)])* + @impl mkdir_all_racing [#2] $test_name( $($args)* ) => $expected_result + } + root_op_tests! { + $(#[cfg_attr(not($ignore_meta), ignore)])* + @impl mkdir_all_racing [#4] $test_name( $($args)* ) => $expected_result + } + root_op_tests! { + $(#[cfg_attr(not($ignore_meta), ignore)])* + @impl mkdir_all_racing [#8] $test_name( $($args)* ) => $expected_result + } + root_op_tests! { + $(#[cfg_attr(not($ignore_meta), ignore)])* + @impl mkdir_all_racing [#16] $test_name( $($args)* ) => $expected_result + } + root_op_tests! { + $(#[cfg_attr(not($ignore_meta), ignore)])* + @impl mkdir_all_racing [#32] $test_name( $($args)* ) => $expected_result + } + root_op_tests! { + $(#[cfg_attr(not($ignore_meta), ignore)])* + @impl mkdir_all_racing [#64] $test_name( $($args)* ) => $expected_result + } + root_op_tests! { + $(#[cfg_attr(not($ignore_meta), ignore)])* + @impl mkdir_all_racing [#128] $test_name( $($args)* ) => $expected_result + } + }; + // root_tests!{ // ... // } - ($($(#[cfg($ignore_meta:meta)])* $test_name:ident: $file_type:ident ( $($args:expr),* ) => $expected_result:expr );+ $(;)?) => { + ($($(#[cfg($ignore_meta:meta)])* $test_name:ident: $file_type:ident ( $($args:tt)* ) => $expected_result:expr );+ $(;)?) => { paste::paste! { $( root_op_tests!{ $(#[cfg($ignore_meta)])* - @impl $file_type [<$file_type _ $test_name>]( $($args),* ) => $expected_result + @impl $file_type [<$file_type _ $test_name>]( $($args)* ) => $expected_result } )* } @@ -401,14 +444,14 @@ root_op_tests! { nondir_symlink_dotdot: mkdir_all("b-file/../d", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); nondir_symlink_subdir: mkdir_all("b-file/subdir", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); // Dangling symlinks are not followed. - dangling1_trailing: mkdir_all("a-fake1", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); - dangling1_basic: mkdir_all("a-fake1/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); + dangling1_trailing: mkdir_all("a-fake1", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); + dangling1_basic: mkdir_all("a-fake1/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); dangling1_dotdot: mkdir_all("a-fake1/../bar/baz", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOENT))); - dangling2_trailing: mkdir_all("a-fake2", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); - dangling2_basic: mkdir_all("a-fake2/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); + dangling2_trailing: mkdir_all("a-fake2", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); + dangling2_basic: mkdir_all("a-fake2/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); dangling2_dotdot: mkdir_all("a-fake2/../bar/baz", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOENT))); - dangling3_trailing: mkdir_all("a-fake3", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); - dangling3_basic: mkdir_all("a-fake3/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); + dangling3_trailing: mkdir_all("a-fake3", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); + dangling3_basic: mkdir_all("a-fake3/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); dangling3_dotdot: mkdir_all("a-fake3/../bar/baz", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOENT))); // Non-lexical symlinks should work. nonlexical_basic: mkdir_all("target/foo", 0o711) => Ok(()); @@ -423,11 +466,11 @@ root_op_tests! { nonlexical_level3_abs: mkdir_all("link3/target_abs/foo", 0o711) => Ok(()); nonlexical_level3_rel: mkdir_all("link3/target_rel/foo", 0o711) => Ok(()); // But really tricky dangling symlinks should fail. - dangling_tricky1_trailing: mkdir_all("link3/deep_dangling1", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); - dangling_tricky1_basic: mkdir_all("link3/deep_dangling1/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); + dangling_tricky1_trailing: mkdir_all("link3/deep_dangling1", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); + dangling_tricky1_basic: mkdir_all("link3/deep_dangling1/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); dangling_tricky1_dotdot: mkdir_all("link3/deep_dangling1/../bar", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOENT))); - dangling_tricky2_trailing: mkdir_all("link3/deep_dangling2", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); - dangling_tricky2_basic: mkdir_all("link3/deep_dangling2/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::EEXIST))); + dangling_tricky2_trailing: mkdir_all("link3/deep_dangling2", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); + dangling_tricky2_basic: mkdir_all("link3/deep_dangling2/foo", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOTDIR))); dangling_tricky2_dotdot: mkdir_all("link3/deep_dangling2/../bar", 0o711) => Err(ErrorKind::OsError(Some(libc::ENOENT))); // And trying to mkdir inside a loop should fail. loop_trailing: mkdir_all("loop/link", 0o711) => Err(ErrorKind::OsError(Some(libc::ELOOP))); @@ -437,6 +480,10 @@ root_op_tests! { setgid_selfdir: mkdir_all("setgid-self/a/b/c/d", 0o711) => Ok(()); #[cfg(feature = "_test_as_root")] setgid_otherdir: mkdir_all("setgid-other/a/b/c/d", 0o711) => Ok(()); + + // Check that multiple mkdir_alls racing against each other will not result + // in a spurious error. + plain: mkdir_all_racing("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z", 0o711) => Ok(()); } mod utils { @@ -460,6 +507,8 @@ mod utils { io::{AsFd, OwnedFd}, }, path::{Path, PathBuf}, + sync::{Arc, Barrier}, + thread, }; use anyhow::{Context, Error}; @@ -920,4 +969,36 @@ mod utils { Ok(()) } + + pub(super) fn check_root_mkdir_all_racing>( + num_threads: usize, + root: R, + unsafe_path: P, + perm: Permissions, + expected_result: Result<(), ErrorKind>, + ) -> Result<(), Error> { + let root = &root; + let unsafe_path = unsafe_path.as_ref(); + + // Do lots of runs to try to catch any possible races. + let num_retries = 100 + 1_000 / (1 + (num_threads >> 5)); + for _ in 0..num_retries { + thread::scope(|s| { + let start_barrier = Arc::new(Barrier::new(num_threads)); + for _ in 0..num_threads { + let barrier = Arc::clone(&start_barrier); + let perm = perm.clone(); + s.spawn(move || { + barrier.wait(); + let res = root + .mkdir_all(unsafe_path, &perm) + .with_wrap(|| format!("mkdir_all {unsafe_path:?}")); + tests_common::check_err(&res, &expected_result).expect("unexpected result"); + }); + } + }); + } + + Ok(()) + } }