From 3d777f99bbe0be6825a82e021137226203b50d46 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 8 Aug 2023 17:39:44 -0700 Subject: [PATCH 1/2] tests/int: fix teardown in mounts_sshfs.bats Function teardown assumes that every test case will call setup_sshfs. Currently, this assumption is true, but once a test case that won't call setup_sshfs is added (say because it has some extra "requires" or "skip"), it will break bats, so any bats invocation involving such a test case will end up hanging after the last test case is run. The reason is, we have set -u in helpers.bash to help catching the use of undefined variables. In the above scenario, such a variable is DIR, which is referenced in teardown but is only defined after a call to setup_sshfs. As a result, bash that is running the teardown function will exit upon seeing the first $DIR, and thus teardown_bundle won't be run. This, in turn, results in a stale recvtty process, which inherits bats' fd 3. Until that fd is closed, bats waits for test logs. Long story short, the fix is to - check if DIR is set before referencing it; - unset it after unmount. PS it is still not clear why there is no diagnostics about the failed teardown. Signed-off-by: Kir Kolyshkin --- tests/integration/mounts_sshfs.bats | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/integration/mounts_sshfs.bats b/tests/integration/mounts_sshfs.bats index 540403e4051..ce5e2b2e7e1 100644 --- a/tests/integration/mounts_sshfs.bats +++ b/tests/integration/mounts_sshfs.bats @@ -8,9 +8,12 @@ function setup() { } function teardown() { - # Some distros do not have fusermount installed - # as a dependency of fuse-sshfs, and good ol' umount works. - fusermount -u "$DIR" || umount "$DIR" + if [ -v DIR ]; then + # Some distros do not have fusermount installed + # as a dependency of fuse-sshfs, and good ol' umount works. + fusermount -u "$DIR" || umount "$DIR" + unset DIR + fi teardown_bundle } From 52f6fb271117a1f24548c45e1bea4f3231539b96 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Sun, 6 Aug 2023 13:21:05 +1000 Subject: [PATCH 2/2] rootfs: replace --no-mount-fallback option with better heuristic The original reasoning for this option was to avoid having mount options be overwritten by runc. However, adding command-line arguments has historically been a bad idea because it forces strict-runc-compatible OCI runtimes to copy out-of-spec features directly from runc and these flags are usually quite difficult to enable by users when using runc through several layers of engines and orchestrators. A far more preferable solution is to have a heuristic which detects whether copying the original mount's mount options would override an explicit mount option specified by the user. In this case, we should return an error. Fixes: da780e4d2754 ("Fix bind mounts of filesystems with certain options set") Signed-off-by: Aleksa Sarai --- contrib/completions/bash/runc | 3 -- create.go | 4 --- libcontainer/configs/config.go | 4 --- libcontainer/configs/mount_linux.go | 4 +++ libcontainer/rootfs_linux.go | 31 +++++++++--------- libcontainer/specconv/spec_linux.go | 4 +-- restore.go | 4 --- run.go | 4 --- tests/integration/mounts_sshfs.bats | 50 +++++++++++++++-------------- utils_linux.go | 1 - 10 files changed, 48 insertions(+), 61 deletions(-) diff --git a/contrib/completions/bash/runc b/contrib/completions/bash/runc index 69a3b953750..1f2da1c045f 100644 --- a/contrib/completions/bash/runc +++ b/contrib/completions/bash/runc @@ -461,7 +461,6 @@ _runc_run() { --no-subreaper --no-pivot --no-new-keyring - --no-mount-fallback " local options_with_args=" @@ -568,7 +567,6 @@ _runc_create() { --help --no-pivot --no-new-keyring - --no-mount-fallback " local options_with_args=" @@ -629,7 +627,6 @@ _runc_restore() { --no-pivot --auto-dedup --lazy-pages - --no-mount-fallback " local options_with_args=" diff --git a/create.go b/create.go index 3788a532fce..97854b846cb 100644 --- a/create.go +++ b/create.go @@ -51,10 +51,6 @@ command(s) that get executed on start, edit the args parameter of the spec. See Name: "preserve-fds", Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)", }, - cli.BoolFlag{ - Name: "no-mount-fallback", - Usage: "Do not fallback when the specific configuration is not applicable (e.g., do not try to remount a bind mount again after the first attempt failed on source filesystems that have nodev, noexec, nosuid, noatime, relatime, strictatime, nodiratime set)", - }, }, Action: func(context *cli.Context) error { if err := checkArgs(context, 1, exactArgs); err != nil { diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go index bb5dbba6588..576db59523e 100644 --- a/libcontainer/configs/config.go +++ b/libcontainer/configs/config.go @@ -212,10 +212,6 @@ type Config struct { // RootlessCgroups is set when unlikely to have the full access to cgroups. // When RootlessCgroups is set, cgroups errors are ignored. RootlessCgroups bool `json:"rootless_cgroups,omitempty"` - - // Do not try to remount a bind mount again after the first attempt failed on source - // filesystems that have nodev, noexec, nosuid, noatime, relatime, strictatime, nodiratime set - NoMountFallback bool `json:"no_mount_fallback,omitempty"` } type ( diff --git a/libcontainer/configs/mount_linux.go b/libcontainer/configs/mount_linux.go index a4275df3a03..04bbf6096d6 100644 --- a/libcontainer/configs/mount_linux.go +++ b/libcontainer/configs/mount_linux.go @@ -15,6 +15,10 @@ type Mount struct { // Mount flags. Flags int `json:"flags"` + // Mount flags that were explicitly cleared in the configuration (meaning + // the user explicitly requested that these flags *not* be set). + ClearedFlags int `json:"cleared_flags"` + // Propagation Flags PropagationFlags []int `json:"propagation_flags"` diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index ac3ba183e12..a2ed2827591 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -35,7 +35,6 @@ type mountConfig struct { cgroup2Path string rootlessCgroups bool cgroupns bool - noMountFallback bool } // mountEntry contains mount data specific to a mount point. @@ -84,7 +83,6 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig, mountFds mountFds) ( cgroup2Path: iConfig.Cgroup2Path, rootlessCgroups: iConfig.RootlessCgroups, cgroupns: config.Namespaces.Contains(configs.NEWCGROUP), - noMountFallback: config.NoMountFallback, } for i, m := range config.Mounts { entry := mountEntry{Mount: m} @@ -514,7 +512,7 @@ func mountToRootfs(c *mountConfig, m mountEntry) error { // first check that we have non-default options required before attempting a remount if m.Flags&^(unix.MS_REC|unix.MS_REMOUNT|unix.MS_BIND) != 0 { // only remount if unique mount options are set - if err := remount(m, rootfs, c.noMountFallback); err != nil { + if err := remount(m, rootfs); err != nil { return err } } @@ -1103,33 +1101,36 @@ func writeSystemProperty(key, value string) error { return os.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0o644) } -func remount(m mountEntry, rootfs string, noMountFallback bool) error { +func remount(m mountEntry, rootfs string) error { + const mntLockedFlags = unix.MS_RDONLY | unix.MS_NODEV | unix.MS_NOEXEC | + unix.MS_NOSUID | unix.MS_NOATIME | unix.MS_RELATIME | + unix.MS_STRICTATIME | unix.MS_NODIRATIME + return utils.WithProcfd(rootfs, m.Destination, func(dstFD string) error { flags := uintptr(m.Flags | unix.MS_REMOUNT) err := mountViaFDs(m.Source, m.srcFD, m.Destination, dstFD, m.Device, flags, "") if err == nil { return nil } - // Check if the source has flags set according to noMountFallback src := m.src() var s unix.Statfs_t if err := unix.Statfs(src, &s); err != nil { return &os.PathError{Op: "statfs", Path: src, Err: err} } - var checkflags int - if noMountFallback { - // Check for ro only - checkflags = unix.MS_RDONLY - } else { - // Check for ro, nodev, noexec, nosuid, noatime, relatime, strictatime, - // nodiratime - checkflags = unix.MS_RDONLY | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NOATIME | unix.MS_RELATIME | unix.MS_STRICTATIME | unix.MS_NODIRATIME + // Check if the source has any MNT_LOCKED flags set. If so, we need to + // pass the same set of flags when running in a user namespace in order + // for the remount to succeed. + if int(s.Flags)&mntLockedFlags == 0 { + return err } - if int(s.Flags)&checkflags == 0 { + // However, if the user explicitly request the flags *not* be set, we + // need to return an error to avoid producing mounts that don't match + // their requirements. + if int(s.Flags)&m.ClearedFlags&mntLockedFlags != 0 { return err } // ... and retry the mount with flags found above. - flags |= uintptr(int(s.Flags) & checkflags) + flags |= uintptr(int(s.Flags) & mntLockedFlags) return mountViaFDs(m.Source, m.srcFD, m.Destination, dstFD, m.Device, flags, "") }) } diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index d3938da516c..bf33fbfc0a8 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -312,7 +312,6 @@ type CreateOpts struct { Spec *specs.Spec RootlessEUID bool RootlessCgroups bool - NoMountFallback bool } // getwd is a wrapper similar to os.Getwd, except it always gets @@ -359,7 +358,6 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { NoNewKeyring: opts.NoNewKeyring, RootlessEUID: opts.RootlessEUID, RootlessCgroups: opts.RootlessCgroups, - NoMountFallback: opts.NoMountFallback, } for _, m := range spec.Mounts { @@ -981,8 +979,10 @@ func parseMountOptions(options []string) *configs.Mount { if f, exists := mountFlags[o]; exists && f.flag != 0 { if f.clear { m.Flags &= ^f.flag + m.ClearedFlags |= f.flag } else { m.Flags |= f.flag + m.ClearedFlags &= ^f.flag } } else if f, exists := mountPropagationMapping[o]; exists && f != 0 { m.PropagationFlags = append(m.PropagationFlags, f) diff --git a/restore.go b/restore.go index de5b48d54c2..d65afcfc788 100644 --- a/restore.go +++ b/restore.go @@ -98,10 +98,6 @@ using the runc checkpoint command.`, Value: "", Usage: "Specify an LSM mount context to be used during restore.", }, - cli.BoolFlag{ - Name: "no-mount-fallback", - Usage: "Do not fallback when the specific configuration is not applicable (e.g., do not try to remount a bind mount again after the first attempt failed on source filesystems that have nodev, noexec, nosuid, noatime, relatime, strictatime, nodiratime set)", - }, }, Action: func(context *cli.Context) error { if err := checkArgs(context, 1, exactArgs); err != nil { diff --git a/run.go b/run.go index 8b4f4d1fb23..82781669d10 100644 --- a/run.go +++ b/run.go @@ -64,10 +64,6 @@ command(s) that get executed on start, edit the args parameter of the spec. See Name: "preserve-fds", Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)", }, - cli.BoolFlag{ - Name: "no-mount-fallback", - Usage: "Do not fallback when the specific configuration is not applicable (e.g., do not try to remount a bind mount again after the first attempt failed on source filesystems that have nodev, noexec, nosuid, noatime, relatime, strictatime, nodiratime set)", - }, }, Action: func(context *cli.Context) error { if err := checkArgs(context, 1, exactArgs); err != nil { diff --git a/tests/integration/mounts_sshfs.bats b/tests/integration/mounts_sshfs.bats index ce5e2b2e7e1..921bc063277 100644 --- a/tests/integration/mounts_sshfs.bats +++ b/tests/integration/mounts_sshfs.bats @@ -33,33 +33,35 @@ function setup_sshfs() { fi } -@test "runc run [rw bind mount of a ro fuse sshfs mount]" { +@test "runc run [implied-rw bind mount of a ro fuse sshfs mount]" { setup_sshfs "ro" + # All of the extra mount flags are needed to force a remount with new flags. update_config ' .mounts += [{ type: "bind", source: "'"$DIR"'", destination: "/mnt", - options: ["rw", "rprivate", "nosuid", "nodev", "rbind"] + options: ["rprivate", "nosuid", "nodev", "rbind", "sync"] }]' - runc run --no-mount-fallback test_busybox + runc run test_busybox [ "$status" -eq 0 ] } -@test "runc run [dev,exec,suid,atime bind mount of a nodev,nosuid,noexec,noatime fuse sshfs mount]" { - setup_sshfs "nodev,nosuid,noexec,noatime" - # The "sync" option is used to trigger a remount with the below options. - # It serves no further purpose. Otherwise only a bind mount without - # applying the below options will be done. +@test "runc run [explicit-rw bind mount of a ro fuse sshfs mount]" { + setup_sshfs "ro" update_config ' .mounts += [{ type: "bind", source: "'"$DIR"'", destination: "/mnt", - options: ["dev", "suid", "exec", "atime", "rprivate", "rbind", "sync"] + options: ["rw", "rprivate", "nosuid", "nodev", "rbind"] }]' runc run test_busybox - [ "$status" -eq 0 ] + # The above will fail because we explicitly requested a mount with a + # MNT_LOCKED mount option cleared (when the source mount has those mounts + # enabled), namely MS_RDONLY. + [ "$status" -ne 0 ] + [[ "$output" == *"runc run failed: unable to start container process: error during container init: error mounting"*"operation not permitted"* ]] } @test "runc run [ro bind mount of a nodev,nosuid,noexec,noatime fuse sshfs mount]" { @@ -75,7 +77,9 @@ function setup_sshfs() { [ "$status" -eq 0 ] } -@test "runc run [dev,exec,suid,atime bind mount of a nodev,nosuid,noexec,noatime fuse sshfs mount without fallback]" { +@test "runc run [dev,exec,suid,atime bind mount of a nodev,nosuid,noexec,noatime fuse sshfs mount]" { + requires rootless + setup_sshfs "nodev,nosuid,noexec,noatime" # The "sync" option is used to trigger a remount with the below options. # It serves no further purpose. Otherwise only a bind mount without @@ -87,27 +91,25 @@ function setup_sshfs() { options: ["dev", "suid", "exec", "atime", "rprivate", "rbind", "sync"] }]' - runc run --no-mount-fallback test_busybox - # The above will fail as we added --no-mount-fallback which causes us not to - # try to remount a bind mount again after the first attempt failed on source - # filesystems that have nodev, noexec, nosuid, noatime set. + runc run test_busybox + # The above will fail because we explicitly requested a mount with several + # MNT_LOCKED mount options cleared (when the source mount has those mounts + # enabled). [ "$status" -ne 0 ] [[ "$output" == *"runc run failed: unable to start container process: error during container init: error mounting"*"operation not permitted"* ]] } -@test "runc run [ro bind mount of a nodev,nosuid,noexec,noatime fuse sshfs mount without fallback]" { - setup_sshfs "nodev,nosuid,noexec,noatime" +@test "runc run [ro,noexec bind mount of a nosuid,noatime fuse sshfs mount]" { + requires rootless + + setup_sshfs "nosuid,noatime" update_config ' .mounts += [{ type: "bind", source: "'"$DIR"'", destination: "/mnt", - options: ["rbind", "ro"] + options: ["rbind", "ro", "exec"] }]' - runc run --no-mount-fallback test_busybox - # The above will fail as we added --no-mount-fallback which causes us not to - # try to remount a bind mount again after the first attempt failed on source - # filesystems that have nodev, noexec, nosuid, noatime set. - [ "$status" -ne 0 ] - [[ "$output" == *"runc run failed: unable to start container process: error during container init: error mounting"*"operation not permitted"* ]] + runc run test_busybox + [ "$status" -eq 0 ] } diff --git a/utils_linux.go b/utils_linux.go index 0f787cb3387..4c00b2092db 100644 --- a/utils_linux.go +++ b/utils_linux.go @@ -175,7 +175,6 @@ func createContainer(context *cli.Context, id string, spec *specs.Spec) (*libcon Spec: spec, RootlessEUID: os.Geteuid() != 0, RootlessCgroups: rootlessCg, - NoMountFallback: context.Bool("no-mount-fallback"), }) if err != nil { return nil, err