From c3c3f91f7ae6faf4ab10e9aaf925ebaf5991015f Mon Sep 17 00:00:00 2001 From: Austin Vazquez Date: Thu, 10 Oct 2024 04:16:57 +0000 Subject: [PATCH] Add container run --security-opt systempaths=unconfined This change adds a security option to turn off confinement for system paths (masked paths, read-only paths) for the container. Signed-off-by: Austin Vazquez --- cmd/nerdctl/container/container_run.go | 1 + .../container_run_security_linux_test.go | 55 +++++++++++++++++++ docs/command-reference.md | 1 + pkg/cmd/container/run_security_linux.go | 13 ++++- 4 files changed, 69 insertions(+), 1 deletion(-) diff --git a/cmd/nerdctl/container/container_run.go b/cmd/nerdctl/container/container_run.go index a514d3795af..b24cd7c3566 100644 --- a/cmd/nerdctl/container/container_run.go +++ b/cmd/nerdctl/container/container_run.go @@ -183,6 +183,7 @@ func setCreateFlags(cmd *cobra.Command) { "seccomp=", "seccomp=" + defaults.SeccompProfileName, "seccomp=unconfined", "apparmor=", "apparmor=" + defaults.AppArmorProfileName, "apparmor=unconfined", "no-new-privileges", + "systempaths=unconfined", "privileged-without-host-devices"}, cobra.ShellCompDirectiveNoFileComp }) // cap-add and cap-drop are defined as StringSlice, not StringArray, to allow specifying "--cap-add=CAP_SYS_ADMIN,CAP_NET_ADMIN" (compatible with Podman) diff --git a/cmd/nerdctl/container/container_run_security_linux_test.go b/cmd/nerdctl/container/container_run_security_linux_test.go index e5c6ec87b1e..6a4cc35bb4b 100644 --- a/cmd/nerdctl/container/container_run_security_linux_test.go +++ b/cmd/nerdctl/container/container_run_security_linux_test.go @@ -193,6 +193,61 @@ func TestRunSeccompCapSysPtrace(t *testing.T) { // Docker/Moby 's seccomp profile allows ptrace(2) by default, but containerd does not (yet): https://github.com/containerd/containerd/issues/6802 } +func TestRunSystemPathsUnconfined(t *testing.T) { + base := testutil.NewBase(t) + + const findmnt = "`apk add -q findmnt && findmnt -R /proc && findmnt -R 
/sys`" + result := base.Cmd("run", "--rm", testutil.AlpineImage, "sh", "-euxc", findmnt).Run() + defaultContainerOutput := result.Combined() + + var confined []string + + for _, path := range []string{ + "/proc/kcore", + "/proc/keys", + "/proc/latency_stats", + "/proc/sched_debug", + "/proc/scsi", + "/proc/timer_list", + "/proc/timer_stats", + "/sys/firmware", + "/sys/fs/selinux", + } { + // Not each distribution will support every masked path here. + if strings.Contains(defaultContainerOutput, path) { + confined = append(confined, path) + } + } + + assert.Check(t, len(confined) != 0, "Default container has no confined paths to validate") + + result = base.Cmd("run", "--rm", "--security-opt", "systempaths=unconfined", testutil.AlpineImage, "sh", "-euxc", findmnt).Run() + unconfinedContainerOutput := result.Combined() + + for _, path := range confined { + assert.Assert(t, !strings.Contains(unconfinedContainerOutput, path), fmt.Sprintf("%s should not be masked when unconfined", path)) + } + + for _, path := range []string{ + "/proc/acpi", + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sysrq-trigger", + "/proc/sys", + } { + findmntPath := fmt.Sprintf("`apk add -q findmnt && findmnt %s`", path) + + result := base.Cmd("run", "--rm", testutil.AlpineImage, "sh", "-euxc", findmntPath).Run() + + // Not each distribution will support every read-only path here. 
+ if strings.Contains(result.Combined(), path) { + result = base.Cmd("run", "--rm", "--security-opt", "systempaths=unconfined", testutil.AlpineImage, "sh", "-euxc", findmntPath).Run() + assert.Assert(t, !strings.Contains(result.Combined(), "ro,"), fmt.Sprintf("%s should not be read-only when unconfined", path)) + } + } +} + func TestRunPrivileged(t *testing.T) { // docker does not support --privileged-without-host-devices testutil.DockerIncompatible(t) diff --git a/docs/command-reference.md b/docs/command-reference.md index 486872071fb..cba61fb1372 100644 --- a/docs/command-reference.md +++ b/docs/command-reference.md @@ -230,6 +230,7 @@ Security flags: - :whale: `--security-opt seccomp=`: specify custom seccomp profile - :whale: `--security-opt apparmor=`: specify custom AppArmor profile - :whale: `--security-opt no-new-privileges`: disallow privilege escalation, e.g., setuid and file capabilities +- :whale: `--security-opt systempaths=unconfined`: Turn off confinement for system paths (masked paths, read-only paths) for the container - :nerd_face: `--security-opt privileged-without-host-devices`: Don't pass host devices to privileged containers - :whale: `--cap-add=`: Add Linux capabilities - :whale: `--cap-drop=`: Drop Linux capabilities diff --git a/pkg/cmd/container/run_security_linux.go b/pkg/cmd/container/run_security_linux.go index 4b26d23f766..510310f265a 100644 --- a/pkg/cmd/container/run_security_linux.go +++ b/pkg/cmd/container/run_security_linux.go @@ -45,10 +45,14 @@ var privilegedWithoutDevicesOpts = []oci.SpecOpts{ oci.WithNewPrivileges, } +const ( + systemPathsUnconfined = "unconfined" +) + func generateSecurityOpts(privileged bool, securityOptsMap map[string]string) ([]oci.SpecOpts, error) { for k := range securityOptsMap { switch k { - case "seccomp", "apparmor", "no-new-privileges", "privileged-without-host-devices": + case "seccomp", "apparmor", "no-new-privileges", "systempaths", "privileged-without-host-devices": default: log.L.Warnf("unknown 
security-opt: %q", k) } @@ -99,6 +103,13 @@ func generateSecurityOpts(privileged bool, securityOptsMap map[string]string) ([ opts = append(opts, oci.WithNewPrivileges) } + if value, ok := securityOptsMap["systempaths"]; ok && value == systemPathsUnconfined { + opts = append(opts, oci.WithMaskedPaths(nil)) + opts = append(opts, oci.WithReadonlyPaths(nil)) + } else if ok && value != systemPathsUnconfined { + return nil, errors.New(`invalid security-opt "systempaths=unconfined"`) + } + privilegedWithoutHostDevices, err := maputil.MapBoolValueAsOpt(securityOptsMap, "privileged-without-host-devices") if err != nil { return nil, err