From 8d372105526a363a6cb50ec558acc121a48dc107 Mon Sep 17 00:00:00 2001 From: fahed dorgaa Date: Fri, 7 Apr 2023 17:59:50 +0200 Subject: [PATCH] support detach-ns Signed-off-by: fahed dorgaa --- cmd/rootlesskit/main.go | 7 +++ go.mod | 1 + go.sum | 2 + pkg/child/child.go | 19 +++++--- pkg/network/lxcusernic/lxcusernic.go | 2 +- pkg/network/network.go | 2 +- pkg/network/parentutils/parentutils.go | 62 +++++++++++++++++++++++--- pkg/network/slirp4netns/slirp4netns.go | 8 ++-- pkg/network/vpnkit/vpnkit.go | 2 +- pkg/parent/parent.go | 32 ++++++++----- pkg/port/portutil/portutil.go | 4 +- 11 files changed, 110 insertions(+), 31 deletions(-) diff --git a/cmd/rootlesskit/main.go b/cmd/rootlesskit/main.go index 1d0c78e4..23c302f0 100644 --- a/cmd/rootlesskit/main.go +++ b/cmd/rootlesskit/main.go @@ -162,6 +162,10 @@ See https://rootlesscontaine.rs/getting-started/common/ . Name: "ipcns", Usage: "create an IPC namespace", }, CategoryProcess), + Categorize(&cli.BoolFlag{ + Name: "detach-ns", + Usage: "detach network namespaces ", + }, CategoryProcess), Categorize(&cli.StringFlag{ Name: "propagation", Usage: "mount propagation [rprivate, rslave]", @@ -265,6 +269,7 @@ func createParentOpt(clicontext *cli.Context, pipeFDEnvKey, stateDirEnvKey, pare CreateCgroupNS: clicontext.Bool("cgroupns"), CreateUTSNS: clicontext.Bool("utsns"), CreateIPCNS: clicontext.Bool("ipcns"), + DetachNS: clicontext.Bool("detach-ns"), ParentEUIDEnvKey: parentEUIDEnvKey, ParentEGIDEnvKey: parentEGIDEnvKey, Propagation: clicontext.String("propagation"), @@ -477,10 +482,12 @@ func (w *logrusDebugWriter) Write(p []byte) (int, error) { func createChildOpt(clicontext *cli.Context, pipeFDEnvKey string, targetCmd []string) (child.Opt, error) { pidns := clicontext.Bool("pidns") + detachNs := clicontext.Bool("detach-ns") opt := child.Opt{ PipeFDEnvKey: pipeFDEnvKey, TargetCmd: targetCmd, MountProcfs: pidns, + DetachNS: detachNs, Propagation: clicontext.String("propagation"), EvacuateCgroup2: 
clicontext.String("evacuate-cgroup2") != "", } diff --git a/go.mod b/go.mod index c0de724e..97bd87c4 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,7 @@ require ( github.com/sirupsen/logrus v1.9.0 github.com/songgao/water v0.0.0-20200317203138-2b4b6d7c09d8 github.com/urfave/cli/v2 v2.25.1 + github.com/vishvananda/netns v0.0.4 golang.org/x/sys v0.7.0 gotest.tools/v3 v3.4.0 ) diff --git a/go.sum b/go.sum index 11fa31d6..3b59aa17 100644 --- a/go.sum +++ b/go.sum @@ -55,6 +55,8 @@ github.com/u-root/uio v0.0.0-20210528114334-82958018845c h1:BFvcl34IGnw8yvJi8hlq github.com/u-root/uio v0.0.0-20210528114334-82958018845c/go.mod h1:LpEX5FO/cB+WF4TYGY1V5qktpaZLkKkSegbr0V4eYXA= github.com/urfave/cli/v2 v2.25.1 h1:zw8dSP7ghX0Gmm8vugrs6q9Ku0wzweqPyshy+syu9Gw= github.com/urfave/cli/v2 v2.25.1/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc= +github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8= +github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= diff --git a/pkg/child/child.go b/pkg/child/child.go index 2b510a40..48323642 100644 --- a/pkg/child/child.go +++ b/pkg/child/child.go @@ -11,16 +11,16 @@ import ( "strconv" "syscall" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" - "github.com/rootless-containers/rootlesskit/pkg/common" "github.com/rootless-containers/rootlesskit/pkg/copyup" "github.com/rootless-containers/rootlesskit/pkg/msgutil" "github.com/rootless-containers/rootlesskit/pkg/network" + "github.com/rootless-containers/rootlesskit/pkg/network/parentutils" "github.com/rootless-containers/rootlesskit/pkg/port" "github.com/rootless-containers/rootlesskit/pkg/sigproxy" sigproxysignal 
"github.com/rootless-containers/rootlesskit/pkg/sigproxy/signal" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" ) var propagationStates = map[string]uintptr{ @@ -151,7 +151,7 @@ func setupCopyDir(driver copyup.ChildDriver, dirs []string) (bool, error) { return false, nil } -func setupNet(msg common.Message, etcWasCopied bool, driver network.ChildDriver) error { +func setupNet(msg common.Message, etcWasCopied bool, driver network.ChildDriver, detachNS bool) error { // HostNetwork if driver == nil { return nil @@ -194,6 +194,7 @@ type Opt struct { NetworkDriver network.ChildDriver // nil for HostNetwork CopyUpDriver copyup.ChildDriver // cannot be nil if len(CopyUpDirs) != 0 CopyUpDirs []string + DetachNS bool PortDriver port.ChildDriver MountProcfs bool // needs to be set if (and only if) parent.Opt.CreatePIDNS is set Propagation string // mount propagation type @@ -219,6 +220,14 @@ func Child(opt Opt) error { return fmt.Errorf("parsing message from fd %d: %w", pipeFD, err) } logrus.Debugf("child: got msg from parent: %+v", msg) + // this variable is hardcoded for test purposes + childNsPath := "/tmp/test/netns" + if err := parentutils.NewNamedNetNs("netns", "/tmp/test/"); err != nil { + return err + } + if err := parentutils.PrepareTap(0, childNsPath, "tap"); err != nil { + return err + } if msg.Stage == 0 { // the parent has configured the child's uid_map and gid_map, but the child doesn't have caps here. // so we exec the child again to obtain caps. 
@@ -256,7 +265,7 @@ func Child(opt Opt) error { if err := mountSysfs(opt.NetworkDriver == nil, opt.EvacuateCgroup2); err != nil { return err } - if err := setupNet(msg, etcWasCopied, opt.NetworkDriver); err != nil { + if err := setupNet(msg, etcWasCopied, opt.NetworkDriver, opt.DetachNS); err != nil { return err } if opt.MountProcfs { diff --git a/pkg/network/lxcusernic/lxcusernic.go b/pkg/network/lxcusernic/lxcusernic.go index 0c3154bf..fc97920f 100644 --- a/pkg/network/lxcusernic/lxcusernic.go +++ b/pkg/network/lxcusernic/lxcusernic.go @@ -67,7 +67,7 @@ func (d *parentDriver) MTU() int { return d.mtu } -func (d *parentDriver) ConfigureNetwork(childPID int, stateDir string) (*common.NetworkMessage, func() error, error) { +func (d *parentDriver) ConfigureNetwork(childPID int, _ string, _ bool) (*common.NetworkMessage, func() error, error) { var cleanups []func() error dummyLXCPath := "/dev/null" dummyLXCName := "dummy" diff --git a/pkg/network/network.go b/pkg/network/network.go index e0f2256f..6dc288d3 100644 --- a/pkg/network/network.go +++ b/pkg/network/network.go @@ -13,7 +13,7 @@ type ParentDriver interface { // MTU returns MTU MTU() int // ConfigureNetwork sets up Slirp, updates msg, and returns destructor function. 
- ConfigureNetwork(childPID int, stateDir string) (netmsg *common.NetworkMessage, cleanup func() error, err error) + ConfigureNetwork(childPID int, stateDir string, detachNS bool) (netmsg *common.NetworkMessage, cleanup func() error, err error) } // ChildDriver is called from the child namespace diff --git a/pkg/network/parentutils/parentutils.go b/pkg/network/parentutils/parentutils.go index b83575c1..f9c12227 100644 --- a/pkg/network/parentutils/parentutils.go +++ b/pkg/network/parentutils/parentutils.go @@ -2,17 +2,21 @@ package parentutils import ( "fmt" "os" + "path" + "runtime" "strconv" "github.com/rootless-containers/rootlesskit/pkg/common" + "github.com/vishvananda/netns" + "golang.org/x/sys/unix" ) -func PrepareTap(pid int, tap string) error { +func PrepareTap(childPID int, childNsPath string, tap string) error { cmds := [][]string{ - nsenter(pid, []string{"ip", "tuntap", "add", "name", tap, "mode", "tap"}), - nsenter(pid, []string{"ip", "link", "set", tap, "up"}), + nsenter(childPID, childNsPath, []string{"ip", "tuntap", "add", "name", tap, "mode", "tap"}), + nsenter(childPID, childNsPath, []string{"ip", "link", "set", tap, "up"}), } if err := common.Execs(os.Stderr, os.Environ(), cmds); err != nil { return fmt.Errorf("executing %v: %w", cmds, err) @@ -20,6 +24,54 @@ func PrepareTap(pid int, tap string) error { return nil } -func nsenter(pid int, cmd []string) []string { - return append([]string{"nsenter", "-t", strconv.Itoa(pid), "-n", "-m", "-U", "--preserve-credentials"}, cmd...) +// nsenter wraps cmd in an nsenter(1) invocation. If childNsPath is non-empty, the +// network namespace bound at that path is entered; otherwise the network, mount and +// user namespaces of childPID are entered. +func nsenter(childPID int, childNsPath string, cmd []string) []string { + if childNsPath != "" { + return append([]string{"nsenter", "--net=" + childNsPath, "--preserve-credentials"}, cmd...) + } + return append([]string{"nsenter", "-t", strconv.Itoa(childPID), "-n", "-m", "-U", "--preserve-credentials"}, cmd...) +} 
+ +// NewNamedNetNs creates a new network namespace and bind-mounts it at +// bindMountPath/name. The calling thread is switched back to its original +// network namespace after the bind mount has been attempted. +func NewNamedNetNs(name, bindMountPath string) error { + if err := os.MkdirAll(bindMountPath, 0755); err != nil { + return err + } + namedPath := path.Join(bindMountPath, name) + // Create the bind-mount target; an already existing file is reused. + f, err := os.OpenFile(namedPath, os.O_CREATE|os.O_EXCL, 0444) + if err == nil { + f.Close() + } else if !os.IsExist(err) { + return err + } + + // Network namespaces are per OS thread: pin the goroutine so that netns.New, the + // /proc/<pid>/task/<tid>/ns/net bind mount and netns.Set all act on the same thread. + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + origns, err := netns.Get() + if err != nil { + return err + } + defer origns.Close() + + newNs, err := netns.New() + if err != nil { + return err + } + defer newNs.Close() + + nsPath := fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid()) + mountErr := unix.Mount(nsPath, namedPath, "bind", unix.MS_BIND, "") + // Switch back to the original namespace, even when the bind mount failed. + if err := netns.Set(origns); err != nil { + return err + } + return mountErr } diff --git a/pkg/network/slirp4netns/slirp4netns.go b/pkg/network/slirp4netns/slirp4netns.go index 40915d76..6bb26fc6 100644 --- a/pkg/network/slirp4netns/slirp4netns.go +++ b/pkg/network/slirp4netns/slirp4netns.go @@ -172,11 +172,13 @@ func (d *parentDriver) MTU() int { return d.mtu } -func (d *parentDriver) ConfigureNetwork(childPID int, stateDir string) (*common.NetworkMessage, func() error, error) { +func (d *parentDriver) ConfigureNetwork(childPID int, stateDir string, detachNS bool) (*common.NetworkMessage, func() error, error) { tap := d.ifname var cleanups []func() error - if err := parentutils.PrepareTap(childPID, tap); err != nil { - return nil, common.Seq(cleanups), fmt.Errorf("setting up tap %s: %w", tap, err) + if !detachNS { + if err := parentutils.PrepareTap(childPID, "", tap); err != nil { + return nil, common.Seq(cleanups), fmt.Errorf("setting up tap %s: %w", tap, err) + } } 
readyR, readyW, err := os.Pipe() if err != nil { diff --git a/pkg/network/vpnkit/vpnkit.go b/pkg/network/vpnkit/vpnkit.go index 1ad55e63..df35ffec 100644 --- a/pkg/network/vpnkit/vpnkit.go +++ b/pkg/network/vpnkit/vpnkit.go @@ -83,7 +83,7 @@ func (d *parentDriver) MTU() int { return d.mtu } -func (d *parentDriver) ConfigureNetwork(childPID int, stateDir string) (*common.NetworkMessage, func() error, error) { +func (d *parentDriver) ConfigureNetwork(_ int, stateDir string, _ bool) (*common.NetworkMessage, func() error, error) { var cleanups []func() error vpnkitSocket := filepath.Join(stateDir, "vpnkit-ethernet.sock") vpnkitCtx, vpnkitCancel := context.WithCancel(context.Background()) diff --git a/pkg/parent/parent.go b/pkg/parent/parent.go index 712e9197..4a8dddcd 100644 --- a/pkg/parent/parent.go +++ b/pkg/parent/parent.go @@ -4,6 +4,15 @@ import ( "context" "errors" "fmt" + + "github.com/gofrs/flock" + "github.com/gorilla/mux" + "github.com/rootless-containers/rootlesskit/pkg/api/router" + "github.com/rootless-containers/rootlesskit/pkg/common" + "github.com/rootless-containers/rootlesskit/pkg/msgutil" + "github.com/rootless-containers/rootlesskit/pkg/network" + + //"github.com/rootless-containers/rootlesskit/pkg/network/parentutils" "net" "net/http" "os" @@ -13,22 +22,14 @@ import ( "strconv" "syscall" - "github.com/gofrs/flock" - "github.com/gorilla/mux" - - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" - - "github.com/rootless-containers/rootlesskit/pkg/api/router" - "github.com/rootless-containers/rootlesskit/pkg/common" - "github.com/rootless-containers/rootlesskit/pkg/msgutil" - "github.com/rootless-containers/rootlesskit/pkg/network" "github.com/rootless-containers/rootlesskit/pkg/parent/cgrouputil" "github.com/rootless-containers/rootlesskit/pkg/parent/dynidtools" "github.com/rootless-containers/rootlesskit/pkg/parent/idtools" "github.com/rootless-containers/rootlesskit/pkg/port" "github.com/rootless-containers/rootlesskit/pkg/sigproxy" 
"github.com/rootless-containers/rootlesskit/pkg/sigproxy/signal" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" ) type Opt struct { @@ -42,6 +43,7 @@ type Opt struct { CreateCgroupNS bool CreateUTSNS bool CreateIPCNS bool + DetachNS bool ParentEUIDEnvKey string // optional env key to propagate geteuid() value ParentEGIDEnvKey string // optional env key to propagate getegid() value Propagation string @@ -62,6 +64,7 @@ const ( StateFileLock = "lock" StateFileChildPID = "child_pid" // decimal pid number text StateFileAPISock = "api.sock" // REST API Socket + StateFileNetNs = "netns" // rootlesskit network namespace ) func checkPreflight(opt Opt) error { @@ -152,9 +155,13 @@ func Parent(opt Opt) error { Pdeathsig: syscall.SIGKILL, Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS, } + if opt.NetworkDriver != nil { - cmd.SysProcAttr.Unshareflags |= syscall.CLONE_NEWNET + if !opt.DetachNS { + cmd.SysProcAttr.Unshareflags |= syscall.CLONE_NEWNET + } } + if opt.CreatePIDNS { // cannot be Unshareflags (panics) cmd.SysProcAttr.Cloneflags |= syscall.CLONE_NEWPID @@ -213,8 +220,9 @@ func Parent(opt Opt) error { StateDir: opt.StateDir, }, } + if opt.NetworkDriver != nil { - netMsg, cleanupNetwork, err := opt.NetworkDriver.ConfigureNetwork(cmd.Process.Pid, opt.StateDir) + netMsg, cleanupNetwork, err := opt.NetworkDriver.ConfigureNetwork(cmd.Process.Pid, opt.StateDir, opt.DetachNS) if cleanupNetwork != nil { defer cleanupNetwork() } diff --git a/pkg/port/portutil/portutil.go b/pkg/port/portutil/portutil.go index a1e649ab..94e3056d 100644 --- a/pkg/port/portutil/portutil.go +++ b/pkg/port/portutil/portutil.go @@ -16,13 +16,13 @@ import ( // // Format is as follows: // -// :[:]:/ +// :[:]:/ // // Note that (child IP being optional) the format can either contain 5 or 4 // components. When using IPv6 IP addresses, addresses must use square brackets // to prevent the colons being mistaken for delimiters. 
For example: // -// [::1]:8080:[::2]:80/udp +// [::1]:8080:[::2]:80/udp func ParsePortSpec(portSpec string) (*port.Spec, error) { const ( parentIP = iota