From f661e02343b96638503166b4ec87c086cd8e7604 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 30 Jan 2019 16:02:35 +0000 Subject: [PATCH 1/2] factor out bind mount mountpoint creation During rootfs setup all mountpoints (directory and files) are created before bind mounting the bind mounts. This does not happen during container restore via CRIU. If restoring in an identical but newly created rootfs, the restore fails right now. This just factors out the code to create the bind mount mountpoints so that it also can be used during restore. Signed-off-by: Adrian Reber --- libcontainer/rootfs_linux.go | 47 +++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index 6bd6da74ace..f13b226e444 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -182,6 +182,33 @@ func mountCmd(cmd configs.Command) error { return nil } +func prepareBindMount(m *configs.Mount, rootfs string) error { + stat, err := os.Stat(m.Source) + if err != nil { + // error out if the source of a bind mount does not exist as we will be + // unable to bind anything to it. + return err + } + // ensure that the destination of the bind mount is resolved of symlinks at mount time because + // any previous mounts can invalidate the next mount's destination. + // this can happen when a user specifies mounts within other mounts to cause breakouts or other + // evil stuff to try to escape the container's rootfs. + var dest string + if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil { + return err + } + if err := checkMountDestination(rootfs, dest); err != nil { + return err + } + // update the mount with the correct dest after symlinks are resolved. + m.Destination = dest + if err := createIfNotExists(dest, stat.IsDir()); err != nil { + return err + } + + return nil +} + func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error { var ( dest = m.Destination @@ -257,25 +284,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b } return nil case "bind": - stat, err := os.Stat(m.Source) - if err != nil { - // error out if the source of a bind mount does not exist as we will be - // unable to bind anything to it. - return err - } - // ensure that the destination of the bind mount is resolved of symlinks at mount time because - // any previous mounts can invalidate the next mount's destination. - // this can happen when a user specifies mounts within other mounts to cause breakouts or other - // evil stuff to try to escape the container's rootfs. - if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil { - return err - } - if err := checkMountDestination(rootfs, dest); err != nil { - return err - } - // update the mount with the correct dest after symlinks are resolved. - m.Destination = dest - if err := createIfNotExists(dest, stat.IsDir()); err != nil { + if err := prepareBindMount(m, rootfs); err != nil { return err } if err := mountPropagate(m, rootfs, mountLabel); err != nil { From 7354546cc8f8fa67707e00efb630729dbcb1a00b Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 30 Jan 2019 16:29:04 +0000 Subject: [PATCH 2/2] Create mountpoints also on restore runc creates all missing mountpoints when it starts a container, this commit also creates those mountpoints during restore. Now it is possible to restore a container using the same, but newly created rootfs just as during container start. Signed-off-by: Adrian Reber --- libcontainer/container_linux.go | 76 +++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index ef443f6fc16..e8acfeb4417 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -19,6 +19,7 @@ import ( "syscall" // only for SysProcAttr and Signal "time" + "github.com/cyphar/filepath-securejoin" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/criurpc" @@ -1139,6 +1140,75 @@ func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts } } +// makeCriuRestoreMountpoints makes the actual mountpoints for the +// restore using CRIU. This function is inspired from the code in +// rootfs_linux.go +func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error { + switch m.Device { + case "cgroup": + // Do nothing for cgroup, CRIU should handle it + case "bind": + // The prepareBindMount() function checks if source + // exists. So it cannot be used for other filesystem types. + if err := prepareBindMount(m, c.config.Rootfs); err != nil { + return err + } + default: + // for all other file-systems just create the mountpoints + dest, err := securejoin.SecureJoin(c.config.Rootfs, m.Destination) + if err != nil { + return err + } + if err := checkMountDestination(c.config.Rootfs, dest); err != nil { + return err + } + m.Destination = dest + if err := os.MkdirAll(dest, 0755); err != nil { + return err + } + } + return nil +} + +// isPathInPrefixList is a small function for CRIU restore to make sure +// mountpoints, which are on a tmpfs, are not created in the roofs +func isPathInPrefixList(path string, prefix []string) bool { + for _, p := range prefix { + if strings.HasPrefix(path, p+"/") { + return false + } + } + return true +} + +// prepareCriuRestoreMounts tries to set up the rootfs of the +// container to be restored in the same way runc does it for +// initial container creation. Even for a read-only rootfs container +// runc modifies the rootfs to add mountpoints which do not exist. +// This function also creates missing mountpoints as long as they +// are not on top of a tmpfs, as CRIU will restore tmpfs content anyway. +func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error { + // First get a list of a all tmpfs mounts + tmpfs := []string{} + for _, m := range mounts { + switch m.Device { + case "tmpfs": + tmpfs = append(tmpfs, m.Destination) + } + } + // Now go through all mounts and create the mountpoints + // if the mountpoints are not on a tmpfs, as CRIU will + // restore the complete tmpfs content from its checkpoint. + for _, m := range mounts { + if isPathInPrefixList(m.Destination, tmpfs) { + if err := c.makeCriuRestoreMountpoints(m); err != nil { + return err + } + } + } + return nil +} + func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { c.m.Lock() defer c.m.Unlock() @@ -1252,6 +1322,12 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { } } + // This will modify the rootfs of the container in the same way runc + // modifies the container during initial creation. + if err := c.prepareCriuRestoreMounts(c.config.Mounts); err != nil { + return err + } + for _, m := range c.config.Mounts { switch m.Device { case "bind":