From 461cf9f1c2dc8093fc2d7c671c3b3efe1de26e54 Mon Sep 17 00:00:00 2001 From: Maksim An Date: Mon, 21 Jun 2021 15:26:40 -0700 Subject: [PATCH] make container's shared memory configurable via annotation add annotation "io.microsoft.container.storage.shm.size-kb" to set container's /dev/shm tmpfs size. this overrides any existing /dev/shm mounts in the spec additionally move the annotations parsing logic into a separate function Signed-off-by: Maksim An --- internal/guest/runtime/hcsv2/spec.go | 81 +++++++++++++++++++ .../guest/runtime/hcsv2/workload_container.go | 39 +-------- test/cri-containerd/container_test.go | 51 ++++++++++++ 3 files changed, 134 insertions(+), 37 deletions(-) diff --git a/internal/guest/runtime/hcsv2/spec.go b/internal/guest/runtime/hcsv2/spec.go index d69793fda0..6abcd509d2 100644 --- a/internal/guest/runtime/hcsv2/spec.go +++ b/internal/guest/runtime/hcsv2/spec.go @@ -3,7 +3,10 @@ package hcsv2 import ( + "context" "fmt" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/opencontainers/runc/libcontainer/devices" "path/filepath" "strconv" "strings" @@ -42,6 +45,18 @@ func isInMounts(target string, mounts []oci.Mount) bool { return false } +// removeMount removes mount from the array if `target` matches `Destination` +func removeMount(target string, mounts []oci.Mount) []oci.Mount { + var result []oci.Mount + for _, m := range mounts { + if m.Destination == target { + continue + } + result = append(result, m) + } + return result +} + func setProcess(spec *oci.Spec) { if spec.Process == nil { spec.Process = &oci.Process{} @@ -152,3 +167,69 @@ func getGroup(spec *oci.Spec, filter func(user.Group) bool) (user.Group, error) } return groups[0], nil } + +// applyAnnotationsToSpec modifies the spec based on additional information from annotations +func applyAnnotationsToSpec(ctx context.Context, spec *oci.Spec) error { + // Check if we need to override container's /dev/shm + if val, ok := spec.Annotations["io.microsoft.container.storage.shm.size-kb"]; ok { + sz, err := strconv.ParseInt(val, 10, 64) + if err != nil { + return errors.Wrap(err, "/dev/shm size must be a valid integer") + } + if sz <= 0 { + return errors.Errorf("/dev/shm size must be a positive integer, got: %d", sz) + } + + // Use the same options as in upstream https://github.com/containerd/containerd/blob/0def98e462706286e6eaeff4a90be22fda75e761/oci/mounts.go#L49 + size := fmt.Sprintf("size=%dk", sz) + mt := oci.Mount{ + Destination: "/dev/shm", + Type: "tmpfs", + Source: "shm", + Options: []string{"nosuid", "noexec", "nodev", "mode=1777", size}, + } + spec.Mounts = removeMount("/dev/shm", spec.Mounts) + spec.Mounts = append(spec.Mounts, mt) + log.G(ctx).WithField("size", size).Debug("set custom /dev/shm size") + } + + // Check if we need to do any capability/device mappings + if spec.Annotations["io.microsoft.virtualmachine.lcow.privileged"] == "true" { + log.G(ctx).Debug("'io.microsoft.virtualmachine.lcow.privileged' set for privileged container") + + // Add all host devices + hostDevices, err := devices.HostDevices() + if err != nil { + return err + } + for _, hostDevice := range hostDevices { + addLinuxDeviceToSpec(ctx, hostDevice, spec, false) + } + + // Set the cgroup access + spec.Linux.Resources.Devices = []oci.LinuxDeviceCgroup{ + { + Allow: true, + Access: "rwm", + }, + } + } else { + tempLinuxDevices := spec.Linux.Devices + spec.Linux.Devices = []oci.LinuxDevice{} + for _, ld := range tempLinuxDevices { + hostDevice, err := devices.DeviceFromPath(ld.Path, "rwm") + if err != nil { + return err + } + addLinuxDeviceToSpec(ctx, hostDevice, spec, true) + } + } + + // Check if we need to set non-default user + if userstr, ok := spec.Annotations["io.microsoft.lcow.userstr"]; ok { + if err := setUserStr(spec, userstr); err != nil { + return err + } + } + return nil +} diff --git a/internal/guest/runtime/hcsv2/workload_container.go b/internal/guest/runtime/hcsv2/workload_container.go index 1aed4983c2..d8179807d3 100644 --- a/internal/guest/runtime/hcsv2/workload_container.go +++ b/internal/guest/runtime/hcsv2/workload_container.go @@ -11,7 +11,6 @@ import ( "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/oc" "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/devices" oci "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" "go.opencensus.io/trace" @@ -119,42 +118,8 @@ func setupWorkloadContainerSpec(ctx context.Context, sbid, id string, spec *oci. // also has a concept of a sandbox/shm file when the IPC NamespaceMode != // NODE. - // Check if we need to do any capability/device mappings - if spec.Annotations["io.microsoft.virtualmachine.lcow.privileged"] == "true" { - log.G(ctx).Debug("'io.microsoft.virtualmachine.lcow.privileged' set for privileged container") - - // Add all host devices - hostDevices, err := devices.HostDevices() - if err != nil { - return err - } - for _, hostDevice := range hostDevices { - addLinuxDeviceToSpec(ctx, hostDevice, spec, false) - } - - // Set the cgroup access - spec.Linux.Resources.Devices = []oci.LinuxDeviceCgroup{ - { - Allow: true, - Access: "rwm", - }, - } - } else { - tempLinuxDevices := spec.Linux.Devices - spec.Linux.Devices = []oci.LinuxDevice{} - for _, ld := range tempLinuxDevices { - hostDevice, err := devices.DeviceFromPath(ld.Path, "rwm") - if err != nil { - return err - } - addLinuxDeviceToSpec(ctx, hostDevice, spec, true) - } - } - - if userstr, ok := spec.Annotations["io.microsoft.lcow.userstr"]; ok { - if err := setUserStr(spec, userstr); err != nil { - return err - } + if err := applyAnnotationsToSpec(ctx, spec); err != nil { + return err } // Force the parent cgroup into our /containers root diff --git a/test/cri-containerd/container_test.go b/test/cri-containerd/container_test.go index bf8fa6fe8f..c92c920c25 100644 --- a/test/cri-containerd/container_test.go +++ b/test/cri-containerd/container_test.go @@ -775,3 +775,54 @@ func Test_RunContainer_ShareScratch_CheckSize_LCOW(t *testing.T) { t.Fatalf("expected available rootfs size to be the same, got: %s and %s", availableSizeContainerOne, availableSizeContainerTwo) } } + +func Test_CreateContainer_DevShmSize(t *testing.T) { + requireFeatures(t, featureLCOW) + + pullRequiredLcowImages(t, []string{imageLcowK8sPause, imageLcowAlpine}) + + client := newTestRuntimeClient(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + podReq := getRunPodSandboxRequest(t, lcowRuntimeHandler, nil) + podID := runPodSandbox(t, client, ctx, podReq) + defer removePodSandbox(t, client, ctx, podID) + + cmd := []string{"ash", "-c", "while true; do sleep 1; done"} + contReq1 := getCreateContainerRequest(podID, "test-container-devshm-256", imageLcowAlpine, cmd, podReq.Config) + // the /dev/shm size is expected to be in KB, set it to 256 MB + size := 256 * 1024 + contReq1.Config.Annotations = map[string]string{ + "io.microsoft.container.storage.shm.size-kb": strconv.Itoa(size), + } + containerID1 := createContainer(t, client, ctx, contReq1) + defer removeContainer(t, client, ctx, containerID1) + + startContainer(t, client, ctx, containerID1) + defer stopContainer(t, client, ctx, containerID1) + + contReq2 := getCreateContainerRequest(podID, "test-container-devshm-default", imageLcowAlpine, cmd, podReq.Config) + containerID2 := createContainer(t, client, ctx, contReq2) + defer removeContainer(t, client, ctx, containerID2) + startContainer(t, client, ctx, containerID2) + defer stopContainer(t, client, ctx, containerID2) + + // check /dev/shm size on container 1, should be set to 256 MB + execResponse1 := execSync(t, client, ctx, &runtime.ExecSyncRequest{ + ContainerId: containerID1, + Cmd: []string{"df", "-h", "/dev/shm"}, + }) + if !strings.Contains(string(execResponse1.Stdout), "256.0M") { + t.Fatalf("expected the size of /dev/shm to be 256MB. Got output instead: %s", string(execResponse1.Stdout)) + } + + // check /dev/shm size on container 2, should be set to default 64 MB + execResponse2 := execSync(t, client, ctx, &runtime.ExecSyncRequest{ + ContainerId: containerID2, + Cmd: []string{"df", "-h", "/dev/shm"}, + }) + if !strings.Contains(string(execResponse2.Stdout), "64.0M") { + t.Fatalf("expected the size of /dev/shm to be 64MB. Got output instead: %s", string(execResponse1.Stdout)) + } +}