From 780349924f4d79bfd282a4d3585e84d576af895c Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Sun, 3 Apr 2022 22:52:58 -0700 Subject: [PATCH 1/4] pkg/cvo/updatepayload: Prune previous payload downloads Avoid [1]: mv: inter-device move failed: '/manifests' to '/etc/cvo/updatepayloads/HbO7IDc7tyIg9utw3sd_tg/manifests/manifests'; unable to remove target: Directory not empty by pruning all scratch directories before downloading a new payload. Also shift the job pruning up here too, so it's all collected together. For jobs, pending and active jobs are now added to the deletion queue too, and we pivot to not retaining any old jobs. Either the older jobs were for other targets, in which case we no longer care about them, or they are looking at the same directory we're about to target with the job we're about to launch, in which case they will likely step on each other's toes. If we want to return to keeping more jobs around, we'd need to pivot to target directory names that are more unique than just "pullspec hash", e.g. by including a job-launch timestap. The directory pruning avoids sticking subsequent download jobs after a corrupted download attempt (e.g. if something terminates a download job part way through its 'mv' call, subsequent download ). Reproducing this issue locally, with /tmp on a different filesystem: $ mkdir -p a /tmp/a/a $ /bin/mv a /tmp /bin/mv: inter-device move failed: 'a' to '/tmp/a'; unable to remove target: Directory not empty $ /bin/mv --version mv (GNU coreutils) 8.31 ... The directory pruning will also avoid leaking previous sets of manifests. Before this commit, the /etc/cvo/updatepayloads directories on the control plane nodes would gradually accumulate more and more release manifest sets as new targets were downloaded for inspection. The manifest sets aren't that big: $ oc adm release extract --to manifests quay.io/openshift-release-dev/ocp-release:4.10.6-x86_64 Extracted release payload from digest sha256:88b394e633e09dc23aa1f1a61ededd8e52478edf34b51a7dbbb21d9abde2511a created at 2022-03-23T07:34:52Z $ du -hs manifests 6.5M manifests But still, better to have the pruning than leak forever. [1]: https://bugzilla.redhat.com/show_bug.cgi?id=2070805#c0 --- pkg/cvo/updatepayload.go | 41 ++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/pkg/cvo/updatepayload.go b/pkg/cvo/updatepayload.go index 52c69925c1..90fa9ce3f9 100644 --- a/pkg/cvo/updatepayload.go +++ b/pkg/cvo/updatepayload.go @@ -135,14 +135,28 @@ func (r *payloadRetriever) targetUpdatePayloadDir(ctx context.Context, update co hash := md5.New() hash.Write([]byte(update.Image)) payloadHash := base64.RawURLEncoding.EncodeToString(hash.Sum(nil)) - tdir := filepath.Join(r.workingDir, payloadHash) - err := payload.ValidateDirectory(tdir) - if os.IsNotExist(err) { - // the dirs don't exist, try fetching the payload to tdir. - err = r.fetchUpdatePayloadToDir(ctx, tdir, update) + + // Prune older jobs and directories while gracefully handling errors. + if err := r.pruneJobs(ctx, 0); err != nil { + klog.Warningf("failed to prune jobs: %v", err) } - if err != nil { + if files, err := os.ReadDir(r.workingDir); err != nil { + klog.Warningf("failed to list update payload directory: %v", err) + } else { + for _, file := range files { + if err := os.RemoveAll(filepath.Join(r.workingDir, file.Name())); err != nil { + klog.Warningf("failed to prune update payload directory: %v", err) + } + } + } + + if err := payload.ValidateDirectory(tdir); os.IsNotExist(err) { + // the dirs don't exist, try fetching the payload to tdir. + if err := r.fetchUpdatePayloadToDir(ctx, tdir, update); err != nil { + return "", err + } + } else if err != nil { return "", err } @@ -217,14 +231,7 @@ func (r *payloadRetriever) fetchUpdatePayloadToDir(ctx context.Context, dir stri }, } - // Prune older jobs while gracefully handling errors. - err := r.pruneJobs(ctx, 2) - if err != nil { - klog.Warningf("failed to prune jobs: %v", err) - } - - _, err = r.kubeClient.BatchV1().Jobs(job.Namespace).Create(ctx, job, metav1.CreateOptions{}) - if err != nil { + if _, err := r.kubeClient.BatchV1().Jobs(job.Namespace).Create(ctx, job, metav1.CreateOptions{}); err != nil { return err } return resourcebuilder.WaitForJobCompletion(ctx, r.kubeClient.BatchV1(), job) @@ -248,12 +255,6 @@ func (r *payloadRetriever) pruneJobs(ctx context.Context, retain int) error { // Ignore jobs not beginning with operatorName case !strings.HasPrefix(job.Name, r.operatorName+"-"): break - // Ignore jobs that have not yet started - case job.Status.StartTime == nil: - break - // Ignore jobs that are still active - case job.Status.Active == 1: - break default: deleteJobs = append(deleteJobs, job) } From d7b26746d44123fc56336d087f92c16fcdde46af Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Mon, 18 Apr 2022 12:56:47 -0700 Subject: [PATCH 2/4] pkg/cvo/updatepayload: Shift previous-download removal into the job I'd attempted to add this cleanup on the CVO side in 507b474632 (pkg/cvo/updatepayload: Prune previous payload downloads, 2022-04-03, #760). But Evgeni pointed out that that doesn't work because the CVO's volume mount is 'readOnly: true': 2022-04-11T20:42:32.056428784Z W0411 20:42:32.056387 1 updatepayload.go:149] failed to prune update payload directory: unlinkat /etc/cvo/updatepayloads/brRTeZnZSIkZ2J2YMdQozg: read-only file system This commit shifts removal into the job. To avoid issues with loading half-populated directories, I've also adjusted the job to populate {baseDir}/{targetName}-{randomSuffix}, and then rename that populated result to {baseDir}/{targetName}. That way, future targetUpdatePayloadDir calls using the ValidateDirectory precheck will successfully distinguish "previous job got everything for this release" (where we don't want a new download job) from "previous job got some bits over, but not everything" (where we do want a new download job). POSIX 2018 requires mv to use rename [1]. While rename discusses atomicity in the informative Rationale section, there's nothing in the normative description that mentions atomicity [2]. Still, "atomic renames" are a common dance on Linux, especially when renaming within a directory, where it is a single directory table being edited. [3] talks through the pattern, and points out the need for a pre-rename fsync or similar if you want to avoid: 1. Open a/file, write to it, and close the file descriptor. 2. Kernel holding file contents in memory, not yet on disk. 3. Atomically move a/ to b/. 4. Crash, losing any kernel-memory caches. 5. b/file exists, but it may be empty or partial. I think the risk of kernel-crashes leaving us with currupted manifests is small enough that we can ignore it, to save the cost of calling fsync. [1]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/mv.html [2]: https://pubs.opengroup.org/onlinepubs/9699919799/functions/rename.html [3]: https://lwn.net/Articles/789600/ --- pkg/cvo/updatepayload.go | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/pkg/cvo/updatepayload.go b/pkg/cvo/updatepayload.go index 90fa9ce3f9..0a48463192 100644 --- a/pkg/cvo/updatepayload.go +++ b/pkg/cvo/updatepayload.go @@ -141,15 +141,6 @@ func (r *payloadRetriever) targetUpdatePayloadDir(ctx context.Context, update co if err := r.pruneJobs(ctx, 0); err != nil { klog.Warningf("failed to prune jobs: %v", err) } - if files, err := os.ReadDir(r.workingDir); err != nil { - klog.Warningf("failed to list update payload directory: %v", err) - } else { - for _, file := range files { - if err := os.RemoveAll(filepath.Join(r.workingDir, file.Name())); err != nil { - klog.Warningf("failed to prune update payload directory: %v", err) - } - } - } if err := payload.ValidateDirectory(tdir); os.IsNotExist(err) { // the dirs don't exist, try fetching the payload to tdir. @@ -290,20 +281,24 @@ func (r *payloadRetriever) pruneJobs(ctx context.Context, retain int) error { // copyPayloadCmd returns a shell command that copies CVO and release manifests from the default location // to the target dir. -// It is made up of 2 commands: -// `mkdir -p && mv ` -// `mkdir -p && mv ` func copyPayloadCmd(tdir string) string { - var ( - fromCVOPath = filepath.Join(payload.DefaultPayloadDir, payload.CVOManifestDir) - toCVOPath = filepath.Join(tdir, payload.CVOManifestDir) - cvoCmd = fmt.Sprintf("mkdir -p %s && mv %s %s", tdir, fromCVOPath, toCVOPath) + baseDir, targetName := filepath.Split(tdir) + tmpDir := filepath.Join(baseDir, fmt.Sprintf("%s-%s", targetName, randutil.String(5))) - fromReleasePath = filepath.Join(payload.DefaultPayloadDir, payload.ReleaseManifestDir) - toReleasePath = filepath.Join(tdir, payload.ReleaseManifestDir) - releaseCmd = fmt.Sprintf("mkdir -p %s && mv %s %s", tdir, fromReleasePath, toReleasePath) - ) - return fmt.Sprintf("%s && %s", cvoCmd, releaseCmd) + cleanupCmd := fmt.Sprintf("rm -fR %s", filepath.Join(baseDir, "*")) + + tmpDirCmd := fmt.Sprintf("mkdir %s", tmpDir) + + fromCVOPath := filepath.Join(payload.DefaultPayloadDir, payload.CVOManifestDir) + toCVOPath := filepath.Join(tmpDir, payload.CVOManifestDir) + cvoCmd := fmt.Sprintf("mv %s %s", fromCVOPath, toCVOPath) + + fromReleasePath := filepath.Join(payload.DefaultPayloadDir, payload.ReleaseManifestDir) + toReleasePath := filepath.Join(tmpDir, payload.ReleaseManifestDir) + releaseCmd := fmt.Sprintf("mv %s %s", fromReleasePath, toReleasePath) + + moveInPlaceCmd := fmt.Sprintf("mv %s %s", tmpDir, tdir) + return strings.Join([]string{cleanupCmd, tmpDirCmd, cvoCmd, releaseCmd, moveInPlaceCmd}, " && ") } // findUpdateFromConfig identifies a desired update from user input or returns false. It will From 7b4638fa47b2eeb7cff850171b901cdf627e7ec5 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Wed, 20 Apr 2022 21:05:47 -0700 Subject: [PATCH 3/4] pkg/cvo/updatepayload: Use initContainers instead of shell &&-chains All of the input strings were internally controlled, so there wasn't a risk of them containing a shell-sensitive character. But that safety was not immediately obvious when reading the code. By converting to initContainers, which run sequentially to completion before the next container is run [1], we have the same effect without involving the shell. As a side benefit, we also get clearer status logging showing exactly which steps have succeeded or failed. [1]: https://kubernetes.io/docs/concepts/workloads/pods/init-containers/#understanding-init-containers --- pkg/cvo/updatepayload.go | 98 ++++++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 44 deletions(-) diff --git a/pkg/cvo/updatepayload.go b/pkg/cvo/updatepayload.go index 0a48463192..46bfb0bb84 100644 --- a/pkg/cvo/updatepayload.go +++ b/pkg/cvo/updatepayload.go @@ -161,16 +161,36 @@ func (r *payloadRetriever) targetUpdatePayloadDir(ctx context.Context, update co func (r *payloadRetriever) fetchUpdatePayloadToDir(ctx context.Context, dir string, update configv1.Update) error { var ( version = update.Version - payload = update.Image + image = update.Image name = fmt.Sprintf("%s-%s-%s", r.operatorName, version, randutil.String(5)) namespace = r.namespace deadline = pointer.Int64Ptr(2 * 60) nodeSelectorKey = "node-role.kubernetes.io/master" nodename = r.nodeName - cmd = []string{"/bin/sh"} - args = []string{"-c", copyPayloadCmd(dir)} ) + baseDir, targetName := filepath.Split(dir) + tmpDir := filepath.Join(baseDir, fmt.Sprintf("%s-%s", targetName, randutil.String(5))) + + setContainerDefaults := func(container corev1.Container) corev1.Container { + container.Image = image + container.VolumeMounts = []corev1.VolumeMount{{ + MountPath: targetUpdatePayloadsDir, + Name: "payloads", + }} + container.SecurityContext = &corev1.SecurityContext{ + Privileged: pointer.BoolPtr(true), + } + container.Resources = corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("10m"), + corev1.ResourceMemory: resource.MustParse("50Mi"), + corev1.ResourceEphemeralStorage: resource.MustParse("2Mi"), + }, + } + return container + } + job := &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ Name: name, @@ -180,26 +200,38 @@ func (r *payloadRetriever) fetchUpdatePayloadToDir(ctx context.Context, dir stri ActiveDeadlineSeconds: deadline, Template: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "payload", - Image: payload, - Command: cmd, - Args: args, - VolumeMounts: []corev1.VolumeMount{{ - MountPath: targetUpdatePayloadsDir, - Name: "payloads", - }}, - SecurityContext: &corev1.SecurityContext{ - Privileged: pointer.BoolPtr(true), - }, - Resources: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("10m"), - corev1.ResourceMemory: resource.MustParse("50Mi"), - corev1.ResourceEphemeralStorage: resource.MustParse("2Mi"), + InitContainers: []corev1.Container{ + setContainerDefaults(corev1.Container{ + Name: "cleanup", + Command: []string{"rm", "-fR", filepath.Join(baseDir, "*")}, + }), + setContainerDefaults(corev1.Container{ + Name: "make-temporary-directory", + Command: []string{"mkdir", tmpDir}, + }), + setContainerDefaults(corev1.Container{ + Name: "move-operator-manifests-to-temporary-directory", + Command: []string{ + "mv", + filepath.Join(payload.DefaultPayloadDir, payload.CVOManifestDir), + filepath.Join(tmpDir, payload.CVOManifestDir), }, - }, - }}, + }), + setContainerDefaults(corev1.Container{ + Name: "move-release-manifests-to-temporary-directory", + Command: []string{ + "mv", + filepath.Join(payload.DefaultPayloadDir, payload.ReleaseManifestDir), + filepath.Join(tmpDir, payload.ReleaseManifestDir), + }, + }), + }, + Containers: []corev1.Container{ + setContainerDefaults(corev1.Container{ + Name: "rename-to-final-location", + Command: []string{"mv", tmpDir, dir}, + }), + }, Volumes: []corev1.Volume{{ Name: "payloads", VolumeSource: corev1.VolumeSource{ @@ -279,28 +311,6 @@ func (r *payloadRetriever) pruneJobs(ctx context.Context, retain int) error { return nil } -// copyPayloadCmd returns a shell command that copies CVO and release manifests from the default location -// to the target dir. -func copyPayloadCmd(tdir string) string { - baseDir, targetName := filepath.Split(tdir) - tmpDir := filepath.Join(baseDir, fmt.Sprintf("%s-%s", targetName, randutil.String(5))) - - cleanupCmd := fmt.Sprintf("rm -fR %s", filepath.Join(baseDir, "*")) - - tmpDirCmd := fmt.Sprintf("mkdir %s", tmpDir) - - fromCVOPath := filepath.Join(payload.DefaultPayloadDir, payload.CVOManifestDir) - toCVOPath := filepath.Join(tmpDir, payload.CVOManifestDir) - cvoCmd := fmt.Sprintf("mv %s %s", fromCVOPath, toCVOPath) - - fromReleasePath := filepath.Join(payload.DefaultPayloadDir, payload.ReleaseManifestDir) - toReleasePath := filepath.Join(tmpDir, payload.ReleaseManifestDir) - releaseCmd := fmt.Sprintf("mv %s %s", fromReleasePath, toReleasePath) - - moveInPlaceCmd := fmt.Sprintf("mv %s %s", tmpDir, tdir) - return strings.Join([]string{cleanupCmd, tmpDirCmd, cvoCmd, releaseCmd, moveInPlaceCmd}, " && ") -} - // findUpdateFromConfig identifies a desired update from user input or returns false. It will // resolve payload if the user specifies a version and a matching available update. func findUpdateFromConfig(config *configv1.ClusterVersion) (configv1.Update, bool) { From e202c480b4f2cee4afb17692f8001fbc071722ba Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Tue, 26 Apr 2022 22:38:11 -0700 Subject: [PATCH 4/4] pkg/cvo/updatepayload: Restore shell for rm globbing Some background on recent changes: * 507b474632 (pkg/cvo/updatepayload: Prune previous payload downloads, 2022-04-03, #760) attempted to add CVO-side directory removal, but that failed because the CVO mounts the shared volume 'readOnly: true'. * a5af89d34e (pkg/cvo/updatepayload: Shift previous-download removal into the job, 2022-04-18, #765) shifted removal into the job itself. As far as I can tell, this worked. * c45a981302 (pkg/cvo/updatepayload: Use initContainers instead of shell &&-chains, 2022-04-20, #765) addressed concerns with unquoted shell arguments by pivoting to initContainers and dropping the shell. This broke the * pathname expansion that rm depends on to find directories to remove. This commit returns to using the shell to invoke the rm call, so we get pathname expansion back [1]. But I avoid the possibility of unquoted argument injection by using workingDir to bring in baseDir. [1]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_06 --- pkg/cvo/updatepayload.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/cvo/updatepayload.go b/pkg/cvo/updatepayload.go index 46bfb0bb84..4d1e34dc39 100644 --- a/pkg/cvo/updatepayload.go +++ b/pkg/cvo/updatepayload.go @@ -202,8 +202,9 @@ func (r *payloadRetriever) fetchUpdatePayloadToDir(ctx context.Context, dir stri Spec: corev1.PodSpec{ InitContainers: []corev1.Container{ setContainerDefaults(corev1.Container{ - Name: "cleanup", - Command: []string{"rm", "-fR", filepath.Join(baseDir, "*")}, + Name: "cleanup", + Command: []string{"sh", "-c", "rm -fR *"}, + WorkingDir: baseDir, }), setContainerDefaults(corev1.Container{ Name: "make-temporary-directory",