From 0fedf491152651dfa2b5985dbaadb20d04eef6a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20Barcarol=20Guimar=C3=A3es?= Date: Fri, 17 Jun 2022 16:29:41 +0000 Subject: [PATCH 1/3] pkg/steps: add pod clients to steps with builds --- cmd/ci-operator/main_test.go | 4 ++-- pkg/defaults/defaults.go | 14 +++++++------- pkg/steps/bundle_source.go | 13 ++++++++++++- pkg/steps/git_source.go | 13 ++++++++++++- pkg/steps/index_generator.go | 13 ++++++++++++- pkg/steps/pipeline_image_cache.go | 12 +++++++++++- pkg/steps/project_image.go | 13 ++++++++++++- pkg/steps/rpm_injection.go | 12 +++++++++++- pkg/steps/source.go | 14 ++++++++++++-- 9 files changed, 91 insertions(+), 17 deletions(-) diff --git a/cmd/ci-operator/main_test.go b/cmd/ci-operator/main_test.go index 1e42570194a..374e09087bc 100644 --- a/cmd/ci-operator/main_test.go +++ b/cmd/ci-operator/main_test.go @@ -992,7 +992,7 @@ func TestBuildPartialGraph(t *testing.T) { loggingclient.New(fakectrlruntimeclient.NewFakeClient(&imagev1.ImageStreamTag{ObjectMeta: metav1.ObjectMeta{Name: ":"}})), nil, ), - steps.SourceStep(api.SourceStepConfiguration{From: api.PipelineImageStreamTagReferenceRoot, To: api.PipelineImageStreamTagReferenceSource}, api.ResourceConfiguration{}, nil, &api.JobSpec{}, nil, nil), + steps.SourceStep(api.SourceStepConfiguration{From: api.PipelineImageStreamTagReferenceRoot, To: api.PipelineImageStreamTagReferenceSource}, api.ResourceConfiguration{}, nil, nil, &api.JobSpec{}, nil, nil), steps.ProjectDirectoryImageBuildStep( api.ProjectDirectoryImageBuildStepConfiguration{ From: api.PipelineImageStreamTagReferenceSource, @@ -1001,7 +1001,7 @@ func TestBuildPartialGraph(t *testing.T) { }, To: api.PipelineImageStreamTagReference("oc-bin-image"), }, - &api.ReleaseBuildConfiguration{}, api.ResourceConfiguration{}, nil, nil, nil, + &api.ReleaseBuildConfiguration{}, api.ResourceConfiguration{}, nil, nil, nil, nil, ), steps.OutputImageTagStep(api.OutputImageTagStepConfiguration{From: api.PipelineImageStreamTagReference("oc-bin-image")}, nil, nil), steps.ImagesReadyStep(steps.OutputImageTagStep(api.OutputImageTagStepConfiguration{From: api.PipelineImageStreamTagReference("oc-bin-image")}, nil, nil).Creates()), diff --git a/pkg/defaults/defaults.go b/pkg/defaults/defaults.go index 4495c7eff2c..f516abb863c 100644 --- a/pkg/defaults/defaults.go +++ b/pkg/defaults/defaults.go @@ -244,19 +244,19 @@ func fromConfig( step = steps.InputImageTagStep(&conf, client, jobSpec) inputImages[conf.InputImage] = struct{}{} } else if rawStep.PipelineImageCacheStepConfiguration != nil { - step = steps.PipelineImageCacheStep(*rawStep.PipelineImageCacheStepConfiguration, config.Resources, buildClient, jobSpec, pullSecret) + step = steps.PipelineImageCacheStep(*rawStep.PipelineImageCacheStepConfiguration, config.Resources, buildClient, podClient, jobSpec, pullSecret) } else if rawStep.SourceStepConfiguration != nil { - step = steps.SourceStep(*rawStep.SourceStepConfiguration, config.Resources, buildClient, jobSpec, cloneAuthConfig, pullSecret) + step = steps.SourceStep(*rawStep.SourceStepConfiguration, config.Resources, buildClient, podClient, jobSpec, cloneAuthConfig, pullSecret) } else if rawStep.BundleSourceStepConfiguration != nil { - step = steps.BundleSourceStep(*rawStep.BundleSourceStepConfiguration, config, config.Resources, buildClient, jobSpec, pullSecret) + step = steps.BundleSourceStep(*rawStep.BundleSourceStepConfiguration, config, config.Resources, buildClient, podClient, jobSpec, pullSecret) } else if rawStep.IndexGeneratorStepConfiguration != nil { - step = steps.IndexGeneratorStep(*rawStep.IndexGeneratorStepConfiguration, config, config.Resources, buildClient, jobSpec, pullSecret) + step = steps.IndexGeneratorStep(*rawStep.IndexGeneratorStepConfiguration, config, config.Resources, buildClient, podClient, jobSpec, pullSecret) } else if rawStep.ProjectDirectoryImageBuildStepConfiguration != nil { - step = steps.ProjectDirectoryImageBuildStep(*rawStep.ProjectDirectoryImageBuildStepConfiguration, config, config.Resources, buildClient, jobSpec, pullSecret) + step = steps.ProjectDirectoryImageBuildStep(*rawStep.ProjectDirectoryImageBuildStepConfiguration, config, config.Resources, buildClient, podClient, jobSpec, pullSecret) } else if rawStep.ProjectDirectoryImageBuildInputs != nil { - step = steps.GitSourceStep(*rawStep.ProjectDirectoryImageBuildInputs, config.Resources, buildClient, jobSpec, cloneAuthConfig, pullSecret) + step = steps.GitSourceStep(*rawStep.ProjectDirectoryImageBuildInputs, config.Resources, buildClient, podClient, jobSpec, cloneAuthConfig, pullSecret) } else if rawStep.RPMImageInjectionStepConfiguration != nil { - step = steps.RPMImageInjectionStep(*rawStep.RPMImageInjectionStepConfiguration, config.Resources, buildClient, jobSpec, pullSecret) + step = steps.RPMImageInjectionStep(*rawStep.RPMImageInjectionStepConfiguration, config.Resources, buildClient, podClient, jobSpec, pullSecret) } else if rawStep.RPMServeStepConfiguration != nil { step = steps.RPMServerStep(*rawStep.RPMServeStepConfiguration, client, jobSpec) } else if rawStep.OutputImageTagStepConfiguration != nil { diff --git a/pkg/steps/bundle_source.go b/pkg/steps/bundle_source.go index 9629a1d7029..5b8a7d7c88f 100644 --- a/pkg/steps/bundle_source.go +++ b/pkg/steps/bundle_source.go @@ -13,6 +13,7 @@ import ( buildapi "github.com/openshift/api/build/v1" "github.com/openshift/ci-tools/pkg/api" + "github.com/openshift/ci-tools/pkg/kubernetes" "github.com/openshift/ci-tools/pkg/results" "github.com/openshift/ci-tools/pkg/steps/utils" ) @@ -22,6 +23,7 @@ type bundleSourceStep struct { releaseBuildConfig *api.ReleaseBuildConfiguration resources api.ResourceConfiguration client BuildClient + podClient kubernetes.PodClient jobSpec *api.JobSpec pullSecret *coreapi.Secret } @@ -129,12 +131,21 @@ func (s *bundleSourceStep) Description() string { return fmt.Sprintf("Build image %s from the repository", api.PipelineImageStreamTagReferenceBundleSource) } -func BundleSourceStep(config api.BundleSourceStepConfiguration, releaseBuildConfig *api.ReleaseBuildConfiguration, resources api.ResourceConfiguration, client BuildClient, jobSpec *api.JobSpec, pullSecret *coreapi.Secret) api.Step { +func BundleSourceStep( + config api.BundleSourceStepConfiguration, + releaseBuildConfig *api.ReleaseBuildConfiguration, + resources api.ResourceConfiguration, + client BuildClient, + podClient kubernetes.PodClient, + jobSpec *api.JobSpec, + pullSecret *coreapi.Secret, +) api.Step { return &bundleSourceStep{ config: config, releaseBuildConfig: releaseBuildConfig, resources: resources, client: client, + podClient: podClient, jobSpec: jobSpec, pullSecret: pullSecret, } diff --git a/pkg/steps/git_source.go b/pkg/steps/git_source.go index 85a60e7b07d..82140884a27 100644 --- a/pkg/steps/git_source.go +++ b/pkg/steps/git_source.go @@ -11,6 +11,7 @@ import ( buildapi "github.com/openshift/api/build/v1" "github.com/openshift/ci-tools/pkg/api" + "github.com/openshift/ci-tools/pkg/kubernetes" "github.com/openshift/ci-tools/pkg/results" ) @@ -18,6 +19,7 @@ type gitSourceStep struct { config api.ProjectDirectoryImageBuildInputs resources api.ResourceConfiguration buildClient BuildClient + podClient kubernetes.PodClient jobSpec *api.JobSpec cloneAuthConfig *CloneAuthConfig pullSecret *coreapi.Secret @@ -99,11 +101,20 @@ func determineRefsWorkdir(refs *prowapi.Refs, extraRefs []prowapi.Refs) *prowapi } // GitSourceStep returns gitSourceStep that holds all the required information to create a build from a git source. -func GitSourceStep(config api.ProjectDirectoryImageBuildInputs, resources api.ResourceConfiguration, buildClient BuildClient, jobSpec *api.JobSpec, cloneAuthConfig *CloneAuthConfig, pullSecret *coreapi.Secret) api.Step { +func GitSourceStep( + config api.ProjectDirectoryImageBuildInputs, + resources api.ResourceConfiguration, + buildClient BuildClient, + podClient kubernetes.PodClient, + jobSpec *api.JobSpec, + cloneAuthConfig *CloneAuthConfig, + pullSecret *coreapi.Secret, +) api.Step { return &gitSourceStep{ config: config, resources: resources, buildClient: buildClient, + podClient: podClient, jobSpec: jobSpec, cloneAuthConfig: cloneAuthConfig, pullSecret: pullSecret, diff --git a/pkg/steps/index_generator.go b/pkg/steps/index_generator.go index 3fae17c7ab8..61d6c1d279a 100644 --- a/pkg/steps/index_generator.go +++ b/pkg/steps/index_generator.go @@ -11,6 +11,7 @@ import ( buildapi "github.com/openshift/api/build/v1" "github.com/openshift/ci-tools/pkg/api" + "github.com/openshift/ci-tools/pkg/kubernetes" "github.com/openshift/ci-tools/pkg/results" "github.com/openshift/ci-tools/pkg/steps/utils" ) @@ -20,6 +21,7 @@ type indexGeneratorStep struct { releaseBuildConfig *api.ReleaseBuildConfiguration resources api.ResourceConfiguration client BuildClient + podClient kubernetes.PodClient jobSpec *api.JobSpec pullSecret *coreapi.Secret } @@ -153,12 +155,21 @@ func (s *indexGeneratorStep) Objects() []ctrlruntimeclient.Object { return s.client.Objects() } -func IndexGeneratorStep(config api.IndexGeneratorStepConfiguration, releaseBuildConfig *api.ReleaseBuildConfiguration, resources api.ResourceConfiguration, buildClient BuildClient, jobSpec *api.JobSpec, pullSecret *coreapi.Secret) api.Step { +func IndexGeneratorStep( + config api.IndexGeneratorStepConfiguration, + releaseBuildConfig *api.ReleaseBuildConfiguration, + resources api.ResourceConfiguration, + buildClient BuildClient, + podClient kubernetes.PodClient, + jobSpec *api.JobSpec, + pullSecret *coreapi.Secret, +) api.Step { return &indexGeneratorStep{ config: config, releaseBuildConfig: releaseBuildConfig, resources: resources, client: buildClient, + podClient: podClient, jobSpec: jobSpec, pullSecret: pullSecret, } diff --git a/pkg/steps/pipeline_image_cache.go b/pkg/steps/pipeline_image_cache.go index cb4598c8b8b..083220d00f7 100644 --- a/pkg/steps/pipeline_image_cache.go +++ b/pkg/steps/pipeline_image_cache.go @@ -11,6 +11,7 @@ import ( buildapi "github.com/openshift/api/build/v1" "github.com/openshift/ci-tools/pkg/api" + "github.com/openshift/ci-tools/pkg/kubernetes" "github.com/openshift/ci-tools/pkg/results" "github.com/openshift/ci-tools/pkg/steps/utils" ) @@ -24,6 +25,7 @@ type pipelineImageCacheStep struct { config api.PipelineImageCacheStepConfiguration resources api.ResourceConfiguration client BuildClient + podClient kubernetes.PodClient jobSpec *api.JobSpec pullSecret *coreapi.Secret } @@ -85,11 +87,19 @@ func (s *pipelineImageCacheStep) Objects() []ctrlruntimeclient.Object { return s.client.Objects() } -func PipelineImageCacheStep(config api.PipelineImageCacheStepConfiguration, resources api.ResourceConfiguration, client BuildClient, jobSpec *api.JobSpec, pullSecret *coreapi.Secret) api.Step { +func PipelineImageCacheStep( + config api.PipelineImageCacheStepConfiguration, + resources api.ResourceConfiguration, + client BuildClient, + podClient kubernetes.PodClient, + jobSpec *api.JobSpec, + pullSecret *coreapi.Secret, +) api.Step { return &pipelineImageCacheStep{ config: config, resources: resources, client: client, + podClient: podClient, jobSpec: jobSpec, pullSecret: pullSecret, } diff --git a/pkg/steps/project_image.go b/pkg/steps/project_image.go index 0e4a0446bfa..29e43603581 100644 --- a/pkg/steps/project_image.go +++ b/pkg/steps/project_image.go @@ -14,6 +14,7 @@ import ( imagev1 "github.com/openshift/api/image/v1" "github.com/openshift/ci-tools/pkg/api" + "github.com/openshift/ci-tools/pkg/kubernetes" "github.com/openshift/ci-tools/pkg/results" "github.com/openshift/ci-tools/pkg/steps/utils" ) @@ -23,6 +24,7 @@ type projectDirectoryImageBuildStep struct { releaseBuildConfig *api.ReleaseBuildConfiguration resources api.ResourceConfiguration client BuildClient + podClient kubernetes.PodClient jobSpec *api.JobSpec pullSecret *coreapi.Secret } @@ -161,12 +163,21 @@ func (s *projectDirectoryImageBuildStep) Objects() []ctrlruntimeclient.Object { return s.client.Objects() } -func ProjectDirectoryImageBuildStep(config api.ProjectDirectoryImageBuildStepConfiguration, releaseBuildConfig *api.ReleaseBuildConfiguration, resources api.ResourceConfiguration, buildClient BuildClient, jobSpec *api.JobSpec, pullSecret *coreapi.Secret) api.Step { +func ProjectDirectoryImageBuildStep( + config api.ProjectDirectoryImageBuildStepConfiguration, + releaseBuildConfig *api.ReleaseBuildConfiguration, + resources api.ResourceConfiguration, + buildClient BuildClient, + podClient kubernetes.PodClient, + jobSpec *api.JobSpec, + pullSecret *coreapi.Secret, +) api.Step { return &projectDirectoryImageBuildStep{ config: config, releaseBuildConfig: releaseBuildConfig, resources: resources, client: buildClient, + podClient: podClient, jobSpec: jobSpec, pullSecret: pullSecret, } diff --git a/pkg/steps/rpm_injection.go b/pkg/steps/rpm_injection.go index bb002311085..b179612b8f8 100644 --- a/pkg/steps/rpm_injection.go +++ b/pkg/steps/rpm_injection.go @@ -11,6 +11,7 @@ import ( routev1 "github.com/openshift/api/route/v1" "github.com/openshift/ci-tools/pkg/api" + "github.com/openshift/ci-tools/pkg/kubernetes" "github.com/openshift/ci-tools/pkg/results" ) @@ -23,6 +24,7 @@ type rpmImageInjectionStep struct { config api.RPMImageInjectionStepConfiguration resources api.ResourceConfiguration client BuildClient + podClient kubernetes.PodClient jobSpec *api.JobSpec pullSecret *coreapi.Secret } @@ -84,11 +86,19 @@ func (s *rpmImageInjectionStep) Objects() []ctrlruntimeclient.Object { return s.client.Objects() } -func RPMImageInjectionStep(config api.RPMImageInjectionStepConfiguration, resources api.ResourceConfiguration, buildClient BuildClient, jobSpec *api.JobSpec, pullSecret *coreapi.Secret) api.Step { +func RPMImageInjectionStep( + config api.RPMImageInjectionStepConfiguration, + resources api.ResourceConfiguration, + buildClient BuildClient, + podClient kubernetes.PodClient, + jobSpec *api.JobSpec, + pullSecret *coreapi.Secret, +) api.Step { return &rpmImageInjectionStep{ config: config, resources: resources, client: buildClient, + podClient: podClient, jobSpec: jobSpec, pullSecret: pullSecret, } diff --git a/pkg/steps/source.go b/pkg/steps/source.go index 7c32915c0c9..347bf446da3 100644 --- a/pkg/steps/source.go +++ b/pkg/steps/source.go @@ -26,6 +26,7 @@ import ( imagev1 "github.com/openshift/api/image/v1" "github.com/openshift/ci-tools/pkg/api" + "github.com/openshift/ci-tools/pkg/kubernetes" "github.com/openshift/ci-tools/pkg/results" "github.com/openshift/ci-tools/pkg/steps/loggingclient" "github.com/openshift/ci-tools/pkg/steps/utils" @@ -134,6 +135,7 @@ type sourceStep struct { config api.SourceStepConfiguration resources api.ResourceConfiguration client BuildClient + podClient kubernetes.PodClient jobSpec *api.JobSpec cloneAuthConfig *CloneAuthConfig pullSecret *corev1.Secret @@ -609,12 +611,20 @@ func (s *sourceStep) Objects() []ctrlruntimeclient.Object { return s.client.Objects() } -func SourceStep(config api.SourceStepConfiguration, resources api.ResourceConfiguration, buildClient BuildClient, - jobSpec *api.JobSpec, cloneAuthConfig *CloneAuthConfig, pullSecret *corev1.Secret) api.Step { +func SourceStep( + config api.SourceStepConfiguration, + resources api.ResourceConfiguration, + buildClient BuildClient, + podClient kubernetes.PodClient, + jobSpec *api.JobSpec, + cloneAuthConfig *CloneAuthConfig, + pullSecret *corev1.Secret, +) api.Step { return &sourceStep{ config: config, resources: resources, client: buildClient, + podClient: podClient, jobSpec: jobSpec, cloneAuthConfig: cloneAuthConfig, pullSecret: pullSecret, From 292baebdbab0c9707bd8a24241eab91106064403 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20Barcarol=20Guimar=C3=A3es?= Date: Fri, 17 Jun 2022 16:36:26 +0000 Subject: [PATCH 2/3] ci-operator: add build pending timeout period This follows the similar implementation (and uses the same time period) in pkg/steps/template.go. --- Example (with timeout changed to `1s`): ```yaml resources: src: requests: memory: 1T ``` ``` INFO[2022-06-17T18:22:59Z] Building src INFO[2022-06-17T18:23:05Z] build didn't start running within 1s (phase: Pending): Found 1 events for Pod src-build: * 0x : 0/23 nodes are available: 1 node(s) had taint {node-role.kubernetes.io/ci-builds-worker: ci-builds-worker}, that the pod didn't tolerate, 1 node(s) had taint {node-role.kubernetes.io/ci-prowjobs-worker: ci-prowjobs-worker}, that the pod didn't tolerate, 1 node(s) were unschedulable, 2 Insufficient memory, 2 node(s) had taint {ci.openshift.io/ci-search: true}, that the pod didn't tolerate, 3 node(s) had taint {node-role.kubernetes.io/infra: }, that the pod didn't tolerate, 3 node(s) had taint {node-role.kubernetes.io/master: }, that the pod didn't tolerate, 4 node(s) had taint {node-role.kubernetes.io/ci-longtests-worker: ci-longtests-worker}, that the pod didn't tolerate, 6 node(s) had taint {node-role.kubernetes.io/ci-tests-worker: ci-tests-worker}, that the pod didn't tolerate. ``` --- There is an unfortunate layering violation here in that we are forced to get to the build pod through the `pod-name` annotation and examine it. I could not find a way to do this through the `Build` object (as is possible for logs, for example). A pending build has very little information: ``` status: conditions: - lastTransitionTime: "2022-06-17T11:06:41Z" lastUpdateTime: "2022-06-17T11:06:41Z" status: "False" type: New - lastTransitionTime: "2022-06-17T11:06:41Z" lastUpdateTime: "2022-06-17T11:06:41Z" status: "True" type: Pending output: {} outputDockerImageReference: image-registry.openshift-image-registry.svc:5000/bbguimaraes0/pipeline:src phase: Pending ``` It is only by examining the build pod (reusing the existing code which handles test pods, at least) that the cause can be determined. --- pkg/api/constant.go | 6 ++++++ pkg/steps/bundle_source.go | 2 +- pkg/steps/git_source.go | 2 +- pkg/steps/index_generator.go | 2 +- pkg/steps/pipeline_image_cache.go | 2 +- pkg/steps/project_image.go | 2 +- pkg/steps/rpm_injection.go | 2 +- pkg/steps/source.go | 19 +++++++++++++------ pkg/util/builds.go | 31 +++++++++++++++++++++++++++++++ pkg/util/pods.go | 6 +++--- 10 files changed, 59 insertions(+), 15 deletions(-) create mode 100644 pkg/util/builds.go diff --git a/pkg/api/constant.go b/pkg/api/constant.go index 5bdebc3b68e..5c566a2dc1f 100644 --- a/pkg/api/constant.go +++ b/pkg/api/constant.go @@ -2,6 +2,7 @@ package api import ( "fmt" + "time" "k8s.io/apimachinery/pkg/util/sets" ) @@ -51,6 +52,11 @@ const ( APPCIKubeAPIURL = "https://api.ci.l2s4.p1.openshiftapps.com:6443" + // PodStartTimeout is the maximum amount of time for pods to begin running. + // It causes builds and tests to fail if their pods cannot be scheduled for + // whatever reason, instead of failing due to the Prow job timeout, which is + // much longer and generates errors that are less descriptive. + PodStartTimeout = 30 * time.Minute // CliEnv if the env we use to expose the path to the cli CliEnv = "CLI_DIR" DefaultLeaseEnv = "LEASED_RESOURCE" diff --git a/pkg/steps/bundle_source.go b/pkg/steps/bundle_source.go index 5b8a7d7c88f..6c73fe48b02 100644 --- a/pkg/steps/bundle_source.go +++ b/pkg/steps/bundle_source.go @@ -77,7 +77,7 @@ func (s *bundleSourceStep) run(ctx context.Context) error { s.pullSecret, nil, ) - return handleBuild(ctx, s.client, *build) + return handleBuild(ctx, s.client, s.podClient, *build) } func replaceCommand(pullSpec, with string) string { diff --git a/pkg/steps/git_source.go b/pkg/steps/git_source.go index 82140884a27..be7a82297f6 100644 --- a/pkg/steps/git_source.go +++ b/pkg/steps/git_source.go @@ -44,7 +44,7 @@ func (s *gitSourceStep) run(ctx context.Context) error { secretName = s.cloneAuthConfig.Secret.Name } - return handleBuild(ctx, s.buildClient, *buildFromSource(s.jobSpec, "", api.PipelineImageStreamTagReferenceRoot, buildapi.BuildSource{ + return handleBuild(ctx, s.buildClient, s.podClient, *buildFromSource(s.jobSpec, "", api.PipelineImageStreamTagReferenceRoot, buildapi.BuildSource{ Type: buildapi.BuildSourceGit, Dockerfile: s.config.DockerfileLiteral, ContextDir: s.config.ContextDir, diff --git a/pkg/steps/index_generator.go b/pkg/steps/index_generator.go index 61d6c1d279a..09d76141dad 100644 --- a/pkg/steps/index_generator.go +++ b/pkg/steps/index_generator.go @@ -81,7 +81,7 @@ func (s *indexGeneratorStep) run(ctx context.Context) error { s.pullSecret, nil, ) - err = handleBuild(ctx, s.client, *build) + err = handleBuild(ctx, s.client, s.podClient, *build) if err != nil && strings.Contains(err.Error(), "error checking provided apis") { return results.ForReason("generating_index").WithError(err).Errorf("failed to generate operator index due to invalid bundle info: %v", err) } diff --git a/pkg/steps/pipeline_image_cache.go b/pkg/steps/pipeline_image_cache.go index 083220d00f7..5b8f399f89f 100644 --- a/pkg/steps/pipeline_image_cache.go +++ b/pkg/steps/pipeline_image_cache.go @@ -46,7 +46,7 @@ func (s *pipelineImageCacheStep) run(ctx context.Context) error { if err != nil { return err } - return handleBuild(ctx, s.client, *buildFromSource( + return handleBuild(ctx, s.client, s.podClient, *buildFromSource( s.jobSpec, s.config.From, s.config.To, buildapi.BuildSource{ Type: buildapi.BuildSourceDockerfile, diff --git a/pkg/steps/project_image.go b/pkg/steps/project_image.go index 29e43603581..74c63d3173b 100644 --- a/pkg/steps/project_image.go +++ b/pkg/steps/project_image.go @@ -63,7 +63,7 @@ func (s *projectDirectoryImageBuildStep) run(ctx context.Context) error { s.pullSecret, s.config.BuildArgs, ) - return handleBuild(ctx, s.client, *build) + return handleBuild(ctx, s.client, s.podClient, *build) } type workingDir func(tag string) (string, error) diff --git a/pkg/steps/rpm_injection.go b/pkg/steps/rpm_injection.go index b179612b8f8..8a36da2c62d 100644 --- a/pkg/steps/rpm_injection.go +++ b/pkg/steps/rpm_injection.go @@ -50,7 +50,7 @@ func (s *rpmImageInjectionStep) run(ctx context.Context) error { if err != nil { return err } - return handleBuild(ctx, s.client, *buildFromSource( + return handleBuild(ctx, s.client, s.podClient, *buildFromSource( s.jobSpec, s.config.From, s.config.To, buildapi.BuildSource{ Type: buildapi.BuildSourceDockerfile, diff --git a/pkg/steps/source.go b/pkg/steps/source.go index 347bf446da3..3a4e52296b5 100644 --- a/pkg/steps/source.go +++ b/pkg/steps/source.go @@ -15,7 +15,7 @@ import ( kerrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/errors" + utilerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/wait" prowv1 "k8s.io/test-infra/prow/apis/prowjobs/v1" "k8s.io/test-infra/prow/clonerefs" @@ -161,7 +161,7 @@ func (s *sourceStep) run(ctx context.Context) error { if err != nil { return err } - return handleBuild(ctx, s.client, *createBuild(s.config, s.jobSpec, clonerefsRef, s.resources, s.cloneAuthConfig, s.pullSecret, fromDigest)) + return handleBuild(ctx, s.client, s.podClient, *createBuild(s.config, s.jobSpec, clonerefsRef, s.resources, s.cloneAuthConfig, s.pullSecret, fromDigest)) } func createBuild(config api.SourceStepConfiguration, jobSpec *api.JobSpec, clonerefsRef corev1.ObjectReference, resources api.ResourceConfiguration, cloneAuthConfig *CloneAuthConfig, pullSecret *corev1.Secret, fromDigest string) *buildapi.Build { @@ -371,7 +371,7 @@ func isBuildPhaseTerminated(phase buildapi.BuildPhase) bool { return true } -func handleBuild(ctx context.Context, buildClient BuildClient, build buildapi.Build) error { +func handleBuild(ctx context.Context, buildClient BuildClient, podClient kubernetes.PodClient, build buildapi.Build) error { var buildErrs []error attempts := 5 if boErr := wait.ExponentialBackoff(wait.Backoff{Duration: time.Minute, Factor: 1.5, Steps: attempts}, func() (bool, error) { @@ -381,7 +381,7 @@ func handleBuild(ctx context.Context, buildClient BuildClient, build buildapi.Bu return false, fmt.Errorf("could not create build %s: %w", buildAttempt.Name, err) } - buildErr := waitForBuildOrTimeout(ctx, buildClient, buildAttempt.Namespace, buildAttempt.Name) + buildErr := waitForBuildOrTimeout(ctx, buildClient, podClient, buildAttempt.Namespace, buildAttempt.Name) if buildErr == nil { if err := gatherSuccessfulBuildLog(buildClient, buildAttempt.Namespace, buildAttempt.Name); err != nil { // log error but do not fail successful build @@ -421,7 +421,7 @@ func handleBuild(ctx context.Context, buildClient BuildClient, build buildapi.Bu return false, nil }); boErr != nil { if boErr == wait.ErrWaitTimeout { - return fmt.Errorf("build not successful after %d attempts: %w", attempts, errors.NewAggregate(buildErrs)) + return fmt.Errorf("build not successful after %d attempts: %w", attempts, utilerrors.NewAggregate(buildErrs)) } return boErr } @@ -486,7 +486,7 @@ func hintsAtInfraReason(logSnippet string) bool { strings.Contains(logSnippet, "connection reset by peer") } -func waitForBuildOrTimeout(ctx context.Context, buildClient BuildClient, namespace, name string) error { +func waitForBuildOrTimeout(ctx context.Context, buildClient BuildClient, podClient kubernetes.PodClient, namespace, name string) error { isOK := func(b *buildapi.Build) bool { return b.Status.Phase == buildapi.BuildPhaseComplete } @@ -514,6 +514,7 @@ func waitForBuildOrTimeout(ctx context.Context, buildClient BuildClient, namespa } ticker := time.NewTicker(5 * time.Second) defer ticker.Stop() + var ran bool for { select { case <-ctx.Done(): @@ -523,6 +524,12 @@ func waitForBuildOrTimeout(ctx context.Context, buildClient BuildClient, namespa logrus.WithError(err).Warnf("Failed to get build %s.", name) continue } + ran = ran || (build.Status.Phase == buildapi.BuildPhaseRunning) + if !ran && api.PodStartTimeout < time.Since(build.CreationTimestamp.Time) { + err := util.PendingBuildError(ctx, podClient, build) + logrus.Infof(err.Error()) + return err + } if isOK(build) { logrus.Infof("Build %s succeeded after %s", build.Name, buildDuration(build).Truncate(time.Second)) return nil diff --git a/pkg/util/builds.go b/pkg/util/builds.go new file mode 100644 index 00000000000..8f8011fa28e --- /dev/null +++ b/pkg/util/builds.go @@ -0,0 +1,31 @@ +package util + +import ( + "context" + "errors" + "fmt" + + "github.com/sirupsen/logrus" + + corev1 "k8s.io/api/core/v1" + ctrlruntimeclient "sigs.k8s.io/controller-runtime/pkg/client" + + buildapi "github.com/openshift/api/build/v1" + + "github.com/openshift/ci-tools/pkg/api" + "github.com/openshift/ci-tools/pkg/kubernetes" +) + +// PendingBuildError fetches scheduling errors from the build pod's events +func PendingBuildError(ctx context.Context, client kubernetes.PodClient, build *buildapi.Build) error { + msg := fmt.Sprintf("build didn't start running within %s (phase: %s)", api.PodStartTimeout, build.Status.Phase) + var pod corev1.Pod + if podName, ok := build.Annotations[buildapi.BuildPodNameAnnotation]; !ok { + logrus.Debug("build pod annotation missing") + return errors.New(msg) + } else if err := client.Get(ctx, ctrlruntimeclient.ObjectKey{Namespace: build.Namespace, Name: podName}, &pod); err != nil { + logrus.Warnf("failed to get build pod: %v", err) + return errors.New(msg) + } + return fmt.Errorf("%s: %s\n%s", msg, getReasonsForUnreadyContainers(&pod), getEventsForPod(ctx, &pod, client)) +} diff --git a/pkg/util/pods.go b/pkg/util/pods.go index bced8cdb75e..08bb5358a51 100644 --- a/pkg/util/pods.go +++ b/pkg/util/pods.go @@ -20,6 +20,7 @@ import ( "k8s.io/apimachinery/pkg/util/wait" ctrlruntimeclient "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/openshift/ci-tools/pkg/api" "github.com/openshift/ci-tools/pkg/kubernetes" ) @@ -178,7 +179,6 @@ func waitForPodCompletionOrTimeout(ctx context.Context, podClient kubernetes.Pod podCheckTicker := time.NewTicker(10 * time.Second) defer podCheckTicker.Stop() - podStartTimeout := 30 * time.Minute var podSeenRunning bool for { @@ -197,8 +197,8 @@ func waitForPodCompletionOrTimeout(ctx context.Context, podClient kubernetes.Pod if !podSeenRunning { if podHasStarted(pod) { podSeenRunning = true - } else if time.Since(pod.CreationTimestamp.Time) > podStartTimeout { - message := fmt.Sprintf("pod didn't start running within %s: %s\n%s", podStartTimeout, getReasonsForUnreadyContainers(pod), getEventsForPod(ctx, pod, podClient)) + } else if time.Since(pod.CreationTimestamp.Time) > api.PodStartTimeout { + message := fmt.Sprintf("pod didn't start running within %s: %s\n%s", api.PodStartTimeout, getReasonsForUnreadyContainers(pod), getEventsForPod(ctx, pod, podClient)) logrus.Infof(message) notifier.Complete(name) return pod, errors.New(message) From f458f5ace9b6e4b78b39a1a2b2a21e9911c88ebe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20Barcarol=20Guimar=C3=A3es?= Date: Mon, 20 Jun 2022 11:31:37 +0000 Subject: [PATCH 3/3] pkg/steps: add error reason for pending pods Examples: `src` image: ``` INFO[2022-06-20T11:49:39Z] Reporting job state 'failed' with reason 'executing_graph:step_failed:cloning_source:pod_pending' ``` `images` build: ``` INFO[2022-06-20T11:54:26Z] Reporting job state 'failed' with reason 'executing_graph:step_failed:building_project_image:pod_pending' ``` `tests` container: ``` INFO[2022-06-20T11:56:09Z] Reporting job state 'failed' with reason 'executing_graph:step_failed:running_pod:pod_pending' ``` --- pkg/api/constant.go | 4 ++++ pkg/util/builds.go | 10 +++++++--- pkg/util/pods.go | 3 ++- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/pkg/api/constant.go b/pkg/api/constant.go index 5c566a2dc1f..27d50dca33b 100644 --- a/pkg/api/constant.go +++ b/pkg/api/constant.go @@ -52,6 +52,10 @@ const ( APPCIKubeAPIURL = "https://api.ci.l2s4.p1.openshiftapps.com:6443" + // ReasonPending is the error reason for pods not scheduled in time. + // It is generated when pods are for whatever reason not scheduled before + // `podStartTimeout`. + ReasonPending = "pod_pending" // PodStartTimeout is the maximum amount of time for pods to begin running. // It causes builds and tests to fail if their pods cannot be scheduled for // whatever reason, instead of failing due to the Prow job timeout, which is diff --git a/pkg/util/builds.go b/pkg/util/builds.go index 8f8011fa28e..4786b357666 100644 --- a/pkg/util/builds.go +++ b/pkg/util/builds.go @@ -14,18 +14,22 @@ import ( "github.com/openshift/ci-tools/pkg/api" "github.com/openshift/ci-tools/pkg/kubernetes" + "github.com/openshift/ci-tools/pkg/results" ) // PendingBuildError fetches scheduling errors from the build pod's events func PendingBuildError(ctx context.Context, client kubernetes.PodClient, build *buildapi.Build) error { msg := fmt.Sprintf("build didn't start running within %s (phase: %s)", api.PodStartTimeout, build.Status.Phase) + var ret error var pod corev1.Pod if podName, ok := build.Annotations[buildapi.BuildPodNameAnnotation]; !ok { logrus.Debug("build pod annotation missing") - return errors.New(msg) + ret = errors.New(msg) } else if err := client.Get(ctx, ctrlruntimeclient.ObjectKey{Namespace: build.Namespace, Name: podName}, &pod); err != nil { logrus.Warnf("failed to get build pod: %v", err) - return errors.New(msg) + ret = errors.New(msg) + } else { + ret = fmt.Errorf("%s: %s\n%s", msg, getReasonsForUnreadyContainers(&pod), getEventsForPod(ctx, &pod, client)) } - return fmt.Errorf("%s: %s\n%s", msg, getReasonsForUnreadyContainers(&pod), getEventsForPod(ctx, &pod, client)) + return results.ForReason(api.ReasonPending).ForError(ret) } diff --git a/pkg/util/pods.go b/pkg/util/pods.go index 08bb5358a51..e00e702e955 100644 --- a/pkg/util/pods.go +++ b/pkg/util/pods.go @@ -22,6 +22,7 @@ import ( "github.com/openshift/ci-tools/pkg/api" "github.com/openshift/ci-tools/pkg/kubernetes" + "github.com/openshift/ci-tools/pkg/results" ) func CreateOrRestartPod(ctx context.Context, podClient ctrlruntimeclient.Client, pod *corev1.Pod) (*corev1.Pod, error) { @@ -201,7 +202,7 @@ func waitForPodCompletionOrTimeout(ctx context.Context, podClient kubernetes.Pod message := fmt.Sprintf("pod didn't start running within %s: %s\n%s", api.PodStartTimeout, getReasonsForUnreadyContainers(pod), getEventsForPod(ctx, pod, podClient)) logrus.Infof(message) notifier.Complete(name) - return pod, errors.New(message) + return pod, results.ForReason(api.ReasonPending).ForError(errors.New(message)) } } podLogNewFailedContainers(podClient, pod, completed, notifier, skipLogs)