From b0cb0cca51e579a8a08350852723789c4e8a82de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20Barcarol=20Guimar=C3=A3es?= Date: Fri, 17 Jun 2022 16:29:41 +0000 Subject: [PATCH 1/4] pkg/steps: add pod clients to steps with builds --- cmd/ci-operator/main_test.go | 4 ++-- pkg/defaults/defaults.go | 14 +++++++------- pkg/steps/bundle_source.go | 13 ++++++++++++- pkg/steps/git_source.go | 13 ++++++++++++- pkg/steps/index_generator.go | 13 ++++++++++++- pkg/steps/pipeline_image_cache.go | 12 +++++++++++- pkg/steps/project_image.go | 13 ++++++++++++- pkg/steps/rpm_injection.go | 12 +++++++++++- pkg/steps/source.go | 13 +++++++++++-- 9 files changed, 90 insertions(+), 17 deletions(-) diff --git a/cmd/ci-operator/main_test.go b/cmd/ci-operator/main_test.go index 2541e01ba56..ab88fd6bcf7 100644 --- a/cmd/ci-operator/main_test.go +++ b/cmd/ci-operator/main_test.go @@ -992,7 +992,7 @@ func TestBuildPartialGraph(t *testing.T) { loggingclient.New(fakectrlruntimeclient.NewFakeClient(&imagev1.ImageStreamTag{ObjectMeta: metav1.ObjectMeta{Name: ":"}})), nil, ), - steps.SourceStep(api.SourceStepConfiguration{From: api.PipelineImageStreamTagReferenceRoot, To: api.PipelineImageStreamTagReferenceSource}, api.ResourceConfiguration{}, nil, &api.JobSpec{}, nil, nil), + steps.SourceStep(api.SourceStepConfiguration{From: api.PipelineImageStreamTagReferenceRoot, To: api.PipelineImageStreamTagReferenceSource}, api.ResourceConfiguration{}, nil, nil, &api.JobSpec{}, nil, nil), steps.ProjectDirectoryImageBuildStep( api.ProjectDirectoryImageBuildStepConfiguration{ From: api.PipelineImageStreamTagReferenceSource, @@ -1001,7 +1001,7 @@ func TestBuildPartialGraph(t *testing.T) { }, To: api.PipelineImageStreamTagReference("oc-bin-image"), }, - &api.ReleaseBuildConfiguration{}, api.ResourceConfiguration{}, nil, nil, nil, + &api.ReleaseBuildConfiguration{}, api.ResourceConfiguration{}, nil, nil, nil, nil, ), steps.OutputImageTagStep(api.OutputImageTagStepConfiguration{From: 
api.PipelineImageStreamTagReference("oc-bin-image")}, nil, nil), steps.ImagesReadyStep(steps.OutputImageTagStep(api.OutputImageTagStepConfiguration{From: api.PipelineImageStreamTagReference("oc-bin-image")}, nil, nil).Creates()), diff --git a/pkg/defaults/defaults.go b/pkg/defaults/defaults.go index ec8e482ddc6..eec19c03250 100644 --- a/pkg/defaults/defaults.go +++ b/pkg/defaults/defaults.go @@ -248,19 +248,19 @@ func fromConfig( step = steps.InputImageTagStep(&conf, client, jobSpec) inputImages[conf.InputImage] = struct{}{} } else if rawStep.PipelineImageCacheStepConfiguration != nil { - step = steps.PipelineImageCacheStep(*rawStep.PipelineImageCacheStepConfiguration, config.Resources, buildClient, jobSpec, pullSecret) + step = steps.PipelineImageCacheStep(*rawStep.PipelineImageCacheStepConfiguration, config.Resources, buildClient, podClient, jobSpec, pullSecret) } else if rawStep.SourceStepConfiguration != nil { - step = steps.SourceStep(*rawStep.SourceStepConfiguration, config.Resources, buildClient, jobSpec, cloneAuthConfig, pullSecret) + step = steps.SourceStep(*rawStep.SourceStepConfiguration, config.Resources, buildClient, podClient, jobSpec, cloneAuthConfig, pullSecret) } else if rawStep.BundleSourceStepConfiguration != nil { - step = steps.BundleSourceStep(*rawStep.BundleSourceStepConfiguration, config, config.Resources, buildClient, jobSpec, pullSecret) + step = steps.BundleSourceStep(*rawStep.BundleSourceStepConfiguration, config, config.Resources, buildClient, podClient, jobSpec, pullSecret) } else if rawStep.IndexGeneratorStepConfiguration != nil { - step = steps.IndexGeneratorStep(*rawStep.IndexGeneratorStepConfiguration, config, config.Resources, buildClient, jobSpec, pullSecret) + step = steps.IndexGeneratorStep(*rawStep.IndexGeneratorStepConfiguration, config, config.Resources, buildClient, podClient, jobSpec, pullSecret) } else if rawStep.ProjectDirectoryImageBuildStepConfiguration != nil { - step = 
steps.ProjectDirectoryImageBuildStep(*rawStep.ProjectDirectoryImageBuildStepConfiguration, config, config.Resources, buildClient, jobSpec, pullSecret) + step = steps.ProjectDirectoryImageBuildStep(*rawStep.ProjectDirectoryImageBuildStepConfiguration, config, config.Resources, buildClient, podClient, jobSpec, pullSecret) } else if rawStep.ProjectDirectoryImageBuildInputs != nil { - step = steps.GitSourceStep(*rawStep.ProjectDirectoryImageBuildInputs, config.Resources, buildClient, jobSpec, cloneAuthConfig, pullSecret) + step = steps.GitSourceStep(*rawStep.ProjectDirectoryImageBuildInputs, config.Resources, buildClient, podClient, jobSpec, cloneAuthConfig, pullSecret) } else if rawStep.RPMImageInjectionStepConfiguration != nil { - step = steps.RPMImageInjectionStep(*rawStep.RPMImageInjectionStepConfiguration, config.Resources, buildClient, jobSpec, pullSecret) + step = steps.RPMImageInjectionStep(*rawStep.RPMImageInjectionStepConfiguration, config.Resources, buildClient, podClient, jobSpec, pullSecret) } else if rawStep.RPMServeStepConfiguration != nil { step = steps.RPMServerStep(*rawStep.RPMServeStepConfiguration, client, jobSpec) } else if rawStep.OutputImageTagStepConfiguration != nil { diff --git a/pkg/steps/bundle_source.go b/pkg/steps/bundle_source.go index b20816f17ce..b9cf36f48d7 100644 --- a/pkg/steps/bundle_source.go +++ b/pkg/steps/bundle_source.go @@ -13,6 +13,7 @@ import ( buildapi "github.com/openshift/api/build/v1" "github.com/openshift/ci-tools/pkg/api" + "github.com/openshift/ci-tools/pkg/kubernetes" "github.com/openshift/ci-tools/pkg/results" "github.com/openshift/ci-tools/pkg/steps/utils" ) @@ -22,6 +23,7 @@ type bundleSourceStep struct { releaseBuildConfig *api.ReleaseBuildConfiguration resources api.ResourceConfiguration client BuildClient + podClient kubernetes.PodClient jobSpec *api.JobSpec pullSecret *coreapi.Secret } @@ -129,12 +131,21 @@ func (s *bundleSourceStep) Description() string { return fmt.Sprintf("Build image %s from the 
repository", api.PipelineImageStreamTagReferenceBundleSource) } -func BundleSourceStep(config api.BundleSourceStepConfiguration, releaseBuildConfig *api.ReleaseBuildConfiguration, resources api.ResourceConfiguration, client BuildClient, jobSpec *api.JobSpec, pullSecret *coreapi.Secret) api.Step { +func BundleSourceStep( + config api.BundleSourceStepConfiguration, + releaseBuildConfig *api.ReleaseBuildConfiguration, + resources api.ResourceConfiguration, + client BuildClient, + podClient kubernetes.PodClient, + jobSpec *api.JobSpec, + pullSecret *coreapi.Secret, +) api.Step { return &bundleSourceStep{ config: config, releaseBuildConfig: releaseBuildConfig, resources: resources, client: client, + podClient: podClient, jobSpec: jobSpec, pullSecret: pullSecret, } diff --git a/pkg/steps/git_source.go b/pkg/steps/git_source.go index 1a70204709f..7c762ff8eae 100644 --- a/pkg/steps/git_source.go +++ b/pkg/steps/git_source.go @@ -11,6 +11,7 @@ import ( buildapi "github.com/openshift/api/build/v1" "github.com/openshift/ci-tools/pkg/api" + "github.com/openshift/ci-tools/pkg/kubernetes" "github.com/openshift/ci-tools/pkg/results" ) @@ -18,6 +19,7 @@ type gitSourceStep struct { config api.ProjectDirectoryImageBuildInputs resources api.ResourceConfiguration buildClient BuildClient + podClient kubernetes.PodClient jobSpec *api.JobSpec cloneAuthConfig *CloneAuthConfig pullSecret *coreapi.Secret @@ -99,11 +101,20 @@ func determineRefsWorkdir(refs *prowapi.Refs, extraRefs []prowapi.Refs) *prowapi } // GitSourceStep returns gitSourceStep that holds all the required information to create a build from a git source. 
-func GitSourceStep(config api.ProjectDirectoryImageBuildInputs, resources api.ResourceConfiguration, buildClient BuildClient, jobSpec *api.JobSpec, cloneAuthConfig *CloneAuthConfig, pullSecret *coreapi.Secret) api.Step { +func GitSourceStep( + config api.ProjectDirectoryImageBuildInputs, + resources api.ResourceConfiguration, + buildClient BuildClient, + podClient kubernetes.PodClient, + jobSpec *api.JobSpec, + cloneAuthConfig *CloneAuthConfig, + pullSecret *coreapi.Secret, +) api.Step { return &gitSourceStep{ config: config, resources: resources, buildClient: buildClient, + podClient: podClient, jobSpec: jobSpec, cloneAuthConfig: cloneAuthConfig, pullSecret: pullSecret, diff --git a/pkg/steps/index_generator.go b/pkg/steps/index_generator.go index 803c85e8702..aac680d7125 100644 --- a/pkg/steps/index_generator.go +++ b/pkg/steps/index_generator.go @@ -16,6 +16,7 @@ import ( imagev1 "github.com/openshift/api/image/v1" "github.com/openshift/ci-tools/pkg/api" + "github.com/openshift/ci-tools/pkg/kubernetes" "github.com/openshift/ci-tools/pkg/results" "github.com/openshift/ci-tools/pkg/steps/utils" ) @@ -25,6 +26,7 @@ type indexGeneratorStep struct { releaseBuildConfig *api.ReleaseBuildConfiguration resources api.ResourceConfiguration client BuildClient + podClient kubernetes.PodClient jobSpec *api.JobSpec pullSecret *coreapi.Secret } @@ -195,12 +197,21 @@ func (s *indexGeneratorStep) Objects() []ctrlruntimeclient.Object { return s.client.Objects() } -func IndexGeneratorStep(config api.IndexGeneratorStepConfiguration, releaseBuildConfig *api.ReleaseBuildConfiguration, resources api.ResourceConfiguration, buildClient BuildClient, jobSpec *api.JobSpec, pullSecret *coreapi.Secret) api.Step { +func IndexGeneratorStep( + config api.IndexGeneratorStepConfiguration, + releaseBuildConfig *api.ReleaseBuildConfiguration, + resources api.ResourceConfiguration, + buildClient BuildClient, + podClient kubernetes.PodClient, + jobSpec *api.JobSpec, + pullSecret *coreapi.Secret, +) 
api.Step { return &indexGeneratorStep{ config: config, releaseBuildConfig: releaseBuildConfig, resources: resources, client: buildClient, + podClient: podClient, jobSpec: jobSpec, pullSecret: pullSecret, } diff --git a/pkg/steps/pipeline_image_cache.go b/pkg/steps/pipeline_image_cache.go index cb4598c8b8b..083220d00f7 100644 --- a/pkg/steps/pipeline_image_cache.go +++ b/pkg/steps/pipeline_image_cache.go @@ -11,6 +11,7 @@ import ( buildapi "github.com/openshift/api/build/v1" "github.com/openshift/ci-tools/pkg/api" + "github.com/openshift/ci-tools/pkg/kubernetes" "github.com/openshift/ci-tools/pkg/results" "github.com/openshift/ci-tools/pkg/steps/utils" ) @@ -24,6 +25,7 @@ type pipelineImageCacheStep struct { config api.PipelineImageCacheStepConfiguration resources api.ResourceConfiguration client BuildClient + podClient kubernetes.PodClient jobSpec *api.JobSpec pullSecret *coreapi.Secret } @@ -85,11 +87,19 @@ func (s *pipelineImageCacheStep) Objects() []ctrlruntimeclient.Object { return s.client.Objects() } -func PipelineImageCacheStep(config api.PipelineImageCacheStepConfiguration, resources api.ResourceConfiguration, client BuildClient, jobSpec *api.JobSpec, pullSecret *coreapi.Secret) api.Step { +func PipelineImageCacheStep( + config api.PipelineImageCacheStepConfiguration, + resources api.ResourceConfiguration, + client BuildClient, + podClient kubernetes.PodClient, + jobSpec *api.JobSpec, + pullSecret *coreapi.Secret, +) api.Step { return &pipelineImageCacheStep{ config: config, resources: resources, client: client, + podClient: podClient, jobSpec: jobSpec, pullSecret: pullSecret, } diff --git a/pkg/steps/project_image.go b/pkg/steps/project_image.go index fe8c8a80b3d..fa4d16b8641 100644 --- a/pkg/steps/project_image.go +++ b/pkg/steps/project_image.go @@ -14,6 +14,7 @@ import ( imagev1 "github.com/openshift/api/image/v1" "github.com/openshift/ci-tools/pkg/api" + "github.com/openshift/ci-tools/pkg/kubernetes" "github.com/openshift/ci-tools/pkg/results" 
"github.com/openshift/ci-tools/pkg/steps/utils" ) @@ -23,6 +24,7 @@ type projectDirectoryImageBuildStep struct { releaseBuildConfig *api.ReleaseBuildConfiguration resources api.ResourceConfiguration client BuildClient + podClient kubernetes.PodClient jobSpec *api.JobSpec pullSecret *coreapi.Secret } @@ -161,12 +163,21 @@ func (s *projectDirectoryImageBuildStep) Objects() []ctrlruntimeclient.Object { return s.client.Objects() } -func ProjectDirectoryImageBuildStep(config api.ProjectDirectoryImageBuildStepConfiguration, releaseBuildConfig *api.ReleaseBuildConfiguration, resources api.ResourceConfiguration, buildClient BuildClient, jobSpec *api.JobSpec, pullSecret *coreapi.Secret) api.Step { +func ProjectDirectoryImageBuildStep( + config api.ProjectDirectoryImageBuildStepConfiguration, + releaseBuildConfig *api.ReleaseBuildConfiguration, + resources api.ResourceConfiguration, + buildClient BuildClient, + podClient kubernetes.PodClient, + jobSpec *api.JobSpec, + pullSecret *coreapi.Secret, +) api.Step { return &projectDirectoryImageBuildStep{ config: config, releaseBuildConfig: releaseBuildConfig, resources: resources, client: buildClient, + podClient: podClient, jobSpec: jobSpec, pullSecret: pullSecret, } diff --git a/pkg/steps/rpm_injection.go b/pkg/steps/rpm_injection.go index 7777933293c..129626be9a9 100644 --- a/pkg/steps/rpm_injection.go +++ b/pkg/steps/rpm_injection.go @@ -11,6 +11,7 @@ import ( routev1 "github.com/openshift/api/route/v1" "github.com/openshift/ci-tools/pkg/api" + "github.com/openshift/ci-tools/pkg/kubernetes" "github.com/openshift/ci-tools/pkg/results" ) @@ -23,6 +24,7 @@ type rpmImageInjectionStep struct { config api.RPMImageInjectionStepConfiguration resources api.ResourceConfiguration client BuildClient + podClient kubernetes.PodClient jobSpec *api.JobSpec pullSecret *coreapi.Secret } @@ -84,11 +86,19 @@ func (s *rpmImageInjectionStep) Objects() []ctrlruntimeclient.Object { return s.client.Objects() } -func RPMImageInjectionStep(config 
api.RPMImageInjectionStepConfiguration, resources api.ResourceConfiguration, buildClient BuildClient, jobSpec *api.JobSpec, pullSecret *coreapi.Secret) api.Step { +func RPMImageInjectionStep( + config api.RPMImageInjectionStepConfiguration, + resources api.ResourceConfiguration, + buildClient BuildClient, + podClient kubernetes.PodClient, + jobSpec *api.JobSpec, + pullSecret *coreapi.Secret, +) api.Step { return &rpmImageInjectionStep{ config: config, resources: resources, client: buildClient, + podClient: podClient, jobSpec: jobSpec, pullSecret: pullSecret, } diff --git a/pkg/steps/source.go b/pkg/steps/source.go index d524f15d71d..4acbd74d552 100644 --- a/pkg/steps/source.go +++ b/pkg/steps/source.go @@ -135,6 +135,7 @@ type sourceStep struct { config api.SourceStepConfiguration resources api.ResourceConfiguration client BuildClient + podClient kubernetes.PodClient jobSpec *api.JobSpec cloneAuthConfig *CloneAuthConfig pullSecret *corev1.Secret @@ -645,12 +646,20 @@ func (s *sourceStep) Objects() []ctrlruntimeclient.Object { return s.client.Objects() } -func SourceStep(config api.SourceStepConfiguration, resources api.ResourceConfiguration, buildClient BuildClient, - jobSpec *api.JobSpec, cloneAuthConfig *CloneAuthConfig, pullSecret *corev1.Secret) api.Step { +func SourceStep( + config api.SourceStepConfiguration, + resources api.ResourceConfiguration, + buildClient BuildClient, + podClient kubernetes.PodClient, + jobSpec *api.JobSpec, + cloneAuthConfig *CloneAuthConfig, + pullSecret *corev1.Secret, +) api.Step { return &sourceStep{ config: config, resources: resources, client: buildClient, + podClient: podClient, jobSpec: jobSpec, cloneAuthConfig: cloneAuthConfig, pullSecret: pullSecret, From 86803b47c8bdcc2087cacbc27adb4c190df190f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20Barcarol=20Guimar=C3=A3es?= Date: Fri, 21 Apr 2023 14:36:20 +0000 Subject: [PATCH 2/4] pkg/testhelper: add configurable pending timeout --- pkg/kubernetes/client.go | 4 ++-- 
pkg/steps/multi_stage/run_test.go | 5 ++++- pkg/testhelper/kubernetes/pod.go | 5 ++++- pkg/util/pods.go | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pkg/kubernetes/client.go b/pkg/kubernetes/client.go index 35e22728a53..65fa78c0f55 100644 --- a/pkg/kubernetes/client.go +++ b/pkg/kubernetes/client.go @@ -71,7 +71,7 @@ func WaitForConditionOnObject(ctx context.Context, client ctrlruntimeclient.With type PodClient interface { loggingclient.LoggingClient - PendingTimeout() time.Duration + GetPendingTimeout() time.Duration // WithNewLoggingClient returns a new instance of the PodClient that resets // its LoggingClient. WithNewLoggingClient() PodClient @@ -95,7 +95,7 @@ type podClient struct { pendingTimeout time.Duration } -func (c podClient) PendingTimeout() time.Duration { return c.pendingTimeout } +func (c podClient) GetPendingTimeout() time.Duration { return c.pendingTimeout } func (c podClient) Exec(namespace, pod string, opts *coreapi.PodExecOptions) (remotecommand.Executor, error) { u := c.client.Post().Resource("pods").Namespace(namespace).Name(pod).SubResource("exec").VersionedParams(opts, scheme.ParameterCodec).URL() diff --git a/pkg/steps/multi_stage/run_test.go b/pkg/steps/multi_stage/run_test.go index 866f7aed83d..cefd493e554 100644 --- a/pkg/steps/multi_stage/run_test.go +++ b/pkg/steps/multi_stage/run_test.go @@ -88,7 +88,10 @@ func TestRun(t *testing.T) { }, } jobSpec.SetNamespace("ns") - client := &testhelper_kube.FakePodClient{FakePodExecutor: crclient} + client := &testhelper_kube.FakePodClient{ + PendingTimeout: 30 * time.Minute, + FakePodExecutor: crclient, + } step := MultiStageTestStep(api.TestStepConfiguration{ As: name, MultiStageTestConfigurationLiteral: &api.MultiStageTestConfigurationLiteral{ diff --git a/pkg/testhelper/kubernetes/pod.go b/pkg/testhelper/kubernetes/pod.go index 263eeefc19d..8ecfea70d62 100644 --- a/pkg/testhelper/kubernetes/pod.go +++ b/pkg/testhelper/kubernetes/pod.go @@ -95,9 +95,12 @@ func 
filter(list ctrlruntimeclient.ObjectList, opts ...ctrlruntimeclient.ListOpt type FakePodClient struct { *FakePodExecutor Namespace, Name string + PendingTimeout time.Duration } -func (FakePodClient) PendingTimeout() time.Duration { return 30 * time.Minute } +func (f FakePodClient) GetPendingTimeout() time.Duration { + return f.PendingTimeout +} func (f *FakePodClient) Exec(namespace, name string, opts *coreapi.PodExecOptions) (remotecommand.Executor, error) { if namespace != f.Namespace { diff --git a/pkg/util/pods.go b/pkg/util/pods.go index 5d0f0df7a02..73e74d3f278 100644 --- a/pkg/util/pods.go +++ b/pkg/util/pods.go @@ -143,7 +143,7 @@ func waitForPodCompletionOrTimeout(ctx context.Context, podClient kubernetes.Pod eg, ctx = errgroup.WithContext(ctx) pendingCtx, cancel := context.WithCancel(ctx) pendingCheck := func() error { - timeout := podClient.PendingTimeout() + timeout := podClient.GetPendingTimeout() if pod, err := checkPendingPeriodic(pendingCtx.Done(), timeout, &ret); err != nil { err = fmt.Errorf("pod pending for more than %s: %w: %s\n%s", timeout, err, getReasonsForUnreadyContainers(pod), getEventsForPod(ctx, pod, podClient)) logrus.Info(err) From 7abf0955cbf1c29f1abb75ae0403b8f0880ec96b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20Barcarol=20Guimar=C3=A3es?= Date: Fri, 17 Jun 2022 16:36:26 +0000 Subject: [PATCH 3/4] pkg/steps: add build pending timeout period This implementation is similar to (and uses the same time period as) the one in `WaitForPodCompletion` in `pkg/util/pods.go`, although the pending timeout verification for builds is considerably simpler since only one time point --- the creation time --- is considered.
--- Example (with a pending timeout of `1s`): ```yaml resources: src: requests: memory: 1T ``` ``` INFO[2022-06-17T18:22:59Z] Building src INFO[2022-06-17T18:23:05Z] build didn't start running within 1s (phase: Pending) ``` --- cmd/ci-operator/main.go | 2 +- pkg/steps/bundle_source.go | 2 +- pkg/steps/git_source.go | 2 +- pkg/steps/index_generator.go | 2 +- pkg/steps/pipeline_image_cache.go | 2 +- pkg/steps/project_image.go | 2 +- pkg/steps/rpm_injection.go | 2 +- pkg/steps/source.go | 113 ++++++++++++++--- pkg/steps/source_test.go | 200 +++++++++++++++++++++++++++++- pkg/util/builds.go | 15 +++ 10 files changed, 311 insertions(+), 31 deletions(-) create mode 100644 pkg/util/builds.go diff --git a/cmd/ci-operator/main.go b/cmd/ci-operator/main.go index d6272b54665..3305be37f46 100644 --- a/cmd/ci-operator/main.go +++ b/cmd/ci-operator/main.go @@ -431,7 +431,7 @@ func bindOptions(flag *flag.FlagSet) *options { // what we will run flag.StringVar(&opt.nodeName, "node", "", "Restrict scheduling of pods to a single node in the cluster. Does not afffect indirectly created pods (e.g. builds).") - flag.DurationVar(&opt.podPendingTimeout, "pod-pending-timeout", 30*time.Minute, "Maximum amount of time created containers can spend before the running state") + flag.DurationVar(&opt.podPendingTimeout, "pod-pending-timeout", 30*time.Minute, "Maximum amount of time created pods can spend before the running state. For test pods, this applies to each container. For builds, it applies to the build execution as a whole.") flag.StringVar(&opt.leaseServer, "lease-server", leaseServerAddress, "Address of the server that manages leases. Required if any test is configured to acquire a lease.") flag.StringVar(&opt.leaseServerCredentialsFile, "lease-server-credentials-file", "", "The path to credentials file used to access the lease server. 
The content is of the form :.") flag.DurationVar(&opt.leaseAcquireTimeout, "lease-acquire-timeout", leaseAcquireTimeout, "Maximum amount of time to wait for lease acquisition") diff --git a/pkg/steps/bundle_source.go b/pkg/steps/bundle_source.go index b9cf36f48d7..4591a2f21e3 100644 --- a/pkg/steps/bundle_source.go +++ b/pkg/steps/bundle_source.go @@ -77,7 +77,7 @@ func (s *bundleSourceStep) run(ctx context.Context) error { s.pullSecret, nil, ) - return handleBuilds(ctx, s.client, *build) + return handleBuilds(ctx, s.client, s.podClient, *build) } func replaceCommand(pullSpec, with string) string { diff --git a/pkg/steps/git_source.go b/pkg/steps/git_source.go index 7c762ff8eae..6198f701a58 100644 --- a/pkg/steps/git_source.go +++ b/pkg/steps/git_source.go @@ -44,7 +44,7 @@ func (s *gitSourceStep) run(ctx context.Context) error { secretName = s.cloneAuthConfig.Secret.Name } - return handleBuilds(ctx, s.buildClient, *buildFromSource(s.jobSpec, "", api.PipelineImageStreamTagReferenceRoot, buildapi.BuildSource{ + return handleBuilds(ctx, s.buildClient, s.podClient, *buildFromSource(s.jobSpec, "", api.PipelineImageStreamTagReferenceRoot, buildapi.BuildSource{ Type: buildapi.BuildSourceGit, Dockerfile: s.config.DockerfileLiteral, ContextDir: s.config.ContextDir, diff --git a/pkg/steps/index_generator.go b/pkg/steps/index_generator.go index aac680d7125..d8ea1632d59 100644 --- a/pkg/steps/index_generator.go +++ b/pkg/steps/index_generator.go @@ -123,7 +123,7 @@ func (s *indexGeneratorStep) run(ctx context.Context) error { s.pullSecret, nil, ) - err = handleBuilds(ctx, s.client, *build) + err = handleBuilds(ctx, s.client, s.podClient, *build) if err != nil && strings.Contains(err.Error(), "error checking provided apis") { return results.ForReason("generating_index").WithError(err).Errorf("failed to generate operator index due to invalid bundle info: %v", err) } diff --git a/pkg/steps/pipeline_image_cache.go b/pkg/steps/pipeline_image_cache.go index 083220d00f7..5b8f399f89f 
100644 --- a/pkg/steps/pipeline_image_cache.go +++ b/pkg/steps/pipeline_image_cache.go @@ -46,7 +46,7 @@ func (s *pipelineImageCacheStep) run(ctx context.Context) error { if err != nil { return err } - return handleBuild(ctx, s.client, *buildFromSource( + return handleBuild(ctx, s.client, s.podClient, *buildFromSource( s.jobSpec, s.config.From, s.config.To, buildapi.BuildSource{ Type: buildapi.BuildSourceDockerfile, diff --git a/pkg/steps/project_image.go b/pkg/steps/project_image.go index fa4d16b8641..9e30df26b41 100644 --- a/pkg/steps/project_image.go +++ b/pkg/steps/project_image.go @@ -63,7 +63,7 @@ func (s *projectDirectoryImageBuildStep) run(ctx context.Context) error { s.pullSecret, s.config.BuildArgs, ) - return handleBuilds(ctx, s.client, *build) + return handleBuilds(ctx, s.client, s.podClient, *build) } type workingDir func(tag string) (string, error) diff --git a/pkg/steps/rpm_injection.go b/pkg/steps/rpm_injection.go index 129626be9a9..cbaec00b772 100644 --- a/pkg/steps/rpm_injection.go +++ b/pkg/steps/rpm_injection.go @@ -50,7 +50,7 @@ func (s *rpmImageInjectionStep) run(ctx context.Context) error { if err != nil { return err } - return handleBuilds(ctx, s.client, *buildFromSource( + return handleBuilds(ctx, s.client, s.podClient, *buildFromSource( s.jobSpec, s.config.From, s.config.To, buildapi.BuildSource{ Type: buildapi.BuildSourceDockerfile, diff --git a/pkg/steps/source.go b/pkg/steps/source.go index 4acbd74d552..af7a20b8b6a 100644 --- a/pkg/steps/source.go +++ b/pkg/steps/source.go @@ -8,9 +8,11 @@ import ( "sort" "strings" "sync" + "sync/atomic" "time" "github.com/sirupsen/logrus" + "golang.org/x/sync/errgroup" corev1 "k8s.io/api/core/v1" kerrors "k8s.io/apimachinery/pkg/api/errors" @@ -161,7 +163,7 @@ func (s *sourceStep) run(ctx context.Context) error { if err != nil { return err } - return handleBuilds(ctx, s.client, *createBuild(s.config, s.jobSpec, clonerefsRef, s.resources, s.cloneAuthConfig, s.pullSecret, fromDigest)) + return 
handleBuilds(ctx, s.client, s.podClient, *createBuild(s.config, s.jobSpec, clonerefsRef, s.resources, s.cloneAuthConfig, s.pullSecret, fromDigest)) } func createBuild(config api.SourceStepConfiguration, jobSpec *api.JobSpec, clonerefsRef corev1.ObjectReference, resources api.ResourceConfiguration, cloneAuthConfig *CloneAuthConfig, pullSecret *corev1.Secret, fromDigest string) *buildapi.Build { @@ -404,7 +406,7 @@ func isBuildPhaseTerminated(phase buildapi.BuildPhase) bool { return true } -func handleBuilds(ctx context.Context, buildClient BuildClient, build buildapi.Build) error { +func handleBuilds(ctx context.Context, buildClient BuildClient, podClient kubernetes.PodClient, build buildapi.Build) error { var wg sync.WaitGroup builds := constructMultiArchBuilds(build, buildClient.NodeArchitectures()) @@ -414,7 +416,7 @@ func handleBuilds(ctx context.Context, buildClient BuildClient, build buildapi.B for _, build := range builds { go func(b buildapi.Build) { defer wg.Done() - if err := handleBuild(ctx, buildClient, b); err != nil { + if err := handleBuild(ctx, buildClient, podClient, b); err != nil { errChan <- fmt.Errorf("error occurred handling build %s: %w", b.Name, err) } }(build) @@ -450,7 +452,7 @@ func constructMultiArchBuilds(build buildapi.Build, nodeArchitectures []string) return ret } -func handleBuild(ctx context.Context, client BuildClient, build buildapi.Build) error { +func handleBuild(ctx context.Context, client BuildClient, podClient kubernetes.PodClient, build buildapi.Build) error { const attempts = 5 ns, name := build.Namespace, build.Name var errs []error @@ -464,7 +466,7 @@ func handleBuild(ctx context.Context, client BuildClient, build buildapi.Build) } else { return false, fmt.Errorf("could not create build %s: %w", name, err) } - if err := waitForBuildOrTimeout(ctx, client, ns, name); err != nil { + if err := waitForBuildOrTimeout(ctx, client, podClient, ns, name); err != nil { errs = append(errs, err) return false, handleFailedBuild(ctx, 
client, ns, name, err) } @@ -540,20 +542,80 @@ func hintsAtInfraReason(logSnippet string) bool { strings.Contains(logSnippet, "connection reset by peer") } -func waitForBuildOrTimeout(ctx context.Context, buildClient BuildClient, namespace, name string) error { - return waitForBuild(ctx, buildClient, namespace, name) -} - -func waitForBuild(ctx context.Context, buildClient BuildClient, namespace, name string) error { +func waitForBuildOrTimeout( + ctx context.Context, + buildClient BuildClient, + podClient kubernetes.PodClient, + namespace, name string, +) error { + return waitForBuild(ctx, buildClient, podClient, namespace, name) +} + +// waitForBuild watches a build until it either succeeds or fails +// +// Several subtle aspects are involved in the implementation: +// +// - The particular ci-operator instance executing this function may be the +// one that just created the build, but it may also be one that executes in +// parallel with the one that did, or even one that is being executed at a +// later point and simply reusing an existing build. This means we may be +// watching a build at any point in its lifetime, including long after it +// has been created and/or after it has succeeded/failed. +// - Because builds cannot be completely validated a priori, there is a +// potential that the object in question will stay pending forever. The +// timeout parameter (passed via the Pod client) is used to fail the +// execution early in that case. A timeout must result in an immediate +// error. +// - Because of the volume of tests executing in a given build cluster (and, +// to a lesser extent, to avoid unnecessary delays), this function must use +// a watch instead of polling in order to not overwhelm the API server. +// Economizing API requests when possible is also helpful. 
+func waitForBuild( + ctx context.Context, + buildClient BuildClient, + podClient kubernetes.PodClient, + namespace, name string, +) error { logrus.WithFields(logrus.Fields{ "namespace": namespace, "name": name, }).Trace("Waiting for build to be complete.") - - evaluatorFunc := func(obj runtime.Object) (bool, error) { - switch build := obj.(type) { - case *buildapi.Build: + // ret contains the latest version of the object received from the watch + // It is always valid in the `pendingCheck` thread since it is only started + // after the first version is seen. + var ret atomic.Pointer[buildapi.Build] + var eg *errgroup.Group + eg, ctx = errgroup.WithContext(ctx) + pendingCtx, cancel := context.WithCancel(ctx) + pendingCheck := func() error { + timeout := podClient.GetPendingTimeout() + select { + case <-pendingCtx.Done(): + case <-time.After(time.Until(ret.Load().CreationTimestamp.Add(timeout))): + // This second load happens much later and must look at the latest + // version of the object. + if err := checkPending(ctx, podClient, ret.Load(), timeout, time.Now()); err != nil { + logrus.Infof(err.Error()) + return err + } + } + return nil + } + eg.Go(func() error { + defer cancel() + return kubernetes.WaitForConditionOnObject(ctx, buildClient, ctrlruntimeclient.ObjectKey{Namespace: namespace, Name: name}, &buildapi.BuildList{}, &buildapi.Build{}, func(obj runtime.Object) (bool, error) { + build := obj.(*buildapi.Build) + // Is this the first time we've received an object? + // Also updates the shared pointer every time so that `pendingCheck` + // has access to the latest version + first := ret.Swap(build) == nil switch build.Status.Phase { + case buildapi.BuildPhaseNew, buildapi.BuildPhasePending: + // Iff this is a (relatively) new build, we need to verify that + // it does not stay pending forever. 
+ if first { + eg.Go(pendingCheck) + } case buildapi.BuildPhaseComplete: logrus.Infof("Build %s succeeded after %s", build.Name, buildDuration(build).Truncate(time.Second)) return true, nil @@ -562,13 +624,26 @@ func waitForBuild(ctx context.Context, buildClient BuildClient, namespace, name printBuildLogs(buildClient, build.Namespace, build.Name) return true, util.AppendLogToError(fmt.Errorf("the build %s failed after %s with reason %s: %s", build.Name, buildDuration(build).Truncate(time.Second), build.Status.Reason, build.Status.Message), build.Status.LogSnippet) } - default: - return false, fmt.Errorf("build/%v ns/%v got an event that did not contain a build: %v", name, namespace, obj) + return false, nil + }, 0) + }) + return eg.Wait() +} + +func checkPending( + ctx context.Context, + podClient kubernetes.PodClient, + build *buildapi.Build, + timeout time.Duration, + now time.Time, +) error { + switch build.Status.Phase { + case buildapi.BuildPhaseNew, buildapi.BuildPhasePending: + if build.CreationTimestamp.Add(timeout).Before(now) { + return util.PendingBuildError(ctx, podClient, build) } - return false, nil } - - return kubernetes.WaitForConditionOnObject(ctx, buildClient, ctrlruntimeclient.ObjectKey{Namespace: namespace, Name: name}, &buildapi.BuildList{}, &buildapi.Build{}, evaluatorFunc, 0) + return nil } func buildDuration(build *buildapi.Build) time.Duration { diff --git a/pkg/steps/source_test.go b/pkg/steps/source_test.go index 7b875064d49..a2a59ed31d4 100644 --- a/pkg/steps/source_test.go +++ b/pkg/steps/source_test.go @@ -2,6 +2,7 @@ package steps import ( "context" + "errors" "fmt" "io" "strings" @@ -22,6 +23,7 @@ import ( "github.com/openshift/ci-tools/pkg/api" "github.com/openshift/ci-tools/pkg/steps/loggingclient" "github.com/openshift/ci-tools/pkg/testhelper" + testhelper_kube "github.com/openshift/ci-tools/pkg/testhelper/kubernetes" ) func TestCreateBuild(t *testing.T) { @@ -386,20 +388,44 @@ func init() { } func TestWaitForBuild(t *testing.T) 
{ + ns := "ns" now := meta.Time{Time: time.Now()} start, end := meta.Time{Time: now.Time.Add(-3 * time.Second)}, now var testCases = []struct { name string buildClient BuildClient + timeout time.Duration expected error }{ + { + name: "timeout", + buildClient: NewBuildClient(loggingclient.New(fakectrlruntimeclient.NewClientBuilder().WithRuntimeObjects( + &buildapi.Build{ + ObjectMeta: meta.ObjectMeta{ + Name: "some-build", + Namespace: ns, + CreationTimestamp: start, + Annotations: map[string]string{ + buildapi.BuildPodNameAnnotation: "some-build-build", + }, + }, + Status: buildapi.BuildStatus{ + Phase: buildapi.BuildPhasePending, + StartTimestamp: &start, + CompletionTimestamp: &end, + }, + }, + ).Build()), nil, nil), + expected: fmt.Errorf("build didn't start running within 0s (phase: Pending)"), + }, { name: "build succeeded", buildClient: NewBuildClient(loggingclient.New(fakectrlruntimeclient.NewClientBuilder().WithRuntimeObjects( &buildapi.Build{ ObjectMeta: meta.ObjectMeta{ - Name: "some-build", - Namespace: "some-ns", + Name: "some-build", + Namespace: ns, + CreationTimestamp: start, }, Status: buildapi.BuildStatus{ Phase: buildapi.BuildPhaseComplete, @@ -407,14 +433,16 @@ func TestWaitForBuild(t *testing.T) { CompletionTimestamp: &end, }, }).Build()), nil, nil), + timeout: 30 * time.Minute, }, { name: "build failed", buildClient: NewFakeBuildClient(loggingclient.New(fakectrlruntimeclient.NewClientBuilder().WithRuntimeObjects( &buildapi.Build{ ObjectMeta: meta.ObjectMeta{ - Name: "some-build", - Namespace: "some-ns", + Name: "some-build", + Namespace: ns, + CreationTimestamp: start, }, Status: buildapi.BuildStatus{ Phase: buildapi.BuildPhaseCancelled, @@ -425,13 +453,70 @@ func TestWaitForBuild(t *testing.T) { CompletionTimestamp: &end, }, }).Build()), "abc\n"), // the line break is for gotestsum https://github.com/gotestyourself/gotestsum/issues/141#issuecomment-1209146526 + timeout: 30 * time.Minute, expected: fmt.Errorf("%s\n\n%s", "the build some-build 
failed after 3s with reason reason: msg", "snippet"), }, + { + name: "build already succeeded", + buildClient: NewBuildClient(loggingclient.New(fakectrlruntimeclient.NewClientBuilder().WithRuntimeObjects( + &buildapi.Build{ + ObjectMeta: meta.ObjectMeta{ + Name: "some-build", + Namespace: ns, + CreationTimestamp: meta.Time{ + Time: now.Add(-60 * time.Minute), + }, + }, + Status: buildapi.BuildStatus{ + Phase: buildapi.BuildPhaseComplete, + StartTimestamp: &meta.Time{ + Time: now.Add(-60 * time.Minute), + }, + CompletionTimestamp: &meta.Time{ + Time: now.Add(-59 * time.Minute), + }, + }, + }).Build()), nil, nil), + timeout: 30 * time.Minute, + }, + { + name: "build already failed", + buildClient: NewFakeBuildClient(loggingclient.New(fakectrlruntimeclient.NewClientBuilder().WithRuntimeObjects( + &buildapi.Build{ + ObjectMeta: meta.ObjectMeta{ + Name: "some-build", + Namespace: ns, + CreationTimestamp: meta.Time{ + Time: now.Add(-60 * time.Minute), + }, + }, + Status: buildapi.BuildStatus{ + Phase: buildapi.BuildPhaseCancelled, + Reason: "reason", + Message: "msg", + LogSnippet: "snippet", + StartTimestamp: &meta.Time{ + Time: now.Add(-60 * time.Minute), + }, + CompletionTimestamp: &meta.Time{ + Time: now.Add(-59 * time.Minute), + }, + }, + }).Build()), "abc\n"), + timeout: 30 * time.Minute, + expected: fmt.Errorf("%s\n\n%s", "the build some-build failed after 1m0s with reason reason: msg", "snippet"), + }, } for _, testCase := range testCases { t.Run(testCase.name, func(t *testing.T) { - actual := waitForBuild(context.TODO(), testCase.buildClient, "some-ns", "some-build") + client := testhelper_kube.FakePodClient{ + FakePodExecutor: &testhelper_kube.FakePodExecutor{ + LoggingClient: testCase.buildClient, + }, + PendingTimeout: testCase.timeout, + } + actual := waitForBuild(context.TODO(), testCase.buildClient, &client, ns, "some-build") if diff := cmp.Diff(testCase.expected, actual, testhelper.EquateErrorMessage); diff != "" { t.Errorf("%s: mismatch (-expected 
+actual), diff: %s", testCase.name, diff) } @@ -439,6 +524,111 @@ func TestWaitForBuild(t *testing.T) { } } +func TestCheckPending(t *testing.T) { + now := meta.Time{Time: time.Now()} + for _, tc := range []struct { + name string + build buildapi.Build + expected error + }{{ + name: "build completed", + build: buildapi.Build{ + ObjectMeta: meta.ObjectMeta{ + Name: "some-build", + Namespace: "ns", + CreationTimestamp: meta.Time{ + Time: now.Add(-60 * time.Minute), + }, + }, + Status: buildapi.BuildStatus{Phase: buildapi.BuildPhaseComplete}, + }, + }, { + name: "build cancelled", + build: buildapi.Build{ + ObjectMeta: meta.ObjectMeta{ + Name: "some-build", + Namespace: "ns", + CreationTimestamp: meta.Time{ + Time: now.Add(-60 * time.Minute), + }, + }, + Status: buildapi.BuildStatus{Phase: buildapi.BuildPhaseCancelled}, + }, + }, { + name: "build running", + build: buildapi.Build{ + ObjectMeta: meta.ObjectMeta{ + Name: "some-build", + Namespace: "ns", + CreationTimestamp: meta.Time{ + Time: now.Add(-60 * time.Minute), + }, + }, + Status: buildapi.BuildStatus{Phase: buildapi.BuildPhaseRunning}, + }, + }, { + name: "new build within timeout period", + build: buildapi.Build{ + ObjectMeta: meta.ObjectMeta{ + Name: "some-build", + Namespace: "ns", + CreationTimestamp: now, + }, + Status: buildapi.BuildStatus{Phase: buildapi.BuildPhaseNew}, + }, + }, { + name: "new build outside timeout period", + build: buildapi.Build{ + ObjectMeta: meta.ObjectMeta{ + Name: "some-build", + Namespace: "ns", + CreationTimestamp: meta.Time{ + Time: now.Add(-60 * time.Minute), + }, + }, + Status: buildapi.BuildStatus{Phase: buildapi.BuildPhaseNew}, + }, + expected: errors.New("build didn't start running within 30m0s (phase: New)"), + }, { + name: "build pending within timeout period", + build: buildapi.Build{ + ObjectMeta: meta.ObjectMeta{ + Name: "some-build", + Namespace: "ns", + CreationTimestamp: now, + }, + Status: buildapi.BuildStatus{Phase: buildapi.BuildPhasePending}, + }, + }, { + 
name: "build pending outside timeout period", + build: buildapi.Build{ + ObjectMeta: meta.ObjectMeta{ + Name: "some-build", + Namespace: "ns", + CreationTimestamp: meta.Time{ + Time: now.Add(-60 * time.Minute), + }, + }, + Status: buildapi.BuildStatus{Phase: buildapi.BuildPhasePending}, + }, + expected: errors.New("build didn't start running within 30m0s (phase: Pending)"), + }} { + t.Run(tc.name, func(t *testing.T) { + timeout := 30 * time.Minute + client := testhelper_kube.FakePodClient{ + FakePodExecutor: &testhelper_kube.FakePodExecutor{ + LoggingClient: loggingclient.New(fakectrlruntimeclient.NewFakeClient()), + }, + PendingTimeout: timeout, + } + actual := checkPending(context.Background(), &client, &tc.build, timeout, now.Time) + if diff := cmp.Diff(tc.expected, actual, testhelper.EquateErrorMessage); diff != "" { + t.Errorf("%s: mismatch (-expected +actual), diff: %s", tc.name, diff) + } + }) + } +} + type fakeBuildClient struct { loggingclient.LoggingClient logContent string diff --git a/pkg/util/builds.go b/pkg/util/builds.go new file mode 100644 index 00000000000..2ed8803d600 --- /dev/null +++ b/pkg/util/builds.go @@ -0,0 +1,15 @@ +package util + +import ( + "context" + "fmt" + + buildapi "github.com/openshift/api/build/v1" + + "github.com/openshift/ci-tools/pkg/kubernetes" +) + +// PendingBuildError produces an error for a build pending timeout +func PendingBuildError(ctx context.Context, client kubernetes.PodClient, build *buildapi.Build) error { + return fmt.Errorf("build didn't start running within %s (phase: %s)", client.GetPendingTimeout(), build.Status.Phase) +} From 2965020a92ff5ad0a11decf41758813f67145e93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20Barcarol=20Guimar=C3=A3es?= Date: Fri, 21 Apr 2023 18:12:41 +0000 Subject: [PATCH 4/4] pkg/steps: add pod events to build pending error ``` INFO[2022-06-17T18:22:59Z] Building src INFO[2022-06-17T18:23:05Z] build didn't start running within 1s (phase: Pending) ``` ``` INFO[2022-06-17T18:22:59Z] 
Building src INFO[2022-06-17T18:23:05Z] build didn't start running within 1s (phase: Pending): Found 1 events for Pod src-build: * 0x : 0/23 nodes are available: 1 node(s) had taint {node-role.kubernetes.io/ci-builds-worker: ci-builds-worker}, that the pod didn't tolerate, 1 node(s) had taint {node-role.kubernetes.io/ci-prowjobs-worker: ci-prowjobs-worker}, that the pod didn't tolerate, 1 node(s) were unschedulable, 2 Insufficient memory, 2 node(s) had taint {ci.openshift.io/ci-search: true}, that the pod didn't tolerate, 3 node(s) had taint {node-role.kubernetes.io/infra: }, that the pod didn't tolerate, 3 node(s) had taint {node-role.kubernetes.io/master: }, that the pod didn't tolerate, 4 node(s) had taint {node-role.kubernetes.io/ci-longtests-worker: ci-longtests-worker}, that the pod didn't tolerate, 6 node(s) had taint {node-role.kubernetes.io/ci-tests-worker: ci-tests-worker}, that the pod didn't tolerate. ``` There is an unfortunate layering violation in that we are forced to get to the build pod through the `pod-name` annotation and examine it. I could not find a way to do this through the `Build` object (as is possible for logs, for example). A pending build has very little information: ``` status: conditions: - lastTransitionTime: "2022-06-17T11:06:41Z" lastUpdateTime: "2022-06-17T11:06:41Z" status: "False" type: New - lastTransitionTime: "2022-06-17T11:06:41Z" lastUpdateTime: "2022-06-17T11:06:41Z" status: "True" type: Pending output: {} outputDockerImageReference: image-registry.openshift-image-registry.svc:5000/bbguimaraes0/pipeline:src phase: Pending ``` It is only by examining the build pod (which at least is done by the existing code used for test pods) that the cause can be determined and reported. 
--- pkg/steps/source_test.go | 68 ++++++++++++++++++++++++++++++++++++++++ pkg/util/builds.go | 23 ++++++++++++-- 2 files changed, 88 insertions(+), 3 deletions(-) diff --git a/pkg/steps/source_test.go b/pkg/steps/source_test.go index a2a59ed31d4..6744e73ead1 100644 --- a/pkg/steps/source_test.go +++ b/pkg/steps/source_test.go @@ -418,6 +418,74 @@ func TestWaitForBuild(t *testing.T) { ).Build()), nil, nil), expected: fmt.Errorf("build didn't start running within 0s (phase: Pending)"), }, + { + name: "timeout with pod", + buildClient: NewBuildClient(loggingclient.New(fakectrlruntimeclient.NewClientBuilder().WithRuntimeObjects( + &buildapi.Build{ + ObjectMeta: meta.ObjectMeta{ + Name: "some-build", + Namespace: ns, + CreationTimestamp: start, + Annotations: map[string]string{ + buildapi.BuildPodNameAnnotation: "some-build-build", + }, + }, + Status: buildapi.BuildStatus{ + Phase: buildapi.BuildPhasePending, + StartTimestamp: &start, + CompletionTimestamp: &end, + }, + }, + &coreapi.Pod{ + ObjectMeta: meta.ObjectMeta{ + Name: "some-build-build", + Namespace: ns, + }, + }, + ).Build()), nil, nil), + expected: fmt.Errorf("build didn't start running within 0s (phase: Pending):\nFound 0 events for Pod some-build-build:"), + }, + { + name: "timeout with pod and events", + buildClient: NewBuildClient(loggingclient.New(fakectrlruntimeclient.NewClientBuilder().WithRuntimeObjects( + &buildapi.Build{ + ObjectMeta: meta.ObjectMeta{ + Name: "some-build", + Namespace: ns, + CreationTimestamp: start, + Annotations: map[string]string{ + buildapi.BuildPodNameAnnotation: "some-build-build", + }, + }, + Status: buildapi.BuildStatus{ + Phase: buildapi.BuildPhasePending, + StartTimestamp: &start, + CompletionTimestamp: &end, + }, + }, + &coreapi.Pod{ + ObjectMeta: meta.ObjectMeta{ + Name: "some-build-build", + Namespace: ns, + UID: "UID", + }, + Status: coreapi.PodStatus{ + ContainerStatuses: []coreapi.ContainerStatus{{ + Name: "the-container", + State: coreapi.ContainerState{ + Waiting: 
&coreapi.ContainerStateWaiting{ + Reason: "the_reason", + Message: "the_message", + }, + }, + }}, + }, + }, + ).Build()), nil, nil), + expected: fmt.Errorf(`build didn't start running within 0s (phase: Pending): +* Container the-container is not ready with reason the_reason and message the_message +Found 0 events for Pod some-build-build:`), + }, { name: "build succeeded", buildClient: NewBuildClient(loggingclient.New(fakectrlruntimeclient.NewClientBuilder().WithRuntimeObjects( diff --git a/pkg/util/builds.go b/pkg/util/builds.go index 2ed8803d600..8bddb13967f 100644 --- a/pkg/util/builds.go +++ b/pkg/util/builds.go @@ -2,14 +2,31 @@ package util import ( "context" + "errors" "fmt" + "github.com/sirupsen/logrus" + + corev1 "k8s.io/api/core/v1" + ctrlruntimeclient "sigs.k8s.io/controller-runtime/pkg/client" + buildapi "github.com/openshift/api/build/v1" "github.com/openshift/ci-tools/pkg/kubernetes" ) -// PendingBuildError produces an error for a build pending timeout -func PendingBuildError(ctx context.Context, client kubernetes.PodClient, build *buildapi.Build) error { - return fmt.Errorf("build didn't start running within %s (phase: %s)", client.GetPendingTimeout(), build.Status.Phase) +// PendingBuildError fetches scheduling errors from the build pod's events +func PendingBuildError(ctx context.Context, client kubernetes.PodClient, build *buildapi.Build) (ret error) { + msg := fmt.Sprintf("build didn't start running within %s (phase: %s)", client.GetPendingTimeout(), build.Status.Phase) + var pod corev1.Pod + if name, ok := build.Annotations[buildapi.BuildPodNameAnnotation]; !ok { + logrus.Warnf("pod annotation missing for build %q", build.Name) + ret = errors.New(msg) + } else if err := client.Get(ctx, ctrlruntimeclient.ObjectKey{Namespace: build.Namespace, Name: name}, &pod); err != nil { + logrus.Warnf("failed to get pod for build %q: %v", build.Name, err) + ret = errors.New(msg) + } else { + ret = fmt.Errorf("%s:%s\n%s", msg, 
getReasonsForUnreadyContainers(&pod), getEventsForPod(ctx, &pod, client)) + } + return }