From 613ca336109a9c3bc2aba31ffb4636e91d548848 Mon Sep 17 00:00:00 2001 From: Zida Date: Mon, 20 May 2019 23:21:05 +0800 Subject: [PATCH] Surface deployment replica failure status (#496) --- .../serving/v1alpha1/revision_lifecycle.go | 6 ++++ .../v1alpha1/revision_lifecycle_test.go | 27 ++++++++++++++--- .../revision/reconcile_resources.go | 17 +++++++++++ pkg/reconciler/revision/table_test.go | 29 +++++++++++++++++++ pkg/testing/v1alpha1/revision.go | 7 +++++ 5 files changed, 82 insertions(+), 4 deletions(-) diff --git a/pkg/apis/serving/v1alpha1/revision_lifecycle.go b/pkg/apis/serving/v1alpha1/revision_lifecycle.go index 4e8102262510..095184c968f2 100644 --- a/pkg/apis/serving/v1alpha1/revision_lifecycle.go +++ b/pkg/apis/serving/v1alpha1/revision_lifecycle.go @@ -160,6 +160,12 @@ func (rs *RevisionStatus) MarkProgressDeadlineExceeded(message string) { revCondSet.Manage(rs).MarkFalse(RevisionConditionResourcesAvailable, "ProgressDeadlineExceeded", message) } +// MarkNoDeployment changes the "ResourcesAvailable" condition to false to reflect that the +// deployment has been created, but it failed to create its replicas. +func (rs *RevisionStatus) MarkNoDeployment(message string) { + revCondSet.Manage(rs).MarkFalse(RevisionConditionResourcesAvailable, "NoDeployment", message) +} + func (rs *RevisionStatus) MarkContainerHealthy() { revCondSet.Manage(rs).MarkTrue(RevisionConditionContainerHealthy) } diff --git a/pkg/apis/serving/v1alpha1/revision_lifecycle_test.go b/pkg/apis/serving/v1alpha1/revision_lifecycle_test.go index 46a1fdaaebe1..a09398266e07 100644 --- a/pkg/apis/serving/v1alpha1/revision_lifecycle_test.go +++ b/pkg/apis/serving/v1alpha1/revision_lifecycle_test.go @@ -21,15 +21,15 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" - "knative.dev/pkg/apis" - "knative.dev/pkg/apis/duck" - duckv1beta1 "knative.dev/pkg/apis/duck/v1beta1" - apitest "knative.dev/pkg/apis/testing" net
"github.com/knative/serving/pkg/apis/networking" "github.com/knative/serving/pkg/apis/serving" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" + "knative.dev/pkg/apis" + "knative.dev/pkg/apis/duck" + duckv1beta1 "knative.dev/pkg/apis/duck/v1beta1" + apitest "knative.dev/pkg/apis/testing" ) func TestRevisionDuckTypes(t *testing.T) { @@ -380,6 +380,25 @@ func TestRevisionResourcesUnavailable(t *testing.T) { } } +func TestRevisionNoDeployment(t *testing.T) { + r := &RevisionStatus{} + r.InitializeConditions() + apitest.CheckConditionOngoing(r.duck(), RevisionConditionResourcesAvailable, t) + apitest.CheckConditionOngoing(r.duck(), RevisionConditionContainerHealthy, t) + apitest.CheckConditionOngoing(r.duck(), RevisionConditionReady, t) + + const want = "the error message" + r.MarkNoDeployment(want) + apitest.CheckConditionFailed(r.duck(), RevisionConditionResourcesAvailable, t) + apitest.CheckConditionFailed(r.duck(), RevisionConditionReady, t) + if got := r.GetCondition(RevisionConditionResourcesAvailable); got == nil || got.Message != want { + t.Errorf("MarkNoDeployment = %v, want %v", got, want) + } + if got := r.GetCondition(RevisionConditionReady); got == nil || got.Message != want { + t.Errorf("MarkNoDeployment = %v, want %v", got, want) + } +} + func TestRevisionGetGroupVersionKind(t *testing.T) { r := &Revision{} want := schema.GroupVersionKind{ diff --git a/pkg/reconciler/revision/reconcile_resources.go b/pkg/reconciler/revision/reconcile_resources.go index 5468644b70fd..06e55e2e10b8 100644 --- a/pkg/reconciler/revision/reconcile_resources.go +++ b/pkg/reconciler/revision/reconcile_resources.go @@ -98,6 +98,13 @@ func (c *Reconciler) reconcileDeployment(ctx context.Context, rev *v1alpha1.Revi } } + if hasDeploymentReplicaFailure(deployment) && !rev.Status.IsActivationRequired() { + rev.Status.MarkNoDeployment(fmt.Sprintf( + "The controller could not create a deployment named %s.", 
deployment.Name)) + c.Recorder.Eventf(rev, corev1.EventTypeNormal, "NoDeployment", + "Revision %s not ready because replica is failed to be created", rev.Name) + } + // Now that we have a Deployment, determine whether there is any relevant // status to surface in the Revision. if hasDeploymentTimedOut(deployment) && !rev.Status.IsActivationRequired() { @@ -196,6 +203,16 @@ func (c *Reconciler) reconcilePA(ctx context.Context, rev *v1alpha1.Revision) er return nil } +func hasDeploymentReplicaFailure(deployment *appsv1.Deployment) bool { + for _, cond := range deployment.Status.Conditions { + // with Type ReplicaFailure and Reason FailedCreate + if cond.Type == appsv1.DeploymentReplicaFailure && cond.Reason == "FailedCreate" { + return true + } + } + return false +} + func hasDeploymentTimedOut(deployment *appsv1.Deployment) bool { // as per https://kubernetes.io/docs/concepts/workloads/controllers/deployment for _, cond := range deployment.Status.Conditions { diff --git a/pkg/reconciler/revision/table_test.go b/pkg/reconciler/revision/table_test.go index af2a70342aa9..c244c19c9456 100644 --- a/pkg/reconciler/revision/table_test.go +++ b/pkg/reconciler/revision/table_test.go @@ -410,6 +410,26 @@ func TestReconcile(t *testing.T) { "deploy-timeout"), }, Key: "foo/deploy-timeout", + }, { + Name: "surface deployment replica failure", + // Test the propagation of replica failure from Deployment. 
+ Objects: []runtime.Object{ + rev("foo", "deploy-replica-failure", + withK8sServiceName("the-taxman"), WithLogURL, MarkActive), + kpa("foo", "deploy-replica-failure"), + replicaFailureDeploy(deploy("foo", "deploy-replica-failure")), + image("foo", "deploy-replica-failure"), + }, + WantStatusUpdates: []clientgotesting.UpdateActionImpl{{ + Object: rev("foo", "deploy-replica-failure", + WithLogURL, AllUnknownConditions, + MarkNoDeployment("The controller could not create a deployment named deploy-replica-failure-deployment.")), + }}, + WantEvents: []string{ + Eventf(corev1.EventTypeNormal, "NoDeployment", "Revision %s not ready because replica is failed to be created", + "deploy-replica-failure"), + }, + Key: "foo/deploy-replica-failure", }, { Name: "surface ImagePullBackoff", // Test the propagation of ImagePullBackoff from user container. @@ -545,6 +565,15 @@ func TestReconcile(t *testing.T) { })) } +func replicaFailureDeploy(deploy *appsv1.Deployment) *appsv1.Deployment { + deploy.Status.Conditions = []appsv1.DeploymentCondition{{ + Type: appsv1.DeploymentReplicaFailure, + Status: corev1.ConditionTrue, + Reason: "FailedCreate", + }} + return deploy +} + func timeoutDeploy(deploy *appsv1.Deployment) *appsv1.Deployment { deploy.Status.Conditions = []appsv1.DeploymentCondition{{ Type: appsv1.DeploymentProgressing, diff --git a/pkg/testing/v1alpha1/revision.go b/pkg/testing/v1alpha1/revision.go index b0580f221c7a..e20256d537d7 100644 --- a/pkg/testing/v1alpha1/revision.go +++ b/pkg/testing/v1alpha1/revision.go @@ -139,6 +139,13 @@ func MarkContainerExiting(exitCode int32, message string) RevisionOption { } } +// MarkNoDeployment calls .Status.MarkNoDeployment on the Revision +func MarkNoDeployment(message string) RevisionOption { + return func(r *v1alpha1.Revision) { + r.Status.MarkNoDeployment(message) + } +} + // MarkResourcesUnavailable calls .Status.MarkResourcesUnavailable on the Revision. 
func MarkResourcesUnavailable(reason, message string) RevisionOption { return func(r *v1alpha1.Revision) {