From 05a4e2525f904c574651780e2fbea588349fe2d9 Mon Sep 17 00:00:00 2001 From: Clayton Coleman Date: Sun, 19 May 2019 16:26:30 -0400 Subject: [PATCH 1/2] conditions: Use a consistent constant for the Failing condition This needs to be moved back into openshift/api since it is now part of our public API, but for now ensure it is consistently used. --- docs/dev/metrics.md | 2 +- pkg/cvo/cvo_scenarios_test.go | 36 ++++++------- pkg/cvo/cvo_test.go | 80 ++++++++++++++--------------- pkg/cvo/metrics.go | 4 +- pkg/cvo/metrics_test.go | 6 +-- pkg/cvo/status.go | 13 +++-- pkg/cvo/status_test.go | 4 +- pkg/start/start_integration_test.go | 8 +-- 8 files changed, 80 insertions(+), 73 deletions(-) diff --git a/docs/dev/metrics.md b/docs/dev/metrics.md index 8099036696..da98cf70f6 100644 --- a/docs/dev/metrics.md +++ b/docs/dev/metrics.md @@ -32,7 +32,7 @@ Metrics about cluster operators: # HELP cluster_operator_conditions Report the conditions for active cluster operators. 0 is False and 1 is True. # TYPE cluster_operator_conditions gauge cluster_operator_conditions{condition="Available",name="version",namespace="openshift-cluster-version"} 1 -cluster_operator_conditions{condition="Failing",name="version",namespace="openshift-cluster-version"} 0 +cluster_operator_conditions{condition="Degraded",name="version",namespace="openshift-cluster-version"} 0 cluster_operator_conditions{condition="Progressing",name="version",namespace="openshift-cluster-version"} 0 cluster_operator_conditions{condition="RetrievedUpdates",name="version",namespace="openshift-cluster-version"} 0 # HELP cluster_operator_up Reports key highlights of the active cluster operators. diff --git a/pkg/cvo/cvo_scenarios_test.go b/pkg/cvo/cvo_scenarios_test.go index 505ff026c0..3cc8d3c4ef 100644 --- a/pkg/cvo/cvo_scenarios_test.go +++ b/pkg/cvo/cvo_scenarios_test.go @@ -161,7 +161,7 @@ func TestCVO_StartupAndSync(t *testing.T) { Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, // report back to the user that we don't have enough info to proceed - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue, Message: "No configured operator version, unable to update cluster"}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Message: "No configured operator version, unable to update cluster"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Unable to apply : an error occurred"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -204,7 +204,7 @@ func TestCVO_StartupAndSync(t *testing.T) { Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, // cleared failing status and set progressing - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Working towards 4.0.1"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -290,7 +290,7 @@ func TestCVO_StartupAndSync(t *testing.T) { }, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 1.0.0-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 1.0.0-abc"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -432,7 +432,7 @@ func TestCVO_StartupAndSyncUnverifiedPayload(t *testing.T) { Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, // report back to the user that we don't have enough info to proceed - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue, Message: "No configured operator version, unable to update cluster"}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Message: "No configured operator version, unable to update cluster"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Unable to apply : an error occurred"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -475,7 +475,7 @@ func TestCVO_StartupAndSyncUnverifiedPayload(t *testing.T) { Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, // cleared failing status and set progressing - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Working towards 4.0.1"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -561,7 +561,7 @@ func TestCVO_StartupAndSyncUnverifiedPayload(t *testing.T) { }, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 1.0.0-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 1.0.0-abc"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -648,7 +648,7 @@ func TestCVO_UpgradeUnverifiedPayload(t *testing.T) { }, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 1.0.0-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 1.0.0-abc"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -732,7 +732,7 @@ func TestCVO_UpgradeUnverifiedPayload(t *testing.T) { Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 1.0.0-abc"}, // cleared failing status and set progressing - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue, Reason: "ImageVerificationFailed", Message: "The update cannot be verified: some random error"}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Reason: "ImageVerificationFailed", Message: "The update cannot be verified: some random error"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Reason: "ImageVerificationFailed", Message: "Unable to apply 1.0.1-abc: the image may not be safe to use"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -836,7 +836,7 @@ func TestCVO_UpgradeUnverifiedPayload(t *testing.T) { }, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 1.0.1-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 1.0.1-abc"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -874,7 +874,7 @@ func TestCVO_UpgradeVerifiedPayload(t *testing.T) { }, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 1.0.0-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 1.0.0-abc"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -961,7 +961,7 @@ func TestCVO_UpgradeVerifiedPayload(t *testing.T) { Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 1.0.0-abc"}, // cleared failing status and set progressing - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue, Reason: "ImageVerificationFailed", Message: "The update cannot be verified: some random error"}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Reason: "ImageVerificationFailed", Message: "The update cannot be verified: some random error"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Reason: "ImageVerificationFailed", Message: "Unable to apply 1.0.1-abc: the image may not be safe to use"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -1059,7 +1059,7 @@ func TestCVO_UpgradeVerifiedPayload(t *testing.T) { }, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 1.0.1-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 1.0.1-abc"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -1104,7 +1104,7 @@ func TestCVO_RestartAndReconcile(t *testing.T) { }, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 1.0.0-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 1.0.0-abc"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -1267,7 +1267,7 @@ func TestCVO_ErrorDuringReconcile(t *testing.T) { }, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 1.0.0-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 1.0.0-abc"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -1423,7 +1423,7 @@ func TestCVO_ErrorDuringReconcile(t *testing.T) { }, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 1.0.0-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue, Reason: "UpdatePayloadFailed", Message: "Could not update test \"file-yml\" (3 of 3)"}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Reason: "UpdatePayloadFailed", Message: "Could not update test \"file-yml\" (3 of 3)"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Reason: "UpdatePayloadFailed", Message: "Error while reconciling 1.0.0-abc: the update could not be applied"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -1590,7 +1590,7 @@ func TestCVO_ParallelError(t *testing.T) { }, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Reason: "ClusterOperatorsNotAvailable", Message: "Working towards 1.0.0-abc: 33% complete, waiting on operator-1, operator-2"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -1632,7 +1632,7 @@ func TestCVO_VerifyInitializingPayloadState(t *testing.T) { }, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 1.0.0-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 1.0.0-abc"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -1691,7 +1691,7 @@ func TestCVO_VerifyUpdatingPayloadState(t *testing.T) { }, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 1.0.0-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 1.0.0-abc"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, diff --git a/pkg/cvo/cvo_test.go b/pkg/cvo/cvo_test.go index 35e2b3dfc1..4a296c326c 100644 --- a/pkg/cvo/cvo_test.go +++ b/pkg/cvo/cvo_test.go @@ -253,7 +253,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue, Reason: "UpdatePayloadIntegrity", Message: "unable to apply object"}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Reason: "UpdatePayloadIntegrity", Message: "unable to apply object"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Working towards 4.0.1"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -284,7 +284,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue, Reason: "UpdatePayloadIntegrity", Message: "unable to apply object"}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Reason: "UpdatePayloadIntegrity", Message: "unable to apply object"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Reason: "UpdatePayloadIntegrity", Message: "Unable to apply 0.0.1-abc: the contents of the update are invalid"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -326,7 +326,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue, Reason: "UpdatePayloadIntegrity", Message: "unable to apply object"}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Reason: "UpdatePayloadIntegrity", Message: "unable to apply object"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Working towards 4.0.1"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -357,7 +357,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue, Reason: "UpdatePayloadIntegrity", Message: "unable to apply object"}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Reason: "UpdatePayloadIntegrity", Message: "unable to apply object"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Reason: "UpdatePayloadIntegrity", Message: "Error while reconciling 0.0.1-abc: the contents of the update are invalid"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -400,7 +400,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue, Reason: "UpdatePayloadIntegrity", Message: "unable to apply object"}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Reason: "UpdatePayloadIntegrity", Message: "unable to apply object"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Working towards 4.0.1"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -431,7 +431,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 0.0.1-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue, Reason: "UpdatePayloadIntegrity", Message: "unable to apply object"}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Reason: "UpdatePayloadIntegrity", Message: "unable to apply object"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Reason: "UpdatePayloadIntegrity", Message: "Error while reconciling 0.0.1-abc: the contents of the update are invalid"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -469,7 +469,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue, Message: "unable to apply object"}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Message: "unable to apply object"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Working towards 4.0.1"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -501,7 +501,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "foo", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue, Message: "injected error"}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Message: "injected error"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Unable to apply 0.0.1-abc: an error occurred"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -553,7 +553,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue, Message: "file does not exist"}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Message: "file does not exist"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Unable to apply 4.0.1: an error occurred"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -620,7 +620,7 @@ func TestOperator_sync(t *testing.T) { // the order of progressing in the conditions array is preserved {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Unable to apply 4.0.1: an error occurred"}, {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue, Message: "file does not exist"}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Message: "file does not exist"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, }, @@ -677,7 +677,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Working towards image/image:v4.0.1"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -718,7 +718,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Initializing, will work towards image/image:v4.0.1"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -761,7 +761,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Working towards 4.0.2"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -815,7 +815,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Working towards 4.0.2"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -868,7 +868,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, // we don't reset the message here until the image is loaded {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Working towards 4.0.1"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, @@ -925,7 +925,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, // we don't reset the message here until the image is loaded {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Working towards 4.0.2"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, @@ -976,7 +976,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, // we correct the message that was incorrect from the previous state {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Working towards 4.0.1: 33% complete"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, @@ -1072,7 +1072,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, // we correct the message that was incorrect from the previous state {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Reason: "DownloadingUpdate", Message: "Working towards image/image:v4.0.1: downloading update"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, @@ -1113,7 +1113,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Initializing, will work towards image/image:v4.0.1"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -1145,7 +1145,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "xyz", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Working towards 0.0.1-abc"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -1195,7 +1195,7 @@ func TestOperator_sync(t *testing.T) { ObservedGeneration: 2, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 0.0.1-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 0.0.1-abc"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -1251,7 +1251,7 @@ func TestOperator_sync(t *testing.T) { Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 0.0.1-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, }, @@ -1287,7 +1287,7 @@ func TestOperator_sync(t *testing.T) { Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 0.0.1-abc"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 0.0.1-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionTrue}, }, }, @@ -1334,7 +1334,7 @@ func TestOperator_sync(t *testing.T) { Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 0.0.1-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, }, @@ -1370,7 +1370,7 @@ func TestOperator_sync(t *testing.T) { Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 0.0.1-abc"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 0.0.1-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionTrue}, }, }, @@ -1416,7 +1416,7 @@ func TestOperator_sync(t *testing.T) { Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 0.0.1-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, }, @@ -1448,7 +1448,7 @@ func TestOperator_sync(t *testing.T) { Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 0.0.1-abc"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 0.0.1-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, }, @@ -1509,7 +1509,7 @@ func TestOperator_sync(t *testing.T) { ObservedGeneration: 2, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 4.0.1"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 4.0.1"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -1582,7 +1582,7 @@ func TestOperator_sync(t *testing.T) { Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: ClusterVersionInvalid, Status: configv1.ConditionTrue, Reason: "InvalidClusterVersion", Message: "The cluster version is invalid: spec.desiredUpdate.version: Invalid value: \"4.0.4\": when image is empty the update must be a previous version or an available update"}, {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 4.0.1"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Reason: "InvalidClusterVersion", Message: "Stopped at 4.0.1: the cluster version is invalid"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -1657,7 +1657,7 @@ func TestOperator_sync(t *testing.T) { Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: ClusterVersionInvalid, Status: configv1.ConditionTrue, Reason: "InvalidClusterVersion", Message: "The cluster version is invalid: spec.desiredUpdate.version: Invalid value: \"4.0.3\": there are multiple possible payloads for this version, specify the exact image"}, {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 4.0.1"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Reason: "InvalidClusterVersion", Message: "Stopped at 4.0.1: the cluster version is invalid"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -1708,7 +1708,7 @@ func TestOperator_sync(t *testing.T) { ObservedGeneration: 2, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 0.0.1-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 0.0.1-abc"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -1768,7 +1768,7 @@ func TestOperator_sync(t *testing.T) { ObservedGeneration: 2, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 0.0.1-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 0.0.1-abc"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -1807,7 +1807,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "y_Kc5IQiIyU=", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 0.0.1-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: "Cluster version is 0.0.1-abc"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -1868,7 +1868,7 @@ func TestOperator_sync(t *testing.T) { Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: ClusterVersionInvalid, Status: configv1.ConditionTrue, Reason: "InvalidClusterVersion", Message: "The cluster version is invalid:\n* spec.upstream: Invalid value: \"#%GG\": must be a valid URL or empty\n* spec.clusterID: Invalid value: \"not-valid-cluster-id\": must be an RFC4122-variant UUID\n"}, {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying 0.0.1-abc"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Reason: "InvalidClusterVersion", Message: "Stopped at 0.0.1-abc: the cluster version is invalid"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -1904,7 +1904,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Reason: "InvalidClusterVersion", Message: "Stopped at image/image:v4.0.1: the cluster version is invalid"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, {Type: ClusterVersionInvalid, Status: configv1.ConditionTrue, Reason: "InvalidClusterVersion", Message: "The cluster version is invalid:\n* spec.upstream: Invalid value: \"#%GG\": must be a valid URL or empty\n* spec.clusterID: Invalid value: \"not-valid-cluster-id\": must be an RFC4122-variant UUID\n"}, @@ -1943,7 +1943,7 @@ func TestOperator_sync(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Reason: "InvalidClusterVersion", Message: "Reconciling 0.0.1-abc: the cluster version is invalid"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, {Type: ClusterVersionInvalid, Status: configv1.ConditionTrue, Reason: "InvalidClusterVersion", Message: "The cluster version is invalid:\n* spec.upstream: Invalid value: \"#%GG\": must be a valid URL or empty\n* spec.clusterID: Invalid value: \"not-valid-cluster-id\": must be an RFC4122-variant UUID\n"}, @@ -2155,7 +2155,7 @@ func TestOperator_availableUpdatesSync(t *testing.T) { }, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying image/image:v4.0.1"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse}, }, }, @@ -2206,7 +2206,7 @@ func TestOperator_availableUpdatesSync(t *testing.T) { }, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying image/image:v4.0.1"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse, Reason: "RemoteFailed", Message: "Unable to retrieve available updates: unexpected HTTP status: 500 Internal Server Error"}, }, @@ -2262,7 +2262,7 @@ func TestOperator_availableUpdatesSync(t *testing.T) { }, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying image/image:v4.0.1"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse, Reason: "RemoteFailed", Message: "Unable to retrieve available updates: unexpected HTTP status: 500 Internal Server Error"}, }, @@ -2318,7 +2318,7 @@ func TestOperator_availableUpdatesSync(t *testing.T) { ObservedGeneration: 2, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue, Message: "Done applying image/image:v4.0.1"}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse}, + {Type: ClusterStatusFailing, Status: configv1.ConditionFalse}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse, Reason: "RemoteFailed", Message: "Unable to retrieve available updates: unexpected HTTP status: 500 Internal Server Error"}, }, diff --git a/pkg/cvo/metrics.go b/pkg/cvo/metrics.go index d32f95465c..5476890d40 100644 --- a/pkg/cvo/metrics.go +++ b/pkg/cvo/metrics.go @@ -142,7 +142,7 @@ func (m *operatorMetrics) Collect(ch chan<- prometheus.Metric) { g.Set(float64(cv.CreationTimestamp.Unix())) ch <- g - failing := resourcemerge.FindOperatorStatusCondition(cv.Status.Conditions, configv1.ClusterStatusConditionType("Failing")) + failing := resourcemerge.FindOperatorStatusCondition(cv.Status.Conditions, ClusterStatusFailing) if update := cv.Spec.DesiredUpdate; update != nil && update.Image != current.Image { g := m.version.WithLabelValues("desired", update.Version, update.Image) g.Set(float64(mostRecentTimestamp(cv))) @@ -215,7 +215,7 @@ func (m *operatorMetrics) Collect(ch chan<- prometheus.Metric) { break } g := m.clusterOperatorUp.WithLabelValues(op.Name, firstVersion) - failing := resourcemerge.IsOperatorStatusConditionTrue(op.Status.Conditions, configv1.ClusterStatusConditionType("Failing")) + failing := resourcemerge.IsOperatorStatusConditionTrue(op.Status.Conditions, ClusterStatusFailing) available := resourcemerge.IsOperatorStatusConditionTrue(op.Status.Conditions, configv1.OperatorAvailable) if available && !failing { g.Set(1) diff --git a/pkg/cvo/metrics_test.go b/pkg/cvo/metrics_test.go index 6b1fafa788..78ecc38c1b 100644 --- a/pkg/cvo/metrics_test.go +++ b/pkg/cvo/metrics_test.go @@ -131,7 +131,7 @@ func Test_operatorMetrics_Collect(t *testing.T) { }, Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue, LastTransitionTime: metav1.Time{Time: time.Unix(5, 0)}}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, LastTransitionTime: metav1.Time{Time: time.Unix(5, 0)}}, }, }, }, @@ -294,7 +294,7 @@ func Test_operatorMetrics_Collect(t *testing.T) { }, Status: configv1.ClusterVersionStatus{ Conditions: []configv1.ClusterOperatorStatusCondition{ - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue, LastTransitionTime: metav1.Time{Time: time.Unix(4, 0)}}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, LastTransitionTime: metav1.Time{Time: time.Unix(4, 0)}}, }, }, }, @@ -328,7 +328,7 @@ func Test_operatorMetrics_Collect(t *testing.T) { }, Status: configv1.ClusterVersionStatus{ Conditions: []configv1.ClusterOperatorStatusCondition{ - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue}, }, }, }, diff --git a/pkg/cvo/status.go b/pkg/cvo/status.go index 8e6692b22f..25f2639bae 100644 --- a/pkg/cvo/status.go +++ b/pkg/cvo/status.go @@ -21,6 +21,13 @@ import ( "github.com/openshift/cluster-version-operator/pkg/payload" ) +const ( + // ClusterStatusFailing is set on the ClusterVersion status when a cluster + // cannot reach the desired state. It is considered more serious than Degraded + // and indicates the cluster is not healthy. + ClusterStatusFailing = configv1.ClusterStatusConditionType("Failing") +) + func mergeEqualVersions(current *configv1.UpdateHistory, desired configv1.Update) bool { if len(desired.Image) > 0 && desired.Image == current.Image { if len(desired.Version) == 0 { @@ -221,7 +228,7 @@ func (optr *Operator) syncStatus(original, config *configv1.ClusterVersion, stat // set the failing condition resourcemerge.SetOperatorStatusCondition(&config.Status.Conditions, configv1.ClusterOperatorStatusCondition{ - Type: configv1.ClusterStatusConditionType("Failing"), + Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Reason: reason, Message: err.Error(), @@ -249,7 +256,7 @@ func (optr *Operator) syncStatus(original, config *configv1.ClusterVersion, stat } else { // clear the failure condition - resourcemerge.SetOperatorStatusCondition(&config.Status.Conditions, configv1.ClusterOperatorStatusCondition{Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionFalse, LastTransitionTime: now}) + resourcemerge.SetOperatorStatusCondition(&config.Status.Conditions, configv1.ClusterOperatorStatusCondition{Type: ClusterStatusFailing, Status: configv1.ConditionFalse, LastTransitionTime: now}) // update progressing if status.Reconciling { @@ -365,7 +372,7 @@ func (optr *Operator) syncFailingStatus(original *configv1.ClusterVersion, ierr // reset the failing message resourcemerge.SetOperatorStatusCondition(&config.Status.Conditions, configv1.ClusterOperatorStatusCondition{ - Type: configv1.ClusterStatusConditionType("Failing"), + Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Message: ierr.Error(), LastTransitionTime: now, diff --git a/pkg/cvo/status_test.go b/pkg/cvo/status_test.go index ff50fd8b76..d8c2e75c2e 100644 --- a/pkg/cvo/status_test.go +++ b/pkg/cvo/status_test.go @@ -177,7 +177,7 @@ func TestOperator_syncFailingStatus(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue, Reason: "UpdatePayloadIntegrity", Message: "unable to apply object"}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Reason: "UpdatePayloadIntegrity", Message: "unable to apply object"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: "Working towards 4.0.1"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, @@ -212,7 +212,7 @@ func TestOperator_syncFailingStatus(t *testing.T) { VersionHash: "", Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: configv1.ClusterStatusConditionType("Failing"), Status: configv1.ConditionTrue, Reason: "", Message: "bad"}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Reason: "", Message: "bad"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Reason: "", Message: "Error ensuring the cluster version is up to date: bad"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, diff --git a/pkg/start/start_integration_test.go b/pkg/start/start_integration_test.go index 64b86589a0..6d4b8c9df7 100644 --- a/pkg/start/start_integration_test.go +++ b/pkg/start/start_integration_test.go @@ -727,7 +727,7 @@ func waitForUpdateAvailable(t *testing.T, client clientset.Interface, ns string, verifyClusterVersionHistory(t, cv) if !allowIncrementalFailure { - if failing := resourcemerge.FindOperatorStatusCondition(cv.Status.Conditions, configv1.ClusterStatusConditionType("Failing")); failing != nil && failing.Status == configv1.ConditionTrue { + if failing := resourcemerge.FindOperatorStatusCondition(cv.Status.Conditions, cvo.ClusterStatusFailing); failing != nil && failing.Status == configv1.ConditionTrue { return false, fmt.Errorf("operator listed as failing (%s): %s", failing.Reason, failing.Message) } } @@ -798,7 +798,7 @@ func waitForUpdateAvailable(t *testing.T, client clientset.Interface, ns string, return false, nil } - if failing := resourcemerge.FindOperatorStatusCondition(cv.Status.Conditions, configv1.ClusterStatusConditionType("Failing")); failing != nil && failing.Status == configv1.ConditionTrue { + if failing := resourcemerge.FindOperatorStatusCondition(cv.Status.Conditions, cvo.ClusterStatusFailing); failing != nil && failing.Status == configv1.ConditionTrue { return false, fmt.Errorf("operator listed as failing (%s): %s", failing.Reason, failing.Message) } @@ -843,7 +843,7 @@ func waitUntilUpgradeFails(t *testing.T, client clientset.Interface, ns string, // just wait until the operator is failing if len(versions) == 0 { - c := resourcemerge.FindOperatorStatusCondition(cv.Status.Conditions, configv1.ClusterStatusConditionType("Failing")) + c := resourcemerge.FindOperatorStatusCondition(cv.Status.Conditions, cvo.ClusterStatusFailing) return c != nil && c.Status == configv1.ConditionTrue, nil } @@ -909,7 +909,7 @@ func waitUntilUpgradeFails(t *testing.T, client clientset.Interface, ns string, return false, fmt.Errorf("upgrading operator to failed image should remain partial: %#v", cv.Status.History) } - failing := resourcemerge.FindOperatorStatusCondition(cv.Status.Conditions, configv1.ClusterStatusConditionType("Failing")) + failing := resourcemerge.FindOperatorStatusCondition(cv.Status.Conditions, cvo.ClusterStatusFailing) if failing == nil || failing.Status != configv1.ConditionTrue { return false, nil } From b820c53cea700873a20eb05cb81bc7e5aaeb2ca1 Mon Sep 17 00:00:00 2001 From: Clayton Coleman Date: Wed, 5 Jun 2019 15:51:06 -0400 Subject: [PATCH 2/2] metrics: Report reason as a label on the conditions metric Conditions are read by telemetry and the reason incentivizes teams to have good reasons on their operators. Should not increase cardinality significantly, but instead provide better insight. --- docs/dev/metrics.md | 8 ++++---- pkg/cvo/metrics.go | 6 +++--- pkg/cvo/metrics_test.go | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/dev/metrics.md b/docs/dev/metrics.md index da98cf70f6..1c80ee254f 100644 --- a/docs/dev/metrics.md +++ b/docs/dev/metrics.md @@ -31,10 +31,10 @@ Metrics about cluster operators: ``` # HELP cluster_operator_conditions Report the conditions for active cluster operators. 0 is False and 1 is True. # TYPE cluster_operator_conditions gauge -cluster_operator_conditions{condition="Available",name="version",namespace="openshift-cluster-version"} 1 -cluster_operator_conditions{condition="Degraded",name="version",namespace="openshift-cluster-version"} 0 -cluster_operator_conditions{condition="Progressing",name="version",namespace="openshift-cluster-version"} 0 -cluster_operator_conditions{condition="RetrievedUpdates",name="version",namespace="openshift-cluster-version"} 0 +cluster_operator_conditions{condition="Available",name="version",namespace="openshift-cluster-version",reason="Happy"} 1 +cluster_operator_conditions{condition="Degraded",name="version",namespace="openshift-cluster-version",reason=""} 0 +cluster_operator_conditions{condition="Progressing",name="version",namespace="openshift-cluster-version",reason=""} 0 +cluster_operator_conditions{condition="RetrievedUpdates",name="version",namespace="openshift-cluster-version",reason=""} 0 # HELP cluster_operator_up Reports key highlights of the active cluster operators. # TYPE cluster_operator_up gauge cluster_operator_up{name="version",namespace="openshift-cluster-version",version="4.0.1"} 1 diff --git a/pkg/cvo/metrics.go b/pkg/cvo/metrics.go index 5476890d40..e988bd1239 100644 --- a/pkg/cvo/metrics.go +++ b/pkg/cvo/metrics.go @@ -70,7 +70,7 @@ started. clusterOperatorConditions: prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "cluster_operator_conditions", Help: "Report the conditions for active cluster operators. 0 is False and 1 is True.", - }, []string{"name", "condition"}), + }, []string{"name", "condition", "reason"}), clusterOperatorConditionTransitions: prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "cluster_operator_condition_transitions", Help: "Reports the number of times that a condition on a cluster operator changes status", @@ -122,7 +122,7 @@ func (m *operatorMetrics) Describe(ch chan<- *prometheus.Desc) { ch <- m.version.WithLabelValues("", "", "").Desc() ch <- m.availableUpdates.WithLabelValues("", "").Desc() ch <- m.clusterOperatorUp.WithLabelValues("", "").Desc() - ch <- m.clusterOperatorConditions.WithLabelValues("", "").Desc() + ch <- m.clusterOperatorConditions.WithLabelValues("", "", "").Desc() ch <- m.clusterOperatorConditionTransitions.WithLabelValues("", "").Desc() } @@ -227,7 +227,7 @@ func (m *operatorMetrics) Collect(ch chan<- prometheus.Metric) { if condition.Status == configv1.ConditionUnknown { continue } - g := m.clusterOperatorConditions.WithLabelValues(op.Name, string(condition.Type)) + g := m.clusterOperatorConditions.WithLabelValues(op.Name, string(condition.Type), string(condition.Reason)) if condition.Status == configv1.ConditionTrue { g.Set(1) } else { diff --git a/pkg/cvo/metrics_test.go b/pkg/cvo/metrics_test.go index 78ecc38c1b..743deb4de1 100644 --- a/pkg/cvo/metrics_test.go +++ b/pkg/cvo/metrics_test.go @@ -161,7 +161,7 @@ func Test_operatorMetrics_Collect(t *testing.T) { Status: configv1.ClusterOperatorStatus{ Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue}, - {Type: configv1.ClusterStatusConditionType("Custom"), Status: configv1.ConditionFalse}, + {Type: configv1.ClusterStatusConditionType("Custom"), Status: configv1.ConditionFalse, Reason: "CustomReason"}, {Type: configv1.ClusterStatusConditionType("Unknown"), Status: configv1.ConditionUnknown}, }, }, @@ -175,8 +175,8 @@ func Test_operatorMetrics_Collect(t *testing.T) { } expectMetric(t, metrics[0], 0, map[string]string{"type": "current", "version": "", "image": ""}) expectMetric(t, metrics[1], 1, map[string]string{"name": "test", "version": ""}) - expectMetric(t, metrics[2], 1, map[string]string{"name": "test", "condition": "Available"}) - expectMetric(t, metrics[3], 0, map[string]string{"name": "test", "condition": "Custom"}) + expectMetric(t, metrics[2], 1, map[string]string{"name": "test", "condition": "Available", "reason": ""}) + expectMetric(t, metrics[3], 0, map[string]string{"name": "test", "condition": "Custom", "reason": "CustomReason"}) }, }, {