diff --git a/pkg/cvo/cvo_scenarios_test.go b/pkg/cvo/cvo_scenarios_test.go index e388aa8030..6176d4a14f 100644 --- a/pkg/cvo/cvo_scenarios_test.go +++ b/pkg/cvo/cvo_scenarios_test.go @@ -3676,7 +3676,7 @@ func TestCVO_ParallelError(t *testing.T) { }, "0000_20_a_file.yaml": nil, "0000_20_b_file.yaml": &payload.UpdateError{ - UpdateEffect: payload.UpdateEffectNone, + UpdateEffect: payload.UpdateEffectFail, Message: "Failed to reconcile 20-b-file resource", }, }} @@ -3861,7 +3861,7 @@ func TestCVO_ParallelError(t *testing.T) { {Type: DesiredReleaseAccepted, Status: configv1.ConditionTrue, Reason: "PayloadLoaded", Message: "Payload loaded version=\"1.0.0-abc\" image=\"image/image:1\" architecture=\"" + architecture + "\""}, {Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, - {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Reason: "MultipleErrors", Message: "Multiple errors are preventing progress:\n* Failed to reconcile 10-a-file resource\n* Failed to reconcile 20-b-file resource"}, + {Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Message: "Failed to reconcile 20-b-file resource"}, {Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Reason: "MultipleErrors", Message: "Unable to apply 1.0.0-abc: an unknown error has occurred: MultipleErrors"}, {Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse}, }, diff --git a/pkg/cvo/status.go b/pkg/cvo/status.go index 3eb0ee4785..9736dfd24e 100644 --- a/pkg/cvo/status.go +++ b/pkg/cvo/status.go @@ -293,8 +293,37 @@ func updateClusterVersionStatus(cvStatus *configv1.ClusterVersionStatus, status }) } - progressReason, progressMessage, skipFailure := convertErrorToProgressing(cvStatus.History, now.Time, status) + failure := status.Failure + failingReason, failingMessage := getReasonMessageFromError(failure) + var skipFailure bool + var progressReason, progressMessage string + if !status.Reconciling && len(cvStatus.History) != 0 { + progressReason, progressMessage, skipFailure = convertErrorToProgressing(now.Time, failure) + failure = filterErrorForFailingCondition(failure, payload.UpdateEffectNone) + filteredFailingReason, filteredFailingMessage := getReasonMessageFromError(failure) + if failingReason != filteredFailingReason { + klog.Infof("Filtered failure reason changed from '%s' to '%s'", failingReason, filteredFailingReason) + } + if failingMessage != filteredFailingMessage { + klog.Infof("Filtered failure message changed from '%s' to '%s'", failingMessage, filteredFailingMessage) + } + failingReason, failingMessage = filteredFailingReason, filteredFailingMessage + } + + // set the failing condition + failingCondition := configv1.ClusterOperatorStatusCondition{ + Type: ClusterStatusFailing, + Status: configv1.ConditionFalse, + LastTransitionTime: now, + } + if failure != nil && !skipFailure { + failingCondition.Status = configv1.ConditionTrue + failingCondition.Reason = failingReason + failingCondition.Message = failingMessage + } + resourcemerge.SetOperatorStatusCondition(&cvStatus.Conditions, failingCondition) + // update progressing if err := status.Failure; err != nil && !skipFailure { var reason string msg := progressMessage @@ -307,16 +336,6 @@ func updateClusterVersionStatus(cvStatus *configv1.ClusterVersionStatus, status msg = "an error occurred" } - // set the failing condition - resourcemerge.SetOperatorStatusCondition(&cvStatus.Conditions, configv1.ClusterOperatorStatusCondition{ - Type: ClusterStatusFailing, - Status: configv1.ConditionTrue, - Reason: reason, - Message: err.Error(), - LastTransitionTime: now, - }) - - // update progressing if status.Reconciling { resourcemerge.SetOperatorStatusCondition(&cvStatus.Conditions, configv1.ClusterOperatorStatusCondition{ Type: configv1.OperatorProgressing, @@ -336,9 +355,6 @@ func updateClusterVersionStatus(cvStatus *configv1.ClusterVersionStatus, status } } else { - // clear the failure condition - resourcemerge.SetOperatorStatusCondition(&cvStatus.Conditions, configv1.ClusterOperatorStatusCondition{Type: ClusterStatusFailing, Status: configv1.ConditionFalse, LastTransitionTime: now}) - // update progressing if status.Reconciling { message := fmt.Sprintf("Cluster version is %s", version) @@ -413,6 +429,48 @@ func updateClusterVersionStatus(cvStatus *configv1.ClusterVersionStatus, status } } +// getReasonMessageFromError returns the reason and the message from an error. +// If the reason or message is not available, an empty string is returned. +func getReasonMessageFromError(err error) (reason, message string) { + if uErr, ok := err.(*payload.UpdateError); ok { + reason = uErr.Reason + } + if err != nil { + message = err.Error() + } + return reason, message +} + +// filterErrorForFailingCondition filters out update errors based on the given +// updateEffect from a MultipleError error. If the err has the reason +// MultipleErrors, its immediate nested errors are filtered out and the error +// is recreated. If all nested errors are filtered out, nil is returned. +func filterErrorForFailingCondition(err error, updateEffect payload.UpdateEffectType) error { + if err == nil { + return nil + } + if uErr, ok := err.(*payload.UpdateError); ok && uErr.Reason == "MultipleErrors" { + if nested, ok := uErr.Nested.(interface{ Errors() []error }); ok { + filtered := nested.Errors() + filtered = filterOutUpdateErrors(filtered, updateEffect) + return newMultipleError(filtered) + } + } + return err +} + +// filterOutUpdateErrors filters out update errors of the given effect. +func filterOutUpdateErrors(errs []error, updateEffect payload.UpdateEffectType) []error { + filtered := make([]error, 0, len(errs)) + for _, err := range errs { + if uErr, ok := err.(*payload.UpdateError); ok && uErr.UpdateEffect == updateEffect { + continue + } + filtered = append(filtered, err) + } + return filtered +} + func setImplicitlyEnabledCapabilitiesCondition(cvStatus *configv1.ClusterVersionStatus, implicitlyEnabled []configv1.ClusterVersionCapability, now metav1.Time) { @@ -479,11 +537,11 @@ func setDesiredReleaseAcceptedCondition(cvStatus *configv1.ClusterVersionStatus, // how an update error is interpreted. An error may simply need to be reported but does not indicate the update is // failing. An error may indicate the update is failing or that if the error continues for a defined interval the // update is failing. -func convertErrorToProgressing(history []configv1.UpdateHistory, now time.Time, status *SyncWorkerStatus) (reason string, message string, ok bool) { - if len(history) == 0 || status.Failure == nil || status.Reconciling { +func convertErrorToProgressing(now time.Time, statusFailure error) (reason string, message string, ok bool) { + if statusFailure == nil { return "", "", false } - uErr, ok := status.Failure.(*payload.UpdateError) + uErr, ok := statusFailure.(*payload.UpdateError) if !ok { return "", "", false } diff --git a/pkg/cvo/status_test.go b/pkg/cvo/status_test.go index 48488ca3ee..1403c71f99 100644 --- a/pkg/cvo/status_test.go +++ b/pkg/cvo/status_test.go @@ -8,8 +8,10 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + "github.com/openshift/cluster-version-operator/pkg/payload" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/diff" + apierrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/validation/field" "k8s.io/client-go/tools/record" @@ -345,3 +347,449 @@ func TestUpdateClusterVersionStatus_ReconciliationIssues(t *testing.T) { }) } } + +func TestUpdateClusterVersionStatus_FilteringMultipleErrorsForFailingCondition(t *testing.T) { + ignoreLastTransitionTime := cmpopts.IgnoreFields(configv1.ClusterOperatorStatusCondition{}, "LastTransitionTime") + type args struct { + syncWorkerStatus *SyncWorkerStatus + } + tests := []struct { + name string + args args + shouldModifyWhenNotReconcilingAndHistoryNotEmpty bool + expectedConditionNotModified *configv1.ClusterOperatorStatusCondition + expectedConditionModified *configv1.ClusterOperatorStatusCondition + }{ + { + name: "no errors are present", + args: args{ + syncWorkerStatus: &SyncWorkerStatus{ + Failure: nil, + }, + }, + expectedConditionNotModified: &configv1.ClusterOperatorStatusCondition{ + Type: ClusterStatusFailing, + Status: configv1.ConditionFalse, + }, + }, + { + name: "single generic error", + args: args{ + syncWorkerStatus: &SyncWorkerStatus{ + Failure: fmt.Errorf("error has happened"), + }, + }, + expectedConditionNotModified: &configv1.ClusterOperatorStatusCondition{ + Type: ClusterStatusFailing, + Status: configv1.ConditionTrue, + Message: "error has happened", + }, + }, + { + name: "single UpdateEffectNone error", + args: args{ + syncWorkerStatus: &SyncWorkerStatus{ + Failure: &payload.UpdateError{ + UpdateEffect: payload.UpdateEffectNone, + Reason: "ClusterOperatorUpdating", + Message: "Cluster operator A is updating", + }, + }, + }, + expectedConditionNotModified: &configv1.ClusterOperatorStatusCondition{ + Type: ClusterStatusFailing, + Status: configv1.ConditionTrue, + Reason: "ClusterOperatorUpdating", + Message: "Cluster operator A is updating", + }, + shouldModifyWhenNotReconcilingAndHistoryNotEmpty: true, + expectedConditionModified: &configv1.ClusterOperatorStatusCondition{ + Type: ClusterStatusFailing, + Status: configv1.ConditionFalse, + }, + }, + { + name: "single condensed UpdateEffectFail UpdateError", + args: args{ + syncWorkerStatus: &SyncWorkerStatus{ + Failure: &payload.UpdateError{ + Nested: apierrors.NewAggregate([]error{ + &payload.UpdateError{ + UpdateEffect: payload.UpdateEffectFail, + Reason: "ClusterOperatorNotAvailable", + Message: "Cluster operator A is not available", + }, + &payload.UpdateError{ + UpdateEffect: payload.UpdateEffectFail, + Reason: "ClusterOperatorNotAvailable", + Message: "Cluster operator B is not available", + }, + }), + UpdateEffect: payload.UpdateEffectFail, + Reason: "ClusterOperatorNotAvailable", + Message: "Cluster operators A, B are not available", + }, + }, + }, + expectedConditionNotModified: &configv1.ClusterOperatorStatusCondition{ + Type: ClusterStatusFailing, + Status: configv1.ConditionTrue, + Reason: "ClusterOperatorNotAvailable", + Message: "Cluster operators A, B are not available", + }, + }, + { + name: "MultipleErrors of UpdateEffectFail and UpdateEffectFailAfterInterval", + args: args{ + syncWorkerStatus: &SyncWorkerStatus{ + Failure: &payload.UpdateError{ + Nested: apierrors.NewAggregate([]error{ + &payload.UpdateError{ + UpdateEffect: payload.UpdateEffectFail, + Reason: "ClusterOperatorNotAvailable", + Message: "Cluster operator A is not available", + }, + &payload.UpdateError{ + UpdateEffect: payload.UpdateEffectFailAfterInterval, + Reason: "ClusterOperatorDegraded", + Message: "Cluster operator B is degraded", + }, + }), + UpdateEffect: payload.UpdateEffectFail, + Reason: "MultipleErrors", + Message: "Multiple errors are preventing progress:\n* Cluster operator A is not available\n* Cluster operator B is degraded", + }, + }, + }, + expectedConditionNotModified: &configv1.ClusterOperatorStatusCondition{ + Type: ClusterStatusFailing, + Status: configv1.ConditionTrue, + Reason: "MultipleErrors", + Message: "Multiple errors are preventing progress:\n* Cluster operator A is not available\n* Cluster operator B is degraded", + }, + }, + { + name: "MultipleErrors of UpdateEffectFail and UpdateEffectNone", + args: args{ + syncWorkerStatus: &SyncWorkerStatus{ + Failure: &payload.UpdateError{ + Nested: apierrors.NewAggregate([]error{ + &payload.UpdateError{ + UpdateEffect: payload.UpdateEffectFail, + Reason: "ClusterOperatorNotAvailable", + Message: "Cluster operator A is not available", + }, + &payload.UpdateError{ + UpdateEffect: payload.UpdateEffectNone, + Reason: "ClusterOperatorUpdating", + Message: "Cluster operator B is updating versions", + }, + }), + UpdateEffect: payload.UpdateEffectFail, + Reason: "MultipleErrors", + Message: "Multiple errors are preventing progress:\n* Cluster operator A is not available\n* Cluster operator B is updating versions", + }, + }, + }, + expectedConditionNotModified: &configv1.ClusterOperatorStatusCondition{ + Type: ClusterStatusFailing, + Status: configv1.ConditionTrue, + Reason: "MultipleErrors", + Message: "Multiple errors are preventing progress:\n* Cluster operator A is not available\n* Cluster operator B is updating versions", + }, + shouldModifyWhenNotReconcilingAndHistoryNotEmpty: true, + expectedConditionModified: &configv1.ClusterOperatorStatusCondition{ + Type: ClusterStatusFailing, + Status: configv1.ConditionTrue, + Reason: "ClusterOperatorNotAvailable", + Message: "Cluster operator A is not available", + }, + }, + { + name: "MultipleErrors of UpdateEffectNone and UpdateEffectNone", + args: args{ + syncWorkerStatus: &SyncWorkerStatus{ + Failure: &payload.UpdateError{ + Nested: apierrors.NewAggregate([]error{ + &payload.UpdateError{ + UpdateEffect: payload.UpdateEffectNone, + Reason: "ClusterOperatorUpdating", + Message: "Cluster operator A is updating versions", + }, + &payload.UpdateError{ + UpdateEffect: payload.UpdateEffectNone, + Reason: "NewUpdateEffectNoneReason", + Message: "Cluster operator B is getting conscious", + }, + }), + UpdateEffect: payload.UpdateEffectFail, + Reason: "MultipleErrors", + Message: "Multiple errors are preventing progress:\n* Cluster operator A is updating versions\n* Cluster operator B is getting conscious", + }, + }, + }, + expectedConditionNotModified: &configv1.ClusterOperatorStatusCondition{ + Type: ClusterStatusFailing, + Status: configv1.ConditionTrue, + Reason: "MultipleErrors", + Message: "Multiple errors are preventing progress:\n* Cluster operator A is updating versions\n* Cluster operator B is getting conscious", + }, + shouldModifyWhenNotReconcilingAndHistoryNotEmpty: true, + expectedConditionModified: &configv1.ClusterOperatorStatusCondition{ + Type: ClusterStatusFailing, + Status: configv1.ConditionFalse, + }, + }, + { + name: "MultipleErrors of UpdateEffectFail, UpdateEffectFailAfterInterval, and UpdateEffectNone", + args: args{ + syncWorkerStatus: &SyncWorkerStatus{ + Failure: &payload.UpdateError{ + Nested: apierrors.NewAggregate([]error{ + &payload.UpdateError{ + UpdateEffect: payload.UpdateEffectFail, + Reason: "ClusterOperatorNotAvailable", + Message: "Cluster operator A is not available", + }, + &payload.UpdateError{ + UpdateEffect: payload.UpdateEffectNone, + Reason: "ClusterOperatorUpdating", + Message: "Cluster operator B is updating versions", + }, + &payload.UpdateError{ + UpdateEffect: payload.UpdateEffectFailAfterInterval, + Reason: "ClusterOperatorDegraded", + Message: "Cluster operator C is degraded", + }, + }), + UpdateEffect: payload.UpdateEffectFail, + Reason: "MultipleErrors", + Message: "Multiple errors are preventing progress:\n* Cluster operator A is not available\n* Cluster operator B is updating versions\n* Cluster operator C is degraded", + }, + }, + }, + expectedConditionNotModified: &configv1.ClusterOperatorStatusCondition{ + Type: ClusterStatusFailing, + Status: configv1.ConditionTrue, + Reason: "MultipleErrors", + Message: "Multiple errors are preventing progress:\n* Cluster operator A is not available\n* Cluster operator B is updating versions\n* Cluster operator C is degraded", + }, + shouldModifyWhenNotReconcilingAndHistoryNotEmpty: true, + expectedConditionModified: &configv1.ClusterOperatorStatusCondition{ + Type: ClusterStatusFailing, + Status: configv1.ConditionTrue, + Reason: "MultipleErrors", + Message: "Multiple errors are preventing progress:\n* Cluster operator A is not available\n* Cluster operator C is degraded", + }, + }, + } + for _, tc := range tests { + tc := tc + gates := fakeRiFlags{} + release := configv1.Release{} + getAvailableUpdates := func() *availableUpdates { return nil } + var noErrors field.ErrorList + + t.Run(tc.name, func(t *testing.T) { + type helper struct { + isReconciling bool + isHistoryEmpty bool + } + combinations := []helper{ + {false, false}, + {false, true}, + {true, false}, + {true, true}, + } + for _, c := range combinations { + tc.args.syncWorkerStatus.Reconciling = c.isReconciling + cvStatus := &configv1.ClusterVersionStatus{} + if !c.isHistoryEmpty { + cvStatus.History = []configv1.UpdateHistory{{State: configv1.PartialUpdate}} + } + expectedCondition := tc.expectedConditionNotModified + if tc.shouldModifyWhenNotReconcilingAndHistoryNotEmpty && !c.isReconciling && !c.isHistoryEmpty { + expectedCondition = tc.expectedConditionModified + } + updateClusterVersionStatus(cvStatus, tc.args.syncWorkerStatus, release, getAvailableUpdates, gates, noErrors) + condition := resourcemerge.FindOperatorStatusCondition(cvStatus.Conditions, ClusterStatusFailing) + if diff := cmp.Diff(expectedCondition, condition, ignoreLastTransitionTime); diff != "" { + t.Errorf("unexpected condition when Reconciling == %t && isHistoryEmpty == %t\n:%s", c.isReconciling, c.isHistoryEmpty, diff) + } + } + }) + } +} + +func Test_filterOutUpdateErrors(t *testing.T) { + type args struct { + errs []error + updateEffectType payload.UpdateEffectType + } + tests := []struct { + name string + args args + want []error + }{ + { + name: "empty errors", + args: args{ + errs: []error{}, + updateEffectType: payload.UpdateEffectNone, + }, + want: []error{}, + }, + { + name: "single update error of the specified value", + args: args{ + errs: []error{ + &payload.UpdateError{ + Name: "None", + UpdateEffect: payload.UpdateEffectNone, + }, + }, + updateEffectType: payload.UpdateEffectNone, + }, + want: []error{}, + }, + { + name: "errors do not contain update errors of the specified value", + args: args{ + errs: []error{ + &payload.UpdateError{ + Name: "Fail", + UpdateEffect: payload.UpdateEffectFail, + }, + &payload.UpdateError{ + Name: "Report", + UpdateEffect: payload.UpdateEffectReport, + }, + &payload.UpdateError{ + Name: "Fail After Interval", + UpdateEffect: payload.UpdateEffectFailAfterInterval, + }, + }, + updateEffectType: payload.UpdateEffectNone, + }, + want: []error{ + &payload.UpdateError{ + Name: "Fail", + UpdateEffect: payload.UpdateEffectFail, + }, + &payload.UpdateError{ + Name: "Report", + UpdateEffect: payload.UpdateEffectReport, + }, + &payload.UpdateError{ + Name: "Fail After Interval", + UpdateEffect: payload.UpdateEffectFailAfterInterval, + }, + }, + }, + { + name: "errors contain update errors of the specified value UpdateEffectNone", + args: args{ + errs: []error{ + &payload.UpdateError{ + Name: "Fail After Interval", + UpdateEffect: payload.UpdateEffectFailAfterInterval, + }, + &payload.UpdateError{ + Name: "None #1", + UpdateEffect: payload.UpdateEffectNone, + }, + &payload.UpdateError{ + Name: "Report", + UpdateEffect: payload.UpdateEffectReport, + }, + &payload.UpdateError{ + Name: "None #2", + UpdateEffect: payload.UpdateEffectNone, + }, + }, + updateEffectType: payload.UpdateEffectNone, + }, + want: []error{ + &payload.UpdateError{ + Name: "Fail After Interval", + UpdateEffect: payload.UpdateEffectFailAfterInterval, + }, + &payload.UpdateError{ + Name: "Report", + UpdateEffect: payload.UpdateEffectReport, + }, + }, + }, + { + name: "errors contain update errors of the specified value UpdateEffectReport", + args: args{ + errs: []error{ + &payload.UpdateError{ + Name: "Fail After Interval", + UpdateEffect: payload.UpdateEffectFailAfterInterval, + }, + &payload.UpdateError{ + Name: "None #1", + UpdateEffect: payload.UpdateEffectNone, + }, + &payload.UpdateError{ + Name: "Report", + UpdateEffect: payload.UpdateEffectReport, + }, + &payload.UpdateError{ + Name: "None #2", + UpdateEffect: payload.UpdateEffectNone, + }, + }, + updateEffectType: payload.UpdateEffectReport, + }, + want: []error{ + &payload.UpdateError{ + Name: "Fail After Interval", + UpdateEffect: payload.UpdateEffectFailAfterInterval, + }, + &payload.UpdateError{ + Name: "None #1", + UpdateEffect: payload.UpdateEffectNone, + }, + &payload.UpdateError{ + Name: "None #2", + UpdateEffect: payload.UpdateEffectNone, + }, + }, + }, + { + name: "errors contain only update errors of the specified value UpdateEffectNone", + args: args{ + errs: []error{ + &payload.UpdateError{ + Name: "None #1", + UpdateEffect: payload.UpdateEffectNone, + }, + &payload.UpdateError{ + Name: "None #2", + UpdateEffect: payload.UpdateEffectNone, + }, + &payload.UpdateError{ + Name: "None #3", + UpdateEffect: payload.UpdateEffectNone, + }, + &payload.UpdateError{ + Name: "None #4", + UpdateEffect: payload.UpdateEffectNone, + }, + }, + updateEffectType: payload.UpdateEffectNone, + }, + want: []error{}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + filtered := filterOutUpdateErrors(tt.args.errs, tt.args.updateEffectType) + if difference := cmp.Diff(filtered, tt.want); difference != "" { + t.Errorf("got errors differ from expected:\n%s", difference) + } + }) + } +}