From f945dbb3fac69cac16ffe773ee71141dc2112efc Mon Sep 17 00:00:00 2001
From: "W. Trevor King"
Date: Tue, 27 Nov 2018 11:28:45 -0800
Subject: [PATCH] awstagdeprovision: Ignore more errors

We're leaking clusters in CI because of errors like [1]:

  time="2018-11-27T18:48:25Z" level=fatal msg="Unrecoverable error/timed out: error converting route53 zones to internal AWS objects: Throttling: Rate exceeded\n\tstatus code: 400, request id: 0573f1b4-f275-11e8-b479-fd079d6c6b48"

With this commit, we just assume that any error will go away
eventually, and keep rolling forward with exponential backoff. When
that assumption breaks down, we expect the caller (e.g. ci-operator or
a human user) to kill teardown (and optionally fix whatever was
blocking it).

Docs for AWS rate limits are in [2]; the main takeaway is that these
limits are set by AWS with no way for us to request changes, and that
most are per-account (not per-VPC or other resource that scales with
the number of simultaneous CI clusters).

[1]: https://storage.googleapis.com/origin-ci-test/pr-logs/pull/openshift_installer/738/pull-ci-openshift-installer-master-e2e-aws/1639/artifacts/e2e-aws/installer/.openshift_install.log
[2]: https://docs.aws.amazon.com/general/latest/gr/aws_service_limits.html
---
 contrib/pkg/awstagdeprovision/awstagdeprovision.go | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/contrib/pkg/awstagdeprovision/awstagdeprovision.go b/contrib/pkg/awstagdeprovision/awstagdeprovision.go
index 5eb68866d9e..f99d82981dd 100644
--- a/contrib/pkg/awstagdeprovision/awstagdeprovision.go
+++ b/contrib/pkg/awstagdeprovision/awstagdeprovision.go
@@ -1157,7 +1157,8 @@ func deleteS3Buckets(session *session.Session, filter AWSFilter, clusterName str
 
 	awsObjects, err := bucketsToAWSObjects(results.Buckets, s3Client, logger)
 	if err != nil {
-		return false, fmt.Errorf("error converting buckets to internal objects: %v", err)
+		logger.Debugf("error converting s3 buckets to native AWS objects: %v", err)
+		return false, nil
 	}
 
 	filteredObjects := filterObjects(awsObjects, filter)
@@ -1381,7 +1382,7 @@ func deleteRoute53(session *session.Session, filters AWSFilter, clusterName stri
 	awsZones, err := r53ZonesToAWSObjects(allZones.HostedZones, r53Client)
 	if err != nil {
 		logger.Debugf("error converting r53Zones to native AWS objects: %v", err)
-		return false, fmt.Errorf("error converting route53 zones to internal AWS objects: %v", err)
+		return false, nil
 	}
 
 	filteredZones := filterObjects(awsZones, filters)