awstagdeprovision: Ignore more errors

wking · wking · commit f945dbb3fac6 · 2018-11-27T11:28:45.000-08:00
We're leaking clusters in CI because of errors like [1]:

    time="2018-11-27T18:48:25Z" level=fatal msg="Unrecoverable error/timed out: error converting route53 zones to internal AWS objects: Throttling: Rate exceeded\n\tstatus code: 400, request id: 0573f1b4-f275-11e8-b479-fd079d6c6b48"

With this commit, we just assume that any error will go away eventually, and keep rolling forward with exponential backoff. When that assumption breaks down, we expect the caller (e.g. ci-operator or a human user) to kill teardown (and optionally fix whatever was blocking it).

Docs for AWS rate limits are in [2]; the main takeaway is that these limits are set by AWS with no way for us to request changes, and that most are per-account (not per-VPC or other resource that scales with the number of simultaneous CI clusters).

[1]: https://storage.googleapis.com/origin-ci-test/pr-logs/pull/openshift_installer/738/pull-ci-openshift-installer-master-e2e-aws/1639/artifacts/e2e-aws/installer/.openshift_install.log
[2]: https://docs.aws.amazon.com/general/latest/gr/aws_service_limits.html
diff --git a/contrib/pkg/awstagdeprovision/awstagdeprovision.go b/contrib/pkg/awstagdeprovision/awstagdeprovision.go
@@ -1157,7 +1157,8 @@ func deleteS3Buckets(session *session.Session, filter AWSFilter, clusterName str
 
 		awsObjects, err := bucketsToAWSObjects(results.Buckets, s3Client, logger)
 		if err != nil {
-			return false, fmt.Errorf("error converting buckets to internal objects: %v", err)
+			logger.Debugf("error converting s3 buckets to native AWS objects: %v", err)
+			return false, nil
 		}
 
 		filteredObjects := filterObjects(awsObjects, filter)
@@ -1381,7 +1382,7 @@ func deleteRoute53(session *session.Session, filters AWSFilter, clusterName stri
 		awsZones, err := r53ZonesToAWSObjects(allZones.HostedZones, r53Client)
 		if err != nil {
 			logger.Debugf("error converting r53Zones to native AWS objects: %v", err)
-			return false, fmt.Errorf("error converting route53 zones to internal AWS objects: %v", err)
+			return false, nil
 		}
 
 		filteredZones := filterObjects(awsZones, filters)