From b3d04e5fd5f497951761da3fb6f71b555c2dd5fe Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Mon, 27 Sep 2021 15:50:57 -0700 Subject: [PATCH] ci-operator/config/openshift/release: Drop 4.8 -> 4.9 -> 4.8 rollback jobs 4.9 includes the backwards-incompatible etcd disk-schema change from etcd v3.5.0 [1]. That causes rollback jobs to fail like [2,3]: Working towards 4.8.12: 69 of 678 done (10% complete) where the cluster-version operator is waiting for the etcd operator (but hasn't been waiting quite long enough to complain about it by name): $ curl -s https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/periodic-ci-openshift-release-master-ci-4.9-upgrade-from-stable-4.8-e2e-aws-ovn-upgrade-rollback/1441850131237310464/artifacts/e2e-aws-ovn-upgrade-rollback/gather-extra/artifacts/pods/openshift-cluster-version_cluster-version-operator-7cfbc65959-xctxv_cluster-version-operator.log | grep 'Running sync.*in state\|Result of work' | tail -n3 I0925 23:25:54.121866 1 sync_worker.go:541] Running sync registry.build01.ci.openshift.org/ci-op-sqh94dxj/release@sha256:c3af995af7ee85e88c43c943e0a64c7066d90e77fafdabc7b22a095e4ea3c25a (force=true) on generation 3 in state Updating at attempt 11 I0925 23:31:36.036075 1 task_graph.go:555] Result of work: [Cluster operator etcd is degraded] I0925 23:34:38.987554 1 sync_worker.go:541] Running sync registry.build01.ci.openshift.org/ci-op-sqh94dxj/release@sha256:c3af995af7ee85e88c43c943e0a64c7066d90e77fafdabc7b22a095e4ea3c25a (force=true) on generation 3 in state Updating at attempt 12 Seeing what the etcd operator has to say: $ curl -s https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/periodic-ci-openshift-release-master-ci-4.9-upgrade-from-stable-4.8-e2e-aws-ovn-upgrade-rollback/1441850131237310464/artifacts/e2e-aws-ovn-upgrade-rollback/gather-extra/artifacts/clusteroperators.json | jq -r '.items[] | select(.metadata.name == "etcd").status.conditions[] | .lastTransitionTime + " " + 
.type + "=" + .status + " " + (.reason // "-") + ": " + (.message // "-")' 2021-09-25T19:56:27Z RecentBackup=Unknown ControllerStarted: - 2021-09-25T22:01:04Z Degraded=True EtcdMembers_UnhealthyMembers::StaticPods_Error: EtcdMembersDegraded: 2 of 3 members are available, ip-10-0-246-48.us-east-2.compute.internal is unhealthy StaticPodsDegraded: pod/etcd-ip-10-0-246-48.us-east-2.compute.internal container "etcd" is waiting: CrashLoopBackOff: back-off 5m0s restarting failed container=etcd pod=etcd-ip-10-0-246-48.us-east-2.compute.internal_openshift-etcd(967f9e83-e6a2-437e-85e6-c33563286f7f) 2021-09-25T21:55:07Z Progressing=True NodeInstaller: NodeInstallerProgressing: 3 nodes are at revision 4; 0 nodes have achieved new revision 5 2021-09-25T19:57:59Z Available=True AsExpected: StaticPodsAvailable: 3 nodes are active; 3 nodes are at revision 4; 0 nodes have achieved new revision 5 EtcdMembersAvailable: 2 of 3 members are available, ip-10-0-246-48.us-east-2.compute.internal is unhealthy 2021-09-25T19:56:43Z Upgradeable=True AsExpected: All is well And from the logs of that container [4]: {"level":"fatal","ts":"2021-09-25T23:34:47.679Z","caller":"membership/cluster.go:790","msg":"invalid downgrade; server version is lower than determined cluster version","current-server-version":"3.4.14","determined-cluster-version":"3.5","stacktrace":"go.etcd.io/etcd/etcdserver/api/membership.mustDetectDowngrade\n\t/go/src/go.etcd.io/etcd/etcdserver/api/membership/cluster.go:790... We know these rollback jobs will always get stuck on that etcd disk-schema rollback, so there is no sense in spending CI money running them and watching them fail. This commit drops the jobs, and we won't worry about other minor-rollback issues around 4.8-to-4.9. 
[1]: https://bugzilla.redhat.com/show_bug.cgi?id=1999777#c0 [2]: https://prow.ci.openshift.org/view/gs/origin-ci-test/logs/periodic-ci-openshift-release-master-ci-4.9-upgrade-from-stable-4.8-e2e-aws-ovn-upgrade-rollback/1441850131237310464 [3]: https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/periodic-ci-openshift-release-master-ci-4.9-upgrade-from-stable-4.8-e2e-aws-ovn-upgrade-rollback/1441850131237310464/artifacts/e2e-aws-ovn-upgrade-rollback/gather-extra/artifacts/clusterversion.json [4]: https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/periodic-ci-openshift-release-master-ci-4.9-upgrade-from-stable-4.8-e2e-aws-ovn-upgrade-rollback/1441850131237310464/artifacts/e2e-aws-ovn-upgrade-rollback/gather-extra/artifacts/pods/openshift-etcd_etcd-ip-10-0-246-48.us-east-2.compute.internal_etcd.log --- ...aster__ci-4.9-upgrade-from-stable-4.8.yaml | 19 --- .../openshift-release-master-periodics.yaml | 150 ------------------ 2 files changed, 169 deletions(-) diff --git a/ci-operator/config/openshift/release/openshift-release-master__ci-4.9-upgrade-from-stable-4.8.yaml b/ci-operator/config/openshift/release/openshift-release-master__ci-4.9-upgrade-from-stable-4.8.yaml index cae886c5e1e6b..8a57fd068bddd 100644 --- a/ci-operator/config/openshift/release/openshift-release-master__ci-4.9-upgrade-from-stable-4.8.yaml +++ b/ci-operator/config/openshift/release/openshift-release-master__ci-4.9-upgrade-from-stable-4.8.yaml @@ -67,15 +67,6 @@ tests: BASE_DOMAIN: aws-2.ci.openshift.org TEST_TYPE: upgrade-conformance workflow: openshift-upgrade-aws-ovn -- as: e2e-aws-ovn-upgrade-rollback - interval: 24h - steps: - cluster_profile: aws-2 - env: - BASE_DOMAIN: aws-2.ci.openshift.org - TEST_TYPE: upgrade-conformance - TEST_UPGRADE_OPTIONS: abort-at=99 - workflow: openshift-upgrade-aws-ovn - as: e2e-gcp-ovn-upgrade cluster: build01 interval: 48h @@ -98,16 +89,6 @@ tests: env: TEST_TYPE: upgrade-conformance workflow: 
openshift-upgrade-openstack-loki -- as: e2e-aws-upgrade-rollback - interval: 48h - steps: - cluster_profile: aws-2 - env: - BASE_DOMAIN: aws-2.ci.openshift.org - DELETE_MC: "false" - TEST_TYPE: upgrade - TEST_UPGRADE_OPTIONS: abort-at=99 - workflow: openshift-upgrade-aws - as: e2e-aws-uwm interval: 48h steps: diff --git a/ci-operator/jobs/openshift/release/openshift-release-master-periodics.yaml b/ci-operator/jobs/openshift/release/openshift-release-master-periodics.yaml index 2d3115bb5700b..c3d019ff2cf5b 100644 --- a/ci-operator/jobs/openshift/release/openshift-release-master-periodics.yaml +++ b/ci-operator/jobs/openshift/release/openshift-release-master-periodics.yaml @@ -18300,81 +18300,6 @@ periodics: - name: result-aggregator secret: secretName: result-aggregator -- agent: kubernetes - cluster: build01 - decorate: true - decoration_config: - skip_cloning: true - extra_refs: - - base_ref: master - org: openshift - repo: release - interval: 24h - labels: - ci-operator.openshift.io/variant: ci-4.9-upgrade-from-stable-4.8 - ci.openshift.io/generator: prowgen - job-release: "4.9" - pj-rehearse.openshift.io/can-be-rehearsed: "true" - name: periodic-ci-openshift-release-master-ci-4.9-upgrade-from-stable-4.8-e2e-aws-ovn-upgrade-rollback - spec: - containers: - - args: - - --gcs-upload-secret=/secrets/gcs/service-account.json - - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson - - --lease-server-credentials-file=/etc/boskos/credentials - - --report-credentials-file=/etc/report/credentials - - --secret-dir=/secrets/ci-pull-credentials - - --secret-dir=/usr/local/e2e-aws-ovn-upgrade-rollback-cluster-profile - - --target=e2e-aws-ovn-upgrade-rollback - - --variant=ci-4.9-upgrade-from-stable-4.8 - command: - - ci-operator - image: ci-operator:latest - imagePullPolicy: Always - name: "" - resources: - requests: - cpu: 10m - volumeMounts: - - mountPath: /etc/boskos - name: boskos - readOnly: true - - mountPath: /secrets/ci-pull-credentials - name: 
ci-pull-credentials - readOnly: true - - mountPath: /usr/local/e2e-aws-ovn-upgrade-rollback-cluster-profile - name: cluster-profile - - mountPath: /secrets/gcs - name: gcs-credentials - readOnly: true - - mountPath: /etc/pull-secret - name: pull-secret - readOnly: true - - mountPath: /etc/report - name: result-aggregator - readOnly: true - serviceAccountName: ci-operator - volumes: - - name: boskos - secret: - items: - - key: credentials - path: credentials - secretName: boskos-credentials - - name: ci-pull-credentials - secret: - secretName: ci-pull-credentials - - name: cluster-profile - projected: - sources: - - secret: - name: cluster-secrets-aws-2 - - name: pull-secret - secret: - secretName: registry-pull-credentials - - name: result-aggregator - secret: - secretName: result-aggregator - agent: kubernetes cluster: build01 decorate: true @@ -18450,81 +18375,6 @@ periodics: - name: result-aggregator secret: secretName: result-aggregator -- agent: kubernetes - cluster: build01 - decorate: true - decoration_config: - skip_cloning: true - extra_refs: - - base_ref: master - org: openshift - repo: release - interval: 48h - labels: - ci-operator.openshift.io/variant: ci-4.9-upgrade-from-stable-4.8 - ci.openshift.io/generator: prowgen - job-release: "4.9" - pj-rehearse.openshift.io/can-be-rehearsed: "true" - name: periodic-ci-openshift-release-master-ci-4.9-upgrade-from-stable-4.8-e2e-aws-upgrade-rollback - spec: - containers: - - args: - - --gcs-upload-secret=/secrets/gcs/service-account.json - - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson - - --lease-server-credentials-file=/etc/boskos/credentials - - --report-credentials-file=/etc/report/credentials - - --secret-dir=/secrets/ci-pull-credentials - - --secret-dir=/usr/local/e2e-aws-upgrade-rollback-cluster-profile - - --target=e2e-aws-upgrade-rollback - - --variant=ci-4.9-upgrade-from-stable-4.8 - command: - - ci-operator - image: ci-operator:latest - imagePullPolicy: Always - name: "" - resources: 
- requests: - cpu: 10m - volumeMounts: - - mountPath: /etc/boskos - name: boskos - readOnly: true - - mountPath: /secrets/ci-pull-credentials - name: ci-pull-credentials - readOnly: true - - mountPath: /usr/local/e2e-aws-upgrade-rollback-cluster-profile - name: cluster-profile - - mountPath: /secrets/gcs - name: gcs-credentials - readOnly: true - - mountPath: /etc/pull-secret - name: pull-secret - readOnly: true - - mountPath: /etc/report - name: result-aggregator - readOnly: true - serviceAccountName: ci-operator - volumes: - - name: boskos - secret: - items: - - key: credentials - path: credentials - secretName: boskos-credentials - - name: ci-pull-credentials - secret: - secretName: ci-pull-credentials - - name: cluster-profile - projected: - sources: - - secret: - name: cluster-secrets-aws-2 - - name: pull-secret - secret: - secretName: registry-pull-credentials - - name: result-aggregator - secret: - secretName: result-aggregator - agent: kubernetes cluster: build01 decorate: true