diff --git a/ci-operator/config/openshift/openshift-tests-private/openshift-openshift-tests-private-release-4.21__multi-nightly.yaml b/ci-operator/config/openshift/openshift-tests-private/openshift-openshift-tests-private-release-4.21__multi-nightly.yaml
index 7c360814dafd7..e7943dd3b8948 100644
--- a/ci-operator/config/openshift/openshift-tests-private/openshift-openshift-tests-private-release-4.21__multi-nightly.yaml
+++ b/ci-operator/config/openshift/openshift-tests-private/openshift-openshift-tests-private-release-4.21__multi-nightly.yaml
@@ -211,6 +211,40 @@ tests:
     test:
     - chain: openshift-e2e-test-qe-destructive
     workflow: cucushift-installer-rehearse-aws-ipi-byo-kms-etcd-encryption
+- as: aws-ipi-localzone-rootvolume-f7
+  cron: 52 7 3,10,17,24 * *
+  steps:
+    cluster_profile: aws-1-qe
+    env:
+      AWS_DEFAULT_GP3_THROUGHPUT: "500"
+      AWS_DEFAULT_MACHINE_VOLUME_SIZE: "120"
+      AWS_DEFAULT_MACHINE_VOLUME_TYPE: gp3
+      BASE_DOMAIN: qe.devcluster.openshift.com
+      EDGE_ZONES_LIST: us-east-1-dfw-2a
+      ENABLE_AWS_EDGE_ZONE: "yes"
+    test:
+    - chain: openshift-e2e-test-qe
+    workflow: cucushift-installer-rehearse-aws-ipi-edge-zone-rootvolume
+- as: aws-ipi-localzone-rootvolume-f28-destructive
+  cron: 16 9 15 * *
+  steps:
+    cluster_profile: aws-1-qe
+    env:
+      AWS_COMPUTE_GP3_THROUGHPUT: "1000"
+      AWS_COMPUTE_VOLUME_SIZE: "120"
+      AWS_COMPUTE_VOLUME_TYPE: gp3
+      AWS_CONTROL_PLANE_GP3_THROUGHPUT: "1200"
+      AWS_CONTROL_PLANE_VOLUME_SIZE: "150"
+      AWS_CONTROL_PLANE_VOLUME_TYPE: gp3
+      AWS_EDGE_GP3_THROUGHPUT: "1000"
+      AWS_EDGE_VOLUME_SIZE: "120"
+      AWS_EDGE_VOLUME_TYPE: gp3
+      BASE_DOMAIN: qe.devcluster.openshift.com
+      EDGE_ZONES_LIST: us-east-1-dfw-2a
+      ENABLE_AWS_EDGE_ZONE: "yes"
+    test:
+    - chain: openshift-e2e-test-qe-destructive
+    workflow: cucushift-installer-rehearse-aws-ipi-edge-zone-rootvolume
 - as: aws-ipi-byo-route53-compact-cloudfront-amd-f28-destructive
   cron: 32 20 19 * *
   steps:
diff --git
a/ci-operator/jobs/openshift/openshift-tests-private/openshift-openshift-tests-private-release-4.21-periodics.yaml b/ci-operator/jobs/openshift/openshift-tests-private/openshift-openshift-tests-private-release-4.21-periodics.yaml
index 0718a1411718c..2f489c05f26f1 100644
--- a/ci-operator/jobs/openshift/openshift-tests-private/openshift-openshift-tests-private-release-4.21-periodics.yaml
+++ b/ci-operator/jobs/openshift/openshift-tests-private/openshift-openshift-tests-private-release-4.21-periodics.yaml
@@ -70410,6 +70410,170 @@ periodics:
     - name: result-aggregator
       secret:
         secretName: result-aggregator
+- agent: kubernetes
+  cluster: build09
+  cron: 16 9 15 * *
+  decorate: true
+  decoration_config:
+    skip_cloning: true
+  extra_refs:
+  - base_ref: release-4.21
+    org: openshift
+    repo: openshift-tests-private
+  labels:
+    ci-operator.openshift.io/cloud: aws
+    ci-operator.openshift.io/cloud-cluster-profile: aws-1-qe
+    ci-operator.openshift.io/variant: multi-nightly
+    ci.openshift.io/generator: prowgen
+    job-release: "4.21"
+    pj-rehearse.openshift.io/can-be-rehearsed: "true"
+  name: periodic-ci-openshift-openshift-tests-private-release-4.21-multi-nightly-aws-ipi-localzone-rootvolume-f28-destructive
+  spec:
+    containers:
+    - args:
+      - --gcs-upload-secret=/secrets/gcs/service-account.json
+      - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
+      - --lease-server-credentials-file=/etc/boskos/credentials
+      - --oauth-token-path=/usr/local/github-credentials/oauth
+      - --report-credentials-file=/etc/report/credentials
+      - --secret-dir=/secrets/ci-pull-credentials
+      - --target=aws-ipi-localzone-rootvolume-f28-destructive
+      - --variant=multi-nightly
+      command:
+      - ci-operator
+      image: quay-proxy.ci.openshift.org/openshift/ci:ci_ci-operator_latest
+      imagePullPolicy: Always
+      name: ""
+      resources:
+        requests:
+          cpu: 10m
+      volumeMounts:
+      - mountPath: /etc/boskos
+        name: boskos
+        readOnly: true
+      - mountPath: /secrets/ci-pull-credentials
+        name: ci-pull-credentials
+        readOnly: true
+      - mountPath: /secrets/gcs
+        name: gcs-credentials
+        readOnly: true
+      - mountPath: /usr/local/github-credentials
+        name: github-credentials-openshift-ci-robot-private-git-cloner
+        readOnly: true
+      - mountPath: /secrets/manifest-tool
+        name: manifest-tool-local-pusher
+        readOnly: true
+      - mountPath: /etc/pull-secret
+        name: pull-secret
+        readOnly: true
+      - mountPath: /etc/report
+        name: result-aggregator
+        readOnly: true
+    serviceAccountName: ci-operator
+    volumes:
+    - name: boskos
+      secret:
+        items:
+        - key: credentials
+          path: credentials
+        secretName: boskos-credentials
+    - name: ci-pull-credentials
+      secret:
+        secretName: ci-pull-credentials
+    - name: github-credentials-openshift-ci-robot-private-git-cloner
+      secret:
+        secretName: github-credentials-openshift-ci-robot-private-git-cloner
+    - name: manifest-tool-local-pusher
+      secret:
+        secretName: manifest-tool-local-pusher
+    - name: pull-secret
+      secret:
+        secretName: registry-pull-credentials
+    - name: result-aggregator
+      secret:
+        secretName: result-aggregator
+- agent: kubernetes
+  cluster: build09
+  cron: 52 7 3,10,17,24 * *
+  decorate: true
+  decoration_config:
+    skip_cloning: true
+  extra_refs:
+  - base_ref: release-4.21
+    org: openshift
+    repo: openshift-tests-private
+  labels:
+    ci-operator.openshift.io/cloud: aws
+    ci-operator.openshift.io/cloud-cluster-profile: aws-1-qe
+    ci-operator.openshift.io/variant: multi-nightly
+    ci.openshift.io/generator: prowgen
+    job-release: "4.21"
+    pj-rehearse.openshift.io/can-be-rehearsed: "true"
+  name: periodic-ci-openshift-openshift-tests-private-release-4.21-multi-nightly-aws-ipi-localzone-rootvolume-f7
+  spec:
+    containers:
+    - args:
+      - --gcs-upload-secret=/secrets/gcs/service-account.json
+      - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
+      - --lease-server-credentials-file=/etc/boskos/credentials
+      - --oauth-token-path=/usr/local/github-credentials/oauth
+      - --report-credentials-file=/etc/report/credentials
+      - --secret-dir=/secrets/ci-pull-credentials
+      - --target=aws-ipi-localzone-rootvolume-f7
+      - --variant=multi-nightly
+      command:
+      - ci-operator
+      image: quay-proxy.ci.openshift.org/openshift/ci:ci_ci-operator_latest
+      imagePullPolicy: Always
+      name: ""
+      resources:
+        requests:
+          cpu: 10m
+      volumeMounts:
+      - mountPath: /etc/boskos
+        name: boskos
+        readOnly: true
+      - mountPath: /secrets/ci-pull-credentials
+        name: ci-pull-credentials
+        readOnly: true
+      - mountPath: /secrets/gcs
+        name: gcs-credentials
+        readOnly: true
+      - mountPath: /usr/local/github-credentials
+        name: github-credentials-openshift-ci-robot-private-git-cloner
+        readOnly: true
+      - mountPath: /secrets/manifest-tool
+        name: manifest-tool-local-pusher
+        readOnly: true
+      - mountPath: /etc/pull-secret
+        name: pull-secret
+        readOnly: true
+      - mountPath: /etc/report
+        name: result-aggregator
+        readOnly: true
+    serviceAccountName: ci-operator
+    volumes:
+    - name: boskos
+      secret:
+        items:
+        - key: credentials
+          path: credentials
+        secretName: boskos-credentials
+    - name: ci-pull-credentials
+      secret:
+        secretName: ci-pull-credentials
+    - name: github-credentials-openshift-ci-robot-private-git-cloner
+      secret:
+        secretName: github-credentials-openshift-ci-robot-private-git-cloner
+    - name: manifest-tool-local-pusher
+      secret:
+        secretName: manifest-tool-local-pusher
+    - name: pull-secret
+      secret:
+        secretName: registry-pull-credentials
+    - name: result-aggregator
+      secret:
+        secretName: result-aggregator
 - agent: kubernetes
   cluster: build09
   cron: 29 21 5,12,21,28 * *
diff --git a/ci-operator/step-registry/cucushift/installer/check/aws/rootvolume/OWNERS b/ci-operator/step-registry/cucushift/installer/check/aws/rootvolume/OWNERS
new file mode 100644
index 0000000000000..ceede07f9a881
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/check/aws/rootvolume/OWNERS
@@ -0,0 +1,8 @@
+approvers:
+- yunjiang29
+- gpei
+- liweinan
+reviewers:
+- yunjiang29
+- gpei
+- liweinan
diff --git
a/ci-operator/step-registry/cucushift/installer/check/aws/rootvolume/cucushift-installer-check-aws-rootvolume-commands.sh b/ci-operator/step-registry/cucushift/installer/check/aws/rootvolume/cucushift-installer-check-aws-rootvolume-commands.sh
new file mode 100755
index 0000000000000..ea683cadc2994
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/check/aws/rootvolume/cucushift-installer-check-aws-rootvolume-commands.sh
@@ -0,0 +1,155 @@
+#!/bin/bash
+
+set -o nounset
+set -o errexit
+set -o pipefail
+
+# save the exit code for junit xml file generated in step gather-must-gather
+# pre configuration steps before running installation, exit code 100 if failed,
+# save to install-pre-config-status.txt
+# post check steps after cluster installation, exit code 101 if failed,
+# save to install-post-check-status.txt
+EXIT_CODE=101
+trap 'if [[ "$?" == 0 ]]; then EXIT_CODE=0; fi; echo "${EXIT_CODE}" > "${SHARED_DIR}/install-post-check-status.txt"' EXIT TERM
+
+export AWS_SHARED_CREDENTIALS_FILE="${CLUSTER_PROFILE_DIR}/.awscred"
+
+if [ -f "${SHARED_DIR}/kubeconfig" ] ; then
+  export KUBECONFIG="${SHARED_DIR}/kubeconfig"
+else
+  echo "No KUBECONFIG found, exit now"
+  exit 1
+fi
+
+# Get cluster infrastructure details
+INFRA_ID=$(jq -r '.infraID' "${SHARED_DIR}/metadata.json")
+CLUSTER_ID="${INFRA_ID}"
+REGION=$(oc get infrastructure cluster -o jsonpath='{.status.platformStatus.aws.region}')
+
+echo "Cluster ID: ${CLUSTER_ID}"
+echo "Region: ${REGION}"
+
+CONFIG="${SHARED_DIR}/install-config.yaml"
+
+if [ ! -f "${CONFIG}" ]; then
+  echo "No install-config found, exit now"
+  exit 1
+fi
+
+# Print the value found at a yq path of install-config.yaml.
+# Read failures are deliberately swallowed so that callers receive an empty
+# string and can fall back with "${VAR:-default}".
+function read_install_config() {
+  local query="$1"
+  yq-go r "${CONFIG}" "${query}" 2>/dev/null || true
+}
+
+# Read all root volume type/throughput/iops configurations from install-config.yaml
+# worker pool is at compute[0], edge pool is at compute[1]
+DEFAULT_TYPE=$(read_install_config 'platform.aws.defaultMachinePlatform.rootVolume.type')
+DEFAULT_THROUGHPUT=$(read_install_config 'platform.aws.defaultMachinePlatform.rootVolume.throughput')
+DEFAULT_IOPS=$(read_install_config 'platform.aws.defaultMachinePlatform.rootVolume.iops')
+
+CONTROL_PLANE_TYPE=$(read_install_config 'controlPlane.platform.aws.rootVolume.type')
+CONTROL_PLANE_THROUGHPUT=$(read_install_config 'controlPlane.platform.aws.rootVolume.throughput')
+CONTROL_PLANE_IOPS=$(read_install_config 'controlPlane.platform.aws.rootVolume.iops')
+
+COMPUTE_TYPE=$(read_install_config "compute[0].platform.aws.rootVolume.type")
+COMPUTE_THROUGHPUT=$(read_install_config "compute[0].platform.aws.rootVolume.throughput")
+COMPUTE_IOPS=$(read_install_config "compute[0].platform.aws.rootVolume.iops")
+
+CONTROL_PLANE_COUNT=$(read_install_config "controlPlane.replicas")
+COMPUTE_COUNT=$(read_install_config "compute[0].replicas")
+
+ret=0
+
+# Dump every volume tagged as owned by this cluster; failure messages point at this file.
+VOLS_JSON="${ARTIFACT_DIR}/vols.json"
+aws --region "${REGION}" ec2 describe-volumes --filters "Name=tag:kubernetes.io/cluster/${INFRA_ID},Values=owned" > "${VOLS_JSON}"
+
+# Count the dumped volumes of one role that have the expected settings and compare
+# the count with the pool's replica count. A mismatch increments the global "ret".
+# Arguments:
+#   $1 - role, matched as "-<role>-" inside the volume's Name tag
+#   $2 - expected volume type
+#   $3 - expected throughput (passed to jq as a JSON number)
+#   $4 - expected iops (passed to jq as a JSON number)
+#   $5 - expected number of matching volumes
+function volume_check() {
+  local role=$1
+  local expect_type=$2
+  local expect_throughput=$3
+  local expect_iops=$4
+  local expect_count=$5
+
+  echo "Checking ${role} volumes: type=${expect_type}, throughput=${expect_throughput}, iops=${expect_iops}, count=${expect_count}"
+
+  # any() folds the tag list into a single boolean per volume, and ".Tags[]?"
+  # keeps jq from aborting on an entry that has no Tags array.
+  local matched
+  matched=$(jq -r --arg r "-${role}-" --arg t "${expect_type}" --argjson tp "${expect_throughput}" --argjson i "${expect_iops}" \
+    '[.Volumes[] | select(any(.Tags[]?; .Key == "Name" and (.Value | contains($r))) and .Iops == $i and .VolumeType == $t and .Throughput == $tp)] | length' "${VOLS_JSON}")
+
+  if [ "${matched}" != "${expect_count}" ]; then
+    echo "ERROR: ${role} volumes mismatch (expected ${expect_count}, got ${matched}). See $(basename "${VOLS_JSON}")"
+    ret=$((ret+1))
+  else
+    echo "PASS: ${role} volumes match expected configuration."
+  fi
+}
+
+echo "-------------------------------------------------------------"
+echo "Checking root volumes (gp3: type/throughput/iops/count)"
+echo "-------------------------------------------------------------"
+
+# control-plane (always expected)
+EXPECTED_CONTROL_PLANE_TYPE="${CONTROL_PLANE_TYPE:-${DEFAULT_TYPE}}"
+EXPECTED_CONTROL_PLANE_THROUGHPUT="${CONTROL_PLANE_THROUGHPUT:-${DEFAULT_THROUGHPUT}}"
+EXPECTED_CONTROL_PLANE_IOPS="${CONTROL_PLANE_IOPS:-${DEFAULT_IOPS}}"
+
+if [[ "${EXPECTED_CONTROL_PLANE_TYPE}" == "gp3" && -n "${EXPECTED_CONTROL_PLANE_THROUGHPUT}" && -n "${EXPECTED_CONTROL_PLANE_IOPS}" && -n "${CONTROL_PLANE_COUNT}" ]]; then
+  volume_check "master" "${EXPECTED_CONTROL_PLANE_TYPE}" "${EXPECTED_CONTROL_PLANE_THROUGHPUT}" "${EXPECTED_CONTROL_PLANE_IOPS}" "${CONTROL_PLANE_COUNT}"
+else
+  echo "SKIP: control-plane volumes not fully specified."
+fi
+
+# worker pool (compute[0])
+EXPECTED_COMPUTE_TYPE="${COMPUTE_TYPE:-${DEFAULT_TYPE}}"
+EXPECTED_COMPUTE_THROUGHPUT="${COMPUTE_THROUGHPUT:-${DEFAULT_THROUGHPUT}}"
+EXPECTED_COMPUTE_IOPS="${COMPUTE_IOPS:-${DEFAULT_IOPS}}"
+
+if [[ "${EXPECTED_COMPUTE_TYPE}" == "gp3" && -n "${EXPECTED_COMPUTE_THROUGHPUT}" && -n "${EXPECTED_COMPUTE_IOPS}" && -n "${COMPUTE_COUNT}" ]]; then
+  volume_check "worker" "${EXPECTED_COMPUTE_TYPE}" "${EXPECTED_COMPUTE_THROUGHPUT}" "${EXPECTED_COMPUTE_IOPS}" "${COMPUTE_COUNT}"
+else
+  echo "SKIP: worker volumes not fully specified."
+fi
+
+# edge pool (compute[1]) only when edge zone is enabled
+# ":-no" keeps "set -o nounset" from aborting when the variable is not exported
+if [[ "${ENABLE_AWS_EDGE_ZONE:-no}" == "yes" ]]; then
+  EDGE_TYPE=$(read_install_config "compute[1].platform.aws.rootVolume.type")
+  EDGE_THROUGHPUT=$(read_install_config "compute[1].platform.aws.rootVolume.throughput")
+  EDGE_IOPS=$(read_install_config "compute[1].platform.aws.rootVolume.iops")
+  EDGE_COUNT=$(read_install_config "compute[1].replicas")
+
+  EXPECTED_EDGE_TYPE="${EDGE_TYPE:-${DEFAULT_TYPE}}"
+  EXPECTED_EDGE_THROUGHPUT="${EDGE_THROUGHPUT:-${DEFAULT_THROUGHPUT}}"
+  EXPECTED_EDGE_IOPS="${EDGE_IOPS:-${DEFAULT_IOPS}}"
+
+  if [[ "${EXPECTED_EDGE_TYPE}" == "gp3" && -n "${EXPECTED_EDGE_THROUGHPUT}" && -n "${EXPECTED_EDGE_IOPS}" && -n "${EDGE_COUNT}" ]]; then
+    volume_check "edge" "${EXPECTED_EDGE_TYPE}" "${EXPECTED_EDGE_THROUGHPUT}" "${EXPECTED_EDGE_IOPS}" "${EDGE_COUNT}"
+  else
+    echo "SKIP: edge volumes not fully specified or edge zone disabled."
+  fi
+fi
+
+echo "-------------------------------------------------------------"
+echo "Test Summary"
+echo "-------------------------------------------------------------"
+if [ ${ret} -eq 0 ]; then
+  echo "All root volume checks passed."
+else
+  echo "Some root volume checks failed. See $(basename "${VOLS_JSON}") for details."
+fi
+
+exit ${ret}
diff --git a/ci-operator/step-registry/cucushift/installer/check/aws/rootvolume/cucushift-installer-check-aws-rootvolume-ref.metadata.json b/ci-operator/step-registry/cucushift/installer/check/aws/rootvolume/cucushift-installer-check-aws-rootvolume-ref.metadata.json
new file mode 100644
index 0000000000000..c6f6faf83af26
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/check/aws/rootvolume/cucushift-installer-check-aws-rootvolume-ref.metadata.json
@@ -0,0 +1,15 @@
+{
+	"path": "cucushift/installer/check/aws/rootvolume/cucushift-installer-check-aws-rootvolume-ref.yaml",
+	"owners": {
+		"approvers": [
+			"yunjiang29",
+			"gpei",
+			"liweinan"
+		],
+		"reviewers": [
+			"yunjiang29",
+			"gpei",
+			"liweinan"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/cucushift/installer/check/aws/rootvolume/cucushift-installer-check-aws-rootvolume-ref.yaml b/ci-operator/step-registry/cucushift/installer/check/aws/rootvolume/cucushift-installer-check-aws-rootvolume-ref.yaml
new file mode 100644
index 0000000000000..cb7235bd1cd16
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/check/aws/rootvolume/cucushift-installer-check-aws-rootvolume-ref.yaml
@@ -0,0 +1,24 @@
+ref:
+  as: cucushift-installer-check-aws-rootvolume
+  from_image:
+    namespace: ocp
+    name: "4.21"
+    tag: upi-installer
+  grace_period: 10m
+  commands: cucushift-installer-check-aws-rootvolume-commands.sh
+  resources:
+    requests:
+      cpu: 10m
+      memory: 100Mi
+  env:
+  - name: ENABLE_AWS_EDGE_ZONE
+    default: "no"
+    documentation: |-
+      Set to "yes" to enable edge zone support and check edge node root volumes.
+      When enabled, the script will look for edge compute pool in install-config.yaml
+      and verify edge node root volume configurations.
+  documentation: |-
+    Validate AWS root volume configuration post-installation:
+    - Ensure gp3 throughput configuration is applied on worker and control plane nodes
+    - Report mismatched or missing throughput settings for root volumes
+    - When ENABLE_AWS_EDGE_ZONE=yes, also validates edge node root volumes
diff --git a/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/OWNERS b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/OWNERS
new file mode 100644
index 0000000000000..a289759113618
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/OWNERS
@@ -0,0 +1,8 @@
+approvers:
+- jianlinliu
+- yunjiang29
+- gpei
+reviewers:
+- jianlinliu
+- yunjiang29
+- gpei
diff --git a/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/cucushift-installer-rehearse-aws-ipi-edge-zone-rootvolume-workflow.metadata.json b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/cucushift-installer-rehearse-aws-ipi-edge-zone-rootvolume-workflow.metadata.json
new file mode 100644
index 0000000000000..04a5b4218d1df
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/cucushift-installer-rehearse-aws-ipi-edge-zone-rootvolume-workflow.metadata.json
@@ -0,0 +1,15 @@
+{
+	"path": "cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/cucushift-installer-rehearse-aws-ipi-edge-zone-rootvolume-workflow.yaml",
+	"owners": {
+		"approvers": [
+			"jianlinliu",
+			"yunjiang29",
+			"gpei"
+		],
+		"reviewers": [
+			"jianlinliu",
+			"yunjiang29",
+			"gpei"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/cucushift-installer-rehearse-aws-ipi-edge-zone-rootvolume-workflow.yaml b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/cucushift-installer-rehearse-aws-ipi-edge-zone-rootvolume-workflow.yaml
new file mode 100644
index 0000000000000..2efc2238e21e5
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/cucushift-installer-rehearse-aws-ipi-edge-zone-rootvolume-workflow.yaml
@@ -0,0 +1,16 @@
+workflow:
+  as: cucushift-installer-rehearse-aws-ipi-edge-zone-rootvolume
+  steps:
+    pre:
+    - chain: cucushift-installer-rehearse-aws-ipi-edge-zone-rootvolume-provision
+    - ref: cucushift-installer-reportportal-marker
+    post:
+    - chain: cucushift-installer-rehearse-aws-ipi-deprovision
+    - ref: send-results-to-reportportal
+  documentation: |-
+    This is the workflow to trigger Prow's rehearsal test for AWS root volume
+    configuration tests with edge zones. It uses phase 2 workflow where edge zone subnets are
+    automatically provisioned by the installer (if edge zone is enabled). It includes
+    root volume configuration and post-installation verification in the pre phase.
+    Supports both default machine pool settings and individual settings for
+    compute/control plane/edge nodes.
diff --git a/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/provision/OWNERS b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/provision/OWNERS
new file mode 100644
index 0000000000000..a289759113618
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/provision/OWNERS
@@ -0,0 +1,8 @@
+approvers:
+- jianlinliu
+- yunjiang29
+- gpei
+reviewers:
+- jianlinliu
+- yunjiang29
+- gpei
diff --git a/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/provision/cucushift-installer-rehearse-aws-ipi-edge-zone-rootvolume-provision-chain.metadata.json b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/provision/cucushift-installer-rehearse-aws-ipi-edge-zone-rootvolume-provision-chain.metadata.json
new file mode 100644
index 0000000000000..f47e99d0abc4e
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/provision/cucushift-installer-rehearse-aws-ipi-edge-zone-rootvolume-provision-chain.metadata.json
@@ -0,0 +1,15 @@
+{
+	"path": "cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/provision/cucushift-installer-rehearse-aws-ipi-edge-zone-rootvolume-provision-chain.yaml",
+	"owners": {
+		"approvers": [
+			"jianlinliu",
+			"yunjiang29",
+			"gpei"
+		],
+		"reviewers": [
+			"jianlinliu",
+			"yunjiang29",
+			"gpei"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/provision/cucushift-installer-rehearse-aws-ipi-edge-zone-rootvolume-provision-chain.yaml b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/provision/cucushift-installer-rehearse-aws-ipi-edge-zone-rootvolume-provision-chain.yaml
new file mode 100644
index 0000000000000..f728cdb6f9c76
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/edge-zone/rootvolume/provision/cucushift-installer-rehearse-aws-ipi-edge-zone-rootvolume-provision-chain.yaml
@@ -0,0 +1,50 @@
+chain:
+  as: cucushift-installer-rehearse-aws-ipi-edge-zone-rootvolume-provision
+  steps:
+  - ref: ipi-install-rbac
+  - ref: openshift-cluster-bot-rbac
+  - ref: ipi-aws-pre-edge-zones-opt-in
+  - ref: ipi-conf
+  - ref: ipi-conf-telemetry
+  - ref: ipi-conf-aws
+  - ref: ipi-conf-aws-edge-zone
+  - ref: ipi-conf-aws-rootvolume
+  - ref: ipi-install-monitoringpvc
+  - ref: ipi-conf-aws-usage-info
+  - chain: aws-provision-iam-user-minimal-permission
+  - ref: ipi-install-install-aws
+  - ref: health-check-node-count
+  - ref: cucushift-installer-check-aws-rootvolume
+  - ref: ipi-install-times-collection
+  - ref: enable-qe-catalogsource
+  - ref: storage-conf-storageclass-set-default-storageclass
+  - ref: cucushift-installer-check-aws-edge-zone
+  - chain: cucushift-installer-check
+  env:
+  - name: CONTROL_PLANE_INSTANCE_TYPE
+    default: "m6i.xlarge"
+    documentation: "Instance type for control plane nodes"
+  - name: COMPUTE_NODE_TYPE
+    default: "m5.xlarge"
+    documentation: "Instance type for compute nodes"
+  - name: ENABLE_AWS_EDGE_ZONE
+    default: "yes"
+  - name: EDGE_ZONE_TYPES
+    default: "local-zone"
+  - name: EDGE_NODE_WORKER_SCHEDULABLE
+    default: "no"
+  - name: EDGE_NODE_WORKER_ASSIGN_PUBLIC_IP
+    default: "no"
+  - name: EDGE_NODE_WORKER_NUMBER
+    default: "1"
+  - name: EDGE_NODE_INSTANCE_TYPE
+    default: ""
+  - name: EXPECTED_COMPUTE_NODE_COUNT
+    default: "4"
+  - name: REQUIRED_DEFAULT_STORAGECLASS
+    default: "gp2-csi"
+  documentation: |-
+    Create an IPI cluster on AWS for QE e2e tests with root volume configuration and edge zones.
+    Supports both standard compute pools and edge zone compute pools.
+    Root volume configuration can be set via defaultMachinePlatform or individual
+    compute/controlPlane/edge pool settings.
diff --git a/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/rootvolume/OWNERS b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/rootvolume/OWNERS
new file mode 100644
index 0000000000000..a289759113618
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/rootvolume/OWNERS
@@ -0,0 +1,8 @@
+approvers:
+- jianlinliu
+- yunjiang29
+- gpei
+reviewers:
+- jianlinliu
+- yunjiang29
+- gpei
diff --git a/ci-operator/step-registry/ipi/conf/aws/rootvolume/ipi-conf-aws-rootvolume-commands.sh b/ci-operator/step-registry/ipi/conf/aws/rootvolume/ipi-conf-aws-rootvolume-commands.sh
index 7ae2a1da2ff9a..f44b2ac47cbab 100755
--- a/ci-operator/step-registry/ipi/conf/aws/rootvolume/ipi-conf-aws-rootvolume-commands.sh
+++ b/ci-operator/step-registry/ipi/conf/aws/rootvolume/ipi-conf-aws-rootvolume-commands.sh
@@ -6,47 +6,113 @@ set -o pipefail
 
 CONFIG="${SHARED_DIR}/install-config.yaml"
 
-if [[ "${AWS_COMPUTE_VOLUME_TYPE}" != "" ]]; then
-  echo "Compute volume type: ${AWS_COMPUTE_VOLUME_TYPE}"
-  PATCH=$(mktemp)
-  cat >> "${PATCH}" << EOF
-compute:
-- platform:
-    aws:
-      rootVolume:
-        type: ${AWS_COMPUTE_VOLUME_TYPE}
-        size: ${AWS_COMPUTE_VOLUME_SIZE}
-EOF
-  cat "${PATCH}"
-  yq-go m -x -i "${CONFIG}" "${PATCH}"
-fi
-
-if [[ "${AWS_CONTROL_PLANE_VOLUME_TYPE}" != "" ]]; then
-  echo "Control plane volume type: ${AWS_CONTROL_PLANE_VOLUME_TYPE}"
-  PATCH=$(mktemp)
-  cat >> "${PATCH}" << EOF
-controlPlane:
-  platform:
-    aws:
-      rootVolume:
-        type: ${AWS_CONTROL_PLANE_VOLUME_TYPE}
-        size: ${AWS_CONTROL_PLANE_VOLUME_SIZE}
-EOF
-  cat "${PATCH}"
-  yq-go m -x -i "${CONFIG}" "${PATCH}"
-fi
-
-if [[ "${AWS_DEFAULT_MACHINE_VOLUME_TYPE}" != "" ]]; then
-  echo "Default machine volume type: ${AWS_DEFAULT_MACHINE_VOLUME_TYPE}"
-  PATCH=$(mktemp)
-  cat >> "${PATCH}" << EOF
-platform:
-  aws:
-    defaultMachinePlatform:
-      rootVolume:
-        type: ${AWS_DEFAULT_MACHINE_VOLUME_TYPE}
-        size: ${AWS_DEFAULT_MACHINE_VOLUME_SIZE}
-EOF
-  cat "${PATCH}"
-  yq-go m -x -i "${CONFIG}" "${PATCH}"
+if [ ! -f "${CONFIG}" ]; then
+  echo "No install-config found, exit now"
+  exit 1
 fi
+
+# Print the minimum gp3 IOPS that AWS accepts for a throughput given in MiB/s.
+# AWS constraint: throughput / iops <= 0.25, so iops >= throughput / 0.25
+# The result is rounded up to the nearest 100 for safety and never drops below
+# 3000, the gp3 baseline (https://aws.amazon.com/cn/ebs/volume-types).
+# Arguments: $1 - throughput, must be a positive decimal integer.
+# Returns 1 with a message on stderr for any other input, instead of letting the
+# arithmetic below fail obscurely (or read a leading-zero value as octal).
+function get_iops_from_throughput() {
+  local throughput="$1"
+  if [[ ! "${throughput}" =~ ^[1-9][0-9]*$ ]]; then
+    echo "ERROR: gp3 throughput must be a positive integer, got '${throughput}'" >&2
+    return 1
+  fi
+  local iops=$(( (throughput * 4 + 99) / 100 * 100 ))
+  if (( iops < 3000 )); then
+    iops=3000
+  fi
+  echo "${iops}"
+}
+
+# Write the root volume settings of one machine pool into install-config.yaml.
+# Every setting is optional: an empty value leaves the corresponding key untouched.
+# Arguments:
+#   $1 - pool label used in log messages
+#   $2 - yq path of the pool's rootVolume node
+#   $3 - volume type
+#   $4 - volume size
+#   $5 - gp3 throughput in MiB/s; the matching iops value is derived from it
+function configure_root_volume() {
+  local label="$1"
+  local yq_path="$2"
+  local vol_type="$3"
+  local vol_size="$4"
+  local throughput="$5"
+  local min_iops
+
+  if [[ -n "${vol_type}" ]]; then
+    echo "${label} volume type: ${vol_type}"
+    yq-go w -i "${CONFIG}" "${yq_path}.type" "${vol_type}"
+  fi
+  if [[ -n "${vol_size}" ]]; then
+    yq-go w -i "${CONFIG}" "${yq_path}.size" "${vol_size}"
+  fi
+  if [[ -n "${throughput}" ]]; then
+    min_iops=$(get_iops_from_throughput "${throughput}") || return 1
+    echo "Calculated minimum IOPS: ${min_iops} (based on throughput ${throughput})"
+    yq-go w -i "${CONFIG}" "${yq_path}.iops" "${min_iops}"
+    yq-go w -i "${CONFIG}" "${yq_path}.throughput" "${throughput}"
+  fi
+}
+
+# Print the rootVolume node found at the given yq path, or a placeholder when it
+# is empty. The output is tested rather than the exit status because a missing
+# path is not guaranteed to make yq-go fail.
+# Arguments: $1 - pool label, $2 - yq path of the pool's rootVolume node
+function show_root_volume() {
+  local label="$1"
+  local yq_path="$2"
+  local current
+
+  current=$(yq-go r "${CONFIG}" "${yq_path}" 2>/dev/null || true)
+  echo "${label} rootVolume:"
+  if [[ -n "${current}" ]]; then
+    echo "${current}"
+  else
+    echo "  (not configured)"
+  fi
+}
+
+COMPUTE_ROOT_VOLUME="compute[0].platform.aws.rootVolume"
+EDGE_ROOT_VOLUME="compute[1].platform.aws.rootVolume"
+CONTROL_PLANE_ROOT_VOLUME="controlPlane.platform.aws.rootVolume"
+DEFAULT_ROOT_VOLUME="platform.aws.defaultMachinePlatform.rootVolume"
+
+echo "-------------------------------------------------------------"
+echo "Root volume configuration"
+echo "-------------------------------------------------------------"
+
+configure_root_volume "compute" "${COMPUTE_ROOT_VOLUME}" \
+  "${AWS_COMPUTE_VOLUME_TYPE:-}" "${AWS_COMPUTE_VOLUME_SIZE:-}" "${AWS_COMPUTE_GP3_THROUGHPUT:-}"
+
+# Edge pool settings are only applied when edge zones are enabled
+if [[ "${ENABLE_AWS_EDGE_ZONE:-}" == "yes" ]]; then
+  configure_root_volume "edge" "${EDGE_ROOT_VOLUME}" \
+    "${AWS_EDGE_VOLUME_TYPE:-}" "${AWS_EDGE_VOLUME_SIZE:-}" "${AWS_EDGE_GP3_THROUGHPUT:-}"
+fi
+
+configure_root_volume "controlPlane" "${CONTROL_PLANE_ROOT_VOLUME}" \
+  "${AWS_CONTROL_PLANE_VOLUME_TYPE:-}" "${AWS_CONTROL_PLANE_VOLUME_SIZE:-}" "${AWS_CONTROL_PLANE_GP3_THROUGHPUT:-}"
+
+# Note: defaultMachinePlatform applies to all pools unless overridden by specific pool settings
+configure_root_volume "defaultMachinePlatform" "${DEFAULT_ROOT_VOLUME}" \
+  "${AWS_DEFAULT_MACHINE_VOLUME_TYPE:-}" "${AWS_DEFAULT_MACHINE_VOLUME_SIZE:-}" "${AWS_DEFAULT_GP3_THROUGHPUT:-}"
+
+echo "-------------------------------------------------------------"
+echo "Configured root volume settings"
+echo "-------------------------------------------------------------"
+
+# Output configured settings for verification
+show_root_volume "Compute pool" "${COMPUTE_ROOT_VOLUME}"
+if [[ "${ENABLE_AWS_EDGE_ZONE:-}" == "yes" ]]; then
+  show_root_volume "Edge pool" "${EDGE_ROOT_VOLUME}"
+fi
+show_root_volume "ControlPlane" "${CONTROL_PLANE_ROOT_VOLUME}"
+show_root_volume "DefaultMachinePlatform" "${DEFAULT_ROOT_VOLUME}"
diff --git a/ci-operator/step-registry/ipi/conf/aws/rootvolume/ipi-conf-aws-rootvolume-ref.yaml b/ci-operator/step-registry/ipi/conf/aws/rootvolume/ipi-conf-aws-rootvolume-ref.yaml
index 8c5183e8cdd2b..4553817a64f90 100644
--- a/ci-operator/step-registry/ipi/conf/aws/rootvolume/ipi-conf-aws-rootvolume-ref.yaml
+++ b/ci-operator/step-registry/ipi/conf/aws/rootvolume/ipi-conf-aws-rootvolume-ref.yaml
@@ -27,6 +27,39 @@ ref:
     default: "120"
     documentation: |-
       default volume size
+  - name: AWS_DEFAULT_GP3_THROUGHPUT
+    default: ""
+    documentation: |-
+      throughput in MiB/s for defaultMachinePlatform gp3 root volumes.
+      When throughput is specified for gp3 volumes, iops is automatically calculated
+      to satisfy AWS constraint (throughput / iops <= 0.25). The minimum required iops
+      is calculated as: iops = max(3000, ceil(throughput * 4 / 100) * 100)
+  - name: AWS_COMPUTE_GP3_THROUGHPUT
+    default: ""
+    documentation: |-
+      throughput in MiB/s for compute gp3 root volumes (overrides defaultMachinePlatform)
+  - name: AWS_CONTROL_PLANE_GP3_THROUGHPUT
+    default: ""
+    documentation: |-
+      throughput in MiB/s for control plane gp3 root volumes (overrides defaultMachinePlatform)
+  - name: AWS_EDGE_VOLUME_TYPE
+    default: ""
+    documentation: |-
+      volume type for edge compute pool volume
+  - name: AWS_EDGE_VOLUME_SIZE
+    default: "120"
+    documentation: |-
+      volume size for edge compute pool volume
+  - name: AWS_EDGE_GP3_THROUGHPUT
+    default: ""
+    documentation: |-
+      throughput in MiB/s for edge compute pool gp3 root volumes (overrides defaultMachinePlatform)
+  - name: ENABLE_AWS_EDGE_ZONE
+    default: "no"
+    documentation: |-
+      Enable AWS edge zone support. When set to "yes", edge compute pool will be created.
+      Edge pool configuration (AWS_EDGE_VOLUME_TYPE, AWS_EDGE_VOLUME_SIZE, AWS_EDGE_GP3_THROUGHPUT)
+      will only be applied if this is set to "yes".
   resources:
     requests:
       cpu: 10m