diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index e0233bd8..5fe70d6b 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -37,7 +37,13 @@ jobs:
     needs: [golang, code-scanning]
     secrets: inherit
 
+  e2e-smoke:
+    needs: golang
+    secrets: inherit
+    uses: ./.github/workflows/e2e-smoke.yaml
+
   e2e-test:
     needs: golang
+    if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/release-')
     secrets: inherit
     uses: ./.github/workflows/e2e.yaml
diff --git a/.github/workflows/e2e-smoke.yaml b/.github/workflows/e2e-smoke.yaml
new file mode 100644
index 00000000..88eb1301
--- /dev/null
+++ b/.github/workflows/e2e-smoke.yaml
@@ -0,0 +1,70 @@
+# Copyright 2026 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: E2E Smoke Tests  # reusable workflow (workflow_call): runs Ginkgo e2e suites, one job per label filter
+
+on:
+  workflow_call:
+    secrets:
+      AWS_ACCESS_KEY_ID:
+        required: true
+      AWS_SECRET_ACCESS_KEY:
+        required: true
+      AWS_SSH_KEY:
+        required: true
+
+jobs:
+  e2e-smoke:
+    runs-on: linux-amd64-cpu4
+    strategy:
+      fail-fast: false  # a failing label filter must not cancel the other matrix job
+      matrix:
+        label: ["default && !rpm", "cluster && minimal"]
+    name: E2E Smoke (${{ matrix.label }})
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v6
+
+      - name: Install Go
+        uses: actions/setup-go@v6
+        with:
+          go-version: 'stable'
+          check-latest: true
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y make
+
+      - name: Run smoke e2e test for ${{ matrix.label }}
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          AWS_SSH_KEY: ${{ secrets.AWS_SSH_KEY }}
+          LOG_ARTIFACT_DIR: e2e_logs
+        run: |
+          e2e_ssh_key=$(mktemp)  # temp file that will hold the SSH private key
+          printf '%s\n' "$AWS_SSH_KEY" > "$e2e_ssh_key"  # read the key from env; never template a secret into shell source
+          chmod 600 "$e2e_ssh_key"
+          export E2E_SSH_KEY="$e2e_ssh_key"
+          make -f tests/Makefile test GINKGO_ARGS="--label-filter='${{ matrix.label }}' --json-report ginkgo.json"
+
+      - name: Archive Ginkgo logs
+        if: always()  # upload the JSON report even when the test step failed
+        uses: actions/upload-artifact@v7
+        with:
+          name: ginkgo-smoke-logs-${{ strategy.job-index }}
+          path: ginkgo.json
+          retention-days: 15
diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml
index 2b315dfa..2881500b 100644
--- a/.github/workflows/e2e.yaml
+++ b/.github/workflows/e2e.yaml
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-name: End-to-end Tests
+name: E2E Full Tests
 
 on:
   workflow_call:
@@ -24,13 +24,29 @@ on:
       AWS_SSH_KEY:
         required: true
 
+permissions:
+  issues: write
+
 jobs:
   e2e-test:
     runs-on: linux-amd64-cpu4
-    if: ${{ github.event.workflow_run.conclusion == 'success' }} && ${{ github.event.workflow_run.event == 'push' }}
     strategy:
+      fail-fast: false
       matrix:
-        label: [default, legacy, dra, kernel, rpm-rocky, rpm-al2023, rpm-fedora]
+        label:
+          - legacy
+          - dra
+          - kernel
+          - ctk-git
+          - k8s-git
+          - k8s-kind-git
+          - k8s-latest
+          - "cluster && gpu && !minimal && !ha && !dedicated"
+          - "cluster && dedicated"
+          - "cluster && ha"
+          - rpm-rocky
+          - rpm-al2023
+          - rpm-fedora
     name: E2E Test (${{ matrix.label }})
 
     steps:
@@ -59,10 +75,7 @@ jobs:
           echo "${{ secrets.AWS_SSH_KEY }}" > "$e2e_ssh_key"
           chmod 600 "$e2e_ssh_key"
           export E2E_SSH_KEY="$e2e_ssh_key"
-          EXTRA_ARGS=""
-          if [ "${{ matrix.label }}" = "default" ]; then
-            EXTRA_ARGS="--json-report ginkgo.json"
-          fi
+          EXTRA_ARGS="--json-report ginkgo.json"
           # RPM suites run both single-node and cluster tests (~60min total);
           # increase timeout from the 1h default to avoid cleanup timeouts.
case "${{ matrix.label }}" in @@ -71,9 +84,10 @@ jobs: make -f tests/Makefile test GINKGO_ARGS="--label-filter='${{ matrix.label }}' $EXTRA_ARGS" - name: Archive Ginkgo logs + if: always() uses: actions/upload-artifact@v7 with: - name: ginkgo-logs-${{ matrix.label }} + name: ginkgo-full-logs-${{ strategy.job-index }} path: ginkgo.json retention-days: 15 @@ -113,7 +127,6 @@ jobs: integration-test: runs-on: linux-amd64-cpu4 - if: ${{ github.event.workflow_run.conclusion == 'success' }} && ${{ github.event.workflow_run.event == 'push' }} steps: - name: Checkout code uses: actions/checkout@v6 @@ -124,3 +137,44 @@ jobs: aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws_ssh_key: ${{ secrets.AWS_SSH_KEY }} holodeck_config: "tests/data/test_aws.yml" + + create-issue: + runs-on: ubuntu-latest + needs: [e2e-test, e2e-test-arm64, integration-test] + if: failure() + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Create or update failure issue + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + SHA_SHORT="${GITHUB_SHA:0:8}" + TITLE="E2E failure on ${SHA_SHORT}" + BODY=$(cat <