From 6802e3bc9dba27a0a09eee5759d1e02d1b7408bb Mon Sep 17 00:00:00 2001
From: James <james.russo@heygen.com>
Date: Wed, 22 Apr 2026 21:30:09 +0000
Subject: [PATCH 1/2] ci(regression): build test Docker image once, share
 across shards
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Splits regression.yml into a `build-image` job + the existing
`regression-shards` matrix. The build job produces a Docker tarball via
`docker/build-push-action` with `outputs: type=docker,dest=...`, uploads
it as a GHA artifact (retention 1 day, gzip level 1), and each shard
downloads + `docker load`s it instead of rebuilding.

Measured on PR #419 regression runs before the change:
- Docker build step: ~234s per shard WITH GHA layer cache hit
- 11 shards × ~234s = ~43 min of runner time per PR just on redundant
  image builds

Cold-cache cases are much worse — happening right now on PR #419 after
release commit b6f50ce bumped every `packages/*/package.json`, invalidating
the COPY layer that feeds `bun install --frozen-lockfile`. All 11 shards
are currently 25-30+ min into a parallel rebuild, thundering-herding
the same npm packages from 11 runners.

After this change:
- 1× build (~4 min warm, ~15 min cold) + 11× (download + `docker load`)
- Expected ~15-20s overhead per shard for artifact download + load
- Net savings: ~30-40 min of runner time per PR run on warm cache,
  substantially more on cold cache

The build job doesn't check out LFS objects — Dockerfile.test only COPYs
source + package manifests, never the golden baselines, so the image build
never needed LFS. Shards still need LFS for the tests/**/output/output.mp4
baselines they validate against.
---
 .github/workflows/regression.yml | 64 ++++++++++++++++++++++++++------
 1 file changed, 52 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml
index 73871aa2..9acee6e7 100644
--- a/.github/workflows/regression.yml
+++ b/.github/workflows/regression.yml
@@ -30,10 +30,52 @@ jobs:
               - "packages/engine/**"
               - "Dockerfile*"
 
-  regression-shards:
+  # Build the regression Docker image once, export it as a tarball, and upload
+  # as an artifact. Each matrix shard then downloads + `docker load`s it instead
+  # of rebuilding from cache. Measured on PR #419: the Docker build step takes
+  # ~4 min per shard even with GHA cache, so 11 shards = ~44 min of redundant
+  # build time per run. This job replaces that with a single ~4 min build plus
+  # ~15s of artifact download per shard.
+  build-image:
+    name: Build regression test image
     needs: changes
     if: needs.changes.outputs.code == 'true'
     runs-on: ubuntu-latest
+    timeout-minutes: 20  # headroom over the ~15 min cold-cache build
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        # No LFS needed here — Dockerfile.test only copies source + package manifests,
+        # not the golden baselines under packages/producer/tests/**/output.
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build test image to tarball
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: Dockerfile.test
+          tags: hyperframes-producer:test  # tag the shards look up after `docker load`
+          cache-from: type=gha,scope=regression-test-image
+          cache-to: type=gha,mode=max,scope=regression-test-image
+          outputs: type=docker,dest=/tmp/regression-test-image.tar  # write the image to a tarball for upload
+
+      - name: Report image size
+        run: ls -lh /tmp/regression-test-image.tar
+
+      - name: Upload image artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: regression-test-image
+          path: /tmp/regression-test-image.tar
+          retention-days: 1  # artifact is only needed by this run's shards
+          compression-level: 1  # NOTE(review): presumably favors upload speed over size — confirm
+
+  regression-shards:
+    needs: [changes, build-image]
+    if: needs.changes.outputs.code == 'true'
+    runs-on: ubuntu-latest
     timeout-minutes: 40
     strategy:
       fail-fast: false
@@ -79,18 +121,16 @@ jobs:
             fi
           done
 
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Build test Docker image (cached)
-        uses: docker/build-push-action@v6
+      - name: Download test image artifact
+        uses: actions/download-artifact@v4
         with:
-          context: .
-          file: Dockerfile.test
-          load: true
-          tags: hyperframes-producer:test
-          cache-from: type=gha,scope=regression-test-image
-          cache-to: type=gha,mode=max,scope=regression-test-image
+          name: regression-test-image  # must match the artifact name uploaded by build-image
+          path: /tmp  # the load step reads /tmp/regression-test-image.tar
+
+      - name: Load test image  # import the downloaded tarball, then fail fast if the expected tag is absent
+        run: |
+          docker load -i /tmp/regression-test-image.tar
+          docker image inspect --format '{{.RepoTags}} {{.Size}}' hyperframes-producer:test  # non-zero exit if the tag is missing
 
       - name: "Run regression shard: ${{ matrix.shard }}"
         run: |

From e09775cc0191004868bfd7e0a3f56fc399e75d0a Mon Sep 17 00:00:00 2001
From: James <james.russo@heygen.com>
Date: Wed, 22 Apr 2026 21:34:57 +0000
Subject: [PATCH 2/2] ci(regression): add explicit least-privilege permissions

Addresses CodeQL warning 'Workflow does not contain permissions'.
Defaults the workflow GITHUB_TOKEN to `contents: read` only. The
build-image job elevates to `actions: write` because
`docker/build-push-action` with `cache-from/to: type=gha` uses the
GitHub Actions cache API, which needs read+write on the actions scope.
---
 .github/workflows/regression.yml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml
index 9acee6e7..c8dd04eb 100644
--- a/.github/workflows/regression.yml
+++ b/.github/workflows/regression.yml
@@ -11,6 +11,12 @@ concurrency:
   group: regression-${{ github.ref }}
   cancel-in-progress: true
 
+# Least-privilege token: only reading code. Jobs that need more (e.g. GHA
+# cache reads/writes from docker/build-push-action with `type=gha`) elevate
+# their own permissions inline.
+permissions:
+  contents: read
+
 jobs:
   changes:
     name: Detect changes
@@ -42,6 +48,9 @@ jobs:
     if: needs.changes.outputs.code == 'true'
     runs-on: ubuntu-latest
     timeout-minutes: 20
+    permissions:
+      contents: read
+      actions: write # for type=gha cache; NOTE(review): that cache normally uses the runtime token, not GITHUB_TOKEN — confirm needed
     steps:
       - name: Checkout
         uses: actions/checkout@v4