From a81882f2fd78f8daf8fd5ad24baffef4bd4b21e5 Mon Sep 17 00:00:00 2001
From: Oseltamivir <bryansg2013@gmail.com>
Date: Sun, 7 Dec 2025 23:39:22 -0800
Subject: [PATCH 01/19] Initial commit, for #304

---
 .github/workflows/pr-comment-sweep.yml |  71 ++++++++++++
 .github/workflows/sweep-executor.yml   | 151 +++++++++++++++++++++++++
 2 files changed, 222 insertions(+)
 create mode 100644 .github/workflows/pr-comment-sweep.yml
 create mode 100644 .github/workflows/sweep-executor.yml

diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml
new file mode 100644
index 000000000..be54869a6
--- /dev/null
+++ b/.github/workflows/pr-comment-sweep.yml
@@ -0,0 +1,71 @@
+name: PR Comment Sweep
+
+on:
+  issue_comment:
+    types: [created]
+
+concurrency:
+  group: "PR-SWEEP-${{ github.event.issue.number }}"
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  parse:
+    # Run only for PR comments, from trusted authors, starting with /sweep
+    if: >-
+      ${{ github.event.issue.pull_request &&
+          startsWith(github.event.comment.body, '/sweep') &&
+          contains('OWNER,MEMBER,COLLABORATOR', github.event.comment.author_association) }}
+    runs-on: ubuntu-latest
+    outputs:
+      pr-number: ${{ steps.parse.outputs.pr-number }}
+      generator-args: ${{ steps.parse.outputs.generator-args }}
+    steps:
+      - name: Derive PR number and parse command
+        id: parse
+        shell: bash
+        env:
+          BODY: ${{ github.event.comment.body }}
+          PR_NUMBER: ${{ github.event.issue.number }}
+        run: |
+          set -euo pipefail
+
+          # Extract first line starting with /sweep
+          cmd_line=$(printf "%s" "$BODY" | awk '/^\/sweep/{print; exit}')
+          if [[ -z "$cmd_line" ]]; then
+            echo "No /sweep command found in comment" >&2
+            exit 1
+          fi
+          cmd_args=${cmd_line#* /sweep}
+          # Handle case when it's exactly '/sweep' (no args)
+          if [[ "$cmd_line" == "/sweep" ]]; then
+            cmd_args=""
+          else
+            cmd_args=${cmd_line#/sweep}
+          fi
+          cmd_args=$(echo "$cmd_args" | xargs || true)
+
+          echo "Command args: $cmd_args"
+
+          echo "generator-args=$cmd_args" >> "$GITHUB_OUTPUT"
+          echo "pr-number=$PR_NUMBER" >> "$GITHUB_OUTPUT"
+
+  execute:
+    needs: parse
+    uses: ./.github/workflows/sweep-executor.yml
+    secrets: inherit
+    with:
+      pr-number: ${{ needs.parse.outputs.pr-number }}
+      generator-args: ${{ needs.parse.outputs.generator-args }}
+
+  note-ignored:
+    # Inform when comment doesn't meet criteria (non-PR or not authorized)
+    if: ${{ !github.event.issue.pull_request ||
+        !startsWith(github.event.comment.body, '/sweep') ||
+        !contains('OWNER,MEMBER,COLLABORATOR', github.event.comment.author_association) }}
+    runs-on: ubuntu-latest
+    steps:
+      - run: |
+          echo "Comment ignored. Either not on a PR, not a /sweep command, or author not authorized (OWNER/MEMBER/COLLABORATOR required)."
diff --git a/.github/workflows/sweep-executor.yml b/.github/workflows/sweep-executor.yml
new file mode 100644
index 000000000..65bec188e
--- /dev/null
+++ b/.github/workflows/sweep-executor.yml
@@ -0,0 +1,151 @@
+name: Template - Sweep Executor
+
+on:
+  workflow_call:
+    inputs:
+      pr-number:
+        required: true
+        type: string
+      generator-args:
+        required: true
+        type: string
+
+permissions:
+  contents: read
+
+jobs:
+  generate:
+    runs-on: ubuntu-latest
+    outputs:
+      search-space-config: ${{ steps.generate.outputs.search-space-config }}
+      is-multinode: ${{ steps.detect.outputs.is-multinode }}
+    steps:
+      - name: Checkout PR head
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+        with:
+          ref: refs/pull/${{ inputs.pr-number }}/head
+
+      - name: Detect node type from args
+        id: detect
+        run: |
+          set -euo pipefail
+          ARGS=${{ inputs.generator-args }}
+          if [[ "$ARGS" == *"--multi-node"* ]]; then
+            echo "is-multinode=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "is-multinode=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Generate sweep configs
+        id: generate
+        shell: python
+        env:
+          GITHUB_WORKSPACE: ${{ github.workspace }}
+        run: |
+          import json, os, shlex, subprocess, sys
+
+          cmd_args = r'''${{ inputs.generator-args }}'''
+          script = os.path.join(os.environ['GITHUB_WORKSPACE'], 'utils', 'matrix_logic', 'generate_sweep_configs.py')
+          cfg_amd = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'amd-master.yaml')
+          cfg_nv = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'nvidia-master.yaml')
+          runners = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'runners.yaml')
+
+          subprocess.run([sys.executable, '-m', 'pip', 'install', 'pydantic'], check=True)
+
+          argv = [sys.executable, script]
+          if cmd_args.strip():
+              argv += shlex.split(cmd_args)
+          argv += ['--config-files', cfg_amd, cfg_nv, '--runner-config', runners]
+
+          print('Invoking:', ' '.join(shlex.quote(a) for a in argv))
+          res = subprocess.run(argv, capture_output=True, text=True)
+          if res.returncode != 0:
+              print('Generator failed. stdout:\n', res.stdout)
+              print('stderr:\n', res.stderr, file=sys.stderr)
+              raise SystemExit(res.returncode)
+
+          try:
+              data = json.loads(res.stdout)
+          except Exception as e:
+              print('Failed to parse generator output as JSON:', e, file=sys.stderr)
+              print('Raw output:\n', res.stdout)
+              raise
+
+          print(f"Generated {len(data)} configs")
+          with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
+              f.write('search-space-config=' + json.dumps(data) + '\n')
+
+  run-single-node:
+    needs: generate
+    if: ${{ needs.generate.result == 'success' && needs.generate.outputs.search-space-config != '[]' && needs.generate.outputs.is-multinode == 'false' }}
+    uses: ./.github/workflows/benchmark-tmpl.yml
+    name: Sweep (Single-Node)
+    strategy:
+      fail-fast: false
+      matrix:
+        config: ${{ fromJson(needs.generate.outputs.search-space-config) }}
+    secrets: inherit
+    with:
+      exp-name: ${{ matrix.config.exp-name }}
+      isl: ${{ matrix.config.isl }}
+      osl: ${{ matrix.config.osl }}
+      max-model-len: ${{ matrix.config.max-model-len }}
+      runner: ${{ matrix.config.runner }}
+      image: ${{ matrix.config.image }}
+      model: ${{ matrix.config.model }}
+      model-prefix: ${{ matrix.config.model-prefix }}
+      framework: ${{ matrix.config.framework }}
+      precision: ${{ matrix.config.precision }}
+      tp: ${{ matrix.config.tp }}
+      ep: ${{ matrix.config.ep }}
+      dp-attn: ${{ matrix.config.dp-attn }}
+      conc: ${{ matrix.config.conc }}
+      spec-decoding: ${{ matrix.config.spec-decoding }}
+      disagg: ${{ matrix.config.disagg }}
+
+  run-multi-node:
+    needs: generate
+    if: ${{ needs.generate.result == 'success' && needs.generate.outputs.search-space-config != '[]' && needs.generate.outputs.is-multinode == 'true' }}
+    uses: ./.github/workflows/benchmark-multinode-tmpl.yml
+    name: Sweep (Multi-Node)
+    strategy:
+      fail-fast: false
+      matrix:
+        config: ${{ fromJson(needs.generate.outputs.search-space-config) }}
+    secrets: inherit
+    with:
+      exp-name: ${{ matrix.config.exp-name }}
+      isl: ${{ matrix.config.isl }}
+      osl: ${{ matrix.config.osl }}
+      max-model-len: ${{ matrix.config.max-model-len }}
+      runner: ${{ matrix.config.runner }}
+      image: ${{ matrix.config.image }}
+      model: ${{ matrix.config.model }}
+      model-prefix: ${{ matrix.config.model-prefix }}
+      framework: ${{ matrix.config.framework }}
+      precision: ${{ matrix.config.precision }}
+      conc-list: ${{ toJson(matrix.config.conc) }}
+      spec-decoding: ${{ matrix.config.spec-decoding }}
+      disagg: ${{ matrix.config.disagg }}
+
+      prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
+      prefill-tp: ${{ matrix.config.prefill.tp }}
+      prefill-ep: ${{ matrix.config.prefill.ep }}
+      prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
+      prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
+
+      decode-num-worker: ${{ matrix.config.decode.num-worker }}
+      decode-tp: ${{ matrix.config.decode.tp }}
+      decode-ep: ${{ matrix.config.decode.ep }}
+      decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
+      decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
+
+  collect-results:
+    needs: [run-single-node, run-multi-node]
+    if: ${{ always() && (needs.run-single-node.result == 'success' || needs.run-multi-node.result == 'success') }}
+    uses: ./.github/workflows/collect-results.yml
+    name: Collect Results
+    secrets: inherit
+    with:
+      exp-name: ''
+

From d27616a505ba95b9b2b17fa900cad8e707afc999 Mon Sep 17 00:00:00 2001
From: Oseltamivir <bryansg2013@gmail.com>
Date: Sun, 7 Dec 2025 23:45:51 -0800
Subject: [PATCH 02/19] Allow testing on own PR

---
 .github/workflows/sweep-executor.yml | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/sweep-executor.yml b/.github/workflows/sweep-executor.yml
index 65bec188e..fc63c754a 100644
--- a/.github/workflows/sweep-executor.yml
+++ b/.github/workflows/sweep-executor.yml
@@ -9,6 +9,16 @@ on:
       generator-args:
         required: true
         type: string
+  workflow_dispatch:
+    inputs:
+      pr-number:
+        description: PR number to checkout (refs/pull/<num>/head)
+        required: true
+        type: string
+      generator-args:
+        description: Args passed to generate_sweep_configs.py (omit /sweep)
+        required: true
+        type: string
 
 permissions:
   contents: read
@@ -148,4 +158,3 @@ jobs:
     secrets: inherit
     with:
       exp-name: ''
-

From dce0b32c2fe02ac7cf93dc5aa9fd2126cec152e9 Mon Sep 17 00:00:00 2001
From: Oseltamivir <bryansg2013@gmail.com>
Date: Mon, 8 Dec 2025 07:06:57 -0800
Subject: [PATCH 03/19] condense workflow

---
 .github/workflows/pr-comment-sweep.yml | 222 ++++++++++++++++++++++---
 .github/workflows/sweep-executor.yml   | 160 ------------------
 2 files changed, 196 insertions(+), 186 deletions(-)
 delete mode 100644 .github/workflows/sweep-executor.yml

diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml
index be54869a6..f8ec2354c 100644
--- a/.github/workflows/pr-comment-sweep.yml
+++ b/.github/workflows/pr-comment-sweep.yml
@@ -1,45 +1,57 @@
-name: PR Comment Sweep
+name: Sweep
 
 on:
+  # PR comment trigger
   issue_comment:
     types: [created]
+  # Manual trigger
+  workflow_dispatch:
+    inputs:
+      pr-number:
+        description: PR number to checkout (refs/pull/<num>/head)
+        required: false
+        type: string
+      generator-args:
+        description: Args passed to generate_sweep_configs.py (omit /sweep)
+        required: false
+        type: string
+  # Push-based example/testing
+  push:
+    branches-ignore:
+      - main
+      - master
 
 concurrency:
-  group: "PR-SWEEP-${{ github.event.issue.number }}"
+  group: ${{ github.event.issue.number && format('PR-SWEEP-{0}', github.event.issue.number) || format('REF-SWEEP-{0}', github.ref_name) }}
   cancel-in-progress: true
 
 permissions:
   contents: read
 
 jobs:
-  parse:
-    # Run only for PR comments, from trusted authors, starting with /sweep
-    if: >-
-      ${{ github.event.issue.pull_request &&
-          startsWith(github.event.comment.body, '/sweep') &&
-          contains('OWNER,MEMBER,COLLABORATOR', github.event.comment.author_association) }}
+  prepare:
     runs-on: ubuntu-latest
     outputs:
-      pr-number: ${{ steps.parse.outputs.pr-number }}
-      generator-args: ${{ steps.parse.outputs.generator-args }}
+      pr-number: ${{ steps.parse.outputs.pr-number || steps.resolve.outputs.pr-number }}
+      generator-args: ${{ steps.parse.outputs.generator-args || steps.resolve.outputs.generator-args }}
     steps:
-      - name: Derive PR number and parse command
+      - name: Parse PR comment (/sweep ...)
         id: parse
+        if: ${{ github.event_name == 'issue_comment' && github.event.issue.pull_request && startsWith(github.event.comment.body, '/sweep') && contains(fromJson('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association) }}
         shell: bash
         env:
           BODY: ${{ github.event.comment.body }}
           PR_NUMBER: ${{ github.event.issue.number }}
         run: |
           set -euo pipefail
-
-          # Extract first line starting with /sweep
-          cmd_line=$(printf "%s" "$BODY" | awk '/^\/sweep/{print; exit}')
+          # Allow optional leading whitespace before /sweep
+          cmd_line=$(printf "%s" "$BODY" | awk '/^[[:space:]]*\/sweep/{print; exit}')
           if [[ -z "$cmd_line" ]]; then
             echo "No /sweep command found in comment" >&2
             exit 1
           fi
-          cmd_args=${cmd_line#* /sweep}
-          # Handle case when it's exactly '/sweep' (no args)
+          # Trim leading spaces then strip the /sweep prefix
+          cmd_line=$(echo "$cmd_line" | sed 's/^[[:space:]]*//')
           if [[ "$cmd_line" == "/sweep" ]]; then
             cmd_args=""
           else
@@ -47,24 +59,182 @@ jobs:
           fi
           cmd_args=$(echo "$cmd_args" | xargs || true)
 
-          echo "Command args: $cmd_args"
-
           echo "generator-args=$cmd_args" >> "$GITHUB_OUTPUT"
           echo "pr-number=$PR_NUMBER" >> "$GITHUB_OUTPUT"
 
-  execute:
-    needs: parse
-    uses: ./.github/workflows/sweep-executor.yml
+      - name: Find PR for this branch (if any)
+        id: find
+        if: ${{ github.event_name != 'issue_comment' }}
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        with:
+          script: |
+            const owner = context.repo.owner;
+            const repo = context.repo.repo;
+            const branch = context.ref.replace('refs/heads/', '');
+            const res = await github.rest.pulls.list({ owner, repo, state: 'open', head: `${owner}:${branch}` });
+            const num = res.data[0]?.number ? String(res.data[0].number) : '';
+            core.setOutput('pr-number', num);
+
+      - name: Prepare inputs (push/dispatch)
+        id: resolve
+        if: ${{ github.event_name != 'issue_comment' }}
+        shell: bash
+        env:
+          DISPATCH_PR: ${{ github.event.inputs.pr-number }}
+          DISPATCH_ARGS: ${{ github.event.inputs.generator-args }}
+          BRANCH_PR: ${{ steps.find.outputs.pr-number }}
+        run: |
+          set -euo pipefail
+          pr_number="${DISPATCH_PR:-${BRANCH_PR:-}}"  # dispatch input wins; else the PR found for this branch (may be empty)
+          gen_args="${DISPATCH_ARGS:-}"
+          if [[ -z "$gen_args" ]]; then
+            gen_args='full-sweep --single-node --runner-type h200 --model-prefix dsr1 --seq-lens 1k1k --max-conc 4'
+          fi
+          echo "Resolved PR: $pr_number";
+          echo "Using generator args: $gen_args";
+          echo "pr-number=$pr_number" >> "$GITHUB_OUTPUT"
+          echo "generator-args=$gen_args" >> "$GITHUB_OUTPUT"
+
+  generate:
+    needs: prepare
+    if: ${{ needs.prepare.outputs.pr-number != '' && needs.prepare.outputs.generator-args != '' }}
+    runs-on: ubuntu-latest
+    outputs:
+      search-space-config: ${{ steps.generate.outputs.search-space-config }}
+      is-multinode: ${{ steps.detect.outputs.is-multinode }}
+    steps:
+      - name: Checkout PR head
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+        with:
+          ref: refs/pull/${{ needs.prepare.outputs.pr-number }}/head
+
+      - name: Detect node type from args
+        id: detect
+        run: |
+          set -euo pipefail
+          ARGS='${{ needs.prepare.outputs.generator-args }}'
+          if [[ "$ARGS" == *"--multi-node"* ]]; then
+            echo "is-multinode=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "is-multinode=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Generate sweep configs
+        id: generate
+        shell: python
+        env:
+          GITHUB_WORKSPACE: ${{ github.workspace }}
+        run: |
+          import json, os, shlex, subprocess, sys
+
+          cmd_args = r'''${{ needs.prepare.outputs.generator-args }}'''
+          script = os.path.join(os.environ['GITHUB_WORKSPACE'], 'utils', 'matrix_logic', 'generate_sweep_configs.py')
+          cfg_amd = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'amd-master.yaml')
+          cfg_nv = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'nvidia-master.yaml')
+          runners = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'runners.yaml')
+
+          subprocess.run([sys.executable, '-m', 'pip', 'install', 'pydantic'], check=True)
+
+          argv = [sys.executable, script]
+          if cmd_args.strip():
+              argv += shlex.split(cmd_args)
+          argv += ['--config-files', cfg_amd, cfg_nv, '--runner-config', runners]
+
+          print('Invoking:', ' '.join(shlex.quote(a) for a in argv))
+          res = subprocess.run(argv, capture_output=True, text=True)
+          if res.returncode != 0:
+              print('Generator failed. stdout:\n', res.stdout)
+              print('stderr:\n', res.stderr, file=sys.stderr)
+              raise SystemExit(res.returncode)
+
+          try:
+              data = json.loads(res.stdout)
+          except Exception as e:
+              print('Failed to parse generator output as JSON:', e, file=sys.stderr)
+              print('Raw output:\n', res.stdout)
+              raise
+
+          print(f"Generated {len(data)} configs")
+          with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
+              f.write('search-space-config=' + json.dumps(data) + '\n')
+
+  run-single-node:
+    needs: generate
+    if: ${{ needs.generate.result == 'success' && needs.generate.outputs.search-space-config != '[]' && needs.generate.outputs.is-multinode == 'false' }}
+    uses: ./.github/workflows/benchmark-tmpl.yml
+    name: Sweep (Single-Node)
+    strategy:
+      fail-fast: false
+      matrix:
+        config: ${{ fromJson(needs.generate.outputs.search-space-config) }}
+    secrets: inherit
+    with:
+      exp-name: ${{ matrix.config.exp-name }}
+      isl: ${{ matrix.config.isl }}
+      osl: ${{ matrix.config.osl }}
+      max-model-len: ${{ matrix.config.max-model-len }}
+      runner: ${{ matrix.config.runner }}
+      image: ${{ matrix.config.image }}
+      model: ${{ matrix.config.model }}
+      model-prefix: ${{ matrix.config.model-prefix }}
+      framework: ${{ matrix.config.framework }}
+      precision: ${{ matrix.config.precision }}
+      tp: ${{ matrix.config.tp }}
+      ep: ${{ matrix.config.ep }}
+      dp-attn: ${{ matrix.config.dp-attn }}
+      conc: ${{ matrix.config.conc }}
+      spec-decoding: ${{ matrix.config.spec-decoding }}
+      disagg: ${{ matrix.config.disagg }}
+
+  run-multi-node:
+    needs: generate
+    if: ${{ needs.generate.result == 'success' && needs.generate.outputs.search-space-config != '[]' && needs.generate.outputs.is-multinode == 'true' }}
+    uses: ./.github/workflows/benchmark-multinode-tmpl.yml
+    name: Sweep (Multi-Node)
+    strategy:
+      fail-fast: false
+      matrix:
+        config: ${{ fromJson(needs.generate.outputs.search-space-config) }}
+    secrets: inherit
+    with:
+      exp-name: ${{ matrix.config.exp-name }}
+      isl: ${{ matrix.config.isl }}
+      osl: ${{ matrix.config.osl }}
+      max-model-len: ${{ matrix.config.max-model-len }}
+      runner: ${{ matrix.config.runner }}
+      image: ${{ matrix.config.image }}
+      model: ${{ matrix.config.model }}
+      model-prefix: ${{ matrix.config.model-prefix }}
+      framework: ${{ matrix.config.framework }}
+      precision: ${{ matrix.config.precision }}
+      conc-list: ${{ toJson(matrix.config.conc) }}
+      spec-decoding: ${{ matrix.config.spec-decoding }}
+      disagg: ${{ matrix.config.disagg }}
+
+      prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
+      prefill-tp: ${{ matrix.config.prefill.tp }}
+      prefill-ep: ${{ matrix.config.prefill.ep }}
+      prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
+      prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
+
+      decode-num-worker: ${{ matrix.config.decode.num-worker }}
+      decode-tp: ${{ matrix.config.decode.tp }}
+      decode-ep: ${{ matrix.config.decode.ep }}
+      decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
+      decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
+
+  collect-results:
+    needs: [run-single-node, run-multi-node]
+    if: ${{ always() && (needs.run-single-node.result == 'success' || needs.run-multi-node.result == 'success') }}
+    uses: ./.github/workflows/collect-results.yml
+    name: Collect Results
     secrets: inherit
     with:
-      pr-number: ${{ needs.parse.outputs.pr-number }}
-      generator-args: ${{ needs.parse.outputs.generator-args }}
+      exp-name: ''
 
   note-ignored:
     # Inform when comment doesn't meet criteria (non-PR or not authorized)
-    if: ${{ !github.event.issue.pull_request ||
-        !startsWith(github.event.comment.body, '/sweep') ||
-        !contains('OWNER,MEMBER,COLLABORATOR', github.event.comment.author_association) }}
+    if: ${{ github.event_name == 'issue_comment' && (!github.event.issue.pull_request || !startsWith(github.event.comment.body, '/sweep') || !contains(fromJson('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association)) }}
     runs-on: ubuntu-latest
     steps:
       - run: |
diff --git a/.github/workflows/sweep-executor.yml b/.github/workflows/sweep-executor.yml
deleted file mode 100644
index fc63c754a..000000000
--- a/.github/workflows/sweep-executor.yml
+++ /dev/null
@@ -1,160 +0,0 @@
-name: Template - Sweep Executor
-
-on:
-  workflow_call:
-    inputs:
-      pr-number:
-        required: true
-        type: string
-      generator-args:
-        required: true
-        type: string
-  workflow_dispatch:
-    inputs:
-      pr-number:
-        description: PR number to checkout (refs/pull/<num>/head)
-        required: true
-        type: string
-      generator-args:
-        description: Args passed to generate_sweep_configs.py (omit /sweep)
-        required: true
-        type: string
-
-permissions:
-  contents: read
-
-jobs:
-  generate:
-    runs-on: ubuntu-latest
-    outputs:
-      search-space-config: ${{ steps.generate.outputs.search-space-config }}
-      is-multinode: ${{ steps.detect.outputs.is-multinode }}
-    steps:
-      - name: Checkout PR head
-        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
-        with:
-          ref: refs/pull/${{ inputs.pr-number }}/head
-
-      - name: Detect node type from args
-        id: detect
-        run: |
-          set -euo pipefail
-          ARGS=${{ inputs.generator-args }}
-          if [[ "$ARGS" == *"--multi-node"* ]]; then
-            echo "is-multinode=true" >> "$GITHUB_OUTPUT"
-          else
-            echo "is-multinode=false" >> "$GITHUB_OUTPUT"
-          fi
-
-      - name: Generate sweep configs
-        id: generate
-        shell: python
-        env:
-          GITHUB_WORKSPACE: ${{ github.workspace }}
-        run: |
-          import json, os, shlex, subprocess, sys
-
-          cmd_args = r'''${{ inputs.generator-args }}'''
-          script = os.path.join(os.environ['GITHUB_WORKSPACE'], 'utils', 'matrix_logic', 'generate_sweep_configs.py')
-          cfg_amd = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'amd-master.yaml')
-          cfg_nv = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'nvidia-master.yaml')
-          runners = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'runners.yaml')
-
-          subprocess.run([sys.executable, '-m', 'pip', 'install', 'pydantic'], check=True)
-
-          argv = [sys.executable, script]
-          if cmd_args.strip():
-              argv += shlex.split(cmd_args)
-          argv += ['--config-files', cfg_amd, cfg_nv, '--runner-config', runners]
-
-          print('Invoking:', ' '.join(shlex.quote(a) for a in argv))
-          res = subprocess.run(argv, capture_output=True, text=True)
-          if res.returncode != 0:
-              print('Generator failed. stdout:\n', res.stdout)
-              print('stderr:\n', res.stderr, file=sys.stderr)
-              raise SystemExit(res.returncode)
-
-          try:
-              data = json.loads(res.stdout)
-          except Exception as e:
-              print('Failed to parse generator output as JSON:', e, file=sys.stderr)
-              print('Raw output:\n', res.stdout)
-              raise
-
-          print(f"Generated {len(data)} configs")
-          with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
-              f.write('search-space-config=' + json.dumps(data) + '\n')
-
-  run-single-node:
-    needs: generate
-    if: ${{ needs.generate.result == 'success' && needs.generate.outputs.search-space-config != '[]' && needs.generate.outputs.is-multinode == 'false' }}
-    uses: ./.github/workflows/benchmark-tmpl.yml
-    name: Sweep (Single-Node)
-    strategy:
-      fail-fast: false
-      matrix:
-        config: ${{ fromJson(needs.generate.outputs.search-space-config) }}
-    secrets: inherit
-    with:
-      exp-name: ${{ matrix.config.exp-name }}
-      isl: ${{ matrix.config.isl }}
-      osl: ${{ matrix.config.osl }}
-      max-model-len: ${{ matrix.config.max-model-len }}
-      runner: ${{ matrix.config.runner }}
-      image: ${{ matrix.config.image }}
-      model: ${{ matrix.config.model }}
-      model-prefix: ${{ matrix.config.model-prefix }}
-      framework: ${{ matrix.config.framework }}
-      precision: ${{ matrix.config.precision }}
-      tp: ${{ matrix.config.tp }}
-      ep: ${{ matrix.config.ep }}
-      dp-attn: ${{ matrix.config.dp-attn }}
-      conc: ${{ matrix.config.conc }}
-      spec-decoding: ${{ matrix.config.spec-decoding }}
-      disagg: ${{ matrix.config.disagg }}
-
-  run-multi-node:
-    needs: generate
-    if: ${{ needs.generate.result == 'success' && needs.generate.outputs.search-space-config != '[]' && needs.generate.outputs.is-multinode == 'true' }}
-    uses: ./.github/workflows/benchmark-multinode-tmpl.yml
-    name: Sweep (Multi-Node)
-    strategy:
-      fail-fast: false
-      matrix:
-        config: ${{ fromJson(needs.generate.outputs.search-space-config) }}
-    secrets: inherit
-    with:
-      exp-name: ${{ matrix.config.exp-name }}
-      isl: ${{ matrix.config.isl }}
-      osl: ${{ matrix.config.osl }}
-      max-model-len: ${{ matrix.config.max-model-len }}
-      runner: ${{ matrix.config.runner }}
-      image: ${{ matrix.config.image }}
-      model: ${{ matrix.config.model }}
-      model-prefix: ${{ matrix.config.model-prefix }}
-      framework: ${{ matrix.config.framework }}
-      precision: ${{ matrix.config.precision }}
-      conc-list: ${{ toJson(matrix.config.conc) }}
-      spec-decoding: ${{ matrix.config.spec-decoding }}
-      disagg: ${{ matrix.config.disagg }}
-
-      prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
-      prefill-tp: ${{ matrix.config.prefill.tp }}
-      prefill-ep: ${{ matrix.config.prefill.ep }}
-      prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
-      prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
-
-      decode-num-worker: ${{ matrix.config.decode.num-worker }}
-      decode-tp: ${{ matrix.config.decode.tp }}
-      decode-ep: ${{ matrix.config.decode.ep }}
-      decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
-      decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
-
-  collect-results:
-    needs: [run-single-node, run-multi-node]
-    if: ${{ always() && (needs.run-single-node.result == 'success' || needs.run-multi-node.result == 'success') }}
-    uses: ./.github/workflows/collect-results.yml
-    name: Collect Results
-    secrets: inherit
-    with:
-      exp-name: ''

From 503ed3b3baeb1f1e62cae6fe822a35dc9ea64ebd Mon Sep 17 00:00:00 2001
From: Oseltamivir <bryansg2013@gmail.com>
Date: Mon, 8 Dec 2025 07:27:22 -0800
Subject: [PATCH 04/19] Rename Workflow

---
 .github/workflows/pr-comment-sweep.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml
index f8ec2354c..e83e6761d 100644
--- a/.github/workflows/pr-comment-sweep.yml
+++ b/.github/workflows/pr-comment-sweep.yml
@@ -1,4 +1,4 @@
-name: Sweep
+name: Slash Command Sweep
 
 on:
   # PR comment trigger

From f1feee44cd1f92a0b733a09ecd6bb954ca60792c Mon Sep 17 00:00:00 2001
From: Oseltamivir <bryansg2013@gmail.com>
Date: Wed, 10 Dec 2025 06:40:07 -0800
Subject: [PATCH 05/19] Use environments

---
 .github/workflows/e2e-tests.yml        |  32 +++++-
 .github/workflows/pr-comment-sweep.yml | 153 +++----------------------
 2 files changed, 48 insertions(+), 137 deletions(-)

diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml
index cef324e91..3633ef4ec 100644
--- a/.github/workflows/e2e-tests.yml
+++ b/.github/workflows/e2e-tests.yml
@@ -1,5 +1,5 @@
 name: End-to-End Tests
-run-name: e2e Test - ${{ inputs.test-name || github.event.inputs.generate-cli-command }}
+run-name: e2e Test - ${{ inputs.test-name || inputs.generate-cli-command || github.event.inputs.generate-cli-command }}
 
 on:
     workflow_dispatch:
@@ -12,21 +12,47 @@ on:
                 description: "Name for this test run"
                 required: false
                 type: string
+            ref:
+                description: "Ref (branch/sha) to checkout for generating configs"
+                required: false
+                type: string
+    workflow_call:
+        inputs:
+            generate-cli-command:
+                description: "Command passed to generate matrix script"
+                required: true
+                type: string
+            test-name:
+                description: "Name for this test run"
+                required: false
+                type: string
+            ref:
+                description: "Ref (branch/sha) to checkout for generating configs"
+                required: false
+                type: string
 
 jobs:
     get-jobs:
         runs-on: ubuntu-latest
         outputs:
             search-space-config: ${{ steps.get-jobs.outputs.search-space-config }}
+        environment: bryan-test
         steps:
-            - name: Checkout code
+            # Single checkout: an empty `ref` makes actions/checkout use the ref that
+            # triggered the run, so no separate "default" step is needed.
+            - name: Checkout code (inputs.ref, or the triggering ref when unset)
               uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
+              with:
+                ref: ${{ inputs.ref }}
 
             - id: get-jobs
+              env:
+                # Passed via env and expanded unquoted (word-split into args) so caller-supplied text is never parsed as script.
+                GENERATE_CLI_COMMAND: ${{ inputs.generate-cli-command || github.event.inputs.generate-cli-command }}
               run: |
                   pip install pydantic
                   CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py \
-                    ${{ inputs.generate-cli-command }} \
+                    ${GENERATE_CLI_COMMAND} \
                     --runner-config .github/configs/runners.yaml \
                     --config-files .github/configs/nvidia-master.yaml .github/configs/amd-master.yaml)
                   echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml
index e83e6761d..c2595c154 100644
--- a/.github/workflows/pr-comment-sweep.yml
+++ b/.github/workflows/pr-comment-sweep.yml
@@ -37,7 +37,7 @@ jobs:
     steps:
       - name: Parse PR comment (/sweep ...)
         id: parse
-        if: ${{ github.event_name == 'issue_comment' && github.event.issue.pull_request && startsWith(github.event.comment.body, '/sweep') && contains(fromJson('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association) }}
+        if: ${{ github.event_name == 'issue_comment' && github.event.issue.pull_request && startsWith(github.event.comment.body, '/sweep') }}
         shell: bash
         env:
           BODY: ${{ github.event.comment.body }}
@@ -95,147 +95,32 @@ jobs:
           echo "pr-number=$pr_number" >> "$GITHUB_OUTPUT"
           echo "generator-args=$gen_args" >> "$GITHUB_OUTPUT"
 
-  generate:
+  call-e2e-pr:
     needs: prepare
-    if: ${{ needs.prepare.outputs.pr-number != '' && needs.prepare.outputs.generator-args != '' }}
-    runs-on: ubuntu-latest
-    outputs:
-      search-space-config: ${{ steps.generate.outputs.search-space-config }}
-      is-multinode: ${{ steps.detect.outputs.is-multinode }}
-    steps:
-      - name: Checkout PR head
-        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
-        with:
-          ref: refs/pull/${{ needs.prepare.outputs.pr-number }}/head
-
-      - name: Detect node type from args
-        id: detect
-        run: |
-          set -euo pipefail
-          ARGS='${{ needs.prepare.outputs.generator-args }}'
-          if [[ "$ARGS" == *"--multi-node"* ]]; then
-            echo "is-multinode=true" >> "$GITHUB_OUTPUT"
-          else
-            echo "is-multinode=false" >> "$GITHUB_OUTPUT"
-          fi
-
-      - name: Generate sweep configs
-        id: generate
-        shell: python
-        env:
-          GITHUB_WORKSPACE: ${{ github.workspace }}
-        run: |
-          import json, os, shlex, subprocess, sys
-
-          cmd_args = r'''${{ needs.prepare.outputs.generator-args }}'''
-          script = os.path.join(os.environ['GITHUB_WORKSPACE'], 'utils', 'matrix_logic', 'generate_sweep_configs.py')
-          cfg_amd = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'amd-master.yaml')
-          cfg_nv = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'nvidia-master.yaml')
-          runners = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'runners.yaml')
-
-          subprocess.run([sys.executable, '-m', 'pip', 'install', 'pydantic'], check=True)
-
-          argv = [sys.executable, script]
-          if cmd_args.strip():
-              argv += shlex.split(cmd_args)
-          argv += ['--config-files', cfg_amd, cfg_nv, '--runner-config', runners]
-
-          print('Invoking:', ' '.join(shlex.quote(a) for a in argv))
-          res = subprocess.run(argv, capture_output=True, text=True)
-          if res.returncode != 0:
-              print('Generator failed. stdout:\n', res.stdout)
-              print('stderr:\n', res.stderr, file=sys.stderr)
-              raise SystemExit(res.returncode)
-
-          try:
-              data = json.loads(res.stdout)
-          except Exception as e:
-              print('Failed to parse generator output as JSON:', e, file=sys.stderr)
-              print('Raw output:\n', res.stdout)
-              raise
-
-          print(f"Generated {len(data)} configs")
-          with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
-              f.write('search-space-config=' + json.dumps(data) + '\n')
-
-  run-single-node:
-    needs: generate
-    if: ${{ needs.generate.result == 'success' && needs.generate.outputs.search-space-config != '[]' && needs.generate.outputs.is-multinode == 'false' }}
-    uses: ./.github/workflows/benchmark-tmpl.yml
-    name: Sweep (Single-Node)
-    strategy:
-      fail-fast: false
-      matrix:
-        config: ${{ fromJson(needs.generate.outputs.search-space-config) }}
+    if: ${{ github.event_name == 'issue_comment' && needs.prepare.outputs.pr-number != '' && needs.prepare.outputs.generator-args != '' }}
+    uses: ./.github/workflows/e2e-tests.yml
+    name: Run E2E (PR comment)
     secrets: inherit
     with:
-      exp-name: ${{ matrix.config.exp-name }}
-      isl: ${{ matrix.config.isl }}
-      osl: ${{ matrix.config.osl }}
-      max-model-len: ${{ matrix.config.max-model-len }}
-      runner: ${{ matrix.config.runner }}
-      image: ${{ matrix.config.image }}
-      model: ${{ matrix.config.model }}
-      model-prefix: ${{ matrix.config.model-prefix }}
-      framework: ${{ matrix.config.framework }}
-      precision: ${{ matrix.config.precision }}
-      tp: ${{ matrix.config.tp }}
-      ep: ${{ matrix.config.ep }}
-      dp-attn: ${{ matrix.config.dp-attn }}
-      conc: ${{ matrix.config.conc }}
-      spec-decoding: ${{ matrix.config.spec-decoding }}
-      disagg: ${{ matrix.config.disagg }}
+      generate-cli-command: ${{ needs.prepare.outputs.generator-args }}
+      test-name: PR #${{ needs.prepare.outputs.pr-number }} sweep
+      ref: refs/pull/${{ needs.prepare.outputs.pr-number }}/head
 
-  run-multi-node:
-    needs: generate
-    if: ${{ needs.generate.result == 'success' && needs.generate.outputs.search-space-config != '[]' && needs.generate.outputs.is-multinode == 'true' }}
-    uses: ./.github/workflows/benchmark-multinode-tmpl.yml
-    name: Sweep (Multi-Node)
-    strategy:
-      fail-fast: false
-      matrix:
-        config: ${{ fromJson(needs.generate.outputs.search-space-config) }}
-    secrets: inherit
-    with:
-      exp-name: ${{ matrix.config.exp-name }}
-      isl: ${{ matrix.config.isl }}
-      osl: ${{ matrix.config.osl }}
-      max-model-len: ${{ matrix.config.max-model-len }}
-      runner: ${{ matrix.config.runner }}
-      image: ${{ matrix.config.image }}
-      model: ${{ matrix.config.model }}
-      model-prefix: ${{ matrix.config.model-prefix }}
-      framework: ${{ matrix.config.framework }}
-      precision: ${{ matrix.config.precision }}
-      conc-list: ${{ toJson(matrix.config.conc) }}
-      spec-decoding: ${{ matrix.config.spec-decoding }}
-      disagg: ${{ matrix.config.disagg }}
-
-      prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
-      prefill-tp: ${{ matrix.config.prefill.tp }}
-      prefill-ep: ${{ matrix.config.prefill.ep }}
-      prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
-      prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
-
-      decode-num-worker: ${{ matrix.config.decode.num-worker }}
-      decode-tp: ${{ matrix.config.decode.tp }}
-      decode-ep: ${{ matrix.config.decode.ep }}
-      decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
-      decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
-
-  collect-results:
-    needs: [run-single-node, run-multi-node]
-    if: ${{ always() && (needs.run-single-node.result == 'success' || needs.run-multi-node.result == 'success') }}
-    uses: ./.github/workflows/collect-results.yml
-    name: Collect Results
+  call-e2e-nonpr:
+    needs: prepare
+    if: ${{ github.event_name != 'issue_comment' && needs.prepare.outputs.generator-args != '' }}
+    uses: ./.github/workflows/e2e-tests.yml
+    name: Run E2E (manual/push)
     secrets: inherit
     with:
-      exp-name: ''
+      generate-cli-command: ${{ needs.prepare.outputs.generator-args }}
+      test-name: Manual/Push sweep
+      ref: ${{ needs.prepare.outputs.pr-number && format('refs/pull/{0}/head', needs.prepare.outputs.pr-number) || '' }}
 
   note-ignored:
-    # Inform when comment doesn't meet criteria (non-PR or not authorized)
-    if: ${{ github.event_name == 'issue_comment' && (!github.event.issue.pull_request || !startsWith(github.event.comment.body, '/sweep') || !contains(fromJson('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association)) }}
+    # Inform when comment doesn't meet criteria (non-PR or not a /sweep)
+    if: ${{ github.event_name == 'issue_comment' && (!github.event.issue.pull_request || !startsWith(github.event.comment.body, '/sweep')) }}
     runs-on: ubuntu-latest
     steps:
       - run: |
-          echo "Comment ignored. Either not on a PR, not a /sweep command, or author not authorized (OWNER/MEMBER/COLLABORATOR required)."
+          echo "Comment ignored. Either not on a PR or not a /sweep command. For PR comments, runs require environment approval."

From 9a5fe6e49b3918e2511795a24c723c20c5610c47 Mon Sep 17 00:00:00 2001
From: Oseltamivir <bryansg2013@gmail.com>
Date: Wed, 10 Dec 2025 08:35:07 -0800
Subject: [PATCH 06/19] Changed environment location

---
 .github/workflows/e2e-tests.yml        |  1 -
 .github/workflows/pr-comment-sweep.yml | 42 +++++++++++++++++---------
 2 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml
index 3633ef4ec..f807e8a97 100644
--- a/.github/workflows/e2e-tests.yml
+++ b/.github/workflows/e2e-tests.yml
@@ -36,7 +36,6 @@ jobs:
         runs-on: ubuntu-latest
         outputs:
             search-space-config: ${{ steps.get-jobs.outputs.search-space-config }}
-        environment: bryan-test
         steps:
             - name: Checkout code (ref)
               if: ${{ inputs.ref && inputs.ref != '' }}
diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml
index c2595c154..2397cf33c 100644
--- a/.github/workflows/pr-comment-sweep.yml
+++ b/.github/workflows/pr-comment-sweep.yml
@@ -1,4 +1,5 @@
 name: Slash Command Sweep
+run-name: "Validate PR #${{ github.event.issue.number }}"
 
 on:
   # PR comment trigger
@@ -22,14 +23,16 @@ on:
       - master
 
 concurrency:
-  group: ${{ github.event.issue.number && format('PR-SWEEP-{0}', github.event.issue.number) || format('REF-SWEEP-{0}', github.ref_name) }}
+  group: "PR#${{ github.event.issue.number || github.ref_name }}"
   cancel-in-progress: true
 
 permissions:
   contents: read
 
 jobs:
-  prepare:
+  get-jobs:
+    # Skip for PR comments that are not /sweep; run for all other triggers
+    if: ${{ github.event_name != 'issue_comment' || (github.event.issue.pull_request && startsWith(github.event.comment.body, '/sweep')) }}
     runs-on: ubuntu-latest
     outputs:
       pr-number: ${{ steps.parse.outputs.pr-number || steps.resolve.outputs.pr-number }}
@@ -95,27 +98,36 @@ jobs:
           echo "pr-number=$pr_number" >> "$GITHUB_OUTPUT"
           echo "generator-args=$gen_args" >> "$GITHUB_OUTPUT"
 
-  call-e2e-pr:
-    needs: prepare
-    if: ${{ github.event_name == 'issue_comment' && needs.prepare.outputs.pr-number != '' && needs.prepare.outputs.generator-args != '' }}
+  approval:
+    needs: get-jobs
+    if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' }}
+    runs-on: ubuntu-latest
+    name: approval
+    environment: bryan-test
+    steps:
+      - run: echo "approved"
+
+  validate:
+    needs: [get-jobs, approval]
+    if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' }}
     uses: ./.github/workflows/e2e-tests.yml
-    name: Run E2E (PR comment)
+    name: validate
     secrets: inherit
     with:
-      generate-cli-command: ${{ needs.prepare.outputs.generator-args }}
-      test-name: PR #${{ needs.prepare.outputs.pr-number }} sweep
-      ref: refs/pull/${{ needs.prepare.outputs.pr-number }}/head
+      generate-cli-command: ${{ needs.get-jobs.outputs.generator-args }}
+      test-name: PR #${{ needs.get-jobs.outputs.pr-number }} sweep
+      ref: refs/pull/${{ needs.get-jobs.outputs.pr-number }}/head
 
-  call-e2e-nonpr:
-    needs: prepare
-    if: ${{ github.event_name != 'issue_comment' && needs.prepare.outputs.generator-args != '' }}
+  validate-nonpr:
+    needs: get-jobs
+    if: ${{ github.event_name != 'issue_comment' && needs.get-jobs.outputs.generator-args != '' }}
     uses: ./.github/workflows/e2e-tests.yml
-    name: Run E2E (manual/push)
+    name: validate (manual/push)
     secrets: inherit
     with:
-      generate-cli-command: ${{ needs.prepare.outputs.generator-args }}
+      generate-cli-command: ${{ needs.get-jobs.outputs.generator-args }}
       test-name: Manual/Push sweep
-      ref: ${{ needs.prepare.outputs.pr-number && format('refs/pull/{0}/head', needs.prepare.outputs.pr-number) || '' }}
+      ref: ${{ needs.get-jobs.outputs.pr-number && format('refs/pull/{0}/head', needs.get-jobs.outputs.pr-number) || '' }}
 
   note-ignored:
     # Inform when comment doesn't meet criteria (non-PR or not a /sweep)

From 4524540ffcc18328160b3106d9dad388053566ea Mon Sep 17 00:00:00 2001
From: Oseltamivir <bryansg2013@gmail.com>
Date: Wed, 10 Dec 2025 08:46:30 -0800
Subject: [PATCH 07/19] Stricter activation

---
 .github/workflows/pr-comment-sweep.yml | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml
index 2397cf33c..fafd588aa 100644
--- a/.github/workflows/pr-comment-sweep.yml
+++ b/.github/workflows/pr-comment-sweep.yml
@@ -47,14 +47,12 @@ jobs:
           PR_NUMBER: ${{ github.event.issue.number }}
         run: |
           set -euo pipefail
-          # Allow optional leading whitespace before /sweep
-          cmd_line=$(printf "%s" "$BODY" | awk '/^[[:space:]]*\/sweep/{print; exit}')
+          # Require /sweep at the start of the line
+          cmd_line=$(printf "%s" "$BODY" | awk '/^\/sweep/{print; exit}')
           if [[ -z "$cmd_line" ]]; then
-            echo "No /sweep command found in comment" >&2
+            echo "No /sweep command found at comment start" >&2
             exit 1
           fi
-          # Trim leading spaces then strip the /sweep prefix
-          cmd_line=$(echo "$cmd_line" | sed 's/^[[:space:]]*//')
           if [[ "$cmd_line" == "/sweep" ]]; then
             cmd_args=""
           else
@@ -100,7 +98,7 @@ jobs:
 
   approval:
     needs: get-jobs
-    if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' }}
+    if: ${{ needs.get-jobs.outputs.generator-args != '' }}
     runs-on: ubuntu-latest
     name: approval
     environment: bryan-test
@@ -109,7 +107,7 @@ jobs:
 
   validate:
     needs: [get-jobs, approval]
-    if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' }}
+    if: ${{ needs.get-jobs.outputs.generator-args != '' }}
     uses: ./.github/workflows/e2e-tests.yml
     name: validate
     secrets: inherit

From 91e6d6c9cd3e8a23e7ac36b96216d3e22c353328 Mon Sep 17 00:00:00 2001
From: Oseltamivir <bryansg2013@gmail.com>
Date: Sun, 14 Dec 2025 18:27:28 -0800
Subject: [PATCH 08/19] Test replies

---
 .github/workflows/pr-comment-sweep.yml | 34 ++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml
index fafd588aa..89c9eed96 100644
--- a/.github/workflows/pr-comment-sweep.yml
+++ b/.github/workflows/pr-comment-sweep.yml
@@ -28,6 +28,7 @@ concurrency:
 
 permissions:
   contents: read
+  issues: write
 
 jobs:
   get-jobs:
@@ -63,6 +64,39 @@ jobs:
           echo "generator-args=$cmd_args" >> "$GITHUB_OUTPUT"
           echo "pr-number=$PR_NUMBER" >> "$GITHUB_OUTPUT"
 
+      - name: Reply with run link
+        if: ${{ github.event_name == 'issue_comment' && startsWith(github.event.comment.body, '/sweep') }}
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        env:
+          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+          AUTHOR: ${{ github.event.comment.user.login }}
+        with:
+          script: |
+            const owner = context.repo.owner;
+            const repo = context.repo.repo;
+            const issue_number = context.issue.number;
+            const runUrl = process.env.RUN_URL;
+            const author = process.env.AUTHOR;
+            const body = `@${author} thanks! Kicking off a sweep.\n\nRun: ${runUrl}\nApproval: required in environment 'bryan-test'.`;
+            await github.rest.issues.createComment({ owner, repo, issue_number, body });
+
+      - name: Reply with run link (manual trigger)
+        if: ${{ github.event_name == 'workflow_dispatch' && steps.resolve.outputs.pr-number != '' }}
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        env:
+          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+          AUTHOR: ${{ github.actor }}
+          ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }}
+        with:
+          script: |
+            const owner = context.repo.owner;
+            const repo = context.repo.repo;
+            const issue_number = process.env.ISSUE_NUMBER;
+            const runUrl = process.env.RUN_URL;
+            const author = process.env.AUTHOR;
+            const body = `@${author} triggered a manual sweep.\n\nRun: ${runUrl}\n(Manual run on branch ${context.ref})`;
+            await github.rest.issues.createComment({ owner, repo, issue_number, body });
+
       - name: Find PR for this branch (if any)
         id: find
         if: ${{ github.event_name != 'issue_comment' }}

From ac35382434387b932d7858064e4fac3cfd90d859 Mon Sep 17 00:00:00 2001
From: Oseltamivir <bryansg2013@gmail.com>
Date: Mon, 15 Dec 2025 07:27:47 -0800
Subject: [PATCH 09/19] Test replies

---
 .github/workflows/pr-comment-sweep.yml | 55 +++++++++++++++++---------
 1 file changed, 36 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml
index 89c9eed96..f71bbd303 100644
--- a/.github/workflows/pr-comment-sweep.yml
+++ b/.github/workflows/pr-comment-sweep.yml
@@ -80,23 +80,6 @@ jobs:
             const body = `@${author} thanks! Kicking off a sweep.\n\nRun: ${runUrl}\nApproval: required in environment 'bryan-test'.`;
             await github.rest.issues.createComment({ owner, repo, issue_number, body });
 
-      - name: Reply with run link (manual trigger)
-        if: ${{ github.event_name == 'workflow_dispatch' && steps.resolve.outputs.pr-number != '' }}
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
-        env:
-          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-          AUTHOR: ${{ github.actor }}
-          ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }}
-        with:
-          script: |
-            const owner = context.repo.owner;
-            const repo = context.repo.repo;
-            const issue_number = process.env.ISSUE_NUMBER;
-            const runUrl = process.env.RUN_URL;
-            const author = process.env.AUTHOR;
-            const body = `@${author} triggered a manual sweep.\n\nRun: ${runUrl}\n(Manual run on branch ${context.ref})`;
-            await github.rest.issues.createComment({ owner, repo, issue_number, body });
-
       - name: Find PR for this branch (if any)
         id: find
         if: ${{ github.event_name != 'issue_comment' }}
@@ -130,9 +113,43 @@ jobs:
           echo "pr-number=$pr_number" >> "$GITHUB_OUTPUT"
           echo "generator-args=$gen_args" >> "$GITHUB_OUTPUT"
 
+      - name: Reply with run link (manual trigger)
+        if: ${{ github.event_name == 'workflow_dispatch' && steps.resolve.outputs.pr-number != '' }}
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        env:
+          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+          AUTHOR: ${{ github.actor }}
+          ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }}
+        with:
+          script: |
+            const owner = context.repo.owner;
+            const repo = context.repo.repo;
+            const issue_number = process.env.ISSUE_NUMBER;
+            const runUrl = process.env.RUN_URL;
+            const author = process.env.AUTHOR;
+            const body = `@${author} triggered a manual sweep.\n\nRun: ${runUrl}\n(Manual run on branch ${context.ref})`;
+            await github.rest.issues.createComment({ owner, repo, issue_number, body });
+
+      - name: Reply with run link (push trigger)
+        if: ${{ github.event_name == 'push' && steps.resolve.outputs.pr-number != '' }}
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        env:
+          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+          AUTHOR: ${{ github.actor }}
+          ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }}
+        with:
+          script: |
+            const owner = context.repo.owner;
+            const repo = context.repo.repo;
+            const issue_number = process.env.ISSUE_NUMBER;
+            const runUrl = process.env.RUN_URL;
+            const author = process.env.AUTHOR;
+            const body = `@${author} pushed changes and triggered a sweep.\n\nRun: ${runUrl}\n(Push on ${context.ref})`;
+            await github.rest.issues.createComment({ owner, repo, issue_number, body });
+
   approval:
     needs: get-jobs
-    if: ${{ needs.get-jobs.outputs.generator-args != '' }}
+    if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' }}
     runs-on: ubuntu-latest
     name: approval
     environment: bryan-test
@@ -141,7 +158,7 @@ jobs:
 
   validate:
     needs: [get-jobs, approval]
-    if: ${{ needs.get-jobs.outputs.generator-args != '' }}
+    if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' }}
     uses: ./.github/workflows/e2e-tests.yml
     name: validate
     secrets: inherit

From c09922a79155f6729d72b82a2273a42474adfa85 Mon Sep 17 00:00:00 2001
From: Oseltamivir <bryansg2013@gmail.com>
Date: Mon, 15 Dec 2025 07:38:10 -0800
Subject: [PATCH 10/19] Use token for comment perm

---
 .github/workflows/pr-comment-sweep.yml | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml
index f71bbd303..35f21c334 100644
--- a/.github/workflows/pr-comment-sweep.yml
+++ b/.github/workflows/pr-comment-sweep.yml
@@ -29,6 +29,7 @@ concurrency:
 permissions:
   contents: read
   issues: write
+  pull-requests: write
 
 jobs:
   get-jobs:
@@ -65,12 +66,14 @@ jobs:
           echo "pr-number=$PR_NUMBER" >> "$GITHUB_OUTPUT"
 
       - name: Reply with run link
-        if: ${{ github.event_name == 'issue_comment' && startsWith(github.event.comment.body, '/sweep') }}
+        if: ${{ github.event_name == 'issue_comment' && startsWith(github.event.comment.body, '/sweep') && github.repository_owner == 'InferenceMAX' }}
         uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        continue-on-error: true
         env:
           RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
           AUTHOR: ${{ github.event.comment.user.login }}
         with:
+          github-token: ${{ github.token }}
           script: |
             const owner = context.repo.owner;
             const repo = context.repo.repo;
@@ -114,13 +117,15 @@ jobs:
           echo "generator-args=$gen_args" >> "$GITHUB_OUTPUT"
 
       - name: Reply with run link (manual trigger)
-        if: ${{ github.event_name == 'workflow_dispatch' && steps.resolve.outputs.pr-number != '' }}
+        if: ${{ github.event_name == 'workflow_dispatch' && steps.resolve.outputs.pr-number != '' && github.repository_owner == 'InferenceMAX' }}
         uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        continue-on-error: true
         env:
           RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
           AUTHOR: ${{ github.actor }}
           ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }}
         with:
+          github-token: ${{ github.token }}
           script: |
             const owner = context.repo.owner;
             const repo = context.repo.repo;
@@ -131,13 +136,15 @@ jobs:
             await github.rest.issues.createComment({ owner, repo, issue_number, body });
 
       - name: Reply with run link (push trigger)
-        if: ${{ github.event_name == 'push' && steps.resolve.outputs.pr-number != '' }}
+        if: ${{ github.event_name == 'push' && steps.resolve.outputs.pr-number != '' && github.repository_owner == 'InferenceMAX' }}
         uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        continue-on-error: true
         env:
           RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
           AUTHOR: ${{ github.actor }}
           ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }}
         with:
+          github-token: ${{ github.token }}
           script: |
             const owner = context.repo.owner;
             const repo = context.repo.repo;

From 4f9eb0cc505570cec641097ed67e669c36ce267d Mon Sep 17 00:00:00 2001
From: Oseltamivir <bryansg2013@gmail.com>
Date: Mon, 15 Dec 2025 07:44:33 -0800
Subject: [PATCH 11/19] Forgot validation

---
 .github/workflows/pr-comment-sweep.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml
index 35f21c334..9e1501d99 100644
--- a/.github/workflows/pr-comment-sweep.yml
+++ b/.github/workflows/pr-comment-sweep.yml
@@ -156,7 +156,7 @@ jobs:
 
   approval:
     needs: get-jobs
-    if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' }}
+    if: ${{ (github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '') || (github.event_name == 'push' && needs.get-jobs.outputs.generator-args != '') }}
     runs-on: ubuntu-latest
     name: approval
     environment: bryan-test
@@ -165,7 +165,7 @@ jobs:
 
   validate:
     needs: [get-jobs, approval]
-    if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' }}
+    if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' && needs.approval.result == 'success' }}
     uses: ./.github/workflows/e2e-tests.yml
     name: validate
     secrets: inherit
@@ -175,8 +175,8 @@ jobs:
       ref: refs/pull/${{ needs.get-jobs.outputs.pr-number }}/head
 
   validate-nonpr:
-    needs: get-jobs
-    if: ${{ github.event_name != 'issue_comment' && needs.get-jobs.outputs.generator-args != '' }}
+    needs: [get-jobs, approval]
+    if: ${{ needs.get-jobs.outputs.generator-args != '' && ((github.event_name == 'push' && needs.approval.result == 'success') || github.event_name == 'workflow_dispatch') }}
     uses: ./.github/workflows/e2e-tests.yml
     name: validate (manual/push)
     secrets: inherit

From b6fa1012d585e53e540280c43fe2a661bd193cb7 Mon Sep 17 00:00:00 2001
From: Cameron Quilici <cjquilici@gmail.com>
Date: Mon, 15 Dec 2025 09:18:56 -0600
Subject: [PATCH 12/19] feat: performance changelog triggered runs (as opposed
 to nightly) (#267) [skip-sweep]

* add logic for event driven runs

new single workflow that runs on merge to main, new perf-changelog.yaml to track performance changes, new logic to parse changelog, removed cron job in full sweep schedulers

* testing pt 1

* raise error if yaml diff in perf changelog is not valid

* remove unused imports in process_changelog.py

* config data key fix

* raise error if test-config subprocess fails to run

* backfill changelog

* backfill changelog pt 2

* backfill changelog pt 3

* backfill changelog pt 4

* backfill changelog pt 5

* backfill changelog pt 6

* add always() condition to upload changelog metadata

* backfill changelog pt 7 (test)

* backfill changelog pt 8 (revert test)

* backfill changelog pt 9

* backfill changelog pt 11

* change if condition for jobs in run sweep workflow

* debugging run sweep workflow

* debugging run sweep workflow pt 2

* debugging run sweep workflow pt 3 (revert)

* debugging run sweep workflow pt 4

* debugging run sweep workflow pt 5

* debugging run sweep workflow pt 6

* debugging run sweep workflow pt 7

* add always() condition to upload changelog metadata (add back, this got removed)

* add bmk prefix to results

* backfill changelog official

* for concurrency group, use more unique sha
---
 .../workflows/benchmark-multinode-tmpl.yml    |   2 +-
 .github/workflows/benchmark-tmpl.yml          |   2 +-
 .github/workflows/collect-results.yml         |  10 +-
 .../workflows/full-sweep-1k1k-scheduler.yml   |   2 -
 .../workflows/full-sweep-1k8k-scheduler.yml   |   2 -
 .../workflows/full-sweep-8k1k-scheduler.yml   |   2 -
 .github/workflows/run-sweep.yml               | 235 ++++++++++++++++++
 perf-changelog.yaml                           |  83 +++++++
 utils/constants.py                            |   4 +
 utils/matrix_logic/generate_sweep_configs.py  | 190 +++++++++++---
 .../test_generate_sweep_configs.py            |  86 -------
 utils/matrix_logic/test_validation.py         | 129 ++++++++++
 utils/matrix_logic/validation.py              | 116 +++++++++
 utils/process_changelog.py                    | 142 +++++++++++
 14 files changed, 867 insertions(+), 138 deletions(-)
 create mode 100644 .github/workflows/run-sweep.yml
 create mode 100644 perf-changelog.yaml
 create mode 100644 utils/constants.py
 create mode 100644 utils/process_changelog.py

diff --git a/.github/workflows/benchmark-multinode-tmpl.yml b/.github/workflows/benchmark-multinode-tmpl.yml
index 2b828bda8..6c42cbdaa 100644
--- a/.github/workflows/benchmark-multinode-tmpl.yml
+++ b/.github/workflows/benchmark-multinode-tmpl.yml
@@ -170,5 +170,5 @@ jobs:
       - name: Upload results
         uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
         with:
-          name: ${{ env.RESULT_FILENAME }}
+          name: bmk_${{ env.RESULT_FILENAME }}
           path: agg_${{ env.RESULT_FILENAME }}_*.json
diff --git a/.github/workflows/benchmark-tmpl.yml b/.github/workflows/benchmark-tmpl.yml
index 60c19b441..a48081fac 100644
--- a/.github/workflows/benchmark-tmpl.yml
+++ b/.github/workflows/benchmark-tmpl.yml
@@ -169,5 +169,5 @@ jobs:
       - name: Upload result
         uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
         with:
-          name: ${{ env.RESULT_FILENAME }}
+          name: bmk_${{ env.RESULT_FILENAME }}
           path: agg_${{ env.RESULT_FILENAME }}.json
\ No newline at end of file
diff --git a/.github/workflows/collect-results.yml b/.github/workflows/collect-results.yml
index 8105c6d53..d0b0bd992 100644
--- a/.github/workflows/collect-results.yml
+++ b/.github/workflows/collect-results.yml
@@ -3,7 +3,7 @@ name: Template - Collect Results
 on:
   workflow_call:
     inputs:
-      exp-name:
+      result-prefix:
         required: false
         type: string
         default: ''
@@ -26,7 +26,7 @@ jobs:
         uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
         with:
           path: results/
-          pattern: ${{ inputs.exp-name && format('{0}_*', inputs.exp-name) || '*' }}
+          pattern: ${{ inputs.result-prefix && format('{0}_*', inputs.result-prefix) || '*' }}
 
       - name: Print summary
         run: |
@@ -34,10 +34,10 @@ jobs:
           python3 utils/summarize.py results/ >> $GITHUB_STEP_SUMMARY
 
       - name: Aggregate results
-        run: python3 utils/collect_results.py results/ ${{ inputs.exp-name || 'all' }}
+        run: python3 utils/collect_results.py results/ ${{ inputs.result-prefix || 'all' }}
 
       - name: Upload aggregated results
         uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
         with:
-          name: results_${{ inputs.exp-name || 'all' }}
-          path: agg_${{ inputs.exp-name || 'all' }}.json
+          name: results_${{ inputs.result-prefix || 'all' }}
+          path: agg_${{ inputs.result-prefix || 'all' }}.json
diff --git a/.github/workflows/full-sweep-1k1k-scheduler.yml b/.github/workflows/full-sweep-1k1k-scheduler.yml
index 8b32f47c0..3c592cf0a 100644
--- a/.github/workflows/full-sweep-1k1k-scheduler.yml
+++ b/.github/workflows/full-sweep-1k1k-scheduler.yml
@@ -2,8 +2,6 @@ name: "Full Sweep Scheduler - 1k1k"
 
 on:
     workflow_dispatch:
-    schedule:
-        - cron: "0 0 * * *"
 
 jobs:
     get-dsr1-configs:
diff --git a/.github/workflows/full-sweep-1k8k-scheduler.yml b/.github/workflows/full-sweep-1k8k-scheduler.yml
index 393864fdf..be909aad5 100644
--- a/.github/workflows/full-sweep-1k8k-scheduler.yml
+++ b/.github/workflows/full-sweep-1k8k-scheduler.yml
@@ -2,8 +2,6 @@ name: "Full Sweep Scheduler - 1k8k"
 
 on:
     workflow_dispatch:
-    schedule:
-        - cron: "0 0 * * *"
 
 jobs:
     get-dsr1-configs:
diff --git a/.github/workflows/full-sweep-8k1k-scheduler.yml b/.github/workflows/full-sweep-8k1k-scheduler.yml
index 629e56bd9..3eabe74f4 100644
--- a/.github/workflows/full-sweep-8k1k-scheduler.yml
+++ b/.github/workflows/full-sweep-8k1k-scheduler.yml
@@ -2,8 +2,6 @@ name: "Full Sweep Scheduler - 8k1k"
 
 on:
     workflow_dispatch:
-    schedule:
-        - cron: "0 0 * * *"
 
 jobs:
     get-dsr1-configs:
diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
new file mode 100644
index 000000000..cb3c4dde5
--- /dev/null
+++ b/.github/workflows/run-sweep.yml
@@ -0,0 +1,235 @@
+name: "Run Sweep"
+run-name: Run Sweep - ${{ github.event.pull_request.title || github.ref_name }}
+
+concurrency:
+    group: sweep-${{ github.event.pull_request.number || github.sha }}
+    cancel-in-progress: true
+
+on:
+    push:
+        branches:
+            - main
+        paths:
+            - "perf-changelog.yaml"
+    pull_request:
+        branches:
+            - main
+        types:
+            - ready_for_review
+            - synchronize
+            - labeled
+        paths:
+            - "perf-changelog.yaml"
+
+jobs:
+    setup:
+        runs-on: ubuntu-latest
+        if: >-
+            (github.event_name == 'pull_request' && !github.event.pull_request.draft && contains(github.event.pull_request.labels.*.name, 'sweep-enabled')) ||
+            (github.event_name != 'pull_request' && !contains(github.event.head_commit.message, '[skip-sweep]'))
+        outputs:
+            search-space-config: ${{ steps.setup.outputs.search-space-config }}
+        steps:
+            - name: Checkout code
+              uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+              with:
+                  fetch-depth: 0
+
+            - id: setup
+              run: |
+                  pip install pydantic
+                  # PRs diff the base branch tip against the PR head; pushes diff the before/after SHAs.
+                  if [ "${{ github.event_name }}" == "pull_request" ]; then
+                      BASE_REF="origin/${{ github.base_ref }}"
+                      HEAD_REF="${{ github.event.pull_request.head.sha }}"
+                  else
+                      BASE_REF="${{ github.event.before }}"
+                      HEAD_REF="${{ github.event.after }}"
+                  fi
+                  # Paths and $GITHUB_OUTPUT are quoted so word splitting/globbing cannot mangle them.
+                  CONFIG_JSON=$(python3 "${GITHUB_WORKSPACE}/utils/process_changelog.py" \
+                      --changelog-file "${GITHUB_WORKSPACE}/perf-changelog.yaml" \
+                      --base-ref "$BASE_REF" \
+                      --head-ref "$HEAD_REF")
+                  # NOTE(review): name=value output is single-line only; assumes the script prints compact JSON.
+                  echo "search-space-config=$CONFIG_JSON" >> "$GITHUB_OUTPUT"
+
+    sweep-multi-node-1k1k:
+        needs: setup
+        if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != 'null' }}
+        uses: ./.github/workflows/benchmark-multinode-tmpl.yml
+        name: multi-node 1k1k /
+        strategy:
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }}
+        secrets: inherit
+        with: &multi-node-inputs
+            isl: ${{ matrix.config.isl }}
+            osl: ${{ matrix.config.osl }}
+            max-model-len: ${{ matrix.config.max-model-len }}
+            runner: ${{ matrix.config.runner }}
+            image: ${{ matrix.config.image }}
+            model: ${{ matrix.config.model }}
+            model-prefix: ${{ matrix.config.model-prefix }}
+            framework: ${{ matrix.config.framework }}
+            precision: ${{ matrix.config.precision }}
+            exp-name: ${{ matrix.config.exp-name }}
+            conc-list: ${{ toJson(matrix.config.conc) }}
+            spec-decoding: ${{ matrix.config.spec-decoding }}
+            disagg: ${{ matrix.config.disagg }}
+
+            prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
+            prefill-tp: ${{ matrix.config.prefill.tp }}
+            prefill-ep: ${{ matrix.config.prefill.ep }}
+            prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
+            prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
+
+            decode-num-worker: ${{ matrix.config.decode.num-worker }}
+            decode-tp: ${{ matrix.config.decode.tp }}
+            decode-ep: ${{ matrix.config.decode.ep }}
+            decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
+            decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
+
+    sweep-multi-node-1k8k:
+        needs: setup
+        if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k8k']) != 'null' }}
+        uses: ./.github/workflows/benchmark-multinode-tmpl.yml
+        name: multi-node 1k8k /
+        strategy:
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k8k'] }}
+        secrets: inherit
+        with: *multi-node-inputs
+
+    sweep-multi-node-8k1k:
+        needs: setup
+        if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != 'null' }}
+        uses: ./.github/workflows/benchmark-multinode-tmpl.yml
+        name: multi-node 8k1k /
+        strategy:
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k'] }}
+        secrets: inherit
+        with: *multi-node-inputs
+
+    sweep-single-node-1k1k:
+        needs: setup
+        if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k1k']) != 'null' }}
+        uses: ./.github/workflows/benchmark-tmpl.yml
+        name: single-node 1k1k /
+        strategy:
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }}
+        secrets: inherit
+        with: &single-node-inputs
+            exp-name: ${{ matrix.config.exp-name }}
+            isl: ${{ matrix.config.isl }}
+            osl: ${{ matrix.config.osl }}
+            max-model-len: ${{ matrix.config.max-model-len }}
+            runner: ${{ matrix.config.runner }}
+            image: ${{ matrix.config.image }}
+            model: ${{ matrix.config.model }}
+            model-prefix: ${{ matrix.config.model-prefix }}
+            framework: ${{ matrix.config.framework }}
+            precision: ${{ matrix.config.precision }}
+            tp: ${{ matrix.config.tp }}
+            ep: ${{ matrix.config.ep }}
+            dp-attn: ${{ matrix.config.dp-attn }}
+            conc: ${{ matrix.config.conc }}
+            spec-decoding: ${{ matrix.config.spec-decoding }}
+            disagg: ${{ matrix.config.disagg }}
+
+    sweep-single-node-1k8k:
+        needs: setup
+        if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k8k']) != 'null' }}
+        uses: ./.github/workflows/benchmark-tmpl.yml
+        name: single-node 1k8k /
+        strategy:
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k8k'] }}
+        secrets: inherit
+        with: *single-node-inputs
+
+    sweep-single-node-8k1k:
+        needs: setup
+        if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['8k1k']) != 'null' }}
+        uses: ./.github/workflows/benchmark-tmpl.yml
+        name: single-node 8k1k /
+        strategy:
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['8k1k'] }}
+        secrets: inherit
+        with: *single-node-inputs
+
+    collect-results:
+        needs:
+            [
+                sweep-single-node-1k1k,
+                sweep-single-node-1k8k,
+                sweep-single-node-8k1k,
+                sweep-multi-node-1k1k,
+                sweep-multi-node-1k8k,
+                sweep-multi-node-8k1k,
+                setup,
+            ]
+        if: ${{ always() && needs.setup.result != 'skipped' }}
+        uses: ./.github/workflows/collect-results.yml
+        secrets: inherit
+        with:
+            result-prefix: "bmk"
+
+    upload-changelog-metadata:
+        needs: [setup, collect-results]
+        if: ${{ always() && needs.setup.result != 'skipped' }}
+        runs-on: ubuntu-latest
+        steps:
+            - name: Extract and save changelog metadata
+              env:
+                  CONFIG_JSON: ${{ needs.setup.outputs.search-space-config }}
+              run: |
+                  echo "$CONFIG_JSON" | jq '.changelog_metadata' > changelog_metadata.json
+
+            - name: Upload changelog artifact
+              uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+              with:
+                  name: changelog-metadata
+                  path: changelog_metadata.json
+
+    calc-success-rate:
+        needs: collect-results
+        if: ${{ always() && needs.collect-results.result != 'skipped'}}
+        runs-on: ubuntu-latest
+
+        env:
+            RESULTS_DIR: "results/"
+            STATS_FILENAME: "run_stats"
+            GITHUB_TOKEN: ${{ secrets.REPO_PAT }}
+
+        steps:
+            - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+              with:
+                  token: ${{ secrets.REPO_PAT }}
+                  fetch-depth: 0
+
+            - name: Download results artifacts
+              uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
+              with:
+                  path: ${{ env.RESULTS_DIR }}
+                  pattern: results_*
+
+            - name: Install python dependencies
+              run: pip install PyGithub
+
+            - name: Calculate success rate
+              run: python3 utils/calc_success_rate.py $STATS_FILENAME
+
+            - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+              with:
+                  name: "run-stats"
+                  path: ${{ env.STATS_FILENAME }}.json
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
new file mode 100644
index 000000000..a74285c53
--- /dev/null
+++ b/perf-changelog.yaml
@@ -0,0 +1,83 @@
+- config-keys:
+    - 70b-fp8-*-vllm
+  description: |
+    - Add compilation-config: '{"custom_ops": ["-rms_norm", "-quant_fp8", "-silu_and_mul"]}' as
+      extra config to all benchmarks/70b_fp8_mi*.sh scripts
+    - 6-7% uplift for llama for 6/8 configs
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/95
+- config-keys:
+    - gptoss-fp4-*-trt
+  description: |
+    - Upgrade GPT-OSS TRT images from 'release:1.1.0rc2.post2' to '1.2.0rc0.post1'
+    - Add NCCL_GRAPH_REGISTER=0 to benchmarks/gptoss_fp4_b200_trt_slurm.sh
+    - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh
+    - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/110
+- config-keys:
+    - gptoss*
+    - dsr1*
+  description: |
+    - Remove Llama 70B runs to make room for multi-node disagg prefill+wideEP on 
+      h100/h200/b200/mi300/mi325/mi355
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/149
+- config-keys:
+    - gptoss-fp4-b200-vllm
+    - gptoss-fp4-h100-vllm
+    - gptoss-fp4-h200-vllm
+  description: |
+    - Upgrade vLLM from 0.10.2 to 0.11.0 for GPT-OSS NVIDIA single-node configs
+    - Adds compilation-config: '{"cudagraph_mode":"PIECEWISE"}' accordingly since vLLM 0.11.0
+      now defaults to FULL_AND_PIECEWISE
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/159
+- config-keys:
+    - dsr1*
+  description: |
+    - Fixes bug where 1k8k and 8k1k full sweeps had incorrect max-model-len for DeepSeek
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/163
+- config-keys:
+    - dsr1-fp4-b200-sglang
+    - dsr1-fp8-b200-sglang
+    - dsr1-fp8-h200-sglang
+  description: |
+    - Consolidates H200 and B200 SGLang configurations to use unified v0.5.5-cu129-amd64 
+      image tag and updates deprecated SGLang server arguments to their current equivalents.
+    - --enable-flashinfer-trtllm-moe & --enable-ep-moe are no longer available in sglang so we needed to change it
+      - ep: 4 for all tp: 4 entries (3 occurrences in dsr1-fp4-b200-sglang)
+      - ep: 8 for all tp: 8 entries (6 occurrences across dsr1-fp4-b200-sglang and dsr1-fp8-b200-sglang)
+    - dsr1_fp4_b200_docker.sh: Replaced --enable-ep-moe with --ep-size $EP_SIZE and --enable-flashinfer-trtllm-moe with 
+      --moe-runner-backend flashinfer_trtllm
+    - dsr1_fp8_b200_docker.sh: Replaced --enable-flashinfer-trtllm-moe with --moe-runner-backend flashinfer_trtllm and 
+      added --ep-size $EP_SIZE
+    - launch_b200-nvd.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container
+    - launch_b200-tg.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/204
+- config-keys:
+    - gptoss-fp4-mi355x-vllm
+    - gptoss-fp4-b200-vllm
+  description: |
+    - Extend concurrency to 128 for gptoss mi355x/b200 vllm configurations
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/209
+- config-keys:
+    - gptoss-fp4-b200-trt
+  description: |
+    - Extend concurrency to 128 for gptoss b200 TRT configurations
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/233
+- config-keys:
+    - "*gb200-sglang"
+  description: |
+    - Introducing some improvements in GB200 SGLang DSR1 submission
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/257
+- config-keys:
+    - dsr1-fp8-h200-trt
+  description: |
+    - Update TRT image from nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc0.post1 to nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2
+    - Increase concurrency for some configurations
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/266
+- config-keys:
+    - gptoss-fp4-b200-vllm
+    - gptoss-fp4-h100-vllm
+    - gptoss-fp4-h200-vllm
+  description: |
+    - Update vLLM image for NVIDIA configs from vLLM 0.11.0 to vLLM 0.11.2
+    - Adds kv-cache-dtype: fp8 to benchmarks/gptoss_fp4_b200_docker.sh
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/273
\ No newline at end of file
diff --git a/utils/constants.py b/utils/constants.py
new file mode 100644
index 000000000..a465091da
--- /dev/null
+++ b/utils/constants.py
@@ -0,0 +1,4 @@
+MASTER_CONFIGS = [".github/configs/amd-master.yaml",
+                  ".github/configs/nvidia-master.yaml"]
+RUNNER_CONFIG = ".github/configs/runners.yaml"
+GENERATE_SWEEPS_PY_SCRIPT = "utils/matrix_logic/generate_sweep_configs.py"
\ No newline at end of file
diff --git a/utils/matrix_logic/generate_sweep_configs.py b/utils/matrix_logic/generate_sweep_configs.py
index 8fc47651c..d8fab38cf 100644
--- a/utils/matrix_logic/generate_sweep_configs.py
+++ b/utils/matrix_logic/generate_sweep_configs.py
@@ -1,8 +1,17 @@
 import json
-import yaml
 import argparse
+import sys
+from pathlib import Path
 
-from validation import validate_master_config, validate_matrix_entry, validate_runner_config, Fields
+# Ensure sibling modules are importable regardless of how script is invoked
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+
+from validation import (
+    validate_matrix_entry,
+    load_config_files,
+    load_runner_file,
+    Fields
+)
 
 seq_len_stoi = {
     "1k1k": (1024, 1024),
@@ -366,42 +375,126 @@ def get_lowest_conc(search_space_entry):
     return matrix_values
 
 
-def load_config_files(config_files):
-    """Load and merge configuration files."""
-    all_config_data = {}
-    for config_file in config_files:
-        try:
-            with open(config_file, 'r') as f:
-                config_data = yaml.safe_load(f)
-                assert isinstance(
-                    config_data, dict), f"Config file '{config_file}' must contain a dictionary"
-
-                # Check for duplicate keys, this is only in place to prevent against the very unlikely
-                # case where an entry in one config accidentally/purposefully tries to override an entry in another config
-                duplicate_keys = set(all_config_data.keys()) & set(
-                    config_data.keys())
-                if duplicate_keys:
-                    raise ValueError(
-                        f"Duplicate configuration keys found in '{config_file}': {', '.join(sorted(duplicate_keys))}"
-                    )
-
-                all_config_data.update(config_data)
-        except FileNotFoundError:
-            raise ValueError(f"Input file '{config_file}' does not exist.")
-
-    return all_config_data
-
-
-def load_runner_file(runner_file):
-    """Load runner configuration file."""
-    try:
-        with open(runner_file, 'r') as f:
-            runner_config = yaml.safe_load(f)
-    except FileNotFoundError as e:
+def generate_test_config_sweep(args, all_config_data):
+    """Generate the full, unfiltered sweep for the config keys in ``args.config_keys``.
+
+    Raises ValueError (listing the available keys) if any requested key is missing from
+    ``all_config_data``. Returns a list of ``validate_matrix_entry`` results.
+    """
+    # Validate all config keys exist
+    missing_keys = [key for key in args.config_keys if key not in all_config_data]
+    if missing_keys:
+        available_keys = sorted(all_config_data.keys())
         raise ValueError(
-            f"Runner config file '{runner_file}' does not exist.")
+            f"Config key(s) not found: {', '.join(missing_keys)}.\n"
+            f"Available keys: {', '.join(available_keys)}"
+        )
+
+    matrix_values = []
+
+    for key in args.config_keys:
+        val = all_config_data[key]
+        is_multinode = val.get(Fields.MULTINODE.value, False)  # treated as single-node when the field is absent
+
+        image = val[Fields.IMAGE.value]
+        model = val[Fields.MODEL.value]
+        model_code = val[Fields.MODEL_PREFIX.value]
+        precision = val[Fields.PRECISION.value]
+        framework = val[Fields.FRAMEWORK.value]
+        runner = val[Fields.RUNNER.value]
+        disagg = val.get(Fields.DISAGG.value, False)
 
-    return runner_config
+        for seq_len_config in val[Fields.SEQ_LEN_CONFIGS.value]:
+            isl = seq_len_config[Fields.ISL.value]
+            osl = seq_len_config[Fields.OSL.value]
+            seq_len_str = seq_len_to_str(isl, osl)  # only used to build the exp-name below
+
+            for bmk in seq_len_config[Fields.SEARCH_SPACE.value]:
+                if is_multinode:
+                    # Multinode config
+                    spec_decoding = bmk.get(Fields.SPEC_DECODING.value, "none")
+                    prefill = bmk[Fields.PREFILL.value]
+                    decode = bmk[Fields.DECODE.value]
+
+                    # Get concurrency values: an explicit list wins, otherwise expand the start/end range
+                    if Fields.CONC_LIST.value in bmk:
+                        conc_values = bmk[Fields.CONC_LIST.value]
+                    else:
+                        conc_start = bmk[Fields.CONC_START.value]
+                        conc_end = bmk[Fields.CONC_END.value]
+                        conc_values = []
+                        conc = conc_start  # NOTE(review): never terminates if conc_start <= 0 and conc_start < conc_end; presumably validation forbids that, confirm
+                        while conc <= conc_end:  # doubles from conc_start; empty result when conc_start > conc_end
+                            conc_values.append(conc)
+                            if conc == conc_end:
+                                break
+                            conc *= 2
+                            if conc > conc_end:
+                                conc = conc_end  # clamp the overshoot so conc_end is always the last value
+
+                    entry = {
+                        Fields.IMAGE.value: image,
+                        Fields.MODEL.value: model,
+                        Fields.MODEL_PREFIX.value: model_code,
+                        Fields.PRECISION.value: precision,
+                        Fields.FRAMEWORK.value: framework,
+                        Fields.RUNNER.value: runner,
+                        Fields.ISL.value: isl,
+                        Fields.OSL.value: osl,
+                        Fields.SPEC_DECODING.value: spec_decoding,
+                        Fields.PREFILL.value: prefill,
+                        Fields.DECODE.value: decode,
+                        Fields.CONC.value: conc_values,  # whole list in one entry; single-node emits one entry per value
+                        Fields.MAX_MODEL_LEN.value: isl + osl + 200,  # NOTE(review): the 200 looks like token headroom; confirm
+                        Fields.EXP_NAME.value: f"{model_code}_{seq_len_str}",
+                        Fields.DISAGG.value: disagg,
+                    }
+                    matrix_values.append(validate_matrix_entry(entry, is_multinode=True))
+                else:
+                    # Single-node config
+                    tp = bmk[Fields.TP.value]
+                    ep = bmk.get(Fields.EP.value)  # optional; falls back to 1 in the entry below
+                    dp_attn = bmk.get(Fields.DP_ATTN.value)  # optional; falls back to False in the entry below
+                    spec_decoding = bmk.get(Fields.SPEC_DECODING.value, "none")
+
+                    # Get concurrency values (same list-or-range expansion as the multinode branch)
+                    if Fields.CONC_LIST.value in bmk:
+                        conc_values = bmk[Fields.CONC_LIST.value]
+                    else:
+                        conc_start = bmk[Fields.CONC_START.value]
+                        conc_end = bmk[Fields.CONC_END.value]
+                        conc_values = []
+                        conc = conc_start  # NOTE(review): same non-termination caveat for conc_start <= 0 as above
+                        while conc <= conc_end:
+                            conc_values.append(conc)
+                            if conc == conc_end:
+                                break
+                            conc *= 2
+                            if conc > conc_end:
+                                conc = conc_end  # clamp the overshoot so conc_end is always the last value
+
+                    for conc in conc_values:  # one matrix entry per concurrency value
+                        entry = {
+                            Fields.IMAGE.value: image,
+                            Fields.MODEL.value: model,
+                            Fields.MODEL_PREFIX.value: model_code,
+                            Fields.PRECISION.value: precision,
+                            Fields.FRAMEWORK.value: framework,
+                            Fields.RUNNER.value: runner,
+                            Fields.ISL.value: isl,
+                            Fields.OSL.value: osl,
+                            Fields.TP.value: tp,
+                            Fields.CONC.value: conc,
+                            Fields.MAX_MODEL_LEN.value: isl + osl + 200,  # NOTE(review): the 200 looks like token headroom; confirm
+                            Fields.EP.value: ep if ep is not None else 1,
+                            Fields.DP_ATTN.value: dp_attn if dp_attn is not None else False,
+                            Fields.SPEC_DECODING.value: spec_decoding,
+                            Fields.EXP_NAME.value: f"{model_code}_{seq_len_str}",
+                            Fields.DISAGG.value: disagg,
+                        }
+                        matrix_values.append(validate_matrix_entry(entry, is_multinode=False))
+
+    return matrix_values
 
 
 def main():
@@ -545,13 +638,30 @@ def main():
         help='Show this help message and exit'
     )
 
+    # Subcommand: test-config
+    test_config_keys_parser = subparsers.add_parser(
+        'test-config',
+        parents=[parent_parser],
+        add_help=False,
+        help='Generate full sweep for specific config keys. Validates that all specified keys exist before generating.'
+    )
+    test_config_keys_parser.add_argument(
+        '--config-keys',
+        nargs='+',
+        required=True,
+        help='One or more config keys to generate sweep for (e.g., dsr1-fp4-b200-sglang dsr1-fp8-h200-trt)'
+    )
+    test_config_keys_parser.add_argument(
+        '-h', '--help',
+        action='help',
+        help='Show this help message and exit'
+    )
+
     args = parser.parse_args()
 
-    # Load and validate configuration files
+    # Load and validate configuration files (validation happens by default in load functions)
     all_config_data = load_config_files(args.config_files)
     runner_data = load_runner_file(args.runner_config)
-    validate_master_config(all_config_data)
-    validate_runner_config(runner_data)
 
     # Route to appropriate function based on subcommand
     if args.command == 'full-sweep':
@@ -559,6 +669,8 @@ def main():
     elif args.command == 'runner-model-sweep':
         matrix_values = generate_runner_model_sweep_config(
             args, all_config_data, runner_data)
+    elif args.command == 'test-config':
+        matrix_values = generate_test_config_sweep(args, all_config_data)
     else:
         parser.error(f"Unknown command: {args.command}")
 
diff --git a/utils/matrix_logic/test_generate_sweep_configs.py b/utils/matrix_logic/test_generate_sweep_configs.py
index 1381f394e..c505611c3 100644
--- a/utils/matrix_logic/test_generate_sweep_configs.py
+++ b/utils/matrix_logic/test_generate_sweep_configs.py
@@ -7,8 +7,6 @@
     seq_len_to_str,
     generate_full_sweep,
     generate_runner_model_sweep_config,
-    load_config_files,
-    load_runner_file,
 )
 
 
@@ -583,90 +581,6 @@ def test_uses_lowest_conc(self, sample_single_node_config, sample_runner_config,
         assert all(entry["conc"] == 4 for entry in result)
 
 
-# =============================================================================
-# Test load_config_files
-# =============================================================================
-
-class TestLoadConfigFiles:
-    """Tests for load_config_files function."""
-
-    def test_load_single_file(self, tmp_path):
-        """Should load a single config file."""
-        config_file = tmp_path / "config.yaml"
-        config_file.write_text("""
-test-config:
-  image: test-image
-  model: test-model
-""")
-        result = load_config_files([str(config_file)])
-        assert "test-config" in result
-        assert result["test-config"]["image"] == "test-image"
-
-    def test_load_multiple_files(self, tmp_path):
-        """Should merge multiple config files."""
-        config1 = tmp_path / "config1.yaml"
-        config1.write_text("""
-config-one:
-  value: 1
-""")
-        config2 = tmp_path / "config2.yaml"
-        config2.write_text("""
-config-two:
-  value: 2
-""")
-        result = load_config_files([str(config1), str(config2)])
-        assert "config-one" in result
-        assert "config-two" in result
-
-    def test_duplicate_keys_raise_error(self, tmp_path):
-        """Duplicate keys across files should raise error."""
-        config1 = tmp_path / "config1.yaml"
-        config1.write_text("""
-duplicate-key:
-  value: 1
-""")
-        config2 = tmp_path / "config2.yaml"
-        config2.write_text("""
-duplicate-key:
-  value: 2
-""")
-        with pytest.raises(ValueError) as exc_info:
-            load_config_files([str(config1), str(config2)])
-        assert "Duplicate configuration keys" in str(exc_info.value)
-
-    def test_nonexistent_file_raises_error(self):
-        """Nonexistent file should raise error."""
-        with pytest.raises(ValueError) as exc_info:
-            load_config_files(["nonexistent.yaml"])
-        assert "does not exist" in str(exc_info.value)
-
-
-# =============================================================================
-# Test load_runner_file
-# =============================================================================
-
-class TestLoadRunnerFile:
-    """Tests for load_runner_file function."""
-
-    def test_load_runner_file(self, tmp_path):
-        """Should load runner config file."""
-        runner_file = tmp_path / "runners.yaml"
-        runner_file.write_text("""
-h100:
-- h100-node-0
-- h100-node-1
-""")
-        result = load_runner_file(str(runner_file))
-        assert "h100" in result
-        assert len(result["h100"]) == 2
-
-    def test_nonexistent_runner_file(self):
-        """Nonexistent runner file should raise error."""
-        with pytest.raises(ValueError) as exc_info:
-            load_runner_file("nonexistent.yaml")
-        assert "does not exist" in str(exc_info.value)
-
-
 # =============================================================================
 # Test edge cases and special configurations
 # =============================================================================
diff --git a/utils/matrix_logic/test_validation.py b/utils/matrix_logic/test_validation.py
index 008ed2b42..d9cc7f0d9 100644
--- a/utils/matrix_logic/test_validation.py
+++ b/utils/matrix_logic/test_validation.py
@@ -14,6 +14,8 @@
     validate_matrix_entry,
     validate_master_config,
     validate_runner_config,
+    load_config_files,
+    load_runner_file,
 )
 
 
@@ -738,3 +740,130 @@ def test_multiple_runner_types(self, valid_runner_config):
         assert "h200" in result
         assert "mi300x" in result
         assert "gb200" in result
+
+
+# =============================================================================
+# Test load_config_files
+# =============================================================================
+
+class TestLoadConfigFiles:
+    """Tests for load_config_files: loading, merging, duplicate detection and default validation."""
+
+    def test_load_single_file_with_validation(self, tmp_path, valid_single_node_master_config):
+        """A single valid config file loads with validation left on (the default)."""
+        config_file = tmp_path / "config.yaml"
+        import yaml
+        config_file.write_text(yaml.dump({"test-config": valid_single_node_master_config}))
+        result = load_config_files([str(config_file)])
+        assert "test-config" in result
+        assert result["test-config"]["image"] == valid_single_node_master_config["image"]
+
+    def test_load_single_file_without_validation(self, tmp_path):
+        """With validate=False a minimal config (image and model only) loads unchecked."""
+        config_file = tmp_path / "config.yaml"
+        config_file.write_text("""
+test-config:
+  image: test-image
+  model: test-model
+""")
+        result = load_config_files([str(config_file)], validate=False)
+        assert "test-config" in result
+        assert result["test-config"]["image"] == "test-image"
+
+    def test_load_multiple_files(self, tmp_path):
+        """Keys from several config files end up in one merged result."""
+        config1 = tmp_path / "config1.yaml"
+        config1.write_text("""
+config-one:
+  value: 1
+""")
+        config2 = tmp_path / "config2.yaml"
+        config2.write_text("""
+config-two:
+  value: 2
+""")
+        result = load_config_files([str(config1), str(config2)], validate=False)
+        assert "config-one" in result
+        assert "config-two" in result
+
+    def test_duplicate_keys_raise_error(self, tmp_path):
+        """The same top-level key appearing in two files raises ValueError."""
+        config1 = tmp_path / "config1.yaml"
+        config1.write_text("""
+duplicate-key:
+  value: 1
+""")
+        config2 = tmp_path / "config2.yaml"
+        config2.write_text("""
+duplicate-key:
+  value: 2
+""")
+        with pytest.raises(ValueError) as exc_info:
+            load_config_files([str(config1), str(config2)], validate=False)
+        assert "Duplicate configuration keys" in str(exc_info.value)
+
+    def test_nonexistent_file_raises_error(self):
+        """A missing config file raises ValueError mentioning 'does not exist'."""
+        with pytest.raises(ValueError) as exc_info:
+            load_config_files(["nonexistent.yaml"])
+        assert "does not exist" in str(exc_info.value)
+
+    def test_validation_runs_by_default(self, tmp_path):
+        """With validate left at its default, an incomplete config raises ValueError."""
+        config_file = tmp_path / "config.yaml"
+        config_file.write_text("""
+invalid-config:
+  image: test-image
+  # Missing required fields like model, model-prefix, precision, etc.
+""")
+        with pytest.raises(ValueError) as exc_info:
+            load_config_files([str(config_file)])
+        assert "failed validation" in str(exc_info.value)
+
+
+# =============================================================================
+# Test load_runner_file
+# =============================================================================
+
+class TestLoadRunnerFile:
+    """Tests for load_runner_file: loading, optional validation and missing-file handling."""
+
+    def test_load_runner_file_with_validation(self, tmp_path):
+        """A runner file mapping h100 to two nodes loads with validation on (the default)."""
+        runner_file = tmp_path / "runners.yaml"
+        runner_file.write_text("""
+h100:
+- h100-node-0
+- h100-node-1
+""")
+        result = load_runner_file(str(runner_file))
+        assert "h100" in result
+        assert len(result["h100"]) == 2
+
+    def test_load_runner_file_without_validation(self, tmp_path):
+        """The same runner file also loads when validation is skipped via validate=False."""
+        runner_file = tmp_path / "runners.yaml"
+        runner_file.write_text("""
+h100:
+- h100-node-0
+- h100-node-1
+""")
+        result = load_runner_file(str(runner_file), validate=False)
+        assert "h100" in result
+        assert len(result["h100"]) == 2
+
+    def test_nonexistent_runner_file(self):
+        """A missing runner file raises ValueError mentioning 'does not exist'."""
+        with pytest.raises(ValueError) as exc_info:
+            load_runner_file("nonexistent.yaml")
+        assert "does not exist" in str(exc_info.value)
+
+    def test_validation_runs_by_default(self, tmp_path):
+        """By default a runner entry that is not a list raises ValueError ('must be a list')."""
+        runner_file = tmp_path / "runners.yaml"
+        runner_file.write_text("""
+h100: not-a-list
+""")
+        with pytest.raises(ValueError) as exc_info:
+            load_runner_file(str(runner_file))
+        assert "must be a list" in str(exc_info.value)
diff --git a/utils/matrix_logic/validation.py b/utils/matrix_logic/validation.py
index 30012423a..955e4c5b5 100644
--- a/utils/matrix_logic/validation.py
+++ b/utils/matrix_logic/validation.py
@@ -3,6 +3,7 @@
 from enum import Enum
 
 import pprint
+import yaml
 
 """
     The below class defines the field names expected to be present in the JSON entries
@@ -315,3 +316,118 @@ def validate_runner_config(runner_configs: dict) -> List[dict]:
                 f"Runner config entry '{key}' cannot be an empty list")
 
     return runner_configs
+
+
+"""
+    Below is the validation logic for the changelog entries found in perf-changelog.yaml.
+    This ensures that the changelog entries conform to the expected structure before
+    proceeding with processing.
+"""
+
+
+class ChangelogEntry(BaseModel):
+    """One changelog entry: a non-empty 'config-keys' list plus a description; extra fields are rejected."""
+    model_config = ConfigDict(extra="forbid", populate_by_name=True)
+
+    config_keys: list[str] = Field(alias="config-keys", min_length=1)
+    description: str
+
+
+class ChangelogMetadata(BaseModel):
+    """Changelog metadata: base_ref, head_ref and the list of changelog entries; extra fields are rejected."""
+    model_config = ConfigDict(extra="forbid")
+
+    base_ref: str
+    head_ref: str
+    entries: list[ChangelogEntry]
+
+
+class ChangelogMatrixEntry(BaseModel):
+    """Pydantic model for validating the final changelog matrix printed by process_changelog.py.
+    single_node and multi_node map a string key to a list of matrix entries (both default to empty);
+    this strict contract is dictated by the expected input to the run-sweep.yml workflow file.
+    """
+    model_config = ConfigDict(extra="forbid", populate_by_name=True)
+
+    single_node: dict[str, list[SingleNodeMatrixEntry]
+                      ] = Field(default_factory=dict)
+    multi_node: dict[str, list[MultiNodeMatrixEntry]
+                     ] = Field(default_factory=dict)
+    changelog_metadata: ChangelogMetadata
+
+
+# =============================================================================
+# File Loading Functions
+# =============================================================================
+
+
+def load_config_files(config_files: List[str], validate: bool = True) -> dict:
+    """Load and merge configuration files.
+
+    Args:
+        config_files: List of paths to YAML configuration files.
+        validate: If True, run validate_master_config on loaded data. Defaults to True.
+
+    Returns:
+        Merged configuration dictionary.
+
+    Raises:
+        ValueError: If a file is missing, is not a dict, has a '*' in a key, or repeats a key.
+    """
+    all_config_data = {}
+    for config_file in config_files:
+        try:
+            with open(config_file, 'r') as f:
+                config_data = yaml.safe_load(f)
+                if not isinstance(config_data, dict):  # explicit raise (asserts vanish under -O); also catches an empty file (None)
+                    raise ValueError(f"Config file '{config_file}' must contain a dictionary")
+
+                # Don't allow '*' wildcard in master config keys as we need to reserve these
+                # for expansion in process_changelog.py
+                for key in config_data.keys():
+                    if "*" in key:
+                        raise ValueError(
+                            f" Wildcard '*' is not allowed in master config keys: '{key}'")
+
+                # Check for duplicate keys
+                duplicate_keys = set(all_config_data.keys()) & set(
+                    config_data.keys())
+                if duplicate_keys:
+                    raise ValueError(
+                        f"Duplicate configuration keys found in '{config_file}': {', '.join(sorted(duplicate_keys))}"
+                    )
+
+                all_config_data.update(config_data)
+        except FileNotFoundError:
+            raise ValueError(f"Input file '{config_file}' does not exist.")
+
+    if validate:
+        validate_master_config(all_config_data)
+
+    return all_config_data
+
+
+def load_runner_file(runner_file: str, validate: bool = True) -> dict:
+    """Read the runner YAML file and return its parsed contents.
+
+    Args:
+        runner_file: Location of the runner YAML configuration file.
+        validate: When True (the default), pass the parsed data to validate_runner_config.
+
+    Returns:
+        The parsed runner configuration.
+
+    Raises:
+        ValueError: If runner_file does not exist; validate_runner_config errors propagate unchanged.
+    """
+    missing_msg = f"Runner config file '{runner_file}' does not exist."
+    try:
+        with open(runner_file, 'r') as handle:
+            parsed = yaml.safe_load(handle)
+    except FileNotFoundError:
+        raise ValueError(missing_msg)
+
+    # Validation is opt-out: callers pass validate=False to skip it
+    if validate:
+        validate_runner_config(parsed)
+    return parsed
diff --git a/utils/process_changelog.py b/utils/process_changelog.py
new file mode 100644
index 000000000..4a856c9a8
--- /dev/null
+++ b/utils/process_changelog.py
@@ -0,0 +1,142 @@
+import argparse
+import json
+import re
+import subprocess
+from collections import defaultdict
+
+import yaml
+from constants import GENERATE_SWEEPS_PY_SCRIPT, MASTER_CONFIGS, RUNNER_CONFIG
+from matrix_logic.generate_sweep_configs import seq_len_to_str
+from matrix_logic.validation import (
+    ChangelogEntry,
+    ChangelogMatrixEntry,
+    load_config_files,
+)
+
+
+def get_added_lines(base_ref: str, head_ref: str, filepath: str) -> str:
+    """Return the lines the base_ref..head_ref diff adds to filepath; ValueError on deletions or git failure."""
+    cmd = ["git", "diff", base_ref, head_ref, "--", filepath]
+    result = subprocess.run(cmd, capture_output=True, text=True)
+    if result.returncode != 0:
+        # Otherwise a failed git call (e.g. a bad ref) would look like an empty diff ("no additions")
+        raise ValueError(f"git diff failed for {filepath}: {result.stderr.strip()}")
+
+    added_lines = []
+    for line in result.stdout.split("\n"):
+        if line.startswith("-") and not line.startswith("---"):
+            # Don't allow deletions: by convention the changelog acts as a running log of
+            # performance changes, so we only want to see additions
+            raise ValueError(
+                f"Deletions are not allowed in {filepath}. "
+                f"Only additions to the changelog are permitted. "
+                f"Found deleted line: {line[1:]}"
+            )
+        elif line.startswith("+") and not line.startswith("+++"):
+            added_lines.append(line[1:])
+
+    return "\n".join(added_lines)
+
+
+def get_config_keys_from_master(config_keys: list[str], master_config: dict) -> list[str]:
+    """Resolve config_keys ('*' is a wildcard) to unique master_config keys in first-seen order
+    (stable across runs, unlike a set); raises ValueError for a key or pattern that matches nothing."""
+    resolved_keys: dict[str, None] = {}
+    for key in config_keys:
+        if "*" in key:
+            pattern = re.compile(re.escape(key).replace(r"\*", ".*"))
+            matched_keys = [k for k in master_config if pattern.fullmatch(k)]
+            if not matched_keys:
+                raise ValueError(
+                    f"No config keys matched the wildcard pattern '{key}' in master configs."
+                )
+            resolved_keys.update(dict.fromkeys(matched_keys))
+        elif key not in master_config:
+            raise ValueError(f"Config key '{key}' not found in master configs.")
+        else:
+            resolved_keys[key] = None
+    return list(resolved_keys)
+
+
+def main():
+    """Build the sweep matrix for configs named by newly added changelog entries and print it as JSON."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--base-ref", type=str, required=True)
+    parser.add_argument("--head-ref", type=str, required=True)
+    parser.add_argument("--changelog-file", type=str, required=True)
+    args = parser.parse_args()
+
+    added_yaml = get_added_lines(args.base_ref, args.head_ref, args.changelog_file)
+
+    if not added_yaml.strip():
+        raise ValueError("No additions found in the changelog file.")
+
+    changelog_data = yaml.safe_load(added_yaml)
+
+    if not changelog_data:
+        raise ValueError("No valid YAML entries found in the changelog additions.")
+
+    final_results = {
+        "single_node": defaultdict(list),
+        "multi_node": defaultdict(list),
+        "changelog_metadata": {
+            "base_ref": args.base_ref,
+            "head_ref": args.head_ref,
+            "entries": changelog_data,
+        },
+    }
+
+    all_results = []
+    # Deduplicate configs across entries: running a config key twice in one commit would
+    # only produce twice as many data points for it, which is not useful
+    all_configs_to_run = set()
+    # Load the master configs once up front instead of re-parsing every file per entry
+    master_config = load_config_files(MASTER_CONFIGS)
+
+    for entry_data in changelog_data:
+        entry = ChangelogEntry.model_validate(entry_data)
+        configs_to_run = get_config_keys_from_master(entry.config_keys, master_config)
+
+        # Skip configs already processed
+        configs_to_run = [c for c in configs_to_run if c not in all_configs_to_run]
+        if not configs_to_run:
+            continue
+        all_configs_to_run.update(configs_to_run)
+
+        try:
+            result = subprocess.run(
+                [
+                    "python3",
+                    GENERATE_SWEEPS_PY_SCRIPT,
+                    "test-config",
+                    "--config-keys",
+                    *configs_to_run,
+                    "--config-files",
+                    *MASTER_CONFIGS,
+                    "--runner-config",
+                    RUNNER_CONFIG,
+                ],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+        except subprocess.CalledProcessError as e:
+            print(e.stderr)
+            raise
+
+        all_results.extend(json.loads(result.stdout))
+
+    for result in all_results:
+        seq_len_str = seq_len_to_str(result["isl"], result["osl"])
+        if "prefill" in result and result["prefill"] is not None:
+            final_results["multi_node"][seq_len_str].append(result)
+        else:
+            final_results["single_node"][seq_len_str].append(result)
+
+    # Validate final results structure
+    validated = ChangelogMatrixEntry.model_validate(final_results)
+    print(validated.model_dump_json(by_alias=True))
+
+
+if __name__ == "__main__":
+    main()

From 5383cfaa7149d053339566692f3a663c2c10ed56 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 15 Dec 2025 10:10:21 -0600
Subject: [PATCH 13/19] chore(deps): bump the github-actions group across 1
 directory with 3 updates (#331)

Bumps the github-actions group with 3 updates in the / directory: [actions/checkout](https://github.com/actions/checkout), [actions/upload-artifact](https://github.com/actions/upload-artifact) and [actions/download-artifact](https://github.com/actions/download-artifact).


Updates `actions/checkout` from 6.0.0 to 6.0.1
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v6...8e8c483db84b4bee98b60c0593521ed34d9990e8)

Updates `actions/upload-artifact` from 5.0.0 to 6.0.0
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/330a01c490aca151604b8cf639adc76d48f6c5d4...b7c566a772e6b6bfb58ed0dc250532a479d7789f)

Updates `actions/download-artifact` from 6.0.0 to 7.0.0
- [Release notes](https://github.com/actions/download-artifact/releases)
- [Commits](https://github.com/actions/download-artifact/compare/018cc2cf5baa6db3ef3c5f8a56943fffe632ef53...37930b1c2abaa49bbe596cd826c3c89aef350131)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-version: 6.0.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: github-actions
- dependency-name: actions/upload-artifact
  dependency-version: 6.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
  dependency-group: github-actions
- dependency-name: actions/download-artifact
  dependency-version: 7.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
  dependency-group: github-actions
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/benchmark-multinode-tmpl.yml  |  2 +-
 .github/workflows/benchmark-tmpl.yml            |  2 +-
 .github/workflows/collect-results.yml           |  4 ++--
 .github/workflows/e2e-tests.yml                 |  4 ++--
 .github/workflows/full-sweep-1k1k-scheduler.yml |  4 ++--
 .github/workflows/full-sweep-1k8k-scheduler.yml |  4 ++--
 .github/workflows/full-sweep-8k1k-scheduler.yml |  4 ++--
 .github/workflows/label-validation.yml          |  4 ++--
 .github/workflows/run-sweep.yml                 | 10 +++++-----
 9 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/benchmark-multinode-tmpl.yml b/.github/workflows/benchmark-multinode-tmpl.yml
index 6c42cbdaa..93de8faa0 100644
--- a/.github/workflows/benchmark-multinode-tmpl.yml
+++ b/.github/workflows/benchmark-multinode-tmpl.yml
@@ -168,7 +168,7 @@ jobs:
           done
 
       - name: Upload results
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
         with:
           name: bmk_${{ env.RESULT_FILENAME }}
           path: agg_${{ env.RESULT_FILENAME }}_*.json
diff --git a/.github/workflows/benchmark-tmpl.yml b/.github/workflows/benchmark-tmpl.yml
index a48081fac..8a3dcfb5c 100644
--- a/.github/workflows/benchmark-tmpl.yml
+++ b/.github/workflows/benchmark-tmpl.yml
@@ -167,7 +167,7 @@ jobs:
         run: |
           python3 utils/process_result.py
       - name: Upload result
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
         with:
           name: bmk_${{ env.RESULT_FILENAME }}
           path: agg_${{ env.RESULT_FILENAME }}.json
\ No newline at end of file
diff --git a/.github/workflows/collect-results.yml b/.github/workflows/collect-results.yml
index d0b0bd992..ccc2ce4e4 100644
--- a/.github/workflows/collect-results.yml
+++ b/.github/workflows/collect-results.yml
@@ -23,7 +23,7 @@ jobs:
           fetch-depth: 0
 
       - name: Download JSON artifacts
-        uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
+        uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
         with:
           path: results/
           pattern: ${{ inputs.result-prefix && format('{0}_*', inputs.result-prefix) || '*' }}
@@ -37,7 +37,7 @@ jobs:
         run: python3 utils/collect_results.py results/ ${{ inputs.result-prefix || 'all' }}
 
       - name: Upload aggregated results
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
         with:
           name: results_${{ inputs.result-prefix || 'all' }}
           path: agg_${{ inputs.result-prefix || 'all' }}.json
diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml
index f807e8a97..9a8c4b70d 100644
--- a/.github/workflows/e2e-tests.yml
+++ b/.github/workflows/e2e-tests.yml
@@ -147,7 +147,7 @@ jobs:
                   fetch-depth: 0
 
             - name: Download results artifacts
-              uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
+              uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
               with:
                   path: ${{ env.RESULTS_DIR }}
                   pattern: results_*
@@ -158,7 +158,7 @@ jobs:
             - name: Calculate success rate
               run: python3 utils/calc_success_rate.py $STATS_FILENAME
 
-            - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+            - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
               with:
                   name: "run-stats"
                   path: ${{ env.STATS_FILENAME }}.json
diff --git a/.github/workflows/full-sweep-1k1k-scheduler.yml b/.github/workflows/full-sweep-1k1k-scheduler.yml
index 3c592cf0a..a8b40214e 100644
--- a/.github/workflows/full-sweep-1k1k-scheduler.yml
+++ b/.github/workflows/full-sweep-1k1k-scheduler.yml
@@ -211,7 +211,7 @@ jobs:
                   fetch-depth: 0
 
             - name: Download results artifacts
-              uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
+              uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
               with:
                   path: ${{ env.RESULTS_DIR }}
                   pattern: results_*
@@ -222,7 +222,7 @@ jobs:
             - name: Calculate success rate
               run: python3 utils/calc_success_rate.py $STATS_FILENAME
 
-            - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+            - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
               with:
                   name: "run-stats"
                   path: ${{ env.STATS_FILENAME }}.json
diff --git a/.github/workflows/full-sweep-1k8k-scheduler.yml b/.github/workflows/full-sweep-1k8k-scheduler.yml
index be909aad5..062f00265 100644
--- a/.github/workflows/full-sweep-1k8k-scheduler.yml
+++ b/.github/workflows/full-sweep-1k8k-scheduler.yml
@@ -211,7 +211,7 @@ jobs:
                   fetch-depth: 0
 
             - name: Download results artifacts
-              uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
+              uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
               with:
                   path: ${{ env.RESULTS_DIR }}
                   pattern: results_*
@@ -222,7 +222,7 @@ jobs:
             - name: Calculate success rate
               run: python3 utils/calc_success_rate.py $STATS_FILENAME
 
-            - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+            - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
               with:
                   name: "run-stats"
                   path: ${{ env.STATS_FILENAME }}.json
diff --git a/.github/workflows/full-sweep-8k1k-scheduler.yml b/.github/workflows/full-sweep-8k1k-scheduler.yml
index 3eabe74f4..2b45b9679 100644
--- a/.github/workflows/full-sweep-8k1k-scheduler.yml
+++ b/.github/workflows/full-sweep-8k1k-scheduler.yml
@@ -211,7 +211,7 @@ jobs:
                   fetch-depth: 0
 
             - name: Download results artifacts
-              uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
+              uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
               with:
                   path: ${{ env.RESULTS_DIR }}
                   pattern: results_*
@@ -222,7 +222,7 @@ jobs:
             - name: Calculate success rate
               run: python3 utils/calc_success_rate.py $STATS_FILENAME
 
-            - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+            - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
               with:
                   name: "run-stats"
                   path: ${{ env.STATS_FILENAME }}.json
diff --git a/.github/workflows/label-validation.yml b/.github/workflows/label-validation.yml
index 3afe0ff06..1b01deb84 100644
--- a/.github/workflows/label-validation.yml
+++ b/.github/workflows/label-validation.yml
@@ -134,7 +134,7 @@ jobs:
           fetch-depth: 0
 
       - name: Download results artifacts
-        uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
+        uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
         with:
           path: ${{ env.RESULTS_DIR }}
           pattern: results_*
@@ -145,7 +145,7 @@ jobs:
       - name: Calculate success rate
         run: python3 utils/calc_success_rate.py $STATS_FILENAME
 
-      - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+      - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
         with:
           name: "run-stats"
           path: ${{ env.STATS_FILENAME }}.json
diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index cb3c4dde5..e449942d1 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -31,7 +31,7 @@ jobs:
             search-space-config: ${{ steps.setup.outputs.search-space-config }}
         steps:
             - name: Checkout code
-              uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+              uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
               with:
                   fetch-depth: 0
 
@@ -196,7 +196,7 @@ jobs:
                   echo "$CONFIG_JSON" | jq '.changelog_metadata' > changelog_metadata.json
 
             - name: Upload changelog artifact
-              uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+              uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
               with:
                   name: changelog-metadata
                   path: changelog_metadata.json
@@ -212,13 +212,13 @@ jobs:
             GITHUB_TOKEN: ${{ secrets.REPO_PAT }}
 
         steps:
-            - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+            - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
               with:
                   token: ${{ secrets.REPO_PAT }}
                   fetch-depth: 0
 
             - name: Download results artifacts
-              uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
+              uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
               with:
                   path: ${{ env.RESULTS_DIR }}
                   pattern: results_*
@@ -229,7 +229,7 @@ jobs:
             - name: Calculate success rate
               run: python3 utils/calc_success_rate.py $STATS_FILENAME
 
-            - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+            - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
               with:
                   name: "run-stats"
                   path: ${{ env.STATS_FILENAME }}.json

From 2a85d1d8320680cfb459e6316c6d5a01070f3b59 Mon Sep 17 00:00:00 2001
From: Cameron Quilici <cjquilici@gmail.com>
Date: Mon, 15 Dec 2025 11:01:27 -0600
Subject: [PATCH 14/19] fix: add final newline to original perf-changelog.yaml
 so that there won't be an erroneous negative diff [skip-sweep] (#333)

---
 perf-changelog.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index a74285c53..112145f10 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -80,4 +80,4 @@
   description: |
     - Update vLLM image for NVIDIA configs from vLLM 0.11.0 to vLLM 0.11.2
     - Adds kv-cache-dtype: fp8 to benchmarks/gptoss_fp4_b200_docker.sh
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/273
\ No newline at end of file
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/273

From 1a3e65c0912df5b9052f572fce40463b1a3f4cf2 Mon Sep 17 00:00:00 2001
From: ppalanga <ppalanga@amd.com>
Date: Tue, 16 Dec 2025 16:01:23 -0800
Subject: [PATCH 15/19] Update MI355x Deepseek-R1 FP4  SGLang Image to
 v0.5.6.post1 (#330)

* Update amd-master.yaml

* Update perf-changelog.yaml

* Update dsr1_fp4_mi355x_docker.sh

* Update dsr1_fp4_mi355x_docker.sh

---------

Co-authored-by: Cameron Quilici <cjquilici@gmail.com>
---
 .github/configs/amd-master.yaml      | 2 +-
 benchmarks/dsr1_fp4_mi355x_docker.sh | 5 ++++-
 perf-changelog.yaml                  | 5 +++++
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml
index a2674153a..a61f4fdd3 100644
--- a/.github/configs/amd-master.yaml
+++ b/.github/configs/amd-master.yaml
@@ -1,5 +1,5 @@
 dsr1-fp4-mi355x-sglang:
-  image: rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915
+  image: lmsysorg/sglang:v0.5.6.post1-rocm700-mi35x
   model: amd/DeepSeek-R1-0528-MXFP4-Preview
   model-prefix: dsr1
   runner: mi355x
diff --git a/benchmarks/dsr1_fp4_mi355x_docker.sh b/benchmarks/dsr1_fp4_mi355x_docker.sh
index ca1255802..8b3750a64 100644
--- a/benchmarks/dsr1_fp4_mi355x_docker.sh
+++ b/benchmarks/dsr1_fp4_mi355x_docker.sh
@@ -11,6 +11,7 @@
 # RESULT_FILENAME
 # NUM_PROMPTS
 export SGLANG_USE_AITER=1
+export ROCM_QUICK_REDUCE_QUANTIZATION=INT4
 
 PREFILL_SIZE=196608
 if [[ "$ISL" == "8192" && "$OSL" == "1024" ]]; then
@@ -30,7 +31,9 @@ python3 -m sglang.launch_server --model-path=$MODEL --trust-remote-code \
 --disable-radix-cache \
 --num-continuous-decode-steps=4 \
 --max-prefill-tokens=$PREFILL_SIZE \
---cuda-graph-max-bs=128 > $SERVER_LOG 2>&1 &
+--cuda-graph-max-bs=128 \
+--attention-backend aiter \
+--kv-cache-dtype fp8_e4m3 > $SERVER_LOG 2>&1 &
 
 SERVER_PID=$!
 
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 112145f10..926ac7e1d 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -81,3 +81,8 @@
     - Update vLLM image for NVIDIA configs from vLLM 0.11.0 to vLLM 0.11.2
     - Adds kv-cache-dtype: fp8 to benchmarks/gptoss_fp4_b200_docker.sh
     PR: https://github.com/InferenceMAX/InferenceMAX/pull/273
+- config-keys:
+    - dsr1-fp4-mi355x-sglang
+  description: |
+    - Updating MI355x Deepseek-R1 FP4 SGLang Image to upstream v0.5.6.post1 
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/330

From c13b7f75330b74d5475a6d6ab9e5d07dc189498f Mon Sep 17 00:00:00 2001
From: Oseltamivir <bryansg2013@gmail.com>
Date: Tue, 16 Dec 2025 19:05:56 -0800
Subject: [PATCH 16/19] TOCTOU

---
 .github/workflows/pr-comment-sweep.yml | 113 +++++++++++++++++++++++--
 1 file changed, 105 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml
index 9e1501d99..dfc502d8d 100644
--- a/.github/workflows/pr-comment-sweep.yml
+++ b/.github/workflows/pr-comment-sweep.yml
@@ -39,6 +39,9 @@ jobs:
     outputs:
       pr-number: ${{ steps.parse.outputs.pr-number || steps.resolve.outputs.pr-number }}
       generator-args: ${{ steps.parse.outputs.generator-args || steps.resolve.outputs.generator-args }}
+      author-can-bypass: ${{ steps.auth.outputs.can-bypass }}
+      # IMPORTANT: immutable ref (commit SHA) to prevent TOCTOU on refs/pull/<n>/head
+      ref: ${{ steps.ref_comment.outputs.ref || steps.ref_other.outputs.ref }}
     steps:
       - name: Parse PR comment (/sweep ...)
         id: parse
@@ -65,6 +68,41 @@ jobs:
           echo "generator-args=$cmd_args" >> "$GITHUB_OUTPUT"
           echo "pr-number=$PR_NUMBER" >> "$GITHUB_OUTPUT"
 
+      - name: Check author permissions (PR comments)
+        id: auth
+        if: ${{ github.event_name == 'issue_comment' && github.event.issue.pull_request }}
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        with:
+          script: |
+            const owner = context.repo.owner;
+            const repo = context.repo.repo;
+            const username = context.payload.comment?.user?.login;
+            let permission = 'none'; // fail closed: stays 'none' (no bypass) unless the lookup succeeds
+            try {
+              const res = await github.rest.repos.getCollaboratorPermissionLevel({ owner, repo, username });
+              permission = res.data?.permission || 'none';
+            } catch (e) {
+              core.warning(`Permission lookup failed for ${username}: ${e?.message ?? e}; treating as 'none'.`);
+            }
+            const canBypass = ['admin','maintain','write'].includes(permission);
+            core.info(`Author ${username} permission: ${permission}; bypass=${canBypass}`);
+            core.setOutput('can-bypass', canBypass ? 'true' : 'false');
+
+      # ---- PR SHA pinning (issue_comment path) ----
+      - name: Resolve immutable PR ref (pin to head SHA)
+        id: ref_comment
+        if: ${{ github.event_name == 'issue_comment' && github.event.issue.pull_request && startsWith(github.event.comment.body, '/sweep') }}
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        with:
+          script: |
+            // Resolve the PR's current head commit; the SHA is exported as this step's 'ref' output.
+            const { owner, repo } = context.repo;
+            const pull_number = context.issue.number;
+            const { data: pull } = await github.rest.pulls.get({ owner, repo, pull_number });
+            const headSha = pull.head.sha;
+            core.info(`Resolved PR #${pull_number} head SHA: ${headSha}`);
+            core.setOutput('ref', headSha);
+
       - name: Reply with run link
         if: ${{ github.event_name == 'issue_comment' && startsWith(github.event.comment.body, '/sweep') && github.repository_owner == 'InferenceMAX' }}
         uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
@@ -72,6 +110,9 @@ jobs:
         env:
           RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
           AUTHOR: ${{ github.event.comment.user.login }}
+          GEN_CMD: ${{ steps.parse.outputs.generator-args }}
+          CAN_BYPASS: ${{ steps.auth.outputs.can-bypass }}
+          PINNED_REF: ${{ steps.ref_comment.outputs.ref }}
         with:
           github-token: ${{ github.token }}
           script: |
@@ -80,7 +121,12 @@ jobs:
             const issue_number = context.issue.number;
             const runUrl = process.env.RUN_URL;
             const author = process.env.AUTHOR;
-            const body = `@${author} thanks! Kicking off a sweep.\n\nRun: ${runUrl}\nApproval: required in environment 'bryan-test'.`;
+            const genCmd = process.env.GEN_CMD || '';
+            const canBypass = (process.env.CAN_BYPASS || '').toLowerCase() === 'true';
+            const pinned = process.env.PINNED_REF || '';
+            const shortSha = pinned ? pinned.slice(0, 7) : '';
+            const approvalMsg = canBypass ? 'Approval: not required (trusted collaborator).' : "Approval: required in environment 'bryan-test'.";
+            const body = `@${author} thanks! Kicking off a sweep.\n\nRun: ${runUrl}\nCommand: \`${genCmd}\`\nPinned ref: \`${shortSha}\`\n${approvalMsg}`;
             await github.rest.issues.createComment({ owner, repo, issue_number, body });
 
       - name: Find PR for this branch (if any)
@@ -116,6 +162,30 @@ jobs:
           echo "pr-number=$pr_number" >> "$GITHUB_OUTPUT"
           echo "generator-args=$gen_args" >> "$GITHUB_OUTPUT"
 
+      # ---- Immutable ref for push/dispatch:
+      # If this is tied to an open PR, pin to PR head SHA; else use the pushed commit SHA.
+      - name: Resolve immutable ref (pin PR to head SHA; else use event SHA)
+        id: ref_other
+        if: ${{ github.event_name != 'issue_comment' }}
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        env:
+          PR_NUMBER: ${{ steps.resolve.outputs.pr-number }}
+        with:
+          script: |
+            const owner = context.repo.owner;
+            const repo = context.repo.repo;
+            const prStr = process.env.PR_NUMBER || '';
+            if (prStr) {
+              const pr = Number(prStr);
+              const res = await github.rest.pulls.get({ owner, repo, pull_number: pr });
+              const sha = res.data.head.sha;
+              core.info(`Resolved PR #${pr} head SHA: ${sha}`);
+              core.setOutput('ref', sha);
+            } else {
+              core.info(`No PR detected; using event SHA: ${context.sha}`);
+              core.setOutput('ref', context.sha);
+            }
+
       - name: Reply with run link (manual trigger)
         if: ${{ github.event_name == 'workflow_dispatch' && steps.resolve.outputs.pr-number != '' && github.repository_owner == 'InferenceMAX' }}
         uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
@@ -124,6 +194,8 @@ jobs:
           RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
           AUTHOR: ${{ github.actor }}
           ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }}
+          GEN_CMD: ${{ steps.resolve.outputs.generator-args }}
+          PINNED_REF: ${{ steps.ref_other.outputs.ref }}
         with:
           github-token: ${{ github.token }}
           script: |
@@ -132,7 +204,10 @@ jobs:
             const issue_number = process.env.ISSUE_NUMBER;
             const runUrl = process.env.RUN_URL;
             const author = process.env.AUTHOR;
-            const body = `@${author} triggered a manual sweep.\n\nRun: ${runUrl}\n(Manual run on branch ${context.ref})`;
+            const genCmd = process.env.GEN_CMD || '';
+            const pinned = process.env.PINNED_REF || '';
+            const shortSha = pinned ? pinned.slice(0, 7) : '';
+            const body = `@${author} triggered a manual sweep.\n\nRun: ${runUrl}\nCommand: \`${genCmd}\`\nPinned ref: \`${shortSha}\`\n(Manual run on branch ${context.ref})`;
             await github.rest.issues.createComment({ owner, repo, issue_number, body });
 
       - name: Reply with run link (push trigger)
@@ -143,6 +218,8 @@ jobs:
           RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
           AUTHOR: ${{ github.actor }}
           ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }}
+          GEN_CMD: ${{ steps.resolve.outputs.generator-args }}
+          PINNED_REF: ${{ steps.ref_other.outputs.ref }}
         with:
           github-token: ${{ github.token }}
           script: |
@@ -151,28 +228,47 @@ jobs:
             const issue_number = process.env.ISSUE_NUMBER;
             const runUrl = process.env.RUN_URL;
             const author = process.env.AUTHOR;
-            const body = `@${author} pushed changes and triggered a sweep.\n\nRun: ${runUrl}\n(Push on ${context.ref})`;
+            const genCmd = process.env.GEN_CMD || '';
+            const pinned = process.env.PINNED_REF || '';
+            const shortSha = pinned ? pinned.slice(0, 7) : '';
+            const body = `@${author} pushed changes and triggered a sweep.\n\nRun: ${runUrl}\nCommand: \`${genCmd}\`\nPinned ref: \`${shortSha}\`\n(Push on ${context.ref})`;
             await github.rest.issues.createComment({ owner, repo, issue_number, body });
 
   approval:
     needs: get-jobs
-    if: ${{ (github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '') || (github.event_name == 'push' && needs.get-jobs.outputs.generator-args != '') }}
+    # Require environment approval when:
+    # - push events (unchanged), or
+    # - PR comments from non-trusted authors
+    if: ${{ (github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' && needs.get-jobs.outputs.author-can-bypass != 'true') || (github.event_name == 'push' && needs.get-jobs.outputs.generator-args != '') }}
     runs-on: ubuntu-latest
     name: approval
     environment: bryan-test
     steps:
       - run: echo "approved"
 
+  validate-trusted:
+    needs: [get-jobs]  # no dependency on 'approval': authors with can-bypass == 'true' skip the environment gate
+    if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' && needs.get-jobs.outputs.author-can-bypass == 'true' }}
+    uses: ./.github/workflows/e2e-tests.yml
+    name: validate (trusted author)
+    secrets: inherit
+    with:
+      generate-cli-command: ${{ needs.get-jobs.outputs.generator-args }}
+      test-name: "PR #${{ needs.get-jobs.outputs.pr-number }} sweep"  # quoted: an unquoted " #" starts a YAML comment
+      # Use pinned SHA to prevent TOCTOU on refs/pull/<n>/head
+      ref: ${{ needs.get-jobs.outputs.ref }}
+
   validate:
     needs: [get-jobs, approval]
-    if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' && needs.approval.result == 'success' }}
+    if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' && needs.get-jobs.outputs.author-can-bypass != 'true' && needs.approval.result == 'success' }}
     uses: ./.github/workflows/e2e-tests.yml
     name: validate
     secrets: inherit
     with:
       generate-cli-command: ${{ needs.get-jobs.outputs.generator-args }}
       test-name: PR #${{ needs.get-jobs.outputs.pr-number }} sweep
-      ref: refs/pull/${{ needs.get-jobs.outputs.pr-number }}/head
+      # Use pinned SHA to prevent TOCTOU on refs/pull/<n>/head
+      ref: ${{ needs.get-jobs.outputs.ref }}
 
   validate-nonpr:
     needs: [get-jobs, approval]
@@ -183,7 +279,8 @@ jobs:
     with:
       generate-cli-command: ${{ needs.get-jobs.outputs.generator-args }}
       test-name: Manual/Push sweep
-      ref: ${{ needs.get-jobs.outputs.pr-number && format('refs/pull/{0}/head', needs.get-jobs.outputs.pr-number) || '' }}
+      # For push/dispatch, this is either PR head SHA (if PR found) or event SHA.
+      ref: ${{ needs.get-jobs.outputs.ref }}
 
   note-ignored:
     # Inform when comment doesn't meet criteria (non-PR or not a /sweep)
@@ -191,4 +288,4 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - run: |
-          echo "Comment ignored. Either not on a PR or not a /sweep command. For PR comments, runs require environment approval."
+          echo "Comment ignored. Either not on a PR or not a /sweep command. For PR comments, runs require environment approval."
\ No newline at end of file

From 80e40d4467599bc2dbca080c1c92d84746d118d5 Mon Sep 17 00:00:00 2001
From: Oseltamivir <bryansg2013@gmail.com>
Date: Wed, 17 Dec 2025 08:10:52 -0800
Subject: [PATCH 17/19] Test new env

---
 .github/workflows/pr-comment-sweep.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml
index dfc502d8d..ae3c0e52f 100644
--- a/.github/workflows/pr-comment-sweep.yml
+++ b/.github/workflows/pr-comment-sweep.yml
@@ -125,7 +125,7 @@ jobs:
             const canBypass = (process.env.CAN_BYPASS || '').toLowerCase() === 'true';
             const pinned = process.env.PINNED_REF || '';
             const shortSha = pinned ? pinned.slice(0, 7) : '';
-            const approvalMsg = canBypass ? 'Approval: not required (trusted collaborator).' : "Approval: required in environment 'bryan-test'.";
+            const approvalMsg = canBypass ? 'Approval: not required (trusted collaborator).' : "Approval: required in environment 'Outside Collaborator E2E Test'.";
             const body = `@${author} thanks! Kicking off a sweep.\n\nRun: ${runUrl}\nCommand: \`${genCmd}\`\nPinned ref: \`${shortSha}\`\n${approvalMsg}`;
             await github.rest.issues.createComment({ owner, repo, issue_number, body });
 
@@ -242,7 +242,7 @@ jobs:
     if: ${{ (github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' && needs.get-jobs.outputs.author-can-bypass != 'true') || (github.event_name == 'push' && needs.get-jobs.outputs.generator-args != '') }}
     runs-on: ubuntu-latest
     name: approval
-    environment: bryan-test
+    environment: Outside Collaborator E2E Test
     steps:
       - run: echo "approved"
 

From 724c37015aa54eb5438af416dfcf5fb788c80d77 Mon Sep 17 00:00:00 2001
From: Oseltamivir <bryansg2013@gmail.com>
Date: Wed, 17 Dec 2025 08:53:48 -0800
Subject: [PATCH 18/19] Ready for merge

---
 .github/workflows/pr-comment-sweep.yml | 165 ++-----------------------
 1 file changed, 10 insertions(+), 155 deletions(-)

diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml
index ae3c0e52f..a66b4dc7a 100644
--- a/.github/workflows/pr-comment-sweep.yml
+++ b/.github/workflows/pr-comment-sweep.yml
@@ -2,25 +2,8 @@ name: Slash Command Sweep
 run-name: "Validate PR #${{ github.event.issue.number }}"
 
 on:
-  # PR comment trigger
   issue_comment:
     types: [created]
-  # Manual trigger
-  workflow_dispatch:
-    inputs:
-      pr-number:
-        description: PR number to checkout (refs/pull/<num>/head)
-        required: false
-        type: string
-      generator-args:
-        description: Args passed to generate_sweep_configs.py (omit /sweep)
-        required: false
-        type: string
-  # Push-based example/testing
-  push:
-    branches-ignore:
-      - main
-      - master
 
 concurrency:
   group: "PR#${{ github.event.issue.number || github.ref_name }}"
@@ -33,15 +16,15 @@ permissions:
 
 jobs:
   get-jobs:
-    # Skip for PR comments that are not /sweep; run for all other triggers
-    if: ${{ github.event_name != 'issue_comment' || (github.event.issue.pull_request && startsWith(github.event.comment.body, '/sweep')) }}
+    # Only run for PR comments that start with /sweep
+    if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '/sweep') }}
     runs-on: ubuntu-latest
     outputs:
-      pr-number: ${{ steps.parse.outputs.pr-number || steps.resolve.outputs.pr-number }}
-      generator-args: ${{ steps.parse.outputs.generator-args || steps.resolve.outputs.generator-args }}
+      pr-number: ${{ steps.parse.outputs.pr-number }}
+      generator-args: ${{ steps.parse.outputs.generator-args }}
       author-can-bypass: ${{ steps.auth.outputs.can-bypass }}
-      # IMPORTANT: immutable ref (commit SHA) to prevent TOCTOU on refs/pull/<n>/head
-      ref: ${{ steps.ref_comment.outputs.ref || steps.ref_other.outputs.ref }}
+      # Immutable ref (commit SHA) to prevent TOCTOU on refs/pull/<n>/head
+      ref: ${{ steps.ref_comment.outputs.ref }}
     steps:
       - name: Parse PR comment (/sweep ...)
         id: parse
@@ -68,7 +51,7 @@ jobs:
           echo "generator-args=$cmd_args" >> "$GITHUB_OUTPUT"
           echo "pr-number=$PR_NUMBER" >> "$GITHUB_OUTPUT"
 
-      - name: Check author permissions (PR comments)
+      - name: Check author permissions
         id: auth
         if: ${{ github.event_name == 'issue_comment' && github.event.issue.pull_request }}
         uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
@@ -88,7 +71,7 @@ jobs:
             core.info(`Author ${username} permission: ${permission}; bypass=${canBypass}`);
             core.setOutput('can-bypass', canBypass ? 'true' : 'false');
 
-      # ---- PR SHA pinning (issue_comment path) ----
+      # ---- PR SHA pinning ----
       - name: Resolve immutable PR ref (pin to head SHA)
         id: ref_comment
         if: ${{ github.event_name == 'issue_comment' && github.event.issue.pull_request && startsWith(github.event.comment.body, '/sweep') }}
@@ -126,120 +109,12 @@ jobs:
             const pinned = process.env.PINNED_REF || '';
             const shortSha = pinned ? pinned.slice(0, 7) : '';
             const approvalMsg = canBypass ? 'Approval: not required (trusted collaborator).' : "Approval: required in environment 'Outside Collaborator E2E Test'.";
-            const body = `@${author} thanks! Kicking off a sweep.\n\nRun: ${runUrl}\nCommand: \`${genCmd}\`\nPinned ref: \`${shortSha}\`\n${approvalMsg}`;
-            await github.rest.issues.createComment({ owner, repo, issue_number, body });
-
-      - name: Find PR for this branch (if any)
-        id: find
-        if: ${{ github.event_name != 'issue_comment' }}
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
-        with:
-          script: |
-            const owner = context.repo.owner;
-            const repo = context.repo.repo;
-            const branch = context.ref.replace('refs/heads/', '');
-            const res = await github.rest.pulls.list({ owner, repo, state: 'open', head: `${owner}:${branch}` });
-            const num = res.data[0]?.number ? String(res.data[0].number) : '';
-            core.setOutput('pr-number', num);
-
-      - name: Prepare inputs (push/dispatch)
-        id: resolve
-        if: ${{ github.event_name != 'issue_comment' }}
-        shell: bash
-        env:
-          DISPATCH_PR: ${{ github.event.inputs.pr-number }}
-          DISPATCH_ARGS: ${{ github.event.inputs.generator-args }}
-        run: |
-          set -euo pipefail
-          pr_from_branch='${{ steps.find.outputs.pr-number }}'
-          pr_number="${DISPATCH_PR:-}"; if [[ -z "$pr_number" ]]; then pr_number="$pr_from_branch"; fi
-          gen_args="${DISPATCH_ARGS:-}"
-          if [[ -z "$gen_args" ]]; then
-            gen_args='full-sweep --single-node --runner-type h200 --model-prefix dsr1 --seq-lens 1k1k --max-conc 4'
-          fi
-          echo "Resolved PR: $pr_number";
-          echo "Using generator args: $gen_args";
-          echo "pr-number=$pr_number" >> "$GITHUB_OUTPUT"
-          echo "generator-args=$gen_args" >> "$GITHUB_OUTPUT"
-
-      # ---- Immutable ref for push/dispatch:
-      # If this is tied to an open PR, pin to PR head SHA; else use the pushed commit SHA.
-      - name: Resolve immutable ref (pin PR to head SHA; else use event SHA)
-        id: ref_other
-        if: ${{ github.event_name != 'issue_comment' }}
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
-        env:
-          PR_NUMBER: ${{ steps.resolve.outputs.pr-number }}
-        with:
-          script: |
-            const owner = context.repo.owner;
-            const repo = context.repo.repo;
-            const prStr = process.env.PR_NUMBER || '';
-            if (prStr) {
-              const pr = Number(prStr);
-              const res = await github.rest.pulls.get({ owner, repo, pull_number: pr });
-              const sha = res.data.head.sha;
-              core.info(`Resolved PR #${pr} head SHA: ${sha}`);
-              core.setOutput('ref', sha);
-            } else {
-              core.info(`No PR detected; using event SHA: ${context.sha}`);
-              core.setOutput('ref', context.sha);
-            }
-
-      - name: Reply with run link (manual trigger)
-        if: ${{ github.event_name == 'workflow_dispatch' && steps.resolve.outputs.pr-number != '' && github.repository_owner == 'InferenceMAX' }}
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
-        continue-on-error: true
-        env:
-          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-          AUTHOR: ${{ github.actor }}
-          ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }}
-          GEN_CMD: ${{ steps.resolve.outputs.generator-args }}
-          PINNED_REF: ${{ steps.ref_other.outputs.ref }}
-        with:
-          github-token: ${{ github.token }}
-          script: |
-            const owner = context.repo.owner;
-            const repo = context.repo.repo;
-            const issue_number = process.env.ISSUE_NUMBER;
-            const runUrl = process.env.RUN_URL;
-            const author = process.env.AUTHOR;
-            const genCmd = process.env.GEN_CMD || '';
-            const pinned = process.env.PINNED_REF || '';
-            const shortSha = pinned ? pinned.slice(0, 7) : '';
-            const body = `@${author} triggered a manual sweep.\n\nRun: ${runUrl}\nCommand: \`${genCmd}\`\nPinned ref: \`${shortSha}\`\n(Manual run on branch ${context.ref})`;
-            await github.rest.issues.createComment({ owner, repo, issue_number, body });
-
-      - name: Reply with run link (push trigger)
-        if: ${{ github.event_name == 'push' && steps.resolve.outputs.pr-number != '' && github.repository_owner == 'InferenceMAX' }}
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
-        continue-on-error: true
-        env:
-          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-          AUTHOR: ${{ github.actor }}
-          ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }}
-          GEN_CMD: ${{ steps.resolve.outputs.generator-args }}
-          PINNED_REF: ${{ steps.ref_other.outputs.ref }}
-        with:
-          github-token: ${{ github.token }}
-          script: |
-            const owner = context.repo.owner;
-            const repo = context.repo.repo;
-            const issue_number = process.env.ISSUE_NUMBER;
-            const runUrl = process.env.RUN_URL;
-            const author = process.env.AUTHOR;
-            const genCmd = process.env.GEN_CMD || '';
-            const pinned = process.env.PINNED_REF || '';
-            const shortSha = pinned ? pinned.slice(0, 7) : '';
-            const body = `@${author} pushed changes and triggered a sweep.\n\nRun: ${runUrl}\nCommand: \`${genCmd}\`\nPinned ref: \`${shortSha}\`\n(Push on ${context.ref})`;
+            const body = `@${author} Kicking off a sweep.\n\nRun: ${runUrl}\nCommand: \`${genCmd}\`\nPinned ref: \`${shortSha}\`\n${approvalMsg}`;
             await github.rest.issues.createComment({ owner, repo, issue_number, body });
 
   approval:
     needs: get-jobs
-    # Require environment approval when:
-    # - push events (unchanged), or
-    # - PR comments from non-trusted authors
-    if: ${{ (github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' && needs.get-jobs.outputs.author-can-bypass != 'true') || (github.event_name == 'push' && needs.get-jobs.outputs.generator-args != '') }}
+    if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' && needs.get-jobs.outputs.author-can-bypass != 'true' }}
     runs-on: ubuntu-latest
     name: approval
     environment: Outside Collaborator E2E Test
@@ -269,23 +144,3 @@ jobs:
       test-name: PR #${{ needs.get-jobs.outputs.pr-number }} sweep
       # Use pinned SHA to prevent TOCTOU on refs/pull/<n>/head
       ref: ${{ needs.get-jobs.outputs.ref }}
-
-  validate-nonpr:
-    needs: [get-jobs, approval]
-    if: ${{ needs.get-jobs.outputs.generator-args != '' && ((github.event_name == 'push' && needs.approval.result == 'success') || github.event_name == 'workflow_dispatch') }}
-    uses: ./.github/workflows/e2e-tests.yml
-    name: validate (manual/push)
-    secrets: inherit
-    with:
-      generate-cli-command: ${{ needs.get-jobs.outputs.generator-args }}
-      test-name: Manual/Push sweep
-      # For push/dispatch, this is either PR head SHA (if PR found) or event SHA.
-      ref: ${{ needs.get-jobs.outputs.ref }}
-
-  note-ignored:
-    # Inform when comment doesn't meet criteria (non-PR or not a /sweep)
-    if: ${{ github.event_name == 'issue_comment' && (!github.event.issue.pull_request || !startsWith(github.event.comment.body, '/sweep')) }}
-    runs-on: ubuntu-latest
-    steps:
-      - run: |
-          echo "Comment ignored. Either not on a PR or not a /sweep command. For PR comments, runs require environment approval."
\ No newline at end of file

From a397f9a983c9322e2f5da08bf2e99ff05ccd18fc Mon Sep 17 00:00:00 2001
From: Ankur Singh <ankusingh@nvidia.com>
Date: Wed, 17 Dec 2025 07:55:29 -0800
Subject: [PATCH 19/19] Add benchmark script for GPTOSS FP4 B200 TRT-LLM (#256)

* Add benchmark script for GPTOSS FP4 B200 TRT-LLM

* make changes to perf changelog

---------

Co-authored-by: Cameron Quilici <cjquilici@gmail.com>
---
 benchmarks/gptoss_fp4_b200_trt_docker.sh | 90 ++++++++++++++++++++++++
 perf-changelog.yaml                      |  5 ++
 2 files changed, 95 insertions(+)
 create mode 100644 benchmarks/gptoss_fp4_b200_trt_docker.sh

diff --git a/benchmarks/gptoss_fp4_b200_trt_docker.sh b/benchmarks/gptoss_fp4_b200_trt_docker.sh
new file mode 100644
index 000000000..1f5fbe868
--- /dev/null
+++ b/benchmarks/gptoss_fp4_b200_trt_docker.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+# Launch trtllm-serve for $MODEL in the background, wait until it is ready, then benchmark it.
+# === Required Env Vars ===
+# MODEL
+# PORT
+# TP
+# EP_SIZE
+# DP_ATTENTION
+# CONC
+# ISL
+# OSL
+# MAX_MODEL_LEN
+# RANDOM_RANGE_RATIO
+# NUM_PROMPTS
+# RESULT_FILENAME
+
+SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
+
+# GPTOSS TRTLLM Deployment Guide:
+# https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/deployment-guide/quick-start-recipe-for-gpt-oss-on-trtllm.md
+
+MOE_BACKEND="TRTLLM"
+echo "MOE_BACKEND set to '$MOE_BACKEND'"
+
+EXTRA_CONFIG_FILE="gptoss-fp4.yml"
+export TRTLLM_ENABLE_PDL=1
+export NCCL_GRAPH_REGISTER=0
+
+cat > "$EXTRA_CONFIG_FILE" << EOF
+cuda_graph_config:
+    enable_padding: true
+    max_batch_size: $CONC
+enable_attention_dp: $DP_ATTENTION
+kv_cache_config:
+    dtype: fp8
+    enable_block_reuse: false
+    free_gpu_memory_fraction: 0.85
+print_iter_log: true
+stream_interval: 20
+num_postprocess_workers: 4
+moe_config:
+    backend: $MOE_BACKEND
+EOF
+
+if [[ "$DP_ATTENTION" == "true" ]]; then
+    cat << EOF >> "$EXTRA_CONFIG_FILE"
+attention_dp_config:
+    enable_balance: true
+EOF
+fi
+
+echo "Generated config file contents:"
+cat "$EXTRA_CONFIG_FILE"
+
+set -x
+
+MAX_NUM_TOKENS=20000
+
+# Launch TRT-LLM server in the background; stdout/stderr are captured in $SERVER_LOG
+mpirun -n 1 --oversubscribe --allow-run-as-root \
+    trtllm-serve "$MODEL" --port="$PORT" \
+    --trust_remote_code \
+    --backend=pytorch \
+    --max_batch_size 512 \
+    --max_seq_len="$MAX_MODEL_LEN" \
+    --max_num_tokens="$MAX_NUM_TOKENS" \
+    --tp_size="$TP" --ep_size="$EP_SIZE" \
+    --extra_llm_api_options="$EXTRA_CONFIG_FILE" > "$SERVER_LOG" 2>&1 &
+
+SERVER_PID=$!
+
+# Source benchmark utilities
+source "$(dirname "$0")/benchmark_lib.sh"
+
+# Wait for server to be ready
+wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
+
+pip install -q datasets pandas
+
+run_benchmark_serving \
+    --model "$MODEL" \
+    --port "$PORT" \
+    --backend openai \
+    --input-len "$ISL" \
+    --output-len "$OSL" \
+    --random-range-ratio "$RANDOM_RANGE_RATIO" \
+    --num-prompts "$NUM_PROMPTS" \
+    --max-concurrency "$CONC" \
+    --result-filename "$RESULT_FILENAME" \
+    --result-dir /workspace/
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 926ac7e1d..995c0a684 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -86,3 +86,8 @@
   description: |
     - Updating MI355x Deepseek-R1 FP4 SGLang Image to upstream v0.5.6.post1 
     PR: https://github.com/InferenceMAX/InferenceMAX/pull/330
+- config-keys:
+    - gptoss-fp4-b200-trt
+  description: |
+    - Add benchmark script for GPTOSS FP4 B200 TRT-LLM
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/256