From 433f2ef1c028ab659ff76dc6cd61346a11a3602b Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Wed, 10 Dec 2025 15:12:21 -0600
Subject: [PATCH 01/29] add logic for event driven runs

new single workflow that runs on merge to main, new perf-changelog.yaml to track performance changes, new logic to parse the changelog, removed cron jobs in full sweep schedulers
---
 .../workflows/full-sweep-1k1k-scheduler.yml   |   2 -
 .../workflows/full-sweep-1k8k-scheduler.yml   |   2 -
 .../workflows/full-sweep-8k1k-scheduler.yml   |   2 -
 .github/workflows/run-sweep.yml               | 233 ++++++++++++++++++
 perf-changelog.yaml                           |   8 +
 utils/constants.py                            |   4 +
 utils/matrix_logic/generate_sweep_configs.py  | 190 +++++++++++---
 .../test_generate_sweep_configs.py            |  86 -------
 utils/matrix_logic/test_validation.py         | 129 ++++++++++
 utils/matrix_logic/validation.py              | 116 +++++++++
 utils/process_changelog.py                    | 143 +++++++++++
 11 files changed, 784 insertions(+), 131 deletions(-)
 create mode 100644 .github/workflows/run-sweep.yml
 create mode 100644 perf-changelog.yaml
 create mode 100644 utils/constants.py
 create mode 100644 utils/process_changelog.py

diff --git a/.github/workflows/full-sweep-1k1k-scheduler.yml b/.github/workflows/full-sweep-1k1k-scheduler.yml
index 8b32f47c0..3c592cf0a 100644
--- a/.github/workflows/full-sweep-1k1k-scheduler.yml
+++ b/.github/workflows/full-sweep-1k1k-scheduler.yml
@@ -2,8 +2,6 @@ name: "Full Sweep Scheduler - 1k1k"
 
 on:
     workflow_dispatch:
-    schedule:
-        - cron: "0 0 * * *"
 
 jobs:
     get-dsr1-configs:
diff --git a/.github/workflows/full-sweep-1k8k-scheduler.yml b/.github/workflows/full-sweep-1k8k-scheduler.yml
index 393864fdf..be909aad5 100644
--- a/.github/workflows/full-sweep-1k8k-scheduler.yml
+++ b/.github/workflows/full-sweep-1k8k-scheduler.yml
@@ -2,8 +2,6 @@ name: "Full Sweep Scheduler - 1k8k"
 
 on:
     workflow_dispatch:
-    schedule:
-        - cron: "0 0 * * *"
 
 jobs:
     get-dsr1-configs:
diff --git a/.github/workflows/full-sweep-8k1k-scheduler.yml b/.github/workflows/full-sweep-8k1k-scheduler.yml
index 629e56bd9..3eabe74f4 100644
--- a/.github/workflows/full-sweep-8k1k-scheduler.yml
+++ b/.github/workflows/full-sweep-8k1k-scheduler.yml
@@ -2,8 +2,6 @@ name: "Full Sweep Scheduler - 8k1k"
 
 on:
     workflow_dispatch:
-    schedule:
-        - cron: "0 0 * * *"
 
 jobs:
     get-dsr1-configs:
diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
new file mode 100644
index 000000000..af4cd9605
--- /dev/null
+++ b/.github/workflows/run-sweep.yml
@@ -0,0 +1,233 @@
+name: "Run Sweep"
+run-name: Run Sweep - ${{ github.event.pull_request.title || github.ref_name }}
+
+concurrency:
+    group: sweep-${{ github.event.pull_request.number || github.sha }} # one group per PR; one group per commit on push, so a later merge never cancels an earlier merge's sweep
+    cancel-in-progress: true # in practice only supersedes an older run of the same PR (push groups are unique)
+
+on:
+    push:
+        branches:
+            - main
+        paths:
+            - "perf-changelog.yaml" # sweeps are triggered only by changelog edits
+    pull_request:
+        branches:
+            - main
+        types:
+            - ready_for_review
+            - synchronize
+            - labeled # any label change re-evaluates the 'sweep-enabled' gate in the setup job
+        paths:
+            - "perf-changelog.yaml"
+jobs:
+    setup: # runs utils/process_changelog.py and publishes its stdout as the search-space-config output
+        runs-on: ubuntu-latest
+        if: >- # PRs: non-draft and labelled 'sweep-enabled'; other events: unless the head commit message contains [skip-sweep]
+            (github.event_name == 'pull_request' && !github.event.pull_request.draft && contains(github.event.pull_request.labels.*.name, 'sweep-enabled')) ||
+            (github.event_name != 'pull_request' && !contains(github.event.head_commit.message, '[skip-sweep]'))
+        outputs:
+            search-space-config: ${{ steps.setup.outputs.search-space-config }} # JSON string; consumers must fromJson() it
+        steps:
+            - name: Checkout code
+              uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+              with:
+                  fetch-depth: 0 # full history; presumably so BASE_REF/HEAD_REF can be resolved locally — confirm
+
+            - id: setup # NOTE(review): the output is written as one key=value line, so CONFIG_JSON must be single-line JSON — confirm process_changelog.py prints compact JSON
+              run: |
+                  pip install pydantic
+
+                  if [ "${{ github.event_name }}" == "pull_request" ]; then
+                      BASE_REF="origin/${{ github.base_ref }}"
+                      HEAD_REF="${{ github.event.pull_request.head.sha }}"
+                  else
+                      BASE_REF="${{ github.event.before }}"
+                      HEAD_REF="${{ github.event.after }}"
+                  fi
+
+                  CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/process_changelog.py \
+                      --changelog-file ${GITHUB_WORKSPACE}/perf-changelog.yaml \
+                      --base-ref "$BASE_REF" \
+                      --head-ref "$HEAD_REF")
+
+                  echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
+
+    sweep-multi-node-1k1k:
+        needs: setup
+        if: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'][0] != null }} # job outputs are strings: parse first, then run only when the list exists and has at least one entry
+        uses: ./.github/workflows/benchmark-multinode-tmpl.yml
+        name: multi-node 1k1k /
+        strategy:
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }}
+        secrets: inherit
+        with: &multi-node-inputs # anchor reused by the 1k8k and 8k1k multi-node jobs
+            isl: ${{ matrix.config.isl }}
+            osl: ${{ matrix.config.osl }}
+            max-model-len: ${{ matrix.config.max-model-len }}
+            runner: ${{ matrix.config.runner }}
+            image: ${{ matrix.config.image }}
+            model: ${{ matrix.config.model }}
+            model-prefix: ${{ matrix.config.model-prefix }}
+            framework: ${{ matrix.config.framework }}
+            precision: ${{ matrix.config.precision }}
+            exp-name: ${{ matrix.config.exp-name }}
+            conc-list: ${{ toJson(matrix.config.conc) }}
+            spec-decoding: ${{ matrix.config.spec-decoding }}
+            disagg: ${{ matrix.config.disagg }}
+
+            prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
+            prefill-tp: ${{ matrix.config.prefill.tp }}
+            prefill-ep: ${{ matrix.config.prefill.ep }}
+            prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
+            prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
+
+            decode-num-worker: ${{ matrix.config.decode.num-worker }}
+            decode-tp: ${{ matrix.config.decode.tp }}
+            decode-ep: ${{ matrix.config.decode.ep }}
+            decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
+            decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
+
+    sweep-multi-node-1k8k:
+        needs: setup
+        if: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k8k'][0] != null }} # job outputs are strings: parse first, then run only when the list exists and has at least one entry
+        uses: ./.github/workflows/benchmark-multinode-tmpl.yml
+        name: multi-node 1k8k /
+        strategy:
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k8k'] }}
+        secrets: inherit
+        with: *multi-node-inputs # same inputs mapping as sweep-multi-node-1k1k
+
+    sweep-multi-node-8k1k:
+        needs: setup
+        if: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k'][0] != null }} # job outputs are strings: parse first, then run only when the list exists and has at least one entry
+        uses: ./.github/workflows/benchmark-multinode-tmpl.yml
+        name: multi-node 8k1k /
+        strategy:
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k'] }}
+        secrets: inherit
+        with: *multi-node-inputs # same inputs mapping as sweep-multi-node-1k1k
+
+    sweep-single-node-1k1k:
+        needs: setup
+        if: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'][0] != null }} # job outputs are strings: parse first, then run only when the list exists and has at least one entry
+        uses: ./.github/workflows/benchmark-tmpl.yml
+        name: single-node 1k1k /
+        strategy:
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }}
+        secrets: inherit
+        with: &single-node-inputs # anchor reused by the 1k8k and 8k1k single-node jobs
+            exp-name: ${{ matrix.config.exp-name }}
+            isl: ${{ matrix.config.isl }}
+            osl: ${{ matrix.config.osl }}
+            max-model-len: ${{ matrix.config.max-model-len }}
+            runner: ${{ matrix.config.runner }}
+            image: ${{ matrix.config.image }}
+            model: ${{ matrix.config.model }}
+            model-prefix: ${{ matrix.config.model-prefix }}
+            framework: ${{ matrix.config.framework }}
+            precision: ${{ matrix.config.precision }}
+            tp: ${{ matrix.config.tp }}
+            ep: ${{ matrix.config.ep }}
+            dp-attn: ${{ matrix.config.dp-attn }}
+            conc: ${{ matrix.config.conc }}
+            spec-decoding: ${{ matrix.config.spec-decoding }}
+            disagg: ${{ matrix.config.disagg }}
+
+    sweep-single-node-1k8k:
+        needs: setup
+        if: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k8k'][0] != null }} # job outputs are strings: parse first, then run only when the list exists and has at least one entry
+        uses: ./.github/workflows/benchmark-tmpl.yml
+        name: single-node 1k8k /
+        strategy:
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k8k'] }}
+        secrets: inherit
+        with: *single-node-inputs # same inputs mapping as sweep-single-node-1k1k
+
+    sweep-single-node-8k1k:
+        needs: setup
+        if: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['8k1k'][0] != null }} # job outputs are strings: parse first, then run only when the list exists and has at least one entry
+        uses: ./.github/workflows/benchmark-tmpl.yml
+        name: single-node 8k1k /
+        strategy:
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['8k1k'] }}
+        secrets: inherit
+        with: *single-node-inputs # same inputs mapping as sweep-single-node-1k1k
+
+    collect-results: # reusable workflow invoked after all six sweep jobs
+        needs:
+            [
+                sweep-single-node-1k1k,
+                sweep-single-node-1k8k,
+                sweep-single-node-8k1k,
+                sweep-multi-node-1k1k,
+                sweep-multi-node-1k8k,
+                sweep-multi-node-8k1k,
+                setup,
+            ]
+        if: ${{ always() && needs.setup.result != 'skipped' }} # always(): still runs when sweep jobs failed or were skipped, unless setup itself was skipped
+        uses: ./.github/workflows/collect-results.yml
+        secrets: inherit
+
+    upload-changelog-metadata: # saves the changelog_metadata field of the setup output as an artifact
+        needs: [setup, collect-results]
+        if: ${{ needs.setup.result != 'skipped' }} # NOTE(review): no status function here, so the implicit success() also requires collect-results to succeed — confirm intended
+        runs-on: ubuntu-latest
+        steps:
+            - name: Extract and save changelog metadata
+              env:
+                  CONFIG_JSON: ${{ needs.setup.outputs.search-space-config }} # passed through env rather than interpolated into the script
+              run: |
+                  echo "$CONFIG_JSON" | jq '.changelog_metadata' > changelog_metadata.json
+
+            - name: Upload changelog artifact
+              uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+              with:
+                  name: changelog-metadata
+                  path: changelog_metadata.json
+
+    calc-success-rate: # runs utils/calc_success_rate.py over the downloaded results artifacts
+        needs: collect-results
+        if: ${{ always() && needs.collect-results.result != 'skipped'}}
+        runs-on: ubuntu-latest
+
+        env:
+            RESULTS_DIR: "results/" # download target for the results_* artifacts
+            STATS_FILENAME: "run_stats" # base name; the upload step appends .json
+            GITHUB_TOKEN: ${{ secrets.REPO_PAT }}
+
+        steps:
+            - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+              with:
+                  token: ${{ secrets.REPO_PAT }}
+                  fetch-depth: 0
+
+            - name: Download results artifacts
+              uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
+              with:
+                  path: ${{ env.RESULTS_DIR }}
+                  pattern: results_* # only artifacts whose names start with results_
+
+            - name: Install python dependencies
+              run: pip install PyGithub
+
+            - name: Calculate success rate
+              run: python3 utils/calc_success_rate.py $STATS_FILENAME
+
+            - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+              with:
+                  name: "run-stats"
+                  path: ${{ env.STATS_FILENAME }}.json # presumably written by calc_success_rate.py — confirm
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
new file mode 100644
index 000000000..76d66e889
--- /dev/null
+++ b/perf-changelog.yaml
@@ -0,0 +1,8 @@
+- config-keys:
+    - gptoss-fp4-mi300x-vllm
+  description: |
+    Updating vllm version for mi300x
+- config-keys:
+    - gptoss-fp4-mi300x-vllm # NOTE(review): the description below says mi325x but this key is mi300x — confirm which is intended
+  description: |
+    Updating vllm version for mi325x
diff --git a/utils/constants.py b/utils/constants.py
new file mode 100644
index 000000000..a465091da
--- /dev/null
+++ b/utils/constants.py
@@ -0,0 +1,4 @@
+MASTER_CONFIGS = [".github/configs/amd-master.yaml",  # master config YAML files (AMD, NVIDIA)
+                  ".github/configs/nvidia-master.yaml"]
+RUNNER_CONFIG = ".github/configs/runners.yaml"  # relative path; presumably resolved from the repository root — confirm against callers
+GENERATE_SWEEPS_PY_SCRIPT = "utils/matrix_logic/generate_sweep_configs.py"  # sweep-matrix generator script
diff --git a/utils/matrix_logic/generate_sweep_configs.py b/utils/matrix_logic/generate_sweep_configs.py
index 8fc47651c..d8fab38cf 100644
--- a/utils/matrix_logic/generate_sweep_configs.py
+++ b/utils/matrix_logic/generate_sweep_configs.py
@@ -1,8 +1,17 @@
 import json
-import yaml
 import argparse
+import sys
+from pathlib import Path
 
-from validation import validate_master_config, validate_matrix_entry, validate_runner_config, Fields
+# Ensure sibling modules are importable regardless of how script is invoked
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+
+from validation import (
+    validate_matrix_entry,
+    load_config_files,
+    load_runner_file,
+    Fields
+)
 
 seq_len_stoi = {
     "1k1k": (1024, 1024),
@@ -366,42 +375,126 @@ def get_lowest_conc(search_space_entry):
     return matrix_values
 
 
-def load_config_files(config_files):
-    """Load and merge configuration files."""
-    all_config_data = {}
-    for config_file in config_files:
-        try:
-            with open(config_file, 'r') as f:
-                config_data = yaml.safe_load(f)
-                assert isinstance(
-                    config_data, dict), f"Config file '{config_file}' must contain a dictionary"
-
-                # Check for duplicate keys, this is only in place to prevent against the very unlikely
-                # case where an entry in one config accidentally/purposefully tries to override an entry in another config
-                duplicate_keys = set(all_config_data.keys()) & set(
-                    config_data.keys())
-                if duplicate_keys:
-                    raise ValueError(
-                        f"Duplicate configuration keys found in '{config_file}': {', '.join(sorted(duplicate_keys))}"
-                    )
-
-                all_config_data.update(config_data)
-        except FileNotFoundError:
-            raise ValueError(f"Input file '{config_file}' does not exist.")
-
-    return all_config_data
-
-
-def load_runner_file(runner_file):
-    """Load runner configuration file."""
-    try:
-        with open(runner_file, 'r') as f:
-            runner_config = yaml.safe_load(f)
-    except FileNotFoundError as e:
+def generate_test_config_sweep(args, all_config_data):
+    """Generate full sweep for specific config keys.
+
+    Raises ValueError if any key in args.config_keys is missing from all_config_data.
+    Returns the list of validated matrix entries; nothing is filtered out.
+    """
+    # Validate all config keys exist
+    missing_keys = [key for key in args.config_keys if key not in all_config_data]
+    if missing_keys:
+        available_keys = sorted(all_config_data.keys())
         raise ValueError(
-            f"Runner config file '{runner_file}' does not exist.")
+            f"Config key(s) not found: {', '.join(missing_keys)}.\n"
+            f"Available keys: {', '.join(available_keys)}"
+        )
+
+    matrix_values = []
+
+    for key in args.config_keys:
+        val = all_config_data[key]
+        is_multinode = val.get(Fields.MULTINODE.value, False)  # configs without the multinode flag are treated as single-node
+
+        image = val[Fields.IMAGE.value]
+        model = val[Fields.MODEL.value]
+        model_code = val[Fields.MODEL_PREFIX.value]
+        precision = val[Fields.PRECISION.value]
+        framework = val[Fields.FRAMEWORK.value]
+        runner = val[Fields.RUNNER.value]
+        disagg = val.get(Fields.DISAGG.value, False)
 
-    return runner_config
+        for seq_len_config in val[Fields.SEQ_LEN_CONFIGS.value]:
+            isl = seq_len_config[Fields.ISL.value]
+            osl = seq_len_config[Fields.OSL.value]
+            seq_len_str = seq_len_to_str(isl, osl)
+
+            for bmk in seq_len_config[Fields.SEARCH_SPACE.value]:
+                if is_multinode:
+                    # Multinode config
+                    spec_decoding = bmk.get(Fields.SPEC_DECODING.value, "none")
+                    prefill = bmk[Fields.PREFILL.value]
+                    decode = bmk[Fields.DECODE.value]
+
+                    # Get concurrency values
+                    if Fields.CONC_LIST.value in bmk:
+                        conc_values = bmk[Fields.CONC_LIST.value]  # an explicit list wins over start/end
+                    else:
+                        conc_start = bmk[Fields.CONC_START.value]
+                        conc_end = bmk[Fields.CONC_END.value]
+                        conc_values = []
+                        conc = conc_start
+                        while conc <= conc_end:  # doubles from conc_start and always finishes exactly on conc_end; NOTE(review): never terminates if conc_start <= 0 — confirm validation rejects that
+                            conc_values.append(conc)
+                            if conc == conc_end:
+                                break
+                            conc *= 2
+                            if conc > conc_end:
+                                conc = conc_end  # clamp the overshoot so conc_end itself is included
+
+                    entry = {
+                        Fields.IMAGE.value: image,
+                        Fields.MODEL.value: model,
+                        Fields.MODEL_PREFIX.value: model_code,
+                        Fields.PRECISION.value: precision,
+                        Fields.FRAMEWORK.value: framework,
+                        Fields.RUNNER.value: runner,
+                        Fields.ISL.value: isl,
+                        Fields.OSL.value: osl,
+                        Fields.SPEC_DECODING.value: spec_decoding,
+                        Fields.PREFILL.value: prefill,
+                        Fields.DECODE.value: decode,
+                        Fields.CONC.value: conc_values,  # multinode: the whole concurrency list goes into one entry
+                        Fields.MAX_MODEL_LEN.value: isl + osl + 200,
+                        Fields.EXP_NAME.value: f"{model_code}_{seq_len_str}",
+                        Fields.DISAGG.value: disagg,
+                    }
+                    matrix_values.append(validate_matrix_entry(entry, is_multinode=True))
+                else:
+                    # Single-node config
+                    tp = bmk[Fields.TP.value]
+                    ep = bmk.get(Fields.EP.value)
+                    dp_attn = bmk.get(Fields.DP_ATTN.value)
+                    spec_decoding = bmk.get(Fields.SPEC_DECODING.value, "none")
+
+                    # Get concurrency values
+                    if Fields.CONC_LIST.value in bmk:
+                        conc_values = bmk[Fields.CONC_LIST.value]
+                    else:
+                        conc_start = bmk[Fields.CONC_START.value]
+                        conc_end = bmk[Fields.CONC_END.value]
+                        conc_values = []
+                        conc = conc_start
+                        while conc <= conc_end:  # same doubling expansion as the multinode branch (same conc_start <= 0 caveat)
+                            conc_values.append(conc)
+                            if conc == conc_end:
+                                break
+                            conc *= 2
+                            if conc > conc_end:
+                                conc = conc_end
+
+                    for conc in conc_values:  # single-node: one matrix entry per concurrency value
+                        entry = {
+                            Fields.IMAGE.value: image,
+                            Fields.MODEL.value: model,
+                            Fields.MODEL_PREFIX.value: model_code,
+                            Fields.PRECISION.value: precision,
+                            Fields.FRAMEWORK.value: framework,
+                            Fields.RUNNER.value: runner,
+                            Fields.ISL.value: isl,
+                            Fields.OSL.value: osl,
+                            Fields.TP.value: tp,
+                            Fields.CONC.value: conc,
+                            Fields.MAX_MODEL_LEN.value: isl + osl + 200,
+                            Fields.EP.value: ep if ep is not None else 1,  # ep defaults to 1 when absent
+                            Fields.DP_ATTN.value: dp_attn if dp_attn is not None else False,  # dp-attn defaults to False when absent
+                            Fields.SPEC_DECODING.value: spec_decoding,
+                            Fields.EXP_NAME.value: f"{model_code}_{seq_len_str}",
+                            Fields.DISAGG.value: disagg,
+                        }
+                        matrix_values.append(validate_matrix_entry(entry, is_multinode=False))
+
+    return matrix_values
 
 
 def main():
@@ -545,13 +638,30 @@ def main():
         help='Show this help message and exit'
     )
 
+    # Subcommand: test-config
+    test_config_keys_parser = subparsers.add_parser(
+        'test-config',
+        parents=[parent_parser],
+        add_help=False,
+        help='Generate full sweep for specific config keys. Validates that all specified keys exist before generating.'
+    )
+    test_config_keys_parser.add_argument(
+        '--config-keys',
+        nargs='+',
+        required=True,
+        help='One or more config keys to generate sweep for (e.g., dsr1-fp4-b200-sglang dsr1-fp8-h200-trt)'
+    )
+    test_config_keys_parser.add_argument(
+        '-h', '--help',
+        action='help',
+        help='Show this help message and exit'
+    )
+
     args = parser.parse_args()
 
-    # Load and validate configuration files
+    # Load and validate configuration files (validation happens by default in load functions)
     all_config_data = load_config_files(args.config_files)
     runner_data = load_runner_file(args.runner_config)
-    validate_master_config(all_config_data)
-    validate_runner_config(runner_data)
 
     # Route to appropriate function based on subcommand
     if args.command == 'full-sweep':
@@ -559,6 +669,8 @@ def main():
     elif args.command == 'runner-model-sweep':
         matrix_values = generate_runner_model_sweep_config(
             args, all_config_data, runner_data)
+    elif args.command == 'test-config':
+        matrix_values = generate_test_config_sweep(args, all_config_data)
     else:
         parser.error(f"Unknown command: {args.command}")
 
diff --git a/utils/matrix_logic/test_generate_sweep_configs.py b/utils/matrix_logic/test_generate_sweep_configs.py
index 1381f394e..c505611c3 100644
--- a/utils/matrix_logic/test_generate_sweep_configs.py
+++ b/utils/matrix_logic/test_generate_sweep_configs.py
@@ -7,8 +7,6 @@
     seq_len_to_str,
     generate_full_sweep,
     generate_runner_model_sweep_config,
-    load_config_files,
-    load_runner_file,
 )
 
 
@@ -583,90 +581,6 @@ def test_uses_lowest_conc(self, sample_single_node_config, sample_runner_config,
         assert all(entry["conc"] == 4 for entry in result)
 
 
-# =============================================================================
-# Test load_config_files
-# =============================================================================
-
-class TestLoadConfigFiles:
-    """Tests for load_config_files function."""
-
-    def test_load_single_file(self, tmp_path):
-        """Should load a single config file."""
-        config_file = tmp_path / "config.yaml"
-        config_file.write_text("""
-test-config:
-  image: test-image
-  model: test-model
-""")
-        result = load_config_files([str(config_file)])
-        assert "test-config" in result
-        assert result["test-config"]["image"] == "test-image"
-
-    def test_load_multiple_files(self, tmp_path):
-        """Should merge multiple config files."""
-        config1 = tmp_path / "config1.yaml"
-        config1.write_text("""
-config-one:
-  value: 1
-""")
-        config2 = tmp_path / "config2.yaml"
-        config2.write_text("""
-config-two:
-  value: 2
-""")
-        result = load_config_files([str(config1), str(config2)])
-        assert "config-one" in result
-        assert "config-two" in result
-
-    def test_duplicate_keys_raise_error(self, tmp_path):
-        """Duplicate keys across files should raise error."""
-        config1 = tmp_path / "config1.yaml"
-        config1.write_text("""
-duplicate-key:
-  value: 1
-""")
-        config2 = tmp_path / "config2.yaml"
-        config2.write_text("""
-duplicate-key:
-  value: 2
-""")
-        with pytest.raises(ValueError) as exc_info:
-            load_config_files([str(config1), str(config2)])
-        assert "Duplicate configuration keys" in str(exc_info.value)
-
-    def test_nonexistent_file_raises_error(self):
-        """Nonexistent file should raise error."""
-        with pytest.raises(ValueError) as exc_info:
-            load_config_files(["nonexistent.yaml"])
-        assert "does not exist" in str(exc_info.value)
-
-
-# =============================================================================
-# Test load_runner_file
-# =============================================================================
-
-class TestLoadRunnerFile:
-    """Tests for load_runner_file function."""
-
-    def test_load_runner_file(self, tmp_path):
-        """Should load runner config file."""
-        runner_file = tmp_path / "runners.yaml"
-        runner_file.write_text("""
-h100:
-- h100-node-0
-- h100-node-1
-""")
-        result = load_runner_file(str(runner_file))
-        assert "h100" in result
-        assert len(result["h100"]) == 2
-
-    def test_nonexistent_runner_file(self):
-        """Nonexistent runner file should raise error."""
-        with pytest.raises(ValueError) as exc_info:
-            load_runner_file("nonexistent.yaml")
-        assert "does not exist" in str(exc_info.value)
-
-
 # =============================================================================
 # Test edge cases and special configurations
 # =============================================================================
diff --git a/utils/matrix_logic/test_validation.py b/utils/matrix_logic/test_validation.py
index 008ed2b42..d9cc7f0d9 100644
--- a/utils/matrix_logic/test_validation.py
+++ b/utils/matrix_logic/test_validation.py
@@ -14,6 +14,8 @@
     validate_matrix_entry,
     validate_master_config,
     validate_runner_config,
+    load_config_files,
+    load_runner_file,
 )
 
 
@@ -738,3 +740,130 @@ def test_multiple_runner_types(self, valid_runner_config):
         assert "h200" in result
         assert "mi300x" in result
         assert "gb200" in result
+
+
+# =============================================================================
+# Test load_config_files
+# =============================================================================
+
+class TestLoadConfigFiles:
+    """Tests for load_config_files function."""
+
+    def test_load_single_file_with_validation(self, tmp_path, valid_single_node_master_config):
+        """Should load and validate a single config file."""
+        config_file = tmp_path / "config.yaml"
+        import yaml  # local import: only this test serialises a fixture with yaml.dump
+        config_file.write_text(yaml.dump({"test-config": valid_single_node_master_config}))
+        result = load_config_files([str(config_file)])
+        assert "test-config" in result
+        assert result["test-config"]["image"] == valid_single_node_master_config["image"]
+
+    def test_load_single_file_without_validation(self, tmp_path):
+        """Should load a single config file without validation when validate=False."""
+        config_file = tmp_path / "config.yaml"
+        config_file.write_text("""
+test-config:
+  image: test-image
+  model: test-model
+""")
+        result = load_config_files([str(config_file)], validate=False)  # fixture has only image/model, so validation is switched off
+        assert "test-config" in result
+        assert result["test-config"]["image"] == "test-image"
+
+    def test_load_multiple_files(self, tmp_path):
+        """Should merge multiple config files."""
+        config1 = tmp_path / "config1.yaml"
+        config1.write_text("""
+config-one:
+  value: 1
+""")
+        config2 = tmp_path / "config2.yaml"
+        config2.write_text("""
+config-two:
+  value: 2
+""")
+        result = load_config_files([str(config1), str(config2)], validate=False)
+        assert "config-one" in result
+        assert "config-two" in result
+
+    def test_duplicate_keys_raise_error(self, tmp_path):
+        """Duplicate keys across files should raise error."""
+        config1 = tmp_path / "config1.yaml"
+        config1.write_text("""
+duplicate-key:
+  value: 1
+""")
+        config2 = tmp_path / "config2.yaml"
+        config2.write_text("""
+duplicate-key:
+  value: 2
+""")
+        with pytest.raises(ValueError) as exc_info:
+            load_config_files([str(config1), str(config2)], validate=False)  # validate=False isolates the duplicate-key check from schema validation
+        assert "Duplicate configuration keys" in str(exc_info.value)
+
+    def test_nonexistent_file_raises_error(self):
+        """Nonexistent file should raise error."""
+        with pytest.raises(ValueError) as exc_info:
+            load_config_files(["nonexistent.yaml"])  # relative path; assumes no such file in the working directory
+        assert "does not exist" in str(exc_info.value)
+
+    def test_validation_runs_by_default(self, tmp_path):
+        """Validation should run by default and catch invalid configs."""
+        config_file = tmp_path / "config.yaml"
+        config_file.write_text("""
+invalid-config:
+  image: test-image
+  # Missing required fields like model, model-prefix, precision, etc.
+""")
+        with pytest.raises(ValueError) as exc_info:
+            load_config_files([str(config_file)])
+        assert "failed validation" in str(exc_info.value)
+
+
+# =============================================================================
+# Test load_runner_file
+# =============================================================================
+
+class TestLoadRunnerFile:
+    """Tests for load_runner_file function."""
+
+    def test_load_runner_file_with_validation(self, tmp_path):
+        """Should load and validate runner config file."""
+        runner_file = tmp_path / "runners.yaml"
+        runner_file.write_text("""
+h100:
+- h100-node-0
+- h100-node-1
+""")
+        result = load_runner_file(str(runner_file))  # default call, i.e. validation enabled
+        assert "h100" in result
+        assert len(result["h100"]) == 2
+
+    def test_load_runner_file_without_validation(self, tmp_path):
+        """Should load runner config file without validation when validate=False."""
+        runner_file = tmp_path / "runners.yaml"
+        runner_file.write_text("""
+h100:
+- h100-node-0
+- h100-node-1
+""")
+        result = load_runner_file(str(runner_file), validate=False)
+        assert "h100" in result
+        assert len(result["h100"]) == 2
+
+    def test_nonexistent_runner_file(self):
+        """Nonexistent runner file should raise error."""
+        with pytest.raises(ValueError) as exc_info:
+            load_runner_file("nonexistent.yaml")  # relative path; assumes no such file in the working directory
+        assert "does not exist" in str(exc_info.value)
+
+    def test_validation_runs_by_default(self, tmp_path):
+        """Validation should run by default and catch invalid configs."""
+        runner_file = tmp_path / "runners.yaml"
+        runner_file.write_text("""
+h100: not-a-list
+""")
+        with pytest.raises(ValueError) as exc_info:
+            load_runner_file(str(runner_file))  # a scalar where a list is expected must be rejected
+        assert "must be a list" in str(exc_info.value)
diff --git a/utils/matrix_logic/validation.py b/utils/matrix_logic/validation.py
index 30012423a..323e9e326 100644
--- a/utils/matrix_logic/validation.py
+++ b/utils/matrix_logic/validation.py
@@ -3,6 +3,7 @@
 from enum import Enum
 
 import pprint
+import yaml
 
 """
     The below class defines the field names expected to be present in the JSON entries
@@ -315,3 +316,118 @@ def validate_runner_config(runner_configs: dict) -> List[dict]:
                 f"Runner config entry '{key}' cannot be an empty list")
 
     return runner_configs
+
+
+"""
+    Below is the validation logic for the changelog entries found in perf-changelog.yaml.
+    This ensures that the changelog entries conform to the expected structure before
+    proceeding with processing.
+"""
+
+
+class ChangelogEntry(BaseModel):
+    """Pydantic model for validating changelog entry structure."""
+    model_config = ConfigDict(extra="forbid", populate_by_name=True)
+
+    config_keys: list[str] = Field(alias="config-keys", min_length=1)
+    description: str
+
+
+class ChangelogMetadata(BaseModel):
+    """Pydantic model for validating changelog metadata structure."""
+    model_config = ConfigDict(extra="forbid")
+
+    base_ref: str
+    head_ref: str
+    entries: list[ChangelogEntry]
+
+
+class ChangelogMatrixEntry(BaseModel):
+    """Pydantic model for validating final changelog matrix entry structure.
+    This imposes a strict contract on the output of process_changelog.py, dictated by
+    the expected input to the run-sweep.yml workflow file.
+    """
+    model_config = ConfigDict(extra="forbid", populate_by_name=True)
+
+    single_node: dict[str, list[SingleNodeMatrixEntry]
+                      ] = Field(default_factory=dict)
+    multi_node: dict[str, list[MultiNodeMatrixEntry]
+                     ] = Field(default_factory=dict)
+    changelog_metadata: ChangelogMetadata
+
+
+# =============================================================================
+# File Loading Functions
+# =============================================================================
+
+
+def load_config_files(config_files: List[str], validate: bool = True) -> dict:
+    """Load and merge configuration files.
+
+    Args:
+        config_files: List of paths to YAML configuration files.
+        validate: If True, run validate_master_config on loaded data. Defaults to True.
+
+    Returns:
+        Merged configuration dictionary.
+
+    Raises:
+        ValueError: If file doesn't exist, isn't a dict, or has duplicate keys.
+    """
+    all_config_data = {}
+    for config_file in config_files:
+        try:
+            with open(config_file, 'r') as f:
+                config_data = yaml.safe_load(f)
+                assert isinstance(
+                    config_data, dict), f"Config file '{config_file}' must contain a dictionary"
+
+                # Don't allow '*' wildcard in master config keys as we need to reserve these
+                # for expansion in process_changelog.py
+                for key in all_config_data.keys():
+                    if "*" in key:
+                        raise ValueError(
+                            f" Wildcard '*' is not allowed in master config keys: '{key}'")
+
+                # Check for duplicate keys
+                duplicate_keys = set(all_config_data.keys()) & set(
+                    config_data.keys())
+                if duplicate_keys:
+                    raise ValueError(
+                        f"Duplicate configuration keys found in '{config_file}': {', '.join(sorted(duplicate_keys))}"
+                    )
+
+                all_config_data.update(config_data)
+        except FileNotFoundError:
+            raise ValueError(f"Input file '{config_file}' does not exist.")
+
+    if validate:
+        validate_master_config(all_config_data)
+
+    return all_config_data
+
+
+def load_runner_file(runner_file: str, validate: bool = True) -> dict:
+    """Load runner configuration file.
+
+    Args:
+        runner_file: Path to the runner YAML configuration file.
+        validate: If True, run validate_runner_config on loaded data. Defaults to True.
+
+    Returns:
+        Runner configuration dictionary.
+
+    Raises:
+        ValueError: If file doesn't exist or fails validation.
+    """
+    try:
+        with open(runner_file, 'r') as f:
+            runner_config = yaml.safe_load(f)
+    except FileNotFoundError:
+        raise ValueError(
+            f"Runner config file '{runner_file}' does not exist.")
+
+    if validate:
+        validate_runner_config(runner_config)
+
+    return runner_config
diff --git a/utils/process_changelog.py b/utils/process_changelog.py
new file mode 100644
index 000000000..80ad04b11
--- /dev/null
+++ b/utils/process_changelog.py
@@ -0,0 +1,143 @@
+import argparse
+import json
+import re
+import subprocess
+from collections import defaultdict
+
+import yaml
+from constants import GENERATE_SWEEPS_PY_SCRIPT, MASTER_CONFIGS, RUNNER_CONFIG
+from matrix_logic.generate_sweep_configs import seq_len_to_str
+from matrix_logic.validation import ChangelogEntry, ChangelogMatrixEntry, load_config_files
+from pydantic import BaseModel, ConfigDict, Field
+
+
+def get_added_lines(base_ref: str, head_ref: str, filepath: str) -> str:
+    """Return lines added to `filepath` between the refs (changelog is append-only).
+
+    Raises ValueError on a deleted line; CalledProcessError if `git diff` fails.
+    """
+    result = subprocess.run(
+        ["git", "diff", base_ref, head_ref, "--", filepath],
+        capture_output=True,
+        text=True,
+        check=True,  # fail loudly: a broken diff must not look like "nothing added"
+    )
+    added_lines = []
+    for line in result.stdout.split("\n"):
+        if line.startswith("-") and not line.startswith("---"):
+            raise ValueError(
+                f"Deletions are not allowed in {filepath}. "
+                f"Only additions to the changelog are permitted. "
+                f"Found deleted line: {line[1:]}"
+            )
+        elif line.startswith("+") and not line.startswith("+++"):
+            added_lines.append(line[1:])
+    return "\n".join(added_lines)
+
+
+def get_config_keys_from_master(
+    config_keys: list[str], master_config: dict
+) -> list[str]:
+    resolved_keys = set()
+    for key in config_keys:
+        if "*" in key:
+            pattern = re.compile(re.escape(key).replace(r"\*", ".*"))
+            matched_keys = [k for k in master_config if pattern.fullmatch(k)]
+            if not matched_keys:
+                raise ValueError(
+                    f"No config keys matched the wildcard pattern '{key}' in master configs."
+                )
+            resolved_keys.update(matched_keys)
+        elif key not in master_config:
+            raise ValueError(
+                f"Config key '{key}' not found in master configs.")
+        else:
+            resolved_keys.add(key)
+    return list(resolved_keys)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--base-ref", type=str, required=True)
+    parser.add_argument("--head-ref", type=str, required=True)
+    parser.add_argument("--changelog-file", type=str, required=True)
+    args = parser.parse_args()
+
+    added_yaml = get_added_lines(
+        args.base_ref, args.head_ref, args.changelog_file)
+
+    if not added_yaml.strip():
+        print("No new changelog entries found")
+        return
+
+    changelog_data = yaml.safe_load(added_yaml)
+
+    if not changelog_data:
+        print("No new changelog entries found")
+        return
+
+    final_results = {
+        "single_node": defaultdict(list),
+        "multi_node": defaultdict(list),
+        "changelog_metadata": {
+            "base_ref": args.base_ref,
+            "head_ref": args.head_ref,
+            "entries": changelog_data,
+        },
+    }
+    
+    all_results = []
+    # Deduplicate repeated configs, if for some reason a config key appears multiple times
+    # in one commit, we don't want to run that config two times (there will just be twice as many
+    # data points for that config, which is not useful)
+    all_configs_to_run = set()
+
+    for entry_data in changelog_data:
+        entry = ChangelogEntry.model_validate(entry_data)
+        configs_to_run = get_config_keys_from_master(
+            entry.config_keys, load_config_files(MASTER_CONFIGS)
+        )
+
+        # Skip configs already processed
+        configs_to_run = [c for c in configs_to_run if c not in all_configs_to_run]
+        if not configs_to_run:
+            continue
+        all_configs_to_run.update(configs_to_run)
+
+        try:
+            result = subprocess.run(
+                [
+                    "python3",
+                    GENERATE_SWEEPS_PY_SCRIPT,
+                    "test-config",
+                    "--config-keys",
+                    *configs_to_run,
+                    "--config-files",
+                    *MASTER_CONFIGS,
+                    "--runner-config",
+                    RUNNER_CONFIG,
+                ],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+        except subprocess.CalledProcessError as e:
+            print(e.stderr)
+            continue
+
+        all_results.extend(json.loads(result.stdout))
+
+    for result in all_results:
+        seq_len_str = seq_len_to_str(result["isl"], result["osl"])
+        if "prefill" in result and result["prefill"] is not None:
+            final_results["multi_node"][seq_len_str].append(result)
+        else:
+            final_results["single_node"][seq_len_str].append(result)
+
+    # Validate final results structure 
+    validated = ChangelogMatrixEntry.model_validate(final_results)
+    print(validated.model_dump_json(by_alias=True))
+
+
+if __name__ == "__main__":
+    main()

From dd4682baefcd7bc0916908b4103ab4afd8f9687c Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Wed, 10 Dec 2025 16:08:18 -0600
Subject: [PATCH 02/29] testing pt 1

---
 perf-changelog.yaml | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 76d66e889..6dab8fa83 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -1,8 +1,9 @@
 - config-keys:
-    - gptoss-fp4-mi300x-vllm
+    - gptoss-fp4-*-trt
   description: |
-    Updating vllm version for mi300x
-- config-keys:
-    - gptoss-fp4-mi300x-vllm
-  description: |
-    Updating vllm version for mi325x
+    - Upgrade GPT-OSS TRT images from 'release:1.1.0rc2.post2' to '1.2.0rc0.post1'
+    - Add NCCL_GRAPH_REGISTER=0 to benchmarks/gptoss_fp4_b200_trt_slurm.sh
+    - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh
+    - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/110
+

From 7d6e0528e833f042ba8b7f1245305e0ba8c7887c Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Thu, 11 Dec 2025 09:13:33 -0600
Subject: [PATCH 03/29] raise error if yaml diff in perf changelog is not valid

---
 utils/process_changelog.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/utils/process_changelog.py b/utils/process_changelog.py
index 80ad04b11..d8d3942d2 100644
--- a/utils/process_changelog.py
+++ b/utils/process_changelog.py
@@ -67,14 +67,12 @@ def main():
         args.base_ref, args.head_ref, args.changelog_file)
 
     if not added_yaml.strip():
-        print("No new changelog entries found")
-        return
+        raise ValueError("No additions found in the changelog file.")
 
     changelog_data = yaml.safe_load(added_yaml)
 
     if not changelog_data:
-        print("No new changelog entries found")
-        return
+        raise ValueError("No valid YAML entries found in the changelog additions.")
 
     final_results = {
         "single_node": defaultdict(list),

From ce49098d50ddbe1770d5e235b78de8919d5eb140 Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Thu, 11 Dec 2025 09:14:45 -0600
Subject: [PATCH 04/29] remove unused imports in process_changelog.py

---
 utils/process_changelog.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/utils/process_changelog.py b/utils/process_changelog.py
index d8d3942d2..677754911 100644
--- a/utils/process_changelog.py
+++ b/utils/process_changelog.py
@@ -7,8 +7,11 @@
 import yaml
 from constants import GENERATE_SWEEPS_PY_SCRIPT, MASTER_CONFIGS, RUNNER_CONFIG
 from matrix_logic.generate_sweep_configs import seq_len_to_str
-from matrix_logic.validation import ChangelogEntry, ChangelogMatrixEntry, load_config_files
-from pydantic import BaseModel, ConfigDict, Field
+from matrix_logic.validation import (
+    ChangelogEntry,
+    ChangelogMatrixEntry,
+    load_config_files,
+)
 
 
 def get_added_lines(base_ref: str, head_ref: str, filepath: str) -> str:
@@ -49,8 +52,7 @@ def get_config_keys_from_master(
                 )
             resolved_keys.update(matched_keys)
         elif key not in master_config:
-            raise ValueError(
-                f"Config key '{key}' not found in master configs.")
+            raise ValueError(f"Config key '{key}' not found in master configs.")
         else:
             resolved_keys.add(key)
     return list(resolved_keys)
@@ -63,8 +65,7 @@ def main():
     parser.add_argument("--changelog-file", type=str, required=True)
     args = parser.parse_args()
 
-    added_yaml = get_added_lines(
-        args.base_ref, args.head_ref, args.changelog_file)
+    added_yaml = get_added_lines(args.base_ref, args.head_ref, args.changelog_file)
 
     if not added_yaml.strip():
         raise ValueError("No additions found in the changelog file.")
@@ -83,7 +84,7 @@ def main():
             "entries": changelog_data,
         },
     }
-    
+
     all_results = []
     # Deduplicate repeated configs, if for some reason a config key appears multiple times
     # in one commit, we don't want to run that config two times (there will just be twice as many
@@ -132,7 +133,7 @@ def main():
         else:
             final_results["single_node"][seq_len_str].append(result)
 
-    # Validate final results structure 
+    # Validate final results structure
     validated = ChangelogMatrixEntry.model_validate(final_results)
     print(validated.model_dump_json(by_alias=True))
 

From e6f6fe9de3d78cdd2a2cc606bbadcdb81834ff9c Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Thu, 11 Dec 2025 09:34:08 -0600
Subject: [PATCH 05/29] config data key fix

---
 utils/matrix_logic/validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/matrix_logic/validation.py b/utils/matrix_logic/validation.py
index 323e9e326..955e4c5b5 100644
--- a/utils/matrix_logic/validation.py
+++ b/utils/matrix_logic/validation.py
@@ -384,7 +384,7 @@ def load_config_files(config_files: List[str], validate: bool = True) -> dict:
 
                 # Don't allow '*' wildcard in master config keys as we need to reserve these
                 # for expansion in process_changelog.py
-                for key in all_config_data.keys():
+                for key in config_data.keys():
                     if "*" in key:
                         raise ValueError(
                             f" Wildcard '*' is not allowed in master config keys: '{key}'")

From b87eeddecd25eca394b9cff2cb23c05af98dbaed Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Thu, 11 Dec 2025 09:57:36 -0600
Subject: [PATCH 06/29] raise error if test-config subprocess fails to run

---
 utils/process_changelog.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/process_changelog.py b/utils/process_changelog.py
index 677754911..4a856c9a8 100644
--- a/utils/process_changelog.py
+++ b/utils/process_changelog.py
@@ -122,7 +122,7 @@ def main():
             )
         except subprocess.CalledProcessError as e:
             print(e.stderr)
-            continue
+            raise
 
         all_results.extend(json.loads(result.stdout))
 

From ba0b115adf3e65da7fd52683fd9394b7567e3b5a Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Thu, 11 Dec 2025 12:30:13 -0600
Subject: [PATCH 07/29] backfill changelog

---
 perf-changelog.yaml | 70 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 6dab8fa83..169162b08 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -6,4 +6,74 @@
     - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh
     - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM
     PR: https://github.com/InferenceMAX/InferenceMAX/pull/110
+- config-keys:
+    - gptoss*
+    - dsr1*
+  description: |
+    - Remove Llama 70B runs to make room for multi-node disagg prefill+wideEP on 
+      h100/h200/b200/mi300/mi325/mi355
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/149
+- config-keys:
+    - gptoss-fp4-b200-vllm
+    - gptoss-fp4-h100-vllm
+    - gptoss-fp4-h200-vllm
+  description: |
+    - Upgrade vLLM from 0.10.2 to 0.11.0 for GPT-OSS NVIDIA single-node configs
+    - Adds compilation-config: '{"cudagraph_mode":"PIECEWISE"} accordingly since vLLM 0.11.0
+      requires now defaults to FULL_AND_PIECEWISE
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/159
+- config-keys:
+    - dsr1*
+  description: |
+    - Fixes bug where 1k8k and 8k1k full sweeps had incorrect max-model-len for DeepSeek
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/163
+- config-keys:
+    - dsr1-fp4-b200-sglang
+    - dsr1-fp8-b200-sglang
+    - dsr1-fp8-h200-sglang
+  description: |
+    - Consolidates H200 and B200 SGLang configurations to use unified v0.5.5-cu129-amd64 
+      image tag and updates deprecated SGLang server arguments to their current equivalents.
+    - --enable-flashinfer-trtllm-moe & --enable-ep-moe is no longer available in sglang so we needed to change it
+      - ep: 4 for all tp: 4 entries (3 occurrences in dsr1-fp4-b200-sglang)
+      - ep: 8 for all tp: 8 entries (6 occurrences across dsr1-fp4-b200-sglang and dsr1-fp8-b200-sglang)
+    - dsr1_fp4_b200_docker.sh: Replaced --enable-ep-moe with --ep-size $EP_SIZE and --enable-flashinfer-trtllm-moe with 
+      --moe-runner-backend flashinfer_trtllm
+    - dsr1_fp8_b200_docker.sh: Replaced --enable-flashinfer-trtllm-moe with --moe-runner-backend flashinfer_trtllm and 
+      added --ep-size $EP_SIZE
+    - launch_b200-nvd.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container
+    - launch_b200-tg.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/204
+- config-keys:
+    - gptoss-fp4-mi355x-vllm
+    - gptoss-fp4-b200-vllm
+  description: |
+    - Extend concurrency to 128 for gptoss mi355x/b200 vllm configurations
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/209
+- config-keys:
+    - gptoss-fp4-b200-trt
+  description: |
+    - Extend concurrency to 128 for gptoss b200 TRT configurations
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/233
+- config-keys:
+    - "*gb200-sglang"
+  description: |
+    - Introducing some improvements in GB200 SGLang DSR1 submission
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/257
+- config-keys:
+    - dsr1-fp8-h200-trt
+  description: |
+    - Update TRT image from nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc0.post1 to nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2
+    - Increase concurrency for some configurations
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/266
+- config-keys:
+    - gptoss-fp4-b200-vllm
+    - gptoss-fp4-h100-vllm
+    - gptoss-fp4-h200-vllm
+  description: |
+    - Update vLLM image for NVIDIA configs from vLLM 0.11.0 to vLLM 0.11.2
+    - Adds kv-cache-dtype: fp8 to benchmarks/gptoss_fp4_b200_docker.sh
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/273
+
+
 

From 747bc2dc708fbcedc52df8b3a157bdf30514ae0f Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Thu, 11 Dec 2025 12:33:19 -0600
Subject: [PATCH 08/29] backfill changelog pt 2

---
 perf-changelog.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 169162b08..b4f5746a2 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -56,7 +56,7 @@
     - Extend concurrency to 128 for gptoss b200 TRT configurations
     PR: https://github.com/InferenceMAX/InferenceMAX/pull/233
 - config-keys:
-    - "*gb200-sglang"
+    - "*gb200-dynamo-sglang"
   description: |
     - Introducing some improvements in GB200 SGLang DSR1 submission
     PR: https://github.com/InferenceMAX/InferenceMAX/pull/257

From ca24b8ecdae474c154371600e9bdae76b059d055 Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Thu, 11 Dec 2025 12:34:40 -0600
Subject: [PATCH 09/29] backfill changelog pt 3

---
 perf-changelog.yaml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index b4f5746a2..d04c8d14c 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -55,11 +55,6 @@
   description: |
     - Extend concurrency to 128 for gptoss b200 TRT configurations
     PR: https://github.com/InferenceMAX/InferenceMAX/pull/233
-- config-keys:
-    - "*gb200-dynamo-sglang"
-  description: |
-    - Introducing some improvements in GB200 SGLang DSR1 submission
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/257
 - config-keys:
     - dsr1-fp8-h200-trt
   description: |

From 954ebd67ed509a176cceb4ee1e8f148ba343b60e Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Thu, 11 Dec 2025 12:43:46 -0600
Subject: [PATCH 10/29] backfill changelog pt 4

---
 perf-changelog.yaml | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index d04c8d14c..7b4d1021d 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -6,13 +6,6 @@
     - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh
     - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM
     PR: https://github.com/InferenceMAX/InferenceMAX/pull/110
-- config-keys:
-    - gptoss*
-    - dsr1*
-  description: |
-    - Remove Llama 70B runs to make room for multi-node disagg prefill+wideEP on 
-      h100/h200/b200/mi300/mi325/mi355
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/149
 - config-keys:
     - gptoss-fp4-b200-vllm
     - gptoss-fp4-h100-vllm

From ee346b3996bb764d52fd069bf98be6a22ee2f45e Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Thu, 11 Dec 2025 12:47:19 -0600
Subject: [PATCH 11/29] backfill changelog pt 5

---
 perf-changelog.yaml | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 7b4d1021d..7ef4bca2f 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -1,11 +1,3 @@
-- config-keys:
-    - gptoss-fp4-*-trt
-  description: |
-    - Upgrade GPT-OSS TRT images from 'release:1.1.0rc2.post2' to '1.2.0rc0.post1'
-    - Add NCCL_GRAPH_REGISTER=0 to benchmarks/gptoss_fp4_b200_trt_slurm.sh
-    - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh
-    - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/110
 - config-keys:
     - gptoss-fp4-b200-vllm
     - gptoss-fp4-h100-vllm
@@ -15,11 +7,6 @@
     - Adds compilation-config: '{"cudagraph_mode":"PIECEWISE"} accordingly since vLLM 0.11.0
       requires now defaults to FULL_AND_PIECEWISE
     PR: https://github.com/InferenceMAX/InferenceMAX/pull/159
-- config-keys:
-    - dsr1*
-  description: |
-    - Fixes bug where 1k8k and 8k1k full sweeps had incorrect max-model-len for DeepSeek
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/163
 - config-keys:
     - dsr1-fp4-b200-sglang
     - dsr1-fp8-b200-sglang

From ab6f948ea553148d92bf7102292f959f37a1077f Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Thu, 11 Dec 2025 15:36:51 -0600
Subject: [PATCH 12/29] backfill changelog pt 6

---
 perf-changelog.yaml | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 7ef4bca2f..d0ca53121 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -1,3 +1,25 @@
+- config-keys:
+    - 70b-fp8-*-vllm
+  description: |
+    - Add compilation-config: '{"custom_ops": ["-rms_norm", "-quant_fp8", "-silu_and_mul"]}' as
+      extra config to all benchmarks/70b_fp8_mi*.sh scripts
+    - 6-7% uplift for llama for 6/8 configs
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/95
+- config-keys:
+    - gptoss-fp4-*-trt
+  description: |
+    - Upgrade GPT-OSS TRT images from 'release:1.1.0rc2.post2' to '1.2.0rc0.post1'
+    - Add NCCL_GRAPH_REGISTER=0 to benchmarks/gptoss_fp4_b200_trt_slurm.sh
+    - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh
+    - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/110
+- config-keys:
+    - gptoss*
+    - dsr1*
+  description: |
+    - Remove Llama 70B runs to make room for multi-node disagg prefill+wideEP on 
+      h100/h200/b200/mi300/mi325/mi355
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/149
 - config-keys:
     - gptoss-fp4-b200-vllm
     - gptoss-fp4-h100-vllm
@@ -7,6 +29,11 @@
     - Adds compilation-config: '{"cudagraph_mode":"PIECEWISE"} accordingly since vLLM 0.11.0
       requires now defaults to FULL_AND_PIECEWISE
     PR: https://github.com/InferenceMAX/InferenceMAX/pull/159
+- config-keys:
+    - dsr1*
+  description: |
+    - Fixes bug where 1k8k and 8k1k full sweeps had incorrect max-model-len for DeepSeek
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/163
 - config-keys:
     - dsr1-fp4-b200-sglang
     - dsr1-fp8-b200-sglang
@@ -35,6 +62,11 @@
   description: |
     - Extend concurrency to 128 for gptoss b200 TRT configurations
     PR: https://github.com/InferenceMAX/InferenceMAX/pull/233
+- config-keys:
+    - "*gb200-sglang"
+  description: |
+    - Introducing some improvements in GB200 SGLang DSR1 submission
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/257
 - config-keys:
     - dsr1-fp8-h200-trt
   description: |

From 27074d2c29ab507d39b09e0c940c9215144603f5 Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Fri, 12 Dec 2025 08:42:28 -0600
Subject: [PATCH 13/29] add always() condition to upload changelog metadata

---
 .github/workflows/run-sweep.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index af4cd9605..83966a296 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -184,7 +184,7 @@ jobs:
 
     upload-changelog-metadata:
         needs: [setup, collect-results]
-        if: ${{ needs.setup.result != 'skipped' }}
+        if: ${{ always() && needs.setup.result != 'skipped' }}
         runs-on: ubuntu-latest
         steps:
             - name: Extract and save changelog metadata

From 763b3946c22097eb8e37750705a9d586ae4ed4b9 Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Fri, 12 Dec 2025 08:43:55 -0600
Subject: [PATCH 14/29] backfill changelog pt 7 (test)

---
 perf-changelog.yaml | 77 ---------------------------------------------
 1 file changed, 77 deletions(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index d0ca53121..663fe369e 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -1,86 +1,9 @@
-- config-keys:
-    - 70b-fp8-*-vllm
-  description: |
-    - Add compilation-config: '{"custom_ops": ["-rms_norm", "-quant_fp8", "-silu_and_mul"]}' as
-      extra config to all benchmarks/70b_fp8_mi*.sh scripts
-    - 6-7% uplift for llama for 6/8 configs
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/95
-- config-keys:
-    - gptoss-fp4-*-trt
-  description: |
-    - Upgrade GPT-OSS TRT images from 'release:1.1.0rc2.post2' to '1.2.0rc0.post1'
-    - Add NCCL_GRAPH_REGISTER=0 to benchmarks/gptoss_fp4_b200_trt_slurm.sh
-    - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh
-    - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/110
-- config-keys:
-    - gptoss*
-    - dsr1*
-  description: |
-    - Remove Llama 70B runs to make room for multi-node disagg prefill+wideEP on 
-      h100/h200/b200/mi300/mi325/mi355
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/149
-- config-keys:
-    - gptoss-fp4-b200-vllm
-    - gptoss-fp4-h100-vllm
-    - gptoss-fp4-h200-vllm
-  description: |
-    - Upgrade vLLM from 0.10.2 to 0.11.0 for GPT-OSS NVIDIA single-node configs
-    - Adds compilation-config: '{"cudagraph_mode":"PIECEWISE"} accordingly since vLLM 0.11.0
-      requires now defaults to FULL_AND_PIECEWISE
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/159
-- config-keys:
-    - dsr1*
-  description: |
-    - Fixes bug where 1k8k and 8k1k full sweeps had incorrect max-model-len for DeepSeek
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/163
-- config-keys:
-    - dsr1-fp4-b200-sglang
-    - dsr1-fp8-b200-sglang
-    - dsr1-fp8-h200-sglang
-  description: |
-    - Consolidates H200 and B200 SGLang configurations to use unified v0.5.5-cu129-amd64 
-      image tag and updates deprecated SGLang server arguments to their current equivalents.
-    - --enable-flashinfer-trtllm-moe & --enable-ep-moe is no longer available in sglang so we needed to change it
-      - ep: 4 for all tp: 4 entries (3 occurrences in dsr1-fp4-b200-sglang)
-      - ep: 8 for all tp: 8 entries (6 occurrences across dsr1-fp4-b200-sglang and dsr1-fp8-b200-sglang)
-    - dsr1_fp4_b200_docker.sh: Replaced --enable-ep-moe with --ep-size $EP_SIZE and --enable-flashinfer-trtllm-moe with 
-      --moe-runner-backend flashinfer_trtllm
-    - dsr1_fp8_b200_docker.sh: Replaced --enable-flashinfer-trtllm-moe with --moe-runner-backend flashinfer_trtllm and 
-      added --ep-size $EP_SIZE
-    - launch_b200-nvd.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container
-    - launch_b200-tg.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/204
 - config-keys:
     - gptoss-fp4-mi355x-vllm
     - gptoss-fp4-b200-vllm
   description: |
     - Extend concurrency to 128 for gptoss mi355x/b200 vllm configurations
     PR: https://github.com/InferenceMAX/InferenceMAX/pull/209
-- config-keys:
-    - gptoss-fp4-b200-trt
-  description: |
-    - Extend concurrency to 128 for gptoss b200 TRT configurations
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/233
-- config-keys:
-    - "*gb200-sglang"
-  description: |
-    - Introducing some improvements in GB200 SGLang DSR1 submission
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/257
-- config-keys:
-    - dsr1-fp8-h200-trt
-  description: |
-    - Update TRT image from nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc0.post1 to nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2
-    - Increase concurrency for some configurations
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/266
-- config-keys:
-    - gptoss-fp4-b200-vllm
-    - gptoss-fp4-h100-vllm
-    - gptoss-fp4-h200-vllm
-  description: |
-    - Update vLLM image for NVIDIA configs from vLLM 0.11.0 to vLLM 0.11.2
-    - Adds kv-cache-dtype: fp8 to benchmarks/gptoss_fp4_b200_docker.sh
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/273
 
 
 

From d0b2de74e1b31a1bed5fbdc22936f5c007dd2aa2 Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Fri, 12 Dec 2025 08:44:16 -0600
Subject: [PATCH 15/29] backfill changelog pt 8 (revert test)

---
 perf-changelog.yaml | 77 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 663fe369e..d0ca53121 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -1,9 +1,86 @@
+- config-keys:
+    - 70b-fp8-*-vllm
+  description: |
+    - Add compilation-config: '{"custom_ops": ["-rms_norm", "-quant_fp8", "-silu_and_mul"]}' as
+      extra config to all benchmarks/70b_fp8_mi*.sh scripts
+    - 6-7% uplift for llama for 6/8 configs
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/95
+- config-keys:
+    - gptoss-fp4-*-trt
+  description: |
+    - Upgrade GPT-OSS TRT images from 'release:1.1.0rc2.post2' to '1.2.0rc0.post1'
+    - Add NCCL_GRAPH_REGISTER=0 to benchmarks/gptoss_fp4_b200_trt_slurm.sh
+    - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh
+    - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/110
+- config-keys:
+    - gptoss*
+    - dsr1*
+  description: |
+    - Remove Llama 70B runs to make room for multi-node disagg prefill+wideEP on 
+      h100/h200/b200/mi300/mi325/mi355
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/149
+- config-keys:
+    - gptoss-fp4-b200-vllm
+    - gptoss-fp4-h100-vllm
+    - gptoss-fp4-h200-vllm
+  description: |
+    - Upgrade vLLM from 0.10.2 to 0.11.0 for GPT-OSS NVIDIA single-node configs
+    - Adds compilation-config: '{"cudagraph_mode":"PIECEWISE"} accordingly since vLLM 0.11.0
+      requires now defaults to FULL_AND_PIECEWISE
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/159
+- config-keys:
+    - dsr1*
+  description: |
+    - Fixes bug where 1k8k and 8k1k full sweeps had incorrect max-model-len for DeepSeek
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/163
+- config-keys:
+    - dsr1-fp4-b200-sglang
+    - dsr1-fp8-b200-sglang
+    - dsr1-fp8-h200-sglang
+  description: |
+    - Consolidates H200 and B200 SGLang configurations to use unified v0.5.5-cu129-amd64 
+      image tag and updates deprecated SGLang server arguments to their current equivalents.
+    - --enable-flashinfer-trtllm-moe & --enable-ep-moe is no longer available in sglang so we needed to change it
+      - ep: 4 for all tp: 4 entries (3 occurrences in dsr1-fp4-b200-sglang)
+      - ep: 8 for all tp: 8 entries (6 occurrences across dsr1-fp4-b200-sglang and dsr1-fp8-b200-sglang)
+    - dsr1_fp4_b200_docker.sh: Replaced --enable-ep-moe with --ep-size $EP_SIZE and --enable-flashinfer-trtllm-moe with 
+      --moe-runner-backend flashinfer_trtllm
+    - dsr1_fp8_b200_docker.sh: Replaced --enable-flashinfer-trtllm-moe with --moe-runner-backend flashinfer_trtllm and 
+      added --ep-size $EP_SIZE
+    - launch_b200-nvd.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container
+    - launch_b200-tg.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/204
 - config-keys:
     - gptoss-fp4-mi355x-vllm
     - gptoss-fp4-b200-vllm
   description: |
     - Extend concurrency to 128 for gptoss mi355x/b200 vllm configurations
     PR: https://github.com/InferenceMAX/InferenceMAX/pull/209
+- config-keys:
+    - gptoss-fp4-b200-trt
+  description: |
+    - Extend concurrency to 128 for gptoss b200 TRT configurations
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/233
+- config-keys:
+    - "*gb200-sglang"
+  description: |
+    - Introducing some improvements in GB200 SGLang DSR1 submission
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/257
+- config-keys:
+    - dsr1-fp8-h200-trt
+  description: |
+    - Update TRT image from nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc0.post1 to nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2
+    - Increase concurrency for some configurations
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/266
+- config-keys:
+    - gptoss-fp4-b200-vllm
+    - gptoss-fp4-h100-vllm
+    - gptoss-fp4-h200-vllm
+  description: |
+    - Update vLLM image for NVIDIA configs from vLLM 0.11.0 to vLLM 0.11.2
+    - Adds kv-cache-dtype: fp8 to benchmarks/gptoss_fp4_b200_docker.sh
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/273
 
 
 

From 41341addf926d63cb09f0fa35772066caa09d37b Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Fri, 12 Dec 2025 08:51:04 -0600
Subject: [PATCH 16/29] backfill changelog pt 9

---
 perf-changelog.yaml | 75 ---------------------------------------------
 1 file changed, 75 deletions(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index d0ca53121..fb3c4a5df 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -1,78 +1,3 @@
-- config-keys:
-    - 70b-fp8-*-vllm
-  description: |
-    - Add compilation-config: '{"custom_ops": ["-rms_norm", "-quant_fp8", "-silu_and_mul"]}' as
-      extra config to all benchmarks/70b_fp8_mi*.sh scripts
-    - 6-7% uplift for llama for 6/8 configs
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/95
-- config-keys:
-    - gptoss-fp4-*-trt
-  description: |
-    - Upgrade GPT-OSS TRT images from 'release:1.1.0rc2.post2' to '1.2.0rc0.post1'
-    - Add NCCL_GRAPH_REGISTER=0 to benchmarks/gptoss_fp4_b200_trt_slurm.sh
-    - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh
-    - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/110
-- config-keys:
-    - gptoss*
-    - dsr1*
-  description: |
-    - Remove Llama 70B runs to make room for multi-node disagg prefill+wideEP on 
-      h100/h200/b200/mi300/mi325/mi355
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/149
-- config-keys:
-    - gptoss-fp4-b200-vllm
-    - gptoss-fp4-h100-vllm
-    - gptoss-fp4-h200-vllm
-  description: |
-    - Upgrade vLLM from 0.10.2 to 0.11.0 for GPT-OSS NVIDIA single-node configs
-    - Adds compilation-config: '{"cudagraph_mode":"PIECEWISE"} accordingly since vLLM 0.11.0
-      requires now defaults to FULL_AND_PIECEWISE
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/159
-- config-keys:
-    - dsr1*
-  description: |
-    - Fixes bug where 1k8k and 8k1k full sweeps had incorrect max-model-len for DeepSeek
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/163
-- config-keys:
-    - dsr1-fp4-b200-sglang
-    - dsr1-fp8-b200-sglang
-    - dsr1-fp8-h200-sglang
-  description: |
-    - Consolidates H200 and B200 SGLang configurations to use unified v0.5.5-cu129-amd64 
-      image tag and updates deprecated SGLang server arguments to their current equivalents.
-    - --enable-flashinfer-trtllm-moe & --enable-ep-moe is no longer available in sglang so we needed to change it
-      - ep: 4 for all tp: 4 entries (3 occurrences in dsr1-fp4-b200-sglang)
-      - ep: 8 for all tp: 8 entries (6 occurrences across dsr1-fp4-b200-sglang and dsr1-fp8-b200-sglang)
-    - dsr1_fp4_b200_docker.sh: Replaced --enable-ep-moe with --ep-size $EP_SIZE and --enable-flashinfer-trtllm-moe with 
-      --moe-runner-backend flashinfer_trtllm
-    - dsr1_fp8_b200_docker.sh: Replaced --enable-flashinfer-trtllm-moe with --moe-runner-backend flashinfer_trtllm and 
-      added --ep-size $EP_SIZE
-    - launch_b200-nvd.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container
-    - launch_b200-tg.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/204
-- config-keys:
-    - gptoss-fp4-mi355x-vllm
-    - gptoss-fp4-b200-vllm
-  description: |
-    - Extend concurrency to 128 for gptoss mi355x/b200 vllm configurations
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/209
-- config-keys:
-    - gptoss-fp4-b200-trt
-  description: |
-    - Extend concurrency to 128 for gptoss b200 TRT configurations
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/233
-- config-keys:
-    - "*gb200-sglang"
-  description: |
-    - Introducing some improvements in GB200 SGLang DSR1 submission
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/257
-- config-keys:
-    - dsr1-fp8-h200-trt
-  description: |
-    - Update TRT image from nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc0.post1 to nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2
-    - Increase concurrency for some configurations
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/266
 - config-keys:
     - gptoss-fp4-b200-vllm
     - gptoss-fp4-h100-vllm

From f1319629d162e56752c30f8eb8a14f3b1d305257 Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Fri, 12 Dec 2025 09:22:07 -0600
Subject: [PATCH 17/29] backfill changelog pt 11

---
 perf-changelog.yaml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index fb3c4a5df..814ed3908 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -5,7 +5,4 @@
   description: |
     - Update vLLM image for NVIDIA configs from vLLM 0.11.0 to vLLM 0.11.2
     - Adds kv-cache-dtype: fp8 to benchmarks/gptoss_fp4_b200_docker.sh
-    PR: https://github.com/InferenceMAX/InferenceMAX/pull/273
-
-
-
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/273
\ No newline at end of file

From dfeba212d3c5a51e62d5bdf1e4b8beb85c39e192 Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Fri, 12 Dec 2025 11:38:23 -0600
Subject: [PATCH 18/29] change if condition for jobs in run sweep workflow

---
 .github/workflows/run-sweep.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index 83966a296..9e9299607 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -56,7 +56,7 @@ jobs:
 
     sweep-multi-node-1k1k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] != '[]' }}
+        if: ${{ needs.setup.outputs.search-space-config.multi_node['1k1k']  }}
         uses: ./.github/workflows/benchmark-multinode-tmpl.yml
         name: multi-node 1k1k /
         strategy:
@@ -93,7 +93,7 @@ jobs:
 
     sweep-multi-node-1k8k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.multi_node['1k8k'] != '[]' }}
+        if: ${{ needs.setup.outputs.search-space-config.multi_node['1k8k'] }}
         uses: ./.github/workflows/benchmark-multinode-tmpl.yml
         name: multi-node 1k8k /
         strategy:
@@ -105,7 +105,7 @@ jobs:
 
     sweep-multi-node-8k1k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.multi_node['8k1k'] != '[]' }}
+        if: ${{ needs.setup.outputs.search-space-config.multi_node['8k1k'] }}
         uses: ./.github/workflows/benchmark-multinode-tmpl.yml
         name: multi-node 8k1k /
         strategy:
@@ -117,7 +117,7 @@ jobs:
 
     sweep-single-node-1k1k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.single_node['1k1k'] != '[]' }}
+        if: ${{ needs.setup.outputs.search-space-config.single_node['1k1k'] }}
         uses: ./.github/workflows/benchmark-tmpl.yml
         name: single-node 1k1k /
         strategy:
@@ -145,7 +145,7 @@ jobs:
 
     sweep-single-node-1k8k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.single_node['1k8k'] != '[]' }}
+        if: ${{ needs.setup.outputs.search-space-config.single_node['1k8k'] }}
         uses: ./.github/workflows/benchmark-tmpl.yml
         name: single-node 1k8k /
         strategy:
@@ -157,7 +157,7 @@ jobs:
 
     sweep-single-node-8k1k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.single_node['8k1k'] != '[]' }}
+        if: ${{ needs.setup.outputs.search-space-config.single_node['8k1k'] }}
         uses: ./.github/workflows/benchmark-tmpl.yml
         name: single-node 8k1k /
         strategy:

From fd07f40a2f1eaddf599d36e48475928b7da6fa11 Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Fri, 12 Dec 2025 11:41:31 -0600
Subject: [PATCH 19/29] debugging run sweep workflow

---
 .github/workflows/run-sweep.yml | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index 9e9299607..7c581f9e3 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -54,6 +54,17 @@ jobs:
 
                   echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
 
+    test:
+        runs-on: ubuntu-latest
+        needs: setup
+        steps:
+            - name: Test
+              run: |
+                  echo ${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] }}
+                  echo ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }}
+                  echo ${{ needs.setup.outputs.search-space-config.single_node['1k1k'] }}
+                  ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }}
+
     sweep-multi-node-1k1k:
         needs: setup
         if: ${{ needs.setup.outputs.search-space-config.multi_node['1k1k']  }}

From 228e0a209b384d703060ebeb8c8b5138c6363001 Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Fri, 12 Dec 2025 11:42:51 -0600
Subject: [PATCH 20/29] debugging run sweep workflow pt 2

---
 .github/workflows/run-sweep.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index 7c581f9e3..2cffe0b4c 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -60,10 +60,10 @@ jobs:
         steps:
             - name: Test
               run: |
-                  echo ${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] }}
-                  echo ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }}
-                  echo ${{ needs.setup.outputs.search-space-config.single_node['1k1k'] }}
-                  ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }}
+                  echo "${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] }}"
+                  echo "${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }}"
+                  echo "${{ needs.setup.outputs.search-space-config.single_node['1k1k'] }}"
+                  echo "${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }}"
 
     sweep-multi-node-1k1k:
         needs: setup

From cb2cc8a461197e0b8b9b84c72df30edd401923bd Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Fri, 12 Dec 2025 11:44:56 -0600
Subject: [PATCH 21/29] debugging run sweep workflow pt 3 (revert)

---
 .github/workflows/run-sweep.yml | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index 2cffe0b4c..f647d02ba 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -54,20 +54,9 @@ jobs:
 
                   echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
 
-    test:
-        runs-on: ubuntu-latest
-        needs: setup
-        steps:
-            - name: Test
-              run: |
-                  echo "${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] }}"
-                  echo "${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }}"
-                  echo "${{ needs.setup.outputs.search-space-config.single_node['1k1k'] }}"
-                  echo "${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }}"
-
     sweep-multi-node-1k1k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.multi_node['1k1k']  }}
+        if: ${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] != ''  }}
         uses: ./.github/workflows/benchmark-multinode-tmpl.yml
         name: multi-node 1k1k /
         strategy:
@@ -104,7 +93,7 @@ jobs:
 
     sweep-multi-node-1k8k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.multi_node['1k8k'] }}
+        if: ${{ needs.setup.outputs.search-space-config.multi_node['1k8k'] != '' }}
         uses: ./.github/workflows/benchmark-multinode-tmpl.yml
         name: multi-node 1k8k /
         strategy:
@@ -116,7 +105,7 @@ jobs:
 
     sweep-multi-node-8k1k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.multi_node['8k1k'] }}
+        if: ${{ needs.setup.outputs.search-space-config.multi_node['8k1k'] != '' }}
         uses: ./.github/workflows/benchmark-multinode-tmpl.yml
         name: multi-node 8k1k /
         strategy:
@@ -128,7 +117,7 @@ jobs:
 
     sweep-single-node-1k1k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.single_node['1k1k'] }}
+        if: ${{ needs.setup.outputs.search-space-config.single_node['1k1k'] != '' }}
         uses: ./.github/workflows/benchmark-tmpl.yml
         name: single-node 1k1k /
         strategy:
@@ -156,7 +145,7 @@ jobs:
 
     sweep-single-node-1k8k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.single_node['1k8k'] }}
+        if: ${{ needs.setup.outputs.search-space-config.single_node['1k8k'] != '' }}
         uses: ./.github/workflows/benchmark-tmpl.yml
         name: single-node 1k8k /
         strategy:
@@ -168,7 +157,7 @@ jobs:
 
     sweep-single-node-8k1k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.single_node['8k1k'] }}
+        if: ${{ needs.setup.outputs.search-space-config.single_node['8k1k'] != '' }}
         uses: ./.github/workflows/benchmark-tmpl.yml
         name: single-node 8k1k /
         strategy:

From 055b324c18e2a091d46b74624a9e75d33d4e1a3c Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Fri, 12 Dec 2025 11:49:04 -0600
Subject: [PATCH 22/29] debugging run sweep workflow pt 4

---
 .github/workflows/run-sweep.yml | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index f647d02ba..4217cb45e 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -54,6 +54,18 @@ jobs:
 
                   echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
 
+    test:
+        runs-on: ubuntu-latest
+        needs: setup
+        steps:
+            - name: Test
+              run: |
+                  echo "${{ fromJson(needs.setup.outputs.search-space-config) }}"
+                  echo "${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] }}"
+                  echo "${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }}"
+                  echo "${{ needs.setup.outputs.search-space-config.single_node['1k1k'] }}"
+                  echo "${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }}"
+
     sweep-multi-node-1k1k:
         needs: setup
         if: ${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] != ''  }}

From ae65551d86c25c63d9287746f7cd76ff71de6acb Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Fri, 12 Dec 2025 11:50:40 -0600
Subject: [PATCH 23/29] debugging run sweep workflow pt 5

---
 .github/workflows/run-sweep.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index 4217cb45e..09f72bde5 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -60,10 +60,12 @@ jobs:
         steps:
             - name: Test
               run: |
-                  echo "${{ fromJson(needs.setup.outputs.search-space-config) }}"
+                  echo "${{ needs.setup.outputs.search-space-config }}"
                   echo "${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] }}"
+                  echo "${{ fromJson(needs.setup.outputs.search-space-config).multi_node }}"
                   echo "${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }}"
                   echo "${{ needs.setup.outputs.search-space-config.single_node['1k1k'] }}"
+                  echo "${{ fromJson(needs.setup.outputs.search-space-config).single_node }}"
                   echo "${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }}"
 
     sweep-multi-node-1k1k:

From 667d2e18815b04d636ee68194ca419f65cbe04e7 Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Fri, 12 Dec 2025 12:04:15 -0600
Subject: [PATCH 24/29] debugging run sweep workflow pt 6

---
 .github/workflows/run-sweep.yml | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index 09f72bde5..a46bf55fb 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -55,22 +55,19 @@ jobs:
                   echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
 
     test:
-        runs-on: ubuntu-latest
         needs: setup
+        runs-on: ubuntu-latest
         steps:
             - name: Test
               run: |
-                  echo "${{ needs.setup.outputs.search-space-config }}"
-                  echo "${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] }}"
-                  echo "${{ fromJson(needs.setup.outputs.search-space-config).multi_node }}"
-                  echo "${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }}"
-                  echo "${{ needs.setup.outputs.search-space-config.single_node['1k1k'] }}"
-                  echo "${{ fromJson(needs.setup.outputs.search-space-config).single_node }}"
-                  echo "${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }}"
+                echo "${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) }}"
+                echo "${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node) }}"
+                echo "${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k1k']) }}"
+                echo "${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node) }}"
 
     sweep-multi-node-1k1k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] != ''  }}
+        if: ${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] != '[]' }}
         uses: ./.github/workflows/benchmark-multinode-tmpl.yml
         name: multi-node 1k1k /
         strategy:
@@ -107,7 +104,7 @@ jobs:
 
     sweep-multi-node-1k8k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.multi_node['1k8k'] != '' }}
+        if: ${{ needs.setup.outputs.search-space-config.multi_node['1k8k'] != '[]' }}
         uses: ./.github/workflows/benchmark-multinode-tmpl.yml
         name: multi-node 1k8k /
         strategy:
@@ -119,7 +116,7 @@ jobs:
 
     sweep-multi-node-8k1k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.multi_node['8k1k'] != '' }}
+        if: ${{ needs.setup.outputs.search-space-config.multi_node['8k1k'] != '[]' }}
         uses: ./.github/workflows/benchmark-multinode-tmpl.yml
         name: multi-node 8k1k /
         strategy:
@@ -131,7 +128,7 @@ jobs:
 
     sweep-single-node-1k1k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.single_node['1k1k'] != '' }}
+        if: ${{ needs.setup.outputs.search-space-config.single_node['1k1k'] != '[]' }}
         uses: ./.github/workflows/benchmark-tmpl.yml
         name: single-node 1k1k /
         strategy:
@@ -159,7 +156,7 @@ jobs:
 
     sweep-single-node-1k8k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.single_node['1k8k'] != '' }}
+        if: ${{ needs.setup.outputs.search-space-config.single_node['1k8k'] != '[]' }}
         uses: ./.github/workflows/benchmark-tmpl.yml
         name: single-node 1k8k /
         strategy:
@@ -171,7 +168,7 @@ jobs:
 
     sweep-single-node-8k1k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.single_node['8k1k'] != '' }}
+        if: ${{ needs.setup.outputs.search-space-config.single_node['8k1k'] != '[]' }}
         uses: ./.github/workflows/benchmark-tmpl.yml
         name: single-node 8k1k /
         strategy:
@@ -198,7 +195,7 @@ jobs:
 
     upload-changelog-metadata:
         needs: [setup, collect-results]
-        if: ${{ always() && needs.setup.result != 'skipped' }}
+        if: ${{ needs.setup.result != 'skipped' }}
         runs-on: ubuntu-latest
         steps:
             - name: Extract and save changelog metadata

From ef3ba6b0bb3f4f399c25f79781f96ecff46f9237 Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Fri, 12 Dec 2025 12:07:57 -0600
Subject: [PATCH 25/29] debugging run sweep workflow pt 7

---
 .github/workflows/run-sweep.yml | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index a46bf55fb..19a673027 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -54,20 +54,9 @@ jobs:
 
                   echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
 
-    test:
-        needs: setup
-        runs-on: ubuntu-latest
-        steps:
-            - name: Test
-              run: |
-                echo "${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) }}"
-                echo "${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node) }}"
-                echo "${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k1k']) }}"
-                echo "${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node) }}"
-
     sweep-multi-node-1k1k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] != '[]' }}
+        if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != 'null' }}
         uses: ./.github/workflows/benchmark-multinode-tmpl.yml
         name: multi-node 1k1k /
         strategy:
@@ -104,7 +93,7 @@ jobs:
 
     sweep-multi-node-1k8k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.multi_node['1k8k'] != '[]' }}
+        if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k8k']) != 'null' }}
         uses: ./.github/workflows/benchmark-multinode-tmpl.yml
         name: multi-node 1k8k /
         strategy:
@@ -116,7 +105,7 @@ jobs:
 
     sweep-multi-node-8k1k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.multi_node['8k1k'] != '[]' }}
+        if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != 'null' }}
         uses: ./.github/workflows/benchmark-multinode-tmpl.yml
         name: multi-node 8k1k /
         strategy:
@@ -128,7 +117,7 @@ jobs:
 
     sweep-single-node-1k1k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.single_node['1k1k'] != '[]' }}
+        if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k1k']) != 'null' }}
         uses: ./.github/workflows/benchmark-tmpl.yml
         name: single-node 1k1k /
         strategy:
@@ -156,7 +145,7 @@ jobs:
 
     sweep-single-node-1k8k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.single_node['1k8k'] != '[]' }}
+        if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k8k']) != 'null' }}
         uses: ./.github/workflows/benchmark-tmpl.yml
         name: single-node 1k8k /
         strategy:
@@ -168,7 +157,7 @@ jobs:
 
     sweep-single-node-8k1k:
         needs: setup
-        if: ${{ needs.setup.outputs.search-space-config.single_node['8k1k'] != '[]' }}
+        if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['8k1k']) != 'null' }}
         uses: ./.github/workflows/benchmark-tmpl.yml
         name: single-node 8k1k /
         strategy:

From fae8278e42174abc1ba0338f35bf077a4195d50d Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Fri, 12 Dec 2025 14:46:50 -0600
Subject: [PATCH 26/29] restore always() on upload-changelog-metadata job
 (it was dropped in "debugging run sweep workflow pt 6")

---
 .github/workflows/run-sweep.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index 19a673027..17166f079 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -184,7 +184,7 @@ jobs:
 
     upload-changelog-metadata:
         needs: [setup, collect-results]
-        if: ${{ needs.setup.result != 'skipped' }}
+        if: ${{ always() && needs.setup.result != 'skipped' }}
         runs-on: ubuntu-latest
         steps:
             - name: Extract and save changelog metadata

From 2018ad3b2d872540705e9affc2351cff8d393640 Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Sun, 14 Dec 2025 20:28:37 -0600
Subject: [PATCH 27/29] add bmk prefix to results

---
 .github/workflows/benchmark-multinode-tmpl.yml |  2 +-
 .github/workflows/benchmark-tmpl.yml           |  2 +-
 .github/workflows/collect-results.yml          | 10 +++++-----
 .github/workflows/run-sweep.yml                |  2 ++
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/benchmark-multinode-tmpl.yml b/.github/workflows/benchmark-multinode-tmpl.yml
index 2b828bda8..6c42cbdaa 100644
--- a/.github/workflows/benchmark-multinode-tmpl.yml
+++ b/.github/workflows/benchmark-multinode-tmpl.yml
@@ -170,5 +170,5 @@ jobs:
       - name: Upload results
         uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
         with:
-          name: ${{ env.RESULT_FILENAME }}
+          name: bmk_${{ env.RESULT_FILENAME }}
           path: agg_${{ env.RESULT_FILENAME }}_*.json
diff --git a/.github/workflows/benchmark-tmpl.yml b/.github/workflows/benchmark-tmpl.yml
index 60c19b441..a48081fac 100644
--- a/.github/workflows/benchmark-tmpl.yml
+++ b/.github/workflows/benchmark-tmpl.yml
@@ -169,5 +169,5 @@ jobs:
       - name: Upload result
         uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
         with:
-          name: ${{ env.RESULT_FILENAME }}
+          name: bmk_${{ env.RESULT_FILENAME }}
           path: agg_${{ env.RESULT_FILENAME }}.json
\ No newline at end of file
diff --git a/.github/workflows/collect-results.yml b/.github/workflows/collect-results.yml
index 8105c6d53..d0b0bd992 100644
--- a/.github/workflows/collect-results.yml
+++ b/.github/workflows/collect-results.yml
@@ -3,7 +3,7 @@ name: Template - Collect Results
 on:
   workflow_call:
     inputs:
-      exp-name:
+      result-prefix:
         required: false
         type: string
         default: ''
@@ -26,7 +26,7 @@ jobs:
         uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
         with:
           path: results/
-          pattern: ${{ inputs.exp-name && format('{0}_*', inputs.exp-name) || '*' }}
+          pattern: ${{ inputs.result-prefix && format('{0}_*', inputs.result-prefix) || '*' }}
 
       - name: Print summary
         run: |
@@ -34,10 +34,10 @@ jobs:
           python3 utils/summarize.py results/ >> $GITHUB_STEP_SUMMARY
 
       - name: Aggregate results
-        run: python3 utils/collect_results.py results/ ${{ inputs.exp-name || 'all' }}
+        run: python3 utils/collect_results.py results/ ${{ inputs.result-prefix || 'all' }}
 
       - name: Upload aggregated results
         uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
         with:
-          name: results_${{ inputs.exp-name || 'all' }}
-          path: agg_${{ inputs.exp-name || 'all' }}.json
+          name: results_${{ inputs.result-prefix || 'all' }}
+          path: agg_${{ inputs.result-prefix || 'all' }}.json
diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index 17166f079..cf01437ff 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -181,6 +181,8 @@ jobs:
         if: ${{ always() && needs.setup.result != 'skipped' }}
         uses: ./.github/workflows/collect-results.yml
         secrets: inherit
+        with:
+            result-prefix: "bmk"
 
     upload-changelog-metadata:
         needs: [setup, collect-results]

From 5e0c779694e55470b164555c906b14e8ba2afd98 Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Mon, 15 Dec 2025 09:07:18 -0600
Subject: [PATCH 28/29] backfill changelog official

---
 perf-changelog.yaml | 75 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 814ed3908..a74285c53 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -1,3 +1,78 @@
+- config-keys:
+    - 70b-fp8-*-vllm
+  description: |
+    - Add compilation-config: '{"custom_ops": ["-rms_norm", "-quant_fp8", "-silu_and_mul"]}' as
+      extra config to all benchmarks/70b_fp8_mi*.sh scripts
+    - 6-7% uplift for llama for 6/8 configs
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/95
+- config-keys:
+    - gptoss-fp4-*-trt
+  description: |
+    - Upgrade GPT-OSS TRT images from 'release:1.1.0rc2.post2' to '1.2.0rc0.post1'
+    - Add NCCL_GRAPH_REGISTER=0 to benchmarks/gptoss_fp4_b200_trt_slurm.sh
+    - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh
+    - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/110
+- config-keys:
+    - gptoss*
+    - dsr1*
+  description: |
+    - Remove Llama 70B runs to make room for multi-node disagg prefill+wideEP on 
+      h100/h200/b200/mi300/mi325/mi355
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/149
+- config-keys:
+    - gptoss-fp4-b200-vllm
+    - gptoss-fp4-h100-vllm
+    - gptoss-fp4-h200-vllm
+  description: |
+    - Upgrade vLLM from 0.10.2 to 0.11.0 for GPT-OSS NVIDIA single-node configs
+    - Adds compilation-config: '{"cudagraph_mode":"PIECEWISE"}' accordingly, since vLLM 0.11.0
+      now defaults to FULL_AND_PIECEWISE
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/159
+- config-keys:
+    - dsr1*
+  description: |
+    - Fixes bug where 1k8k and 8k1k full sweeps had incorrect max-model-len for DeepSeek
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/163
+- config-keys:
+    - dsr1-fp4-b200-sglang
+    - dsr1-fp8-b200-sglang
+    - dsr1-fp8-h200-sglang
+  description: |
+    - Consolidates H200 and B200 SGLang configurations to use unified v0.5.5-cu129-amd64 
+      image tag and updates deprecated SGLang server arguments to their current equivalents.
+    - --enable-flashinfer-trtllm-moe & --enable-ep-moe are no longer available in sglang so we needed to change them
+      - ep: 4 for all tp: 4 entries (3 occurrences in dsr1-fp4-b200-sglang)
+      - ep: 8 for all tp: 8 entries (6 occurrences across dsr1-fp4-b200-sglang and dsr1-fp8-b200-sglang)
+    - dsr1_fp4_b200_docker.sh: Replaced --enable-ep-moe with --ep-size $EP_SIZE and --enable-flashinfer-trtllm-moe with 
+      --moe-runner-backend flashinfer_trtllm
+    - dsr1_fp8_b200_docker.sh: Replaced --enable-flashinfer-trtllm-moe with --moe-runner-backend flashinfer_trtllm and 
+      added --ep-size $EP_SIZE
+    - launch_b200-nvd.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container
+    - launch_b200-tg.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/204
+- config-keys:
+    - gptoss-fp4-mi355x-vllm
+    - gptoss-fp4-b200-vllm
+  description: |
+    - Extend concurrency to 128 for gptoss mi355x/b200 vllm configurations
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/209
+- config-keys:
+    - gptoss-fp4-b200-trt
+  description: |
+    - Extend concurrency to 128 for gptoss b200 TRT configurations
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/233
+- config-keys:
+    - "*gb200-sglang"
+  description: |
+    - Introducing some improvements in GB200 SGLang DSR1 submission
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/257
+- config-keys:
+    - dsr1-fp8-h200-trt
+  description: |
+    - Update TRT image from nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc0.post1 to nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2
+    - Increase concurrency for some configurations
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/266
 - config-keys:
     - gptoss-fp4-b200-vllm
     - gptoss-fp4-h100-vllm

From 8d8ffa1662c3697deb4af2e57f6cf987edc8c9b5 Mon Sep 17 00:00:00 2001
From: Cam Quilici <cjquilici@gmail.com>
Date: Mon, 15 Dec 2025 09:14:21 -0600
Subject: [PATCH 29/29] for concurrency group, use more unique sha

---
 .github/workflows/run-sweep.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index cf01437ff..cb3c4dde5 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -2,7 +2,7 @@ name: "Run Sweep"
 run-name: Run Sweep - ${{ github.event.pull_request.title || github.ref_name }}
 
 concurrency:
-    group: sweep-${{ github.event.pull_request.number || github.ref }}
+    group: sweep-${{ github.event.pull_request.number || github.sha }}
     cancel-in-progress: true
 
 on: