From 433f2ef1c028ab659ff76dc6cd61346a11a3602b Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Wed, 10 Dec 2025 15:12:21 -0600 Subject: [PATCH 01/29] add logic for event driven runs new single workflow that runs on merge to main, new perf-changelog.yaml to track performance changes, new logic to parse changelog, removed cron job in full sweep schedulers --- .../workflows/full-sweep-1k1k-scheduler.yml | 2 - .../workflows/full-sweep-1k8k-scheduler.yml | 2 - .../workflows/full-sweep-8k1k-scheduler.yml | 2 - .github/workflows/run-sweep.yml | 233 ++++++++++++++++++ perf-changelog.yaml | 8 + utils/constants.py | 4 + utils/matrix_logic/generate_sweep_configs.py | 190 +++++++++++--- .../test_generate_sweep_configs.py | 86 ------- utils/matrix_logic/test_validation.py | 129 ++++++++++ utils/matrix_logic/validation.py | 116 +++++++++ utils/process_changelog.py | 143 +++++++++++ 11 files changed, 784 insertions(+), 131 deletions(-) create mode 100644 .github/workflows/run-sweep.yml create mode 100644 perf-changelog.yaml create mode 100644 utils/constants.py create mode 100644 utils/process_changelog.py diff --git a/.github/workflows/full-sweep-1k1k-scheduler.yml b/.github/workflows/full-sweep-1k1k-scheduler.yml index 8b32f47c0..3c592cf0a 100644 --- a/.github/workflows/full-sweep-1k1k-scheduler.yml +++ b/.github/workflows/full-sweep-1k1k-scheduler.yml @@ -2,8 +2,6 @@ name: "Full Sweep Scheduler - 1k1k" on: workflow_dispatch: - schedule: - - cron: "0 0 * * *" jobs: get-dsr1-configs: diff --git a/.github/workflows/full-sweep-1k8k-scheduler.yml b/.github/workflows/full-sweep-1k8k-scheduler.yml index 393864fdf..be909aad5 100644 --- a/.github/workflows/full-sweep-1k8k-scheduler.yml +++ b/.github/workflows/full-sweep-1k8k-scheduler.yml @@ -2,8 +2,6 @@ name: "Full Sweep Scheduler - 1k8k" on: workflow_dispatch: - schedule: - - cron: "0 0 * * *" jobs: get-dsr1-configs: diff --git a/.github/workflows/full-sweep-8k1k-scheduler.yml b/.github/workflows/full-sweep-8k1k-scheduler.yml index
629e56bd9..3eabe74f4 100644 --- a/.github/workflows/full-sweep-8k1k-scheduler.yml +++ b/.github/workflows/full-sweep-8k1k-scheduler.yml @@ -2,8 +2,6 @@ name: "Full Sweep Scheduler - 8k1k" on: workflow_dispatch: - schedule: - - cron: "0 0 * * *" jobs: get-dsr1-configs: diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml new file mode 100644 index 000000000..af4cd9605 --- /dev/null +++ b/.github/workflows/run-sweep.yml @@ -0,0 +1,233 @@ +name: "Run Sweep" +run-name: Run Sweep - ${{ github.event.pull_request.title || github.ref_name }} + +concurrency: + group: sweep-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +on: + push: + branches: + - main + paths: + - "perf-changelog.yaml" + pull_request: + branches: + - main + types: + - ready_for_review + - synchronize + - labeled + paths: + - "perf-changelog.yaml" + +jobs: + setup: + runs-on: ubuntu-latest + if: >- + (github.event_name == 'pull_request' && !github.event.pull_request.draft && contains(github.event.pull_request.labels.*.name, 'sweep-enabled')) || + (github.event_name != 'pull_request' && !contains(github.event.head_commit.message, '[skip-sweep]')) + outputs: + search-space-config: ${{ steps.setup.outputs.search-space-config }} + steps: + - name: Checkout code + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + with: + fetch-depth: 0 + + - id: setup + run: | + pip install pydantic + + if [ "${{ github.event_name }}" == "pull_request" ]; then + BASE_REF="origin/${{ github.base_ref }}" + HEAD_REF="${{ github.event.pull_request.head.sha }}" + else + BASE_REF="${{ github.event.before }}" + HEAD_REF="${{ github.event.after }}" + fi + + CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/process_changelog.py \ + --changelog-file ${GITHUB_WORKSPACE}/perf-changelog.yaml \ + --base-ref "$BASE_REF" \ + --head-ref "$HEAD_REF") + + echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT + + sweep-multi-node-1k1k: + needs: setup + if: ${{ 
needs.setup.outputs.search-space-config.multi_node['1k1k'] != '[]' }} + uses: ./.github/workflows/benchmark-multinode-tmpl.yml + name: multi-node 1k1k / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }} + secrets: inherit + with: &multi-node-inputs + isl: ${{ matrix.config.isl }} + osl: ${{ matrix.config.osl }} + max-model-len: ${{ matrix.config.max-model-len }} + runner: ${{ matrix.config.runner }} + image: ${{ matrix.config.image }} + model: ${{ matrix.config.model }} + model-prefix: ${{ matrix.config.model-prefix }} + framework: ${{ matrix.config.framework }} + precision: ${{ matrix.config.precision }} + exp-name: ${{ matrix.config.exp-name }} + conc-list: ${{ toJson(matrix.config.conc) }} + spec-decoding: ${{ matrix.config.spec-decoding }} + disagg: ${{ matrix.config.disagg }} + + prefill-num-worker: ${{ matrix.config.prefill.num-worker }} + prefill-tp: ${{ matrix.config.prefill.tp }} + prefill-ep: ${{ matrix.config.prefill.ep }} + prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }} + prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }} + + decode-num-worker: ${{ matrix.config.decode.num-worker }} + decode-tp: ${{ matrix.config.decode.tp }} + decode-ep: ${{ matrix.config.decode.ep }} + decode-dp-attn: ${{ matrix.config.decode.dp-attn }} + decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }} + + sweep-multi-node-1k8k: + needs: setup + if: ${{ needs.setup.outputs.search-space-config.multi_node['1k8k'] != '[]' }} + uses: ./.github/workflows/benchmark-multinode-tmpl.yml + name: multi-node 1k8k / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k8k'] }} + secrets: inherit + with: *multi-node-inputs + + sweep-multi-node-8k1k: + needs: setup + if: ${{ needs.setup.outputs.search-space-config.multi_node['8k1k'] != '[]' }} + uses: 
./.github/workflows/benchmark-multinode-tmpl.yml + name: multi-node 8k1k / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k'] }} + secrets: inherit + with: *multi-node-inputs + + sweep-single-node-1k1k: + needs: setup + if: ${{ needs.setup.outputs.search-space-config.single_node['1k1k'] != '[]' }} + uses: ./.github/workflows/benchmark-tmpl.yml + name: single-node 1k1k / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }} + secrets: inherit + with: &single-node-inputs + exp-name: ${{ matrix.config.exp-name }} + isl: ${{ matrix.config.isl }} + osl: ${{ matrix.config.osl }} + max-model-len: ${{ matrix.config.max-model-len }} + runner: ${{ matrix.config.runner }} + image: ${{ matrix.config.image }} + model: ${{ matrix.config.model }} + model-prefix: ${{ matrix.config.model-prefix }} + framework: ${{ matrix.config.framework }} + precision: ${{ matrix.config.precision }} + tp: ${{ matrix.config.tp }} + ep: ${{ matrix.config.ep }} + dp-attn: ${{ matrix.config.dp-attn }} + conc: ${{ matrix.config.conc }} + spec-decoding: ${{ matrix.config.spec-decoding }} + disagg: ${{ matrix.config.disagg }} + + sweep-single-node-1k8k: + needs: setup + if: ${{ needs.setup.outputs.search-space-config.single_node['1k8k'] != '[]' }} + uses: ./.github/workflows/benchmark-tmpl.yml + name: single-node 1k8k / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k8k'] }} + secrets: inherit + with: *single-node-inputs + + sweep-single-node-8k1k: + needs: setup + if: ${{ needs.setup.outputs.search-space-config.single_node['8k1k'] != '[]' }} + uses: ./.github/workflows/benchmark-tmpl.yml + name: single-node 8k1k / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['8k1k'] }} + secrets: inherit + with: *single-node-inputs + 
+ collect-results: + needs: + [ + sweep-single-node-1k1k, + sweep-single-node-1k8k, + sweep-single-node-8k1k, + sweep-multi-node-1k1k, + sweep-multi-node-1k8k, + sweep-multi-node-8k1k, + setup, + ] + if: ${{ always() && needs.setup.result != 'skipped' }} + uses: ./.github/workflows/collect-results.yml + secrets: inherit + + upload-changelog-metadata: + needs: [setup, collect-results] + if: ${{ needs.setup.result != 'skipped' }} + runs-on: ubuntu-latest + steps: + - name: Extract and save changelog metadata + env: + CONFIG_JSON: ${{ needs.setup.outputs.search-space-config }} + run: | + echo "$CONFIG_JSON" | jq '.changelog_metadata' > changelog_metadata.json + + - name: Upload changelog artifact + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: changelog-metadata + path: changelog_metadata.json + + calc-success-rate: + needs: collect-results + if: ${{ always() && needs.collect-results.result != 'skipped'}} + runs-on: ubuntu-latest + + env: + RESULTS_DIR: "results/" + STATS_FILENAME: "run_stats" + GITHUB_TOKEN: ${{ secrets.REPO_PAT }} + + steps: + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + with: + token: ${{ secrets.REPO_PAT }} + fetch-depth: 0 + + - name: Download results artifacts + uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + with: + path: ${{ env.RESULTS_DIR }} + pattern: results_* + + - name: Install python dependencies + run: pip install PyGithub + + - name: Calculate success rate + run: python3 utils/calc_success_rate.py $STATS_FILENAME + + - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: "run-stats" + path: ${{ env.STATS_FILENAME }}.json diff --git a/perf-changelog.yaml b/perf-changelog.yaml new file mode 100644 index 000000000..76d66e889 --- /dev/null +++ b/perf-changelog.yaml @@ -0,0 +1,8 @@ +- config-keys: + - gptoss-fp4-mi300x-vllm + description: | + Updating vllm version for mi300x +- 
config-keys: + - gptoss-fp4-mi300x-vllm + description: | + Updating vllm version for mi325x diff --git a/utils/constants.py b/utils/constants.py new file mode 100644 index 000000000..a465091da --- /dev/null +++ b/utils/constants.py @@ -0,0 +1,4 @@ +MASTER_CONFIGS = [".github/configs/amd-master.yaml", + ".github/configs/nvidia-master.yaml"] +RUNNER_CONFIG = ".github/configs/runners.yaml" +GENERATE_SWEEPS_PY_SCRIPT = "utils/matrix_logic/generate_sweep_configs.py" \ No newline at end of file diff --git a/utils/matrix_logic/generate_sweep_configs.py b/utils/matrix_logic/generate_sweep_configs.py index 8fc47651c..d8fab38cf 100644 --- a/utils/matrix_logic/generate_sweep_configs.py +++ b/utils/matrix_logic/generate_sweep_configs.py @@ -1,8 +1,17 @@ import json -import yaml import argparse +import sys +from pathlib import Path -from validation import validate_master_config, validate_matrix_entry, validate_runner_config, Fields +# Ensure sibling modules are importable regardless of how script is invoked +sys.path.insert(0, str(Path(__file__).resolve().parent)) + +from validation import ( + validate_matrix_entry, + load_config_files, + load_runner_file, + Fields +) seq_len_stoi = { "1k1k": (1024, 1024), @@ -366,42 +375,126 @@ def get_lowest_conc(search_space_entry): return matrix_values -def load_config_files(config_files): - """Load and merge configuration files.""" - all_config_data = {} - for config_file in config_files: - try: - with open(config_file, 'r') as f: - config_data = yaml.safe_load(f) - assert isinstance( - config_data, dict), f"Config file '{config_file}' must contain a dictionary" - - # Check for duplicate keys, this is only in place to prevent against the very unlikely - # case where an entry in one config accidentally/purposefully tries to override an entry in another config - duplicate_keys = set(all_config_data.keys()) & set( - config_data.keys()) - if duplicate_keys: - raise ValueError( - f"Duplicate configuration keys found in '{config_file}': {', 
'.join(sorted(duplicate_keys))}" - ) - - all_config_data.update(config_data) - except FileNotFoundError: - raise ValueError(f"Input file '{config_file}' does not exist.") - - return all_config_data - - -def load_runner_file(runner_file): - """Load runner configuration file.""" - try: - with open(runner_file, 'r') as f: - runner_config = yaml.safe_load(f) - except FileNotFoundError as e: +def generate_test_config_sweep(args, all_config_data): + """Generate full sweep for specific config keys. + + Validates that all specified config keys exist before generating. + Expands all configs fully without any filtering. + """ + # Validate all config keys exist + missing_keys = [key for key in args.config_keys if key not in all_config_data] + if missing_keys: + available_keys = sorted(all_config_data.keys()) raise ValueError( - f"Runner config file '{runner_file}' does not exist.") + f"Config key(s) not found: {', '.join(missing_keys)}.\n" + f"Available keys: {', '.join(available_keys)}" + ) + + matrix_values = [] + + for key in args.config_keys: + val = all_config_data[key] + is_multinode = val.get(Fields.MULTINODE.value, False) + + image = val[Fields.IMAGE.value] + model = val[Fields.MODEL.value] + model_code = val[Fields.MODEL_PREFIX.value] + precision = val[Fields.PRECISION.value] + framework = val[Fields.FRAMEWORK.value] + runner = val[Fields.RUNNER.value] + disagg = val.get(Fields.DISAGG.value, False) - return runner_config + for seq_len_config in val[Fields.SEQ_LEN_CONFIGS.value]: + isl = seq_len_config[Fields.ISL.value] + osl = seq_len_config[Fields.OSL.value] + seq_len_str = seq_len_to_str(isl, osl) + + for bmk in seq_len_config[Fields.SEARCH_SPACE.value]: + if is_multinode: + # Multinode config + spec_decoding = bmk.get(Fields.SPEC_DECODING.value, "none") + prefill = bmk[Fields.PREFILL.value] + decode = bmk[Fields.DECODE.value] + + # Get concurrency values + if Fields.CONC_LIST.value in bmk: + conc_values = bmk[Fields.CONC_LIST.value] + else: + conc_start = 
bmk[Fields.CONC_START.value] + conc_end = bmk[Fields.CONC_END.value] + conc_values = [] + conc = conc_start + while conc <= conc_end: + conc_values.append(conc) + if conc == conc_end: + break + conc *= 2 + if conc > conc_end: + conc = conc_end + + entry = { + Fields.IMAGE.value: image, + Fields.MODEL.value: model, + Fields.MODEL_PREFIX.value: model_code, + Fields.PRECISION.value: precision, + Fields.FRAMEWORK.value: framework, + Fields.RUNNER.value: runner, + Fields.ISL.value: isl, + Fields.OSL.value: osl, + Fields.SPEC_DECODING.value: spec_decoding, + Fields.PREFILL.value: prefill, + Fields.DECODE.value: decode, + Fields.CONC.value: conc_values, + Fields.MAX_MODEL_LEN.value: isl + osl + 200, + Fields.EXP_NAME.value: f"{model_code}_{seq_len_str}", + Fields.DISAGG.value: disagg, + } + matrix_values.append(validate_matrix_entry(entry, is_multinode=True)) + else: + # Single-node config + tp = bmk[Fields.TP.value] + ep = bmk.get(Fields.EP.value) + dp_attn = bmk.get(Fields.DP_ATTN.value) + spec_decoding = bmk.get(Fields.SPEC_DECODING.value, "none") + + # Get concurrency values + if Fields.CONC_LIST.value in bmk: + conc_values = bmk[Fields.CONC_LIST.value] + else: + conc_start = bmk[Fields.CONC_START.value] + conc_end = bmk[Fields.CONC_END.value] + conc_values = [] + conc = conc_start + while conc <= conc_end: + conc_values.append(conc) + if conc == conc_end: + break + conc *= 2 + if conc > conc_end: + conc = conc_end + + for conc in conc_values: + entry = { + Fields.IMAGE.value: image, + Fields.MODEL.value: model, + Fields.MODEL_PREFIX.value: model_code, + Fields.PRECISION.value: precision, + Fields.FRAMEWORK.value: framework, + Fields.RUNNER.value: runner, + Fields.ISL.value: isl, + Fields.OSL.value: osl, + Fields.TP.value: tp, + Fields.CONC.value: conc, + Fields.MAX_MODEL_LEN.value: isl + osl + 200, + Fields.EP.value: ep if ep is not None else 1, + Fields.DP_ATTN.value: dp_attn if dp_attn is not None else False, + Fields.SPEC_DECODING.value: spec_decoding, + 
Fields.EXP_NAME.value: f"{model_code}_{seq_len_str}", + Fields.DISAGG.value: disagg, + } + matrix_values.append(validate_matrix_entry(entry, is_multinode=False)) + + return matrix_values def main(): @@ -545,13 +638,30 @@ def main(): help='Show this help message and exit' ) + # Subcommand: test-config + test_config_keys_parser = subparsers.add_parser( + 'test-config', + parents=[parent_parser], + add_help=False, + help='Generate full sweep for specific config keys. Validates that all specified keys exist before generating.' + ) + test_config_keys_parser.add_argument( + '--config-keys', + nargs='+', + required=True, + help='One or more config keys to generate sweep for (e.g., dsr1-fp4-b200-sglang dsr1-fp8-h200-trt)' + ) + test_config_keys_parser.add_argument( + '-h', '--help', + action='help', + help='Show this help message and exit' + ) + args = parser.parse_args() - # Load and validate configuration files + # Load and validate configuration files (validation happens by default in load functions) all_config_data = load_config_files(args.config_files) runner_data = load_runner_file(args.runner_config) - validate_master_config(all_config_data) - validate_runner_config(runner_data) # Route to appropriate function based on subcommand if args.command == 'full-sweep': @@ -559,6 +669,8 @@ def main(): elif args.command == 'runner-model-sweep': matrix_values = generate_runner_model_sweep_config( args, all_config_data, runner_data) + elif args.command == 'test-config': + matrix_values = generate_test_config_sweep(args, all_config_data) else: parser.error(f"Unknown command: {args.command}") diff --git a/utils/matrix_logic/test_generate_sweep_configs.py b/utils/matrix_logic/test_generate_sweep_configs.py index 1381f394e..c505611c3 100644 --- a/utils/matrix_logic/test_generate_sweep_configs.py +++ b/utils/matrix_logic/test_generate_sweep_configs.py @@ -7,8 +7,6 @@ seq_len_to_str, generate_full_sweep, generate_runner_model_sweep_config, - load_config_files, - load_runner_file, ) 
@@ -583,90 +581,6 @@ def test_uses_lowest_conc(self, sample_single_node_config, sample_runner_config, assert all(entry["conc"] == 4 for entry in result) -# ============================================================================= -# Test load_config_files -# ============================================================================= - -class TestLoadConfigFiles: - """Tests for load_config_files function.""" - - def test_load_single_file(self, tmp_path): - """Should load a single config file.""" - config_file = tmp_path / "config.yaml" - config_file.write_text(""" -test-config: - image: test-image - model: test-model -""") - result = load_config_files([str(config_file)]) - assert "test-config" in result - assert result["test-config"]["image"] == "test-image" - - def test_load_multiple_files(self, tmp_path): - """Should merge multiple config files.""" - config1 = tmp_path / "config1.yaml" - config1.write_text(""" -config-one: - value: 1 -""") - config2 = tmp_path / "config2.yaml" - config2.write_text(""" -config-two: - value: 2 -""") - result = load_config_files([str(config1), str(config2)]) - assert "config-one" in result - assert "config-two" in result - - def test_duplicate_keys_raise_error(self, tmp_path): - """Duplicate keys across files should raise error.""" - config1 = tmp_path / "config1.yaml" - config1.write_text(""" -duplicate-key: - value: 1 -""") - config2 = tmp_path / "config2.yaml" - config2.write_text(""" -duplicate-key: - value: 2 -""") - with pytest.raises(ValueError) as exc_info: - load_config_files([str(config1), str(config2)]) - assert "Duplicate configuration keys" in str(exc_info.value) - - def test_nonexistent_file_raises_error(self): - """Nonexistent file should raise error.""" - with pytest.raises(ValueError) as exc_info: - load_config_files(["nonexistent.yaml"]) - assert "does not exist" in str(exc_info.value) - - -# ============================================================================= -# Test load_runner_file -# 
============================================================================= - -class TestLoadRunnerFile: - """Tests for load_runner_file function.""" - - def test_load_runner_file(self, tmp_path): - """Should load runner config file.""" - runner_file = tmp_path / "runners.yaml" - runner_file.write_text(""" -h100: -- h100-node-0 -- h100-node-1 -""") - result = load_runner_file(str(runner_file)) - assert "h100" in result - assert len(result["h100"]) == 2 - - def test_nonexistent_runner_file(self): - """Nonexistent runner file should raise error.""" - with pytest.raises(ValueError) as exc_info: - load_runner_file("nonexistent.yaml") - assert "does not exist" in str(exc_info.value) - - # ============================================================================= # Test edge cases and special configurations # ============================================================================= diff --git a/utils/matrix_logic/test_validation.py b/utils/matrix_logic/test_validation.py index 008ed2b42..d9cc7f0d9 100644 --- a/utils/matrix_logic/test_validation.py +++ b/utils/matrix_logic/test_validation.py @@ -14,6 +14,8 @@ validate_matrix_entry, validate_master_config, validate_runner_config, + load_config_files, + load_runner_file, ) @@ -738,3 +740,130 @@ def test_multiple_runner_types(self, valid_runner_config): assert "h200" in result assert "mi300x" in result assert "gb200" in result + + +# ============================================================================= +# Test load_config_files +# ============================================================================= + +class TestLoadConfigFiles: + """Tests for load_config_files function.""" + + def test_load_single_file_with_validation(self, tmp_path, valid_single_node_master_config): + """Should load and validate a single config file.""" + config_file = tmp_path / "config.yaml" + import yaml + config_file.write_text(yaml.dump({"test-config": valid_single_node_master_config})) + result = 
load_config_files([str(config_file)]) + assert "test-config" in result + assert result["test-config"]["image"] == valid_single_node_master_config["image"] + + def test_load_single_file_without_validation(self, tmp_path): + """Should load a single config file without validation when validate=False.""" + config_file = tmp_path / "config.yaml" + config_file.write_text(""" +test-config: + image: test-image + model: test-model +""") + result = load_config_files([str(config_file)], validate=False) + assert "test-config" in result + assert result["test-config"]["image"] == "test-image" + + def test_load_multiple_files(self, tmp_path): + """Should merge multiple config files.""" + config1 = tmp_path / "config1.yaml" + config1.write_text(""" +config-one: + value: 1 +""") + config2 = tmp_path / "config2.yaml" + config2.write_text(""" +config-two: + value: 2 +""") + result = load_config_files([str(config1), str(config2)], validate=False) + assert "config-one" in result + assert "config-two" in result + + def test_duplicate_keys_raise_error(self, tmp_path): + """Duplicate keys across files should raise error.""" + config1 = tmp_path / "config1.yaml" + config1.write_text(""" +duplicate-key: + value: 1 +""") + config2 = tmp_path / "config2.yaml" + config2.write_text(""" +duplicate-key: + value: 2 +""") + with pytest.raises(ValueError) as exc_info: + load_config_files([str(config1), str(config2)], validate=False) + assert "Duplicate configuration keys" in str(exc_info.value) + + def test_nonexistent_file_raises_error(self): + """Nonexistent file should raise error.""" + with pytest.raises(ValueError) as exc_info: + load_config_files(["nonexistent.yaml"]) + assert "does not exist" in str(exc_info.value) + + def test_validation_runs_by_default(self, tmp_path): + """Validation should run by default and catch invalid configs.""" + config_file = tmp_path / "config.yaml" + config_file.write_text(""" +invalid-config: + image: test-image + # Missing required fields like model, 
model-prefix, precision, etc. +""") + with pytest.raises(ValueError) as exc_info: + load_config_files([str(config_file)]) + assert "failed validation" in str(exc_info.value) + + +# ============================================================================= +# Test load_runner_file +# ============================================================================= + +class TestLoadRunnerFile: + """Tests for load_runner_file function.""" + + def test_load_runner_file_with_validation(self, tmp_path): + """Should load and validate runner config file.""" + runner_file = tmp_path / "runners.yaml" + runner_file.write_text(""" +h100: +- h100-node-0 +- h100-node-1 +""") + result = load_runner_file(str(runner_file)) + assert "h100" in result + assert len(result["h100"]) == 2 + + def test_load_runner_file_without_validation(self, tmp_path): + """Should load runner config file without validation when validate=False.""" + runner_file = tmp_path / "runners.yaml" + runner_file.write_text(""" +h100: +- h100-node-0 +- h100-node-1 +""") + result = load_runner_file(str(runner_file), validate=False) + assert "h100" in result + assert len(result["h100"]) == 2 + + def test_nonexistent_runner_file(self): + """Nonexistent runner file should raise error.""" + with pytest.raises(ValueError) as exc_info: + load_runner_file("nonexistent.yaml") + assert "does not exist" in str(exc_info.value) + + def test_validation_runs_by_default(self, tmp_path): + """Validation should run by default and catch invalid configs.""" + runner_file = tmp_path / "runners.yaml" + runner_file.write_text(""" +h100: not-a-list +""") + with pytest.raises(ValueError) as exc_info: + load_runner_file(str(runner_file)) + assert "must be a list" in str(exc_info.value) diff --git a/utils/matrix_logic/validation.py b/utils/matrix_logic/validation.py index 30012423a..323e9e326 100644 --- a/utils/matrix_logic/validation.py +++ b/utils/matrix_logic/validation.py @@ -3,6 +3,7 @@ from enum import Enum import pprint +import yaml """ 
The below class defines the field names expected to be present in the JSON entries @@ -315,3 +316,118 @@ def validate_runner_config(runner_configs: dict) -> List[dict]: f"Runner config entry '{key}' cannot be an empty list") return runner_configs + + +""" + Below is the validation logic for the changelog entries found in perf-changelog.yaml. + This ensures that the changelog entries conform to the expected structure before + proceeding with processing. +""" + + +class ChangelogEntry(BaseModel): + """Pydantic model for validating changelog entry structure.""" + model_config = ConfigDict(extra="forbid", populate_by_name=True) + + config_keys: list[str] = Field(alias="config-keys", min_length=1) + description: str + + +class ChangelogMetadata(BaseModel): + """Pydantic model for validating changelog metadata structure.""" + model_config = ConfigDict(extra="forbid") + + base_ref: str + head_ref: str + entries: list[ChangelogEntry] + + +class ChangelogMatrixEntry(BaseModel): + """Pydantic model for validating final changelog matrix entry structure. + This imposes a strict contract on the output of process_changelog.py, dictated by + the expected input to the run-sweep.yml workflow file. + """ + model_config = ConfigDict(extra="forbid", populate_by_name=True) + + single_node: dict[str, list[SingleNodeMatrixEntry] + ] = Field(default_factory=dict) + multi_node: dict[str, list[MultiNodeMatrixEntry] + ] = Field(default_factory=dict) + changelog_metadata: ChangelogMetadata + + +# ============================================================================= +# File Loading Functions +# ============================================================================= + + +def load_config_files(config_files: List[str], validate: bool = True) -> dict: + """Load and merge configuration files. + + Args: + config_files: List of paths to YAML configuration files. + validate: If True, run validate_master_config on loaded data. Defaults to True. 
+ + Returns: + Merged configuration dictionary. + + Raises: + ValueError: If a file doesn't exist, a key contains '*', or keys are duplicated. + """ + all_config_data = {} + for config_file in config_files: + try: + with open(config_file, 'r') as f: + config_data = yaml.safe_load(f) + assert isinstance( + config_data, dict), f"Config file '{config_file}' must contain a dictionary" + + # Don't allow '*' wildcard in master config keys as we need to reserve these + # for expansion in process_changelog.py + for key in config_data.keys(): + if "*" in key: + raise ValueError( + f" Wildcard '*' is not allowed in master config keys: '{key}'") + + # Check for duplicate keys + duplicate_keys = set(all_config_data.keys()) & set( + config_data.keys()) + if duplicate_keys: + raise ValueError( + f"Duplicate configuration keys found in '{config_file}': {', '.join(sorted(duplicate_keys))}" + ) + + all_config_data.update(config_data) + except FileNotFoundError: + raise ValueError(f"Input file '{config_file}' does not exist.") + + if validate: + validate_master_config(all_config_data) + + return all_config_data + + +def load_runner_file(runner_file: str, validate: bool = True) -> dict: + """Load runner configuration file. + + Args: + runner_file: Path to the runner YAML configuration file. + validate: If True, run validate_runner_config on loaded data. Defaults to True. + + Returns: + Runner configuration dictionary. + + Raises: + ValueError: If file doesn't exist or fails validation.
+ """ + try: + with open(runner_file, 'r') as f: + runner_config = yaml.safe_load(f) + except FileNotFoundError: + raise ValueError( + f"Runner config file '{runner_file}' does not exist.") + + if validate: + validate_runner_config(runner_config) + + return runner_config diff --git a/utils/process_changelog.py b/utils/process_changelog.py new file mode 100644 index 000000000..80ad04b11 --- /dev/null +++ b/utils/process_changelog.py @@ -0,0 +1,143 @@ +import argparse +import json +import re +import subprocess +from collections import defaultdict + +import yaml +from constants import GENERATE_SWEEPS_PY_SCRIPT, MASTER_CONFIGS, RUNNER_CONFIG +from matrix_logic.generate_sweep_configs import seq_len_to_str +from matrix_logic.validation import ChangelogEntry, ChangelogMatrixEntry, load_config_files +from pydantic import BaseModel, ConfigDict, Field + + +def get_added_lines(base_ref: str, head_ref: str, filepath: str) -> str: + result = subprocess.run( + ["git", "diff", base_ref, head_ref, "--", filepath], + capture_output=True, + text=True, + ) + + added_lines = [] + for line in result.stdout.split("\n"): + if line.startswith("-") and not line.startswith("---"): + # Don't allow deletions in the changelog + # By convention, it should act as a running log of performance changes, + # so we only want to see additions + raise ValueError( + f"Deletions are not allowed in {filepath}. " + f"Only additions to the changelog are permitted. 
" + f"Found deleted line: {line[1:]}" + ) + elif line.startswith("+") and not line.startswith("+++"): + added_lines.append(line[1:]) + + return "\n".join(added_lines) + + +def get_config_keys_from_master( + config_keys: list[str], master_config: dict +) -> list[str]: + resolved_keys = set() + for key in config_keys: + if "*" in key: + pattern = re.compile(re.escape(key).replace(r"\*", ".*")) + matched_keys = [k for k in master_config if pattern.fullmatch(k)] + if not matched_keys: + raise ValueError( + f"No config keys matched the wildcard pattern '{key}' in master configs." + ) + resolved_keys.update(matched_keys) + elif key not in master_config: + raise ValueError( + f"Config key '{key}' not found in master configs.") + else: + resolved_keys.add(key) + return list(resolved_keys) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--base-ref", type=str, required=True) + parser.add_argument("--head-ref", type=str, required=True) + parser.add_argument("--changelog-file", type=str, required=True) + args = parser.parse_args() + + added_yaml = get_added_lines( + args.base_ref, args.head_ref, args.changelog_file) + + if not added_yaml.strip(): + print("No new changelog entries found") + return + + changelog_data = yaml.safe_load(added_yaml) + + if not changelog_data: + print("No new changelog entries found") + return + + final_results = { + "single_node": defaultdict(list), + "multi_node": defaultdict(list), + "changelog_metadata": { + "base_ref": args.base_ref, + "head_ref": args.head_ref, + "entries": changelog_data, + }, + } + + all_results = [] + # Deduplicate repeated configs, if for some reason a config key appears multiple times + # in one commit, we don't want to run that config two times (there will just be twice as many + # data points for that config, which is not useful) + all_configs_to_run = set() + + for entry_data in changelog_data: + entry = ChangelogEntry.model_validate(entry_data) + configs_to_run = 
get_config_keys_from_master( + entry.config_keys, load_config_files(MASTER_CONFIGS) + ) + + # Skip configs already processed + configs_to_run = [c for c in configs_to_run if c not in all_configs_to_run] + if not configs_to_run: + continue + all_configs_to_run.update(configs_to_run) + + try: + result = subprocess.run( + [ + "python3", + GENERATE_SWEEPS_PY_SCRIPT, + "test-config", + "--config-keys", + *configs_to_run, + "--config-files", + *MASTER_CONFIGS, + "--runner-config", + RUNNER_CONFIG, + ], + capture_output=True, + text=True, + check=True, + ) + except subprocess.CalledProcessError as e: + print(e.stderr) + continue + + all_results.extend(json.loads(result.stdout)) + + for result in all_results: + seq_len_str = seq_len_to_str(result["isl"], result["osl"]) + if "prefill" in result and result["prefill"] is not None: + final_results["multi_node"][seq_len_str].append(result) + else: + final_results["single_node"][seq_len_str].append(result) + + # Validate final results structure + validated = ChangelogMatrixEntry.model_validate(final_results) + print(validated.model_dump_json(by_alias=True)) + + +if __name__ == "__main__": + main() From dd4682baefcd7bc0916908b4103ab4afd8f9687c Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Wed, 10 Dec 2025 16:08:18 -0600 Subject: [PATCH 02/29] testing pt 1 --- perf-changelog.yaml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 76d66e889..6dab8fa83 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1,8 +1,9 @@ - config-keys: - - gptoss-fp4-mi300x-vllm + - gptoss-fp4-*-trt description: | - Updating vllm version for mi300x -- config-keys: - - gptoss-fp4-mi300x-vllm - description: | - Updating vllm version for mi325x + - Upgrade GPT-OSS TRT images from 'release:1.1.0rc2.post2' to '1.2.0rc0.post1' + - Add NCCL_GRAPH_REGISTER=0 to benchmarks/gptoss_fp4_b200_trt_slurm.sh + - Change kv_cache_config.dtype from 'auto' to 'fp8' in 
benchmarks/gptoss_fp4_b200_trt_slurm.sh + - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM + PR: https://github.com/InferenceMAX/InferenceMAX/pull/110 + From 7d6e0528e833f042ba8b7f1245305e0ba8c7887c Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Thu, 11 Dec 2025 09:13:33 -0600 Subject: [PATCH 03/29] raise error if yaml diff in perf changelog is not valid --- utils/process_changelog.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/utils/process_changelog.py b/utils/process_changelog.py index 80ad04b11..d8d3942d2 100644 --- a/utils/process_changelog.py +++ b/utils/process_changelog.py @@ -67,14 +67,12 @@ def main(): args.base_ref, args.head_ref, args.changelog_file) if not added_yaml.strip(): - print("No new changelog entries found") - return + raise ValueError("No additions found in the changelog file.") changelog_data = yaml.safe_load(added_yaml) if not changelog_data: - print("No new changelog entries found") - return + raise ValueError("No valid YAML entries found in the changelog additions.") final_results = { "single_node": defaultdict(list), From ce49098d50ddbe1770d5e235b78de8919d5eb140 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Thu, 11 Dec 2025 09:14:45 -0600 Subject: [PATCH 04/29] remove unused imports in process_changelog.py --- utils/process_changelog.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/utils/process_changelog.py b/utils/process_changelog.py index d8d3942d2..677754911 100644 --- a/utils/process_changelog.py +++ b/utils/process_changelog.py @@ -7,8 +7,11 @@ import yaml from constants import GENERATE_SWEEPS_PY_SCRIPT, MASTER_CONFIGS, RUNNER_CONFIG from matrix_logic.generate_sweep_configs import seq_len_to_str -from matrix_logic.validation import ChangelogEntry, ChangelogMatrixEntry, load_config_files -from pydantic import BaseModel, ConfigDict, Field +from matrix_logic.validation import ( + ChangelogEntry, + ChangelogMatrixEntry, + load_config_files, +) def 
get_added_lines(base_ref: str, head_ref: str, filepath: str) -> str: @@ -49,8 +52,7 @@ def get_config_keys_from_master( ) resolved_keys.update(matched_keys) elif key not in master_config: - raise ValueError( - f"Config key '{key}' not found in master configs.") + raise ValueError(f"Config key '{key}' not found in master configs.") else: resolved_keys.add(key) return list(resolved_keys) @@ -63,8 +65,7 @@ def main(): parser.add_argument("--changelog-file", type=str, required=True) args = parser.parse_args() - added_yaml = get_added_lines( - args.base_ref, args.head_ref, args.changelog_file) + added_yaml = get_added_lines(args.base_ref, args.head_ref, args.changelog_file) if not added_yaml.strip(): raise ValueError("No additions found in the changelog file.") @@ -83,7 +84,7 @@ def main(): "entries": changelog_data, }, } - + all_results = [] # Deduplicate repeated configs, if for some reason a config key appears multiple times # in one commit, we don't want to run that config two times (there will just be twice as many @@ -132,7 +133,7 @@ def main(): else: final_results["single_node"][seq_len_str].append(result) - # Validate final results structure + # Validate final results structure validated = ChangelogMatrixEntry.model_validate(final_results) print(validated.model_dump_json(by_alias=True)) From e6f6fe9de3d78cdd2a2cc606bbadcdb81834ff9c Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Thu, 11 Dec 2025 09:34:08 -0600 Subject: [PATCH 05/29] config data key fix --- utils/matrix_logic/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/matrix_logic/validation.py b/utils/matrix_logic/validation.py index 323e9e326..955e4c5b5 100644 --- a/utils/matrix_logic/validation.py +++ b/utils/matrix_logic/validation.py @@ -384,7 +384,7 @@ def load_config_files(config_files: List[str], validate: bool = True) -> dict: # Don't allow '*' wildcard in master config keys as we need to reserve these # for expansion in process_changelog.py - for key in 
all_config_data.keys(): + for key in config_data.keys(): if "*" in key: raise ValueError( f" Wildcard '*' is not allowed in master config keys: '{key}'") From b87eeddecd25eca394b9cff2cb23c05af98dbaed Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Thu, 11 Dec 2025 09:57:36 -0600 Subject: [PATCH 06/29] raise error if test-config subprocess fails to run --- utils/process_changelog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/process_changelog.py b/utils/process_changelog.py index 677754911..4a856c9a8 100644 --- a/utils/process_changelog.py +++ b/utils/process_changelog.py @@ -122,7 +122,7 @@ def main(): ) except subprocess.CalledProcessError as e: print(e.stderr) - continue + raise all_results.extend(json.loads(result.stdout)) From ba0b115adf3e65da7fd52683fd9394b7567e3b5a Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Thu, 11 Dec 2025 12:30:13 -0600 Subject: [PATCH 07/29] backfill changelog --- perf-changelog.yaml | 70 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 6dab8fa83..169162b08 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -6,4 +6,74 @@ - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM PR: https://github.com/InferenceMAX/InferenceMAX/pull/110 +- config-keys: + - gptoss* + - dsr1* + description: | + - Remove Llama 70B runs to make room for multi-node disagg prefill+wideEP on + h100/h200/b200/mi300/mi325/mi355 + PR: https://github.com/InferenceMAX/InferenceMAX/pull/149 +- config-keys: + - gptoss-fp4-b200-vllm + - gptoss-fp4-h100-vllm + - gptoss-fp4-h200-vllm + description: | + - Upgrade vLLM from 0.10.2 to 0.11.0 for GPT-OSS NVIDIA single-node configs + - Adds compilation-config: '{"cudagraph_mode":"PIECEWISE"} accordingly since vLLM 0.11.0 + requires now defaults to FULL_AND_PIECEWISE + PR: 
https://github.com/InferenceMAX/InferenceMAX/pull/159 +- config-keys: + - dsr1* + description: | + - Fixes bug where 1k8k and 8k1k full sweeps had incorrect max-model-len for DeepSeek + PR: https://github.com/InferenceMAX/InferenceMAX/pull/163 +- config-keys: + - dsr1-fp4-b200-sglang + - dsr1-fp8-b200-sglang + - dsr1-fp8-h200-sglang + description: | + - Consolidates H200 and B200 SGLang configurations to use unified v0.5.5-cu129-amd64 + image tag and updates deprecated SGLang server arguments to their current equivalents. + - --enable-flashinfer-trtllm-moe & --enable-ep-moe is no longer available in sglang so we needed to change it + - ep: 4 for all tp: 4 entries (3 occurrences in dsr1-fp4-b200-sglang) + - ep: 8 for all tp: 8 entries (6 occurrences across dsr1-fp4-b200-sglang and dsr1-fp8-b200-sglang) + - dsr1_fp4_b200_docker.sh: Replaced --enable-ep-moe with --ep-size $EP_SIZE and --enable-flashinfer-trtllm-moe with + --moe-runner-backend flashinfer_trtllm + - dsr1_fp8_b200_docker.sh: Replaced --enable-flashinfer-trtllm-moe with --moe-runner-backend flashinfer_trtllm and + added --ep-size $EP_SIZE + - launch_b200-nvd.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container + - launch_b200-tg.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container + PR: https://github.com/InferenceMAX/InferenceMAX/pull/204 +- config-keys: + - gptoss-fp4-mi355x-vllm + - gptoss-fp4-b200-vllm + description: | + - Extend concurrency to 128 for gptoss mi355x/b200 vllm configurations + PR: https://github.com/InferenceMAX/InferenceMAX/pull/209 +- config-keys: + - gptoss-fp4-b200-trt + description: | + - Extend concurrency to 128 for gptoss b200 TRT configurations + PR: https://github.com/InferenceMAX/InferenceMAX/pull/233 +- config-keys: + - "*gb200-sglang" + description: | + - Introducing some improvements in GB200 SGLang DSR1 submission + PR: https://github.com/InferenceMAX/InferenceMAX/pull/257 +- config-keys: + - 
dsr1-fp8-h200-trt + description: | + - Update TRT image from nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc0.post1 to nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2 + - Increase concurrency for some configurations + PR: https://github.com/InferenceMAX/InferenceMAX/pull/266 +- config-keys: + - gptoss-fp4-b200-vllm + - gptoss-fp4-h100-vllm + - gptoss-fp4-h200-vllm + description: | + - Update vLLM image for NVIDIA configs from vLLM 0.11.0 to vLLM 0.11.2 + - Adds kv-cache-dtype: fp8 to benchmarks/gptoss_fp4_b200_docker.sh + PR: https://github.com/InferenceMAX/InferenceMAX/pull/273 + + From 747bc2dc708fbcedc52df8b3a157bdf30514ae0f Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Thu, 11 Dec 2025 12:33:19 -0600 Subject: [PATCH 08/29] backfill changelog pt 2 --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 169162b08..b4f5746a2 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -56,7 +56,7 @@ - Extend concurrency to 128 for gptoss b200 TRT configurations PR: https://github.com/InferenceMAX/InferenceMAX/pull/233 - config-keys: - - "*gb200-sglang" + - "*gb200-dynamo-sglang" description: | - Introducing some improvements in GB200 SGLang DSR1 submission PR: https://github.com/InferenceMAX/InferenceMAX/pull/257 From ca24b8ecdae474c154371600e9bdae76b059d055 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Thu, 11 Dec 2025 12:34:40 -0600 Subject: [PATCH 09/29] backfill changelog pt 3 --- perf-changelog.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index b4f5746a2..d04c8d14c 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -55,11 +55,6 @@ description: | - Extend concurrency to 128 for gptoss b200 TRT configurations PR: https://github.com/InferenceMAX/InferenceMAX/pull/233 -- config-keys: - - "*gb200-dynamo-sglang" - description: | - - Introducing some improvements in GB200 SGLang DSR1 submission - PR: 
https://github.com/InferenceMAX/InferenceMAX/pull/257 - config-keys: - dsr1-fp8-h200-trt description: | From 954ebd67ed509a176cceb4ee1e8f148ba343b60e Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Thu, 11 Dec 2025 12:43:46 -0600 Subject: [PATCH 10/29] backfill changelog pt 4 --- perf-changelog.yaml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index d04c8d14c..7b4d1021d 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -6,13 +6,6 @@ - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM PR: https://github.com/InferenceMAX/InferenceMAX/pull/110 -- config-keys: - - gptoss* - - dsr1* - description: | - - Remove Llama 70B runs to make room for multi-node disagg prefill+wideEP on - h100/h200/b200/mi300/mi325/mi355 - PR: https://github.com/InferenceMAX/InferenceMAX/pull/149 - config-keys: - gptoss-fp4-b200-vllm - gptoss-fp4-h100-vllm From ee346b3996bb764d52fd069bf98be6a22ee2f45e Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Thu, 11 Dec 2025 12:47:19 -0600 Subject: [PATCH 11/29] backfill changelog pt 5 --- perf-changelog.yaml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 7b4d1021d..7ef4bca2f 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1,11 +1,3 @@ -- config-keys: - - gptoss-fp4-*-trt - description: | - - Upgrade GPT-OSS TRT images from 'release:1.1.0rc2.post2' to '1.2.0rc0.post1' - - Add NCCL_GRAPH_REGISTER=0 to benchmarks/gptoss_fp4_b200_trt_slurm.sh - - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh - - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM - PR: https://github.com/InferenceMAX/InferenceMAX/pull/110 - config-keys: - gptoss-fp4-b200-vllm - gptoss-fp4-h100-vllm @@ -15,11 +7,6 @@ - Adds compilation-config: '{"cudagraph_mode":"PIECEWISE"} accordingly since vLLM 
0.11.0 requires now defaults to FULL_AND_PIECEWISE PR: https://github.com/InferenceMAX/InferenceMAX/pull/159 -- config-keys: - - dsr1* - description: | - - Fixes bug where 1k8k and 8k1k full sweeps had incorrect max-model-len for DeepSeek - PR: https://github.com/InferenceMAX/InferenceMAX/pull/163 - config-keys: - dsr1-fp4-b200-sglang - dsr1-fp8-b200-sglang From ab6f948ea553148d92bf7102292f959f37a1077f Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Thu, 11 Dec 2025 15:36:51 -0600 Subject: [PATCH 12/29] backfill changelog pt 6 --- perf-changelog.yaml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 7ef4bca2f..d0ca53121 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1,3 +1,25 @@ +- config-keys: + - 70b-fp8-*-vllm + description: | + - Add compilation-config: '{"custom_ops": ["-rms_norm", "-quant_fp8", "-silu_and_mul"]}' as + extra config to all benchmarks/70b_fp8_mi*.sh scripts + - 6-7% uplift for llama for 6/8 configs + PR: https://github.com/InferenceMAX/InferenceMAX/pull/95 +- config-keys: + - gptoss-fp4-*-trt + description: | + - Upgrade GPT-OSS TRT images from 'release:1.1.0rc2.post2' to '1.2.0rc0.post1' + - Add NCCL_GRAPH_REGISTER=0 to benchmarks/gptoss_fp4_b200_trt_slurm.sh + - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh + - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM + PR: https://github.com/InferenceMAX/InferenceMAX/pull/110 +- config-keys: + - gptoss* + - dsr1* + description: | + - Remove Llama 70B runs to make room for multi-node disagg prefill+wideEP on + h100/h200/b200/mi300/mi325/mi355 + PR: https://github.com/InferenceMAX/InferenceMAX/pull/149 - config-keys: - gptoss-fp4-b200-vllm - gptoss-fp4-h100-vllm @@ -7,6 +29,11 @@ - Adds compilation-config: '{"cudagraph_mode":"PIECEWISE"} accordingly since vLLM 0.11.0 requires now defaults to FULL_AND_PIECEWISE PR: 
https://github.com/InferenceMAX/InferenceMAX/pull/159 +- config-keys: + - dsr1* + description: | + - Fixes bug where 1k8k and 8k1k full sweeps had incorrect max-model-len for DeepSeek + PR: https://github.com/InferenceMAX/InferenceMAX/pull/163 - config-keys: - dsr1-fp4-b200-sglang - dsr1-fp8-b200-sglang @@ -35,6 +62,11 @@ description: | - Extend concurrency to 128 for gptoss b200 TRT configurations PR: https://github.com/InferenceMAX/InferenceMAX/pull/233 +- config-keys: + - "*gb200-sglang" + description: | + - Introducing some improvements in GB200 SGLang DSR1 submission + PR: https://github.com/InferenceMAX/InferenceMAX/pull/257 - config-keys: - dsr1-fp8-h200-trt description: | From 27074d2c29ab507d39b09e0c940c9215144603f5 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 12 Dec 2025 08:42:28 -0600 Subject: [PATCH 13/29] add always() condition to upload changelog metadata --- .github/workflows/run-sweep.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index af4cd9605..83966a296 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -184,7 +184,7 @@ jobs: upload-changelog-metadata: needs: [setup, collect-results] - if: ${{ needs.setup.result != 'skipped' }} + if: ${{ always() && needs.setup.result != 'skipped' }} runs-on: ubuntu-latest steps: - name: Extract and save changelog metadata From 763b3946c22097eb8e37750705a9d586ae4ed4b9 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 12 Dec 2025 08:43:55 -0600 Subject: [PATCH 14/29] backfill changelog pt 7 (test) --- perf-changelog.yaml | 77 --------------------------------------------- 1 file changed, 77 deletions(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index d0ca53121..663fe369e 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1,86 +1,9 @@ -- config-keys: - - 70b-fp8-*-vllm - description: | - - Add compilation-config: '{"custom_ops": ["-rms_norm", "-quant_fp8", 
"-silu_and_mul"]}' as - extra config to all benchmarks/70b_fp8_mi*.sh scripts - - 6-7% uplift for llama for 6/8 configs - PR: https://github.com/InferenceMAX/InferenceMAX/pull/95 -- config-keys: - - gptoss-fp4-*-trt - description: | - - Upgrade GPT-OSS TRT images from 'release:1.1.0rc2.post2' to '1.2.0rc0.post1' - - Add NCCL_GRAPH_REGISTER=0 to benchmarks/gptoss_fp4_b200_trt_slurm.sh - - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh - - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM - PR: https://github.com/InferenceMAX/InferenceMAX/pull/110 -- config-keys: - - gptoss* - - dsr1* - description: | - - Remove Llama 70B runs to make room for multi-node disagg prefill+wideEP on - h100/h200/b200/mi300/mi325/mi355 - PR: https://github.com/InferenceMAX/InferenceMAX/pull/149 -- config-keys: - - gptoss-fp4-b200-vllm - - gptoss-fp4-h100-vllm - - gptoss-fp4-h200-vllm - description: | - - Upgrade vLLM from 0.10.2 to 0.11.0 for GPT-OSS NVIDIA single-node configs - - Adds compilation-config: '{"cudagraph_mode":"PIECEWISE"} accordingly since vLLM 0.11.0 - requires now defaults to FULL_AND_PIECEWISE - PR: https://github.com/InferenceMAX/InferenceMAX/pull/159 -- config-keys: - - dsr1* - description: | - - Fixes bug where 1k8k and 8k1k full sweeps had incorrect max-model-len for DeepSeek - PR: https://github.com/InferenceMAX/InferenceMAX/pull/163 -- config-keys: - - dsr1-fp4-b200-sglang - - dsr1-fp8-b200-sglang - - dsr1-fp8-h200-sglang - description: | - - Consolidates H200 and B200 SGLang configurations to use unified v0.5.5-cu129-amd64 - image tag and updates deprecated SGLang server arguments to their current equivalents. 
- - --enable-flashinfer-trtllm-moe & --enable-ep-moe is no longer available in sglang so we needed to change it - - ep: 4 for all tp: 4 entries (3 occurrences in dsr1-fp4-b200-sglang) - - ep: 8 for all tp: 8 entries (6 occurrences across dsr1-fp4-b200-sglang and dsr1-fp8-b200-sglang) - - dsr1_fp4_b200_docker.sh: Replaced --enable-ep-moe with --ep-size $EP_SIZE and --enable-flashinfer-trtllm-moe with - --moe-runner-backend flashinfer_trtllm - - dsr1_fp8_b200_docker.sh: Replaced --enable-flashinfer-trtllm-moe with --moe-runner-backend flashinfer_trtllm and - added --ep-size $EP_SIZE - - launch_b200-nvd.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container - - launch_b200-tg.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container - PR: https://github.com/InferenceMAX/InferenceMAX/pull/204 - config-keys: - gptoss-fp4-mi355x-vllm - gptoss-fp4-b200-vllm description: | - Extend concurrency to 128 for gptoss mi355x/b200 vllm configurations PR: https://github.com/InferenceMAX/InferenceMAX/pull/209 -- config-keys: - - gptoss-fp4-b200-trt - description: | - - Extend concurrency to 128 for gptoss b200 TRT configurations - PR: https://github.com/InferenceMAX/InferenceMAX/pull/233 -- config-keys: - - "*gb200-sglang" - description: | - - Introducing some improvements in GB200 SGLang DSR1 submission - PR: https://github.com/InferenceMAX/InferenceMAX/pull/257 -- config-keys: - - dsr1-fp8-h200-trt - description: | - - Update TRT image from nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc0.post1 to nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2 - - Increase concurrency for some configurations - PR: https://github.com/InferenceMAX/InferenceMAX/pull/266 -- config-keys: - - gptoss-fp4-b200-vllm - - gptoss-fp4-h100-vllm - - gptoss-fp4-h200-vllm - description: | - - Update vLLM image for NVIDIA configs from vLLM 0.11.0 to vLLM 0.11.2 - - Adds kv-cache-dtype: fp8 to benchmarks/gptoss_fp4_b200_docker.sh - PR: 
https://github.com/InferenceMAX/InferenceMAX/pull/273 From d0b2de74e1b31a1bed5fbdc22936f5c007dd2aa2 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 12 Dec 2025 08:44:16 -0600 Subject: [PATCH 15/29] backfill changelog pt 8 (revert test) --- perf-changelog.yaml | 77 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 663fe369e..d0ca53121 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1,9 +1,86 @@ +- config-keys: + - 70b-fp8-*-vllm + description: | + - Add compilation-config: '{"custom_ops": ["-rms_norm", "-quant_fp8", "-silu_and_mul"]}' as + extra config to all benchmarks/70b_fp8_mi*.sh scripts + - 6-7% uplift for llama for 6/8 configs + PR: https://github.com/InferenceMAX/InferenceMAX/pull/95 +- config-keys: + - gptoss-fp4-*-trt + description: | + - Upgrade GPT-OSS TRT images from 'release:1.1.0rc2.post2' to '1.2.0rc0.post1' + - Add NCCL_GRAPH_REGISTER=0 to benchmarks/gptoss_fp4_b200_trt_slurm.sh + - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh + - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM + PR: https://github.com/InferenceMAX/InferenceMAX/pull/110 +- config-keys: + - gptoss* + - dsr1* + description: | + - Remove Llama 70B runs to make room for multi-node disagg prefill+wideEP on + h100/h200/b200/mi300/mi325/mi355 + PR: https://github.com/InferenceMAX/InferenceMAX/pull/149 +- config-keys: + - gptoss-fp4-b200-vllm + - gptoss-fp4-h100-vllm + - gptoss-fp4-h200-vllm + description: | + - Upgrade vLLM from 0.10.2 to 0.11.0 for GPT-OSS NVIDIA single-node configs + - Adds compilation-config: '{"cudagraph_mode":"PIECEWISE"} accordingly since vLLM 0.11.0 + requires now defaults to FULL_AND_PIECEWISE + PR: https://github.com/InferenceMAX/InferenceMAX/pull/159 +- config-keys: + - dsr1* + description: | + - Fixes bug where 1k8k and 8k1k full sweeps had incorrect max-model-len for DeepSeek + PR: 
https://github.com/InferenceMAX/InferenceMAX/pull/163 +- config-keys: + - dsr1-fp4-b200-sglang + - dsr1-fp8-b200-sglang + - dsr1-fp8-h200-sglang + description: | + - Consolidates H200 and B200 SGLang configurations to use unified v0.5.5-cu129-amd64 + image tag and updates deprecated SGLang server arguments to their current equivalents. + - --enable-flashinfer-trtllm-moe & --enable-ep-moe is no longer available in sglang so we needed to change it + - ep: 4 for all tp: 4 entries (3 occurrences in dsr1-fp4-b200-sglang) + - ep: 8 for all tp: 8 entries (6 occurrences across dsr1-fp4-b200-sglang and dsr1-fp8-b200-sglang) + - dsr1_fp4_b200_docker.sh: Replaced --enable-ep-moe with --ep-size $EP_SIZE and --enable-flashinfer-trtllm-moe with + --moe-runner-backend flashinfer_trtllm + - dsr1_fp8_b200_docker.sh: Replaced --enable-flashinfer-trtllm-moe with --moe-runner-backend flashinfer_trtllm and + added --ep-size $EP_SIZE + - launch_b200-nvd.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container + - launch_b200-tg.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container + PR: https://github.com/InferenceMAX/InferenceMAX/pull/204 - config-keys: - gptoss-fp4-mi355x-vllm - gptoss-fp4-b200-vllm description: | - Extend concurrency to 128 for gptoss mi355x/b200 vllm configurations PR: https://github.com/InferenceMAX/InferenceMAX/pull/209 +- config-keys: + - gptoss-fp4-b200-trt + description: | + - Extend concurrency to 128 for gptoss b200 TRT configurations + PR: https://github.com/InferenceMAX/InferenceMAX/pull/233 +- config-keys: + - "*gb200-sglang" + description: | + - Introducing some improvements in GB200 SGLang DSR1 submission + PR: https://github.com/InferenceMAX/InferenceMAX/pull/257 +- config-keys: + - dsr1-fp8-h200-trt + description: | + - Update TRT image from nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc0.post1 to nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2 + - Increase concurrency for some configurations + PR: 
https://github.com/InferenceMAX/InferenceMAX/pull/266 +- config-keys: + - gptoss-fp4-b200-vllm + - gptoss-fp4-h100-vllm + - gptoss-fp4-h200-vllm + description: | + - Update vLLM image for NVIDIA configs from vLLM 0.11.0 to vLLM 0.11.2 + - Adds kv-cache-dtype: fp8 to benchmarks/gptoss_fp4_b200_docker.sh + PR: https://github.com/InferenceMAX/InferenceMAX/pull/273 From 41341addf926d63cb09f0fa35772066caa09d37b Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 12 Dec 2025 08:51:04 -0600 Subject: [PATCH 16/29] backfill changelog pt 9 --- perf-changelog.yaml | 75 --------------------------------------------- 1 file changed, 75 deletions(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index d0ca53121..fb3c4a5df 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1,78 +1,3 @@ -- config-keys: - - 70b-fp8-*-vllm - description: | - - Add compilation-config: '{"custom_ops": ["-rms_norm", "-quant_fp8", "-silu_and_mul"]}' as - extra config to all benchmarks/70b_fp8_mi*.sh scripts - - 6-7% uplift for llama for 6/8 configs - PR: https://github.com/InferenceMAX/InferenceMAX/pull/95 -- config-keys: - - gptoss-fp4-*-trt - description: | - - Upgrade GPT-OSS TRT images from 'release:1.1.0rc2.post2' to '1.2.0rc0.post1' - - Add NCCL_GRAPH_REGISTER=0 to benchmarks/gptoss_fp4_b200_trt_slurm.sh - - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh - - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM - PR: https://github.com/InferenceMAX/InferenceMAX/pull/110 -- config-keys: - - gptoss* - - dsr1* - description: | - - Remove Llama 70B runs to make room for multi-node disagg prefill+wideEP on - h100/h200/b200/mi300/mi325/mi355 - PR: https://github.com/InferenceMAX/InferenceMAX/pull/149 -- config-keys: - - gptoss-fp4-b200-vllm - - gptoss-fp4-h100-vllm - - gptoss-fp4-h200-vllm - description: | - - Upgrade vLLM from 0.10.2 to 0.11.0 for GPT-OSS NVIDIA single-node configs - - Adds compilation-config: 
'{"cudagraph_mode":"PIECEWISE"} accordingly since vLLM 0.11.0 - requires now defaults to FULL_AND_PIECEWISE - PR: https://github.com/InferenceMAX/InferenceMAX/pull/159 -- config-keys: - - dsr1* - description: | - - Fixes bug where 1k8k and 8k1k full sweeps had incorrect max-model-len for DeepSeek - PR: https://github.com/InferenceMAX/InferenceMAX/pull/163 -- config-keys: - - dsr1-fp4-b200-sglang - - dsr1-fp8-b200-sglang - - dsr1-fp8-h200-sglang - description: | - - Consolidates H200 and B200 SGLang configurations to use unified v0.5.5-cu129-amd64 - image tag and updates deprecated SGLang server arguments to their current equivalents. - - --enable-flashinfer-trtllm-moe & --enable-ep-moe is no longer available in sglang so we needed to change it - - ep: 4 for all tp: 4 entries (3 occurrences in dsr1-fp4-b200-sglang) - - ep: 8 for all tp: 8 entries (6 occurrences across dsr1-fp4-b200-sglang and dsr1-fp8-b200-sglang) - - dsr1_fp4_b200_docker.sh: Replaced --enable-ep-moe with --ep-size $EP_SIZE and --enable-flashinfer-trtllm-moe with - --moe-runner-backend flashinfer_trtllm - - dsr1_fp8_b200_docker.sh: Replaced --enable-flashinfer-trtllm-moe with --moe-runner-backend flashinfer_trtllm and - added --ep-size $EP_SIZE - - launch_b200-nvd.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container - - launch_b200-tg.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container - PR: https://github.com/InferenceMAX/InferenceMAX/pull/204 -- config-keys: - - gptoss-fp4-mi355x-vllm - - gptoss-fp4-b200-vllm - description: | - - Extend concurrency to 128 for gptoss mi355x/b200 vllm configurations - PR: https://github.com/InferenceMAX/InferenceMAX/pull/209 -- config-keys: - - gptoss-fp4-b200-trt - description: | - - Extend concurrency to 128 for gptoss b200 TRT configurations - PR: https://github.com/InferenceMAX/InferenceMAX/pull/233 -- config-keys: - - "*gb200-sglang" - description: | - - Introducing some improvements in GB200 
SGLang DSR1 submission - PR: https://github.com/InferenceMAX/InferenceMAX/pull/257 -- config-keys: - - dsr1-fp8-h200-trt - description: | - - Update TRT image from nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc0.post1 to nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2 - - Increase concurrency for some configurations - PR: https://github.com/InferenceMAX/InferenceMAX/pull/266 - config-keys: - gptoss-fp4-b200-vllm - gptoss-fp4-h100-vllm From f1319629d162e56752c30f8eb8a14f3b1d305257 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 12 Dec 2025 09:22:07 -0600 Subject: [PATCH 17/29] backfill changelog pt 11 --- perf-changelog.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index fb3c4a5df..814ed3908 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -5,7 +5,4 @@ description: | - Update vLLM image for NVIDIA configs from vLLM 0.11.0 to vLLM 0.11.2 - Adds kv-cache-dtype: fp8 to benchmarks/gptoss_fp4_b200_docker.sh - PR: https://github.com/InferenceMAX/InferenceMAX/pull/273 - - - + PR: https://github.com/InferenceMAX/InferenceMAX/pull/273 \ No newline at end of file From dfeba212d3c5a51e62d5bdf1e4b8beb85c39e192 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 12 Dec 2025 11:38:23 -0600 Subject: [PATCH 18/29] change if condition for jobs in run sweep workflow --- .github/workflows/run-sweep.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index 83966a296..9e9299607 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -56,7 +56,7 @@ jobs: sweep-multi-node-1k1k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] != '[]' }} + if: ${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node 1k1k / strategy: @@ -93,7 +93,7 @@ jobs: sweep-multi-node-1k8k: needs: 
setup - if: ${{ needs.setup.outputs.search-space-config.multi_node['1k8k'] != '[]' }} + if: ${{ needs.setup.outputs.search-space-config.multi_node['1k8k'] }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node 1k8k / strategy: @@ -105,7 +105,7 @@ jobs: sweep-multi-node-8k1k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.multi_node['8k1k'] != '[]' }} + if: ${{ needs.setup.outputs.search-space-config.multi_node['8k1k'] }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node 8k1k / strategy: @@ -117,7 +117,7 @@ jobs: sweep-single-node-1k1k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.single_node['1k1k'] != '[]' }} + if: ${{ needs.setup.outputs.search-space-config.single_node['1k1k'] }} uses: ./.github/workflows/benchmark-tmpl.yml name: single-node 1k1k / strategy: @@ -145,7 +145,7 @@ jobs: sweep-single-node-1k8k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.single_node['1k8k'] != '[]' }} + if: ${{ needs.setup.outputs.search-space-config.single_node['1k8k'] }} uses: ./.github/workflows/benchmark-tmpl.yml name: single-node 1k8k / strategy: @@ -157,7 +157,7 @@ jobs: sweep-single-node-8k1k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.single_node['8k1k'] != '[]' }} + if: ${{ needs.setup.outputs.search-space-config.single_node['8k1k'] }} uses: ./.github/workflows/benchmark-tmpl.yml name: single-node 8k1k / strategy: From fd07f40a2f1eaddf599d36e48475928b7da6fa11 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 12 Dec 2025 11:41:31 -0600 Subject: [PATCH 19/29] debugging run sweep workflow --- .github/workflows/run-sweep.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index 9e9299607..7c581f9e3 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -54,6 +54,17 @@ jobs: echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT + test: + runs-on: 
ubuntu-latest + needs: setup + steps: + - name: Test + run: | + echo ${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] }} + echo ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }} + echo ${{ needs.setup.outputs.search-space-config.single_node['1k1k'] }} + ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }} + sweep-multi-node-1k1k: needs: setup if: ${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] }} From 228e0a209b384d703060ebeb8c8b5138c6363001 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 12 Dec 2025 11:42:51 -0600 Subject: [PATCH 20/29] debugging run sweep workflow pt 2 --- .github/workflows/run-sweep.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index 7c581f9e3..2cffe0b4c 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -60,10 +60,10 @@ jobs: steps: - name: Test run: | - echo ${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] }} - echo ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }} - echo ${{ needs.setup.outputs.search-space-config.single_node['1k1k'] }} - ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }} + echo "${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] }}" + echo "${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }}" + echo "${{ needs.setup.outputs.search-space-config.single_node['1k1k'] }}" + echo "${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }}" sweep-multi-node-1k1k: needs: setup From cb2cc8a461197e0b8b9b84c72df30edd401923bd Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 12 Dec 2025 11:44:56 -0600 Subject: [PATCH 21/29] debugging run sweep workflow pt 3 (revert) --- .github/workflows/run-sweep.yml | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git 
a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index 2cffe0b4c..f647d02ba 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -54,20 +54,9 @@ jobs: echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT - test: - runs-on: ubuntu-latest - needs: setup - steps: - - name: Test - run: | - echo "${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] }}" - echo "${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }}" - echo "${{ needs.setup.outputs.search-space-config.single_node['1k1k'] }}" - echo "${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }}" - sweep-multi-node-1k1k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] }} + if: ${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] != '' }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node 1k1k / strategy: @@ -104,7 +93,7 @@ jobs: sweep-multi-node-1k8k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.multi_node['1k8k'] }} + if: ${{ needs.setup.outputs.search-space-config.multi_node['1k8k'] != '' }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node 1k8k / strategy: @@ -116,7 +105,7 @@ jobs: sweep-multi-node-8k1k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.multi_node['8k1k'] }} + if: ${{ needs.setup.outputs.search-space-config.multi_node['8k1k'] != '' }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node 8k1k / strategy: @@ -128,7 +117,7 @@ jobs: sweep-single-node-1k1k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.single_node['1k1k'] }} + if: ${{ needs.setup.outputs.search-space-config.single_node['1k1k'] != '' }} uses: ./.github/workflows/benchmark-tmpl.yml name: single-node 1k1k / strategy: @@ -156,7 +145,7 @@ jobs: sweep-single-node-1k8k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.single_node['1k8k'] }} + if: ${{ 
needs.setup.outputs.search-space-config.single_node['1k8k'] != '' }} uses: ./.github/workflows/benchmark-tmpl.yml name: single-node 1k8k / strategy: @@ -168,7 +157,7 @@ jobs: sweep-single-node-8k1k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.single_node['8k1k'] }} + if: ${{ needs.setup.outputs.search-space-config.single_node['8k1k'] != '' }} uses: ./.github/workflows/benchmark-tmpl.yml name: single-node 8k1k / strategy: From 055b324c18e2a091d46b74624a9e75d33d4e1a3c Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 12 Dec 2025 11:49:04 -0600 Subject: [PATCH 22/29] debugging run sweep workflow pt 4 --- .github/workflows/run-sweep.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index f647d02ba..4217cb45e 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -54,6 +54,18 @@ jobs: echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT + test: + runs-on: ubuntu-latest + needs: setup + steps: + - name: Test + run: | + echo "${{ fromJson(needs.setup.outputs.search-space-config) }}" + echo "${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] }}" + echo "${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }}" + echo "${{ needs.setup.outputs.search-space-config.single_node['1k1k'] }}" + echo "${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }}" + sweep-multi-node-1k1k: needs: setup if: ${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] != '' }} From ae65551d86c25c63d9287746f7cd76ff71de6acb Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 12 Dec 2025 11:50:40 -0600 Subject: [PATCH 23/29] debugging run sweep workflow pt 5 --- .github/workflows/run-sweep.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index 4217cb45e..09f72bde5 100644 --- a/.github/workflows/run-sweep.yml 
+++ b/.github/workflows/run-sweep.yml @@ -60,10 +60,12 @@ jobs: steps: - name: Test run: | - echo "${{ fromJson(needs.setup.outputs.search-space-config) }}" + echo "${{ needs.setup.outputs.search-space-config }}" echo "${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] }}" + echo "${{ fromJson(needs.setup.outputs.search-space-config).multi_node }}" echo "${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }}" echo "${{ needs.setup.outputs.search-space-config.single_node['1k1k'] }}" + echo "${{ fromJson(needs.setup.outputs.search-space-config).single_node }}" echo "${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }}" sweep-multi-node-1k1k: From 667d2e18815b04d636ee68194ca419f65cbe04e7 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 12 Dec 2025 12:04:15 -0600 Subject: [PATCH 24/29] debugging run sweep workflow pt 6 --- .github/workflows/run-sweep.yml | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index 09f72bde5..a46bf55fb 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -55,22 +55,19 @@ jobs: echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT test: - runs-on: ubuntu-latest needs: setup + runs-on: ubuntu-latest steps: - name: Test run: | - echo "${{ needs.setup.outputs.search-space-config }}" - echo "${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] }}" - echo "${{ fromJson(needs.setup.outputs.search-space-config).multi_node }}" - echo "${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }}" - echo "${{ needs.setup.outputs.search-space-config.single_node['1k1k'] }}" - echo "${{ fromJson(needs.setup.outputs.search-space-config).single_node }}" - echo "${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }}" + echo "${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) 
}}" + echo "${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node) }}" + echo "${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k1k']) }}" + echo "${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node) }}" sweep-multi-node-1k1k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] != '' }} + if: ${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] != '[]' }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node 1k1k / strategy: @@ -107,7 +104,7 @@ jobs: sweep-multi-node-1k8k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.multi_node['1k8k'] != '' }} + if: ${{ needs.setup.outputs.search-space-config.multi_node['1k8k'] != '[]' }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node 1k8k / strategy: @@ -119,7 +116,7 @@ jobs: sweep-multi-node-8k1k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.multi_node['8k1k'] != '' }} + if: ${{ needs.setup.outputs.search-space-config.multi_node['8k1k'] != '[]' }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node 8k1k / strategy: @@ -131,7 +128,7 @@ jobs: sweep-single-node-1k1k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.single_node['1k1k'] != '' }} + if: ${{ needs.setup.outputs.search-space-config.single_node['1k1k'] != '[]' }} uses: ./.github/workflows/benchmark-tmpl.yml name: single-node 1k1k / strategy: @@ -159,7 +156,7 @@ jobs: sweep-single-node-1k8k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.single_node['1k8k'] != '' }} + if: ${{ needs.setup.outputs.search-space-config.single_node['1k8k'] != '[]' }} uses: ./.github/workflows/benchmark-tmpl.yml name: single-node 1k8k / strategy: @@ -171,7 +168,7 @@ jobs: sweep-single-node-8k1k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.single_node['8k1k'] != '' }} + if: ${{ needs.setup.outputs.search-space-config.single_node['8k1k'] != '[]' }} 
uses: ./.github/workflows/benchmark-tmpl.yml name: single-node 8k1k / strategy: @@ -198,7 +195,7 @@ jobs: upload-changelog-metadata: needs: [setup, collect-results] - if: ${{ always() && needs.setup.result != 'skipped' }} + if: ${{ needs.setup.result != 'skipped' }} runs-on: ubuntu-latest steps: - name: Extract and save changelog metadata From ef3ba6b0bb3f4f399c25f79781f96ecff46f9237 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 12 Dec 2025 12:07:57 -0600 Subject: [PATCH 25/29] debugging run sweep workflow pt 7 --- .github/workflows/run-sweep.yml | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index a46bf55fb..19a673027 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -54,20 +54,9 @@ jobs: echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT - test: - needs: setup - runs-on: ubuntu-latest - steps: - - name: Test - run: | - echo "${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) }}" - echo "${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node) }}" - echo "${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k1k']) }}" - echo "${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node) }}" - sweep-multi-node-1k1k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.multi_node['1k1k'] != '[]' }} + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != 'null' }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node 1k1k / strategy: @@ -104,7 +93,7 @@ jobs: sweep-multi-node-1k8k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.multi_node['1k8k'] != '[]' }} + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k8k']) != 'null' }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node 1k8k / strategy: @@ -116,7 
+105,7 @@ jobs: sweep-multi-node-8k1k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.multi_node['8k1k'] != '[]' }} + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != 'null' }} uses: ./.github/workflows/benchmark-multinode-tmpl.yml name: multi-node 8k1k / strategy: @@ -128,7 +117,7 @@ jobs: sweep-single-node-1k1k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.single_node['1k1k'] != '[]' }} + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k1k']) != 'null' }} uses: ./.github/workflows/benchmark-tmpl.yml name: single-node 1k1k / strategy: @@ -156,7 +145,7 @@ jobs: sweep-single-node-1k8k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.single_node['1k8k'] != '[]' }} + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k8k']) != 'null' }} uses: ./.github/workflows/benchmark-tmpl.yml name: single-node 1k8k / strategy: @@ -168,7 +157,7 @@ jobs: sweep-single-node-8k1k: needs: setup - if: ${{ needs.setup.outputs.search-space-config.single_node['8k1k'] != '[]' }} + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['8k1k']) != 'null' }} uses: ./.github/workflows/benchmark-tmpl.yml name: single-node 8k1k / strategy: From fae8278e42174abc1ba0338f35bf077a4195d50d Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 12 Dec 2025 14:46:50 -0600 Subject: [PATCH 26/29] add always() condition to upload changelog metadata (add back, this got removed) --- .github/workflows/run-sweep.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index 19a673027..17166f079 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -184,7 +184,7 @@ jobs: upload-changelog-metadata: needs: [setup, collect-results] - if: ${{ needs.setup.result != 'skipped' }} + if: ${{ always() && needs.setup.result != 'skipped' }} 
runs-on: ubuntu-latest steps: - name: Extract and save changelog metadata From 2018ad3b2d872540705e9affc2351cff8d393640 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Sun, 14 Dec 2025 20:28:37 -0600 Subject: [PATCH 27/29] add bmk prefix to results --- .github/workflows/benchmark-multinode-tmpl.yml | 2 +- .github/workflows/benchmark-tmpl.yml | 2 +- .github/workflows/collect-results.yml | 10 +++++----- .github/workflows/run-sweep.yml | 2 ++ 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/benchmark-multinode-tmpl.yml b/.github/workflows/benchmark-multinode-tmpl.yml index 2b828bda8..6c42cbdaa 100644 --- a/.github/workflows/benchmark-multinode-tmpl.yml +++ b/.github/workflows/benchmark-multinode-tmpl.yml @@ -170,5 +170,5 @@ jobs: - name: Upload results uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: - name: ${{ env.RESULT_FILENAME }} + name: bmk_${{ env.RESULT_FILENAME }} path: agg_${{ env.RESULT_FILENAME }}_*.json diff --git a/.github/workflows/benchmark-tmpl.yml b/.github/workflows/benchmark-tmpl.yml index 60c19b441..a48081fac 100644 --- a/.github/workflows/benchmark-tmpl.yml +++ b/.github/workflows/benchmark-tmpl.yml @@ -169,5 +169,5 @@ jobs: - name: Upload result uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: - name: ${{ env.RESULT_FILENAME }} + name: bmk_${{ env.RESULT_FILENAME }} path: agg_${{ env.RESULT_FILENAME }}.json \ No newline at end of file diff --git a/.github/workflows/collect-results.yml b/.github/workflows/collect-results.yml index 8105c6d53..d0b0bd992 100644 --- a/.github/workflows/collect-results.yml +++ b/.github/workflows/collect-results.yml @@ -3,7 +3,7 @@ name: Template - Collect Results on: workflow_call: inputs: - exp-name: + result-prefix: required: false type: string default: '' @@ -26,7 +26,7 @@ jobs: uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 with: path: results/ - pattern: ${{ inputs.exp-name 
&& format('{0}_*', inputs.exp-name) || '*' }} + pattern: ${{ inputs.result-prefix && format('{0}_*', inputs.result-prefix) || '*' }} - name: Print summary run: | @@ -34,10 +34,10 @@ jobs: python3 utils/summarize.py results/ >> $GITHUB_STEP_SUMMARY - name: Aggregate results - run: python3 utils/collect_results.py results/ ${{ inputs.exp-name || 'all' }} + run: python3 utils/collect_results.py results/ ${{ inputs.result-prefix || 'all' }} - name: Upload aggregated results uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: - name: results_${{ inputs.exp-name || 'all' }} - path: agg_${{ inputs.exp-name || 'all' }}.json + name: results_${{ inputs.result-prefix || 'all' }} + path: agg_${{ inputs.result-prefix || 'all' }}.json diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index 17166f079..cf01437ff 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -181,6 +181,8 @@ jobs: if: ${{ always() && needs.setup.result != 'skipped' }} uses: ./.github/workflows/collect-results.yml secrets: inherit + with: + result-prefix: "bmk" upload-changelog-metadata: needs: [setup, collect-results] From 5e0c779694e55470b164555c906b14e8ba2afd98 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Mon, 15 Dec 2025 09:07:18 -0600 Subject: [PATCH 28/29] backfill changelog official --- perf-changelog.yaml | 75 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 814ed3908..a74285c53 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1,3 +1,78 @@ +- config-keys: + - 70b-fp8-*-vllm + description: | + - Add compilation-config: '{"custom_ops": ["-rms_norm", "-quant_fp8", "-silu_and_mul"]}' as + extra config to all benchmarks/70b_fp8_mi*.sh scripts + - 6-7% uplift for llama for 6/8 configs + PR: https://github.com/InferenceMAX/InferenceMAX/pull/95 +- config-keys: + - gptoss-fp4-*-trt + description: | + - 
Upgrade GPT-OSS TRT images from 'release:1.1.0rc2.post2' to '1.2.0rc0.post1' + - Add NCCL_GRAPH_REGISTER=0 to benchmarks/gptoss_fp4_b200_trt_slurm.sh + - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh + - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM + PR: https://github.com/InferenceMAX/InferenceMAX/pull/110 +- config-keys: + - gptoss* + - dsr1* + description: | + - Remove Llama 70B runs to make room for multi-node disagg prefill+wideEP on + h100/h200/b200/mi300/mi325/mi355 + PR: https://github.com/InferenceMAX/InferenceMAX/pull/149 +- config-keys: + - gptoss-fp4-b200-vllm + - gptoss-fp4-h100-vllm + - gptoss-fp4-h200-vllm + description: | + - Upgrade vLLM from 0.10.2 to 0.11.0 for GPT-OSS NVIDIA single-node configs + - Adds compilation-config: '{"cudagraph_mode":"PIECEWISE"}' accordingly since vLLM 0.11.0 + now defaults to FULL_AND_PIECEWISE + PR: https://github.com/InferenceMAX/InferenceMAX/pull/159 +- config-keys: + - dsr1* + description: | + - Fixes bug where 1k8k and 8k1k full sweeps had incorrect max-model-len for DeepSeek + PR: https://github.com/InferenceMAX/InferenceMAX/pull/163 +- config-keys: + - dsr1-fp4-b200-sglang + - dsr1-fp8-b200-sglang + - dsr1-fp8-h200-sglang + description: | + - Consolidates H200 and B200 SGLang configurations to use unified v0.5.5-cu129-amd64 + image tag and updates deprecated SGLang server arguments to their current equivalents.
+ - --enable-flashinfer-trtllm-moe & --enable-ep-moe are no longer available in SGLang, so we needed to replace them + - ep: 4 for all tp: 4 entries (3 occurrences in dsr1-fp4-b200-sglang) + - ep: 8 for all tp: 8 entries (6 occurrences across dsr1-fp4-b200-sglang and dsr1-fp8-b200-sglang) + - dsr1_fp4_b200_docker.sh: Replaced --enable-ep-moe with --ep-size $EP_SIZE and --enable-flashinfer-trtllm-moe with + --moe-runner-backend flashinfer_trtllm + - dsr1_fp8_b200_docker.sh: Replaced --enable-flashinfer-trtllm-moe with --moe-runner-backend flashinfer_trtllm and + added --ep-size $EP_SIZE + - launch_b200-nvd.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container + - launch_b200-tg.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container + PR: https://github.com/InferenceMAX/InferenceMAX/pull/204 +- config-keys: + - gptoss-fp4-mi355x-vllm + - gptoss-fp4-b200-vllm + description: | + - Extend concurrency to 128 for gptoss mi355x/b200 vllm configurations + PR: https://github.com/InferenceMAX/InferenceMAX/pull/209 +- config-keys: + - gptoss-fp4-b200-trt + description: | + - Extend concurrency to 128 for gptoss b200 TRT configurations + PR: https://github.com/InferenceMAX/InferenceMAX/pull/233 +- config-keys: + - "*gb200-sglang" + description: | + - Introducing some improvements in GB200 SGLang DSR1 submission + PR: https://github.com/InferenceMAX/InferenceMAX/pull/257 +- config-keys: + - dsr1-fp8-h200-trt + description: | + - Update TRT image from nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc0.post1 to nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2 + - Increase concurrency for some configurations + PR: https://github.com/InferenceMAX/InferenceMAX/pull/266 - config-keys: - gptoss-fp4-b200-vllm - gptoss-fp4-h100-vllm From 8d8ffa1662c3697deb4af2e57f6cf987edc8c9b5 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Mon, 15 Dec 2025 09:14:21 -0600 Subject: [PATCH 29/29] for concurrency group, use more unique sha ---
.github/workflows/run-sweep.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index cf01437ff..cb3c4dde5 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -2,7 +2,7 @@ name: "Run Sweep" run-name: Run Sweep - ${{ github.event.pull_request.title || github.ref_name }} concurrency: - group: sweep-${{ github.event.pull_request.number || github.ref }} + group: sweep-${{ github.event.pull_request.number || github.sha }} cancel-in-progress: true on: