diff --git a/.github/workflows/benchmark-multinode-tmpl.yml b/.github/workflows/benchmark-multinode-tmpl.yml index 2b828bda8..6c42cbdaa 100644 --- a/.github/workflows/benchmark-multinode-tmpl.yml +++ b/.github/workflows/benchmark-multinode-tmpl.yml @@ -170,5 +170,5 @@ jobs: - name: Upload results uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: - name: ${{ env.RESULT_FILENAME }} + name: bmk_${{ env.RESULT_FILENAME }} path: agg_${{ env.RESULT_FILENAME }}_*.json diff --git a/.github/workflows/benchmark-tmpl.yml b/.github/workflows/benchmark-tmpl.yml index 60c19b441..a48081fac 100644 --- a/.github/workflows/benchmark-tmpl.yml +++ b/.github/workflows/benchmark-tmpl.yml @@ -169,5 +169,5 @@ jobs: - name: Upload result uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: - name: ${{ env.RESULT_FILENAME }} + name: bmk_${{ env.RESULT_FILENAME }} path: agg_${{ env.RESULT_FILENAME }}.json \ No newline at end of file diff --git a/.github/workflows/collect-results.yml b/.github/workflows/collect-results.yml index 8105c6d53..d0b0bd992 100644 --- a/.github/workflows/collect-results.yml +++ b/.github/workflows/collect-results.yml @@ -3,7 +3,7 @@ name: Template - Collect Results on: workflow_call: inputs: - exp-name: + result-prefix: required: false type: string default: '' @@ -26,7 +26,7 @@ jobs: uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 with: path: results/ - pattern: ${{ inputs.exp-name && format('{0}_*', inputs.exp-name) || '*' }} + pattern: ${{ inputs.result-prefix && format('{0}_*', inputs.result-prefix) || '*' }} - name: Print summary run: | @@ -34,10 +34,10 @@ jobs: python3 utils/summarize.py results/ >> $GITHUB_STEP_SUMMARY - name: Aggregate results - run: python3 utils/collect_results.py results/ ${{ inputs.exp-name || 'all' }} + run: python3 utils/collect_results.py results/ ${{ inputs.result-prefix || 'all' }} - name: Upload aggregated results uses: 
actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: - name: results_${{ inputs.exp-name || 'all' }} - path: agg_${{ inputs.exp-name || 'all' }}.json + name: results_${{ inputs.result-prefix || 'all' }} + path: agg_${{ inputs.result-prefix || 'all' }}.json diff --git a/.github/workflows/full-sweep-1k1k-scheduler.yml b/.github/workflows/full-sweep-1k1k-scheduler.yml index 8b32f47c0..3c592cf0a 100644 --- a/.github/workflows/full-sweep-1k1k-scheduler.yml +++ b/.github/workflows/full-sweep-1k1k-scheduler.yml @@ -2,8 +2,6 @@ name: "Full Sweep Scheduler - 1k1k" on: workflow_dispatch: - schedule: - - cron: "0 0 * * *" jobs: get-dsr1-configs: diff --git a/.github/workflows/full-sweep-1k8k-scheduler.yml b/.github/workflows/full-sweep-1k8k-scheduler.yml index 393864fdf..be909aad5 100644 --- a/.github/workflows/full-sweep-1k8k-scheduler.yml +++ b/.github/workflows/full-sweep-1k8k-scheduler.yml @@ -2,8 +2,6 @@ name: "Full Sweep Scheduler - 1k8k" on: workflow_dispatch: - schedule: - - cron: "0 0 * * *" jobs: get-dsr1-configs: diff --git a/.github/workflows/full-sweep-8k1k-scheduler.yml b/.github/workflows/full-sweep-8k1k-scheduler.yml index 629e56bd9..3eabe74f4 100644 --- a/.github/workflows/full-sweep-8k1k-scheduler.yml +++ b/.github/workflows/full-sweep-8k1k-scheduler.yml @@ -2,8 +2,6 @@ name: "Full Sweep Scheduler - 8k1k" on: workflow_dispatch: - schedule: - - cron: "0 0 * * *" jobs: get-dsr1-configs: diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml new file mode 100644 index 000000000..cb3c4dde5 --- /dev/null +++ b/.github/workflows/run-sweep.yml @@ -0,0 +1,235 @@ +name: "Run Sweep" +run-name: Run Sweep - ${{ github.event.pull_request.title || github.ref_name }} + +concurrency: + group: sweep-${{ github.event.pull_request.number || github.sha }} + cancel-in-progress: true + +on: + push: + branches: + - main + paths: + - "perf-changelog.yaml" + pull_request: + branches: + - main + types: + - ready_for_review + - 
synchronize + - labeled + paths: + - "perf-changelog.yaml" + +jobs: + setup: + runs-on: ubuntu-latest + if: >- + (github.event_name == 'pull_request' && !github.event.pull_request.draft && contains(github.event.pull_request.labels.*.name, 'sweep-enabled')) || + (github.event_name != 'pull_request' && !contains(github.event.head_commit.message, '[skip-sweep]')) + outputs: + search-space-config: ${{ steps.setup.outputs.search-space-config }} + steps: + - name: Checkout code + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + with: + fetch-depth: 0 + + - id: setup + run: | + pip install pydantic + + if [ "${{ github.event_name }}" == "pull_request" ]; then + BASE_REF="origin/${{ github.base_ref }}" + HEAD_REF="${{ github.event.pull_request.head.sha }}" + else + BASE_REF="${{ github.event.before }}" + HEAD_REF="${{ github.event.after }}" + fi + + CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/process_changelog.py \ + --changelog-file ${GITHUB_WORKSPACE}/perf-changelog.yaml \ + --base-ref "$BASE_REF" \ + --head-ref "$HEAD_REF") + + echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT + + sweep-multi-node-1k1k: + needs: setup + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != 'null' }} + uses: ./.github/workflows/benchmark-multinode-tmpl.yml + name: multi-node 1k1k / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }} + secrets: inherit + with: &multi-node-inputs + isl: ${{ matrix.config.isl }} + osl: ${{ matrix.config.osl }} + max-model-len: ${{ matrix.config.max-model-len }} + runner: ${{ matrix.config.runner }} + image: ${{ matrix.config.image }} + model: ${{ matrix.config.model }} + model-prefix: ${{ matrix.config.model-prefix }} + framework: ${{ matrix.config.framework }} + precision: ${{ matrix.config.precision }} + exp-name: ${{ matrix.config.exp-name }} + conc-list: ${{ toJson(matrix.config.conc) }} + spec-decoding: ${{ 
matrix.config.spec-decoding }} + disagg: ${{ matrix.config.disagg }} + + prefill-num-worker: ${{ matrix.config.prefill.num-worker }} + prefill-tp: ${{ matrix.config.prefill.tp }} + prefill-ep: ${{ matrix.config.prefill.ep }} + prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }} + prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }} + + decode-num-worker: ${{ matrix.config.decode.num-worker }} + decode-tp: ${{ matrix.config.decode.tp }} + decode-ep: ${{ matrix.config.decode.ep }} + decode-dp-attn: ${{ matrix.config.decode.dp-attn }} + decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }} + + sweep-multi-node-1k8k: + needs: setup + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k8k']) != 'null' }} + uses: ./.github/workflows/benchmark-multinode-tmpl.yml + name: multi-node 1k8k / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k8k'] }} + secrets: inherit + with: *multi-node-inputs + + sweep-multi-node-8k1k: + needs: setup + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != 'null' }} + uses: ./.github/workflows/benchmark-multinode-tmpl.yml + name: multi-node 8k1k / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k'] }} + secrets: inherit + with: *multi-node-inputs + + sweep-single-node-1k1k: + needs: setup + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k1k']) != 'null' }} + uses: ./.github/workflows/benchmark-tmpl.yml + name: single-node 1k1k / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }} + secrets: inherit + with: &single-node-inputs + exp-name: ${{ matrix.config.exp-name }} + isl: ${{ matrix.config.isl }} + osl: ${{ matrix.config.osl }} + max-model-len: ${{ 
matrix.config.max-model-len }} + runner: ${{ matrix.config.runner }} + image: ${{ matrix.config.image }} + model: ${{ matrix.config.model }} + model-prefix: ${{ matrix.config.model-prefix }} + framework: ${{ matrix.config.framework }} + precision: ${{ matrix.config.precision }} + tp: ${{ matrix.config.tp }} + ep: ${{ matrix.config.ep }} + dp-attn: ${{ matrix.config.dp-attn }} + conc: ${{ matrix.config.conc }} + spec-decoding: ${{ matrix.config.spec-decoding }} + disagg: ${{ matrix.config.disagg }} + + sweep-single-node-1k8k: + needs: setup + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k8k']) != 'null' }} + uses: ./.github/workflows/benchmark-tmpl.yml + name: single-node 1k8k / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k8k'] }} + secrets: inherit + with: *single-node-inputs + + sweep-single-node-8k1k: + needs: setup + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['8k1k']) != 'null' }} + uses: ./.github/workflows/benchmark-tmpl.yml + name: single-node 8k1k / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['8k1k'] }} + secrets: inherit + with: *single-node-inputs + + collect-results: + needs: + [ + sweep-single-node-1k1k, + sweep-single-node-1k8k, + sweep-single-node-8k1k, + sweep-multi-node-1k1k, + sweep-multi-node-1k8k, + sweep-multi-node-8k1k, + setup, + ] + if: ${{ always() && needs.setup.result != 'skipped' }} + uses: ./.github/workflows/collect-results.yml + secrets: inherit + with: + result-prefix: "bmk" + + upload-changelog-metadata: + needs: [setup, collect-results] + if: ${{ always() && needs.setup.result != 'skipped' }} + runs-on: ubuntu-latest + steps: + - name: Extract and save changelog metadata + env: + CONFIG_JSON: ${{ needs.setup.outputs.search-space-config }} + run: | + echo "$CONFIG_JSON" | jq '.changelog_metadata' > changelog_metadata.json 
+ + - name: Upload changelog artifact + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: changelog-metadata + path: changelog_metadata.json + + calc-success-rate: + needs: collect-results + if: ${{ always() && needs.collect-results.result != 'skipped'}} + runs-on: ubuntu-latest + + env: + RESULTS_DIR: "results/" + STATS_FILENAME: "run_stats" + GITHUB_TOKEN: ${{ secrets.REPO_PAT }} + + steps: + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + with: + token: ${{ secrets.REPO_PAT }} + fetch-depth: 0 + + - name: Download results artifacts + uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + with: + path: ${{ env.RESULTS_DIR }} + pattern: results_* + + - name: Install python dependencies + run: pip install PyGithub + + - name: Calculate success rate + run: python3 utils/calc_success_rate.py $STATS_FILENAME + + - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: "run-stats" + path: ${{ env.STATS_FILENAME }}.json diff --git a/perf-changelog.yaml b/perf-changelog.yaml new file mode 100644 index 000000000..a74285c53 --- /dev/null +++ b/perf-changelog.yaml @@ -0,0 +1,83 @@ +- config-keys: + - 70b-fp8-*-vllm + description: | + - Add compilation-config: '{"custom_ops": ["-rms_norm", "-quant_fp8", "-silu_and_mul"]}' as + extra config to all benchmarks/70b_fp8_mi*.sh scripts + - 6-7% uplift for llama for 6/8 configs + PR: https://github.com/InferenceMAX/InferenceMAX/pull/95 +- config-keys: + - gptoss-fp4-*-trt + description: | + - Upgrade GPT-OSS TRT images from 'release:1.1.0rc2.post2' to '1.2.0rc0.post1' + - Add NCCL_GRAPH_REGISTER=0 to benchmarks/gptoss_fp4_b200_trt_slurm.sh + - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh + - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM + PR: https://github.com/InferenceMAX/InferenceMAX/pull/110 +- config-keys: + - gptoss* + - dsr1* 
+ description: | + - Remove Llama 70B runs to make room for multi-node disagg prefill+wideEP on + h100/h200/b200/mi300/mi325/mi355 + PR: https://github.com/InferenceMAX/InferenceMAX/pull/149 +- config-keys: + - gptoss-fp4-b200-vllm + - gptoss-fp4-h100-vllm + - gptoss-fp4-h200-vllm + description: | + - Upgrade vLLM from 0.10.2 to 0.11.0 for GPT-OSS NVIDIA single-node configs + - Adds compilation-config: '{"cudagraph_mode":"PIECEWISE"}' accordingly since vLLM 0.11.0 + now defaults to FULL_AND_PIECEWISE + PR: https://github.com/InferenceMAX/InferenceMAX/pull/159 +- config-keys: + - dsr1* + description: | + - Fixes bug where 1k8k and 8k1k full sweeps had incorrect max-model-len for DeepSeek + PR: https://github.com/InferenceMAX/InferenceMAX/pull/163 +- config-keys: + - dsr1-fp4-b200-sglang + - dsr1-fp8-b200-sglang + - dsr1-fp8-h200-sglang + description: | + - Consolidates H200 and B200 SGLang configurations to use unified v0.5.5-cu129-amd64 + image tag and updates deprecated SGLang server arguments to their current equivalents. 
+ - --enable-flashinfer-trtllm-moe & --enable-ep-moe is no longer available in sglang so we needed to change it + - ep: 4 for all tp: 4 entries (3 occurrences in dsr1-fp4-b200-sglang) + - ep: 8 for all tp: 8 entries (6 occurrences across dsr1-fp4-b200-sglang and dsr1-fp8-b200-sglang) + - dsr1_fp4_b200_docker.sh: Replaced --enable-ep-moe with --ep-size $EP_SIZE and --enable-flashinfer-trtllm-moe with + --moe-runner-backend flashinfer_trtllm + - dsr1_fp8_b200_docker.sh: Replaced --enable-flashinfer-trtllm-moe with --moe-runner-backend flashinfer_trtllm and + added --ep-size $EP_SIZE + - launch_b200-nvd.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container + - launch_b200-tg.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container + PR: https://github.com/InferenceMAX/InferenceMAX/pull/204 +- config-keys: + - gptoss-fp4-mi355x-vllm + - gptoss-fp4-b200-vllm + description: | + - Extend concurrency to 128 for gptoss mi355x/b200 vllm configurations + PR: https://github.com/InferenceMAX/InferenceMAX/pull/209 +- config-keys: + - gptoss-fp4-b200-trt + description: | + - Extend concurrency to 128 for gptoss b200 TRT configurations + PR: https://github.com/InferenceMAX/InferenceMAX/pull/233 +- config-keys: + - "*gb200-sglang" + description: | + - Introducing some improvements in GB200 SGLang DSR1 submission + PR: https://github.com/InferenceMAX/InferenceMAX/pull/257 +- config-keys: + - dsr1-fp8-h200-trt + description: | + - Update TRT image from nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc0.post1 to nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2 + - Increase concurrency for some configurations + PR: https://github.com/InferenceMAX/InferenceMAX/pull/266 +- config-keys: + - gptoss-fp4-b200-vllm + - gptoss-fp4-h100-vllm + - gptoss-fp4-h200-vllm + description: | + - Update vLLM image for NVIDIA configs from vLLM 0.11.0 to vLLM 0.11.2 + - Adds kv-cache-dtype: fp8 to benchmarks/gptoss_fp4_b200_docker.sh + PR: 
https://github.com/InferenceMAX/InferenceMAX/pull/273 \ No newline at end of file diff --git a/utils/constants.py b/utils/constants.py new file mode 100644 index 000000000..a465091da --- /dev/null +++ b/utils/constants.py @@ -0,0 +1,4 @@ +MASTER_CONFIGS = [".github/configs/amd-master.yaml", + ".github/configs/nvidia-master.yaml"] +RUNNER_CONFIG = ".github/configs/runners.yaml" +GENERATE_SWEEPS_PY_SCRIPT = "utils/matrix_logic/generate_sweep_configs.py" \ No newline at end of file diff --git a/utils/matrix_logic/generate_sweep_configs.py b/utils/matrix_logic/generate_sweep_configs.py index 8fc47651c..d8fab38cf 100644 --- a/utils/matrix_logic/generate_sweep_configs.py +++ b/utils/matrix_logic/generate_sweep_configs.py @@ -1,8 +1,17 @@ import json -import yaml import argparse +import sys +from pathlib import Path -from validation import validate_master_config, validate_matrix_entry, validate_runner_config, Fields +# Ensure sibling modules are importable regardless of how script is invoked +sys.path.insert(0, str(Path(__file__).resolve().parent)) + +from validation import ( + validate_matrix_entry, + load_config_files, + load_runner_file, + Fields +) seq_len_stoi = { "1k1k": (1024, 1024), @@ -366,42 +375,126 @@ def get_lowest_conc(search_space_entry): return matrix_values -def load_config_files(config_files): - """Load and merge configuration files.""" - all_config_data = {} - for config_file in config_files: - try: - with open(config_file, 'r') as f: - config_data = yaml.safe_load(f) - assert isinstance( - config_data, dict), f"Config file '{config_file}' must contain a dictionary" - - # Check for duplicate keys, this is only in place to prevent against the very unlikely - # case where an entry in one config accidentally/purposefully tries to override an entry in another config - duplicate_keys = set(all_config_data.keys()) & set( - config_data.keys()) - if duplicate_keys: - raise ValueError( - f"Duplicate configuration keys found in '{config_file}': {', 
'.join(sorted(duplicate_keys))}" - ) - - all_config_data.update(config_data) - except FileNotFoundError: - raise ValueError(f"Input file '{config_file}' does not exist.") - - return all_config_data - - -def load_runner_file(runner_file): - """Load runner configuration file.""" - try: - with open(runner_file, 'r') as f: - runner_config = yaml.safe_load(f) - except FileNotFoundError as e: +def generate_test_config_sweep(args, all_config_data): + """Generate full sweep for specific config keys. + + Validates that all specified config keys exist before generating. + Expands all configs fully without any filtering. + """ + # Validate all config keys exist + missing_keys = [key for key in args.config_keys if key not in all_config_data] + if missing_keys: + available_keys = sorted(all_config_data.keys()) raise ValueError( - f"Runner config file '{runner_file}' does not exist.") + f"Config key(s) not found: {', '.join(missing_keys)}.\n" + f"Available keys: {', '.join(available_keys)}" + ) + + matrix_values = [] + + for key in args.config_keys: + val = all_config_data[key] + is_multinode = val.get(Fields.MULTINODE.value, False) + + image = val[Fields.IMAGE.value] + model = val[Fields.MODEL.value] + model_code = val[Fields.MODEL_PREFIX.value] + precision = val[Fields.PRECISION.value] + framework = val[Fields.FRAMEWORK.value] + runner = val[Fields.RUNNER.value] + disagg = val.get(Fields.DISAGG.value, False) - return runner_config + for seq_len_config in val[Fields.SEQ_LEN_CONFIGS.value]: + isl = seq_len_config[Fields.ISL.value] + osl = seq_len_config[Fields.OSL.value] + seq_len_str = seq_len_to_str(isl, osl) + + for bmk in seq_len_config[Fields.SEARCH_SPACE.value]: + if is_multinode: + # Multinode config + spec_decoding = bmk.get(Fields.SPEC_DECODING.value, "none") + prefill = bmk[Fields.PREFILL.value] + decode = bmk[Fields.DECODE.value] + + # Get concurrency values + if Fields.CONC_LIST.value in bmk: + conc_values = bmk[Fields.CONC_LIST.value] + else: + conc_start = 
bmk[Fields.CONC_START.value] + conc_end = bmk[Fields.CONC_END.value] + conc_values = [] + conc = conc_start + while conc <= conc_end: + conc_values.append(conc) + if conc == conc_end: + break + conc *= 2 + if conc > conc_end: + conc = conc_end + + entry = { + Fields.IMAGE.value: image, + Fields.MODEL.value: model, + Fields.MODEL_PREFIX.value: model_code, + Fields.PRECISION.value: precision, + Fields.FRAMEWORK.value: framework, + Fields.RUNNER.value: runner, + Fields.ISL.value: isl, + Fields.OSL.value: osl, + Fields.SPEC_DECODING.value: spec_decoding, + Fields.PREFILL.value: prefill, + Fields.DECODE.value: decode, + Fields.CONC.value: conc_values, + Fields.MAX_MODEL_LEN.value: isl + osl + 200, + Fields.EXP_NAME.value: f"{model_code}_{seq_len_str}", + Fields.DISAGG.value: disagg, + } + matrix_values.append(validate_matrix_entry(entry, is_multinode=True)) + else: + # Single-node config + tp = bmk[Fields.TP.value] + ep = bmk.get(Fields.EP.value) + dp_attn = bmk.get(Fields.DP_ATTN.value) + spec_decoding = bmk.get(Fields.SPEC_DECODING.value, "none") + + # Get concurrency values + if Fields.CONC_LIST.value in bmk: + conc_values = bmk[Fields.CONC_LIST.value] + else: + conc_start = bmk[Fields.CONC_START.value] + conc_end = bmk[Fields.CONC_END.value] + conc_values = [] + conc = conc_start + while conc <= conc_end: + conc_values.append(conc) + if conc == conc_end: + break + conc *= 2 + if conc > conc_end: + conc = conc_end + + for conc in conc_values: + entry = { + Fields.IMAGE.value: image, + Fields.MODEL.value: model, + Fields.MODEL_PREFIX.value: model_code, + Fields.PRECISION.value: precision, + Fields.FRAMEWORK.value: framework, + Fields.RUNNER.value: runner, + Fields.ISL.value: isl, + Fields.OSL.value: osl, + Fields.TP.value: tp, + Fields.CONC.value: conc, + Fields.MAX_MODEL_LEN.value: isl + osl + 200, + Fields.EP.value: ep if ep is not None else 1, + Fields.DP_ATTN.value: dp_attn if dp_attn is not None else False, + Fields.SPEC_DECODING.value: spec_decoding, + 
Fields.EXP_NAME.value: f"{model_code}_{seq_len_str}", + Fields.DISAGG.value: disagg, + } + matrix_values.append(validate_matrix_entry(entry, is_multinode=False)) + + return matrix_values def main(): @@ -545,13 +638,30 @@ def main(): help='Show this help message and exit' ) + # Subcommand: test-config + test_config_keys_parser = subparsers.add_parser( + 'test-config', + parents=[parent_parser], + add_help=False, + help='Generate full sweep for specific config keys. Validates that all specified keys exist before generating.' + ) + test_config_keys_parser.add_argument( + '--config-keys', + nargs='+', + required=True, + help='One or more config keys to generate sweep for (e.g., dsr1-fp4-b200-sglang dsr1-fp8-h200-trt)' + ) + test_config_keys_parser.add_argument( + '-h', '--help', + action='help', + help='Show this help message and exit' + ) + args = parser.parse_args() - # Load and validate configuration files + # Load and validate configuration files (validation happens by default in load functions) all_config_data = load_config_files(args.config_files) runner_data = load_runner_file(args.runner_config) - validate_master_config(all_config_data) - validate_runner_config(runner_data) # Route to appropriate function based on subcommand if args.command == 'full-sweep': @@ -559,6 +669,8 @@ def main(): elif args.command == 'runner-model-sweep': matrix_values = generate_runner_model_sweep_config( args, all_config_data, runner_data) + elif args.command == 'test-config': + matrix_values = generate_test_config_sweep(args, all_config_data) else: parser.error(f"Unknown command: {args.command}") diff --git a/utils/matrix_logic/test_generate_sweep_configs.py b/utils/matrix_logic/test_generate_sweep_configs.py index 1381f394e..c505611c3 100644 --- a/utils/matrix_logic/test_generate_sweep_configs.py +++ b/utils/matrix_logic/test_generate_sweep_configs.py @@ -7,8 +7,6 @@ seq_len_to_str, generate_full_sweep, generate_runner_model_sweep_config, - load_config_files, - load_runner_file, ) 
@@ -583,90 +581,6 @@ def test_uses_lowest_conc(self, sample_single_node_config, sample_runner_config, assert all(entry["conc"] == 4 for entry in result) -# ============================================================================= -# Test load_config_files -# ============================================================================= - -class TestLoadConfigFiles: - """Tests for load_config_files function.""" - - def test_load_single_file(self, tmp_path): - """Should load a single config file.""" - config_file = tmp_path / "config.yaml" - config_file.write_text(""" -test-config: - image: test-image - model: test-model -""") - result = load_config_files([str(config_file)]) - assert "test-config" in result - assert result["test-config"]["image"] == "test-image" - - def test_load_multiple_files(self, tmp_path): - """Should merge multiple config files.""" - config1 = tmp_path / "config1.yaml" - config1.write_text(""" -config-one: - value: 1 -""") - config2 = tmp_path / "config2.yaml" - config2.write_text(""" -config-two: - value: 2 -""") - result = load_config_files([str(config1), str(config2)]) - assert "config-one" in result - assert "config-two" in result - - def test_duplicate_keys_raise_error(self, tmp_path): - """Duplicate keys across files should raise error.""" - config1 = tmp_path / "config1.yaml" - config1.write_text(""" -duplicate-key: - value: 1 -""") - config2 = tmp_path / "config2.yaml" - config2.write_text(""" -duplicate-key: - value: 2 -""") - with pytest.raises(ValueError) as exc_info: - load_config_files([str(config1), str(config2)]) - assert "Duplicate configuration keys" in str(exc_info.value) - - def test_nonexistent_file_raises_error(self): - """Nonexistent file should raise error.""" - with pytest.raises(ValueError) as exc_info: - load_config_files(["nonexistent.yaml"]) - assert "does not exist" in str(exc_info.value) - - -# ============================================================================= -# Test load_runner_file -# 
============================================================================= - -class TestLoadRunnerFile: - """Tests for load_runner_file function.""" - - def test_load_runner_file(self, tmp_path): - """Should load runner config file.""" - runner_file = tmp_path / "runners.yaml" - runner_file.write_text(""" -h100: -- h100-node-0 -- h100-node-1 -""") - result = load_runner_file(str(runner_file)) - assert "h100" in result - assert len(result["h100"]) == 2 - - def test_nonexistent_runner_file(self): - """Nonexistent runner file should raise error.""" - with pytest.raises(ValueError) as exc_info: - load_runner_file("nonexistent.yaml") - assert "does not exist" in str(exc_info.value) - - # ============================================================================= # Test edge cases and special configurations # ============================================================================= diff --git a/utils/matrix_logic/test_validation.py b/utils/matrix_logic/test_validation.py index 008ed2b42..d9cc7f0d9 100644 --- a/utils/matrix_logic/test_validation.py +++ b/utils/matrix_logic/test_validation.py @@ -14,6 +14,8 @@ validate_matrix_entry, validate_master_config, validate_runner_config, + load_config_files, + load_runner_file, ) @@ -738,3 +740,130 @@ def test_multiple_runner_types(self, valid_runner_config): assert "h200" in result assert "mi300x" in result assert "gb200" in result + + +# ============================================================================= +# Test load_config_files +# ============================================================================= + +class TestLoadConfigFiles: + """Tests for load_config_files function.""" + + def test_load_single_file_with_validation(self, tmp_path, valid_single_node_master_config): + """Should load and validate a single config file.""" + config_file = tmp_path / "config.yaml" + import yaml + config_file.write_text(yaml.dump({"test-config": valid_single_node_master_config})) + result = 
load_config_files([str(config_file)]) + assert "test-config" in result + assert result["test-config"]["image"] == valid_single_node_master_config["image"] + + def test_load_single_file_without_validation(self, tmp_path): + """Should load a single config file without validation when validate=False.""" + config_file = tmp_path / "config.yaml" + config_file.write_text(""" +test-config: + image: test-image + model: test-model +""") + result = load_config_files([str(config_file)], validate=False) + assert "test-config" in result + assert result["test-config"]["image"] == "test-image" + + def test_load_multiple_files(self, tmp_path): + """Should merge multiple config files.""" + config1 = tmp_path / "config1.yaml" + config1.write_text(""" +config-one: + value: 1 +""") + config2 = tmp_path / "config2.yaml" + config2.write_text(""" +config-two: + value: 2 +""") + result = load_config_files([str(config1), str(config2)], validate=False) + assert "config-one" in result + assert "config-two" in result + + def test_duplicate_keys_raise_error(self, tmp_path): + """Duplicate keys across files should raise error.""" + config1 = tmp_path / "config1.yaml" + config1.write_text(""" +duplicate-key: + value: 1 +""") + config2 = tmp_path / "config2.yaml" + config2.write_text(""" +duplicate-key: + value: 2 +""") + with pytest.raises(ValueError) as exc_info: + load_config_files([str(config1), str(config2)], validate=False) + assert "Duplicate configuration keys" in str(exc_info.value) + + def test_nonexistent_file_raises_error(self): + """Nonexistent file should raise error.""" + with pytest.raises(ValueError) as exc_info: + load_config_files(["nonexistent.yaml"]) + assert "does not exist" in str(exc_info.value) + + def test_validation_runs_by_default(self, tmp_path): + """Validation should run by default and catch invalid configs.""" + config_file = tmp_path / "config.yaml" + config_file.write_text(""" +invalid-config: + image: test-image + # Missing required fields like model, 
model-prefix, precision, etc. +""") + with pytest.raises(ValueError) as exc_info: + load_config_files([str(config_file)]) + assert "failed validation" in str(exc_info.value) + + +# ============================================================================= +# Test load_runner_file +# ============================================================================= + +class TestLoadRunnerFile: + """Tests for load_runner_file function.""" + + def test_load_runner_file_with_validation(self, tmp_path): + """Should load and validate runner config file.""" + runner_file = tmp_path / "runners.yaml" + runner_file.write_text(""" +h100: +- h100-node-0 +- h100-node-1 +""") + result = load_runner_file(str(runner_file)) + assert "h100" in result + assert len(result["h100"]) == 2 + + def test_load_runner_file_without_validation(self, tmp_path): + """Should load runner config file without validation when validate=False.""" + runner_file = tmp_path / "runners.yaml" + runner_file.write_text(""" +h100: +- h100-node-0 +- h100-node-1 +""") + result = load_runner_file(str(runner_file), validate=False) + assert "h100" in result + assert len(result["h100"]) == 2 + + def test_nonexistent_runner_file(self): + """Nonexistent runner file should raise error.""" + with pytest.raises(ValueError) as exc_info: + load_runner_file("nonexistent.yaml") + assert "does not exist" in str(exc_info.value) + + def test_validation_runs_by_default(self, tmp_path): + """Validation should run by default and catch invalid configs.""" + runner_file = tmp_path / "runners.yaml" + runner_file.write_text(""" +h100: not-a-list +""") + with pytest.raises(ValueError) as exc_info: + load_runner_file(str(runner_file)) + assert "must be a list" in str(exc_info.value) diff --git a/utils/matrix_logic/validation.py b/utils/matrix_logic/validation.py index 30012423a..955e4c5b5 100644 --- a/utils/matrix_logic/validation.py +++ b/utils/matrix_logic/validation.py @@ -3,6 +3,7 @@ from enum import Enum import pprint +import yaml """ 
"""
    Below is the validation logic for the changelog entries found in perf-changelog.yaml.
    This ensures that the changelog entries conform to the expected structure before
    proceeding with processing.
"""


class ChangelogEntry(BaseModel):
    """Pydantic model for validating changelog entry structure.

    Each entry names one or more master-config keys (YAML key: 'config-keys')
    plus a free-form description of the performance change.
    """
    # extra="forbid" rejects unknown fields; populate_by_name accepts both the
    # YAML alias 'config-keys' and the Python name 'config_keys'.
    model_config = ConfigDict(extra="forbid", populate_by_name=True)

    config_keys: list[str] = Field(alias="config-keys", min_length=1)
    description: str


class ChangelogMetadata(BaseModel):
    """Pydantic model for validating changelog metadata structure."""
    model_config = ConfigDict(extra="forbid")

    base_ref: str
    head_ref: str
    entries: list[ChangelogEntry]


class ChangelogMatrixEntry(BaseModel):
    """Pydantic model for validating final changelog matrix entry structure.

    This imposes a strict contract on the output of process_changelog.py, dictated by
    the expected input to the run-sweep.yml workflow file.
    """
    model_config = ConfigDict(extra="forbid", populate_by_name=True)

    # Keyed by sequence-length string (e.g. "1k1k") -> matrix entries for that length.
    single_node: dict[str, list[SingleNodeMatrixEntry]] = Field(default_factory=dict)
    multi_node: dict[str, list[MultiNodeMatrixEntry]] = Field(default_factory=dict)
    changelog_metadata: ChangelogMetadata


# =============================================================================
# File Loading Functions
# =============================================================================


def load_config_files(config_files: List[str], validate: bool = True) -> dict:
    """Load and merge configuration files.

    Args:
        config_files: List of paths to YAML configuration files.
        validate: If True, run validate_master_config on loaded data. Defaults to True.

    Returns:
        Merged configuration dictionary.

    Raises:
        ValueError: If file doesn't exist, isn't a dict, contains a wildcard
            key, or duplicates a key from an earlier file.
    """
    all_config_data = {}
    for config_file in config_files:
        try:
            with open(config_file, 'r') as f:
                config_data = yaml.safe_load(f)

            # Explicit ValueError (not assert) so the documented contract holds
            # even under `python -O`, which strips assert statements.
            if not isinstance(config_data, dict):
                raise ValueError(
                    f"Config file '{config_file}' must contain a dictionary")

            # Don't allow '*' wildcard in master config keys as we need to reserve these
            # for expansion in process_changelog.py
            for key in config_data.keys():
                if "*" in key:
                    raise ValueError(
                        f"Wildcard '*' is not allowed in master config keys: '{key}'")

            # Check for duplicate keys across the files merged so far.
            duplicate_keys = set(all_config_data.keys()) & set(config_data.keys())
            if duplicate_keys:
                raise ValueError(
                    f"Duplicate configuration keys found in '{config_file}': {', '.join(sorted(duplicate_keys))}"
                )

            all_config_data.update(config_data)
        except FileNotFoundError:
            raise ValueError(f"Input file '{config_file}' does not exist.")

    if validate:
        validate_master_config(all_config_data)

    return all_config_data


def load_runner_file(runner_file: str, validate: bool = True) -> dict:
    """Load runner configuration file.

    Args:
        runner_file: Path to the runner YAML configuration file.
        validate: If True, run validate_runner_config on loaded data. Defaults to True.

    Returns:
        Runner configuration dictionary.

    Raises:
        ValueError: If file doesn't exist or fails validation.
    """
    try:
        with open(runner_file, 'r') as f:
            runner_config = yaml.safe_load(f)
    except FileNotFoundError:
        raise ValueError(
            f"Runner config file '{runner_file}' does not exist.")

    if validate:
        validate_runner_config(runner_config)

    return runner_config
+ """ + try: + with open(runner_file, 'r') as f: + runner_config = yaml.safe_load(f) + except FileNotFoundError: + raise ValueError( + f"Runner config file '{runner_file}' does not exist.") + + if validate: + validate_runner_config(runner_config) + + return runner_config diff --git a/utils/process_changelog.py b/utils/process_changelog.py new file mode 100644 index 000000000..4a856c9a8 --- /dev/null +++ b/utils/process_changelog.py @@ -0,0 +1,142 @@ +import argparse +import json +import re +import subprocess +from collections import defaultdict + +import yaml +from constants import GENERATE_SWEEPS_PY_SCRIPT, MASTER_CONFIGS, RUNNER_CONFIG +from matrix_logic.generate_sweep_configs import seq_len_to_str +from matrix_logic.validation import ( + ChangelogEntry, + ChangelogMatrixEntry, + load_config_files, +) + + +def get_added_lines(base_ref: str, head_ref: str, filepath: str) -> str: + result = subprocess.run( + ["git", "diff", base_ref, head_ref, "--", filepath], + capture_output=True, + text=True, + ) + + added_lines = [] + for line in result.stdout.split("\n"): + if line.startswith("-") and not line.startswith("---"): + # Don't allow deletions in the changelog + # By convention, it should act as a running log of performance changes, + # so we only want to see additions + raise ValueError( + f"Deletions are not allowed in {filepath}. " + f"Only additions to the changelog are permitted. " + f"Found deleted line: {line[1:]}" + ) + elif line.startswith("+") and not line.startswith("+++"): + added_lines.append(line[1:]) + + return "\n".join(added_lines) + + +def get_config_keys_from_master( + config_keys: list[str], master_config: dict +) -> list[str]: + resolved_keys = set() + for key in config_keys: + if "*" in key: + pattern = re.compile(re.escape(key).replace(r"\*", ".*")) + matched_keys = [k for k in master_config if pattern.fullmatch(k)] + if not matched_keys: + raise ValueError( + f"No config keys matched the wildcard pattern '{key}' in master configs." 
def main():
    """Entry point: diff the changelog, expand new entries into sweep configs,
    and print the validated matrix JSON for the run-sweep workflow."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--base-ref", type=str, required=True)
    parser.add_argument("--head-ref", type=str, required=True)
    parser.add_argument("--changelog-file", type=str, required=True)
    args = parser.parse_args()

    added_yaml = get_added_lines(args.base_ref, args.head_ref, args.changelog_file)

    if not added_yaml.strip():
        raise ValueError("No additions found in the changelog file.")

    changelog_data = yaml.safe_load(added_yaml)

    if not changelog_data:
        raise ValueError("No valid YAML entries found in the changelog additions.")
    # A non-list document (e.g. a bare mapping) would otherwise fail deep inside
    # pydantic with an opaque error; reject it up front.
    if not isinstance(changelog_data, list):
        raise ValueError(
            f"Changelog additions must be a YAML list of entries, got {type(changelog_data).__name__}."
        )

    final_results = {
        "single_node": defaultdict(list),
        "multi_node": defaultdict(list),
        "changelog_metadata": {
            "base_ref": args.base_ref,
            "head_ref": args.head_ref,
            "entries": changelog_data,
        },
    }

    # Loop-invariant: parse the master config files once, not once per entry.
    master_config = load_config_files(MASTER_CONFIGS)

    all_results = []
    # Deduplicate repeated configs, if for some reason a config key appears multiple times
    # in one commit, we don't want to run that config two times (there will just be twice as many
    # data points for that config, which is not useful)
    all_configs_to_run = set()

    for entry_data in changelog_data:
        entry = ChangelogEntry.model_validate(entry_data)
        configs_to_run = get_config_keys_from_master(entry.config_keys, master_config)

        # Skip configs already processed
        configs_to_run = [c for c in configs_to_run if c not in all_configs_to_run]
        if not configs_to_run:
            continue
        all_configs_to_run.update(configs_to_run)

        try:
            proc = subprocess.run(
                [
                    "python3",
                    GENERATE_SWEEPS_PY_SCRIPT,
                    "test-config",
                    "--config-keys",
                    *configs_to_run,
                    "--config-files",
                    *MASTER_CONFIGS,
                    "--runner-config",
                    RUNNER_CONFIG,
                ],
                capture_output=True,
                text=True,
                check=True,
            )
        except subprocess.CalledProcessError as e:
            # Surface the sweep generator's stderr before re-raising so CI logs
            # show the real failure cause.
            print(e.stderr)
            raise

        all_results.extend(json.loads(proc.stdout))

    # Bucket each generated config by sequence-length string; entries with a
    # non-null "prefill" field are disaggregated multi-node runs.
    for sweep_row in all_results:
        seq_len_str = seq_len_to_str(sweep_row["isl"], sweep_row["osl"])
        if "prefill" in sweep_row and sweep_row["prefill"] is not None:
            final_results["multi_node"][seq_len_str].append(sweep_row)
        else:
            final_results["single_node"][seq_len_str].append(sweep_row)

    # Validate final results structure
    validated = ChangelogMatrixEntry.model_validate(final_results)
    print(validated.model_dump_json(by_alias=True))


if __name__ == "__main__":
    main()