From a81882f2fd78f8daf8fd5ad24baffef4bd4b21e5 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Sun, 7 Dec 2025 23:39:22 -0800 Subject: [PATCH 01/19] Initial commit, for #304 --- .github/workflows/pr-comment-sweep.yml | 71 ++++++++++++ .github/workflows/sweep-executor.yml | 151 +++++++++++++++++++++++++ 2 files changed, 222 insertions(+) create mode 100644 .github/workflows/pr-comment-sweep.yml create mode 100644 .github/workflows/sweep-executor.yml diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml new file mode 100644 index 000000000..be54869a6 --- /dev/null +++ b/.github/workflows/pr-comment-sweep.yml @@ -0,0 +1,71 @@ +name: PR Comment Sweep + +on: + issue_comment: + types: [created] + +concurrency: + group: "PR-SWEEP-${{ github.event.issue.number }}" + cancel-in-progress: true + +permissions: + contents: read + +jobs: + parse: + # Run only for PR comments, from trusted authors, starting with /sweep + if: >- + ${{ github.event.issue.pull_request && + startsWith(github.event.comment.body, '/sweep') && + contains('OWNER,MEMBER,COLLABORATOR', github.event.comment.author_association) }} + runs-on: ubuntu-latest + outputs: + pr-number: ${{ steps.parse.outputs.pr-number }} + generator-args: ${{ steps.parse.outputs.generator-args }} + steps: + - name: Derive PR number and parse command + id: parse + shell: bash + env: + BODY: ${{ github.event.comment.body }} + PR_NUMBER: ${{ github.event.issue.number }} + run: | + set -euo pipefail + + # Extract first line starting with /sweep + cmd_line=$(printf "%s" "$BODY" | awk '/^\/sweep/{print; exit}') + if [[ -z "$cmd_line" ]]; then + echo "No /sweep command found in comment" >&2 + exit 1 + fi + cmd_args=${cmd_line#* /sweep} + # Handle case when it's exactly '/sweep' (no args) + if [[ "$cmd_line" == "/sweep" ]]; then + cmd_args="" + else + cmd_args=${cmd_line#/sweep} + fi + cmd_args=$(echo "$cmd_args" | xargs || true) + + echo "Command args: $cmd_args" + + echo "generator-args=$cmd_args" 
>> "$GITHUB_OUTPUT" + echo "pr-number=$PR_NUMBER" >> "$GITHUB_OUTPUT" + + execute: + needs: parse + uses: ./.github/workflows/sweep-executor.yml + secrets: inherit + with: + pr-number: ${{ needs.parse.outputs.pr-number }} + generator-args: ${{ needs.parse.outputs.generator-args }} + + note-ignored: + # Inform when comment doesn't meet criteria (non-PR or not authorized) + if: ${{ !github.event.issue.pull_request || + !startsWith(github.event.comment.body, '/sweep') || + !contains('OWNER,MEMBER,COLLABORATOR', github.event.comment.author_association) }} + runs-on: ubuntu-latest + steps: + - run: | + echo "Comment ignored. Either not on a PR, not a /sweep command, or author not authorized (OWNER/MEMBER/COLLABORATOR required)." diff --git a/.github/workflows/sweep-executor.yml b/.github/workflows/sweep-executor.yml new file mode 100644 index 000000000..65bec188e --- /dev/null +++ b/.github/workflows/sweep-executor.yml @@ -0,0 +1,151 @@ +name: Template - Sweep Executor + +on: + workflow_call: + inputs: + pr-number: + required: true + type: string + generator-args: + required: true + type: string + +permissions: + contents: read + +jobs: + generate: + runs-on: ubuntu-latest + outputs: + search-space-config: ${{ steps.generate.outputs.search-space-config }} + is-multinode: ${{ steps.detect.outputs.is-multinode }} + steps: + - name: Checkout PR head + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + with: + ref: refs/pull/${{ inputs.pr-number }}/head + + - name: Detect node type from args + id: detect + run: | + set -euo pipefail + ARGS=${{ inputs.generator-args }} + if [[ "$ARGS" == *"--multi-node"* ]]; then + echo "is-multinode=true" >> "$GITHUB_OUTPUT" + else + echo "is-multinode=false" >> "$GITHUB_OUTPUT" + fi + + - name: Generate sweep configs + id: generate + shell: python + env: + GITHUB_WORKSPACE: ${{ github.workspace }} + run: | + import json, os, shlex, subprocess, sys + + cmd_args = r'''${{ inputs.generator-args }}''' + script = 
os.path.join(os.environ['GITHUB_WORKSPACE'], 'utils', 'matrix_logic', 'generate_sweep_configs.py') + cfg_amd = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'amd-master.yaml') + cfg_nv = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'nvidia-master.yaml') + runners = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'runners.yaml') + + subprocess.run([sys.executable, '-m', 'pip', 'install', 'pydantic'], check=True) + + argv = [sys.executable, script] + if cmd_args.strip(): + argv += shlex.split(cmd_args) + argv += ['--config-files', cfg_amd, cfg_nv, '--runner-config', runners] + + print('Invoking:', ' '.join(shlex.quote(a) for a in argv)) + res = subprocess.run(argv, capture_output=True, text=True) + if res.returncode != 0: + print('Generator failed. stdout:\n', res.stdout) + print('stderr:\n', res.stderr, file=sys.stderr) + raise SystemExit(res.returncode) + + try: + data = json.loads(res.stdout) + except Exception as e: + print('Failed to parse generator output as JSON:', e, file=sys.stderr) + print('Raw output:\n', res.stdout) + raise + + print(f"Generated {len(data)} configs") + with open(os.environ['GITHUB_OUTPUT'], 'a') as f: + f.write('search-space-config=' + json.dumps(data) + '\n') + + run-single-node: + needs: generate + if: ${{ needs.generate.result == 'success' && needs.generate.outputs.search-space-config != '[]' && needs.generate.outputs.is-multinode == 'false' }} + uses: ./.github/workflows/benchmark-tmpl.yml + name: Sweep (Single-Node) + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.generate.outputs.search-space-config) }} + secrets: inherit + with: + exp-name: ${{ matrix.config.exp-name }} + isl: ${{ matrix.config.isl }} + osl: ${{ matrix.config.osl }} + max-model-len: ${{ matrix.config.max-model-len }} + runner: ${{ matrix.config.runner }} + image: ${{ matrix.config.image }} + model: ${{ matrix.config.model }} + model-prefix: ${{ matrix.config.model-prefix }} + 
framework: ${{ matrix.config.framework }} + precision: ${{ matrix.config.precision }} + tp: ${{ matrix.config.tp }} + ep: ${{ matrix.config.ep }} + dp-attn: ${{ matrix.config.dp-attn }} + conc: ${{ matrix.config.conc }} + spec-decoding: ${{ matrix.config.spec-decoding }} + disagg: ${{ matrix.config.disagg }} + + run-multi-node: + needs: generate + if: ${{ needs.generate.result == 'success' && needs.generate.outputs.search-space-config != '[]' && needs.generate.outputs.is-multinode == 'true' }} + uses: ./.github/workflows/benchmark-multinode-tmpl.yml + name: Sweep (Multi-Node) + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.generate.outputs.search-space-config) }} + secrets: inherit + with: + exp-name: ${{ matrix.config.exp-name }} + isl: ${{ matrix.config.isl }} + osl: ${{ matrix.config.osl }} + max-model-len: ${{ matrix.config.max-model-len }} + runner: ${{ matrix.config.runner }} + image: ${{ matrix.config.image }} + model: ${{ matrix.config.model }} + model-prefix: ${{ matrix.config.model-prefix }} + framework: ${{ matrix.config.framework }} + precision: ${{ matrix.config.precision }} + conc-list: ${{ toJson(matrix.config.conc) }} + spec-decoding: ${{ matrix.config.spec-decoding }} + disagg: ${{ matrix.config.disagg }} + + prefill-num-worker: ${{ matrix.config.prefill.num-worker }} + prefill-tp: ${{ matrix.config.prefill.tp }} + prefill-ep: ${{ matrix.config.prefill.ep }} + prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }} + prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }} + + decode-num-worker: ${{ matrix.config.decode.num-worker }} + decode-tp: ${{ matrix.config.decode.tp }} + decode-ep: ${{ matrix.config.decode.ep }} + decode-dp-attn: ${{ matrix.config.decode.dp-attn }} + decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }} + + collect-results: + needs: [run-single-node, run-multi-node] + if: ${{ always() && (needs.run-single-node.result == 'success' || 
needs.run-multi-node.result == 'success') }} + uses: ./.github/workflows/collect-results.yml + name: Collect Results + secrets: inherit + with: + exp-name: '' + From d27616a505ba95b9b2b17fa900cad8e707afc999 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Sun, 7 Dec 2025 23:45:51 -0800 Subject: [PATCH 02/19] Allow testing on own PR --- .github/workflows/sweep-executor.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sweep-executor.yml b/.github/workflows/sweep-executor.yml index 65bec188e..fc63c754a 100644 --- a/.github/workflows/sweep-executor.yml +++ b/.github/workflows/sweep-executor.yml @@ -9,6 +9,16 @@ on: generator-args: required: true type: string + workflow_dispatch: + inputs: + pr-number: + description: PR number to checkout (refs/pull//head) + required: true + type: string + generator-args: + description: Args passed to generate_sweep_configs.py (omit /sweep) + required: true + type: string permissions: contents: read @@ -148,4 +158,3 @@ jobs: secrets: inherit with: exp-name: '' - From dce0b32c2fe02ac7cf93dc5aa9fd2126cec152e9 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Mon, 8 Dec 2025 07:06:57 -0800 Subject: [PATCH 03/19] condense workflow --- .github/workflows/pr-comment-sweep.yml | 222 ++++++++++++++++++++++--- .github/workflows/sweep-executor.yml | 160 ------------------ 2 files changed, 196 insertions(+), 186 deletions(-) delete mode 100644 .github/workflows/sweep-executor.yml diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml index be54869a6..f8ec2354c 100644 --- a/.github/workflows/pr-comment-sweep.yml +++ b/.github/workflows/pr-comment-sweep.yml @@ -1,45 +1,57 @@ -name: PR Comment Sweep +name: Sweep on: + # PR comment trigger issue_comment: types: [created] + # Manual trigger + workflow_dispatch: + inputs: + pr-number: + description: PR number to checkout (refs/pull//head) + required: false + type: string + generator-args: + description: Args passed to 
generate_sweep_configs.py (omit /sweep) + required: false + type: string + # Push-based example/testing + push: + branches-ignore: + - main + - master concurrency: - group: "PR-SWEEP-${{ github.event.issue.number }}" + group: ${{ github.event.issue.number && format('PR-SWEEP-{0}', github.event.issue.number) || format('REF-SWEEP-{0}', github.ref_name) }} cancel-in-progress: true permissions: contents: read jobs: - parse: - # Run only for PR comments, from trusted authors, starting with /sweep - if: >- - ${{ github.event.issue.pull_request && - startsWith(github.event.comment.body, '/sweep') && - contains('OWNER,MEMBER,COLLABORATOR', github.event.comment.author_association) }} + prepare: runs-on: ubuntu-latest outputs: - pr-number: ${{ steps.parse.outputs.pr-number }} - generator-args: ${{ steps.parse.outputs.generator-args }} + pr-number: ${{ steps.parse.outputs.pr-number || steps.resolve.outputs.pr-number }} + generator-args: ${{ steps.parse.outputs.generator-args || steps.resolve.outputs.generator-args }} steps: - - name: Derive PR number and parse command + - name: Parse PR comment (/sweep ...) 
id: parse + if: ${{ github.event_name == 'issue_comment' && github.event.issue.pull_request && startsWith(github.event.comment.body, '/sweep') && contains(fromJson('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association) }} shell: bash env: BODY: ${{ github.event.comment.body }} PR_NUMBER: ${{ github.event.issue.number }} run: | set -euo pipefail - - # Extract first line starting with /sweep - cmd_line=$(printf "%s" "$BODY" | awk '/^\/sweep/{print; exit}') + # Allow optional leading whitespace before /sweep + cmd_line=$(printf "%s" "$BODY" | awk '/^[[:space:]]*\/sweep/{print; exit}') if [[ -z "$cmd_line" ]]; then echo "No /sweep command found in comment" >&2 exit 1 fi - cmd_args=${cmd_line#* /sweep} - # Handle case when it's exactly '/sweep' (no args) + # Trim leading spaces then strip the /sweep prefix + cmd_line=$(echo "$cmd_line" | sed 's/^[[:space:]]*//') if [[ "$cmd_line" == "/sweep" ]]; then cmd_args="" else @@ -47,24 +59,182 @@ jobs: fi cmd_args=$(echo "$cmd_args" | xargs || true) - echo "Command args: $cmd_args" - echo "generator-args=$cmd_args" >> "$GITHUB_OUTPUT" echo "pr-number=$PR_NUMBER" >> "$GITHUB_OUTPUT" - execute: - needs: parse - uses: ./.github/workflows/sweep-executor.yml + - name: Find PR for this branch (if any) + id: find + if: ${{ github.event_name != 'issue_comment' }} + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + with: + script: | + const owner = context.repo.owner; + const repo = context.repo.repo; + const branch = context.ref.replace('refs/heads/', ''); + const res = await github.rest.pulls.list({ owner, repo, state: 'open', head: `${owner}:${branch}` }); + const num = res.data[0]?.number ? 
String(res.data[0].number) : ''; + core.setOutput('pr-number', num); + + - name: Prepare inputs (push/dispatch) + id: resolve + if: ${{ github.event_name != 'issue_comment' }} + shell: bash + env: + DISPATCH_PR: ${{ github.event.inputs.pr-number }} + DISPATCH_ARGS: ${{ github.event.inputs.generator-args }} + run: | + set -euo pipefail + pr_from_branch='${{ steps.find.outputs.pr-number }}' + pr_number="${DISPATCH_PR:-}"; if [[ -z "$pr_number" ]]; then pr_number="$pr_from_branch"; fi + gen_args="${DISPATCH_ARGS:-}" + if [[ -z "$gen_args" ]]; then + gen_args='full-sweep --single-node --runner-type h200 --model-prefix dsr1 --seq-lens 1k1k --max-conc 4' + fi + echo "Resolved PR: $pr_number"; + echo "Using generator args: $gen_args"; + echo "pr-number=$pr_number" >> "$GITHUB_OUTPUT" + echo "generator-args=$gen_args" >> "$GITHUB_OUTPUT" + + generate: + needs: prepare + if: ${{ needs.prepare.outputs.pr-number != '' && needs.prepare.outputs.generator-args != '' }} + runs-on: ubuntu-latest + outputs: + search-space-config: ${{ steps.generate.outputs.search-space-config }} + is-multinode: ${{ steps.detect.outputs.is-multinode }} + steps: + - name: Checkout PR head + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + with: + ref: refs/pull/${{ needs.prepare.outputs.pr-number }}/head + + - name: Detect node type from args + id: detect + run: | + set -euo pipefail + ARGS='${{ needs.prepare.outputs.generator-args }}' + if [[ "$ARGS" == *"--multi-node"* ]]; then + echo "is-multinode=true" >> "$GITHUB_OUTPUT" + else + echo "is-multinode=false" >> "$GITHUB_OUTPUT" + fi + + - name: Generate sweep configs + id: generate + shell: python + env: + GITHUB_WORKSPACE: ${{ github.workspace }} + run: | + import json, os, shlex, subprocess, sys + + cmd_args = r'''${{ needs.prepare.outputs.generator-args }}''' + script = os.path.join(os.environ['GITHUB_WORKSPACE'], 'utils', 'matrix_logic', 'generate_sweep_configs.py') + cfg_amd = 
os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'amd-master.yaml') + cfg_nv = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'nvidia-master.yaml') + runners = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'runners.yaml') + + subprocess.run([sys.executable, '-m', 'pip', 'install', 'pydantic'], check=True) + + argv = [sys.executable, script] + if cmd_args.strip(): + argv += shlex.split(cmd_args) + argv += ['--config-files', cfg_amd, cfg_nv, '--runner-config', runners] + + print('Invoking:', ' '.join(shlex.quote(a) for a in argv)) + res = subprocess.run(argv, capture_output=True, text=True) + if res.returncode != 0: + print('Generator failed. stdout:\n', res.stdout) + print('stderr:\n', res.stderr, file=sys.stderr) + raise SystemExit(res.returncode) + + try: + data = json.loads(res.stdout) + except Exception as e: + print('Failed to parse generator output as JSON:', e, file=sys.stderr) + print('Raw output:\n', res.stdout) + raise + + print(f"Generated {len(data)} configs") + with open(os.environ['GITHUB_OUTPUT'], 'a') as f: + f.write('search-space-config=' + json.dumps(data) + '\n') + + run-single-node: + needs: generate + if: ${{ needs.generate.result == 'success' && needs.generate.outputs.search-space-config != '[]' && needs.generate.outputs.is-multinode == 'false' }} + uses: ./.github/workflows/benchmark-tmpl.yml + name: Sweep (Single-Node) + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.generate.outputs.search-space-config) }} + secrets: inherit + with: + exp-name: ${{ matrix.config.exp-name }} + isl: ${{ matrix.config.isl }} + osl: ${{ matrix.config.osl }} + max-model-len: ${{ matrix.config.max-model-len }} + runner: ${{ matrix.config.runner }} + image: ${{ matrix.config.image }} + model: ${{ matrix.config.model }} + model-prefix: ${{ matrix.config.model-prefix }} + framework: ${{ matrix.config.framework }} + precision: ${{ matrix.config.precision }} + tp: ${{ matrix.config.tp }} + 
ep: ${{ matrix.config.ep }} + dp-attn: ${{ matrix.config.dp-attn }} + conc: ${{ matrix.config.conc }} + spec-decoding: ${{ matrix.config.spec-decoding }} + disagg: ${{ matrix.config.disagg }} + + run-multi-node: + needs: generate + if: ${{ needs.generate.result == 'success' && needs.generate.outputs.search-space-config != '[]' && needs.generate.outputs.is-multinode == 'true' }} + uses: ./.github/workflows/benchmark-multinode-tmpl.yml + name: Sweep (Multi-Node) + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.generate.outputs.search-space-config) }} + secrets: inherit + with: + exp-name: ${{ matrix.config.exp-name }} + isl: ${{ matrix.config.isl }} + osl: ${{ matrix.config.osl }} + max-model-len: ${{ matrix.config.max-model-len }} + runner: ${{ matrix.config.runner }} + image: ${{ matrix.config.image }} + model: ${{ matrix.config.model }} + model-prefix: ${{ matrix.config.model-prefix }} + framework: ${{ matrix.config.framework }} + precision: ${{ matrix.config.precision }} + conc-list: ${{ toJson(matrix.config.conc) }} + spec-decoding: ${{ matrix.config.spec-decoding }} + disagg: ${{ matrix.config.disagg }} + + prefill-num-worker: ${{ matrix.config.prefill.num-worker }} + prefill-tp: ${{ matrix.config.prefill.tp }} + prefill-ep: ${{ matrix.config.prefill.ep }} + prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }} + prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }} + + decode-num-worker: ${{ matrix.config.decode.num-worker }} + decode-tp: ${{ matrix.config.decode.tp }} + decode-ep: ${{ matrix.config.decode.ep }} + decode-dp-attn: ${{ matrix.config.decode.dp-attn }} + decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }} + + collect-results: + needs: [run-single-node, run-multi-node] + if: ${{ always() && (needs.run-single-node.result == 'success' || needs.run-multi-node.result == 'success') }} + uses: ./.github/workflows/collect-results.yml + name: Collect Results secrets: 
inherit with: - pr-number: ${{ needs.parse.outputs.pr-number }} - generator-args: ${{ needs.parse.outputs.generator-args }} + exp-name: '' note-ignored: # Inform when comment doesn't meet criteria (non-PR or not authorized) - if: ${{ !github.event.issue.pull_request || - !startsWith(github.event.comment.body, '/sweep') || - !contains('OWNER,MEMBER,COLLABORATOR', github.event.comment.author_association) }} + if: ${{ github.event_name == 'issue_comment' && (!github.event.issue.pull_request || !startsWith(github.event.comment.body, '/sweep') || !contains(fromJson('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association)) }} runs-on: ubuntu-latest steps: - run: | diff --git a/.github/workflows/sweep-executor.yml b/.github/workflows/sweep-executor.yml deleted file mode 100644 index fc63c754a..000000000 --- a/.github/workflows/sweep-executor.yml +++ /dev/null @@ -1,160 +0,0 @@ -name: Template - Sweep Executor - -on: - workflow_call: - inputs: - pr-number: - required: true - type: string - generator-args: - required: true - type: string - workflow_dispatch: - inputs: - pr-number: - description: PR number to checkout (refs/pull//head) - required: true - type: string - generator-args: - description: Args passed to generate_sweep_configs.py (omit /sweep) - required: true - type: string - -permissions: - contents: read - -jobs: - generate: - runs-on: ubuntu-latest - outputs: - search-space-config: ${{ steps.generate.outputs.search-space-config }} - is-multinode: ${{ steps.detect.outputs.is-multinode }} - steps: - - name: Checkout PR head - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - with: - ref: refs/pull/${{ inputs.pr-number }}/head - - - name: Detect node type from args - id: detect - run: | - set -euo pipefail - ARGS=${{ inputs.generator-args }} - if [[ "$ARGS" == *"--multi-node"* ]]; then - echo "is-multinode=true" >> "$GITHUB_OUTPUT" - else - echo "is-multinode=false" >> "$GITHUB_OUTPUT" - fi - - - name: Generate sweep 
configs - id: generate - shell: python - env: - GITHUB_WORKSPACE: ${{ github.workspace }} - run: | - import json, os, shlex, subprocess, sys - - cmd_args = r'''${{ inputs.generator-args }}''' - script = os.path.join(os.environ['GITHUB_WORKSPACE'], 'utils', 'matrix_logic', 'generate_sweep_configs.py') - cfg_amd = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'amd-master.yaml') - cfg_nv = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'nvidia-master.yaml') - runners = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'runners.yaml') - - subprocess.run([sys.executable, '-m', 'pip', 'install', 'pydantic'], check=True) - - argv = [sys.executable, script] - if cmd_args.strip(): - argv += shlex.split(cmd_args) - argv += ['--config-files', cfg_amd, cfg_nv, '--runner-config', runners] - - print('Invoking:', ' '.join(shlex.quote(a) for a in argv)) - res = subprocess.run(argv, capture_output=True, text=True) - if res.returncode != 0: - print('Generator failed. 
stdout:\n', res.stdout) - print('stderr:\n', res.stderr, file=sys.stderr) - raise SystemExit(res.returncode) - - try: - data = json.loads(res.stdout) - except Exception as e: - print('Failed to parse generator output as JSON:', e, file=sys.stderr) - print('Raw output:\n', res.stdout) - raise - - print(f"Generated {len(data)} configs") - with open(os.environ['GITHUB_OUTPUT'], 'a') as f: - f.write('search-space-config=' + json.dumps(data) + '\n') - - run-single-node: - needs: generate - if: ${{ needs.generate.result == 'success' && needs.generate.outputs.search-space-config != '[]' && needs.generate.outputs.is-multinode == 'false' }} - uses: ./.github/workflows/benchmark-tmpl.yml - name: Sweep (Single-Node) - strategy: - fail-fast: false - matrix: - config: ${{ fromJson(needs.generate.outputs.search-space-config) }} - secrets: inherit - with: - exp-name: ${{ matrix.config.exp-name }} - isl: ${{ matrix.config.isl }} - osl: ${{ matrix.config.osl }} - max-model-len: ${{ matrix.config.max-model-len }} - runner: ${{ matrix.config.runner }} - image: ${{ matrix.config.image }} - model: ${{ matrix.config.model }} - model-prefix: ${{ matrix.config.model-prefix }} - framework: ${{ matrix.config.framework }} - precision: ${{ matrix.config.precision }} - tp: ${{ matrix.config.tp }} - ep: ${{ matrix.config.ep }} - dp-attn: ${{ matrix.config.dp-attn }} - conc: ${{ matrix.config.conc }} - spec-decoding: ${{ matrix.config.spec-decoding }} - disagg: ${{ matrix.config.disagg }} - - run-multi-node: - needs: generate - if: ${{ needs.generate.result == 'success' && needs.generate.outputs.search-space-config != '[]' && needs.generate.outputs.is-multinode == 'true' }} - uses: ./.github/workflows/benchmark-multinode-tmpl.yml - name: Sweep (Multi-Node) - strategy: - fail-fast: false - matrix: - config: ${{ fromJson(needs.generate.outputs.search-space-config) }} - secrets: inherit - with: - exp-name: ${{ matrix.config.exp-name }} - isl: ${{ matrix.config.isl }} - osl: ${{ matrix.config.osl }} 
- max-model-len: ${{ matrix.config.max-model-len }} - runner: ${{ matrix.config.runner }} - image: ${{ matrix.config.image }} - model: ${{ matrix.config.model }} - model-prefix: ${{ matrix.config.model-prefix }} - framework: ${{ matrix.config.framework }} - precision: ${{ matrix.config.precision }} - conc-list: ${{ toJson(matrix.config.conc) }} - spec-decoding: ${{ matrix.config.spec-decoding }} - disagg: ${{ matrix.config.disagg }} - - prefill-num-worker: ${{ matrix.config.prefill.num-worker }} - prefill-tp: ${{ matrix.config.prefill.tp }} - prefill-ep: ${{ matrix.config.prefill.ep }} - prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }} - prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }} - - decode-num-worker: ${{ matrix.config.decode.num-worker }} - decode-tp: ${{ matrix.config.decode.tp }} - decode-ep: ${{ matrix.config.decode.ep }} - decode-dp-attn: ${{ matrix.config.decode.dp-attn }} - decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }} - - collect-results: - needs: [run-single-node, run-multi-node] - if: ${{ always() && (needs.run-single-node.result == 'success' || needs.run-multi-node.result == 'success') }} - uses: ./.github/workflows/collect-results.yml - name: Collect Results - secrets: inherit - with: - exp-name: '' From 503ed3b3baeb1f1e62cae6fe822a35dc9ea64ebd Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Mon, 8 Dec 2025 07:27:22 -0800 Subject: [PATCH 04/19] Rename Workflow --- .github/workflows/pr-comment-sweep.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml index f8ec2354c..e83e6761d 100644 --- a/.github/workflows/pr-comment-sweep.yml +++ b/.github/workflows/pr-comment-sweep.yml @@ -1,4 +1,4 @@ -name: Sweep +name: Slash Command Sweep on: # PR comment trigger From f1feee44cd1f92a0b733a09ecd6bb954ca60792c Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Wed, 10 Dec 2025 06:40:07 
-0800 Subject: [PATCH 05/19] Use environments --- .github/workflows/e2e-tests.yml | 32 +++++- .github/workflows/pr-comment-sweep.yml | 153 +++---------------------- 2 files changed, 48 insertions(+), 137 deletions(-) diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml index cef324e91..3633ef4ec 100644 --- a/.github/workflows/e2e-tests.yml +++ b/.github/workflows/e2e-tests.yml @@ -1,5 +1,5 @@ name: End-to-End Tests -run-name: e2e Test - ${{ inputs.test-name || github.event.inputs.generate-cli-command }} +run-name: e2e Test - ${{ inputs.test-name || inputs.generate-cli-command || github.event.inputs.generate-cli-command }} on: workflow_dispatch: @@ -12,21 +12,47 @@ on: description: "Name for this test run" required: false type: string + ref: + description: "Ref (branch/sha) to checkout for generating configs" + required: false + type: string + workflow_call: + inputs: + generate-cli-command: + description: "Command passed to generate matrix script" + required: true + type: string + test-name: + description: "Name for this test run" + required: false + type: string + ref: + description: "Ref (branch/sha) to checkout for generating configs" + required: false + type: string jobs: get-jobs: runs-on: ubuntu-latest outputs: search-space-config: ${{ steps.get-jobs.outputs.search-space-config }} + environment: bryan-test steps: - - name: Checkout code + - name: Checkout code (ref) + if: ${{ inputs.ref && inputs.ref != '' }} + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + ref: ${{ inputs.ref }} + + - name: Checkout code (default) + if: ${{ !inputs.ref || inputs.ref == '' }} uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - id: get-jobs run: | pip install pydantic CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py \ - ${{ inputs.generate-cli-command }} \ + ${{ inputs.generate-cli-command || github.event.inputs.generate-cli-command }} \ --runner-config 
.github/configs/runners.yaml \ --config-files .github/configs/nvidia-master.yaml .github/configs/amd-master.yaml) echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml index e83e6761d..c2595c154 100644 --- a/.github/workflows/pr-comment-sweep.yml +++ b/.github/workflows/pr-comment-sweep.yml @@ -37,7 +37,7 @@ jobs: steps: - name: Parse PR comment (/sweep ...) id: parse - if: ${{ github.event_name == 'issue_comment' && github.event.issue.pull_request && startsWith(github.event.comment.body, '/sweep') && contains(fromJson('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association) }} + if: ${{ github.event_name == 'issue_comment' && github.event.issue.pull_request && startsWith(github.event.comment.body, '/sweep') }} shell: bash env: BODY: ${{ github.event.comment.body }} @@ -95,147 +95,32 @@ jobs: echo "pr-number=$pr_number" >> "$GITHUB_OUTPUT" echo "generator-args=$gen_args" >> "$GITHUB_OUTPUT" - generate: + call-e2e-pr: needs: prepare - if: ${{ needs.prepare.outputs.pr-number != '' && needs.prepare.outputs.generator-args != '' }} - runs-on: ubuntu-latest - outputs: - search-space-config: ${{ steps.generate.outputs.search-space-config }} - is-multinode: ${{ steps.detect.outputs.is-multinode }} - steps: - - name: Checkout PR head - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - with: - ref: refs/pull/${{ needs.prepare.outputs.pr-number }}/head - - - name: Detect node type from args - id: detect - run: | - set -euo pipefail - ARGS='${{ needs.prepare.outputs.generator-args }}' - if [[ "$ARGS" == *"--multi-node"* ]]; then - echo "is-multinode=true" >> "$GITHUB_OUTPUT" - else - echo "is-multinode=false" >> "$GITHUB_OUTPUT" - fi - - - name: Generate sweep configs - id: generate - shell: python - env: - GITHUB_WORKSPACE: ${{ github.workspace }} - run: | - import json, os, shlex, subprocess, sys - - cmd_args = r'''${{ 
needs.prepare.outputs.generator-args }}''' - script = os.path.join(os.environ['GITHUB_WORKSPACE'], 'utils', 'matrix_logic', 'generate_sweep_configs.py') - cfg_amd = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'amd-master.yaml') - cfg_nv = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'nvidia-master.yaml') - runners = os.path.join(os.environ['GITHUB_WORKSPACE'], '.github', 'configs', 'runners.yaml') - - subprocess.run([sys.executable, '-m', 'pip', 'install', 'pydantic'], check=True) - - argv = [sys.executable, script] - if cmd_args.strip(): - argv += shlex.split(cmd_args) - argv += ['--config-files', cfg_amd, cfg_nv, '--runner-config', runners] - - print('Invoking:', ' '.join(shlex.quote(a) for a in argv)) - res = subprocess.run(argv, capture_output=True, text=True) - if res.returncode != 0: - print('Generator failed. stdout:\n', res.stdout) - print('stderr:\n', res.stderr, file=sys.stderr) - raise SystemExit(res.returncode) - - try: - data = json.loads(res.stdout) - except Exception as e: - print('Failed to parse generator output as JSON:', e, file=sys.stderr) - print('Raw output:\n', res.stdout) - raise - - print(f"Generated {len(data)} configs") - with open(os.environ['GITHUB_OUTPUT'], 'a') as f: - f.write('search-space-config=' + json.dumps(data) + '\n') - - run-single-node: - needs: generate - if: ${{ needs.generate.result == 'success' && needs.generate.outputs.search-space-config != '[]' && needs.generate.outputs.is-multinode == 'false' }} - uses: ./.github/workflows/benchmark-tmpl.yml - name: Sweep (Single-Node) - strategy: - fail-fast: false - matrix: - config: ${{ fromJson(needs.generate.outputs.search-space-config) }} + if: ${{ github.event_name == 'issue_comment' && needs.prepare.outputs.pr-number != '' && needs.prepare.outputs.generator-args != '' }} + uses: ./.github/workflows/e2e-tests.yml + name: Run E2E (PR comment) secrets: inherit with: - exp-name: ${{ matrix.config.exp-name }} - isl: ${{ 
matrix.config.isl }} - osl: ${{ matrix.config.osl }} - max-model-len: ${{ matrix.config.max-model-len }} - runner: ${{ matrix.config.runner }} - image: ${{ matrix.config.image }} - model: ${{ matrix.config.model }} - model-prefix: ${{ matrix.config.model-prefix }} - framework: ${{ matrix.config.framework }} - precision: ${{ matrix.config.precision }} - tp: ${{ matrix.config.tp }} - ep: ${{ matrix.config.ep }} - dp-attn: ${{ matrix.config.dp-attn }} - conc: ${{ matrix.config.conc }} - spec-decoding: ${{ matrix.config.spec-decoding }} - disagg: ${{ matrix.config.disagg }} + generate-cli-command: ${{ needs.prepare.outputs.generator-args }} + test-name: PR #${{ needs.prepare.outputs.pr-number }} sweep + ref: refs/pull/${{ needs.prepare.outputs.pr-number }}/head - run-multi-node: - needs: generate - if: ${{ needs.generate.result == 'success' && needs.generate.outputs.search-space-config != '[]' && needs.generate.outputs.is-multinode == 'true' }} - uses: ./.github/workflows/benchmark-multinode-tmpl.yml - name: Sweep (Multi-Node) - strategy: - fail-fast: false - matrix: - config: ${{ fromJson(needs.generate.outputs.search-space-config) }} - secrets: inherit - with: - exp-name: ${{ matrix.config.exp-name }} - isl: ${{ matrix.config.isl }} - osl: ${{ matrix.config.osl }} - max-model-len: ${{ matrix.config.max-model-len }} - runner: ${{ matrix.config.runner }} - image: ${{ matrix.config.image }} - model: ${{ matrix.config.model }} - model-prefix: ${{ matrix.config.model-prefix }} - framework: ${{ matrix.config.framework }} - precision: ${{ matrix.config.precision }} - conc-list: ${{ toJson(matrix.config.conc) }} - spec-decoding: ${{ matrix.config.spec-decoding }} - disagg: ${{ matrix.config.disagg }} - - prefill-num-worker: ${{ matrix.config.prefill.num-worker }} - prefill-tp: ${{ matrix.config.prefill.tp }} - prefill-ep: ${{ matrix.config.prefill.ep }} - prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }} - prefill-additional-settings: ${{ 
toJson(matrix.config.prefill.additional-settings) }} - - decode-num-worker: ${{ matrix.config.decode.num-worker }} - decode-tp: ${{ matrix.config.decode.tp }} - decode-ep: ${{ matrix.config.decode.ep }} - decode-dp-attn: ${{ matrix.config.decode.dp-attn }} - decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }} - - collect-results: - needs: [run-single-node, run-multi-node] - if: ${{ always() && (needs.run-single-node.result == 'success' || needs.run-multi-node.result == 'success') }} - uses: ./.github/workflows/collect-results.yml - name: Collect Results + call-e2e-nonpr: + needs: prepare + if: ${{ github.event_name != 'issue_comment' && needs.prepare.outputs.generator-args != '' }} + uses: ./.github/workflows/e2e-tests.yml + name: Run E2E (manual/push) secrets: inherit with: - exp-name: '' + generate-cli-command: ${{ needs.prepare.outputs.generator-args }} + test-name: Manual/Push sweep + ref: ${{ needs.prepare.outputs.pr-number && format('refs/pull/{0}/head', needs.prepare.outputs.pr-number) || '' }} note-ignored: - # Inform when comment doesn't meet criteria (non-PR or not authorized) - if: ${{ github.event_name == 'issue_comment' && (!github.event.issue.pull_request || !startsWith(github.event.comment.body, '/sweep') || !contains(fromJson('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association)) }} + # Inform when comment doesn't meet criteria (non-PR or not a /sweep) + if: ${{ github.event_name == 'issue_comment' && (!github.event.issue.pull_request || !startsWith(github.event.comment.body, '/sweep')) }} runs-on: ubuntu-latest steps: - run: | - echo "Comment ignored. Either not on a PR, not a /sweep command, or author not authorized (OWNER/MEMBER/COLLABORATOR required)." + echo "Comment ignored. Either not on a PR or not a /sweep command. For PR comments, runs require environment approval." 
From 9a5fe6e49b3918e2511795a24c723c20c5610c47 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Wed, 10 Dec 2025 08:35:07 -0800 Subject: [PATCH 06/19] Changed environment location --- .github/workflows/e2e-tests.yml | 1 - .github/workflows/pr-comment-sweep.yml | 42 +++++++++++++++++--------- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml index 3633ef4ec..f807e8a97 100644 --- a/.github/workflows/e2e-tests.yml +++ b/.github/workflows/e2e-tests.yml @@ -36,7 +36,6 @@ jobs: runs-on: ubuntu-latest outputs: search-space-config: ${{ steps.get-jobs.outputs.search-space-config }} - environment: bryan-test steps: - name: Checkout code (ref) if: ${{ inputs.ref && inputs.ref != '' }} diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml index c2595c154..2397cf33c 100644 --- a/.github/workflows/pr-comment-sweep.yml +++ b/.github/workflows/pr-comment-sweep.yml @@ -1,4 +1,5 @@ name: Slash Command Sweep +run-name: "Validate PR #${{ github.event.issue.number }}" on: # PR comment trigger @@ -22,14 +23,16 @@ on: - master concurrency: - group: ${{ github.event.issue.number && format('PR-SWEEP-{0}', github.event.issue.number) || format('REF-SWEEP-{0}', github.ref_name) }} + group: "PR#${{ github.event.issue.number || github.ref_name }}" cancel-in-progress: true permissions: contents: read jobs: - prepare: + get-jobs: + # Skip for PR comments that are not /sweep; run for all other triggers + if: ${{ github.event_name != 'issue_comment' || (github.event.issue.pull_request && startsWith(github.event.comment.body, '/sweep')) }} runs-on: ubuntu-latest outputs: pr-number: ${{ steps.parse.outputs.pr-number || steps.resolve.outputs.pr-number }} @@ -95,27 +98,36 @@ jobs: echo "pr-number=$pr_number" >> "$GITHUB_OUTPUT" echo "generator-args=$gen_args" >> "$GITHUB_OUTPUT" - call-e2e-pr: - needs: prepare - if: ${{ github.event_name == 'issue_comment' && 
needs.prepare.outputs.pr-number != '' && needs.prepare.outputs.generator-args != '' }} + approval: + needs: get-jobs + if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' }} + runs-on: ubuntu-latest + name: approval + environment: bryan-test + steps: + - run: echo "approved" + + validate: + needs: [get-jobs, approval] + if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' }} uses: ./.github/workflows/e2e-tests.yml - name: Run E2E (PR comment) + name: validate secrets: inherit with: - generate-cli-command: ${{ needs.prepare.outputs.generator-args }} - test-name: PR #${{ needs.prepare.outputs.pr-number }} sweep - ref: refs/pull/${{ needs.prepare.outputs.pr-number }}/head + generate-cli-command: ${{ needs.get-jobs.outputs.generator-args }} + test-name: PR #${{ needs.get-jobs.outputs.pr-number }} sweep + ref: refs/pull/${{ needs.get-jobs.outputs.pr-number }}/head - call-e2e-nonpr: - needs: prepare - if: ${{ github.event_name != 'issue_comment' && needs.prepare.outputs.generator-args != '' }} + validate-nonpr: + needs: get-jobs + if: ${{ github.event_name != 'issue_comment' && needs.get-jobs.outputs.generator-args != '' }} uses: ./.github/workflows/e2e-tests.yml - name: Run E2E (manual/push) + name: validate (manual/push) secrets: inherit with: - generate-cli-command: ${{ needs.prepare.outputs.generator-args }} + generate-cli-command: ${{ needs.get-jobs.outputs.generator-args }} test-name: Manual/Push sweep - ref: ${{ needs.prepare.outputs.pr-number && format('refs/pull/{0}/head', needs.prepare.outputs.pr-number) || '' }} + ref: ${{ needs.get-jobs.outputs.pr-number && format('refs/pull/{0}/head', needs.get-jobs.outputs.pr-number) || '' }} note-ignored: # Inform when comment doesn't meet criteria (non-PR or not a /sweep) From 4524540ffcc18328160b3106d9dad388053566ea Mon Sep 17 00:00:00 2001 From: Oseltamivir 
Date: Wed, 10 Dec 2025 08:46:30 -0800 Subject: [PATCH 07/19] Stricter activation --- .github/workflows/pr-comment-sweep.yml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml index 2397cf33c..fafd588aa 100644 --- a/.github/workflows/pr-comment-sweep.yml +++ b/.github/workflows/pr-comment-sweep.yml @@ -47,14 +47,12 @@ jobs: PR_NUMBER: ${{ github.event.issue.number }} run: | set -euo pipefail - # Allow optional leading whitespace before /sweep - cmd_line=$(printf "%s" "$BODY" | awk '/^[[:space:]]*\/sweep/{print; exit}') + # Require /sweep at the start of the line + cmd_line=$(printf "%s" "$BODY" | awk '/^\/sweep/{print; exit}') if [[ -z "$cmd_line" ]]; then - echo "No /sweep command found in comment" >&2 + echo "No /sweep command found at comment start" >&2 exit 1 fi - # Trim leading spaces then strip the /sweep prefix - cmd_line=$(echo "$cmd_line" | sed 's/^[[:space:]]*//') if [[ "$cmd_line" == "/sweep" ]]; then cmd_args="" else @@ -100,7 +98,7 @@ jobs: approval: needs: get-jobs - if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' }} + if: ${{ needs.get-jobs.outputs.generator-args != '' }} runs-on: ubuntu-latest name: approval environment: bryan-test @@ -109,7 +107,7 @@ jobs: validate: needs: [get-jobs, approval] - if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' }} + if: ${{ needs.get-jobs.outputs.generator-args != '' }} uses: ./.github/workflows/e2e-tests.yml name: validate secrets: inherit From 91e6d6c9cd3e8a23e7ac36b96216d3e22c353328 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Sun, 14 Dec 2025 18:27:28 -0800 Subject: [PATCH 08/19] Test replies --- .github/workflows/pr-comment-sweep.yml | 34 ++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git 
a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml index fafd588aa..89c9eed96 100644 --- a/.github/workflows/pr-comment-sweep.yml +++ b/.github/workflows/pr-comment-sweep.yml @@ -28,6 +28,7 @@ concurrency: permissions: contents: read + issues: write jobs: get-jobs: @@ -63,6 +64,39 @@ jobs: echo "generator-args=$cmd_args" >> "$GITHUB_OUTPUT" echo "pr-number=$PR_NUMBER" >> "$GITHUB_OUTPUT" + - name: Reply with run link + if: ${{ github.event_name == 'issue_comment' && startsWith(github.event.comment.body, '/sweep') }} + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + env: + RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + AUTHOR: ${{ github.event.comment.user.login }} + with: + script: | + const owner = context.repo.owner; + const repo = context.repo.repo; + const issue_number = context.issue.number; + const runUrl = process.env.RUN_URL; + const author = process.env.AUTHOR; + const body = `@${author} thanks! 
Kicking off a sweep.\n\nRun: ${runUrl}\nApproval: required in environment 'bryan-test'.`; + await github.rest.issues.createComment({ owner, repo, issue_number, body }); + + - name: Reply with run link (manual trigger) + if: ${{ github.event_name == 'workflow_dispatch' && steps.resolve.outputs.pr-number != '' }} + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + env: + RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + AUTHOR: ${{ github.actor }} + ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }} + with: + script: | + const owner = context.repo.owner; + const repo = context.repo.repo; + const issue_number = process.env.ISSUE_NUMBER; + const runUrl = process.env.RUN_URL; + const author = process.env.AUTHOR; + const body = `@${author} triggered a manual sweep.\n\nRun: ${runUrl}\n(Manual run on branch ${context.ref})`; + await github.rest.issues.createComment({ owner, repo, issue_number, body }); + - name: Find PR for this branch (if any) id: find if: ${{ github.event_name != 'issue_comment' }} From ac35382434387b932d7858064e4fac3cfd90d859 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Mon, 15 Dec 2025 07:27:47 -0800 Subject: [PATCH 09/19] Test replies --- .github/workflows/pr-comment-sweep.yml | 55 +++++++++++++++++--------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml index 89c9eed96..f71bbd303 100644 --- a/.github/workflows/pr-comment-sweep.yml +++ b/.github/workflows/pr-comment-sweep.yml @@ -80,23 +80,6 @@ jobs: const body = `@${author} thanks! 
Kicking off a sweep.\n\nRun: ${runUrl}\nApproval: required in environment 'bryan-test'.`; await github.rest.issues.createComment({ owner, repo, issue_number, body }); - - name: Reply with run link (manual trigger) - if: ${{ github.event_name == 'workflow_dispatch' && steps.resolve.outputs.pr-number != '' }} - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 - env: - RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - AUTHOR: ${{ github.actor }} - ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }} - with: - script: | - const owner = context.repo.owner; - const repo = context.repo.repo; - const issue_number = process.env.ISSUE_NUMBER; - const runUrl = process.env.RUN_URL; - const author = process.env.AUTHOR; - const body = `@${author} triggered a manual sweep.\n\nRun: ${runUrl}\n(Manual run on branch ${context.ref})`; - await github.rest.issues.createComment({ owner, repo, issue_number, body }); - - name: Find PR for this branch (if any) id: find if: ${{ github.event_name != 'issue_comment' }} @@ -130,9 +113,43 @@ jobs: echo "pr-number=$pr_number" >> "$GITHUB_OUTPUT" echo "generator-args=$gen_args" >> "$GITHUB_OUTPUT" + - name: Reply with run link (manual trigger) + if: ${{ github.event_name == 'workflow_dispatch' && steps.resolve.outputs.pr-number != '' }} + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + env: + RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + AUTHOR: ${{ github.actor }} + ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }} + with: + script: | + const owner = context.repo.owner; + const repo = context.repo.repo; + const issue_number = process.env.ISSUE_NUMBER; + const runUrl = process.env.RUN_URL; + const author = process.env.AUTHOR; + const body = `@${author} triggered a manual sweep.\n\nRun: ${runUrl}\n(Manual run on branch ${context.ref})`; + await github.rest.issues.createComment({ owner, repo, 
issue_number, body }); + + - name: Reply with run link (push trigger) + if: ${{ github.event_name == 'push' && steps.resolve.outputs.pr-number != '' }} + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + env: + RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + AUTHOR: ${{ github.actor }} + ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }} + with: + script: | + const owner = context.repo.owner; + const repo = context.repo.repo; + const issue_number = process.env.ISSUE_NUMBER; + const runUrl = process.env.RUN_URL; + const author = process.env.AUTHOR; + const body = `@${author} pushed changes and triggered a sweep.\n\nRun: ${runUrl}\n(Push on ${context.ref})`; + await github.rest.issues.createComment({ owner, repo, issue_number, body }); + approval: needs: get-jobs - if: ${{ needs.get-jobs.outputs.generator-args != '' }} + if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' }} runs-on: ubuntu-latest name: approval environment: bryan-test @@ -141,7 +158,7 @@ jobs: validate: needs: [get-jobs, approval] - if: ${{ needs.get-jobs.outputs.generator-args != '' }} + if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' }} uses: ./.github/workflows/e2e-tests.yml name: validate secrets: inherit From c09922a79155f6729d72b82a2273a42474adfa85 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Mon, 15 Dec 2025 07:38:10 -0800 Subject: [PATCH 10/19] Use token for comment perm --- .github/workflows/pr-comment-sweep.yml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml index f71bbd303..35f21c334 100644 --- a/.github/workflows/pr-comment-sweep.yml +++ b/.github/workflows/pr-comment-sweep.yml @@ -29,6 +29,7 @@ concurrency: permissions: contents: 
read issues: write + pull-requests: write jobs: get-jobs: @@ -65,12 +66,14 @@ jobs: echo "pr-number=$PR_NUMBER" >> "$GITHUB_OUTPUT" - name: Reply with run link - if: ${{ github.event_name == 'issue_comment' && startsWith(github.event.comment.body, '/sweep') }} + if: ${{ github.event_name == 'issue_comment' && startsWith(github.event.comment.body, '/sweep') && github.repository_owner == 'InferenceMAX' }} uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + continue-on-error: true env: RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} AUTHOR: ${{ github.event.comment.user.login }} with: + github-token: ${{ github.token }} script: | const owner = context.repo.owner; const repo = context.repo.repo; @@ -114,13 +117,15 @@ jobs: echo "generator-args=$gen_args" >> "$GITHUB_OUTPUT" - name: Reply with run link (manual trigger) - if: ${{ github.event_name == 'workflow_dispatch' && steps.resolve.outputs.pr-number != '' }} + if: ${{ github.event_name == 'workflow_dispatch' && steps.resolve.outputs.pr-number != '' && github.repository_owner == 'InferenceMAX' }} uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + continue-on-error: true env: RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} AUTHOR: ${{ github.actor }} ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }} with: + github-token: ${{ github.token }} script: | const owner = context.repo.owner; const repo = context.repo.repo; @@ -131,13 +136,15 @@ jobs: await github.rest.issues.createComment({ owner, repo, issue_number, body }); - name: Reply with run link (push trigger) - if: ${{ github.event_name == 'push' && steps.resolve.outputs.pr-number != '' }} + if: ${{ github.event_name == 'push' && steps.resolve.outputs.pr-number != '' && github.repository_owner == 'InferenceMAX' }} uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + continue-on-error: true 
env: RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} AUTHOR: ${{ github.actor }} ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }} with: + github-token: ${{ github.token }} script: | const owner = context.repo.owner; const repo = context.repo.repo; From 4f9eb0cc505570cec641097ed67e669c36ce267d Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Mon, 15 Dec 2025 07:44:33 -0800 Subject: [PATCH 11/19] Forgot validation --- .github/workflows/pr-comment-sweep.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml index 35f21c334..9e1501d99 100644 --- a/.github/workflows/pr-comment-sweep.yml +++ b/.github/workflows/pr-comment-sweep.yml @@ -156,7 +156,7 @@ jobs: approval: needs: get-jobs - if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' }} + if: ${{ (github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '') || (github.event_name == 'push' && needs.get-jobs.outputs.generator-args != '') }} runs-on: ubuntu-latest name: approval environment: bryan-test @@ -165,7 +165,7 @@ jobs: validate: needs: [get-jobs, approval] - if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' }} + if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' && needs.approval.result == 'success' }} uses: ./.github/workflows/e2e-tests.yml name: validate secrets: inherit @@ -175,8 +175,8 @@ jobs: ref: refs/pull/${{ needs.get-jobs.outputs.pr-number }}/head validate-nonpr: - needs: get-jobs - if: ${{ github.event_name != 'issue_comment' && needs.get-jobs.outputs.generator-args != '' }} + needs: [get-jobs, approval] + if: ${{ 
needs.get-jobs.outputs.generator-args != '' && ((github.event_name == 'push' && needs.approval.result == 'success') || github.event_name == 'workflow_dispatch') }} uses: ./.github/workflows/e2e-tests.yml name: validate (manual/push) secrets: inherit From b6fa1012d585e53e540280c43fe2a661bd193cb7 Mon Sep 17 00:00:00 2001 From: Cameron Quilici Date: Mon, 15 Dec 2025 09:18:56 -0600 Subject: [PATCH 12/19] feat: performance changelog triggered runs (as opposed to nightly) (#267) [skip-sweep] * add logic for event driven runs new single workflow that runs on merge to main, new perf-changelog.yaml to track performance changes, new logic to parse changelog, removed cron job in full sweep schedulers * testing pt 1 * raise error if yaml diff in perf changelog is not valid * remove unused imports in process_changelog.py * config data key fix * raise error if test-config subprocess fails to run * backfill changelog * backfill changelog pt 2 * backfill changelog pt 3 * backfill changelog pt 4 * backfill changelog pt 5 * backfill changelog pt 6 * add always() condition to upload changelog metadata * backfill changelog pt 7 (test) * backfill changelog pt 8 (revert test) * backfill changelog pt 9 * backfill changelog pt 11 * change if condition for jobs in run sweep workflow * debugging run sweep workflow * debugging run sweep workflow pt 2 * debugging run sweep workflow pt 3 (revert) * debugging run sweep workflow pt 4 * debugging run sweep workflow pt 5 * debugging run sweep workflow pt 6 * debugging run sweep workflow pt 7 * add always() condition to upload changelog metadata (add back, this got removed) * add bmk prefix to results * backfill changelog official * for concurrency group, use more unique sha --- .../workflows/benchmark-multinode-tmpl.yml | 2 +- .github/workflows/benchmark-tmpl.yml | 2 +- .github/workflows/collect-results.yml | 10 +- .../workflows/full-sweep-1k1k-scheduler.yml | 2 - .../workflows/full-sweep-1k8k-scheduler.yml | 2 -
.../workflows/full-sweep-8k1k-scheduler.yml | 2 - .github/workflows/run-sweep.yml | 235 ++++++++++++++++++ perf-changelog.yaml | 83 +++++++ utils/constants.py | 4 + utils/matrix_logic/generate_sweep_configs.py | 190 +++++++++++--- .../test_generate_sweep_configs.py | 86 ------- utils/matrix_logic/test_validation.py | 129 ++++++++++ utils/matrix_logic/validation.py | 116 +++++++++ utils/process_changelog.py | 142 +++++++++++ 14 files changed, 867 insertions(+), 138 deletions(-) create mode 100644 .github/workflows/run-sweep.yml create mode 100644 perf-changelog.yaml create mode 100644 utils/constants.py create mode 100644 utils/process_changelog.py diff --git a/.github/workflows/benchmark-multinode-tmpl.yml b/.github/workflows/benchmark-multinode-tmpl.yml index 2b828bda8..6c42cbdaa 100644 --- a/.github/workflows/benchmark-multinode-tmpl.yml +++ b/.github/workflows/benchmark-multinode-tmpl.yml @@ -170,5 +170,5 @@ jobs: - name: Upload results uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: - name: ${{ env.RESULT_FILENAME }} + name: bmk_${{ env.RESULT_FILENAME }} path: agg_${{ env.RESULT_FILENAME }}_*.json diff --git a/.github/workflows/benchmark-tmpl.yml b/.github/workflows/benchmark-tmpl.yml index 60c19b441..a48081fac 100644 --- a/.github/workflows/benchmark-tmpl.yml +++ b/.github/workflows/benchmark-tmpl.yml @@ -169,5 +169,5 @@ jobs: - name: Upload result uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: - name: ${{ env.RESULT_FILENAME }} + name: bmk_${{ env.RESULT_FILENAME }} path: agg_${{ env.RESULT_FILENAME }}.json \ No newline at end of file diff --git a/.github/workflows/collect-results.yml b/.github/workflows/collect-results.yml index 8105c6d53..d0b0bd992 100644 --- a/.github/workflows/collect-results.yml +++ b/.github/workflows/collect-results.yml @@ -3,7 +3,7 @@ name: Template - Collect Results on: workflow_call: inputs: - exp-name: + result-prefix: required: false type: string 
default: '' @@ -26,7 +26,7 @@ jobs: uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 with: path: results/ - pattern: ${{ inputs.exp-name && format('{0}_*', inputs.exp-name) || '*' }} + pattern: ${{ inputs.result-prefix && format('{0}_*', inputs.result-prefix) || '*' }} - name: Print summary run: | @@ -34,10 +34,10 @@ jobs: python3 utils/summarize.py results/ >> $GITHUB_STEP_SUMMARY - name: Aggregate results - run: python3 utils/collect_results.py results/ ${{ inputs.exp-name || 'all' }} + run: python3 utils/collect_results.py results/ ${{ inputs.result-prefix || 'all' }} - name: Upload aggregated results uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: - name: results_${{ inputs.exp-name || 'all' }} - path: agg_${{ inputs.exp-name || 'all' }}.json + name: results_${{ inputs.result-prefix || 'all' }} + path: agg_${{ inputs.result-prefix || 'all' }}.json diff --git a/.github/workflows/full-sweep-1k1k-scheduler.yml b/.github/workflows/full-sweep-1k1k-scheduler.yml index 8b32f47c0..3c592cf0a 100644 --- a/.github/workflows/full-sweep-1k1k-scheduler.yml +++ b/.github/workflows/full-sweep-1k1k-scheduler.yml @@ -2,8 +2,6 @@ name: "Full Sweep Scheduler - 1k1k" on: workflow_dispatch: - schedule: - - cron: "0 0 * * *" jobs: get-dsr1-configs: diff --git a/.github/workflows/full-sweep-1k8k-scheduler.yml b/.github/workflows/full-sweep-1k8k-scheduler.yml index 393864fdf..be909aad5 100644 --- a/.github/workflows/full-sweep-1k8k-scheduler.yml +++ b/.github/workflows/full-sweep-1k8k-scheduler.yml @@ -2,8 +2,6 @@ name: "Full Sweep Scheduler - 1k8k" on: workflow_dispatch: - schedule: - - cron: "0 0 * * *" jobs: get-dsr1-configs: diff --git a/.github/workflows/full-sweep-8k1k-scheduler.yml b/.github/workflows/full-sweep-8k1k-scheduler.yml index 629e56bd9..3eabe74f4 100644 --- a/.github/workflows/full-sweep-8k1k-scheduler.yml +++ b/.github/workflows/full-sweep-8k1k-scheduler.yml @@ -2,8 +2,6 @@ name: "Full Sweep 
Scheduler - 8k1k" on: workflow_dispatch: - schedule: - - cron: "0 0 * * *" jobs: get-dsr1-configs: diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml new file mode 100644 index 000000000..cb3c4dde5 --- /dev/null +++ b/.github/workflows/run-sweep.yml @@ -0,0 +1,235 @@ +name: "Run Sweep" +run-name: Run Sweep - ${{ github.event.pull_request.title || github.ref_name }} + +concurrency: + group: sweep-${{ github.event.pull_request.number || github.sha }} + cancel-in-progress: true + +on: + push: + branches: + - main + paths: + - "perf-changelog.yaml" + pull_request: + branches: + - main + types: + - ready_for_review + - synchronize + - labeled + paths: + - "perf-changelog.yaml" + +jobs: + setup: + runs-on: ubuntu-latest + if: >- + (github.event_name == 'pull_request' && !github.event.pull_request.draft && contains(github.event.pull_request.labels.*.name, 'sweep-enabled')) || + (github.event_name != 'pull_request' && !contains(github.event.head_commit.message, '[skip-sweep]')) + outputs: + search-space-config: ${{ steps.setup.outputs.search-space-config }} + steps: + - name: Checkout code + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + with: + fetch-depth: 0 + + - id: setup + run: | + pip install pydantic + + if [ "${{ github.event_name }}" == "pull_request" ]; then + BASE_REF="origin/${{ github.base_ref }}" + HEAD_REF="${{ github.event.pull_request.head.sha }}" + else + BASE_REF="${{ github.event.before }}" + HEAD_REF="${{ github.event.after }}" + fi + + CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/process_changelog.py \ + --changelog-file ${GITHUB_WORKSPACE}/perf-changelog.yaml \ + --base-ref "$BASE_REF" \ + --head-ref "$HEAD_REF") + + echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT + + sweep-multi-node-1k1k: + needs: setup + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != 'null' }} + uses: ./.github/workflows/benchmark-multinode-tmpl.yml + name: multi-node 1k1k 
/ + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }} + secrets: inherit + with: &multi-node-inputs + isl: ${{ matrix.config.isl }} + osl: ${{ matrix.config.osl }} + max-model-len: ${{ matrix.config.max-model-len }} + runner: ${{ matrix.config.runner }} + image: ${{ matrix.config.image }} + model: ${{ matrix.config.model }} + model-prefix: ${{ matrix.config.model-prefix }} + framework: ${{ matrix.config.framework }} + precision: ${{ matrix.config.precision }} + exp-name: ${{ matrix.config.exp-name }} + conc-list: ${{ toJson(matrix.config.conc) }} + spec-decoding: ${{ matrix.config.spec-decoding }} + disagg: ${{ matrix.config.disagg }} + + prefill-num-worker: ${{ matrix.config.prefill.num-worker }} + prefill-tp: ${{ matrix.config.prefill.tp }} + prefill-ep: ${{ matrix.config.prefill.ep }} + prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }} + prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }} + + decode-num-worker: ${{ matrix.config.decode.num-worker }} + decode-tp: ${{ matrix.config.decode.tp }} + decode-ep: ${{ matrix.config.decode.ep }} + decode-dp-attn: ${{ matrix.config.decode.dp-attn }} + decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }} + + sweep-multi-node-1k8k: + needs: setup + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k8k']) != 'null' }} + uses: ./.github/workflows/benchmark-multinode-tmpl.yml + name: multi-node 1k8k / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k8k'] }} + secrets: inherit + with: *multi-node-inputs + + sweep-multi-node-8k1k: + needs: setup + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != 'null' }} + uses: ./.github/workflows/benchmark-multinode-tmpl.yml + name: multi-node 8k1k / + strategy: + fail-fast: false + matrix: + config: ${{ 
fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k'] }} + secrets: inherit + with: *multi-node-inputs + + sweep-single-node-1k1k: + needs: setup + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k1k']) != 'null' }} + uses: ./.github/workflows/benchmark-tmpl.yml + name: single-node 1k1k / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }} + secrets: inherit + with: &single-node-inputs + exp-name: ${{ matrix.config.exp-name }} + isl: ${{ matrix.config.isl }} + osl: ${{ matrix.config.osl }} + max-model-len: ${{ matrix.config.max-model-len }} + runner: ${{ matrix.config.runner }} + image: ${{ matrix.config.image }} + model: ${{ matrix.config.model }} + model-prefix: ${{ matrix.config.model-prefix }} + framework: ${{ matrix.config.framework }} + precision: ${{ matrix.config.precision }} + tp: ${{ matrix.config.tp }} + ep: ${{ matrix.config.ep }} + dp-attn: ${{ matrix.config.dp-attn }} + conc: ${{ matrix.config.conc }} + spec-decoding: ${{ matrix.config.spec-decoding }} + disagg: ${{ matrix.config.disagg }} + + sweep-single-node-1k8k: + needs: setup + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k8k']) != 'null' }} + uses: ./.github/workflows/benchmark-tmpl.yml + name: single-node 1k8k / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k8k'] }} + secrets: inherit + with: *single-node-inputs + + sweep-single-node-8k1k: + needs: setup + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['8k1k']) != 'null' }} + uses: ./.github/workflows/benchmark-tmpl.yml + name: single-node 8k1k / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['8k1k'] }} + secrets: inherit + with: *single-node-inputs + + collect-results: + needs: + [ + sweep-single-node-1k1k, + 
sweep-single-node-1k8k, + sweep-single-node-8k1k, + sweep-multi-node-1k1k, + sweep-multi-node-1k8k, + sweep-multi-node-8k1k, + setup, + ] + if: ${{ always() && needs.setup.result != 'skipped' }} + uses: ./.github/workflows/collect-results.yml + secrets: inherit + with: + result-prefix: "bmk" + + upload-changelog-metadata: + needs: [setup, collect-results] + if: ${{ always() && needs.setup.result != 'skipped' }} + runs-on: ubuntu-latest + steps: + - name: Extract and save changelog metadata + env: + CONFIG_JSON: ${{ needs.setup.outputs.search-space-config }} + run: | + echo "$CONFIG_JSON" | jq '.changelog_metadata' > changelog_metadata.json + + - name: Upload changelog artifact + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: changelog-metadata + path: changelog_metadata.json + + calc-success-rate: + needs: collect-results + if: ${{ always() && needs.collect-results.result != 'skipped'}} + runs-on: ubuntu-latest + + env: + RESULTS_DIR: "results/" + STATS_FILENAME: "run_stats" + GITHUB_TOKEN: ${{ secrets.REPO_PAT }} + + steps: + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + with: + token: ${{ secrets.REPO_PAT }} + fetch-depth: 0 + + - name: Download results artifacts + uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + with: + path: ${{ env.RESULTS_DIR }} + pattern: results_* + + - name: Install python dependencies + run: pip install PyGithub + + - name: Calculate success rate + run: python3 utils/calc_success_rate.py $STATS_FILENAME + + - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: "run-stats" + path: ${{ env.STATS_FILENAME }}.json diff --git a/perf-changelog.yaml b/perf-changelog.yaml new file mode 100644 index 000000000..a74285c53 --- /dev/null +++ b/perf-changelog.yaml @@ -0,0 +1,83 @@ +- config-keys: + - 70b-fp8-*-vllm + description: | + - Add compilation-config: '{"custom_ops": ["-rms_norm",
"-quant_fp8", "-silu_and_mul"]}' as + extra config to all benchmarks/70b_fp8_mi*.sh scripts + - 6-7% uplift for llama for 6/8 configs + PR: https://github.com/InferenceMAX/InferenceMAX/pull/95 +- config-keys: + - gptoss-fp4-*-trt + description: | + - Upgrade GPT-OSS TRT images from 'release:1.1.0rc2.post2' to '1.2.0rc0.post1' + - Add NCCL_GRAPH_REGISTER=0 to benchmarks/gptoss_fp4_b200_trt_slurm.sh + - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh + - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM + PR: https://github.com/InferenceMAX/InferenceMAX/pull/110 +- config-keys: + - gptoss* + - dsr1* + description: | + - Remove Llama 70B runs to make room for multi-node disagg prefill+wideEP on + h100/h200/b200/mi300/mi325/mi355 + PR: https://github.com/InferenceMAX/InferenceMAX/pull/149 +- config-keys: + - gptoss-fp4-b200-vllm + - gptoss-fp4-h100-vllm + - gptoss-fp4-h200-vllm + description: | + - Upgrade vLLM from 0.10.2 to 0.11.0 for GPT-OSS NVIDIA single-node configs + - Adds compilation-config: '{"cudagraph_mode":"PIECEWISE"}' accordingly since vLLM 0.11.0 + now defaults to FULL_AND_PIECEWISE + PR: https://github.com/InferenceMAX/InferenceMAX/pull/159 +- config-keys: + - dsr1* + description: | + - Fixes bug where 1k8k and 8k1k full sweeps had incorrect max-model-len for DeepSeek + PR: https://github.com/InferenceMAX/InferenceMAX/pull/163 +- config-keys: + - dsr1-fp4-b200-sglang + - dsr1-fp8-b200-sglang + - dsr1-fp8-h200-sglang + description: | + - Consolidates H200 and B200 SGLang configurations to use unified v0.5.5-cu129-amd64 + image tag and updates deprecated SGLang server arguments to their current equivalents.
+ - --enable-flashinfer-trtllm-moe & --enable-ep-moe are no longer available in sglang so we needed to change them + - ep: 4 for all tp: 4 entries (3 occurrences in dsr1-fp4-b200-sglang) + - ep: 8 for all tp: 8 entries (6 occurrences across dsr1-fp4-b200-sglang and dsr1-fp8-b200-sglang) + - dsr1_fp4_b200_docker.sh: Replaced --enable-ep-moe with --ep-size $EP_SIZE and --enable-flashinfer-trtllm-moe with + --moe-runner-backend flashinfer_trtllm + - dsr1_fp8_b200_docker.sh: Replaced --enable-flashinfer-trtllm-moe with --moe-runner-backend flashinfer_trtllm and + added --ep-size $EP_SIZE + - launch_b200-nvd.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container + - launch_b200-tg.sh: Added -e EP_SIZE to Docker run command to pass environment variable to container + PR: https://github.com/InferenceMAX/InferenceMAX/pull/204 +- config-keys: + - gptoss-fp4-mi355x-vllm + - gptoss-fp4-b200-vllm + description: | + - Extend concurrency to 128 for gptoss mi355x/b200 vllm configurations + PR: https://github.com/InferenceMAX/InferenceMAX/pull/209 +- config-keys: + - gptoss-fp4-b200-trt + description: | + - Extend concurrency to 128 for gptoss b200 TRT configurations + PR: https://github.com/InferenceMAX/InferenceMAX/pull/233 +- config-keys: + - "*gb200-sglang" + description: | + - Introducing some improvements in GB200 SGLang DSR1 submission + PR: https://github.com/InferenceMAX/InferenceMAX/pull/257 +- config-keys: + - dsr1-fp8-h200-trt + description: | + - Update TRT image from nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc0.post1 to nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2 + - Increase concurrency for some configurations + PR: https://github.com/InferenceMAX/InferenceMAX/pull/266 +- config-keys: + - gptoss-fp4-b200-vllm + - gptoss-fp4-h100-vllm + - gptoss-fp4-h200-vllm + description: | + - Update vLLM image for NVIDIA configs from vLLM 0.11.0 to vLLM 0.11.2 + - Adds kv-cache-dtype: fp8 to benchmarks/gptoss_fp4_b200_docker.sh + PR: 
https://github.com/InferenceMAX/InferenceMAX/pull/273 \ No newline at end of file diff --git a/utils/constants.py b/utils/constants.py new file mode 100644 index 000000000..a465091da --- /dev/null +++ b/utils/constants.py @@ -0,0 +1,4 @@ +MASTER_CONFIGS = [".github/configs/amd-master.yaml", + ".github/configs/nvidia-master.yaml"] +RUNNER_CONFIG = ".github/configs/runners.yaml" +GENERATE_SWEEPS_PY_SCRIPT = "utils/matrix_logic/generate_sweep_configs.py" \ No newline at end of file diff --git a/utils/matrix_logic/generate_sweep_configs.py b/utils/matrix_logic/generate_sweep_configs.py index 8fc47651c..d8fab38cf 100644 --- a/utils/matrix_logic/generate_sweep_configs.py +++ b/utils/matrix_logic/generate_sweep_configs.py @@ -1,8 +1,17 @@ import json -import yaml import argparse +import sys +from pathlib import Path -from validation import validate_master_config, validate_matrix_entry, validate_runner_config, Fields +# Ensure sibling modules are importable regardless of how script is invoked +sys.path.insert(0, str(Path(__file__).resolve().parent)) + +from validation import ( + validate_matrix_entry, + load_config_files, + load_runner_file, + Fields +) seq_len_stoi = { "1k1k": (1024, 1024), @@ -366,42 +375,126 @@ def get_lowest_conc(search_space_entry): return matrix_values -def load_config_files(config_files): - """Load and merge configuration files.""" - all_config_data = {} - for config_file in config_files: - try: - with open(config_file, 'r') as f: - config_data = yaml.safe_load(f) - assert isinstance( - config_data, dict), f"Config file '{config_file}' must contain a dictionary" - - # Check for duplicate keys, this is only in place to prevent against the very unlikely - # case where an entry in one config accidentally/purposefully tries to override an entry in another config - duplicate_keys = set(all_config_data.keys()) & set( - config_data.keys()) - if duplicate_keys: - raise ValueError( - f"Duplicate configuration keys found in '{config_file}': {', 
'.join(sorted(duplicate_keys))}" - ) - - all_config_data.update(config_data) - except FileNotFoundError: - raise ValueError(f"Input file '{config_file}' does not exist.") - - return all_config_data - - -def load_runner_file(runner_file): - """Load runner configuration file.""" - try: - with open(runner_file, 'r') as f: - runner_config = yaml.safe_load(f) - except FileNotFoundError as e: +def generate_test_config_sweep(args, all_config_data): + """Generate full sweep for specific config keys. + + Validates that all specified config keys exist before generating. + Expands all configs fully without any filtering. + """ + # Validate all config keys exist + missing_keys = [key for key in args.config_keys if key not in all_config_data] + if missing_keys: + available_keys = sorted(all_config_data.keys()) raise ValueError( - f"Runner config file '{runner_file}' does not exist.") + f"Config key(s) not found: {', '.join(missing_keys)}.\n" + f"Available keys: {', '.join(available_keys)}" + ) + + matrix_values = [] + + for key in args.config_keys: + val = all_config_data[key] + is_multinode = val.get(Fields.MULTINODE.value, False) + + image = val[Fields.IMAGE.value] + model = val[Fields.MODEL.value] + model_code = val[Fields.MODEL_PREFIX.value] + precision = val[Fields.PRECISION.value] + framework = val[Fields.FRAMEWORK.value] + runner = val[Fields.RUNNER.value] + disagg = val.get(Fields.DISAGG.value, False) - return runner_config + for seq_len_config in val[Fields.SEQ_LEN_CONFIGS.value]: + isl = seq_len_config[Fields.ISL.value] + osl = seq_len_config[Fields.OSL.value] + seq_len_str = seq_len_to_str(isl, osl) + + for bmk in seq_len_config[Fields.SEARCH_SPACE.value]: + if is_multinode: + # Multinode config + spec_decoding = bmk.get(Fields.SPEC_DECODING.value, "none") + prefill = bmk[Fields.PREFILL.value] + decode = bmk[Fields.DECODE.value] + + # Get concurrency values + if Fields.CONC_LIST.value in bmk: + conc_values = bmk[Fields.CONC_LIST.value] + else: + conc_start = 
bmk[Fields.CONC_START.value] + conc_end = bmk[Fields.CONC_END.value] + conc_values = [] + conc = conc_start + while conc <= conc_end: + conc_values.append(conc) + if conc == conc_end: + break + conc *= 2 + if conc > conc_end: + conc = conc_end + + entry = { + Fields.IMAGE.value: image, + Fields.MODEL.value: model, + Fields.MODEL_PREFIX.value: model_code, + Fields.PRECISION.value: precision, + Fields.FRAMEWORK.value: framework, + Fields.RUNNER.value: runner, + Fields.ISL.value: isl, + Fields.OSL.value: osl, + Fields.SPEC_DECODING.value: spec_decoding, + Fields.PREFILL.value: prefill, + Fields.DECODE.value: decode, + Fields.CONC.value: conc_values, + Fields.MAX_MODEL_LEN.value: isl + osl + 200, + Fields.EXP_NAME.value: f"{model_code}_{seq_len_str}", + Fields.DISAGG.value: disagg, + } + matrix_values.append(validate_matrix_entry(entry, is_multinode=True)) + else: + # Single-node config + tp = bmk[Fields.TP.value] + ep = bmk.get(Fields.EP.value) + dp_attn = bmk.get(Fields.DP_ATTN.value) + spec_decoding = bmk.get(Fields.SPEC_DECODING.value, "none") + + # Get concurrency values + if Fields.CONC_LIST.value in bmk: + conc_values = bmk[Fields.CONC_LIST.value] + else: + conc_start = bmk[Fields.CONC_START.value] + conc_end = bmk[Fields.CONC_END.value] + conc_values = [] + conc = conc_start + while conc <= conc_end: + conc_values.append(conc) + if conc == conc_end: + break + conc *= 2 + if conc > conc_end: + conc = conc_end + + for conc in conc_values: + entry = { + Fields.IMAGE.value: image, + Fields.MODEL.value: model, + Fields.MODEL_PREFIX.value: model_code, + Fields.PRECISION.value: precision, + Fields.FRAMEWORK.value: framework, + Fields.RUNNER.value: runner, + Fields.ISL.value: isl, + Fields.OSL.value: osl, + Fields.TP.value: tp, + Fields.CONC.value: conc, + Fields.MAX_MODEL_LEN.value: isl + osl + 200, + Fields.EP.value: ep if ep is not None else 1, + Fields.DP_ATTN.value: dp_attn if dp_attn is not None else False, + Fields.SPEC_DECODING.value: spec_decoding, + 
Fields.EXP_NAME.value: f"{model_code}_{seq_len_str}", + Fields.DISAGG.value: disagg, + } + matrix_values.append(validate_matrix_entry(entry, is_multinode=False)) + + return matrix_values def main(): @@ -545,13 +638,30 @@ def main(): help='Show this help message and exit' ) + # Subcommand: test-config + test_config_keys_parser = subparsers.add_parser( + 'test-config', + parents=[parent_parser], + add_help=False, + help='Generate full sweep for specific config keys. Validates that all specified keys exist before generating.' + ) + test_config_keys_parser.add_argument( + '--config-keys', + nargs='+', + required=True, + help='One or more config keys to generate sweep for (e.g., dsr1-fp4-b200-sglang dsr1-fp8-h200-trt)' + ) + test_config_keys_parser.add_argument( + '-h', '--help', + action='help', + help='Show this help message and exit' + ) + args = parser.parse_args() - # Load and validate configuration files + # Load and validate configuration files (validation happens by default in load functions) all_config_data = load_config_files(args.config_files) runner_data = load_runner_file(args.runner_config) - validate_master_config(all_config_data) - validate_runner_config(runner_data) # Route to appropriate function based on subcommand if args.command == 'full-sweep': @@ -559,6 +669,8 @@ def main(): elif args.command == 'runner-model-sweep': matrix_values = generate_runner_model_sweep_config( args, all_config_data, runner_data) + elif args.command == 'test-config': + matrix_values = generate_test_config_sweep(args, all_config_data) else: parser.error(f"Unknown command: {args.command}") diff --git a/utils/matrix_logic/test_generate_sweep_configs.py b/utils/matrix_logic/test_generate_sweep_configs.py index 1381f394e..c505611c3 100644 --- a/utils/matrix_logic/test_generate_sweep_configs.py +++ b/utils/matrix_logic/test_generate_sweep_configs.py @@ -7,8 +7,6 @@ seq_len_to_str, generate_full_sweep, generate_runner_model_sweep_config, - load_config_files, - load_runner_file, ) 
@@ -583,90 +581,6 @@ def test_uses_lowest_conc(self, sample_single_node_config, sample_runner_config, assert all(entry["conc"] == 4 for entry in result) -# ============================================================================= -# Test load_config_files -# ============================================================================= - -class TestLoadConfigFiles: - """Tests for load_config_files function.""" - - def test_load_single_file(self, tmp_path): - """Should load a single config file.""" - config_file = tmp_path / "config.yaml" - config_file.write_text(""" -test-config: - image: test-image - model: test-model -""") - result = load_config_files([str(config_file)]) - assert "test-config" in result - assert result["test-config"]["image"] == "test-image" - - def test_load_multiple_files(self, tmp_path): - """Should merge multiple config files.""" - config1 = tmp_path / "config1.yaml" - config1.write_text(""" -config-one: - value: 1 -""") - config2 = tmp_path / "config2.yaml" - config2.write_text(""" -config-two: - value: 2 -""") - result = load_config_files([str(config1), str(config2)]) - assert "config-one" in result - assert "config-two" in result - - def test_duplicate_keys_raise_error(self, tmp_path): - """Duplicate keys across files should raise error.""" - config1 = tmp_path / "config1.yaml" - config1.write_text(""" -duplicate-key: - value: 1 -""") - config2 = tmp_path / "config2.yaml" - config2.write_text(""" -duplicate-key: - value: 2 -""") - with pytest.raises(ValueError) as exc_info: - load_config_files([str(config1), str(config2)]) - assert "Duplicate configuration keys" in str(exc_info.value) - - def test_nonexistent_file_raises_error(self): - """Nonexistent file should raise error.""" - with pytest.raises(ValueError) as exc_info: - load_config_files(["nonexistent.yaml"]) - assert "does not exist" in str(exc_info.value) - - -# ============================================================================= -# Test load_runner_file -# 
============================================================================= - -class TestLoadRunnerFile: - """Tests for load_runner_file function.""" - - def test_load_runner_file(self, tmp_path): - """Should load runner config file.""" - runner_file = tmp_path / "runners.yaml" - runner_file.write_text(""" -h100: -- h100-node-0 -- h100-node-1 -""") - result = load_runner_file(str(runner_file)) - assert "h100" in result - assert len(result["h100"]) == 2 - - def test_nonexistent_runner_file(self): - """Nonexistent runner file should raise error.""" - with pytest.raises(ValueError) as exc_info: - load_runner_file("nonexistent.yaml") - assert "does not exist" in str(exc_info.value) - - # ============================================================================= # Test edge cases and special configurations # ============================================================================= diff --git a/utils/matrix_logic/test_validation.py b/utils/matrix_logic/test_validation.py index 008ed2b42..d9cc7f0d9 100644 --- a/utils/matrix_logic/test_validation.py +++ b/utils/matrix_logic/test_validation.py @@ -14,6 +14,8 @@ validate_matrix_entry, validate_master_config, validate_runner_config, + load_config_files, + load_runner_file, ) @@ -738,3 +740,130 @@ def test_multiple_runner_types(self, valid_runner_config): assert "h200" in result assert "mi300x" in result assert "gb200" in result + + +# ============================================================================= +# Test load_config_files +# ============================================================================= + +class TestLoadConfigFiles: + """Tests for load_config_files function.""" + + def test_load_single_file_with_validation(self, tmp_path, valid_single_node_master_config): + """Should load and validate a single config file.""" + config_file = tmp_path / "config.yaml" + import yaml + config_file.write_text(yaml.dump({"test-config": valid_single_node_master_config})) + result = 
load_config_files([str(config_file)]) + assert "test-config" in result + assert result["test-config"]["image"] == valid_single_node_master_config["image"] + + def test_load_single_file_without_validation(self, tmp_path): + """Should load a single config file without validation when validate=False.""" + config_file = tmp_path / "config.yaml" + config_file.write_text(""" +test-config: + image: test-image + model: test-model +""") + result = load_config_files([str(config_file)], validate=False) + assert "test-config" in result + assert result["test-config"]["image"] == "test-image" + + def test_load_multiple_files(self, tmp_path): + """Should merge multiple config files.""" + config1 = tmp_path / "config1.yaml" + config1.write_text(""" +config-one: + value: 1 +""") + config2 = tmp_path / "config2.yaml" + config2.write_text(""" +config-two: + value: 2 +""") + result = load_config_files([str(config1), str(config2)], validate=False) + assert "config-one" in result + assert "config-two" in result + + def test_duplicate_keys_raise_error(self, tmp_path): + """Duplicate keys across files should raise error.""" + config1 = tmp_path / "config1.yaml" + config1.write_text(""" +duplicate-key: + value: 1 +""") + config2 = tmp_path / "config2.yaml" + config2.write_text(""" +duplicate-key: + value: 2 +""") + with pytest.raises(ValueError) as exc_info: + load_config_files([str(config1), str(config2)], validate=False) + assert "Duplicate configuration keys" in str(exc_info.value) + + def test_nonexistent_file_raises_error(self): + """Nonexistent file should raise error.""" + with pytest.raises(ValueError) as exc_info: + load_config_files(["nonexistent.yaml"]) + assert "does not exist" in str(exc_info.value) + + def test_validation_runs_by_default(self, tmp_path): + """Validation should run by default and catch invalid configs.""" + config_file = tmp_path / "config.yaml" + config_file.write_text(""" +invalid-config: + image: test-image + # Missing required fields like model, 
model-prefix, precision, etc. +""") + with pytest.raises(ValueError) as exc_info: + load_config_files([str(config_file)]) + assert "failed validation" in str(exc_info.value) + + +# ============================================================================= +# Test load_runner_file +# ============================================================================= + +class TestLoadRunnerFile: + """Tests for load_runner_file function.""" + + def test_load_runner_file_with_validation(self, tmp_path): + """Should load and validate runner config file.""" + runner_file = tmp_path / "runners.yaml" + runner_file.write_text(""" +h100: +- h100-node-0 +- h100-node-1 +""") + result = load_runner_file(str(runner_file)) + assert "h100" in result + assert len(result["h100"]) == 2 + + def test_load_runner_file_without_validation(self, tmp_path): + """Should load runner config file without validation when validate=False.""" + runner_file = tmp_path / "runners.yaml" + runner_file.write_text(""" +h100: +- h100-node-0 +- h100-node-1 +""") + result = load_runner_file(str(runner_file), validate=False) + assert "h100" in result + assert len(result["h100"]) == 2 + + def test_nonexistent_runner_file(self): + """Nonexistent runner file should raise error.""" + with pytest.raises(ValueError) as exc_info: + load_runner_file("nonexistent.yaml") + assert "does not exist" in str(exc_info.value) + + def test_validation_runs_by_default(self, tmp_path): + """Validation should run by default and catch invalid configs.""" + runner_file = tmp_path / "runners.yaml" + runner_file.write_text(""" +h100: not-a-list +""") + with pytest.raises(ValueError) as exc_info: + load_runner_file(str(runner_file)) + assert "must be a list" in str(exc_info.value) diff --git a/utils/matrix_logic/validation.py b/utils/matrix_logic/validation.py index 30012423a..955e4c5b5 100644 --- a/utils/matrix_logic/validation.py +++ b/utils/matrix_logic/validation.py @@ -3,6 +3,7 @@ from enum import Enum import pprint +import yaml """ 
The below class defines the field names expected to be present in the JSON entries @@ -315,3 +316,118 @@ def validate_runner_config(runner_configs: dict) -> List[dict]: f"Runner config entry '{key}' cannot be an empty list") return runner_configs + + +""" + Below is the validation logic for the changelog entries found in perf-changelog.yaml. + This ensures that the changelog entries conform to the expected structure before + proceeding with processing. +""" + + +class ChangelogEntry(BaseModel): + """Pydantic model for validating changelog entry structure.""" + model_config = ConfigDict(extra="forbid", populate_by_name=True) + + config_keys: list[str] = Field(alias="config-keys", min_length=1) + description: str + + +class ChangelogMetadata(BaseModel): + """Pydantic model for validating changelog metadata structure.""" + model_config = ConfigDict(extra="forbid") + + base_ref: str + head_ref: str + entries: list[ChangelogEntry] + + +class ChangelogMatrixEntry(BaseModel): + """Pydantic model for validating final changelog matrix entry structure. + This imposes a strict contract on the output of process_changelog.py, dictated by + the expected input to the run-sweep.yml workflow file. + """ + model_config = ConfigDict(extra="forbid", populate_by_name=True) + + single_node: dict[str, list[SingleNodeMatrixEntry] + ] = Field(default_factory=dict) + multi_node: dict[str, list[MultiNodeMatrixEntry] + ] = Field(default_factory=dict) + changelog_metadata: ChangelogMetadata + + +# ============================================================================= +# File Loading Functions +# ============================================================================= + + +def load_config_files(config_files: List[str], validate: bool = True) -> dict: + """Load and merge configuration files. + + Args: + config_files: List of paths to YAML configuration files. + validate: If True, run validate_master_config on loaded data. Defaults to True. 
+ + Returns: + Merged configuration dictionary. + + Raises: + ValueError: If file doesn't exist, isn't a dict, or has duplicate keys. + """ + all_config_data = {} + for config_file in config_files: + try: + with open(config_file, 'r') as f: + config_data = yaml.safe_load(f) + assert isinstance( + config_data, dict), f"Config file '{config_file}' must contain a dictionary" + + # Don't allow '*' wildcard in master config keys as we need to reserve these + # for expansion in process_changelog.py + for key in config_data.keys(): + if "*" in key: + raise ValueError( + f" Wildcard '*' is not allowed in master config keys: '{key}'") + + # Check for duplicate keys + duplicate_keys = set(all_config_data.keys()) & set( + config_data.keys()) + if duplicate_keys: + raise ValueError( + f"Duplicate configuration keys found in '{config_file}': {', '.join(sorted(duplicate_keys))}" + ) + + all_config_data.update(config_data) + except FileNotFoundError: + raise ValueError(f"Input file '{config_file}' does not exist.") + + if validate: + validate_master_config(all_config_data) + + return all_config_data + + +def load_runner_file(runner_file: str, validate: bool = True) -> dict: + """Load runner configuration file. + + Args: + runner_file: Path to the runner YAML configuration file. + validate: If True, run validate_runner_config on loaded data. Defaults to True. + + Returns: + Runner configuration dictionary. + + Raises: + ValueError: If file doesn't exist or fails validation. 
+ """ + try: + with open(runner_file, 'r') as f: + runner_config = yaml.safe_load(f) + except FileNotFoundError: + raise ValueError( + f"Runner config file '{runner_file}' does not exist.") + + if validate: + validate_runner_config(runner_config) + + return runner_config diff --git a/utils/process_changelog.py b/utils/process_changelog.py new file mode 100644 index 000000000..4a856c9a8 --- /dev/null +++ b/utils/process_changelog.py @@ -0,0 +1,142 @@ +import argparse +import json +import re +import subprocess +from collections import defaultdict + +import yaml +from constants import GENERATE_SWEEPS_PY_SCRIPT, MASTER_CONFIGS, RUNNER_CONFIG +from matrix_logic.generate_sweep_configs import seq_len_to_str +from matrix_logic.validation import ( + ChangelogEntry, + ChangelogMatrixEntry, + load_config_files, +) + + +def get_added_lines(base_ref: str, head_ref: str, filepath: str) -> str: + result = subprocess.run( + ["git", "diff", base_ref, head_ref, "--", filepath], + capture_output=True, + text=True, + ) + + added_lines = [] + for line in result.stdout.split("\n"): + if line.startswith("-") and not line.startswith("---"): + # Don't allow deletions in the changelog + # By convention, it should act as a running log of performance changes, + # so we only want to see additions + raise ValueError( + f"Deletions are not allowed in {filepath}. " + f"Only additions to the changelog are permitted. " + f"Found deleted line: {line[1:]}" + ) + elif line.startswith("+") and not line.startswith("+++"): + added_lines.append(line[1:]) + + return "\n".join(added_lines) + + +def get_config_keys_from_master( + config_keys: list[str], master_config: dict +) -> list[str]: + resolved_keys = set() + for key in config_keys: + if "*" in key: + pattern = re.compile(re.escape(key).replace(r"\*", ".*")) + matched_keys = [k for k in master_config if pattern.fullmatch(k)] + if not matched_keys: + raise ValueError( + f"No config keys matched the wildcard pattern '{key}' in master configs." 
+ ) + resolved_keys.update(matched_keys) + elif key not in master_config: + raise ValueError(f"Config key '{key}' not found in master configs.") + else: + resolved_keys.add(key) + return list(resolved_keys) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--base-ref", type=str, required=True) + parser.add_argument("--head-ref", type=str, required=True) + parser.add_argument("--changelog-file", type=str, required=True) + args = parser.parse_args() + + added_yaml = get_added_lines(args.base_ref, args.head_ref, args.changelog_file) + + if not added_yaml.strip(): + raise ValueError("No additions found in the changelog file.") + + changelog_data = yaml.safe_load(added_yaml) + + if not changelog_data: + raise ValueError("No valid YAML entries found in the changelog additions.") + + final_results = { + "single_node": defaultdict(list), + "multi_node": defaultdict(list), + "changelog_metadata": { + "base_ref": args.base_ref, + "head_ref": args.head_ref, + "entries": changelog_data, + }, + } + + all_results = [] + # Deduplicate repeated configs, if for some reason a config key appears multiple times + # in one commit, we don't want to run that config two times (there will just be twice as many + # data points for that config, which is not useful) + all_configs_to_run = set() + + for entry_data in changelog_data: + entry = ChangelogEntry.model_validate(entry_data) + configs_to_run = get_config_keys_from_master( + entry.config_keys, load_config_files(MASTER_CONFIGS) + ) + + # Skip configs already processed + configs_to_run = [c for c in configs_to_run if c not in all_configs_to_run] + if not configs_to_run: + continue + all_configs_to_run.update(configs_to_run) + + try: + result = subprocess.run( + [ + "python3", + GENERATE_SWEEPS_PY_SCRIPT, + "test-config", + "--config-keys", + *configs_to_run, + "--config-files", + *MASTER_CONFIGS, + "--runner-config", + RUNNER_CONFIG, + ], + capture_output=True, + text=True, + check=True, + ) + except 
subprocess.CalledProcessError as e: + print(e.stderr) + raise + + all_results.extend(json.loads(result.stdout)) + + for result in all_results: + seq_len_str = seq_len_to_str(result["isl"], result["osl"]) + if "prefill" in result and result["prefill"] is not None: + final_results["multi_node"][seq_len_str].append(result) + else: + final_results["single_node"][seq_len_str].append(result) + + # Validate final results structure + validated = ChangelogMatrixEntry.model_validate(final_results) + print(validated.model_dump_json(by_alias=True)) + + +if __name__ == "__main__": + main() From 5383cfaa7149d053339566692f3a663c2c10ed56 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Dec 2025 10:10:21 -0600 Subject: [PATCH 13/19] chore(deps): bump the github-actions group across 1 directory with 3 updates (#331) Bumps the github-actions group with 3 updates in the / directory: [actions/checkout](https://github.com/actions/checkout), [actions/upload-artifact](https://github.com/actions/upload-artifact) and [actions/download-artifact](https://github.com/actions/download-artifact). 
Updates `actions/checkout` from 6.0.0 to 6.0.1 - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v6...8e8c483db84b4bee98b60c0593521ed34d9990e8) Updates `actions/upload-artifact` from 5.0.0 to 6.0.0 - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/330a01c490aca151604b8cf639adc76d48f6c5d4...b7c566a772e6b6bfb58ed0dc250532a479d7789f) Updates `actions/download-artifact` from 6.0.0 to 7.0.0 - [Release notes](https://github.com/actions/download-artifact/releases) - [Commits](https://github.com/actions/download-artifact/compare/018cc2cf5baa6db3ef3c5f8a56943fffe632ef53...37930b1c2abaa49bbe596cd826c3c89aef350131) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: 6.0.1 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: github-actions - dependency-name: actions/upload-artifact dependency-version: 6.0.0 dependency-type: direct:production update-type: version-update:semver-major dependency-group: github-actions - dependency-name: actions/download-artifact dependency-version: 7.0.0 dependency-type: direct:production update-type: version-update:semver-major dependency-group: github-actions ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/benchmark-multinode-tmpl.yml | 2 +- .github/workflows/benchmark-tmpl.yml | 2 +- .github/workflows/collect-results.yml | 4 ++-- .github/workflows/e2e-tests.yml | 4 ++-- .github/workflows/full-sweep-1k1k-scheduler.yml | 4 ++-- .github/workflows/full-sweep-1k8k-scheduler.yml | 4 ++-- .github/workflows/full-sweep-8k1k-scheduler.yml | 4 ++-- .github/workflows/label-validation.yml | 4 ++-- .github/workflows/run-sweep.yml | 10 +++++----- 9 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/benchmark-multinode-tmpl.yml b/.github/workflows/benchmark-multinode-tmpl.yml index 6c42cbdaa..93de8faa0 100644 --- a/.github/workflows/benchmark-multinode-tmpl.yml +++ b/.github/workflows/benchmark-multinode-tmpl.yml @@ -168,7 +168,7 @@ jobs: done - name: Upload results - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: bmk_${{ env.RESULT_FILENAME }} path: agg_${{ env.RESULT_FILENAME }}_*.json diff --git a/.github/workflows/benchmark-tmpl.yml b/.github/workflows/benchmark-tmpl.yml index a48081fac..8a3dcfb5c 100644 --- a/.github/workflows/benchmark-tmpl.yml +++ b/.github/workflows/benchmark-tmpl.yml @@ -167,7 +167,7 @@ jobs: run: | python3 utils/process_result.py - name: Upload result - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: bmk_${{ env.RESULT_FILENAME }} path: agg_${{ env.RESULT_FILENAME }}.json \ No newline at end of file diff --git a/.github/workflows/collect-results.yml b/.github/workflows/collect-results.yml index d0b0bd992..ccc2ce4e4 100644 --- a/.github/workflows/collect-results.yml +++ b/.github/workflows/collect-results.yml @@ -23,7 +23,7 @@ jobs: fetch-depth: 
0 - name: Download JSON artifacts - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 with: path: results/ pattern: ${{ inputs.result-prefix && format('{0}_*', inputs.result-prefix) || '*' }} @@ -37,7 +37,7 @@ jobs: run: python3 utils/collect_results.py results/ ${{ inputs.result-prefix || 'all' }} - name: Upload aggregated results - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: results_${{ inputs.result-prefix || 'all' }} path: agg_${{ inputs.result-prefix || 'all' }}.json diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml index f807e8a97..9a8c4b70d 100644 --- a/.github/workflows/e2e-tests.yml +++ b/.github/workflows/e2e-tests.yml @@ -147,7 +147,7 @@ jobs: fetch-depth: 0 - name: Download results artifacts - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 with: path: ${{ env.RESULTS_DIR }} pattern: results_* @@ -158,7 +158,7 @@ jobs: - name: Calculate success rate run: python3 utils/calc_success_rate.py $STATS_FILENAME - - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: "run-stats" path: ${{ env.STATS_FILENAME }}.json diff --git a/.github/workflows/full-sweep-1k1k-scheduler.yml b/.github/workflows/full-sweep-1k1k-scheduler.yml index 3c592cf0a..a8b40214e 100644 --- a/.github/workflows/full-sweep-1k1k-scheduler.yml +++ b/.github/workflows/full-sweep-1k1k-scheduler.yml @@ -211,7 +211,7 @@ jobs: fetch-depth: 0 - name: Download results artifacts - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + uses: 
actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 with: path: ${{ env.RESULTS_DIR }} pattern: results_* @@ -222,7 +222,7 @@ jobs: - name: Calculate success rate run: python3 utils/calc_success_rate.py $STATS_FILENAME - - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: "run-stats" path: ${{ env.STATS_FILENAME }}.json diff --git a/.github/workflows/full-sweep-1k8k-scheduler.yml b/.github/workflows/full-sweep-1k8k-scheduler.yml index be909aad5..062f00265 100644 --- a/.github/workflows/full-sweep-1k8k-scheduler.yml +++ b/.github/workflows/full-sweep-1k8k-scheduler.yml @@ -211,7 +211,7 @@ jobs: fetch-depth: 0 - name: Download results artifacts - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 with: path: ${{ env.RESULTS_DIR }} pattern: results_* @@ -222,7 +222,7 @@ jobs: - name: Calculate success rate run: python3 utils/calc_success_rate.py $STATS_FILENAME - - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: "run-stats" path: ${{ env.STATS_FILENAME }}.json diff --git a/.github/workflows/full-sweep-8k1k-scheduler.yml b/.github/workflows/full-sweep-8k1k-scheduler.yml index 3eabe74f4..2b45b9679 100644 --- a/.github/workflows/full-sweep-8k1k-scheduler.yml +++ b/.github/workflows/full-sweep-8k1k-scheduler.yml @@ -211,7 +211,7 @@ jobs: fetch-depth: 0 - name: Download results artifacts - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 with: path: ${{ env.RESULTS_DIR }} pattern: results_* @@ -222,7 +222,7 @@ jobs: - name: Calculate success rate run: python3 
utils/calc_success_rate.py $STATS_FILENAME - - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: "run-stats" path: ${{ env.STATS_FILENAME }}.json diff --git a/.github/workflows/label-validation.yml b/.github/workflows/label-validation.yml index 3afe0ff06..1b01deb84 100644 --- a/.github/workflows/label-validation.yml +++ b/.github/workflows/label-validation.yml @@ -134,7 +134,7 @@ jobs: fetch-depth: 0 - name: Download results artifacts - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 with: path: ${{ env.RESULTS_DIR }} pattern: results_* @@ -145,7 +145,7 @@ jobs: - name: Calculate success rate run: python3 utils/calc_success_rate.py $STATS_FILENAME - - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: "run-stats" path: ${{ env.STATS_FILENAME }}.json diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index cb3c4dde5..e449942d1 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -31,7 +31,7 @@ jobs: search-space-config: ${{ steps.setup.outputs.search-space-config }} steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: fetch-depth: 0 @@ -196,7 +196,7 @@ jobs: echo "$CONFIG_JSON" | jq '.changelog_metadata' > changelog_metadata.json - name: Upload changelog artifact - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: changelog-metadata path: changelog_metadata.json @@ -212,13 +212,13 @@ jobs: 
GITHUB_TOKEN: ${{ secrets.REPO_PAT }} steps: - - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: token: ${{ secrets.REPO_PAT }} fetch-depth: 0 - name: Download results artifacts - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 with: path: ${{ env.RESULTS_DIR }} pattern: results_* @@ -229,7 +229,7 @@ jobs: - name: Calculate success rate run: python3 utils/calc_success_rate.py $STATS_FILENAME - - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: "run-stats" path: ${{ env.STATS_FILENAME }}.json From 2a85d1d8320680cfb459e6316c6d5a01070f3b59 Mon Sep 17 00:00:00 2001 From: Cameron Quilici Date: Mon, 15 Dec 2025 11:01:27 -0600 Subject: [PATCH 14/19] fix: add final newline to original perf-changelog.yaml so that there wont be erroneous negative diff [skip-sweep] (#333) --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index a74285c53..112145f10 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -80,4 +80,4 @@ description: | - Update vLLM image for NVIDIA configs from vLLM 0.11.0 to vLLM 0.11.2 - Adds kv-cache-dtype: fp8 to benchmarks/gptoss_fp4_b200_docker.sh - PR: https://github.com/InferenceMAX/InferenceMAX/pull/273 \ No newline at end of file + PR: https://github.com/InferenceMAX/InferenceMAX/pull/273 From 1a3e65c0912df5b9052f572fce40463b1a3f4cf2 Mon Sep 17 00:00:00 2001 From: ppalanga Date: Tue, 16 Dec 2025 16:01:23 -0800 Subject: [PATCH 15/19] Update MI355x Deepseek-R1 FP4 SGLang Image to v0.5.6.post1 (#330) * Update amd-master.yaml * Update perf-changelog.yaml * Update dsr1_fp4_mi355x_docker.sh * Update dsr1_fp4_mi355x_docker.sh 
--------- Co-authored-by: Cameron Quilici --- .github/configs/amd-master.yaml | 2 +- benchmarks/dsr1_fp4_mi355x_docker.sh | 5 ++++- perf-changelog.yaml | 5 +++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index a2674153a..a61f4fdd3 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -1,5 +1,5 @@ dsr1-fp4-mi355x-sglang: - image: rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915 + image: lmsysorg/sglang:v0.5.6.post1-rocm700-mi35x model: amd/DeepSeek-R1-0528-MXFP4-Preview model-prefix: dsr1 runner: mi355x diff --git a/benchmarks/dsr1_fp4_mi355x_docker.sh b/benchmarks/dsr1_fp4_mi355x_docker.sh index ca1255802..8b3750a64 100644 --- a/benchmarks/dsr1_fp4_mi355x_docker.sh +++ b/benchmarks/dsr1_fp4_mi355x_docker.sh @@ -11,6 +11,7 @@ # RESULT_FILENAME # NUM_PROMPTS export SGLANG_USE_AITER=1 +export ROCM_QUICK_REDUCE_QUANTIZATION=INT4 PREFILL_SIZE=196608 if [[ "$ISL" == "8192" && "$OSL" == "1024" ]]; then @@ -30,7 +31,9 @@ python3 -m sglang.launch_server --model-path=$MODEL --trust-remote-code \ --disable-radix-cache \ --num-continuous-decode-steps=4 \ --max-prefill-tokens=$PREFILL_SIZE \ ---cuda-graph-max-bs=128 > $SERVER_LOG 2>&1 & +--cuda-graph-max-bs=128 \ +--attention-backend aiter \ +--kv-cache-dtype fp8_e4m3 > $SERVER_LOG 2>&1 & SERVER_PID=$! 
diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 112145f10..926ac7e1d 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -81,3 +81,8 @@ - Update vLLM image for NVIDIA configs from vLLM 0.11.0 to vLLM 0.11.2 - Adds kv-cache-dtype: fp8 to benchmarks/gptoss_fp4_b200_docker.sh PR: https://github.com/InferenceMAX/InferenceMAX/pull/273 +- config-keys: + - dsr1-fp4-mi355x-sglang + description: | + - Updating MI355x Deepseek-R1 FP4 SGLang Image to upstream v0.5.6.post1 + PR: https://github.com/InferenceMAX/InferenceMAX/pull/330 From c13b7f75330b74d5475a6d6ab9e5d07dc189498f Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Tue, 16 Dec 2025 19:05:56 -0800 Subject: [PATCH 16/19] TOCTOU --- .github/workflows/pr-comment-sweep.yml | 113 +++++++++++++++++++++++-- 1 file changed, 105 insertions(+), 8 deletions(-) diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml index 9e1501d99..dfc502d8d 100644 --- a/.github/workflows/pr-comment-sweep.yml +++ b/.github/workflows/pr-comment-sweep.yml @@ -39,6 +39,9 @@ jobs: outputs: pr-number: ${{ steps.parse.outputs.pr-number || steps.resolve.outputs.pr-number }} generator-args: ${{ steps.parse.outputs.generator-args || steps.resolve.outputs.generator-args }} + author-can-bypass: ${{ steps.auth.outputs.can-bypass }} + # IMPORTANT: immutable ref (commit SHA) to prevent TOCTOU on refs/pull//head + ref: ${{ steps.ref_comment.outputs.ref || steps.ref_other.outputs.ref }} steps: - name: Parse PR comment (/sweep ...) 
id: parse @@ -65,6 +68,41 @@ jobs: echo "generator-args=$cmd_args" >> "$GITHUB_OUTPUT" echo "pr-number=$PR_NUMBER" >> "$GITHUB_OUTPUT" + - name: Check author permissions (PR comments) + id: auth + if: ${{ github.event_name == 'issue_comment' && github.event.issue.pull_request }} + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + with: + script: | + const owner = context.repo.owner; + const repo = context.repo.repo; + const username = context.payload.comment?.user?.login; + let permission = 'none'; + try { + const res = await github.rest.repos.getCollaboratorPermissionLevel({ owner, repo, username }); + permission = res.data?.permission || 'none'; + } catch (e) { + permission = 'none'; + } + const canBypass = ['admin','maintain','write'].includes(permission); + core.info(`Author ${username} permission: ${permission}; bypass=${canBypass}`); + core.setOutput('can-bypass', canBypass ? 'true' : 'false'); + + # ---- PR SHA pinning (issue_comment path) ---- + - name: Resolve immutable PR ref (pin to head SHA) + id: ref_comment + if: ${{ github.event_name == 'issue_comment' && github.event.issue.pull_request && startsWith(github.event.comment.body, '/sweep') }} + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + with: + script: | + const owner = context.repo.owner; + const repo = context.repo.repo; + const pr = context.issue.number; + const res = await github.rest.pulls.get({ owner, repo, pull_number: pr }); + const sha = res.data.head.sha; + core.info(`Resolved PR #${pr} head SHA: ${sha}`); + core.setOutput('ref', sha); + - name: Reply with run link if: ${{ github.event_name == 'issue_comment' && startsWith(github.event.comment.body, '/sweep') && github.repository_owner == 'InferenceMAX' }} uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 @@ -72,6 +110,9 @@ jobs: env: RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} AUTHOR: ${{ 
github.event.comment.user.login }} + GEN_CMD: ${{ steps.parse.outputs.generator-args }} + CAN_BYPASS: ${{ steps.auth.outputs.can-bypass }} + PINNED_REF: ${{ steps.ref_comment.outputs.ref }} with: github-token: ${{ github.token }} script: | @@ -80,7 +121,12 @@ jobs: const issue_number = context.issue.number; const runUrl = process.env.RUN_URL; const author = process.env.AUTHOR; - const body = `@${author} thanks! Kicking off a sweep.\n\nRun: ${runUrl}\nApproval: required in environment 'bryan-test'.`; + const genCmd = process.env.GEN_CMD || ''; + const canBypass = (process.env.CAN_BYPASS || '').toLowerCase() === 'true'; + const pinned = process.env.PINNED_REF || ''; + const shortSha = pinned ? pinned.slice(0, 7) : ''; + const approvalMsg = canBypass ? 'Approval: not required (trusted collaborator).' : "Approval: required in environment 'bryan-test'."; + const body = `@${author} thanks! Kicking off a sweep.\n\nRun: ${runUrl}\nCommand: \`${genCmd}\`\nPinned ref: \`${shortSha}\`\n${approvalMsg}`; await github.rest.issues.createComment({ owner, repo, issue_number, body }); - name: Find PR for this branch (if any) @@ -116,6 +162,30 @@ jobs: echo "pr-number=$pr_number" >> "$GITHUB_OUTPUT" echo "generator-args=$gen_args" >> "$GITHUB_OUTPUT" + # ---- Immutable ref for push/dispatch: + # If this is tied to an open PR, pin to PR head SHA; else use the pushed commit SHA. 
+ - name: Resolve immutable ref (pin PR to head SHA; else use event SHA) + id: ref_other + if: ${{ github.event_name != 'issue_comment' }} + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + env: + PR_NUMBER: ${{ steps.resolve.outputs.pr-number }} + with: + script: | + const owner = context.repo.owner; + const repo = context.repo.repo; + const prStr = process.env.PR_NUMBER || ''; + if (prStr) { + const pr = Number(prStr); + const res = await github.rest.pulls.get({ owner, repo, pull_number: pr }); + const sha = res.data.head.sha; + core.info(`Resolved PR #${pr} head SHA: ${sha}`); + core.setOutput('ref', sha); + } else { + core.info(`No PR detected; using event SHA: ${context.sha}`); + core.setOutput('ref', context.sha); + } + - name: Reply with run link (manual trigger) if: ${{ github.event_name == 'workflow_dispatch' && steps.resolve.outputs.pr-number != '' && github.repository_owner == 'InferenceMAX' }} uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 @@ -124,6 +194,8 @@ jobs: RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} AUTHOR: ${{ github.actor }} ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }} + GEN_CMD: ${{ steps.resolve.outputs.generator-args }} + PINNED_REF: ${{ steps.ref_other.outputs.ref }} with: github-token: ${{ github.token }} script: | @@ -132,7 +204,10 @@ jobs: const issue_number = process.env.ISSUE_NUMBER; const runUrl = process.env.RUN_URL; const author = process.env.AUTHOR; - const body = `@${author} triggered a manual sweep.\n\nRun: ${runUrl}\n(Manual run on branch ${context.ref})`; + const genCmd = process.env.GEN_CMD || ''; + const pinned = process.env.PINNED_REF || ''; + const shortSha = pinned ? 
pinned.slice(0, 7) : ''; + const body = `@${author} triggered a manual sweep.\n\nRun: ${runUrl}\nCommand: \`${genCmd}\`\nPinned ref: \`${shortSha}\`\n(Manual run on branch ${context.ref})`; await github.rest.issues.createComment({ owner, repo, issue_number, body }); - name: Reply with run link (push trigger) @@ -143,6 +218,8 @@ jobs: RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} AUTHOR: ${{ github.actor }} ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }} + GEN_CMD: ${{ steps.resolve.outputs.generator-args }} + PINNED_REF: ${{ steps.ref_other.outputs.ref }} with: github-token: ${{ github.token }} script: | @@ -151,28 +228,47 @@ jobs: const issue_number = process.env.ISSUE_NUMBER; const runUrl = process.env.RUN_URL; const author = process.env.AUTHOR; - const body = `@${author} pushed changes and triggered a sweep.\n\nRun: ${runUrl}\n(Push on ${context.ref})`; + const genCmd = process.env.GEN_CMD || ''; + const pinned = process.env.PINNED_REF || ''; + const shortSha = pinned ? 
pinned.slice(0, 7) : ''; + const body = `@${author} pushed changes and triggered a sweep.\n\nRun: ${runUrl}\nCommand: \`${genCmd}\`\nPinned ref: \`${shortSha}\`\n(Push on ${context.ref})`; await github.rest.issues.createComment({ owner, repo, issue_number, body }); approval: needs: get-jobs - if: ${{ (github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '') || (github.event_name == 'push' && needs.get-jobs.outputs.generator-args != '') }} + # Require environment approval when: + # - push events (unchanged), or + # - PR comments from non-trusted authors + if: ${{ (github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' && needs.get-jobs.outputs.author-can-bypass != 'true') || (github.event_name == 'push' && needs.get-jobs.outputs.generator-args != '') }} runs-on: ubuntu-latest name: approval environment: bryan-test steps: - run: echo "approved" + validate-trusted: + needs: [get-jobs] + if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' && needs.get-jobs.outputs.author-can-bypass == 'true' }} + uses: ./.github/workflows/e2e-tests.yml + name: validate (trusted author) + secrets: inherit + with: + generate-cli-command: ${{ needs.get-jobs.outputs.generator-args }} + test-name: PR #${{ needs.get-jobs.outputs.pr-number }} sweep + # Use pinned SHA to prevent TOCTOU on refs/pull//head + ref: ${{ needs.get-jobs.outputs.ref }} + validate: needs: [get-jobs, approval] - if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' && needs.approval.result == 'success' }} + if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' && needs.get-jobs.outputs.author-can-bypass != 'true' && needs.approval.result == 
'success' }} uses: ./.github/workflows/e2e-tests.yml name: validate secrets: inherit with: generate-cli-command: ${{ needs.get-jobs.outputs.generator-args }} test-name: PR #${{ needs.get-jobs.outputs.pr-number }} sweep - ref: refs/pull/${{ needs.get-jobs.outputs.pr-number }}/head + # Use pinned SHA to prevent TOCTOU on refs/pull//head + ref: ${{ needs.get-jobs.outputs.ref }} validate-nonpr: needs: [get-jobs, approval] @@ -183,7 +279,8 @@ jobs: with: generate-cli-command: ${{ needs.get-jobs.outputs.generator-args }} test-name: Manual/Push sweep - ref: ${{ needs.get-jobs.outputs.pr-number && format('refs/pull/{0}/head', needs.get-jobs.outputs.pr-number) || '' }} + # For push/dispatch, this is either PR head SHA (if PR found) or event SHA. + ref: ${{ needs.get-jobs.outputs.ref }} note-ignored: # Inform when comment doesn't meet criteria (non-PR or not a /sweep) @@ -191,4 +288,4 @@ jobs: runs-on: ubuntu-latest steps: - run: | - echo "Comment ignored. Either not on a PR or not a /sweep command. For PR comments, runs require environment approval." + echo "Comment ignored. Either not on a PR or not a /sweep command. For PR comments, runs require environment approval." \ No newline at end of file From 80e40d4467599bc2dbca080c1c92d84746d118d5 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Wed, 17 Dec 2025 08:10:52 -0800 Subject: [PATCH 17/19] Test new env --- .github/workflows/pr-comment-sweep.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml index dfc502d8d..ae3c0e52f 100644 --- a/.github/workflows/pr-comment-sweep.yml +++ b/.github/workflows/pr-comment-sweep.yml @@ -125,7 +125,7 @@ jobs: const canBypass = (process.env.CAN_BYPASS || '').toLowerCase() === 'true'; const pinned = process.env.PINNED_REF || ''; const shortSha = pinned ? pinned.slice(0, 7) : ''; - const approvalMsg = canBypass ? 'Approval: not required (trusted collaborator).' 
: "Approval: required in environment 'bryan-test'."; + const approvalMsg = canBypass ? 'Approval: not required (trusted collaborator).' : "Approval: required in environment 'Outside Collaborator E2E Test'."; const body = `@${author} thanks! Kicking off a sweep.\n\nRun: ${runUrl}\nCommand: \`${genCmd}\`\nPinned ref: \`${shortSha}\`\n${approvalMsg}`; await github.rest.issues.createComment({ owner, repo, issue_number, body }); @@ -242,7 +242,7 @@ jobs: if: ${{ (github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' && needs.get-jobs.outputs.author-can-bypass != 'true') || (github.event_name == 'push' && needs.get-jobs.outputs.generator-args != '') }} runs-on: ubuntu-latest name: approval - environment: bryan-test + environment: Outside Collaborator E2E Test steps: - run: echo "approved" From 724c37015aa54eb5438af416dfcf5fb788c80d77 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Wed, 17 Dec 2025 08:53:48 -0800 Subject: [PATCH 18/19] Ready for merge --- .github/workflows/pr-comment-sweep.yml | 165 ++----------------------- 1 file changed, 10 insertions(+), 155 deletions(-) diff --git a/.github/workflows/pr-comment-sweep.yml b/.github/workflows/pr-comment-sweep.yml index ae3c0e52f..a66b4dc7a 100644 --- a/.github/workflows/pr-comment-sweep.yml +++ b/.github/workflows/pr-comment-sweep.yml @@ -2,25 +2,8 @@ name: Slash Command Sweep run-name: "Validate PR #${{ github.event.issue.number }}" on: - # PR comment trigger issue_comment: types: [created] - # Manual trigger - workflow_dispatch: - inputs: - pr-number: - description: PR number to checkout (refs/pull//head) - required: false - type: string - generator-args: - description: Args passed to generate_sweep_configs.py (omit /sweep) - required: false - type: string - # Push-based example/testing - push: - branches-ignore: - - main - - master concurrency: group: "PR#${{ github.event.issue.number || github.ref_name }}" @@ -33,15 +16,15 @@ permissions: 
jobs: get-jobs: - # Skip for PR comments that are not /sweep; run for all other triggers - if: ${{ github.event_name != 'issue_comment' || (github.event.issue.pull_request && startsWith(github.event.comment.body, '/sweep')) }} + # Only run for PR comments that start with /sweep + if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '/sweep') }} runs-on: ubuntu-latest outputs: - pr-number: ${{ steps.parse.outputs.pr-number || steps.resolve.outputs.pr-number }} - generator-args: ${{ steps.parse.outputs.generator-args || steps.resolve.outputs.generator-args }} + pr-number: ${{ steps.parse.outputs.pr-number }} + generator-args: ${{ steps.parse.outputs.generator-args }} author-can-bypass: ${{ steps.auth.outputs.can-bypass }} - # IMPORTANT: immutable ref (commit SHA) to prevent TOCTOU on refs/pull//head - ref: ${{ steps.ref_comment.outputs.ref || steps.ref_other.outputs.ref }} + # Immutable ref (commit SHA) to prevent TOCTOU on refs/pull//head + ref: ${{ steps.ref_comment.outputs.ref }} steps: - name: Parse PR comment (/sweep ...) id: parse @@ -68,7 +51,7 @@ jobs: echo "generator-args=$cmd_args" >> "$GITHUB_OUTPUT" echo "pr-number=$PR_NUMBER" >> "$GITHUB_OUTPUT" - - name: Check author permissions (PR comments) + - name: Check author permissions id: auth if: ${{ github.event_name == 'issue_comment' && github.event.issue.pull_request }} uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 @@ -88,7 +71,7 @@ jobs: core.info(`Author ${username} permission: ${permission}; bypass=${canBypass}`); core.setOutput('can-bypass', canBypass ? 
'true' : 'false'); - # ---- PR SHA pinning (issue_comment path) ---- + # ---- PR SHA pinning ---- - name: Resolve immutable PR ref (pin to head SHA) id: ref_comment if: ${{ github.event_name == 'issue_comment' && github.event.issue.pull_request && startsWith(github.event.comment.body, '/sweep') }} @@ -126,120 +109,12 @@ jobs: const pinned = process.env.PINNED_REF || ''; const shortSha = pinned ? pinned.slice(0, 7) : ''; const approvalMsg = canBypass ? 'Approval: not required (trusted collaborator).' : "Approval: required in environment 'Outside Collaborator E2E Test'."; - const body = `@${author} thanks! Kicking off a sweep.\n\nRun: ${runUrl}\nCommand: \`${genCmd}\`\nPinned ref: \`${shortSha}\`\n${approvalMsg}`; - await github.rest.issues.createComment({ owner, repo, issue_number, body }); - - - name: Find PR for this branch (if any) - id: find - if: ${{ github.event_name != 'issue_comment' }} - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 - with: - script: | - const owner = context.repo.owner; - const repo = context.repo.repo; - const branch = context.ref.replace('refs/heads/', ''); - const res = await github.rest.pulls.list({ owner, repo, state: 'open', head: `${owner}:${branch}` }); - const num = res.data[0]?.number ? 
String(res.data[0].number) : ''; - core.setOutput('pr-number', num); - - - name: Prepare inputs (push/dispatch) - id: resolve - if: ${{ github.event_name != 'issue_comment' }} - shell: bash - env: - DISPATCH_PR: ${{ github.event.inputs.pr-number }} - DISPATCH_ARGS: ${{ github.event.inputs.generator-args }} - run: | - set -euo pipefail - pr_from_branch='${{ steps.find.outputs.pr-number }}' - pr_number="${DISPATCH_PR:-}"; if [[ -z "$pr_number" ]]; then pr_number="$pr_from_branch"; fi - gen_args="${DISPATCH_ARGS:-}" - if [[ -z "$gen_args" ]]; then - gen_args='full-sweep --single-node --runner-type h200 --model-prefix dsr1 --seq-lens 1k1k --max-conc 4' - fi - echo "Resolved PR: $pr_number"; - echo "Using generator args: $gen_args"; - echo "pr-number=$pr_number" >> "$GITHUB_OUTPUT" - echo "generator-args=$gen_args" >> "$GITHUB_OUTPUT" - - # ---- Immutable ref for push/dispatch: - # If this is tied to an open PR, pin to PR head SHA; else use the pushed commit SHA. - - name: Resolve immutable ref (pin PR to head SHA; else use event SHA) - id: ref_other - if: ${{ github.event_name != 'issue_comment' }} - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 - env: - PR_NUMBER: ${{ steps.resolve.outputs.pr-number }} - with: - script: | - const owner = context.repo.owner; - const repo = context.repo.repo; - const prStr = process.env.PR_NUMBER || ''; - if (prStr) { - const pr = Number(prStr); - const res = await github.rest.pulls.get({ owner, repo, pull_number: pr }); - const sha = res.data.head.sha; - core.info(`Resolved PR #${pr} head SHA: ${sha}`); - core.setOutput('ref', sha); - } else { - core.info(`No PR detected; using event SHA: ${context.sha}`); - core.setOutput('ref', context.sha); - } - - - name: Reply with run link (manual trigger) - if: ${{ github.event_name == 'workflow_dispatch' && steps.resolve.outputs.pr-number != '' && github.repository_owner == 'InferenceMAX' }} - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # 
v7.0.1 - continue-on-error: true - env: - RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - AUTHOR: ${{ github.actor }} - ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }} - GEN_CMD: ${{ steps.resolve.outputs.generator-args }} - PINNED_REF: ${{ steps.ref_other.outputs.ref }} - with: - github-token: ${{ github.token }} - script: | - const owner = context.repo.owner; - const repo = context.repo.repo; - const issue_number = process.env.ISSUE_NUMBER; - const runUrl = process.env.RUN_URL; - const author = process.env.AUTHOR; - const genCmd = process.env.GEN_CMD || ''; - const pinned = process.env.PINNED_REF || ''; - const shortSha = pinned ? pinned.slice(0, 7) : ''; - const body = `@${author} triggered a manual sweep.\n\nRun: ${runUrl}\nCommand: \`${genCmd}\`\nPinned ref: \`${shortSha}\`\n(Manual run on branch ${context.ref})`; - await github.rest.issues.createComment({ owner, repo, issue_number, body }); - - - name: Reply with run link (push trigger) - if: ${{ github.event_name == 'push' && steps.resolve.outputs.pr-number != '' && github.repository_owner == 'InferenceMAX' }} - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 - continue-on-error: true - env: - RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - AUTHOR: ${{ github.actor }} - ISSUE_NUMBER: ${{ steps.resolve.outputs.pr-number }} - GEN_CMD: ${{ steps.resolve.outputs.generator-args }} - PINNED_REF: ${{ steps.ref_other.outputs.ref }} - with: - github-token: ${{ github.token }} - script: | - const owner = context.repo.owner; - const repo = context.repo.repo; - const issue_number = process.env.ISSUE_NUMBER; - const runUrl = process.env.RUN_URL; - const author = process.env.AUTHOR; - const genCmd = process.env.GEN_CMD || ''; - const pinned = process.env.PINNED_REF || ''; - const shortSha = pinned ? 
pinned.slice(0, 7) : ''; - const body = `@${author} pushed changes and triggered a sweep.\n\nRun: ${runUrl}\nCommand: \`${genCmd}\`\nPinned ref: \`${shortSha}\`\n(Push on ${context.ref})`; + const body = `@${author} Kicking off a sweep.\n\nRun: ${runUrl}\nCommand: \`${genCmd}\`\nPinned ref: \`${shortSha}\`\n${approvalMsg}`; await github.rest.issues.createComment({ owner, repo, issue_number, body }); approval: needs: get-jobs - # Require environment approval when: - # - push events (unchanged), or - # - PR comments from non-trusted authors - if: ${{ (github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' && needs.get-jobs.outputs.author-can-bypass != 'true') || (github.event_name == 'push' && needs.get-jobs.outputs.generator-args != '') }} + if: ${{ github.event_name == 'issue_comment' && needs.get-jobs.outputs.pr-number != '' && needs.get-jobs.outputs.generator-args != '' && needs.get-jobs.outputs.author-can-bypass != 'true' }} runs-on: ubuntu-latest name: approval environment: Outside Collaborator E2E Test @@ -269,23 +144,3 @@ jobs: test-name: PR #${{ needs.get-jobs.outputs.pr-number }} sweep # Use pinned SHA to prevent TOCTOU on refs/pull//head ref: ${{ needs.get-jobs.outputs.ref }} - - validate-nonpr: - needs: [get-jobs, approval] - if: ${{ needs.get-jobs.outputs.generator-args != '' && ((github.event_name == 'push' && needs.approval.result == 'success') || github.event_name == 'workflow_dispatch') }} - uses: ./.github/workflows/e2e-tests.yml - name: validate (manual/push) - secrets: inherit - with: - generate-cli-command: ${{ needs.get-jobs.outputs.generator-args }} - test-name: Manual/Push sweep - # For push/dispatch, this is either PR head SHA (if PR found) or event SHA. 
- ref: ${{ needs.get-jobs.outputs.ref }} - - note-ignored: - # Inform when comment doesn't meet criteria (non-PR or not a /sweep) - if: ${{ github.event_name == 'issue_comment' && (!github.event.issue.pull_request || !startsWith(github.event.comment.body, '/sweep')) }} - runs-on: ubuntu-latest - steps: - - run: | - echo "Comment ignored. Either not on a PR or not a /sweep command. For PR comments, runs require environment approval." \ No newline at end of file From a397f9a983c9322e2f5da08bf2e99ff05ccd18fc Mon Sep 17 00:00:00 2001 From: Ankur Singh Date: Wed, 17 Dec 2025 07:55:29 -0800 Subject: [PATCH 19/19] Add benchmark script for GPTOSS FP4 B200 TRT-LLM (#256) * Add benchmark script for GPTOSS FP4 B200 TRT-LLM * make changes to perf changelog --------- Co-authored-by: Cameron Quilici --- benchmarks/gptoss_fp4_b200_trt_docker.sh | 90 ++++++++++++++++++++++++ perf-changelog.yaml | 5 ++ 2 files changed, 95 insertions(+) create mode 100644 benchmarks/gptoss_fp4_b200_trt_docker.sh diff --git a/benchmarks/gptoss_fp4_b200_trt_docker.sh b/benchmarks/gptoss_fp4_b200_trt_docker.sh new file mode 100644 index 000000000..1f5fbe868 --- /dev/null +++ b/benchmarks/gptoss_fp4_b200_trt_docker.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash + +# === Required Env Vars === +# MODEL +# PORT +# TP +# EP_SIZE +# DP_ATTENTION +# CONC +# ISL +# OSL +# MAX_MODEL_LEN +# RANDOM_RANGE_RATIO +# NUM_PROMPTS +# RESULT_FILENAME + +SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log) + +# GPTOSS TRTLLM Deployment Guide: +# https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/deployment-guide/quick-start-recipe-for-gpt-oss-on-trtllm.md + +MOE_BACKEND="TRTLLM" +echo "MOE_BACKEND set to '$MOE_BACKEND'" + +EXTRA_CONFIG_FILE="gptoss-fp4.yml" +export TRTLLM_ENABLE_PDL=1 +export NCCL_GRAPH_REGISTER=0 + +cat > $EXTRA_CONFIG_FILE << EOF +cuda_graph_config: + enable_padding: true + max_batch_size: $CONC +enable_attention_dp: $DP_ATTENTION +kv_cache_config: + dtype: fp8 + enable_block_reuse: false + 
free_gpu_memory_fraction: 0.85 +print_iter_log: true +stream_interval: 20 +num_postprocess_workers: 4 +moe_config: + backend: $MOE_BACKEND +EOF + +if [[ "$DP_ATTENTION" == "true" ]]; then + cat << EOF >> $EXTRA_CONFIG_FILE +attention_dp_config: + enable_balance: true +EOF +fi + +echo "Generated config file contents:" +cat $EXTRA_CONFIG_FILE + +set -x + +MAX_NUM_TOKENS=20000 + +# Launch TRT-LLM server +mpirun -n 1 --oversubscribe --allow-run-as-root \ + trtllm-serve $MODEL --port=$PORT \ + --trust_remote_code \ + --backend=pytorch \ + --max_batch_size 512 \ + --max_seq_len=$MAX_MODEL_LEN \ + --max_num_tokens=$MAX_NUM_TOKENS \ + --tp_size=$TP --ep_size=$EP_SIZE \ + --extra_llm_api_options=$EXTRA_CONFIG_FILE > $SERVER_LOG 2>&1 & + +SERVER_PID=$! + +# Source benchmark utilities +source "$(dirname "$0")/benchmark_lib.sh" + +# Wait for server to be ready +wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID" + +pip install -q datasets pandas + +run_benchmark_serving \ + --model "$MODEL" \ + --port "$PORT" \ + --backend openai \ + --input-len "$ISL" \ + --output-len "$OSL" \ + --random-range-ratio "$RANDOM_RANGE_RATIO" \ + --num-prompts "$NUM_PROMPTS" \ + --max-concurrency "$CONC" \ + --result-filename "$RESULT_FILENAME" \ + --result-dir /workspace/ \ No newline at end of file diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 926ac7e1d..995c0a684 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -86,3 +86,8 @@ description: | - Updating MI355x Deepseek-R1 FP4 SGLang Image to upstream v0.5.6.post1 PR: https://github.com/InferenceMAX/InferenceMAX/pull/330 +- config-keys: + - gptoss-fp4-b200-trt + description: | + - Add benchmark script for GPTOSS FP4 B200 TRT-LLM + PR: https://github.com/InferenceMAX/InferenceMAX/pull/256