Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
d556b88
add trt init for 70b
kedarpotdar-nv Aug 28, 2025
426f48e
remove dsr1 and add $MAX_MODEL_LEN to launch configs
kedarpotdar-nv Aug 28, 2025
12a7f6e
remove b200 tg
kedarpotdar-nv Aug 28, 2025
0fc8ab4
add RUNNER LABEL and temporarily remove bmk-b200?
kedarpotdar-nv Aug 28, 2025
4b30c03
fix per kimbo's suggestion
kedarpotdar-nv Aug 28, 2025
aab2320
revert local runner var
kedarpotdar-nv Aug 28, 2025
0c5ad16
update sqsh file name to include runner name. i.e. trt
kedarpotdar-nv Aug 28, 2025
7487baa
temporarily remove other benchmarks. only keep bmk-b200-trt
kedarpotdar-nv Aug 28, 2025
1233b53
refactor scheduler to add trt tag, update ngc image address , update …
kedarpotdar-nv Aug 28, 2025
7800006
refactor trt into separate yml
kedarpotdar-nv Aug 28, 2025
43057dd
fix file name
kedarpotdar-nv Aug 28, 2025
a94fbd0
comment vllm for now
kedarpotdar-nv Aug 28, 2025
0225b10
update port in trtllm-serve
kedarpotdar-nv Aug 28, 2025
1e594f3
update artifact name to have runner name at end
kedarpotdar-nv Aug 28, 2025
f63768c
update plot function with b200-trt
kedarpotdar-nv Aug 29, 2025
ed20d23
add h200 trt
kedarpotdar-nv Aug 29, 2025
25566a9
fix launch slurm script based on runner label
kedarpotdar-nv Aug 29, 2025
d33cda5
better identify if result is vllm or trt
kedarpotdar-nv Aug 29, 2025
de2d8de
clarify runners for trt and vllm
kedarpotdar-nv Aug 29, 2025
80dc11d
fix runner names
kedarpotdar-nv Aug 29, 2025
3cf357b
remove trt runners
kedarpotdar-nv Aug 29, 2025
9d7cbd3
ensure trt runners are correctly tagged
kedarpotdar-nv Aug 29, 2025
a2ed19c
rename launch scripts
kedarpotdar-nv Aug 29, 2025
fd1ff2e
only get latest run id
kedarpotdar-nv Aug 29, 2025
63d11bf
update trtllm image version
kedarpotdar-nv Aug 29, 2025
85a6e51
img ids
kedarpotdar-nv Aug 29, 2025
6c8af51
add fw identifier to benchmark template
kedarpotdar-nv Aug 29, 2025
9946fb8
limit concurrency for now
kedarpotdar-nv Aug 29, 2025
c59bcde
fp4 test with trt
kedarpotdar-nv Aug 29, 2025
30697f4
update result processing logic
kedarpotdar-nv Aug 29, 2025
8fed2d0
remove fp4 from h200!
kedarpotdar-nv Aug 29, 2025
631c007
fp4 processing logic
kedarpotdar-nv Aug 29, 2025
9e9df07
remove restrictions
kedarpotdar-nv Aug 29, 2025
b3855ed
merge trt into mainline 70b
kedarpotdar-nv Aug 29, 2025
374e374
add back all runners
kedarpotdar-nv Aug 29, 2025
c79ee8f
ensure trt results are collected
kedarpotdar-nv Aug 29, 2025
ca1baae
update plot function to include trt, remove 70b-trt tmpl. yml, tempor…
kedarpotdar-nv Aug 29, 2025
483ad63
fix errors!
kedarpotdar-nv Aug 29, 2025
4b854c6
fix vllm launch cmd
kedarpotdar-nv Aug 29, 2025
2caa5e5
rollback vllm changes
kedarpotdar-nv Aug 29, 2025
96f04b6
update plotter to ensure b200 , h200 and h100 are correct colors
kedarpotdar-nv Aug 29, 2025
71e8088
minor bug fix
kedarpotdar-nv Aug 29, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 97 additions & 44 deletions .github/workflows/70b-tmpl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,37 +30,37 @@ jobs:
- name: Find the latest Docker image
run: echo "Hardcoding image tags for now."

bmk-h100:
needs: find-latest-image
uses: ./.github/workflows/benchmark-tmpl.yml
secrets: inherit
with:
exp-name: ${{ inputs.exp-name }}
isl: ${{ inputs.isl }}
osl: ${{ inputs.osl }}
max-model-len: ${{ inputs.max-model-len }}
random-range-ratio: ${{ inputs.random-range-ratio }}
runner: h100
image: 'kedarpotdar147/vllm0.1:latest'
model: 'nvidia/Llama-3.1-70B-Instruct-FP8'
tp-list: '[2, 4, 8]'
timeout: ${{ inputs.timeout }}
# bmk-h100:
# needs: find-latest-image
# uses: ./.github/workflows/benchmark-tmpl.yml
# secrets: inherit
# with:
# exp-name: ${{ inputs.exp-name }}
# isl: ${{ inputs.isl }}
# osl: ${{ inputs.osl }}
# max-model-len: ${{ inputs.max-model-len }}
# random-range-ratio: ${{ inputs.random-range-ratio }}
# runner: h100
# image: 'kedarpotdar147/vllm0.1:latest'
# model: 'nvidia/Llama-3.1-70B-Instruct-FP8'
# tp-list: '[2]'
# timeout: ${{ inputs.timeout }}

bmk-h200:
needs: find-latest-image
uses: ./.github/workflows/benchmark-tmpl.yml
secrets: inherit
with:
exp-name: ${{ inputs.exp-name }}
isl: ${{ inputs.isl }}
osl: ${{ inputs.osl }}
max-model-len: ${{ inputs.max-model-len }}
random-range-ratio: ${{ inputs.random-range-ratio }}
runner: h200
image: 'kedarpotdar147/vllm0.1:latest'
model: 'nvidia/Llama-3.1-70B-Instruct-FP8'
tp-list: '[1, 2, 4, 8]'
timeout: ${{ inputs.timeout }}
# bmk-h200:
# needs: find-latest-image
# uses: ./.github/workflows/benchmark-tmpl.yml
# secrets: inherit
# with:
# exp-name: ${{ inputs.exp-name }}
# isl: ${{ inputs.isl }}
# osl: ${{ inputs.osl }}
# max-model-len: ${{ inputs.max-model-len }}
# random-range-ratio: ${{ inputs.random-range-ratio }}
# runner: h200
# image: 'kedarpotdar147/vllm0.1:latest'
# model: 'nvidia/Llama-3.1-70B-Instruct-FP8'
# tp-list: '[2]'
# timeout: ${{ inputs.timeout }}

bmk-b200:
needs: find-latest-image
Expand All @@ -75,43 +75,96 @@ jobs:
runner: b200
image: 'kedarpotdar147/vllm0.1:latest'
model: 'nvidia/Llama-3.1-70B-Instruct-FP8'
tp-list: '[1, 2, 4, 8]'
tp-list: '[2]'
timeout: ${{ inputs.timeout }}

bmk-mi300x:
# bmk-mi300x:
# needs: find-latest-image
# uses: ./.github/workflows/benchmark-tmpl.yml
# secrets: inherit
# with:
# exp-name: ${{ inputs.exp-name }}
# isl: ${{ inputs.isl }}
# osl: ${{ inputs.osl }}
# max-model-len: ${{ inputs.max-model-len }}
# random-range-ratio: ${{ inputs.random-range-ratio }}
# runner: mi300x
# image: 'rocm/vllm-dev:nightly_official_0729_rc1_20250718'
# model: 'amd/Llama-3.1-70B-Instruct-FP8-KV'
# tp-list: '[1, 2, 4, 8]'
# timeout: ${{ inputs.timeout }}

# bmk-mi325x:
# needs: find-latest-image
# uses: ./.github/workflows/benchmark-tmpl.yml
# secrets: inherit
# with:
# exp-name: ${{ inputs.exp-name }}
# isl: ${{ inputs.isl }}
# osl: ${{ inputs.osl }}
# max-model-len: ${{ inputs.max-model-len }}
# random-range-ratio: ${{ inputs.random-range-ratio }}
# runner: mi325x
# image: 'rocm/vllm-dev:nightly_official_0729_rc1_20250718'
# model: 'amd/Llama-3.1-70B-Instruct-FP8-KV'
# tp-list: '[1, 2, 4, 8]'
# timeout: ${{ inputs.timeout }}

# TRT-LLM jobs
# bmk-b200-trt:
# needs: find-latest-image
# uses: ./.github/workflows/benchmark-tmpl.yml
# secrets: inherit
# with:
# exp-name: 70b-trt
# isl: ${{ inputs.isl }}
# osl: ${{ inputs.osl }}
# max-model-len: ${{ inputs.max-model-len }}
# random-range-ratio: ${{ inputs.random-range-ratio }}
# runner: b200
# image: 'nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc1'
# model: 'nvidia/Llama-3.3-70B-Instruct-FP8'
# tp-list: '[2]'
# precision: 'fp8'
# timeout: ${{ inputs.timeout }}

bmk-h200-trt:
needs: find-latest-image
uses: ./.github/workflows/benchmark-tmpl.yml
secrets: inherit
with:
exp-name: ${{ inputs.exp-name }}
exp-name: 70b-trt
isl: ${{ inputs.isl }}
osl: ${{ inputs.osl }}
max-model-len: ${{ inputs.max-model-len }}
random-range-ratio: ${{ inputs.random-range-ratio }}
runner: mi300x
image: 'rocm/vllm-dev:nightly_official_0729_rc1_20250718'
model: 'amd/Llama-3.1-70B-Instruct-FP8-KV'
tp-list: '[1, 2, 4, 8]'
runner: h200
image: 'nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc1'
model: 'nvidia/Llama-3.3-70B-Instruct-FP8'
tp-list: '[2]'
precision: 'fp8'
timeout: ${{ inputs.timeout }}

bmk-mi325x:
bmk-b200-trt-fp4:
needs: find-latest-image
uses: ./.github/workflows/benchmark-tmpl.yml
secrets: inherit
with:
exp-name: ${{ inputs.exp-name }}
exp-name: 70b-trt
isl: ${{ inputs.isl }}
osl: ${{ inputs.osl }}
max-model-len: ${{ inputs.max-model-len }}
random-range-ratio: ${{ inputs.random-range-ratio }}
runner: mi325x
image: 'rocm/vllm-dev:nightly_official_0729_rc1_20250718'
model: 'amd/Llama-3.1-70B-Instruct-FP8-KV'
tp-list: '[1, 2, 4, 8]'
runner: b200
image: 'nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc1'
model: 'nvidia/Llama-3.3-70B-Instruct-FP4'
tp-list: '[2]'
precision: 'fp4'
timeout: ${{ inputs.timeout }}


collect-results:
needs: [bmk-h100, bmk-h200, bmk-b200, bmk-mi300x, bmk-mi325x]
needs: [bmk-b200, bmk-h200-trt, bmk-b200-trt-fp4]
if: ${{ always() && !cancelled() }}
uses: ./.github/workflows/collect-results.yml
secrets: inherit
Expand Down
28 changes: 23 additions & 5 deletions .github/workflows/benchmark-tmpl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ on:
tp-list:
required: true
type: string
precision:
required: false
type: string
default: 'fp8'
timeout:
required: true
type: number
Expand All @@ -43,6 +47,8 @@ env:
MAX_MODEL_LEN: ${{ inputs.max-model-len }}
RANDOM_RANGE_RATIO: ${{ inputs.random-range-ratio }}
IMAGE: ${{ inputs.image }}
RUNNER_LABEL: ${{ inputs.runner }}
PRECISION: ${{ inputs.precision }}

jobs:
benchmark:
Expand All @@ -53,7 +59,7 @@ jobs:
fail-fast: false
matrix:
tp: ${{ fromJson(inputs.tp-list) }}
conc: [4, 8, 16, 32, 64]
conc: [4]
name: '${{ inputs.runner }} (tp${{ matrix.tp }} , conc${{ matrix.conc }})'

env:
Expand All @@ -68,7 +74,7 @@ jobs:

- name: Set result filename
run: |
RESULT_FILENAME=${{ env.EXP_NAME }}_tp${{ env.TP }}_conc${{ env.CONC }}_${{ runner.name }}
RESULT_FILENAME=${{ env.EXP_NAME }}_tp${{ env.TP }}_conc${{ env.CONC }}_${{ inputs.runner }}
echo "RESULT_FILENAME=${RESULT_FILENAME}" >> $GITHUB_ENV

- name: Launch job script
Expand All @@ -77,10 +83,22 @@ jobs:
bash ./runners/launch_${RUNNER_NAME%%_*}.sh ${{ inputs.exp-name }}

- name: Process result
run: python3 utils/process_result.py ${{ inputs.runner }} ${{ env.TP }} ${{ env.RESULT_FILENAME }}
run: |
RESULT_FILENAME=${{ env.EXP_NAME }}_tp${{ env.TP }}_conc${{ env.CONC }}_${{ inputs.runner }}
# Determine framework based on image
if [[ "${{ inputs.image }}" == *"tensorrt-llm"* ]]; then
FRAMEWORK="TRT-LLM"
elif [[ "${{ inputs.image }}" == *"vllm"* ]]; then
FRAMEWORK="vLLM"
elif [[ "${{ inputs.image }}" == *"sglang"* ]]; then
FRAMEWORK="SGLang"
else
FRAMEWORK="${{ inputs.runner }}"
fi
python3 utils/process_result.py $FRAMEWORK ${{ env.TP }} $RESULT_FILENAME ${{ env.PRECISION }}

- name: Upload result
uses: actions/upload-artifact@v4
with:
name: ${{ env.RESULT_FILENAME }}
path: agg_${{ env.RESULT_FILENAME }}.json
name: ${{ env.EXP_NAME }}_tp${{ env.TP }}_conc${{ env.CONC }}_${{ env.PRECISION }}_${{ runner.name }}
path: agg_${{ env.EXP_NAME }}_tp${{ env.TP }}_conc${{ env.CONC }}_${{ inputs.runner }}.json
4 changes: 2 additions & 2 deletions .github/workflows/cluster-cleanup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
- 'h200-nv_2'
- 'h200-nv_3'
- 'b200-nv_0'
- 'b200-nv_1'
- 'b200-nv_1'
- 'mi325x-tw_0'
- 'mi325x-tw_1'
- 'mi325x-tw_2'
Expand All @@ -47,7 +47,7 @@ jobs:
runner:
- 'h100-cr_0'
- 'h100-cr_1'
- 'b200-tg_0'
# - 'b200-tg_0'
- 'mi300x-cr_0'
- 'mi300x-amd_0'
- 'mi300x-amd_1'
Expand Down
8 changes: 7 additions & 1 deletion .github/workflows/collect-results.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,13 @@ jobs:
uses: actions/download-artifact@v4
with:
path: results/
pattern: ${{ inputs.exp-name }}_*
pattern: ${{ inputs.exp-name }}*

- name: Download TRT artifacts
uses: actions/download-artifact@v4
with:
path: results/
pattern: 70b-trt*

- name: Print summary
run: python3 utils/summarize.py results/ ${{ inputs.exp-name }} >> $GITHUB_STEP_SUMMARY
Expand Down
106 changes: 54 additions & 52 deletions .github/workflows/workflow-scheduler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,59 +23,61 @@ jobs:
osl: 1024
max-model-len: 2048
random-range-ratio: 0.8

dsr1-1k1k:
needs: cleanup
uses: ./.github/workflows/dsr1-tmpl.yml
secrets: inherit
with:
exp-name: 'dsr1_1k1k'
isl: 1024
osl: 1024
max-model-len: 2048
random-range-ratio: 0.8

_70b-8k1k:
needs: cleanup
uses: ./.github/workflows/70b-tmpl.yml
secrets: inherit
with:
exp-name: '70b_8k1k'
isl: 8192
osl: 1024
max-model-len: 9216
random-range-ratio: 0.8

# dsr1-1k1k:
# needs: cleanup
# uses: ./.github/workflows/dsr1-tmpl.yml
# secrets: inherit
# with:
# exp-name: 'dsr1_1k1k'
# isl: 1024
# osl: 1024
# max-model-len: 2048
# random-range-ratio: 0.8

# _70b-8k1k:
# needs: cleanup
# uses: ./.github/workflows/70b-tmpl.yml
# secrets: inherit
# with:
# exp-name: '70b_8k1k'
# isl: 8192
# osl: 1024
# max-model-len: 9216
# random-range-ratio: 0.8


dsr1-8k1k:
needs: cleanup
uses: ./.github/workflows/dsr1-tmpl.yml
secrets: inherit
with:
exp-name: 'dsr1_8k1k'
isl: 8192
osl: 1024
max-model-len: 9216
random-range-ratio: 0.8
# dsr1-8k1k:
# needs: cleanup
# uses: ./.github/workflows/dsr1-tmpl.yml
# secrets: inherit
# with:
# exp-name: 'dsr1_8k1k'
# isl: 8192
# osl: 1024
# max-model-len: 9216
# random-range-ratio: 0.8

_70b-1k8k:
needs: cleanup
uses: ./.github/workflows/70b-tmpl.yml
secrets: inherit
with:
exp-name: '70b_1k8k'
isl: 1024
osl: 8192
max-model-len: 9216
random-range-ratio: 0.8
timeout: 240
# _70b-1k8k:
# needs: cleanup
# uses: ./.github/workflows/70b-tmpl.yml
# secrets: inherit
# with:
# exp-name: '70b_1k8k'
# isl: 1024
# osl: 8192
# max-model-len: 9216
# random-range-ratio: 0.8
# timeout: 240

dsr1-1k8k:
needs: cleanup
uses: ./.github/workflows/dsr1-tmpl.yml
secrets: inherit
with:
exp-name: 'dsr1_1k8k'
isl: 1024
osl: 8192
max-model-len: 9216
random-range-ratio: 0.8
# dsr1-1k8k:
# needs: cleanup
# uses: ./.github/workflows/dsr1-tmpl.yml
# secrets: inherit
# with:
# exp-name: 'dsr1_1k8k'
# isl: 1024
# osl: 8192
# max-model-len: 9216
# random-range-ratio: 0.8
Loading