From d556b8862a6c54c6ff393fef4c7719f1e0aa6fab Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 09:58:28 -0700 Subject: [PATCH 01/28] add trt init for 70b --- .github/workflows/70b-tmpl.yml | 18 ++++++++- benchmarks/70b_b200-trt_slurm.sh | 63 ++++++++++++++++++++++++++++++++ runners/launch_b200-trt.sh | 23 ++++++++++++ 3 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 benchmarks/70b_b200-trt_slurm.sh create mode 100644 runners/launch_b200-trt.sh diff --git a/.github/workflows/70b-tmpl.yml b/.github/workflows/70b-tmpl.yml index 23ad88551..d196fc575 100644 --- a/.github/workflows/70b-tmpl.yml +++ b/.github/workflows/70b-tmpl.yml @@ -78,6 +78,22 @@ jobs: tp-list: '[1, 2, 4, 8]' timeout: ${{ inputs.timeout }} + bmk-b200-trt: + needs: find-latest-image + uses: ./.github/workflows/benchmark-tmpl.yml + secrets: inherit + with: + exp-name: ${{ inputs.exp-name }} + isl: ${{ inputs.isl }} + osl: ${{ inputs.osl }} + max-model-len: ${{ inputs.max-model-len }} + random-range-ratio: ${{ inputs.random-range-ratio }} + runner: b200-trt + image: 'nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc0' + model: 'nvidia/Llama-3.3-70B-Instruct-FP8' + tp-list: '[1, 2, 4, 8]' + timeout: ${{ inputs.timeout }} + bmk-mi300x: needs: find-latest-image uses: ./.github/workflows/benchmark-tmpl.yml @@ -111,7 +127,7 @@ jobs: timeout: ${{ inputs.timeout }} collect-results: - needs: [bmk-h100, bmk-h200, bmk-b200, bmk-mi300x, bmk-mi325x] + needs: [bmk-h100, bmk-h200, bmk-b200, bmk-b200-trt, bmk-mi300x, bmk-mi325x] if: ${{ always() && !cancelled() }} uses: ./.github/workflows/collect-results.yml secrets: inherit diff --git a/benchmarks/70b_b200-trt_slurm.sh b/benchmarks/70b_b200-trt_slurm.sh new file mode 100644 index 000000000..0286b9d5c --- /dev/null +++ b/benchmarks/70b_b200-trt_slurm.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash + +# === Required Env Vars === +# HF_TOKEN +# HF_HUB_CACHE +# IMAGE +# MODEL +# ISL +# OSL +# MAX_MODEL_LEN +# RANDOM_RANGE_RATIO +# TP +# CONC +# RESULT_FILENAME +# PORT_OFFSET + +echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" + +set -x +hf download $MODEL +SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log) +PORT=$(( 8888 + $PORT_OFFSET )) + +# Create llama-config.yml inline +cat > llama-config.yml << 'EOF' +enable_attention_dp: false +cuda_graph_config: + enable_padding: true + max_batch_size: 1024 +kv_cache_config: + dtype: fp8 + enable_block_reuse: false +stream_interval: 4 +EOF + +mpirun -n 1 --oversubscribe --allow-run-as-root trtllm-serve $MODEL --tp_size $TP --trust_remote_code --num_postprocess_workers 2 --extra_llm_api_options llama-config.yml > $SERVER_LOG 2>&1 & + +set +x +while IFS= read -r line; do + printf '%s\n' "$line" + if [[ "$line" =~ [Ee][Rr][Rr][Oo][Rr] ]]; then + sleep 5 + tail -n100 $SERVER_LOG + echo "JOB $SLURM_JOB_ID ran on NODE $SLURMD_NODENAME" + exit 1 + fi + if [[ "$line" == *"Application startup complete"* ]]; then + break + fi +done < <(tail -F -n0 "$SERVER_LOG") + +set -x +git clone https://github.com/kimbochen/bench_serving.git +python3 bench_serving/benchmark_serving.py \ +--model $MODEL --backend openai \ +--base-url http://0.0.0.0:$PORT \ +--dataset-name random \ +--random-input-len $ISL --random-output-len $OSL --random-range-ratio $RANDOM_RANGE_RATIO \ +--num-prompts $(( $CONC * 10 )) --max-concurrency $CONC \ +--request-rate inf --ignore-eos \ +--save-result --percentile-metrics 'ttft,tpot,itl,e2el' \ +--result-dir /workspace/ \ +--result-filename $RESULT_FILENAME.json diff --git a/runners/launch_b200-trt.sh b/runners/launch_b200-trt.sh new file mode 100644 index 000000000..ec53ea7c8 --- /dev/null +++ b/runners/launch_b200-trt.sh @@ -0,0 +1,23 @@ +#!/usr/bin/bash + +export HF_HUB_CACHE_MOUNT="/raid/hf_hub_cache/" +export PORT_OFFSET=${USER: -1} + +MODEL_CODE="${1%%_*}" +PARTITION="dgx-b200" +SQUASH_FILE="/raid/image_${MODEL_CODE}_b200-trt.sqsh" + +salloc --partition=$PARTITION --gres=gpu:$TP --exclusive --time=180 --no-shell +JOB_ID=$(squeue -u $USER -h -o %A) + +set -x +srun --jobid=$JOB_ID bash -c "enroot import -o $SQUASH_FILE docker://$IMAGE" +srun --jobid=$JOB_ID \ +--container-image=$SQUASH_FILE \ +--container-mounts=$GITHUB_WORKSPACE:/workspace/,$HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \ +--container-mount-home \ +--container-workdir=/workspace/ \ +--no-container-entrypoint --export=ALL \ +bash benchmarks/${MODEL_CODE}_b200-trt_slurm.sh + +scancel $JOB_ID From 426f48e4ef3cc447bbce2f2ff794ac4517fc149b Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 10:19:09 -0700 Subject: [PATCH 02/28] remove dsr1 and add $MAX_MODEL_LEN to launch configs --- .github/workflows/workflow-scheduler.yml | 60 ++++++++++++------------ benchmarks/70b_b200-trt_slurm.sh | 2 +- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/.github/workflows/workflow-scheduler.yml b/.github/workflows/workflow-scheduler.yml index ce03740fc..c952000d3 100644 --- a/.github/workflows/workflow-scheduler.yml +++ b/.github/workflows/workflow-scheduler.yml @@ -24,16 +24,16 @@ jobs: max-model-len: 2048 random-range-ratio: 0.8 - dsr1-1k1k: - needs: cleanup - uses: ./.github/workflows/dsr1-tmpl.yml - secrets: inherit - with: - exp-name: 'dsr1_1k1k' - isl: 1024 - osl: 1024 - max-model-len: 2048 - random-range-ratio: 0.8 + # dsr1-1k1k: + # needs: cleanup + # uses: ./.github/workflows/dsr1-tmpl.yml + # secrets: inherit + # with: + # exp-name: 'dsr1_1k1k' + # isl: 1024 + # osl: 1024 + # max-model-len: 2048 + # random-range-ratio: 0.8 _70b-8k1k: needs: cleanup @@ -46,16 +46,16 @@ jobs: max-model-len: 9216 random-range-ratio: 0.8 - dsr1-8k1k: - needs: cleanup - uses: ./.github/workflows/dsr1-tmpl.yml - secrets: inherit - with: - exp-name: 'dsr1_8k1k' - isl: 8192 - osl: 1024 - max-model-len: 9216 - random-range-ratio: 0.8 + # dsr1-8k1k: + # needs: cleanup + # uses: ./.github/workflows/dsr1-tmpl.yml + # secrets: inherit + # with: + # exp-name: 'dsr1_8k1k' + # isl: 8192 + # osl: 1024 + # max-model-len: 9216 + # random-range-ratio: 0.8 _70b-1k8k: needs: cleanup @@ -69,13 +69,13 @@ jobs: random-range-ratio: 0.8 timeout: 240 - dsr1-1k8k: - needs: cleanup - uses: ./.github/workflows/dsr1-tmpl.yml - secrets: inherit - with: - exp-name: 'dsr1_1k8k' - isl: 1024 - osl: 8192 - max-model-len: 9216 - random-range-ratio: 0.8 + # dsr1-1k8k: + # needs: cleanup + # uses: ./.github/workflows/dsr1-tmpl.yml + # secrets: inherit + # with: + # exp-name: 'dsr1_1k8k' + # isl: 1024 + # osl: 8192 + # max-model-len: 9216 + # random-range-ratio: 0.8 diff --git a/benchmarks/70b_b200-trt_slurm.sh b/benchmarks/70b_b200-trt_slurm.sh index 0286b9d5c..3540fb9ce 100644 --- a/benchmarks/70b_b200-trt_slurm.sh +++ b/benchmarks/70b_b200-trt_slurm.sh @@ -33,7 +33,7 @@ kv_cache_config: stream_interval: 4 EOF -mpirun -n 1 --oversubscribe --allow-run-as-root trtllm-serve $MODEL --tp_size $TP --trust_remote_code --num_postprocess_workers 2 --extra_llm_api_options llama-config.yml > $SERVER_LOG 2>&1 & +mpirun -n 1 --oversubscribe --allow-run-as-root trtllm-serve $MODEL --tp_size $TP --trust_remote_code --max_seq_len $MAX_MODEL_LEN --max_num_tokens $MAX_MODEL_LEN--num_postprocess_workers 2 --extra_llm_api_options llama-config.yml > $SERVER_LOG 2>&1 & set +x while IFS= read -r line; do From 12a7f6e4a134edc7623ca2851c4f927687333069 Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 10:32:00 -0700 Subject: [PATCH 03/28] remove b200 tg --- .github/workflows/cluster-cleanup.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cluster-cleanup.yml b/.github/workflows/cluster-cleanup.yml index e0f30ae17..a74311d9f 100644 --- a/.github/workflows/cluster-cleanup.yml +++ b/.github/workflows/cluster-cleanup.yml @@ -47,7 +47,7 @@ jobs: runner: - 'h100-cr_0' - 'h100-cr_1' - - 'b200-tg_0' + # - 'b200-tg_0' - 'mi300x-cr_0' - 'mi300x-amd_0' - 'mi300x-amd_1' From 0fc8ab472528a28d1c261cc80e98063eb1c17204 Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 11:21:24 -0700 Subject: [PATCH 04/28] add RUNNER LABEL and temporarily remove bmk-b200? --- .github/workflows/70b-tmpl.yml | 32 ++++++++++++++-------------- .github/workflows/benchmark-tmpl.yml | 1 + benchmarks/70b_b200-trt_slurm.sh | 2 +- runners/launch_b200-nv.sh | 2 +- runners/launch_b200-trt.sh | 23 -------------------- 5 files changed, 19 insertions(+), 41 deletions(-) delete mode 100644 runners/launch_b200-trt.sh diff --git a/.github/workflows/70b-tmpl.yml b/.github/workflows/70b-tmpl.yml index d196fc575..76c062773 100644 --- a/.github/workflows/70b-tmpl.yml +++ b/.github/workflows/70b-tmpl.yml @@ -62,21 +62,21 @@ jobs: tp-list: '[1, 2, 4, 8]' timeout: ${{ inputs.timeout }} - bmk-b200: - needs: find-latest-image - uses: ./.github/workflows/benchmark-tmpl.yml - secrets: inherit - with: - exp-name: ${{ inputs.exp-name }} - isl: ${{ inputs.isl }} - osl: ${{ inputs.osl }} - max-model-len: ${{ inputs.max-model-len }} - random-range-ratio: ${{ inputs.random-range-ratio }} - runner: b200 - image: 'kedarpotdar147/vllm0.1:latest' - model: 'nvidia/Llama-3.1-70B-Instruct-FP8' - tp-list: '[1, 2, 4, 8]' - timeout: ${{ inputs.timeout }} + # bmk-b200: + # needs: find-latest-image + # uses: ./.github/workflows/benchmark-tmpl.yml + # secrets: inherit + # with: + # exp-name: ${{ inputs.exp-name }} + # isl: ${{ inputs.isl }} + # osl: ${{ inputs.osl }} + # max-model-len: ${{ inputs.max-model-len }} + # random-range-ratio: ${{ inputs.random-range-ratio }} + # runner: b200 + # image: 'kedarpotdar147/vllm0.1:latest' + # model: 'nvidia/Llama-3.1-70B-Instruct-FP8' + # tp-list: '[1, 2, 4, 8]' + # timeout: ${{ inputs.timeout }} bmk-b200-trt: needs: find-latest-image @@ -127,7 +127,7 @@ jobs: timeout: ${{ inputs.timeout }} collect-results: - needs: [bmk-h100, bmk-h200, bmk-b200, bmk-b200-trt, bmk-mi300x, bmk-mi325x] + needs: [bmk-h100, bmk-h200, bmk-b200-trt, bmk-mi300x, bmk-mi325x] if: ${{ always() && !cancelled() }} uses: ./.github/workflows/collect-results.yml secrets: inherit diff --git a/.github/workflows/benchmark-tmpl.yml b/.github/workflows/benchmark-tmpl.yml index 7e4e0b708..e9b37cb3a 100644 --- a/.github/workflows/benchmark-tmpl.yml +++ b/.github/workflows/benchmark-tmpl.yml @@ -43,6 +43,7 @@ env: MAX_MODEL_LEN: ${{ inputs.max-model-len }} RANDOM_RANGE_RATIO: ${{ inputs.random-range-ratio }} IMAGE: ${{ inputs.image }} + RUNNER_LABEL: ${{ inputs.runner }} jobs: benchmark: diff --git a/benchmarks/70b_b200-trt_slurm.sh b/benchmarks/70b_b200-trt_slurm.sh index 3540fb9ce..e2461aac6 100644 --- a/benchmarks/70b_b200-trt_slurm.sh +++ b/benchmarks/70b_b200-trt_slurm.sh @@ -33,7 +33,7 @@ kv_cache_config: stream_interval: 4 EOF -mpirun -n 1 --oversubscribe --allow-run-as-root trtllm-serve $MODEL --tp_size $TP --trust_remote_code --max_seq_len $MAX_MODEL_LEN --max_num_tokens $MAX_MODEL_LEN--num_postprocess_workers 2 --extra_llm_api_options llama-config.yml > $SERVER_LOG 2>&1 & +mpirun -n 1 --oversubscribe --allow-run-as-root trtllm-serve $MODEL --tp_size $TP --trust_remote_code --max_seq_len $MAX_MODEL_LEN --max_num_tokens $MAX_MODEL_LEN --num_postprocess_workers 2 --extra_llm_api_options llama-config.yml > $SERVER_LOG 2>&1 & set +x while IFS= read -r line; do diff --git a/runners/launch_b200-nv.sh b/runners/launch_b200-nv.sh index 83f1ec801..5acd79743 100644 --- a/runners/launch_b200-nv.sh +++ b/runners/launch_b200-nv.sh @@ -18,6 +18,6 @@ srun --jobid=$JOB_ID \ --container-mount-home \ --container-workdir=/workspace/ \ --no-container-entrypoint --export=ALL \ -bash benchmarks/${MODEL_CODE}_b200_slurm.sh +bash benchmarks/${MODEL_CODE}_${RUNNER_LABEL}_slurm.sh scancel $JOB_ID diff --git a/runners/launch_b200-trt.sh b/runners/launch_b200-trt.sh deleted file mode 100644 index ec53ea7c8..000000000 --- a/runners/launch_b200-trt.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/bash - -export HF_HUB_CACHE_MOUNT="/raid/hf_hub_cache/" -export PORT_OFFSET=${USER: -1} - -MODEL_CODE="${1%%_*}" -PARTITION="dgx-b200" -SQUASH_FILE="/raid/image_${MODEL_CODE}_b200-trt.sqsh" - -salloc --partition=$PARTITION --gres=gpu:$TP --exclusive --time=180 --no-shell -JOB_ID=$(squeue -u $USER -h -o %A) - -set -x -srun --jobid=$JOB_ID bash -c "enroot import -o $SQUASH_FILE docker://$IMAGE" -srun --jobid=$JOB_ID \ ---container-image=$SQUASH_FILE \ ---container-mounts=$GITHUB_WORKSPACE:/workspace/,$HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \ ---container-mount-home \ ---container-workdir=/workspace/ \ ---no-container-entrypoint --export=ALL \ -bash benchmarks/${MODEL_CODE}_b200-trt_slurm.sh - -scancel $JOB_ID From 4b30c03579959ef8c46cbf4ed8cba9a2a231e49c Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 11:39:45 -0700 Subject: [PATCH 05/28] fix per kimbo's suggestion --- .github/workflows/70b-tmpl.yml | 32 ++++++++++++++-------------- .github/workflows/benchmark-tmpl.yml | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/workflows/70b-tmpl.yml b/.github/workflows/70b-tmpl.yml index 76c062773..d196fc575 100644 --- a/.github/workflows/70b-tmpl.yml +++ b/.github/workflows/70b-tmpl.yml @@ -62,21 +62,21 @@ jobs: tp-list: '[1, 2, 4, 8]' timeout: ${{ inputs.timeout }} - # bmk-b200: - # needs: find-latest-image - # uses: ./.github/workflows/benchmark-tmpl.yml - # secrets: inherit - # with: - # exp-name: ${{ inputs.exp-name }} - # isl: ${{ inputs.isl }} - # osl: ${{ inputs.osl }} - # max-model-len: ${{ inputs.max-model-len }} - # random-range-ratio: ${{ inputs.random-range-ratio }} - # runner: b200 - # image: 'kedarpotdar147/vllm0.1:latest' - # model: 'nvidia/Llama-3.1-70B-Instruct-FP8' - # tp-list: '[1, 2, 4, 8]' - # timeout: ${{ inputs.timeout }} + bmk-b200: + needs: find-latest-image + uses: ./.github/workflows/benchmark-tmpl.yml + secrets: inherit + with: + exp-name: ${{ inputs.exp-name }} + isl: ${{ inputs.isl }} + osl: ${{ inputs.osl }} + max-model-len: ${{ inputs.max-model-len }} + random-range-ratio: ${{ inputs.random-range-ratio }} + runner: b200 + image: 'kedarpotdar147/vllm0.1:latest' + model: 'nvidia/Llama-3.1-70B-Instruct-FP8' + tp-list: '[1, 2, 4, 8]' + timeout: ${{ inputs.timeout }} bmk-b200-trt: needs: find-latest-image @@ -127,7 +127,7 @@ jobs: timeout: ${{ inputs.timeout }} collect-results: - needs: [bmk-h100, bmk-h200, bmk-b200-trt, bmk-mi300x, bmk-mi325x] + needs: [bmk-h100, bmk-h200, bmk-b200, bmk-b200-trt, bmk-mi300x, bmk-mi325x] if: ${{ always() && !cancelled() }} uses: ./.github/workflows/collect-results.yml secrets: inherit diff --git a/.github/workflows/benchmark-tmpl.yml b/.github/workflows/benchmark-tmpl.yml index e9b37cb3a..818c34f43 100644 --- a/.github/workflows/benchmark-tmpl.yml +++ b/.github/workflows/benchmark-tmpl.yml @@ -43,7 +43,6 @@ env: MAX_MODEL_LEN: ${{ inputs.max-model-len }} RANDOM_RANGE_RATIO: ${{ inputs.random-range-ratio }} IMAGE: ${{ inputs.image }} - RUNNER_LABEL: ${{ inputs.runner }} jobs: benchmark: @@ -75,6 +74,7 @@ jobs: - name: Launch job script run: | RUNNER_NAME=${{ runner.name }} + RUNNER_LABEL=${{ inputs.runner }} bash ./runners/launch_${RUNNER_NAME%%_*}.sh ${{ inputs.exp-name }} - name: Process result From aab2320fc667e9932013e03f26b54facf332db6e Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 12:00:52 -0700 Subject: [PATCH 06/28] revert local runner var --- .github/workflows/benchmark-tmpl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark-tmpl.yml b/.github/workflows/benchmark-tmpl.yml index 818c34f43..e9b37cb3a 100644 --- a/.github/workflows/benchmark-tmpl.yml +++ b/.github/workflows/benchmark-tmpl.yml @@ -43,6 +43,7 @@ env: MAX_MODEL_LEN: ${{ inputs.max-model-len }} RANDOM_RANGE_RATIO: ${{ inputs.random-range-ratio }} IMAGE: ${{ inputs.image }} + RUNNER_LABEL: ${{ inputs.runner }} jobs: benchmark: @@ -74,7 +75,6 @@ jobs: - name: Launch job script run: | RUNNER_NAME=${{ runner.name }} - RUNNER_LABEL=${{ inputs.runner }} bash ./runners/launch_${RUNNER_NAME%%_*}.sh ${{ inputs.exp-name }} - name: Process result From 0c5ad16c803a5bac2ec1d668add1e5be77a335e6 Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 12:54:45 -0700 Subject: [PATCH 07/28] update sqsh file name to include runner name. i.e. trt --- runners/launch_b200-nv.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runners/launch_b200-nv.sh b/runners/launch_b200-nv.sh index 5acd79743..576b4f660 100644 --- a/runners/launch_b200-nv.sh +++ b/runners/launch_b200-nv.sh @@ -5,7 +5,7 @@ export PORT_OFFSET=${USER: -1} MODEL_CODE="${1%%_*}" PARTITION="dgx-b200" -SQUASH_FILE="/raid/image_${MODEL_CODE}_b200.sqsh" +SQUASH_FILE="/raid/image_${MODEL_CODE}_${RUNNER_LABEL}.sqsh" salloc --partition=$PARTITION --gres=gpu:$TP --exclusive --time=180 --no-shell JOB_ID=$(squeue -u $USER -h -o %A) From 7487baa8ccb0c80b05199d0fb04c3e04095cc7d9 Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 13:06:10 -0700 Subject: [PATCH 08/28] temporarily remove other benchmarks. only keep bmk-b200-trt --- .github/workflows/70b-tmpl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/70b-tmpl.yml b/.github/workflows/70b-tmpl.yml index d196fc575..03f606de6 100644 --- a/.github/workflows/70b-tmpl.yml +++ b/.github/workflows/70b-tmpl.yml @@ -127,7 +127,7 @@ jobs: timeout: ${{ inputs.timeout }} collect-results: - needs: [bmk-h100, bmk-h200, bmk-b200, bmk-b200-trt, bmk-mi300x, bmk-mi325x] + needs: [ bmk-b200-trt] if: ${{ always() && !cancelled() }} uses: ./.github/workflows/collect-results.yml secrets: inherit From 1233b53434d081a21c60a84e5fac4329558d3bd2 Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 15:00:57 -0700 Subject: [PATCH 09/28] refactor scheduler to add trt tag, update ngc image address , update summarize.py to reflect backend, fix issue with result filename --- .github/workflows/70b-tmpl.yml | 2 +- .github/workflows/benchmark-tmpl.yml | 10 ++++--- .github/workflows/workflow-scheduler.yml | 34 ++++++++++++++++++++++++ utils/summarize.py | 10 +++++-- 4 files changed, 49 insertions(+), 7 deletions(-) diff --git a/.github/workflows/70b-tmpl.yml b/.github/workflows/70b-tmpl.yml index 03f606de6..a5ed09b57 100644 --- a/.github/workflows/70b-tmpl.yml +++ b/.github/workflows/70b-tmpl.yml @@ -89,7 +89,7 @@ jobs: max-model-len: ${{ inputs.max-model-len }} random-range-ratio: ${{ inputs.random-range-ratio }} runner: b200-trt - image: 'nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc0' + image: 'nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc0' model: 'nvidia/Llama-3.3-70B-Instruct-FP8' tp-list: '[1, 2, 4, 8]' timeout: ${{ inputs.timeout }} diff --git a/.github/workflows/benchmark-tmpl.yml b/.github/workflows/benchmark-tmpl.yml index e9b37cb3a..ab34d37cf 100644 --- a/.github/workflows/benchmark-tmpl.yml +++ b/.github/workflows/benchmark-tmpl.yml @@ -69,7 +69,7 @@ jobs: - name: Set result filename run: | - RESULT_FILENAME=${{ env.EXP_NAME }}_tp${{ env.TP }}_conc${{ env.CONC }}_${{ runner.name }} + RESULT_FILENAME=${{ env.EXP_NAME }}_tp${{ env.TP }}_conc${{ env.CONC }}_${{ inputs.runner }} echo "RESULT_FILENAME=${RESULT_FILENAME}" >> $GITHUB_ENV - name: Launch job script @@ -78,10 +78,12 @@ jobs: bash ./runners/launch_${RUNNER_NAME%%_*}.sh ${{ inputs.exp-name }} - name: Process result - run: python3 utils/process_result.py ${{ inputs.runner }} ${{ env.TP }} ${{ env.RESULT_FILENAME }} + run: | + RESULT_FILENAME=${{ env.EXP_NAME }}_tp${{ env.TP }}_conc${{ env.CONC }}_${{ inputs.runner }} + python3 utils/process_result.py ${{ inputs.runner }} ${{ env.TP }} $RESULT_FILENAME - name: Upload result uses: actions/upload-artifact@v4 with: - name: ${{ env.RESULT_FILENAME }} - path: agg_${{ env.RESULT_FILENAME }}.json + name: ${{ env.EXP_NAME }}_tp${{ env.TP }}_conc${{ env.CONC }}_${{ inputs.runner }} + path: agg_${{ env.EXP_NAME }}_tp${{ env.TP }}_conc${{ env.CONC }}_${{ inputs.runner }}.json diff --git a/.github/workflows/workflow-scheduler.yml b/.github/workflows/workflow-scheduler.yml index c952000d3..c8c188a07 100644 --- a/.github/workflows/workflow-scheduler.yml +++ b/.github/workflows/workflow-scheduler.yml @@ -23,6 +23,17 @@ jobs: osl: 1024 max-model-len: 2048 random-range-ratio: 0.8 + + _70b-trt-1k1k: + needs: cleanup + uses: ./.github/workflows/70b-tmpl.yml + secrets: inherit + with: + exp-name: '70b-trt_1k1k' + isl: 1024 + osl: 1024 + max-model-len: 2048 + random-range-ratio: 0.8 # dsr1-1k1k: # needs: cleanup @@ -45,6 +56,17 @@ jobs: osl: 1024 max-model-len: 9216 random-range-ratio: 0.8 + + _70b-trt-8k1k: + needs: cleanup + uses: ./.github/workflows/70b-tmpl.yml + secrets: inherit + with: + exp-name: '70b-trt_8k1k' + isl: 8192 + osl: 1024 + max-model-len: 9216 + random-range-ratio: 0.8 # dsr1-8k1k: # needs: cleanup @@ -69,6 +91,18 @@ jobs: random-range-ratio: 0.8 timeout: 240 + _70b-trt-1k8k: + needs: cleanup + uses: ./.github/workflows/70b-tmpl.yml + secrets: inherit + with: + exp-name: '70b-trt_1k8k' + isl: 1024 + osl: 8192 + max-model-len: 9216 + random-range-ratio: 0.8 + timeout: 240 + # dsr1-1k8k: # needs: cleanup # uses: ./.github/workflows/dsr1-tmpl.yml diff --git a/utils/summarize.py b/utils/summarize.py index 20d9ae127..245c77de5 100644 --- a/utils/summarize.py +++ b/utils/summarize.py @@ -12,14 +12,20 @@ results.sort(key=lambda r: (r['hw'], r['tp'], r['conc'])) summary_header = f'''\ -| Hardware | TP | Conc | TTFT (ms) | TPOT (ms) | E2EL (s) | TPUT per GPU | -| :-: | :-: | :-: | :-: | :-: | :-: | :-: |\ +| Hardware | Framework | TP | Conc | TTFT (ms) | TPOT (ms) | E2EL (s) | TPUT per GPU | +| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |\ ''' print(summary_header) for result in results: + # Extract framework from experiment name or runner + framework = 'vLLM' # default + if 'trt' in result.get('exp_name', '').lower() or 'trt' in result.get('runner', '').lower(): + framework = 'TRT-LLM' + print( f"| {result['hw'].upper()} " + f"| {framework} " f"| {result['tp']} " f"| {result['conc']} " f"| {(result['median_ttft'] * 1000):.4f} " From 7800006ac9556da77b57af249000ced89c3619a8 Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 15:07:39 -0700 Subject: [PATCH 10/28] refactor trt into separate yml --- .github/workflows/70b-trt-tmpl.yml | 55 ++++++++++++++++++++++++ .github/workflows/workflow-scheduler.yml | 6 +-- 2 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/70b-trt-tmpl.yml diff --git a/.github/workflows/70b-trt-tmpl.yml b/.github/workflows/70b-trt-tmpl.yml new file mode 100644 index 000000000..c0f5bfe0f --- /dev/null +++ b/.github/workflows/70b-trt-tmpl.yml @@ -0,0 +1,55 @@ +name: LLaMA 70B TRT-LLM Template + +on: + workflow_call: + inputs: + exp-name: + required: true + type: string + isl: + required: true + type: string + osl: + required: true + type: string + max-model-len: + required: true + type: string + random-range-ratio: + required: true + type: string + timeout: + required: false + type: number + default: 180 + +jobs: + find-latest-image: + runs-on: ubuntu-latest + steps: + - name: Find the latest Docker image + run: echo "Hardcoding image tags for now." + + bmk-b200-trt: + needs: find-latest-image + uses: ./.github/workflows/benchmark-tmpl.yml + secrets: inherit + with: + exp-name: ${{ inputs.exp-name }} + isl: ${{ inputs.isl }} + osl: ${{ inputs.osl }} + max-model-len: ${{ inputs.max-model-len }} + random-range-ratio: ${{ inputs.random-range-ratio }} + runner: b200-trt + image: 'nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc0' + model: 'nvidia/Llama-3.3-70B-Instruct-FP8' + tp-list: '[1, 2, 4, 8]' + timeout: ${{ inputs.timeout }} + + collect-results: + needs: [bmk-b200-trt] + if: ${{ always() && !cancelled() }} + uses: ./.github/workflows/collect-results.yml + secrets: inherit + with: + exp-name: ${{ inputs.exp-name }} diff --git a/.github/workflows/workflow-scheduler.yml b/.github/workflows/workflow-scheduler.yml index c8c188a07..5039b1b73 100644 --- a/.github/workflows/workflow-scheduler.yml +++ b/.github/workflows/workflow-scheduler.yml @@ -26,7 +26,7 @@ jobs: _70b-trt-1k1k: needs: cleanup - uses: ./.github/workflows/70b-tmpl.yml + uses: ./.github/workflows/70b-trt-tmpl.yml secrets: inherit with: exp-name: '70b-trt_1k1k' @@ -59,7 +59,7 @@ jobs: _70b-trt-8k1k: needs: cleanup - uses: ./.github/workflows/70b-tmpl.yml + uses: ./.github/workflows/70b-trt-tmpl.yml secrets: inherit with: exp-name: '70b-trt_8k1k' @@ -93,7 +93,7 @@ jobs: _70b-trt-1k8k: needs: cleanup - uses: ./.github/workflows/70b-tmpl.yml + uses: ./.github/workflows/70b-trt-tmpl.yml secrets: inherit with: exp-name: '70b-trt_1k8k' From 43057dde569ed5f14f591be8375d14e7d6c0d23e Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 15:13:21 -0700 Subject: [PATCH 11/28] fix file name --- .github/workflows/70b-tmpl.yml | 18 +----------------- ...-trt_slurm.sh => 70b-trt_b200-trt_slurm.sh} | 0 2 files changed, 1 insertion(+), 17 deletions(-) rename benchmarks/{70b_b200-trt_slurm.sh => 70b-trt_b200-trt_slurm.sh} (100%) diff --git a/.github/workflows/70b-tmpl.yml b/.github/workflows/70b-tmpl.yml index a5ed09b57..23ad88551 100644 --- a/.github/workflows/70b-tmpl.yml +++ b/.github/workflows/70b-tmpl.yml @@ -78,22 +78,6 @@ jobs: tp-list: '[1, 2, 4, 8]' timeout: ${{ inputs.timeout }} - bmk-b200-trt: - needs: find-latest-image - uses: ./.github/workflows/benchmark-tmpl.yml - secrets: inherit - with: - exp-name: ${{ inputs.exp-name }} - isl: ${{ inputs.isl }} - osl: ${{ inputs.osl }} - max-model-len: ${{ inputs.max-model-len }} - random-range-ratio: ${{ inputs.random-range-ratio }} - runner: b200-trt - image: 'nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc0' - model: 'nvidia/Llama-3.3-70B-Instruct-FP8' - tp-list: '[1, 2, 4, 8]' - timeout: ${{ inputs.timeout }} - bmk-mi300x: needs: find-latest-image uses: ./.github/workflows/benchmark-tmpl.yml @@ -127,7 +111,7 @@ jobs: timeout: ${{ inputs.timeout }} collect-results: - needs: [ bmk-b200-trt] + needs: [bmk-h100, bmk-h200, bmk-b200, bmk-mi300x, bmk-mi325x] if: ${{ always() && !cancelled() }} uses: ./.github/workflows/collect-results.yml secrets: inherit diff --git a/benchmarks/70b_b200-trt_slurm.sh b/benchmarks/70b-trt_b200-trt_slurm.sh similarity index 100% rename from benchmarks/70b_b200-trt_slurm.sh rename to benchmarks/70b-trt_b200-trt_slurm.sh From a94fbd0e8f7b821f0b53fc2b4f33821ecd39101b Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 15:15:22 -0700 Subject: [PATCH 12/28] comment vllm for now --- .github/workflows/workflow-scheduler.yml | 62 ++++++++++++------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/.github/workflows/workflow-scheduler.yml b/.github/workflows/workflow-scheduler.yml index 5039b1b73..7a631376c 100644 --- a/.github/workflows/workflow-scheduler.yml +++ b/.github/workflows/workflow-scheduler.yml @@ -13,16 +13,16 @@ jobs: cleanup: uses: ./.github/workflows/cluster-cleanup.yml - _70b-1k1k: - needs: cleanup - uses: ./.github/workflows/70b-tmpl.yml - secrets: inherit - with: - exp-name: '70b_1k1k' - isl: 1024 - osl: 1024 - max-model-len: 2048 - random-range-ratio: 0.8 + # _70b-1k1k: + # needs: cleanup + # uses: ./.github/workflows/70b-tmpl.yml + # secrets: inherit + # with: + # exp-name: '70b_1k1k' + # isl: 1024 + # osl: 1024 + # max-model-len: 2048 + # random-range-ratio: 0.8 _70b-trt-1k1k: needs: cleanup @@ -46,16 +46,16 @@ jobs: # max-model-len: 2048 # random-range-ratio: 0.8 - _70b-8k1k: - needs: cleanup - uses: ./.github/workflows/70b-tmpl.yml - secrets: inherit - with: - exp-name: '70b_8k1k' - isl: 8192 - osl: 1024 - max-model-len: 9216 - random-range-ratio: 0.8 + # _70b-8k1k: + # needs: cleanup + # uses: ./.github/workflows/70b-tmpl.yml + # secrets: inherit + # with: + # exp-name: '70b_8k1k' + # isl: 8192 + # osl: 1024 + # max-model-len: 9216 + # random-range-ratio: 0.8 _70b-trt-8k1k: needs: cleanup @@ -79,17 +79,17 @@ jobs: # max-model-len: 9216 # random-range-ratio: 0.8 - _70b-1k8k: - needs: cleanup - uses: ./.github/workflows/70b-tmpl.yml - secrets: inherit - with: - exp-name: '70b_1k8k' - isl: 1024 - osl: 8192 - max-model-len: 9216 - random-range-ratio: 0.8 - timeout: 240 + # _70b-1k8k: + # needs: cleanup + # uses: ./.github/workflows/70b-tmpl.yml + # secrets: inherit + # with: + # exp-name: '70b_1k8k' + # isl: 1024 + # osl: 8192 + # max-model-len: 9216 + # random-range-ratio: 0.8 + # timeout: 240 _70b-trt-1k8k: needs: cleanup From 0225b1026649b213d2ef2cd31f513ae4f206e3d7 Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 15:25:33 -0700 Subject: [PATCH 13/28] update port in trtllm-serve --- benchmarks/70b-trt_b200-trt_slurm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/70b-trt_b200-trt_slurm.sh b/benchmarks/70b-trt_b200-trt_slurm.sh index e2461aac6..5f91bb2e2 100644 --- a/benchmarks/70b-trt_b200-trt_slurm.sh +++ b/benchmarks/70b-trt_b200-trt_slurm.sh @@ -33,7 +33,7 @@ kv_cache_config: stream_interval: 4 EOF -mpirun -n 1 --oversubscribe --allow-run-as-root trtllm-serve $MODEL --tp_size $TP --trust_remote_code --max_seq_len $MAX_MODEL_LEN --max_num_tokens $MAX_MODEL_LEN --num_postprocess_workers 2 --extra_llm_api_options llama-config.yml > $SERVER_LOG 2>&1 & +mpirun -n 1 --oversubscribe --allow-run-as-root trtllm-serve $MODEL --tp_size $TP --trust_remote_code --max_seq_len $MAX_MODEL_LEN --max_num_tokens $MAX_MODEL_LEN --num_postprocess_workers 2 --extra_llm_api_options llama-config.yml --port $PORT > $SERVER_LOG 2>&1 & set +x while IFS= read -r line; do From 1e594f30fcd372124531ccd020da02cc3bbab17d Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 16:07:11 -0700 Subject: [PATCH 14/28] update artifact name to have runner name at end --- .github/workflows/benchmark-tmpl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark-tmpl.yml b/.github/workflows/benchmark-tmpl.yml index ab34d37cf..78bd69c10 100644 --- a/.github/workflows/benchmark-tmpl.yml +++ b/.github/workflows/benchmark-tmpl.yml @@ -85,5 +85,5 @@ jobs: - name: Upload result uses: actions/upload-artifact@v4 with: - name: ${{ env.EXP_NAME }}_tp${{ env.TP }}_conc${{ env.CONC }}_${{ inputs.runner }} + name: ${{ env.EXP_NAME }}_tp${{ env.TP }}_conc${{ env.CONC }}_${{ runner.name }} path: agg_${{ env.EXP_NAME }}_tp${{ env.TP }}_conc${{ env.CONC }}_${{ inputs.runner }}.json From f63768ca0f6959f7e9add440892714ad90911d15 Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 20:14:51 -0700 Subject: [PATCH 15/28] update plot function with b200-trt --- utils/plot_perf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/plot_perf.py b/utils/plot_perf.py index 35eb46eb2..7dd12dc59 100644 --- a/utils/plot_perf.py +++ b/utils/plot_perf.py @@ -10,6 +10,7 @@ 'h100': 'lightgreen', 'h200': 'green', 'b200': 'black', + 'b200-trt': 'darkblue', 'mi300x': 'pink', 'mi325x': 'red', 'mi355x': 'purple' From ed20d230dd3d8e3e8787ce674ca3c028bc213c5c Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 20:26:37 -0700 Subject: [PATCH 16/28] add h200 trt --- .github/workflows/70b-trt-tmpl.yml | 18 ++++++++- benchmarks/70b-h200-trt_slurm.sh | 63 ++++++++++++++++++++++++++++++ utils/plot_perf.py | 1 + 3 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 benchmarks/70b-h200-trt_slurm.sh diff --git a/.github/workflows/70b-trt-tmpl.yml b/.github/workflows/70b-trt-tmpl.yml index c0f5bfe0f..8c7184443 100644 --- a/.github/workflows/70b-trt-tmpl.yml +++ b/.github/workflows/70b-trt-tmpl.yml @@ -46,8 +46,24 @@ jobs: tp-list: '[1, 2, 4, 8]' timeout: ${{ inputs.timeout }} + bmk-h200-trt: + needs: find-latest-image + uses: ./.github/workflows/benchmark-tmpl.yml + secrets: inherit + with: + exp-name: ${{ inputs.exp-name }} + isl: ${{ inputs.isl }} + osl: ${{ inputs.osl }} + max-model-len: ${{ inputs.max-model-len }} + random-range-ratio: ${{ inputs.random-range-ratio }} + runner: h200-trt + image: 'nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc0' + model: 'nvidia/Llama-3.3-70B-Instruct-FP8' + tp-list: '[2, 4, 8]' + timeout: ${{ inputs.timeout }} + collect-results: - needs: [bmk-b200-trt] + needs: [bmk-b200-trt, bmk-h200-trt] if: ${{ always() && !cancelled() }} uses: ./.github/workflows/collect-results.yml secrets: inherit diff --git a/benchmarks/70b-h200-trt_slurm.sh b/benchmarks/70b-h200-trt_slurm.sh new file mode 100644 index 000000000..5f91bb2e2 --- /dev/null +++ b/benchmarks/70b-h200-trt_slurm.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash + +# === Required Env Vars === +# HF_TOKEN +# HF_HUB_CACHE +# IMAGE +# MODEL +# ISL +# OSL +# MAX_MODEL_LEN +# RANDOM_RANGE_RATIO +# TP +# CONC +# RESULT_FILENAME +# PORT_OFFSET + +echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" + +set -x +hf download $MODEL +SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log) +PORT=$(( 8888 + $PORT_OFFSET )) + +# Create llama-config.yml inline +cat > llama-config.yml << 'EOF' +enable_attention_dp: false +cuda_graph_config: + enable_padding: true + max_batch_size: 1024 +kv_cache_config: + dtype: fp8 + enable_block_reuse: false +stream_interval: 4 +EOF + +mpirun -n 1 --oversubscribe --allow-run-as-root trtllm-serve $MODEL --tp_size $TP --trust_remote_code --max_seq_len $MAX_MODEL_LEN --max_num_tokens $MAX_MODEL_LEN --num_postprocess_workers 2 --extra_llm_api_options llama-config.yml --port $PORT > $SERVER_LOG 2>&1 & + +set +x +while IFS= read -r line; do + printf '%s\n' "$line" + if [[ "$line" =~ [Ee][Rr][Rr][Oo][Rr] ]]; then + sleep 5 + tail -n100 $SERVER_LOG + echo "JOB $SLURM_JOB_ID ran on NODE $SLURMD_NODENAME" + exit 1 + fi + if [[ "$line" == *"Application startup complete"* ]]; then + break + fi +done < <(tail -F -n0 "$SERVER_LOG") + +set -x +git clone https://github.com/kimbochen/bench_serving.git +python3 bench_serving/benchmark_serving.py \ +--model $MODEL --backend openai \ +--base-url http://0.0.0.0:$PORT \ +--dataset-name random \ +--random-input-len $ISL --random-output-len $OSL --random-range-ratio $RANDOM_RANGE_RATIO \ +--num-prompts $(( $CONC * 10 )) --max-concurrency $CONC \ +--request-rate inf --ignore-eos \ +--save-result --percentile-metrics 'ttft,tpot,itl,e2el' \ +--result-dir /workspace/ \ +--result-filename $RESULT_FILENAME.json diff --git a/utils/plot_perf.py b/utils/plot_perf.py index 7dd12dc59..5b2909fe3 100644 --- a/utils/plot_perf.py +++ b/utils/plot_perf.py @@ -9,6 +9,7 @@ hw_color = { 'h100': 'lightgreen', 'h200': 'green', + 'h200-trt': 'darkgreen', 'b200': 'black', 'b200-trt': 'darkblue', 'mi300x': 'pink', From 25566a91ab2054d175e062f9b0529a416675a07c Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 20:32:19 -0700 Subject: [PATCH 17/28] fix launch slurm script based on runner label --- runners/launch_h200-nv.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runners/launch_h200-nv.sh b/runners/launch_h200-nv.sh index 4bedf9b71..e53d952d0 100644 --- a/runners/launch_h200-nv.sh +++ b/runners/launch_h200-nv.sh @@ -5,7 +5,7 @@ export HF_HUB_CACHE_MOUNT="/raid/hf_hub_cache/" export PORT_OFFSET=${USER: -1} PARTITION="dgx-h200" -SQUASH_FILE="/raid/image_${MODEL_CODE}_h200.sqsh" +SQUASH_FILE="/raid/image_${MODEL_CODE}_${RUNNER_LABEL}.sqsh" salloc --partition=$PARTITION --gres=gpu:$TP --exclusive --time=180 --no-shell JOB_ID=$(squeue -u $USER -h -o %A) @@ -18,6 +18,6 @@ srun --jobid=$JOB_ID \ --container-mount-home \ --container-workdir=/workspace/ \ --no-container-entrypoint --export=ALL \ -bash benchmarks/${MODEL_CODE}_h200_slurm.sh +bash benchmarks/${MODEL_CODE}_${RUNNER_LABEL}_slurm.sh scancel $JOB_ID From d33cda5d7c7095d07c8d396b57873ca709b2d577 Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 20:46:15 -0700 Subject: [PATCH 18/28] better identify if result is vllm or trt --- utils/summarize.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/utils/summarize.py b/utils/summarize.py index 245c77de5..6c6f9dc43 100644 --- a/utils/summarize.py +++ b/utils/summarize.py @@ -18,10 +18,19 @@ print(summary_header) for result in results: - # Extract framework from experiment name or runner - framework = 'vLLM' # default - if 'trt' in result.get('exp_name', '').lower() or 'trt' in result.get('runner', '').lower(): - framework = 'TRT-LLM' + # Extract framework - prefer explicit framework field, fallback to detection + framework = result.get('framework', 'vLLM') # default to vLLM if not specified + + # If no explicit framework field, try to detect from other fields + if framework == 'vLLM': + exp_name = result.get('exp_name', '') + runner = result.get('runner', '') + + # Check for TRT-LLM indicators + if ('trt' in exp_name.lower() or 'trt' in runner.lower() or + 'trt-llm' in exp_name.lower() or 'trt-llm' in runner.lower() or + 'tensorrt' in exp_name.lower() or 'tensorrt' in runner.lower()): + framework = 'TRT-LLM' print( f"| {result['hw'].upper()} " From de2d8de25e2291400da52706583d8941b7d57558 Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 21:26:01 -0700 Subject: [PATCH 19/28] clarify runners for trt and vllm --- .github/workflows/70b-tmpl.yml | 4 +- .github/workflows/70b-trt-tmpl.yml | 4 +- .github/workflows/cluster-cleanup.yml | 5 ++ .github/workflows/workflow-scheduler.yml | 62 ++++++++++++------------ 4 files changed, 40 insertions(+), 35 deletions(-) diff --git a/.github/workflows/70b-tmpl.yml b/.github/workflows/70b-tmpl.yml index 23ad88551..ff70adcca 100644 --- a/.github/workflows/70b-tmpl.yml +++ b/.github/workflows/70b-tmpl.yml @@ -59,7 +59,7 @@ jobs: runner: h200 image: 'kedarpotdar147/vllm0.1:latest' model: 'nvidia/Llama-3.1-70B-Instruct-FP8' - tp-list: '[1, 2, 4, 8]' + tp-list: '[2]' timeout: ${{ inputs.timeout }} bmk-b200: @@ -75,7 +75,7 @@ jobs: runner: b200 image: 'kedarpotdar147/vllm0.1:latest' model: 'nvidia/Llama-3.1-70B-Instruct-FP8' - tp-list: '[1, 2, 4, 8]' + tp-list: '[2]' timeout: ${{ inputs.timeout }} bmk-mi300x: diff --git a/.github/workflows/70b-trt-tmpl.yml b/.github/workflows/70b-trt-tmpl.yml index 8c7184443..61e5c77d9 100644 --- a/.github/workflows/70b-trt-tmpl.yml +++ b/.github/workflows/70b-trt-tmpl.yml @@ -43,7 +43,7 @@ jobs: runner: b200-trt image: 'nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc0' model: 'nvidia/Llama-3.3-70B-Instruct-FP8' - tp-list: '[1, 2, 4, 8]' + tp-list: '[2]' timeout: ${{ inputs.timeout }} bmk-h200-trt: @@ -59,7 +59,7 @@ jobs: runner: h200-trt image: 'nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc0' model: 'nvidia/Llama-3.3-70B-Instruct-FP8' - tp-list: '[2, 4, 8]' + tp-list: '[2]' timeout: ${{ inputs.timeout }} collect-results: diff --git a/.github/workflows/cluster-cleanup.yml b/.github/workflows/cluster-cleanup.yml index a74311d9f..3ecf9763d 100644 --- a/.github/workflows/cluster-cleanup.yml +++ b/.github/workflows/cluster-cleanup.yml @@ -23,8 +23,13 @@ jobs: - 'h200-nv_1' - 'h200-nv_2' - 'h200-nv_3' + - 'h200-trt_0' + - 'h200-trt_1' + - 'h200-trt_2' - 'b200-nv_0' - 'b200-nv_1' + - 'b200-trt_0' + - 'b200-trt_1' - 'mi325x-tw_0' - 'mi325x-tw_1' - 'mi325x-tw_2' diff --git a/.github/workflows/workflow-scheduler.yml b/.github/workflows/workflow-scheduler.yml index 7a631376c..2acf7f741 100644 --- a/.github/workflows/workflow-scheduler.yml +++ b/.github/workflows/workflow-scheduler.yml @@ -13,16 +13,16 @@ jobs: cleanup: uses: ./.github/workflows/cluster-cleanup.yml - # _70b-1k1k: - # needs: cleanup - # uses: ./.github/workflows/70b-tmpl.yml - # secrets: inherit - # with: - # exp-name: '70b_1k1k' - # isl: 1024 - # osl: 1024 - # max-model-len: 2048 - # random-range-ratio: 0.8 + _70b-1k1k: + needs: cleanup + uses: ./.github/workflows/70b-tmpl.yml + secrets: inherit + with: + exp-name: '70b_1k1k' + isl: 1024 + osl: 1024 + max-model-len: 2048 + random-range-ratio: 0.8 _70b-trt-1k1k: needs: cleanup @@ -57,16 +57,16 @@ jobs: # max-model-len: 9216 # random-range-ratio: 0.8 - _70b-trt-8k1k: - needs: cleanup - uses: ./.github/workflows/70b-trt-tmpl.yml - secrets: inherit - with: - exp-name: '70b-trt_8k1k' - isl: 8192 - osl: 1024 - max-model-len: 9216 - random-range-ratio: 0.8 + # _70b-trt-8k1k: + # needs: cleanup + # uses: ./.github/workflows/70b-trt-tmpl.yml + # secrets: inherit + # with: + # exp-name: '70b-trt_8k1k' + # isl: 8192 + # osl: 1024 + # max-model-len: 9216 + # random-range-ratio: 0.8 # dsr1-8k1k: # needs: cleanup @@ -91,17 +91,17 @@ jobs: # random-range-ratio: 0.8 # timeout: 240 - _70b-trt-1k8k: - needs: cleanup - uses: ./.github/workflows/70b-trt-tmpl.yml - secrets: inherit - with: - exp-name: '70b-trt_1k8k' - isl: 1024 - osl: 8192 - max-model-len: 9216 - random-range-ratio: 0.8 - timeout: 240 + # _70b-trt-1k8k: + # needs: cleanup + # uses: ./.github/workflows/70b-trt-tmpl.yml + # secrets: inherit + # with: + # exp-name: '70b-trt_1k8k' + # isl: 1024 + # osl: 8192 + # max-model-len: 9216 + # random-range-ratio: 0.8 + # timeout: 240 # dsr1-1k8k: # needs: cleanup From 80dc11defa2acc3633431ed38043fe470250ab28 Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 22:01:27 -0700 Subject: [PATCH 20/28] fix runner names --- .github/workflows/70b-trt-tmpl.yml | 4 ++-- .../{70b-h200-trt_slurm.sh => 70b-trt_b200-nv_slurm.sh} | 0 .../{70b-trt_b200-trt_slurm.sh => 70b-trt_h200-nv_slurm.sh} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename benchmarks/{70b-h200-trt_slurm.sh => 70b-trt_b200-nv_slurm.sh} (100%) rename benchmarks/{70b-trt_b200-trt_slurm.sh => 70b-trt_h200-nv_slurm.sh} (100%) diff --git a/.github/workflows/70b-trt-tmpl.yml b/.github/workflows/70b-trt-tmpl.yml index 61e5c77d9..36b06e513 100644 --- a/.github/workflows/70b-trt-tmpl.yml +++ b/.github/workflows/70b-trt-tmpl.yml @@ -40,7 +40,7 @@ jobs: osl: ${{ inputs.osl }} max-model-len: ${{ inputs.max-model-len }} random-range-ratio: ${{ inputs.random-range-ratio }} - runner: b200-trt + runner: b200-nv image: 'nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc0' model: 'nvidia/Llama-3.3-70B-Instruct-FP8' tp-list: '[2]' @@ -56,7 +56,7 @@ jobs: osl: ${{ inputs.osl }} max-model-len: ${{ inputs.max-model-len }} random-range-ratio: ${{ inputs.random-range-ratio }} - runner: h200-trt + runner: h200-nv image: 'nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc0' model: 'nvidia/Llama-3.3-70B-Instruct-FP8' tp-list: '[2]' diff --git a/benchmarks/70b-h200-trt_slurm.sh b/benchmarks/70b-trt_b200-nv_slurm.sh similarity index 100% rename from benchmarks/70b-h200-trt_slurm.sh rename to benchmarks/70b-trt_b200-nv_slurm.sh diff --git a/benchmarks/70b-trt_b200-trt_slurm.sh b/benchmarks/70b-trt_h200-nv_slurm.sh similarity index 100% rename from benchmarks/70b-trt_b200-trt_slurm.sh rename to benchmarks/70b-trt_h200-nv_slurm.sh From 3cf357bfac4ec438eb52ea63726b29a53eed1d66 Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 22:06:45 -0700 Subject: [PATCH 21/28] remove trt runners --- .github/workflows/cluster-cleanup.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/cluster-cleanup.yml b/.github/workflows/cluster-cleanup.yml index 3ecf9763d..a74311d9f 100644 --- a/.github/workflows/cluster-cleanup.yml +++ b/.github/workflows/cluster-cleanup.yml @@ -23,13 +23,8 @@ jobs: - 'h200-nv_1' - 'h200-nv_2' - 'h200-nv_3' - - 'h200-trt_0' - - 'h200-trt_1' - - 'h200-trt_2' - 'b200-nv_0' - 'b200-nv_1' - - 'b200-trt_0' - - 'b200-trt_1' - 'mi325x-tw_0' - 'mi325x-tw_1' - 'mi325x-tw_2' From 9d7cbd3c3c4a89ba2ecea25b52f9396371556da8 Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 23:00:58 -0700 Subject: [PATCH 22/28] ensure trt runners are correctly tagged --- .github/workflows/70b-trt-tmpl.yml | 4 ++-- .github/workflows/workflow-scheduler.yml | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/70b-trt-tmpl.yml b/.github/workflows/70b-trt-tmpl.yml index 36b06e513..b66fdf2b7 100644 --- a/.github/workflows/70b-trt-tmpl.yml +++ b/.github/workflows/70b-trt-tmpl.yml @@ -40,7 +40,7 @@ jobs: osl: ${{ inputs.osl }} max-model-len: ${{ inputs.max-model-len }} random-range-ratio: ${{ inputs.random-range-ratio }} - runner: b200-nv + runner: b200 image: 'nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc0' model: 'nvidia/Llama-3.3-70B-Instruct-FP8' tp-list: '[2]' @@ -56,7 +56,7 @@ jobs: osl: ${{ inputs.osl }} max-model-len: ${{ inputs.max-model-len }} random-range-ratio: ${{ inputs.random-range-ratio }} - runner: h200-nv + runner: h200 image: 'nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc0' model: 'nvidia/Llama-3.3-70B-Instruct-FP8' tp-list: '[2]' diff --git a/.github/workflows/workflow-scheduler.yml b/.github/workflows/workflow-scheduler.yml index 2acf7f741..c65335569 100644 --- a/.github/workflows/workflow-scheduler.yml +++ b/.github/workflows/workflow-scheduler.yml @@ -13,16 +13,16 @@ jobs: cleanup: uses: ./.github/workflows/cluster-cleanup.yml - _70b-1k1k: - needs: cleanup - uses: ./.github/workflows/70b-tmpl.yml - secrets: inherit - with: - exp-name: '70b_1k1k' - isl: 1024 - osl: 1024 - max-model-len: 2048 - random-range-ratio: 0.8 + # _70b-1k1k: + # needs: cleanup + # uses: ./.github/workflows/70b-tmpl.yml + # secrets: inherit + # with: + # exp-name: '70b_1k1k' + # isl: 1024 + # osl: 1024 + # max-model-len: 2048 + # random-range-ratio: 0.8 _70b-trt-1k1k: needs: cleanup From a2ed19c37d8a06ad0196339305ac644e3bd4fbf6 Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 23:04:03 -0700 Subject: [PATCH 23/28] rename launch scripts --- benchmarks/{70b-trt_b200-nv_slurm.sh => 70b-trt_b200_slurm.sh} | 0 benchmarks/{70b-trt_h200-nv_slurm.sh => 70b-trt_h200_slurm.sh} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename benchmarks/{70b-trt_b200-nv_slurm.sh => 70b-trt_b200_slurm.sh} (100%) rename benchmarks/{70b-trt_h200-nv_slurm.sh => 70b-trt_h200_slurm.sh} (100%) diff --git a/benchmarks/70b-trt_b200-nv_slurm.sh b/benchmarks/70b-trt_b200_slurm.sh similarity index 100% rename from benchmarks/70b-trt_b200-nv_slurm.sh rename to benchmarks/70b-trt_b200_slurm.sh diff --git a/benchmarks/70b-trt_h200-nv_slurm.sh b/benchmarks/70b-trt_h200_slurm.sh similarity index 100% rename from benchmarks/70b-trt_h200-nv_slurm.sh rename to benchmarks/70b-trt_h200_slurm.sh From fd1ff2e45bd850fa0f3f28def31f82d6583e3715 Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 23:35:18 -0700 Subject: [PATCH 24/28] only get latest run id --- runners/launch_b200-nv.sh | 2 +- runners/launch_h200-nv.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/runners/launch_b200-nv.sh b/runners/launch_b200-nv.sh index 576b4f660..54a9d97a9 100644 --- a/runners/launch_b200-nv.sh +++ b/runners/launch_b200-nv.sh @@ -8,7 +8,7 @@ PARTITION="dgx-b200" SQUASH_FILE="/raid/image_${MODEL_CODE}_${RUNNER_LABEL}.sqsh" salloc --partition=$PARTITION --gres=gpu:$TP --exclusive --time=180 --no-shell -JOB_ID=$(squeue -u $USER -h -o %A) +JOB_ID=$(squeue -u $USER -h -o %A | tail -1) set -x srun --jobid=$JOB_ID bash -c "enroot import -o $SQUASH_FILE docker://$IMAGE" diff --git a/runners/launch_h200-nv.sh b/runners/launch_h200-nv.sh index e53d952d0..52256221a 100644 --- a/runners/launch_h200-nv.sh +++ b/runners/launch_h200-nv.sh @@ -8,7 +8,7 @@ PARTITION="dgx-h200" SQUASH_FILE="/raid/image_${MODEL_CODE}_${RUNNER_LABEL}.sqsh" salloc --partition=$PARTITION --gres=gpu:$TP --exclusive --time=180 --no-shell -JOB_ID=$(squeue -u $USER -h -o %A) +JOB_ID=$(squeue -u $USER -h -o %A | tail -1) set -x srun --jobid=$JOB_ID bash -c "enroot import -o $SQUASH_FILE docker://$IMAGE" From 63d11bf5d6515900b9cf0a6c34caac7406e9b771 Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 23:51:52 -0700 Subject: [PATCH 25/28] update trtllm image version --- .github/workflows/70b-trt-tmpl.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/70b-trt-tmpl.yml b/.github/workflows/70b-trt-tmpl.yml index b66fdf2b7..40cf27f38 100644 --- a/.github/workflows/70b-trt-tmpl.yml +++ b/.github/workflows/70b-trt-tmpl.yml @@ -41,7 +41,7 @@ jobs: max-model-len: ${{ inputs.max-model-len }} random-range-ratio: ${{ inputs.random-range-ratio }} runner: b200 - image: 'nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc0' + image: 'nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc1' model: 'nvidia/Llama-3.3-70B-Instruct-FP8' tp-list: '[2]' timeout: ${{ inputs.timeout }} @@ -57,7 +57,7 @@ jobs: max-model-len: ${{ inputs.max-model-len }} random-range-ratio: ${{ inputs.random-range-ratio }} runner: h200 - image: 'nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc0' + image: 'nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc1' model: 'nvidia/Llama-3.3-70B-Instruct-FP8' tp-list: '[2]' timeout: ${{ inputs.timeout }} From 85a6e51b95032f226074756c9b41bdee8de423f4 Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Thu, 28 Aug 2025 23:55:34 -0700 Subject: [PATCH 26/28] img ids --- runners/launch_b200-nv.sh | 2 +- runners/launch_h100-cw.sh | 2 +- runners/launch_h200-cw.sh | 4 ++-- runners/launch_h200-nb.sh | 2 +- runners/launch_h200-nv.sh | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/runners/launch_b200-nv.sh b/runners/launch_b200-nv.sh index 54a9d97a9..21ec5c35e 100644 --- a/runners/launch_b200-nv.sh +++ b/runners/launch_b200-nv.sh @@ -5,7 +5,7 @@ export PORT_OFFSET=${USER: -1} MODEL_CODE="${1%%_*}" PARTITION="dgx-b200" -SQUASH_FILE="/raid/image_${MODEL_CODE}_${RUNNER_LABEL}.sqsh" +SQUASH_FILE="/raid/image_${MODEL_CODE}_${RUNNER_LABEL}-2.sqsh" salloc --partition=$PARTITION --gres=gpu:$TP --exclusive --time=180 --no-shell JOB_ID=$(squeue -u $USER -h -o %A | tail -1) diff --git a/runners/launch_h100-cw.sh b/runners/launch_h100-cw.sh index 570790e0b..f39c2f8b0 100644 --- a/runners/launch_h100-cw.sh +++ b/runners/launch_h100-cw.sh @@ -5,7 +5,7 @@ export HF_HUB_CACHE_MOUNT="/mnt/vast/hf_hub_cache/" export PORT_OFFSET=${USER: -1} PARTITION="h100" -SQUASH_FILE="/mnt/vast/squash/image_${MODEL_CODE}_h100.sqsh" +SQUASH_FILE="/mnt/vast/squash/image_${MODEL_CODE}_h100-2.sqsh" salloc --partition=$PARTITION --gres=gpu:$TP --exclusive --time=180 --no-shell JOB_ID=$(squeue -u $USER -h -o %A) diff --git a/runners/launch_h200-cw.sh b/runners/launch_h200-cw.sh index 3245cb379..1329fd4f7 100644 --- a/runners/launch_h200-cw.sh +++ b/runners/launch_h200-cw.sh @@ -5,10 +5,10 @@ export HF_HUB_CACHE_MOUNT="/mnt/vast/hf_hub_cache/" export PORT_OFFSET=${USER: -1} PARTITION="h200" -SQUASH_FILE="/mnt/vast/squash/image_${MODEL_CODE}_h200.sqsh" +SQUASH_FILE="/mnt/vast/squash/image_${MODEL_CODE}_h200-2.sqsh" salloc --partition=$PARTITION --gres=gpu:$TP --exclusive --time=180 --no-shell -JOB_ID=$(squeue -u $USER -h -o %A) +JOB_ID=$(squeue -u $USER -h -o %A | tail -1) set -x srun --jobid=$JOB_ID bash -c "enroot import -o $SQUASH_FILE docker://$IMAGE" diff --git a/runners/launch_h200-nb.sh b/runners/launch_h200-nb.sh index 028cf8033..7d4dbd2df 100644 --- a/runners/launch_h200-nb.sh +++ b/runners/launch_h200-nb.sh @@ -5,7 +5,7 @@ export HF_HUB_CACHE_MOUNT="/home/hf_hub_cache/" export PORT_OFFSET=${USER: -1} PARTITION="main" -SQUASH_FILE="/home/squash/image_${MODEL_CODE}_h200.sqsh" +SQUASH_FILE="/home/squash/image_${MODEL_CODE}_h200-2.sqsh" salloc --partition=$PARTITION --gres=gpu:$TP --exclusive --time=180 --no-shell JOB_ID=$(squeue -u $USER -h -o %A) diff --git a/runners/launch_h200-nv.sh b/runners/launch_h200-nv.sh index 52256221a..b5b2d7df5 100644 --- a/runners/launch_h200-nv.sh +++ b/runners/launch_h200-nv.sh @@ -5,7 +5,7 @@ export HF_HUB_CACHE_MOUNT="/raid/hf_hub_cache/" export PORT_OFFSET=${USER: -1} PARTITION="dgx-h200" -SQUASH_FILE="/raid/image_${MODEL_CODE}_${RUNNER_LABEL}.sqsh" +SQUASH_FILE="/raid/image_${MODEL_CODE}_${RUNNER_LABEL}-2.sqsh" salloc --partition=$PARTITION --gres=gpu:$TP --exclusive --time=180 --no-shell JOB_ID=$(squeue -u $USER -h -o %A | tail -1) From 6c8af514fefb8aef6afc9349ca885f2f3f62af16 Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Fri, 29 Aug 2025 00:16:53 -0700 Subject: [PATCH 27/28] add fw identifier to benchmark template --- .github/workflows/benchmark-tmpl.yml | 12 +++++++++++- utils/process_result.py | 16 ++++++++++++++-- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/.github/workflows/benchmark-tmpl.yml b/.github/workflows/benchmark-tmpl.yml index 78bd69c10..4bb8213c3 100644 --- a/.github/workflows/benchmark-tmpl.yml +++ b/.github/workflows/benchmark-tmpl.yml @@ -80,7 +80,17 @@ jobs: - name: Process result run: | RESULT_FILENAME=${{ env.EXP_NAME }}_tp${{ env.TP }}_conc${{ env.CONC }}_${{ inputs.runner }} - python3 utils/process_result.py ${{ inputs.runner }} ${{ env.TP }} $RESULT_FILENAME + # Determine framework based on image + if [[ "${{ inputs.image }}" == *"tensorrt-llm"* ]]; then + FRAMEWORK="TRT-LLM" + elif [[ "${{ inputs.image }}" == *"vllm"* ]]; then + FRAMEWORK="vLLM" + elif [[ "${{ inputs.image }}" == *"sglang"* ]]; then + FRAMEWORK="SGLang" + else + FRAMEWORK="${{ inputs.runner }}" + fi + python3 utils/process_result.py $FRAMEWORK ${{ env.TP }} $RESULT_FILENAME - name: Upload result uses: actions/upload-artifact@v4 diff --git a/utils/process_result.py b/utils/process_result.py index d0f0ef000..e7b697361 100644 --- a/utils/process_result.py +++ b/utils/process_result.py @@ -3,15 +3,27 @@ from pathlib import Path -hw = sys.argv[1] +framework = sys.argv[1] # First arg is the framework (TRT-LLM, vLLM, SGLang, etc.) tp_size = int(sys.argv[2]) result_filename = sys.argv[3] with open(f'{result_filename}.json') as f: bmk_result = json.load(f) +# Extract hardware from result filename or runner name +# Result filename format: {exp-name}_tp{tp}_conc{conc}_{runner} +# We need to extract the hardware type from the runner +result_parts = result_filename.split('_') +if len(result_parts) >= 4: + runner_part = result_parts[-1] # Last part is the runner + # Extract hardware type (e.g., 'b200' from 'b200-nv_0') + hw = runner_part.split('-')[0].upper() # Convert to uppercase for consistency +else: + hw = "UNKNOWN" + data = { - 'hw': hw, + 'hw': hw, # Hardware (B200, H200, etc.) + 'framework': framework, # Framework (TRT-LLM, vLLM, SGLang, etc.) 'tp': tp_size, 'conc': int(bmk_result['max_concurrency']), 'model': bmk_result['model_id'], From 9946fb83526f734b1e89a8b571e264bf1c899331 Mon Sep 17 00:00:00 2001 From: Kedar Potdar Date: Fri, 29 Aug 2025 00:17:48 -0700 Subject: [PATCH 28/28] limit concurrency for now --- .github/workflows/benchmark-tmpl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark-tmpl.yml b/.github/workflows/benchmark-tmpl.yml index 4bb8213c3..c1e89d80a 100644 --- a/.github/workflows/benchmark-tmpl.yml +++ b/.github/workflows/benchmark-tmpl.yml @@ -54,7 +54,7 @@ jobs: fail-fast: false matrix: tp: ${{ fromJson(inputs.tp-list) }} - conc: [4, 8, 16, 32, 64] + conc: [4] name: '${{ inputs.runner }} (tp${{ matrix.tp }} , conc${{ matrix.conc }})' env: