From 2699f976c552e1f52c86f17fdf53ebe5f7355328 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 21 Nov 2025 21:10:08 +0000 Subject: [PATCH 1/4] Initial plan From f560513749f345ab02fc9ec28e2716a5e722552a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 21 Nov 2025 21:21:09 +0000 Subject: [PATCH 2/4] Update required env vars documentation in all benchmark scripts Co-authored-by: cquil11 <60715037+cquil11@users.noreply.github.com> --- benchmarks/dsr1_fp4_b200_docker.sh | 12 ++++++++++++ benchmarks/dsr1_fp4_b200_trt_slurm.sh | 13 +++++++------ benchmarks/dsr1_fp4_mi355x_docker.sh | 14 ++++++++------ benchmarks/dsr1_fp4_mi355x_slurm.sh | 13 ++++++------- benchmarks/dsr1_fp8_b200_docker.sh | 11 +++++++---- benchmarks/dsr1_fp8_b200_trt_slurm.sh | 13 +++++++------ benchmarks/dsr1_fp8_h200_slurm.sh | 11 ++++------- benchmarks/dsr1_fp8_h200_trt_slurm.sh | 13 +++++++------ benchmarks/dsr1_fp8_mi300x_docker.sh | 10 ++++++---- benchmarks/dsr1_fp8_mi300x_slurm.sh | 12 +++++------- benchmarks/dsr1_fp8_mi325x_docker.sh | 9 +++++---- benchmarks/dsr1_fp8_mi325x_slurm.sh | 10 ++++++++++ benchmarks/dsr1_fp8_mi355x_docker.sh | 9 +++++---- benchmarks/dsr1_fp8_mi355x_slurm.sh | 12 +++++------- benchmarks/gptoss_fp4_b200_docker.sh | 12 +++++------- benchmarks/gptoss_fp4_b200_trt_slurm.sh | 13 +++++++------ benchmarks/gptoss_fp4_h100_docker.sh | 8 +++----- benchmarks/gptoss_fp4_h100_slurm.sh | 10 ++++------ benchmarks/gptoss_fp4_h200_slurm.sh | 10 ++++------ benchmarks/gptoss_fp4_h200_trt_slurm.sh | 11 ++++------- benchmarks/gptoss_fp4_mi300x_docker.sh | 9 ++++++--- benchmarks/gptoss_fp4_mi300x_slurm.sh | 11 +++++------ benchmarks/gptoss_fp4_mi325x_docker.sh | 9 ++++++--- benchmarks/gptoss_fp4_mi325x_slurm.sh | 11 +++++------ benchmarks/gptoss_fp4_mi355x_docker.sh | 7 ++++--- benchmarks/gptoss_fp4_mi355x_slurm.sh | 11 +++++------ 26 files changed, 152 insertions(+), 132 deletions(-) diff --git a/benchmarks/dsr1_fp4_b200_docker.sh b/benchmarks/dsr1_fp4_b200_docker.sh index a520871fa..4ff123a32 100644 --- a/benchmarks/dsr1_fp4_b200_docker.sh +++ b/benchmarks/dsr1_fp4_b200_docker.sh @@ -1,5 +1,17 @@ #!/usr/bin/env bash +# === Required Env Vars === +# MODEL +# PORT +# TP +# CONC +# ISL +# OSL +# RANDOM_RANGE_RATIO +# RESULT_FILENAME +# EP_SIZE +# NUM_PROMPTS + nvidia-smi # To improve CI stability, we patch this helper function to prevent a race condition that diff --git a/benchmarks/dsr1_fp4_b200_trt_slurm.sh b/benchmarks/dsr1_fp4_b200_trt_slurm.sh index b4227e428..80262db66 100644 --- a/benchmarks/dsr1_fp4_b200_trt_slurm.sh +++ b/benchmarks/dsr1_fp4_b200_trt_slurm.sh @@ -1,20 +1,21 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL # MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME # PORT_OFFSET # DP_ATTENTION # EP_SIZE +# EXTRA_CONFIG_FILE +# MAX_NUM_TOKENS +# MOE_BACKEND echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" diff --git a/benchmarks/dsr1_fp4_mi355x_docker.sh b/benchmarks/dsr1_fp4_mi355x_docker.sh index c20a9f63f..c32763567 100644 --- a/benchmarks/dsr1_fp4_mi355x_docker.sh +++ b/benchmarks/dsr1_fp4_mi355x_docker.sh @@ -1,14 +1,16 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL -# MAX_MODEL_LEN -# RANDOM_RANGE_RATIO +# PORT # TP # CONC -# PORT +# ISL +# OSL +# RANDOM_RANGE_RATIO +# RESULT_FILENAME +# NUM_PROMPTS +# PREFILL_SIZE export SGLANG_USE_AITER=1 PREFILL_SIZE=196608 diff --git a/benchmarks/dsr1_fp4_mi355x_slurm.sh b/benchmarks/dsr1_fp4_mi355x_slurm.sh index f4d7f1d39..d283d29c9 100644 --- a/benchmarks/dsr1_fp4_mi355x_slurm.sh +++ b/benchmarks/dsr1_fp4_mi355x_slurm.sh @@ -1,17 +1,16 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL -# MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC -# PORT # RESULT_FILENAME +# PORT_OFFSET +# PREFILL_SIZE export SGLANG_USE_AITER=1 SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log) diff --git a/benchmarks/dsr1_fp8_b200_docker.sh b/benchmarks/dsr1_fp8_b200_docker.sh index ffa7644bd..4d8a9ff18 100644 --- a/benchmarks/dsr1_fp8_b200_docker.sh +++ b/benchmarks/dsr1_fp8_b200_docker.sh @@ -1,13 +1,16 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL # PORT # TP # CONC -# MAX_MODEL_LEN +# ISL +# OSL +# RANDOM_RANGE_RATIO +# RESULT_FILENAME +# EP_SIZE +# NUM_PROMPTS nvidia-smi diff --git a/benchmarks/dsr1_fp8_b200_trt_slurm.sh b/benchmarks/dsr1_fp8_b200_trt_slurm.sh index a9a1a04ff..488cc15c6 100644 --- a/benchmarks/dsr1_fp8_b200_trt_slurm.sh +++ b/benchmarks/dsr1_fp8_b200_trt_slurm.sh @@ -1,20 +1,21 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL # MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME # PORT_OFFSET # DP_ATTENTION # EP_SIZE +# EXTRA_CONFIG_FILE +# MAX_NUM_TOKENS +# MOE_BACKEND echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" diff --git a/benchmarks/dsr1_fp8_h200_slurm.sh b/benchmarks/dsr1_fp8_h200_slurm.sh index 06345ecb2..796c0c857 100644 --- a/benchmarks/dsr1_fp8_h200_slurm.sh +++ b/benchmarks/dsr1_fp8_h200_slurm.sh @@ -1,16 +1,13 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL -# MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME # PORT_OFFSET diff --git a/benchmarks/dsr1_fp8_h200_trt_slurm.sh b/benchmarks/dsr1_fp8_h200_trt_slurm.sh index 4ece6f7bc..e530050e5 100644 --- a/benchmarks/dsr1_fp8_h200_trt_slurm.sh +++ b/benchmarks/dsr1_fp8_h200_trt_slurm.sh @@ -1,20 +1,21 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL # MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME # PORT_OFFSET # DP_ATTENTION # EP_SIZE +# EXTRA_CONFIG_FILE +# MAX_NUM_TOKENS +# MOE_BACKEND echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" diff --git a/benchmarks/dsr1_fp8_mi300x_docker.sh b/benchmarks/dsr1_fp8_mi300x_docker.sh index 8c269dd83..09fdfeb6b 100644 --- a/benchmarks/dsr1_fp8_mi300x_docker.sh +++ b/benchmarks/dsr1_fp8_mi300x_docker.sh @@ -1,13 +1,15 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL # PORT # TP # CONC -# MAX_MODEL_LEN +# ISL +# OSL +# RANDOM_RANGE_RATIO +# RESULT_FILENAME +# NF # Reference # https://rocm.docs.amd.com/en/docs-7.0-rc1/preview/benchmark-docker/inference-sglang-deepseek-r1-fp8.html#run-the-inference-benchmark diff --git a/benchmarks/dsr1_fp8_mi300x_slurm.sh b/benchmarks/dsr1_fp8_mi300x_slurm.sh index 5fad7a587..598df7b38 100644 --- a/benchmarks/dsr1_fp8_mi300x_slurm.sh +++ b/benchmarks/dsr1_fp8_mi300x_slurm.sh @@ -1,17 +1,15 @@ #!/usr/bin/bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL -# MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME +# NF echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" diff --git a/benchmarks/dsr1_fp8_mi325x_docker.sh b/benchmarks/dsr1_fp8_mi325x_docker.sh index 72c89571d..a8cdf566a 100644 --- a/benchmarks/dsr1_fp8_mi325x_docker.sh +++ b/benchmarks/dsr1_fp8_mi325x_docker.sh @@ -1,13 +1,14 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL # PORT # TP # CONC -# MAX_MODEL_LEN +# ISL +# OSL +# RANDOM_RANGE_RATIO +# RESULT_FILENAME # Reference # https://rocm.docs.amd.com/en/docs-7.0-docker/benchmark-docker/inference-sglang-deepseek-r1-fp8.html diff --git a/benchmarks/dsr1_fp8_mi325x_slurm.sh b/benchmarks/dsr1_fp8_mi325x_slurm.sh index 67e4cc394..d0b9d3039 100644 --- a/benchmarks/dsr1_fp8_mi325x_slurm.sh +++ b/benchmarks/dsr1_fp8_mi325x_slurm.sh @@ -1,5 +1,15 @@ #!/usr/bin/bash +# === Required Env Vars === +# MODEL +# PORT +# TP +# CONC +# ISL +# OSL +# RANDOM_RANGE_RATIO +# RESULT_FILENAME + echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log) diff --git a/benchmarks/dsr1_fp8_mi355x_docker.sh b/benchmarks/dsr1_fp8_mi355x_docker.sh index 128810b42..8c5038cee 100644 --- a/benchmarks/dsr1_fp8_mi355x_docker.sh +++ b/benchmarks/dsr1_fp8_mi355x_docker.sh @@ -1,13 +1,14 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL # PORT # TP # CONC -# MAX_MODEL_LEN +# ISL +# OSL +# RANDOM_RANGE_RATIO +# RESULT_FILENAME # Reference # https://rocm.docs.amd.com/en/docs-7.0-docker/benchmark-docker/inference-sglang-deepseek-r1-fp8.html diff --git a/benchmarks/dsr1_fp8_mi355x_slurm.sh b/benchmarks/dsr1_fp8_mi355x_slurm.sh index fd6fe49fb..2645b30ce 100644 --- a/benchmarks/dsr1_fp8_mi355x_slurm.sh +++ b/benchmarks/dsr1_fp8_mi355x_slurm.sh @@ -1,17 +1,15 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL -# MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC -# PORT # RESULT_FILENAME +# PORT_OFFSET export HF_MODULES_CACHE="/tmp/hf_modules_cache/" export SGLANG_USE_AITER=1 diff --git a/benchmarks/gptoss_fp4_b200_docker.sh b/benchmarks/gptoss_fp4_b200_docker.sh index 1736701c4..4fbf4f50c 100644 --- a/benchmarks/gptoss_fp4_b200_docker.sh +++ b/benchmarks/gptoss_fp4_b200_docker.sh @@ -1,18 +1,16 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL # MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME -# PORT_OFFSET +# NUM_PROMPTS nvidia-smi diff --git a/benchmarks/gptoss_fp4_b200_trt_slurm.sh b/benchmarks/gptoss_fp4_b200_trt_slurm.sh index 7542cd1a0..86a9627cc 100644 --- a/benchmarks/gptoss_fp4_b200_trt_slurm.sh +++ b/benchmarks/gptoss_fp4_b200_trt_slurm.sh @@ -1,20 +1,21 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL # MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME # PORT_OFFSET # DP_ATTENTION # EP_SIZE +# EXTRA_CONFIG_FILE +# MAX_NUM_TOKENS +# MOE_BACKEND # GPTOSS TRTLLM Deployment Guide: # https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/deployment-guide/quick-start-recipe-for-gpt-oss-on-trtllm.md diff --git a/benchmarks/gptoss_fp4_h100_docker.sh b/benchmarks/gptoss_fp4_h100_docker.sh index f38ea3293..48b548e37 100644 --- a/benchmarks/gptoss_fp4_h100_docker.sh +++ b/benchmarks/gptoss_fp4_h100_docker.sh @@ -1,15 +1,13 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL -# MAX_MODEL_LEN -# RANDOM_RANGE_RATIO +# PORT # TP # CONC # ISL # OSL +# RANDOM_RANGE_RATIO # RESULT_FILENAME diff --git a/benchmarks/gptoss_fp4_h100_slurm.sh b/benchmarks/gptoss_fp4_h100_slurm.sh index 843219b95..2af5bcb69 100644 --- a/benchmarks/gptoss_fp4_h100_slurm.sh +++ b/benchmarks/gptoss_fp4_h100_slurm.sh @@ -1,15 +1,13 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL -# MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME # PORT_OFFSET diff --git a/benchmarks/gptoss_fp4_h200_slurm.sh b/benchmarks/gptoss_fp4_h200_slurm.sh index dc29baf8d..7fee5fe07 100644 --- a/benchmarks/gptoss_fp4_h200_slurm.sh +++ b/benchmarks/gptoss_fp4_h200_slurm.sh @@ -1,16 +1,14 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL # MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME # PORT_OFFSET diff --git a/benchmarks/gptoss_fp4_h200_trt_slurm.sh b/benchmarks/gptoss_fp4_h200_trt_slurm.sh index 21d6ae02c..9f2dfc326 100644 --- a/benchmarks/gptoss_fp4_h200_trt_slurm.sh +++ b/benchmarks/gptoss_fp4_h200_trt_slurm.sh @@ -1,16 +1,13 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL -# MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME # PORT_OFFSET # DP_ATTENTION diff --git a/benchmarks/gptoss_fp4_mi300x_docker.sh b/benchmarks/gptoss_fp4_mi300x_docker.sh index 7d1f98226..f812dbab7 100644 --- a/benchmarks/gptoss_fp4_mi300x_docker.sh +++ b/benchmarks/gptoss_fp4_mi300x_docker.sh @@ -1,13 +1,16 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL # PORT # TP # CONC +# ISL +# OSL # MAX_MODEL_LEN +# RANDOM_RANGE_RATIO +# RESULT_FILENAME +# NF # If the machine runs a MEC FW older than 177, RCCL # cannot reclaim some memory. diff --git a/benchmarks/gptoss_fp4_mi300x_slurm.sh b/benchmarks/gptoss_fp4_mi300x_slurm.sh index a9e164cc2..7c8eaca4e 100644 --- a/benchmarks/gptoss_fp4_mi300x_slurm.sh +++ b/benchmarks/gptoss_fp4_mi300x_slurm.sh @@ -1,17 +1,16 @@ #!/usr/bin/bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL # MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME +# NF echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" diff --git a/benchmarks/gptoss_fp4_mi325x_docker.sh b/benchmarks/gptoss_fp4_mi325x_docker.sh index 46462ad6d..03c554ecb 100644 --- a/benchmarks/gptoss_fp4_mi325x_docker.sh +++ b/benchmarks/gptoss_fp4_mi325x_docker.sh @@ -1,13 +1,16 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL # PORT # TP # CONC +# ISL +# OSL # MAX_MODEL_LEN +# RANDOM_RANGE_RATIO +# RESULT_FILENAME +# NF # If the machine runs a MEC FW older than 177, RCCL # cannot reclaim some memory. diff --git a/benchmarks/gptoss_fp4_mi325x_slurm.sh b/benchmarks/gptoss_fp4_mi325x_slurm.sh index f15e6261c..1f6be36b9 100644 --- a/benchmarks/gptoss_fp4_mi325x_slurm.sh +++ b/benchmarks/gptoss_fp4_mi325x_slurm.sh @@ -1,17 +1,16 @@ #!/usr/bin/bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL # MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME +# NF echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" diff --git a/benchmarks/gptoss_fp4_mi355x_docker.sh b/benchmarks/gptoss_fp4_mi355x_docker.sh index 0e54245d4..68fc59f8c 100644 --- a/benchmarks/gptoss_fp4_mi355x_docker.sh +++ b/benchmarks/gptoss_fp4_mi355x_docker.sh @@ -1,15 +1,16 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL # PORT # TP # CONC +# ISL +# OSL # MAX_MODEL_LEN # RANDOM_RANGE_RATIO # RESULT_FILENAME +# NUM_PROMPTS cat > config.yaml << EOF compilation-config: '{"compile_sizes":[1,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,256,512,1024,2048,8192] , "cudagraph_capture_sizes":[1,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,136,144,152,160,168,176,184,192,200,208,216,224,232,240,248,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,520,528,536,544,552,560,568,576,584,592,600,608,616,624,632,640,648,656,664,672,680,688,696,704,712,720,728,736,744,752,760,768,776,784,792,800,808,816,824,832,840,848,856,864,872,880,888,896,904,912,920,928,936,944,952,960,968,976,984,992,1000,1008,1016,1024,2048,4096,8192] , "cudagraph_mode": "FULL_AND_PIECEWISE"}' diff --git a/benchmarks/gptoss_fp4_mi355x_slurm.sh b/benchmarks/gptoss_fp4_mi355x_slurm.sh index a2adf2952..5e40eba1d 100644 --- a/benchmarks/gptoss_fp4_mi355x_slurm.sh +++ b/benchmarks/gptoss_fp4_mi355x_slurm.sh @@ -1,17 +1,16 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL # MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC -# PORT # RESULT_FILENAME +# PORT_OFFSET SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log) From bce245680b44474e76c48a99c4ddb79ab3b02d18 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 21 Nov 2025 21:26:39 +0000 Subject: [PATCH 3/4] Fix required env vars - remove NF, PREFILL_SIZE, and correct PORT/PORT_OFFSET Co-authored-by: cquil11 <60715037+cquil11@users.noreply.github.com> --- benchmarks/dsr1_fp4_b200_trt_slurm.sh | 1 - benchmarks/dsr1_fp4_mi355x_docker.sh | 1 - benchmarks/dsr1_fp4_mi355x_slurm.sh | 2 -- benchmarks/dsr1_fp8_b200_trt_slurm.sh | 1 - benchmarks/dsr1_fp8_h200_slurm.sh | 1 - benchmarks/dsr1_fp8_h200_trt_slurm.sh | 1 - benchmarks/dsr1_fp8_mi300x_docker.sh | 1 - benchmarks/dsr1_fp8_mi300x_slurm.sh | 2 -- benchmarks/dsr1_fp8_mi325x_slurm.sh | 1 - benchmarks/dsr1_fp8_mi355x_slurm.sh | 1 - benchmarks/gptoss_fp4_b200_trt_slurm.sh | 1 - benchmarks/gptoss_fp4_h100_slurm.sh | 1 - benchmarks/gptoss_fp4_h200_slurm.sh | 2 -- benchmarks/gptoss_fp4_h200_trt_slurm.sh | 1 - benchmarks/gptoss_fp4_mi300x_docker.sh | 1 - benchmarks/gptoss_fp4_mi300x_slurm.sh | 2 -- benchmarks/gptoss_fp4_mi325x_docker.sh | 1 - benchmarks/gptoss_fp4_mi325x_slurm.sh | 2 -- benchmarks/gptoss_fp4_mi355x_slurm.sh | 2 -- 19 files changed, 25 deletions(-) diff --git a/benchmarks/dsr1_fp4_b200_trt_slurm.sh b/benchmarks/dsr1_fp4_b200_trt_slurm.sh index 80262db66..dd9ed3cc6 100644 --- a/benchmarks/dsr1_fp4_b200_trt_slurm.sh +++ b/benchmarks/dsr1_fp4_b200_trt_slurm.sh @@ -2,7 +2,6 @@ # === Required Env Vars === # MODEL -# PORT # TP # CONC # ISL diff --git a/benchmarks/dsr1_fp4_mi355x_docker.sh b/benchmarks/dsr1_fp4_mi355x_docker.sh index c32763567..ca1255802 100644 --- a/benchmarks/dsr1_fp4_mi355x_docker.sh +++ b/benchmarks/dsr1_fp4_mi355x_docker.sh @@ -10,7 +10,6 @@ # RANDOM_RANGE_RATIO # RESULT_FILENAME # NUM_PROMPTS -# PREFILL_SIZE export SGLANG_USE_AITER=1 PREFILL_SIZE=196608 diff --git a/benchmarks/dsr1_fp4_mi355x_slurm.sh b/benchmarks/dsr1_fp4_mi355x_slurm.sh index d283d29c9..0983b7ddf 100644 --- a/benchmarks/dsr1_fp4_mi355x_slurm.sh +++ b/benchmarks/dsr1_fp4_mi355x_slurm.sh @@ -9,8 +9,6 @@ # OSL # RANDOM_RANGE_RATIO # RESULT_FILENAME -# PORT_OFFSET -# PREFILL_SIZE export SGLANG_USE_AITER=1 SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log) diff --git a/benchmarks/dsr1_fp8_b200_trt_slurm.sh b/benchmarks/dsr1_fp8_b200_trt_slurm.sh index 488cc15c6..cf62990d6 100644 --- a/benchmarks/dsr1_fp8_b200_trt_slurm.sh +++ b/benchmarks/dsr1_fp8_b200_trt_slurm.sh @@ -2,7 +2,6 @@ # === Required Env Vars === # MODEL -# PORT # TP # CONC # ISL diff --git a/benchmarks/dsr1_fp8_h200_slurm.sh b/benchmarks/dsr1_fp8_h200_slurm.sh index 796c0c857..f84d741d6 100644 --- a/benchmarks/dsr1_fp8_h200_slurm.sh +++ b/benchmarks/dsr1_fp8_h200_slurm.sh @@ -2,7 +2,6 @@ # === Required Env Vars === # MODEL -# PORT # TP # CONC # ISL diff --git a/benchmarks/dsr1_fp8_h200_trt_slurm.sh b/benchmarks/dsr1_fp8_h200_trt_slurm.sh index e530050e5..bdd25b60f 100644 --- a/benchmarks/dsr1_fp8_h200_trt_slurm.sh +++ b/benchmarks/dsr1_fp8_h200_trt_slurm.sh @@ -2,7 +2,6 @@ # === Required Env Vars === # MODEL -# PORT # TP # CONC # ISL diff --git a/benchmarks/dsr1_fp8_mi300x_docker.sh b/benchmarks/dsr1_fp8_mi300x_docker.sh index 09fdfeb6b..e92765ebb 100644 --- a/benchmarks/dsr1_fp8_mi300x_docker.sh +++ b/benchmarks/dsr1_fp8_mi300x_docker.sh @@ -9,7 +9,6 @@ # OSL # RANDOM_RANGE_RATIO # RESULT_FILENAME -# NF # Reference # https://rocm.docs.amd.com/en/docs-7.0-rc1/preview/benchmark-docker/inference-sglang-deepseek-r1-fp8.html#run-the-inference-benchmark diff --git a/benchmarks/dsr1_fp8_mi300x_slurm.sh b/benchmarks/dsr1_fp8_mi300x_slurm.sh index 598df7b38..662f4bdfb 100644 --- a/benchmarks/dsr1_fp8_mi300x_slurm.sh +++ b/benchmarks/dsr1_fp8_mi300x_slurm.sh @@ -2,14 +2,12 @@ # === Required Env Vars === # MODEL -# PORT # TP # CONC # ISL # OSL # RANDOM_RANGE_RATIO # RESULT_FILENAME -# NF echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" diff --git a/benchmarks/dsr1_fp8_mi325x_slurm.sh b/benchmarks/dsr1_fp8_mi325x_slurm.sh index d0b9d3039..fb5e07df9 100644 --- a/benchmarks/dsr1_fp8_mi325x_slurm.sh +++ b/benchmarks/dsr1_fp8_mi325x_slurm.sh @@ -2,7 +2,6 @@ # === Required Env Vars === # MODEL -# PORT # TP # CONC # ISL diff --git a/benchmarks/dsr1_fp8_mi355x_slurm.sh b/benchmarks/dsr1_fp8_mi355x_slurm.sh index 2645b30ce..921f08a4c 100644 --- a/benchmarks/dsr1_fp8_mi355x_slurm.sh +++ b/benchmarks/dsr1_fp8_mi355x_slurm.sh @@ -9,7 +9,6 @@ # OSL # RANDOM_RANGE_RATIO # RESULT_FILENAME -# PORT_OFFSET export HF_MODULES_CACHE="/tmp/hf_modules_cache/" export SGLANG_USE_AITER=1 diff --git a/benchmarks/gptoss_fp4_b200_trt_slurm.sh b/benchmarks/gptoss_fp4_b200_trt_slurm.sh index 86a9627cc..a60f6ed24 100644 --- a/benchmarks/gptoss_fp4_b200_trt_slurm.sh +++ b/benchmarks/gptoss_fp4_b200_trt_slurm.sh @@ -2,7 +2,6 @@ # === Required Env Vars === # MODEL -# PORT # TP # CONC # ISL diff --git a/benchmarks/gptoss_fp4_h100_slurm.sh b/benchmarks/gptoss_fp4_h100_slurm.sh index 2af5bcb69..a004f8892 100644 --- a/benchmarks/gptoss_fp4_h100_slurm.sh +++ b/benchmarks/gptoss_fp4_h100_slurm.sh @@ -9,7 +9,6 @@ # OSL # RANDOM_RANGE_RATIO # RESULT_FILENAME -# PORT_OFFSET echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" diff --git a/benchmarks/gptoss_fp4_h200_slurm.sh b/benchmarks/gptoss_fp4_h200_slurm.sh index 7fee5fe07..970b7ad35 100644 --- a/benchmarks/gptoss_fp4_h200_slurm.sh +++ b/benchmarks/gptoss_fp4_h200_slurm.sh @@ -2,12 +2,10 @@ # === Required Env Vars === # MODEL -# PORT # TP # CONC # ISL # OSL -# MAX_MODEL_LEN # RANDOM_RANGE_RATIO # RESULT_FILENAME # PORT_OFFSET diff --git a/benchmarks/gptoss_fp4_h200_trt_slurm.sh b/benchmarks/gptoss_fp4_h200_trt_slurm.sh index 9f2dfc326..12a6af5b7 100644 --- a/benchmarks/gptoss_fp4_h200_trt_slurm.sh +++ b/benchmarks/gptoss_fp4_h200_trt_slurm.sh @@ -2,7 +2,6 @@ # === Required Env Vars === # MODEL -# PORT # TP # CONC # ISL diff --git a/benchmarks/gptoss_fp4_mi300x_docker.sh b/benchmarks/gptoss_fp4_mi300x_docker.sh index f812dbab7..50d86b52a 100644 --- a/benchmarks/gptoss_fp4_mi300x_docker.sh +++ b/benchmarks/gptoss_fp4_mi300x_docker.sh @@ -10,7 +10,6 @@ # MAX_MODEL_LEN # RANDOM_RANGE_RATIO # RESULT_FILENAME -# NF # If the machine runs a MEC FW older than 177, RCCL # cannot reclaim some memory. diff --git a/benchmarks/gptoss_fp4_mi300x_slurm.sh b/benchmarks/gptoss_fp4_mi300x_slurm.sh index 7c8eaca4e..a86e66b3d 100644 --- a/benchmarks/gptoss_fp4_mi300x_slurm.sh +++ b/benchmarks/gptoss_fp4_mi300x_slurm.sh @@ -2,7 +2,6 @@ # === Required Env Vars === # MODEL -# PORT # TP # CONC # ISL @@ -10,7 +9,6 @@ # MAX_MODEL_LEN # RANDOM_RANGE_RATIO # RESULT_FILENAME -# NF echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" diff --git a/benchmarks/gptoss_fp4_mi325x_docker.sh b/benchmarks/gptoss_fp4_mi325x_docker.sh index 03c554ecb..2117f787e 100644 --- a/benchmarks/gptoss_fp4_mi325x_docker.sh +++ b/benchmarks/gptoss_fp4_mi325x_docker.sh @@ -10,7 +10,6 @@ # MAX_MODEL_LEN # RANDOM_RANGE_RATIO # RESULT_FILENAME -# NF # If the machine runs a MEC FW older than 177, RCCL # cannot reclaim some memory. diff --git a/benchmarks/gptoss_fp4_mi325x_slurm.sh b/benchmarks/gptoss_fp4_mi325x_slurm.sh index 1f6be36b9..56c7651ed 100644 --- a/benchmarks/gptoss_fp4_mi325x_slurm.sh +++ b/benchmarks/gptoss_fp4_mi325x_slurm.sh @@ -2,7 +2,6 @@ # === Required Env Vars === # MODEL -# PORT # TP # CONC # ISL @@ -10,7 +9,6 @@ # MAX_MODEL_LEN # RANDOM_RANGE_RATIO # RESULT_FILENAME -# NF echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" diff --git a/benchmarks/gptoss_fp4_mi355x_slurm.sh b/benchmarks/gptoss_fp4_mi355x_slurm.sh index 5e40eba1d..342b7dde3 100644 --- a/benchmarks/gptoss_fp4_mi355x_slurm.sh +++ b/benchmarks/gptoss_fp4_mi355x_slurm.sh @@ -7,10 +7,8 @@ # CONC # ISL # OSL -# MAX_MODEL_LEN # RANDOM_RANGE_RATIO # RESULT_FILENAME -# PORT_OFFSET SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log) From 993ebe73134cd26761f8c7631b5864a8c6606978 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 21 Nov 2025 21:31:58 +0000 Subject: [PATCH 4/4] Remove internally-calculated vars from required env vars (EXTRA_CONFIG_FILE, MAX_NUM_TOKENS, MOE_BACKEND) Co-authored-by: cquil11 <60715037+cquil11@users.noreply.github.com> --- benchmarks/dsr1_fp4_b200_trt_slurm.sh | 3 --- benchmarks/dsr1_fp8_b200_trt_slurm.sh | 3 --- benchmarks/dsr1_fp8_h200_trt_slurm.sh | 3 --- benchmarks/gptoss_fp4_b200_trt_slurm.sh | 3 --- 4 files changed, 12 deletions(-) diff --git a/benchmarks/dsr1_fp4_b200_trt_slurm.sh b/benchmarks/dsr1_fp4_b200_trt_slurm.sh index dd9ed3cc6..aa2be7648 100644 --- a/benchmarks/dsr1_fp4_b200_trt_slurm.sh +++ b/benchmarks/dsr1_fp4_b200_trt_slurm.sh @@ -12,9 +12,6 @@ # PORT_OFFSET # DP_ATTENTION # EP_SIZE -# EXTRA_CONFIG_FILE -# MAX_NUM_TOKENS -# MOE_BACKEND echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" diff --git a/benchmarks/dsr1_fp8_b200_trt_slurm.sh b/benchmarks/dsr1_fp8_b200_trt_slurm.sh index cf62990d6..58d24a7ed 100644 --- a/benchmarks/dsr1_fp8_b200_trt_slurm.sh +++ b/benchmarks/dsr1_fp8_b200_trt_slurm.sh @@ -12,9 +12,6 @@ # PORT_OFFSET # DP_ATTENTION # EP_SIZE -# EXTRA_CONFIG_FILE -# MAX_NUM_TOKENS -# MOE_BACKEND echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" diff --git a/benchmarks/dsr1_fp8_h200_trt_slurm.sh b/benchmarks/dsr1_fp8_h200_trt_slurm.sh index bdd25b60f..ac6bc167c 100644 --- a/benchmarks/dsr1_fp8_h200_trt_slurm.sh +++ b/benchmarks/dsr1_fp8_h200_trt_slurm.sh @@ -12,9 +12,6 @@ # PORT_OFFSET # DP_ATTENTION # EP_SIZE -# EXTRA_CONFIG_FILE -# MAX_NUM_TOKENS -# MOE_BACKEND echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" diff --git a/benchmarks/gptoss_fp4_b200_trt_slurm.sh b/benchmarks/gptoss_fp4_b200_trt_slurm.sh index a60f6ed24..44e9dbf4c 100644 --- a/benchmarks/gptoss_fp4_b200_trt_slurm.sh +++ b/benchmarks/gptoss_fp4_b200_trt_slurm.sh @@ -12,9 +12,6 @@ # PORT_OFFSET # DP_ATTENTION # EP_SIZE -# EXTRA_CONFIG_FILE -# MAX_NUM_TOKENS -# MOE_BACKEND # GPTOSS TRTLLM Deployment Guide: # https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/deployment-guide/quick-start-recipe-for-gpt-oss-on-trtllm.md