diff --git a/benchmarks/dsr1_fp4_b200_docker.sh b/benchmarks/dsr1_fp4_b200_docker.sh index a520871fa..4ff123a32 100644 --- a/benchmarks/dsr1_fp4_b200_docker.sh +++ b/benchmarks/dsr1_fp4_b200_docker.sh @@ -1,5 +1,17 @@ #!/usr/bin/env bash +# === Required Env Vars === +# MODEL +# PORT +# TP +# CONC +# ISL +# OSL +# RANDOM_RANGE_RATIO +# RESULT_FILENAME +# EP_SIZE +# NUM_PROMPTS + nvidia-smi # To improve CI stability, we patch this helper function to prevent a race condition that diff --git a/benchmarks/dsr1_fp4_b200_trt_slurm.sh b/benchmarks/dsr1_fp4_b200_trt_slurm.sh index b4227e428..aa2be7648 100644 --- a/benchmarks/dsr1_fp4_b200_trt_slurm.sh +++ b/benchmarks/dsr1_fp4_b200_trt_slurm.sh @@ -1,16 +1,13 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# TP +# CONC # ISL # OSL # MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME # PORT_OFFSET # DP_ATTENTION diff --git a/benchmarks/dsr1_fp4_mi355x_docker.sh b/benchmarks/dsr1_fp4_mi355x_docker.sh index c20a9f63f..ca1255802 100644 --- a/benchmarks/dsr1_fp4_mi355x_docker.sh +++ b/benchmarks/dsr1_fp4_mi355x_docker.sh @@ -1,14 +1,15 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL -# MAX_MODEL_LEN -# RANDOM_RANGE_RATIO +# PORT # TP # CONC -# PORT +# ISL +# OSL +# RANDOM_RANGE_RATIO +# RESULT_FILENAME +# NUM_PROMPTS export SGLANG_USE_AITER=1 PREFILL_SIZE=196608 diff --git a/benchmarks/dsr1_fp4_mi355x_slurm.sh b/benchmarks/dsr1_fp4_mi355x_slurm.sh index f4d7f1d39..0983b7ddf 100644 --- a/benchmarks/dsr1_fp4_mi355x_slurm.sh +++ b/benchmarks/dsr1_fp4_mi355x_slurm.sh @@ -1,16 +1,13 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL -# MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC -# PORT # RESULT_FILENAME export SGLANG_USE_AITER=1 SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log) diff --git a/benchmarks/dsr1_fp8_b200_docker.sh b/benchmarks/dsr1_fp8_b200_docker.sh index ffa7644bd..4d8a9ff18 100644 --- a/benchmarks/dsr1_fp8_b200_docker.sh +++ b/benchmarks/dsr1_fp8_b200_docker.sh @@ -1,13 +1,16 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL # PORT # TP # CONC -# MAX_MODEL_LEN +# ISL +# OSL +# RANDOM_RANGE_RATIO +# RESULT_FILENAME +# EP_SIZE +# NUM_PROMPTS nvidia-smi diff --git a/benchmarks/dsr1_fp8_b200_trt_slurm.sh b/benchmarks/dsr1_fp8_b200_trt_slurm.sh index a9a1a04ff..58d24a7ed 100644 --- a/benchmarks/dsr1_fp8_b200_trt_slurm.sh +++ b/benchmarks/dsr1_fp8_b200_trt_slurm.sh @@ -1,16 +1,13 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# TP +# CONC # ISL # OSL # MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME # PORT_OFFSET # DP_ATTENTION diff --git a/benchmarks/dsr1_fp8_h200_slurm.sh b/benchmarks/dsr1_fp8_h200_slurm.sh index 06345ecb2..f84d741d6 100644 --- a/benchmarks/dsr1_fp8_h200_slurm.sh +++ b/benchmarks/dsr1_fp8_h200_slurm.sh @@ -1,16 +1,12 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# TP +# CONC # ISL # OSL -# MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME # PORT_OFFSET diff --git a/benchmarks/dsr1_fp8_h200_trt_slurm.sh b/benchmarks/dsr1_fp8_h200_trt_slurm.sh index 4ece6f7bc..ac6bc167c 100644 --- a/benchmarks/dsr1_fp8_h200_trt_slurm.sh +++ b/benchmarks/dsr1_fp8_h200_trt_slurm.sh @@ -1,16 +1,13 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# TP +# CONC # ISL # OSL # MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME # PORT_OFFSET # DP_ATTENTION diff --git a/benchmarks/dsr1_fp8_mi300x_docker.sh b/benchmarks/dsr1_fp8_mi300x_docker.sh index 8c269dd83..e92765ebb 100644 --- a/benchmarks/dsr1_fp8_mi300x_docker.sh +++ b/benchmarks/dsr1_fp8_mi300x_docker.sh @@ -1,13 +1,14 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL # PORT # TP # CONC -# MAX_MODEL_LEN +# ISL +# OSL +# RANDOM_RANGE_RATIO +# RESULT_FILENAME # Reference # https://rocm.docs.amd.com/en/docs-7.0-rc1/preview/benchmark-docker/inference-sglang-deepseek-r1-fp8.html#run-the-inference-benchmark diff --git a/benchmarks/dsr1_fp8_mi300x_slurm.sh b/benchmarks/dsr1_fp8_mi300x_slurm.sh index 5fad7a587..662f4bdfb 100644 --- a/benchmarks/dsr1_fp8_mi300x_slurm.sh +++ b/benchmarks/dsr1_fp8_mi300x_slurm.sh @@ -1,16 +1,12 @@ #!/usr/bin/bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# TP +# CONC # ISL # OSL -# MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" diff --git a/benchmarks/dsr1_fp8_mi325x_docker.sh b/benchmarks/dsr1_fp8_mi325x_docker.sh index 72c89571d..a8cdf566a 100644 --- a/benchmarks/dsr1_fp8_mi325x_docker.sh +++ b/benchmarks/dsr1_fp8_mi325x_docker.sh @@ -1,13 +1,14 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL # PORT # TP # CONC -# MAX_MODEL_LEN +# ISL +# OSL +# RANDOM_RANGE_RATIO +# RESULT_FILENAME # Reference # https://rocm.docs.amd.com/en/docs-7.0-docker/benchmark-docker/inference-sglang-deepseek-r1-fp8.html diff --git a/benchmarks/dsr1_fp8_mi325x_slurm.sh b/benchmarks/dsr1_fp8_mi325x_slurm.sh index 67e4cc394..fb5e07df9 100644 --- a/benchmarks/dsr1_fp8_mi325x_slurm.sh +++ b/benchmarks/dsr1_fp8_mi325x_slurm.sh @@ -1,5 +1,14 @@ #!/usr/bin/bash +# === Required Env Vars === +# MODEL +# TP +# CONC +# ISL +# OSL +# RANDOM_RANGE_RATIO +# RESULT_FILENAME + echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log) diff --git a/benchmarks/dsr1_fp8_mi355x_docker.sh b/benchmarks/dsr1_fp8_mi355x_docker.sh index 128810b42..8c5038cee 100644 --- a/benchmarks/dsr1_fp8_mi355x_docker.sh +++ b/benchmarks/dsr1_fp8_mi355x_docker.sh @@ -1,13 +1,14 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL # PORT # TP # CONC -# MAX_MODEL_LEN +# ISL +# OSL +# RANDOM_RANGE_RATIO +# RESULT_FILENAME # Reference # https://rocm.docs.amd.com/en/docs-7.0-docker/benchmark-docker/inference-sglang-deepseek-r1-fp8.html diff --git a/benchmarks/dsr1_fp8_mi355x_slurm.sh b/benchmarks/dsr1_fp8_mi355x_slurm.sh index fd6fe49fb..921f08a4c 100644 --- a/benchmarks/dsr1_fp8_mi355x_slurm.sh +++ b/benchmarks/dsr1_fp8_mi355x_slurm.sh @@ -1,16 +1,13 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL -# MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC -# PORT # RESULT_FILENAME export HF_MODULES_CACHE="/tmp/hf_modules_cache/" diff --git a/benchmarks/gptoss_fp4_b200_docker.sh b/benchmarks/gptoss_fp4_b200_docker.sh index 1736701c4..4fbf4f50c 100644 --- a/benchmarks/gptoss_fp4_b200_docker.sh +++ b/benchmarks/gptoss_fp4_b200_docker.sh @@ -1,18 +1,16 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL # MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME -# PORT_OFFSET +# NUM_PROMPTS nvidia-smi diff --git a/benchmarks/gptoss_fp4_b200_trt_slurm.sh b/benchmarks/gptoss_fp4_b200_trt_slurm.sh index 7542cd1a0..44e9dbf4c 100644 --- a/benchmarks/gptoss_fp4_b200_trt_slurm.sh +++ b/benchmarks/gptoss_fp4_b200_trt_slurm.sh @@ -1,16 +1,13 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# TP +# CONC # ISL # OSL # MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME # PORT_OFFSET # DP_ATTENTION diff --git a/benchmarks/gptoss_fp4_h100_docker.sh b/benchmarks/gptoss_fp4_h100_docker.sh index f38ea3293..48b548e37 100644 --- a/benchmarks/gptoss_fp4_h100_docker.sh +++ b/benchmarks/gptoss_fp4_h100_docker.sh @@ -1,15 +1,13 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL -# MAX_MODEL_LEN -# RANDOM_RANGE_RATIO +# PORT # TP # CONC # ISL # OSL +# RANDOM_RANGE_RATIO # RESULT_FILENAME diff --git a/benchmarks/gptoss_fp4_h100_slurm.sh b/benchmarks/gptoss_fp4_h100_slurm.sh index 843219b95..a004f8892 100644 --- a/benchmarks/gptoss_fp4_h100_slurm.sh +++ b/benchmarks/gptoss_fp4_h100_slurm.sh @@ -1,17 +1,14 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL -# MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME -# PORT_OFFSET echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" diff --git a/benchmarks/gptoss_fp4_h200_slurm.sh b/benchmarks/gptoss_fp4_h200_slurm.sh index dc29baf8d..970b7ad35 100644 --- a/benchmarks/gptoss_fp4_h200_slurm.sh +++ b/benchmarks/gptoss_fp4_h200_slurm.sh @@ -1,16 +1,12 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# TP +# CONC # ISL # OSL -# MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME # PORT_OFFSET diff --git a/benchmarks/gptoss_fp4_h200_trt_slurm.sh b/benchmarks/gptoss_fp4_h200_trt_slurm.sh index 21d6ae02c..12a6af5b7 100644 --- a/benchmarks/gptoss_fp4_h200_trt_slurm.sh +++ b/benchmarks/gptoss_fp4_h200_trt_slurm.sh @@ -1,16 +1,12 @@ #!/usr/bin/env bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# TP +# CONC # ISL # OSL -# MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME # PORT_OFFSET # DP_ATTENTION diff --git a/benchmarks/gptoss_fp4_mi300x_docker.sh b/benchmarks/gptoss_fp4_mi300x_docker.sh index 7d1f98226..50d86b52a 100644 --- a/benchmarks/gptoss_fp4_mi300x_docker.sh +++ b/benchmarks/gptoss_fp4_mi300x_docker.sh @@ -1,13 +1,15 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL # PORT # TP # CONC +# ISL +# OSL # MAX_MODEL_LEN +# RANDOM_RANGE_RATIO +# RESULT_FILENAME # If the machine runs a MEC FW older than 177, RCCL # cannot reclaim some memory. diff --git a/benchmarks/gptoss_fp4_mi300x_slurm.sh b/benchmarks/gptoss_fp4_mi300x_slurm.sh index a9e164cc2..a86e66b3d 100644 --- a/benchmarks/gptoss_fp4_mi300x_slurm.sh +++ b/benchmarks/gptoss_fp4_mi300x_slurm.sh @@ -1,16 +1,13 @@ #!/usr/bin/bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# TP +# CONC # ISL # OSL # MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" diff --git a/benchmarks/gptoss_fp4_mi325x_docker.sh b/benchmarks/gptoss_fp4_mi325x_docker.sh index 46462ad6d..2117f787e 100644 --- a/benchmarks/gptoss_fp4_mi325x_docker.sh +++ b/benchmarks/gptoss_fp4_mi325x_docker.sh @@ -1,13 +1,15 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL # PORT # TP # CONC +# ISL +# OSL # MAX_MODEL_LEN +# RANDOM_RANGE_RATIO +# RESULT_FILENAME # If the machine runs a MEC FW older than 177, RCCL # cannot reclaim some memory. diff --git a/benchmarks/gptoss_fp4_mi325x_slurm.sh b/benchmarks/gptoss_fp4_mi325x_slurm.sh index f15e6261c..56c7651ed 100644 --- a/benchmarks/gptoss_fp4_mi325x_slurm.sh +++ b/benchmarks/gptoss_fp4_mi325x_slurm.sh @@ -1,16 +1,13 @@ #!/usr/bin/bash -# === Required Env Vars === -# HF_TOKEN -# HF_HUB_CACHE -# IMAGE +# === Required Env Vars === # MODEL +# TP +# CONC # ISL # OSL # MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC # RESULT_FILENAME diff --git a/benchmarks/gptoss_fp4_mi355x_docker.sh b/benchmarks/gptoss_fp4_mi355x_docker.sh index 0e54245d4..68fc59f8c 100644 --- a/benchmarks/gptoss_fp4_mi355x_docker.sh +++ b/benchmarks/gptoss_fp4_mi355x_docker.sh @@ -1,15 +1,16 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL # PORT # TP # CONC +# ISL +# OSL # MAX_MODEL_LEN # RANDOM_RANGE_RATIO # RESULT_FILENAME +# NUM_PROMPTS cat > config.yaml << EOF compilation-config: '{"compile_sizes":[1,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,256,512,1024,2048,8192] , "cudagraph_capture_sizes":[1,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,136,144,152,160,168,176,184,192,200,208,216,224,232,240,248,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,520,528,536,544,552,560,568,576,584,592,600,608,616,624,632,640,648,656,664,672,680,688,696,704,712,720,728,736,744,752,760,768,776,784,792,800,808,816,824,832,840,848,856,864,872,880,888,896,904,912,920,928,936,944,952,960,968,976,984,992,1000,1008,1016,1024,2048,4096,8192] , "cudagraph_mode": "FULL_AND_PIECEWISE"}' diff --git a/benchmarks/gptoss_fp4_mi355x_slurm.sh b/benchmarks/gptoss_fp4_mi355x_slurm.sh index a2adf2952..342b7dde3 100644 --- a/benchmarks/gptoss_fp4_mi355x_slurm.sh +++ b/benchmarks/gptoss_fp4_mi355x_slurm.sh @@ -1,16 +1,13 @@ #!/usr/bin/env bash -# ========= Required Env Vars ========= -# HF_TOKEN -# HF_HUB_CACHE +# === Required Env Vars === # MODEL +# PORT +# TP +# CONC # ISL # OSL -# MAX_MODEL_LEN # RANDOM_RANGE_RATIO -# TP -# CONC -# PORT # RESULT_FILENAME SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)