From 7924b313fe45512810fa33537c318c30af1dc6a6 Mon Sep 17 00:00:00 2001
From: jthomson04
Date: Wed, 4 Feb 2026 19:28:12 -0800
Subject: [PATCH 1/3] fix gb200 model prefix

Select the DeepSeek-R1 checkpoint from MODEL_PREFIX plus PRECISION
instead of a precision-specific prefix, and key the model_paths alias
on SRT_SLURM_MODEL_PREFIX.

Branches that do not export SRT_SLURM_MODEL_PREFIX (for example gptoss)
fall back to MODEL_PREFIX, which is the key they used before this
change, so the alias is never written with an empty name.

Signed-off-by: jthomson04
---
 .github/configs/nvidia-master.yaml | 2 +-
 runners/launch_gb200-nv.sh         | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index a5cad5206..fe174a123 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -2286,7 +2286,7 @@ dsr1-fp4-gb200-dynamo-trt:
 dsr1-fp8-gb200-dynamo-trt:
   image: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2
   model: deepseek-ai/DeepSeek-R1-0528
-  model-prefix: dsr1-fp8
+  model-prefix: dsr1
   runner: gb200
   precision: fp8
   framework: dynamo-trt
diff --git a/runners/launch_gb200-nv.sh b/runners/launch_gb200-nv.sh
index 83e94005e..4b770194c 100755
--- a/runners/launch_gb200-nv.sh
+++ b/runners/launch_gb200-nv.sh
@@ -18,12 +18,14 @@ elif [[ $FRAMEWORK == "dynamo-trt" ]]; then
     if [[ $MODEL_PREFIX == "gptoss" ]]; then
         export MODEL_PATH="/mnt/lustre01/models/gpt-oss-120b"
         export SERVED_MODEL_NAME="gpt-oss-120b"
-    elif [[ $MODEL_PREFIX == "dsr1" ]]; then
+    elif [[ $MODEL_PREFIX == "dsr1" && $PRECISION == "fp4" ]]; then
         export MODEL_PATH="/mnt/lustre01/models/deepseek-r1-0528-fp4-v2/"
         export SERVED_MODEL_NAME="deepseek-r1-fp4"
-    elif [[ $MODEL_PREFIX == "dsr1-fp8" ]]; then
+        export SRT_SLURM_MODEL_PREFIX="dsr1"
+    elif [[ $MODEL_PREFIX == "dsr1" && $PRECISION == "fp8" ]]; then
         export MODEL_PATH="/mnt/numa1/groups/sa-shared/models/deepseek-r1-0528/"
         export SERVED_MODEL_NAME="deepseek-r1-fp8"
+        export SRT_SLURM_MODEL_PREFIX="dsr1-fp8"
     else
         echo "Unsupported model prefix: $MODEL_PREFIX. Supported prefixes are: gptoss or dsr1"
         exit 1
@@ -138,7 +140,7 @@ srtctl_root: "${SRTCTL_ROOT}"
 
 # Model path aliases
 model_paths:
-  "${MODEL_PREFIX}": "${MODEL_PATH}"
+  "${SRT_SLURM_MODEL_PREFIX:-${MODEL_PREFIX}}": "${MODEL_PATH}"
 containers:
   dynamo-trtllm: ${SQUASH_FILE}
   dynamo-sglang: ${SQUASH_FILE}

From 98e5ac1e0a47fcf43d3f83bacd198682b5723138 Mon Sep 17 00:00:00 2001
From: jthomson04
Date: Wed, 4 Feb 2026 19:33:19 -0800
Subject: [PATCH 2/3] update perf-changelog

Signed-off-by: jthomson04
---
 perf-changelog.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index aa8ad57f9..2329c58d4 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -361,3 +361,9 @@
     - "8k1k: 14 scenarios (7 MTP, 7 STP) for long context workloads"
     - "Prefill workers: 1-5P, Decode workers: 1-4D, TP/EP: 8/16/32"
   pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/617
+
+- config-keys:
+    - dsr1-fp8-gb200-dynamo-trt
+  description:
+    - "Fix model_prefix argument in yaml configs"
+  pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/646
\ No newline at end of file

From 2dc7f4b4787787cea6b52f668a7bd490ad2d508b Mon Sep 17 00:00:00 2001
From: jthomson04
Date: Wed, 4 Feb 2026 19:34:42 -0800
Subject: [PATCH 3/3] newline

Signed-off-by: jthomson04
---
 perf-changelog.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 2329c58d4..813c173e6 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -366,4 +366,4 @@
     - dsr1-fp8-gb200-dynamo-trt
   description:
     - "Fix model_prefix argument in yaml configs"
-  pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/646
\ No newline at end of file
+  pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/646