diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index a5cad5206..fe174a123 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -2286,7 +2286,7 @@ dsr1-fp4-gb200-dynamo-trt:
 dsr1-fp8-gb200-dynamo-trt:
   image: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2
   model: deepseek-ai/DeepSeek-R1-0528
-  model-prefix: dsr1-fp8
+  model-prefix: dsr1
   runner: gb200
   precision: fp8
   framework: dynamo-trt
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index aa8ad57f9..813c173e6 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -361,3 +361,9 @@
     - "8k1k: 14 scenarios (7 MTP, 7 STP) for long context workloads"
     - "Prefill workers: 1-5P, Decode workers: 1-4D, TP/EP: 8/16/32"
   pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/617
+
+- config-keys:
+    - dsr1-fp8-gb200-dynamo-trt
+  description:
+    - "Fix model_prefix argument in yaml configs"
+  pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/646
diff --git a/runners/launch_gb200-nv.sh b/runners/launch_gb200-nv.sh
index 83e94005e..4b770194c 100755
--- a/runners/launch_gb200-nv.sh
+++ b/runners/launch_gb200-nv.sh
@@ -18,12 +18,16 @@ elif [[ $FRAMEWORK == "dynamo-trt" ]]; then
     if [[ $MODEL_PREFIX == "gptoss" ]]; then
         export MODEL_PATH="/mnt/lustre01/models/gpt-oss-120b"
         export SERVED_MODEL_NAME="gpt-oss-120b"
-    elif [[ $MODEL_PREFIX == "dsr1" ]]; then
+    # Both DeepSeek-R1 configs share MODEL_PREFIX=dsr1; PRECISION picks the checkpoint.
+    # SRT_SLURM_MODEL_PREFIX is the alias used as the model_paths key in the generated config.
+    elif [[ $MODEL_PREFIX == "dsr1" && $PRECISION == "fp4" ]]; then
         export MODEL_PATH="/mnt/lustre01/models/deepseek-r1-0528-fp4-v2/"
         export SERVED_MODEL_NAME="deepseek-r1-fp4"
-    elif [[ $MODEL_PREFIX == "dsr1-fp8" ]]; then
+        export SRT_SLURM_MODEL_PREFIX="dsr1"
+    elif [[ $MODEL_PREFIX == "dsr1" && $PRECISION == "fp8" ]]; then
         export MODEL_PATH="/mnt/numa1/groups/sa-shared/models/deepseek-r1-0528/"
         export SERVED_MODEL_NAME="deepseek-r1-fp8"
+        export SRT_SLURM_MODEL_PREFIX="dsr1-fp8"
     else
-        echo "Unsupported model prefix: $MODEL_PREFIX. Supported prefixes are: gptoss or dsr1"
+        echo "Unsupported model prefix/precision: $MODEL_PREFIX/$PRECISION. Supported combinations are: gptoss, dsr1 (fp4) or dsr1 (fp8)"
         exit 1
@@ -138,7 +142,8 @@ srtctl_root: "${SRTCTL_ROOT}"

 # Model path aliases
 model_paths:
-  "${MODEL_PREFIX}": "${MODEL_PATH}"
+  # Key is the srt-slurm alias when one was exported, otherwise the plain model prefix.
+  "${SRT_SLURM_MODEL_PREFIX:-${MODEL_PREFIX}}": "${MODEL_PATH}"
 containers:
   dynamo-trtllm: ${SQUASH_FILE}
   dynamo-sglang: ${SQUASH_FILE}