Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 18 additions & 16 deletions .github/configs/nvidia-master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1977,7 +1977,7 @@ dsr1-fp8-h200-dynamo-trt:
dsr1-fp8-h100-dynamo-trt:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post3
model: deepseek-ai/DeepSeek-R1-0528
model-prefix: DeepSeek-R1-0528
model-prefix: dsr1
runner: h100-multinode-slurm
precision: fp8
framework: dynamo-trt
Expand Down Expand Up @@ -2314,21 +2314,23 @@ dsr1-fp8-h100-dynamo-trt:
tp: 16
ep: 16
dp-attn: true
- spec-decoding: "mtp"
conc-list: [78]
prefill:
num-worker: 1
tp: 16
ep: 16
dp-attn: true
additional-settings:
# https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml
- "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml"
decode:
num-worker: 2
tp: 16
ep: 16
dp-attn: false
# Commented out because it persistently causes CI failures; see run below.
# https://github.com/InferenceMAX/InferenceMAX/actions/runs/21769314582/job/62813105509
# - spec-decoding: "mtp"
# conc-list: [78]
# prefill:
# num-worker: 1
# tp: 16
# ep: 16
# dp-attn: true
# additional-settings:
# # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml
# - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml"
# decode:
# num-worker: 2
# tp: 16
# ep: 16
# dp-attn: false
- spec-decoding: "mtp"
conc-list: [154]
prefill:
Expand Down
6 changes: 6 additions & 0 deletions perf-changelog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -459,3 +459,9 @@
description:
- "New B300 FP8 Dynamo TRT configurations"
pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/638
- config-keys:
- dsr1-fp8-h100-dynamo-trt
description:
- "Add DeepSeek R1 FP8 H100 Dynamo TRT-LLM disaggregated multinode configurations"
- "fix model_prefix bug from https://github.com/InferenceMAX/InferenceMAX/pull/651"
pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/663
5 changes: 3 additions & 2 deletions runners/launch_h100-dgxc-slurm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ CONTAINER_KEY=$(echo "$IMAGE" | sed 's|nvcr.io/|nvcr.io#|')
# Map container image to local squash file
SQUASH_FILE="/mnt/nfs/sa-shared/containers/$(echo "$IMAGE" | sed 's|nvcr.io/||' | sed 's/[\/:@#]/+/g').sqsh"

if [[ $MODEL_PREFIX == "DeepSeek-R1-0528" ]]; then
if [[ $MODEL_PREFIX == "dsr1" && $PRECISION == "fp8" ]]; then
export MODEL_PATH="/mnt/numa1/shared/models/dsr1-fp8"
export SERVED_MODEL_NAME="DeepSeek-R1-0528"
export SRT_SLURM_MODEL_PREFIX="DeepSeek-R1-0528"
else
echo "Unsupported model prefix: $MODEL_PREFIX. Supported prefixes are: DeepSeek-R1-0528"
exit 1
Expand All @@ -64,7 +65,7 @@ network_interface: ""
srtctl_root: "${GITHUB_WORKSPACE}/${SRT_REPO_DIR}"
# Model path aliases
model_paths:
"${MODEL_PREFIX}": "${MODEL_PATH}"
"${SRT_SLURM_MODEL_PREFIX}": "${MODEL_PATH}"
containers:
latest: "${SQUASH_FILE}"
"${CONTAINER_KEY}": "${SQUASH_FILE}"
Expand Down
Loading