From 5573b4a6461f822b13a445febf8992f93a04db18 Mon Sep 17 00:00:00 2001 From: functionstackx <47992694+functionstackx@users.noreply.github.com> Date: Sat, 7 Feb 2026 19:52:32 -0500 Subject: [PATCH 1/2] fix: h100 dsr1 fp8 dynamo trtllm model prefix --- .github/configs/nvidia-master.yaml | 34 ++++++++++++++++-------------- perf-changelog.yaml | 6 ++++++ runners/launch_h100-dgxc-slurm.sh | 5 +++-- 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 8ab211cb5..1f7f527ef 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -1977,7 +1977,7 @@ dsr1-fp8-h200-dynamo-trt: dsr1-fp8-h100-dynamo-trt: image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post3 model: deepseek-ai/DeepSeek-R1-0528 - model-prefix: DeepSeek-R1-0528 + model-prefix: dsr1 runner: h100-multinode-slurm precision: fp8 framework: dynamo-trt @@ -2314,21 +2314,23 @@ dsr1-fp8-h100-dynamo-trt: tp: 16 ep: 16 dp-attn: true - - spec-decoding: "mtp" - conc-list: [78] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml" - decode: - num-worker: 2 - tp: 16 - ep: 16 - dp-attn: false + # Disabled (commented out) because this entry persistently causes problems; see: + # https://github.com/InferenceMAX/InferenceMAX/actions/runs/21769314582/job/62813105509 + # - spec-decoding: "mtp" + # conc-list: [78] + # prefill: + # num-worker: 1 + # tp: 16 + # ep: 16 + # dp-attn: true + # additional-settings: + # # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml + # - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml" + # decode: + # 
num-worker: 2 + # tp: 16 + # ep: 16 + # dp-attn: false - spec-decoding: "mtp" conc-list: [154] prefill: diff --git a/perf-changelog.yaml b/perf-changelog.yaml index eaae0dafc..7afd81192 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -459,3 +459,9 @@ description: - "New B300 FP8 Dynamo TRT configurations" pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/638 +- config-keys: + - dsr1-fp8-h100-dynamo-trt + description: + - "Add DeepSeek R1 FP8 H100 Dynamo TRT-LLM disaggregated multinode configurations" + - "fix model_prefix bug from https://github.com/InferenceMAX/InferenceMAX/pull/651" + pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/670 diff --git a/runners/launch_h100-dgxc-slurm.sh b/runners/launch_h100-dgxc-slurm.sh index fb0ae69f7..d461e4a94 100644 --- a/runners/launch_h100-dgxc-slurm.sh +++ b/runners/launch_h100-dgxc-slurm.sh @@ -37,9 +37,10 @@ CONTAINER_KEY=$(echo "$IMAGE" | sed 's|nvcr.io/|nvcr.io#|') # Map container image to local squash file SQUASH_FILE="/mnt/nfs/sa-shared/containers/$(echo "$IMAGE" | sed 's|nvcr.io/||' | sed 's/[\/:@#]/+/g').sqsh" -if [[ $MODEL_PREFIX == "DeepSeek-R1-0528" ]]; then +if [[ $MODEL_PREFIX == "dsr1" && $PRECISION == "fp8" ]]; then export MODEL_PATH="/mnt/numa1/shared/models/dsr1-fp8" export SERVED_MODEL_NAME="DeepSeek-R1-0528" + export SRT_SLURM_MODEL_PREFIX="DeepSeek-R1-0528" else echo "Unsupported model prefix: $MODEL_PREFIX. 
Supported prefixes are: DeepSeek-R1-0528" exit 1 @@ -64,7 +65,7 @@ network_interface: "" srtctl_root: "${GITHUB_WORKSPACE}/${SRT_REPO_DIR}" # Model path aliases model_paths: - "${MODEL_PREFIX}": "${MODEL_PATH}" + "${SRT_SLURM_MODEL_PREFIX}": "${MODEL_PATH}" containers: latest: "${SQUASH_FILE}" "${CONTAINER_KEY}": "${SQUASH_FILE}" From b1e02a2d11f87a7b8766a399102da6d5bbd2d9a0 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Sun, 8 Feb 2026 00:58:56 +0000 Subject: [PATCH 2/2] fix: update perf-changelog pr-link to #663 Co-authored-by: functionstackx --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 7afd81192..05d016aa7 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -464,4 +464,4 @@ description: - "Add DeepSeek R1 FP8 H100 Dynamo TRT-LLM disaggregated multinode configurations" - "fix model_prefix bug from https://github.com/InferenceMAX/InferenceMAX/pull/651" - pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/670 + pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/663