Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 18 additions & 16 deletions .github/configs/nvidia-master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1977,7 +1977,7 @@ dsr1-fp8-h200-dynamo-trt:
dsr1-fp8-h100-dynamo-trt:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post3
model: deepseek-ai/DeepSeek-R1-0528
model-prefix: DeepSeek-R1-0528
model-prefix: dsr1
runner: h100-multinode-slurm
precision: fp8
framework: dynamo-trt
Expand Down Expand Up @@ -2314,21 +2314,23 @@ dsr1-fp8-h100-dynamo-trt:
tp: 16
ep: 16
dp-attn: true
- spec-decoding: "mtp"
conc-list: [78]
prefill:
num-worker: 1
tp: 16
ep: 16
dp-attn: true
additional-settings:
# https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml
- "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml"
decode:
num-worker: 2
tp: 16
ep: 16
dp-attn: false
# Commented out because it persistently causes CI failures; see run below.
# https://github.com/InferenceMAX/InferenceMAX/actions/runs/21769314582/job/62813105509
# - spec-decoding: "mtp"
# conc-list: [78]
# prefill:
# num-worker: 1
# tp: 16
# ep: 16
# dp-attn: true
# additional-settings:
# # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml
# - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml"
# decode:
# num-worker: 2
# tp: 16
# ep: 16
# dp-attn: false
- spec-decoding: "mtp"
conc-list: [154]
prefill:
Expand Down
6 changes: 6 additions & 0 deletions perf-changelog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -459,3 +459,9 @@
description:
- "New B300 FP8 Dynamo TRT configurations"
pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/638
- config-keys:
- dsr1-fp8-h100-dynamo-trt
description:
- "Add DeepSeek R1 FP8 H100 Dynamo TRT-LLM disaggregated multinode configurations"
- "fix model_prefix bug from https://github.com/InferenceMAX/InferenceMAX/pull/651"
pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/663
5 changes: 3 additions & 2 deletions runners/launch_h100-dgxc-slurm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ CONTAINER_KEY=$(echo "$IMAGE" | sed 's|nvcr.io/|nvcr.io#|')
# Map container image to local squash file
SQUASH_FILE="/mnt/nfs/sa-shared/containers/$(echo "$IMAGE" | sed 's|nvcr.io/||' | sed 's/[\/:@#]/+/g').sqsh"

if [[ $MODEL_PREFIX == "DeepSeek-R1-0528" ]]; then
if [[ $MODEL_PREFIX == "dsr1" && $PRECISION == "fp8" ]]; then
export MODEL_PATH="/mnt/numa1/shared/models/dsr1-fp8"
export SERVED_MODEL_NAME="DeepSeek-R1-0528"
export SRT_SLURM_MODEL_PREFIX="DeepSeek-R1-0528"
else
echo "Unsupported model prefix: $MODEL_PREFIX. Supported prefixes are: DeepSeek-R1-0528"
exit 1
Expand All @@ -64,7 +65,7 @@ network_interface: ""
srtctl_root: "${GITHUB_WORKSPACE}/${SRT_REPO_DIR}"
# Model path aliases
model_paths:
"${MODEL_PREFIX}": "${MODEL_PATH}"
"${SRT_SLURM_MODEL_PREFIX}": "${MODEL_PATH}"
containers:
latest: "${SQUASH_FILE}"
"${CONTAINER_KEY}": "${SQUASH_FILE}"
Expand Down
Loading