From 1e133721c3011bbee6351c28e8c943fd6a9859c4 Mon Sep 17 00:00:00 2001 From: jthomson04 Date: Mon, 2 Feb 2026 12:05:42 -0800 Subject: [PATCH 1/8] initial configs Signed-off-by: jthomson04 --- .github/configs/nvidia-master.yaml | 580 +++++++++++++++++++++++++++++ 1 file changed, 580 insertions(+) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index f12b586f4..87b6945e5 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -2290,6 +2290,586 @@ dsr1-fp4-gb200-dynamo-trt: ep: 16 dp-attn: true + +dsr1-fp8-gb200-dynamo-trt: + image: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2 + model: deepseek-ai/DeepSeek-R1-0528 + model-prefix: dsr1-fp8 + runner: gb200 + precision: fp8 + framework: dynamo-trt + multinode: true + disagg: true + seq-len-configs: + # 1k1k MTP configs + - isl: 1024 + osl: 1024 + search-space: + - spec-decoding: "mtp" + conc-list: [4301] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep8_batch512_eplb0_mtp1_4301.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep8_batch512_eplb0_mtp1_4301.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [2151] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep8_batch256_eplb0_mtp1_2151.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep8_batch256_eplb0_mtp1_2151.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [1229] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1_1229.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1_1229.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [615] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep32_batch16_eplb0_mtp3_615.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep32_batch16_eplb0_mtp3_615.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [36] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch8_eplb0_mtp3_36.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch8_eplb0_mtp3_36.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [18] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch4_eplb0_mtp3_18.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch4_eplb0_mtp3_18.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [9] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch2_eplb0_mtp3_9.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch2_eplb0_mtp3_9.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + # 1k1k STP configs + - conc-list: [6144] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep8_batch768_eplb0_mtp0_6144.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep8_batch768_eplb0_mtp0_6144.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [4301] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0_4301.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0_4301.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [2151] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep16_batch128_eplb0_mtp0_2151.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep16_batch128_eplb0_mtp0_2151.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [1127] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep32_batch32_eplb0_mtp0_1127.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep32_batch32_eplb0_mtp0_1127.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [256] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep32_batch8_eplb0_mtp0_256.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep32_batch8_eplb0_mtp0_256.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [27] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen3_tep8_batch8_eplb0_mtp0_27.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen3_tep8_batch8_eplb0_mtp0_27.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [3] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0_3.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0_3.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + # 1k8k MTP configs + - isl: 1024 + osl: 8192 + search-space: + - spec-decoding: "mtp" + conc-list: [8192] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k8k/mtp/ctx1_gen2_dep16_batch256_eplb0_mtp1_8192.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k8k/mtp/ctx1_gen2_dep16_batch256_eplb0_mtp1_8192.yaml" + decode: + num-worker: 2 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [2152] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k8k/mtp/ctx1_gen2_dep16_batch64_eplb0_mtp1_2152.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k8k/mtp/ctx1_gen2_dep16_batch64_eplb0_mtp1_2152.yaml" + decode: + num-worker: 2 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [564] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k8k/mtp/ctx1_gen1_dep32_batch16_eplb0_mtp3_564.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k8k/mtp/ctx1_gen1_dep32_batch16_eplb0_mtp3_564.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [72] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k8k/mtp/ctx1_gen1_dep32_batch2_eplb0_mtp3_72.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k8k/mtp/ctx1_gen1_dep32_batch2_eplb0_mtp3_72.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [4, 8] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k8k/mtp/ctx1_gen4_tep8_batch2_eplb0_mtp3_8.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k8k/mtp/ctx1_gen4_tep8_batch2_eplb0_mtp3_8.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + # 1k8k STP configs + - conc-list: [8192] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k8k/stp/ctx1_gen2_dep16_batch256_eplb0_mtp0_8192.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k8k/stp/ctx1_gen2_dep16_batch256_eplb0_mtp0_8192.yaml" + decode: + num-worker: 2 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [2048] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k8k/stp/ctx1_gen1_dep32_batch64_eplb0_mtp0_2048.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k8k/stp/ctx1_gen1_dep32_batch64_eplb0_mtp0_2048.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [564] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k8k/stp/ctx1_gen1_dep32_batch16_eplb0_mtp0_564.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k8k/stp/ctx1_gen1_dep32_batch16_eplb0_mtp0_564.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [36] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k8k/stp/ctx1_gen4_tep8_batch8_eplb0_mtp0_36.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k8k/stp/ctx1_gen4_tep8_batch8_eplb0_mtp0_36.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [4] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k8k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k8k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + # 8k1k MTP configs + - isl: 8192 + osl: 1024 + search-space: + - spec-decoding: "mtp" + conc-list: [666] + prefill: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx3_gen1_dep8_batch64_eplb0_mtp3_666.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx3_gen1_dep8_batch64_eplb0_mtp3_666.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [666] + prefill: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx5_gen1_dep16_batch32_eplb0_mtp3_666.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx5_gen1_dep16_batch32_eplb0_mtp3_666.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [333] + prefill: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx3_gen1_dep16_batch16_eplb0_mtp3_333.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx3_gen1_dep16_batch16_eplb0_mtp3_333.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [333] + prefill: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx4_gen1_dep32_batch8_eplb0_mtp3_333.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx4_gen1_dep32_batch8_eplb0_mtp3_333.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [90] + prefill: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx2_gen1_dep32_batch2_eplb0_mtp3_90.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx2_gen1_dep32_batch2_eplb0_mtp3_90.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [15] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx1_gen3_tep8_batch4_eplb0_mtp3_15.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx1_gen3_tep8_batch4_eplb0_mtp3_15.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [6] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx1_gen3_tep8_batch2_eplb0_mtp3_6.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx1_gen3_tep8_batch2_eplb0_mtp3_6.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + # 8k1k STP configs + search-space: + - conc-list: [1229] + prefill: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx5_gen1_dep16_batch64_eplb0_mtp0_1229.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx5_gen1_dep16_batch64_eplb0_mtp0_1229.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [666] + prefill: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx4_gen1_dep32_batch16_eplb0_mtp0_666.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx4_gen1_dep32_batch16_eplb0_mtp0_666.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [615] + prefill: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx3_gen1_dep16_batch32_eplb0_mtp0_615.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx3_gen1_dep16_batch32_eplb0_mtp0_615.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [333] + prefill: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx2_gen1_dep32_batch8_eplb0_mtp0_333.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx2_gen1_dep32_batch8_eplb0_mtp0_333.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [63] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0_63.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0_63.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [18] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch4_eplb0_mtp0_18.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch4_eplb0_mtp0_18.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [6] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0_6.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0_6.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + + dsr1-fp8-gb200-dynamo-sglang: image: lmsysorg/sglang:v0.5.5.post2 model: deepseek-ai/DeepSeek-R1-0528 From e663d90f2e0cb3a16f8f198118f5e0be8de0dfbc Mon Sep 17 00:00:00 2001 From: jthomson04 Date: Mon, 2 Feb 2026 12:07:26 -0800 Subject: [PATCH 2/8] update perf changelog Signed-off-by: jthomson04 --- perf-changelog.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 3ac387147..a56255be6 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -325,3 +325,9 @@ - "Disable torch.compile for MI355X DeepSeek-R1 FP8 SGLang" - "set cuda-graph-max-bs to CONC" pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/613 + +-config-keys: + - dsr1-fp8-gb200-dynamo-trt + description: + - "Update GB200 Dynamo TRT image from 0.5.1-rc0.pre3 to 0.8.1.post2" + pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/617 From 9cb5200974f9babaa5d477c48c469ff0071c53a4 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Mon, 2 Feb 2026 23:00:56 +0000 Subject: [PATCH 3/8] Fix YAML syntax error in perf-changelog.yaml Add missing space after dash in config-keys entry for dsr1-fp8-gb200-dynamo-trt. '-config-keys:' should be '- config-keys:' Co-authored-by: functionstackx --- perf-changelog.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index a56255be6..2dd9becb3 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -326,8 +326,8 @@ - "set cuda-graph-max-bs to CONC" pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/613 --config-keys: - - dsr1-fp8-gb200-dynamo-trt +- config-keys: + - dsr1-fp8-gb200-dynamo-trt description: - "Update GB200 Dynamo TRT image from 0.5.1-rc0.pre3 to 0.8.1.post2" pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/617 From e5efea300b4c9095acf7030feeb2ca233e95da10 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Mon, 2 Feb 2026 23:09:53 +0000 Subject: [PATCH 4/8] Fix model-prefix typo: dsr1-fp8 -> dsr1 The model-prefix 'dsr1-fp8' is not a supported prefix. The supported prefixes are 'gptoss' and 'dsr1'. Changed to 'dsr1' to fix the launch_gb200-nv.sh runner error. Co-authored-by: functionstackx --- .github/configs/nvidia-master.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 87b6945e5..3eaa7b5be 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -2294,7 +2294,7 @@ dsr1-fp4-gb200-dynamo-trt: dsr1-fp8-gb200-dynamo-trt: image: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2 model: deepseek-ai/DeepSeek-R1-0528 - model-prefix: dsr1-fp8 + model-prefix: dsr1 runner: gb200 precision: fp8 framework: dynamo-trt From ce38029c619fa4ad516274108a526ba6d9dc7216 Mon Sep 17 00:00:00 2001 From: jthomson04 Date: Mon, 2 Feb 2026 15:50:36 -0800 Subject: [PATCH 5/8] fix gb200 Signed-off-by: jthomson04 --- .github/configs/nvidia-master.yaml | 2 +- runners/launch_gb200-nv.sh | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 3eaa7b5be..87b6945e5 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -2294,7 +2294,7 @@ dsr1-fp4-gb200-dynamo-trt: dsr1-fp8-gb200-dynamo-trt: image: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2 model: deepseek-ai/DeepSeek-R1-0528 - model-prefix: dsr1 + model-prefix: dsr1-fp8 runner: gb200 precision: fp8 framework: dynamo-trt diff --git a/runners/launch_gb200-nv.sh b/runners/launch_gb200-nv.sh index 1944e04e0..6c828fa52 100755 --- a/runners/launch_gb200-nv.sh +++ b/runners/launch_gb200-nv.sh @@ -21,6 +21,9 @@ elif [[ $FRAMEWORK == "dynamo-trt" ]]; then elif [[ $MODEL_PREFIX == "dsr1" ]]; then export MODEL_PATH="/mnt/lustre01/models/deepseek-r1-0528-fp4-v2/" export SERVED_MODEL_NAME="deepseek-r1-fp4" + elif [[ $MODEL_PREFIX == "dsr1-fp8" ]]; then + export MODEL_PATH="/mnt/lustre01/models/deepseek-r1-0528/" + export SERVED_MODEL_NAME="deepseek-r1-fp8" else echo "Unsupported model prefix: $MODEL_PREFIX. Supported prefixes are: gptoss or dsr1" exit 1 @@ -42,8 +45,6 @@ NGINX_SQUASH_FILE="/mnt/lustre01/users-public/sa-shared/$(echo "$NGINX_IMAGE" | srun -N 1 -A $SLURM_ACCOUNT -p $SLURM_PARTITION bash -c "enroot import -o $SQUASH_FILE docker://$IMAGE" srun -N 1 -A $SLURM_ACCOUNT -p $SLURM_PARTITION bash -c "enroot import -o $NGINX_SQUASH_FILE docker://$NGINX_IMAGE" - - export ISL="$ISL" export OSL="$OSL" From 69d903a97d4621ad4bb197fd364f9724db72fbd3 Mon Sep 17 00:00:00 2001 From: jthomson04 Date: Mon, 2 Feb 2026 15:57:56 -0800 Subject: [PATCH 6/8] fix nvidia-master.yaml Signed-off-by: jthomson04 --- .github/configs/nvidia-master.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 87b6945e5..3c541ff38 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -2769,7 +2769,6 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: false # 8k1k STP configs - search-space: - conc-list: [1229] prefill: num-worker: 5 From 05cde10bd259a16234ba30011fdc79ecd3eda1f5 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Tue, 3 Feb 2026 05:43:32 +0000 Subject: [PATCH 7/8] Update perf-changelog with detailed DSR1 FP8 GB200 scenario descriptions Co-authored-by: Cameron Quilici --- perf-changelog.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 2dd9becb3..e1fab89e1 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -329,5 +329,10 @@ - config-keys: - dsr1-fp8-gb200-dynamo-trt description: - - "Update GB200 Dynamo TRT image from 0.5.1-rc0.pre3 to 0.8.1.post2" + - "Add DeepSeek R1 FP8 GB200 Dynamo TRT-LLM disaggregated multinode configurations" + - "Image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2" + - "1k1k: 14 scenarios (7 MTP, 7 STP) with varying DP attention/TEP modes" + - "1k8k: 10 scenarios (5 MTP, 5 STP) for long output generation" + - "8k1k: 14 scenarios (7 MTP, 7 STP) for long context workloads" + - "Prefill workers: 1-5P, Decode workers: 1-4D, TP/EP: 8/16/32" pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/617 From d9e73bd71d20e8d35bb76e7292d82cb73c9306e0 Mon Sep 17 00:00:00 2001 From: jthomson04 Date: Mon, 2 Feb 2026 22:36:21 -0800 Subject: [PATCH 8/8] update dsr1 fp8 path Signed-off-by: jthomson04 --- runners/launch_gb200-nv.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runners/launch_gb200-nv.sh b/runners/launch_gb200-nv.sh index 6c828fa52..83e94005e 100755 --- a/runners/launch_gb200-nv.sh +++ b/runners/launch_gb200-nv.sh @@ -22,7 +22,7 @@ elif [[ $FRAMEWORK == "dynamo-trt" ]]; then export MODEL_PATH="/mnt/lustre01/models/deepseek-r1-0528-fp4-v2/" export SERVED_MODEL_NAME="deepseek-r1-fp4" elif [[ $MODEL_PREFIX == "dsr1-fp8" ]]; then - export MODEL_PATH="/mnt/lustre01/models/deepseek-r1-0528/" + export MODEL_PATH="/mnt/numa1/groups/sa-shared/models/deepseek-r1-0528/" export SERVED_MODEL_NAME="deepseek-r1-fp8" else echo "Unsupported model prefix: $MODEL_PREFIX. Supported prefixes are: gptoss or dsr1"