From 3b88a073c722b636d63340d8ec478c0b6454133c Mon Sep 17 00:00:00 2001
From: Nathan Levin <nlevin@nvidia.com>
Date: Thu, 5 Feb 2026 17:34:13 +0000
Subject: [PATCH 01/10] Update dsr1-fp8-h100-dynamo-trt with verified 29 Pareto
 configs

Update H100 section with verified Pareto-optimal points:

1k1k ISL/OSL:
- MTP: 9 configs (conc: 6, 9, 30, 60, 117, 231, 462, 615, 1229)
- STP: 9 configs (conc: 6, 9, 30, 60, 231, 462, 924, 1845, 4916)

8k1k ISL/OSL (new):
- MTP: 6 configs (conc: 6, 9, 30, 77, 78, 154)
- STP: 5 configs (conc: 6, 9, 30, 154, 308)

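Every prefill and decode worker uses TP=16, EP=16, so a single worker spans
2 nodes (16 GPUs). A config's total GPU count is therefore
16 x (prefill workers + decode workers).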
---
 .github/configs/nvidia-master.yaml | 442 +++++++++++++++++++++++++++++
 1 file changed, 442 insertions(+)

diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index a2e036510..eb21a9587 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -1571,6 +1571,448 @@ dsr1-fp8-h200-dynamo-trt:
         ep: 8
         dp-attn: true
 
+dsr1-fp8-h100-dynamo-trt:
+  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1
+  model: deepseek-ai/DeepSeek-R1-0528
+  model-prefix: dsr1
+  runner: h100-multinode-slurm
+  precision: fp8
+  framework: dynamo-trt
+  multinode: true
+  disagg: true
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    # MTP configurations
+    - spec-decoding: "mtp"
+      conc-list: [6]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: false
+    - spec-decoding: "mtp"
+      conc-list: [9]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: false
+    - spec-decoding: "mtp"
+      conc-list: [30]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: false
+    - spec-decoding: "mtp"
+      conc-list: [60]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch16_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch16_eplb0_mtp3.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: false
+    - spec-decoding: "mtp"
+      conc-list: [117]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch32_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch32_eplb0_mtp3.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: false
+    - spec-decoding: "mtp"
+      conc-list: [231]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_dep16_batch4_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_dep16_batch4_eplb0_mtp3.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: true
+    - spec-decoding: "mtp"
+      conc-list: [615]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp2.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp2.yaml"
+      decode:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+    - spec-decoding: "mtp"
+      conc-list: [462]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch128_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch128_eplb0_mtp3.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: false
+    - spec-decoding: "mtp"
+      conc-list: [1229]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1.yaml"
+      decode:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+    # Non-MTP configurations (STP)
+    - conc-list: [6]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: false
+    - conc-list: [9]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: false
+    - conc-list: [30]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: false
+    - conc-list: [60]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch16_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch16_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: false
+    - conc-list: [231]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch4_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch4_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: true
+    - conc-list: [462]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch8_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch8_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: true
+    - conc-list: [924]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch16_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch16_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: true
+    - conc-list: [1845]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch32_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch32_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: true
+    - conc-list: [4916]
+      prefill:
+        num-worker: 2
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx2_gen1_dep16_batch256_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx2_gen1_dep16_batch256_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+  - isl: 8192
+    osl: 1024
+    search-space:
+    # MTP configurations (6 points)
+    - spec-decoding: "mtp"
+      conc-list: [6]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: false
+    - spec-decoding: "mtp"
+      conc-list: [9]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: false
+    - spec-decoding: "mtp"
+      conc-list: [30]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: false
+    - spec-decoding: "mtp"
+      conc-list: [77]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen1_dep16_batch4_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen1_dep16_batch4_eplb0_mtp3.yaml"
+      decode:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+    - spec-decoding: "mtp"
+      conc-list: [78]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml"
+      decode:
+        num-worker: 2
+        tp: 16
+        ep: 16
+        dp-attn: false
+    - spec-decoding: "mtp"
+      conc-list: [154]
+      prefill:
+        num-worker: 2
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx2_gen1_dep16_batch8_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx2_gen1_dep16_batch8_eplb0_mtp3.yaml"
+      decode:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+    # STP configurations (5 points)
+    - conc-list: [6]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: false
+    - conc-list: [9]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: false
+    - conc-list: [30]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 3
+        tp: 16
+        ep: 16
+        dp-attn: false
+    - conc-list: [154]
+      prefill:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen2_tep16_batch64_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen2_tep16_batch64_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 2
+        tp: 16
+        ep: 16
+        dp-attn: false
+    - conc-list: [308]
+      prefill:
+        num-worker: 2
+        tp: 16
+        ep: 16
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx2_gen1_dep16_batch16_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx2_gen1_dep16_batch16_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+
 gptoss-fp4-b200-trt:
   image: nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2.post2
   model: openai/gpt-oss-120b

From c4103f5a7b52a32dd2534079a7b10d46bbc56d53 Mon Sep 17 00:00:00 2001
From: Sahithi Chigurupati <chigurupati.sahithi@gmail.com>
Date: Thu, 5 Feb 2026 12:57:05 -0800
Subject: [PATCH 02/10] add h100 multinode launch script

---
 .github/configs/runners.yaml      |   2 +
 runners/launch_h100-dgxc-slurm.sh | 174 ++++++++++++++++++++++++++++++
 2 files changed, 176 insertions(+)
 create mode 100644 runners/launch_h100-dgxc-slurm.sh

diff --git a/.github/configs/runners.yaml b/.github/configs/runners.yaml
index 86ef27315..eb9602ecb 100644
--- a/.github/configs/runners.yaml
+++ b/.github/configs/runners.yaml
@@ -66,3 +66,5 @@ b300:
 - 'b300-nv_0'
 gb300:
 - 'gb300-nv_0'
+h100-multinode-slurm:
+- 'h100-dgxc-slurm_0'
diff --git a/runners/launch_h100-dgxc-slurm.sh b/runners/launch_h100-dgxc-slurm.sh
new file mode 100644
index 000000000..be5a05c58
--- /dev/null
+++ b/runners/launch_h100-dgxc-slurm.sh
@@ -0,0 +1,174 @@
+#!/usr/bin/bash
+
+set -x
+
+echo "Cloning srt-slurm repository..."
+SRT_REPO_DIR="srt-slurm"
+if [ -d "$SRT_REPO_DIR" ]; then
+    echo "Removing existing $SRT_REPO_DIR..."
+    rm -rf "$SRT_REPO_DIR"
+fi
+
+git clone https://github.com/ishandhanani/srt-slurm.git "$SRT_REPO_DIR"
+cd "$SRT_REPO_DIR"
+git checkout trtllm-h100
+
+echo "Installing srtctl..."
+curl -LsSf https://astral.sh/uv/install.sh | sh
+source $HOME/.local/bin/env
+
+uv venv
+source .venv/bin/activate
+uv pip install -e .
+
+if ! command -v srtctl &> /dev/null; then
+    echo "Error: Failed to install srtctl"
+    exit 1
+fi
+
+echo "Configs available at: $SRT_REPO_DIR/"
+
+export SLURM_PARTITION="hpc-gpu-1"
+export SLURM_ACCOUNT="customer"
+
+# Convert IMAGE to srt-slurm format (nvcr.io/ -> nvcr.io#)
+CONTAINER_KEY=$(echo "$IMAGE" | sed 's|nvcr.io/|nvcr.io#|')
+
+# Use patched container for dynamo-trt (MNNVL fix), otherwise derive from IMAGE
+if [[ "$IMAGE" == *"ai-dynamo/tensorrtllm-runtime"* ]]; then
+    SQUASH_FILE="/mnt/nfs/lustre/containers/dynamo-trtllm-mnnvl-fix.sqsh"
+else
+    SQUASH_FILE="/mnt/nfs/slurm-shared/containers/$(echo "$IMAGE" | sed 's|nvcr.io/||' | sed 's/[\/:@#]/+/g').sqsh"
+fi
+
+if [[ $MODEL_PREFIX == "DeepSeek-R1-0528" ]]; then
+    export MODEL_PATH="/mnt/numa1/shared/models/dsr1-fp8"
+    export SERVED_MODEL_NAME="DeepSeek-R1-0528"
+else
+    echo "Unsupported model prefix: $MODEL_PREFIX. Supported prefixes are: DeepSeek-R1-0528"
+    exit 1
+fi
+
+export ISL="$ISL"
+export OSL="$OSL"
+
+# Create srtslurm.yaml for srtctl
+echo "Creating srtslurm.yaml configuration..."
+cat > srtslurm.yaml <<EOF
+# SRT SLURM Configuration for H100
+
+# Default SLURM settings
+default_account: "${SLURM_ACCOUNT}"
+default_partition: "${SLURM_PARTITION}"
+default_time_limit: "4:00:00"
+# Resource defaults
+gpus_per_node: 8
+network_interface: ""
+# Path to srtctl repo root (where the configs live)
+srtctl_root: "${GITHUB_WORKSPACE}/${SRT_REPO_DIR}"
+# Model path aliases
+model_paths:
+  "${MODEL_PREFIX}": "${MODEL_PATH}"
+containers:
+  latest: "${SQUASH_FILE}"
+  "${CONTAINER_KEY}": "${SQUASH_FILE}"
+# SLURM directive compatibility
+use_gpus_per_node_directive: true
+use_segment_sbatch_directive: false
+use_exclusive_sbatch_directive: false
+EOF
+
+echo "Generated srtslurm.yaml:"
+cat srtslurm.yaml
+
+echo "Running make setup..."
+make setup ARCH=x86_64
+
+echo "Submitting job with srtctl..."
+SRTCTL_OUTPUT=$(srtctl apply -f "$CONFIG_FILE" --tags "h100,${MODEL_PREFIX},${PRECISION},${ISL}x${OSL},infmax-$(date +%Y%m%d)" 2>&1)
+echo "$SRTCTL_OUTPUT"
+
+# Extract JOB_ID from srtctl output: prefer the "✅ Job N" line, else any "Job N"; keep only the first match
+JOB_ID=$({ echo "$SRTCTL_OUTPUT" | grep -oP '✅ Job \K[0-9]+' || echo "$SRTCTL_OUTPUT" | grep -oP 'Job \K[0-9]+'; } | head -n 1)
+
+if [ -z "$JOB_ID" ]; then
+    echo "Error: Failed to extract JOB_ID from srtctl output"
+    exit 1
+fi
+
+echo "Extracted JOB_ID: $JOB_ID"
+
+# Poll squeue every 30s until it prints nothing for this job (errors are discarded, so they also end the loop)
+echo "Waiting for job $JOB_ID to complete..."
+while [ -n "$(squeue -j "$JOB_ID" --noheader 2>/dev/null)" ]; do
+    echo "Job $JOB_ID still running..."
+    squeue -j "$JOB_ID"
+    sleep 30
+done
+echo "Job $JOB_ID completed!"
+
+echo "Collecting results..."
+
+# Use the JOB_ID to find the logs directory
+# srtctl creates logs in outputs/JOB_ID/logs/ (relative to the srt-slurm checkout we cd'ed into)
+LOGS_DIR="outputs/$JOB_ID/logs"
+
+if [ ! -d "$LOGS_DIR" ]; then
+    echo "Error: Logs directory not found at $LOGS_DIR"
+    exit 1
+fi
+
+echo "Found logs directory: $LOGS_DIR"
+
+cat "$LOGS_DIR/sweep_${JOB_ID}.log"
+
+for file in "$LOGS_DIR"/*; do
+    if [ -f "$file" ]; then
+        tail -n 500 "$file"
+    fi
+done
+
+# Find result subdirectories directly under LOGS_DIR (directory names matching *isl*osl*)
+RESULT_SUBDIRS=$(find "$LOGS_DIR" -maxdepth 1 -type d -name "*isl*osl*" 2>/dev/null)
+
+if [ -z "$RESULT_SUBDIRS" ]; then
+    echo "Warning: No result subdirectories found in $LOGS_DIR"
+else
+    # Process results from all configurations
+    for result_subdir in $RESULT_SUBDIRS; do
+        echo "Processing result subdirectory: $result_subdir"
+
+        # Use the subdirectory's base name as the configuration label in output filenames
+        CONFIG_NAME=$(basename "$result_subdir")
+
+        # Find all result JSON files
+        RESULT_FILES=$(find "$result_subdir" -name "results_concurrency_*.json" 2>/dev/null)
+
+        for result_file in $RESULT_FILES; do
+            if [ -f "$result_file" ]; then
+                # Extract metadata from filename
+                # Files are of the format "results_concurrency_{concurrency}_gpus_{num gpus}_ctx_{num ctx}_gen_{num gen}.json"
+                filename=$(basename "$result_file")
+                concurrency=$(echo "$filename" | sed -n 's/results_concurrency_\([0-9]*\)_gpus_.*/\1/p')
+                gpus=$(echo "$filename" | sed -n 's/results_concurrency_[0-9]*_gpus_\([0-9]*\)_ctx_.*/\1/p')
+                ctx=$(echo "$filename" | sed -n 's/.*_ctx_\([0-9]*\)_gen_.*/\1/p')
+                gen=$(echo "$filename" | sed -n 's/.*_gen_\([0-9]*\)\.json/\1/p')
+
+                echo "Processing concurrency $concurrency with $gpus GPUs (ctx: $ctx, gen: $gen): $result_file"
+
+                WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus_${gpus}_ctx_${ctx}_gen_${gen}.json"
+                cp "$result_file" "$WORKSPACE_RESULT_FILE"
+
+                echo "Copied result file to: $WORKSPACE_RESULT_FILE"
+            fi
+        done
+    done
+fi
+
+echo "All result files processed"
+
+# Cleanup
+echo "Cleaning up..."
+deactivate 2>/dev/null || true
+rm -rf .venv
+echo "Cleanup complete"

From 46456c0816228d5bfadf5e41840d168c90e3b6c0 Mon Sep 17 00:00:00 2001
From: Sahithi Chigurupati <chigurupati.sahithi@gmail.com>
Date: Thu, 5 Feb 2026 13:25:51 -0800
Subject: [PATCH 03/10] modify model prefix

---
 .github/configs/nvidia-master.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index eb21a9587..ece670591 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -1574,7 +1574,7 @@ dsr1-fp8-h200-dynamo-trt:
 dsr1-fp8-h100-dynamo-trt:
   image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1
   model: deepseek-ai/DeepSeek-R1-0528
-  model-prefix: dsr1
+  model-prefix: DeepSeek-R1-0528
   runner: h100-multinode-slurm
   precision: fp8
   framework: dynamo-trt

From 1fc4cd750118acdc88c277d89fb18d6c7858a45a Mon Sep 17 00:00:00 2001
From: nlevin-ui <nlevin@nvidia.com>
Date: Thu, 5 Feb 2026 15:34:43 -0700
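Subject: [PATCH 04/10] Update .github/configs/nvidia-master.yaml

Reorder the 1k1k MTP search-space entries so that conc 462 comes before
conc 615, keeping the list in ascending concurrency order.
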
Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com>
---
 .github/configs/nvidia-master.yaml | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index ece670591..0ad6e30a8 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -1676,35 +1676,35 @@ dsr1-fp8-h100-dynamo-trt:
         ep: 16
         dp-attn: true
     - spec-decoding: "mtp"
-      conc-list: [615]
+      conc-list: [462]
       prefill:
         num-worker: 1
         tp: 16
         ep: 16
         dp-attn: true
         additional-settings:
-        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp2.yaml
-        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp2.yaml"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch128_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch128_eplb0_mtp3.yaml"
       decode:
-        num-worker: 1
+        num-worker: 3
         tp: 16
         ep: 16
-        dp-attn: true
+        dp-attn: false
     - spec-decoding: "mtp"
-      conc-list: [462]
+      conc-list: [615]
       prefill:
         num-worker: 1
         tp: 16
         ep: 16
         dp-attn: true
         additional-settings:
-        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch128_eplb0_mtp3.yaml
-        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch128_eplb0_mtp3.yaml"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp2.yaml
+        - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp2.yaml"
       decode:
-        num-worker: 3
+        num-worker: 1
         tp: 16
         ep: 16
-        dp-attn: false
+        dp-attn: true
     - spec-decoding: "mtp"
       conc-list: [1229]
       prefill:

From c42032e809e1b765dd9e01b493a905628ec2e22e Mon Sep 17 00:00:00 2001
From: Nathan Levin <nlevin@nvidia.com>
Date: Thu, 5 Feb 2026 23:12:01 +0000
Subject: [PATCH 05/10] Remove hardcoded container override in H100 launch
 script

Use consistent sed-based path derivation for all container images.
---
 runners/launch_h100-dgxc-slurm.sh | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/runners/launch_h100-dgxc-slurm.sh b/runners/launch_h100-dgxc-slurm.sh
index be5a05c58..18eddf1c0 100644
--- a/runners/launch_h100-dgxc-slurm.sh
+++ b/runners/launch_h100-dgxc-slurm.sh
@@ -34,12 +34,8 @@ export SLURM_ACCOUNT="customer"
 # Convert IMAGE to srt-slurm format (nvcr.io/ -> nvcr.io#)
 CONTAINER_KEY=$(echo "$IMAGE" | sed 's|nvcr.io/|nvcr.io#|')
 
-# Use patched container for dynamo-trt (MNNVL fix), otherwise derive from IMAGE
-if [[ "$IMAGE" == *"ai-dynamo/tensorrtllm-runtime"* ]]; then
-    SQUASH_FILE="/mnt/nfs/lustre/containers/dynamo-trtllm-mnnvl-fix.sqsh"
-else
-    SQUASH_FILE="/mnt/nfs/slurm-shared/containers/$(echo "$IMAGE" | sed 's|nvcr.io/||' | sed 's/[\/:@#]/+/g').sqsh"
-fi
+# Map container image to local squash file
+SQUASH_FILE="/mnt/nfs/slurm-shared/containers/$(echo "$IMAGE" | sed 's|nvcr.io/||' | sed 's/[\/:@#]/+/g').sqsh"
 
 if [[ $MODEL_PREFIX == "DeepSeek-R1-0528" ]]; then
     export MODEL_PATH="/mnt/numa1/shared/models/dsr1-fp8"

From 7cf62e5f0f0bfb2967830fa683e7219deb200103 Mon Sep 17 00:00:00 2001
From: Nathan Levin <nlevin@nvidia.com>
Date: Fri, 6 Feb 2026 00:45:09 +0000
Subject: [PATCH 06/10] Update H100 image to tensorrtllm-runtime:0.8.1.post3

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .github/configs/nvidia-master.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index 0ad6e30a8..8ec22da4e 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -1,5 +1,5 @@
 dsr1-fp4-b200-dynamo-trt:
-  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1
+  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post3
   model: deepseek-r1-fp4
   model-prefix: dsr1
   runner: b200-multinode-slurm
@@ -384,7 +384,7 @@ dsr1-fp4-b200-dynamo-trt:
         dp-attn: true
 
 dsr1-fp4-b300-dynamo-trt:
-  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1
+  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post3
   model: deepseek-r1-fp4
   model-prefix: dsr1
   runner: b300
@@ -1087,7 +1087,7 @@ dsr1-fp8-h200-trt-mtp:
     - { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 256, spec-decoding: mtp }
 
 dsr1-fp8-h200-dynamo-trt:
-  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1
+  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post3
   model: deepseek-ai/DeepSeek-R1-0528
   model-prefix: dsr1
   runner: h200-multinode-slurm
@@ -1572,7 +1572,7 @@ dsr1-fp8-h200-dynamo-trt:
         dp-attn: true
 
 dsr1-fp8-h100-dynamo-trt:
-  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1
+  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post3
   model: deepseek-ai/DeepSeek-R1-0528
   model-prefix: DeepSeek-R1-0528
   runner: h100-multinode-slurm

From af0736424e8d7c6273b9538c43ef885dfc2b7b66 Mon Sep 17 00:00:00 2001
From: nlevin-ui <nlevin@nvidia.com>
Date: Thu, 5 Feb 2026 20:56:16 -0700
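Subject: [PATCH 07/10] Update runners/launch_h100-dgxc-slurm.sh

Check out the sa-submission-q1-2026 ref of srt-slurm instead of
trtllm-h100; this is the ref that the recipe URLs in nvidia-master.yaml
point to.
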
Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com>
---
 runners/launch_h100-dgxc-slurm.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runners/launch_h100-dgxc-slurm.sh b/runners/launch_h100-dgxc-slurm.sh
index 18eddf1c0..6c6126aa8 100644
--- a/runners/launch_h100-dgxc-slurm.sh
+++ b/runners/launch_h100-dgxc-slurm.sh
@@ -11,7 +11,7 @@ fi
 
 git clone https://github.com/ishandhanani/srt-slurm.git "$SRT_REPO_DIR"
 cd "$SRT_REPO_DIR"
-git checkout trtllm-h100
+git checkout sa-submission-q1-2026
 
 echo "Installing srtctl..."
 curl -LsSf https://astral.sh/uv/install.sh | sh

From aa3af24af726678042fff26cd4914f4aff3a5872 Mon Sep 17 00:00:00 2001
From: Nathan Levin <nlevin@nvidia.com>
Date: Fri, 6 Feb 2026 04:01:01 +0000
Subject: [PATCH 08/10] fix: revert container to post1 for non-H100 configs

Only dsr1-fp8-h100-dynamo-trt should use 0.8.1.post3.
Revert B200, B300, and H200 configs back to 0.8.1.post1.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .github/configs/nvidia-master.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index 8ec22da4e..65f78234b 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -1,5 +1,5 @@
 dsr1-fp4-b200-dynamo-trt:
-  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post3
+  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1
   model: deepseek-r1-fp4
   model-prefix: dsr1
   runner: b200-multinode-slurm
@@ -384,7 +384,7 @@ dsr1-fp4-b200-dynamo-trt:
         dp-attn: true
 
 dsr1-fp4-b300-dynamo-trt:
-  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post3
+  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1
   model: deepseek-r1-fp4
   model-prefix: dsr1
   runner: b300
@@ -1087,7 +1087,7 @@ dsr1-fp8-h200-trt-mtp:
     - { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 256, spec-decoding: mtp }
 
 dsr1-fp8-h200-dynamo-trt:
-  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post3
+  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1
   model: deepseek-ai/DeepSeek-R1-0528
   model-prefix: dsr1
   runner: h200-multinode-slurm

From 3911118c62279c9aaa036d26d2605eadde23e421 Mon Sep 17 00:00:00 2001
From: Nathan Levin <nlevin@nvidia.com>
Date: Fri, 6 Feb 2026 04:10:13 +0000
Subject: [PATCH 09/10] Add perf-changelog entry for dsr1-fp8-h100-dynamo-trt

Documents the new H100 FP8 disaggregated TRT-LLM configurations
using tensorrtllm-runtime:0.8.1.post3 container.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 perf-changelog.yaml | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 1ddfc2ee1..84494e065 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -400,14 +400,14 @@
     - "Update GPT-OSS FP4 B200 TRT pareto configurations and new container image"
     - "Extend maximum concurrency to 256 across all sequence lengths"
   pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/639
-  
+
 - config-keys:
     - dsr1-fp8-mi355x-sglang-disagg
   description:
     - "Add --use-chat-template argument to benchmark_serving script"
     - "Without this arg, MTP acceptance rates are artificially high for DeepSeek with MTP"
   pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/647
-  
+
 - config-keys: 
     - dsr1-fp8-b200-sglang-mtp
   description:
@@ -417,7 +417,7 @@
     - "Update launch_b200-dgxc.sh to support SPEC_SUFFIX for MTP script selection"
     - "Configurations: TP=8, EP=1, concurrency 4-64 for 1k1k, 1k8k, and 8k1k sequence lengths"
   pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/626
-  
+
 - config-keys:
     - dsr1-fp4-b200-trt-mtp
   description:
@@ -425,5 +425,9 @@
     - "Enable dynamic piecewise CUDA graphs for several conditions"
     - "Adjust TP8/TP4 search space to reduce overlapping points"
   pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/642
-  
-  
+
+- config-keys:
+    - dsr1-fp8-h100-dynamo-trt
+  description:
+    - "Add DeepSeek R1 FP8 H100 Dynamo TRT-LLM disaggregated multinode configurations"
+  pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/651

From d496e1064b052bd4b2f9554ac97f3fd0d8781c4e Mon Sep 17 00:00:00 2001
From: Nathan Levin <nlevin@nvidia.com>
Date: Fri, 6 Feb 2026 22:54:19 +0000
Subject: [PATCH 10/10] fix: use sa-shared container path in H100 launch script

Change SQUASH_FILE path from /mnt/nfs/slurm-shared/containers/ to
/mnt/nfs/sa-shared/containers/ to match cluster configuration.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 runners/launch_h100-dgxc-slurm.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runners/launch_h100-dgxc-slurm.sh b/runners/launch_h100-dgxc-slurm.sh
index 6c6126aa8..fb0ae69f7 100644
--- a/runners/launch_h100-dgxc-slurm.sh
+++ b/runners/launch_h100-dgxc-slurm.sh
@@ -35,7 +35,7 @@ export SLURM_ACCOUNT="customer"
 CONTAINER_KEY=$(echo "$IMAGE" | sed 's|nvcr.io/|nvcr.io#|')
 
 # Map container image to local squash file
-SQUASH_FILE="/mnt/nfs/slurm-shared/containers/$(echo "$IMAGE" | sed 's|nvcr.io/||' | sed 's/[\/:@#]/+/g').sqsh"
+SQUASH_FILE="/mnt/nfs/sa-shared/containers/$(echo "$IMAGE" | sed 's|nvcr.io/||' | sed 's/[\/:@#]/+/g').sqsh"
 
 if [[ $MODEL_PREFIX == "DeepSeek-R1-0528" ]]; then
     export MODEL_PATH="/mnt/numa1/shared/models/dsr1-fp8"