From 44868fc8a659c6f2b9daedd33ccabd0738388481 Mon Sep 17 00:00:00 2001
From: ishandhanani <ishandhanani@gmail.com>
Date: Wed, 3 Dec 2025 16:39:42 -0800
Subject: [PATCH 1/4] Add DSR1 FP4 GB200 dynamo-sglang config; retune FP8 8k/1k search space

---
 .github/configs/nvidia-master.yaml            | 102 +++++++++++++++++-
 .../dsr1_fp8_gb200_dynamo-sglang_slurm.sh     |   6 +-
 2 files changed, 98 insertions(+), 10 deletions(-)

diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index c4370f483..3b094d572 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -852,17 +852,37 @@ dsr1-fp8-gb200-dynamo-sglang:
   - isl: 8192
     osl: 1024
     search-space:
+    # Low latency (1 prefill worker at DEP4 and 1 decode worker at DEP4)
     - spec-decoding: "none"
-      conc-list: [ 128, 256, 384, 448, 512, 576, 1024, 2048, 4096 ]
+      conc-list: [ 4, 8, 16, 32, 64, 128, 256, 512 ]
       prefill:
-        num-worker: 6
+        num-worker: 1
         # tp, ep, and dp-attn do nothing because they are hardcoded in the following file:
-        # https://github.com/Elnifio/dynamo/blob/update-result-file-name/components/backends/sglang/slurm_jobs/scripts/gb200-fp8.sh
         tp: 1
         ep: 1
         dp-attn: true
         additional-settings:
-        - "PREFILL_NODES=12"
+        - "PREFILL_NODES=1"
+        - "N_ADDITIONAL_FRONTENDS=8"
+      decode:
+        num-worker: 1
+        tp: 1
+        ep: 1
+        dp-attn: true
+        additional-settings:
+        - "DECODE_NODES=1"
+
+    # Middle and top of curve (5 prefill workers each at DEP8 and 1 decode worker at DEP32)
+    - spec-decoding: "none"
+      conc-list: [ 512, 1024, 2048, 6144 ]
+      prefill:
+        num-worker: 5
+        # tp, ep, and dp-attn do nothing because they are hardcoded in the following file:
+        tp: 1
+        ep: 1
+        dp-attn: true
+        additional-settings:
+        - "PREFILL_NODES=10"  # total prefill nodes: 5 workers x 2 nodes each (DEP8 on 4-GPU nodes)
         - "N_ADDITIONAL_FRONTENDS=8"
       decode:
         num-worker: 1
@@ -870,4 +890,76 @@ dsr1-fp8-gb200-dynamo-sglang:
         ep: 1
         dp-attn: true
         additional-settings:
-        - "DECODE_NODES=6"
+        - "DECODE_NODES=8"
+
+  dsr1-fp8-gb200-dynamo-sglang:
+    # TODO: swap
+    image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.5.1-rc0.pre1
+    # TODO: what is the right name?
+    model: deepseek-ai/DeepSeek-R1-0528-fp4-v2
+    model-prefix: dsr1
+    runner: gb200
+    precision: fp4
+    framework: dynamo-sglang
+    multinode: true
+    disagg: true
+    seq-len-configs:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      # Low latency (1 prefill worker at DEP4 and 2 decode workers at DEP4)
+      - spec-decoding: "none"
+        conc-list: [ 4, 8, 32, 64, 128, 112, 128, 256 ]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: true
+          additional-settings:
+          - "PREFILL_NODES=1"
+          - "N_ADDITIONAL_FRONTENDS=8"
+        decode:
+          num-worker: 2
+          tp: 1
+          ep: 1
+          dp-attn: true
+          additional-settings:
+          - "DECODE_NODES=2"
+
+      # Mid curve (1 prefill worker at DEP4 and 1 decode workers at DEP48)
+      - spec-decoding: "none"
+        conc-list: [ 512, 1024, 2048, 4096, 8192 ]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: true
+          additional-settings:
+          - "PREFILL_NODES=1"
+          - "N_ADDITIONAL_FRONTENDS=8"
+        decode:
+          num-worker: 2
+          tp: 1
+          ep: 1
+          dp-attn: true
+          additional-settings:
+          - "DECODE_NODES=12"
+
+      # Top of curve (1 prefill worker at DEP4 and 1 decode worker at DEP32)
+      - spec-decoding: "none"
+        conc-list: [ 8192, 12000, 15000 ]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: true
+          additional-settings:
+          - "PREFILL_NODES=1"
+          - "N_ADDITIONAL_FRONTENDS=8"
+        decode:
+          num-worker: 2
+          tp: 1
+          ep: 1
+          dp-attn: true
+          additional-settings:
+          - "DECODE_NODES=8"
\ No newline at end of file
diff --git a/benchmarks/dsr1_fp8_gb200_dynamo-sglang_slurm.sh b/benchmarks/dsr1_fp8_gb200_dynamo-sglang_slurm.sh
index 99e2c7afd..f538e3e29 100644
--- a/benchmarks/dsr1_fp8_gb200_dynamo-sglang_slurm.sh
+++ b/benchmarks/dsr1_fp8_gb200_dynamo-sglang_slurm.sh
@@ -12,11 +12,7 @@ check_env_vars CONC_LIST ISL OSL IMAGE SPEC_DECODING MODEL_PATH \
 
 # Always clone and setup Dynamo
 echo "Cloning Dynamo repository..."
-if [ "$ISL" = "1024" ] && [ "$OSL" = "1024" ]; then
-    git clone --branch ishan/sa-1.1-sgl-dsr1-fp8 https://github.com/ai-dynamo/dynamo.git
-else
-    git clone --branch update-result-file-name https://github.com/Elnifio/dynamo.git
-fi
+git clone --branch ishan/sa-1.1-sgl-dsr1 https://github.com/ai-dynamo/dynamo.git
 
 cd "$SGL_SLURM_JOBS_PATH"
 

From 0e3c3598b1f5367a4165ee19bae7446b714755d3 Mon Sep 17 00:00:00 2001
From: ishandhanani <ishandhanani@gmail.com>
Date: Wed, 3 Dec 2025 16:40:46 -0800
Subject: [PATCH 2/4] Rename duplicated config key to dsr1-fp4 and drop dangling comments

---
 .github/configs/nvidia-master.yaml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index 3b094d572..881d68591 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -857,7 +857,6 @@ dsr1-fp8-gb200-dynamo-sglang:
       conc-list: [ 4, 8, 16, 32, 64, 128, 256, 512 ]
       prefill:
         num-worker: 1
-        # tp, ep, and dp-attn do nothing because they are hardcoded in the following file:
         tp: 1
         ep: 1
         dp-attn: true
@@ -877,7 +876,6 @@ dsr1-fp8-gb200-dynamo-sglang:
       conc-list: [ 512, 1024, 2048, 6144 ]
       prefill:
         num-worker: 5
-        # tp, ep, and dp-attn do nothing because they are hardcoded in the following file:
         tp: 1
         ep: 1
         dp-attn: true
@@ -892,7 +890,7 @@ dsr1-fp8-gb200-dynamo-sglang:
         additional-settings:
         - "DECODE_NODES=8"
 
-  dsr1-fp8-gb200-dynamo-sglang:
+  dsr1-fp4-gb200-dynamo-sglang:
     # TODO: swap
     image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.5.1-rc0.pre1
     # TODO: what is the right name?

From 297bd7fc640345cb2f2b79e8ee5bebd251bf0b89 Mon Sep 17 00:00:00 2001
From: ishandhanani <ishandhanani@gmail.com>
Date: Wed, 3 Dec 2025 16:42:31 -0800
Subject: [PATCH 3/4] Dedent dsr1-fp4 config to a top-level key

---
 .github/configs/nvidia-master.yaml | 142 ++++++++++++++---------------
 1 file changed, 71 insertions(+), 71 deletions(-)

diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index 881d68591..999d34441 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -890,74 +890,74 @@ dsr1-fp8-gb200-dynamo-sglang:
         additional-settings:
         - "DECODE_NODES=8"
 
-  dsr1-fp4-gb200-dynamo-sglang:
-    # TODO: swap
-    image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.5.1-rc0.pre1
-    # TODO: what is the right name?
-    model: deepseek-ai/DeepSeek-R1-0528-fp4-v2
-    model-prefix: dsr1
-    runner: gb200
-    precision: fp4
-    framework: dynamo-sglang
-    multinode: true
-    disagg: true
-    seq-len-configs:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      # Low latency (1 prefill worker at DEP4 and 2 decode workers at DEP4)
-      - spec-decoding: "none"
-        conc-list: [ 4, 8, 32, 64, 128, 112, 128, 256 ]
-        prefill:
-          num-worker: 1
-          tp: 1
-          ep: 1
-          dp-attn: true
-          additional-settings:
-          - "PREFILL_NODES=1"
-          - "N_ADDITIONAL_FRONTENDS=8"
-        decode:
-          num-worker: 2
-          tp: 1
-          ep: 1
-          dp-attn: true
-          additional-settings:
-          - "DECODE_NODES=2"
-
-      # Mid curve (1 prefill worker at DEP4 and 1 decode workers at DEP48)
-      - spec-decoding: "none"
-        conc-list: [ 512, 1024, 2048, 4096, 8192 ]
-        prefill:
-          num-worker: 1
-          tp: 1
-          ep: 1
-          dp-attn: true
-          additional-settings:
-          - "PREFILL_NODES=1"
-          - "N_ADDITIONAL_FRONTENDS=8"
-        decode:
-          num-worker: 2
-          tp: 1
-          ep: 1
-          dp-attn: true
-          additional-settings:
-          - "DECODE_NODES=12"
-
-      # Top of curve (1 prefill worker at DEP4 and 1 decode worker at DEP32)
-      - spec-decoding: "none"
-        conc-list: [ 8192, 12000, 15000 ]
-        prefill:
-          num-worker: 1
-          tp: 1
-          ep: 1
-          dp-attn: true
-          additional-settings:
-          - "PREFILL_NODES=1"
-          - "N_ADDITIONAL_FRONTENDS=8"
-        decode:
-          num-worker: 2
-          tp: 1
-          ep: 1
-          dp-attn: true
-          additional-settings:
-          - "DECODE_NODES=8"
\ No newline at end of file
+dsr1-fp4-gb200-dynamo-sglang:
+  # TODO: swap this image
+  image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.5.1-rc0.pre1
+  # TODO: what is the right model name?
+  model: deepseek-ai/DeepSeek-R1-0528-fp4-v2
+  model-prefix: dsr1
+  runner: gb200
+  precision: fp4
+  framework: dynamo-sglang
+  multinode: true
+  disagg: true
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    # Low latency (1 prefill worker at DEP4 and 2 decode workers at DEP4)
+    - spec-decoding: "none"
+      conc-list: [ 4, 8, 32, 64, 112, 128, 256 ]  # deduplicated (128 was listed twice) and sorted
+      prefill:
+        num-worker: 1
+        tp: 1
+        ep: 1
+        dp-attn: true
+        additional-settings:
+        - "PREFILL_NODES=1"
+        - "N_ADDITIONAL_FRONTENDS=8"
+      decode:
+        num-worker: 2
+        tp: 1
+        ep: 1
+        dp-attn: true
+        additional-settings:
+        - "DECODE_NODES=2"
+
+    # Mid curve (1 prefill worker at DEP4 and 1 decode worker at DEP48)
+    - spec-decoding: "none"
+      conc-list: [ 512, 1024, 2048, 4096, 8192 ]
+      prefill:
+        num-worker: 1
+        tp: 1
+        ep: 1
+        dp-attn: true
+        additional-settings:
+        - "PREFILL_NODES=1"
+        - "N_ADDITIONAL_FRONTENDS=8"
+      decode:
+        num-worker: 1  # a single DEP48 worker spanning all 12 decode nodes
+        tp: 1
+        ep: 1
+        dp-attn: true
+        additional-settings:
+        - "DECODE_NODES=12"
+
+    # Top of curve (1 prefill worker at DEP4 and 1 decode worker at DEP32)
+    - spec-decoding: "none"
+      conc-list: [ 8192, 12000, 15000 ]
+      prefill:
+        num-worker: 1
+        tp: 1
+        ep: 1
+        dp-attn: true
+        additional-settings:
+        - "PREFILL_NODES=1"
+        - "N_ADDITIONAL_FRONTENDS=8"
+      decode:
+        num-worker: 1  # a single DEP32 worker spanning all 8 decode nodes
+        tp: 1
+        ep: 1
+        dp-attn: true
+        additional-settings:
+        - "DECODE_NODES=8"

From 2cc19a3ec4e245ee6ce868e1fe3c01a8782e723a Mon Sep 17 00:00:00 2001
From: ishandhanani <ishandhanani@gmail.com>
Date: Wed, 3 Dec 2025 16:50:16 -0800
Subject: [PATCH 4/4] Add FP4 benchmark script and pass GPU_TYPE to submit_disagg.sh

---
 .github/configs/nvidia-master.yaml            |  1 +
 .../dsr1_fp4_gb200_dynamo-sglang_slurm.sh     | 37 +++++++++++++++++++
 .../dsr1_fp8_gb200_dynamo-sglang_slurm.sh     |  2 +
 3 files changed, 40 insertions(+)
 create mode 100644 benchmarks/dsr1_fp4_gb200_dynamo-sglang_slurm.sh

diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index 999d34441..71fb257e8 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -798,6 +798,7 @@ dsr1-fp8-gb200-dynamo-sglang:
         additional-settings:
         - "PREFILL_NODES=4"
         - "N_ADDITIONAL_FRONTENDS=9"
+        - "SCRIPT_MODE=max-tpt"
       decode:
         num-worker: 1
         tp: 1
diff --git a/benchmarks/dsr1_fp4_gb200_dynamo-sglang_slurm.sh b/benchmarks/dsr1_fp4_gb200_dynamo-sglang_slurm.sh
new file mode 100644
index 000000000..7a668f30c
--- /dev/null
+++ b/benchmarks/dsr1_fp4_gb200_dynamo-sglang_slurm.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# Launches the DSR1 FP4 GB200 dynamo-sglang disaggregated benchmark via submit_disagg.sh.
+
+set -x
+
+source "$(dirname "$0")/benchmark_lib.sh"
+
+check_env_vars CONC_LIST ISL OSL IMAGE SPEC_DECODING MODEL_PATH \
+    PREFILL_NUM_WORKERS PREFILL_TP PREFILL_EP PREFILL_DP_ATTN \
+    DECODE_NUM_WORKERS DECODE_TP DECODE_EP DECODE_DP_ATTN \
+    PREFILL_NODES DECODE_NODES N_ADDITIONAL_FRONTENDS SGL_SLURM_JOBS_PATH # SGL_SLURM_JOBS_PATH FIXME
+
+# Always clone and setup Dynamo
+echo "Cloning Dynamo repository..."
+git clone --branch ishan/sa-1.1-sgl-dsr1 https://github.com/ai-dynamo/dynamo.git
+
+cd "$SGL_SLURM_JOBS_PATH" || exit 1
+
+# Set up SGL launch script-specific environment variables
+export TIME_LIMIT="04:00:00"
+export MODEL_PATH=$MODEL_PATH
+export CONFIG_DIR=$CONFIG_DIR
+export CONTAINER_IMAGE=$IMAGE
+export GPU_TYPE="gb200-fp4"
+
+# Launch jobs based on ISL/OSL.
+# Replace ' ' in CONC_LIST with 'x' so the concurrency list is a series of numbers
+# delimited by 'x', which is how the underlying launch script expects it.
+# SCRIPT_MODE is not validated above, so it is only passed when set and non-empty.
+bash ./submit_disagg.sh "$PREFILL_NODES" \
+    "$PREFILL_NUM_WORKERS" \
+    "$DECODE_NODES" \
+    "$DECODE_NUM_WORKERS" \
+    "$N_ADDITIONAL_FRONTENDS" \
+    "$ISL" "$OSL" "${CONC_LIST// /x}" inf \
+    "$GPU_TYPE" \
+    ${SCRIPT_MODE:+"$SCRIPT_MODE"}
diff --git a/benchmarks/dsr1_fp8_gb200_dynamo-sglang_slurm.sh b/benchmarks/dsr1_fp8_gb200_dynamo-sglang_slurm.sh
index f538e3e29..4e44b0414 100644
--- a/benchmarks/dsr1_fp8_gb200_dynamo-sglang_slurm.sh
+++ b/benchmarks/dsr1_fp8_gb200_dynamo-sglang_slurm.sh
@@ -21,6 +21,7 @@ export TIME_LIMIT="04:00:00"
 export MODEL_PATH=$MODEL_PATH
 export CONFIG_DIR=$CONFIG_DIR
 export CONTAINER_IMAGE=$IMAGE
+export GPU_TYPE="gb200-fp8"
 
 # Launch jobs based on ISL/OSL
 # Replace ' ' in CONC_LIST with 'x' such that the concurrency list is represented
@@ -32,4 +33,5 @@ bash ./submit_disagg.sh $PREFILL_NODES \
     $DECODE_NUM_WORKERS \
     $N_ADDITIONAL_FRONTENDS \
     $ISL $OSL "${CONC_LIST// /x}" inf \
+    $GPU_TYPE \
     $SCRIPT_MODE
\ No newline at end of file