From 358e17b3ee18059a93d99b8c74097dcbe81adbb9 Mon Sep 17 00:00:00 2001
From: sglang-bot <sglangbot@gmail.com>
Date: Sun, 26 Apr 2026 03:30:38 -0700
Subject: [PATCH 1/3] dsv4-fp4 sglang b200/b300: floor --max-running-requests
 at 8

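Mirrors the floor pattern from the mi355x atom script (#1170), which
floors at 4; here the floor is 8. Prevents small CONC values from
yielding a sub-optimal --max-running-requests: with integer division,
CONC * 3 / 2 falls below 8 for CONC <= 5, so those runs now get 8.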

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 benchmarks/single_node/dsv4_fp4_b200.sh        | 2 +-
 benchmarks/single_node/dsv4_fp4_b300_sglang.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/benchmarks/single_node/dsv4_fp4_b200.sh b/benchmarks/single_node/dsv4_fp4_b200.sh
index e7a676b45..f691106e6 100755
--- a/benchmarks/single_node/dsv4_fp4_b200.sh
+++ b/benchmarks/single_node/dsv4_fp4_b200.sh
@@ -90,7 +90,7 @@ PYTHONNOUSERSITE=1 sglang serve \
     --port $PORT \
     --trust-remote-code \
     --tp $TP \
-    --max-running-requests "$((CONC * 3 / 2))" \
+    --max-running-requests "$(( CONC * 3 / 2 > 8 ? CONC * 3 / 2 : 8 ))" \
     --mem-fraction-static 0.90 \
     --swa-full-tokens-ratio 0.1 \
     "${PARALLEL_ARGS[@]}" $EVAL_CONTEXT_ARGS >> $SERVER_LOG 2>&1 &
diff --git a/benchmarks/single_node/dsv4_fp4_b300_sglang.sh b/benchmarks/single_node/dsv4_fp4_b300_sglang.sh
index 6fae10837..dededd071 100755
--- a/benchmarks/single_node/dsv4_fp4_b300_sglang.sh
+++ b/benchmarks/single_node/dsv4_fp4_b300_sglang.sh
@@ -110,7 +110,7 @@ PYTHONNOUSERSITE=1 sglang serve \
     --port $PORT \
     --trust-remote-code \
     --tp $TP \
-    --max-running-requests "$((CONC * 3 / 2))" \
+    --max-running-requests "$(( CONC * 3 / 2 > 8 ? CONC * 3 / 2 : 8 ))" \
     --mem-fraction-static "$MEM_FRACTION_STATIC" \
     --swa-full-tokens-ratio "$SWA_FULL_TOKENS_RATIO" \
     "${PARALLEL_ARGS[@]}" $EVAL_CONTEXT_ARGS >> $SERVER_LOG 2>&1 &

From 90faa5305203e004c4f3e301a1316e2dc242d237 Mon Sep 17 00:00:00 2001
From: sglang-bot <sglangbot@gmail.com>
Date: Sun, 26 Apr 2026 03:35:38 -0700
Subject: [PATCH 2/3] perf-changelog: add entry for #1173 (max-running-requests
 floor)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 perf-changelog.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 0bce77831..a627cbc10 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -1862,3 +1862,10 @@
     - "Restore the recipe-per-CONC split (low-latency / balanced / max-throughput) on top of the low-latency-only fallback from #1143; the DeepEP FP8 weight-postprocess path is fixed, so the high-throughput scenario runs again"
     - "Recipes from https://docs.sglang.io/cookbook/autoregressive/DeepSeek/DeepSeek-V4"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1158
+
+- config-keys:
+    - dsv4-fp4-b200-sglang
+    - dsv4-fp4-b300-sglang
+  description:
+    - "Floor --max-running-requests at 8 in dsv4_fp4_b200.sh and dsv4_fp4_b300_sglang.sh so low-CONC sweeps don't drop below the queue depth needed for stable benchmarking (CONC * 3 / 2 still applies above CONC=5)"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1173

From e0f0c32db23866e7fc1f87edeb33e31f079835cd Mon Sep 17 00:00:00 2001
From: sglang-bot <sglangbot@gmail.com>
Date: Sun, 26 Apr 2026 03:36:46 -0700
Subject: [PATCH 3/3] revert b200 change; scope to b300 only

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 benchmarks/single_node/dsv4_fp4_b200.sh | 2 +-
 perf-changelog.yaml                     | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/benchmarks/single_node/dsv4_fp4_b200.sh b/benchmarks/single_node/dsv4_fp4_b200.sh
index f691106e6..e7a676b45 100755
--- a/benchmarks/single_node/dsv4_fp4_b200.sh
+++ b/benchmarks/single_node/dsv4_fp4_b200.sh
@@ -90,7 +90,7 @@ PYTHONNOUSERSITE=1 sglang serve \
     --port $PORT \
     --trust-remote-code \
     --tp $TP \
-    --max-running-requests "$(( CONC * 3 / 2 > 8 ? CONC * 3 / 2 : 8 ))" \
+    --max-running-requests "$((CONC * 3 / 2))" \
     --mem-fraction-static 0.90 \
     --swa-full-tokens-ratio 0.1 \
     "${PARALLEL_ARGS[@]}" $EVAL_CONTEXT_ARGS >> $SERVER_LOG 2>&1 &
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index a627cbc10..77c2dd31e 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -1864,8 +1864,7 @@
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1158
 
 - config-keys:
-    - dsv4-fp4-b200-sglang
     - dsv4-fp4-b300-sglang
   description:
-    - "Floor --max-running-requests at 8 in dsv4_fp4_b200.sh and dsv4_fp4_b300_sglang.sh so low-CONC sweeps don't drop below the queue depth needed for stable benchmarking (CONC * 3 / 2 still applies above CONC=5)"
+    - "Floor --max-running-requests at 8 in dsv4_fp4_b300_sglang.sh so low-CONC sweeps don't drop below the queue depth needed for stable benchmarking (CONC * 3 / 2 still applies above CONC=5)"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1173