From 3ecfb6cfc10b460f0c63cbee02fc0ab6c1546fa3 Mon Sep 17 00:00:00 2001
From: Qiaolin-Yu <liin1211@outlook.com>
Date: Sun, 26 Apr 2026 02:26:11 -0700
Subject: [PATCH 1/2] retry sglang: append dsv4-fp4-b300-sglang perf-changelog entry

---
 perf-changelog.yaml | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 0e358a20a..ccad30ed5 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -1784,7 +1784,7 @@
   description:
     - "Restore the recipe-per-CONC split (low-latency / balanced / max-throughput) on top of the low-latency-only fallback from #1143; the DeepEP FP8 weight-postprocess path is fixed, so the high-throughput scenario runs again"
     - "Recipes from https://docs.sglang.io/cookbook/autoregressive/DeepSeek/DeepSeek-V4"
-  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1158
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1132
 
 - config-keys:
     - dsv4-fp8-mi355x-sglang
@@ -1855,3 +1855,10 @@
     - "Pinned to PR1 limitations: single-sequence kv_cache hardcode, --enforce-eager required, ATOM_USE_TRITON_MOE=1 (aiter fused_moe broken on gfx950)"
     - "Sweep will expand to TP=4/8 conc 4–256 once ROCm/ATOM PR3 (multi-request) and PR4 (CUDAGraph) land"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1170
+
+- config-keys:
+    - dsv4-fp4-b300-sglang
+  description:
+    - "Restore the recipe-per-CONC split (low-latency / balanced / max-throughput) on top of the low-latency-only fallback from #1143; the DeepEP FP8 weight-postprocess path is fixed, so the high-throughput scenario runs again"
+    - "Recipes from https://docs.sglang.io/cookbook/autoregressive/DeepSeek/DeepSeek-V4"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1158

From 5b95b33c05601486907a6e9628bcc58e4029ece5 Mon Sep 17 00:00:00 2001
From: Qiaolin-Yu <liin1211@outlook.com>
Date: Sun, 26 Apr 2026 02:28:44 -0700
Subject: [PATCH 2/2] fix: restore pr-link to pull/1158 in perf-changelog.yaml

---
 perf-changelog.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index ccad30ed5..0bce77831 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -1784,7 +1784,7 @@
   description:
     - "Restore the recipe-per-CONC split (low-latency / balanced / max-throughput) on top of the low-latency-only fallback from #1143; the DeepEP FP8 weight-postprocess path is fixed, so the high-throughput scenario runs again"
     - "Recipes from https://docs.sglang.io/cookbook/autoregressive/DeepSeek/DeepSeek-V4"
-  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1132
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1158
 
 - config-keys:
     - dsv4-fp8-mi355x-sglang