From 3ecfb6cfc10b460f0c63cbee02fc0ab6c1546fa3 Mon Sep 17 00:00:00 2001 From: Qiaolin-Yu Date: Sun, 26 Apr 2026 02:26:11 -0700 Subject: [PATCH 1/2] retry sglang --- perf-changelog.yaml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 0e358a20a..ccad30ed5 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1784,7 +1784,7 @@ description: - "Restore the recipe-per-CONC split (low-latency / balanced / max-throughput) on top of the low-latency-only fallback from #1143; the DeepEP FP8 weight-postprocess path is fixed, so the high-throughput scenario runs again" - "Recipes from https://docs.sglang.io/cookbook/autoregressive/DeepSeek/DeepSeek-V4" - pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1158 + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1132 - config-keys: - dsv4-fp8-mi355x-sglang @@ -1855,3 +1855,10 @@ - "Pinned to PR1 limitations: single-sequence kv_cache hardcode, --enforce-eager required, ATOM_USE_TRITON_MOE=1 (aiter fused_moe broken on gfx950)" - "Sweep will expand to TP=4/8 conc 4–256 once ROCm/ATOM PR3 (multi-request) and PR4 (CUDAGraph) land" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1170 + +- config-keys: + - dsv4-fp4-b300-sglang + description: + - "Restore the recipe-per-CONC split (low-latency / balanced / max-throughput) on top of the low-latency-only fallback from #1143; the DeepEP FP8 weight-postprocess path is fixed, so the high-throughput scenario runs again" + - "Recipes from https://docs.sglang.io/cookbook/autoregressive/DeepSeek/DeepSeek-V4" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1158 From 5b95b33c05601486907a6e9628bcc58e4029ece5 Mon Sep 17 00:00:00 2001 From: Qiaolin-Yu Date: Sun, 26 Apr 2026 02:28:44 -0700 Subject: [PATCH 2/2] fix --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index ccad30ed5..0bce77831 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1784,7 +1784,7 @@ description: - "Restore the recipe-per-CONC split (low-latency / balanced / max-throughput) on top of the low-latency-only fallback from #1143; the DeepEP FP8 weight-postprocess path is fixed, so the high-throughput scenario runs again" - "Recipes from https://docs.sglang.io/cookbook/autoregressive/DeepSeek/DeepSeek-V4" - pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1132 + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1158 - config-keys: - dsv4-fp8-mi355x-sglang