From ef0ec6cfe4ab20ab3495686613ca0f8b83baaa39 Mon Sep 17 00:00:00 2001 From: yhyang201 Date: Sun, 26 Apr 2026 18:51:23 +0800 Subject: [PATCH 1/3] dsv4-b300-sglang: enable SWA_EVICT_DROP_PAGE_MARGIN for DP-attention Co-Authored-By: Claude Opus 4.6 --- benchmarks/single_node/dsv4_fp4_b300_sglang.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarks/single_node/dsv4_fp4_b300_sglang.sh b/benchmarks/single_node/dsv4_fp4_b300_sglang.sh index dededd071..ac552c733 100755 --- a/benchmarks/single_node/dsv4_fp4_b300_sglang.sh +++ b/benchmarks/single_node/dsv4_fp4_b300_sglang.sh @@ -70,6 +70,7 @@ DEEPEP_CONFIG='{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96} MEM_FRACTION_STATIC=0.90 if [ "${DP_ATTENTION}" = "true" ]; then + export SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN=1 export SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE=0 export SGLANG_OPT_FIX_HASH_MEGA_MOE=0 export SGLANG_OPT_USE_FAST_MASK_EP=1 From d96eb7162506cb927d748df32e0bb83dc2e2f809 Mon Sep 17 00:00:00 2001 From: yhyang201 Date: Sun, 26 Apr 2026 18:53:36 +0800 Subject: [PATCH 2/3] perf-changelog: add dsv4-b300-sglang entry for #1174 Co-Authored-By: Claude Opus 4.6 --- perf-changelog.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 77c2dd31e..532624528 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1868,3 +1868,10 @@ description: - "Floor --max-running-requests at 8 in dsv4_fp4_b300_sglang.sh so low-CONC sweeps don't drop below the queue depth needed for stable benchmarking (CONC * 3 / 2 still applies above CONC=5)" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1173 + +- config-keys: + - dsv4-fp4-b300-sglang + description: + - "Restore the recipe-per-CONC split (low-latency / balanced / max-throughput) on top of the low-latency-only fallback from #1143; the DeepEP FP8 weight-postprocess path is fixed, so the high-throughput scenario runs again" + - "Recipes from 
https://docs.sglang.io/cookbook/autoregressive/DeepSeek/DeepSeek-V4" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1174 From 807b11a3f6e7cb86895ac04413d9f0de6910af56 Mon Sep 17 00:00:00 2001 From: yhyang201 Date: Sun, 26 Apr 2026 18:54:46 +0800 Subject: [PATCH 3/3] perf-changelog: update description for #1174 Co-Authored-By: Claude Opus 4.6 --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 532624528..589f75766 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1872,6 +1872,6 @@ - config-keys: - dsv4-fp4-b300-sglang description: - - "Restore the recipe-per-CONC split (low-latency / balanced / max-throughput) on top of the low-latency-only fallback from #1143; the DeepEP FP8 weight-postprocess path is fixed, so the high-throughput scenario runs again" + - "Export SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN=1 in the DP-attention branch of dsv4_fp4_b300_sglang.sh for better DP-attention performance" - "Recipes from https://docs.sglang.io/cookbook/autoregressive/DeepSeek/DeepSeek-V4" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1174