SemiAnalysisAI · Qiaolin-Yu · Apr 26, 2026 · Apr 26, 2026 · Apr 26, 2026 · Apr 26, 2026
diff --git a/benchmarks/single_node/dsv4_fp4_b300_sglang.sh b/benchmarks/single_node/dsv4_fp4_b300_sglang.sh
@@ -70,6 +70,7 @@ DEEPEP_CONFIG='{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}
 MEM_FRACTION_STATIC=0.90
 
 if [ "${DP_ATTENTION}" = "true" ]; then
+    export SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN=1
     export SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE=0
     export SGLANG_OPT_FIX_HASH_MEGA_MOE=0
     export SGLANG_OPT_USE_FAST_MASK_EP=1

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
@@ -1868,3 +1868,10 @@
   description:
     - "Floor --max-running-requests at 8 in dsv4_fp4_b300_sglang.sh so low-CONC sweeps don't drop below the queue depth needed for stable benchmarking (CONC * 3 / 2 still applies above CONC=5)"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1173
+
+- config-keys:
+    - dsv4-fp4-b300-sglang
+  description:
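+    - "Export SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN=1 in the DP_ATTENTION=true path of dsv4_fp4_b300_sglang.sh for better performance with dp-attention"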
+    - "Recipes from https://docs.sglang.io/cookbook/autoregressive/DeepSeek/DeepSeek-V4"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1174