From 938d1d006d148fc40d360554d1733d1c4ab75a24 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Sat, 25 Apr 2026 16:35:33 -0700 Subject: [PATCH 1/2] retrigger mi355x dsv4 --- benchmarks/single_node/dsv4_fp8_mi355x.sh | 4 +--- perf-changelog.yaml | 7 +++++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/benchmarks/single_node/dsv4_fp8_mi355x.sh b/benchmarks/single_node/dsv4_fp8_mi355x.sh index 8ca19f71f..971b18b6a 100755 --- a/benchmarks/single_node/dsv4_fp8_mi355x.sh +++ b/benchmarks/single_node/dsv4_fp8_mi355x.sh @@ -83,13 +83,11 @@ python3 -m sglang.launch_server \ --attention-backend compressed \ --max-running-request 256 \ --page-size 256 \ - --chunked-prefill-size 4096 \ + --chunked-prefill-size 8192 \ --disable-shared-experts-fusion \ --disable-cuda-graph \ --tool-call-parser deepseekv4 \ --reasoning-parser deepseek-v4 \ - --mem-fraction-static 0.88 \ - --max-total-tokens $((CONC * (ISL + OSL) + 200)) \ --watchdog-timeout 1800 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 397da6591..23542b901 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1819,3 +1819,10 @@ - "Restore the recipe-per-CONC split (low-latency / balanced / max-throughput) on top of the low-latency-only fallback from #1143; the DeepEP FP8 weight-postprocess path is fixed, so the high-throughput scenario runs again" - "Recipes from https://docs.sglang.io/cookbook/autoregressive/DeepSeek/DeepSeek-V4" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1132 + +- config-keys: + - dsv4-fp8-mi355x-sglang + description: + - "Drop --mem-fraction-static 0.88 and --max-total-tokens from dsv4_fp8_mi355x.sh to fix OOM with dp_size=8" + - "Bump --chunked-prefill-size from 4096 to 8192 to match working dsv4pro script" + - "Retriggering dsv4-fp8-mi355x-sglang" From f660cd2e91cfcde2fcddf68c1531c61197142e4e Mon Sep 17 00:00:00 2001 From: Bryan Shan <58582368+Oseltamivir@users.noreply.github.com> Date: Sat, 25 Apr 2026 16:39:37 -0700 Subject: [PATCH 2/2] Refactor changelog descriptions for consistency Updated descriptions in perf-changelog.yaml for clarity. --- perf-changelog.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 23542b901..14d9e6704 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1823,6 +1823,6 @@ - config-keys: - dsv4-fp8-mi355x-sglang description: - - "Drop --mem-fraction-static 0.88 and --max-total-tokens from dsv4_fp8_mi355x.sh to fix OOM with dp_size=8" - - "Bump --chunked-prefill-size from 4096 to 8192 to match working dsv4pro script" - - "Retriggering dsv4-fp8-mi355x-sglang" + - "Drop --mem-fraction-static 0.88 and --max-total-tokens from dsv4_fp8_mi355x.sh" + - "Bump --chunked-prefill-size from 4096 to 8192" + - "Retrigger dsv4-fp8-mi355x-sglang"