diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 616aa6ff9..23d805c88 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -1892,10 +1892,12 @@ dsv4-fp4-b300-sglang-mtp: osl: 1024 search-space: - { tp: 8, ep: 1, conc-start: 1, conc-end: 8, spec-decoding: mtp } + - { tp: 4, ep: 1, conc-start: 16, conc-end: 128, spec-decoding: mtp } - isl: 8192 osl: 1024 search-space: - { tp: 8, ep: 1, conc-start: 1, conc-end: 8, spec-decoding: mtp } + - { tp: 4, ep: 1, conc-start: 16, conc-end: 128, spec-decoding: mtp } qwen3.5-bf16-b200-sglang: image: lmsysorg/sglang:nightly-dev-20260216-d3bae71e diff --git a/benchmarks/single_node/dsv4_fp4_b300_sglang_mtp.sh b/benchmarks/single_node/dsv4_fp4_b300_sglang_mtp.sh index 767b9a8f9..a1b8043a6 100755 --- a/benchmarks/single_node/dsv4_fp4_b300_sglang_mtp.sh +++ b/benchmarks/single_node/dsv4_fp4_b300_sglang_mtp.sh @@ -132,6 +132,7 @@ run_benchmark_serving \ --model "$MODEL" \ --port "$PORT" \ --backend vllm \ + --dsv4 \ --input-len "$ISL" \ --output-len "$OSL" \ --random-range-ratio "$RANDOM_RANGE_RATIO" \ diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 300d39c40..84d7a2b44 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1903,3 +1903,10 @@ - "ISL=8192: TP4 conc 4-64; DP4 (dp-attn) conc 128-1024; DP8 (dp-attn) conc 1024-8192" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1155 +- config-keys: + - dsv4-fp4-b300-sglang-mtp + description: + - "Fix missing line-continuation backslash after --dsv4 in run_benchmark_serving invocation" + - "Without the backslash, bash truncated the bench command at --dsv4 and tried to exec the next line (--input-len ...) as a new command, breaking the run" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1183 +