Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
185d2ff
Re-submit dsv4-fp4-gb200-dynamo-vllm against srt-slurm aflowers/gb200…
Ankur-singh Apr 26, 2026
65e804c
Merge remote-tracking branch 'origin/main' into nv/dsv4-fp4-gb200-dyn…
Ankur-singh Apr 26, 2026
925706b
Revert 1k/1k tep8 recipe changes; leave 1k/1k untouched
Ankur-singh Apr 26, 2026
8492512
Comment out VLLM_RANDOMIZE_DP_DUMMY_INPUTS / VLLM_MOE_ROUTING_SIMULAT…
Ankur-singh Apr 26, 2026
11c9514
Merge branch 'main' into nv/dsv4-fp4-gb200-dynamo-vllm
Ankur-singh Apr 26, 2026
5b0347f
Switch to deepseek-v4-pro-sa SA-curated subset; drop 1k/1k
Ankur-singh Apr 26, 2026
4b8c5e7
Merge branch 'main' into nv/dsv4-fp4-gb200-dynamo-vllm
Ankur-singh Apr 26, 2026
88c7a2e
Update perf-changelog.yaml
Ankur-singh Apr 26, 2026
ad9680e
Switch to vLLM 0.20.0 + dynamo wheel pin; rebase recipes on aflowers/…
Ankur-singh Apr 28, 2026
ed541a7
Drop benchmark.tokenizer_mode from all 6 recipes
Ankur-singh Apr 28, 2026
5182970
Merge branch 'main' into nv/dsv4-fp4-gb200-dynamo-vllm
Ankur-singh Apr 28, 2026
103957b
Strip sha256 pin from vllm container references
Ankur-singh Apr 28, 2026
c2ec702
Drop identity.model from all 6 recipes
Ankur-singh Apr 28, 2026
4b4ebcd
Merge remote-tracking branch 'origin/main' into nv/dsv4-fp4-gb200-dyn…
Ankur-singh Apr 28, 2026
a14f71d
Switch dsv4-fp4 MODEL_PATH from /mnt/numa1 to /mnt/lustre01
Oseltamivir Apr 28, 2026
dd386a5
Merge branch 'main' into nv/dsv4-fp4-gb200-dynamo-vllm
Oseltamivir Apr 28, 2026
a1096ba
Trim DSv4 GB200 dynamo-vLLM configs
alec-flowers Apr 28, 2026
ddca892
Merge branch 'main' into nv/dsv4-fp4-gb200-dynamo-vllm
alec-flowers Apr 28, 2026
efcfa65
Fix perf changelog entry formatting
alec-flowers Apr 28, 2026
ecdebe9
Restore dynamic GB200 container import
alec-flowers Apr 28, 2026
762f5c1
Merge branch 'main' into nv/dsv4-fp4-gb200-dynamo-vllm
Oseltamivir Apr 29, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 19 additions & 66 deletions .github/configs/nvidia-master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7627,7 +7627,7 @@ kimik2.5-fp4-gb200-dynamo-vllm:
dp-attn: true

dsv4-fp4-gb200-dynamo-vllm:
image: vllm/vllm-openai:deepseekv4-cu130
image: vllm/vllm-openai:v0.20.0-ubuntu2404
model: deepseek-ai/DeepSeek-V4-Pro
model-prefix: dsv4
runner: gb200
Expand All @@ -7636,104 +7636,57 @@ dsv4-fp4-gb200-dynamo-vllm:
multinode: true
disagg: true
seq-len-configs:
# 1k/1k — extrapolated from kimi-k2.5 1k/1k topologies, scaled to DSV4-Pro's
# DP>=8 constraint. No upstream NVIDIA reference for DSV4-Pro vLLM disagg
# at this seq-len yet (PR #67 only publishes 8k/1k).
- isl: 1024
- isl: 8192
osl: 1024
search-space:
# Low-concurrency / interactivity: 1 prefill (DP=8) + 1 decode (TP=8).
# 4 nodes total. Mirrors NVIDIA aflowers/gb200-dsv4-recipes branch
# 1p1d-dep8-tep8.yaml (offload + numa-bind stripped — see recipe header).
- conc-list: [1, 4, 8, 16, 32, 64]
# Three validated 8k/1k points mirrored from NVIDIA/srt-slurm
# aflowers/vllm-gb200-v0.20.0 history. conc-list values match each
# recipe's benchmark.concurrencies.

# Low latency: 1 prefill (DEP=8) + 1 decode (TP=8). 5 nodes total with
# a dedicated NATS/etcd infra node.
- conc-list: [1]
prefill:
num-worker: 1
tp: 8
ep: 8
dp-attn: true
additional-settings:
- "CONFIG_FILE=recipes/vllm/deepseek-v4/1k1k/disagg-gb200-1p1d-dep8-tep8.yaml"
- "CONFIG_FILE=recipes/vllm/deepseek-v4/8k1k/disagg-gb200-low-latency.yaml"
decode:
num-worker: 1
tp: 8
ep: 1
dp-attn: false
# Mid throughput: 1 prefill (DP=8) + 1 wide decode (DP=16).
# 6 nodes. Single prefill is plenty for 1k prompts up to ~conc 4096.
- conc-list: [128, 256, 1024, 2048, 4096]

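# Mid curve: 1 prefill (DEP=8) + 1 decode (DEP=8). 5 nodes total with
# a dedicated NATS/etcd infra node.
# NOTE(review): the decode settings for this entry declare tp: 16 / ep: 16,
# which does not match the DEP=8 decode described here — confirm which one
# is intended (recipe file vs. these fields) and align the other.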
- conc-list: [256]
prefill:
num-worker: 1
tp: 8
ep: 8
dp-attn: true
additional-settings:
- "CONFIG_FILE=recipes/vllm/deepseek-v4/1k1k/disagg-gb200-1p1d-dep8-dep16.yaml"
- "CONFIG_FILE=recipes/vllm/deepseek-v4/8k1k/disagg-gb200-mid-curve.yaml"
decode:
num-worker: 1
tp: 16
ep: 16
dp-attn: true
# High throughput: 3 prefills (DP=8) + 1 wide decode (DP=16). 10 nodes.
# The 4096 overlap with the 1p1d block gives a crossover point. 8192
# would saturate 1p1d's prefill, so this topology takes over there.
- conc-list: [4096, 8192]
prefill:
num-worker: 3
tp: 8
ep: 8
dp-attn: true
additional-settings:
- "CONFIG_FILE=recipes/vllm/deepseek-v4/1k1k/disagg-gb200-3p1d-dep8-dep16.yaml"
decode:
num-worker: 1
tp: 16
ep: 16
dp-attn: true

- isl: 8192
osl: 1024
search-space:
# Low-concurrency / interactivity: 1 prefill (DP=8) + 1 decode (TP=8).
# 4 nodes total. Mirrors NVIDIA aflowers/gb200-dsv4-recipes branch.
- conc-list: [1, 4, 8, 16, 32, 64]
prefill:
num-worker: 1
tp: 8
ep: 8
dp-attn: true
additional-settings:
- "CONFIG_FILE=recipes/vllm/deepseek-v4/8k1k/disagg-gb200-1p1d-dep8-tep8.yaml"
decode:
num-worker: 1
tp: 8
ep: 1
dp-attn: false
# Mid: 3 prefills (DP=8) + 1 wide decode (DP=16). 10 nodes total.
- conc-list: [512, 1024]
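# Max throughput: 3 prefill (DEP=8 each) + 1 decode (DEP=8). 9 nodes
# total with a dedicated NATS/etcd infra node.
# NOTE(review): the decode settings for this entry declare tp: 16 / ep: 16,
# which does not match the DEP=8 decode described here — confirm which one
# is intended (recipe file vs. these fields) and align the other.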
- conc-list: [4096]
prefill:
num-worker: 3
tp: 8
ep: 8
dp-attn: true
additional-settings:
- "CONFIG_FILE=recipes/vllm/deepseek-v4/8k1k/disagg-gb200-3p1d-dep8-dep16.yaml"
- "CONFIG_FILE=recipes/vllm/deepseek-v4/8k1k/disagg-gb200-max-tpt.yaml"
decode:
num-worker: 1
tp: 16
ep: 16
dp-attn: true
# Max throughput: 7 prefills (DP=8) + 1 wide decode (DP=16). 18 nodes
# (full cluster). Mirrors NVIDIA/srt-slurm PR #67.
- conc-list: [4096, 8192]
prefill:
num-worker: 7
tp: 8
ep: 8
dp-attn: true
additional-settings:
- "CONFIG_FILE=recipes/vllm/deepseek-v4/8k1k/disagg-gb200-7p1d-dep8-dep16.yaml"
decode:
num-worker: 1
tp: 16
ep: 16
dp-attn: true

This file was deleted.

This file was deleted.

Loading
Loading