Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 101 additions & 0 deletions .github/configs/nvidia-master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6646,3 +6646,104 @@ dsr1-fp4-b200-dynamo-sglang-mtp:
tp: 8
ep: 1
dp-attn: false

# Kimi K2.5 NVFP4 on the gb200 runner: multinode, disaggregated
# prefill/decode benchmark using the dynamo-vllm framework.
# Each search-space entry pins one recipe through CONFIG_FILE; the comment
# above each CONFIG_FILE links to that recipe in NVIDIA/srt-slurm.
kimik2.5-fp4-gb200-dynamo-vllm:
  image: vllm/vllm-openai:v0.18.0-cu130
  model: nvidia/Kimi-K2.5-NVFP4
  model-prefix: kimik2.5
  runner: gb200
  precision: fp4
  framework: dynamo-vllm
  multinode: true
  disagg: true
  seq-len-configs:
  - isl: 1024
    osl: 1024
    search-space:
    # 1 prefill worker (tp4/ep4) + 1 decode worker (tp16/ep16), high concurrency.
    - conc-list: [256, 512, 1024, 2048, 3072, 4096]
      prefill:
        num-worker: 1
        tp: 4
        ep: 4
        dp-attn: true
        additional-settings:
        # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/vllm/kimi-k2.5/1k1k/disagg-gb200-1p1d-dep4-dep16.yaml
        - "CONFIG_FILE=recipes/vllm/kimi-k2.5/1k1k/disagg-gb200-1p1d-dep4-dep16.yaml"
      decode:
        num-worker: 1
        tp: 16
        ep: 16
        dp-attn: true
    # 1 prefill worker (tp4/ep4) + 4 decode workers (tp4/ep4, no dp-attn), low concurrency.
    - conc-list: [4, 8, 16, 32, 64, 128]
      prefill:
        num-worker: 1
        tp: 4
        ep: 4
        dp-attn: true
        additional-settings:
        # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/vllm/kimi-k2.5/1k1k/disagg-gb200-1p4d-dep4-tep4.yaml
        - "CONFIG_FILE=recipes/vllm/kimi-k2.5/1k1k/disagg-gb200-1p4d-dep4-tep4.yaml"
      decode:
        num-worker: 4
        tp: 4
        ep: 4
        dp-attn: false
  - isl: 8192
    osl: 1024
    search-space:
    # 1 prefill worker (tp4/ep4) + 4 decode workers (tp4/ep4, no dp-attn), low concurrency.
    # NOTE(review): 64 is absent here, unlike the matching 1k1k sweep above;
    # confirm the gap is intentional.
    - conc-list: [4, 8, 16, 32, 128]
      prefill:
        num-worker: 1
        tp: 4
        ep: 4
        dp-attn: true
        additional-settings:
        # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-1p4d-dep4-tep4.yaml
        - "CONFIG_FILE=recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-1p4d-dep4-tep4.yaml"
      decode:
        num-worker: 4
        tp: 4
        ep: 4
        dp-attn: false
    # 3 prefill workers (tp4/ep4) + 1 decode worker (tp16/ep16).
    - conc-list: [512, 1024]
      prefill:
        num-worker: 3
        tp: 4
        ep: 4
        dp-attn: true
        additional-settings:
        # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-3p1d-dep4-dep16.yaml
        - "CONFIG_FILE=recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-3p1d-dep4-dep16.yaml"
      decode:
        num-worker: 1
        tp: 16
        ep: 16
        dp-attn: true
    # 5 prefill workers (tp4/ep4) + 1 decode worker (tp8/ep8).
    - conc-list: [2048]
      prefill:
        num-worker: 5
        tp: 4
        ep: 4
        dp-attn: true
        additional-settings:
        # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-5p1d-dep4-dep8.yaml
        - "CONFIG_FILE=recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-5p1d-dep4-dep8.yaml"
      decode:
        num-worker: 1
        tp: 8
        ep: 8
        dp-attn: true
    # 6 prefill workers (tp4/ep4) + 1 decode worker (tp16/ep16).
    - conc-list: [3072, 4096]
      prefill:
        num-worker: 6
        tp: 4
        ep: 4
        dp-attn: true
        additional-settings:
        # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-6p1d-dep4-dep16.yaml
        - "CONFIG_FILE=recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-6p1d-dep4-dep16.yaml"
      decode:
        num-worker: 1
        tp: 16
        ep: 16
        dp-attn: true
13 changes: 13 additions & 0 deletions perf-changelog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1265,3 +1265,16 @@
description:
- "Add Qwen3.5-397B-A17B-FP8 H200 SGLang MTP (EAGLE speculative decoding)"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1001

# Changelog entry for the kimik2.5-fp4-gb200-dynamo-vllm config.
- config-keys:
    - kimik2.5-fp4-gb200-dynamo-vllm
  description:
    - "Add Kimi K2.5 NVFP4 GB200 disaggregated multinode vLLM benchmark via Dynamo frontend"
    - "Image: vllm/vllm-openai:v0.18.0-cu130"
    - "Model: nvidia/Kimi-K2.5-NVFP4 with NixlConnector KV transfer, FLASHINFER_MLA attention"
    - "1k1k configs: high-throughput DEP (1P1D dep4/dep16), low-latency TEP (1P4D dep4/tep4)"
    - "8k1k configs: low-latency TEP (1P4D), mid-curve DEP (3P1D dep16), high-throughput (5P1D dep8, 6P1D dep16)"
    - "Recipes sourced from NVIDIA/srt-slurm branch sa-submission-q2-2026"
    - "New framework: dynamo-vllm (Dynamo frontend + vLLM backend)"
    - "Runner script updated to clone NVIDIA/srt-slurm and map vLLM container image"
  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1008
21 changes: 18 additions & 3 deletions runners/launch_gb200-nv.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ elif [[ $FRAMEWORK == "dynamo-trt" ]]; then
echo "Unsupported model prefix: $MODEL_PREFIX. Supported prefixes are: gptoss or dsr1"
exit 1
fi
elif [[ $FRAMEWORK == "dynamo-vllm" ]]; then
if [[ $MODEL_PREFIX == "kimik2.5" && $PRECISION == "fp4" ]]; then
export MODEL_PATH="/mnt/lustre01/models/kimi-k2.5-nvfp4"
export SRT_SLURM_MODEL_PREFIX="kimi-k2.5-nvfp4"
else
echo "Unsupported model prefix/precision combination: $MODEL_PREFIX/$PRECISION. Supported combinations for dynamo-vllm: kimik2.5/fp4"
exit 1
fi
else
export MODEL_PATH=$MODEL
fi
Expand Down Expand Up @@ -112,9 +120,15 @@ if [ -d "$SRT_REPO_DIR" ]; then
rm -rf "$SRT_REPO_DIR"
fi

git clone https://github.com/ishandhanani/srt-slurm.git "$SRT_REPO_DIR"
cd "$SRT_REPO_DIR"
git checkout sa-submission-q1-2026
# Choose which srt-slurm repository and branch to check out.
# dynamo-vllm uses NVIDIA/srt-slurm at sa-submission-q2-2026; every other
# framework uses ishandhanani/srt-slurm at sa-submission-q1-2026.
case "$FRAMEWORK" in
    dynamo-vllm)
        srt_repo_url="https://github.com/NVIDIA/srt-slurm.git"
        srt_branch="sa-submission-q2-2026"
        ;;
    *)
        srt_repo_url="https://github.com/ishandhanani/srt-slurm.git"
        srt_branch="sa-submission-q1-2026"
        ;;
esac

# Clone into SRT_REPO_DIR, enter it, and switch to the selected branch.
git clone "$srt_repo_url" "$SRT_REPO_DIR"
cd "$SRT_REPO_DIR"
git checkout "$srt_branch"

echo "Installing srtctl..."
curl -LsSf https://astral.sh/uv/install.sh | sh
Expand Down Expand Up @@ -155,6 +169,7 @@ model_paths:
containers:
dynamo-trtllm: ${SQUASH_FILE}
dynamo-sglang: ${SQUASH_FILE}
"${IMAGE}": ${SQUASH_FILE}
nginx-sqsh: ${NGINX_SQUASH_FILE}
EOF

Expand Down
Loading