From 3b88a073c722b636d63340d8ec478c0b6454133c Mon Sep 17 00:00:00 2001 From: Nathan Levin Date: Thu, 5 Feb 2026 17:34:13 +0000 Subject: [PATCH 01/10] Update dsr1-fp8-h100-dynamo-trt with verified 29 Pareto configs Update H100 section with verified Pareto-optimal points: 1k1k ISL/OSL: - MTP: 9 configs (conc: 6, 9, 30, 60, 117, 231, 462, 615, 1229) - STP: 9 configs (conc: 6, 9, 30, 60, 231, 462, 924, 1845, 4916) 8k1k ISL/OSL (new): - MTP: 6 configs (conc: 6, 9, 30, 77, 78, 154) - STP: 5 configs (conc: 6, 9, 30, 154, 308) All configs use TP=16, EP=16 across 2 nodes (16 GPUs total). --- .github/configs/nvidia-master.yaml | 442 +++++++++++++++++++++++++++++ 1 file changed, 442 insertions(+) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index a2e036510..eb21a9587 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -1571,6 +1571,448 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: true +dsr1-fp8-h100-dynamo-trt: + image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1 + model: deepseek-ai/DeepSeek-R1-0528 + model-prefix: dsr1 + runner: h100-multinode-slurm + precision: fp8 + framework: dynamo-trt + multinode: true + disagg: true + seq-len-configs: + - isl: 1024 + osl: 1024 + search-space: + # MTP configurations + - spec-decoding: "mtp" + conc-list: [6] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [9] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [30] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [60] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch16_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch16_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [117] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch32_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch32_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [231] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_dep16_batch4_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_dep16_batch4_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [615] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp2.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp2.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [462] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch128_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch128_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [1229] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + # Non-MTP configurations (STP) + - conc-list: [6] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - conc-list: [9] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - conc-list: [30] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - conc-list: [60] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch16_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch16_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - conc-list: [231] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch4_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch4_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [462] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch8_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch8_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [924] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch16_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch16_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [1845] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch32_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch32_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [4916] + prefill: + num-worker: 2 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx2_gen1_dep16_batch256_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx2_gen1_dep16_batch256_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - isl: 8192 + osl: 1024 + search-space: + # MTP configurations (6 points) + - spec-decoding: "mtp" + conc-list: [6] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [9] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [30] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [77] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen1_dep16_batch4_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen1_dep16_batch4_eplb0_mtp3.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [78] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml" + decode: + num-worker: 2 + tp: 16 + ep: 16 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [154] + prefill: + num-worker: 2 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx2_gen1_dep16_batch8_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx2_gen1_dep16_batch8_eplb0_mtp3.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + # STP configurations (5 points) + - conc-list: [6] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - conc-list: [9] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - conc-list: [30] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - conc-list: [154] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen2_tep16_batch64_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen2_tep16_batch64_eplb0_mtp0.yaml" + decode: + num-worker: 2 + tp: 16 + ep: 16 + dp-attn: false + - conc-list: [308] + prefill: + num-worker: 2 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx2_gen1_dep16_batch16_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx2_gen1_dep16_batch16_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + gptoss-fp4-b200-trt: image: nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2.post2 model: openai/gpt-oss-120b From c4103f5a7b52a32dd2534079a7b10d46bbc56d53 Mon Sep 17 00:00:00 2001 From: Sahithi Chigurupati Date: Thu, 5 Feb 2026 12:57:05 -0800 Subject: [PATCH 02/10] add h100 multinode launch script --- .github/configs/runners.yaml | 2 + runners/launch_h100-dgxc-slurm.sh | 174 ++++++++++++++++++++++++++++++ 2 files changed, 176 insertions(+) create mode 100644 runners/launch_h100-dgxc-slurm.sh diff --git a/.github/configs/runners.yaml b/.github/configs/runners.yaml index 86ef27315..eb9602ecb 100644 --- a/.github/configs/runners.yaml +++ b/.github/configs/runners.yaml @@ -66,3 +66,5 @@ b300: - 'b300-nv_0' gb300: - 'gb300-nv_0' +h100-multinode-slurm: +- 'h100-dgxc-slurm_0' diff --git a/runners/launch_h100-dgxc-slurm.sh b/runners/launch_h100-dgxc-slurm.sh new file mode 100644 index 000000000..be5a05c58 --- /dev/null +++ b/runners/launch_h100-dgxc-slurm.sh @@ -0,0 +1,174 @@ +#!/usr/bin/bash + +set -x + +echo "Cloning srt-slurm repository..." +SRT_REPO_DIR="srt-slurm" +if [ -d "$SRT_REPO_DIR" ]; then + echo "Removing existing $SRT_REPO_DIR..." + rm -rf "$SRT_REPO_DIR" +fi + +git clone https://github.com/ishandhanani/srt-slurm.git "$SRT_REPO_DIR" +cd "$SRT_REPO_DIR" +git checkout trtllm-h100 + +echo "Installing srtctl..." +curl -LsSf https://astral.sh/uv/install.sh | sh +source $HOME/.local/bin/env + +uv venv +source .venv/bin/activate +uv pip install -e . + +if ! command -v srtctl &> /dev/null; then + echo "Error: Failed to install srtctl" + exit 1 +fi + +echo "Configs available at: $SRT_REPO_DIR/" + +export SLURM_PARTITION="hpc-gpu-1" +export SLURM_ACCOUNT="customer" + +# Convert IMAGE to srt-slurm format (nvcr.io/ -> nvcr.io#) +CONTAINER_KEY=$(echo "$IMAGE" | sed 's|nvcr.io/|nvcr.io#|') + +# Use patched container for dynamo-trt (MNNVL fix), otherwise derive from IMAGE +if [[ "$IMAGE" == *"ai-dynamo/tensorrtllm-runtime"* ]]; then + SQUASH_FILE="/mnt/nfs/lustre/containers/dynamo-trtllm-mnnvl-fix.sqsh" +else + SQUASH_FILE="/mnt/nfs/slurm-shared/containers/$(echo "$IMAGE" | sed 's|nvcr.io/||' | sed 's/[\/:@#]/+/g').sqsh" +fi + +if [[ $MODEL_PREFIX == "DeepSeek-R1-0528" ]]; then + export MODEL_PATH="/mnt/numa1/shared/models/dsr1-fp8" + export SERVED_MODEL_NAME="DeepSeek-R1-0528" +else + echo "Unsupported model prefix: $MODEL_PREFIX. Supported prefixes are: DeepSeek-R1-0528" + exit 1 +fi + +export ISL="$ISL" +export OSL="$OSL" + +# Create srtslurm.yaml for srtctl +echo "Creating srtslurm.yaml configuration..." +cat > srtslurm.yaml <&1) +echo "$SRTCTL_OUTPUT" + +# Extract JOB_ID from srtctl output +JOB_ID=$(echo "$SRTCTL_OUTPUT" | grep -oP '✅ Job \K[0-9]+' || echo "$SRTCTL_OUTPUT" | grep -oP 'Job \K[0-9]+') + +if [ -z "$JOB_ID" ]; then + echo "Error: Failed to extract JOB_ID from srtctl output" + exit 1 +fi + +echo "Extracted JOB_ID: $JOB_ID" + +# Wait for this specific job to complete +echo "Waiting for job $JOB_ID to complete..." +while [ -n "$(squeue -j $JOB_ID --noheader 2>/dev/null)" ]; do + echo "Job $JOB_ID still running..." + squeue -j $JOB_ID + sleep 30 +done +echo "Job $JOB_ID completed!" + +echo "Collecting results..." + +# Use the JOB_ID to find the logs directory +# srtctl creates logs in outputs/JOB_ID/logs/ +LOGS_DIR="outputs/$JOB_ID/logs" + +if [ ! -d "$LOGS_DIR" ]; then + echo "Warning: Logs directory not found at $LOGS_DIR" + exit 1 +fi + +echo "Found logs directory: $LOGS_DIR" + +cat $LOGS_DIR/sweep_${JOB_ID}.log + +for file in $LOGS_DIR/*; do + if [ -f "$file" ]; then + tail -n 500 $file + fi +done + +# Find all result subdirectories +RESULT_SUBDIRS=$(find "$LOGS_DIR" -maxdepth 1 -type d -name "*isl*osl*" 2>/dev/null) + +if [ -z "$RESULT_SUBDIRS" ]; then + echo "Warning: No result subdirectories found in $LOGS_DIR" +else + # Process results from all configurations + for result_subdir in $RESULT_SUBDIRS; do + echo "Processing result subdirectory: $result_subdir" + + # Extract configuration info from directory name + CONFIG_NAME=$(basename "$result_subdir") + + # Find all result JSON files + RESULT_FILES=$(find "$result_subdir" -name "results_concurrency_*.json" 2>/dev/null) + + for result_file in $RESULT_FILES; do + if [ -f "$result_file" ]; then + # Extract metadata from filename + # Files are of the format "results_concurrency_gpus_{num gpus}_ctx_{num ctx}_gen_{num gen}.json" + filename=$(basename "$result_file") + concurrency=$(echo "$filename" | sed -n 's/results_concurrency_\([0-9]*\)_gpus_.*/\1/p') + gpus=$(echo "$filename" | sed -n 's/results_concurrency_[0-9]*_gpus_\([0-9]*\)_ctx_.*/\1/p') + ctx=$(echo "$filename" | sed -n 's/.*_ctx_\([0-9]*\)_gen_.*/\1/p') + gen=$(echo "$filename" | sed -n 's/.*_gen_\([0-9]*\)\.json/\1/p') + + echo "Processing concurrency $concurrency with $gpus GPUs (ctx: $ctx, gen: $gen): $result_file" + + WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus_${gpus}_ctx_${ctx}_gen_${gen}.json" + cp "$result_file" "$WORKSPACE_RESULT_FILE" + + echo "Copied result file to: $WORKSPACE_RESULT_FILE" + fi + done + done +fi + +echo "All result files processed" + +# Cleanup +echo "Cleaning up..." +deactivate 2>/dev/null || true +rm -rf .venv +echo "Cleanup complete" From 46456c0816228d5bfadf5e41840d168c90e3b6c0 Mon Sep 17 00:00:00 2001 From: Sahithi Chigurupati Date: Thu, 5 Feb 2026 13:25:51 -0800 Subject: [PATCH 03/10] modify model prefix --- .github/configs/nvidia-master.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index eb21a9587..ece670591 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -1574,7 +1574,7 @@ dsr1-fp8-h200-dynamo-trt: dsr1-fp8-h100-dynamo-trt: image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1 model: deepseek-ai/DeepSeek-R1-0528 - model-prefix: dsr1 + model-prefix: DeepSeek-R1-0528 runner: h100-multinode-slurm precision: fp8 framework: dynamo-trt From 1fc4cd750118acdc88c277d89fb18d6c7858a45a Mon Sep 17 00:00:00 2001 From: nlevin-ui Date: Thu, 5 Feb 2026 15:34:43 -0700 Subject: [PATCH 04/10] Update .github/configs/nvidia-master.yaml Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com> --- .github/configs/nvidia-master.yaml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index ece670591..0ad6e30a8 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -1676,35 +1676,35 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true - spec-decoding: "mtp" - conc-list: [615] + conc-list: [462] prefill: num-worker: 1 tp: 16 ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp2.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp2.yaml" + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch128_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch128_eplb0_mtp3.yaml" decode: - num-worker: 1 + num-worker: 3 tp: 16 ep: 16 - dp-attn: true + dp-attn: false - spec-decoding: "mtp" - conc-list: [462] + conc-list: [615] prefill: num-worker: 1 tp: 16 ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch128_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch128_eplb0_mtp3.yaml" + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp2.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp2.yaml" decode: - num-worker: 3 + num-worker: 1 tp: 16 ep: 16 - dp-attn: false + dp-attn: true - spec-decoding: "mtp" conc-list: [1229] prefill: From c42032e809e1b765dd9e01b493a905628ec2e22e Mon Sep 17 00:00:00 2001 From: Nathan Levin Date: Thu, 5 Feb 2026 23:12:01 +0000 Subject: [PATCH 05/10] Remove hardcoded container override in H100 launch script Use consistent sed-based path derivation for all container images. --- runners/launch_h100-dgxc-slurm.sh | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/runners/launch_h100-dgxc-slurm.sh b/runners/launch_h100-dgxc-slurm.sh index be5a05c58..18eddf1c0 100644 --- a/runners/launch_h100-dgxc-slurm.sh +++ b/runners/launch_h100-dgxc-slurm.sh @@ -34,12 +34,8 @@ export SLURM_ACCOUNT="customer" # Convert IMAGE to srt-slurm format (nvcr.io/ -> nvcr.io#) CONTAINER_KEY=$(echo "$IMAGE" | sed 's|nvcr.io/|nvcr.io#|') -# Use patched container for dynamo-trt (MNNVL fix), otherwise derive from IMAGE -if [[ "$IMAGE" == *"ai-dynamo/tensorrtllm-runtime"* ]]; then - SQUASH_FILE="/mnt/nfs/lustre/containers/dynamo-trtllm-mnnvl-fix.sqsh" -else - SQUASH_FILE="/mnt/nfs/slurm-shared/containers/$(echo "$IMAGE" | sed 's|nvcr.io/||' | sed 's/[\/:@#]/+/g').sqsh" -fi +# Map container image to local squash file +SQUASH_FILE="/mnt/nfs/slurm-shared/containers/$(echo "$IMAGE" | sed 's|nvcr.io/||' | sed 's/[\/:@#]/+/g').sqsh" if [[ $MODEL_PREFIX == "DeepSeek-R1-0528" ]]; then export MODEL_PATH="/mnt/numa1/shared/models/dsr1-fp8" From 7cf62e5f0f0bfb2967830fa683e7219deb200103 Mon Sep 17 00:00:00 2001 From: Nathan Levin Date: Fri, 6 Feb 2026 00:45:09 +0000 Subject: [PATCH 06/10] Update H100 image to tensorrtllm-runtime:0.8.1.post3 Co-authored-by: Cursor --- .github/configs/nvidia-master.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 0ad6e30a8..8ec22da4e 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -1,5 +1,5 @@ dsr1-fp4-b200-dynamo-trt: - image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1 + image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post3 model: deepseek-r1-fp4 model-prefix: dsr1 runner: b200-multinode-slurm @@ -384,7 +384,7 @@ dsr1-fp4-b200-dynamo-trt: dp-attn: true dsr1-fp4-b300-dynamo-trt: - image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1 + image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post3 model: deepseek-r1-fp4 model-prefix: dsr1 runner: b300 @@ -1087,7 +1087,7 @@ dsr1-fp8-h200-trt-mtp: - { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 256, spec-decoding: mtp } dsr1-fp8-h200-dynamo-trt: - image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1 + image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post3 model: deepseek-ai/DeepSeek-R1-0528 model-prefix: dsr1 runner: h200-multinode-slurm @@ -1572,7 +1572,7 @@ dsr1-fp8-h200-dynamo-trt: dp-attn: true dsr1-fp8-h100-dynamo-trt: - image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1 + image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post3 model: deepseek-ai/DeepSeek-R1-0528 model-prefix: DeepSeek-R1-0528 runner: h100-multinode-slurm From af0736424e8d7c6273b9538c43ef885dfc2b7b66 Mon Sep 17 00:00:00 2001 From: nlevin-ui Date: Thu, 5 Feb 2026 20:56:16 -0700 Subject: [PATCH 07/10] Update runners/launch_h100-dgxc-slurm.sh Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com> --- runners/launch_h100-dgxc-slurm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runners/launch_h100-dgxc-slurm.sh b/runners/launch_h100-dgxc-slurm.sh index 18eddf1c0..6c6126aa8 100644 --- a/runners/launch_h100-dgxc-slurm.sh +++ b/runners/launch_h100-dgxc-slurm.sh @@ -11,7 +11,7 @@ fi git clone https://github.com/ishandhanani/srt-slurm.git "$SRT_REPO_DIR" cd "$SRT_REPO_DIR" -git checkout trtllm-h100 +git checkout sa-submission-q1-2026 echo "Installing srtctl..." curl -LsSf https://astral.sh/uv/install.sh | sh From aa3af24af726678042fff26cd4914f4aff3a5872 Mon Sep 17 00:00:00 2001 From: Nathan Levin Date: Fri, 6 Feb 2026 04:01:01 +0000 Subject: [PATCH 08/10] fix: revert container to post1 for non-H100 configs Only dsr1-fp8-h100-dynamo-trt should use 0.8.1.post3. Revert B200, B300, and H200 configs back to 0.8.1.post1. Co-authored-by: Cursor --- .github/configs/nvidia-master.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 8ec22da4e..65f78234b 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -1,5 +1,5 @@ dsr1-fp4-b200-dynamo-trt: - image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post3 + image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1 model: deepseek-r1-fp4 model-prefix: dsr1 runner: b200-multinode-slurm @@ -384,7 +384,7 @@ dsr1-fp4-b200-dynamo-trt: dp-attn: true dsr1-fp4-b300-dynamo-trt: - image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post3 + image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1 model: deepseek-r1-fp4 model-prefix: dsr1 runner: b300 @@ -1087,7 +1087,7 @@ dsr1-fp8-h200-trt-mtp: - { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 256, spec-decoding: mtp } dsr1-fp8-h200-dynamo-trt: - image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post3 + image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1 model: deepseek-ai/DeepSeek-R1-0528 model-prefix: dsr1 runner: h200-multinode-slurm From 3911118c62279c9aaa036d26d2605eadde23e421 Mon Sep 17 00:00:00 2001 From: Nathan Levin Date: Fri, 6 Feb 2026 04:10:13 +0000 Subject: [PATCH 09/10] Add perf-changelog entry for dsr1-fp8-h100-dynamo-trt Documents the new H100 FP8 disaggregated TRT-LLM configurations using tensorrtllm-runtime:0.8.1.post3 container. Co-authored-by: Cursor --- perf-changelog.yaml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 1ddfc2ee1..84494e065 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -400,14 +400,14 @@ - "Update GPT-OSS FP4 B200 TRT pareto configurations and new container image" - "Extend maximum concurrency to 256 across all sequence lengths" pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/639 - + - config-keys: - dsr1-fp8-mi355x-sglang-disagg description: - "Add --use-chat-template argument to benchmark_serving script" - "Without this arg, MTP acceptance rates are artificially high for DeepSeek with MTP" pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/647 - + - config-keys: - dsr1-fp8-b200-sglang-mtp description: @@ -417,7 +417,7 @@ - "Update launch_b200-dgxc.sh to support SPEC_SUFFIX for MTP script selection" - "Configurations: TP=8, EP=1, concurrency 4-64 for 1k1k, 1k8k, and 8k1k sequence lengths" pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/626 - + - config-keys: - dsr1-fp4-b200-trt-mtp description: @@ -425,5 +425,9 @@ - "Enable dynamic piecewise CUDA graphs for several conditions" - "Adjust TP8/TP4 search space to reduce overlapping points" pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/642 - - + +- config-keys: + - dsr1-fp8-h100-dynamo-trt + description: + - "Add DeepSeek R1 FP8 H100 Dynamo TRT-LLM disaggregated multinode configurations" + pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/651 From d496e1064b052bd4b2f9554ac97f3fd0d8781c4e Mon Sep 17 00:00:00 2001 From: Nathan Levin Date: Fri, 6 Feb 2026 22:54:19 +0000 Subject: [PATCH 10/10] fix: use sa-shared container path in H100 launch script Change SQUASH_FILE path from /mnt/nfs/slurm-shared/containers/ to /mnt/nfs/sa-shared/containers/ to match cluster configuration. Co-authored-by: Cursor --- runners/launch_h100-dgxc-slurm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runners/launch_h100-dgxc-slurm.sh b/runners/launch_h100-dgxc-slurm.sh index 6c6126aa8..fb0ae69f7 100644 --- a/runners/launch_h100-dgxc-slurm.sh +++ b/runners/launch_h100-dgxc-slurm.sh @@ -35,7 +35,7 @@ export SLURM_ACCOUNT="customer" CONTAINER_KEY=$(echo "$IMAGE" | sed 's|nvcr.io/|nvcr.io#|') # Map container image to local squash file -SQUASH_FILE="/mnt/nfs/slurm-shared/containers/$(echo "$IMAGE" | sed 's|nvcr.io/||' | sed 's/[\/:@#]/+/g').sqsh" +SQUASH_FILE="/mnt/nfs/sa-shared/containers/$(echo "$IMAGE" | sed 's|nvcr.io/||' | sed 's/[\/:@#]/+/g').sqsh" if [[ $MODEL_PREFIX == "DeepSeek-R1-0528" ]]; then export MODEL_PATH="/mnt/numa1/shared/models/dsr1-fp8"