diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 993a075bd..f1181b941 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -953,7 +953,7 @@ dsr1-fp8-mi355x-sglang-disagg-mtp: dsr1-fp4-mi355x-sglang-disagg: - image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-3 + image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0313-2 model: amd/DeepSeek-R1-0528-MXFP4 model-prefix: dsr1 runner: mi355x-disagg @@ -1161,7 +1161,7 @@ dsr1-fp4-mi355x-sglang-disagg: - "DECODE_MTP_SIZE=0" dsr1-fp4-mi355x-sglang-disagg-mtp: - image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-3 + image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0313-2 model: amd/DeepSeek-R1-0528-MXFP4 model-prefix: dsr1 runner: mi355x-disagg diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index d893ade6c..d6202608d 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -19,7 +19,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen2_dep8_batch64_eplb0_mtp2.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen2_dep8_batch64_eplb0_mtp2.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen2_dep8_batch64_eplb0_mtp2.yaml" decode: num-worker: 2 @@ -34,7 +34,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_dep8_batch16_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_dep8_batch16_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_dep8_batch16_eplb0_mtp3.yaml" decode: num-worker: 5 @@ -49,7 +49,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml" decode: num-worker: 5 @@ -64,7 +64,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_tep8_batch32_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_tep8_batch32_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_tep8_batch32_eplb0_mtp3.yaml" decode: num-worker: 5 @@ -79,7 +79,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx3_gen4_dep8_batch128_eplb0_mtp1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx3_gen4_dep8_batch128_eplb0_mtp1.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/mtp/ctx3_gen4_dep8_batch128_eplb0_mtp1.yaml" decode: num-worker: 4 @@ -94,7 +94,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx3_gen5_dep4_batch512_eplb0_mtp1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx3_gen5_dep4_batch512_eplb0_mtp1.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/mtp/ctx3_gen5_dep4_batch512_eplb0_mtp1.yaml" decode: num-worker: 5 @@ -110,7 +110,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -124,7 +124,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen2_dep8_batch128_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen2_dep8_batch128_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen2_dep8_batch128_eplb0_mtp0.yaml" decode: num-worker: 2 @@ -138,7 +138,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_dep8_batch32_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_dep8_batch32_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_dep8_batch32_eplb0_mtp0.yaml" decode: num-worker: 5 @@ -152,7 +152,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_tep8_batch1_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_tep8_batch1_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_tep8_batch1_eplb0_mtp0.yaml" decode: num-worker: 5 @@ -166,7 +166,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_tep8_batch32_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_tep8_batch32_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_tep8_batch32_eplb0_mtp0.yaml" decode: num-worker: 5 @@ -180,7 +180,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen6_tep8_batch64_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen6_tep8_batch64_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen6_tep8_batch64_eplb0_mtp0.yaml" decode: num-worker: 6 @@ -199,7 +199,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen1_dep8_batch8_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen1_dep8_batch8_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen1_dep8_batch8_eplb0_mtp3.yaml" decode: num-worker: 1 @@ -214,7 +214,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen3_tep8_batch16_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen3_tep8_batch16_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen3_tep8_batch16_eplb0_mtp3.yaml" decode: num-worker: 3 @@ -229,7 +229,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml" decode: num-worker: 5 @@ -244,7 +244,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen5_tep8_batch8_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen5_tep8_batch8_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen5_tep8_batch8_eplb0_mtp3.yaml" decode: num-worker: 5 @@ -259,7 +259,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx3_gen1_dep8_batch64_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx3_gen1_dep8_batch64_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx3_gen1_dep8_batch64_eplb0_mtp3.yaml" decode: num-worker: 1 @@ -274,7 +274,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx5_gen1_dep8_batch192_eplb0_mtp1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx5_gen1_dep8_batch192_eplb0_mtp1.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx5_gen1_dep8_batch192_eplb0_mtp1.yaml" decode: num-worker: 1 @@ -289,7 +289,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx5_gen2_dep8_batch32_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx5_gen2_dep8_batch32_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx5_gen2_dep8_batch32_eplb0_mtp3.yaml" decode: num-worker: 2 @@ -305,7 +305,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx1_gen5_tep8_batch1_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx1_gen5_tep8_batch1_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/stp/ctx1_gen5_tep8_batch1_eplb0_mtp0.yaml" decode: num-worker: 5 @@ -319,7 +319,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx1_gen5_tep8_batch8_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx1_gen5_tep8_batch8_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/stp/ctx1_gen5_tep8_batch8_eplb0_mtp0.yaml" decode: num-worker: 5 @@ -333,7 +333,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx2_gen5_tep8_batch64_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx2_gen5_tep8_batch64_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/stp/ctx2_gen5_tep8_batch64_eplb0_mtp0.yaml" decode: num-worker: 5 @@ -347,7 +347,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx4_gen1_dep8_batch192_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx4_gen1_dep8_batch192_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/stp/ctx4_gen1_dep8_batch192_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -361,7 +361,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx4_gen3_dep8_batch32_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx4_gen3_dep8_batch32_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/stp/ctx4_gen3_dep8_batch32_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -375,7 +375,7 @@ dsr1-fp4-b200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx7_gen2_dep8_batch128_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx7_gen2_dep8_batch128_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/stp/ctx7_gen2_dep8_batch128_eplb0_mtp0.yaml" decode: num-worker: 2 @@ -405,7 +405,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch1_eplb0_mtp3_8.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch1_eplb0_mtp3_8.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch1_eplb0_mtp3_8.yaml" decode: num-worker: 8 @@ -420,7 +420,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch4_eplb0_mtp3_32.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch4_eplb0_mtp3_32.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch4_eplb0_mtp3_32.yaml" decode: num-worker: 8 @@ -435,7 +435,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch8_eplb0_mtp3_64.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch8_eplb0_mtp3_64.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch8_eplb0_mtp3_64.yaml" decode: num-worker: 8 @@ -450,7 +450,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch32_eplb0_mtp3_256.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch32_eplb0_mtp3_256.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch32_eplb0_mtp3_256.yaml" decode: num-worker: 8 @@ -466,7 +466,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen7_dep8_batch128_eplb0_mtp3_896.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen7_dep8_batch128_eplb0_mtp3_896.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen7_dep8_batch128_eplb0_mtp3_896.yaml" decode: num-worker: 7 @@ -481,7 +481,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen4_dep8_batch256_eplb0_mtp3_1024.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen4_dep8_batch256_eplb0_mtp3_1024.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen4_dep8_batch256_eplb0_mtp3_1024.yaml" decode: num-worker: 4 @@ -496,7 +496,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen3_dep8_batch384_eplb0_mtp3_1184.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen3_dep8_batch384_eplb0_mtp3_1184.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen3_dep8_batch384_eplb0_mtp3_1184.yaml" decode: num-worker: 3 @@ -511,7 +511,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen2_dep8_batch768_eplb0_mtp2_1600.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen2_dep8_batch768_eplb0_mtp2_1600.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen2_dep8_batch768_eplb0_mtp2_1600.yaml" decode: num-worker: 2 @@ -527,7 +527,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_4.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_4.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_4.yaml" decode: num-worker: 3 @@ -541,7 +541,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_32.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_32.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_32.yaml" decode: num-worker: 3 @@ -555,7 +555,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_128.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_128.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_128.yaml" decode: num-worker: 3 @@ -570,7 +570,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen5_dep8_batch48_eplb0_mtp0_1920.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen5_dep8_batch48_eplb0_mtp0_1920.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen5_dep8_batch48_eplb0_mtp0_1920.yaml" decode: num-worker: 5 @@ -584,7 +584,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0_4096.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0_4096.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0_4096.yaml" decode: num-worker: 1 @@ -598,7 +598,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx2_gen5_dep8_batch128_eplb0_mtp0_5152.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx2_gen5_dep8_batch128_eplb0_mtp0_5152.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/stp/ctx2_gen5_dep8_batch128_eplb0_mtp0_5152.yaml" decode: num-worker: 5 @@ -618,7 +618,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen6_tp8_batch8_eplb0_mtp3_8.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen6_tp8_batch8_eplb0_mtp3_8.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen6_tp8_batch8_eplb0_mtp3_8.yaml" decode: num-worker: 6 @@ -633,7 +633,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen2_tp8_batch32_eplb0_mtp3_8.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen2_tp8_batch32_eplb0_mtp3_8.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen2_tp8_batch32_eplb0_mtp3_8.yaml" decode: num-worker: 2 @@ -648,7 +648,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen6_tp8_batch8_eplb0_mtp3_48.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen6_tp8_batch8_eplb0_mtp3_48.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen6_tp8_batch8_eplb0_mtp3_48.yaml" decode: num-worker: 6 @@ -663,7 +663,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen4_tp8_batch16_eplb0_mtp3_64.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen4_tp8_batch16_eplb0_mtp3_64.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen4_tp8_batch16_eplb0_mtp3_64.yaml" decode: num-worker: 4 @@ -679,7 +679,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx2_gen3_dep8_batch8_eplb0_mtp3_224.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx2_gen3_dep8_batch8_eplb0_mtp3_224.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx2_gen3_dep8_batch8_eplb0_mtp3_224.yaml" decode: num-worker: 3 @@ -694,7 +694,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx2_gen1_dep8_batch32_eplb0_mtp3_288.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx2_gen1_dep8_batch32_eplb0_mtp3_288.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx2_gen1_dep8_batch32_eplb0_mtp3_288.yaml" decode: num-worker: 1 @@ -709,7 +709,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx4_gen1_dep8_batch128_eplb0_mtp2_1088.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx4_gen1_dep8_batch128_eplb0_mtp2_1088.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx4_gen1_dep8_batch128_eplb0_mtp2_1088.yaml" decode: num-worker: 1 @@ -725,7 +725,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 1 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_tp8_batch1_eplb0_mtp0_1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_tp8_batch1_eplb0_mtp0_1.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_tp8_batch1_eplb0_mtp0_1.yaml" decode: num-worker: 1 @@ -739,7 +739,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen4_tp8_batch32_eplb0_mtp0_32.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen4_tp8_batch32_eplb0_mtp0_32.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen4_tp8_batch32_eplb0_mtp0_32.yaml" decode: num-worker: 4 @@ -753,7 +753,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen4_tp8_batch32_eplb0_mtp0_128.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen4_tp8_batch32_eplb0_mtp0_128.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen4_tp8_batch32_eplb0_mtp0_128.yaml" decode: num-worker: 4 @@ -767,7 +767,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen6_tp8_batch16_eplb0_mtp0_96.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen6_tp8_batch16_eplb0_mtp0_96.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen6_tp8_batch16_eplb0_mtp0_96.yaml" decode: num-worker: 6 @@ -782,7 +782,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_dep8_batch128_eplb0_mtp0_128.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_dep8_batch128_eplb0_mtp0_128.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_dep8_batch128_eplb0_mtp0_128.yaml" decode: num-worker: 1 @@ -796,7 +796,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen2_dep8_batch64_eplb0_mtp0_128.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen2_dep8_batch64_eplb0_mtp0_128.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen2_dep8_batch64_eplb0_mtp0_128.yaml" decode: num-worker: 2 @@ -810,7 +810,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_dep8_batch256_eplb0_mtp0_256.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_dep8_batch256_eplb0_mtp0_256.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_dep8_batch256_eplb0_mtp0_256.yaml" decode: num-worker: 1 @@ -824,7 +824,7 @@ dsr1-fp8-b200-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx2_gen1_dep8_batch640_eplb0_mtp0_640.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx2_gen1_dep8_batch640_eplb0_mtp0_640.yaml - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx2_gen1_dep8_batch640_eplb0_mtp0_640.yaml" decode: num-worker: 1 @@ -854,7 +854,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen1_dep8_batch64_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen1_dep8_batch64_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen1_dep8_batch64_eplb0_mtp3.yaml" decode: num-worker: 1 @@ -869,7 +869,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen2_dep8_batch16_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen2_dep8_batch16_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen2_dep8_batch16_eplb0_mtp3.yaml" decode: num-worker: 2 @@ -884,7 +884,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml" decode: num-worker: 5 @@ -899,7 +899,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen5_tep8_batch32_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen5_tep8_batch32_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen5_tep8_batch32_eplb0_mtp3.yaml" decode: num-worker: 5 @@ -914,7 +914,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx2_gen1_dep8_batch256_eplb0_mtp1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx2_gen1_dep8_batch256_eplb0_mtp1.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx2_gen1_dep8_batch256_eplb0_mtp1.yaml" decode: num-worker: 1 @@ -929,7 +929,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx5_gen2_dep8_batch512_eplb0_mtp1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx5_gen2_dep8_batch512_eplb0_mtp1.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx5_gen2_dep8_batch512_eplb0_mtp1.yaml" decode: num-worker: 2 @@ -944,7 +944,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx5_gen2_dep8_batch768_eplb0_mtp1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx5_gen2_dep8_batch768_eplb0_mtp1.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx5_gen2_dep8_batch768_eplb0_mtp1.yaml" decode: num-worker: 2 @@ -960,7 +960,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen2_dep8_batch64_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen2_dep8_batch64_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen2_dep8_batch64_eplb0_mtp0.yaml" decode: num-worker: 2 @@ -974,7 +974,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml" decode: num-worker: 4 @@ -988,7 +988,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen5_tep4_batch4_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen5_tep4_batch4_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen5_tep4_batch4_eplb0_mtp0.yaml" decode: num-worker: 5 @@ -1002,7 +1002,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen5_tep8_batch64_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen5_tep8_batch64_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen5_tep8_batch64_eplb0_mtp0.yaml" decode: num-worker: 5 @@ -1016,7 +1016,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx2_gen1_dep8_batch512_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx2_gen1_dep8_batch512_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx2_gen1_dep8_batch512_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -1030,7 +1030,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx3_gen1_dep8_batch1024_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx3_gen1_dep8_batch1024_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx3_gen1_dep8_batch1024_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -1044,7 +1044,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx3_gen2_dep8_batch256_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx3_gen2_dep8_batch256_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx3_gen2_dep8_batch256_eplb0_mtp0.yaml" decode: num-worker: 2 @@ -1063,7 +1063,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx10_gen1_dep8_batch256_eplb0_mtp1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx10_gen1_dep8_batch256_eplb0_mtp1.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/mtp/ctx10_gen1_dep8_batch256_eplb0_mtp1.yaml" decode: num-worker: 1 @@ -1078,7 +1078,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep4_batch8_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep4_batch8_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep4_batch8_eplb0_mtp3.yaml" decode: num-worker: 4 @@ -1093,7 +1093,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml" decode: num-worker: 4 @@ -1108,7 +1108,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3.yaml" decode: num-worker: 4 @@ -1123,7 +1123,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx3_gen1_dep8_batch16_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx3_gen1_dep8_batch16_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/mtp/ctx3_gen1_dep8_batch16_eplb0_mtp3.yaml" decode: num-worker: 1 @@ -1138,7 +1138,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx9_gen1_dep8_batch128_eplb0_mtp1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx9_gen1_dep8_batch128_eplb0_mtp1.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/mtp/ctx9_gen1_dep8_batch128_eplb0_mtp1.yaml" decode: num-worker: 1 @@ -1154,7 +1154,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep4_batch32_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep4_batch32_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep4_batch32_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -1168,7 +1168,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -1182,7 +1182,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -1196,7 +1196,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen4_tep4_batch2_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen4_tep4_batch2_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen4_tep4_batch2_eplb0_mtp0.yaml" decode: num-worker: 4 @@ -1210,7 +1210,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx5_gen2_dep8_batch32_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx5_gen2_dep8_batch32_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx5_gen2_dep8_batch32_eplb0_mtp0.yaml" decode: num-worker: 2 @@ -1224,7 +1224,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx6_gen1_dep8_batch128_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx6_gen1_dep8_batch128_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx6_gen1_dep8_batch128_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -1238,7 +1238,7 @@ dsr1-fp4-b300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx8_gen1_dep8_batch256_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx8_gen1_dep8_batch256_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx8_gen1_dep8_batch256_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -1268,7 +1268,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen8_tp8_batch1_eplb0_mtp3_10.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen8_tp8_batch1_eplb0_mtp3_10.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen8_tp8_batch1_eplb0_mtp3_10.yaml" decode: num-worker: 8 @@ -1283,7 +1283,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen8_tp8_batch16_eplb0_mtp3_160.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen8_tp8_batch16_eplb0_mtp3_160.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen8_tp8_batch16_eplb0_mtp3_160.yaml" decode: num-worker: 8 @@ -1298,7 +1298,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen1_dp8_batch256_eplb0_mtp1_3072.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen1_dp8_batch256_eplb0_mtp1_3072.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen1_dp8_batch256_eplb0_mtp1_3072.yaml" decode: num-worker: 1 @@ -1313,7 +1313,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen2_dep8_batch128_eplb0_mtp1_2560.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen2_dep8_batch128_eplb0_mtp1_2560.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen2_dep8_batch128_eplb0_mtp1_2560.yaml" decode: num-worker: 2 @@ -1328,7 +1328,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen5_dep8_batch16_eplb0_mtp2_720.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen5_dep8_batch16_eplb0_mtp2_720.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen5_dep8_batch16_eplb0_mtp2_720.yaml" decode: num-worker: 5 @@ -1343,7 +1343,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx3_gen2_dp8_batch512_eplb0_mtp1_11264.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx3_gen2_dp8_batch512_eplb0_mtp1_11264.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/mtp/ctx3_gen2_dp8_batch512_eplb0_mtp1_11264.yaml" decode: num-worker: 2 @@ -1361,7 +1361,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen1_dep8_batch256_eplb0_mtp0_2112.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen1_dep8_batch256_eplb0_mtp0_2112.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen1_dep8_batch256_eplb0_mtp0_2112.yaml" decode: num-worker: 1 @@ -1375,7 +1375,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen2_dp8_batch128_eplb0_mtp0_3072.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen2_dp8_batch128_eplb0_mtp0_3072.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen2_dp8_batch128_eplb0_mtp0_3072.yaml" decode: num-worker: 2 @@ -1389,7 +1389,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen3_dp8_batch48_eplb0_mtp0_1280.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen3_dp8_batch48_eplb0_mtp0_1280.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen3_dp8_batch48_eplb0_mtp0_1280.yaml" decode: num-worker: 3 @@ -1403,7 +1403,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_12.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_12.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_12.yaml" decode: num-worker: 8 @@ -1417,7 +1417,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_128.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_128.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_128.yaml" decode: num-worker: 8 @@ -1431,7 +1431,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_384.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_384.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_384.yaml" decode: num-worker: 8 @@ -1445,7 +1445,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx2_gen1_dp8_batch1024_eplb0_mtp0_16384.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx2_gen1_dp8_batch1024_eplb0_mtp0_16384.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx2_gen1_dp8_batch1024_eplb0_mtp0_16384.yaml" decode: num-worker: 1 @@ -1464,7 +1464,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen2_tp8_batch16_eplb0_mtp3_40.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen2_tp8_batch16_eplb0_mtp3_40.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen2_tp8_batch16_eplb0_mtp3_40.yaml" decode: num-worker: 2 @@ -1479,7 +1479,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen4_tp8_batch1_eplb0_mtp3_8.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen4_tp8_batch1_eplb0_mtp3_8.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen4_tp8_batch1_eplb0_mtp3_8.yaml" decode: num-worker: 4 @@ -1494,7 +1494,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen4_tp8_batch4_eplb0_mtp3_20.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen4_tp8_batch4_eplb0_mtp3_20.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen4_tp8_batch4_eplb0_mtp3_20.yaml" decode: num-worker: 4 @@ -1509,7 +1509,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen1_dp8_batch8_eplb0_mtp3_72.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen1_dp8_batch8_eplb0_mtp3_72.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen1_dp8_batch8_eplb0_mtp3_72.yaml" decode: num-worker: 1 @@ -1524,7 +1524,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx2_gen1_dp8_batch16_eplb0_mtp3_144.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx2_gen1_dp8_batch16_eplb0_mtp3_144.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/mtp/ctx2_gen1_dp8_batch16_eplb0_mtp3_144.yaml" decode: num-worker: 1 @@ -1539,7 +1539,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx4_gen1_dp8_batch64_eplb0_mtp2_512.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx4_gen1_dp8_batch64_eplb0_mtp2_512.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/mtp/ctx4_gen1_dp8_batch64_eplb0_mtp2_512.yaml" decode: num-worker: 1 @@ -1557,7 +1557,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx1_gen4_tp8_batch16_eplb0_mtp0_64.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx1_gen4_tp8_batch16_eplb0_mtp0_64.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx1_gen4_tp8_batch16_eplb0_mtp0_64.yaml" decode: num-worker: 4 @@ -1571,7 +1571,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx1_gen8_tp8_batch2_eplb0_mtp0_16.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx1_gen8_tp8_batch2_eplb0_mtp0_16.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx1_gen8_tp8_batch2_eplb0_mtp0_16.yaml" decode: num-worker: 8 @@ -1585,7 +1585,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx2_gen1_dp8_batch32_eplb0_mtp0_256.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx2_gen1_dp8_batch32_eplb0_mtp0_256.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx2_gen1_dp8_batch32_eplb0_mtp0_256.yaml" decode: num-worker: 1 @@ -1599,7 +1599,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx3_gen1_dp8_batch64_eplb0_mtp0_512.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx3_gen1_dp8_batch64_eplb0_mtp0_512.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx3_gen1_dp8_batch64_eplb0_mtp0_512.yaml" decode: num-worker: 1 @@ -1613,7 +1613,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx3_gen5_tp8_batch64_eplb0_mtp0_256.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx3_gen5_tp8_batch64_eplb0_mtp0_256.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx3_gen5_tp8_batch64_eplb0_mtp0_256.yaml" decode: num-worker: 5 @@ -1627,7 +1627,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx5_gen1_dp8_batch128_eplb0_mtp0_1075.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx5_gen1_dp8_batch128_eplb0_mtp0_1075.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx5_gen1_dp8_batch128_eplb0_mtp0_1075.yaml" decode: num-worker: 1 @@ -1641,7 +1641,7 @@ dsr1-fp8-b300-dynamo-trt: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx7_gen1_dep8_batch384_eplb0_mtp0_3072.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx7_gen1_dep8_batch384_eplb0_mtp0_3072.yaml - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx7_gen1_dep8_batch384_eplb0_mtp0_3072.yaml" decode: num-worker: 1 @@ -2484,7 +2484,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/1k1k/mtp/c1_ctx1_gen11_tep8_batch1_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c1_ctx1_gen11_tep8_batch1_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c1_ctx1_gen11_tep8_batch1_eplb0_mtp3.yaml" decode: num-worker: 11 @@ -2499,7 +2499,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/1k1k/mtp/c4_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c4_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c4_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml" decode: num-worker: 11 @@ -2514,7 +2514,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/1k1k/mtp/c8_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c8_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c8_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml" decode: num-worker: 11 @@ -2529,7 +2529,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/1k1k/mtp/c16_ctx1_gen9_tep8_batch128_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c16_ctx1_gen9_tep8_batch128_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c16_ctx1_gen9_tep8_batch128_eplb0_mtp3.yaml" decode: num-worker: 9 @@ -2544,7 +2544,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/1k1k/mtp/c32_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c32_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c32_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml" decode: num-worker: 11 @@ -2559,7 +2559,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/1k1k/mtp/c64_ctx1_gen8_dep8_batch128_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c64_ctx1_gen8_dep8_batch128_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c64_ctx1_gen8_dep8_batch128_eplb0_mtp3.yaml" decode: num-worker: 8 @@ -2574,7 +2574,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/1k1k/mtp/c128_ctx1_gen7_dep8_batch128_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c128_ctx1_gen7_dep8_batch128_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c128_ctx1_gen7_dep8_batch128_eplb0_mtp3.yaml" decode: num-worker: 7 @@ -2589,7 +2589,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/1k1k/mtp/c256_ctx1_gen4_dep8_batch128_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c256_ctx1_gen4_dep8_batch128_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c256_ctx1_gen4_dep8_batch128_eplb0_mtp3.yaml" decode: num-worker: 4 @@ -2604,7 +2604,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/1k1k/mtp/c512_ctx1_gen2_dep8_batch256_eplb0_mtp1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c512_ctx1_gen2_dep8_batch256_eplb0_mtp1.yaml - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c512_ctx1_gen2_dep8_batch256_eplb0_mtp1.yaml" decode: num-worker: 2 @@ -2619,7 +2619,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/1k1k/stp/c1_ctx1_gen9_tep8_batch1_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c1_ctx1_gen9_tep8_batch1_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c1_ctx1_gen9_tep8_batch1_eplb0_mtp0.yaml" decode: num-worker: 9 @@ -2633,7 +2633,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/1k1k/stp/c4_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c4_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c4_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml" decode: num-worker: 9 @@ -2647,7 +2647,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/1k1k/stp/c8_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c8_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c8_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml" decode: num-worker: 9 @@ -2661,7 +2661,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/1k1k/stp/c16_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c16_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c16_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml" decode: num-worker: 9 @@ -2675,7 +2675,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/1k1k/stp/c32_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c32_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c32_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml" decode: num-worker: 9 @@ -2689,7 +2689,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/1k1k/stp/c64_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c64_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c64_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml" decode: num-worker: 9 @@ -2703,7 +2703,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/1k1k/stp/c128_ctx1_gen9_dep8_batch512_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c128_ctx1_gen9_dep8_batch512_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c128_ctx1_gen9_dep8_batch512_eplb0_mtp0.yaml" decode: num-worker: 9 @@ -2717,7 +2717,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/1k1k/stp/c256_ctx1_gen6_dep8_batch512_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c256_ctx1_gen6_dep8_batch512_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c256_ctx1_gen6_dep8_batch512_eplb0_mtp0.yaml" decode: num-worker: 6 @@ -2731,7 +2731,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/1k1k/stp/c512_ctx2_gen7_dep8_batch512_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c512_ctx2_gen7_dep8_batch512_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c512_ctx2_gen7_dep8_batch512_eplb0_mtp0.yaml" decode: num-worker: 7 @@ -2750,7 +2750,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/8k1k/mtp/c1_ctx1_gen7_tep8_batch1_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c1_ctx1_gen7_tep8_batch1_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c1_ctx1_gen7_tep8_batch1_eplb0_mtp3.yaml" decode: num-worker: 7 @@ -2765,7 +2765,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/8k1k/mtp/c4_ctx1_gen7_tep8_batch32_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c4_ctx1_gen7_tep8_batch32_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c4_ctx1_gen7_tep8_batch32_eplb0_mtp3.yaml" decode: num-worker: 7 @@ -2780,7 +2780,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/8k1k/mtp/c8_ctx1_gen6_tep8_batch32_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c8_ctx1_gen6_tep8_batch32_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c8_ctx1_gen6_tep8_batch32_eplb0_mtp3.yaml" decode: num-worker: 6 @@ -2795,7 +2795,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/8k1k/mtp/c16_ctx1_gen3_tep8_batch32_eplb0_mtp2.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c16_ctx1_gen3_tep8_batch32_eplb0_mtp2.yaml - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c16_ctx1_gen3_tep8_batch32_eplb0_mtp2.yaml" decode: num-worker: 3 @@ -2810,7 +2810,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/8k1k/mtp/c32_ctx3_gen5_tep8_batch32_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c32_ctx3_gen5_tep8_batch32_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c32_ctx3_gen5_tep8_batch32_eplb0_mtp3.yaml" decode: num-worker: 5 @@ -2825,7 +2825,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/8k1k/mtp/c64_ctx1_gen1_dep8_batch32_eplb0_mtp2.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c64_ctx1_gen1_dep8_batch32_eplb0_mtp2.yaml - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c64_ctx1_gen1_dep8_batch32_eplb0_mtp2.yaml" decode: num-worker: 1 @@ -2840,7 +2840,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/8k1k/mtp/c128_ctx2_gen1_dep8_batch32_eplb0_mtp2.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c128_ctx2_gen1_dep8_batch32_eplb0_mtp2.yaml - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c128_ctx2_gen1_dep8_batch32_eplb0_mtp2.yaml" decode: num-worker: 1 @@ -2855,7 +2855,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/8k1k/mtp/c256_ctx3_gen1_dep8_batch32_eplb0_mtp2.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c256_ctx3_gen1_dep8_batch32_eplb0_mtp2.yaml - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c256_ctx3_gen1_dep8_batch32_eplb0_mtp2.yaml" decode: num-worker: 1 @@ -2870,7 +2870,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/8k1k/mtp/c512_ctx3_gen1_dep8_batch64_eplb0_mtp1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c512_ctx3_gen1_dep8_batch64_eplb0_mtp1.yaml - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c512_ctx3_gen1_dep8_batch64_eplb0_mtp1.yaml" decode: num-worker: 1 @@ -2885,7 +2885,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/8k1k/stp/c1_ctx1_gen7_tep8_batch1_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c1_ctx1_gen7_tep8_batch1_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c1_ctx1_gen7_tep8_batch1_eplb0_mtp0.yaml" decode: num-worker: 7 @@ -2899,7 +2899,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/8k1k/stp/c4_ctx1_gen7_tep8_batch32_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c4_ctx1_gen7_tep8_batch32_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c4_ctx1_gen7_tep8_batch32_eplb0_mtp0.yaml" decode: num-worker: 7 @@ -2913,7 +2913,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/8k1k/stp/c8_ctx1_gen6_tep8_batch16_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c8_ctx1_gen6_tep8_batch16_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c8_ctx1_gen6_tep8_batch16_eplb0_mtp0.yaml" decode: num-worker: 6 @@ -2927,7 +2927,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/8k1k/stp/c16_ctx1_gen3_tep8_batch32_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c16_ctx1_gen3_tep8_batch32_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c16_ctx1_gen3_tep8_batch32_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -2941,7 +2941,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/8k1k/stp/c32_ctx2_gen5_tep8_batch128_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c32_ctx2_gen5_tep8_batch128_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c32_ctx2_gen5_tep8_batch128_eplb0_mtp0.yaml" decode: num-worker: 5 @@ -2955,7 +2955,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/8k1k/stp/c64_ctx2_gen3_dep8_batch128_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c64_ctx2_gen3_dep8_batch128_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c64_ctx2_gen3_dep8_batch128_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -2969,7 +2969,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/8k1k/stp/c128_ctx1_gen1_dep8_batch256_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c128_ctx1_gen1_dep8_batch256_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c128_ctx1_gen1_dep8_batch256_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -2983,7 +2983,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/8k1k/stp/c256_ctx5_gen3_dep8_batch256_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c256_ctx5_gen3_dep8_batch256_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c256_ctx5_gen3_dep8_batch256_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -2997,7 +2997,7 @@ dsr1-fp8-h200-dynamo-trt: ep: 8 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h200/8k1k/stp/c512_ctx3_gen1_dep8_batch512_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c512_ctx3_gen1_dep8_batch512_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c512_ctx3_gen1_dep8_batch512_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -3027,7 +3027,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml" decode: num-worker: 3 @@ -3042,7 +3042,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml" decode: num-worker: 3 @@ -3057,7 +3057,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml" decode: num-worker: 3 @@ -3072,7 +3072,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch16_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch16_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch16_eplb0_mtp3.yaml" decode: num-worker: 3 @@ -3087,7 +3087,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch32_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch32_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch32_eplb0_mtp3.yaml" decode: num-worker: 3 @@ -3102,7 +3102,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_dep16_batch4_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_dep16_batch4_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_dep16_batch4_eplb0_mtp3.yaml" decode: num-worker: 3 @@ -3117,7 +3117,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch128_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch128_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch128_eplb0_mtp3.yaml" decode: num-worker: 3 @@ -3132,7 +3132,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp2.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp2.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp2.yaml" decode: num-worker: 1 @@ -3147,7 +3147,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1.yaml" decode: num-worker: 1 @@ -3162,7 +3162,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -3176,7 +3176,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -3190,7 +3190,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -3204,7 +3204,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch16_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch16_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch16_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -3218,7 +3218,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch4_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch4_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch4_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -3232,7 +3232,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch8_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch8_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch8_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -3246,7 +3246,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch16_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch16_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch16_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -3260,7 +3260,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch32_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch32_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch32_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -3274,7 +3274,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx2_gen1_dep16_batch256_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx2_gen1_dep16_batch256_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx2_gen1_dep16_batch256_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -3293,7 +3293,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml" decode: num-worker: 3 @@ -3308,7 +3308,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml" decode: num-worker: 3 @@ -3323,7 +3323,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml" decode: num-worker: 3 @@ -3338,7 +3338,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen1_dep16_batch4_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen1_dep16_batch4_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen1_dep16_batch4_eplb0_mtp3.yaml" decode: num-worker: 1 @@ -3355,7 +3355,7 @@ dsr1-fp8-h100-dynamo-trt: # ep: 16 # dp-attn: true # additional-settings: - # # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml + # # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml # - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml" # decode: # num-worker: 2 @@ -3370,7 +3370,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx2_gen1_dep16_batch8_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx2_gen1_dep16_batch8_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx2_gen1_dep16_batch8_eplb0_mtp3.yaml" decode: num-worker: 1 @@ -3385,7 +3385,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -3399,7 +3399,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -3413,7 +3413,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -3427,7 +3427,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen2_tep16_batch64_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen2_tep16_batch64_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen2_tep16_batch64_eplb0_mtp0.yaml" decode: num-worker: 2 @@ -3441,7 +3441,7 @@ dsr1-fp8-h100-dynamo-trt: ep: 16 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx2_gen1_dep16_batch16_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx2_gen1_dep16_batch16_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx2_gen1_dep16_batch16_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -3873,7 +3873,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3.yaml" decode: num-worker: 1 @@ -3888,7 +3888,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml" decode: num-worker: 4 @@ -3903,7 +3903,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx2_gen1_dep16_batch256_eplb256_mtp1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx2_gen1_dep16_batch256_eplb256_mtp1.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx2_gen1_dep16_batch256_eplb256_mtp1.yaml" decode: num-worker: 1 @@ -3918,7 +3918,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx3_gen1_dep32_batch64_eplb288_mtp1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx3_gen1_dep32_batch64_eplb288_mtp1.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx3_gen1_dep32_batch64_eplb288_mtp1.yaml" decode: num-worker: 1 @@ -3933,7 +3933,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx3_gen5_dep4_batch768_eplb0_mtp1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx3_gen5_dep4_batch768_eplb0_mtp1.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx3_gen5_dep4_batch768_eplb0_mtp1.yaml" decode: num-worker: 5 @@ -3950,7 +3950,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -3964,7 +3964,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen1_dep32_batch16_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen1_dep32_batch16_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen1_dep32_batch16_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -3978,7 +3978,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen2_dep4_batch768_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen2_dep4_batch768_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen2_dep4_batch768_eplb0_mtp0.yaml" decode: num-worker: 2 @@ -3992,7 +3992,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen4_tep8_batch32_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen4_tep8_batch32_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen4_tep8_batch32_eplb0_mtp0.yaml" decode: num-worker: 4 @@ -4006,7 +4006,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml" decode: num-worker: 4 @@ -4020,7 +4020,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx2_gen1_dep16_batch256_eplb256_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx2_gen1_dep16_batch256_eplb256_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx2_gen1_dep16_batch256_eplb256_mtp0.yaml" decode: num-worker: 1 @@ -4034,7 +4034,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx2_gen1_dep32_batch64_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx2_gen1_dep32_batch64_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx2_gen1_dep32_batch64_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -4054,7 +4054,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml" decode: num-worker: 4 @@ -4069,7 +4069,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx3_gen1_dep32_batch4_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx3_gen1_dep32_batch4_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx3_gen1_dep32_batch4_eplb0_mtp3.yaml" decode: num-worker: 1 @@ -4084,7 +4084,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx7_gen1_dep16_batch64_eplb256_mtp1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx7_gen1_dep16_batch64_eplb256_mtp1.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx7_gen1_dep16_batch64_eplb256_mtp1.yaml" decode: num-worker: 1 @@ -4099,7 +4099,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx8_gen1_dep32_batch16_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx8_gen1_dep32_batch16_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx8_gen1_dep32_batch16_eplb0_mtp3.yaml" decode: num-worker: 1 @@ -4114,7 +4114,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx11_gen1_dep16_batch256_eplb256_mtp1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx11_gen1_dep16_batch256_eplb256_mtp1.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx11_gen1_dep16_batch256_eplb256_mtp1.yaml" decode: num-worker: 1 @@ -4130,7 +4130,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch16_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch16_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch16_eplb0_mtp0.yaml" decode: num-worker: 4 @@ -4144,7 +4144,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml" decode: num-worker: 4 @@ -4158,7 +4158,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx2_gen1_dep32_batch8_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx2_gen1_dep32_batch8_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx2_gen1_dep32_batch8_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -4172,7 +4172,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx7_gen1_dep32_batch32_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx7_gen1_dep32_batch32_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx7_gen1_dep32_batch32_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -4186,7 +4186,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx8_gen1_dep16_batch128_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx8_gen1_dep16_batch128_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx8_gen1_dep16_batch128_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -4200,7 +4200,7 @@ dsr1-fp4-gb200-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx10_gen1_dep16_batch256_eplb256_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx10_gen1_dep16_batch256_eplb256_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx10_gen1_dep16_batch256_eplb256_mtp0.yaml" decode: num-worker: 1 @@ -4231,7 +4231,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep8_batch512_eplb0_mtp1_4301.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep8_batch512_eplb0_mtp1_4301.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep8_batch512_eplb0_mtp1_4301.yaml" decode: num-worker: 1 @@ -4246,7 +4246,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep8_batch256_eplb0_mtp1_2151.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep8_batch256_eplb0_mtp1_2151.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep8_batch256_eplb0_mtp1_2151.yaml" decode: num-worker: 1 @@ -4261,7 +4261,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1_1229.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1_1229.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1_1229.yaml" decode: num-worker: 1 @@ -4276,7 +4276,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep32_batch16_eplb0_mtp3_615.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep32_batch16_eplb0_mtp3_615.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep32_batch16_eplb0_mtp3_615.yaml" decode: num-worker: 1 @@ -4291,7 +4291,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch8_eplb0_mtp3_36.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch8_eplb0_mtp3_36.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch8_eplb0_mtp3_36.yaml" decode: num-worker: 3 @@ -4306,7 +4306,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch4_eplb0_mtp3_18.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch4_eplb0_mtp3_18.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch4_eplb0_mtp3_18.yaml" decode: num-worker: 3 @@ -4321,7 +4321,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch2_eplb0_mtp3_9.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch2_eplb0_mtp3_9.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch2_eplb0_mtp3_9.yaml" decode: num-worker: 3 @@ -4336,7 +4336,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep8_batch768_eplb0_mtp0_6144.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep8_batch768_eplb0_mtp0_6144.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep8_batch768_eplb0_mtp0_6144.yaml" decode: num-worker: 1 @@ -4350,7 +4350,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0_4301.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0_4301.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0_4301.yaml" decode: num-worker: 1 @@ -4364,7 +4364,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep16_batch128_eplb0_mtp0_2151.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep16_batch128_eplb0_mtp0_2151.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep16_batch128_eplb0_mtp0_2151.yaml" decode: num-worker: 1 @@ -4378,7 +4378,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep32_batch32_eplb0_mtp0_1127.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep32_batch32_eplb0_mtp0_1127.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep32_batch32_eplb0_mtp0_1127.yaml" decode: num-worker: 1 @@ -4392,7 +4392,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep32_batch8_eplb0_mtp0_256.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep32_batch8_eplb0_mtp0_256.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep32_batch8_eplb0_mtp0_256.yaml" decode: num-worker: 1 @@ -4406,7 +4406,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen3_tep8_batch8_eplb0_mtp0_27.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen3_tep8_batch8_eplb0_mtp0_27.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen3_tep8_batch8_eplb0_mtp0_27.yaml" decode: num-worker: 3 @@ -4420,7 +4420,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0_3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0_3.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0_3.yaml" decode: num-worker: 3 @@ -4439,7 +4439,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx3_gen1_dep8_batch64_eplb0_mtp3_666.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx3_gen1_dep8_batch64_eplb0_mtp3_666.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx3_gen1_dep8_batch64_eplb0_mtp3_666.yaml" decode: num-worker: 1 @@ -4454,7 +4454,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx5_gen1_dep16_batch32_eplb0_mtp3_666.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx5_gen1_dep16_batch32_eplb0_mtp3_666.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx5_gen1_dep16_batch32_eplb0_mtp3_666.yaml" decode: num-worker: 1 @@ -4469,7 +4469,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx3_gen1_dep16_batch16_eplb0_mtp3_333.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx3_gen1_dep16_batch16_eplb0_mtp3_333.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx3_gen1_dep16_batch16_eplb0_mtp3_333.yaml" decode: num-worker: 1 @@ -4484,7 +4484,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx4_gen1_dep32_batch8_eplb0_mtp3_333.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx4_gen1_dep32_batch8_eplb0_mtp3_333.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx4_gen1_dep32_batch8_eplb0_mtp3_333.yaml" decode: num-worker: 1 @@ -4499,7 +4499,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx2_gen1_dep32_batch2_eplb0_mtp3_90.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx2_gen1_dep32_batch2_eplb0_mtp3_90.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx2_gen1_dep32_batch2_eplb0_mtp3_90.yaml" decode: num-worker: 1 @@ -4514,7 +4514,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx1_gen3_tep8_batch4_eplb0_mtp3_15.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx1_gen3_tep8_batch4_eplb0_mtp3_15.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx1_gen3_tep8_batch4_eplb0_mtp3_15.yaml" decode: num-worker: 3 @@ -4529,7 +4529,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx1_gen3_tep8_batch2_eplb0_mtp3_6.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx1_gen3_tep8_batch2_eplb0_mtp3_6.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx1_gen3_tep8_batch2_eplb0_mtp3_6.yaml" decode: num-worker: 3 @@ -4544,7 +4544,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx5_gen1_dep16_batch64_eplb0_mtp0_1229.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx5_gen1_dep16_batch64_eplb0_mtp0_1229.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx5_gen1_dep16_batch64_eplb0_mtp0_1229.yaml" decode: num-worker: 1 @@ -4558,7 +4558,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx4_gen1_dep32_batch16_eplb0_mtp0_666.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx4_gen1_dep32_batch16_eplb0_mtp0_666.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx4_gen1_dep32_batch16_eplb0_mtp0_666.yaml" decode: num-worker: 1 @@ -4572,7 +4572,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx3_gen1_dep16_batch32_eplb0_mtp0_615.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx3_gen1_dep16_batch32_eplb0_mtp0_615.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx3_gen1_dep16_batch32_eplb0_mtp0_615.yaml" decode: num-worker: 1 @@ -4586,7 +4586,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx2_gen1_dep32_batch8_eplb0_mtp0_333.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx2_gen1_dep32_batch8_eplb0_mtp0_333.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx2_gen1_dep32_batch8_eplb0_mtp0_333.yaml" decode: num-worker: 1 @@ -4600,7 +4600,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0_63.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0_63.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0_63.yaml" decode: num-worker: 3 @@ -4614,7 +4614,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch4_eplb0_mtp0_18.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch4_eplb0_mtp0_18.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch4_eplb0_mtp0_18.yaml" decode: num-worker: 3 @@ -4628,7 +4628,7 @@ dsr1-fp8-gb200-dynamo-trt: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0_6.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0_6.yaml - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0_6.yaml" decode: num-worker: 3 @@ -4658,7 +4658,7 @@ dsr1-fp8-gb200-dynamo-sglang: ep: 1 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/gb200-fp8/1k1k/low-latency.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/1k1k/low-latency.yaml - "CONFIG_FILE=recipes/gb200-fp8/1k1k/low-latency.yaml" decode: num-worker: 1 @@ -4674,7 +4674,7 @@ dsr1-fp8-gb200-dynamo-sglang: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/gb200-fp8/1k1k/mid-curve.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/1k1k/mid-curve.yaml - "CONFIG_FILE=recipes/gb200-fp8/1k1k/mid-curve.yaml" decode: num-worker: 1 @@ -4690,7 +4690,7 @@ dsr1-fp8-gb200-dynamo-sglang: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/gb200-fp8/1k1k/max-tpt.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/1k1k/max-tpt.yaml - "CONFIG_FILE=recipes/gb200-fp8/1k1k/max-tpt.yaml" decode: num-worker: 1 @@ -4706,7 +4706,7 @@ dsr1-fp8-gb200-dynamo-sglang: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/gb200-fp8/1k1k/ultra-tpt.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/1k1k/ultra-tpt.yaml - "CONFIG_FILE=recipes/gb200-fp8/1k1k/ultra-tpt.yaml" decode: num-worker: 1 @@ -4725,7 +4725,7 @@ dsr1-fp8-gb200-dynamo-sglang: ep: 1 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/gb200-fp8/8k1k/low-latency.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/8k1k/low-latency.yaml - "CONFIG_FILE=recipes/gb200-fp8/8k1k/low-latency.yaml" decode: num-worker: 1 @@ -4741,7 +4741,7 @@ dsr1-fp8-gb200-dynamo-sglang: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/gb200-fp8/8k1k/mid-curve.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/8k1k/mid-curve.yaml - "CONFIG_FILE=recipes/gb200-fp8/8k1k/mid-curve.yaml" decode: num-worker: 1 @@ -4757,7 +4757,7 @@ dsr1-fp8-gb200-dynamo-sglang: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/gb200-fp8/8k1k/max_tpt.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/8k1k/max_tpt.yaml - "CONFIG_FILE=recipes/gb200-fp8/8k1k/max_tpt.yaml" decode: num-worker: 1 @@ -4786,7 +4786,7 @@ dsr1-fp8-gb300-dynamo-sglang: ep: 1 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/gb300-fp8/1k1k/stp/low-latency.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb300-fp8/1k1k/stp/low-latency.yaml - "CONFIG_FILE=recipes/gb300-fp8/1k1k/stp/low-latency.yaml" decode: num-worker: 4 @@ -4802,7 +4802,7 @@ dsr1-fp8-gb300-dynamo-sglang: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/gb300-fp8/1k1k/stp/mid.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb300-fp8/1k1k/stp/mid.yaml - "CONFIG_FILE=recipes/gb300-fp8/1k1k/stp/mid.yaml" decode: num-worker: 1 @@ -4818,7 +4818,7 @@ dsr1-fp8-gb300-dynamo-sglang: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/gb300-fp8/1k1k/stp/max.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb300-fp8/1k1k/stp/max.yaml - "CONFIG_FILE=recipes/gb300-fp8/1k1k/stp/max.yaml" decode: num-worker: 1 @@ -4837,7 +4837,7 @@ dsr1-fp8-gb300-dynamo-sglang: ep: 1 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/gb300-fp8/8k1k/stp/low-latency.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb300-fp8/8k1k/stp/low-latency.yaml - "CONFIG_FILE=recipes/gb300-fp8/8k1k/stp/low-latency.yaml" decode: num-worker: 1 @@ -4853,7 +4853,7 @@ dsr1-fp8-gb300-dynamo-sglang: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/gb300-fp8/8k1k/stp/mid.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb300-fp8/8k1k/stp/mid.yaml - "CONFIG_FILE=recipes/gb300-fp8/8k1k/stp/mid.yaml" decode: num-worker: 1 @@ -4869,7 +4869,7 @@ dsr1-fp8-gb300-dynamo-sglang: ep: 8 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/gb300-fp8/8k1k/stp/max.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb300-fp8/8k1k/stp/max.yaml - "CONFIG_FILE=recipes/gb300-fp8/8k1k/stp/max.yaml" decode: num-worker: 1 @@ -5013,7 +5013,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen1_dep4_batch768_eplb0_mtp1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen1_dep4_batch768_eplb0_mtp1.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen1_dep4_batch768_eplb0_mtp1.yaml" decode: num-worker: 1 @@ -5028,7 +5028,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen1_dep32_batch8_eplb0_mtp.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen1_dep32_batch8_eplb0_mtp.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen1_dep32_batch8_eplb0_mtp.yaml" decode: num-worker: 1 @@ -5043,7 +5043,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml" decode: num-worker: 4 @@ -5058,7 +5058,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml" decode: num-worker: 4 @@ -5073,7 +5073,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx3_gen1_dep16_batch128_eplb256_mtp1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx3_gen1_dep16_batch128_eplb256_mtp1.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx3_gen1_dep16_batch128_eplb256_mtp1.yaml" decode: num-worker: 1 @@ -5088,7 +5088,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx3_gen1_dep32_batch32_eplb288_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx3_gen1_dep32_batch32_eplb288_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx3_gen1_dep32_batch32_eplb288_mtp3.yaml" decode: num-worker: 1 @@ -5103,7 +5103,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml" decode: num-worker: 4 @@ -5117,7 +5117,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx1_gen4_tep8_batch32_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx1_gen4_tep8_batch32_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx1_gen4_tep8_batch32_eplb0_mtp0.yaml" decode: num-worker: 4 @@ -5131,7 +5131,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx2_gen1_dep8_batch1024_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx2_gen1_dep8_batch1024_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx2_gen1_dep8_batch1024_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -5145,7 +5145,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx2_gen1_dep32_batch32_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx2_gen1_dep32_batch32_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx2_gen1_dep32_batch32_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -5159,7 +5159,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx3_gen1_dep16_batch256_eplb256_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx3_gen1_dep16_batch256_eplb256_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx3_gen1_dep16_batch256_eplb256_mtp0.yaml" decode: num-worker: 1 @@ -5173,7 +5173,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx3_gen1_dep32_batch64_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx3_gen1_dep32_batch64_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx3_gen1_dep32_batch64_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -5192,7 +5192,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen3_tep8_batch8_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen3_tep8_batch8_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen3_tep8_batch8_eplb0_mtp3.yaml" decode: num-worker: 3 @@ -5207,7 +5207,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml" decode: num-worker: 4 @@ -5222,7 +5222,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3.yaml" decode: num-worker: 4 @@ -5237,7 +5237,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx4_gen1_dep32_batch4_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx4_gen1_dep32_batch4_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx4_gen1_dep32_batch4_eplb0_mtp3.yaml" decode: num-worker: 1 @@ -5252,7 +5252,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx8_gen1_dep32_batch8_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx8_gen1_dep32_batch8_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx8_gen1_dep32_batch8_eplb0_mtp3.yaml" decode: num-worker: 1 @@ -5267,7 +5267,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx10_gen1_dep8_batch256_eplb0_mtp1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx10_gen1_dep8_batch256_eplb0_mtp1.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx10_gen1_dep8_batch256_eplb0_mtp1.yaml" decode: num-worker: 1 @@ -5282,7 +5282,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx10_gen1_dep16_batch32_eplb0_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx10_gen1_dep16_batch32_eplb0_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx10_gen1_dep16_batch32_eplb0_mtp3.yaml" decode: num-worker: 1 @@ -5297,7 +5297,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx13_gen1_dep16_batch64_eplb256_mtp3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx13_gen1_dep16_batch64_eplb256_mtp3.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx13_gen1_dep16_batch64_eplb256_mtp3.yaml" decode: num-worker: 1 @@ -5312,7 +5312,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -5326,7 +5326,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml" decode: num-worker: 4 @@ -5340,7 +5340,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen4_tep8_batch2_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen4_tep8_batch2_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen4_tep8_batch2_eplb0_mtp0.yaml" decode: num-worker: 4 @@ -5354,7 +5354,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen5_tep4_batch4_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen5_tep4_batch4_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen5_tep4_batch4_eplb0_mtp0.yaml" decode: num-worker: 5 @@ -5368,7 +5368,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx7_gen1_dep32_batch16_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx7_gen1_dep32_batch16_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx7_gen1_dep32_batch16_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -5382,7 +5382,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx9_gen1_dep16_batch64_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx9_gen1_dep16_batch64_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx9_gen1_dep16_batch64_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -5396,7 +5396,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx11_gen3_dep4_batch256_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx11_gen3_dep4_batch256_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx11_gen3_dep4_batch256_eplb0_mtp0.yaml" decode: num-worker: 3 @@ -5410,7 +5410,7 @@ dsr1-fp4-gb300-dynamo-trt: ep: 2 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx14_gen1_dep16_batch128_eplb0_mtp0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx14_gen1_dep16_batch128_eplb0_mtp0.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx14_gen1_dep16_batch128_eplb0_mtp0.yaml" decode: num-worker: 1 @@ -5554,7 +5554,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_8.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_8.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_8.yaml" decode: num-worker: 4 @@ -5569,7 +5569,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_24.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_24.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_24.yaml" decode: num-worker: 4 @@ -5584,7 +5584,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3_180.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3_180.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3_180.yaml" decode: num-worker: 1 @@ -5599,7 +5599,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx2_gen1_dep32_batch16_eplb0_mtp3_564.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx2_gen1_dep32_batch16_eplb0_mtp3_564.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx2_gen1_dep32_batch16_eplb0_mtp3_564.yaml" decode: num-worker: 1 @@ -5614,7 +5614,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp3_666.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp3_666.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp3_666.yaml" decode: num-worker: 1 @@ -5629,7 +5629,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx2_gen1_dep16_batch128_eplb0_mtp1_2253.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx2_gen1_dep16_batch128_eplb0_mtp1_2253.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx2_gen1_dep16_batch128_eplb0_mtp1_2253.yaml" decode: num-worker: 1 @@ -5644,7 +5644,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx3_gen2_dep8_batch512_eplb0_mtp1_8192.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx3_gen2_dep8_batch512_eplb0_mtp1_8192.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx3_gen2_dep8_batch512_eplb0_mtp1_8192.yaml" decode: num-worker: 2 @@ -5659,7 +5659,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml" decode: num-worker: 4 @@ -5673,7 +5673,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch4_eplb0_mtp0_24.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch4_eplb0_mtp0_24.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch4_eplb0_mtp0_24.yaml" decode: num-worker: 4 @@ -5687,7 +5687,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch16_eplb0_mtp0_84.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch16_eplb0_mtp0_84.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch16_eplb0_mtp0_84.yaml" decode: num-worker: 4 @@ -5701,7 +5701,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx2_gen1_dep32_batch32_eplb0_mtp0_1229.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx2_gen1_dep32_batch32_eplb0_mtp0_1229.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx2_gen1_dep32_batch32_eplb0_mtp0_1229.yaml" decode: num-worker: 1 @@ -5715,7 +5715,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx2_gen1_dep16_batch128_eplb0_mtp0_2253.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx2_gen1_dep16_batch128_eplb0_mtp0_2253.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx2_gen1_dep16_batch128_eplb0_mtp0_2253.yaml" decode: num-worker: 1 @@ -5729,7 +5729,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx3_gen2_dep8_batch512_eplb0_mtp0_8602.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx3_gen2_dep8_batch512_eplb0_mtp0_8602.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx3_gen2_dep8_batch512_eplb0_mtp0_8602.yaml" decode: num-worker: 2 @@ -5743,7 +5743,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx3_gen2_dep8_batch768_eplb0_mtp0_12288.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx3_gen2_dep8_batch768_eplb0_mtp0_12288.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx3_gen2_dep8_batch768_eplb0_mtp0_12288.yaml" decode: num-worker: 2 @@ -5762,7 +5762,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_8.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_8.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_8.yaml" decode: num-worker: 4 @@ -5777,7 +5777,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_24.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_24.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_24.yaml" decode: num-worker: 4 @@ -5792,7 +5792,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx6_gen1_dep32_batch8_eplb0_mtp3_333.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx6_gen1_dep32_batch8_eplb0_mtp3_333.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx6_gen1_dep32_batch8_eplb0_mtp3_333.yaml" decode: num-worker: 1 @@ -5807,7 +5807,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx8_gen1_dep16_batch32_eplb0_mtp3_666.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx8_gen1_dep16_batch32_eplb0_mtp3_666.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx8_gen1_dep16_batch32_eplb0_mtp3_666.yaml" decode: num-worker: 1 @@ -5822,7 +5822,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx10_gen1_dep16_batch64_eplb0_mtp1_1229.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx10_gen1_dep16_batch64_eplb0_mtp1_1229.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx10_gen1_dep16_batch64_eplb0_mtp1_1229.yaml" decode: num-worker: 1 @@ -5837,7 +5837,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx7_gen1_dep8_batch128_eplb0_mtp1_1229.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx7_gen1_dep8_batch128_eplb0_mtp1_1229.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx7_gen1_dep8_batch128_eplb0_mtp1_1229.yaml" decode: num-worker: 1 @@ -5852,7 +5852,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml" decode: num-worker: 4 @@ -5866,7 +5866,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch4_eplb0_mtp0_24.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch4_eplb0_mtp0_24.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch4_eplb0_mtp0_24.yaml" decode: num-worker: 4 @@ -5880,7 +5880,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch8_eplb0_mtp0_36.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch8_eplb0_mtp0_36.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch8_eplb0_mtp0_36.yaml" decode: num-worker: 4 @@ -5894,7 +5894,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx6_gen1_dep32_batch16_eplb0_mtp0_512.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx6_gen1_dep32_batch16_eplb0_mtp0_512.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx6_gen1_dep32_batch16_eplb0_mtp0_512.yaml" decode: num-worker: 1 @@ -5908,7 +5908,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx4_gen1_dep16_batch32_eplb0_mtp0_666.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx4_gen1_dep16_batch32_eplb0_mtp0_666.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx4_gen1_dep16_batch32_eplb0_mtp0_666.yaml" decode: num-worker: 1 @@ -5922,7 +5922,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx7_gen1_dep16_batch64_eplb0_mtp0_1229.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx7_gen1_dep16_batch64_eplb0_mtp0_1229.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx7_gen1_dep16_batch64_eplb0_mtp0_1229.yaml" decode: num-worker: 1 @@ -5936,7 +5936,7 @@ dsr1-fp8-gb300-dynamo-trt: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx7_gen1_dep8_batch256_eplb0_mtp0_2151.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx7_gen1_dep8_batch256_eplb0_mtp0_2151.yaml - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx7_gen1_dep8_batch256_eplb0_mtp0_2151.yaml" decode: num-worker: 1 @@ -6492,7 +6492,7 @@ dsr1-fp4-b200-dynamo-sglang: ep: 4 dp-attn: true additional-settings: - - "CONFIG_FILE=recipes/b200-fp4/1k1k/stp/low-latency-dep4-1p-tep8-5d.yaml" + - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_stp_lowlat[0]" decode: num-worker: 5 tp: 8 @@ -6505,7 +6505,7 @@ dsr1-fp4-b200-dynamo-sglang: ep: 4 dp-attn: true additional-settings: - - "CONFIG_FILE=recipes/b200-fp4/1k1k/stp/low-latency-dep4-1p-tep8-6d.yaml" + - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_stp_lowlat[1]" decode: num-worker: 6 tp: 8 @@ -6518,7 +6518,7 @@ dsr1-fp4-b200-dynamo-sglang: ep: 4 dp-attn: true additional-settings: - - "CONFIG_FILE=recipes/b200-fp4/1k1k/stp/max-tpt-dep4-1p-dep8-1d.yaml" + - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_stp_maxtpt[0]" decode: num-worker: 1 tp: 8 @@ -6531,7 +6531,7 @@ dsr1-fp4-b200-dynamo-sglang: ep: 4 dp-attn: true additional-settings: - - "CONFIG_FILE=recipes/b200-fp4/1k1k/stp/max-tpt-dep4-1p-dep8-2d.yaml" + - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_stp_maxtpt[1]" decode: num-worker: 2 tp: 8 @@ -6548,7 +6548,7 @@ dsr1-fp4-b200-dynamo-sglang: ep: 4 dp-attn: true additional-settings: - - "CONFIG_FILE=recipes/b200-fp4/8k1k/stp/low-latency-dep4-1p-tep8-1d.yaml" + - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:zip_override_stp_lowlat[0]" decode: num-worker: 1 tp: 8 @@ -6561,7 +6561,7 @@ dsr1-fp4-b200-dynamo-sglang: ep: 4 dp-attn: true additional-settings: - - "CONFIG_FILE=recipes/b200-fp4/8k1k/stp/low-latency-dep4-1p-tep8-5d.yaml" + - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:zip_override_stp_lowlat[1]" decode: num-worker: 5 tp: 8 @@ -6574,7 +6574,7 @@ dsr1-fp4-b200-dynamo-sglang: ep: 4 dp-attn: true additional-settings: - - "CONFIG_FILE=recipes/b200-fp4/8k1k/stp/low-latency-dep4-2p-tep8-5d.yaml" + - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:zip_override_stp_lowlat[2]" decode: num-worker: 5 tp: 8 @@ -6587,7 +6587,7 @@ dsr1-fp4-b200-dynamo-sglang: ep: 1 dp-attn: false additional-settings: - - "CONFIG_FILE=recipes/b200-fp4/8k1k/stp/low-latency-tp4-1p-tp8-1d.yaml" + - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:override_stp_tp4" decode: num-worker: 1 tp: 8 @@ -6600,7 +6600,7 @@ dsr1-fp4-b200-dynamo-sglang: ep: 4 dp-attn: true additional-settings: - - "CONFIG_FILE=recipes/b200-fp4/8k1k/stp/max-tpt-dep4-7p-dep8-2d.yaml" + - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:override_stp_maxtpt_7p2d" decode: num-worker: 2 tp: 8 @@ -6628,7 +6628,7 @@ dsr1-fp8-b200-dynamo-sglang: ep: 8 dp-attn: false additional-settings: - - "CONFIG_FILE=recipes/b200-fp8/1k1k/stp/low-latency-tep8-1p1d.yaml" + - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_stp_lowlat[0]" decode: num-worker: 1 tp: 8 @@ -6641,7 +6641,7 @@ dsr1-fp8-b200-dynamo-sglang: ep: 8 dp-attn: false additional-settings: - - "CONFIG_FILE=recipes/b200-fp8/1k1k/stp/low-latency-tep8-1p3d.yaml" + - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_stp_lowlat[1]" decode: num-worker: 3 tp: 8 @@ -6654,7 +6654,7 @@ dsr1-fp8-b200-dynamo-sglang: ep: 8 dp-attn: true additional-settings: - - "CONFIG_FILE=recipes/b200-fp8/1k1k/stp/max-tpt-dep8-1p5d.yaml" + - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_stp_maxtpt[0]" decode: num-worker: 5 tp: 8 @@ -6667,7 +6667,7 @@ dsr1-fp8-b200-dynamo-sglang: ep: 8 dp-attn: true additional-settings: - - "CONFIG_FILE=recipes/b200-fp8/1k1k/stp/max-tpt-dep8-2p5d.yaml" + - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_stp_maxtpt[1]" decode: num-worker: 5 tp: 8 @@ -6684,7 +6684,7 @@ dsr1-fp8-b200-dynamo-sglang: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp8/8k1k_stp_lowlat_0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_lowlat_0.yaml - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_lowlat_0.yaml" decode: num-worker: 3 @@ -6698,7 +6698,7 @@ dsr1-fp8-b200-dynamo-sglang: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp8/8k1k_stp_lowlat_1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_lowlat_1.yaml - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_lowlat_1.yaml" decode: num-worker: 4 @@ -6712,7 +6712,7 @@ dsr1-fp8-b200-dynamo-sglang: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp8/8k1k_stp_lowlat_2.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_lowlat_2.yaml - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_lowlat_2.yaml" decode: num-worker: 6 @@ -6727,7 +6727,7 @@ dsr1-fp8-b200-dynamo-sglang: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp8/8k1k_stp_maxtpt_0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_maxtpt_0.yaml - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_maxtpt_0.yaml" decode: num-worker: 2 @@ -6741,7 +6741,7 @@ dsr1-fp8-b200-dynamo-sglang: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp8/8k1k_stp_maxtpt_1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_maxtpt_1.yaml - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_maxtpt_1.yaml" decode: num-worker: 1 @@ -6755,7 +6755,7 @@ dsr1-fp8-b200-dynamo-sglang: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp8/8k1k_stp_maxtpt_2.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_maxtpt_2.yaml - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_maxtpt_2.yaml" decode: num-worker: 1 @@ -6769,7 +6769,7 @@ dsr1-fp8-b200-dynamo-sglang: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp8/8k1k_stp_maxtpt_3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_maxtpt_3.yaml - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_maxtpt_3.yaml" decode: num-worker: 1 @@ -6799,7 +6799,7 @@ dsr1-fp8-b200-dynamo-sglang-mtp: ep: 8 dp-attn: false additional-settings: - - "CONFIG_FILE=recipes/b200-fp8/1k1k/mtp/low-latency-tep8-1p1d.yaml" + - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_mtp_lowlat[0]" decode: num-worker: 1 tp: 8 @@ -6814,7 +6814,7 @@ dsr1-fp8-b200-dynamo-sglang-mtp: ep: 8 dp-attn: false additional-settings: - - "CONFIG_FILE=recipes/b200-fp8/1k1k/mtp/low-latency-tep8-1p3d.yaml" + - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_mtp_lowlat[1]" decode: num-worker: 3 tp: 8 @@ -6829,7 +6829,7 @@ dsr1-fp8-b200-dynamo-sglang-mtp: ep: 8 dp-attn: true additional-settings: - - "CONFIG_FILE=recipes/b200-fp8/1k1k/mtp/max-tpt-dep8-1p5d.yaml" + - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_mtp_maxtpt[1]" decode: num-worker: 5 tp: 8 @@ -6844,7 +6844,7 @@ dsr1-fp8-b200-dynamo-sglang-mtp: ep: 8 dp-attn: true additional-settings: - - "CONFIG_FILE=recipes/b200-fp8/1k1k/mtp/max-tpt-dep8-2p5d.yaml" + - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_mtp_maxtpt[2]" decode: num-worker: 5 tp: 8 @@ -6859,7 +6859,7 @@ dsr1-fp8-b200-dynamo-sglang-mtp: ep: 8 dp-attn: true additional-settings: - - "CONFIG_FILE=recipes/b200-fp8/1k1k/mtp/max-tpt-dep8-1p2d.yaml" + - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:override_mtp_maxtpt_1p2d" decode: num-worker: 2 tp: 8 @@ -6877,7 +6877,7 @@ dsr1-fp8-b200-dynamo-sglang-mtp: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp8/8k1k_mtp_lowlat_0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_lowlat_0.yaml - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_lowlat_0.yaml" decode: num-worker: 3 @@ -6892,7 +6892,7 @@ dsr1-fp8-b200-dynamo-sglang-mtp: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp8/8k1k_mtp_lowlat_1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_lowlat_1.yaml - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_lowlat_1.yaml" decode: num-worker: 4 @@ -6907,7 +6907,7 @@ dsr1-fp8-b200-dynamo-sglang-mtp: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp8/8k1k_mtp_lowlat_2.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_lowlat_2.yaml - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_lowlat_2.yaml" decode: num-worker: 6 @@ -6923,7 +6923,7 @@ dsr1-fp8-b200-dynamo-sglang-mtp: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp8/8k1k_mtp_maxtpt_0.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_maxtpt_0.yaml - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_maxtpt_0.yaml" decode: num-worker: 2 @@ -6938,7 +6938,7 @@ dsr1-fp8-b200-dynamo-sglang-mtp: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp8/8k1k_mtp_maxtpt_1.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_maxtpt_1.yaml - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_maxtpt_1.yaml" decode: num-worker: 1 @@ -6953,7 +6953,7 @@ dsr1-fp8-b200-dynamo-sglang-mtp: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp8/8k1k_mtp_maxtpt_2.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_maxtpt_2.yaml - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_maxtpt_2.yaml" decode: num-worker: 1 @@ -6968,7 +6968,7 @@ dsr1-fp8-b200-dynamo-sglang-mtp: ep: 1 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp8/8k1k_mtp_maxtpt_3.yaml + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_maxtpt_3.yaml - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_maxtpt_3.yaml" decode: num-worker: 1 @@ -6997,8 +6997,8 @@ dsr1-fp4-b200-dynamo-sglang-mtp: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp4/1k1k/mtp/low-latency-dep4-1p-tep8-5d.yaml - - "CONFIG_FILE=recipes/b200-fp4/1k1k/mtp/low-latency-dep4-1p-tep8-5d.yaml" + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/1k1k.yaml + - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_mtp_lowlat[0]" decode: num-worker: 5 tp: 8 @@ -7012,8 +7012,8 @@ dsr1-fp4-b200-dynamo-sglang-mtp: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp4/1k1k/mtp/low-latency-dep4-1p-tep8-6d.yaml - - "CONFIG_FILE=recipes/b200-fp4/1k1k/mtp/low-latency-dep4-1p-tep8-6d.yaml" + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/1k1k.yaml + - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_mtp_lowlat[1]" decode: num-worker: 6 tp: 8 @@ -7027,8 +7027,8 @@ dsr1-fp4-b200-dynamo-sglang-mtp: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp4/1k1k/mtp/max-tpt-dep4-1p-dep8-1d.yaml - - "CONFIG_FILE=recipes/b200-fp4/1k1k/mtp/max-tpt-dep4-1p-dep8-1d.yaml" + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/1k1k.yaml + - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_mtp_maxtpt[0]" decode: num-worker: 1 tp: 8 @@ -7042,8 +7042,8 @@ dsr1-fp4-b200-dynamo-sglang-mtp: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp4/1k1k/mtp/max-tpt-dep4-1p-dep8-2d.yaml - - "CONFIG_FILE=recipes/b200-fp4/1k1k/mtp/max-tpt-dep4-1p-dep8-2d.yaml" + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/1k1k.yaml + - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_mtp_maxtpt[1]" decode: num-worker: 2 tp: 8 @@ -7063,8 +7063,8 @@ dsr1-fp4-b200-dynamo-sglang-mtp: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp4/8k1k/mtp/low-latency-dep4-1p-tep8-1d.yaml - - "CONFIG_FILE=recipes/b200-fp4/8k1k/mtp/low-latency-dep4-1p-tep8-1d.yaml" + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/8k1k.yaml + - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:zip_override_mtp_lowlat[0]" decode: num-worker: 1 tp: 8 @@ -7078,8 +7078,8 @@ dsr1-fp4-b200-dynamo-sglang-mtp: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp4/8k1k/mtp/low-latency-dep4-1p-tep8-5d.yaml - - "CONFIG_FILE=recipes/b200-fp4/8k1k/mtp/low-latency-dep4-1p-tep8-5d.yaml" + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/8k1k.yaml + - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:zip_override_mtp_lowlat[1]" decode: num-worker: 5 tp: 8 @@ -7093,8 +7093,8 @@ dsr1-fp4-b200-dynamo-sglang-mtp: ep: 4 dp-attn: true additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp4/8k1k/mtp/low-latency-dep4-2p-tep8-5d.yaml - - "CONFIG_FILE=recipes/b200-fp4/8k1k/mtp/low-latency-dep4-2p-tep8-5d.yaml" + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/8k1k.yaml + - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:zip_override_mtp_lowlat[2]" decode: num-worker: 5 tp: 8 @@ -7108,8 +7108,8 @@ dsr1-fp4-b200-dynamo-sglang-mtp: ep: 1 dp-attn: false additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/b200-fp4/8k1k/mtp/low-latency-tp4-1p-tp8-1d.yaml - - "CONFIG_FILE=recipes/b200-fp4/8k1k/mtp/low-latency-tp4-1p-tp8-1d.yaml" + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/8k1k.yaml + - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:override_mtp_tp4" decode: num-worker: 1 tp: 8 diff --git a/.github/workflows/benchmark-multinode-tmpl.yml b/.github/workflows/benchmark-multinode-tmpl.yml index a46c26a12..dba3938a8 100644 --- a/.github/workflows/benchmark-multinode-tmpl.yml +++ b/.github/workflows/benchmark-multinode-tmpl.yml @@ -77,6 +77,20 @@ on: required: false type: string default: "[]" + run-eval: + type: boolean + required: false + default: false + eval-only: + description: "Run only evals (skip throughput benchmark)" + type: boolean + required: false + default: false + eval-conc: + description: "Concurrency to use for eval requests (overrides default max-of-conc-list)" + type: string + required: false + default: "" ref: description: "Git ref (branch/sha) to checkout" required: false @@ -96,6 +110,9 @@ env: CONC_LIST: ${{ join(fromJson(inputs.conc-list), ' ') }} SPEC_DECODING: ${{ inputs.spec-decoding }} DISAGG: ${{ inputs.disagg }} + RUN_EVAL: ${{ inputs.run-eval }} + EVAL_ONLY: ${{ inputs.eval-only }} + EVAL_CONC: ${{ inputs.eval-conc }} PYTHONDONTWRITEBYTECODE: '1' PYTHONPYCACHEPREFIX: /tmp/inferencex-pycache @@ -116,7 +133,7 @@ jobs: benchmark: runs-on: ${{ inputs.runner }} timeout-minutes: 480 - name: "${{ inputs.exp-name }} ${{ inputs.precision }} ${{ inputs.runner }} ${{ inputs.framework }} | P(tp${{ inputs.prefill-tp }}/ep${{ inputs.prefill-ep }}/dp${{ inputs.prefill-dp-attn }}/nw${{ inputs.prefill-num-worker }}) D(tp${{ inputs.decode-tp }}/ep${{ inputs.decode-ep }}/dp${{ inputs.decode-dp-attn }}/nw${{ inputs.decode-num-worker }}) | disagg-${{ inputs.disagg }} spec-${{ inputs.spec-decoding }} conc-${{ join(fromJson(inputs.conc-list), 'x') }}" + name: "${{ inputs.exp-name }} ${{ inputs.precision }} ${{ inputs.runner }} ${{ inputs.framework }} | P(tp${{ inputs.prefill-tp }}/ep${{ inputs.prefill-ep }}/dp${{ inputs.prefill-dp-attn }}/nw${{ inputs.prefill-num-worker }}) D(tp${{ inputs.decode-tp }}/ep${{ inputs.decode-ep }}/dp${{ inputs.decode-dp-attn }}/nw${{ inputs.decode-num-worker }}) | disagg-${{ inputs.disagg }} spec-${{ inputs.spec-decoding }} conc-${{ join(fromJson(inputs.conc-list), 'x') }}${{ inputs.eval-only && ' | eval-only' || (inputs.run-eval && ' | eval' || '') }}" steps: - name: Slurm cleanup (pre-run) @@ -137,9 +154,17 @@ jobs: ref: ${{ inputs.ref || github.ref }} clean: false + - name: Cleanup stale eval outputs (pre-run) + if: ${{ inputs.run-eval || inputs.eval-only }} + run: | + rm -f meta_env.json || true + rm -f results*.json || true + rm -f sample*.jsonl || true + - name: Launch multi-node job script env: RUNNER_NAME: ${{ runner.name }} + RUNNER_TYPE: ${{ inputs.runner }} # Hash uniquely on {EXP_NAME}_{PRECISION}_{FRAMEWORK}_prefill-tp{}-ep{}-dp{}-nw{}_decode-tp{}-ep{}-dp{}-nw{}_disagg-{}_spec-{}_conc{}_{runner} RESULT_FILENAME: ${{ env.EXP_NAME }}_${{ env.PRECISION }}_${{ env.FRAMEWORK }}_prefill-tp${{ env.PREFILL_TP }}-ep${{ env.PREFILL_EP }}-dp${{ env.PREFILL_DP_ATTN }}-nw${{ env.PREFILL_NUM_WORKERS }}_decode-tp${{ env.DECODE_TP }}-ep${{ env.DECODE_EP }}-dp${{ env.DECODE_DP_ATTN }}-nw${{ env.DECODE_NUM_WORKERS }}_disagg-${{ env.DISAGG }}_spec-${{ env.SPEC_DECODING }}_conc${{ join(fromJson(inputs.conc-list), 'x') }}_${{ runner.name }} run: | @@ -150,16 +175,26 @@ jobs: export ${{ join(fromJson(inputs.prefill-additional-settings), ' ') }} ${{ join(fromJson(inputs.decode-additional-settings), ' ') }} export IS_MULTINODE=true bash ./runners/launch_${RUNNER_NAME%%_*}.sh - # Check if at least one result file was created - if ls ${RESULT_FILENAME}_*.json 1> /dev/null 2>&1; then - echo "RESULT_FILENAME=${RESULT_FILENAME}" >> $GITHUB_ENV - echo "Found result files: $(ls ${RESULT_FILENAME}_*.json)" + if [ "${{ inputs.eval-only }}" = "true" ]; then + echo "Eval-only mode: skipping benchmark result file check" + # Verify eval produced results + if ! ls results*.json 1>/dev/null 2>&1; then + echo "Eval-only run failed: no results*.json files found." >&2 + exit 1 + fi else - echo "Run failed: No benchmark result files found for ${RESULT_FILENAME}_*.json" >&2 - exit 1 + # Check if at least one result file was created + if ls ${RESULT_FILENAME}_*.json 1> /dev/null 2>&1; then + echo "RESULT_FILENAME=${RESULT_FILENAME}" >> $GITHUB_ENV + echo "Found result files: $(ls ${RESULT_FILENAME}_*.json)" + else + echo "Run failed: No benchmark result files found for ${RESULT_FILENAME}_*.json" >&2 + exit 1 + fi fi - name: Process results + if: ${{ !inputs.eval-only }} env: RUNNER_TYPE: ${{ inputs.runner }} run: | @@ -180,11 +215,34 @@ jobs: done - name: Upload results + if: ${{ !inputs.eval-only }} uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: bmk_${{ env.RESULT_FILENAME }} path: agg_${{ env.RESULT_FILENAME }}_*.json + - name: Upload eval results (if any) + if: ${{ always() && (env.RUN_EVAL == 'true' || inputs.eval-only) }} + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: eval_${{ env.EXP_NAME }}_${{ env.RESULT_FILENAME }} + path: | + meta_env.json + results*.json + sample*.jsonl + if-no-files-found: ignore + + - name: Verify eval scores + if: ${{ inputs.eval-only }} + run: python3 utils/evals/validate_scores.py + + - name: Cleanup eval outputs (post-upload) + if: ${{ always() && (inputs.run-eval || inputs.eval-only) }} + run: | + rm -f meta_env.json || true + rm -f results*.json || true + rm -f sample*.jsonl || true + - name: Upload logs if: always() uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 diff --git a/.github/workflows/benchmark-tmpl.yml b/.github/workflows/benchmark-tmpl.yml index 0c214acc8..561a3fbb8 100644 --- a/.github/workflows/benchmark-tmpl.yml +++ b/.github/workflows/benchmark-tmpl.yml @@ -131,6 +131,13 @@ jobs: ref: ${{ inputs.ref || github.ref }} clean: false + - name: Cleanup stale eval outputs (pre-run) + if: ${{ inputs.run-eval || inputs.eval-only }} + run: | + rm -f meta_env.json || true + rm -f results*.json || true + rm -f sample*.jsonl || true + - name: Launch job script env: RUNNER_NAME: ${{ runner.name }} diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml index afbec49b0..19a60b9ea 100644 --- a/.github/workflows/e2e-tests.yml +++ b/.github/workflows/e2e-tests.yml @@ -38,6 +38,7 @@ jobs: single-node-config: ${{ steps.get-jobs.outputs.single-node-config }} multi-node-config: ${{ steps.get-jobs.outputs.multi-node-config }} eval-config: ${{ steps.get-jobs.outputs.eval-config }} + multi-node-eval-config: ${{ steps.get-jobs.outputs.multi-node-eval-config }} steps: - name: Checkout code (ref) if: ${{ inputs.ref && inputs.ref != '' }} @@ -55,11 +56,13 @@ jobs: CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py \ ${{ inputs.generate-cli-command || github.event.inputs.generate-cli-command }}) SINGLE=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' not in x and not x.get('eval-only', False)]))") - MULTI=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' in x]))") + MULTI=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' in x and not x.get('eval-only', False)]))") EVALS=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' not in x and x.get('run-eval', False)]))") + MULTI_EVAL=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' in x and x.get('run-eval', False)]))") echo "single-node-config=$SINGLE" >> $GITHUB_OUTPUT echo "multi-node-config=$MULTI" >> $GITHUB_OUTPUT echo "eval-config=$EVALS" >> $GITHUB_OUTPUT + echo "multi-node-eval-config=$MULTI_EVAL" >> $GITHUB_OUTPUT test-sweep-multi-node: needs: get-jobs @@ -97,6 +100,48 @@ jobs: decode-ep: ${{ matrix.config.decode.ep }} decode-dp-attn: ${{ matrix.config.decode.dp-attn }} decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }} + run-eval: false + ref: ${{ inputs.ref }} + + test-sweep-multi-node-evals: + needs: get-jobs + if: ${{ needs.get-jobs.outputs.multi-node-eval-config != '[]' }} + uses: ./.github/workflows/benchmark-multinode-tmpl.yml + name: multi-node eval / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.get-jobs.outputs.multi-node-eval-config) }} + secrets: inherit + with: + isl: ${{ matrix.config.isl }} + osl: ${{ matrix.config.osl }} + max-model-len: ${{ matrix.config.max-model-len }} + runner: ${{ matrix.config.runner }} + image: ${{ matrix.config.image }} + model: ${{ matrix.config.model }} + model-prefix: ${{ matrix.config.model-prefix }} + framework: ${{ matrix.config.framework }} + precision: ${{ matrix.config.precision }} + exp-name: ${{ matrix.config.exp-name }} + conc-list: ${{ toJson(matrix.config.conc) }} + spec-decoding: ${{ matrix.config.spec-decoding }} + disagg: ${{ matrix.config.disagg }} + + prefill-num-worker: ${{ matrix.config.prefill.num-worker }} + prefill-tp: ${{ matrix.config.prefill.tp }} + prefill-ep: ${{ matrix.config.prefill.ep }} + prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }} + prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }} + + decode-num-worker: ${{ matrix.config.decode.num-worker }} + decode-tp: ${{ matrix.config.decode.tp }} + decode-ep: ${{ matrix.config.decode.ep }} + decode-dp-attn: ${{ matrix.config.decode.dp-attn }} + decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }} + run-eval: true + eval-only: true + eval-conc: ${{ matrix.config.eval-conc }} ref: ${{ inputs.ref }} test-sweep-single-node: @@ -162,15 +207,15 @@ jobs: collect-results: needs: [test-sweep-multi-node, test-sweep-single-node] - if: ${{ always() }} + if: ${{ always() && (needs.test-sweep-multi-node.result != 'skipped' || needs.test-sweep-single-node.result != 'skipped') }} uses: ./.github/workflows/collect-results.yml secrets: inherit with: result-prefix: "bmk" collect-evals: - needs: [test-sweep-evals] - if: ${{ always() && needs.test-sweep-evals.result != 'skipped' }} + needs: [test-sweep-evals, test-sweep-multi-node-evals] + if: ${{ always() && (needs.test-sweep-evals.result != 'skipped' || needs.test-sweep-multi-node-evals.result != 'skipped') }} uses: ./.github/workflows/collect-evals.yml secrets: inherit diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index 44e335f49..205e1e2d4 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -105,6 +105,7 @@ jobs: decode-ep: ${{ matrix.config.decode.ep }} decode-dp-attn: ${{ matrix.config.decode.dp-attn }} decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }} + run-eval: false sweep-multi-node-8k1k: needs: setup @@ -189,6 +190,44 @@ jobs: run-eval: true eval-only: true + sweep-multi-node-evals: + needs: setup + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != '[]' && toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != 'null' }} + uses: ./.github/workflows/benchmark-multinode-tmpl.yml + name: multi-node eval / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).multinode_evals }} + secrets: inherit + with: + exp-name: ${{ matrix.config.exp-name }} + isl: ${{ matrix.config.isl }} + osl: ${{ matrix.config.osl }} + max-model-len: ${{ matrix.config.max-model-len }} + runner: ${{ matrix.config.runner }} + image: ${{ matrix.config.image }} + model: ${{ matrix.config.model }} + model-prefix: ${{ matrix.config.model-prefix }} + framework: ${{ matrix.config.framework }} + precision: ${{ matrix.config.precision }} + conc-list: ${{ toJson(matrix.config.conc) }} + spec-decoding: ${{ matrix.config.spec-decoding }} + disagg: ${{ matrix.config.disagg }} + prefill-num-worker: ${{ matrix.config.prefill.num-worker }} + prefill-tp: ${{ matrix.config.prefill.tp }} + prefill-ep: ${{ matrix.config.prefill.ep }} + prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }} + prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }} + decode-num-worker: ${{ matrix.config.decode.num-worker }} + decode-tp: ${{ matrix.config.decode.tp }} + decode-ep: ${{ matrix.config.decode.ep }} + decode-dp-attn: ${{ matrix.config.decode.dp-attn }} + decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }} + run-eval: true + eval-only: true + eval-conc: ${{ matrix.config.eval-conc }} + collect-results: needs: [ @@ -205,8 +244,8 @@ jobs: result-prefix: "bmk" collect-evals: - needs: [sweep-evals, setup] - if: ${{ always() && needs.setup.result != 'skipped' && needs.sweep-evals.result != 'skipped' }} + needs: [sweep-evals, sweep-multi-node-evals, setup] + if: ${{ always() && needs.setup.result != 'skipped' && (needs.sweep-evals.result != 'skipped' || needs.sweep-multi-node-evals.result != 'skipped') }} uses: ./.github/workflows/collect-evals.yml secrets: inherit diff --git a/AGENTS.md b/AGENTS.md index 50c9c9c14..451b9c213 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -37,8 +37,9 @@ InferenceX is an open-source, automated benchmarking system that continuously tr │ ├── workflows/ # GitHub Actions CI/CD │ │ ├── run-sweep.yml # Main performance sweep │ │ ├── e2e-tests.yml # End-to-end testing -│ │ ├── benchmark-tmpl.yml # Benchmark job template -│ │ └── collect-evals.yml # Eval results collection +│ │ ├── benchmark-tmpl.yml # Single-node benchmark job template +│ │ ├── benchmark-multinode-tmpl.yml # Multi-node benchmark job template +│ │ └── collect-evals.yml # Eval results collection │ └── configs/ # Master configuration files │ ├── nvidia-master.yaml │ ├── amd-master.yaml @@ -187,7 +188,7 @@ When working with benchmark configurations, use these valid values: ### Registering Recipes from srtslurm -For disaggregated multi-node configurations (dynamo-sglang, dynamo-trt), recipes are stored in the external [srtslurm](https://github.com/ishandhanani/srt-slurm) repository. To stage these recipes in InferenceX: +For disaggregated multi-node configurations (dynamo-sglang, dynamo-trt), recipes are stored in the external [srtslurm](https://github.com/NVIDIA/srt-slurm) repository. To stage these recipes in InferenceX: **1. Locate source recipes in srtslurm:** ```bash @@ -301,14 +302,28 @@ Evals run optional accuracy checks to ensure model outputs aren't degraded by in ### When Evals Run -Evals are **off by default** (`RUN_EVAL=false`). When enabled, they run at two concurrency levels per configuration group: +Evals run as **separate workflow jobs** from throughput benchmarks (eval-only mode). The `EVAL_ONLY` flag skips throughput benchmarking and only runs lm-eval. -- **Highest concurrency** per (model, runner, framework, precision, ISL, OSL, spec-decoding, dp-attn) -- **Lower-median concurrency** per (model, runner, framework, precision, ISL, OSL, spec-decoding, dp-attn) +**Single-node** eval selection: +- All TPs at **highest concurrency** and **median concurrency** per (model, runner, framework, precision, ISL, OSL, spec-decoding, dp-attn) +- Only on `8k1k` sequence length + +**Multi-node** eval selection: +- Entry with **highest max eligible concurrency** per (model, runner, framework, precision, spec-decoding, prefill-dp-attn, decode-dp-attn) +- Only `8k1k` sequence length +- Eval runs at `eval-conc`, the upper median concurrency from the selected config This selection logic is in `mark_eval_entries()` in `utils/matrix_logic/generate_sweep_configs.py`. -**Note**: Evals only run on `8k1k` sequence length. +**Workflow separation**: Eval jobs are independent from benchmark jobs: +- `run-sweep.yml`: `sweep-evals` (single-node) and `sweep-multi-node-evals` (multi-node) +- `e2e-tests.yml`: `test-sweep-evals` and `test-sweep-multi-node-evals` +- Both use their respective benchmark templates with `eval-only: true` +- `collect-evals` depends only on eval jobs, not benchmark jobs + +**Multi-node eval infrastructure**: +- AMD (MI355X): `server.sh` skips `bench.sh` when `EVAL_ONLY=true`, runs lm-eval directly +- NVIDIA Slurm multi-node (GB200, GB300, B200, B300, H100, H200): srt-slurm invokes its `lm-eval` runner from `do_sweep.py` as a post/eval-only step using `INFMAX_WORKSPACE` ### Eval Framework: lm-eval @@ -338,23 +353,33 @@ All benchmark scripts in `benchmarks/` follow one of two flows: ```bash # Combined mode (benchmark + eval): -# 1. Start server +# 1. Start server (with --context-length expansion if EVAL_ONLY=true) # 2. wait_for_server_ready -# 3. run_benchmark_serving (throughput) -# 4. Conditionally run evals: +# 3. run_benchmark_serving (skipped automatically when EVAL_ONLY=true) +# 4. Run evals: if [ "${RUN_EVAL}" = "true" ]; then run_eval --framework lm-eval --port "$PORT" - append_lm_eval_summary + append_lm_eval_summary # Writes meta_env.json and moves artifacts fi # Eval-only mode (EVAL_ONLY=true): -# 1. Compute expanded context via compute_eval_context_length -# 2. Start server with expanded context (--context-length or --max-model-len) +# 1. Compute eval context via compute_eval_context_length +# 2. Start server with that context (--context-length or --max-model-len) # 3. wait_for_server_ready # 4. run_benchmark_serving returns immediately (skipped) # 5. run_eval + append_lm_eval_summary ``` +**Multi-node AMD** (`benchmarks/multi_node/amd_utils/server.sh`): +- Skips `bench.sh` when `EVAL_ONLY=true` +- Runs lm-eval via `run_eval` against the router on port 30000 +- Copies eval artifacts to `/run_logs/slurm_job-*/eval_results/` + +**Multi-node NVIDIA Slurm** (GB200, GB300, B200, B300, H100, H200 via srt-slurm): +- Uses the srt-slurm `lm-eval` runner as a post/eval-only step from `do_sweep.py` +- Mounts the InferenceX checkout from `INFMAX_WORKSPACE` at `/infmax-workspace` +- `lm-eval` runner sources `benchmark_lib.sh` from `/infmax-workspace` + ### Key Eval Functions in `benchmarks/benchmark_lib.sh` | Function | Description | @@ -364,7 +389,7 @@ fi | `append_lm_eval_summary` | Writes `meta_env.json` and moves eval artifacts to workspace | | `_install_lm_eval_deps` | Installs lm-eval dependencies | | `_patch_lm_eval` | Patches lm-eval for reasoning tokens and TRT compatibility | -| `compute_eval_context_length` | Computes eval context length (5x benchmark context, capped at model native max) | +| `compute_eval_context_length` | Computes eval context length (requested benchmark context, capped at model native max) | | `get_native_max_context_length` | Extracts model's native max context length from HF config | ### Eval Results Collection diff --git a/benchmarks/benchmark_lib.sh b/benchmarks/benchmark_lib.sh index 535313252..5bab9bc61 100644 --- a/benchmarks/benchmark_lib.sh +++ b/benchmarks/benchmark_lib.sh @@ -506,13 +506,13 @@ _install_lm_eval_deps() { python3 -m pip install -q --no-cache-dir --break-system-packages "lm-eval[api]" || true local lm_eval_ref="b315ef3b05176acc9732bb7fdec116abe1ecc476" if command -v git >/dev/null 2>&1; then - if ! python3 -m pip install -q --no-cache-dir --no-deps --break-system-packages \ + if ! python3 -m pip install -q --no-cache-dir --no-deps --force-reinstall --break-system-packages \ "git+https://github.com/EleutherAI/lm-evaluation-harness.git@${lm_eval_ref}"; then - python3 -m pip install -q --no-cache-dir --no-deps --break-system-packages \ + python3 -m pip install -q --no-cache-dir --no-deps --force-reinstall --break-system-packages \ "https://github.com/EleutherAI/lm-evaluation-harness/archive/${lm_eval_ref}.tar.gz" || true fi else - python3 -m pip install -q --no-cache-dir --no-deps --break-system-packages \ + python3 -m pip install -q --no-cache-dir --no-deps --force-reinstall --break-system-packages \ "https://github.com/EleutherAI/lm-evaluation-harness/archive/${lm_eval_ref}.tar.gz" || true fi } @@ -593,20 +593,29 @@ PY get_native_max_context_length() { local model_path="$1" + # Prefer MODEL_PATH (local model directory) when available, since the + # argument may be a served-model name that is neither a valid HF repo + # ID nor a local path (e.g. "deepseek-r1-fp4" on the B300 cluster). + if [ -n "${MODEL_PATH:-}" ] && [ -d "${MODEL_PATH}" ]; then + model_path="${MODEL_PATH}" + fi python3 -c " -from transformers import AutoConfig -config = AutoConfig.from_pretrained('${model_path}', trust_remote_code=True) -for attr in ['max_position_embeddings', 'max_sequence_length', 'seq_length', 'n_positions']: - if hasattr(config, attr): - print(getattr(config, attr)) - break -else: +try: + from transformers import AutoConfig + config = AutoConfig.from_pretrained('${model_path}', trust_remote_code=True) + for attr in ['max_position_embeddings', 'max_sequence_length', 'seq_length', 'n_positions']: + if hasattr(config, attr): + print(getattr(config, attr)) + break + else: + print(0) +except Exception: print(0) " } # Compute the context length for eval-only mode. -# Uses 5x the benchmark context capped at the model's native max. +# Uses the requested benchmark context capped at the model's native max. # Sets EVAL_MAX_MODEL_LEN (needed by run_lm_eval). # Echoes the computed value for scripts to capture. # @@ -708,8 +717,32 @@ append_lm_eval_summary() { # Write minimal meta for collectors that expect it local meta_json="${out_dir}/meta_env.json" local model_name="${MODEL_NAME:-$MODEL}" + local is_multinode_json="false" + if [ "${IS_MULTINODE:-false}" = "true" ]; then + is_multinode_json="true" + fi + + local prefill_tp="${PREFILL_TP:-${TP:-1}}" + local prefill_ep="${PREFILL_EP:-${EP_SIZE:-1}}" + local prefill_num_workers="${PREFILL_NUM_WORKERS:-1}" + local decode_tp="${DECODE_TP:-${TP:-1}}" + local decode_ep="${DECODE_EP:-${EP_SIZE:-1}}" + local decode_num_workers="${DECODE_NUM_WORKERS:-1}" + local dp_json="false" - if [ "${DP_ATTENTION}" = "true" ]; then dp_json="true"; fi + if [ "${DP_ATTENTION:-false}" = "true" ]; then dp_json="true"; fi + local prefill_dp_json="$dp_json" + if [ "${PREFILL_DP_ATTENTION:-${DP_ATTENTION:-false}}" = "true" ]; then + prefill_dp_json="true" + else + prefill_dp_json="false" + fi + local decode_dp_json="$dp_json" + if [ "${DECODE_DP_ATTENTION:-${DP_ATTENTION:-false}}" = "true" ]; then + decode_dp_json="true" + else + decode_dp_json="false" + fi # Derive framework/precision from env, fallback to parsing RESULT_FILENAME # RESULT_FILENAME format (from workflow): @@ -734,6 +767,7 @@ append_lm_eval_summary() { fi cat > "${meta_json}" </dev/null 2>&1; then + EVAL_HEALTH_OK=true + break + fi + echo "Eval health check attempt $_attempt failed, retrying in 10s..." + sleep 10 + done + + if [[ "$EVAL_HEALTH_OK" != "true" ]]; then + echo "WARNING: Router health check failed after 3 attempts. Skipping eval." + else + # Must run from repo root so utils/evals/${task}.yaml resolves + pushd /workspace + + # Source eval functions from benchmark_lib.sh + source /workspace/benchmarks/benchmark_lib.sh + + # Use EVAL_CONC from workflow if set, otherwise fall back to max of conc list + if [[ -n "${EVAL_CONC:-}" ]]; then + export EVAL_CONCURRENT_REQUESTS="${EVAL_CONC}" + else + export EVAL_CONCURRENT_REQUESTS=$(echo "$BENCH_MAX_CONCURRENCY" | tr 'x' '\n' | sort -n | tail -1) + fi + + if [[ "$DRY_RUN" -eq 1 ]]; then + echo "DRY RUN: run_eval --framework lm-eval --port 30000 (conc=${EVAL_CONCURRENT_REQUESTS})" + else + # Run lm-eval against the router on port 30000 + run_eval --framework lm-eval --port 30000 + + # Set metadata env vars for append_lm_eval_summary + export TP="${PREFILL_TP_SIZE}" + export CONC="${EVAL_CONCURRENT_REQUESTS}" + export EP_SIZE=1 + [[ "${PREFILL_ENABLE_EP}" == "true" ]] && EP_SIZE="${PREFILL_TP_SIZE}" + export PREFILL_TP="${PREFILL_TP_SIZE}" + export PREFILL_EP=1 + [[ "${PREFILL_ENABLE_EP}" == "true" ]] && PREFILL_EP="${PREFILL_TP_SIZE}" + export PREFILL_NUM_WORKERS="${xP}" + export DECODE_TP="${DECODE_TP_SIZE}" + export DECODE_EP=1 + [[ "${DECODE_ENABLE_EP}" == "true" ]] && DECODE_EP="${DECODE_TP_SIZE}" + export DECODE_NUM_WORKERS="${yD}" + export DP_ATTENTION="${PREFILL_ENABLE_DP}" + export PREFILL_DP_ATTENTION="${PREFILL_ENABLE_DP}" + export DECODE_DP_ATTENTION="${DECODE_ENABLE_DP}" + export ISL="${BENCH_INPUT_LEN}" + export OSL="${BENCH_OUTPUT_LEN}" + # FRAMEWORK, PRECISION, MODEL_PREFIX, RUNNER_TYPE, RESULT_FILENAME + # are already set via Docker -e flags from job.slurm + + append_lm_eval_summary + # Files (meta_env.json, results*.json, sample*.jsonl) are now in /workspace + + # Copy eval artifacts to run_logs for NFS extraction by runner + EVAL_COPY_DIR="/run_logs/slurm_job-${SLURM_JOB_ID}/eval_results" + mkdir -p "$EVAL_COPY_DIR" + for f in meta_env.json; do + [ -e "/workspace/$f" ] && cp -f "/workspace/$f" "$EVAL_COPY_DIR/" + done + # Use find for glob patterns to avoid "no match" errors + find /workspace -maxdepth 1 -name 'results*.json' -exec cp -f {} "$EVAL_COPY_DIR/" \; + find /workspace -maxdepth 1 -name 'sample*.jsonl' -exec cp -f {} "$EVAL_COPY_DIR/" \; + + echo "Eval completed. Artifacts staged in $EVAL_COPY_DIR" + fi + + popd + fi + fi + # Copy benchmark results to BENCHMARK_LOGS_DIR (mounted from host) LOGS_OUTPUT="${BENCHMARK_LOGS_DIR:-/run_logs}/logs" mkdir -p "$LOGS_OUTPUT" diff --git a/benchmarks/multi_node/amd_utils/submit.sh b/benchmarks/multi_node/amd_utils/submit.sh index 802106350..be22b8d33 100755 --- a/benchmarks/multi_node/amd_utils/submit.sh +++ b/benchmarks/multi_node/amd_utils/submit.sh @@ -105,6 +105,17 @@ export BENCH_NUM_PROMPTS_MULTIPLIER=10 export BENCH_MAX_CONCURRENCY=${CONCURRENCIES} export BENCH_REQUEST_RATE=${REQUEST_RATE} +# Eval-related env vars (threaded from workflow → runner → here → job.slurm → Docker) +export RUN_EVAL="${RUN_EVAL:-false}" +export EVAL_ONLY="${EVAL_ONLY:-false}" +export EVAL_CONC="${EVAL_CONC:-}" +export FRAMEWORK="${FRAMEWORK:-}" +export PRECISION="${PRECISION:-}" +export MODEL_PREFIX="${MODEL_PREFIX:-}" +export RUNNER_TYPE="${RUNNER_TYPE:-}" +export RESULT_FILENAME="${RESULT_FILENAME:-}" +export SPEC_DECODING="${SPEC_DECODING:-}" + # Log directory: must be on NFS (shared filesystem) so the submit host can read SLURM output. # SLURM writes output files on the batch node, so /tmp won't work (node-local). # Defaults to a sibling directory of the submit working directory. diff --git a/perf-changelog.yaml b/perf-changelog.yaml index f0ce418d0..16fea938d 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1486,6 +1486,30 @@ - "Higher concurrency improves MoE weight amortization: 8552 total TPS at conc=256 vs 4016 at conc=64 (2.1x)" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1053 +- config-keys: + - dsr1-fp4-b200-dynamo-trt + - dsr1-fp8-b200-dynamo-trt + - dsr1-fp4-b200-dynamo-sglang + - dsr1-fp8-b200-dynamo-sglang + - dsr1-fp8-b200-dynamo-sglang-mtp + - dsr1-fp4-b200-dynamo-sglang-mtp + - dsr1-fp4-b300-dynamo-trt + - dsr1-fp8-b300-dynamo-trt + - dsr1-fp4-gb300-dynamo-trt + - dsr1-fp8-gb300-dynamo-trt + - dsr1-fp4-gb300-dynamo-sglang + - dsr1-fp8-gb300-dynamo-sglang + - dsr1-fp8-mi355x-sglang-disagg + - dsr1-fp8-mi355x-sglang-disagg-mtp + - dsr1-fp4-mi355x-sglang-disagg + - dsr1-fp4-mi355x-sglang-disagg-mtp + description: + - "Add multi-node lm-eval accuracy runs" + - "Eval picks the config with highest max eligible concurrency per (model, runner, framework, precision, spec-decoding, prefill-dp-attn, decode-dp-attn) group on 8k1k" + - "Eval concurrency set to the median eligible conc (>= MIN_EVAL_CONC=16) of the selected config to avoid OOM" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1000 + evals-only: true + - config-keys: - qwen3.5-fp4-b300-sglang description: diff --git a/runners/launch_b200-dgxc-slurm.sh b/runners/launch_b200-dgxc-slurm.sh index 847b7ee80..b9d4d90cc 100644 --- a/runners/launch_b200-dgxc-slurm.sh +++ b/runners/launch_b200-dgxc-slurm.sh @@ -21,7 +21,7 @@ if [[ "$IS_MULTINODE" == "true" ]]; then export MODEL_PATH="/lustre/fsw/models/dsr1-0528-nvfp4-v2" export SRT_SLURM_MODEL_PREFIX="dsr1" elif [[ $MODEL_PREFIX == "dsr1" && $PRECISION == "fp8" ]]; then - export MODEL_PATH="/raid/tmp/dsr1-0528-fp8" + export MODEL_PATH="/lustre/fsw/models/dsr1-0528-fp8" export SRT_SLURM_MODEL_PREFIX="dsr1-fp8" else echo "Unsupported model prefix/precision: $MODEL_PREFIX/$PRECISION" @@ -36,9 +36,9 @@ if [[ "$IS_MULTINODE" == "true" ]]; then rm -rf "$SRT_REPO_DIR" fi - git clone https://github.com/ishandhanani/srt-slurm.git "$SRT_REPO_DIR" + git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR" cd "$SRT_REPO_DIR" || exit 1 - git checkout sa-submission-q1-2026 + git checkout sa-submission-q2-2026 echo "Installing srtctl..." export UV_INSTALL_DIR="$GITHUB_WORKSPACE/.local/bin" @@ -65,6 +65,7 @@ if [[ "$IS_MULTINODE" == "true" ]]; then export ISL="$ISL" export OSL="$OSL" + export EVAL_ONLY="${EVAL_ONLY:-false}" # Create srtslurm.yaml for srtctl (used by both frameworks) SRTCTL_ROOT="${GITHUB_WORKSPACE}/${SRT_REPO_DIR}" @@ -98,9 +99,23 @@ EOF echo "Running make setup..." make setup ARCH=x86_64 + # Export eval-related env vars for srt-slurm post-benchmark eval + export INFMAX_WORKSPACE="$GITHUB_WORKSPACE" + echo "Submitting job with srtctl..." + + if [[ -z "$CONFIG_FILE" ]]; then + echo "Error: CONFIG_FILE is not set. The srt-slurm path requires a CONFIG_FILE in additional-settings." >&2 + echo "Config: MODEL_PREFIX=${MODEL_PREFIX} PRECISION=${PRECISION} FRAMEWORK=${FRAMEWORK}" >&2 + exit 1 + fi + # Override the job name in the config file with the runner name sed -i "s/^name:.*/name: \"${RUNNER_NAME}\"/" "$CONFIG_FILE" + # Bump recipe health-check timeout from 360×10s=3600s to 720×10s=7200s + # so large-model loads (e.g. DSR1-FP8 ~680GB off shared FS) finish in time. + # Uses ${CONFIG_FILE%%:*} because CONFIG_FILE may carry an :override[N] suffix. + sed -i 's/^ max_attempts: [0-9]*/ max_attempts: 720/' "${CONFIG_FILE%%:*}" SRTCTL_OUTPUT=$(srtctl apply -f "$CONFIG_FILE" --tags "b200,${MODEL_PREFIX},${PRECISION},${ISL}x${OSL},infmax-$(date +%Y%m%d)" 2>&1) echo "$SRTCTL_OUTPUT" @@ -162,45 +177,66 @@ EOF cp -r "$LOGS_DIR" "$GITHUB_WORKSPACE/LOGS" tar czf "$GITHUB_WORKSPACE/multinode_server_logs.tar.gz" -C "$LOGS_DIR" . - # Find all result subdirectories - RESULT_SUBDIRS=$(find "$LOGS_DIR" -maxdepth 1 -type d -name "*isl*osl*" 2>/dev/null) - - if [ -z "$RESULT_SUBDIRS" ]; then - echo "Warning: No result subdirectories found in $LOGS_DIR" - else - # Process results from all configurations - for result_subdir in $RESULT_SUBDIRS; do - echo "Processing result subdirectory: $result_subdir" - - # Extract configuration info from directory name - CONFIG_NAME=$(basename "$result_subdir") - - # Find all result JSON files - RESULT_FILES=$(find "$result_subdir" -name "results_concurrency_*.json" 2>/dev/null) - - for result_file in $RESULT_FILES; do - if [ -f "$result_file" ]; then - # Extract metadata from filename - # Files are of the format "results_concurrency_gpus_{num gpus}_ctx_{num ctx}_gen_{num gen}.json" - filename=$(basename "$result_file") - concurrency=$(echo "$filename" | sed -n 's/results_concurrency_\([0-9]*\)_gpus_.*/\1/p') - gpus=$(echo "$filename" | sed -n 's/results_concurrency_[0-9]*_gpus_\([0-9]*\)_ctx_.*/\1/p') - ctx=$(echo "$filename" | sed -n 's/.*_ctx_\([0-9]*\)_gen_.*/\1/p') - gen=$(echo "$filename" | sed -n 's/.*_gen_\([0-9]*\)\.json/\1/p') + if [[ "${EVAL_ONLY:-false}" != "true" ]]; then + # Find all result subdirectories + RESULT_SUBDIRS=$(find "$LOGS_DIR" -maxdepth 1 -type d -name "*isl*osl*" 2>/dev/null) - echo "Processing concurrency $concurrency with $gpus GPUs (ctx: $ctx, gen: $gen): $result_file" + if [ -z "$RESULT_SUBDIRS" ]; then + echo "Warning: No result subdirectories found in $LOGS_DIR" + else + # Process results from all configurations + for result_subdir in $RESULT_SUBDIRS; do + echo "Processing result subdirectory: $result_subdir" + + # Extract configuration info from directory name + CONFIG_NAME=$(basename "$result_subdir") + + # Find all result JSON files + RESULT_FILES=$(find "$result_subdir" -name "results_concurrency_*.json" 2>/dev/null) + + for result_file in $RESULT_FILES; do + if [ -f "$result_file" ]; then + # Extract metadata from filename + # Files are of the format "results_concurrency_gpus_{num gpus}_ctx_{num ctx}_gen_{num gen}.json" + filename=$(basename "$result_file") + concurrency=$(echo "$filename" | sed -n 's/results_concurrency_\([0-9]*\)_gpus_.*/\1/p') + gpus=$(echo "$filename" | sed -n 's/results_concurrency_[0-9]*_gpus_\([0-9]*\)_ctx_.*/\1/p') + ctx=$(echo "$filename" | sed -n 's/.*_ctx_\([0-9]*\)_gen_.*/\1/p') + gen=$(echo "$filename" | sed -n 's/.*_gen_\([0-9]*\)\.json/\1/p') + + echo "Processing concurrency $concurrency with $gpus GPUs (ctx: $ctx, gen: $gen): $result_file" + + WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus_${gpus}_ctx_${ctx}_gen_${gen}.json" + cp "$result_file" "$WORKSPACE_RESULT_FILE" + + echo "Copied result file to: $WORKSPACE_RESULT_FILE" + fi + done + done + fi - WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus_${gpus}_ctx_${ctx}_gen_${gen}.json" - cp "$result_file" "$WORKSPACE_RESULT_FILE" + echo "All result files processed" + else + echo "EVAL_ONLY=true: Skipping benchmark result collection" + fi - echo "Copied result file to: $WORKSPACE_RESULT_FILE" - fi + # Collect eval results if eval was requested + if [[ "${RUN_EVAL:-false}" == "true" || "${EVAL_ONLY:-false}" == "true" ]]; then + EVAL_DIR="$LOGS_DIR/eval_results" + if [ -d "$EVAL_DIR" ]; then + echo "Extracting eval results from $EVAL_DIR" + shopt -s nullglob + for eval_file in "$EVAL_DIR"/*; do + [ -f "$eval_file" ] || continue + cp "$eval_file" "$GITHUB_WORKSPACE/" + echo "Copied eval artifact: $(basename "$eval_file")" done - done + shopt -u nullglob + else + echo "WARNING: RUN_EVAL=true but no eval results found at $EVAL_DIR" + fi fi - echo "All result files processed" - # Clean up srt-slurm outputs to prevent NFS silly-rename lock files # from blocking the next job's checkout on this runner echo "Cleaning up srt-slurm outputs..." diff --git a/runners/launch_b300-nv.sh b/runners/launch_b300-nv.sh index 7c042b4f1..b49391a3c 100644 --- a/runners/launch_b300-nv.sh +++ b/runners/launch_b300-nv.sh @@ -18,11 +18,11 @@ fi # The yaml files specify HuggingFace model IDs for portability, but we use # local paths to avoid repeated downloading on the shared B300 cluster. if [[ $MODEL_PREFIX == "dsr1" && $PRECISION == "fp4" ]]; then - export MODEL_PATH="/scratch/models/deepseek-r1-0528-nvfp4-v2" + export MODEL_PATH="/data/models/dsr1-fp4" export SERVED_MODEL_NAME="deepseek-r1-fp4" export SRT_SLURM_MODEL_PREFIX="dsr1" elif [[ $MODEL_PREFIX == "dsr1" && $PRECISION == "fp8" ]]; then - export MODEL_PATH="/scratch/models/deepseek-r1-0528" + export MODEL_PATH="/data/models/dsr1-fp8" export SERVED_MODEL_NAME="deepseek-r1-fp8" export SRT_SLURM_MODEL_PREFIX="dsr1-fp8" else @@ -37,9 +37,9 @@ if [ -d "$SRT_REPO_DIR" ]; then rm -rf "$SRT_REPO_DIR" fi -git clone https://github.com/ishandhanani/srt-slurm.git "$SRT_REPO_DIR" +git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR" cd "$SRT_REPO_DIR" || exit 1 -git checkout sa-submission-q1-2026 +git checkout sa-submission-q2-2026 echo "Installing srtctl..." export UV_INSTALL_DIR="$GITHUB_WORKSPACE/.local/bin" @@ -66,6 +66,7 @@ srun -N 1 -A $SLURM_ACCOUNT -p $SLURM_PARTITION bash -c "enroot import -o $NGINX export ISL="$ISL" export OSL="$OSL" +export EVAL_ONLY="${EVAL_ONLY:-false}" # Create srtslurm.yaml for srtctl SRTCTL_ROOT="${GITHUB_WORKSPACE}/${SRT_REPO_DIR}" @@ -101,7 +102,17 @@ cat srtslurm.yaml echo "Running make setup..." make setup ARCH=x86_64 +# Export eval-related env vars for srt-slurm post-benchmark eval +export INFMAX_WORKSPACE="$GITHUB_WORKSPACE" + echo "Submitting job with srtctl..." + +if [[ -z "$CONFIG_FILE" ]]; then + echo "Error: CONFIG_FILE is not set. The srt-slurm path requires a CONFIG_FILE in additional-settings." >&2 + echo "Config: MODEL_PREFIX=${MODEL_PREFIX} PRECISION=${PRECISION} FRAMEWORK=${FRAMEWORK}" >&2 + exit 1 +fi + # Override the job name in the config file with the runner name sed -i "s/^name:.*/name: \"${RUNNER_NAME}\"/" "$CONFIG_FILE" SRTCTL_OUTPUT=$(srtctl apply -f "$CONFIG_FILE" --tags "b300,${MODEL_PREFIX},${PRECISION},${ISL}x${OSL},infmax-$(date +%Y%m%d)" 2>&1) @@ -165,45 +176,66 @@ echo "Found logs directory: $LOGS_DIR" cp -r "$LOGS_DIR" "$GITHUB_WORKSPACE/LOGS" tar czf "$GITHUB_WORKSPACE/multinode_server_logs.tar.gz" -C "$LOGS_DIR" . -# Find all result subdirectories -RESULT_SUBDIRS=$(find "$LOGS_DIR" -maxdepth 1 -type d -name "*isl*osl*" 2>/dev/null) +if [[ "${EVAL_ONLY:-false}" != "true" ]]; then + # Find all result subdirectories + RESULT_SUBDIRS=$(find "$LOGS_DIR" -maxdepth 1 -type d -name "*isl*osl*" 2>/dev/null) + + if [ -z "$RESULT_SUBDIRS" ]; then + echo "Warning: No result subdirectories found in $LOGS_DIR" + else + # Process results from all configurations + for result_subdir in $RESULT_SUBDIRS; do + echo "Processing result subdirectory: $result_subdir" + + # Extract configuration info from directory name + CONFIG_NAME=$(basename "$result_subdir") + + # Find all result JSON files + RESULT_FILES=$(find "$result_subdir" -name "results_concurrency_*.json" 2>/dev/null) + + for result_file in $RESULT_FILES; do + if [ -f "$result_file" ]; then + # Extract metadata from filename + # Files are of the format "results_concurrency_gpus_{num gpus}_ctx_{num ctx}_gen_{num gen}.json" + filename=$(basename "$result_file") + concurrency=$(echo "$filename" | sed -n 's/results_concurrency_\([0-9]*\)_gpus_.*/\1/p') + gpus=$(echo "$filename" | sed -n 's/results_concurrency_[0-9]*_gpus_\([0-9]*\)_ctx_.*/\1/p') + ctx=$(echo "$filename" | sed -n 's/.*_ctx_\([0-9]*\)_gen_.*/\1/p') + gen=$(echo "$filename" | sed -n 's/.*_gen_\([0-9]*\)\.json/\1/p') + + echo "Processing concurrency $concurrency with $gpus GPUs (ctx: $ctx, gen: $gen): $result_file" + + WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus_${gpus}_ctx_${ctx}_gen_${gen}.json" + cp "$result_file" "$WORKSPACE_RESULT_FILE" + + echo "Copied result file to: $WORKSPACE_RESULT_FILE" + fi + done + done + fi -if [ -z "$RESULT_SUBDIRS" ]; then - echo "Warning: No result subdirectories found in $LOGS_DIR" + echo "All result files processed" else - # Process results from all configurations - for result_subdir in $RESULT_SUBDIRS; do - echo "Processing result subdirectory: $result_subdir" - - # Extract configuration info from directory name - CONFIG_NAME=$(basename "$result_subdir") - - # Find all result JSON files - RESULT_FILES=$(find "$result_subdir" -name "results_concurrency_*.json" 2>/dev/null) - - for result_file in $RESULT_FILES; do - if [ -f "$result_file" ]; then - # Extract metadata from filename - # Files are of the format "results_concurrency_gpus_{num gpus}_ctx_{num ctx}_gen_{num gen}.json" - filename=$(basename "$result_file") - concurrency=$(echo "$filename" | sed -n 's/results_concurrency_\([0-9]*\)_gpus_.*/\1/p') - gpus=$(echo "$filename" | sed -n 's/results_concurrency_[0-9]*_gpus_\([0-9]*\)_ctx_.*/\1/p') - ctx=$(echo "$filename" | sed -n 's/.*_ctx_\([0-9]*\)_gen_.*/\1/p') - gen=$(echo "$filename" | sed -n 's/.*_gen_\([0-9]*\)\.json/\1/p') - - echo "Processing concurrency $concurrency with $gpus GPUs (ctx: $ctx, gen: $gen): $result_file" - - WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus_${gpus}_ctx_${ctx}_gen_${gen}.json" - cp "$result_file" "$WORKSPACE_RESULT_FILE" + echo "EVAL_ONLY=true: Skipping benchmark result collection" +fi - echo "Copied result file to: $WORKSPACE_RESULT_FILE" - fi +# Collect eval results if eval was requested +if [[ "${RUN_EVAL:-false}" == "true" || "${EVAL_ONLY:-false}" == "true" ]]; then + EVAL_DIR="$LOGS_DIR/eval_results" + if [ -d "$EVAL_DIR" ]; then + echo "Extracting eval results from $EVAL_DIR" + shopt -s nullglob + for eval_file in "$EVAL_DIR"/*; do + [ -f "$eval_file" ] || continue + cp "$eval_file" "$GITHUB_WORKSPACE/" + echo "Copied eval artifact: $(basename "$eval_file")" done - done + shopt -u nullglob + else + echo "WARNING: RUN_EVAL=true but no eval results found at $EVAL_DIR" + fi fi -echo "All result files processed" - # Clean up srt-slurm outputs to prevent NFS silly-rename lock files # from blocking the next job's checkout on this runner echo "Cleaning up srt-slurm outputs..." diff --git a/runners/launch_gb200-nv.sh b/runners/launch_gb200-nv.sh index 153bcd0f6..b746e4a24 100755 --- a/runners/launch_gb200-nv.sh +++ b/runners/launch_gb200-nv.sh @@ -62,6 +62,8 @@ NGINX_SQUASH_FILE="/mnt/lustre01/users-public/sa-shared/$(echo "$NGINX_IMAGE" | enroot import -o $SQUASH_FILE docker://$IMAGE enroot import -o $NGINX_SQUASH_FILE docker://$NGINX_IMAGE +export EVAL_ONLY="${EVAL_ONLY:-false}" + export ISL="$ISL" export OSL="$OSL" @@ -117,6 +119,14 @@ PY fi +# srt-slurm path requires a CONFIG_FILE pointing to a recipe YAML. +# Without it, srtctl apply scans every YAML in the repo and submits hundreds of jobs. +if [[ -z "$CONFIG_FILE" ]]; then + echo "Error: CONFIG_FILE is not set. The srt-slurm path requires a CONFIG_FILE in additional-settings." >&2 + echo "Config: MODEL_PREFIX=${MODEL_PREFIX} PRECISION=${PRECISION} FRAMEWORK=${FRAMEWORK}" >&2 + exit 1 +fi + echo "Cloning srt-slurm repository..." SRT_REPO_DIR="srt-slurm" if [ -d "$SRT_REPO_DIR" ]; then @@ -187,6 +197,9 @@ cat srtslurm.yaml echo "Running make setup..." make setup ARCH=aarch64 +# Export eval-related env vars for srt-slurm post-benchmark eval +export INFMAX_WORKSPACE="$GITHUB_WORKSPACE" + echo "Submitting job with srtctl..." # Override the job name in the config file with the runner name @@ -246,51 +259,74 @@ set -x echo "Job $JOB_ID completed!" echo "Collecting results..." -if [ ! -d "$LOGS_DIR" ]; then +if [ -d "$LOGS_DIR" ]; then + echo "Found logs directory: $LOGS_DIR" + cp -r "$LOGS_DIR" "$GITHUB_WORKSPACE/LOGS" + tar czf "$GITHUB_WORKSPACE/multinode_server_logs.tar.gz" -C "$LOGS_DIR" . +else echo "Warning: Logs directory not found at $LOGS_DIR" - exit 1 fi -echo "Found logs directory: $LOGS_DIR" +if [[ "${EVAL_ONLY:-false}" != "true" ]]; then + if [ ! -d "$LOGS_DIR" ]; then + exit 1 + fi -cp -r "$LOGS_DIR" "$GITHUB_WORKSPACE/LOGS" -tar czf "$GITHUB_WORKSPACE/multinode_server_logs.tar.gz" -C "$LOGS_DIR" . + # Find all result subdirectories + RESULT_SUBDIRS=$(find "$LOGS_DIR" -maxdepth 1 -type d -name "*isl*osl*" 2>/dev/null) -# Find all result subdirectories -RESULT_SUBDIRS=$(find "$LOGS_DIR" -maxdepth 1 -type d -name "*isl*osl*" 2>/dev/null) + if [ -z "$RESULT_SUBDIRS" ]; then + echo "Warning: No result subdirectories found in $LOGS_DIR" + else + # Process results from all configurations + for result_subdir in $RESULT_SUBDIRS; do + echo "Processing result subdirectory: $result_subdir" + + # Extract configuration info from directory name + CONFIG_NAME=$(basename "$result_subdir") + + # Find all result JSON files + RESULT_FILES=$(find "$result_subdir" -name "results_concurrency_*.json" 2>/dev/null) + + for result_file in $RESULT_FILES; do + if [ -f "$result_file" ]; then + # Extract metadata from filename + # Files are of the format "results_concurrency_gpus_{num gpus}_ctx_{num ctx}_gen_{num gen}.json" + filename=$(basename "$result_file") + concurrency=$(echo "$filename" | sed -n 's/results_concurrency_\([0-9]*\)_gpus_.*/\1/p') + gpus=$(echo "$filename" | sed -n 's/results_concurrency_[0-9]*_gpus_\([0-9]*\)_ctx_.*/\1/p') + ctx=$(echo "$filename" | sed -n 's/.*_ctx_\([0-9]*\)_gen_.*/\1/p') + gen=$(echo "$filename" | sed -n 's/.*_gen_\([0-9]*\)\.json/\1/p') + + echo "Processing concurrency $concurrency with $gpus GPUs (ctx: $ctx, gen: $gen): $result_file" + + WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus_${gpus}_ctx_${ctx}_gen_${gen}.json" + cp "$result_file" "$WORKSPACE_RESULT_FILE" + + echo "Copied result file to: $WORKSPACE_RESULT_FILE" + fi + done + done + fi -if [ -z "$RESULT_SUBDIRS" ]; then - echo "Warning: No result subdirectories found in $LOGS_DIR" + echo "All result files processed" else - # Process results from all configurations - for result_subdir in $RESULT_SUBDIRS; do - echo "Processing result subdirectory: $result_subdir" - - # Extract configuration info from directory name - CONFIG_NAME=$(basename "$result_subdir") - - # Find all result JSON files - RESULT_FILES=$(find "$result_subdir" -name "results_concurrency_*.json" 2>/dev/null) - - for result_file in $RESULT_FILES; do - if [ -f "$result_file" ]; then - # Extract metadata from filename - # Files are of the format "results_concurrency_gpus_{num gpus}_ctx_{num ctx}_gen_{num gen}.json" - filename=$(basename "$result_file") - concurrency=$(echo "$filename" | sed -n 's/results_concurrency_\([0-9]*\)_gpus_.*/\1/p') - gpus=$(echo "$filename" | sed -n 's/results_concurrency_[0-9]*_gpus_\([0-9]*\)_ctx_.*/\1/p') - ctx=$(echo "$filename" | sed -n 's/.*_ctx_\([0-9]*\)_gen_.*/\1/p') - gen=$(echo "$filename" | sed -n 's/.*_gen_\([0-9]*\)\.json/\1/p') - - echo "Processing concurrency $concurrency with $gpus GPUs (ctx: $ctx, gen: $gen): $result_file" - - WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus_${gpus}_ctx_${ctx}_gen_${gen}.json" - cp "$result_file" "$WORKSPACE_RESULT_FILE" + echo "EVAL_ONLY=true: Skipping benchmark result collection" +fi - echo "Copied result file to: $WORKSPACE_RESULT_FILE" - fi +# Collect eval results if eval was requested +if [[ "${RUN_EVAL:-false}" == "true" || "${EVAL_ONLY:-false}" == "true" ]]; then + EVAL_DIR="$LOGS_DIR/eval_results" + if [ -d "$EVAL_DIR" ]; then + echo "Extracting eval results from $EVAL_DIR" + shopt -s nullglob + for eval_file in "$EVAL_DIR"/*; do + [ -f "$eval_file" ] || continue + cp "$eval_file" "$GITHUB_WORKSPACE/" + echo "Copied eval artifact: $(basename "$eval_file")" done - done + shopt -u nullglob + else + echo "WARNING: RUN_EVAL=true but no eval results found at $EVAL_DIR" + fi fi - -echo "All result files processed" diff --git a/runners/launch_gb300-nv.sh b/runners/launch_gb300-nv.sh index d71fd5af7..5f48ddcec 100644 --- a/runners/launch_gb300-nv.sh +++ b/runners/launch_gb300-nv.sh @@ -31,6 +31,8 @@ NGINX_SQUASH_FILE="/home/sa-shared/squash/$(echo "$NGINX_IMAGE" | sed 's/[\/:@#] srun --partition=$SLURM_PARTITION --exclusive --time=180 bash -c "enroot import -o $SQUASH_FILE docker://$IMAGE" srun --partition=$SLURM_PARTITION --exclusive --time=180 bash -c "enroot import -o $NGINX_SQUASH_FILE docker://$NGINX_IMAGE" +export EVAL_ONLY="${EVAL_ONLY:-false}" + export ISL="$ISL" export OSL="$OSL" @@ -41,9 +43,9 @@ if [ -d "$SRT_REPO_DIR" ]; then rm -rf "$SRT_REPO_DIR" fi -git clone https://github.com/ishandhanani/srt-slurm.git "$SRT_REPO_DIR" +git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR" cd "$SRT_REPO_DIR" -git checkout sa-submission-q1-2026 +git checkout sa-submission-q2-2026 echo "Installing srtctl..." export UV_INSTALL_DIR="$GITHUB_WORKSPACE/.local/bin" @@ -95,8 +97,17 @@ cat srtslurm.yaml echo "Running make setup..." make setup ARCH=aarch64 +# Export eval-related env vars for srt-slurm post-benchmark eval +export INFMAX_WORKSPACE="$GITHUB_WORKSPACE" + echo "Submitting job with srtctl..." +if [[ -z "$CONFIG_FILE" ]]; then + echo "Error: CONFIG_FILE is not set. The srt-slurm path requires a CONFIG_FILE in additional-settings." >&2 + echo "Config: MODEL_PREFIX=${MODEL_PREFIX} PRECISION=${PRECISION} FRAMEWORK=${FRAMEWORK}" >&2 + exit 1 +fi + # Override the job name in the config file with the runner name sed -i "s/^name:.*/name: \"${RUNNER_NAME}\"/" "$CONFIG_FILE" @@ -150,54 +161,77 @@ set -x echo "Job $JOB_ID completed!" echo "Collecting results..." -if [ ! -d "$LOGS_DIR" ]; then +if [ -d "$LOGS_DIR" ]; then + echo "Found logs directory: $LOGS_DIR" + cp -r "$LOGS_DIR" "$GITHUB_WORKSPACE/LOGS" + tar czf "$GITHUB_WORKSPACE/multinode_server_logs.tar.gz" -C "$LOGS_DIR" . +else echo "Warning: Logs directory not found at $LOGS_DIR" - exit 1 fi -echo "Found logs directory: $LOGS_DIR" - -cp -r "$LOGS_DIR" "$GITHUB_WORKSPACE/LOGS" -tar czf "$GITHUB_WORKSPACE/multinode_server_logs.tar.gz" -C "$LOGS_DIR" . +if [[ "${EVAL_ONLY:-false}" != "true" ]]; then + if [ ! -d "$LOGS_DIR" ]; then + exit 1 + fi -# Find all result subdirectories -RESULT_SUBDIRS=$(find "$LOGS_DIR" -maxdepth 1 -type d -name "*isl*osl*" 2>/dev/null) + # Find all result subdirectories + RESULT_SUBDIRS=$(find "$LOGS_DIR" -maxdepth 1 -type d -name "*isl*osl*" 2>/dev/null) -if [ -z "$RESULT_SUBDIRS" ]; then - echo "Warning: No result subdirectories found in $LOGS_DIR" -else - # Process results from all configurations - for result_subdir in $RESULT_SUBDIRS; do - echo "Processing result subdirectory: $result_subdir" + if [ -z "$RESULT_SUBDIRS" ]; then + echo "Warning: No result subdirectories found in $LOGS_DIR" + else + # Process results from all configurations + for result_subdir in $RESULT_SUBDIRS; do + echo "Processing result subdirectory: $result_subdir" - # Extract configuration info from directory name - CONFIG_NAME=$(basename "$result_subdir") + # Extract configuration info from directory name + CONFIG_NAME=$(basename "$result_subdir") - # Find all result JSON files - RESULT_FILES=$(find "$result_subdir" -name "results_concurrency_*.json" 2>/dev/null) + # Find all result JSON files + RESULT_FILES=$(find "$result_subdir" -name "results_concurrency_*.json" 2>/dev/null) - for result_file in $RESULT_FILES; do - if [ -f "$result_file" ]; then - # Extract metadata from filename - # Files are of the format "results_concurrency_gpus_{num gpus}_ctx_{num ctx}_gen_{num gen}.json" - filename=$(basename "$result_file") - concurrency=$(echo "$filename" | sed -n 's/results_concurrency_\([0-9]*\)_gpus_.*/\1/p') - gpus=$(echo "$filename" | sed -n 's/results_concurrency_[0-9]*_gpus_\([0-9]*\)_ctx_.*/\1/p') - ctx=$(echo "$filename" | sed -n 's/.*_ctx_\([0-9]*\)_gen_.*/\1/p') - gen=$(echo "$filename" | sed -n 's/.*_gen_\([0-9]*\)\.json/\1/p') + for result_file in $RESULT_FILES; do + if [ -f "$result_file" ]; then + # Extract metadata from filename + # Files are of the format "results_concurrency_gpus_{num gpus}_ctx_{num ctx}_gen_{num gen}.json" + filename=$(basename "$result_file") + concurrency=$(echo "$filename" | sed -n 's/results_concurrency_\([0-9]*\)_gpus_.*/\1/p') + gpus=$(echo "$filename" | sed -n 's/results_concurrency_[0-9]*_gpus_\([0-9]*\)_ctx_.*/\1/p') + ctx=$(echo "$filename" | sed -n 's/.*_ctx_\([0-9]*\)_gen_.*/\1/p') + gen=$(echo "$filename" | sed -n 's/.*_gen_\([0-9]*\)\.json/\1/p') - echo "Processing concurrency $concurrency with $gpus GPUs (ctx: $ctx, gen: $gen): $result_file" + echo "Processing concurrency $concurrency with $gpus GPUs (ctx: $ctx, gen: $gen): $result_file" - WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus_${gpus}_ctx_${ctx}_gen_${gen}.json" - cp "$result_file" "$WORKSPACE_RESULT_FILE" + WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus_${gpus}_ctx_${ctx}_gen_${gen}.json" + cp "$result_file" "$WORKSPACE_RESULT_FILE" - echo "Copied result file to: $WORKSPACE_RESULT_FILE" - fi + echo "Copied result file to: $WORKSPACE_RESULT_FILE" + fi + done done - done + fi + + echo "All result files processed" +else + echo "EVAL_ONLY=true: Skipping benchmark result collection" fi -echo "All result files processed" +# Collect eval results if eval was requested +if [[ "${RUN_EVAL:-false}" == "true" || "${EVAL_ONLY:-false}" == "true" ]]; then + EVAL_DIR="$LOGS_DIR/eval_results" + if [ -d "$EVAL_DIR" ]; then + echo "Extracting eval results from $EVAL_DIR" + shopt -s nullglob + for eval_file in "$EVAL_DIR"/*; do + [ -f "$eval_file" ] || continue + cp "$eval_file" "$GITHUB_WORKSPACE/" + echo "Copied eval artifact: $(basename "$eval_file")" + done + shopt -u nullglob + else + echo "WARNING: RUN_EVAL=true but no eval results found at $EVAL_DIR" + fi +fi # Clean up srt-slurm outputs to prevent NFS silly-rename lock files # from blocking the next job's checkout on this runner diff --git a/runners/launch_h100-dgxc-slurm.sh b/runners/launch_h100-dgxc-slurm.sh index b52fb0a57..1bdc0c7c1 100644 --- a/runners/launch_h100-dgxc-slurm.sh +++ b/runners/launch_h100-dgxc-slurm.sh @@ -41,13 +41,20 @@ if [[ "$IS_MULTINODE" == "true" ]]; then rm -rf "$SRT_REPO_DIR" fi - git clone https://github.com/ishandhanani/srt-slurm.git "$SRT_REPO_DIR" + git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR" cd "$SRT_REPO_DIR" - git checkout sa-submission-q1-2026 + git checkout sa-submission-q2-2026 echo "Installing srtctl..." - curl -LsSf https://astral.sh/uv/install.sh | sh - source $HOME/.local/bin/env + export UV_INSTALL_DIR="/mnt/nfs/sa-shared/.uv/bin" + export UV_CACHE_DIR="/mnt/nfs/sa-shared/.uv/cache" + export UV_PYTHON_INSTALL_DIR="/mnt/nfs/sa-shared/.uv/python" + mkdir -p "$UV_INSTALL_DIR" "$UV_CACHE_DIR" "$UV_PYTHON_INSTALL_DIR" + if ! [ -x "$UV_INSTALL_DIR/uv" ]; then + curl -LsSf https://astral.sh/uv/install.sh | sh + fi + export PATH="$UV_INSTALL_DIR:$PATH" + source $UV_INSTALL_DIR/env uv venv source .venv/bin/activate @@ -75,6 +82,7 @@ if [[ "$IS_MULTINODE" == "true" ]]; then export ISL="$ISL" export OSL="$OSL" + export EVAL_ONLY="${EVAL_ONLY:-false}" # Create srtslurm.yaml for srtctl (used by both frameworks) SRTCTL_ROOT="${GITHUB_WORKSPACE}/${SRT_REPO_DIR}" @@ -112,7 +120,17 @@ EOF echo "Running make setup..." make setup ARCH=x86_64 + # Export eval-related env vars for srt-slurm post-benchmark eval + export INFMAX_WORKSPACE="$GITHUB_WORKSPACE" + echo "Submitting job with srtctl..." + + if [[ -z "$CONFIG_FILE" ]]; then + echo "Error: CONFIG_FILE is not set. The srt-slurm path requires a CONFIG_FILE in additional-settings." >&2 + echo "Config: MODEL_PREFIX=${MODEL_PREFIX} PRECISION=${PRECISION} FRAMEWORK=${FRAMEWORK}" >&2 + exit 1 + fi + # Override the job name in the config file with the runner name sed -i "s/^name:.*/name: \"${RUNNER_NAME}\"/" "$CONFIG_FILE" sed -i "/^name:.*/a sbatch_directives:\n exclude: \"${SLURM_EXCLUDED_NODELIST}\"" "$CONFIG_FILE" @@ -177,45 +195,66 @@ EOF cp -r "$LOGS_DIR" "$GITHUB_WORKSPACE/LOGS" tar czf "$GITHUB_WORKSPACE/multinode_server_logs.tar.gz" -C "$LOGS_DIR" . - # Find all result subdirectories - RESULT_SUBDIRS=$(find "$LOGS_DIR" -maxdepth 1 -type d -name "*isl*osl*" 2>/dev/null) - - if [ -z "$RESULT_SUBDIRS" ]; then - echo "Warning: No result subdirectories found in $LOGS_DIR" - else - # Process results from all configurations - for result_subdir in $RESULT_SUBDIRS; do - echo "Processing result subdirectory: $result_subdir" - - # Extract configuration info from directory name - CONFIG_NAME=$(basename "$result_subdir") - - # Find all result JSON files - RESULT_FILES=$(find "$result_subdir" -name "results_concurrency_*.json" 2>/dev/null) + if [[ "${EVAL_ONLY:-false}" != "true" ]]; then + # Find all result subdirectories + RESULT_SUBDIRS=$(find "$LOGS_DIR" -maxdepth 1 -type d -name "*isl*osl*" 2>/dev/null) - for result_file in $RESULT_FILES; do - if [ -f "$result_file" ]; then - # Extract metadata from filename - # Files are of the format "results_concurrency_gpus_{num gpus}_ctx_{num ctx}_gen_{num gen}.json" - filename=$(basename "$result_file") - concurrency=$(echo "$filename" | sed -n 's/results_concurrency_\([0-9]*\)_gpus_.*/\1/p') - gpus=$(echo "$filename" | sed -n 's/results_concurrency_[0-9]*_gpus_\([0-9]*\)_ctx_.*/\1/p') - ctx=$(echo "$filename" | sed -n 's/.*_ctx_\([0-9]*\)_gen_.*/\1/p') - gen=$(echo "$filename" | sed -n 's/.*_gen_\([0-9]*\)\.json/\1/p') - - echo "Processing concurrency $concurrency with $gpus GPUs (ctx: $ctx, gen: $gen): $result_file" + if [ -z "$RESULT_SUBDIRS" ]; then + echo "Warning: No result subdirectories found in $LOGS_DIR" + else + # Process results from all configurations + for result_subdir in $RESULT_SUBDIRS; do + echo "Processing result subdirectory: $result_subdir" + + # Extract configuration info from directory name + CONFIG_NAME=$(basename "$result_subdir") + + # Find all result JSON files + RESULT_FILES=$(find "$result_subdir" -name "results_concurrency_*.json" 2>/dev/null) + + for result_file in $RESULT_FILES; do + if [ -f "$result_file" ]; then + # Extract metadata from filename + # Files are of the format "results_concurrency_gpus_{num gpus}_ctx_{num ctx}_gen_{num gen}.json" + filename=$(basename "$result_file") + concurrency=$(echo "$filename" | sed -n 's/results_concurrency_\([0-9]*\)_gpus_.*/\1/p') + gpus=$(echo "$filename" | sed -n 's/results_concurrency_[0-9]*_gpus_\([0-9]*\)_ctx_.*/\1/p') + ctx=$(echo "$filename" | sed -n 's/.*_ctx_\([0-9]*\)_gen_.*/\1/p') + gen=$(echo "$filename" | sed -n 's/.*_gen_\([0-9]*\)\.json/\1/p') + + echo "Processing concurrency $concurrency with $gpus GPUs (ctx: $ctx, gen: $gen): $result_file" + + WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus_${gpus}_ctx_${ctx}_gen_${gen}.json" + cp "$result_file" "$WORKSPACE_RESULT_FILE" + + echo "Copied result file to: $WORKSPACE_RESULT_FILE" + fi + done + done + fi - WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus_${gpus}_ctx_${ctx}_gen_${gen}.json" - cp "$result_file" "$WORKSPACE_RESULT_FILE" + echo "All result files processed" + else + echo "EVAL_ONLY=true: Skipping benchmark result collection" + fi - echo "Copied result file to: $WORKSPACE_RESULT_FILE" - fi + # Collect eval results if eval was requested + if [[ "${RUN_EVAL:-false}" == "true" || "${EVAL_ONLY:-false}" == "true" ]]; then + EVAL_DIR="$LOGS_DIR/eval_results" + if [ -d "$EVAL_DIR" ]; then + echo "Extracting eval results from $EVAL_DIR" + shopt -s nullglob + for eval_file in "$EVAL_DIR"/*; do + [ -f "$eval_file" ] || continue + cp "$eval_file" "$GITHUB_WORKSPACE/" + echo "Copied eval artifact: $(basename "$eval_file")" done - done + shopt -u nullglob + else + echo "WARNING: RUN_EVAL=true but no eval results found at $EVAL_DIR" + fi fi - echo "All result files processed" - # Clean up srt-slurm outputs to prevent NFS silly-rename lock files # from blocking the next job's checkout on this runner echo "Cleaning up srt-slurm outputs..." diff --git a/runners/launch_h200-dgxc-slurm.sh b/runners/launch_h200-dgxc-slurm.sh index 9b3b771a5..4dba44931 100755 --- a/runners/launch_h200-dgxc-slurm.sh +++ b/runners/launch_h200-dgxc-slurm.sh @@ -40,9 +40,9 @@ if [[ "$IS_MULTINODE" == "true" ]]; then rm -rf "$SRT_REPO_DIR" fi - git clone https://github.com/ishandhanani/srt-slurm.git "$SRT_REPO_DIR" + git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR" cd "$SRT_REPO_DIR" - git checkout sa-submission-q1-2026 + git checkout sa-submission-q2-2026 echo "Installing srtctl..." curl -LsSf https://astral.sh/uv/install.sh | sh @@ -74,6 +74,7 @@ if [[ "$IS_MULTINODE" == "true" ]]; then export ISL="$ISL" export OSL="$OSL" + export EVAL_ONLY="${EVAL_ONLY:-false}" # Create srtslurm.yaml for srtctl (used by both frameworks) SRTCTL_ROOT="${GITHUB_WORKSPACE}/${SRT_REPO_DIR}" @@ -112,7 +113,17 @@ EOF echo "Running make setup..." make setup ARCH=x86_64 + # Export eval-related env vars for srt-slurm post-benchmark eval + export INFMAX_WORKSPACE="$GITHUB_WORKSPACE" + echo "Submitting job with srtctl..." + + if [[ -z "$CONFIG_FILE" ]]; then + echo "Error: CONFIG_FILE is not set. The srt-slurm path requires a CONFIG_FILE in additional-settings." >&2 + echo "Config: MODEL_PREFIX=${MODEL_PREFIX} PRECISION=${PRECISION} FRAMEWORK=${FRAMEWORK}" >&2 + exit 1 + fi + # Override the job name in the config file with the runner name sed -i "s/^name:.*/name: \"${RUNNER_NAME}\"/" "$CONFIG_FILE" SRTCTL_OUTPUT=$(srtctl apply -f "$CONFIG_FILE" --tags "h200,${MODEL_PREFIX},${PRECISION},${ISL}x${OSL},infmax-$(date +%Y%m%d)" 2>&1) @@ -176,45 +187,66 @@ EOF cp -r "$LOGS_DIR" "$GITHUB_WORKSPACE/LOGS" tar czf "$GITHUB_WORKSPACE/multinode_server_logs.tar.gz" -C "$LOGS_DIR" . - # Find all result subdirectories - RESULT_SUBDIRS=$(find "$LOGS_DIR" -maxdepth 1 -type d -name "*isl*osl*" 2>/dev/null) - - if [ -z "$RESULT_SUBDIRS" ]; then - echo "Warning: No result subdirectories found in $LOGS_DIR" - else - # Process results from all configurations - for result_subdir in $RESULT_SUBDIRS; do - echo "Processing result subdirectory: $result_subdir" - - # Extract configuration info from directory name - CONFIG_NAME=$(basename "$result_subdir") - - # Find all result JSON files - RESULT_FILES=$(find "$result_subdir" -name "results_concurrency_*.json" 2>/dev/null) - - for result_file in $RESULT_FILES; do - if [ -f "$result_file" ]; then - # Extract metadata from filename - # Files are of the format "results_concurrency_gpus_{num gpus}_ctx_{num ctx}_gen_{num gen}.json" - filename=$(basename "$result_file") - concurrency=$(echo "$filename" | sed -n 's/results_concurrency_\([0-9]*\)_gpus_.*/\1/p') - gpus=$(echo "$filename" | sed -n 's/results_concurrency_[0-9]*_gpus_\([0-9]*\)_ctx_.*/\1/p') - ctx=$(echo "$filename" | sed -n 's/.*_ctx_\([0-9]*\)_gen_.*/\1/p') - gen=$(echo "$filename" | sed -n 's/.*_gen_\([0-9]*\)\.json/\1/p') + if [[ "${EVAL_ONLY:-false}" != "true" ]]; then + # Find all result subdirectories + RESULT_SUBDIRS=$(find "$LOGS_DIR" -maxdepth 1 -type d -name "*isl*osl*" 2>/dev/null) - echo "Processing concurrency $concurrency with $gpus GPUs (ctx: $ctx, gen: $gen): $result_file" + if [ -z "$RESULT_SUBDIRS" ]; then + echo "Warning: No result subdirectories found in $LOGS_DIR" + else + # Process results from all configurations + for result_subdir in $RESULT_SUBDIRS; do + echo "Processing result subdirectory: $result_subdir" + + # Extract configuration info from directory name + CONFIG_NAME=$(basename "$result_subdir") + + # Find all result JSON files + RESULT_FILES=$(find "$result_subdir" -name "results_concurrency_*.json" 2>/dev/null) + + for result_file in $RESULT_FILES; do + if [ -f "$result_file" ]; then + # Extract metadata from filename + # Files are of the format "results_concurrency_gpus_{num gpus}_ctx_{num ctx}_gen_{num gen}.json" + filename=$(basename "$result_file") + concurrency=$(echo "$filename" | sed -n 's/results_concurrency_\([0-9]*\)_gpus_.*/\1/p') + gpus=$(echo "$filename" | sed -n 's/results_concurrency_[0-9]*_gpus_\([0-9]*\)_ctx_.*/\1/p') + ctx=$(echo "$filename" | sed -n 's/.*_ctx_\([0-9]*\)_gen_.*/\1/p') + gen=$(echo "$filename" | sed -n 's/.*_gen_\([0-9]*\)\.json/\1/p') + + echo "Processing concurrency $concurrency with $gpus GPUs (ctx: $ctx, gen: $gen): $result_file" + + WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus_${gpus}_ctx_${ctx}_gen_${gen}.json" + cp "$result_file" "$WORKSPACE_RESULT_FILE" + + echo "Copied result file to: $WORKSPACE_RESULT_FILE" + fi + done + done + fi - WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus_${gpus}_ctx_${ctx}_gen_${gen}.json" - cp "$result_file" "$WORKSPACE_RESULT_FILE" + echo "All result files processed" + else + echo "EVAL_ONLY=true: Skipping benchmark result collection" + fi - echo "Copied result file to: $WORKSPACE_RESULT_FILE" - fi + # Collect eval results if eval was requested + if [[ "${RUN_EVAL:-false}" == "true" || "${EVAL_ONLY:-false}" == "true" ]]; then + EVAL_DIR="$LOGS_DIR/eval_results" + if [ -d "$EVAL_DIR" ]; then + echo "Extracting eval results from $EVAL_DIR" + shopt -s nullglob + for eval_file in "$EVAL_DIR"/*; do + [ -f "$eval_file" ] || continue + cp "$eval_file" "$GITHUB_WORKSPACE/" + echo "Copied eval artifact: $(basename "$eval_file")" done - done + shopt -u nullglob + else + echo "WARNING: RUN_EVAL=true but no eval results found at $EVAL_DIR" + fi fi - echo "All result files processed" - # Clean up srt-slurm outputs to prevent NFS silly-rename lock files # from blocking the next job's checkout on this runner echo "Cleaning up srt-slurm outputs..." diff --git a/runners/launch_mi355x-amds.sh b/runners/launch_mi355x-amds.sh index 908d0afd8..5e3225b81 100644 --- a/runners/launch_mi355x-amds.sh +++ b/runners/launch_mi355x-amds.sh @@ -51,6 +51,10 @@ if [[ "$IS_MULTINODE" == "true" ]]; then mkdir -p "$BENCHMARK_LOGS_DIR" sudo rm -rf "$BENCHMARK_LOGS_DIR/logs" 2>/dev/null || true + # Ensure root-owned files are cleaned up even on early exit to prevent + # EACCES errors when the next GH Actions job checks out on this runner + trap 'sudo rm -rf "$BENCHMARK_LOGS_DIR" 2>/dev/null || true' EXIT + SCRIPT_NAME="${EXP_NAME%%_*}_${PRECISION}_mi355x_${FRAMEWORK}.sh" if [[ "$FRAMEWORK" == "sglang-disagg" ]]; then BENCHMARK_SUBDIR="multi_node" @@ -101,33 +105,53 @@ if [[ "$IS_MULTINODE" == "true" ]]; then # search for "FRAMEWORK_DIFF_IF_STATEMENT #3" for this if-statement # Find the latest log directory that contains the data - cat > collect_latest_results.py <<'PY' + if [[ "${EVAL_ONLY:-false}" != "true" ]]; then + cat > collect_latest_results.py <<'PY' import os, sys sgl_job_dir, isl, osl, nexp = sys.argv[1], int(sys.argv[2]), int(sys.argv[3]), int(sys.argv[4]) for path in sorted([f"{sgl_job_dir}/logs/{name}/sglang_isl_{isl}_osl_{osl}" for name in os.listdir(f"{sgl_job_dir}/logs/") if os.path.isdir(f"{sgl_job_dir}/logs/{name}/sglang_isl_{isl}_osl_{osl}")], key=os.path.getmtime, reverse=True)[:nexp]: print(path) PY - LOGS_DIR=$(python3 collect_latest_results.py "$BENCHMARK_LOGS_DIR" "$ISL" "$OSL" 1) - if [ -z "$LOGS_DIR" ]; then - echo "No logs directory found for ISL=${ISL}, OSL=${OSL}" - exit 1 + LOGS_DIR=$(python3 collect_latest_results.py "$BENCHMARK_LOGS_DIR" "$ISL" "$OSL" 1) + if [ -z "$LOGS_DIR" ]; then + echo "No logs directory found for ISL=${ISL}, OSL=${OSL}" + exit 1 + fi + + echo "Found logs directory: $LOGS_DIR" + ls -la "$LOGS_DIR" + + # Result JSON are contained within the result directory + for result_file in $(find $LOGS_DIR -type f); do + # result_file should directly be isl_ISL_osl_OSL_concurrency_CONC_req_rate_R_gpus_N_ctx_M_gen_N.json + file_name=$(basename $result_file) + if [ -f $result_file ]; then + # Copy the result file to workspace with a unique name + WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${file_name}" + echo "Found result file ${result_file}. Copying it to ${WORKSPACE_RESULT_FILE}" + cp $result_file $WORKSPACE_RESULT_FILE + fi + done fi - echo "Found logs directory: $LOGS_DIR" - ls -la "$LOGS_DIR" - - # Result JSON are contained within the result directory - for result_file in $(find $LOGS_DIR -type f); do - # result_file should directly be isl_ISL_osl_OSL_concurrency_CONC_req_rate_R_gpus_N_ctx_M_gen_N.json - file_name=$(basename $result_file) - if [ -f $result_file ]; then - # Copy the result file to workspace with a unique name - WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${file_name}" - echo "Found result file ${result_file}. Copying it to ${WORKSPACE_RESULT_FILE}" - cp $result_file $WORKSPACE_RESULT_FILE + # Extract eval results if eval was requested + if [[ "${RUN_EVAL:-false}" == "true" ]]; then + # Find eval_results in the slurm job logs directory + EVAL_DIR=$(find "$BENCHMARK_LOGS_DIR/logs" -type d -name eval_results 2>/dev/null | head -1) + if [ -n "$EVAL_DIR" ] && [ -d "$EVAL_DIR" ]; then + echo "Extracting eval results from $EVAL_DIR" + shopt -s nullglob + for eval_file in "$EVAL_DIR"/*; do + [ -f "$eval_file" ] || continue + cp "$eval_file" "$GITHUB_WORKSPACE/" + echo "Copied eval artifact: $(basename "$eval_file")" + done + shopt -u nullglob + else + echo "WARNING: RUN_EVAL=true but no eval results found under $BENCHMARK_LOGS_DIR/logs" fi - done + fi echo "All result files processed" # Use sync scancel to ensure nfs file handle is released in time @@ -146,6 +170,9 @@ PY echo "Logs copied to $ARTIFACT_DIR for artifact upload" fi + # Clean up root-owned files to prevent EACCES on GH Actions checkout cleanup + sudo rm -rf "$BENCHMARK_LOGS_DIR" 2>/dev/null || true + else export HF_HUB_CACHE_MOUNT="/var/lib/hf-hub-cache/" diff --git a/utils/collect_eval_results.py b/utils/collect_eval_results.py index 1c2f6429b..18917447e 100644 --- a/utils/collect_eval_results.py +++ b/utils/collect_eval_results.py @@ -10,7 +10,8 @@ from summarize import ( load_json, MODEL, HARDWARE, FRAMEWORK, PRECISION, TP, EP, CONC, DP_ATTENTION, TASK, SCORE, EM_STRICT, EM_FLEXIBLE, N_EFF, - SPEC_DECODING + SPEC_DECODING, PREFILL_TP, PREFILL_EP, PREFILL_DP_ATTN, PREFILL_WORKERS, + DECODE_TP, DECODE_EP, DECODE_DP_ATTN, DECODE_WORKERS ) @@ -160,19 +161,67 @@ def se(x: Any) -> str: return '' +def as_int(x: Any, default: int = 0) -> int: + """Convert a metadata field to int with a fallback.""" + try: + return int(x) + except Exception: + return default + + +def as_bool(x: Any, default: bool = False) -> bool: + """Parse a metadata boolean stored as bool/string/int.""" + if isinstance(x, bool): + return x + if x is None: + return default + return str(x).lower() == 'true' + + def build_row(meta: Dict[str, Any], m: Dict[str, Any]) -> Dict[str, Any]: """Build a result row from metadata and extracted metrics.""" + is_multinode = as_bool(meta.get('is_multinode'), False) + prefill_tp = as_int(meta.get('prefill_tp', meta.get('tp', 1)), 1) + prefill_ep = as_int(meta.get('prefill_ep', meta.get('ep', 1)), 1) + prefill_num_workers = as_int(meta.get('prefill_num_workers', 1), 1) + decode_tp = as_int(meta.get('decode_tp', meta.get('tp', 1)), 1) + decode_ep = as_int(meta.get('decode_ep', meta.get('ep', 1)), 1) + decode_num_workers = as_int(meta.get('decode_num_workers', 1), 1) + prefill_dp_attention = meta.get('prefill_dp_attention') + decode_dp_attention = meta.get('decode_dp_attention') + dp_attention = meta.get('dp_attention', 'none') + + if prefill_dp_attention is None: + prefill_dp_attention = dp_attention + if decode_dp_attention is None: + decode_dp_attention = dp_attention + + if is_multinode: + if prefill_dp_attention == decode_dp_attention: + dp_attention = prefill_dp_attention + else: + dp_attention = f"prefill={str(prefill_dp_attention).lower()},decode={str(decode_dp_attention).lower()}" + row = { + 'is_multinode': is_multinode, 'model_prefix': meta.get('infmax_model_prefix', 'unknown'), 'model': m.get('model') or meta.get('model', 'unknown'), 'hw': meta.get('hw', 'unknown').upper(), 'framework': meta.get('framework', 'unknown').lower(), 'precision': meta.get('precision', 'unknown').lower(), 'spec_decoding': meta.get('spec_decoding', 'unknown'), - 'tp': int(meta.get('tp', 1)), - 'ep': int(meta.get('ep', 1)), - 'conc': int(meta.get('conc', 0)), - 'dp_attention': str(meta.get('dp_attention', "none")).lower(), + 'tp': as_int(meta.get('tp', prefill_tp), prefill_tp), + 'ep': as_int(meta.get('ep', prefill_ep), prefill_ep), + 'prefill_tp': prefill_tp, + 'prefill_ep': prefill_ep, + 'prefill_num_workers': prefill_num_workers, + 'decode_tp': decode_tp, + 'decode_ep': decode_ep, + 'decode_num_workers': decode_num_workers, + 'conc': as_int(meta.get('conc', 0), 0), + 'dp_attention': str(dp_attention).lower(), + 'prefill_dp_attention': str(prefill_dp_attention).lower(), + 'decode_dp_attention': str(decode_dp_attention).lower(), 'task': m.get('task', 'unknown'), 'em_strict': m.get('strict'), 'em_strict_se': m.get('strict_se'), @@ -226,49 +275,111 @@ def main(): row = build_row(meta, m) rows.append(row) + single_node_rows = [r for r in rows if not r['is_multinode']] + multinode_rows = [r for r in rows if r['is_multinode']] + # Sort for stable output (default: by model_prefix) sort_by = sys.argv[3] if len(sys.argv) > 3 else 'model_prefix' - if sort_by == 'hw': - rows.sort(key=lambda r: ( - r['hw'], r['framework'], r['precision'], r.get('spec_decoding', ''), r['tp'], r['ep'], r['conc'] + single_node_sort_key = ( + (lambda r: ( + r['hw'], r['framework'], r['precision'], r.get('spec_decoding', ''), + r['tp'], r['ep'], r['conc'], )) - else: - rows.sort(key=lambda r: ( - r['model_prefix'], r['hw'], r['framework'], r['precision'], r.get('spec_decoding', ''), r['tp'], r['ep'], r['conc'] + if sort_by == 'hw' + else (lambda r: ( + r['model_prefix'], r['hw'], r['framework'], r['precision'], + r.get('spec_decoding', ''), r['tp'], r['ep'], r['conc'], + )) + ) + multinode_sort_key = ( + (lambda r: ( + r['hw'], r['framework'], r['precision'], r.get('spec_decoding', ''), + r['prefill_tp'], r['prefill_ep'], r['prefill_num_workers'], + r['decode_tp'], r['decode_ep'], r['decode_num_workers'], r['conc'], + )) + if sort_by == 'hw' + else (lambda r: ( + r['model_prefix'], r['hw'], r['framework'], r['precision'], + r.get('spec_decoding', ''), + r['prefill_tp'], r['prefill_ep'], r['prefill_num_workers'], + r['decode_tp'], r['decode_ep'], r['decode_num_workers'], r['conc'], )) + ) + single_node_rows.sort(key=single_node_sort_key) + multinode_rows.sort(key=multinode_sort_key) if not rows: print('> No eval results found to summarize.') else: # Print table using tabulate MODEL_PREFIX = "Model Prefix" - headers = [ - MODEL_PREFIX, HARDWARE, FRAMEWORK, PRECISION, SPEC_DECODING, TP, EP, CONC, DP_ATTENTION, - TASK, SCORE, EM_STRICT, EM_FLEXIBLE, N_EFF, MODEL - ] - - table_rows = [ - [ - r['model_prefix'], - r['hw'], - r['framework'].upper(), - r['precision'].upper(), - r['spec_decoding'], - r['tp'], - r['ep'], - r['conc'], - r['dp_attention'], - r['task'], - f"{pct(r['score'])}{se(r['score_se'])}", - f"{pct(r['em_strict'])}{se(r['em_strict_se'])}", - f"{pct(r['em_flexible'])}{se(r['em_flexible_se'])}", - r['n_eff'] or '', - r['model'] + + if single_node_rows: + headers = [ + MODEL_PREFIX, HARDWARE, FRAMEWORK, PRECISION, SPEC_DECODING, + TP, EP, CONC, DP_ATTENTION, + TASK, SCORE, EM_STRICT, EM_FLEXIBLE, N_EFF, MODEL, + ] + table_rows = [ + [ + r['model_prefix'], + r['hw'], + r['framework'].upper(), + r['precision'].upper(), + r['spec_decoding'], + r['tp'], + r['ep'], + r['conc'], + r['dp_attention'], + r['task'], + f"{pct(r['score'])}{se(r['score_se'])}", + f"{pct(r['em_strict'])}{se(r['em_strict_se'])}", + f"{pct(r['em_flexible'])}{se(r['em_flexible_se'])}", + r['n_eff'] or '', + r['model'], + ] + for r in single_node_rows + ] + print("### Single-Node Eval Results\n") + print(tabulate(table_rows, headers=headers, tablefmt="github")) + + if multinode_rows: + headers = [ + MODEL_PREFIX, HARDWARE, FRAMEWORK, PRECISION, SPEC_DECODING, + PREFILL_TP, PREFILL_EP, PREFILL_DP_ATTN, PREFILL_WORKERS, + DECODE_TP, DECODE_EP, DECODE_DP_ATTN, DECODE_WORKERS, + CONC, TASK, SCORE, EM_STRICT, EM_FLEXIBLE, N_EFF, MODEL, + ] + table_rows = [ + [ + r['model_prefix'], + r['hw'], + r['framework'].upper(), + r['precision'].upper(), + r['spec_decoding'], + r['prefill_tp'], + r['prefill_ep'], + r['prefill_dp_attention'], + r['prefill_num_workers'], + r['decode_tp'], + r['decode_ep'], + r['decode_dp_attention'], + r['decode_num_workers'], + r['conc'], + r['task'], + f"{pct(r['score'])}{se(r['score_se'])}", + f"{pct(r['em_strict'])}{se(r['em_strict_se'])}", + f"{pct(r['em_flexible'])}{se(r['em_flexible_se'])}", + r['n_eff'] or '', + r['model'], + ] + for r in multinode_rows ] - for r in rows - ] + if single_node_rows: + print("\n") + print("### Multi-Node Eval Results\n") + print(tabulate(table_rows, headers=headers, tablefmt="github")) - print(tabulate(table_rows, headers=headers, tablefmt="github")) # Write JSON aggregate out_path = Path(f'agg_eval_{exp_name}.json') diff --git a/utils/evals/EVALS.md b/utils/evals/EVALS.md index e32d6d988..96301d9cd 100644 --- a/utils/evals/EVALS.md +++ b/utils/evals/EVALS.md @@ -6,21 +6,53 @@ Quick graded QnA which measures model performance. Examples of test suites: - **gpqa**: Graduate level, Google-Proof multiple choice questions ## When? -At the highest and median concurrency levels (all TPs), per (model, runner, framework, precision, ISL, OSL, spec-decoding, dp-attn), only for 8k1k. In eval-only mode, the server starts with expanded context length. In combined mode (RUN_EVAL=true), evals run against the same server used for throughput benchmarks. Logic is defined in `mark_eval_entries` of `utils/matrix_logic/generate_sweep_configs.py` +Evals run as **separate workflow jobs** from throughput benchmarks. The selection logic is in `mark_eval_entries()` of `utils/matrix_logic/generate_sweep_configs.py`. + +**Single-node**: At the highest and median concurrency levels (all TPs), per (model, runner, framework, precision, ISL, OSL, spec-decoding, dp-attn), only for 8k1k. + +**Multi-node**: One entry per (model, runner, framework, precision, spec-decoding, prefill-dp-attn, decode-dp-attn) with the highest max eligible concurrency, only for 8k1k. The eval job runs at `eval-conc`, the upper median of that config's eligible concurrency list. ## Why? -To verify how model outputs are affected by throughput optimizations. +To verify how model outputs are affected by throughput optimizations. - TP/Conc might affect model outputs - Check kernel implementations for correctness - If there was a tradeoff in accuracy for performance ## How? -- `run_eval`, defined in `benchmarks/benchmark_lib.sh`, is called in `benchmarks/*`. It runs EleutherAI/lm-evaluation-harness (lmeval) against the running server's OpenAI-compatible endpoint. In eval-only mode (`EVAL_ONLY=true`), the server is started once with expanded context length (up to 5x benchmark context, capped at model native max). JSON results are processed and converted to a table with `utils/collect_eval_results.py`. +`run_eval` in `benchmarks/benchmark_lib.sh` runs EleutherAI/lm-evaluation-harness against the server's OpenAI-compatible endpoint. Concurrency is set via `EVAL_CONCURRENT_REQUESTS` env var (not a CLI flag). Results are collected by `utils/collect_eval_results.py` and published as a summary table. + +### Single-node +In eval-only mode (`EVAL_ONLY=true`), the benchmark script computes `EVAL_MAX_MODEL_LEN` via `compute_eval_context_length`, starts the server with that context length, skips throughput, and runs lm-eval directly. Each framework wires that context differently (`--context-length` for SGLang, `--max_seq_len` for TRT-LLM). + +### Multi-node +Multi-node evals support two hardware paths: + +**MI355X (AMD)** — `benchmarks/multi_node/amd_utils/server.sh` +- Skips `bench.sh` when `EVAL_ONLY=true` +- Runs lm-eval via `run_eval` against the router on port 30000 +- Concurrency uses workflow-provided `EVAL_CONC` when set, otherwise falls back to max of `BENCH_MAX_CONCURRENCY` (x-separated values) +- Eval artifacts copied to `/run_logs/slurm_job-*/eval_results/` +- `runners/launch_mi355x-amds.sh` skips benchmark result collection when `EVAL_ONLY=true` and uses `find` to locate eval results + +**NVIDIA Slurm multi-node (GB200, GB300, B200, B300, H100, H200)** — via [srt-slurm](https://github.com/NVIDIA/srt-slurm) (`sa-submission-q2-2026` branch) +- `do_sweep.py` skips the benchmark stage when `EVAL_ONLY=true`, runs `_run_post_eval()` directly +- In eval-only mode, uses the full `wait_for_model()` health check (same as benchmark stage) since the benchmark health check was skipped +- `lm-eval` runner (`benchmarks/lm_eval.py`) is invoked by `do_sweep.py` as a post/eval-only step and sources InferenceX's `benchmark_lib.sh` from the mounted workspace (`/infmax-workspace`) +- Eval artifacts written to `/logs/eval_results/` inside the container, collected by launch scripts +- NVIDIA Slurm launch scripts always collect server logs for debugging but skip benchmark result collection when `EVAL_ONLY=true` +- Env vars threaded: `RUN_EVAL`, `EVAL_ONLY`, `IS_MULTINODE`, `FRAMEWORK`, `PRECISION`, `MODEL_PREFIX`, `RUNNER_TYPE`, `RESULT_FILENAME`, `SPEC_DECODING`, `ISL`, `OSL`, `PREFILL_TP/EP/NUM_WORKERS/DP_ATTN`, `DECODE_TP/EP/NUM_WORKERS/DP_ATTN`, `MODEL_NAME`, `EVAL_CONC` + +### Workflow structure +- `e2e-tests.yml`: `test-sweep-evals` (single-node) and `test-sweep-multi-node-evals` (multi-node) +- `run-sweep.yml`: `sweep-evals` (single-node) and `sweep-multi-node-evals` (multi-node) +- Both use their respective benchmark templates with `eval-only: true`, `run-eval: true` +- `collect-evals` depends on both eval jobs; `collect-results` only runs when benchmark jobs ran +- `process_changelog.py` splits eval results into `evals` (single-node) and `multinode_evals` + +### Score validation +`utils/evals/validate_scores.py` checks eval results against thresholds in `utils/evals/thresholds.json`. Runs as a separate workflow step after artifact upload so results are preserved even if validation fails. ## Misc Following files are task definitions from lmeval, more info on changes within the files - `utils/evals/gsm8k.yaml` - `utils/evals/gpqa_diamond.yaml` - - - diff --git a/utils/matrix_logic/generate_sweep_configs.py b/utils/matrix_logic/generate_sweep_configs.py index bc4562415..a95de595b 100644 --- a/utils/matrix_logic/generate_sweep_configs.py +++ b/utils/matrix_logic/generate_sweep_configs.py @@ -20,6 +20,8 @@ "8k1k": (8192, 1024) } +MIN_EVAL_CONC = 16 + # Reverse mapping for exp-name generation seq_len_itos = {v: k for k, v in seq_len_stoi.items()} @@ -33,26 +35,42 @@ def seq_len_to_str(isl: int, osl: int) -> str: return seq_len_itos.get((isl, osl), f"{isl}_{osl}") def mark_eval_entries(matrix_values: list[dict]) -> list[dict]: - """Eval selection policy (single-node only): - - Only consider 8k1k (isl=8192, osl=1024). - - For each unique (model, runner, framework, precision, isl, osl, spec-decoding, dp-attn): + """Eval selection policy: + - Single-node: only consider 8k1k (isl=8192, osl=1024). + For each unique (model, runner, framework, precision, isl, osl, spec-decoding, dp-attn): + - Ignore entries with conc < MIN_EVAL_CONC - Mark all entries at the highest CONC (all TPs) - Mark all entries at the median CONC (all TPs) + - Multi-node: for each unique (model, runner, framework, precision, + spec-decoding, prefill-dp-attn, decode-dp-attn), only 8k1k entries. + Ignore entries with all conc values < MIN_EVAL_CONC. Mark the entry with + the highest max concurrency among the remaining entries. Sets eval-conc to + the median of the eligible conc list to avoid OOM during eval. """ from collections import defaultdict - # Only run evals on 8k1k target_isl, target_osl = seq_len_stoi["8k1k"] - # Group entries by (model, runner, framework, precision, isl, osl, spec-decoding, dp-attn). - # Only include entries that have a top-level TP (i.e., single-node schema). - groups = defaultdict(list) + eval_indices = set() + mn_eval_conc = {} # index -> chosen eval concurrency for multinode entries + + def _eligible_eval_concs(entry): + conc = entry[Fields.CONC.value] + conc_values = conc if isinstance(conc, list) else [conc] + return sorted(c for c in conc_values if c >= MIN_EVAL_CONC) + + def _max_eval_conc(ie): + return max(_eligible_eval_concs(ie[1])) + + # Single-node: group by (model, runner, framework, precision, isl, osl, spec-decoding, dp-attn). + # Only 8k1k entries with a top-level TP (single-node schema). + sn_groups = defaultdict(list) for i, entry in enumerate(matrix_values): if Fields.TP.value not in entry: continue - if entry.get(Fields.ISL.value) != target_isl or entry.get(Fields.OSL.value) != target_osl: continue - + if not _eligible_eval_concs(entry): + continue key = ( entry[Fields.MODEL.value], entry[Fields.RUNNER.value], @@ -61,27 +79,54 @@ def mark_eval_entries(matrix_values: list[dict]) -> list[dict]: entry[Fields.ISL.value], entry[Fields.OSL.value], entry[Fields.SPEC_DECODING.value], - entry[Fields.DP_ATTN.value] + entry[Fields.DP_ATTN.value], ) - groups[key].append((i, entry)) - - # For each group, select entries at highest CONC and median CONC (all TPs) - eval_indices = set() - for key, entries in groups.items(): - if not entries: - continue + sn_groups[key].append((i, entry)) + for entries in sn_groups.values(): conc_values = sorted(set(e[Fields.CONC.value] for _, e in entries)) median_conc = conc_values[len(conc_values) // 2] target_concs = {conc_values[-1], median_conc} - for i, e in entries: if e[Fields.CONC.value] in target_concs: eval_indices.add(i) + # Multi-node: group by (model, runner, framework, precision, spec-decoding, prefill-dp, decode-dp). + # Only 8k1k entries with a prefill key (multi-node schema). + # Pick the entry with the highest max concurrency per group. + mn_groups = defaultdict(list) + for i, entry in enumerate(matrix_values): + if Fields.TP.value in entry: + continue + if Fields.PREFILL.value not in entry: + continue + if entry.get(Fields.ISL.value) != target_isl or entry.get(Fields.OSL.value) != target_osl: + continue + if not _eligible_eval_concs(entry): + continue + key = ( + entry[Fields.MODEL.value], + entry[Fields.RUNNER.value], + entry[Fields.FRAMEWORK.value], + entry[Fields.PRECISION.value], + entry[Fields.SPEC_DECODING.value], + entry.get(Fields.PREFILL.value, {}).get(Fields.DP_ATTN.value), + entry.get(Fields.DECODE.value, {}).get(Fields.DP_ATTN.value), + ) + mn_groups[key].append((i, entry)) + + for entries in mn_groups.values(): + best_idx, best_entry = max(entries, key=_max_eval_conc) + eval_indices.add(best_idx) + # Set eval-conc to median of eligible conc values to avoid OOM during eval + eval_concs = _eligible_eval_concs(best_entry) + mn_eval_conc[best_idx] = eval_concs[len(eval_concs) // 2] + # Mark the selected entries for i, entry in enumerate(matrix_values): entry[Fields.RUN_EVAL.value] = i in eval_indices + if i in mn_eval_conc: + entry[Fields.EVAL_CONC.value] = mn_eval_conc[i] return matrix_values @@ -557,9 +602,18 @@ def generate_test_config_sweep(args, all_config_data): runner = val[Fields.RUNNER.value] disagg = val.get(Fields.DISAGG.value, False) + # Build seq-len filter if --seq-lens was provided + seq_lens_filter = None + if getattr(args, 'seq_lens', None): + seq_lens_filter = {seq_len_stoi[s] for s in args.seq_lens} + for seq_len_config in val[Fields.SEQ_LEN_CONFIGS.value]: isl = seq_len_config[Fields.ISL.value] osl = seq_len_config[Fields.OSL.value] + + if seq_lens_filter and (isl, osl) not in seq_lens_filter: + continue + seq_len_str = seq_len_to_str(isl, osl) for bmk in seq_len_config[Fields.SEARCH_SPACE.value]: @@ -905,6 +959,13 @@ def main(): required=False, help='Only include these concurrency values. Values must exist in the config conc-range/list.' ) + test_config_keys_parser.add_argument( + '--seq-lens', + nargs='+', + choices=list(seq_len_stoi.keys()), + required=False, + help='Only include these sequence length configurations (e.g., 1k1k 8k1k)' + ) test_config_keys_parser.add_argument( '-h', '--help', action='help', diff --git a/utils/matrix_logic/test_generate_sweep_configs.py b/utils/matrix_logic/test_generate_sweep_configs.py index d05299472..e19d32522 100644 --- a/utils/matrix_logic/test_generate_sweep_configs.py +++ b/utils/matrix_logic/test_generate_sweep_configs.py @@ -2,14 +2,15 @@ import pytest import argparse from generate_sweep_configs import ( + MIN_EVAL_CONC, seq_len_stoi, seq_len_itos, seq_len_to_str, generate_full_sweep, generate_runner_model_sweep_config, + mark_eval_entries, apply_node_type_defaults, expand_config_keys, - mark_eval_entries, ) @@ -181,6 +182,209 @@ def test_unknown_sequence_lengths(self): assert seq_len_to_str(4096, 1024) == "4096_1024" +# ============================================================================= +# Test mark_eval_entries +# ============================================================================= + +class TestMarkEvalEntries: + """Tests for eval matrix selection policy.""" + + def test_single_node_skips_eval_entries_below_min_conc(self): + """Single-node eval selection should ignore conc values below MIN_EVAL_CONC.""" + matrix_values = [ + { + "model": "deepseek-ai/DeepSeek-R1-0528", + "runner": "b200", + "framework": "sglang", + "precision": "fp8", + "isl": 8192, + "osl": 1024, + "spec-decoding": "none", + "dp-attn": False, + "tp": 8, + "conc": 8, + }, + { + "model": "deepseek-ai/DeepSeek-R1-0528", + "runner": "b200", + "framework": "sglang", + "precision": "fp8", + "isl": 8192, + "osl": 1024, + "spec-decoding": "none", + "dp-attn": False, + "tp": 8, + "conc": MIN_EVAL_CONC, + }, + { + "model": "deepseek-ai/DeepSeek-R1-0528", + "runner": "b200", + "framework": "sglang", + "precision": "fp8", + "isl": 8192, + "osl": 1024, + "spec-decoding": "none", + "dp-attn": False, + "tp": 8, + "conc": 32, + }, + { + "model": "deepseek-ai/DeepSeek-R1-0528", + "runner": "b200", + "framework": "sglang", + "precision": "fp8", + "isl": 8192, + "osl": 1024, + "spec-decoding": "none", + "dp-attn": False, + "tp": 8, + "conc": 64, + }, + ] + + result = mark_eval_entries(matrix_values) + + assert result[0]["run-eval"] is False + assert result[1]["run-eval"] is False + assert result[2]["run-eval"] is True + assert result[3]["run-eval"] is True + + def test_multi_node_skips_groups_with_only_conc_below_min_conc(self): + """Multinode eval selection should skip groups whose conc lists are all below MIN_EVAL_CONC.""" + matrix_values = [ + { + "model": "deepseek-ai/DeepSeek-R1-0528", + "runner": "b200-multinode", + "framework": "dynamo-trt", + "precision": "fp8", + "isl": 8192, + "osl": 1024, + "spec-decoding": "none", + "prefill": { + "num-worker": 1, + "tp": 8, + "ep": 1, + "dp-attn": False, + }, + "decode": { + "num-worker": 1, + "tp": 8, + "ep": 1, + "dp-attn": False, + }, + "conc": [1], + } + ] + + result = mark_eval_entries(matrix_values) + + assert result[0]["run-eval"] is False + assert "eval-conc" not in result[0] + + def test_multi_node_eval_conc_uses_only_conc_values_at_or_above_min_conc(self): + """Multinode eval-conc should be chosen from conc values >= MIN_EVAL_CONC.""" + matrix_values = [ + { + "model": "deepseek-ai/DeepSeek-R1-0528", + "runner": "b200-multinode", + "framework": "dynamo-trt", + "precision": "fp8", + "isl": 8192, + "osl": 1024, + "spec-decoding": "none", + "prefill": { + "num-worker": 1, + "tp": 8, + "ep": 1, + "dp-attn": True, + }, + "decode": { + "num-worker": 4, + "tp": 8, + "ep": 1, + "dp-attn": False, + }, + "conc": [8, 16, 32], + }, + { + "model": "deepseek-ai/DeepSeek-R1-0528", + "runner": "b200-multinode", + "framework": "dynamo-trt", + "precision": "fp8", + "isl": 8192, + "osl": 1024, + "spec-decoding": "none", + "prefill": { + "num-worker": 1, + "tp": 8, + "ep": 1, + "dp-attn": True, + }, + "decode": { + "num-worker": 4, + "tp": 8, + "ep": 1, + "dp-attn": False, + }, + "conc": [8], + }, + ] + + result = mark_eval_entries(matrix_values) + + assert result[0]["run-eval"] is True + assert result[0]["eval-conc"] == 32 + assert result[1]["run-eval"] is False + + def test_marks_highest_and_median_conc(self): + """Should mark highest and median concurrency for 8k1k entries.""" + entries = [ + {'model': 'm', 'runner': 'r', 'framework': 'f', 'precision': 'fp8', + 'isl': 8192, 'osl': 1024, 'tp': 2, 'conc': 32, + 'spec-decoding': False, 'dp-attn': False, 'run-eval': False}, + {'model': 'm', 'runner': 'r', 'framework': 'f', 'precision': 'fp8', + 'isl': 8192, 'osl': 1024, 'tp': 2, 'conc': 128, + 'spec-decoding': False, 'dp-attn': False, 'run-eval': False}, + {'model': 'm', 'runner': 'r', 'framework': 'f', 'precision': 'fp8', + 'isl': 8192, 'osl': 1024, 'tp': 2, 'conc': 512, + 'spec-decoding': False, 'dp-attn': False, 'run-eval': False}, + ] + result = mark_eval_entries(entries) + # conc values: [32, 128, 512]. median=128 (index 1), highest=512 + assert result[0]['run-eval'] is False # conc=32 + assert result[1]['run-eval'] is True # conc=128 (median) + assert result[2]['run-eval'] is True # conc=512 (highest) + + def test_non_8k1k_never_marked(self): + """Entries with non-8k1k seq lengths should never be eval-marked.""" + entries = [ + {'model': 'm', 'runner': 'r', 'framework': 'f', 'precision': 'fp8', + 'isl': 1024, 'osl': 1024, 'tp': 2, 'conc': 512, + 'spec-decoding': False, 'dp-attn': False, 'run-eval': False}, + ] + result = mark_eval_entries(entries) + assert result[0]['run-eval'] is False + + def test_never_marks_all_entries(self): + """mark_eval_entries should never mark every single-node entry, + ensuring the e2e splitting logic can distinguish default from evals-only.""" + entries = [ + {'model': 'm', 'runner': 'r', 'framework': 'f', 'precision': 'fp8', + 'isl': 8192, 'osl': 1024, 'tp': 2, 'conc': c, + 'spec-decoding': False, 'dp-attn': False, 'run-eval': False} + for c in [32, 64, 128, 256, 512] + ] + [ + # Non-8k1k entry that should never be marked + {'model': 'm', 'runner': 'r', 'framework': 'f', 'precision': 'fp8', + 'isl': 1024, 'osl': 1024, 'tp': 2, 'conc': 64, + 'spec-decoding': False, 'dp-attn': False, 'run-eval': False}, + ] + result = mark_eval_entries(entries) + non_prefill = [x for x in result if 'prefill' not in x] + assert not all(x['run-eval'] for x in non_prefill), \ + "mark_eval_entries must not mark all entries — would break e2e splitting" + + # ============================================================================= # Test generate_full_sweep for single-node # ============================================================================= @@ -1595,7 +1799,7 @@ def _split_e2e_configs(data): Returns (SINGLE, MULTI, EVALS) lists matching the workflow filters. """ single = [x for x in data if 'prefill' not in x and not x.get('eval-only', False)] - multi = [x for x in data if 'prefill' in x] + multi = [x for x in data if 'prefill' in x and not x.get('eval-only', False)] evals = [x for x in data if 'prefill' not in x and x.get('run-eval', False)] return single, multi, evals @@ -1682,64 +1886,3 @@ def test_prefill_entries_never_in_single_or_evals(self, mixed_entries): assert all('prefill' not in x for x in evals) -class TestMarkEvalEntries: - """Verify mark_eval_entries only marks highest/median concurrency at 8k1k.""" - - def test_marks_highest_and_median_conc(self): - """Should mark highest and median concurrency for 8k1k entries.""" - entries = [ - {'model': 'm', 'runner': 'r', 'framework': 'f', 'precision': 'fp8', - 'isl': 8192, 'osl': 1024, 'tp': 2, 'conc': 32, - 'spec-decoding': False, 'dp-attn': False, 'run-eval': False}, - {'model': 'm', 'runner': 'r', 'framework': 'f', 'precision': 'fp8', - 'isl': 8192, 'osl': 1024, 'tp': 2, 'conc': 128, - 'spec-decoding': False, 'dp-attn': False, 'run-eval': False}, - {'model': 'm', 'runner': 'r', 'framework': 'f', 'precision': 'fp8', - 'isl': 8192, 'osl': 1024, 'tp': 2, 'conc': 512, - 'spec-decoding': False, 'dp-attn': False, 'run-eval': False}, - ] - result = mark_eval_entries(entries) - # conc values: [32, 128, 512]. median=128 (index 1), highest=512 - assert result[0]['run-eval'] is False # conc=32 - assert result[1]['run-eval'] is True # conc=128 (median) - assert result[2]['run-eval'] is True # conc=512 (highest) - - def test_non_8k1k_never_marked(self): - """Entries with non-8k1k seq lengths should never be eval-marked.""" - entries = [ - {'model': 'm', 'runner': 'r', 'framework': 'f', 'precision': 'fp8', - 'isl': 1024, 'osl': 1024, 'tp': 2, 'conc': 512, - 'spec-decoding': False, 'dp-attn': False, 'run-eval': False}, - ] - result = mark_eval_entries(entries) - assert result[0]['run-eval'] is False - - def test_multinode_entries_never_marked(self): - """Entries without top-level tp (multi-node) should never be eval-marked.""" - entries = [ - {'model': 'm', 'runner': 'r', 'framework': 'f', 'precision': 'fp8', - 'isl': 8192, 'osl': 1024, 'conc': 512, - 'spec-decoding': False, 'dp-attn': False, 'run-eval': False, - 'prefill': {'tp': 2, 'num-worker': 1}}, - ] - result = mark_eval_entries(entries) - assert result[0]['run-eval'] is False - - def test_never_marks_all_entries(self): - """mark_eval_entries should never mark every single-node entry, - ensuring the e2e splitting logic can distinguish default from evals-only.""" - entries = [ - {'model': 'm', 'runner': 'r', 'framework': 'f', 'precision': 'fp8', - 'isl': 8192, 'osl': 1024, 'tp': 2, 'conc': c, - 'spec-decoding': False, 'dp-attn': False, 'run-eval': False} - for c in [32, 64, 128, 256, 512] - ] + [ - # Non-8k1k entry that should never be marked - {'model': 'm', 'runner': 'r', 'framework': 'f', 'precision': 'fp8', - 'isl': 1024, 'osl': 1024, 'tp': 2, 'conc': 64, - 'spec-decoding': False, 'dp-attn': False, 'run-eval': False}, - ] - result = mark_eval_entries(entries) - non_prefill = [x for x in result if 'prefill' not in x] - assert not all(x['run-eval'] for x in non_prefill), \ - "mark_eval_entries must not mark all entries — would break e2e splitting" diff --git a/utils/matrix_logic/validation.py b/utils/matrix_logic/validation.py index 312952b96..ce10840b5 100644 --- a/utils/matrix_logic/validation.py +++ b/utils/matrix_logic/validation.py @@ -54,6 +54,7 @@ class Fields(Enum): # Eval RUN_EVAL = 'run-eval' EVAL_ONLY = 'eval-only' + EVAL_CONC = 'eval-conc' """ @@ -128,6 +129,8 @@ class MultiNodeMatrixEntry(BaseModel): exp_name: str = Field(alias=Fields.EXP_NAME.value) disagg: bool run_eval: bool = Field(alias=Fields.RUN_EVAL.value) + eval_only: bool = Field(alias=Fields.EVAL_ONLY.value, default=False) + eval_conc: Optional[int] = Field(default=None, alias=Fields.EVAL_CONC.value) def validate_matrix_entry(entry: dict, is_multinode: bool) -> dict: @@ -363,6 +366,7 @@ class ChangelogMatrixEntry(BaseModel): multi_node: dict[str, list[MultiNodeMatrixEntry] ] = Field(default_factory=dict) evals: list[SingleNodeMatrixEntry] = Field(default_factory=list) + multinode_evals: list[MultiNodeMatrixEntry] = Field(default_factory=list) changelog_metadata: ChangelogMetadata diff --git a/utils/process_changelog.py b/utils/process_changelog.py index 7da19d030..8df60434b 100644 --- a/utils/process_changelog.py +++ b/utils/process_changelog.py @@ -82,6 +82,7 @@ def main(): "single_node": defaultdict(list), "multi_node": defaultdict(list), "evals": [], + "multinode_evals": [], "changelog_metadata": { "base_ref": args.base_ref, "head_ref": args.head_ref, @@ -163,7 +164,8 @@ def main(): else: final_results["single_node"][seq_len_str].append(result) - final_results["evals"] = all_eval_results + final_results["evals"] = [e for e in all_eval_results if e.get("prefill") is None] + final_results["multinode_evals"] = [e for e in all_eval_results if e.get("prefill") is not None] # Validate final results structure validated = ChangelogMatrixEntry.model_validate(final_results)