# Patch summary (free-text preamble; everything before the first "diff --git" header is commentary):
#
# 1. .github/configs/amd-master.yaml
#    Changes the `image` tag of `dsr1-fp4-mi355x-sglang-disagg` and
#    `dsr1-fp4-mi355x-sglang-disagg-mtp` from
#    `...-mi35x-mori-0313-2` to `...-mi35x-mori-0227-3`.
#
# 2. perf-changelog.yaml
#    Appends one entry for the config keys `dsr1-fp8-h200-dynamo-trt` and
#    `dsr1-fp8-h200-dynamo-sglang`, marked `evals-only: true`, linking PR 1094.
#
# 3. runners/launch_h200-dgxc-slurm.sh
#    After the existing job-name override, adds two commands:
#      - a `sed -i` that deletes the `health_check:` line and the space-indented
#        lines that follow it, up to the next line starting with a non-space
#        character;
#      - a `printf ... >>` that appends a `health_check:` block with
#        `max_attempts: 720` and `interval_seconds: 10`.
#
# NOTE(review): the two added commands operate on "${CONFIG_FILE%%:*}" (everything
#   before the first ':'), while the existing name-override `sed` and the
#   `srtctl apply -f` call use "$CONFIG_FILE" unchanged. Confirm whether CONFIG_FILE
#   can carry a ':' suffix here; if it cannot, the expansion is a no-op, and if it
#   can, the three uses disagree about which path is edited.
# NOTE(review): the changelog entry added by this patch lists only the H200 keys;
#   the MI355X image-tag change has no changelog entry in this patch. Confirm intended.
# NOTE(review): this copy of the patch appears to have had its line breaks and
#   indentation collapsed; hunk line counts cannot be checked against the visible text.
diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index bf8f20be7..10a8230bd 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -953,7 +953,7 @@ dsr1-fp8-mi355x-sglang-disagg-mtp: dsr1-fp4-mi355x-sglang-disagg: - image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0313-2 + image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-3 model: amd/DeepSeek-R1-0528-MXFP4 model-prefix: dsr1 runner: mi355x-disagg @@ -1161,7 +1161,7 @@ dsr1-fp4-mi355x-sglang-disagg: - "DECODE_MTP_SIZE=0" dsr1-fp4-mi355x-sglang-disagg-mtp: - image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0313-2 + image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-3 model: amd/DeepSeek-R1-0528-MXFP4 model-prefix: dsr1 runner: mi355x-disagg diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 1514eb83d..300e2c11d 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1653,3 +1653,11 @@ description: - "Add kv-cache-dtype fp8, max-cudagraph-capture-size 2048, max-num-batched-tokens, and stream-interval 20 to server launch args" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1047 + +- config-keys: + - dsr1-fp8-h200-dynamo-trt + - dsr1-fp8-h200-dynamo-sglang + description: + - "Add H200 multinode evals-only runs" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1094 + evals-only: true diff --git a/runners/launch_h200-dgxc-slurm.sh b/runners/launch_h200-dgxc-slurm.sh index 4dba44931..e11ca7b20 100755 --- a/runners/launch_h200-dgxc-slurm.sh +++ b/runners/launch_h200-dgxc-slurm.sh @@ -126,6 +126,8 @@ EOF # Override the job name in the config file with the runner name sed -i "s/^name:.*/name: \"${RUNNER_NAME}\"/" "$CONFIG_FILE" + sed -i '/^health_check:/,/^[^ ]/{ /^health_check:/d; /^ /d; }' "${CONFIG_FILE%%:*}" + printf '\nhealth_check:\n max_attempts: 720\n interval_seconds: 10\n' >> "${CONFIG_FILE%%:*}" SRTCTL_OUTPUT=$(srtctl apply -f "$CONFIG_FILE" --tags 
"h200,${MODEL_PREFIX},${PRECISION},${ISL}x${OSL},infmax-$(date +%Y%m%d)" 2>&1) echo "$SRTCTL_OUTPUT"