Skip to content
Merged
4 changes: 2 additions & 2 deletions .github/configs/amd-master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -953,7 +953,7 @@ dsr1-fp8-mi355x-sglang-disagg-mtp:


dsr1-fp4-mi355x-sglang-disagg:
image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0313-2
image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-3
model: amd/DeepSeek-R1-0528-MXFP4
model-prefix: dsr1
runner: mi355x-disagg
Expand Down Expand Up @@ -1161,7 +1161,7 @@ dsr1-fp4-mi355x-sglang-disagg:
- "DECODE_MTP_SIZE=0"

dsr1-fp4-mi355x-sglang-disagg-mtp:
image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0313-2
image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-3
model: amd/DeepSeek-R1-0528-MXFP4
model-prefix: dsr1
runner: mi355x-disagg
Expand Down
8 changes: 8 additions & 0 deletions perf-changelog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1653,3 +1653,11 @@
description:
- "Add kv-cache-dtype fp8, max-cudagraph-capture-size 2048, max-num-batched-tokens, and stream-interval 20 to server launch args"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1047

# Changelog entry for the two H200 dynamo configs listed under config-keys.
- config-keys:
- dsr1-fp8-h200-dynamo-trt
- dsr1-fp8-h200-dynamo-sglang
description:
- "Add H200 multinode evals-only runs"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1094
# NOTE(review): presumably restricts this entry to eval runs only (no perf
# benchmarks) -- confirm against whatever consumes perf-changelog.yaml.
evals-only: true
2 changes: 2 additions & 0 deletions runners/launch_h200-dgxc-slurm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ EOF

# Override the job name in the config file with the runner name
sed -i "s/^name:.*/name: \"${RUNNER_NAME}\"/" "$CONFIG_FILE"
# Remove any existing top-level `health_check:` key together with the
# space-indented lines that follow it. The sed range ends at the next line
# that does not start with a space; that line matches neither delete pattern,
# so it is left in place.
# NOTE(review): this command and the printf below operate on
# "${CONFIG_FILE%%:*}" (CONFIG_FILE with everything from the first ':' onward
# stripped), while the neighbouring sed and srtctl calls use "$CONFIG_FILE"
# unmodified -- confirm whether CONFIG_FILE can carry a ':suffix' and, if so,
# whether the name override above should strip it as well.
sed -i '/^health_check:/,/^[^ ]/{ /^health_check:/d; /^ /d; }' "${CONFIG_FILE%%:*}"
# Append a fresh health_check section (max_attempts 720, interval_seconds 10)
# to the end of the same file.
# NOTE(review): presumably this allows up to 720 * 10 s = 2 h of polling --
# confirm how srtctl interprets these two fields.
printf '\nhealth_check:\n max_attempts: 720\n interval_seconds: 10\n' >> "${CONFIG_FILE%%:*}"
# Run `srtctl apply` on the config with tags built from the model prefix,
# precision, sequence lengths and today's date; stdout and stderr are captured
# together and then echoed.
SRTCTL_OUTPUT=$(srtctl apply -f "$CONFIG_FILE" --tags "h200,${MODEL_PREFIX},${PRECISION},${ISL}x${OSL},infmax-$(date +%Y%m%d)" 2>&1)
echo "$SRTCTL_OUTPUT"

Expand Down
Loading