diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 06c37888a..e2900490d 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -205,7 +205,7 @@ dsr1-fp8-b200-trt-mtp: - { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 256, spec-decoding: mtp } dsr1-fp8-h200-sglang: - image: lmsysorg/sglang:v0.5.6-cu129-amd64 + image: lmsysorg/sglang:v0.5.7-cu129-amd64 model: deepseek-ai/DeepSeek-R1-0528 model-prefix: dsr1 runner: h200 diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 71a44dabb..087184f08 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -195,3 +195,9 @@ - "Remove deprecated --max-seq-len-to-capture flag" - "Add HIP_VISIBLE_DEVICES env var for Ray compatibility in vLLM 0.14+" pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/496 + +- config-keys: + - dsr1-fp8-h200-sglang + description: + - "Update H200 DeepSeek R1 FP8 SGLang image from v0.5.6 to v0.5.7" + pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/538