From e81250acea620c0e8ffe8df0791ef578bf988574 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 16:01:47 +0000 Subject: [PATCH 1/2] Update AMD single-node DeepSeek R1 SGLang image from v0.5.8 to v0.5.9 Update image tags for 4 single-node configs: - dsr1-fp4-mi355x-sglang: v0.5.8-rocm700-mi35x -> v0.5.9-rocm700-mi35x - dsr1-fp8-mi300x-sglang: v0.5.8-rocm700-mi30x -> v0.5.9-rocm700-mi30x - dsr1-fp8-mi325x-sglang: v0.5.8-rocm700-mi30x -> v0.5.9-rocm700-mi30x - dsr1-fp8-mi355x-sglang: v0.5.8-rocm700-mi35x -> v0.5.9-rocm700-mi35x Key SGLang v0.5.9 changes for AMD: - AITER v0.1.10.post3 with FP8 Prefill/Decode/KV Cache - FP8 prefill attention kernel integration - MORI EP two-batch overlapping optimization - OOM fix for DeepSeek weight loading Closes #812 Co-authored-by: Cameron Quilici --- .github/configs/amd-master.yaml | 8 ++++---- perf-changelog.yaml | 11 ++++++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index d7e51cb28..4ed3eb69f 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -1,5 +1,5 @@ dsr1-fp4-mi355x-sglang: - image: lmsysorg/sglang:v0.5.8-rocm700-mi35x + image: lmsysorg/sglang:v0.5.9-rocm700-mi35x model: amd/DeepSeek-R1-0528-MXFP4-Preview model-prefix: dsr1 runner: mi355x @@ -73,7 +73,7 @@ dsr1-fp4-mi355x-atom-mtp: - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp } dsr1-fp8-mi300x-sglang: - image: lmsysorg/sglang:v0.5.8-rocm700-mi30x + image: lmsysorg/sglang:v0.5.9-rocm700-mi30x model: deepseek-ai/DeepSeek-R1-0528 model-prefix: dsr1 runner: mi300x @@ -95,7 +95,7 @@ dsr1-fp8-mi300x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } dsr1-fp8-mi325x-sglang: - image: lmsysorg/sglang:v0.5.8-rocm700-mi30x + image: lmsysorg/sglang:v0.5.9-rocm700-mi30x model: deepseek-ai/DeepSeek-R1-0528 model-prefix: dsr1 runner: mi325x @@ -117,7 +117,7 @@ dsr1-fp8-mi325x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } dsr1-fp8-mi355x-sglang: - image: lmsysorg/sglang:v0.5.8-rocm700-mi35x + image: lmsysorg/sglang:v0.5.9-rocm700-mi35x model: deepseek-ai/DeepSeek-R1-0528 model-prefix: dsr1 runner: mi355x diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 9c4c9e438..3ce78d63a 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -733,4 +733,13 @@ - "Extend concurrency range to conc-end: 256 across all sequence lengths (1k1k, 1k8k, 8k1k)" - "Fix MTP 1k8k conc-start from 256 to 4 to enable full concurrency sweep" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/699 - + +- config-keys: + - dsr1-fp4-mi355x-sglang + - dsr1-fp8-mi300x-sglang + - dsr1-fp8-mi325x-sglang + - dsr1-fp8-mi355x-sglang + description: + - "Update SGLang image from v0.5.8 to v0.5.9 for AMD single-node DeepSeek R1 configs" + - "Key changes: AITER v0.1.10.post3 with FP8 Prefill/Decode/KV Cache, FP8 prefill attention kernel, MORI EP two-batch overlapping, OOM fix for DeepSeek weight loading" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/XXX From 7f4ed70bdc1a2f33c7e475b20615198b6a28c4e0 Mon Sep 17 00:00:00 2001 From: Cameron Quilici Date: Thu, 26 Feb 2026 11:11:45 -0600 Subject: [PATCH 2/2] Update perf-changelog.yaml --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 3ce78d63a..1cde3c779 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -742,4 +742,4 @@ description: - "Update SGLang image from v0.5.8 to v0.5.9 for AMD single-node DeepSeek R1 configs" - "Key changes: AITER v0.1.10.post3 with FP8 Prefill/Decode/KV Cache, FP8 prefill attention kernel, MORI EP two-batch overlapping, OOM fix for DeepSeek weight loading" - pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/XXX + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/816