diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 0bed096f3..e688f6b91 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -1,5 +1,5 @@ dsr1-fp4-mi355x-sglang: - image: lmsysorg/sglang:v0.5.8-rocm700-mi35x + image: lmsysorg/sglang:v0.5.9-rocm700-mi35x model: amd/DeepSeek-R1-0528-MXFP4-Preview model-prefix: dsr1 runner: mi355x @@ -73,7 +73,7 @@ dsr1-fp4-mi355x-atom-mtp: - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp } dsr1-fp8-mi300x-sglang: - image: lmsysorg/sglang:v0.5.8-rocm700-mi30x + image: lmsysorg/sglang:v0.5.9-rocm700-mi30x model: deepseek-ai/DeepSeek-R1-0528 model-prefix: dsr1 runner: mi300x @@ -95,7 +95,7 @@ dsr1-fp8-mi300x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } dsr1-fp8-mi325x-sglang: - image: lmsysorg/sglang:v0.5.8-rocm700-mi30x + image: lmsysorg/sglang:v0.5.9-rocm700-mi30x model: deepseek-ai/DeepSeek-R1-0528 model-prefix: dsr1 runner: mi325x @@ -117,7 +117,7 @@ dsr1-fp8-mi325x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } dsr1-fp8-mi355x-sglang: - image: lmsysorg/sglang:v0.5.8-rocm700-mi35x + image: lmsysorg/sglang:v0.5.9-rocm700-mi35x model: deepseek-ai/DeepSeek-R1-0528 model-prefix: dsr1 runner: mi355x diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 18c27597a..30782b124 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -745,3 +745,12 @@ - "Gains: fused add+rmsnorm+pad for GPT-OSS (automatic via PassManager), AITER attention block size fix" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/806 +- config-keys: + - dsr1-fp4-mi355x-sglang + - dsr1-fp8-mi300x-sglang + - dsr1-fp8-mi325x-sglang + - dsr1-fp8-mi355x-sglang + description: + - "Update SGLang image from v0.5.8 to v0.5.9 for AMD single-node DeepSeek R1 configs" + - "Key changes: AITER v0.1.10.post3 with FP8 Prefill/Decode/KV Cache, FP8 prefill attention kernel, MORI EP two-batch overlapping, OOM fix for DeepSeek weight loading" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/816