From 10f16e68f7e00a110b06b1eb080b00460333cf8f Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 03:48:45 -0500 Subject: [PATCH 01/33] feat: enhance Qwen benchmark scripts with additional parameters * Added CONTEXT_LENGTH and MAX_PREFILL_TOKENS variables for better configuration. * Updated launch_server command with new options: --tokenizer-worker-num, --enable-aiter-allreduce-fusion, --cuda-graph-max-bs, --context-length, --disable-radix-cache, --max-prefill-tokens, and --scheduler-recv-interval. --- benchmarks/single_node/qwen3.5_bf16_mi355x.sh | 3 ++- benchmarks/single_node/qwen3.5_fp8_mi355x.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/benchmarks/single_node/qwen3.5_bf16_mi355x.sh b/benchmarks/single_node/qwen3.5_bf16_mi355x.sh index ce82b9a53..bf4d3feb7 100755 --- a/benchmarks/single_node/qwen3.5_bf16_mi355x.sh +++ b/benchmarks/single_node/qwen3.5_bf16_mi355x.sh @@ -44,10 +44,11 @@ python3 -m sglang.launch_server \ --tokenizer-worker-num 6 \ --enable-aiter-allreduce-fusion \ --cuda-graph-max-bs $CONC \ + --context-length $CONTEXT_LENGTH \ --disable-radix-cache \ --max-prefill-tokens $MAX_PREFILL_TOKENS \ --scheduler-recv-interval 30 \ - --mem-fraction-static 0.75 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 & + --mem-fraction-static 0.8 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 & SERVER_PID=$! 
diff --git a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh index ce82b9a53..bf4d3feb7 100644 --- a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh +++ b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh @@ -44,10 +44,11 @@ python3 -m sglang.launch_server \ --tokenizer-worker-num 6 \ --enable-aiter-allreduce-fusion \ --cuda-graph-max-bs $CONC \ + --context-length $CONTEXT_LENGTH \ --disable-radix-cache \ --max-prefill-tokens $MAX_PREFILL_TOKENS \ --scheduler-recv-interval 30 \ - --mem-fraction-static 0.75 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 & + --mem-fraction-static 0.8 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 & SERVER_PID=$! From b22010e82dc6418925f9ccb16815ddd00c0180ea Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 03:49:10 -0500 Subject: [PATCH 02/33] Update perf-changelog.yaml to include new Qwen3.5 FP8 and BF16 SGLang benchmark configurations for MI355X, enhancing performance with updated CLI arguments. --- perf-changelog.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 6721dbb1e..575d2bbb8 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1358,3 +1358,10 @@ description: - "Enable SGLANG_ENABLE_SPEC_V2=1 for Qwen3.5 FP8 H200 SGLang MTP" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1017 + +- config-keys: + - qwen3.5-fp8-mi355x-sglang + - qwen3.5-bf16-mi355x-sglang + description: + - "Update cli args of Qwen3.5 FP8 and BF16 SGLang benchmarks for MI355X to achieve better performance" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/942 From 22d9500abe6780c773a63a84c2420e961b8f8c29 Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 03:49:10 -0500 Subject: [PATCH 03/33] Update SGLang image versions for Qwen3.5 configurations in amd-master.yaml to v0.5.9, ensuring compatibility with recent changes. 
--- .github/configs/amd-master.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 89318004b..d6a02bf0f 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -114,7 +114,7 @@ dsr1-fp8-mi355x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-bf16-mi355x-sglang: - image: rocm/sgl-dev:v0.5.8.post1-rocm720-mi35x-20260215 + image: lmsysorg/sglang-rocm:v0.5.9-rocm720-mi35x-20260325 model: Qwen/Qwen3.5-397B-A17B model-prefix: qwen3.5 runner: mi355x @@ -186,7 +186,7 @@ qwen3.5-fp8-mi325x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-fp8-mi355x-sglang: - image: rocm/sgl-dev:v0.5.8.post1-rocm720-mi35x-20260218 + image: lmsysorg/sglang-rocm:v0.5.9-rocm720-mi35x-20260325 model: Qwen/Qwen3.5-397B-A17B-FP8 model-prefix: qwen3.5 runner: mi355x From 6261169fbc61acfc548c434f80362d199c3187bb Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 03:49:10 -0500 Subject: [PATCH 04/33] use 0327 build --- .github/configs/amd-master.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index d6a02bf0f..241016c53 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -114,7 +114,7 @@ dsr1-fp8-mi355x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-bf16-mi355x-sglang: - image: lmsysorg/sglang-rocm:v0.5.9-rocm720-mi35x-20260325 + image: lmsysorg/sglang-rocm:v0.5.9-rocm720-mi35x-20260327 model: Qwen/Qwen3.5-397B-A17B model-prefix: qwen3.5 runner: mi355x @@ -186,7 +186,7 @@ qwen3.5-fp8-mi325x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-fp8-mi355x-sglang: - image: lmsysorg/sglang-rocm:v0.5.9-rocm720-mi35x-20260325 + image: lmsysorg/sglang-rocm:v0.5.9-rocm720-mi35x-20260327 model: Qwen/Qwen3.5-397B-A17B-FP8 model-prefix: qwen3.5 runner: mi355x From 559daa36315ac90480ca021b11a0979911d4d0f9 Mon Sep 17 
00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 03:49:10 -0500 Subject: [PATCH 05/33] Update perf-changelog.yaml to reflect the new PR link for Qwen3.5 FP8 and BF16 SGLang benchmarks on MI355X, ensuring accurate tracking of performance enhancements. --- perf-changelog.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 575d2bbb8..371a0e416 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1364,4 +1364,5 @@ - qwen3.5-bf16-mi355x-sglang description: - "Update cli args of Qwen3.5 FP8 and BF16 SGLang benchmarks for MI355X to achieve better performance" - pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/942 + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/980 + From 8be5c3b030ba128d2ed1595a5cf18542f92bdb92 Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 03:49:10 -0500 Subject: [PATCH 06/33] Update Qwen3.5 image tags in amd-master.yaml to v0.5.10rc0 for MI355X configurations and adjust perf-changelog.yaml to reflect the changes, ensuring accurate performance tracking and compatibility. 
--- .github/configs/amd-master.yaml | 4 ++-- perf-changelog.yaml | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 241016c53..099ce7156 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -114,7 +114,7 @@ dsr1-fp8-mi355x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-bf16-mi355x-sglang: - image: lmsysorg/sglang-rocm:v0.5.9-rocm720-mi35x-20260327 + image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260327 model: Qwen/Qwen3.5-397B-A17B model-prefix: qwen3.5 runner: mi355x @@ -186,7 +186,7 @@ qwen3.5-fp8-mi325x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-fp8-mi355x-sglang: - image: lmsysorg/sglang-rocm:v0.5.9-rocm720-mi35x-20260327 + image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260327 model: Qwen/Qwen3.5-397B-A17B-FP8 model-prefix: qwen3.5 runner: mi355x diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 371a0e416..c21d66228 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1362,7 +1362,9 @@ - config-keys: - qwen3.5-fp8-mi355x-sglang - qwen3.5-bf16-mi355x-sglang + - qwen3.5-fp8-mi355x-sglang description: - - "Update cli args of Qwen3.5 FP8 and BF16 SGLang benchmarks for MI355X to achieve better performance" + - "Fix MI355X Qwen 3.5 image tag: v0.5.9-rocm720-mi35x-20260327 is not on Docker Hub (404)" + - "Use lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260329" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/980 From c4120a8b7e43a7c21cbe9f41aac2336d3588270d Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 03:49:10 -0500 Subject: [PATCH 07/33] Update Qwen3.5 FP8 and BF16 SGLang benchmark descriptions in perf-changelog.yaml to reflect improved CLI arguments for MI355X, ensuring better performance tracking. 
--- perf-changelog.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index c21d66228..6cd30602b 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1364,7 +1364,7 @@ - qwen3.5-bf16-mi355x-sglang - qwen3.5-fp8-mi355x-sglang description: - - "Fix MI355X Qwen 3.5 image tag: v0.5.9-rocm720-mi35x-20260327 is not on Docker Hub (404)" - - "Use lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260329" + - "Update cli args of Qwen3.5 FP8 and BF16 SGLang benchmarks for MI355X to achieve better performance" + - "Use lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260327" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/980 From 576008672ef2ccdbf70b01e649ea26d0a7db3b04 Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 03:49:10 -0500 Subject: [PATCH 08/33] Lower --mem-fraction-static from 0.8 to 0.75 in the Qwen3.5 BF16 and FP8 benchmark scripts for MI355X. --- benchmarks/single_node/qwen3.5_bf16_mi355x.sh | 2 +- benchmarks/single_node/qwen3.5_fp8_mi355x.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/single_node/qwen3.5_bf16_mi355x.sh b/benchmarks/single_node/qwen3.5_bf16_mi355x.sh index bf4d3feb7..f7a01a6d1 100755 --- a/benchmarks/single_node/qwen3.5_bf16_mi355x.sh +++ b/benchmarks/single_node/qwen3.5_bf16_mi355x.sh @@ -48,7 +48,7 @@ python3 -m sglang.launch_server \ --disable-radix-cache \ --max-prefill-tokens $MAX_PREFILL_TOKENS \ --scheduler-recv-interval 30 \ - --mem-fraction-static 0.8 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 & + --mem-fraction-static 0.75 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 & SERVER_PID=$! 
diff --git a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh index bf4d3feb7..f7a01a6d1 100644 --- a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh +++ b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh @@ -48,7 +48,7 @@ python3 -m sglang.launch_server \ --disable-radix-cache \ --max-prefill-tokens $MAX_PREFILL_TOKENS \ --scheduler-recv-interval 30 \ - --mem-fraction-static 0.8 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 & + --mem-fraction-static 0.75 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 & SERVER_PID=$! From df4c673dc5795c346f4a91ecb4a06a08601b620c Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 03:49:10 -0500 Subject: [PATCH 09/33] Update search-space configurations in amd-master.yaml for Qwen3.5 benchmarks, increasing conc-end values and adding new entries for improved performance tuning on MI355X and MI300X. --- .github/configs/amd-master.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 099ce7156..6bf2a195d 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -125,11 +125,13 @@ qwen3.5-bf16-mi355x-sglang: - isl: 1024 osl: 1024 search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + - { tp: 8, conc-start: 4, conc-end: 256 } + - { tp: 8, ep: 8, conc-start: 64, conc-end: 256 } - isl: 8192 osl: 1024 search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + - { tp: 8, conc-start: 4, conc-end: 256 } + - { tp: 8, ep: 8, conc-start: 64, conc-end: 256 } qwen3.5-bf16-mi300x-sglang: image: lmsysorg/sglang:v0.5.10-rocm720-mi30x @@ -198,10 +200,12 @@ qwen3.5-fp8-mi355x-sglang: osl: 1024 search-space: - { tp: 8, conc-start: 4, conc-end: 64 } + - { tp: 4, ep: 4, conc-start: 16, conc-end: 256 } - isl: 8192 osl: 1024 search-space: - { tp: 8, conc-start: 4, conc-end: 64 } + - { tp: 4, ep: 4, conc-start: 16, conc-end: 256 } qwen3.5-fp4-mi355x-sglang: image: 
lmsysorg/sglang:v0.5.10-rocm720-mi35x From 4367ae06eedc5ad627e826f18d66d5c1b9dcbf0f Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 03:49:10 -0500 Subject: [PATCH 10/33] Remove context length parameter from Qwen3.5 BF16 and FP8 benchmark scripts for MI355X to streamline configuration and improve performance. --- benchmarks/single_node/qwen3.5_bf16_mi355x.sh | 1 - benchmarks/single_node/qwen3.5_fp8_mi355x.sh | 1 - 2 files changed, 2 deletions(-) diff --git a/benchmarks/single_node/qwen3.5_bf16_mi355x.sh b/benchmarks/single_node/qwen3.5_bf16_mi355x.sh index f7a01a6d1..ce82b9a53 100755 --- a/benchmarks/single_node/qwen3.5_bf16_mi355x.sh +++ b/benchmarks/single_node/qwen3.5_bf16_mi355x.sh @@ -44,7 +44,6 @@ python3 -m sglang.launch_server \ --tokenizer-worker-num 6 \ --enable-aiter-allreduce-fusion \ --cuda-graph-max-bs $CONC \ - --context-length $CONTEXT_LENGTH \ --disable-radix-cache \ --max-prefill-tokens $MAX_PREFILL_TOKENS \ --scheduler-recv-interval 30 \ diff --git a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh index f7a01a6d1..ce82b9a53 100644 --- a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh +++ b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh @@ -44,7 +44,6 @@ python3 -m sglang.launch_server \ --tokenizer-worker-num 6 \ --enable-aiter-allreduce-fusion \ --cuda-graph-max-bs $CONC \ - --context-length $CONTEXT_LENGTH \ --disable-radix-cache \ --max-prefill-tokens $MAX_PREFILL_TOKENS \ --scheduler-recv-interval 30 \ From 37406ecc894bf7b1d3caeca7d2925d1f0f5caef5 Mon Sep 17 00:00:00 2001 From: "Chen, Zhentao" Date: Wed, 15 Apr 2026 03:49:10 -0500 Subject: [PATCH 11/33] update to 5.10 rocm for qwen35 --- .github/configs/amd-master.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 6bf2a195d..a96dd971f 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -114,7 +114,7 @@ 
dsr1-fp8-mi355x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-bf16-mi355x-sglang: - image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260327 + image: lmsysorg/sglang:v0.5.10rc0-rocm720-mi35x model: Qwen/Qwen3.5-397B-A17B model-prefix: qwen3.5 runner: mi355x @@ -188,7 +188,7 @@ qwen3.5-fp8-mi325x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-fp8-mi355x-sglang: - image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260327 + image: lmsysorg/sglang:v0.5.10rc0-rocm720-mi35x model: Qwen/Qwen3.5-397B-A17B-FP8 model-prefix: qwen3.5 runner: mi355x From f5279e53bb0509cb2489994337ffbc5b078da7a8 Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 03:49:10 -0500 Subject: [PATCH 12/33] Update Qwen3.5 benchmark configurations in amd-master.yaml to include EP_SIZE parameter for search-space entries, enhancing performance tuning for MI355X and MI300X. Adjusted perf-changelog.yaml to reflect updated image tag for better performance tracking. --- .github/configs/amd-master.yaml | 8 ++++---- perf-changelog.yaml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index a96dd971f..98aa8471d 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -125,12 +125,12 @@ qwen3.5-bf16-mi355x-sglang: - isl: 1024 osl: 1024 search-space: - - { tp: 8, conc-start: 4, conc-end: 256 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } - { tp: 8, ep: 8, conc-start: 64, conc-end: 256 } - isl: 8192 osl: 1024 search-space: - - { tp: 8, conc-start: 4, conc-end: 256 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } - { tp: 8, ep: 8, conc-start: 64, conc-end: 256 } qwen3.5-bf16-mi300x-sglang: @@ -199,12 +199,12 @@ qwen3.5-fp8-mi355x-sglang: - isl: 1024 osl: 1024 search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 64 } - { tp: 4, ep: 4, conc-start: 16, conc-end: 256 } - isl: 8192 osl: 1024 search-space: 
- - { tp: 8, conc-start: 4, conc-end: 64 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 64 } - { tp: 4, ep: 4, conc-start: 16, conc-end: 256 } qwen3.5-fp4-mi355x-sglang: diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 6cd30602b..9ce255b32 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1365,6 +1365,6 @@ - qwen3.5-fp8-mi355x-sglang description: - "Update cli args of Qwen3.5 FP8 and BF16 SGLang benchmarks for MI355X to achieve better performance" - - "Use lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260327" + - "Use lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/980 From da6b5ac935a9cf7f2d9496e111e5a7cff6bfd21f Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 03:49:10 -0500 Subject: [PATCH 13/33] Update context length calculations in Qwen3.5 benchmark scripts for BF16 and FP8 to improve performance tuning. Adjusted search-space configurations in amd-master.yaml to increase conc-end values for MI355X and MI300X. 
--- .github/configs/amd-master.yaml | 4 ++-- benchmarks/single_node/qwen3.5_bf16_mi355x.sh | 2 +- benchmarks/single_node/qwen3.5_fp8_mi355x.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 98aa8471d..866d6bdbd 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -199,12 +199,12 @@ qwen3.5-fp8-mi355x-sglang: - isl: 1024 osl: 1024 search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 64 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } - { tp: 4, ep: 4, conc-start: 16, conc-end: 256 } - isl: 8192 osl: 1024 search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 64 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } - { tp: 4, ep: 4, conc-start: 16, conc-end: 256 } qwen3.5-fp4-mi355x-sglang: diff --git a/benchmarks/single_node/qwen3.5_bf16_mi355x.sh b/benchmarks/single_node/qwen3.5_bf16_mi355x.sh index ce82b9a53..c6bed1491 100755 --- a/benchmarks/single_node/qwen3.5_bf16_mi355x.sh +++ b/benchmarks/single_node/qwen3.5_bf16_mi355x.sh @@ -20,7 +20,7 @@ hf download "$MODEL" SERVER_LOG=/workspace/server.log PORT=${PORT:-8888} -CONTEXT_LENGTH=$((ISL + OSL + 20)) +CONTEXT_LENGTH=$((ISL + OSL + 200)) MAX_PREFILL_TOKENS=32768 EVAL_CONTEXT_ARGS="" diff --git a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh index ce82b9a53..c6bed1491 100644 --- a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh +++ b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh @@ -20,7 +20,7 @@ hf download "$MODEL" SERVER_LOG=/workspace/server.log PORT=${PORT:-8888} -CONTEXT_LENGTH=$((ISL + OSL + 20)) +CONTEXT_LENGTH=$((ISL + OSL + 200)) MAX_PREFILL_TOKENS=32768 EVAL_CONTEXT_ARGS="" From b318fee7f01bdcc354341f2cff919fbc53b70ca8 Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 03:49:33 -0500 Subject: [PATCH 14/33] Update image tags in amd-master.yaml for Qwen3.5 benchmarks to v0.5.10rc0-rocm700 for MI355X and MI300X 
configurations, ensuring compatibility and improved performance tracking. --- .github/configs/amd-master.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 866d6bdbd..8c68dba4e 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -188,7 +188,7 @@ qwen3.5-fp8-mi325x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-fp8-mi355x-sglang: - image: lmsysorg/sglang:v0.5.10rc0-rocm720-mi35x + image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm700-mi35x-20260331 model: Qwen/Qwen3.5-397B-A17B-FP8 model-prefix: qwen3.5 runner: mi355x From 54b94f18678d37d8a6b4c0b61b951226b3874c48 Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 03:49:51 -0500 Subject: [PATCH 15/33] Update the qwen3.5-bf16-mi355x-sglang image tag in amd-master.yaml to lmsysorg/sglang-rocm:v0.5.10rc0-rocm700-mi35x-20260331, matching the qwen3.5-fp8-mi355x-sglang configuration. --- .github/configs/amd-master.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 8c68dba4e..e95925749 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -114,7 +114,7 @@ dsr1-fp8-mi355x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-bf16-mi355x-sglang: - image: lmsysorg/sglang:v0.5.10rc0-rocm720-mi35x + image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm700-mi35x-20260331 model: Qwen/Qwen3.5-397B-A17B model-prefix: qwen3.5 runner: mi355x From 0e0d51f56d120caadf092a25b49edce4753af44f Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 03:49:51 -0500 Subject: [PATCH 16/33] Remove data-parallel-size parameter and increase mem-fraction-static from 0.75 to 0.8 in Qwen3.5 BF16 and FP8 benchmark scripts to enhance performance tuning. 
--- benchmarks/single_node/qwen3.5_bf16_mi355x.sh | 3 +-- benchmarks/single_node/qwen3.5_fp8_mi355x.sh | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/benchmarks/single_node/qwen3.5_bf16_mi355x.sh b/benchmarks/single_node/qwen3.5_bf16_mi355x.sh index c6bed1491..e07d5600e 100755 --- a/benchmarks/single_node/qwen3.5_bf16_mi355x.sh +++ b/benchmarks/single_node/qwen3.5_bf16_mi355x.sh @@ -39,7 +39,6 @@ python3 -m sglang.launch_server \ --port $PORT \ --tensor-parallel-size $TP \ --ep-size $EP_SIZE \ - --data-parallel-size 1 \ --trust-remote-code \ --tokenizer-worker-num 6 \ --enable-aiter-allreduce-fusion \ @@ -47,7 +46,7 @@ python3 -m sglang.launch_server \ --disable-radix-cache \ --max-prefill-tokens $MAX_PREFILL_TOKENS \ --scheduler-recv-interval 30 \ - --mem-fraction-static 0.75 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 & + --mem-fraction-static 0.8 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh index c6bed1491..e07d5600e 100644 --- a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh +++ b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh @@ -39,7 +39,6 @@ python3 -m sglang.launch_server \ --port $PORT \ --tensor-parallel-size $TP \ --ep-size $EP_SIZE \ - --data-parallel-size 1 \ --trust-remote-code \ --tokenizer-worker-num 6 \ --enable-aiter-allreduce-fusion \ @@ -47,7 +46,7 @@ python3 -m sglang.launch_server \ --disable-radix-cache \ --max-prefill-tokens $MAX_PREFILL_TOKENS \ --scheduler-recv-interval 30 \ - --mem-fraction-static 0.75 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 & + --mem-fraction-static 0.8 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 & SERVER_PID=$! 
From 44cccfc2f193ba3c452b8fb97e058ecb323efe2f Mon Sep 17 00:00:00 2001 From: Chun Fang Date: Wed, 15 Apr 2026 03:49:51 -0500 Subject: [PATCH 17/33] Update sglang image for qwen3.5 mi355x configs to fix shared memory crash --- .github/configs/amd-master.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index e95925749..441b68f75 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -114,7 +114,7 @@ dsr1-fp8-mi355x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-bf16-mi355x-sglang: - image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm700-mi35x-20260331 + image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260401 model: Qwen/Qwen3.5-397B-A17B model-prefix: qwen3.5 runner: mi355x @@ -188,7 +188,7 @@ qwen3.5-fp8-mi325x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-fp8-mi355x-sglang: - image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm700-mi35x-20260331 + image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260401 model: Qwen/Qwen3.5-397B-A17B-FP8 model-prefix: qwen3.5 runner: mi355x From ce131ccb8d15444b2c940923a365e6ced608e2bf Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 03:49:51 -0500 Subject: [PATCH 18/33] Refine search-space configurations in amd-master.yaml for Qwen3.5 benchmarks, adjusting parameters to optimize performance for MI355X. Update perf-changelog.yaml to remove an outdated entry. 
--- .github/configs/amd-master.yaml | 9 +++++---- perf-changelog.yaml | 1 - 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 441b68f75..f4987813a 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -199,13 +199,14 @@ qwen3.5-fp8-mi355x-sglang: - isl: 1024 osl: 1024 search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } - - { tp: 4, ep: 4, conc-start: 16, conc-end: 256 } + - { tp: 2, ep: 1, conc-start: 4, conc-end: 32 } + - { tp: 2, ep: 2, conc-start: 4, conc-end: 256 } - isl: 8192 osl: 1024 search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } - - { tp: 4, ep: 4, conc-start: 16, conc-end: 256 } + - { tp: 2, ep: 2, conc-start: 4, conc-end: 32 } + - { tp: 4, ep: 1, conc-start: 32, conc-end: 256 } + - { tp: 8, ep: 8, conc-start: 16, conc-end: 256 } qwen3.5-fp4-mi355x-sglang: image: lmsysorg/sglang:v0.5.10-rocm720-mi35x diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 9ce255b32..e944c0f66 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1367,4 +1367,3 @@ - "Update cli args of Qwen3.5 FP8 and BF16 SGLang benchmarks for MI355X to achieve better performance" - "Use lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/980 - From f9f7f29805741b0f919b346f7f7db0ef22925e10 Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 03:49:51 -0500 Subject: [PATCH 19/33] Update image tags in amd-master.yaml and perf-changelog.yaml for Qwen3.5 benchmarks, replacing outdated sglang image references with the latest version to ensure consistency and improved performance. 
--- .github/configs/amd-master.yaml | 4 ++-- perf-changelog.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index f4987813a..6c0ae1617 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -114,7 +114,7 @@ dsr1-fp8-mi355x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-bf16-mi355x-sglang: - image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260401 + image: lmsysorg/sglang:v0.5.10-rocm720-mi35x model: Qwen/Qwen3.5-397B-A17B model-prefix: qwen3.5 runner: mi355x @@ -188,7 +188,7 @@ qwen3.5-fp8-mi325x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-fp8-mi355x-sglang: - image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260401 + image: lmsysorg/sglang:v0.5.10-rocm720-mi35x model: Qwen/Qwen3.5-397B-A17B-FP8 model-prefix: qwen3.5 runner: mi355x diff --git a/perf-changelog.yaml b/perf-changelog.yaml index e944c0f66..9386fe4d0 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1365,5 +1365,5 @@ - qwen3.5-fp8-mi355x-sglang description: - "Update cli args of Qwen3.5 FP8 and BF16 SGLang benchmarks for MI355X to achieve better performance" - - "Use lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x" + - "Use lmsysorg/sglang:v0.5.10-rocm720-mi35x" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/980 From 0fc9c129a4700a8df1a903fde843e9cc17008141 Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 03:49:51 -0500 Subject: [PATCH 20/33] Remove aiter allreduce fusion option from Qwen3.5 FP8 MI355X benchmark script to simplify configuration and enhance performance tuning. 
--- benchmarks/single_node/qwen3.5_fp8_mi355x.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh index e07d5600e..7e4709974 100644 --- a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh +++ b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh @@ -41,7 +41,6 @@ python3 -m sglang.launch_server \ --ep-size $EP_SIZE \ --trust-remote-code \ --tokenizer-worker-num 6 \ - --enable-aiter-allreduce-fusion \ --cuda-graph-max-bs $CONC \ --disable-radix-cache \ --max-prefill-tokens $MAX_PREFILL_TOKENS \ From fe7672ba86c4ec1f454ba0a5f8e80d098f612697 Mon Sep 17 00:00:00 2001 From: Chun Fang Date: Wed, 15 Apr 2026 03:49:51 -0500 Subject: [PATCH 21/33] optimize the search space --- .github/configs/amd-master.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 6c0ae1617..f331ea403 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -199,6 +199,8 @@ qwen3.5-fp8-mi355x-sglang: - isl: 1024 osl: 1024 search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 } + - { tp: 8, ep: 8, conc-start: 64, conc-end: 256 } - { tp: 2, ep: 1, conc-start: 4, conc-end: 32 } - { tp: 2, ep: 2, conc-start: 4, conc-end: 256 } - isl: 8192 From 7776a071302fae7f94097abee39267b1376fd4c1 Mon Sep 17 00:00:00 2001 From: Chun Fang Date: Wed, 15 Apr 2026 03:49:51 -0500 Subject: [PATCH 22/33] Upgrade image to 20260413 --- .github/configs/amd-master.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index f331ea403..1d478fba5 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -188,7 +188,7 @@ qwen3.5-fp8-mi325x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-fp8-mi355x-sglang: - image: lmsysorg/sglang:v0.5.10-rocm720-mi35x + image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260413 model: 
Qwen/Qwen3.5-397B-A17B-FP8 model-prefix: qwen3.5 runner: mi355x From 99b69b39c75b2584ad3dbfb0038ed0b190d9a521 Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 03:51:10 -0500 Subject: [PATCH 23/33] Update sglang image tags for Qwen3.5 benchmarks in amd-master.yaml to the latest version (20260414) for improved consistency and performance. --- .github/configs/amd-master.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 1d478fba5..8c51a2444 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -114,7 +114,7 @@ dsr1-fp8-mi355x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-bf16-mi355x-sglang: - image: lmsysorg/sglang:v0.5.10-rocm720-mi35x + image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260414 model: Qwen/Qwen3.5-397B-A17B model-prefix: qwen3.5 runner: mi355x @@ -188,7 +188,7 @@ qwen3.5-fp8-mi325x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-fp8-mi355x-sglang: - image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260413 + image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260414 model: Qwen/Qwen3.5-397B-A17B-FP8 model-prefix: qwen3.5 runner: mi355x From 0a359267a705fe2915a3791f2539e52741ef349c Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 06:24:57 -0500 Subject: [PATCH 24/33] Remove redundant search-space entry for qwen3.5-bf16-mi355x-sglang in amd-master.yaml to streamline configuration. 
--- .github/configs/amd-master.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 8c51a2444..bae706ff9 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -131,7 +131,6 @@ qwen3.5-bf16-mi355x-sglang: osl: 1024 search-space: - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } - - { tp: 8, ep: 8, conc-start: 64, conc-end: 256 } qwen3.5-bf16-mi300x-sglang: image: lmsysorg/sglang:v0.5.10-rocm720-mi30x From f7a59b6dc409769c65d8184b9634cba5b34459b9 Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 06:26:36 -0500 Subject: [PATCH 25/33] Remove duplicate search-space entry for qwen3.5-bf16-mi355x-sglang in amd-master.yaml to enhance configuration clarity. --- .github/configs/amd-master.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index bae706ff9..4e54ad0d1 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -126,7 +126,6 @@ qwen3.5-bf16-mi355x-sglang: osl: 1024 search-space: - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } - - { tp: 8, ep: 8, conc-start: 64, conc-end: 256 } - isl: 8192 osl: 1024 search-space: From b1bd7015bb95f8e18406ddb9a7af5372984416b8 Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 06:33:28 -0500 Subject: [PATCH 26/33] Refine search-space configurations for qwen3.5-fp8-mi355x-sglang in amd-master.yaml by adjusting parameters to improve performance tuning. 
--- .github/configs/amd-master.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 4e54ad0d1..3df18d073 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -199,14 +199,12 @@ qwen3.5-fp8-mi355x-sglang: search-space: - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 } - { tp: 8, ep: 8, conc-start: 64, conc-end: 256 } - - { tp: 2, ep: 1, conc-start: 4, conc-end: 32 } - - { tp: 2, ep: 2, conc-start: 4, conc-end: 256 } + - { tp: 2, ep: 2, conc-start: 128, conc-end: 256 } - isl: 8192 osl: 1024 search-space: - { tp: 2, ep: 2, conc-start: 4, conc-end: 32 } - { tp: 4, ep: 1, conc-start: 32, conc-end: 256 } - - { tp: 8, ep: 8, conc-start: 16, conc-end: 256 } qwen3.5-fp4-mi355x-sglang: image: lmsysorg/sglang:v0.5.10-rocm720-mi35x From 2915be47964f212abd576f569f7c0e2110fab631 Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 06:35:54 -0500 Subject: [PATCH 27/33] Update sglang image tags for qwen3.5 configurations in amd-master.yaml to the latest version (20260415) for improved consistency and performance. 
--- .github/configs/amd-master.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 3df18d073..b13dc793d 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -114,7 +114,7 @@ dsr1-fp8-mi355x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-bf16-mi355x-sglang: - image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260414 + image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415 model: Qwen/Qwen3.5-397B-A17B model-prefix: qwen3.5 runner: mi355x @@ -186,7 +186,7 @@ qwen3.5-fp8-mi325x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-fp8-mi355x-sglang: - image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260414 + image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415 model: Qwen/Qwen3.5-397B-A17B-FP8 model-prefix: qwen3.5 runner: mi355x From 81c88861692e6bd7fe2a531e5054a65ae5d9e43d Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 21:05:47 -0500 Subject: [PATCH 28/33] Update sglang image tag for Qwen3.5 FP8 and BF16 benchmarks in perf-changelog.yaml to the latest version (v0.5.10rc0-rocm720-mi35x-20260415) for enhanced performance. 
--- perf-changelog.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 9386fe4d0..b4ca74ea9 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1365,5 +1365,5 @@ - qwen3.5-fp8-mi355x-sglang description: - "Update cli args of Qwen3.5 FP8 and BF16 SGLang benchmarks for MI355X to achieve better performance" - - "Use lmsysorg/sglang:v0.5.10-rocm720-mi35x" - pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/980 + - "Use lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1036 From 1f12ab30b9dfa9410fd8cceab79928ebfc606880 Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 21:07:20 -0500 Subject: [PATCH 29/33] Add aiter allreduce fusion option to Qwen3.5 FP8 MI355X benchmark script for enhanced performance tuning. --- benchmarks/single_node/qwen3.5_fp8_mi355x.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh index 7e4709974..e07d5600e 100644 --- a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh +++ b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh @@ -41,6 +41,7 @@ python3 -m sglang.launch_server \ --ep-size $EP_SIZE \ --trust-remote-code \ --tokenizer-worker-num 6 \ + --enable-aiter-allreduce-fusion \ --cuda-graph-max-bs $CONC \ --disable-radix-cache \ --max-prefill-tokens $MAX_PREFILL_TOKENS \ From 319ed68072ad159e594bbf108d4aad4afebc0940 Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 21:08:14 -0500 Subject: [PATCH 30/33] Adjust CONTEXT_LENGTH in Qwen3.5 benchmark scripts for BF16 and FP8 configurations to improve performance tuning. 
--- benchmarks/single_node/qwen3.5_bf16_mi355x.sh | 2 +- benchmarks/single_node/qwen3.5_fp8_mi355x.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/single_node/qwen3.5_bf16_mi355x.sh b/benchmarks/single_node/qwen3.5_bf16_mi355x.sh index e07d5600e..6d40e3e3f 100755 --- a/benchmarks/single_node/qwen3.5_bf16_mi355x.sh +++ b/benchmarks/single_node/qwen3.5_bf16_mi355x.sh @@ -20,7 +20,7 @@ hf download "$MODEL" SERVER_LOG=/workspace/server.log PORT=${PORT:-8888} -CONTEXT_LENGTH=$((ISL + OSL + 200)) +CONTEXT_LENGTH=$((ISL + OSL + 20)) MAX_PREFILL_TOKENS=32768 EVAL_CONTEXT_ARGS="" diff --git a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh index e07d5600e..6d40e3e3f 100644 --- a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh +++ b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh @@ -20,7 +20,7 @@ hf download "$MODEL" SERVER_LOG=/workspace/server.log PORT=${PORT:-8888} -CONTEXT_LENGTH=$((ISL + OSL + 200)) +CONTEXT_LENGTH=$((ISL + OSL + 20)) MAX_PREFILL_TOKENS=32768 EVAL_CONTEXT_ARGS="" From ee837ecb18c54f0fedee3c4367d4c9e678ab6c79 Mon Sep 17 00:00:00 2001 From: "Chen, Todd" Date: Wed, 15 Apr 2026 21:08:51 -0500 Subject: [PATCH 31/33] Remove duplicate entry for qwen3.5-fp8-mi355x-sglang in perf-changelog.yaml to enhance configuration clarity and maintain consistency. 
--- perf-changelog.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index b4ca74ea9..b4c9e1273 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1362,7 +1362,6 @@ - config-keys: - qwen3.5-fp8-mi355x-sglang - qwen3.5-bf16-mi355x-sglang - - qwen3.5-fp8-mi355x-sglang description: - "Update cli args of Qwen3.5 FP8 and BF16 SGLang benchmarks for MI355X to achieve better performance" - "Use lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415" From 8c51a6301fbbabdcdfb69ebdc5f5bb1e79c6a0c6 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 16 Apr 2026 02:12:01 +0000 Subject: [PATCH 32/33] Add upstream SGLang PR links to perf-changelog.yaml for qwen3.5 MI355X entry Co-authored-by: functionstackx --- perf-changelog.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index b4c9e1273..4b38ad0a3 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1365,4 +1365,5 @@ description: - "Update cli args of Qwen3.5 FP8 and BF16 SGLang benchmarks for MI355X to achieve better performance" - "Use lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415" + - "Image includes upstream SGLang PRs: https://github.com/sgl-project/sglang/pull/21188, https://github.com/sgl-project/sglang/pull/21421, https://github.com/sgl-project/sglang/pull/20736" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1036 From f0a8e2d978d60aab89707212a5d6d446ca7ccace Mon Sep 17 00:00:00 2001 From: Chun Fang Date: Thu, 16 Apr 2026 10:08:06 +0000 Subject: [PATCH 33/33] Downgrade the image for Qwen3.5-FP8-MI355X-SGLang to 20260414 lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260414 --- .github/configs/amd-master.yaml | 2 +- perf-changelog.yaml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index b13dc793d..95c26c075 100644 --- 
a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -186,7 +186,7 @@ qwen3.5-fp8-mi325x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-fp8-mi355x-sglang: - image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415 + image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260414 model: Qwen/Qwen3.5-397B-A17B-FP8 model-prefix: qwen3.5 runner: mi355x diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 4b38ad0a3..3cfd2b377 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1364,6 +1364,7 @@ - qwen3.5-bf16-mi355x-sglang description: - "Update cli args of Qwen3.5 FP8 and BF16 SGLang benchmarks for MI355X to achieve better performance" - - "Use lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415" + - "Use lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415 for BF16 benchmark" + - "Use lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260414 for FP8 benchmark" - "Image includes upstream SGLang PRs: https://github.com/sgl-project/sglang/pull/21188, https://github.com/sgl-project/sglang/pull/21421, https://github.com/sgl-project/sglang/pull/20736" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1036