From a364a35a14d642184dd735dc36d1e7ed322535c4 Mon Sep 17 00:00:00 2001 From: ajith-sirra-amd Date: Mon, 20 Apr 2026 12:03:30 +0530 Subject: [PATCH 01/11] AMD GLM5 FP4 (mxfp4) MI355X Support Signed-off-by: ajith-sirra-amd --- .github/configs/amd-master.yaml | 20 ++++++ benchmarks/single_node/glm5_fp4_mi355x.sh | 81 +++++++++++++++++++++++ perf-changelog.yaml | 7 ++ 3 files changed, 108 insertions(+) create mode 100644 benchmarks/single_node/glm5_fp4_mi355x.sh diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index f1181b941..611890002 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -337,6 +337,26 @@ glm5-fp8-mi355x-atom: search-space: - { tp: 8, conc-start: 4, conc-end: 256 } +glm5-fp4-mi355x-sglang: + image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm700-mi35x-20260416 + model: amd/GLM-5-MXFP4 + model-prefix: glm5 + runner: mi355x + precision: fp8 + framework: sglang + multinode: false + seq-len-configs: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 2, conc-start: 4, conc-end: 256 } + - { tp: 4, conc-start: 4, conc-end: 16 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 2, conc-start: 4, conc-end: 256 } + - { tp: 4, conc-start: 4, conc-end: 16 } + kimik2.5-int4-mi355x-vllm: image: vllm/vllm-openai-rocm:v0.18.0 model: moonshotai/Kimi-K2.5 diff --git a/benchmarks/single_node/glm5_fp4_mi355x.sh b/benchmarks/single_node/glm5_fp4_mi355x.sh new file mode 100644 index 000000000..77bb48ead --- /dev/null +++ b/benchmarks/single_node/glm5_fp4_mi355x.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash +set -x + +source "$(dirname "$0")/../benchmark_lib.sh" + +check_env_vars \ + MODEL \ + TP \ + CONC \ + ISL \ + OSL \ + RANDOM_RANGE_RATIO \ + RESULT_FILENAME + +if [[ -n "$SLURM_JOB_ID" ]]; then + echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" +fi + +hf download "$MODEL" + +# ROCm / SGLang performance tuning for MI355X +export SGLANG_ROCM_FUSED_DECODE_MLA=0 +export ROCM_QUICK_REDUCE_QUANTIZATION=INT4 +export SAFETENSORS_FAST_GPU=1 + +SERVER_LOG=/workspace/server.log +PORT=${PORT:-8888} +CONTEXT_LENGTH=$((ISL + OSL + 32)) + +EVAL_CONTEXT_ARGS="" +if [ "${EVAL_ONLY}" = "true" ]; then + setup_eval_context + EVAL_CONTEXT_ARGS="--context-length $EVAL_MAX_MODEL_LEN" +fi +# Start GPU monitoring (power, temperature, clocks every second) +start_gpu_monitor + +python3 -m sglang.launch_server \ + --model-path $MODEL \ + --host=0.0.0.0 \ + --port $PORT \ + --tensor-parallel-size $TP \ + --trust-remote-code \ + --cuda-graph-max-bs $CONC \ + --context-length $CONTEXT_LENGTH \ + --mem-fraction-static 0.85 \ + --tool-call-parser glm47 \ + --reasoning-parser glm45 \ + --model-loader-extra-config '{"enable_multithread_load": true, "num_threads": 8}' \ + --nsa-prefill-backend tilelang \ + --nsa-decode-backend tilelang $EVAL_CONTEXT_ARGS \ + --kv-cache-dtype fp8_e4m3 \ + --tokenizer-worker-num $((TP*2)) \ + --disable-radix-cache> $SERVER_LOG 2>&1 & + +SERVER_PID=$! + +# Wait for server to be ready +wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID" + +run_benchmark_serving \ + --model "$MODEL" \ + --port "$PORT" \ + --backend vllm \ + --input-len "$ISL" \ + --output-len "$OSL" \ + --random-range-ratio "$RANDOM_RANGE_RATIO" \ + --num-prompts "$((CONC * 10))" \ + --max-concurrency "$CONC" \ + --result-filename "$RESULT_FILENAME" \ + --result-dir /workspace/ + +# After throughput, run evaluation only if RUN_EVAL is true +if [ "${RUN_EVAL}" = "true" ]; then + run_eval --framework lm-eval --port "$PORT" + append_lm_eval_summary +fi + +# Stop GPU monitoring +stop_gpu_monitor +set +x diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 16fea938d..12ffb59f3 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1,3 +1,10 @@ +- config-keys: + -glm5-fp4-mi355x-sglang + description: + - "Add GLM5 MXFP4 (FP4) MI350X SGLang Support" + - "Container : lmsysorg/sglang-rocm:v0.5.10rc0-rocm700-mi35x-20260416" + pr-link: To be Added + - config-keys: - kimik2.5-fp4-gb200-dynamo-trt description: From 1339a94df91a66de471ad15c2cf937c681e78ce4 Mon Sep 17 00:00:00 2001 From: ajith-sirra-amd Date: Mon, 20 Apr 2026 12:09:02 +0530 Subject: [PATCH 02/11] AMD GLM5 FP4 (mxfp4) MI355X Support - Perf Change Log Signed-off-by: ajith-sirra-amd --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 12ffb59f3..dc5eaa443 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3,7 +3,7 @@ description: - "Add GLM5 MXFP4 (FP4) MI350X SGLang Support" - "Container : lmsysorg/sglang-rocm:v0.5.10rc0-rocm700-mi35x-20260416" - pr-link: To be Added + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1098 - config-keys: - kimik2.5-fp4-gb200-dynamo-trt From 93be0c54bd9a11cb1e0325401567784075eddaef Mon Sep 17 00:00:00 2001 From: ajith-sirra-amd Date: Mon, 20 Apr 2026 12:30:35 +0530 Subject: [PATCH 03/11] AMD GLM5 FP4 (mxfp4) MI355X Support Signed-off-by: ajith-sirra-amd --- benchmarks/single_node/glm5_fp4_mi355x.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchmarks/single_node/glm5_fp4_mi355x.sh b/benchmarks/single_node/glm5_fp4_mi355x.sh index 77bb48ead..297d99819 100644 --- a/benchmarks/single_node/glm5_fp4_mi355x.sh +++ b/benchmarks/single_node/glm5_fp4_mi355x.sh @@ -35,6 +35,8 @@ fi # Start GPU monitoring (power, temperature, clocks every second) start_gpu_monitor +pip install -U transformers + python3 -m sglang.launch_server \ --model-path $MODEL \ --host=0.0.0.0 \ From 1a22dfe7f2f929d9b0a1bef07978784d50658eaa Mon Sep 17 00:00:00 2001 From: ajith-sirra-amd Date: Mon, 20 Apr 2026 13:52:28 +0530 Subject: [PATCH 04/11] AMD GLM5 FP4 (mxfp4) MI355X Support Signed-off-by: ajith-sirra-amd --- .github/configs/amd-master.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 611890002..0b32f72fe 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -338,7 +338,7 @@ glm5-fp8-mi355x-atom: - { tp: 8, conc-start: 4, conc-end: 256 } glm5-fp4-mi355x-sglang: - image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm700-mi35x-20260416 + image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415 model: amd/GLM-5-MXFP4 model-prefix: glm5 runner: mi355x From b30c0424e4e3cc5fed5aca83db9f7e3701d0eccc Mon Sep 17 00:00:00 2001 From: ajith-sirra-amd Date: Mon, 20 Apr 2026 15:01:54 +0530 Subject: [PATCH 05/11] AMD GLM5 FP4 (mxfp4) MI355X Support Signed-off-by: ajith-sirra-amd --- .github/configs/amd-master.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 0b32f72fe..26b4cf1bc 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -339,7 +339,7 @@ glm5-fp8-mi355x-atom: glm5-fp4-mi355x-sglang: image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415 - model: amd/GLM-5-MXFP4 + model: amd/GLM-5.1-MXFP4 model-prefix: glm5 runner: mi355x precision: fp8 From a47acb5610ac9729517e2ad98eb45978f8a421c8 Mon Sep 17 00:00:00 2001 From: ajith-sirra-amd Date: Mon, 20 Apr 2026 15:43:27 +0530 Subject: [PATCH 06/11] AMD GLM5 FP4 (mxfp4) MI355X Support Signed-off-by: ajith-sirra-amd --- .github/configs/amd-master.yaml | 4 ++-- .../single_node/{glm5_fp4_mi355x.sh => glm5.1_fp4_mi355x.sh} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename benchmarks/single_node/{glm5_fp4_mi355x.sh => glm5.1_fp4_mi355x.sh} (100%) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 26b4cf1bc..57fa5bbcb 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -337,10 +337,10 @@ glm5-fp8-mi355x-atom: search-space: - { tp: 8, conc-start: 4, conc-end: 256 } -glm5-fp4-mi355x-sglang: +glm5.1-fp4-mi355x-sglang: image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415 model: amd/GLM-5.1-MXFP4 - model-prefix: glm5 + model-prefix: glm5.1 runner: mi355x precision: fp8 framework: sglang diff --git a/benchmarks/single_node/glm5_fp4_mi355x.sh b/benchmarks/single_node/glm5.1_fp4_mi355x.sh similarity index 100% rename from benchmarks/single_node/glm5_fp4_mi355x.sh rename to benchmarks/single_node/glm5.1_fp4_mi355x.sh From c8ad523c7fdf9e1eeafaefb3249772491b292e41 Mon Sep 17 00:00:00 2001 From: ajith-sirra-amd Date: Mon, 20 Apr 2026 15:45:47 +0530 Subject: [PATCH 07/11] AMD GLM5 FP4 (MXFP4) MI355X Support Signed-off-by: ajith-sirra-amd --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index dc5eaa443..5eb264d90 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1,5 +1,5 @@ - config-keys: - -glm5-fp4-mi355x-sglang + - glm5.1-fp4-mi355x-sglang description: - "Add GLM5 MXFP4 (FP4) MI350X SGLang Support" - "Container : lmsysorg/sglang-rocm:v0.5.10rc0-rocm700-mi35x-20260416" From b57fc3557c6cb03c55a44f8dac3f2687a81592c0 Mon Sep 17 00:00:00 2001 From: ajith-sirra-amd Date: Mon, 20 Apr 2026 15:46:11 +0530 Subject: [PATCH 08/11] AMD GLM5 FP4 (MXFP4) MI355X Support Signed-off-by: ajith-sirra-amd --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 5eb264d90..b4d73ba6b 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -2,7 +2,7 @@ - glm5.1-fp4-mi355x-sglang description: - "Add GLM5 MXFP4 (FP4) MI350X SGLang Support" - - "Container : lmsysorg/sglang-rocm:v0.5.10rc0-rocm700-mi35x-20260416" + - "Container : lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1098 - config-keys: From 9d2b96a69bc5b1bfea83d6a3be572e6841fe0639 Mon Sep 17 00:00:00 2001 From: ajith-sirra-amd Date: Mon, 20 Apr 2026 15:46:24 +0530 Subject: [PATCH 09/11] AMD GLM5 FP4 (MXFP4) MI355X Support Signed-off-by: ajith-sirra-amd --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index b4d73ba6b..e4d0ad68d 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1,7 +1,7 @@ - config-keys: - glm5.1-fp4-mi355x-sglang description: - - "Add GLM5 MXFP4 (FP4) MI350X SGLang Support" + - "Add GLM5.1 MXFP4 (FP4) MI350X SGLang Support" - "Container : lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1098 From 277f774aef4ab8f46babc5d5e359e372ca052acd Mon Sep 17 00:00:00 2001 From: ajith-sirra-amd Date: Mon, 20 Apr 2026 15:51:35 +0530 Subject: [PATCH 10/11] AMD GLM5 FP4 (MXFP4) MI355X Support Signed-off-by: ajith-sirra-amd --- .github/configs/amd-master.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 57fa5bbcb..95034d7d7 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -342,7 +342,7 @@ glm5.1-fp4-mi355x-sglang: model: amd/GLM-5.1-MXFP4 model-prefix: glm5.1 runner: mi355x - precision: fp8 + precision: fp4 framework: sglang multinode: false seq-len-configs: From e70e01fb1476157ff6107ab0e1bf6b2d29585cd6 Mon Sep 17 00:00:00 2001 From: ajith-sirra-amd Date: Mon, 20 Apr 2026 20:50:56 +0530 Subject: [PATCH 11/11] AMD GLM5 FP4 (MXFP4) MI355X Support Signed-off-by: ajith-sirra-amd --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index e4d0ad68d..8e965a932 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1,7 +1,7 @@ - config-keys: - glm5.1-fp4-mi355x-sglang description: - - "Add GLM5.1 MXFP4 (FP4) MI350X SGLang Support" + - "Add GLM5.1 MXFP4 (FP4) MI355X SGLang Support" - "Container : lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1098