From 421bd7f820664d398457ca270a657504cea7237c Mon Sep 17 00:00:00 2001 From: Cameron Quilici Date: Fri, 5 Dec 2025 09:12:52 -0600 Subject: [PATCH] =?UTF-8?q?Revert=20"Change=20dsr1=20fp8=20image=20to=20lm?= =?UTF-8?q?sysorg/sglang=200.5.5.post3=20and=20fp4=20image=20to=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 0fe9dccffb1ed64994cbcbe4fc5a6bb1aed5c310. --- .github/configs/amd-master.yaml | 4 ++-- benchmarks/dsr1_fp4_mi355x_docker.sh | 1 - benchmarks/dsr1_fp4_mi355x_slurm.sh | 1 - benchmarks/dsr1_fp8_mi355x_docker.sh | 5 ----- benchmarks/dsr1_fp8_mi355x_slurm.sh | 6 +----- 5 files changed, 3 insertions(+), 14 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index fba81e6c7..938011d47 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -1,5 +1,5 @@ dsr1-fp4-mi355x-sglang: - image: lmsysorg/sglang:v0.5.5.post2-rocm700-mi35x + image: rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915 model: amd/DeepSeek-R1-0528-MXFP4-Preview model-prefix: dsr1 runner: mi355x @@ -63,7 +63,7 @@ dsr1-fp8-mi325x-sglang: - { tp: 8, conc-start: 4, conc-end: 64 } dsr1-fp8-mi355x-sglang: - image: lmsysorg/sglang:v0.5.5.post3-rocm700-mi35x + image: rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915 model: deepseek-ai/DeepSeek-R1-0528 model-prefix: dsr1 runner: mi355x diff --git a/benchmarks/dsr1_fp4_mi355x_docker.sh b/benchmarks/dsr1_fp4_mi355x_docker.sh index fc3ed5e21..ca1255802 100644 --- a/benchmarks/dsr1_fp4_mi355x_docker.sh +++ b/benchmarks/dsr1_fp4_mi355x_docker.sh @@ -11,7 +11,6 @@ # RESULT_FILENAME # NUM_PROMPTS export SGLANG_USE_AITER=1 -export ROCM_QUICK_REDUCE_QUANTIZATION=INT4 PREFILL_SIZE=196608 if [[ "$ISL" == "8192" && "$OSL" == "1024" ]]; then diff --git a/benchmarks/dsr1_fp4_mi355x_slurm.sh b/benchmarks/dsr1_fp4_mi355x_slurm.sh index 05f9b4bb8..0983b7ddf 100644 --- a/benchmarks/dsr1_fp4_mi355x_slurm.sh +++ b/benchmarks/dsr1_fp4_mi355x_slurm.sh @@ -10,7 +10,6 @@ # RANDOM_RANGE_RATIO # RESULT_FILENAME export SGLANG_USE_AITER=1 -export ROCM_QUICK_REDUCE_QUANTIZATION=INT4 SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log) PREFILL_SIZE=196608 diff --git a/benchmarks/dsr1_fp8_mi355x_docker.sh b/benchmarks/dsr1_fp8_mi355x_docker.sh index 573d1b6a4..8c5038cee 100644 --- a/benchmarks/dsr1_fp8_mi355x_docker.sh +++ b/benchmarks/dsr1_fp8_mi355x_docker.sh @@ -14,14 +14,10 @@ # https://rocm.docs.amd.com/en/docs-7.0-docker/benchmark-docker/inference-sglang-deepseek-r1-fp8.html export SGLANG_USE_AITER=1 -export RCCL_MSCCL_ENABLE=0 -export ROCM_QUICK_REDUCE_QUANTIZATION=INT4 - SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log) python3 -m sglang.launch_server \ - --attention-backend aiter \ --model-path $MODEL \ --host=0.0.0.0 \ --port $PORT \ @@ -31,7 +27,6 @@ python3 -m sglang.launch_server \ --mem-fraction-static 0.8 --disable-radix-cache \ --num-continuous-decode-steps 4 \ --max-prefill-tokens 196608 \ - --enable-torch-compile \ --cuda-graph-max-bs 128 > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/dsr1_fp8_mi355x_slurm.sh b/benchmarks/dsr1_fp8_mi355x_slurm.sh index 8795d61b5..921f08a4c 100644 --- a/benchmarks/dsr1_fp8_mi355x_slurm.sh +++ b/benchmarks/dsr1_fp8_mi355x_slurm.sh @@ -12,14 +12,11 @@ export HF_MODULES_CACHE="/tmp/hf_modules_cache/" export SGLANG_USE_AITER=1 -export RCCL_MSCCL_ENABLE=0 -export ROCM_QUICK_REDUCE_QUANTIZATION=INT4 SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log) set -x python3 -m sglang.launch_server \ - --attention-backend aiter \ --model-path $MODEL \ --host=0.0.0.0 \ --port $PORT \ @@ -30,8 +27,7 @@ python3 -m sglang.launch_server \ --disable-radix-cache \ --num-continuous-decode-steps 4 \ --max-prefill-tokens 196608 \ - --cuda-graph-max-bs 128 \ - --enable-torch-compile > $SERVER_LOG 2>&1 & + --cuda-graph-max-bs 128 > $SERVER_LOG 2>&1 & SERVER_PID=$!