diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 5bc61f53a..61be1c8e2 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -777,25 +777,24 @@ gptoss-fp4-mi355x-vllm: - { tp: 8, conc-start: 4, conc-end: 8 } gptoss-fp4-mi355x-atom: - image: rocm/atom:rocm7.1.1-ubuntu24.04-pytorch2.9-atom0.1.1-MI350x + image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post model: openai/gpt-oss-120b model-prefix: gptoss runner: mi355x precision: fp4 framework: atom multinode: false - scenarios: - fixed-seq-len: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 1, conc-start: 16, conc-end: 128 } - - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 1, conc-start: 4, conc-end: 128 } - - { tp: 8, ep: 1, conc-start: 4, conc-end: 16 } + seq-len-configs: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 1, conc-start: 16, conc-end: 256 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 1, conc-start: 4, conc-end: 256 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 16 } dsr1-fp8-mi355x-atom: image: rocm/atom:rocm7.1.1-ubuntu24.04-pytorch2.9-atom0.1.1-MI350x diff --git a/benchmarks/single_node/gptoss_fp4_mi355x_atom.sh b/benchmarks/single_node/gptoss_fp4_mi355x_atom.sh index 76bc87c0c..ee0810e8f 100644 --- a/benchmarks/single_node/gptoss_fp4_mi355x_atom.sh +++ b/benchmarks/single_node/gptoss_fp4_mi355x_atom.sh @@ -44,6 +44,7 @@ fi # Start GPU monitoring (power, temperature, clocks every second) start_gpu_monitor +MEM_FRAC_STATIC=0.9 set -x @@ -54,6 +55,7 @@ python3 -m atom.entrypoints.openai_server \ --server-port $PORT \ -tp $TP \ --kv_cache_dtype fp8 $CALCULATED_MAX_MODEL_LEN $EP \ + --gpu-memory-utilization $MEM_FRAC_STATIC \ --block-size $BLOCK_SIZE > $SERVER_LOG 2>&1 & SERVER_PID=$! 
diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 647ec35f9..f50347c65 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1902,6 +1902,12 @@ - "Pass --dsv4 to run_benchmark_serving so MTP benchmarks use the DSv4 chat template (PR #1153)" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1182 +- config-keys: + - gptoss-fp4-mi355x-atom + description: + - "Update GPTOSS-120B FP4 MI355X Atom benchmark: bump image to rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post, move scenarios/fixed-seq-len to seq-len-configs, extend tp=1 concurrency sweeps to conc-end 256, and pass --gpu-memory-utilization 0.9 to the server" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1195 + - config-keys: - dsv4-fp4-b300-vllm description: