Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions .github/benchmark/oot_benchmark_models.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@
{
"display": "Kimi-K2-Thinking-MXFP4 TP4",
"dashboard_model": "Kimi-K2-Thinking-MXFP4-tp4",
"source_path": "amd/Kimi-K2-Thinking-MXFP4",
"path": "amd/Kimi-K2-Thinking-MXFP4",
"source_path": "amd/Kimi-K2-Thinking-MXFP4-AttnFP8",
"path": "amd/Kimi-K2-Thinking-MXFP4-AttnFP8",
"prefix": "kimi-k2-thinking-mxfp4-tp4",
"extra_args": "--trust-remote-code --tensor-parallel-size 4 --max-num-batched-tokens 16384 --max-model-len 16384",
"bench_args": "",
Expand All @@ -54,8 +54,8 @@
{
"display": "Kimi-K2-Thinking-MXFP4 TP8",
"dashboard_model": "Kimi-K2-Thinking-MXFP4-tp8",
"source_path": "amd/Kimi-K2-Thinking-MXFP4",
"path": "amd/Kimi-K2-Thinking-MXFP4",
"source_path": "amd/Kimi-K2-Thinking-MXFP4-AttnFP8",
"path": "amd/Kimi-K2-Thinking-MXFP4-AttnFP8",
"prefix": "kimi-k2-thinking-mxfp4-tp8",
"extra_args": "--trust-remote-code --tensor-parallel-size 8 --max-num-batched-tokens 16384 --max-model-len 16384",
"bench_args": "",
Expand All @@ -65,8 +65,8 @@
{
"display": "Kimi-K2.5-MXFP4 TP4",
"dashboard_model": "Kimi-K2.5-MXFP4-tp4",
"source_path": "amd/Kimi-K2.5-MXFP4",
"path": "amd/Kimi-K2.5-MXFP4",
"source_path": "amd/Kimi-K2.5-MXFP4-AttnFP8",
"path": "amd/Kimi-K2.5-MXFP4-AttnFP8",
"prefix": "kimi-k25-mxfp4-tp4",
"extra_args": "--trust-remote-code --tensor-parallel-size 4",
"bench_args": "",
Expand All @@ -76,8 +76,8 @@
{
"display": "Kimi-K2.5-MXFP4 TP8",
"dashboard_model": "Kimi-K2.5-MXFP4",
"source_path": "amd/Kimi-K2.5-MXFP4",
"path": "amd/Kimi-K2.5-MXFP4",
"source_path": "amd/Kimi-K2.5-MXFP4-AttnFP8",
"path": "amd/Kimi-K2.5-MXFP4-AttnFP8",
"prefix": "kimi-k25-mxfp4-tp8",
"extra_args": "--trust-remote-code --tensor-parallel-size 8",
"bench_args": "",
Expand Down Expand Up @@ -170,7 +170,7 @@
"extra_args": "--trust-remote-code --tensor-parallel-size 1 --max-num-batched-tokens 32768 --max-model-len 16384",
"bench_args": "",
"runner": "atom-mi355-8gpu-oot-benchmark",
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_USE_FLYDSL_GDR=1"
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0"
},
{
"display": "Qwen3-Next-80B-A3B-Instruct-FP8 TP4",
Expand Down
13 changes: 11 additions & 2 deletions .github/workflows/atom-vllm-benchmark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: ATOM vLLM Benchmark

concurrency:
group: ${{ github.workflow }}-${{ github.repository }}-${{ github.ref_name }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
cancel-in-progress: false

on:
workflow_dispatch:
Expand Down Expand Up @@ -399,7 +399,16 @@ jobs:
excluded_pairs = set(model.get("excluded_input_output_pairs", []))
model_params = []
seen = set()
extra_concurrency = (128, 256) if str(model.get("prefix", "")).startswith("qwen3-5-") else ()
prefix = str(model.get("prefix", ""))
extra_concurrency = (
(128, 256)
if prefix.startswith((
"qwen3-5-",
"kimi-k2-thinking-mxfp4-",
"kimi-k25-mxfp4-",
))
else ()
)

for param in params:
variants = [param]
Expand Down
21 changes: 19 additions & 2 deletions .github/workflows/docker-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ on:
description: "Build OOT vLLM image for manual runs (scheduled nightly runs always build OOT; default enabled so manual releases also publish OOT)"
type: boolean
default: true
oot_tag_suffix:
description: "Optional suffix appended to the OOT vLLM nightly tag for manual runs. Example: custom -> rocm/atom-dev:vllm-v<version>-nightly_<date>-custom. When set, vllm-latest is not updated."
default: ""
build_sglang_image:
description: "Build SGLang+ATOM image for manual runs (scheduled nightly runs always build SGLang+ATOM; default enabled so manual releases also publish SGLang+ATOM)"
type: boolean
Expand Down Expand Up @@ -112,6 +115,7 @@ jobs:
echo "AITER_COMMIT: ${{ inputs.aiter_commit || env.AITER_COMMIT }}"
echo "RCCL_REPO: ${{ inputs.rccl_repo || env.RCCL_REPO }}"
echo "RCCL_BRANCH: ${{ inputs.rccl_branch || env.RCCL_BRANCH }}"
echo "OOT_TAG_SUFFIX: ${{ inputs.oot_tag_suffix || '' }}"
echo "SGLANG_REPO: ${{ inputs.sglang_repo || env.SGLANG_REPO }}"
echo "SGLANG_REF: ${{ inputs.sglang_ref || env.SGLANG_REF }}"
echo "SGLANG_VERSION: ${{ inputs.sglang_version || env.SGLANG_VERSION }}"
Expand Down Expand Up @@ -211,13 +215,26 @@ jobs:
- name: Push OOT Docker image
if: ${{ success() && (inputs.only_release_oot == true || (inputs.only_release_sglang != true && (github.event_name == 'schedule' || inputs.build_oot_image == true))) }}
run: |
set -euo pipefail
VLLM_VER="${{ env.VLLM_VERSION }}"
OOT_TAG_SUFFIX="${{ inputs.oot_tag_suffix || '' }}"
OOT_TAG="vllm-v${VLLM_VER}-nightly_$(date +%Y%m%d)"
OOT_LATEST_TAG="vllm-latest"
if [ -n "${OOT_TAG_SUFFIX}" ]; then
if [[ ! "${OOT_TAG_SUFFIX}" =~ ^[A-Za-z0-9_.-]+$ ]]; then
echo "Invalid oot_tag_suffix '${OOT_TAG_SUFFIX}'. Allowed characters: letters, numbers, '.', '_' and '-'."
exit 1
fi
OOT_TAG="${OOT_TAG}-${OOT_TAG_SUFFIX}"
fi
docker tag atom_oot_release:ci rocm/atom-dev:${OOT_TAG}
docker push rocm/atom-dev:${OOT_TAG}
docker tag atom_oot_release:ci rocm/atom-dev:${OOT_LATEST_TAG}
docker push rocm/atom-dev:${OOT_LATEST_TAG}
if [ -z "${OOT_TAG_SUFFIX}" ]; then
docker tag atom_oot_release:ci rocm/atom-dev:${OOT_LATEST_TAG}
docker push rocm/atom-dev:${OOT_LATEST_TAG}
else
echo "Custom OOT tag suffix '${OOT_TAG_SUFFIX}' provided; skipping rocm/atom-dev:${OOT_LATEST_TAG} update."
fi

- name: Build SGLang Docker image
if: ${{ success() && (inputs.only_release_sglang == true || (inputs.only_release_oot != true && (github.event_name == 'schedule' || inputs.build_sglang_image == true))) }}
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ ARG MAX_JOBS

RUN pip install --upgrade setuptools_scm
RUN echo "========== [Parallel] Building Aiter ==========" && \
git clone --depth 1 $AITER_REPO /app/aiter-test && \
git clone $AITER_REPO /app/aiter-test && \
cd /app/aiter-test && \
pip install -r requirements.txt && \
git checkout $AITER_COMMIT && \
Expand Down
Loading