From 1b47665a57fa47ea6baf15ea1c10a14519a5a7f7 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Fri, 23 Jan 2026 12:09:54 -0800 Subject: [PATCH 01/10] remove assign --- .github/workflows/claude.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml index 2417e63da..71e7f6b03 100644 --- a/.github/workflows/claude.yml +++ b/.github/workflows/claude.yml @@ -4,7 +4,7 @@ on: issue_comment: types: [created] issues: - types: [opened, assigned] + types: [opened] pull_request_review_comment: types: [created] From ea6f98e0d30cf9fd196aba55878e826ab6838bc8 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Sun, 25 Jan 2026 16:15:09 -0800 Subject: [PATCH 02/10] initial --- perf-changelog.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 7f4944259..beef977fd 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -211,6 +211,7 @@ - "Set --attention-backend aiter for AMD aiter attention backend" - "Update chunked-prefill-size and max-prefill-tokens from 196608 to 131072" pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/544 + - config-keys: - dsr1-fp8-mi325x-sglang description: @@ -221,3 +222,11 @@ - "Reduce chunked-prefill-size from 196608 to 131072" - "Reduce max-prefill-tokens from 196608 to 131072" pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/545 + +- config-keys: + - gptoss* + - dsr1* + description: + - Add evals to GPT-OSS and DeepSeek R1 benchmark 1k8k sweeps + pr-link: + - https://github.com/InferenceMAX/InferenceMAX/pull/258 From f053d797a3efc74e083bd3af6921847f2e6b6ed9 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Sun, 25 Jan 2026 16:17:42 -0800 Subject: [PATCH 03/10] update perf --- perf-changelog.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index beef977fd..895779094 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -228,5 +228,4 @@ - dsr1* description: - Add evals to GPT-OSS and DeepSeek R1 benchmark 1k8k sweeps - pr-link: - - https://github.com/InferenceMAX/InferenceMAX/pull/258 + pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/258 From 064d50adf30291bda167829ce5484549cbfce907 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Mon, 26 Jan 2026 09:19:51 -0600 Subject: [PATCH 04/10] fix perf changelog --- perf-changelog.yaml | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 895779094..8ee049445 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -224,8 +224,32 @@ pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/545 - config-keys: - - gptoss* - - dsr1* + # NVIDIA single-node + - dsr1-fp4-b200-sglang + - dsr1-fp4-b200-trt + - dsr1-fp4-b200-trt-mtp + - dsr1-fp8-b200-sglang + - dsr1-fp8-b200-trt + - dsr1-fp8-b200-trt-mtp + - dsr1-fp8-h200-sglang + - dsr1-fp8-h200-trt + - dsr1-fp8-h200-trt-mtp + - gptoss-fp4-b200-trt + - gptoss-fp4-b200-vllm + - gptoss-fp4-h100-vllm + - gptoss-fp4-h200-trt + - gptoss-fp4-h200-vllm + # AMD single-node + - dsr1-fp4-mi355x-sglang + - dsr1-fp4-mi355x-atom + - dsr1-fp8-mi300x-sglang + - dsr1-fp8-mi325x-sglang + - dsr1-fp8-mi355x-sglang + - dsr1-fp8-mi355x-atom + - gptoss-fp4-mi300x-vllm + - gptoss-fp4-mi325x-vllm + - gptoss-fp4-mi355x-vllm + - gptoss-fp4-mi355x-atom description: - Add evals to GPT-OSS and DeepSeek R1 benchmark 1k8k sweeps pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/258 From 928705b404c91e6dbcb62d697dcd3f40f1048633 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Mon, 26 Jan 2026 09:55:36 -0600 Subject: [PATCH 05/10] trigger test sweep From af1741dd9ec5fddf7e9fe51f74743bc2cf220095 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Mon, 26 Jan 2026 09:59:06 -0600 Subject: [PATCH 06/10] trigger test sweep pt 2 From 80fbf3c64319e0f55ab6844813011ce257e76520 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Mon, 26 Jan 2026 12:25:43 -0600 Subject: [PATCH 07/10] rebase for evals only --- perf-changelog.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 8ee049445..48814eecc 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -253,3 +253,4 @@ description: - Add evals to GPT-OSS and DeepSeek R1 benchmark 1k8k sweeps pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/258 + evals-only: true From 7383aa9b81dd6a86435e9805c9130351e24068f2 Mon Sep 17 00:00:00 2001 From: Cameron Quilici Date: Mon, 26 Jan 2026 23:03:33 +0000 Subject: [PATCH 08/10] Update perf-changelog.yaml --- perf-changelog.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index ce04044ce..a0d73021a 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -229,7 +229,7 @@ description: - "Fix AITER env vars for vLLM v0.14.0 on AMD MI300X and MI325X" pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/535 - + - config-keys: # NVIDIA single-node - dsr1-fp4-b200-sglang @@ -260,4 +260,4 @@ description: - Add official GSM8k eval results to GPT-OSS and DeepSeek R1 scenarios pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/558 - evals-only: true \ No newline at end of file + evals-only: true From 1dbeedd8c103bab4c637c8bc5259d4d93a5e9042 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Mon, 26 Jan 2026 17:05:23 -0600 Subject: [PATCH 09/10] remove newline --- perf-changelog.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index a0d73021a..5bb0d8a8c 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -211,7 +211,6 @@ - "Set --attention-backend aiter for AMD aiter attention backend" - "Update chunked-prefill-size and max-prefill-tokens from 196608 to 131072" pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/544 - - config-keys: - dsr1-fp8-mi325x-sglang description: From ae2dd3b368ad0fe1c5dd0db3844d34dab926e67a Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Mon, 26 Jan 2026 17:09:33 -0600 Subject: [PATCH 10/10] update perf changelog --- perf-changelog.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 5bb0d8a8c..a0ec46600 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -228,7 +228,7 @@ description: - "Fix AITER env vars for vLLM v0.14.0 on AMD MI300X and MI325X" pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/535 - + - config-keys: # NVIDIA single-node - dsr1-fp4-b200-sglang @@ -259,4 +259,4 @@ description: - Add official GSM8k eval results to GPT-OSS and DeepSeek R1 scenarios pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/558 - evals-only: true + evals-only: true \ No newline at end of file