From ff53521646286d2bf74dc1fafe984abfceadf7ce Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Sun, 26 Apr 2026 16:18:24 -0700 Subject: [PATCH 1/2] Lower MIN_EVAL_CONC to 8 and run evals for dsv4-fp4-b300-sglang-mtp Evals were skipped because the sweep's max concurrency (8) was below MIN_EVAL_CONC (16). Lower the threshold to 8 so low-concurrency latency-focused configs can run evals, and add an evals-only perf-changelog entry for dsv4-fp4-b300-sglang-mtp. Co-Authored-By: Claude Opus 4.6 --- perf-changelog.yaml | 6 ++++++ utils/matrix_logic/generate_sweep_configs.py | 2 +- utils/matrix_logic/test_generate_sweep_configs.py | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 992c64ecb..7ba23dc12 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1886,3 +1886,9 @@ - "Image pinned to lmsysorg/sglang:deepseek-v4-blackwell@sha256:df18bfc4aa9ecf59451002b49ba00cae58042de9e2a96378bbd21b404dd62c7b" - "Adds SGLANG_OPT_* env knobs (SWA_SPLIT_LEAF_ON_INSERT, USE_JIT_NORM, USE_JIT_INDEXER_METADATA, USE_TOPK_V2, USE_CUSTOM_ALL_REDUCE_V2)" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1187 + +- config-keys: + - dsv4-fp4-b300-sglang-mtp + description: + - "Run evals for DeepSeek-V4-Pro FP4 B300 SGLang MTP (conc 8 now eligible after MIN_EVAL_CONC lowered to 8)" + evals-only: true diff --git a/utils/matrix_logic/generate_sweep_configs.py b/utils/matrix_logic/generate_sweep_configs.py index e543bb4af..6df13e516 100644 --- a/utils/matrix_logic/generate_sweep_configs.py +++ b/utils/matrix_logic/generate_sweep_configs.py @@ -19,7 +19,7 @@ "8k1k": (8192, 1024) } -MIN_EVAL_CONC = 16 +MIN_EVAL_CONC = 8 # Reverse mapping for exp-name generation seq_len_itos = {v: k for k, v in seq_len_stoi.items()} diff --git a/utils/matrix_logic/test_generate_sweep_configs.py b/utils/matrix_logic/test_generate_sweep_configs.py index a03ded47f..b2dac2212 100644 --- a/utils/matrix_logic/test_generate_sweep_configs.py +++ b/utils/matrix_logic/test_generate_sweep_configs.py @@ -333,7 +333,7 @@ def test_multi_node_eval_conc_uses_only_conc_values_at_or_above_min_conc(self): result = mark_eval_entries(matrix_values) assert result[0]["run-eval"] is True - assert result[0]["eval-conc"] == 32 + assert result[0]["eval-conc"] == 16 assert result[1]["run-eval"] is False def test_marks_highest_and_median_conc(self): From ec065b3cdc9f3e50ea2bb4b6587a1c8a57f24bed Mon Sep 17 00:00:00 2001 From: Bryan Shan <58582368+Oseltamivir@users.noreply.github.com> Date: Sun, 26 Apr 2026 17:43:23 -0700 Subject: [PATCH 2/2] Update perf-changelog.yaml --- perf-changelog.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 7ba23dc12..dbd0b0a18 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1890,5 +1890,6 @@ - config-keys: - dsv4-fp4-b300-sglang-mtp description: - - "Run evals for DeepSeek-V4-Pro FP4 B300 SGLang MTP (conc 8 now eligible after MIN_EVAL_CONC lowered to 8)" + - "Run evals for DSv4 FP4 B300 SGLang MTP" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1189 evals-only: true