From 976d390dfad374263abffca8b4604bbacd76dcff Mon Sep 17 00:00:00 2001
From: Jatin Gangani
Date: Wed, 19 Nov 2025 16:13:42 -0800
Subject: [PATCH] adding ISL/OSL to collect results table summary

process_result.py now reads the ISL and OSL environment variables and
stores them in the result record as 'isl' / 'osl' (int, or None when the
variable is unset or empty).

summarize.py sorts on the two new fields (after precision, before TP)
and prints them as two new table columns.

Because process_result.py always writes the keys, a record produced
without ISL/OSL carries an explicit None. dict.get(key, default) only
falls back to the default when the key is missing, so summarize.py
treats None explicitly: it sorts as 0 (avoids a None-vs-int TypeError
in the sort key) and is rendered as N/A (instead of the text "None").
---
 utils/process_result.py |  4 ++++
 utils/summarize.py      | 10 +++++++---
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/utils/process_result.py b/utils/process_result.py
index b02ab28f9..1a59ce301 100644
--- a/utils/process_result.py
+++ b/utils/process_result.py
@@ -18,6 +18,8 @@
 framework = os.environ.get('FRAMEWORK')
 precision = os.environ.get('PRECISION')
 mtp_mode = os.environ.get('MTP_MODE')
+isl = os.environ.get('ISL')
+osl = os.environ.get('OSL')
 
 with open(f'{result_filename}.json') as f:
     bmk_result = json.load(f)
@@ -31,6 +33,8 @@
     'model': bmk_result['model_id'],
     'framework': framework,
     'precision': precision,
+    'isl': int(isl) if isl else None,
+    'osl': int(osl) if osl else None,
     'tput_per_gpu': float(bmk_result['total_token_throughput']) / tp_size,
     'output_tput_per_gpu': float(bmk_result['output_throughput']) / decode_gpus,
     'input_tput_per_gpu': (float(bmk_result['total_token_throughput']) - float(bmk_result['output_throughput']) )/ prefill_gpus
diff --git a/utils/summarize.py b/utils/summarize.py
index d5e3f06e4..503da2690 100644
--- a/utils/summarize.py
+++ b/utils/summarize.py
@@ -9,11 +9,11 @@
     with open(result_path) as f:
         result = json.load(f)
     results.append(result)
-results.sort(key=lambda r: (r.get('model', 'unknown'), r['hw'], r.get('framework', 'vllm'), r.get('precision', 'fp8'), r['tp'], r['ep'], r['conc']))
+results.sort(key=lambda r: (r.get('model', 'unknown'), r['hw'], r.get('framework', 'vllm'), r.get('precision', 'fp8'), r.get('isl') or 0, r.get('osl') or 0, r['tp'], r['ep'], r['conc']))
 
 summary_header = f'''\
-| Model | Hardware | Framework | Precision | TP | EP | DP Attention | Conc | TTFT (ms) | TPOT (ms) | Interactivity (tok/s/user) | E2EL (s) | TPUT per GPU | Output TPUT per GPU | Input TPUT per GPU |
-| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |\
+| Model | Hardware | Framework | Precision | ISL | OSL | TP | EP | DP Attention | Conc | TTFT (ms) | TPOT (ms) | Interactivity (tok/s/user) | E2EL (s) | TPUT per GPU | Output TPUT per GPU | Input TPUT per GPU |
+| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |\
 '''
 print(summary_header)
 
@@ -21,11 +21,15 @@
     framework = result.get('framework', 'vllm')
     precision = result.get('precision', 'fp8')
     model = result.get('model', 'unknown')
+    isl = 'N/A' if result.get('isl') is None else result['isl']
+    osl = 'N/A' if result.get('osl') is None else result['osl']
     print(
         f"| {model} "
         f"| {result['hw'].upper()} "
         f"| {framework.upper()} "
         f"| {precision.upper()} "
+        f"| {isl} "
+        f"| {osl} "
         f"| {result['tp']} "
         f"| {result['ep']} "
         f"| {result['dp_attention']} "