From 2eabfc9b6cc70df5fef488ae14b1740717579363 Mon Sep 17 00:00:00 2001 From: Benjamin Cheung Date: Mon, 3 Nov 2025 17:18:49 -0500 Subject: [PATCH 01/14] debug: add some timestamping to see how the timing would be called --- .ci/scripts/test_llama.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh index 1e3b331f427..06f6d4e61e9 100644 --- a/.ci/scripts/test_llama.sh +++ b/.ci/scripts/test_llama.sh @@ -254,9 +254,21 @@ fi if [[ "${QUANTIZE_KV_CACHE}" == "ON" ]]; then EXPORT_ARGS="${EXPORT_ARGS} model.quantize_kv_cache=true" fi + +# Display the time +echo "==========================================" +echo "Starting model export at $(date +"%Y-%m-%d %H:%M:%S")" +echo "Configuration: MODE=${MODE}, DTYPE=${DTYPE}, MODEL=${MODEL_NAME}" +EXPORT_START_TIME=$(date +%s) + # Add dynamically linked library location $PYTHON_EXECUTABLE -m extension.llm.export.export_llm ${EXPORT_ARGS} +EXPORT_END_TIME=$(date +%s) +EXPORT_DURATION=$((EXPORT_END_TIME - EXPORT_START_TIME)) +echo "Model export completed at $(date +"%Y-%m-%d %H:%M:%S") - Duration: ${EXPORT_DURATION} seconds" +echo "==========================================" + # Create tokenizer.bin. 
echo "Creating tokenizer.bin" $PYTHON_EXECUTABLE -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin From 8cbd036a4fb2bca7a817703b1c88005e01ac757e Mon Sep 17 00:00:00 2001 From: Benjamin Cheung Date: Mon, 3 Nov 2025 23:15:14 -0500 Subject: [PATCH 02/14] fix: add a max export time arg with a default of 500 --- .ci/scripts/test_llama.sh | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) mode change 100644 => 100755 .ci/scripts/test_llama.sh diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh old mode 100644 new mode 100755 index 06f6d4e61e9..1ed2baf81b2 --- a/.ci/scripts/test_llama.sh +++ b/.ci/scripts/test_llama.sh @@ -54,6 +54,9 @@ PT2E_QUANTIZE="${PT2E_QUANTIZE:-}" # Default CMake Build Type to release mode CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release} +# Default CMake Build Type to release mode +MAX_EXPORT_TIME=${MAX_EXPORT_TIME:500} + # Argument validation is done individually below for each required parameter if [[ -z "${MODEL_NAME:-}" ]]; then echo "Missing model name, exiting..." @@ -256,7 +259,6 @@ if [[ "${QUANTIZE_KV_CACHE}" == "ON" ]]; then fi # Display the time -echo "==========================================" echo "Starting model export at $(date +"%Y-%m-%d %H:%M:%S")" echo "Configuration: MODE=${MODE}, DTYPE=${DTYPE}, MODEL=${MODEL_NAME}" EXPORT_START_TIME=$(date +%s) @@ -267,7 +269,14 @@ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm ${EXPORT_ARGS} EXPORT_END_TIME=$(date +%s) EXPORT_DURATION=$((EXPORT_END_TIME - EXPORT_START_TIME)) echo "Model export completed at $(date +"%Y-%m-%d %H:%M:%S") - Duration: ${EXPORT_DURATION} seconds" -echo "==========================================" + +if [ $EXPORT_DURATION -gt $MAX_EXPORT_TIME ]; then + echo "Failure; Export took ${EXPORT_DURATION} seconds, exceeding threshold of ${MAX_EXPORT_TIME} seconds" + exit 1 +fi + +echo "Successl Export time check passed: ${EXPORT_DURATION}s <= ${MAX_EXPORT_TIME}s" + # Create tokenizer.bin. 
echo "Creating tokenizer.bin" From 98a8de7f5d3f692068525f4682ed079ef7281556 Mon Sep 17 00:00:00 2001 From: Benjamin Cheung Date: Mon, 3 Nov 2025 23:16:42 -0500 Subject: [PATCH 03/14] fix: add a comment for legibility --- .ci/scripts/test_llama.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh index 1ed2baf81b2..f072c6d4e82 100755 --- a/.ci/scripts/test_llama.sh +++ b/.ci/scripts/test_llama.sh @@ -54,7 +54,7 @@ PT2E_QUANTIZE="${PT2E_QUANTIZE:-}" # Default CMake Build Type to release mode CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release} -# Default CMake Build Type to release mode +# Default maximum export time. MAX_EXPORT_TIME=${MAX_EXPORT_TIME:500} # Argument validation is done individually below for each required parameter @@ -270,6 +270,7 @@ EXPORT_END_TIME=$(date +%s) EXPORT_DURATION=$((EXPORT_END_TIME - EXPORT_START_TIME)) echo "Model export completed at $(date +"%Y-%m-%d %H:%M:%S") - Duration: ${EXPORT_DURATION} seconds" +# Check export time against threshold. Default is 500 seconds. if [ $EXPORT_DURATION -gt $MAX_EXPORT_TIME ]; then echo "Failure; Export took ${EXPORT_DURATION} seconds, exceeding threshold of ${MAX_EXPORT_TIME} seconds" exit 1 From 39f514f638be78c07727ccfe4ef7984f05be453b Mon Sep 17 00:00:00 2001 From: Benjamin Cheung Date: Wed, 5 Nov 2025 17:13:47 -0500 Subject: [PATCH 04/14] Fix: update the default value --- .ci/scripts/test_llama.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh index f072c6d4e82..ea9d10ae3b7 100755 --- a/.ci/scripts/test_llama.sh +++ b/.ci/scripts/test_llama.sh @@ -55,7 +55,7 @@ PT2E_QUANTIZE="${PT2E_QUANTIZE:-}" CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release} # Default maximum export time. 
-MAX_EXPORT_TIME=${MAX_EXPORT_TIME:500} +MAX_EXPORT_TIME=${MAX_EXPORT_TIME:-500} # Argument validation is done individually below for each required parameter if [[ -z "${MODEL_NAME:-}" ]]; then From dbdf5eab66aa22926f383661cecbb58b4f997fcd Mon Sep 17 00:00:00 2001 From: Benjamin Cheung Date: Wed, 5 Nov 2025 17:50:54 -0500 Subject: [PATCH 05/14] fix: update typo --- .ci/scripts/test_llama.sh | 51 +++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh index ea9d10ae3b7..054d1a75dee 100755 --- a/.ci/scripts/test_llama.sh +++ b/.ci/scripts/test_llama.sh @@ -54,9 +54,6 @@ PT2E_QUANTIZE="${PT2E_QUANTIZE:-}" # Default CMake Build Type to release mode CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release} -# Default maximum export time. -MAX_EXPORT_TIME=${MAX_EXPORT_TIME:-500} - # Argument validation is done individually below for each required parameter if [[ -z "${MODEL_NAME:-}" ]]; then echo "Missing model name, exiting..." 
@@ -140,6 +137,51 @@ else QNN_SDK_ROOT="" fi +# Set dynamic max export times +PLATFORM="x86" +if [[ "$(uname)" == "Darwin" ]]; then + PLATFORM="macos" +elif [[ "$(uname -m)" == "aarch64" ]] || [[ "$(uname -m)" == "arm64" ]]; then + PLATFORM="arm64" +fi + +# Lookup threshold based on platform:dtype:mode +case "${PLATFORM}:${DTYPE}:${MODE}" in + # Linux x86 configurations + "x86:fp32:portable") MAX_EXPORT_TIME=100 ;; # actual: 72s + "x86:fp32:xnnpack+custom") MAX_EXPORT_TIME=360 ;; # actual: 276s + "x86:fp32:xnnpack+custom+qe") MAX_EXPORT_TIME=360 ;; + "x86:fp32:xnnpack+custom+quantize_kv") MAX_EXPORT_TIME=400 ;; + "x86:fp32:xnnpack+quantize_kv") MAX_EXPORT_TIME=400 ;; + "x86:bf16:portable") MAX_EXPORT_TIME=100 ;; # actual: 75s + "x86:bf16:custom") MAX_EXPORT_TIME=130 ;; + + # Linux ARM64 configurations + "arm64:fp32:portable") MAX_EXPORT_TIME=162 ;; # actual: 124s + "arm64:fp32:xnnpack+custom") MAX_EXPORT_TIME=630 ;; # actual: 483s + "arm64:fp32:xnnpack+custom+qe") MAX_EXPORT_TIME=630 ;; + "arm64:fp32:xnnpack+custom+quantize_kv") MAX_EXPORT_TIME=680 ;; + "arm64:fp32:xnnpack+quantize_kv") MAX_EXPORT_TIME=680 ;; + "arm64:bf16:portable") MAX_EXPORT_TIME=162 ;; # actual: 118s + "arm64:bf16:custom") MAX_EXPORT_TIME=133 ;; # actual: 102s + + # macOS configurations + "macos:fp32:mps") MAX_EXPORT_TIME=60 ;; # actual: 30s + "macos:fp32:coreml") MAX_EXPORT_TIME=80 ;; # actual: 61s + "macos:fp32:xnnpack+custom+quantize_kv") MAX_EXPORT_TIME=170 ;; # actual: 133s + "macos:fp32:xnnpack+custom") MAX_EXPORT_TIME=150 ;; + "macos:fp32:portable") MAX_EXPORT_TIME=80 ;; + "macos:bf16:portable") MAX_EXPORT_TIME=80 ;; + "macos:bf16:custom") MAX_EXPORT_TIME=100 ;; + + # Default fallback for unknown configurations + *) + MAX_EXPORT_TIME=500 + echo "Warning: No threshold defined for ${PLATFORM}:${DTYPE}:${MODE}, using default: ${MAX_EXPORT_TIME}s" + ;; +esac + + echo "QNN option ${QNN}" echo "QNN_SDK_ROOT: ${QNN_SDK_ROOT}" @@ -259,7 +301,6 @@ if [[ "${QUANTIZE_KV_CACHE}" == "ON" ]]; then fi 
# Display the time -echo "Starting model export at $(date +"%Y-%m-%d %H:%M:%S")" echo "Configuration: MODE=${MODE}, DTYPE=${DTYPE}, MODEL=${MODEL_NAME}" EXPORT_START_TIME=$(date +%s) @@ -276,7 +317,7 @@ if [ $EXPORT_DURATION -gt $MAX_EXPORT_TIME ]; then exit 1 fi -echo "Successl Export time check passed: ${EXPORT_DURATION}s <= ${MAX_EXPORT_TIME}s" +echo "Success; Export time check passed: ${EXPORT_DURATION}s <= ${MAX_EXPORT_TIME}s" # Create tokenizer.bin. From 728f086c36f212c6502a15e66d40359e68c57215 Mon Sep 17 00:00:00 2001 From: Benjamin Cheung Date: Wed, 5 Nov 2025 17:52:58 -0500 Subject: [PATCH 06/14] fix: remove the extra echo line --- .ci/scripts/test_llama.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh index 054d1a75dee..eb640559409 100755 --- a/.ci/scripts/test_llama.sh +++ b/.ci/scripts/test_llama.sh @@ -300,8 +300,6 @@ if [[ "${QUANTIZE_KV_CACHE}" == "ON" ]]; then EXPORT_ARGS="${EXPORT_ARGS} model.quantize_kv_cache=true" fi -# Display the time -echo "Configuration: MODE=${MODE}, DTYPE=${DTYPE}, MODEL=${MODEL_NAME}" EXPORT_START_TIME=$(date +%s) # Add dynamically linked library location From 6eefcc33139ffb9e949e65147f790337b884642b Mon Sep 17 00:00:00 2001 From: Benjamin Cheung Date: Wed, 5 Nov 2025 18:17:46 -0500 Subject: [PATCH 07/14] fix: remove the extra configs --- .ci/scripts/test_llama.sh | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh index eb640559409..bfd42223a82 100755 --- a/.ci/scripts/test_llama.sh +++ b/.ci/scripts/test_llama.sh @@ -150,18 +150,11 @@ case "${PLATFORM}:${DTYPE}:${MODE}" in # Linux x86 configurations "x86:fp32:portable") MAX_EXPORT_TIME=100 ;; # actual: 72s "x86:fp32:xnnpack+custom") MAX_EXPORT_TIME=360 ;; # actual: 276s - "x86:fp32:xnnpack+custom+qe") MAX_EXPORT_TIME=360 ;; - "x86:fp32:xnnpack+custom+quantize_kv") MAX_EXPORT_TIME=400 ;; - "x86:fp32:xnnpack+quantize_kv") MAX_EXPORT_TIME=400 ;; 
"x86:bf16:portable") MAX_EXPORT_TIME=100 ;; # actual: 75s - "x86:bf16:custom") MAX_EXPORT_TIME=130 ;; # Linux ARM64 configurations "arm64:fp32:portable") MAX_EXPORT_TIME=162 ;; # actual: 124s "arm64:fp32:xnnpack+custom") MAX_EXPORT_TIME=630 ;; # actual: 483s - "arm64:fp32:xnnpack+custom+qe") MAX_EXPORT_TIME=630 ;; - "arm64:fp32:xnnpack+custom+quantize_kv") MAX_EXPORT_TIME=680 ;; - "arm64:fp32:xnnpack+quantize_kv") MAX_EXPORT_TIME=680 ;; "arm64:bf16:portable") MAX_EXPORT_TIME=162 ;; # actual: 118s "arm64:bf16:custom") MAX_EXPORT_TIME=133 ;; # actual: 102s @@ -169,10 +162,6 @@ case "${PLATFORM}:${DTYPE}:${MODE}" in "macos:fp32:mps") MAX_EXPORT_TIME=60 ;; # actual: 30s "macos:fp32:coreml") MAX_EXPORT_TIME=80 ;; # actual: 61s "macos:fp32:xnnpack+custom+quantize_kv") MAX_EXPORT_TIME=170 ;; # actual: 133s - "macos:fp32:xnnpack+custom") MAX_EXPORT_TIME=150 ;; - "macos:fp32:portable") MAX_EXPORT_TIME=80 ;; - "macos:bf16:portable") MAX_EXPORT_TIME=80 ;; - "macos:bf16:custom") MAX_EXPORT_TIME=100 ;; # Default fallback for unknown configurations *) From b24d8f4a2dd34aaff18fa415ae39798567fd8e35 Mon Sep 17 00:00:00 2001 From: Benjamin Cheung Date: Wed, 5 Nov 2025 19:21:08 -0500 Subject: [PATCH 08/14] fix: remove the extra configs --- .ci/scripts/test_llama.sh | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh index bfd42223a82..1e32b1dc349 100755 --- a/.ci/scripts/test_llama.sh +++ b/.ci/scripts/test_llama.sh @@ -145,31 +145,35 @@ elif [[ "$(uname -m)" == "aarch64" ]] || [[ "$(uname -m)" == "arm64" ]]; then PLATFORM="arm64" fi +BUFFER_TIME=50 + # Lookup threshold based on platform:dtype:mode case "${PLATFORM}:${DTYPE}:${MODE}" in + # Linux x86 configurations - "x86:fp32:portable") MAX_EXPORT_TIME=100 ;; # actual: 72s - "x86:fp32:xnnpack+custom") MAX_EXPORT_TIME=360 ;; # actual: 276s - "x86:bf16:portable") MAX_EXPORT_TIME=100 ;; # actual: 75s + "x86:fp32:portable") 
ACT_EXPORT_TIME=72 ;; + "x86:fp32:xnnpack+custom") ACT_EXPORT_TIME=276 ;; + "x86:bf16:portable") ACT_EXPORT_TIME=75 ;; # Linux ARM64 configurations - "arm64:fp32:portable") MAX_EXPORT_TIME=162 ;; # actual: 124s - "arm64:fp32:xnnpack+custom") MAX_EXPORT_TIME=630 ;; # actual: 483s - "arm64:bf16:portable") MAX_EXPORT_TIME=162 ;; # actual: 118s - "arm64:bf16:custom") MAX_EXPORT_TIME=133 ;; # actual: 102s + "arm64:fp32:portable") ACT_EXPORT_TIME=124 ;; + "arm64:fp32:xnnpack+custom") ACT_EXPORT_TIME=483 ;; + "arm64:bf16:portable") ACT_EXPORT_TIME=118 ;; + "arm64:bf16:custom") ACT_EXPORT_TIME=102 ;; # macOS configurations - "macos:fp32:mps") MAX_EXPORT_TIME=60 ;; # actual: 30s - "macos:fp32:coreml") MAX_EXPORT_TIME=80 ;; # actual: 61s - "macos:fp32:xnnpack+custom+quantize_kv") MAX_EXPORT_TIME=170 ;; # actual: 133s + "macos:fp32:mps") ACT_EXPORT_TIME=30 ;; + "macos:fp32:coreml") ACT_EXPORT_TIME=61 ;; + "macos:fp32:xnnpack+custom+quantize_kv") ACT_EXPORT_TIME=133 ;; # Default fallback for unknown configurations *) - MAX_EXPORT_TIME=500 + ACT_EXPORT_TIME=450 echo "Warning: No threshold defined for ${PLATFORM}:${DTYPE}:${MODE}, using default: ${MAX_EXPORT_TIME}s" ;; esac +MAX_EXPORT_TIME=$((ACT_EXPORT_TIME + BUFFER_TIME)) echo "QNN option ${QNN}" echo "QNN_SDK_ROOT: ${QNN_SDK_ROOT}" From cdf7df057a860119c89e4301c6809d1230851a5a Mon Sep 17 00:00:00 2001 From: Benjamin Cheung Date: Thu, 6 Nov 2025 17:05:17 -0500 Subject: [PATCH 09/14] fix: modify the echo statement and update the max_export time calculation --- .ci/scripts/test_llama.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh index 1e32b1dc349..e5c3959e5c5 100755 --- a/.ci/scripts/test_llama.sh +++ b/.ci/scripts/test_llama.sh @@ -169,7 +169,7 @@ case "${PLATFORM}:${DTYPE}:${MODE}" in # Default fallback for unknown configurations *) ACT_EXPORT_TIME=450 - echo "Warning: No threshold defined for ${PLATFORM}:${DTYPE}:${MODE}, using default:
${MAX_EXPORT_TIME}s" + echo "Warning: No threshold defined for ${PLATFORM}:${DTYPE}:${MODE}, using default: $((ACT_EXPORT_TIME + BUFFER_TIME))s" ;; esac @@ -304,7 +304,7 @@ echo "Model export completed at $(date +"%Y-%m-%d %H:%M:%S") - Duration: ${EXPOR # Check export time against threshold. Default is 500 seconds. if [ $EXPORT_DURATION -gt $MAX_EXPORT_TIME ]; then - echo "Failure; Export took ${EXPORT_DURATION} seconds, exceeding threshold of ${MAX_EXPORT_TIME} seconds" + echo "Failure: Export took ${EXPORT_DURATION}s (threshold: ${MAX_EXPORT_TIME}s). This PR may have regressed export time — review changes or bump the threshold if appropriate." exit 1 fi From 54c63d17be62f564ce890a6f09d12043b3ab06b6 Mon Sep 17 00:00:00 2001 From: Benjamin Cheung Date: Thu, 6 Nov 2025 20:42:58 -0500 Subject: [PATCH 10/14] fix: add more configurations --- .ci/scripts/test_llama.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh index e5c3959e5c5..10f0296fceb 100755 --- a/.ci/scripts/test_llama.sh +++ b/.ci/scripts/test_llama.sh @@ -154,12 +154,20 @@ case "${PLATFORM}:${DTYPE}:${MODE}" in "x86:fp32:portable") ACT_EXPORT_TIME=72 ;; "x86:fp32:xnnpack+custom") ACT_EXPORT_TIME=276 ;; "x86:bf16:portable") ACT_EXPORT_TIME=75 ;; + "x86:bf16:custom") ACT_EXPORT_TIME=65 ;; + "x86:fp32:xnnpack+custom+qe") ACT_EXPORT_TIME=285 ;; + "x86:fp32:xnnpack+custom+quantize_kv") ACT_EXPORT_TIME=295 ;; + "x86:fp32:xnnpack+quantize_kv") ACT_EXPORT_TIME=356 ;; + "x86:fp32:qnn") ACT_EXPORT_TIME=334 ;; # Linux ARM64 configurations "arm64:fp32:portable") ACT_EXPORT_TIME=124 ;; "arm64:fp32:xnnpack+custom") ACT_EXPORT_TIME=483 ;; "arm64:bf16:portable") ACT_EXPORT_TIME=118 ;; "arm64:bf16:custom") ACT_EXPORT_TIME=102 ;; + "arm64:fp32:xnnpack+custom+qe") ACT_EXPORT_TIME=486 ;; + "arm64:fp32:xnnpack+custom+quantize_kv") ACT_EXPORT_TIME=521 ;; + "arm64:fp32:xnnpack+quantize_kv") ACT_EXPORT_TIME=514 ;; # macOS configurations "macos:fp32:mps") 
ACT_EXPORT_TIME=30 ;; From 315aa6e18fe256d8eb64ec93e601bd5a270874c6 Mon Sep 17 00:00:00 2001 From: Benjamin Cheung Date: Thu, 6 Nov 2025 20:47:34 -0500 Subject: [PATCH 11/14] fix: add the qnn configurations --- .ci/scripts/test_llama.sh | 43 ++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh index 10f0296fceb..81c1588d23e 100755 --- a/.ci/scripts/test_llama.sh +++ b/.ci/scripts/test_llama.sh @@ -145,39 +145,40 @@ elif [[ "$(uname -m)" == "aarch64" ]] || [[ "$(uname -m)" == "arm64" ]]; then PLATFORM="arm64" fi -BUFFER_TIME=50 +BUFFER_TIME=25 # Lookup threshold based on platform:dtype:mode -case "${PLATFORM}:${DTYPE}:${MODE}" in +case "${PLATFORM}:${DTYPE}:${MODE}:${PT2E_QUANTIZE}" in # Linux x86 configurations - "x86:fp32:portable") ACT_EXPORT_TIME=72 ;; - "x86:fp32:xnnpack+custom") ACT_EXPORT_TIME=276 ;; - "x86:bf16:portable") ACT_EXPORT_TIME=75 ;; - "x86:bf16:custom") ACT_EXPORT_TIME=65 ;; - "x86:fp32:xnnpack+custom+qe") ACT_EXPORT_TIME=285 ;; - "x86:fp32:xnnpack+custom+quantize_kv") ACT_EXPORT_TIME=295 ;; - "x86:fp32:xnnpack+quantize_kv") ACT_EXPORT_TIME=356 ;; - "x86:fp32:qnn") ACT_EXPORT_TIME=334 ;; + "x86:fp32:portable:") ACT_EXPORT_TIME=72 ;; + "x86:fp32:xnnpack+custom:") ACT_EXPORT_TIME=276 ;; + "x86:bf16:portable:") ACT_EXPORT_TIME=75 ;; + "x86:bf16:custom:") ACT_EXPORT_TIME=65 ;; + "x86:fp32:xnnpack+custom+qe:") ACT_EXPORT_TIME=285 ;; + "x86:fp32:xnnpack+custom+quantize_kv:") ACT_EXPORT_TIME=295 ;; + "x86:fp32:xnnpack+quantize_kv:") ACT_EXPORT_TIME=356 ;; + "x86:fp32:qnn:16a16w") ACT_EXPORT_TIME=334 ;; + "x86:fp32:qnn:8a8w") ACT_EXPORT_TIME=81 ;; # Linux ARM64 configurations - "arm64:fp32:portable") ACT_EXPORT_TIME=124 ;; - "arm64:fp32:xnnpack+custom") ACT_EXPORT_TIME=483 ;; - "arm64:bf16:portable") ACT_EXPORT_TIME=118 ;; - "arm64:bf16:custom") ACT_EXPORT_TIME=102 ;; - "arm64:fp32:xnnpack+custom+qe") ACT_EXPORT_TIME=486 ;; - 
"arm64:fp32:xnnpack+custom+quantize_kv") ACT_EXPORT_TIME=521 ;; - "arm64:fp32:xnnpack+quantize_kv") ACT_EXPORT_TIME=514 ;; + "arm64:fp32:portable:") ACT_EXPORT_TIME=124 ;; + "arm64:fp32:xnnpack+custom:") ACT_EXPORT_TIME=483 ;; + "arm64:bf16:portable:") ACT_EXPORT_TIME=118 ;; + "arm64:bf16:custom:") ACT_EXPORT_TIME=102 ;; + "arm64:fp32:xnnpack+custom+qe:") ACT_EXPORT_TIME=486 ;; + "arm64:fp32:xnnpack+custom+quantize_kv:") ACT_EXPORT_TIME=521 ;; + "arm64:fp32:xnnpack+quantize_kv:") ACT_EXPORT_TIME=514 ;; # macOS configurations - "macos:fp32:mps") ACT_EXPORT_TIME=30 ;; - "macos:fp32:coreml") ACT_EXPORT_TIME=61 ;; - "macos:fp32:xnnpack+custom+quantize_kv") ACT_EXPORT_TIME=133 ;; + "macos:fp32:mps:") ACT_EXPORT_TIME=30 ;; + "macos:fp32:coreml:") ACT_EXPORT_TIME=61 ;; + "macos:fp32:xnnpack+custom+quantize_kv:") ACT_EXPORT_TIME=133 ;; # Default fallback for unknown configurations *) ACT_EXPORT_TIME=450 - echo "Warning: No threshold defined for ${PLATFORM}:${DTYPE}:${MODE}, using default: $((ACT_EXPORT_TIME + BUFFER_TIME))s" + echo "Warning: No threshold defined for ${PLATFORM}:${DTYPE}:${MODE}:${PT2E_QUANTIZE}, using default: $((ACT_EXPORT_TIME + BUFFER_TIME))s" ;; esac From 79803294f066f386d4b06ca3a29c66126e931114 Mon Sep 17 00:00:00 2001 From: Benjamin Cheung Date: Wed, 12 Nov 2025 17:15:34 -0500 Subject: [PATCH 12/14] fix: remove the exit 1 --- .ci/scripts/test_llama.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh index 81c1588d23e..2be02460944 100755 --- a/.ci/scripts/test_llama.sh +++ b/.ci/scripts/test_llama.sh @@ -158,8 +158,8 @@ case "${PLATFORM}:${DTYPE}:${MODE}:${PT2E_QUANTIZE}" in "x86:fp32:xnnpack+custom+qe:") ACT_EXPORT_TIME=285 ;; "x86:fp32:xnnpack+custom+quantize_kv:") ACT_EXPORT_TIME=295 ;; "x86:fp32:xnnpack+quantize_kv:") ACT_EXPORT_TIME=356 ;; - "x86:fp32:qnn:16a16w") ACT_EXPORT_TIME=334 ;; - "x86:fp32:qnn:8a8w") ACT_EXPORT_TIME=81 ;; + "x86:fp32:qnn:16a16w") 
ACT_EXPORT_TIME=334 ;; + "x86:fp32:qnn:8a8w") ACT_EXPORT_TIME=81 ;; # Linux ARM64 configurations "arm64:fp32:portable:") ACT_EXPORT_TIME=124 ;; @@ -314,7 +314,6 @@ echo "Model export completed at $(date +"%Y-%m-%d %H:%M:%S") - Duration: ${EXPOR # Check export time against threshold. Default is 500 seconds. if [ $EXPORT_DURATION -gt $MAX_EXPORT_TIME ]; then echo "Failure: Export took ${EXPORT_DURATION}s (threshold: ${MAX_EXPORT_TIME}s). This PR may have regressed export time — review changes or bump the threshold if appropriate." - exit 1 fi echo "Success; Export time check passed: ${EXPORT_DURATION}s <= ${MAX_EXPORT_TIME}s" From 3630a8ec22609ebc1392eb9ff88bfc3a5bc69c41 Mon Sep 17 00:00:00 2001 From: Benjamin Cheung Date: Wed, 12 Nov 2025 20:04:40 -0500 Subject: [PATCH 13/14] test: add a script to extract all the model export times --- scripts/check_model_export_times.py | 225 ++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100644 scripts/check_model_export_times.py diff --git a/scripts/check_model_export_times.py b/scripts/check_model_export_times.py new file mode 100644 index 00000000000..d81f7642004 --- /dev/null +++ b/scripts/check_model_export_times.py @@ -0,0 +1,225 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +import requests +import re +import argparse +from datetime import datetime +from collections import defaultdict + +class GithubActionsClient: + + def __init__(self, token: str): + + self.base_url = "https://api.github.com/repos/pytorch/executorch" + self.__headers = { + "Authorization": f"token {token}", + "Accept": "application/vnd.github+json" + } + + def get_runs(self, params=None): + + runs_url = f"{self.base_url}/actions/runs" + response = requests.get(runs_url, headers=self.__headers, params=params) + response.raise_for_status() + + return response.json()["workflow_runs"] + + def get_jobs(self, run_id: int, jobs_per_page: int=100): + + jobs_url = f"{self.base_url}/actions/runs/{run_id}/jobs" + all_jobs = [] + page 
= 1 + + while True: + response = requests.get( + jobs_url, + headers=self.__headers, + params={"per_page": jobs_per_page, "page": page} + ) + response.raise_for_status() + + json_response = response.json() + jobs = json_response["jobs"] + + if not jobs: # No more jobs + break + + all_jobs.extend(jobs) + + # Stop if we got fewer jobs than requested (last page) + if len(jobs) < jobs_per_page: + break + + page += 1 + + return all_jobs + + def get_job_logs(self, job_id: int): + + logs_url = f"{self.base_url}/actions/jobs/{job_id}/logs" + response = requests.get(logs_url, headers=self.__headers) + response.raise_for_status() + + return response.content.decode() + +def extract_model_export_times(log): + + duration = re.search(r'Model export completed .* Duration: (\d+)', log) + docker_image = re.search(r'DOCKER_IMAGE:\s*(.+?)(?:\s|$)', log) + dtype = re.search(r'DTYPE=(\w+)', log) + mode = re.search(r'MODE=(\S+)', log) + runner = re.search(r'runner:\s*(\S+)', log) + + log_extract = { + "duration": duration.group(1) if duration else None, + "docker_image": docker_image.group(1) if docker_image else None, + "dtype": dtype.group(1) if dtype else None, + "mode": mode.group(1) if mode else None, + "runner": runner.group(1) if runner else None, + } + + + return log_extract + +def extract_full_model_export_times(gha_client, filters=None, run_id=None): + + if run_id: + # run_id will be a list when using nargs='+' + if isinstance(run_id, list): + all_runs = [{"id": rid} for rid in run_id] + else: + # Fallback for single string + all_runs = [{"id": run_id}] + else: + # No run_id provided, fetch runs using filters + all_runs = gha_client.get_runs(params=filters) + + model_tracker = defaultdict(list) + + for idx, run in enumerate(all_runs, 1): + + run_id_val = run["id"] + print(f"Processing run {idx}/{len(all_runs)}: ID {run_id_val}") + + try: + jobs = gha_client.get_jobs(run_id_val) + + for job in jobs: + + if job["conclusion"] == "skipped": + continue + + if not ("test-llama" in 
job["name"]): + continue + + try: + log = gha_client.get_job_logs(job_id=job["id"]) + + extracted_config = extract_model_export_times(log) + extracted_config["job_name"] = job["name"] + + if extracted_config['duration']: + model_tracker[run_id_val].append(extracted_config) + + except Exception as e: + print(f" Warning: Failed to get logs for job {job['id']}: {e}") + continue + + except Exception as e: + print(f" Error: Failed to get jobs for run {run_id_val}: {e}") + continue + + return model_tracker + +def print_results_as_table(results_dict): + """Print results as a formatted markdown table.""" + + # Extract all jobs from the defaultdict + all_jobs = [] + for run_id, jobs in results_dict.items(): + for job in jobs: + job['run_id'] = run_id # Add run_id to each job + all_jobs.append(job) + + if not all_jobs: + print("No jobs found.") + return + + # Print header + print("\n## Model Export Times\n") + print("| Run ID | Job Name | DType | Mode | Runner | Docker Image | Duration (s) |") + print("|--------|----------|-------|------|--------|--------------|--------------|") + + # Print each job + for job in all_jobs: + run_id = job.get('run_id', 'N/A') + job_name = job.get('job_name', 'N/A')[:60] # Truncate long names + dtype = job.get('dtype', 'N/A') + mode = job.get('mode', 'N/A') + runner = job.get('runner', 'N/A') + docker_image = job.get('docker_image', 'None') + duration = job.get('duration', 'N/A') + + # Truncate docker image if too long + if docker_image and len(docker_image) > 40: + docker_image = docker_image[:37] + "..." 
+ + print(f"| {run_id} | {job_name} | {dtype} | {mode} | {runner} | {docker_image} | {duration} |") + + # Print summary statistics + print(f"\n**Total Jobs:** {len(all_jobs)}") + + # Calculate average duration + durations = [int(job['duration']) for job in all_jobs if job.get('duration', '').isdigit()] + if durations: + avg_duration = sum(durations) / len(durations) + print(f"**Average Duration:** {avg_duration:.1f} seconds") + print(f"**Min Duration:** {min(durations)} seconds") + print(f"**Max Duration:** {max(durations)} seconds") + +def main(): + + parser = argparse.ArgumentParser( + description="A tool to get all model export times for the different configurations based on the githug actions runs" + ) + + parser.add_argument( + "--github_token", + metavar="executable", + type=str, + help="Your github access token", + default="" + ) + + parser.add_argument( + "--created_time", + metavar="executable", + type=str, + help="The date of the earliest github runs to include of the format YYYY-MM-DD", + default=datetime.today().strftime('%Y-%m-%d') + ) + + parser.add_argument( + "--run_id", + metavar="RUN_ID", + type=str, + nargs='+', # Accept one or more arguments + help="One or more run IDs to extract model export times from", + default=None + ) + + args = parser.parse_args() + + gha_client = GithubActionsClient(token=args.github_token) + + filters = {"created":f">={args.created_time}"} + + model_tracker_output = extract_full_model_export_times(gha_client, filters=filters, run_id=args.run_id) + + print_results_as_table(model_tracker_output) + + +if __name__ == "__main__": + main() + + From 372421fe50c4c1ccc09145ede14ab2a495ae68b6 Mon Sep 17 00:00:00 2001 From: Benjamin Cheung Date: Wed, 19 Nov 2025 14:45:16 -0500 Subject: [PATCH 14/14] fix: update the linting --- scripts/check_model_export_times.py | 189 +++++++++++++++------------- 1 file changed, 99 insertions(+), 90 deletions(-) diff --git a/scripts/check_model_export_times.py b/scripts/check_model_export_times.py 
index d81f7642004..f85a7c5a793 100644 --- a/scripts/check_model_export_times.py +++ b/scripts/check_model_export_times.py @@ -1,88 +1,91 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import requests -import re import argparse -from datetime import datetime +import re from collections import defaultdict +from datetime import datetime + +import requests + class GithubActionsClient: - + def __init__(self, token: str): - + self.base_url = "https://api.github.com/repos/pytorch/executorch" self.__headers = { "Authorization": f"token {token}", - "Accept": "application/vnd.github+json" - } - + "Accept": "application/vnd.github+json", + } + def get_runs(self, params=None): - + runs_url = f"{self.base_url}/actions/runs" response = requests.get(runs_url, headers=self.__headers, params=params) response.raise_for_status() - + return response.json()["workflow_runs"] - - def get_jobs(self, run_id: int, jobs_per_page: int=100): - + + def get_jobs(self, run_id: int, jobs_per_page: int = 100): + jobs_url = f"{self.base_url}/actions/runs/{run_id}/jobs" all_jobs = [] page = 1 - + while True: response = requests.get( - jobs_url, - headers=self.__headers, - params={"per_page": jobs_per_page, "page": page} + jobs_url, + headers=self.__headers, + params={"per_page": jobs_per_page, "page": page}, ) response.raise_for_status() - + json_response = response.json() jobs = json_response["jobs"] - + if not jobs: # No more jobs break - + all_jobs.extend(jobs) - + # Stop if we got fewer jobs than requested (last page) if len(jobs) < jobs_per_page: break - + page += 1 - + return all_jobs - + def get_job_logs(self, job_id: int): - + logs_url = f"{self.base_url}/actions/jobs/{job_id}/logs" response = requests.get(logs_url, headers=self.__headers) response.raise_for_status() - + return response.content.decode() + def extract_model_export_times(log): - duration = re.search(r'Model export completed .* Duration: (\d+)', log) - docker_image = re.search(r'DOCKER_IMAGE:\s*(.+?)(?:\s|$)', log) - dtype = 
re.search(r'DTYPE=(\w+)', log) - mode = re.search(r'MODE=(\S+)', log) - runner = re.search(r'runner:\s*(\S+)', log) - + duration = re.search(r"Model export completed .* Duration: (\d+)", log) + docker_image = re.search(r"DOCKER_IMAGE:\s*(.+?)(?:\s|$)", log) + dtype = re.search(r"DTYPE=(\w+)", log) + mode = re.search(r"MODE=(\S+)", log) + runner = re.search(r"runner:\s*(\S+)", log) + log_extract = { "duration": duration.group(1) if duration else None, "docker_image": docker_image.group(1) if docker_image else None, "dtype": dtype.group(1) if dtype else None, "mode": mode.group(1) if mode else None, "runner": runner.group(1) if runner else None, - } - - + } + return log_extract + def extract_full_model_export_times(gha_client, filters=None, run_id=None): - + if run_id: # run_id will be a list when using nargs='+' if isinstance(run_id, list): @@ -93,133 +96,139 @@ def extract_full_model_export_times(gha_client, filters=None, run_id=None): else: # No run_id provided, fetch runs using filters all_runs = gha_client.get_runs(params=filters) - + model_tracker = defaultdict(list) - + for idx, run in enumerate(all_runs, 1): - + run_id_val = run["id"] print(f"Processing run {idx}/{len(all_runs)}: ID {run_id_val}") - + try: jobs = gha_client.get_jobs(run_id_val) - + for job in jobs: - + if job["conclusion"] == "skipped": continue - + if not ("test-llama" in job["name"]): continue - + try: log = gha_client.get_job_logs(job_id=job["id"]) - + extracted_config = extract_model_export_times(log) extracted_config["job_name"] = job["name"] - - if extracted_config['duration']: + + if extracted_config["duration"]: model_tracker[run_id_val].append(extracted_config) - + except Exception as e: print(f" Warning: Failed to get logs for job {job['id']}: {e}") continue - + except Exception as e: print(f" Error: Failed to get jobs for run {run_id_val}: {e}") continue - + return model_tracker + def print_results_as_table(results_dict): """Print results as a formatted markdown table.""" - + # 
Extract all jobs from the defaultdict all_jobs = [] for run_id, jobs in results_dict.items(): for job in jobs: - job['run_id'] = run_id # Add run_id to each job + job["run_id"] = run_id # Add run_id to each job all_jobs.append(job) - + if not all_jobs: print("No jobs found.") return - + # Print header print("\n## Model Export Times\n") print("| Run ID | Job Name | DType | Mode | Runner | Docker Image | Duration (s) |") print("|--------|----------|-------|------|--------|--------------|--------------|") - + # Print each job for job in all_jobs: - run_id = job.get('run_id', 'N/A') - job_name = job.get('job_name', 'N/A')[:60] # Truncate long names - dtype = job.get('dtype', 'N/A') - mode = job.get('mode', 'N/A') - runner = job.get('runner', 'N/A') - docker_image = job.get('docker_image', 'None') - duration = job.get('duration', 'N/A') - + run_id = job.get("run_id", "N/A") + job_name = job.get("job_name", "N/A")[:60] # Truncate long names + dtype = job.get("dtype", "N/A") + mode = job.get("mode", "N/A") + runner = job.get("runner", "N/A") + docker_image = job.get("docker_image", "None") + duration = job.get("duration", "N/A") + # Truncate docker image if too long if docker_image and len(docker_image) > 40: docker_image = docker_image[:37] + "..." 
- - print(f"| {run_id} | {job_name} | {dtype} | {mode} | {runner} | {docker_image} | {duration} |") - + + print( + f"| {run_id} | {job_name} | {dtype} | {mode} | {runner} | {docker_image} | {duration} |" + ) + # Print summary statistics print(f"\n**Total Jobs:** {len(all_jobs)}") - + # Calculate average duration - durations = [int(job['duration']) for job in all_jobs if job.get('duration', '').isdigit()] + durations = [ + int(job["duration"]) for job in all_jobs if job.get("duration", "").isdigit() + ] if durations: avg_duration = sum(durations) / len(durations) print(f"**Average Duration:** {avg_duration:.1f} seconds") print(f"**Min Duration:** {min(durations)} seconds") print(f"**Max Duration:** {max(durations)} seconds") - + + def main(): - + parser = argparse.ArgumentParser( description="A tool to get all model export times for the different configurations based on the githug actions runs" - ) - + ) + parser.add_argument( "--github_token", metavar="executable", type=str, help="Your github access token", - default="" - ) - + default="", + ) + parser.add_argument( "--created_time", metavar="executable", type=str, help="The date of the earliest github runs to include of the format YYYY-MM-DD", - default=datetime.today().strftime('%Y-%m-%d') - ) - + default=datetime.today().strftime("%Y-%m-%d"), + ) + parser.add_argument( "--run_id", metavar="RUN_ID", type=str, - nargs='+', # Accept one or more arguments + nargs="+", # Accept one or more arguments help="One or more run IDs to extract model export times from", - default=None + default=None, ) - + args = parser.parse_args() - + gha_client = GithubActionsClient(token=args.github_token) - - filters = {"created":f">={args.created_time}"} - - model_tracker_output = extract_full_model_export_times(gha_client, filters=filters, run_id=args.run_id) - + + filters = {"created": f">={args.created_time}"} + + model_tracker_output = extract_full_model_export_times( + gha_client, filters=filters, run_id=args.run_id + ) + 
print_results_as_table(model_tracker_output) - + if __name__ == "__main__": main() - -