From 9a1376c4c6e33c646a5e2f7fbd1b6e4090128830 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 17:41:53 +0000 Subject: [PATCH 01/12] Initial plan From 6194792895b834d29d23b1f113378971c23f195d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 17:51:16 +0000 Subject: [PATCH 02/12] chore: outline plan for MCP gateway health check retry fix Agent-Logs-Url: https://github.com/github/gh-aw/sessions/c923f8eb-a610-4433-b2c6-cfae71b2811d Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .github/mcp.json | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 .github/mcp.json diff --git a/.github/mcp.json b/.github/mcp.json new file mode 100644 index 00000000000..9ca83b55e03 --- /dev/null +++ b/.github/mcp.json @@ -0,0 +1,8 @@ +{ + "mcpServers": { + "github-agentic-workflows": { + "command": "gh", + "args": ["aw", "mcp-server"] + } + } +} From 5fd98d19c3ffe3b7af4df53538eaf7df9605d0cb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 17:56:55 +0000 Subject: [PATCH 03/12] fix: add MCP gateway health-check retry backoff for startup delays Agent-Logs-Url: https://github.com/github/gh-aw/sessions/c923f8eb-a610-4433-b2c6-cfae71b2811d Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .github/mcp.json | 8 -------- actions/setup/js/start_mcp_gateway.cjs | 11 ++++++++--- actions/setup/sh/start_mcp_gateway.sh | 9 +++++---- actions/setup/sh/start_mcp_gateway_test.sh | 7 +++++++ 4 files changed, 20 insertions(+), 15 deletions(-) delete mode 100644 .github/mcp.json diff --git a/.github/mcp.json b/.github/mcp.json deleted file mode 100644 index 9ca83b55e03..00000000000 --- a/.github/mcp.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "mcpServers": { - "github-agentic-workflows": { - "command": "gh", - "args": ["aw", "mcp-server"] - } - } -} diff --git a/actions/setup/js/start_mcp_gateway.cjs b/actions/setup/js/start_mcp_gateway.cjs index 57f2e0b5603..adefe7ad8ca 100644 --- a/actions/setup/js/start_mcp_gateway.cjs +++ b/actions/setup/js/start_mcp_gateway.cjs @@ -370,16 +370,20 @@ async function main() { core.info(`Health endpoint: ${healthUrl}`); core.info(`(Note: MCP_GATEWAY_DOMAIN is '${gatewayDomain}' for container access)`); - core.info("Retrying up to 120 times with 1s delay (120s total timeout)"); + core.info("Retrying up to 120 times with exponential backoff (250ms to 1s, ~120s total timeout)"); core.info(""); const maxRetries = 120; + const initialRetryDelayMs = 250; + const maxRetryDelayMs = 1000; let httpCode = 0; let healthBody = ""; let succeeded = false; + let attemptsMade = 0; core.info("=== Health Check Progress ==="); for (let attempt = 1; attempt <= maxRetries; attempt++) { + attemptsMade = attempt; const elapsedSec = Math.floor((nowMs() - healthCheckStart) / 1000); if (attempt % 10 === 1 || attempt === 1) { core.info(`Attempt ${attempt}/${maxRetries} (${elapsedSec}s elapsed)...`); @@ -399,14 +403,15 @@ async function main() { } if (attempt < maxRetries) { - await sleep(1000); + const retryDelayMs = Math.min(initialRetryDelayMs * 2 ** (attempt - 1), maxRetryDelayMs); + await sleep(retryDelayMs); } } core.info("=== End Health Check Progress ==="); core.info(""); core.info(`Final HTTP code: ${httpCode}`); - core.info(`Total attempts: ${maxRetries}`); + core.info(`Total attempts: ${attemptsMade}`); if (healthBody) { core.info(`Health response body: ${healthBody}`); } else { diff --git a/actions/setup/sh/start_mcp_gateway.sh b/actions/setup/sh/start_mcp_gateway.sh index 824bff5a9a7..b232d587424 100755 --- a/actions/setup/sh/start_mcp_gateway.sh +++ b/actions/setup/sh/start_mcp_gateway.sh @@ -209,7 +209,7 @@ HEALTH_CHECK_START=$(date +%s%3N) HEALTH_CHECK_HOST="localhost" echo "Health endpoint: http://${HEALTH_CHECK_HOST}:${MCP_GATEWAY_PORT}/health" echo "(Note: MCP_GATEWAY_DOMAIN is '${MCP_GATEWAY_DOMAIN}' for container access)" -echo "Retrying up to 120 times with 1s delay (120s total timeout)" +echo "Retrying up to 120 times with exponential backoff (250ms to 1s, ~120s total timeout)" echo "" # Check health endpoint using localhost (since we're running on the host) @@ -219,7 +219,8 @@ echo "" set +e MAX_RETRIES=120 -RETRY_DELAY=1 +INITIAL_RETRY_DELAY=0.25 +MAX_RETRY_DELAY=1 RETRY_COUNT=0 HTTP_CODE="" HEALTH_RESPONSE="" @@ -234,7 +235,6 @@ while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do ELAPSED_MS=$(($(date +%s%3N) - HEALTH_CHECK_START)) ELAPSED_SEC=$((ELAPSED_MS / 1000)) - # Show progress every 10 retries or on first attempt if [ $((RETRY_COUNT % 10)) -eq 1 ] || [ $RETRY_COUNT -eq 1 ]; then echo "Attempt $RETRY_COUNT/$MAX_RETRIES (${ELAPSED_SEC}s elapsed)..." fi @@ -255,7 +255,8 @@ while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do # If this is not the last attempt, wait before retrying if [ $RETRY_COUNT -lt $MAX_RETRIES ]; then - sleep $RETRY_DELAY + RETRY_DELAY=$(awk -v base="$INITIAL_RETRY_DELAY" -v exp="$((RETRY_COUNT - 1))" -v max="$MAX_RETRY_DELAY" 'BEGIN { delay = base * (2 ^ exp); if (delay > max) delay = max; printf "%.3f", delay }') + sleep "$RETRY_DELAY" fi done echo "=== End Health Check Progress ===" diff --git a/actions/setup/sh/start_mcp_gateway_test.sh b/actions/setup/sh/start_mcp_gateway_test.sh index e1d4e1ba565..718fd463880 100755 --- a/actions/setup/sh/start_mcp_gateway_test.sh +++ b/actions/setup/sh/start_mcp_gateway_test.sh @@ -281,6 +281,13 @@ test_validation_functions_exist() { else print_result "--network host flag validation missing" "FAIL" fi + + # Check for health check retry/backoff logic + if grep -q "INITIAL_RETRY_DELAY=0.25" "$SCRIPT_PATH" && grep -q "MAX_RETRY_DELAY=1" "$SCRIPT_PATH"; then + print_result "Health check exponential backoff configuration exists" "PASS" + else + print_result "Health check exponential backoff configuration missing" "FAIL" + fi } # Run all tests From 70aa68eaa380f5a18c13f0ae6237a2495ec13c8e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 17:59:38 +0000 Subject: [PATCH 04/12] refactor: cap MCP gateway startup backoff without subprocess overhead Agent-Logs-Url: https://github.com/github/gh-aw/sessions/c923f8eb-a610-4433-b2c6-cfae71b2811d Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- actions/setup/js/start_mcp_gateway.cjs | 4 +++- actions/setup/sh/start_mcp_gateway.sh | 12 +++++++++--- actions/setup/sh/start_mcp_gateway_test.sh | 2 +- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/actions/setup/js/start_mcp_gateway.cjs b/actions/setup/js/start_mcp_gateway.cjs index adefe7ad8ca..079067c514b 100644 --- a/actions/setup/js/start_mcp_gateway.cjs +++ b/actions/setup/js/start_mcp_gateway.cjs @@ -376,6 +376,7 @@ async function main() { const maxRetries = 120; const initialRetryDelayMs = 250; const maxRetryDelayMs = 1000; + const maxRetryExponent = Math.floor(Math.log2(maxRetryDelayMs / initialRetryDelayMs)); let httpCode = 0; let healthBody = ""; let succeeded = false; @@ -403,7 +404,8 @@ async function main() { } if (attempt < maxRetries) { - const retryDelayMs = Math.min(initialRetryDelayMs * 2 ** (attempt - 1), maxRetryDelayMs); + const retryExponent = Math.min(attempt - 1, maxRetryExponent); + const retryDelayMs = Math.min(initialRetryDelayMs * 2 ** retryExponent, maxRetryDelayMs); await sleep(retryDelayMs); } } diff --git a/actions/setup/sh/start_mcp_gateway.sh b/actions/setup/sh/start_mcp_gateway.sh index b232d587424..604428836d7 100755 --- a/actions/setup/sh/start_mcp_gateway.sh +++ b/actions/setup/sh/start_mcp_gateway.sh @@ -219,8 +219,6 @@ echo "" set +e MAX_RETRIES=120 -INITIAL_RETRY_DELAY=0.25 -MAX_RETRY_DELAY=1 RETRY_COUNT=0 HTTP_CODE="" HEALTH_RESPONSE="" @@ -255,7 +253,15 @@ while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do # If this is not the last attempt, wait before retrying if [ $RETRY_COUNT -lt $MAX_RETRIES ]; then - RETRY_DELAY=$(awk -v base="$INITIAL_RETRY_DELAY" -v exp="$((RETRY_COUNT - 1))" -v max="$MAX_RETRY_DELAY" 'BEGIN { delay = base * (2 ^ exp); if (delay > max) delay = max; printf "%.3f", delay }') + # Exponential backoff with 1s cap: + # attempt 1 -> 0.25s, attempt 2 -> 0.5s, attempt 3+ -> 1s + if [ $RETRY_COUNT -eq 1 ]; then + RETRY_DELAY="0.25" + elif [ $RETRY_COUNT -eq 2 ]; then + RETRY_DELAY="0.5" + else + RETRY_DELAY="1" + fi sleep "$RETRY_DELAY" fi done diff --git a/actions/setup/sh/start_mcp_gateway_test.sh b/actions/setup/sh/start_mcp_gateway_test.sh index 718fd463880..4cb9f029310 100755 --- a/actions/setup/sh/start_mcp_gateway_test.sh +++ b/actions/setup/sh/start_mcp_gateway_test.sh @@ -283,7 +283,7 @@ test_validation_functions_exist() { fi # Check for health check retry/backoff logic - if grep -q "INITIAL_RETRY_DELAY=0.25" "$SCRIPT_PATH" && grep -q "MAX_RETRY_DELAY=1" "$SCRIPT_PATH"; then + if grep -q "RETRY_COUNT -eq 1" "$SCRIPT_PATH" && grep -q "RETRY_COUNT -eq 2" "$SCRIPT_PATH" && grep -q "RETRY_DELAY=\"1\"" "$SCRIPT_PATH"; then print_result "Health check exponential backoff configuration exists" "PASS" else print_result "Health check exponential backoff configuration missing" "FAIL" From 93af3fc09cd269a8e2ed32c3276ef5b8cea9655a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 18:01:34 +0000 Subject: [PATCH 05/12] test: tighten MCP gateway backoff assertions Agent-Logs-Url: https://github.com/github/gh-aw/sessions/c923f8eb-a610-4433-b2c6-cfae71b2811d Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- actions/setup/js/start_mcp_gateway.cjs | 3 ++- actions/setup/sh/start_mcp_gateway_test.sh | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/actions/setup/js/start_mcp_gateway.cjs b/actions/setup/js/start_mcp_gateway.cjs index 079067c514b..8540dec1523 100644 --- a/actions/setup/js/start_mcp_gateway.cjs +++ b/actions/setup/js/start_mcp_gateway.cjs @@ -376,7 +376,8 @@ async function main() { const maxRetries = 120; const initialRetryDelayMs = 250; const maxRetryDelayMs = 1000; - const maxRetryExponent = Math.floor(Math.log2(maxRetryDelayMs / initialRetryDelayMs)); + // 250ms -> 500ms -> 1000ms, then capped at 1000ms. + const maxRetryExponent = 2; let httpCode = 0; let healthBody = ""; let succeeded = false; diff --git a/actions/setup/sh/start_mcp_gateway_test.sh b/actions/setup/sh/start_mcp_gateway_test.sh index 4cb9f029310..62efb969f00 100755 --- a/actions/setup/sh/start_mcp_gateway_test.sh +++ b/actions/setup/sh/start_mcp_gateway_test.sh @@ -283,7 +283,11 @@ test_validation_functions_exist() { fi # Check for health check retry/backoff logic - if grep -q "RETRY_COUNT -eq 1" "$SCRIPT_PATH" && grep -q "RETRY_COUNT -eq 2" "$SCRIPT_PATH" && grep -q "RETRY_DELAY=\"1\"" "$SCRIPT_PATH"; then + if grep -q "RETRY_COUNT -eq 1" "$SCRIPT_PATH" && + grep -q "RETRY_COUNT -eq 2" "$SCRIPT_PATH" && + grep -q "RETRY_DELAY=\"0.25\"" "$SCRIPT_PATH" && + grep -q "RETRY_DELAY=\"0.5\"" "$SCRIPT_PATH" && + grep -q "RETRY_DELAY=\"1\"" "$SCRIPT_PATH"; then print_result "Health check exponential backoff configuration exists" "PASS" else print_result "Health check exponential backoff configuration missing" "FAIL" From f5cb3d500e79fc27a4952537600ea371cd706344 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 18:03:18 +0000 Subject: [PATCH 06/12] chore: simplify MCP gateway JS backoff calculation Agent-Logs-Url: https://github.com/github/gh-aw/sessions/c923f8eb-a610-4433-b2c6-cfae71b2811d Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- actions/setup/js/start_mcp_gateway.cjs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/actions/setup/js/start_mcp_gateway.cjs b/actions/setup/js/start_mcp_gateway.cjs index 8540dec1523..abec6fbf267 100644 --- a/actions/setup/js/start_mcp_gateway.cjs +++ b/actions/setup/js/start_mcp_gateway.cjs @@ -375,7 +375,7 @@ async function main() { const maxRetries = 120; const initialRetryDelayMs = 250; - const maxRetryDelayMs = 1000; + // delay = initialRetryDelayMs * 2^min(attempt-1, maxRetryExponent) // 250ms -> 500ms -> 1000ms, then capped at 1000ms. const maxRetryExponent = 2; let httpCode = 0; @@ -406,7 +406,7 @@ async function main() { if (attempt < maxRetries) { const retryExponent = Math.min(attempt - 1, maxRetryExponent); - const retryDelayMs = Math.min(initialRetryDelayMs * 2 ** retryExponent, maxRetryDelayMs); + const retryDelayMs = initialRetryDelayMs * 2 ** retryExponent; await sleep(retryDelayMs); } } From 2d880a4cf30f1aa535662a55167ac74ca6e0b11c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 18:05:21 +0000 Subject: [PATCH 07/12] docs: clarify backoff formula and strengthen shell pattern checks Agent-Logs-Url: https://github.com/github/gh-aw/sessions/c923f8eb-a610-4433-b2c6-cfae71b2811d Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- actions/setup/js/start_mcp_gateway.cjs | 2 +- actions/setup/sh/start_mcp_gateway_test.sh | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/actions/setup/js/start_mcp_gateway.cjs b/actions/setup/js/start_mcp_gateway.cjs index abec6fbf267..ff5bde4811d 100644 --- a/actions/setup/js/start_mcp_gateway.cjs +++ b/actions/setup/js/start_mcp_gateway.cjs @@ -375,7 +375,7 @@ async function main() { const maxRetries = 120; const initialRetryDelayMs = 250; - // delay = initialRetryDelayMs * 2^min(attempt-1, maxRetryExponent) + // delay = initialRetryDelayMs * 2**min(attempt-1, maxRetryExponent) // 250ms -> 500ms -> 1000ms, then capped at 1000ms. const maxRetryExponent = 2; let httpCode = 0; diff --git a/actions/setup/sh/start_mcp_gateway_test.sh b/actions/setup/sh/start_mcp_gateway_test.sh index 62efb969f00..cdee99198b8 100755 --- a/actions/setup/sh/start_mcp_gateway_test.sh +++ b/actions/setup/sh/start_mcp_gateway_test.sh @@ -285,9 +285,12 @@ test_validation_functions_exist() { # Check for health check retry/backoff logic if grep -q "RETRY_COUNT -eq 1" "$SCRIPT_PATH" && grep -q "RETRY_COUNT -eq 2" "$SCRIPT_PATH" && + grep -q "elif \[ \$RETRY_COUNT -eq 2 \]" "$SCRIPT_PATH" && + grep -q "else" "$SCRIPT_PATH" && grep -q "RETRY_DELAY=\"0.25\"" "$SCRIPT_PATH" && grep -q "RETRY_DELAY=\"0.5\"" "$SCRIPT_PATH" && - grep -q "RETRY_DELAY=\"1\"" "$SCRIPT_PATH"; then + grep -q "RETRY_DELAY=\"1\"" "$SCRIPT_PATH" && + grep -q "sleep \"\$RETRY_DELAY\"" "$SCRIPT_PATH"; then print_result "Health check exponential backoff configuration exists" "PASS" else print_result "Health check exponential backoff configuration missing" "FAIL" From d8744749fa248dd591751656e613767deed4dda5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 18:06:47 +0000 Subject: [PATCH 08/12] test: assert documented third backoff tier Agent-Logs-Url: https://github.com/github/gh-aw/sessions/c923f8eb-a610-4433-b2c6-cfae71b2811d Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- actions/setup/sh/start_mcp_gateway_test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/actions/setup/sh/start_mcp_gateway_test.sh b/actions/setup/sh/start_mcp_gateway_test.sh index cdee99198b8..2712646be53 100755 --- a/actions/setup/sh/start_mcp_gateway_test.sh +++ b/actions/setup/sh/start_mcp_gateway_test.sh @@ -290,6 +290,7 @@ test_validation_functions_exist() { grep -q "RETRY_DELAY=\"0.25\"" "$SCRIPT_PATH" && grep -q "RETRY_DELAY=\"0.5\"" "$SCRIPT_PATH" && grep -q "RETRY_DELAY=\"1\"" "$SCRIPT_PATH" && + grep -q "attempt 3+ -> 1s" "$SCRIPT_PATH" && grep -q "sleep \"\$RETRY_DELAY\"" "$SCRIPT_PATH"; then print_result "Health check exponential backoff configuration exists" "PASS" else From 0bb933393578dd6d7f0c0a4f4b174ce17c7db181 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 18:36:33 +0000 Subject: [PATCH 09/12] chore: plan follow-up for review comment Agent-Logs-Url: https://github.com/github/gh-aw/sessions/0e241816-4acc-4ea2-98b5-d6db16911e8a Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .github/mcp.json | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .github/mcp.json diff --git a/.github/mcp.json b/.github/mcp.json new file mode 100644 index 00000000000..b953af2639e --- /dev/null +++ b/.github/mcp.json @@ -0,0 +1,11 @@ +{ + "mcpServers": { + "github-agentic-workflows": { + "command": "gh", + "args": [ + "aw", + "mcp-server" + ] + } + } +} \ No newline at end of file From 7682cd261aa7f1a4f2582799584683b8bb10f9aa Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 18:40:00 +0000 Subject: [PATCH 10/12] refactor: use shared withRetry helper in MCP gateway health polling Agent-Logs-Url: https://github.com/github/gh-aw/sessions/0e241816-4acc-4ea2-98b5-d6db16911e8a Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .github/mcp.json | 11 ----- actions/setup/js/start_mcp_gateway.cjs | 61 ++++++++++++++------------ 2 files changed, 33 insertions(+), 39 deletions(-) delete mode 100644 .github/mcp.json diff --git a/.github/mcp.json b/.github/mcp.json deleted file mode 100644 index b953af2639e..00000000000 --- a/.github/mcp.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "mcpServers": { - "github-agentic-workflows": { - "command": "gh", - "args": [ - "aw", - "mcp-server" - ] - } - } -} \ No newline at end of file diff --git a/actions/setup/js/start_mcp_gateway.cjs b/actions/setup/js/start_mcp_gateway.cjs index ff5bde4811d..c81c950b36d 100644 --- a/actions/setup/js/start_mcp_gateway.cjs +++ b/actions/setup/js/start_mcp_gateway.cjs @@ -31,6 +31,7 @@ const { spawn, execSync } = require("child_process"); const fs = require("fs"); const http = require("http"); const path = require("path"); +const { withRetry } = require("./error_recovery.cjs"); // --------------------------------------------------------------------------- // Timing helpers @@ -373,43 +374,47 @@ async function main() { core.info("Retrying up to 120 times with exponential backoff (250ms to 1s, ~120s total timeout)"); core.info(""); - const maxRetries = 120; + const maxAttempts = 120; const initialRetryDelayMs = 250; - // delay = initialRetryDelayMs * 2**min(attempt-1, maxRetryExponent) - // 250ms -> 500ms -> 1000ms, then capped at 1000ms. - const maxRetryExponent = 2; let httpCode = 0; let healthBody = ""; let succeeded = false; let attemptsMade = 0; core.info("=== Health Check Progress ==="); - for (let attempt = 1; attempt <= maxRetries; attempt++) { - attemptsMade = attempt; - const elapsedSec = Math.floor((nowMs() - healthCheckStart) / 1000); - if (attempt % 10 === 1 || attempt === 1) { - core.info(`Attempt ${attempt}/${maxRetries} (${elapsedSec}s elapsed)...`); - } - - try { - const res = await httpGet(healthUrl, 2000); - httpCode = res.statusCode; - healthBody = res.body; - if (httpCode === 200 && healthBody) { - core.info(`✓ Health check succeeded on attempt ${attempt} (${elapsedSec}s elapsed)`); - succeeded = true; - break; - } - } catch { - // Connection refused / timeout – retry - } + try { + await withRetry( + async () => { + attemptsMade += 1; + const elapsedSec = Math.floor((nowMs() - healthCheckStart) / 1000); + if (attemptsMade % 10 === 1 || attemptsMade === 1) { + core.info(`Attempt ${attemptsMade}/${maxAttempts} (${elapsedSec}s elapsed)...`); + } - if (attempt < maxRetries) { - const retryExponent = Math.min(attempt - 1, maxRetryExponent); - const retryDelayMs = initialRetryDelayMs * 2 ** retryExponent; - await sleep(retryDelayMs); - } + const res = await httpGet(healthUrl, 2000); + httpCode = res.statusCode; + healthBody = res.body; + if (httpCode === 200 && healthBody) { + core.info(`✓ Health check succeeded on attempt ${attemptsMade} (${elapsedSec}s elapsed)`); + succeeded = true; + return; + } + throw new Error(`Health endpoint not ready (HTTP ${httpCode || 0})`); + }, + { + maxRetries: maxAttempts - 1, + initialDelayMs: initialRetryDelayMs, + maxDelayMs: 1000, + backoffMultiplier: 2, + jitterMs: 0, + shouldRetry: () => true, + }, + "MCP gateway health check" + ); + } catch { + // Retry exhaustion is handled below using existing diagnostics. } + core.info("=== End Health Check Progress ==="); core.info(""); From ebf5c12be84edbadfe155435963acb35a6f27eb0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 18:41:46 +0000 Subject: [PATCH 11/12] chore: clarify withRetry attempt semantics in gateway health check Agent-Logs-Url: https://github.com/github/gh-aw/sessions/0e241816-4acc-4ea2-98b5-d6db16911e8a Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- actions/setup/js/start_mcp_gateway.cjs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/actions/setup/js/start_mcp_gateway.cjs b/actions/setup/js/start_mcp_gateway.cjs index c81c950b36d..3ba71812074 100644 --- a/actions/setup/js/start_mcp_gateway.cjs +++ b/actions/setup/js/start_mcp_gateway.cjs @@ -374,7 +374,9 @@ async function main() { core.info("Retrying up to 120 times with exponential backoff (250ms to 1s, ~120s total timeout)"); core.info(""); - const maxAttempts = 120; + const maxTotalAttempts = 120; + // withRetry's maxRetries excludes the initial attempt. + const maxRetryCount = maxTotalAttempts - 1; const initialRetryDelayMs = 250; let httpCode = 0; let healthBody = ""; @@ -385,10 +387,11 @@ async function main() { try { await withRetry( async () => { + // Counts total health-check attempts, including the final successful attempt. attemptsMade += 1; const elapsedSec = Math.floor((nowMs() - healthCheckStart) / 1000); if (attemptsMade % 10 === 1 || attemptsMade === 1) { - core.info(`Attempt ${attemptsMade}/${maxAttempts} (${elapsedSec}s elapsed)...`); + core.info(`Attempt ${attemptsMade}/${maxTotalAttempts} (${elapsedSec}s elapsed)...`); } const res = await httpGet(healthUrl, 2000); @@ -402,7 +405,7 @@ async function main() { throw new Error(`Health endpoint not ready (HTTP ${httpCode || 0})`); }, { - maxRetries: maxAttempts - 1, + maxRetries: maxRetryCount, initialDelayMs: initialRetryDelayMs, maxDelayMs: 1000, backoffMultiplier: 2, From 7b4184f05e42ec0c2caa604544a56dde2dad1cae Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 18:43:31 +0000 Subject: [PATCH 12/12] docs: clarify intentional retry policy for gateway health polling Agent-Logs-Url: https://github.com/github/gh-aw/sessions/0e241816-4acc-4ea2-98b5-d6db16911e8a Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- actions/setup/js/start_mcp_gateway.cjs | 1 + 1 file changed, 1 insertion(+) diff --git a/actions/setup/js/start_mcp_gateway.cjs b/actions/setup/js/start_mcp_gateway.cjs index 3ba71812074..bfca155fb2f 100644 --- a/actions/setup/js/start_mcp_gateway.cjs +++ b/actions/setup/js/start_mcp_gateway.cjs @@ -410,6 +410,7 @@ async function main() { maxDelayMs: 1000, backoffMultiplier: 2, jitterMs: 0, + // Preserve previous loop behavior: retry any health-check failure until attempts are exhausted. shouldRetry: () => true, }, "MCP gateway health check"