diff --git a/stash/restore/action.yml b/stash/restore/action.yml
index 50cb8267..8f39dcd1 100644
--- a/stash/restore/action.yml
+++ b/stash/restore/action.yml
@@ -45,6 +45,21 @@ inputs:
       If true, only the current branch will be searched for the stash.
       If false, the base branch(PRs)/default branch will be searched as well.
     default: "false"
+  retry-count:
+    description: >
+      Number of attempts for downloading the stash artifact. When `gh run download`
+      exits with code 1 (the transient failure mode observed for artifact downloads),
+      the download will be retried until it succeeds or this many attempts have been
+      made. Other exit codes are not retried. Must be a positive integer; any other
+      value falls back to the default of 3.
+    default: "3"
+  fail-on-download:
+    description: >
+      If true, the action will fail when a stash artifact was found but could not be
+      downloaded (after exhausting `retry-count` attempts). If false (the default),
+      a failed download is reported only via the `stash-hit` output and the step
+      itself succeeds.
+    default: "false"
 outputs:
   stash-hit:
     description: >
@@ -148,21 +163,60 @@ runs:
         STASH_RUN_ID: "${{ steps.check-stash.outputs.stash_run_id }}"
         REPO: "${{ github.repository }}"
         STASH_DIR: "${{ steps.mung.outputs.stash_path }}"
+        RETRY_COUNT: "${{ inputs.retry-count }}"
+        FAIL_ON_DOWNLOAD: "${{ inputs.fail-on-download }}"
       run: |
-        # Catch errors in the download with || to avoid the whole workflow failing
-        # when the download times out
+        # The default GitHub Actions bash shell runs with `set -eo pipefail`,
+        # which would abort this step the moment `gh run download` or `rm -rf`
+        # returns non-zero. Disable `errexit` explicitly so a single failing
+        # command cannot kill the step — we handle failures ourselves via $?.
+        set +e
+        # RETRY_COUNT is caller-supplied text that is used inside (( ... ))
+        # below, where bash evaluates it as an arithmetic expression. Accept
+        # only a plain positive integer; anything else (empty, 0, negative,
+        # leading zero, non-numeric) falls back to the documented default.
+        if [[ ! "$RETRY_COUNT" =~ ^[1-9][0-9]*$ ]]; then
+          echo "::warning ::Invalid retry-count '$RETRY_COUNT'; using 3 attempts instead."
+          RETRY_COUNT=3
+        fi
         if [[ "${{ inputs.clean }}" == "true" ]]; then
           if [[ -d "$STASH_DIR" ]]; then
             echo "Removing existing stash directory: $STASH_DIR"
             rm -rf "$STASH_DIR"
           fi
         fi
 
-        gh run download "$STASH_RUN_ID" \
-          --name "$STASH_NAME" \
-          --dir "$STASH_DIR" \
-          -R "$REPO" || download="failed" && download="success"
+        # Retry up to RETRY_COUNT times when `gh run download` exits with
+        # code 1 (the transient failure mode observed for artifact downloads).
+        download="failed"
+        attempts_made=0
+        for (( attempt = 1; attempt <= RETRY_COUNT; attempt++ )); do
+          attempts_made=$attempt
+          echo "Downloading stash (attempt $attempt of $RETRY_COUNT)..."
+          gh run download "$STASH_RUN_ID" \
+            --name "$STASH_NAME" \
+            --dir "$STASH_DIR" \
+            -R "$REPO"
+          rc=$?
+          if (( rc == 0 )); then
+            download="success"
+            break
+          fi
+          if (( rc != 1 )); then
+            echo "::warning ::gh run download failed with exit code $rc; not retrying."
+            break
+          fi
+          echo "::warning ::gh run download failed with exit code 1 on attempt $attempt."
+        done
         echo "download=$download" >> "$GITHUB_OUTPUT"
+        if [[ "$download" != "success" && "$FAIL_ON_DOWNLOAD" == "true" ]]; then
+          echo "::error ::Stash artifact download failed after $attempts_made attempt(s) and fail-on-download is true."
+          exit 1
+        fi
+        # Otherwise exit 0 — the `download` output tells downstream steps
+        # whether the stash was restored, and a failed download must not fail
+        # the step unless the caller opted in via fail-on-download.
+        exit 0
 
     - name: Set stash-hit Output
       id: output