diff --git a/.github/actions/setup-api-client/action.yml b/.github/actions/setup-api-client/action.yml index fa22de5fd..3e43bc863 100644 --- a/.github/actions/setup-api-client/action.yml +++ b/.github/actions/setup-api-client/action.yml @@ -241,27 +241,57 @@ runs: # lru-cache is an explicit transitive dep of @octokit/auth-app required for # GitHub App token minting; pin it here so npm always hoists a specific version # even if a prior cached node_modules state is missing it. - # Capture stderr for debugging if the command fails - npm_output=$(mktemp) - npm_cmd=(npm install --no-save --location=project \ - @octokit/rest@20.0.2 \ - @octokit/plugin-retry@6.0.1 \ - @octokit/plugin-paginate-rest@9.1.5 \ - @octokit/auth-app@6.0.3 \ - lru-cache@10.4.3) - if "${npm_cmd[@]}" 2>"$npm_output"; then - rm -f "$npm_output" - else - echo "::warning::npm install failed with: $(cat "$npm_output")" - echo "::warning::Retrying with --legacy-peer-deps" + # + # Retry with exponential backoff to survive transient npm registry errors + # (e.g. 403 Forbidden from CDN/rate-limit on safe-buffer, undici, etc.). + NPM_PACKAGES=( + @octokit/rest@20.0.2 + @octokit/plugin-retry@6.0.1 + @octokit/plugin-paginate-rest@9.1.5 + @octokit/auth-app@6.0.3 + lru-cache@10.4.3 + ) + NPM_MAX_RETRIES=3 + NPM_BACKOFF=5 # seconds; doubles each retry (5, 10) + npm_installed=false + + for (( attempt=1; attempt<=NPM_MAX_RETRIES; attempt++ )); do + npm_output=$(mktemp) + + if npm install --no-save --location=project "${NPM_PACKAGES[@]}" 2>"$npm_output"; then + rm -f "$npm_output" + npm_installed=true + break + fi + + npm_err=$(cat "$npm_output") rm -f "$npm_output" - npm_cmd=(npm install --no-save --legacy-peer-deps --location=project \ - @octokit/rest@20.0.2 \ - @octokit/plugin-retry@6.0.1 \ - @octokit/plugin-paginate-rest@9.1.5 \ - @octokit/auth-app@6.0.3 \ - lru-cache@10.4.3) - "${npm_cmd[@]}" + echo "::warning::npm install attempt $attempt/$NPM_MAX_RETRIES failed: $npm_err" + + # On first failure, also try --legacy-peer-deps in case it's a peer dep conflict + if [ "$attempt" -eq 1 ]; then + echo "::warning::Retrying with --legacy-peer-deps" + npm_output=$(mktemp) + if npm install --no-save --legacy-peer-deps --location=project "${NPM_PACKAGES[@]}" 2>"$npm_output"; then + rm -f "$npm_output" + npm_installed=true + break + fi + npm_err_legacy=$(cat "$npm_output") + rm -f "$npm_output" + echo "::warning::npm install with --legacy-peer-deps failed: $npm_err_legacy" + fi + + if [ "$attempt" -lt "$NPM_MAX_RETRIES" ]; then + echo "::notice::Waiting ${NPM_BACKOFF}s before retry..." + sleep "$NPM_BACKOFF" + NPM_BACKOFF=$((NPM_BACKOFF * 2)) + fi + done + + if [ "$npm_installed" != "true" ]; then + echo "::error::npm install failed after $NPM_MAX_RETRIES attempts" + exit 1 fi # Restore vendored package metadata that npm may have overwritten diff --git a/.github/scripts/verifier_ci_query.js b/.github/scripts/verifier_ci_query.js index e73c49e7e..f308fc6b0 100644 --- a/.github/scripts/verifier_ci_query.js +++ b/.github/scripts/verifier_ci_query.js @@ -153,7 +153,7 @@ async function fetchWorkflowRun({ const category = getErrorCategory(error); // 404 errors are expected for workflows that don't exist in consumer repos // Use info level instead of warning to reduce noise - const isNotFound = category === ERROR_CATEGORIES.RESOURCE || error.status === 404; + const isNotFound = category === ERROR_CATEGORIES.resource || error.status === 404; const logFn = isNotFound ? core?.info?.bind(core) : core?.warning?.bind(core); logFn?.( `Failed to fetch workflow runs for ${workflowId}: ${error.message}; category=${category}` @@ -189,7 +189,7 @@ async function fetchWorkflowJobs({ return { jobs, error: null }; } catch (error) { const category = getErrorCategory(error); - const isNotFound = category === ERROR_CATEGORIES.RESOURCE || error.status === 404; + const isNotFound = category === ERROR_CATEGORIES.resource || error.status === 404; const logFn = isNotFound ? core?.info?.bind(core) : core?.warning?.bind(core); logFn?.(`Failed to fetch workflow jobs for ${runId}: ${error.message}; category=${category}`); return { jobs: [], error: { category, message: error.message } }; diff --git a/templates/consumer-repo/.github/scripts/verifier_ci_query.js b/templates/consumer-repo/.github/scripts/verifier_ci_query.js index e73c49e7e..f308fc6b0 100644 --- a/templates/consumer-repo/.github/scripts/verifier_ci_query.js +++ b/templates/consumer-repo/.github/scripts/verifier_ci_query.js @@ -153,7 +153,7 @@ async function fetchWorkflowRun({ const category = getErrorCategory(error); // 404 errors are expected for workflows that don't exist in consumer repos // Use info level instead of warning to reduce noise - const isNotFound = category === ERROR_CATEGORIES.RESOURCE || error.status === 404; + const isNotFound = category === ERROR_CATEGORIES.resource || error.status === 404; const logFn = isNotFound ? core?.info?.bind(core) : core?.warning?.bind(core); logFn?.( `Failed to fetch workflow runs for ${workflowId}: ${error.message}; category=${category}` @@ -189,7 +189,7 @@ async function fetchWorkflowJobs({ return { jobs, error: null }; } catch (error) { const category = getErrorCategory(error); - const isNotFound = category === ERROR_CATEGORIES.RESOURCE || error.status === 404; + const isNotFound = category === ERROR_CATEGORIES.resource || error.status === 404; const logFn = isNotFound ? core?.info?.bind(core) : core?.warning?.bind(core); logFn?.(`Failed to fetch workflow jobs for ${runId}: ${error.message}; category=${category}`); return { jobs: [], error: { category, message: error.message } };