diff --git a/.github/workflows/aqua.yml b/.github/workflows/aqua.yml index 1307f2c27..eb0a6412e 100644 --- a/.github/workflows/aqua.yml +++ b/.github/workflows/aqua.yml @@ -17,9 +17,9 @@ permissions: id-token: write env: - GITHUB_API_TOKEN: ${{ secrets.MISE_GITHUB_TOKEN }} - GH_TOKEN: ${{ secrets.MISE_GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.MISE_GITHUB_TOKEN }} DRY_RUN: 0 + jobs: update: runs-on: ubuntu-latest diff --git a/.github/workflows/backfill.yml b/.github/workflows/backfill.yml index 945f6eb7e..a78c2e6db 100644 --- a/.github/workflows/backfill.yml +++ b/.github/workflows/backfill.yml @@ -32,8 +32,6 @@ permissions: env: GITHUB_TOKEN: ${{ secrets.MISE_GITHUB_TOKEN }} - TOKEN_MANAGER_URL: https://mise-tools.jdx.dev - TOKEN_MANAGER_SECRET: ${{ secrets.TOKEN_MANAGER_SECRET }} jobs: backfill: @@ -73,6 +71,9 @@ jobs: args="$args --debug" fi node scripts/${{ github.event.inputs.script }} $args + env: + GITHUB_PROXY_URL: https://mise-tools.jdx.dev + API_SECRET: ${{ secrets.TOKEN_MANAGER_SECRET }} - name: Push any remaining changes if: ${{ github.event.inputs.dry_run != 'true' }} diff --git a/.github/workflows/metadata.yml b/.github/workflows/metadata.yml index 64082f676..0623a01ad 100644 --- a/.github/workflows/metadata.yml +++ b/.github/workflows/metadata.yml @@ -11,8 +11,6 @@ concurrency: env: GITHUB_TOKEN: ${{ secrets.MISE_GITHUB_TOKEN }} - TOKEN_MANAGER_URL: https://mise-tools.jdx.dev - TOKEN_MANAGER_SECRET: ${{ secrets.TOKEN_MANAGER_SECRET }} jobs: fetch-metadata: @@ -33,4 +31,5 @@ jobs: run: node scripts/fetch-metadata.js env: SYNC_API_URL: https://mise-tools.jdx.dev + GITHUB_PROXY_URL: https://mise-tools.jdx.dev API_SECRET: ${{ secrets.TOKEN_MANAGER_SECRET }} diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 589584f5a..6c544123c 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -14,9 +14,9 @@ permissions: id-token: write env: - GITHUB_API_TOKEN: ${{ secrets.MISE_GITHUB_TOKEN }} - GH_TOKEN: ${{ 
secrets.MISE_GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.MISE_GITHUB_TOKEN }} DRY_RUN: 0 + jobs: update: runs-on: ubuntu-latest diff --git a/.github/workflows/tool-analysis.yml b/.github/workflows/tool-analysis.yml index bb6a06d01..22193b6a9 100644 --- a/.github/workflows/tool-analysis.yml +++ b/.github/workflows/tool-analysis.yml @@ -14,8 +14,7 @@ permissions: id-token: write env: - GITHUB_API_TOKEN: ${{ secrets.MISE_GITHUB_TOKEN }} - GH_TOKEN: ${{ secrets.MISE_GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.MISE_GITHUB_TOKEN }} jobs: analyze: diff --git a/.github/workflows/update.yml b/.github/workflows/update.yml index 298e3b0c7..2d4e27b23 100644 --- a/.github/workflows/update.yml +++ b/.github/workflows/update.yml @@ -5,7 +5,7 @@ on: full_sync: description: "Force full sync of all versions to D1" required: false - default: "false" + default: false type: boolean schedule: - cron: "*/15 * * * *" @@ -20,11 +20,9 @@ permissions: id-token: write env: - GITHUB_API_TOKEN: ${{ secrets.MISE_GITHUB_TOKEN }} - GH_TOKEN: ${{ secrets.MISE_GITHUB_TOKEN }} - TOKEN_MANAGER_URL: https://mise-tools.jdx.dev - TOKEN_MANAGER_SECRET: ${{ secrets.TOKEN_MANAGER_SECRET }} + GITHUB_TOKEN: ${{ secrets.MISE_GITHUB_TOKEN }} DRY_RUN: 0 + jobs: update: runs-on: ubuntu-latest @@ -43,6 +41,9 @@ jobs: restore-keys: | last-processed-tool- - run: ./scripts/update.sh "${{ github.event.schedule }}" + env: + GITHUB_PROXY_URL: https://mise-tools.jdx.dev + API_SECRET: ${{ secrets.TOKEN_MANAGER_SECRET }} - run: git checkout docs && git clean -df docs - name: Sync tools to D1 run: node scripts/sync-to-d1.js diff --git a/CLAUDE.md b/CLAUDE.md index 4856965b8..c0566360d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -95,6 +95,6 @@ The `ANALYTICS_DB` contains: The update workflow uses a token rotation system: -- `TOKEN_MANAGER_URL` / `TOKEN_MANAGER_SECRET`: Cloudflare Worker API for token pool -- `scripts/github-token.js`: Gets tokens, marks rate-limited tokens -- Tokens rotate automatically when rate limited +- 
`web/src/pages/gh/[...path].ts`: GitHub proxy that handles token rotation server-side +- `GITHUB_PROXY_URL`, `API_SECRET`: Proxy URL and API secret for authentication +- The proxy manages authentication and automatically rotates tokens when rate limits are hit, removing the need for clients to handle raw tokens diff --git a/scripts/backfill-created-at.js b/scripts/backfill-created-at.js index f51b911f2..e85be9151 100644 --- a/scripts/backfill-created-at.js +++ b/scripts/backfill-created-at.js @@ -8,9 +8,9 @@ * and updates TOML files that have placeholder timestamps. * * Environment variables: - * GITHUB_TOKEN - GitHub token for API access - * TOKEN_MANAGER_URL - URL of token manager service - * TOKEN_MANAGER_SECRET - Secret for token manager + * GITHUB_PROXY_URL - URL of GitHub proxy (e.g., https://mise-tools.jdx.dev) + * API_SECRET - Secret for proxy authentication + * GITHUB_TOKEN - Fallback GitHub token (optional) */ import { readFileSync, writeFileSync, readdirSync, existsSync } from "fs"; @@ -22,79 +22,53 @@ const DOCS_DIR = join(process.cwd(), "docs"); const CONCURRENCY = 30; // Process 30 tools in parallel const COMMIT_INTERVAL = 100; // Commit every 100 tools -// Get a random token from the token manager for each request -async function getRandomToken() { - const baseUrl = process.env.TOKEN_MANAGER_URL; - const secret = process.env.TOKEN_MANAGER_SECRET; +// Fetch versions with timestamps from mise +async function fetchVersionsWithTimestamps(tool, debug = false) { + // Use GitHub Proxy if available, otherwise fall back to direct GitHub access + const proxyUrl = process.env.GITHUB_PROXY_URL; // e.g. 
https://mise-tools.jdx.dev + const apiSecret = process.env.API_SECRET; + const githubToken = process.env.GITHUB_TOKEN; + + const env = { + ...process.env, + MISE_LIST_ALL_VERSIONS: "1", // Get all versions, not just first page + MISE_USE_VERSIONS_HOST: "0", // Bypass versions host to get real timestamps from GitHub + }; + + if (proxyUrl && apiSecret) { + env.MISE_URL_REPLACEMENTS = JSON.stringify({ + "regex:^https://api\\.github\\.com": `${proxyUrl}/gh`, + }); + env.MISE_GITHUB_TOKEN = apiSecret; + } else if (githubToken) { + env.MISE_GITHUB_TOKEN = githubToken; + } - if (!baseUrl || !secret) { - return process.env.GITHUB_TOKEN || null; + if (debug) { + env.MISE_DEBUG = "1"; } try { - const response = await fetch(`${baseUrl}/api/token`, { - headers: { - Authorization: `Bearer ${secret}`, - }, + const output = execSync(`mise ls-remote --json "${tool}"`, { + encoding: "utf-8", + stdio: ["pipe", "pipe", "pipe"], + env, + timeout: 60000, }); - if (!response.ok) { - return process.env.GITHUB_TOKEN || null; + if (!output || !output.trim()) { + return null; } - const data = await response.json(); - return data.token; + const data = JSON.parse(output); + return data; } catch (e) { - return process.env.GITHUB_TOKEN || null; - } -} - -// Fetch versions with timestamps from mise -async function fetchVersionsWithTimestamps(tool, retries = 2, debug = false) { - for (let attempt = 0; attempt <= retries; attempt++) { - // Get a fresh random token for each attempt - const token = await getRandomToken(); - const env = { - ...process.env, - MISE_LIST_ALL_VERSIONS: "1", // Get all versions, not just first page - MISE_USE_VERSIONS_HOST: "0", // Bypass versions host to get real timestamps from GitHub - }; - if (debug) { - env.MISE_DEBUG = "1"; - } - if (token) { - env.GITHUB_TOKEN = token; - } - - try { - const output = execSync(`mise ls-remote --json "${tool}"`, { - encoding: "utf-8", - stdio: ["pipe", "pipe", "pipe"], - env, - timeout: 60000, - }); - - if (!output || !output.trim()) 
{ - return null; - } - - const data = JSON.parse(output); - return data; - } catch (e) { - const stderr = e.stderr?.toString() || ""; - // Retry on rate limiting with a new token - if (stderr.includes("rate limit") || stderr.includes("403")) { - continue; - } - if (attempt === retries) { - if (stderr) { - console.log(` stderr: ${stderr.slice(0, 200)}`); - } - return null; - } + const stderr = e.stderr?.toString() || ""; + if (stderr) { + console.log(` stderr: ${stderr.slice(0, 200)}`); } + return null; } - return null; } // Parse command line arguments @@ -156,7 +130,7 @@ async function processTool(tool, dryRun, debug = false) { } // Fetch real timestamps from mise - const versionData = await fetchVersionsWithTimestamps(tool, 2, debug); + const versionData = await fetchVersionsWithTimestamps(tool, debug); if (!versionData) { return { tool, status: "failed", error: "Failed to fetch versions" }; } diff --git a/scripts/fetch-metadata.js b/scripts/fetch-metadata.js index f9aa3476f..1a3bfb54a 100755 --- a/scripts/fetch-metadata.js +++ b/scripts/fetch-metadata.js @@ -5,10 +5,10 @@ * Usage: node fetch-metadata.js [--tool=name] * * Environment variables: - * SYNC_API_URL - Base URL of the API (e.g., https://mise-tools.jdx.dev) - * API_SECRET - API secret for authentication - * TOKEN_MANAGER_URL / TOKEN_MANAGER_SECRET - GitHub token manager (optional) - * GITHUB_TOKEN - Fallback GitHub token (optional) + * SYNC_API_URL - Base URL of the API (e.g., https://mise-tools.jdx.dev) + * API_SECRET - API secret for authentication + * GITHUB_PROXY_URL - URL of GitHub proxy (optional) + * GITHUB_TOKEN - Fallback GitHub token (optional) * * Fetches license, homepage, authors, and description from: * - npm (registry.npmjs.org) @@ -98,89 +98,6 @@ function getToolsFromToml() { return tools; } -// Token manager state -let currentToken = null; -let currentTokenId = null; - -// Get a token from the token manager -async function getTokenFromManager() { - const baseUrl = 
process.env.TOKEN_MANAGER_URL; - const secret = process.env.TOKEN_MANAGER_SECRET; - - if (!baseUrl || !secret) { - return null; - } - - try { - const response = await fetch(`${baseUrl}/api/token`, { - headers: { - Authorization: `Bearer ${secret}`, - }, - }); - - if (!response.ok) { - console.error(`Failed to get token from manager: ${response.status}`); - return null; - } - - const data = await response.json(); - currentToken = data.token; - currentTokenId = data.token_id || data.installation_id; - console.log(`Got token from manager (ID: ${currentTokenId})`); - return currentToken; - } catch (e) { - console.error(`Error getting token from manager: ${e.message}`); - return null; - } -} - -// Mark current token as rate-limited and get a new one -async function rotateToken() { - const baseUrl = process.env.TOKEN_MANAGER_URL; - const secret = process.env.TOKEN_MANAGER_SECRET; - - if (!baseUrl || !secret || !currentTokenId) { - return null; - } - - try { - // Mark current token as rate-limited - const resetAt = new Date(Date.now() + 60 * 60 * 1000).toISOString(); // 1 hour from now - await fetch(`${baseUrl}/api/token/rate-limit`, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${secret}`, - }, - body: JSON.stringify({ - token_id: currentTokenId, - reset_at: resetAt, - }), - }); - console.log(`Marked token ${currentTokenId} as rate-limited`); - - // Get a new token - return await getTokenFromManager(); - } catch (e) { - console.error(`Error rotating token: ${e.message}`); - return null; - } -} - -// Get the current GitHub token (from manager or env) -async function getGitHubToken() { - // Try token manager first - if (!currentToken) { - const managerToken = await getTokenFromManager(); - if (managerToken) return managerToken; - } else { - return currentToken; - } - - // Fall back to env var - return process.env.GITHUB_TOKEN || null; -} - // Rate limiters for each API class RateLimiter { constructor(requestsPerSecond) { @@ 
-366,44 +283,37 @@ async function fetchRubyGemsMetadata(gemName) { } } -// Fetch GitHub metadata with token rotation support +// Fetch GitHub metadata via Proxy or Direct API async function fetchGitHubMetadata(owner, repo) { await rateLimiters.github.wait(); - const token = await getGitHubToken(); + const proxyUrl = process.env.GITHUB_PROXY_URL; + const apiSecret = process.env.API_SECRET; + const githubToken = process.env.GITHUB_TOKEN; - try { - const headers = { - Accept: "application/vnd.github.v3+json", - "User-Agent": "mise-versions", - }; - if (token) { - headers.Authorization = `token ${token}`; + let url; + const headers = { + Accept: "application/vnd.github.v3+json", + "User-Agent": "mise-versions", + }; + + if (proxyUrl && apiSecret) { + url = `${proxyUrl}/gh/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}`; + headers["Authorization"] = `Bearer ${apiSecret}`; + } else { + url = `https://api.github.com/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}`; + if (githubToken) { + headers["Authorization"] = `Bearer ${githubToken}`; } + } + try { const controller = new AbortController(); const timeout = setTimeout(() => controller.abort(), 10000); - const response = await fetch( - `https://api.github.com/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}`, - { headers, signal: controller.signal }, - ); + const response = await fetch(url, { headers, signal: controller.signal }); clearTimeout(timeout); - // Handle rate limiting - if (response.status === 403 || response.status === 429) { - const remaining = response.headers.get("x-ratelimit-remaining"); - if (remaining === "0") { - console.log(`GitHub rate limited, rotating token...`); - const newToken = await rotateToken(); - if (newToken) { - // Retry with new token - return fetchGitHubMetadata(owner, repo); - } - } - return null; - } - if (response.status === 404) { return null; } @@ -508,14 +418,12 @@ async function main() { const tools = getToolsFromToml(); 
console.log(`Found ${tools.length} tools`); - // Initialize GitHub token - const tokenManagerUrl = process.env.TOKEN_MANAGER_URL; - if (tokenManagerUrl) { - console.log("Using token manager for GitHub API"); - } else if (process.env.GITHUB_TOKEN) { - console.log("Using GITHUB_TOKEN from environment"); + // Initialize GitHub proxy + const proxyUrl = process.env.GITHUB_PROXY_URL; + if (proxyUrl) { + console.log("Using GitHub Proxy"); } else { - console.log("No GitHub token configured, API may be rate limited"); + console.log("No GitHub Proxy configured, API may be rate limited"); } // Filter tools if specific tool requested diff --git a/scripts/github-token.js b/scripts/github-token.js deleted file mode 100755 index 5ecc64a9e..000000000 --- a/scripts/github-token.js +++ /dev/null @@ -1,230 +0,0 @@ -#!/usr/bin/env node - -/** - * GitHub Token Manager Helper for GitHub Actions - * - * This script fetches a GitHub token from the token manager API - * and can optionally record usage for rate limit tracking. - * - * Usage in GitHub Actions: - * - * - name: Get GitHub Token - * id: github-token - * run: node scripts/github-token.js - * env: - * TOKEN_MANAGER_URL: ${{ secrets.TOKEN_MANAGER_URL }} - * TOKEN_MANAGER_SECRET: ${{ secrets.TOKEN_MANAGER_SECRET }} - * - * - name: Use Token - * run: | - * echo "Token: ${{ steps.github-token.outputs.token }}" - * # Use the token for GitHub API calls - * env: - * GITHUB_TOKEN: ${{ steps.github-token.outputs.token }} - */ - -import https from "https"; -import http from "http"; -import fs from "fs"; - -async function makeRequest(url, options = {}) { - return new Promise((resolve, reject) => { - const urlObj = new URL(url); - const client = urlObj.protocol === "https:" ? 
https : http; - - const req = client.request( - url, - { - method: options.method || "GET", - headers: { - "Content-Type": "application/json", - ...options.headers, - }, - }, - (res) => { - let data = ""; - res.on("data", (chunk) => (data += chunk)); - res.on("end", () => { - try { - const parsed = JSON.parse(data); - resolve({ - status: res.statusCode, - data: parsed, - headers: res.headers, - }); - } catch (e) { - resolve({ status: res.statusCode, data, headers: res.headers }); - } - }); - }, - ); - - req.on("error", reject); - - if (options.body) { - req.write(options.body); - } - - req.end(); - }); -} - -async function recordUsage(baseUrl, secret, tokenId, endpoint, rateLimitInfo) { - const usageUrl = `${baseUrl}/api/token/usage`; - - return new Promise((resolve, reject) => { - const urlObj = new URL(usageUrl); - const client = urlObj.protocol === "https:" ? https : http; - - const payload = JSON.stringify({ - token_id: tokenId, - endpoint, - remaining_requests: rateLimitInfo?.remaining, - reset_at: rateLimitInfo?.reset - ? new Date(rateLimitInfo.reset * 1000).toISOString() - : undefined, - }); - - const req = client.request( - usageUrl, - { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${secret}`, - "Content-Length": Buffer.byteLength(payload), - }, - }, - (res) => { - let data = ""; - res.on("data", (chunk) => (data += chunk)); - res.on("end", () => resolve({ status: res.statusCode, data })); - }, - ); - - req.on("error", reject); - req.write(payload); - req.end(); - }); -} - -async function markRateLimited(baseUrl, secret, tokenId, resetTime) { - const rateLimitUrl = `${baseUrl}/api/token/rate-limit`; - - const reset_at = resetTime - ? 
new Date(resetTime).toISOString() // resetTime is in YYYY-MM-DD HH:MM:SS +timezone format - : new Date(Date.now() + 10 * 60 * 1000).toISOString(); // Default to 10 minutes from now - - const payload = JSON.stringify({ - token_id: tokenId, - reset_at: reset_at, - }); - - return makeRequest(rateLimitUrl, { - method: "POST", - headers: { - Authorization: `Bearer ${secret}`, - "Content-Length": Buffer.byteLength(payload), - }, - body: payload, - }); -} - -async function main() { - const baseUrl = process.env.TOKEN_MANAGER_URL; - const secret = process.env.TOKEN_MANAGER_SECRET; - const action = process.argv[2] || "get-token"; - - if (!baseUrl || !secret) { - console.error( - "❌ Missing required environment variables: TOKEN_MANAGER_URL, TOKEN_MANAGER_SECRET", - ); - process.exit(1); - } - - try { - if (action === "get-token") { - const response = await makeRequest(`${baseUrl}/api/token`, { - headers: { - Authorization: `Bearer ${secret}`, - }, - }); - - if (response.status !== 200) { - console.error( - `❌ Failed to fetch token: ${response.status} ${response.data}`, - ); - process.exit(1); - } - - const { token, installation_id, expires_at, token_id } = response.data; - - // Set GitHub Actions outputs - if (process.env.GITHUB_ACTIONS === "true") { - // Mask the token in logs - console.error(`::add-mask::${token}`); - } - - // Return token and token_id - console.log(`${token} ${token_id || installation_id}`); - } else if (action === "mark-rate-limited") { - const tokenId = process.argv[3]; - const resetTime = process.argv[4]; // Optional reset time (e.g. 
2025-07-28 04:18:45 +10:00) - - if (!tokenId) { - console.error( - "❌ Usage: node github-token.js mark-rate-limited <token_id> [reset_time]", - ); - process.exit(1); - } - - const response = await markRateLimited( - baseUrl, - secret, - parseInt(tokenId), - resetTime, - ); - - if (response.status !== 200) { - console.error( - `❌ Failed to mark token ${tokenId} as rate-limited: ${response.status}`, - ); - process.exit(1); - } - } else if (action === "stats") { - console.error("📊 Fetching token statistics..."); - - const response = await makeRequest(`${baseUrl}/api/stats`, { - headers: { - Authorization: `Bearer ${secret}`, - }, - }); - - if (response.status !== 200) { - console.error( - `❌ Failed to fetch stats: ${response.status} ${response.data}`, - ); - process.exit(1); - } - - console.error("📈 Token Statistics:"); - console.error(` Active tokens: ${response.data.active}`); - console.error(` Total tokens: ${response.data.total}`); - } else { - console.error( - "❌ Unknown action. Available actions: get-token, record-usage, mark-rate-limited, stats", - ); - process.exit(1); - } - } catch (error) { - console.error("❌ Error:", error.message); - process.exit(1); - } -} - -// ES module equivalent of require.main === module -if (import.meta.url === `file://${process.argv[1]}`) { - main(); -} - -export { makeRequest, recordUsage, markRateLimited }; diff --git a/scripts/migrate.js b/scripts/migrate.js index 60c38cd0b..03a6573dc 100644 --- a/scripts/migrate.js +++ b/scripts/migrate.js @@ -10,8 +10,8 @@ * This script is mainly for checking status and monitoring. * * Environment Variables: - * TOKEN_MANAGER_URL - URL of the token manager API - * TOKEN_MANAGER_SECRET - API secret for authentication + * API_URL - URL of the API (e.g. 
https://mise-tools.jdx.dev) + * API_SECRET - API secret for authentication */ import https from "https"; @@ -71,13 +71,13 @@ async function getMigrationStatus(baseUrl, secret) { } async function main() { - const baseUrl = process.env.TOKEN_MANAGER_URL; - const secret = process.env.TOKEN_MANAGER_SECRET; + const baseUrl = process.env.API_URL; + const secret = process.env.API_SECRET; const action = process.argv[2] || "status"; if (!baseUrl || !secret) { console.error( - "❌ Missing required environment variables: TOKEN_MANAGER_URL, TOKEN_MANAGER_SECRET", + "❌ Missing required environment variables: API_URL, API_SECRET", ); process.exit(1); } diff --git a/scripts/update.sh b/scripts/update.sh index 5ca89a4c4..c02a2b2e8 100755 --- a/scripts/update.sh +++ b/scripts/update.sh @@ -7,9 +7,12 @@ export MISE_USE_VERSIONS_HOST=0 export MISE_LIST_ALL_VERSIONS=1 export MISE_LOG_HTTP=1 -# GitHub Token Manager configuration -export TOKEN_MANAGER_URL="$TOKEN_MANAGER_URL" -export TOKEN_MANAGER_SECRET="$TOKEN_MANAGER_SECRET" +# GitHub Proxy configuration +if [ -n "${GITHUB_PROXY_URL:-}" ] && [ -n "${API_SECRET:-}" ]; then + export MISE_URL_REPLACEMENTS="{\"regex:^https://api\\.github\\.com\": \"${GITHUB_PROXY_URL}/gh\"}" + # Pass API_SECRET as MISE_GITHUB_TOKEN for proxy authentication + export MISE_GITHUB_TOKEN="$API_SECRET" +fi # ============================================================================ # Structured Logging @@ -226,8 +229,6 @@ generate_summary() { - **Tools Skipped**: $(get_stat "total_tools_skipped") - **Tools Failed**: $(get_stat "total_tools_failed") - **Tools with No Versions**: $(get_stat "total_tools_no_versions") -- **Tokens Used**: $(get_stat "total_tokens_used") -- **Rate Limits Hit**: $(get_stat "total_rate_limits_hit") - **Duration**: ${duration_minutes}m ${duration_seconds}s - **Mise Version**: ${CUR_MISE_VERSION:-not set} @@ -236,10 +237,6 @@ generate_summary() { - **Update Rate**: $([ "$(get_stat "total_tools_checked")" -gt 0 ] && echo 
"$((($(get_stat "total_tools_updated") * 100) / $(get_stat "total_tools_checked")))" || echo "0")% - **Coverage**: $([ "$(get_stat "total_tools_available")" -gt 0 ] && echo "$((($(get_stat "total_tools_checked") * 100) / $(get_stat "total_tools_available")))" || echo "0")% -## 🔧 Token Management -- **Tokens Consumed**: $(get_stat "total_tokens_used") -- **Rate Limit Events**: $(get_stat "total_rate_limits_hit") - ## 📋 Details - **Tools Available**: $(get_stat "total_tools_available") - **Tools Processed**: $(get_stat "total_tools_checked") @@ -285,27 +282,9 @@ SUMMARY_EOF set_stat "summary_generated" "true" } -# Function to mark a token as rate-limited -mark_token_rate_limited() { - local token_id="$1" - local reset_time="${2:-}" - - if [ -z "$TOKEN_MANAGER_URL" ] || [ -z "$TOKEN_MANAGER_SECRET" ]; then - return - fi - - increment_stat "total_rate_limits_hit" - - # Mark token as rate-limited asynchronously - { - node scripts/github-token.js mark-rate-limited "$token_id" "$reset_time" || true - } & -} - # Function to generate TOML file with timestamps generate_toml_file() { local tool="$1" - local token="$2" local toml_file="docs/$tool.toml" local versions_file="docs/$tool" @@ -350,25 +329,6 @@ generate_toml_file() { fi } -# Function to get a fresh GitHub token from the token manager -get_github_token() { - if [ -z "$TOKEN_MANAGER_URL" ] || [ -z "$TOKEN_MANAGER_SECRET" ]; then - log_error "TOKEN_MANAGER_URL and TOKEN_MANAGER_SECRET not set" - return 1 - fi - - increment_stat "total_tokens_used" - - local token_output - if ! token_output=$(node scripts/github-token.js get-token); then - log_error "No tokens available" - return 1 - fi - - echo "$token_output" - return 0 -} - fetch() { increment_stat "total_tools_checked" @@ -379,67 +339,17 @@ fetch() { ;; esac - # Get a fresh token for this fetch operation - local token_info - if ! 
token_info=$(get_github_token); then - # No tokens available, stop processing this tool gracefully - log_warn "No tokens available, skipping" "tool=$1" - increment_stat "total_tools_failed" - return 1 - fi - local token - local token_id - - # Parse token and token_id from the response - if [[ "$token_info" == *" "* ]]; then - token=$(echo "$token_info" | cut -d' ' -f1) - token_id=$(echo "$token_info" | cut -d' ' -f2) - else - # No valid token received, stop processing this tool - log_error "No valid token received, skipping" "tool=$1" - increment_stat "total_tools_failed" - return 1 - fi - - local rate_limit_info - rate_limit_info=$(GITHUB_TOKEN="$token" mise x -- wait-for-gh-rate-limit 2>&1 || echo "") - # Only show rate limit if low - local remaining - remaining=$(echo "$rate_limit_info" | grep -oP 'GitHub rate limit: \K[0-9]+' || echo "5000") - if [ "$remaining" -lt 1000 ]; then - log_warn "GitHub rate limit low" "remaining=$remaining" "tool=$1" - fi - log_info "Fetching versions" "tool=$1" - - # Create a temporary file to capture stderr and check for rate limiting - local stderr_file - stderr_file=$(mktemp) + # Log removed to reduce verbosity + # log_info "Fetching versions" "tool=$1" - if ! docker run -e GITHUB_TOKEN="$token" -e MISE_USE_VERSIONS_HOST -e MISE_LIST_ALL_VERSIONS -e MISE_LOG_HTTP -e MISE_EXPERIMENTAL -e MISE_TRUSTED_CONFIG_PATHS=/ \ - jdxcode/mise -y ls-remote "$1" >"docs/$1" 2>"$stderr_file"; then + if ! 
docker run -e MISE_URL_REPLACEMENTS -e MISE_GITHUB_TOKEN -e MISE_USE_VERSIONS_HOST -e MISE_LIST_ALL_VERSIONS -e MISE_LOG_HTTP -e MISE_EXPERIMENTAL -e MISE_TRUSTED_CONFIG_PATHS=/ \ + jdxcode/mise -y ls-remote "$1" >"docs/$1"; then log_error "Failed to fetch versions" "tool=$1" increment_stat "total_tools_failed" - - cat "$stderr_file" >&2 - - # Check if this was a rate limit issue (403 Forbidden) - if grep -q "403 Forbidden" "$stderr_file"; then - local reset_time="" - if [ "$remaining" == "0" ]; then - reset_time=$(echo "$rate_limit_info" | grep -oP 'resets at \K\S+ \S+' || echo "") - fi - mark_token_rate_limited "$token_id" "$reset_time" - log_warn "Rate limited, retrying with new token" "tool=$1" "token_id=$token_id" - fetch "$1" - fi - - rm -f "$stderr_file" "docs/$1" + rm -f "docs/$1" return fi - # Clean up stderr file - rm -f "$stderr_file" - new_lines=$(wc -l <"docs/$1") if [ "$new_lines" -eq 0 ]; then log_debug "No versions found" "tool=$1" @@ -465,7 +375,7 @@ fetch() { esac # Generate TOML file with timestamps (only TOML is committed) - generate_toml_file "$1" "$token" + generate_toml_file "$1" # Clean up intermediate plain text file rm -f "docs/$1" @@ -480,151 +390,111 @@ fetch() { fi } -# Enhanced token management setup -setup_token_management() { - log_group_start "Token Management Setup" +CUR_MISE_VERSION=$(docker run jdxcode/mise -v) +export CUR_MISE_VERSION +log_info "Mise version detected" "version=$CUR_MISE_VERSION" - if [ -z "$TOKEN_MANAGER_URL" ] || [ -z "$TOKEN_MANAGER_SECRET" ]; then - log_error "Token manager not configured" - log_group_end - return 1 - fi +tools="$(docker run -e MISE_EXPERIMENTAL=1 -e MISE_VERSION="$CUR_MISE_VERSION" jdxcode/mise registry | awk '{print $1}')" +total_tools=$(echo "$tools" | wc -w) +set_stat "total_tools_available" "$total_tools" +log_info "Tool registry loaded" "total_tools=$total_tools" - # Check token manager health - if ! 
curl -f -s "$TOKEN_MANAGER_URL/health" >/dev/null 2>&1; then - log_error "Token manager health check failed" "url=$TOKEN_MANAGER_URL" - log_group_end - return 1 +# Cleanup old tools that are no longer in the registry +log_info "Cleaning up old tools" +for file in docs/*.toml; do + if [[ ! -f "$file" ]]; then + continue fi - log_info "Token manager health check passed" - - # Get token statistics - if STATS=$(curl -s -H "Authorization: Bearer $TOKEN_MANAGER_SECRET" "$TOKEN_MANAGER_URL/api/stats" 2>/dev/null); then - ACTIVE_TOKENS=$(echo "$STATS" | jq -r '.active // 0' 2>/dev/null || echo "0") - log_info "Token pool status" "active_tokens=$ACTIVE_TOKENS" - if [ "$ACTIVE_TOKENS" -eq 0 ]; then - log_error "No active tokens available" - log_group_end - return 1 - fi + tool_name=$(basename "$file" .toml) + # specialized files we want to keep around + if [[ "$tool_name" == python-precompiled* ]]; then + continue fi - - log_group_end -} - -# Setup token management before starting -if setup_token_management; then - log_group_start "Initialization" - - CUR_MISE_VERSION=$(docker run jdxcode/mise -v) - export CUR_MISE_VERSION - log_info "Mise version detected" "version=$CUR_MISE_VERSION" - - tools="$(docker run -e MISE_EXPERIMENTAL=1 -e MISE_VERSION="$CUR_MISE_VERSION" jdxcode/mise registry | awk '{print $1}')" - total_tools=$(echo "$tools" | wc -w) - set_stat "total_tools_available" "$total_tools" - log_info "Tool registry loaded" "total_tools=$total_tools" - - # Check if tokens are available before starting processing - if ! get_github_token >/dev/null 2>&1; then - log_warn "No tokens available - stopping early" - log_group_end - generate_summary - exit 0 + if ! 
echo "$tools" | grep -q "^$tool_name$"; then + log_info "Removing old tool" "tool=$tool_name" + rm -f "$file" "docs/$tool_name" + git rm --ignore-unmatch "$file" "docs/$tool_name" 2>/dev/null || true fi +done - log_group_end +# Resume from the last processed tool +last_tool_processed="" +if [ -f "last_processed_tool.txt" ]; then + last_tool_processed=$(cat "last_processed_tool.txt") + log_info "Resuming from previous run" "last_tool=$last_tool_processed" +fi +tools_limited=$(grep -m 1 -A 100 -F -x "$last_tool_processed" <<< "$tools"$'\n'"$tools" | tail -n +2 || echo "$tools" | head -n 100) + +log_group_start "Processing Tools" + +# Process tools +export -f fetch generate_toml_file increment_stat get_stat add_to_list set_stat +export -f log log_debug log_info log_warn log_error should_log log_timestamp get_log_priority +export STATS_DIR LOG_LEVEL +export MISE_URL_REPLACEMENTS +export MISE_GITHUB_TOKEN + +first_processed_tool="" +last_processed_tool="" +processed_count=0 + +for tool in $tools_limited; do + # Log progress every 10 tools + processed_count=$((processed_count + 1)) + if (( processed_count % 10 == 0 )); then + log_info "Processing tools..." "count=$processed_count" + fi - # Resume from the last processed tool - last_tool_processed="" - if [ -f "last_processed_tool.txt" ]; then - last_tool_processed=$(cat "last_processed_tool.txt") - log_info "Resuming from previous run" "last_tool=$last_tool_processed" + if ! 
timeout 60s bash -c "fetch $tool"; then + log_error "Fetch timed out or failed, continuing" "tool=$tool" + # Don't break, continue to next tool + continue fi - tools_limited=$(grep -m 1 -A 100 -F -x "$last_tool_processed" <<<"$tools"$'\n'"$tools" | tail -n +2 || echo "$tools" | head -n 100) + if [ -z "$first_processed_tool" ]; then + first_processed_tool="$tool" + fi + last_processed_tool="$tool" +done - log_group_start "Processing Tools" +log_group_end +set_stat "first_processed_tool" "$first_processed_tool" +if [ -n "$last_processed_tool" ]; then + echo "$last_processed_tool" >"last_processed_tool.txt" +fi +set_stat "last_processed_tool" "$last_processed_tool" - # Cleanup old tools that are no longer in the registry - log_info "Cleaning up old tools" - for file in docs/*.toml; do - if [[ ! -f "$file" ]]; then - continue - fi - tool_name=$(basename "$file" .toml) - # specialized files we want to keep around - if [[ "$tool_name" == python-precompiled* ]]; then - continue - fi - if ! echo "$tools" | grep -q "^$tool_name$"; then - log_info "Removing old tool" "tool=$tool_name" - rm -f "$file" "docs/$tool_name" - git rm --ignore-unmatch "$file" "docs/$tool_name" 2>/dev/null || true - fi - done - - # Process tools - export -f fetch get_github_token mark_token_rate_limited generate_toml_file increment_stat get_stat add_to_list set_stat - export -f log log_debug log_info log_warn log_error should_log log_timestamp get_log_priority - export STATS_DIR LOG_LEVEL - first_processed_tool="" - last_processed_tool="" - for tool in $tools_limited; do - if ! timeout 60s bash -c "fetch $tool"; then - log_error "Fetch timed out or failed, continuing" "tool=$tool" - # Don't break, continue to next tool - continue - fi - if [ -z "$first_processed_tool" ]; then - first_processed_tool="$tool" - fi - last_processed_tool="$tool" - done +if [ "${DRY_RUN:-}" == 0 ] && ! 
git diff-index --cached --quiet HEAD; then + git diff --compact-summary --cached - log_group_end - set_stat "first_processed_tool" "$first_processed_tool" - if [ -n "$last_processed_tool" ]; then - echo "$last_processed_tool" >"last_processed_tool.txt" - fi - set_stat "last_processed_tool" "$last_processed_tool" - - if [ "${DRY_RUN:-}" == 0 ] && ! git diff-index --cached --quiet HEAD; then - git diff --compact-summary --cached - - # Get the list of updated tools for the commit message - updated_tools_list=$(cat "$STATS_DIR/updated_tools_list" 2>/dev/null || echo "") - tools_updated_count=$(get_stat "total_tools_updated") - - commit_msg="" - if [ -n "$updated_tools_list" ] && [ "$tools_updated_count" -gt 0 ]; then - # Create a more descriptive commit message with updated tools - if [ "$tools_updated_count" -le 10 ]; then - # If 10 or fewer tools, list them all - commit_msg="versions: update $tools_updated_count tools ($updated_tools_list)" - else - # If more than 10 tools, just show the count - commit_msg="versions: update $tools_updated_count tools" - fi + # Get the list of updated tools for the commit message + updated_tools_list=$(cat "$STATS_DIR/updated_tools_list" 2>/dev/null || echo "") + tools_updated_count=$(get_stat "total_tools_updated") + + commit_msg="" + if [ -n "$updated_tools_list" ] && [ "$tools_updated_count" -gt 0 ]; then + # Create a more descriptive commit message with updated tools + if [ "$tools_updated_count" -le 10 ]; then + # If 10 or fewer tools, list them all + commit_msg="versions: update $tools_updated_count tools ($updated_tools_list)" else - # Fallback to original message - commit_msg="versions: update" + # If more than 10 tools, just show the count + commit_msg="versions: update $tools_updated_count tools" fi - - git commit -m "$commit_msg" - git pull --autostash --rebase origin main - git push + else + # Fallback to original message + commit_msg="versions: update" fi - # Save updated tools list for D1 sync (one tool per line) - cat 
"$STATS_DIR/updated_tools_list" 2>/dev/null | tr ' ' '\n' | grep -v '^$' >updated_tools.txt || true - updated_count=$(wc -l /dev/null | tr ' ' '\n' | grep -v '^$' > updated_tools.txt || true +updated_count=$(wc -l < updated_tools.txt | tr -d ' ') +log_info "Updated tools saved" "file=updated_tools.txt" "count=$updated_count" + # Always generate and display summary generate_summary diff --git a/src/database.ts b/src/database.ts index 85a9a49fa..ff60d65ec 100644 --- a/src/database.ts +++ b/src/database.ts @@ -87,6 +87,19 @@ export function setupDatabase(db: ReturnType) { ); }, + // Deactivate a token (used when GitHub returns 401) + async deactivateToken(tokenId: number) { + await db + .update(tokens) + .set({ + is_active: 0, + }) + .where(eq(tokens.id, tokenId)) + .run(); + + console.log(`Token ${tokenId} deactivated`); + }, + // Get all active tokens (all are user tokens now) async getAllTokens() { return await db diff --git a/web/src/pages/gh/[...path].ts b/web/src/pages/gh/[...path].ts new file mode 100644 index 000000000..91b1da9ab --- /dev/null +++ b/web/src/pages/gh/[...path].ts @@ -0,0 +1,107 @@ +import type { APIRoute } from "astro"; +import { drizzle } from "drizzle-orm/d1"; +import { setupDatabase } from "../../../../src/database"; +import { errorResponse, requireApiAuth } from "../../lib/api"; + +// GitHub proxy +export const ALL: APIRoute = async ({ request, locals, params }) => { + const env = locals.runtime.env; + const ctx = locals.runtime.ctx; + + if (request.method !== "GET" && request.method !== "HEAD") { + return errorResponse("Method Not Allowed", 405); + } + + // Require API auth + const authError = requireApiAuth(request, env.API_SECRET); + if (authError) return authError; + + // Database setup + const db = drizzle(env.DB); + const database = setupDatabase(db); + + // Trigger deactivation + ctx.waitUntil(database.deactivateExpiredTokens().catch(console.error)); + + // Retry loop (max 3 attempts) + for (let attempt = 0; attempt < 3; attempt++) { 
+ // Get GitHub token + const token = await database.getNextToken(); + if (!token) { + return errorResponse("No GitHub tokens available", 503); + } + + // Construct upstream URL + const path = params.path ?? ""; + const url = new URL(request.url); + const upstreamUrl = new URL(`https://api.github.com/${path}${url.search}`); + + // Forward request + const headers = new Headers(request.headers); + headers.set("Authorization", `Bearer ${token.token}`); + + try { + const response = await fetch(upstreamUrl.toString(), { + method: request.method, + headers, + }); + + // Handle rate limits and errors + const rateLimitRemaining = response.headers.get("x-ratelimit-remaining"); + const rateLimitReset = response.headers.get("x-ratelimit-reset"); + + let isRateLimited = false; + if ( + response.status === 403 || + response.status === 429 || + response.status === 401 + ) { + if ( + (rateLimitRemaining === "0" && rateLimitReset) || + response.status === 429 + ) { + isRateLimited = true; + console.warn( + `Token ${token.id} rate limited (${response.status}). Marking as rate limited.`, + ); + + // Default to 1 hour if no reset header/invalid for 429 + let resetDate = new Date(Date.now() + 60 * 60 * 1000).toISOString(); + if (rateLimitReset) { + resetDate = new Date(parseInt(rateLimitReset) * 1000).toISOString(); + } else if (response.headers.get("retry-after")) { + const retryAfter = parseInt(response.headers.get("retry-after")!); + resetDate = new Date(Date.now() + retryAfter * 1000).toISOString(); + } + + await database.markTokenRateLimited(token.id, resetDate); + } else if (response.status === 401) { + console.warn( + `Token ${token.id} unauthorized (401). 
Deactivating token.`, + ); + await database.deactivateToken(token.id); + } + } + + const shouldRetry = + (isRateLimited || response.status === 401) && attempt < 2; + + if (!shouldRetry) { + return new Response(response.body, { + status: response.status, + statusText: response.statusText, + headers: response.headers, + }); + } + } catch (err) { + console.error("Proxy error:", err); + // Capture error as response if possible, or just fail + return errorResponse( + `Proxy error: ${err instanceof Error ? err.message : String(err)}`, + 502, + ); + } + } + + return errorResponse("Retries exhausted", 502); +};