diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 59dd245f7676..000000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,54 +0,0 @@ -version: 2.1 - -# https://circleci.com/docs/guides/orchestrate/dynamic-config/ -setup: true - -orbs: - git-shallow-clone: guitarrapc/git-shallow-clone@2.8.0 - continuation: circleci/continuation@2.0.1 - node: circleci/node@7.2.1 - -parameters: - ghBaseBranch: - default: next - description: The name of the base branch (the target of the PR) - type: string - ghPrNumber: - default: '' - description: The PR number - type: string - workflow: - default: skipped - description: Which workflow to run - enum: - - normal - - merged - - daily - - skipped - - docs - type: enum - -jobs: - generate-and-run-config: - executor: - name: node/default - resource_class: small - steps: - - node/install: - install-yarn: true - - git-shallow-clone/checkout_advanced: - clone_options: '--depth 1' - - run: - name: Install dependencies - command: yarn workspaces focus @storybook/scripts - - run: - name: Generate config - command: | - yarn dlx jiti ./scripts/ci/main.ts --workflow=<< pipeline.parameters.workflow >> - - continuation/continue: - configuration_path: .circleci/config.generated.yml -workflows: - setup: - jobs: - - generate-and-run-config - when: pipeline.parameters.workflow != "skipped" diff --git a/.cursor/mcp.json b/.cursor/mcp.json index e07488b7f3cc..58dd754ee755 100644 --- a/.cursor/mcp.json +++ b/.cursor/mcp.json @@ -1,10 +1,8 @@ { "mcpServers": { - "wallaby": { - "command": "node", - "args": [ - "~/.wallaby/mcp/" - ] + "nx-mcp": { + "command": "npx", + "args": ["nx", "mcp"] } } -} \ No newline at end of file +} diff --git a/.github/workflows/nx.yml b/.github/workflows/nx.yml index f3b048a2122a..c306b9813624 100644 --- a/.github/workflows/nx.yml +++ b/.github/workflows/nx.yml @@ -10,6 +10,15 @@ on: schedule: - cron: '0 23 * * *' +concurrency: + # Per-PR queue: consecutive pushes to the same PR queue behind 
an + # in-progress run (cancel-in-progress: false); GitHub keeps only the newest + # pending run per group. This avoids NX Cloud's self-cancellation: a new + # CIPE never starts on a branch while the previous one is still running, + # so nothing is cancelled mid-flight. Different PRs keep their own queues. + group: nx-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: false + permissions: actions: read contents: read diff --git a/.github/workflows/trigger-circle-ci-workflow.yml b/.github/workflows/trigger-circle-ci-workflow.yml deleted file mode 100644 index 7b1cedda19f1..000000000000 --- a/.github/workflows/trigger-circle-ci-workflow.yml +++ /dev/null @@ -1,68 +0,0 @@ -name: Trigger CircleCI workflow - -on: - # Use pull_request_target, as we don't need to check out the actual code of the fork in this script. - # And this is the only way to trigger the Circle CI API on forks as well. - pull_request_target: - types: [opened, synchronize, labeled, reopened] - push: - branches: - - next - - main - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - get-branch: - if: github.repository_owner == 'storybookjs' - runs-on: ubuntu-latest - steps: - - id: get-branch - env: - # Stored as environment variable to prevent script injection - REF_NAME: ${{ github.ref_name }} - PR_REF_NAME: ${{ github.event.pull_request.head.ref }} - run: | - if [ "${{ github.event.pull_request.head.repo.fork }}" = "true" ]; then - export BRANCH=pull/${{ github.event.pull_request.number }}/head - elif [ "${{ github.event_name }}" = "push" ]; then - export BRANCH="$REF_NAME" - else - export BRANCH="$PR_REF_NAME" - fi - echo "$BRANCH" - echo "branch=$BRANCH" >> $GITHUB_ENV - outputs: - branch: ${{ env.branch }} - - get-parameters: - if: github.repository_owner == 'storybookjs' - runs-on: ubuntu-latest - steps: - - if: github.event_name == 'pull_request_target' && (contains(github.event.pull_request.labels.*.name, 'ci:normal')) - run: echo 
"workflow=normal" >> $GITHUB_ENV - - if: github.event_name == 'pull_request_target' && (contains(github.event.pull_request.labels.*.name, 'ci:docs')) - run: echo "workflow=docs" >> $GITHUB_ENV - - if: github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'ci:merged') - run: echo "workflow=merged" >> $GITHUB_ENV - - if: github.event_name == 'pull_request_target' && (contains(github.event.pull_request.labels.*.name, 'ci:daily')) - run: echo "workflow=daily" >> $GITHUB_ENV - outputs: - workflow: ${{ env.workflow }} - ghBaseBranch: ${{ github.event.pull_request.base.ref }} - ghPrNumber: ${{ github.event.pull_request.number }} - - trigger-circle-ci-workflow: - runs-on: ubuntu-latest - needs: [get-branch, get-parameters] - if: github.repository_owner == 'storybookjs' && needs.get-parameters.outputs.workflow != '' - steps: - - name: Trigger Normal tests - uses: fjogeleit/http-request-action@v1 - with: - url: 'https://circleci.com/api/v2/project/gh/storybookjs/storybook/pipeline' - method: 'POST' - customHeaders: '{"Content-Type": "application/json", "Circle-Token": "${{ secrets.CIRCLE_CI_TOKEN }}"}' - data: '{ "branch": "${{needs.get-branch.outputs.branch}}", "parameters": ${{toJson(needs.get-parameters.outputs)}} }' diff --git a/.gitignore b/.gitignore index 3460788946c0..b47b88155e40 100644 --- a/.gitignore +++ b/.gitignore @@ -80,4 +80,5 @@ CLAUDE.local.md .vscode/mcp.json .mcp.json .nx/polygraph -.omc \ No newline at end of file +.omc +scripts/ci-eval.db \ No newline at end of file diff --git a/.nx/workflows/agents.yaml b/.nx/workflows/agents.yaml index 878ce4ea82db..01a4d6303643 100644 --- a/.nx/workflows/agents.yaml +++ b/.nx/workflows/agents.yaml @@ -52,7 +52,12 @@ linux-browsers-init-steps: &linux-browsers-init-steps rm -f ~/.yarn/berry/cache/@storybook-*.zip 2>/dev/null || true - name: Install Browsers script: | - yarn exec playwright install chromium --with-deps + # Intentionally no --with-deps: the NX base image already carries enough 
+ # X/audio/font libraries for Chromium to launch, and --with-deps forces + # apt-get update which frequently races against Ubuntu mirror sync and + # fails the whole CIPE. If Chromium starts erroring with missing .so + # files, revert to --with-deps or switch to a pre-baked image. + yarn exec playwright install chromium - name: Verify script: | echo "node: $(node --version)" @@ -77,7 +82,7 @@ launch-templates: init-steps: *linux-init-steps env: *common-env-vars linux-browsers-js: - resource-class: 'docker_linux_amd64/extra_large+' + resource-class: 'docker_linux_amd64/medium+' image: 'ubuntu22.04-node20.19-v2' init-steps: *linux-browsers-init-steps env: *common-env-vars diff --git a/nx.json b/nx.json index f315972a194c..1a0dc6abf8ba 100644 --- a/nx.json +++ b/nx.json @@ -205,5 +205,5 @@ ] }, "analytics": false, - "codexCacheBust": "2026-03-23T12:57:22Z" + "codexCacheBust": "2026-04-17T00:00:00Z" } diff --git a/package.json b/package.json index d56fce979d06..0b9b3a3229d3 100644 --- a/package.json +++ b/package.json @@ -61,7 +61,7 @@ "typescript": "^5.9.3" }, "devDependencies": { - "@nx/workspace": "^22.6.1", + "@nx/workspace": "^22.6.5", "@playwright/test": "^1.58.2", "@types/kill-port": "^2.0.3", "http-server": "^14.1.1", @@ -69,7 +69,7 @@ "jiti": "^2.6.1", "kill-port": "^2.0.1", "lint-staged": "^16.4.0", - "nx": "^22.6.1", + "nx": "^22.6.5", "oxfmt": "^0.41.0", "std-env": "^4.0.0", "vite": "^7.0.4", diff --git a/scripts/ci-eval.db.README.md b/scripts/ci-eval.db.README.md new file mode 100644 index 000000000000..1cbce09ee8dd --- /dev/null +++ b/scripts/ci-eval.db.README.md @@ -0,0 +1,473 @@ +# CI Evaluation Database (`ci-eval.db`) + +SQLite cache for the NX Cloud vs CircleCI evaluation. Every number in the +companion canvases (`canvases/nx-vs-circleci-findings.canvas.tsx`, +`canvases/next-merged-runs.canvas.tsx`) comes out of this file. 
+ +- **Location**: `scripts/ci-eval.db` (git-ignored) +- **Populated by**: `scripts/evaluate-ci.ts` +- **Ignored by**: `.gitignore` + +## Running the sync + +```bash +# Last 14 days (default) +yarn jiti scripts/evaluate-ci.ts + +# Last 30 days +yarn jiti scripts/evaluate-ci.ts --days 30 + +# Since a specific point (experiment start) +yarn jiti scripts/evaluate-ci.ts --since 2026-03-23T13:54:53Z + +# Report only (no API calls — read from cache) +yarn jiti scripts/evaluate-ci.ts --report-only + +# Single workflow +yarn jiti scripts/evaluate-ci.ts --workflow normal:prs --days 30 + +# Custom flaky-analytics window +yarn jiti scripts/evaluate-ci.ts --flaky-range 7 +``` + +## Regenerating the canvas from the DB + +The `canvases/nx-vs-circleci-findings.canvas.tsx` file embeds its data as inline +TypeScript constants (canvas files cannot import from anywhere outside +`cursor/canvas`). To refresh those constants from the DB: + +```bash +yarn jiti scripts/generate-canvas-data.ts +yarn jiti scripts/generate-canvas-data.ts --since 2026-03-23T13:54:53Z +yarn jiti scripts/generate-canvas-data.ts --flaky-range 7 +``` + +The script reads the DB, computes: `NEXT_MERGED`, `NORMAL_PRS`, `PAIRED_NEXT`, +`PAIRED_PRS`, `NX_AUTORETRIES_30D`, `TOP_FLAKY_PRS`, `CCI_ONLY_JOBS`, and rewrites +the corresponding `const NAME = {...};` blocks in place (preserving the type +annotations and surrounding code). Inline comments inside the replaced blocks +are not preserved — document intent in the surrounding code instead. + +Typical workflow: + +```bash +# 1. Pull fresh data from the APIs into the DB (or skip if already synced today) +yarn jiti scripts/evaluate-ci.ts --since 2026-03-23T13:54:53Z + +# 2. Regenerate the canvas from the DB +yarn jiti scripts/generate-canvas-data.ts --since 2026-03-23T13:54:53Z +``` + +The sync is **fully incremental**: + +- Runs are keyed by `id` and inserted with `INSERT OR IGNORE`. Re-syncing a + known run is a no-op. 
+- Flaky-task analytics are snapshotted once per calendar day per `--flaky-range` + value. Running the sync twice on the same day does not hit the dashboard + endpoints a second time. + +## Required environment variables + +| Env var | Purpose | +| ----------------------- | ---------------------------------------------------------------- | +| `NX_CLOUD_ACCESS_TOKEN` | Public NX Cloud API (pipeline-executions, runs). Required. | +| `CIRCLE_TOKEN` | Optional. CircleCI Insights works anonymously for OSS projects. | +| `NX_CLOUD_SESSION` | `_nxCloudSession` cookie. Enables exact credits + flaky analytics (Enterprise). | +| `NX_CLOUD_PAT` | Read from `~/.config/nxcloud/nxcloud.ini` if unset. | + +## Tables + +### `runs` — one row per CI workflow / CIPE + +| Column | Type | Notes | +| -------------- | ------- | --------------------------------------------------------------- | +| `id` | TEXT PK | CircleCI workflow id or NX pipeline-execution id | +| `system` | TEXT | `'circleci'` or `'nx'` | +| `workflow` | TEXT | `normal`, `merged`, `daily`, `next:merged`, `next:daily`, `normal:prs`, `merged:prs`, `daily:prs`, `base (nx-ai)` | +| `branch` | TEXT | Git branch or PR number (NX uses PR numbers for PR runs) | +| `status` | TEXT | `success` / `failed` / `canceled` (CCI) or `SUCCEEDED` / `FAILED` / `CANCELED` (NX) | +| `created_at` | TEXT | ISO 8601 timestamp | +| `duration_sec` | REAL | Wall-clock duration of the main run / workflow | +| `credits_used` | INTEGER | Raw credits consumed | +| `cost_usd` | REAL | `credits_used * credit_to_usd_rate` | +| `commit_sha` | TEXT | Git SHA; enables paired-commit comparison | + +### `failed_tasks` — one row per failing task inside a failed run + +| Column | Type | Notes | +| ----------- | ---- | --------------------------------------------- | +| `run_id` | TEXT | FK → `runs.id` | +| `task_name` | TEXT | `format-check`, `code:fmt`, `react-vite/default-ts:e2e-tests-dev`, ... 
| + +### `nx_template_credits` — per-run agent/credit breakdown (NX only) + +| Column | Type | Notes | +| ------------------- | ------- | ---------------------------------------------------- | +| `run_id` | TEXT | FK → `runs.id` | +| `template` | TEXT | `linux-js`, `linux-browsers-js`, `windows-js` | +| `credits` | INTEGER | Credits consumed by this template | +| `credit_multiplier` | INTEGER | Credits per minute (15 = medium+, 60 = extra_large+) | + +Used to compute the "NX if downgraded to medium+" cost projection. + +### `nx_cipe_retry_stats` — per-CIPE retry data (NX only) + +One row per NX CIPE, from `ttgImpactMetadata.taskRetryStats` on the dashboard +analysis endpoint. Used to compute the **exact** "without retry" failure count +per workflow — no proportional scaling. + +| Column | Type | Notes | +| --------------------------- | ------- | -------------------------------------------------------- | +| `run_id` | TEXT PK | FK → `runs.id` | +| `total_tasks` | INTEGER | Tasks in the CIPE | +| `total_task_retries` | INTEGER | Reruns attempted (all attempts) | +| `successful_retries` | INTEGER | Reruns that succeeded (task-level) | +| `failed_retries` | INTEGER | Reruns that still failed after retry | +| `hypothetical_no_cache_ms` | INTEGER | `ciPipelineExecution.duration.hypotheticalNoCacheMs` — total task-duration saved by the remote cache on this CIPE. Used as a cross-check against the per-task derivation in `nx_run_tasks`. NULL on pre-migration rows until the backfill catches up. | + +**Rescued CIPE** = `runs.status = 'SUCCEEDED'` AND +`nx_cipe_retry_stats.successful_retries > 0`. Without retry these CIPEs would +have been reported as FAILED. + +The sync fills this table two ways, both idempotent: + +1. **Inline** — every new NX CIPE fetched by `syncNxCloudRuns` also pulls the + analysis endpoint (a call we already need for exact credits) and stores + retry stats in the same HTTP round-trip. +2. 
**Backfill** — after the main sync, `backfillNxCipeRetryStats` looks for + NX CIPEs in `runs` that have no matching row in `nx_cipe_retry_stats` and + fetches the analysis endpoint for each. Once everything is filled, re-runs + print `NX retry backfill: up to date` and make zero API calls. + +### `nx_cache_stats` — per-CIPE cache hit counts (NX only) + +Used to quantify how much the NX remote cache saves on each workflow. Excludes +non-cacheable `continuous-*` tasks (`serve`, `run-registry`). + +| Column | Type | Notes | +| -------------- | ------- | -------------------------------------------------------- | +| `run_id` | TEXT PK | FK → `runs.id` | +| `cache_hits` | INTEGER | Tasks whose cacheStatus contained `cache-hit` | +| `cache_misses` | INTEGER | Tasks with `cacheStatus = cache-miss` | +| `total_tasks` | INTEGER | `cache_hits + cache_misses` | + +Filled by `backfillNxCacheStats`, which makes two API calls per missing CIPE +(`/runs/search` to find the main run id, `/runs/{runId}` for the full task list). +Idempotent: re-runs print `NX cache backfill: up to date`. + +### `nx_run_tasks` — per-task detail (NX only) + +One row per `(run_id, task_id)` from `/runs/{runId}` task lists. Excludes +continuous `serve` / `run-registry` tasks. Enables **real** per-CIPE cache +savings: each cache-hit task is valued by the avg duration of the *same +task-id* when it actually ran fresh (cache-miss samples), not by a +uniform-per-CIPE average that muddles heavy and light targets. + +| Column | Type | Notes | +| ----------------- | ------- | ------------------------------------------------------------------- | +| `run_id` | TEXT PK | FK → `runs.id` (first half of composite PK) | +| `task_id` | TEXT PK | e.g. `react-vite/default-ts:e2e-tests-dev:production` (second half) | +| `project` | TEXT | Nx project name | +| `target` | TEXT | Nx target (`compile`, `test-runner`, `sandbox`, ...) | +| `duration_ms` | INTEGER | Task duration as reported in `/runs/{runId}`. 
For cache hits this is just the cache-restore time (~hundreds of ms). For cache misses it's the real work time. | +| `cache_status` | TEXT | `cache-miss`, `remote-cache-hit`, or `local-cache-hit` | +| `agent_template` | TEXT | Inferred from target: `linux-js` for `compile` / `check` / `lint` / `knip` / `fmt`; `linux-browsers-js` otherwise | +| `credits_per_min` | INTEGER | Per-CIPE rate from `nx_template_credits` (60 on extra_large+, 15 on medium+). Defaults to 60 for CIPEs predating the template credits capture. | + +Filled by `backfillNxRunTasks`, which makes two API calls per missing CIPE +(`/runs/search` + `/runs/{runId}`). Idempotent on composite PK: re-runs print +`NX run-tasks backfill: up to date`. + +Index: `idx_nx_run_tasks_task_id` (task_id) and `idx_nx_run_tasks_run_id` +(run_id). + +### `nx_flaky_task_snapshots` — one row per (day, range, task) + +Data from NX Cloud Enterprise analytics endpoint: +`/orgs/{org}/workspaces/{ws}/analytics/flaky-tasks/{project}/{target}?range=N` + +| Column | Type | Notes | +| ---------------------- | ------- | --------------------------------------------------------------- | +| `snapshot_date` | TEXT PK | `YYYY-MM-DD` — day the snapshot was taken | +| `range_days` | INT PK | Window size requested (typically 30) | +| `window_start` | TEXT | ISO 8601 start of the window reported by the API | +| `window_end` | TEXT | ISO 8601 end of the window | +| `project` | TEXT PK | Nx project (e.g. `code`, `core`, `angular-cli/default-ts`) | +| `target` | TEXT PK | Nx target (e.g. 
`test`, `compile`, `e2e-tests-dev`) | +| `configuration` | TEXT PK | Usually `production` | +| `total_reruns` | INT | Total automatic retries | +| `total_rescues` | INT | `totalDeflakedAutomaticallyCount` — retries that succeeded | +| `total_executions` | INT | Non-rerun executions of the task hash | +| `total_flaky_hashes` | INT | Number of distinct input-hashes flagged as flaky | +| `retry_time_seconds` | INT | Total time spent on retries | +| `avg_time_consumed_ms` | REAL | Average task duration | +| `flakiness_rate` | REAL | `flaky_successes / total_successes` (%) | +| `impact_score` | REAL | NX Cloud's prioritization metric | +| `last_failure_time` | TEXT | ISO 8601 | + +### `nx_flaky_task_kpis` — one row per (day, range) — workspace-wide totals + +| Column | Type | Notes | +| ---------------------------- | ------- | -------------------------------------------- | +| `snapshot_date` | TEXT PK | `YYYY-MM-DD` | +| `range_days` | INT PK | Window size | +| `window_start` / `window_end`| TEXT | API-reported window | +| `active_flaky_tasks` | INT | Count of tasks with flake rate > 0% | +| `proportion_tasks_flaky_pct` | REAL | % of all tasks that are flaky | +| `high_risk_tasks` | INT | Tasks with flake rate > 20% | +| `total_reruns` | INT | Sum across all flaky tasks | +| `total_rescues` | INT | Sum across all flaky tasks | +| `retry_time_seconds` | INT | Sum across all flaky tasks | + +### Indexes + +```sql +idx_runs_system_workflow (runs.system, runs.workflow) +idx_failed_tasks_run_id (failed_tasks.run_id) +idx_nx_template_credits_run_id (nx_template_credits.run_id) +idx_flaky_snapshots_date (nx_flaky_task_snapshots.snapshot_date, range_days) +``` + +`nx_cipe_retry_stats` has its PRIMARY KEY on `run_id` which serves as the +lookup index. + +## Example queries + +All runnable with `sqlite3 scripts/ci-eval.db "..."`. 
+ +### Observed flake rate per system / workflow (since PR #34122 merged) + +```sql +SELECT + system, + workflow, + COUNT(*) AS runs, + SUM(CASE WHEN status IN ('FAILED','failed') THEN 1 ELSE 0 END) AS failed, + ROUND(100.0 * SUM(CASE WHEN status IN ('FAILED','failed') THEN 1 ELSE 0 END) / COUNT(*), 1) AS flake_pct +FROM runs +WHERE created_at >= '2026-03-23T13:54:53Z' + AND workflow IN ('normal:prs','next:merged') +GROUP BY system, workflow +ORDER BY workflow, system; +``` + +### Paired-commit analysis (same commit, both providers) + +```sql +WITH paired AS ( + SELECT c.commit_sha, + c.status AS cci_status, + n.status AS nx_status + FROM runs c + JOIN runs n ON c.commit_sha = n.commit_sha + WHERE c.system='circleci' AND c.workflow='normal:prs' + AND n.system='nx' AND n.workflow='normal:prs' + AND c.commit_sha IS NOT NULL + GROUP BY c.commit_sha +) +SELECT + SUM(CASE WHEN cci_status='success' AND nx_status='SUCCEEDED' THEN 1 ELSE 0 END) AS both_passed, + SUM(CASE WHEN cci_status='failed' AND nx_status='FAILED' THEN 1 ELSE 0 END) AS both_failed, + SUM(CASE WHEN cci_status='failed' AND nx_status='SUCCEEDED' THEN 1 ELSE 0 END) AS only_cci_failed, + SUM(CASE WHEN cci_status='success' AND nx_status='FAILED' THEN 1 ELSE 0 END) AS only_nx_failed +FROM paired; +``` + +### Failing tasks per provider (most frequent first) + +```sql +SELECT r.system, ft.task_name, COUNT(*) AS fails +FROM failed_tasks ft +JOIN runs r ON ft.run_id = r.id +WHERE r.workflow='normal:prs' AND r.created_at >= '2026-03-23T13:54:53Z' +GROUP BY r.system, ft.task_name +ORDER BY r.system, fails DESC; +``` + +### Auto-retry rescue analysis (latest snapshot) + +```sql +SELECT project, target, total_reruns, total_rescues, + ROUND(100.0 * total_rescues / total_reruns, 1) AS rescue_pct, + retry_time_seconds / 60 AS retry_minutes +FROM nx_flaky_task_snapshots +WHERE snapshot_date = (SELECT MAX(snapshot_date) FROM nx_flaky_task_snapshots) + AND range_days = 30 +ORDER BY total_reruns DESC; +``` + +### Workspace KPIs over time + 
+```sql +SELECT snapshot_date, range_days, + active_flaky_tasks, total_reruns, total_rescues, + ROUND(100.0 * total_rescues / NULLIF(total_reruns, 0), 1) AS rescue_pct +FROM nx_flaky_task_kpis +ORDER BY snapshot_date DESC; +``` + +### Exact "without retry" flake rate per workflow + +```sql +SELECT + r.workflow, + COUNT(*) AS runs, + SUM(CASE WHEN r.status='FAILED' THEN 1 ELSE 0 END) AS observed_failed, + SUM(CASE WHEN r.status='SUCCEEDED' AND s.successful_retries > 0 THEN 1 ELSE 0 END) AS rescued_by_retry, + SUM(CASE WHEN r.status='FAILED' THEN 1 ELSE 0 END) + + SUM(CASE WHEN r.status='SUCCEEDED' AND s.successful_retries > 0 THEN 1 ELSE 0 END) AS raw_failed +FROM runs r +LEFT JOIN nx_cipe_retry_stats s ON s.run_id = r.id +WHERE r.system='nx' AND r.created_at >= '2026-03-23T13:54:53Z' +GROUP BY r.workflow; +``` + +### Cache hit rate per workflow + +```sql +SELECT + r.workflow, + COUNT(DISTINCT r.id) AS runs, + ROUND(100.0 * SUM(s.cache_hits) / NULLIF(SUM(s.cache_hits + s.cache_misses), 0), 1) AS hit_pct, + ROUND(AVG(s.cache_hits), 1) AS avg_hits_per_run, + ROUND(AVG(s.cache_misses), 1) AS avg_misses_per_run +FROM runs r +JOIN nx_cache_stats s ON s.run_id = r.id +WHERE r.system='nx' AND r.created_at >= '2026-03-23T13:54:53Z' +GROUP BY r.workflow; +``` + +### Real cost the NX cache saved per workflow + +Values each cache-hit task by the avg duration of the *same task-id* when it +actually ran fresh elsewhere (cache-miss samples in `nx_run_tasks`). No +uniform-per-CIPE averaging — heavy targets like `sandbox`, `build`, and +`e2e-tests-dev` are priced by their own historical fresh-run duration rather +than blended with light misses like `fmt` or `check`. + +The CTE `miss_profile_task` gives the per-task-id avg; `miss_profile_target` +is the per-target fallback for task-ids that never missed in the DB (rare); a +final fallback of 0 means cache hits with no miss samples for either their task-id or their target contribute nothing. 
+ +```sql +WITH miss_profile_task AS ( + SELECT task_id, AVG(duration_ms) AS avg_ms + FROM nx_run_tasks + WHERE cache_status = 'cache-miss' + GROUP BY task_id +), +miss_profile_target AS ( + SELECT target, AVG(duration_ms) AS avg_ms + FROM nx_run_tasks + WHERE cache_status = 'cache-miss' + GROUP BY target +) +SELECT + r.workflow, + COUNT(DISTINCT r.id) AS runs, + SUM(CASE WHEN t.cache_status LIKE '%cache-hit%' THEN 1 ELSE 0 END) AS hit_tasks, + SUM(CASE WHEN t.cache_status = 'cache-miss' THEN 1 ELSE 0 END) AS miss_tasks, + ROUND(SUM( + CASE WHEN t.cache_status LIKE '%cache-hit%' THEN + (COALESCE(mt.avg_ms, tg.avg_ms, 0) / 60000.0) + * t.credits_per_min * 0.0005 + ELSE 0 END + ), 2) AS real_cost_saved_usd +FROM runs r +JOIN nx_run_tasks t ON t.run_id = r.id +LEFT JOIN miss_profile_task mt ON mt.task_id = t.task_id +LEFT JOIN miss_profile_target tg ON tg.target = t.target +WHERE r.system = 'nx' AND r.created_at >= '2026-03-23T13:54:53Z' +GROUP BY r.workflow; +``` + +### Cross-check: direct `hypotheticalNoCacheMs` → USD + +NX Cloud exposes a direct time-savings field on the dashboard analysis +endpoint (`ciPipelineExecution.duration.hypotheticalNoCacheMs`). It's a +duration, not credits, so we convert with a per-CIPE weighted credit rate +pulled from `nx_run_tasks` cache-miss samples. If the derived and direct +numbers agree within ~10% the methodology is sound. 
+ +```sql +WITH cipe_rate AS ( + SELECT run_id, + CAST(SUM(duration_ms * credits_per_min) AS REAL) + / NULLIF(SUM(duration_ms), 0) AS weighted_rate + FROM nx_run_tasks + WHERE cache_status = 'cache-miss' + GROUP BY run_id +) +SELECT r.workflow, + COUNT(s.run_id) AS cipes_with_direct_value, + ROUND(SUM( + s.hypothetical_no_cache_ms / 60000.0 + * COALESCE(cr.weighted_rate, 60) * 0.0005 + ), 2) AS direct_cost_saved_usd +FROM runs r +JOIN nx_cipe_retry_stats s ON s.run_id = r.id +LEFT JOIN cipe_rate cr ON cr.run_id = r.id +WHERE r.system='nx' AND r.created_at >= '2026-03-23T13:54:53Z' + AND s.hypothetical_no_cache_ms IS NOT NULL +GROUP BY r.workflow; +``` + +### NX cost broken down by agent template + +```sql +SELECT tpl.template, + COUNT(*) AS runs, + SUM(tpl.credits) AS total_credits, + ROUND(SUM(tpl.credits) * 0.0005, 2) AS total_usd, + AVG(tpl.credit_multiplier) AS avg_multiplier +FROM nx_template_credits tpl +JOIN runs r ON tpl.run_id = r.id +WHERE r.workflow='normal:prs' +GROUP BY tpl.template +ORDER BY total_credits DESC; +``` + +### NX "if medium+" cost projection + +Downgrades only `linux-browsers-js` to 15 credits/min; leaves `linux-js` alone. 
+ +```sql +SELECT r.id, + SUM( + CASE WHEN tpl.template='linux-browsers-js' AND tpl.credit_multiplier > 15 + THEN tpl.credits * 15.0 / tpl.credit_multiplier + ELSE tpl.credits + END + ) + 500 AS medium_plus_credits +FROM runs r +LEFT JOIN nx_template_credits tpl ON tpl.run_id = r.id +WHERE r.system='nx' AND r.workflow='normal:prs' +GROUP BY r.id; +``` + +## Data provenance + +| What | Source | +| ------------------------ | ---------------------------------------------------------------------------- | +| CircleCI run list | `GET /api/v2/insights/{proj}/workflows/{wf}` (public, no token needed) | +| CircleCI commit SHA | `GET /api/v2/workflow/{id}` → pipeline → `vcs.revision` | +| CircleCI failed jobs | `GET /api/v2/workflow/{id}/job` — filter `status='failed'` | +| NX pipeline executions | `POST /nx-cloud/mcp-context/pipeline-executions/search` | +| NX run commands / tasks | `POST /nx-cloud/mcp-context/runs/search` + `GET /runs/{id}` | +| NX exact credit breakdown | `GET /cipes/{cipeId}/analysis?runGroup={rg}` (Enterprise dashboard) | +| NX flaky-task analytics | `GET /orgs/{org}/workspaces/{ws}/analytics/flaky-tasks?range=N` | +| NX per-task rescue count | `GET /orgs/{org}/workspaces/{ws}/analytics/flaky-tasks/{proj}/{tgt}?range=N` | + +The two `analytics/*` endpoints are Enterprise-only and require the +`_nxCloudSession` cookie. + +## Schema stability + +Tables are created with `CREATE TABLE IF NOT EXISTS`. If you add a column, +either: + +1. Add a `CREATE TABLE IF NOT EXISTS` for a new table, or +2. Add a guarded `ALTER TABLE ... ADD COLUMN` inside a `try/catch` in + `initDB()` (see the existing `commit_sha` migration for an example). + +To reset from scratch: `rm scripts/ci-eval.db && yarn jiti scripts/evaluate-ci.ts`. 
diff --git a/scripts/ci/init-empty.ts b/scripts/ci/init-empty.ts index 735b465573db..735cde223d51 100644 --- a/scripts/ci/init-empty.ts +++ b/scripts/ci/init-empty.ts @@ -138,13 +138,13 @@ export function defineEmptyInitWindows() { export const initEmptyNoOpJob = defineNoOpJob('init-empty', [build_linux]); -export function getInitEmpty(workflow: Workflow) { +export function getInitEmpty(workflow: Workflow, options: { nxExperiment?: boolean } = {}) { const initEmpty: JobOrNoOpJob[] = ['react-vite-ts'].map(defineEmptyInitFlow); if (isWorkflowOrAbove(workflow, 'merged')) { initEmpty.push(...['nextjs-ts', 'vue-vite-ts', 'lit-vite-ts'].map(defineEmptyInitFlow)); } - if (isWorkflowOrAbove(workflow, 'daily')) { + if (!options.nxExperiment && isWorkflowOrAbove(workflow, 'daily')) { initEmpty.push(defineEmptyInitWindows()); } if (isWorkflowOrAbove(workflow, 'normal')) { diff --git a/scripts/ci/sandboxes.ts b/scripts/ci/sandboxes.ts index 8f86dc60b391..dab8d3ba7266 100644 --- a/scripts/ci/sandboxes.ts +++ b/scripts/ci/sandboxes.ts @@ -139,6 +139,8 @@ function defineSandboxJob_dev({ ); } +let nxExperiment = false; + export function defineSandboxFlow(key: Key) { const id = toId(key); const data = sandboxTemplates.allTemplates[key as keyof typeof sandboxTemplates.allTemplates]; @@ -352,7 +354,7 @@ export function defineSandboxFlow(key: Key) { createJob, buildJob, devJob, - !skipTasks?.includes('chromatic') ? chromaticJob : undefined, + !nxExperiment && !skipTasks?.includes('chromatic') ? chromaticJob : undefined, !skipTasks?.includes('vitest-integration') ? vitestJob : undefined, !skipTasks?.includes('e2e-tests') ? e2eJob : undefined, @@ -516,16 +518,16 @@ const getListOfSandboxes = (workflow: Workflow) => { } }; -export function getSandboxes(workflow: Workflow) { +export function getSandboxes(workflow: Workflow, options: { nxExperiment?: boolean } = {}) { + nxExperiment = options.nxExperiment ?? 
false; const sandboxes = getListOfSandboxes(workflow).map(defineSandboxFlow); const list: JobOrNoOpJob[] = sandboxes.flatMap((sandbox) => sandbox.jobs); - if (isWorkflowOrAbove(workflow, 'daily')) { + if (!nxExperiment && isWorkflowOrAbove(workflow, 'daily')) { const windows_sandbox_build = defineWindowsSandboxBuild(sandboxes[0]); const windows_sandbox_dev = defineWindowsSandboxDev(sandboxes[0]); const testRunner = defineSandboxTestRunner(sandboxes[0]); - list.push(windows_sandbox_build, windows_sandbox_dev, testRunner); } diff --git a/scripts/evaluate-ci.ts b/scripts/evaluate-ci.ts new file mode 100644 index 000000000000..b8c06d86e1b3 --- /dev/null +++ b/scripts/evaluate-ci.ts @@ -0,0 +1,1940 @@ +/** + * CI Evaluation Script: NX Cloud vs CircleCI + * + * Compares flakiness, speed, and cost across evaluation branches. + * Caches all run data in a local SQLite database for fast re-runs. + * + * Required env vars: + * CIRCLE_TOKEN — CircleCI personal API token + * NX_CLOUD_ACCESS_TOKEN — NX Cloud CI access token (from nx.json or env) + * NX_CLOUD_SESSION — _nxCloudSession cookie value (for exact credit data) + * + * Optional env vars: + * NX_CLOUD_PAT — NX Cloud Personal Access Token (from ~/.config/nxcloud/nxcloud.ini) + * + * Usage: + * yarn jiti scripts/evaluate-ci.ts # last 14 days + * yarn jiti scripts/evaluate-ci.ts --days 30 # last 30 days + * yarn jiti scripts/evaluate-ci.ts --since 2026-03-23 # since specific date + * yarn jiti scripts/evaluate-ci.ts --workflow normal --days 7 + * yarn jiti scripts/evaluate-ci.ts --report-only # no API calls + * yarn jiti scripts/evaluate-ci.ts --show-runs # per-run table + * yarn jiti scripts/evaluate-ci.ts --flaky-range 7 # flaky analytics window (default 30) + * yarn jiti scripts/evaluate-ci.ts --skip-flaky-analytics + * + * The --days / --since flags give both CI systems the same time window, so + * flake/speed/cost comparisons are apples-to-apples. Both systems paginate until + * they hit a run older than the cutoff. 
+ * + * Sync is fully incremental: + * - Runs are keyed by id; re-syncing a known run is a no-op. + * - Flaky-task analytics are snapshotted once per calendar day per --flaky-range; + * running the sync again the same day doesn't hit the dashboard endpoints. + * + * See scripts/ci-eval.db.README.md for the full schema and example SQL queries. + */ + +import { readFileSync } from 'node:fs'; +import { join } from 'node:path'; +// @ts-expect-error - no type declarations for ini +import { parse as parseIni } from 'ini'; +import { parse as parseYaml } from 'yaml'; +import { DatabaseSync } from 'node:sqlite'; + +// ─── Configuration ─────────────────────────────────────────────────────────── + +const CIRCLECI_PROJECT = 'gh/storybookjs/storybook'; +const NX_CLOUD_URL = 'https://cloud.nx.app'; +const NX_CLOUD_ID = '6929fbef73e98d8094d2a343'; +const NX_CLOUD_ORG_ID = '606dcb5cdc2a2b00059cc0e9'; + +const WORKSPACE_ROOT = join(import.meta.dirname, '..'); +const DB_PATH = join(import.meta.dirname, 'ci-eval.db'); + +const EVAL_BRANCHES: Record<string, string[]> = { + normal: ['kasper/nx-eval-normal'], + merged: ['kasper/nx-eval-merged'], + daily: ['kasper/nx-eval-daily-1', 'kasper/nx-eval-daily-2'], + 'base (nx-ai)': ['kasper/nx-ai'], + 'next:merged': ['next'], + 'next:daily': ['next'], + // Wild-branches workflows: query every branch (empty array = no filter) and + // drop anything on our experiment/eval branches post-fetch. Used to measure + // regular PR CI where the default extra_large+ agents are still in effect. + 'normal:prs': [], + 'merged:prs': [], + 'daily:prs': [], + // RC sweep (cohort 1 + cohort 2). Each branch varies exactly one line in + // .nx/workflows/agents.yaml (linux-browsers-js resource-class); linux-js + // stays at extra_large+ everywhere. All PRs carry the ci:normal label. 
+ 'rc:xlarge-plus': ['kasper/nx-rc-xlarge-plus'], + 'rc:xlarge': ['kasper/nx-rc-xlarge'], + 'rc:large-plus': ['kasper/nx-rc-large-plus'], + 'rc:large': ['kasper/nx-rc-large'], + 'rc:medium-plus': ['kasper/nx-rc-medium-plus'], + 'rc:medium': ['kasper/nx-rc-medium'], + 'rc2:xlarge-plus': ['kasper/nx-rc-xlarge-plus-2'], + 'rc2:xlarge': ['kasper/nx-rc-xlarge-2'], + 'rc2:large-plus': ['kasper/nx-rc-large-plus-2'], + 'rc2:large': ['kasper/nx-rc-large-2'], + 'rc2:medium-plus': ['kasper/nx-rc-medium-plus-2'], + 'rc2:medium': ['kasper/nx-rc-medium-2'], +}; + +// Per-branch linux-browsers-js credit multipliers. Used by the syncNxCloudRuns +// fallback path when the dashboard /cipes/{id}/analysis endpoint returns an +// empty computeCreditUsages (happens for some CIPEs). Without this, the +// fallback reads from the LOCAL worktree's agents.yaml which only knows the +// current branch's class — wrong for cross-branch sweeps. +// +// linux-js is extra_large+ (60 credits/min) on every rc branch. +const RC_BRANCH_LINUX_BROWSERS_CPM: Record<string, number> = { + 'kasper/nx-rc-xlarge-plus': 60, + 'kasper/nx-rc-xlarge': 40, + 'kasper/nx-rc-large-plus': 30, + 'kasper/nx-rc-large': 20, + 'kasper/nx-rc-medium-plus': 15, + 'kasper/nx-rc-medium': 10, + 'kasper/nx-rc-xlarge-plus-2': 60, + 'kasper/nx-rc-xlarge-2': 40, + 'kasper/nx-rc-large-plus-2': 30, + 'kasper/nx-rc-large-2': 20, + 'kasper/nx-rc-medium-plus-2': 15, + 'kasper/nx-rc-medium-2': 10, +}; + +const WORKFLOW_NAMES: Record<string, string> = { + normal: 'normal-generated', + merged: 'merged-generated', + daily: 'daily-generated', + 'base (nx-ai)': 'daily-generated', + 'next:merged': 'merged-generated', + 'next:daily': 'daily-generated', + 'normal:prs': 'normal-generated', + 'merged:prs': 'merged-generated', + 'daily:prs': 'daily-generated', + 'rc:xlarge-plus': 'normal-generated', + 'rc:xlarge': 'normal-generated', + 'rc:large-plus': 'normal-generated', + 'rc:large': 'normal-generated', + 'rc:medium-plus': 'normal-generated', + 
'rc:medium': 'normal-generated', + 
'rc2:xlarge-plus': 'normal-generated', + 'rc2:xlarge': 'normal-generated', + 'rc2:large-plus': 'normal-generated', + 'rc2:large': 'normal-generated', + 'rc2:medium-plus': 'normal-generated', + 'rc2:medium': 'normal-generated', +}; + +/** + * Branches whose NX config already downgrades linux-browsers-js to medium+. + * Used by the `*:prs` wild-branch workflows to keep only PRs that use the + * default extra_large+ agents. + * + * NX CIPEs record the branch as the PR number for PRs, so both the branch + * names (used by CircleCI) and the corresponding PR numbers are listed. + */ +const MEDIUM_PLUS_BRANCHES = new Set([ + 'next', // next itself still uses extra_large+, but we have dedicated next:* workflows + 'kasper/nx-ai', + 'kasper/nx-port', + 'kasper/nx-eval-normal', + 'kasper/nx-eval-merged', + 'kasper/nx-eval-daily-1', + 'kasper/nx-eval-daily-2', + // PR numbers for the above branches (NX reports these in item.branch) + '34282', // kasper/nx-ai + '34568', // kasper/nx-port + '34562', // kasper/nx-eval-normal + '34563', // kasper/nx-eval-merged + '34564', // kasper/nx-eval-daily-1 + '34565', // kasper/nx-eval-daily-2 +]); + +// ─── Pricing ───────────────────────────────────────────────────────────────── + +const CIRCLECI_CREDIT_TO_USD = 0.0006; +const NX_CREDIT_TO_USD = 0.0005; +const NX_CREDITS_PER_CIPE = 500; + +const NX_RESOURCE_CLASS_CREDITS: Record = { + 'docker_linux_amd64/small': 5, + 'docker_linux_amd64/medium': 10, + 'docker_linux_amd64/medium+': 15, + 'docker_linux_amd64/large': 20, + 'docker_linux_amd64/large+': 30, + 'docker_linux_amd64/extra_large': 40, + 'docker_linux_amd64/extra_large+': 60, + 'docker_linux_arm64/medium': 13, + 'docker_linux_arm64/large': 26, + 'docker_linux_arm64/extra_large': 52, + 'docker_windows/medium': 40, +}; + +function loadNxAgentCreditsPerMin(): Record { + const agentsPath = join(WORKSPACE_ROOT, '.nx/workflows/agents.yaml'); + const yaml = parseYaml(readFileSync(agentsPath, 'utf-8')); + const templates = 
yaml['launch-templates'] ?? {}; + const result: Record = {}; + + for (const [name, config] of Object.entries(templates) as [string, any][]) { + const resourceClass: string = config['resource-class'] ?? ''; + const credits = NX_RESOURCE_CLASS_CREDITS[resourceClass]; + if (credits) { + result[name] = credits; + } else { + console.warn( + ` ⚠ Unknown resource class "${resourceClass}" for launch template "${name}", defaulting to 15 credits/min` + ); + result[name] = 15; + } + } + + return result; +} + +// ─── SQLite Database ───────────────────────────────────────────────────────── + +function initDB() { + const db = new DatabaseSync(DB_PATH); + + db.exec(` + CREATE TABLE IF NOT EXISTS runs ( + id TEXT PRIMARY KEY, + system TEXT NOT NULL, + workflow TEXT NOT NULL, + branch TEXT NOT NULL, + status TEXT NOT NULL, + created_at TEXT NOT NULL, + duration_sec REAL NOT NULL, + credits_used INTEGER NOT NULL, + cost_usd REAL NOT NULL, + commit_sha TEXT + );`); + + // Migration: add commit_sha column to existing databases + try { + db.exec(`ALTER TABLE runs ADD COLUMN commit_sha TEXT`); + } catch {} + + // Migration: add hypothetical_no_cache_ms to nx_cipe_retry_stats. This is the + // direct `ciPipelineExecution.duration.hypotheticalNoCacheMs` field from the + // dashboard analysis endpoint — the ms that cache-hit tasks would have taken + // if they had run fresh. Stored alongside retry stats because we fetch both + // from the same endpoint in the same HTTP round-trip. Used as a cross-check + // against the per-task derivation in nx_run_tasks. 
+ try { + db.exec(`ALTER TABLE nx_cipe_retry_stats ADD COLUMN hypothetical_no_cache_ms INTEGER`); + } catch {} + + db.exec(` + CREATE TABLE IF NOT EXISTS failed_tasks ( + run_id TEXT NOT NULL, + task_name TEXT NOT NULL, + FOREIGN KEY (run_id) REFERENCES runs(id) + ); + CREATE TABLE IF NOT EXISTS nx_template_credits ( + run_id TEXT NOT NULL, + template TEXT NOT NULL, + credits INTEGER NOT NULL, + credit_multiplier INTEGER NOT NULL, + FOREIGN KEY (run_id) REFERENCES runs(id) + ); + -- Per-CIPE retry data from ttgImpactMetadata.taskRetryStats on the dashboard + -- analysis endpoint. Lets us compute exact "without retry" failure counts + -- instead of proportional scaling from workspace-wide totals. + CREATE TABLE IF NOT EXISTS nx_cipe_retry_stats ( + run_id TEXT PRIMARY KEY, + total_tasks INTEGER NOT NULL, + total_task_retries INTEGER NOT NULL, + successful_retries INTEGER NOT NULL, + failed_retries INTEGER NOT NULL, + hypothetical_no_cache_ms INTEGER, + FOREIGN KEY (run_id) REFERENCES runs(id) + ); + -- Per-CIPE cache hit counts from the run detail endpoint. Continuous + -- tasks (serve/run-registry) are excluded because they aren't cacheable. + CREATE TABLE IF NOT EXISTS nx_cache_stats ( + run_id TEXT PRIMARY KEY, + cache_hits INTEGER NOT NULL, + cache_misses INTEGER NOT NULL, + total_tasks INTEGER NOT NULL, + FOREIGN KEY (run_id) REFERENCES runs(id) + ); + -- Per-task detail from /runs/{runId}. One row per (run_id, task_id), + -- excludes continuous serve/run-registry tasks. Used to compute real + -- per-CIPE cache savings: value each cache-hit task by the avg duration + -- of the same task-id when it actually ran fresh (cache-miss samples). 
+ CREATE TABLE IF NOT EXISTS nx_run_tasks ( + run_id TEXT NOT NULL, + task_id TEXT NOT NULL, + project TEXT NOT NULL, + target TEXT NOT NULL, + duration_ms INTEGER NOT NULL, + cache_status TEXT NOT NULL, -- 'cache-miss', 'local-cache-hit', 'remote-cache-hit' + agent_template TEXT NOT NULL, -- 'linux-js' | 'linux-browsers-js' + credits_per_min INTEGER NOT NULL, -- 60 on extra_large+, 15 on medium+; captured per-CIPE + PRIMARY KEY (run_id, task_id), + FOREIGN KEY (run_id) REFERENCES runs(id) + ); + CREATE INDEX IF NOT EXISTS idx_nx_run_tasks_task_id ON nx_run_tasks(task_id); + CREATE INDEX IF NOT EXISTS idx_nx_run_tasks_run_id ON nx_run_tasks(run_id); + -- One row per (sync date, time range, task). Snapshots are daily; re-running + -- the sync on the same day is a no-op for these tables. + CREATE TABLE IF NOT EXISTS nx_flaky_task_snapshots ( + snapshot_date TEXT NOT NULL, + range_days INTEGER NOT NULL, + window_start TEXT NOT NULL, + window_end TEXT NOT NULL, + project TEXT NOT NULL, + target TEXT NOT NULL, + configuration TEXT NOT NULL, + total_reruns INTEGER NOT NULL, + total_rescues INTEGER NOT NULL, + total_executions INTEGER NOT NULL, + total_flaky_hashes INTEGER, + retry_time_seconds INTEGER, + avg_time_consumed_ms REAL, + flakiness_rate REAL, + impact_score REAL, + last_failure_time TEXT, + PRIMARY KEY (snapshot_date, range_days, project, target, configuration) + ); + CREATE TABLE IF NOT EXISTS nx_flaky_task_kpis ( + snapshot_date TEXT NOT NULL, + range_days INTEGER NOT NULL, + window_start TEXT NOT NULL, + window_end TEXT NOT NULL, + active_flaky_tasks INTEGER, + proportion_tasks_flaky_pct REAL, + high_risk_tasks INTEGER, + total_reruns INTEGER, + total_rescues INTEGER, + retry_time_seconds INTEGER, + PRIMARY KEY (snapshot_date, range_days) + ); + CREATE INDEX IF NOT EXISTS idx_runs_system_workflow ON runs(system, workflow); + CREATE INDEX IF NOT EXISTS idx_failed_tasks_run_id ON failed_tasks(run_id); + CREATE INDEX IF NOT EXISTS 
idx_nx_template_credits_run_id ON nx_template_credits(run_id);
+    CREATE INDEX IF NOT EXISTS idx_flaky_snapshots_date ON nx_flaky_task_snapshots(snapshot_date, range_days);
+  `);
+
+  return db;
+}
+
+/**
+ * Persist one CI run plus its child rows (failed tasks, per-template credits).
+ *
+ * `runs.id` is the table's primary key, whereas callers de-duplicate per
+ * (system, workflow) through `dbGetExistingIds`. A run id that is already
+ * stored under a different workflow is therefore dropped by `INSERT OR IGNORE`.
+ * In that case no child rows are written either: `failed_tasks` and
+ * `nx_template_credits` have no uniqueness constraint, so writing them again
+ * would duplicate the rows that `dbGetRuns` later reads back.
+ *
+ * @param db       Open evaluation database.
+ * @param system   CI system label stored in `runs.system`.
+ * @param workflow Workflow key stored in `runs.workflow`.
+ * @param branch   Branch (or PR number) stored in `runs.branch`.
+ * @param run      The run to persist.
+ */
+function dbInsertRun(
+  db: InstanceType<typeof DatabaseSync>,
+  system: string,
+  workflow: string,
+  branch: string,
+  run: CIRun
+) {
+  const insertRun = db.prepare(
+    `INSERT OR IGNORE INTO runs (id, system, workflow, branch, status, created_at, duration_sec, credits_used, cost_usd, commit_sha)
+     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
+  );
+  const { changes } = insertRun.run(
+    run.id,
+    system,
+    workflow,
+    branch,
+    run.status,
+    run.createdAt,
+    run.durationSec,
+    run.creditsUsed,
+    run.costUsd,
+    run.commitSha ?? null
+  );
+
+  // `changes` is 0 when OR IGNORE skipped an already-stored id; the child rows
+  // for that run exist already, so stop here instead of duplicating them.
+  if (Number(changes) === 0) return;
+
+  if (run.failedJobs.length > 0) {
+    const insertTask = db.prepare(`INSERT INTO failed_tasks (run_id, task_name) VALUES (?, ?)`);
+    for (const task of run.failedJobs) {
+      insertTask.run(run.id, task);
+    }
+  }
+
+  if (run.nxPerTemplate && run.nxResourceClasses) {
+    const insertCredit = db.prepare(
+      `INSERT INTO nx_template_credits (run_id, template, credits, credit_multiplier) VALUES (?, ?, ?, ?)`
+    );
+    for (const [tmpl, credits] of Object.entries(run.nxPerTemplate)) {
+      // A template without a known multiplier is stored with 0.
+      const multiplier = run.nxResourceClasses[tmpl] ?? 0;
+      insertCredit.run(run.id, tmpl, credits, multiplier);
+    }
+  }
+}
+
+/** Ids of every run already stored for the given (system, workflow) pair. */
+function dbGetExistingIds(
+  db: InstanceType<typeof DatabaseSync>,
+  system: string,
+  workflow: string
+): Set<string> {
+  const rows = db
+    .prepare(`SELECT id FROM runs WHERE system = ? AND workflow = ?`)
+    .all(system, workflow) as { id: string }[];
+  return new Set(rows.map((r) => r.id));
+}
+
+function dbGetRuns(
+  db: InstanceType<typeof DatabaseSync>,
+  system: string,
+  workflow: string
+): CIRun[] {
+  const rows = db
+    .prepare(
+      `SELECT id, status, created_at, duration_sec, credits_used, cost_usd, commit_sha
+       FROM runs WHERE system = ? AND workflow = ?
ORDER BY created_at ASC`
+    )
+    .all(system, workflow) as any[];
+
+  // Prepared once and reused for every run; preparing inside the loop would
+  // recompile the same two statements for each row.
+  const selectFailedTasks = db.prepare(`SELECT task_name FROM failed_tasks WHERE run_id = ?`);
+  const selectTemplateCredits = db.prepare(
+    `SELECT template, credits, credit_multiplier FROM nx_template_credits WHERE run_id = ?`
+  );
+
+  return rows.map((row) => {
+    const failedRows = selectFailedTasks.all(row.id) as { task_name: string }[];
+    const creditRows = selectTemplateCredits.all(row.id) as {
+      template: string;
+      credits: number;
+      credit_multiplier: number;
+    }[];
+
+    const nxPerTemplate: Record<string, number> = {};
+    const nxResourceClasses: Record<string, number> = {};
+    for (const cr of creditRows) {
+      nxPerTemplate[cr.template] = cr.credits;
+      nxResourceClasses[cr.template] = cr.credit_multiplier;
+    }
+
+    return {
+      id: row.id,
+      status: row.status,
+      createdAt: row.created_at,
+      durationSec: row.duration_sec,
+      creditsUsed: row.credits_used,
+      costUsd: row.cost_usd,
+      commitSha: row.commit_sha ?? undefined,
+      failedJobs: failedRows.map((r) => r.task_name),
+      // The per-template fields are only attached when credit rows exist.
+      ...(creditRows.length > 0 ? { nxPerTemplate, nxResourceClasses } : {}),
+    };
+  });
+}
+
+// ─── Auth ────────────────────────────────────────────────────────────────────
+
+/**
+ * CircleCI v2 API. The Insights endpoint is public for open source projects
+ * (storybookjs is public), so no token is needed for it. Per-run details
+ * (`/workflow/:id`, `/pipeline/:id`) require auth — those lookups silently fall
+ * back to no-op on 401, so missing tokens degrade gracefully.
+ *
+ * @returns The `CIRCLE_TOKEN` env var, or `undefined` when unset or empty.
+ */
+function getCircleToken(): string | undefined {
+  const token = process.env.CIRCLE_TOKEN;
+  return token && token.length > 0 ? token : undefined;
+}
+
+/**
+ * Build the headers for NX Cloud API calls: JSON content type, the workspace
+ * id, the CI access token when `NX_CLOUD_ACCESS_TOKEN` is set, and a personal
+ * access token taken from `NX_CLOUD_PAT` or, failing that, from nxcloud.ini.
+ */
+function getNxCloudHeaders(): Record<string, string> {
+  const headers: Record<string, string> = {
+    'Content-Type': 'application/json',
+    'Nx-Cloud-Id': NX_CLOUD_ID,
+  };
+
+  const accessToken = process.env.NX_CLOUD_ACCESS_TOKEN;
+  if (accessToken) headers['Authorization'] = accessToken;
+
+  let pat = process.env.NX_CLOUD_PAT;
+  if (!pat) {
+    try {
+      const iniPath = process.env.XDG_CONFIG_HOME
+        ?
join(process.env.XDG_CONFIG_HOME, 'nxcloud', 'nxcloud.ini')
+        : join(process.env.HOME!, '.config', 'nxcloud', 'nxcloud.ini');
+      const ini = parseIni(readFileSync(iniPath, 'utf-8'));
+      pat =
+        ini?.[NX_CLOUD_URL]?.personalAccessToken ??
+        ini?.['https://cloud.nx.app']?.personalAccessToken;
+    } catch {
+      // Best effort: if the ini lookup throws (e.g. unreadable file) the
+      // personal-access-token header is simply left off.
+    }
+  }
+  if (pat) headers['Nx-Cloud-Personal-Access-Token'] = pat;
+
+  return headers;
+}
+
+// ─── HTTP helpers ────────────────────────────────────────────────────────────
+
+/**
+ * Fetch `url` and parse the response body as JSON.
+ *
+ * @typeParam T Shape the caller expects; the payload is not validated.
+ * @throws Error `HTTP <status>: <first 200 chars of body>` on a non-2xx response.
+ */
+async function fetchJSON<T>(url: string, options?: RequestInit): Promise<T> {
+  const res = await fetch(url, options);
+  if (!res.ok) {
+    const text = await res.text();
+    throw new Error(`HTTP ${res.status}: ${text.slice(0, 200)}`);
+  }
+  return res.json();
+}
+
+/**
+ * GET a CircleCI v2 API path, sending `Circle-Token` only when a token is
+ * configured.
+ */
+async function circleFetch<T>(path: string): Promise<T> {
+  const token = getCircleToken();
+  const headers: Record<string, string> = {};
+  if (token) headers['Circle-Token'] = token;
+  return fetchJSON<T>(`https://circleci.com/api/v2${path}`, { headers });
+}
+
+/**
+ * Call an NX Cloud `mcp-context` endpoint. A truthy `body` is sent as a JSON
+ * POST; otherwise the request is a plain GET.
+ */
+async function nxCloudFetch<T>(path: string, body?: unknown): Promise<T> {
+  return fetchJSON<T>(`${NX_CLOUD_URL}/nx-cloud/mcp-context${path}`, {
+    method: body ? 'POST' : 'GET',
+    headers: getNxCloudHeaders(),
+    body: body ?
JSON.stringify(body) : undefined, + }); +} + +// ─── Data types ────────────────────────────────────────────────────────────── + +interface CIRun { + id: string; + status: string; + createdAt: string; + durationSec: number; + creditsUsed: number; + costUsd: number; + failedJobs: string[]; + commitSha?: string; + nxPerTemplate?: Record; + nxResourceClasses?: Record; +} + +interface CIReport { + system: string; + workflow: string; + branches: string[]; + runs: CIRun[]; + summary: { + totalRuns: number; + successfulRuns: number; + failedRuns: number; + canceledRuns: number; + flakeRate: string; + durationMin: number; + durationMax: number; + durationAvg: number; + durationP50: number; + durationP95: number; + totalCredits: number; + totalCostUsd: number; + avgCostPerRun: number; + }; +} + +// ─── CircleCI Data ─────────────────────────────────────────────────────────── + +interface CircleInsightsRun { + id: string; + duration: number; + status: string; + created_at: string; + stopped_at: string; + credits_used: number; + branch: string; +} + +interface CircleJob { + name: string; + status: string; +} + +// Max pages to scan per branch, regardless of time window. Prevents runaway scans. +const CIRCLE_MAX_PAGES = 20; + +async function syncCircleCIRuns( + db: InstanceType, + workflow: string, + branches: string[], + workflowName: string, + sinceMs: number, + excludeBranches?: Set +): Promise { + const existingIds = dbGetExistingIds(db, 'circleci', workflow); + const newRuns: CIRun[] = []; + let skipped = 0; + let excluded = 0; + let hitPageCap = false; + + // Empty branches array → query across all branches (single un-filtered query). + const branchQueries = branches.length === 0 ? 
[undefined] : branches; + + for (const branch of branchQueries) { + let pageToken: string | undefined; + + pages: for (let page = 0; page < CIRCLE_MAX_PAGES; page++) { + const params = new URLSearchParams(); + if (branch) params.set('branch', branch); + else params.set('all-branches', 'true'); + if (pageToken) params.set('page-token', pageToken); + + const data = await circleFetch<{ + items: CircleInsightsRun[]; + next_page_token: string | null; + }>(`/insights/${CIRCLECI_PROJECT}/workflows/${workflowName}?${params}`); + + for (const run of data.items) { + // Runs come newest-first; once we're past the cutoff we're done. + if (new Date(run.created_at).getTime() < sinceMs) { + break pages; + } + if (run.status === 'canceled' || run.status === 'not_run') continue; + + if (excludeBranches?.has(run.branch)) { + excluded++; + continue; + } + + if (existingIds.has(run.id)) { + skipped++; + continue; + } + + const failedJobs: string[] = []; + if (run.status === 'failed') { + try { + const jobsData = await circleFetch<{ items: CircleJob[] }>(`/workflow/${run.id}/job`); + for (const job of jobsData.items) { + if (job.status === 'failed') failedJobs.push(job.name); + } + } catch {} + } + + // Get commit SHA via workflow → pipeline + let commitSha: string | undefined; + try { + const wf = await circleFetch<{ pipeline_id: string }>(`/workflow/${run.id}`); + const pipe = await circleFetch<{ vcs: { revision: string } }>( + `/pipeline/${wf.pipeline_id}` + ); + commitSha = pipe.vcs?.revision; + } catch {} + + const ciRun: CIRun = { + id: run.id, + status: run.status, + createdAt: run.created_at, + durationSec: run.duration, + creditsUsed: run.credits_used, + costUsd: run.credits_used * CIRCLECI_CREDIT_TO_USD, + failedJobs, + commitSha, + }; + + dbInsertRun(db, 'circleci', workflow, run.branch, ciRun); + newRuns.push(ciRun); + } + + pageToken = data.next_page_token ?? 
undefined; + if (!pageToken) break; + if (page === CIRCLE_MAX_PAGES - 1) hitPageCap = true; + } + } + + if (skipped > 0) console.log(` CircleCI: ${skipped} cached, ${newRuns.length} new`); + else console.log(` CircleCI: ${newRuns.length} new runs`); + if (excluded > 0) console.log(` CircleCI: excluded ${excluded} runs on medium+ branches`); + if (hitPageCap) { + console.log(` CircleCI: ⚠ hit ${CIRCLE_MAX_PAGES}-page cap before cutoff`); + } + + return dbGetRuns(db, 'circleci', workflow); +} + +// ─── NX Cloud Data ─────────────────────────────────────────────────────────── + +interface NxPipelineExecution { + id: string; + branch: string; + status: string; + createdAtMs: number; + completedAtMs: number | null; + durationMs: number; + commitSha?: string; + vcsTitle?: string; + vcsContext?: { ref?: string; headSha?: string; title?: string }; + runGroups: { + runGroupName: string; + status: string; + agentsMetadataSummary?: Record< + string, + { launchTemplate: string; onlineAtMs: number; offlineAtMs: number } + >; + }[]; +} + +interface NxPipelineSearchResult { + items: { + id: string; + branch: string; + status: string; + createdAtMs: number; + completedAtMs: number | null; + durationMs: number; + vcsContext?: { ref?: string }; + }[]; + nextPageToken?: string; +} + +// Max pages of pipeline executions to scan per branch, regardless of matches. +// With pageSize=50, 20 pages = 1000 CIPEs. Prevents runaway scans. +const NX_MAX_PAGES = 20; +const NX_PAGE_SIZE = 50; + +interface NxDashboardCredits { + totalCredits: number; + perTemplate: Record; + resourceClasses: Record; +} + +interface NxCipeRetryStats { + totalTasks: number; + totalTaskRetries: number; + successfulRetries: number; + failedRetries: number; + /** + * Direct NX field `ciPipelineExecution.duration.hypotheticalNoCacheMs` — + * total task-duration (ms) that the remote cache saved on this CIPE. Null + * on older CIPEs where the field wasn't present. 
+   */
+  hypotheticalNoCacheMs: number | null;
+}
+
+interface NxDashboardCipeAnalysis {
+  credits: NxDashboardCredits | null;
+  retryStats: NxCipeRetryStats | null;
+}
+
+/**
+ * Read per-template credits and per-CIPE retry stats from the NX Cloud
+ * dashboard analysis route, authenticated with the `NX_CLOUD_SESSION` cookie.
+ *
+ * Never throws. Without a session, on a non-2xx response, or on any error it
+ * returns `{ credits: null, retryStats: null }`.
+ *
+ * @param cipeId       Pipeline execution id.
+ * @param runGroupName Run group whose analysis is requested.
+ * @returns `credits` is `null` when the response has no usable
+ *   `computeCreditUsages`; `retryStats` is `null` when `taskRetryStats` is absent.
+ */
+async function fetchNxDashboardCipeAnalysis(
+  cipeId: string,
+  runGroupName: string
+): Promise<NxDashboardCipeAnalysis> {
+  const session = process.env.NX_CLOUD_SESSION;
+  if (!session) return { credits: null, retryStats: null };
+
+  try {
+    const url = `${NX_CLOUD_URL}/cipes/${cipeId}/analysis?runGroup=${encodeURIComponent(runGroupName)}&_data=routes%2F_auth.cipes.%24cipeId.analysis`;
+    const res = await fetch(url, {
+      headers: { Cookie: `_nxCloudSession=${session}` },
+    });
+    if (!res.ok) return { credits: null, retryStats: null };
+
+    const data = await res.json();
+
+    // --- Credits (per-template) ---
+    let credits: NxDashboardCredits | null = null;
+    const usages = data?.computeCreditUsages as
+      | Record<string, { totalCredits: number }>
+      | undefined;
+    // An empty usages object is truthy but carries no data. Treat it like a
+    // missing field so the caller estimates credits instead of recording a
+    // base-fee-only total as exact.
+    if (usages && Object.keys(usages).length > 0) {
+      const rcData = data?.resourceClasses as
+        | Record<string, { creditMultiplier: number }>
+        | undefined;
+      const resourceClasses: Record<string, number> = {};
+      if (rcData) {
+        for (const [tmpl, info] of Object.entries(rcData)) {
+          resourceClasses[tmpl] = info.creditMultiplier;
+        }
+      }
+
+      // Every CIPE starts from the flat per-CIPE fee; template usage is added on top.
+      let total = NX_CREDITS_PER_CIPE;
+      const perTemplate: Record<string, number> = {};
+      for (const [tmpl, info] of Object.entries(usages)) {
+        perTemplate[tmpl] = info.totalCredits;
+        total += info.totalCredits;
+      }
+      credits = { totalCredits: total, perTemplate, resourceClasses };
+    }
+
+    // --- Retry stats (per-CIPE) ---
+    const retryRaw = data?.ciPipelineExecution?.ttgImpactMetadata?.taskRetryStats as
+      | {
+          totalTasks: number;
+          totalTaskRetries: number;
+          successfulRetries: number;
+          failedRetries: number;
+        }
+      | undefined;
+    const hypoMs = data?.ciPipelineExecution?.duration?.hypotheticalNoCacheMs;
+    const hypothetical =
+      typeof hypoMs === 'number' && Number.isFinite(hypoMs) ? Math.round(hypoMs) : null;
+
+    const retryStats: NxCipeRetryStats | null = retryRaw
+      ? {
+          totalTasks: retryRaw.totalTasks ?? 0,
+          totalTaskRetries: retryRaw.totalTaskRetries ?? 0,
+          successfulRetries: retryRaw.successfulRetries ?? 0,
+          failedRetries: retryRaw.failedRetries ?? 0,
+          hypotheticalNoCacheMs: hypothetical,
+        }
+      : null;
+
+    return { credits, retryStats };
+  } catch {
+    return { credits: null, retryStats: null };
+  }
+}
+
+const prNumberCache = new Map<string, string>();
+
+/**
+ * Map a branch name to the number of its open pull request, as a string.
+ *
+ * Successful lookups are cached per branch. If no open PR exists the branch
+ * name itself is cached and returned. If the request fails the branch name is
+ * returned without caching, so a later call can retry.
+ *
+ * @param branch Head branch name in the storybookjs/storybook repository.
+ */
+async function resolvePRNumber(branch: string): Promise<string> {
+  if (prNumberCache.has(branch)) return prNumberCache.get(branch)!;
+
+  // Authenticate only when a token is configured. An empty `token ` credential
+  // risks a 401, which would silently force the branch-name fallback below.
+  const githubToken = process.env.GITHUB_TOKEN;
+  const headers: Record<string, string> = githubToken
+    ? { Authorization: `token ${githubToken}` }
+    : {};
+
+  try {
+    // The branch is URL-encoded so characters such as `&` or `#` cannot
+    // truncate or alter the query string.
+    const data = await fetchJSON<{ number: number }[]>(
+      `https://api.github.com/repos/storybookjs/storybook/pulls?head=storybookjs:${encodeURIComponent(branch)}&state=open&per_page=1`,
+      { headers }
+    );
+    const num = data[0]?.number?.toString() ?? branch;
+    prNumberCache.set(branch, num);
+    return num;
+  } catch {
+    return branch;
+  }
+}
+
+async function syncNxCloudRuns(
+  db: InstanceType<typeof DatabaseSync>,
+  workflow: string,
+  branches: string[],
+  sinceMs: number,
+  agentCreditsPerMin: Record<string, number>,
+  excludeBranches?: Set<string>
+): Promise<CIRun[]> {
+  const existingIds = dbGetExistingIds(db, 'nx', workflow);
+  const newRuns: CIRun[] = [];
+  let skipped = 0;
+  let excluded = 0;
+  const hasDashboardAccess = !!process.env.NX_CLOUD_SESSION;
+
+  // Empty branches array → query across all branches (no filter).
+  const prNumbers = branches.length === 0 ?
[] : await Promise.all(branches.map(resolvePRNumber)); + + const NX_TAG_MAP: Record = { + normal: 'ci:normal', + merged: 'ci:merged', + daily: 'ci:daily', + 'base (nx-ai)': 'ci:daily', + 'next:merged': 'ci:merged', + 'next:daily': 'ci:daily', + 'normal:prs': 'ci:normal', + 'merged:prs': 'ci:merged', + 'daily:prs': 'ci:daily', + 'rc:xlarge-plus': 'ci:normal', + 'rc:xlarge': 'ci:normal', + 'rc:large-plus': 'ci:normal', + 'rc:large': 'ci:normal', + 'rc:medium-plus': 'ci:normal', + 'rc:medium': 'ci:normal', + 'rc2:xlarge-plus': 'ci:normal', + 'rc2:xlarge': 'ci:normal', + 'rc2:large-plus': 'ci:normal', + 'rc2:large': 'ci:normal', + 'rc2:medium-plus': 'ci:normal', + 'rc2:medium': 'ci:normal', + }; + const expectedTag = NX_TAG_MAP[workflow]; + + const debug = !!process.env.NX_DEBUG; + let filteredNoTag = 0; + let filteredNoRuns = 0; + let scannedCipes = 0; + let hitPageCap = false; + + // Paginate pipeline executions until we see a CIPE older than `sinceMs`. + // Many CIPEs are filtered out (wrong tag, no runs, skip-ci) so the effective + // match rate on `next` is ~1/4. + let pageToken: string | undefined; + + outer: for (let page = 0; page < NX_MAX_PAGES; page++) { + const body: Record = { + statuses: ['SUCCEEDED', 'FAILED'], + limit: NX_PAGE_SIZE, + }; + if (prNumbers.length > 0) body.branches = prNumbers; + if (pageToken) body.pageToken = pageToken; + + const searchResult = await nxCloudFetch( + '/pipeline-executions/search', + body + ); + + for (const item of searchResult.items) { + scannedCipes++; + if (!item.completedAtMs) continue; + if ( + item.status === 'IN_PROGRESS' || + item.status === 'NOT_STARTED' || + item.status === 'CANCELED' + ) + continue; + + // When explicit branches were requested, keep only matching CIPEs. + if (prNumbers.length > 0 && !prNumbers.includes(item.branch)) continue; + + // CIPEs are returned newest-first; once we're past the cutoff we're done. 
+ if (item.createdAtMs < sinceMs) { + break outer; + } + + // Skip experiment/eval branches when doing a wild-branch sync. + if (excludeBranches?.has(item.branch)) { + excluded++; + continue; + } + + if (existingIds.has(item.id)) { + skipped++; + continue; + } + + // Fetch runs FIRST (before details) — used for tag matching. On `next` more + // than half of CIPEs are skip-ci/compile-only and we can filter them out + // without paying for the /pipeline-executions/{id} call. + let runItems: { id: string; status: string; durationMs: number; command: string }[] = []; + try { + const runSearch = await nxCloudFetch<{ + items: { id: string; status: string; durationMs: number; command: string }[]; + }>('/runs/search', { pipelineExecutionId: item.id, limit: 50 }); + runItems = runSearch.items; + } catch {} + + // Match workflow by checking the tag in the nx command (e.g. tag:ci:daily vs tag:ci:merged). + // CIPEs without any tag filter in their commands are internal/setup runs — skip them. + if (expectedTag) { + const mainRun = runItems.find((r) => r.command?.includes(`tag:${expectedTag}`)); + if (!mainRun) { + if (runItems.length === 0) { + filteredNoRuns++; + if (debug) + console.log(` [DEBUG] skip ${item.id}: no runs found (probably [skip ci])`); + } else { + filteredNoTag++; + if (debug) + console.log( + ` [DEBUG] skip ${item.id}: no tag:${expectedTag} (runs=${runItems.length}, first cmd=${(runItems[0]?.command ?? '').slice(0, 80)})` + ); + } + continue; + } + } + + const details = await nxCloudFetch(`/pipeline-executions/${item.id}`); + + // Use run duration instead of CIPE duration (CIPE includes queueing time) + const maxRunDuration = Math.max(0, ...runItems.map((r) => r.durationMs || 0)); + const durationSec = maxRunDuration > 0 ? 
maxRunDuration / 1000 : details.durationMs / 1000; + + const failedJobs: string[] = []; + for (const run of runItems) { + if (run.status !== 'Failed') continue; + try { + const runDetails = await nxCloudFetch<{ + tasks: { projectName: string; target: string; status: string }[]; + }>(`/runs/${run.id}`); + for (const task of runDetails.tasks ?? []) { + if (task.status === 'Failed') { + failedJobs.push(`${task.projectName}:${task.target}`); + } + } + } catch { + failedJobs.push(run.id); + } + } + + let totalNxCredits: number; + const firstRunGroup = details.runGroups[0]; + + // Single dashboard fetch returns both credits and per-CIPE retry stats. + const dashboardAnalysis = firstRunGroup + ? await fetchNxDashboardCipeAnalysis(item.id, firstRunGroup.runGroupName) + : { credits: null, retryStats: null }; + const dashboardCredits = dashboardAnalysis.credits; + + if (dashboardCredits) { + totalNxCredits = dashboardCredits.totalCredits; + } else { + totalNxCredits = NX_CREDITS_PER_CIPE; + // For rc:* / rc2:* sweep workflows, override linux-browsers-js rate + // from the per-branch table so we don't read the local worktree's + // agents.yaml (which only knows the currently-checked-out class). + const rcLbRate = + (workflow.startsWith('rc:') || workflow.startsWith('rc2:')) && branches.length === 1 + ? (RC_BRANCH_LINUX_BROWSERS_CPM[branches[0]] ?? null) + : null; + for (const rg of details.runGroups) { + if (rg.agentsMetadataSummary) { + const cipeStartMs = details.createdAtMs; + for (const [, agent] of Object.entries(rg.agentsMetadataSummary)) { + const billableMin = (agent.offlineAtMs - cipeStartMs) / 60000; + const creditsPerMin = + rcLbRate !== null && agent.launchTemplate === 'linux-browsers-js' + ? rcLbRate + : (agentCreditsPerMin[agent.launchTemplate] ?? 
15); + totalNxCredits += Math.max(0, billableMin) * creditsPerMin; + } + } + } + } + + const ciRun: CIRun = { + id: item.id, + status: item.status, + createdAt: new Date(item.createdAtMs).toISOString(), + durationSec, + creditsUsed: Math.round(totalNxCredits), + costUsd: totalNxCredits * NX_CREDIT_TO_USD, + failedJobs, + commitSha: details.commitSha ?? details.vcsContext?.headSha ?? undefined, + nxPerTemplate: dashboardCredits?.perTemplate, + nxResourceClasses: dashboardCredits?.resourceClasses, + }; + + dbInsertRun(db, 'nx', workflow, item.branch, ciRun); + if (dashboardAnalysis.retryStats) { + const rs = dashboardAnalysis.retryStats; + db.prepare( + `INSERT OR IGNORE INTO nx_cipe_retry_stats + (run_id, total_tasks, total_task_retries, successful_retries, failed_retries, hypothetical_no_cache_ms) + VALUES (?, ?, ?, ?, ?, ?)` + ).run( + item.id, + rs.totalTasks, + rs.totalTaskRetries, + rs.successfulRetries, + rs.failedRetries, + rs.hypotheticalNoCacheMs + ); + } + newRuns.push(ciRun); + } + + if (!searchResult.nextPageToken) break; + pageToken = searchResult.nextPageToken; + if (page === NX_MAX_PAGES - 1) hitPageCap = true; + } + + if (hasDashboardAccess) { + console.log(` NX Cloud: exact credits from dashboard API`); + } else { + console.log(` NX Cloud: estimated credits (set NX_CLOUD_SESSION for exact)`); + } + if (skipped > 0) console.log(` NX Cloud: ${skipped} cached, ${newRuns.length} new`); + else console.log(` NX Cloud: ${newRuns.length} new runs`); + if (filteredNoTag > 0 || filteredNoRuns > 0) { + console.log( + ` NX Cloud: scanned ${scannedCipes} CIPEs — filtered ${filteredNoTag} (no tag:${expectedTag}) + ${filteredNoRuns} (no runs, e.g. 
[skip ci])` + ); + } + if (excluded > 0) { + console.log(` NX Cloud: excluded ${excluded} CIPEs on medium+ branches`); + } + if (hitPageCap) { + console.log(` NX Cloud: ⚠ hit ${NX_MAX_PAGES}-page cap before cutoff`); + } + + return dbGetRuns(db, 'nx', workflow); +} + +/** + * Per-CIPE retry stats are fetched inline when a new CIPE is synced, but any + * CIPE that was already in the `runs` table before this schema existed won't + * have a row in `nx_cipe_retry_stats`. This backfill fills those gaps. + * + * Also re-fetches rows where `hypothetical_no_cache_ms` is still NULL (added + * in a later migration) so the direct cache-savings field is populated for + * all CIPEs over time. + * + * Idempotent — re-running only hits the API for CIPEs still missing data. + */ +async function backfillNxCipeRetryStats(db: InstanceType): Promise { + if (!process.env.NX_CLOUD_SESSION) { + console.log(` NX retry backfill: skipped (NX_CLOUD_SESSION not set)`); + return; + } + + const missing = db + .prepare( + `SELECT r.id FROM runs r + LEFT JOIN nx_cipe_retry_stats s ON s.run_id = r.id + WHERE r.system = 'nx' + AND (s.run_id IS NULL OR s.hypothetical_no_cache_ms IS NULL) + ORDER BY r.created_at DESC` + ) + .all() as { id: string }[]; + + if (missing.length === 0) { + console.log( + ` NX retry backfill: up to date (all CIPEs have retry stats + hypotheticalNoCacheMs)` + ); + return; + } + + console.log(` NX retry backfill: fetching for ${missing.length} CIPEs...`); + + // INSERT OR REPLACE so rows that pre-date the hypothetical_no_cache_ms + // column get updated in-place (we always have all four retry columns in + // hand from the API, so nothing is lost). 
+ const upsert = db.prepare( + `INSERT OR REPLACE INTO nx_cipe_retry_stats + (run_id, total_tasks, total_task_retries, successful_retries, failed_retries, hypothetical_no_cache_ms) + VALUES (?, ?, ?, ?, ?, ?)` + ); + + let stored = 0; + let apiFailures = 0; + for (const { id } of missing) { + try { + const details = await nxCloudFetch(`/pipeline-executions/${id}`); + const rg = details.runGroups[0]?.runGroupName; + if (!rg) continue; + + const analysis = await fetchNxDashboardCipeAnalysis(id, rg); + if (!analysis.retryStats) { + apiFailures++; + continue; + } + const rs = analysis.retryStats; + upsert.run( + id, + rs.totalTasks, + rs.totalTaskRetries, + rs.successfulRetries, + rs.failedRetries, + rs.hypotheticalNoCacheMs + ); + stored++; + } catch { + apiFailures++; + } + } + + console.log( + ` NX retry backfill: stored ${stored} / ${missing.length}${apiFailures > 0 ? ` (${apiFailures} API failures — will retry on next sync)` : ''}` + ); +} + +/** + * Per-CIPE cache hit counts from `/runs/{runId}` task listings. Used to + * quantify how much the NX remote cache saves on each workflow. Excludes + * `continuous-*` hashed tasks (serve / run-registry) which aren't cacheable. + * Idempotent backfill — re-run only fetches CIPEs still missing. 
+ */ +async function backfillNxCacheStats(db: InstanceType): Promise { + const missing = db + .prepare( + `SELECT r.id FROM runs r + LEFT JOIN nx_cache_stats s ON s.run_id = r.id + WHERE r.system = 'nx' AND s.run_id IS NULL + ORDER BY r.created_at DESC` + ) + .all() as { id: string }[]; + + if (missing.length === 0) { + console.log(` NX cache backfill: up to date (all CIPEs have cache stats)`); + return; + } + + console.log(` NX cache backfill: fetching for ${missing.length} CIPEs...`); + + const insert = db.prepare( + `INSERT OR IGNORE INTO nx_cache_stats (run_id, cache_hits, cache_misses, total_tasks) + VALUES (?, ?, ?, ?)` + ); + + let stored = 0; + let apiFailures = 0; + let skipped = 0; + for (const { id } of missing) { + try { + // Find the main run inside the CIPE (limit 5 is enough — most CIPEs have 1). + const runSearch = await nxCloudFetch<{ + items: { id: string; command?: string }[]; + }>('/runs/search', { pipelineExecutionId: id, limit: 5 }); + const mainRun = runSearch.items[0]; + if (!mainRun) { + skipped++; + continue; + } + + const detail = await nxCloudFetch<{ + tasks: { taskId: string; status: string; cacheStatus: string }[]; + }>(`/runs/${mainRun.id}`); + const tasks = detail.tasks ?? []; + let hits = 0; + let misses = 0; + for (const t of tasks) { + // Skip non-cacheable continuous tasks (serve / run-registry). + if ( + t.taskId.endsWith(':serve:production') || + t.taskId.endsWith(':run-registry:production') + ) { + continue; + } + const cs = t.cacheStatus ?? ''; + if (cs.includes('cache-hit')) hits++; + else if (cs === 'cache-miss') misses++; + } + insert.run(id, hits, misses, hits + misses); + stored++; + } catch { + apiFailures++; + } + } + + console.log( + ` NX cache backfill: stored ${stored} / ${missing.length}${skipped > 0 ? `, skipped ${skipped}` : ''}${apiFailures > 0 ? ` (${apiFailures} API failures — will retry on next sync)` : ''}` + ); +} + +/** + * Infer which agent template a task runs on from its target. 
Follows + * `.nx/workflows/distribution-config.yaml`: + * + * - compile / check / lint / knip / fmt → linux-js + * - everything else → linux-browsers-js + * + * In practice non-core `compile` is also listed on `linux-browsers-js` in + * the config, but NX assigns each task to exactly one agent and the linux-js + * pool owns the compile chain, so treating every `compile` as linux-js is + * accurate enough for credit accounting. + */ +const LINUX_JS_TARGETS = new Set(['compile', 'check', 'lint', 'knip', 'fmt']); + +function inferAgentTemplate(target: string): 'linux-js' | 'linux-browsers-js' { + return LINUX_JS_TARGETS.has(target) ? 'linux-js' : 'linux-browsers-js'; +} + +/** + * Per-task detail from `/runs/{runId}` (task-level cache status + duration). + * One row per (run_id, task_id), excluding non-cacheable continuous tasks. + * + * Used to compute **real** per-CIPE cache savings: each cache-hit task is + * valued by the avg duration of the *same task-id* when it ran fresh + * (from cache-miss samples), rather than a uniform per-CIPE average. + * + * Two API calls per missing CIPE (`/runs/search` then `/runs/{runId}`). + * Idempotent on `(run_id, task_id)` primary key. + */ +async function backfillNxRunTasks(db: InstanceType): Promise { + const missing = db + .prepare( + `SELECT r.id FROM runs r + WHERE r.system = 'nx' + AND NOT EXISTS (SELECT 1 FROM nx_run_tasks t WHERE t.run_id = r.id) + ORDER BY r.created_at DESC` + ) + .all() as { id: string }[]; + + if (missing.length === 0) { + console.log(` NX run-tasks backfill: up to date (all CIPEs have per-task rows)`); + return; + } + + console.log(` NX run-tasks backfill: fetching for ${missing.length} CIPEs...`); + + // Per-CIPE credit rates come from nx_template_credits (captured when the + // CIPE was first synced). Default to 60 credits/min if we don't have the + // data — that's the extra_large+ rate and matches the vast majority of + // CIPEs pre-dating the medium+ eval branches. 
+ const rateStmt = db.prepare( + `SELECT template, credit_multiplier FROM nx_template_credits WHERE run_id = ?` + ); + + const insert = db.prepare( + `INSERT OR IGNORE INTO nx_run_tasks + (run_id, task_id, project, target, duration_ms, cache_status, agent_template, credits_per_min) + VALUES (?, ?, ?, ?, ?, ?, ?, ?)` + ); + + let stored = 0; + let apiFailures = 0; + let skipped = 0; + let totalTaskRows = 0; + + for (const { id } of missing) { + try { + const runSearch = await nxCloudFetch<{ + items: { id: string; command?: string }[]; + }>('/runs/search', { pipelineExecutionId: id, limit: 5 }); + const mainRun = runSearch.items[0]; + if (!mainRun) { + skipped++; + continue; + } + + const detail = await nxCloudFetch<{ + tasks: { + taskId: string; + projectName: string; + target: string; + durationMs: number; + status: string; + cacheStatus: string; + isCacheable: boolean; + }[]; + }>(`/runs/${mainRun.id}`); + + const rates = rateStmt.all(id) as { template: string; credit_multiplier: number }[]; + const rateByTemplate: Record = {}; + for (const r of rates) rateByTemplate[r.template] = r.credit_multiplier; + + for (const t of detail.tasks ?? []) { + if ( + t.taskId.endsWith(':serve:production') || + t.taskId.endsWith(':run-registry:production') + ) { + continue; + } + const cs = t.cacheStatus ?? ''; + if (!cs.includes('cache-hit') && cs !== 'cache-miss') continue; + + const template = inferAgentTemplate(t.target); + const creditsPerMin = rateByTemplate[template] ?? 60; + + insert.run( + id, + t.taskId, + t.projectName, + t.target, + t.durationMs ?? 0, + cs, + template, + creditsPerMin + ); + totalTaskRows++; + } + stored++; + } catch { + apiFailures++; + } + } + + console.log( + ` NX run-tasks backfill: stored ${totalTaskRows} task rows across ${stored} / ${missing.length} CIPEs${skipped > 0 ? `, skipped ${skipped}` : ''}${apiFailures > 0 ? 
` (${apiFailures} API failures — will retry on next sync)` : ''}` + ); +} + +// ─── NX Flaky Task Analytics (Enterprise) ──────────────────────────────────── + +interface NxFlakyTaskSummary { + project: string; + target: string; + configuration: string; + totalReruns: number; + sampleSizeFlakinessRate: number; + flakinessRate: number; + impactScore: number; + windowStart: string; + windowEnd: string; + lastFailureTime?: string; + timeWastedSeconds?: number; + avgTimeConsumedMs?: number; +} + +interface NxFlakyTaskDetail { + windowStart: string; + windowEnd: string; + sampleSizeFlakinessRate: number; + totalDeflakedAutomaticallyCount: number; + totalFlakyHashes: number; + totalExecutions: number; + totalReruns: number; + timeWastedSeconds: number; + avgTaskDurationMs: number; + flakinessRate: number; + lastFailureTime?: string; +} + +interface NxFlakyKpis { + activeFlakyTasks: { current: number }; + proportionTasksFlaky: { current: string }; + highRiskTasks: { current: number }; +} + +interface NxFlakyAnalyticsResponse { + recentFlakyTaskMetrics: NxFlakyTaskSummary[]; + flakyTaskKPIs: NxFlakyKpis; + startDate: string; + endDate: string; + range: string; +} + +/** + * Fetch flaky-task analytics via the Enterprise dashboard endpoint. + * Requires NX_CLOUD_SESSION cookie. + * + * Data is snapshotted once per calendar day. Re-running the sync on the same + * day with the same --range is a no-op. + */ +async function syncNxFlakyAnalytics( + db: InstanceType, + rangeDays: number +): Promise { + const session = process.env.NX_CLOUD_SESSION; + if (!session) { + console.log(` NX flaky analytics: skipped (NX_CLOUD_SESSION not set)`); + return; + } + + const today = new Date().toISOString().slice(0, 10); + + // Skip if we already took today's snapshot for this range. + const existing = db + .prepare( + `SELECT COUNT(*) AS n FROM nx_flaky_task_snapshots WHERE snapshot_date = ? 
AND range_days = ?` + ) + .get(today, rangeDays) as { n: number }; + if (existing.n > 0) { + console.log( + ` NX flaky analytics: already synced today (${existing.n} rows for range=${rangeDays}d)` + ); + return; + } + + // 1. Fetch the summary (lists every flaky task + workspace KPIs). + const summaryUrl = + `${NX_CLOUD_URL}/orgs/${NX_CLOUD_ORG_ID}/workspaces/${NX_CLOUD_ID}` + + `/analytics/flaky-tasks?range=${rangeDays}` + + `&_data=routes%2F_auth.orgs.%24orgId.workspaces.%24workspaceId.analytics.flaky-tasks`; + + let summary: NxFlakyAnalyticsResponse; + try { + const res = await fetch(summaryUrl, { headers: { Cookie: `_nxCloudSession=${session}` } }); + if (!res.ok) { + console.log(` NX flaky analytics: summary HTTP ${res.status} — skipped`); + return; + } + summary = (await res.json()) as NxFlakyAnalyticsResponse; + } catch (e: any) { + console.log(` NX flaky analytics: fetch error — ${e.message}`); + return; + } + + const tasks = summary.recentFlakyTaskMetrics ?? []; + console.log( + ` NX flaky analytics: fetched summary (${tasks.length} flaky tasks, range=${rangeDays}d)` + ); + + // 2. Fetch per-task detail (includes totalDeflakedAutomaticallyCount — the rescue count). + let totalRescues = 0; + let totalReruns = 0; + let totalRetrySec = 0; + + const insertSnapshot = db.prepare(` + INSERT INTO nx_flaky_task_snapshots ( + snapshot_date, range_days, window_start, window_end, + project, target, configuration, + total_reruns, total_rescues, total_executions, total_flaky_hashes, + retry_time_seconds, avg_time_consumed_ms, flakiness_rate, impact_score, + last_failure_time + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ `); + + for (const t of tasks) { + const detailUrl = + `${NX_CLOUD_URL}/orgs/${NX_CLOUD_ORG_ID}/workspaces/${NX_CLOUD_ID}` + + `/analytics/flaky-tasks/${encodeURIComponent(t.project)}/${encodeURIComponent(t.target)}` + + `?range=${rangeDays}`; + + let detail: NxFlakyTaskDetail | null = null; + try { + const res = await fetch(detailUrl, { + headers: { Cookie: `_nxCloudSession=${session}` }, + }); + if (res.ok) detail = (await res.json()) as NxFlakyTaskDetail; + } catch {} + + const rescues = detail?.totalDeflakedAutomaticallyCount ?? 0; + const executions = detail?.totalExecutions ?? t.sampleSizeFlakinessRate; + const flakyHashes = detail?.totalFlakyHashes ?? 0; + const retrySec = detail?.timeWastedSeconds ?? t.timeWastedSeconds ?? 0; + const avgMs = detail?.avgTaskDurationMs ?? t.avgTimeConsumedMs ?? 0; + + insertSnapshot.run( + today, + rangeDays, + t.windowStart, + t.windowEnd, + t.project, + t.target, + t.configuration, + t.totalReruns, + rescues, + executions, + flakyHashes, + retrySec, + avgMs, + t.flakinessRate, + t.impactScore, + t.lastFailureTime ?? null + ); + + totalReruns += t.totalReruns; + totalRescues += rescues; + totalRetrySec += retrySec; + } + + // 3. Store workspace-wide KPIs. + const kpis = summary.flakyTaskKPIs; + const proportionPct = parseFloat( + String(kpis?.proportionTasksFlaky?.current ?? '0').replace('%', '') + ); + + db.prepare( + `INSERT OR REPLACE INTO nx_flaky_task_kpis ( + snapshot_date, range_days, window_start, window_end, + active_flaky_tasks, proportion_tasks_flaky_pct, high_risk_tasks, + total_reruns, total_rescues, retry_time_seconds + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)` + ).run( + today, + rangeDays, + summary.startDate, + summary.endDate, + kpis?.activeFlakyTasks?.current ?? tasks.length, + proportionPct, + kpis?.highRiskTasks?.current ?? 
0,
    totalReruns,
    totalRescues,
    totalRetrySec
  );

  console.log(
    ` NX flaky analytics: ${tasks.length} tasks stored, ${totalRescues} rescues out of ${totalReruns} retries`
  );
}

// ─── Analysis ────────────────────────────────────────────────────────────────

/**
 * Linearly interpolated percentile of an ascending-sorted array.
 *
 * @param sorted Values sorted ascending (callers sort before passing).
 * @param p Percentile in the 0–100 range.
 * @returns The interpolated value, or 0 for an empty array.
 */
function percentile(sorted: number[], p: number): number {
  if (sorted.length === 0) return 0;
  const idx = (p / 100) * (sorted.length - 1);
  const lower = Math.floor(idx);
  const upper = Math.ceil(idx);
  if (lower === upper) return sorted[lower];
  return sorted[lower] + (sorted[upper] - sorted[lower]) * (idx - lower);
}

/**
 * Aggregate a list of runs into a report with summary statistics.
 *
 * Runs carry either lowercase statuses ('success' / 'failed' / 'canceled') or
 * uppercase ones ('SUCCEEDED' / 'FAILED' / 'CANCELED'), so every status check
 * accepts both spellings. Canceled runs are excluded from the flake-rate
 * denominator and from duration statistics, but still count towards
 * `totalRuns`, credits and cost.
 *
 * @param system Display name of the CI system.
 * @param workflow Workflow key the runs belong to.
 * @param branches Branches the runs were collected from.
 * @param runs Runs to summarise.
 * @returns The report: the inputs plus a `summary` of counts, durations and cost.
 */
function buildReport(
  system: string,
  workflow: string,
  branches: string[],
  runs: CIRun[]
): CIReport {
  const isCanceled = (r: CIRun): boolean => r.status === 'canceled' || r.status === 'CANCELED';

  // Previously only 'CANCELED' was excluded here, so lowercase 'canceled' runs
  // leaked into the flake-rate denominator and the duration stats.
  const completed = runs.filter((r) => !isCanceled(r));
  const successful = completed.filter((r) => r.status === 'success' || r.status === 'SUCCEEDED');
  const failed = completed.filter((r) => r.status === 'failed' || r.status === 'FAILED');
  const canceled = runs.filter(isCanceled);

  const durations = completed.map((r) => r.durationSec).sort((a, b) => a - b);
  const totalCredits = runs.reduce((sum, r) => sum + r.creditsUsed, 0);
  const totalCost = runs.reduce((sum, r) => sum + r.costUsd, 0);

  return {
    system,
    workflow,
    branches,
    runs,
    summary: {
      totalRuns: runs.length,
      successfulRuns: successful.length,
      failedRuns: failed.length,
      canceledRuns: canceled.length,
      flakeRate:
        completed.length > 0 ? `${((failed.length / completed.length) * 100).toFixed(1)}%` : 'N/A',
      durationMin: durations.length > 0 ? Math.round(durations[0]) : 0,
      durationMax: durations.length > 0 ? Math.round(durations[durations.length - 1]) : 0,
      durationAvg:
        durations.length > 0
          ? Math.round(durations.reduce((a, b) => a + b, 0) / durations.length)
          : 0,
      durationP50: Math.round(percentile(durations, 50)),
      durationP95: Math.round(percentile(durations, 95)),
      totalCredits: Math.round(totalCredits),
      totalCostUsd: Math.round(totalCost * 100) / 100,
      avgCostPerRun: runs.length > 0 ? Math.round((totalCost / runs.length) * 100) / 100 : 0,
    },
  };
}

// ─── Display ─────────────────────────────────────────────────────────────────

/**
 * Format a duration in seconds as `<m>m<ss>s`.
 *
 * The value is rounded to whole seconds before splitting into minutes and
 * seconds, so fractional inputs such as 119.6 render as `2m00s` instead of
 * `1m60s`.
 */
function formatDuration(sec: number): string {
  const total = Math.round(sec);
  const m = Math.floor(total / 60);
  const s = total % 60;
  return `${m}m${s.toString().padStart(2, '0')}s`;
}

/** Print the summary block of a report, plus the ten most frequent failed jobs. */
function printReport(report: CIReport) {
  const s = report.summary;
  console.log(`\n${'═'.repeat(60)}`);
  console.log(` ${report.system} — ${report.workflow}`);
  console.log(` Branches: ${report.branches.join(', ')}`);
  console.log(`${'═'.repeat(60)}`);
  console.log(
    ` Runs: ${s.totalRuns} (✓ ${s.successfulRuns} ✗ ${s.failedRuns} ⊘ ${s.canceledRuns})`
  );
  console.log(` Flake rate: ${s.flakeRate}`);
  console.log(
    ` Duration: min=${formatDuration(s.durationMin)} avg=${formatDuration(s.durationAvg)} p50=${formatDuration(s.durationP50)} p95=${formatDuration(s.durationP95)} max=${formatDuration(s.durationMax)}`
  );
  console.log(` Credits: ${s.totalCredits.toLocaleString()} total`);
  console.log(` Cost: $${s.totalCostUsd.toFixed(2)} total ($${s.avgCostPerRun.toFixed(2)}/run)`);

  if (report.runs.some((r) => r.failedJobs.length > 0)) {
    console.log(`\n Failed jobs breakdown:`);
    // Number of runs in which each job name appears as failed.
    const failCounts: Record<string, number> = {};
    for (const run of report.runs) {
      for (const job of run.failedJobs) {
        failCounts[job] = (failCounts[job] ??
0) + 1;
      }
    }
    const sorted = Object.entries(failCounts).sort(([, a], [, b]) => b - a);
    for (const [job, count] of sorted.slice(0, 10)) {
      console.log(` ${count}x ${job}`);
    }
  }
}

/**
 * Print one table row per run: index, status glyph, duration, credits, cost
 * and creation timestamp (truncated to seconds).
 *
 * Glyphs follow the legend used by the report summary: ✓ succeeded,
 * ⊘ canceled, ✗ anything else. Canceled runs were previously drawn as ✗,
 * which made them indistinguishable from failures.
 */
function printRunsTable(report: CIReport) {
  console.log(`\n Individual runs (${report.system} — ${report.workflow}):`);
  console.log(
    ` ${'#'.padStart(3)} ${'Status'.padEnd(10)} ${'Duration'.padStart(10)} ${'Credits'.padStart(10)} ${'Cost'.padStart(8)} Created`
  );
  console.log(` ${'─'.repeat(70)}`);
  report.runs.forEach((r, i) => {
    let status = '✗';
    if (r.status === 'success' || r.status === 'SUCCEEDED') status = '✓';
    else if (r.status === 'canceled' || r.status === 'CANCELED') status = '⊘';
    console.log(
      ` ${(i + 1).toString().padStart(3)} ${status.padEnd(10)} ${formatDuration(r.durationSec).padStart(10)} ${r.creditsUsed.toLocaleString().padStart(10)} ${('$' + r.costUsd.toFixed(2)).padStart(8)} ${r.createdAt.slice(0, 19)}`
    );
  });
}

/**
 * Predict what the given NX runs would have cost in credits if the
 * `linux-browsers-js` template ran at the medium+ rate.
 *
 * Runs without per-template credits or resource-class data are ignored.
 *
 * @param runs NX runs to re-price.
 * @returns Rounded total credits and the per-run average (2 decimals), or
 *   `null` when no run carried the data needed for a prediction.
 */
function computePredictedMediumPlusCost(
  runs: CIRun[]
): { totalCredits: number; avgPerRun: number } | null {
  // Only downgrade linux-browsers-js to medium+ (15 credits/min).
  // linux-js stays at its current rate — it runs 1 agent doing heavy compile,
  // downgrading it doesn't save much and could slow down the bottleneck.
  const TARGET_RATE = 15;
  const DOWNGRADE_TEMPLATES = new Set(['linux-browsers-js']);
  let total = 0;
  let count = 0;

  for (const run of runs) {
    if (!run.nxPerTemplate || !run.nxResourceClasses) continue;
    // Every CIPE starts from the flat per-CIPE credit charge.
    let predicted = NX_CREDITS_PER_CIPE;
    for (const [tmpl, credits] of Object.entries(run.nxPerTemplate)) {
      const actualRate = run.nxResourceClasses[tmpl];
      if (DOWNGRADE_TEMPLATES.has(tmpl) && actualRate && actualRate > TARGET_RATE) {
        // credits / rate = agent minutes; re-price those minutes at the target rate.
        predicted += (credits / actualRate) * TARGET_RATE;
      } else {
        predicted += credits;
      }
    }
    total += predicted;
    count++;
  }

  if (count === 0) return null;
  return { totalCredits: Math.round(total), avgPerRun: Math.round((total / count) * 100) / 100 };
}

function printFlakeAnalysis(circleReport: CIReport, nxReport: CIReport) {
  const circleFlakes: Record<string, number> = {};
  const nxFlakes: Record<string, number> = {};

  for (const run of circleReport.runs) {
    for (const job of run.failedJobs) {
      circleFlakes[job] = (circleFlakes[job] ?? 0) + 1;
    }
  }
  for (const run of nxReport.runs) {
    for (const job of run.failedJobs) {
      nxFlakes[job] = (nxFlakes[job] ??
0) + 1; + } + } + + const circleTotal = circleReport.runs.filter( + (r) => r.status === 'failed' || r.status === 'FAILED' + ).length; + const nxTotal = nxReport.runs.filter( + (r) => r.status === 'failed' || r.status === 'FAILED' + ).length; + + if (circleTotal === 0 && nxTotal === 0) return; + + console.log(`\n${'─'.repeat(60)}`); + console.log(` FLAKE ANALYSIS: ${circleReport.workflow}`); + console.log(`${'─'.repeat(60)}`); + + if (circleTotal > 0) { + const circleRuns = circleReport.summary.totalRuns; + console.log(`\n CircleCI (${circleTotal} failed runs out of ${circleRuns}):`); + const sorted = Object.entries(circleFlakes).sort(([, a], [, b]) => b - a); + for (const [job, count] of sorted.slice(0, 15)) { + const pct = ((count / circleRuns) * 100).toFixed(1); + console.log(` ${count.toString().padStart(3)}x (${pct.padStart(5)}%) ${job}`); + } + } + + if (nxTotal > 0) { + const nxRuns = nxReport.summary.totalRuns; + console.log(`\n NX Cloud (${nxTotal} failed runs out of ${nxRuns}):`); + const sorted = Object.entries(nxFlakes).sort(([, a], [, b]) => b - a); + for (const [job, count] of sorted.slice(0, 15)) { + const pct = ((count / nxRuns) * 100).toFixed(1); + console.log(` ${count.toString().padStart(3)}x (${pct.padStart(5)}%) ${job}`); + } + } +} + +function printComparison(circleReport: CIReport, nxReport: CIReport) { + const c = circleReport.summary; + const n = nxReport.summary; + + console.log(`\n${'─'.repeat(60)}`); + console.log(` COMPARISON: ${circleReport.workflow}`); + console.log(`${'─'.repeat(60)}`); + + const row = (label: string, cVal: string, nVal: string, winner?: string) => { + const indicator = winner === 'circle' ? ' ◀' : winner === 'nx' ? 
' ▶' : ''; + console.log(` ${label.padEnd(20)} ${cVal.padStart(15)} ${nVal.padStart(15)} ${indicator}`); + }; + + row('', 'CircleCI', 'NX Cloud'); + row('Runs', String(c.totalRuns), String(n.totalRuns)); + row( + 'Success rate', + `${c.successfulRuns}/${c.totalRuns - c.canceledRuns}`, + `${n.successfulRuns}/${n.totalRuns - n.canceledRuns}` + ); + row( + 'Flake rate', + c.flakeRate, + n.flakeRate, + parseFloat(c.flakeRate) < parseFloat(n.flakeRate) + ? 'circle' + : parseFloat(n.flakeRate) < parseFloat(c.flakeRate) + ? 'nx' + : undefined + ); + row( + 'Avg duration', + formatDuration(c.durationAvg), + formatDuration(n.durationAvg), + c.durationAvg < n.durationAvg ? 'circle' : n.durationAvg < c.durationAvg ? 'nx' : undefined + ); + row('P50 duration', formatDuration(c.durationP50), formatDuration(n.durationP50)); + row('P95 duration', formatDuration(c.durationP95), formatDuration(n.durationP95)); + row( + 'Avg cost/run', + `$${c.avgCostPerRun.toFixed(2)}`, + `$${n.avgCostPerRun.toFixed(2)}`, + c.avgCostPerRun < n.avgCostPerRun + ? 'circle' + : n.avgCostPerRun < c.avgCostPerRun + ? 'nx' + : undefined + ); + row('Total cost', `$${c.totalCostUsd.toFixed(2)}`, `$${n.totalCostUsd.toFixed(2)}`); + row('Total credits', c.totalCredits.toLocaleString(), n.totalCredits.toLocaleString()); + + const predicted = computePredictedMediumPlusCost(nxReport.runs); + if (predicted) { + const predCostPerRun = predicted.avgPerRun * NX_CREDIT_TO_USD; + console.log(''); + row( + 'NX if medium+', + '', + `$${predCostPerRun.toFixed(2)}/run`, + predCostPerRun < c.avgCostPerRun ? 'nx' : 'circle' + ); + row(' total predicted', '', `$${(predicted.totalCredits * NX_CREDIT_TO_USD).toFixed(2)}`); + row(' credits predicted', '', predicted.totalCredits.toLocaleString()); + } +} + +// ─── Main ──────────────────────────────────────────────────────────────────── + +async function main() { + const args = process.argv.slice(2); + const workflowFilter = args.includes('--workflow') + ? 
args[args.indexOf('--workflow') + 1] + : undefined; + const sinceArg = args.includes('--since') ? args[args.indexOf('--since') + 1] : undefined; + const days = args.includes('--days') ? parseInt(args[args.indexOf('--days') + 1], 10) : 14; + const reportOnly = args.includes('--report-only'); + const showRuns = args.includes('--show-runs'); + const flakyRange = args.includes('--flaky-range') + ? parseInt(args[args.indexOf('--flaky-range') + 1], 10) + : 30; + const skipFlakyAnalytics = args.includes('--skip-flaky-analytics'); + + const sinceMs = sinceArg ? new Date(sinceArg).getTime() : Date.now() - days * 24 * 60 * 60 * 1000; + if (Number.isNaN(sinceMs)) { + console.error(`Invalid --since value: ${sinceArg}`); + process.exit(1); + } + + const workflows = workflowFilter ? [workflowFilter] : Object.keys(EVAL_BRANCHES); + + const db = initDB(); + const agentCreditsPerMin = loadNxAgentCreditsPerMin(); + + console.log('╔══════════════════════════════════════════════════════════╗'); + console.log('║ CI Evaluation: NX Cloud vs CircleCI ║'); + console.log('╚══════════════════════════════════════════════════════════╝'); + console.log(` Date: ${new Date().toISOString()}`); + console.log(` Workflows: ${workflows.join(', ')}`); + console.log( + sinceArg + ? ` Window: since ${sinceArg}` + : ` Window: last ${days} days (since ${new Date(sinceMs).toISOString().slice(0, 10)})` + ); + console.log(` Mode: ${reportOnly ? 
'report-only (from cache)' : 'sync + report'}`); + console.log(` DB: ${DB_PATH}`); + console.log(` NX agent rates (from .nx/workflows/agents.yaml):`); + for (const [name, rate] of Object.entries(agentCreditsPerMin)) { + console.log(` ${name}: ${rate} credits/min`); + } + + for (const workflow of workflows) { + const branches = EVAL_BRANCHES[workflow]; + if (!branches) { + console.log(`\n ⚠ Unknown workflow: ${workflow}`); + continue; + } + + const workflowName = WORKFLOW_NAMES[workflow]; + + console.log(`\n Fetching ${workflow} data...`); + + let circleRuns: CIRun[] = []; + let nxRuns: CIRun[] = []; + + if (reportOnly) { + circleRuns = dbGetRuns(db, 'circleci', workflow); + nxRuns = dbGetRuns(db, 'nx', workflow); + console.log(` CircleCI: ${circleRuns.length} runs (from cache)`); + console.log(` NX Cloud: ${nxRuns.length} runs (from cache)`); + } else { + const isWildBranch = branches.length === 0; + const excludeBranches = isWildBranch ? MEDIUM_PLUS_BRANCHES : undefined; + const branchLabel = isWildBranch + ? 'ALL branches (excluding eval/medium+)' + : branches.join(', '); + + try { + console.log(` CircleCI: querying branches ${branchLabel}...`); + circleRuns = await syncCircleCIRuns( + db, + workflow, + branches, + workflowName, + sinceMs, + excludeBranches + ); + } catch (e: any) { + console.log(` CircleCI: error — ${e.message}`); + circleRuns = dbGetRuns(db, 'circleci', workflow); + } + + try { + console.log(` NX Cloud: querying branches ${branchLabel}...`); + nxRuns = await syncNxCloudRuns( + db, + workflow, + branches, + sinceMs, + agentCreditsPerMin, + excludeBranches + ); + } catch (e: any) { + console.log(` NX Cloud: error — ${e.message}`); + nxRuns = dbGetRuns(db, 'nx', workflow); + } + } + + // Scope the report to the time window even if the DB has older cached runs. 
+ circleRuns = circleRuns.filter((r) => new Date(r.createdAt).getTime() >= sinceMs); + nxRuns = nxRuns.filter((r) => new Date(r.createdAt).getTime() >= sinceMs); + + const circleReport = buildReport('CircleCI', workflow, branches, circleRuns); + const nxReport = buildReport('NX Cloud', workflow, branches, nxRuns); + + printReport(circleReport); + printReport(nxReport); + + if (circleRuns.length > 0 && nxRuns.length > 0) { + printComparison(circleReport, nxReport); + } + if (circleRuns.length > 0 || nxRuns.length > 0) { + printFlakeAnalysis(circleReport, nxReport); + } + + if (showRuns) { + if (circleRuns.length > 0) printRunsTable(circleReport); + if (nxRuns.length > 0) printRunsTable(nxReport); + } + } + + // Backfill per-CIPE retry stats for any NX CIPEs that pre-date this schema. + // Only hits the API for CIPEs still missing a row — idempotent on re-run. + if (!reportOnly) { + console.log(`\n Backfilling NX per-CIPE retry stats...`); + try { + await backfillNxCipeRetryStats(db); + } catch (e: any) { + console.log(` NX retry backfill: error — ${e.message}`); + } + + console.log(`\n Backfilling NX per-CIPE cache stats...`); + try { + await backfillNxCacheStats(db); + } catch (e: any) { + console.log(` NX cache backfill: error — ${e.message}`); + } + + console.log(`\n Backfilling NX per-CIPE task detail (for real cache savings)...`); + try { + await backfillNxRunTasks(db); + } catch (e: any) { + console.log(` NX run-tasks backfill: error — ${e.message}`); + } + } + + // Workspace-level flaky-task analytics (not per-workflow). Stored once per + // calendar day so re-running the sync doesn't hit the API again. 
+ if (!reportOnly && !skipFlakyAnalytics) { + console.log(`\n Fetching NX flaky-task analytics (${flakyRange}d)...`); + try { + await syncNxFlakyAnalytics(db, flakyRange); + } catch (e: any) { + console.log(` NX flaky analytics: error — ${e.message}`); + } + } + + console.log(`\n${'═'.repeat(60)}`); + console.log(' Notes:'); + console.log(' - CircleCI credits: actual credits_used from Insights API'); + console.log( + ` - NX Cloud credits: ${process.env.NX_CLOUD_SESSION ? 'actual from dashboard API (exact)' : 'estimated from agent timing (~6% overcount)'}` + ); + console.log(' - Both platforms use identical credits/min per resource class'); + console.log(` - CircleCI cost: $${CIRCLECI_CREDIT_TO_USD}/credit (Performance plan)`); + console.log( + ` - NX Cloud cost: $${NX_CREDIT_TO_USD}/credit + ${NX_CREDITS_PER_CIPE} credits/CIPE (Enterprise plan)` + ); + console.log(` - Data cached in ${DB_PATH}`); + console.log(`${'═'.repeat(60)}\n`); + + db.close(); +} + +main().catch((err) => { + console.error('Fatal error:', err); + process.exit(1); +}); diff --git a/scripts/generate-canvas-data.ts b/scripts/generate-canvas-data.ts new file mode 100644 index 000000000000..a9cfb46072bc --- /dev/null +++ b/scripts/generate-canvas-data.ts @@ -0,0 +1,665 @@ +/** + * Regenerate the inline constants in `nx-vs-circleci-findings.canvas.tsx` from + * the SQLite cache. Canvas files must embed all data inline (no relative + * imports per the canvas SDK), so instead of importing we rewrite the + * constants in place. + * + * Usage: + * yarn jiti scripts/generate-canvas-data.ts + * yarn jiti scripts/generate-canvas-data.ts --since 2026-03-23T13:54:53Z + * + * The script: + * 1. Queries `ci-eval.db` for workflow stats, paired-commit analysis, + * auto-retry analytics, and flake leaderboards. + * 2. Serializes each result to a TypeScript object literal. + * 3. Finds `const NAME: ... = { ... };` blocks in the canvas by name and + * replaces the value (keeping the type annotation intact). 
+ * + * If the DB is empty or missing data, the constant is left untouched. + */ + +import { readFileSync, writeFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { DatabaseSync } from 'node:sqlite'; + +const DB_PATH = join(import.meta.dirname, 'ci-eval.db'); +const CANVAS_PATH = join( + process.env.HOME!, + '.cursor/projects/Users-kasperpeulen-cursor-worktrees-storybook-jafl/canvases/nx-vs-circleci-findings.canvas.tsx' +); + +const NX_CREDITS_PER_CIPE = 500; +const NX_CREDIT_TO_USD = 0.0005; +const DOWNGRADE_TEMPLATES = new Set(['linux-browsers-js']); +const TARGET_RATE = 15; + +// ─── Argument parsing ──────────────────────────────────────────────────────── + +const args = process.argv.slice(2); +const sinceArg = args.includes('--since') + ? args[args.indexOf('--since') + 1] + : '2026-03-23T13:54:53Z'; +const flakyRange = args.includes('--flaky-range') + ? parseInt(args[args.indexOf('--flaky-range') + 1], 10) + : 30; + +// ─── DB helpers ────────────────────────────────────────────────────────────── + +type Db = InstanceType; + +function percentile(sorted: number[], p: number): number { + if (sorted.length === 0) return 0; + const idx = (p / 100) * (sorted.length - 1); + const lo = Math.floor(idx); + const hi = Math.ceil(idx); + return lo === hi ? sorted[lo] : sorted[lo] + (sorted[hi] - sorted[lo]) * (idx - lo); +} + +interface WorkflowStats { + runs: number; + passed: number; + failed: number; + flakeRate: number; + avgDurSec: number; + p50Sec: number; + p90Sec: number; + avgCost: number; + totalCost: number; + avgCostMP?: number; + totalCostMP?: number; + // NX only: CIPEs that succeeded because auto-retry rescued at least one task. + // Without retry these would have failed — so rawFailed = failed + rescuedByRetry. + rescuedByRetry?: number; + rawFailed?: number; + rawFlakeRate?: number; + // NX only: total task-level retry attempts across all CIPEs in this workflow. + // Lets us estimate retry compute cost. 
+ taskRetries?: number; + // NX only: successful retries (sum of nx_cipe_retry_stats.successful_retries). + // Usually > rescuedByRetry because one rescued CIPE can have multiple task rescues. + successfulRetries?: number; + // NX only: estimated USD spent on retry compute in this workflow. + // Uses workspace-wide avg retry duration × agent credit rate × taskRetries. + retryCostUsd?: number; + // NX only: avg cost per run with retry compute subtracted. + avgCostWithoutRetry?: number; + // NX only: weighted avg cache hit rate across runs that have cache data + // (excluding continuous serve/registry tasks which aren't cacheable). + cacheHitRate?: number; + cacheRuns?: number; + // NX only: total USD that would have been spent without the cache. + // + // Derived by valuing each cache-hit task by the avg duration of the *same* + // task-id when it actually ran fresh (cache-miss samples from nx_run_tasks), + // then multiplying by that CIPE's credits/min. No cross-task averaging — + // each cache-hit is priced by its own task-id's fresh-run history. + costSavedByCacheUsd?: number; +} + +/** + * Companion cross-check value: cache savings sourced from NX Cloud's direct + * `ciPipelineExecution.duration.hypotheticalNoCacheMs` field. Computed + * alongside `costSavedByCacheUsd` but kept out of `WorkflowStats` because + * it's noise in the canvas — same semantic, less-precise conversion (per-CIPE + * weighted rate instead of true per-task accounting). Useful only for + * methodology sanity-checks printed at generation time. + */ +interface CacheSavingsCrossCheck { + derivedUsd: number | null; + directUsd: number | null; +} + +/** Populated as a side-effect of `computeWorkflowStats` for NX workflows. 
+ */
+const cacheCrossCheck: Record<string, CacheSavingsCrossCheck> = {};
+
+/**
+ * Aggregate run statistics for one workflow on one CI system since `sinceIso`.
+ *
+ * Always fills the base fields (run counts, flake rate, duration average and
+ * percentiles, cost). For `system === 'nx'` with at least one run it also
+ * fills the NX-only fields (re-priced template cost, retry rescue counts and
+ * cost, cache hit rate, cache savings) and stores the derived/direct
+ * cache-savings pair in the module-level `cacheCrossCheck` map under
+ * `workflow`.
+ *
+ * @param db       Database handle holding the `runs` and `nx_*` tables.
+ * @param workflow Workflow name matched against `runs.workflow`.
+ * @param system   CI system matched against `runs.system`.
+ * @param sinceIso Lower bound compared against `runs.created_at`.
+ * @returns The statistics object; NX-only fields are absent for CircleCI.
+ */
+function computeWorkflowStats(
+  db: Db,
+  workflow: string,
+  system: 'circleci' | 'nx',
+  sinceIso: string
+): WorkflowStats {
+  const rows = db
+    .prepare(
+      `SELECT id, status, duration_sec, credits_used, cost_usd
+       FROM runs WHERE workflow = ? AND system = ? AND created_at >= ?
+       ORDER BY created_at DESC`
+    )
+    .all(workflow, system, sinceIso) as {
+    id: string;
+    status: string;
+    duration_sec: number;
+    credits_used: number;
+    cost_usd: number;
+  }[];
+
+  // Both status spellings are accepted: upper-case and lower-case variants.
+  const passed = rows.filter((r) => r.status === 'SUCCEEDED' || r.status === 'success').length;
+  const failed = rows.filter((r) => r.status === 'FAILED' || r.status === 'failed').length;
+  const durs = rows.map((r) => r.duration_sec).sort((a, b) => a - b);
+  const totalCost = rows.reduce((s, r) => s + r.cost_usd, 0);
+
+  const stats: WorkflowStats = {
+    runs: rows.length,
+    passed,
+    failed,
+    flakeRate: rows.length ? round((failed / rows.length) * 100, 1) : 0,
+    avgDurSec: rows.length ? Math.round(durs.reduce((a, b) => a + b, 0) / durs.length) : 0,
+    p50Sec: Math.round(percentile(durs, 50)),
+    p90Sec: Math.round(percentile(durs, 90)),
+    avgCost: rows.length ? round(totalCost / rows.length, 2) : 0,
+    totalCost: round(totalCost, 2),
+  };
+
+  if (system === 'nx' && rows.length > 0) {
+    // Re-price every run: templates in DOWNGRADE_TEMPLATES that bill above
+    // TARGET_RATE are scaled down to TARGET_RATE; runs without template rows
+    // keep their recorded cost.
+    let totalMP = 0;
+    const tplStmt = db.prepare(
+      'SELECT template, credits, credit_multiplier FROM nx_template_credits WHERE run_id = ?'
+    );
+    for (const r of rows) {
+      const tpls = tplStmt.all(r.id) as {
+        template: string;
+        credits: number;
+        credit_multiplier: number;
+      }[];
+      if (tpls.length === 0) {
+        totalMP += r.cost_usd;
+      } else {
+        let credits = NX_CREDITS_PER_CIPE;
+        for (const t of tpls) {
+          if (DOWNGRADE_TEMPLATES.has(t.template) && t.credit_multiplier > TARGET_RATE) {
+            credits += (t.credits * TARGET_RATE) / t.credit_multiplier;
+          } else {
+            credits += t.credits;
+          }
+        }
+        totalMP += credits * NX_CREDIT_TO_USD;
+      }
+    }
+    stats.avgCostMP = round(totalMP / rows.length, 2);
+    stats.totalCostMP = round(totalMP, 2);
+
+    // Exact without-retry failure count: count SUCCEEDED CIPEs whose retry stats
+    // show at least one successful task-retry. Those CIPEs would have failed
+    // had retry been disabled.
+    const rescued = db
+      .prepare(
+        `SELECT COUNT(*) AS n FROM runs r
+         JOIN nx_cipe_retry_stats s ON s.run_id = r.id
+         WHERE r.system = 'nx' AND r.workflow = ? AND r.created_at >= ?
+           AND r.status = 'SUCCEEDED' AND s.successful_retries > 0`
+      )
+      .get(workflow, sinceIso) as { n: number };
+
+    const retryTotals = db
+      .prepare(
+        `SELECT
+           SUM(s.total_task_retries) AS task_retries,
+           SUM(s.successful_retries) AS succ_retries
+         FROM runs r JOIN nx_cipe_retry_stats s ON s.run_id = r.id
+         WHERE r.system = 'nx' AND r.workflow = ? AND r.created_at >= ?`
+      )
+      .get(workflow, sinceIso) as { task_retries: number | null; succ_retries: number | null };
+
+    stats.rescuedByRetry = rescued.n;
+    stats.rawFailed = stats.failed + rescued.n;
+    stats.rawFlakeRate = rows.length ? round((stats.rawFailed / rows.length) * 100, 1) : 0;
+    stats.taskRetries = retryTotals.task_retries ?? 0;
+    stats.successfulRetries = retryTotals.succ_retries ?? 0;
+
+    // Estimate retry compute cost: task_retries * avg retry duration * credit rate.
+    // Workspace-wide weighted avg retry duration ≈ 146s/retry (total retry seconds
+    // / total retries from flaky-task analytics). PR/merged branches use the
+    // default 60 credits/min on both linux-js and linux-browsers-js.
+    const AVG_RETRY_SEC = 146;
+    const AVG_CREDITS_PER_MIN = 60;
+    const retryCredits = (stats.taskRetries * AVG_RETRY_SEC * AVG_CREDITS_PER_MIN) / 60; // minutes * credits/min
+    stats.retryCostUsd = round(retryCredits * NX_CREDIT_TO_USD, 2);
+    stats.avgCostWithoutRetry = round(stats.avgCost - stats.retryCostUsd / rows.length, 2);
+
+    // Cache hit rate (unchanged — still from nx_cache_stats, one row per CIPE).
+    const cacheAgg = db
+      .prepare(
+        `SELECT
+           COUNT(DISTINCT s.run_id) AS runs,
+           SUM(s.cache_hits) AS hits,
+           SUM(s.cache_misses) AS misses
+         FROM runs r JOIN nx_cache_stats s ON s.run_id = r.id
+         WHERE r.system = 'nx' AND r.workflow = ? AND r.created_at >= ?`
+      )
+      .get(workflow, sinceIso) as {
+      runs: number;
+      hits: number | null;
+      misses: number | null;
+    };
+
+    if (cacheAgg.runs > 0) {
+      const hits = cacheAgg.hits ?? 0;
+      const misses = cacheAgg.misses ?? 0;
+      stats.cacheRuns = cacheAgg.runs;
+      stats.cacheHitRate = hits + misses > 0 ? round((hits / (hits + misses)) * 100, 1) : 0;
+    }
+
+    // Real per-CIPE cache savings, derived from per-task data in nx_run_tasks.
+    //
+    // Each cache-hit task is priced as if it had run fresh: avg duration of
+    // *the same task-id* across cache-miss observations × this CIPE's actual
+    // credits/min. No uniform-per-CIPE averaging — heavy cache-hit tasks
+    // (sandbox/build/e2e) are correctly valued by their own fresh-run
+    // duration, not blended with light misses (fmt/check).
+    //
+    // IMPORTANT: the miss profile is scoped to the *same workflow* as the
+    // cache-hit. Measurements show test-runner, sandbox, etc. have
+    // materially different fresh-run durations on `next:merged` vs
+    // `normal:prs` (test-runner 89s vs 124s), so a global miss average
+    // produces a ~45% over-count on next:merged and a much larger gap vs
+    // NX's own `hypotheticalNoCacheMs` field.
+    //
+    // Fallback chain for the fresh-run duration:
+    //   1. miss-avg for this task-id within THIS workflow
+    //   2. miss-avg for this task-id across ALL workflows
+    //   3. miss-avg for this target within THIS workflow
+    //   4. miss-avg for this target across ALL workflows
+    //   5. 0 (rare — task-ids never observed missing anywhere)
+    const savedRow = db
+      .prepare(
+        `WITH miss_task_wf AS (
+           SELECT r.workflow, t.task_id, AVG(t.duration_ms) AS avg_ms
+           FROM runs r JOIN nx_run_tasks t ON t.run_id = r.id
+           WHERE r.system = 'nx' AND t.cache_status = 'cache-miss'
+           GROUP BY r.workflow, t.task_id
+         ),
+         miss_task_global AS (
+           SELECT task_id, AVG(duration_ms) AS avg_ms
+           FROM nx_run_tasks WHERE cache_status = 'cache-miss'
+           GROUP BY task_id
+         ),
+         miss_target_wf AS (
+           SELECT r.workflow, t.target, AVG(t.duration_ms) AS avg_ms
+           FROM runs r JOIN nx_run_tasks t ON t.run_id = r.id
+           WHERE r.system = 'nx' AND t.cache_status = 'cache-miss'
+           GROUP BY r.workflow, t.target
+         ),
+         miss_target_global AS (
+           SELECT target, AVG(duration_ms) AS avg_ms
+           FROM nx_run_tasks WHERE cache_status = 'cache-miss'
+           GROUP BY target
+         )
+         SELECT
+           SUM(
+             CASE WHEN t.cache_status LIKE '%cache-hit%' THEN
+               (COALESCE(
+                  mtw.avg_ms, mtg.avg_ms,
+                  mgw.avg_ms, mgg.avg_ms,
+                  0
+                ) / 60000.0)
+               * t.credits_per_min
+               * ${NX_CREDIT_TO_USD}
+             ELSE 0 END
+           ) AS saved_usd,
+           COUNT(DISTINCT t.run_id) AS runs_with_tasks
+         FROM runs r
+         JOIN nx_run_tasks t ON t.run_id = r.id
+         LEFT JOIN miss_task_wf mtw ON mtw.workflow = r.workflow AND mtw.task_id = t.task_id
+         LEFT JOIN miss_task_global mtg ON mtg.task_id = t.task_id
+         LEFT JOIN miss_target_wf mgw ON mgw.workflow = r.workflow AND mgw.target = t.target
+         LEFT JOIN miss_target_global mgg ON mgg.target = t.target
+         WHERE r.system = 'nx' AND r.workflow = ? AND r.created_at >= ?`
+      )
+      .get(workflow, sinceIso) as {
+      saved_usd: number | null;
+      runs_with_tasks: number | null;
+    };
+
+    // null means "no per-task data at all", which is distinct from $0 saved.
+    const derivedUsd =
+      (savedRow.runs_with_tasks ?? 0) > 0 ? round(savedRow.saved_usd ?? 0, 2) : null;
+    if (derivedUsd != null) stats.costSavedByCacheUsd = derivedUsd;
+
+    // Cross-check against NX Cloud's direct
+    // `ciPipelineExecution.duration.hypotheticalNoCacheMs` field. This is a
+    // DURATION (total task-ms saved), so we convert to credits via a
+    // per-CIPE weighted rate from nx_run_tasks cache-miss samples. Falls
+    // back to 60 credits/min (extra_large+) if no miss samples exist.
+    const directRow = db
+      .prepare(
+        `WITH cipe_rate AS (
+           SELECT run_id,
+                  CAST(SUM(duration_ms * credits_per_min) AS REAL)
+                  / NULLIF(SUM(duration_ms), 0) AS weighted_rate
+           FROM nx_run_tasks
+           WHERE cache_status = 'cache-miss'
+           GROUP BY run_id
+         )
+         SELECT SUM(
+           s.hypothetical_no_cache_ms / 60000.0
+           * COALESCE(cr.weighted_rate, 60)
+           * ${NX_CREDIT_TO_USD}
+         ) AS saved_usd
+         FROM runs r
+         JOIN nx_cipe_retry_stats s ON s.run_id = r.id
+         LEFT JOIN cipe_rate cr ON cr.run_id = r.id
+         WHERE r.system = 'nx' AND r.workflow = ? AND r.created_at >= ?
+           AND s.hypothetical_no_cache_ms IS NOT NULL`
+      )
+      .get(workflow, sinceIso) as { saved_usd: number | null };
+
+    const directUsd = (directRow.saved_usd ?? 0) > 0 ? round(directRow.saved_usd ?? 0, 2) : null;
+
+    cacheCrossCheck[workflow] = { derivedUsd, directUsd };
+  }
+
+  return stats;
+}
+
+interface PairedAnalysis {
+  commits: number;
+  bothPassed: number;
+  bothFailed: number;
+  onlyCCIFailed: number;
+  onlyNXFailed: number;
+}
+
+function computePaired(db: Db, workflow: string, sinceIso: string): PairedAnalysis {
+  const rows = db
+    .prepare(
+      `SELECT DISTINCT c.commit_sha AS sha, c.status AS cst, n.status AS nst
+       FROM runs c JOIN runs n ON c.commit_sha = n.commit_sha
+       WHERE c.workflow = ? AND c.system = 'circleci' AND c.created_at >= ?
+         AND n.workflow = ? AND n.system = 'nx' AND n.created_at >= ?
+         AND c.commit_sha IS NOT NULL`
+    )
+    .all(workflow, sinceIso, workflow, sinceIso) as { sha: string; cst: string; nst: string }[];
+
+  // Dedupe by commit (take first hit per commit).
+  const seen = new Set<string>();
+  const unique = rows.filter((r) => {
+    if (seen.has(r.sha)) return false;
+    seen.add(r.sha);
+    return true;
+  });
+
+  return {
+    commits: unique.length,
+    bothPassed: unique.filter((r) => r.cst === 'success' && r.nst === 'SUCCEEDED').length,
+    bothFailed: unique.filter((r) => r.cst === 'failed' && r.nst === 'FAILED').length,
+    onlyCCIFailed: unique.filter((r) => r.cst === 'failed' && r.nst === 'SUCCEEDED').length,
+    onlyNXFailed: unique.filter((r) => r.cst === 'success' && r.nst === 'FAILED').length,
+  };
+}
+
+interface RetryData {
+  totalReruns: number;
+  totalRescues: number;
+  retryTimeMinutes: number;
+  activeFlakyTasks: number;
+  topTasks: { task: string; reruns: number; rescues: number; retryMin: number }[];
+}
+
+/**
+ * Load the most recent flaky-task KPI snapshot for `rangeDays` together with
+ * the per-task rows of that same snapshot, ordered by rerun count.
+ *
+ * @returns The snapshot data, or `null` when no KPI row exists for the range.
+ */
+function computeRetryData(db: Db, rangeDays: number): RetryData | null {
+  const kpi = db
+    .prepare(
+      `SELECT * FROM nx_flaky_task_kpis WHERE range_days = ? ORDER BY snapshot_date DESC LIMIT 1`
+    )
+    .get(rangeDays) as
+    | {
+        snapshot_date: string;
+        total_reruns: number;
+        total_rescues: number;
+        retry_time_seconds: number;
+        active_flaky_tasks: number;
+      }
+    | undefined;
+
+  if (!kpi) return null;
+
+  const tasks = db
+    .prepare(
+      `SELECT project, target, total_reruns, total_rescues, retry_time_seconds
+       FROM nx_flaky_task_snapshots
+       WHERE snapshot_date = ? AND range_days = ?
+       ORDER BY total_reruns DESC`
+    )
+    .all(kpi.snapshot_date, rangeDays) as {
+    project: string;
+    target: string;
+    total_reruns: number;
+    total_rescues: number;
+    retry_time_seconds: number;
+  }[];
+
+  return {
+    totalReruns: kpi.total_reruns,
+    totalRescues: kpi.total_rescues,
+    retryTimeMinutes: Math.round(kpi.retry_time_seconds / 60),
+    activeFlakyTasks: kpi.active_flaky_tasks,
+    topTasks: tasks.map((t) => ({
+      task: `${t.project}:${t.target}`,
+      reruns: t.total_reruns,
+      rescues: t.total_rescues,
+      retryMin: Math.round(t.retry_time_seconds / 60),
+    })),
+  };
+}
+
+/**
+ * Return the `limit` most frequently failing tasks for a workflow/system
+ * since `sinceIso`, ordered by failure count (descending) then task name.
+ */
+function computeTopFlaky(
+  db: Db,
+  workflow: string,
+  system: 'circleci' | 'nx',
+  sinceIso: string,
+  limit = 10
+): { task: string; fails: number }[] {
+  const rows = db
+    .prepare(
+      `SELECT ft.task_name AS task, COUNT(*) AS fails
+       FROM failed_tasks ft JOIN runs r ON ft.run_id = r.id
+       WHERE r.workflow = ? AND r.system = ? AND r.created_at >= ?
+       GROUP BY ft.task_name
+       ORDER BY fails DESC, task ASC
+       LIMIT ?`
+    )
+    .all(workflow, system, sinceIso, limit) as { task: string; fails: number }[];
+  return rows;
+}
+
+/**
+ * Sum CircleCI task failures into three fixed buckets (chromatic, benchmark,
+ * eslint sub-jobs), each with its hard-coded reason string. Tasks matching no
+ * bucket are ignored; buckets with zero failures are omitted from the result.
+ */
+function computeCCIOnlyJobs(
+  db: Db,
+  workflow: string,
+  sinceIso: string
+): { job: string; fails: number; reason: string }[] {
+  const rows = db
+    .prepare(
+      `SELECT ft.task_name AS task, COUNT(*) AS fails
+       FROM failed_tasks ft JOIN runs r ON ft.run_id = r.id
+       WHERE r.workflow = ? AND r.system = 'circleci' AND r.created_at >= ?
+       GROUP BY ft.task_name`
+    )
+    .all(workflow, sinceIso) as { task: string; fails: number }[];
+
+  const buckets: Record<string, { fails: number; reason: string }> = {
+    '*---chromatic (per-sandbox)': { fails: 0, reason: 'Chromatic disabled on NX side' },
+    'benchmark-packages': { fails: 0, reason: 'Benchmark target disabled on NX' },
+    'eslint---oxfmt-validation / ---prettier-validation': {
+      fails: 0,
+      reason: 'CCI-specific eslint sub-jobs',
+    },
+  };
+
+  // First matching rule wins; the chromatic substring check runs before the others.
+  for (const r of rows) {
+    if (r.task.includes('chromatic')) buckets['*---chromatic (per-sandbox)'].fails += r.fails;
+    else if (r.task === 'benchmark-packages') buckets['benchmark-packages'].fails += r.fails;
+    else if (r.task.startsWith('eslint---'))
+      buckets['eslint---oxfmt-validation / ---prettier-validation'].fails += r.fails;
+  }
+
+  return Object.entries(buckets)
+    .filter(([, v]) => v.fails > 0)
+    .map(([job, { fails, reason }]) => ({ job, fails, reason }));
+}
+
+/** Round `n` to `digits` decimal places (default 2) using `Math.round`. */
+function round(n: number, digits = 2): number {
+  const m = Math.pow(10, digits);
+  return Math.round(n * m) / m;
+}
+
+// ─── Object literal serialization ────────────────────────────────────────────
+
+/** Serialize a JS value to a TypeScript object literal with stable key order.
+ * Object keys that are not valid identifiers are emitted as quoted strings,
+ * so the output stays valid TypeScript for any key.
+ */
+function serialize(value: unknown, indent = 0): string {
+  const pad = ' '.repeat(indent);
+  const innerPad = ' '.repeat(indent + 1);
+
+  if (value === null) return 'null';
+  if (typeof value === 'string') return JSON.stringify(value);
+  if (typeof value === 'number' || typeof value === 'boolean') return String(value);
+  if (Array.isArray(value)) {
+    if (value.length === 0) return '[]';
+    const inner = value.map((v) => innerPad + serialize(v, indent + 1)).join(',\n');
+    return `[\n${inner},\n${pad}]`;
+  }
+  if (typeof value === 'object') {
+    const entries = Object.entries(value as Record<string, unknown>);
+    if (entries.length === 0) return '{}';
+    const inner = entries
+      .map(([k, v]) => `${innerPad}${serializeKey(k)}: ${serialize(v, indent + 1)}`)
+      .join(',\n');
+    return `{\n${inner},\n${pad}}`;
+  }
+  // Anything else (undefined, functions, symbols, bigint) has no literal form here.
+  return 'undefined';
+}
+
+/** Emit `key` bare when it is a valid identifier, otherwise as a quoted string. */
+function serializeKey(key: string): string {
+  return /^[A-Za-z_$][A-Za-z0-9_$]*$/.test(key) ? key : JSON.stringify(key);
+}
+
+// ─── Canvas rewrite ──────────────────────────────────────────────────────────
+
+/**
+ * Replace the value of `const NAME: ... = <literal>;` in `src`. Matches the
+ * first `{` or `[` after `= ` and finds its balanced close while ignoring
+ * characters inside string literals. Returns unchanged `src` if `NAME` is
+ * not found.
+ *
+ * Only quote characters are tracked: comments and template-literal
+ * interpolations inside the existing value are not parsed, so brackets or
+ * quotes appearing there are counted like any other character.
+ *
+ * @throws Error when no balanced closing bracket is found before end of input.
+ */
+function replaceConstValue(src: string, name: string, newLiteral: string): string {
+  // Match both `const NAME:` (typed) and `const NAME =` (inferred).
+  const typedIdx = src.indexOf(`const ${name}:`);
+  const inferredIdx = src.indexOf(`const ${name} =`);
+  const declIdx =
+    typedIdx !== -1 && (inferredIdx === -1 || typedIdx < inferredIdx) ? typedIdx : inferredIdx;
+  if (declIdx === -1) {
+    console.warn(` ⚠ const ${name} not found — skipped`);
+    return src;
+  }
+
+  // Find `= ` after the declaration.
+  const eqIdx = src.indexOf('=', declIdx);
+  if (eqIdx === -1) return src;
+
+  // Find the first `{` or `[` after `=`.
+  let valueStart = eqIdx + 1;
+  while (valueStart < src.length && src[valueStart] !== '{' && src[valueStart] !== '[') {
+    valueStart++;
+  }
+  if (valueStart >= src.length) return src;
+
+  const open = src[valueStart];
+  const close = open === '{' ? '}' : ']';
+
+  // Walk forward matching balanced braces, skipping string literals.
+  let depth = 1;
+  let i = valueStart + 1;
+  let inString: string | null = null;
+  while (i < src.length && depth > 0) {
+    const c = src[i];
+    if (inString) {
+      if (c === '\\') {
+        // Skip the escaped character so an escaped quote does not end the string.
+        i += 2;
+        continue;
+      }
+      if (c === inString) inString = null;
+    } else {
+      if (c === '"' || c === "'" || c === '`') inString = c;
+      else if (c === open) depth++;
+      else if (c === close) depth--;
+    }
+    i++;
+  }
+  if (depth !== 0) throw new Error(`unbalanced braces after const ${name}`);
+
+  const valueEnd = i; // position just past the closing brace
+  return src.slice(0, valueStart) + newLiteral + src.slice(valueEnd);
+}
+
+// ─── Main ────────────────────────────────────────────────────────────────────
+
+/**
+ * Compute every dataset from the database, print a summary plus the
+ * cache-savings cross-check, then rewrite the matching `const` literals in
+ * the canvas file at CANVAS_PATH.
+ */
+function main() {
+  const db = new DatabaseSync(DB_PATH);
+
+  console.log(`Reading from ${DB_PATH}`);
+  console.log(`Since: ${sinceArg}`);
+  console.log(`Flaky range: ${flakyRange}d\n`);
+
+  const nextMerged = {
+    circleci: computeWorkflowStats(db, 'next:merged', 'circleci', sinceArg),
+    nx: computeWorkflowStats(db, 'next:merged', 'nx', sinceArg),
+  };
+  const normalPrs = {
+    circleci: computeWorkflowStats(db, 'normal:prs', 'circleci', sinceArg),
+    nx: computeWorkflowStats(db, 'normal:prs', 'nx', sinceArg),
+  };
+  const pairedNext = computePaired(db, 'next:merged', sinceArg);
+  const pairedPrs = computePaired(db, 'normal:prs', sinceArg);
+  const retryData = computeRetryData(db, flakyRange);
+  const topFlakyPrs = {
+    circleci: computeTopFlaky(db, 'normal:prs', 'circleci', sinceArg),
+    nx: computeTopFlaky(db, 'normal:prs', 'nx', sinceArg),
+  };
+  const cciOnlyJobs = computeCCIOnlyJobs(db, 'normal:prs', sinceArg);
+
+  console.log(`Computed:`);
+  console.log(
+    ` next:merged — CCI ${nextMerged.circleci.runs} runs, NX ${nextMerged.nx.runs} runs`
+  );
+  console.log(` normal:prs — CCI ${normalPrs.circleci.runs} runs, NX ${normalPrs.nx.runs} runs`);
+  console.log(` paired — ${pairedPrs.commits} on PRs, ${pairedNext.commits} on next:merged`);
+  console.log(
+    ` retry data — ${retryData ? `${retryData.totalRescues}/${retryData.totalReruns} rescues` : 'no snapshot found'}`
+  );
+  console.log(
+    ` top flaky — ${topFlakyPrs.circleci.length} CCI tasks, ${topFlakyPrs.nx.length} NX tasks`
+  );
+  console.log(` CCI-only — ${cciOnlyJobs.length} job categories\n`);
+
+  // Cross-check real (derived) vs direct cache savings. Derived value is
+  // what ships into the canvas; the direct `hypotheticalNoCacheMs` value is
+  // shown for methodology sanity only.
+  console.log(`Cache savings cross-check (derived vs direct NX field):`);
+  for (const workflow of ['next:merged', 'normal:prs']) {
+    const { derivedUsd: d, directUsd: x } = cacheCrossCheck[workflow] ?? {};
+    if (d == null || x == null) {
+      console.log(
+        ` ${workflow.padEnd(12)} — ${d == null ? 'no derived' : 'no direct'} value (backfill may be incomplete)`
+      );
+      continue;
+    }
+    // Math.max(d, 1) keeps the percentage finite when the derived value is tiny.
+    const pct = (Math.abs(x - d) / Math.max(d, 1)) * 100;
+    const flag = pct > 10 ? ' ⚠ delta >10%' : '';
+    console.log(
+      ` ${workflow.padEnd(12)} derived $${d.toFixed(2)} direct $${x.toFixed(2)} Δ${pct.toFixed(1)}%${flag}`
+    );
+  }
+  console.log('');
+
+  db.close();
+
+  console.log(`Rewriting ${CANVAS_PATH}`);
+  let canvas = readFileSync(CANVAS_PATH, 'utf-8');
+
+  canvas = replaceConstValue(canvas, 'NEXT_MERGED', serialize(nextMerged));
+  canvas = replaceConstValue(canvas, 'NORMAL_PRS', serialize(normalPrs));
+  canvas = replaceConstValue(canvas, 'PAIRED_NEXT', serialize(pairedNext));
+  canvas = replaceConstValue(canvas, 'PAIRED_PRS', serialize(pairedPrs));
+  // Without a snapshot the existing retry literal in the canvas is left as-is.
+  if (retryData) {
+    canvas = replaceConstValue(canvas, 'NX_AUTORETRIES_30D', serialize(retryData));
+  }
+  canvas = replaceConstValue(canvas, 'TOP_FLAKY_PRS', serialize(topFlakyPrs));
+  canvas = replaceConstValue(canvas, 'CCI_ONLY_JOBS', serialize(cciOnlyJobs));
+
+  writeFileSync(CANVAS_PATH, canvas);
+  console.log('Done.');
+}
+
+main();
diff --git a/scripts/investigate-nx-cache.ts b/scripts/investigate-nx-cache.ts
new file mode 100644
index 000000000000..4ce36c01877f
--- /dev/null
+++ b/scripts/investigate-nx-cache.ts
@@ -0,0 +1,217 @@
+/**
+ * Investigation script (read-only): dumps the full /cipes/{id}/analysis response
+ * and /runs/{runId} response for a sample NX CIPE so we can see whether NX
+ * exposes a direct "cache savings" field anywhere.
+ *
+ * Usage:
+ *   NX_CLOUD_SESSION=... yarn jiti scripts/investigate-nx-cache.ts [cipeId]
+ *
+ * Prints the full JSON tree, highlighting any keys that look cache-related.
+ */
+import { readFileSync } from 'node:fs';
+import { join } from 'node:path';
+// @ts-expect-error - no type declarations for ini
+import { parse as parseIni } from 'ini';
+
+const NX_CLOUD_URL = 'https://cloud.nx.app';
+const NX_CLOUD_ID = '6929fbef73e98d8094d2a343';
+
+/**
+ * Build the request headers for the NX Cloud API. The access token comes from
+ * NX_CLOUD_ACCESS_TOKEN; the personal access token comes from NX_CLOUD_PAT or,
+ * failing that, from the `nxcloud.ini` file under XDG_CONFIG_HOME or
+ * `$HOME/.config`. Either header is omitted when its value is unavailable.
+ */
+function getNxCloudHeaders(): Record<string, string> {
+  const headers: Record<string, string> = {
+    'Content-Type': 'application/json',
+    'Nx-Cloud-Id': NX_CLOUD_ID,
+  };
+  const accessToken = process.env.NX_CLOUD_ACCESS_TOKEN;
+  if (accessToken) headers['Authorization'] = accessToken;
+
+  let pat = process.env.NX_CLOUD_PAT;
+  if (!pat) {
+    // Best effort: a missing or unreadable ini file just means no PAT header.
+    try {
+      const iniPath = process.env.XDG_CONFIG_HOME
+        ? join(process.env.XDG_CONFIG_HOME, 'nxcloud', 'nxcloud.ini')
+        : join(process.env.HOME!, '.config', 'nxcloud', 'nxcloud.ini');
+      const ini = parseIni(readFileSync(iniPath, 'utf-8'));
+      pat =
+        ini?.[NX_CLOUD_URL]?.personalAccessToken ??
+        ini?.['https://cloud.nx.app']?.personalAccessToken;
+    } catch {}
+  }
+  if (pat) headers['Nx-Cloud-Personal-Access-Token'] = pat;
+
+  return headers;
+}
+
+/**
+ * Walk an object tree and return `path = value` lines for every key whose
+ * lower-cased name contains 'cache', 'saving', 'saved' or 'ttg'. Only keys are
+ * matched, never values. Objects and arrays are shown as `<object>` /
+ * `<array>`; for arrays only the first element is descended into.
+ */
+function findCacheKeys(obj: unknown, path: string[] = [], out: string[] = []): string[] {
+  if (obj === null || obj === undefined) return out;
+  if (typeof obj !== 'object') return out;
+  if (Array.isArray(obj)) {
+    // Sample first element only.
+    if (obj.length > 0) findCacheKeys(obj[0], [...path, '[0]'], out);
+    return out;
+  }
+  for (const [k, v] of Object.entries(obj as Record<string, unknown>)) {
+    const lk = k.toLowerCase();
+    if (
+      lk.includes('cache') ||
+      lk.includes('saving') ||
+      lk.includes('saved') ||
+      lk.includes('ttg')
+    ) {
+      // `typeof null` is 'object', so null is excluded explicitly and printed as `null`.
+      const shown =
+        v !== null && typeof v === 'object'
+          ? `<${Array.isArray(v) ? 'array' : 'object'}>`
+          : JSON.stringify(v);
+      out.push(`${[...path, k].join('.')} = ${shown}`);
+    }
+    findCacheKeys(v, [...path, k], out);
+  }
+  return out;
+}
+
+/**
+ * Dump the analysis, run-search, run-detail and dashboard responses for one
+ * CIPE (argv[2], or a built-in sample id) and list their cache-related keys.
+ * Exits with status 1 when NX_CLOUD_SESSION is unset, when the CIPE has no
+ * run group, or when the analysis request fails.
+ */
+async function main() {
+  const session = process.env.NX_CLOUD_SESSION;
+  if (!session) {
+    console.error('NX_CLOUD_SESSION not set');
+    process.exit(1);
+  }
+
+  const cipeId = process.argv[2] ?? '69df84f0e9d67243b3604a31';
+  console.log(`Investigating CIPE ${cipeId}`);
+
+  // First, find the runGroupName for this CIPE.
+  const details = await fetch(
+    `${NX_CLOUD_URL}/nx-cloud/mcp-context/pipeline-executions/${cipeId}`,
+    {
+      headers: getNxCloudHeaders(),
+    }
+  ).then((r) => r.json());
+  const rg = details?.runGroups?.[0]?.runGroupName;
+  console.log(` runGroup: ${rg}`);
+  console.log(` CIPE keys: ${Object.keys(details).join(', ')}`);
+  // Without a run group the analysis URL would be built with "undefined".
+  if (!rg) {
+    console.error(' no run group found for this CIPE');
+    process.exit(1);
+  }
+
+  // Analysis endpoint.
+  console.log(`\n─── /cipes/${cipeId}/analysis ───`);
+  const analysisUrl = `${NX_CLOUD_URL}/cipes/${cipeId}/analysis?runGroup=${encodeURIComponent(rg)}&_data=routes%2F_auth.cipes.%24cipeId.analysis`;
+  const analysisRes = await fetch(analysisUrl, {
+    headers: { Cookie: `_nxCloudSession=${session}` },
+  });
+  if (!analysisRes.ok) {
+    console.log(` HTTP ${analysisRes.status}`);
+    process.exit(1);
+  }
+  const analysis = await analysisRes.json();
+  console.log(` top-level keys: ${Object.keys(analysis).join(', ')}`);
+  if (analysis?.ciPipelineExecution) {
+    console.log(
+      ` ciPipelineExecution keys: ${Object.keys(analysis.ciPipelineExecution).join(', ')}`
+    );
+  }
+  if (analysis?.ciPipelineExecution?.ttgImpactMetadata) {
+    console.log(
+      ` ttgImpactMetadata keys: ${Object.keys(analysis.ciPipelineExecution.ttgImpactMetadata).join(', ')}`
+    );
+    console.log(` ttgImpactMetadata:`);
+    console.log(
+      JSON.stringify(analysis.ciPipelineExecution.ttgImpactMetadata, null, 2).slice(0, 4000)
+    );
+  }
+  if (analysis?.ciPipelineExecution?.duration) {
+    console.log(`\n duration: ${JSON.stringify(analysis.ciPipelineExecution.duration, null, 2)}`);
+  }
+  if (analysis?.ciPipelineExecution?.runGroups) {
+    const rgs = analysis.ciPipelineExecution.runGroups;
+    if (Array.isArray(rgs) && rgs[0]) {
+      console.log(`\n runGroups[0] keys: ${Object.keys(rgs[0]).join(', ')}`);
+      if (rgs[0].duration) {
+        console.log(` runGroups[0].duration: ${JSON.stringify(rgs[0].duration, null, 2)}`);
+      }
+    }
+  }
+  if (analysis?.computeCreditUsages) {
+    console.log(` computeCreditUsages: ${JSON.stringify(analysis.computeCreditUsages)}`);
+  }
+  if (analysis?.resourceClasses) {
+    console.log(` resourceClasses: ${JSON.stringify(analysis.resourceClasses)}`);
+  }
+
+  console.log(`\n ─── cache-related keys anywhere in /cipes/${cipeId}/analysis ───`);
+  for (const line of findCacheKeys(analysis)) {
+    console.log(` ${line}`);
+  }
+
+  // Runs search + detail.
+  const runSearch = await fetch(`${NX_CLOUD_URL}/nx-cloud/mcp-context/runs/search`, {
+    method: 'POST',
+    headers: getNxCloudHeaders(),
+    body: JSON.stringify({ pipelineExecutionId: cipeId, limit: 5 }),
+  }).then((r) => r.json());
+
+  const mainRun = runSearch.items?.[0];
+  if (!mainRun) {
+    console.log('\n no runs found');
+    return;
+  }
+  console.log(`\n─── /runs/${mainRun.id} ───`);
+  console.log(` run keys (from search): ${Object.keys(mainRun).join(', ')}`);
+
+  const runDetail = await fetch(`${NX_CLOUD_URL}/nx-cloud/mcp-context/runs/${mainRun.id}`, {
+    headers: getNxCloudHeaders(),
+  }).then((r) => r.json());
+  console.log(` run detail top-level keys: ${Object.keys(runDetail).join(', ')}`);
+  if (Array.isArray(runDetail?.tasks) && runDetail.tasks[0]) {
+    console.log(` tasks[0] keys: ${Object.keys(runDetail.tasks[0]).join(', ')}`);
+    console.log(` tasks[0]: ${JSON.stringify(runDetail.tasks[0], null, 2)}`);
+    console.log(` total tasks: ${runDetail.tasks.length}`);
+    const cacheHitCount = runDetail.tasks.filter((t: any) =>
+      (t.cacheStatus ?? '').includes('cache-hit')
+    ).length;
+    const cacheMissCount = runDetail.tasks.filter(
+      (t: any) => t.cacheStatus === 'cache-miss'
+    ).length;
+    console.log(` hits: ${cacheHitCount}, misses: ${cacheMissCount}`);
+
+    const statuses: Record<string, number> = {};
+    for (const t of runDetail.tasks) {
+      const cs = t.cacheStatus ?? '(none)';
+      statuses[cs] = (statuses[cs] ?? 0) + 1;
+    }
+    console.log(` cacheStatus distribution: ${JSON.stringify(statuses)}`);
+
+    // Compare durations for cache-hit vs cache-miss.
+    const hits = runDetail.tasks.filter((t: any) => (t.cacheStatus ?? '').includes('cache-hit'));
+    const misses = runDetail.tasks.filter((t: any) => t.cacheStatus === 'cache-miss');
+    const hitsMs = hits.reduce((s: number, t: any) => s + (t.durationMs ?? 0), 0);
+    const missMs = misses.reduce((s: number, t: any) => s + (t.durationMs ?? 0), 0);
+    console.log(
+      ` sum(durationMs) for hits: ${hitsMs} (avg ${Math.round(hitsMs / Math.max(hits.length, 1))}ms)`
+    );
+    console.log(
+      ` sum(durationMs) for misses: ${missMs} (avg ${Math.round(missMs / Math.max(misses.length, 1))}ms)`
+    );
+    console.log(` sample hit task: ${JSON.stringify(hits[0] ?? null, null, 2)}`);
+  }
+
+  console.log(`\n ─── cache-related keys anywhere in /runs/${mainRun.id} ───`);
+  for (const line of findCacheKeys(runDetail)) {
+    console.log(` ${line}`);
+  }
+
+  // Dashboard /cipes/{id} page route (sometimes has extra summary data).
+  console.log(`\n─── /cipes/${cipeId}?_data=routes/_auth.cipes.$cipeId ───`);
+  const summaryUrl = `${NX_CLOUD_URL}/cipes/${cipeId}?_data=routes%2F_auth.cipes.%24cipeId`;
+  const summaryRes = await fetch(summaryUrl, { headers: { Cookie: `_nxCloudSession=${session}` } });
+  if (summaryRes.ok) {
+    const summary = await summaryRes.json();
+    console.log(` top-level keys: ${Object.keys(summary).join(', ')}`);
+    console.log(`\n ─── cache-related keys anywhere in /cipes/${cipeId} ───`);
+    for (const line of findCacheKeys(summary)) {
+      console.log(` ${line}`);
+    }
+  } else {
+    console.log(` HTTP ${summaryRes.status}`);
+  }
+}
+
+main().catch((e) => {
+  console.error('Fatal:', e);
+  process.exit(1);
+});
diff --git a/scripts/run-backfill-only.ts b/scripts/run-backfill-only.ts
new file mode 100644
index 000000000000..c0c64bfb3e99
--- /dev/null
+++ b/scripts/run-backfill-only.ts
@@ -0,0 +1,288 @@
+/**
+ * One-off: run just the new backfills without a full re-sync.
+ * Runs:
+ *   1. backfillNxCipeRetryStats (picks up hypothetical_no_cache_ms on pre-existing rows)
+ *   2. backfillNxRunTasks (populates the new nx_run_tasks table)
+ *
+ * Safe to re-run — both backfills are idempotent.
+ *
+ * Usage:
+ *   NX_CLOUD_SESSION=... yarn jiti scripts/run-backfill-only.ts
+ */
+import { join } from 'node:path';
+import { DatabaseSync } from 'node:sqlite';
+import { readFileSync } from 'node:fs';
+// @ts-expect-error - no type declarations for ini
+import { parse as parseIni } from 'ini';
+
+const NX_CLOUD_URL = 'https://cloud.nx.app';
+const NX_CLOUD_ID = '6929fbef73e98d8094d2a343';
+const DB_PATH = join(import.meta.dirname, 'ci-eval.db');
+
+/**
+ * Build the request headers for the NX Cloud API. The access token comes from
+ * NX_CLOUD_ACCESS_TOKEN; the personal access token comes from NX_CLOUD_PAT or,
+ * failing that, from the `nxcloud.ini` file under XDG_CONFIG_HOME or
+ * `$HOME/.config`. Either header is omitted when its value is unavailable.
+ */
+function getNxCloudHeaders(): Record<string, string> {
+  const headers: Record<string, string> = {
+    'Content-Type': 'application/json',
+    'Nx-Cloud-Id': NX_CLOUD_ID,
+  };
+  const accessToken = process.env.NX_CLOUD_ACCESS_TOKEN;
+  if (accessToken) headers['Authorization'] = accessToken;
+
+  let pat = process.env.NX_CLOUD_PAT;
+  if (!pat) {
+    // Best effort: a missing or unreadable ini file just means no PAT header.
+    try {
+      const iniPath = process.env.XDG_CONFIG_HOME
+        ? join(process.env.XDG_CONFIG_HOME, 'nxcloud', 'nxcloud.ini')
+        : join(process.env.HOME!, '.config', 'nxcloud', 'nxcloud.ini');
+      const ini = parseIni(readFileSync(iniPath, 'utf-8'));
+      pat =
+        ini?.[NX_CLOUD_URL]?.personalAccessToken ??
+        ini?.['https://cloud.nx.app']?.personalAccessToken;
+    } catch {}
+  }
+  if (pat) headers['Nx-Cloud-Personal-Access-Token'] = pat;
+
+  return headers;
+}
+
+/**
+ * Call an NX Cloud `mcp-context` endpoint and return the parsed JSON body.
+ * Sends a POST with a JSON body when `body` is truthy, otherwise a GET.
+ *
+ * @throws Error `HTTP <status>` when the response is not ok.
+ */
+async function nxCloudFetch<T>(path: string, body?: unknown): Promise<T> {
+  const res = await fetch(`${NX_CLOUD_URL}/nx-cloud/mcp-context${path}`, {
+    method: body ? 'POST' : 'GET',
+    headers: getNxCloudHeaders(),
+    body: body ? JSON.stringify(body) : undefined,
+  });
+  if (!res.ok) throw new Error(`HTTP ${res.status}`);
+  return res.json();
+}
+
+interface NxPipelineExecution {
+  runGroups: { runGroupName: string }[];
+}
+
+/**
+ * Fetch retry statistics and `hypotheticalNoCacheMs` for one CIPE from the
+ * dashboard analysis route, authenticated with the NX_CLOUD_SESSION cookie.
+ * Returns `{ retryStats: null }` when the session is unset, the request
+ * fails, or the response carries no `taskRetryStats`.
+ */
+async function fetchNxDashboardCipeAnalysis(
+  cipeId: string,
+  runGroupName: string
+): Promise<{
+  retryStats: {
+    totalTasks: number;
+    totalTaskRetries: number;
+    successfulRetries: number;
+    failedRetries: number;
+    hypotheticalNoCacheMs: number | null;
+  } | null;
+}> {
+  const session = process.env.NX_CLOUD_SESSION;
+  if (!session) return { retryStats: null };
+  try {
+    const url = `${NX_CLOUD_URL}/cipes/${cipeId}/analysis?runGroup=${encodeURIComponent(runGroupName)}&_data=routes%2F_auth.cipes.%24cipeId.analysis`;
+    const res = await fetch(url, { headers: { Cookie: `_nxCloudSession=${session}` } });
+    if (!res.ok) return { retryStats: null };
+    const data = await res.json();
+    const retryRaw = data?.ciPipelineExecution?.ttgImpactMetadata?.taskRetryStats;
+    const hypoMs = data?.ciPipelineExecution?.duration?.hypotheticalNoCacheMs;
+    // Only finite numbers are stored; anything else becomes null.
+    const hypothetical =
+      typeof hypoMs === 'number' && Number.isFinite(hypoMs) ? Math.round(hypoMs) : null;
+    if (!retryRaw) return { retryStats: null };
+    return {
+      retryStats: {
+        totalTasks: retryRaw.totalTasks ?? 0,
+        totalTaskRetries: retryRaw.totalTaskRetries ?? 0,
+        successfulRetries: retryRaw.successfulRetries ?? 0,
+        failedRetries: retryRaw.failedRetries ?? 0,
+        hypotheticalNoCacheMs: hypothetical,
+      },
+    };
+  } catch {
+    return { retryStats: null };
+  }
+}
+
+const LINUX_JS_TARGETS = new Set(['compile', 'check', 'lint', 'knip', 'fmt']);
+/** Map a target name to its agent template: the listed targets use `linux-js`, all others `linux-browsers-js`. */
+function inferAgentTemplate(target: string): 'linux-js' | 'linux-browsers-js' {
+  return LINUX_JS_TARGETS.has(target) ? 'linux-js' : 'linux-browsers-js';
+}
+
+/**
+ * Fetch and store retry stats for every NX run that has no
+ * `nx_cipe_retry_stats` row or whose `hypothetical_no_cache_ms` is NULL.
+ * Does nothing when NX_CLOUD_SESSION is unset. Failures for one CIPE are
+ * counted and do not stop the loop.
+ */
+async function backfillNxCipeRetryStats(db: InstanceType<typeof DatabaseSync>) {
+  if (!process.env.NX_CLOUD_SESSION) {
+    console.log(`NX retry backfill: skipped (NX_CLOUD_SESSION not set)`);
+    return;
+  }
+  const missing = db
+    .prepare(
+      `SELECT r.id FROM runs r
+       LEFT JOIN nx_cipe_retry_stats s ON s.run_id = r.id
+       WHERE r.system = 'nx'
+         AND (s.run_id IS NULL OR s.hypothetical_no_cache_ms IS NULL)
+       ORDER BY r.created_at DESC`
+    )
+    .all() as { id: string }[];
+  if (missing.length === 0) {
+    console.log(`NX retry backfill: up to date`);
+    return;
+  }
+  console.log(`NX retry backfill: fetching for ${missing.length} CIPEs...`);
+
+  const upsert = db.prepare(
+    `INSERT OR REPLACE INTO nx_cipe_retry_stats
+       (run_id, total_tasks, total_task_retries, successful_retries, failed_retries, hypothetical_no_cache_ms)
+     VALUES (?, ?, ?, ?, ?, ?)`
+  );
+
+  let stored = 0,
+    apiFailures = 0,
+    skipped = 0;
+  const startTime = Date.now();
+  for (let i = 0; i < missing.length; i++) {
+    const { id } = missing[i];
+    try {
+      const details = await nxCloudFetch<NxPipelineExecution>(`/pipeline-executions/${id}`);
+      const rg = details.runGroups[0]?.runGroupName;
+      // A CIPE without a run group is counted as skipped, as in backfillNxRunTasks.
+      if (!rg) {
+        skipped++;
+        continue;
+      }
+      const { retryStats: rs } = await fetchNxDashboardCipeAnalysis(id, rg);
+      if (!rs) {
+        apiFailures++;
+        continue;
+      }
+      upsert.run(
+        id,
+        rs.totalTasks,
+        rs.totalTaskRetries,
+        rs.successfulRetries,
+        rs.failedRetries,
+        rs.hypotheticalNoCacheMs
+      );
+      stored++;
+    } catch {
+      apiFailures++;
+    }
+    if (i % 25 === 24) {
+      const rate = ((i + 1) / ((Date.now() - startTime) / 1000)).toFixed(1);
+      console.log(` ${i + 1}/${missing.length} (${rate} CIPEs/sec)`);
+    }
+  }
+  console.log(
+    `NX retry backfill: stored ${stored} / ${missing.length}` +
+      (skipped > 0 ? `, skipped ${skipped}` : '') +
+      (apiFailures > 0 ? ` (${apiFailures} API failures)` : '')
+  );
+}
+
+/**
+ * Populate `nx_run_tasks` for every NX run that has no task rows yet, using
+ * the first run returned by the run search for each CIPE. Serve/run-registry
+ * tasks and tasks that are neither cache hits nor cache misses are not stored.
+ *
+ * Each CIPE's rows are written in one transaction. A run is selected only
+ * while it has no rows, so a partial insert would never be retried; on any
+ * failure the CIPE's rows are rolled back and it stays eligible next time.
+ */
+async function backfillNxRunTasks(db: InstanceType<typeof DatabaseSync>) {
+  const missing = db
+    .prepare(
+      `SELECT r.id FROM runs r
+       WHERE r.system = 'nx'
+         AND NOT EXISTS (SELECT 1 FROM nx_run_tasks t WHERE t.run_id = r.id)
+       ORDER BY r.created_at DESC`
+    )
+    .all() as { id: string }[];
+
+  if (missing.length === 0) {
+    console.log(`NX run-tasks backfill: up to date`);
+    return;
+  }
+
+  console.log(`NX run-tasks backfill: fetching for ${missing.length} CIPEs...`);
+
+  const rateStmt = db.prepare(
+    `SELECT template, credit_multiplier FROM nx_template_credits WHERE run_id = ?`
+  );
+
+  const insert = db.prepare(
+    `INSERT OR IGNORE INTO nx_run_tasks
+       (run_id, task_id, project, target, duration_ms, cache_status, agent_template, credits_per_min)
+     VALUES (?, ?, ?, ?, ?, ?, ?, ?)`
+  );
+
+  let stored = 0,
+    apiFailures = 0,
+    skipped = 0,
+    totalTaskRows = 0;
+  const startTime = Date.now();
+
+  for (let i = 0; i < missing.length; i++) {
+    const { id } = missing[i];
+    // True only between BEGIN and COMMIT, so the catch block rolls back only
+    // when a transaction is actually open.
+    let inTransaction = false;
+    try {
+      const runSearch = await nxCloudFetch<{ items: { id: string; command?: string }[] }>(
+        '/runs/search',
+        { pipelineExecutionId: id, limit: 5 }
+      );
+      const mainRun = runSearch.items[0];
+      if (!mainRun) {
+        skipped++;
+        continue;
+      }
+      const detail = await nxCloudFetch<{
+        tasks: {
+          taskId: string;
+          projectName: string;
+          target: string;
+          durationMs: number;
+          status: string;
+          cacheStatus: string;
+          isCacheable: boolean;
+        }[];
+      }>(`/runs/${mainRun.id}`);
+
+      const rates = rateStmt.all(id) as { template: string; credit_multiplier: number }[];
+      const rateByTemplate: Record<string, number> = {};
+      for (const r of rates) rateByTemplate[r.template] = r.credit_multiplier;
+
+      let cipeTaskRows = 0;
+      db.exec('BEGIN');
+      inTransaction = true;
+      for (const t of detail.tasks ?? []) {
+        if (
+          t.taskId.endsWith(':serve:production') ||
+          t.taskId.endsWith(':run-registry:production')
+        ) {
+          continue;
+        }
+        const cs = t.cacheStatus ?? '';
+        if (!cs.includes('cache-hit') && cs !== 'cache-miss') continue;
+
+        const template = inferAgentTemplate(t.target);
+        // Default rate when the run has no credit row for the inferred template.
+        const creditsPerMin = rateByTemplate[template] ?? 60;
+
+        insert.run(
+          id,
+          t.taskId,
+          t.projectName,
+          t.target,
+          t.durationMs ?? 0,
+          cs,
+          template,
+          creditsPerMin
+        );
+        cipeTaskRows++;
+      }
+      db.exec('COMMIT');
+      inTransaction = false;
+      totalTaskRows += cipeTaskRows;
+      stored++;
+    } catch {
+      if (inTransaction) db.exec('ROLLBACK');
+      apiFailures++;
+    }
+    if (i % 25 === 24) {
+      const rate = ((i + 1) / ((Date.now() - startTime) / 1000)).toFixed(1);
+      console.log(
+        ` ${i + 1}/${missing.length} (${rate} CIPEs/sec, ${totalTaskRows} task rows so far)`
+      );
+    }
+  }
+
+  console.log(
+    `NX run-tasks backfill: stored ${totalTaskRows} task rows across ${stored} / ${missing.length} CIPEs` +
+      (skipped > 0 ? `, skipped ${skipped}` : '') +
+      (apiFailures > 0 ? ` (${apiFailures} API failures)` : '')
+  );
+}
+
+/** Open the database at DB_PATH, run both backfills in order, then close it. */
+async function main() {
+  const db = new DatabaseSync(DB_PATH);
+  console.log(`DB: ${DB_PATH}\n`);
+
+  console.log('── Retry stats backfill (hypothetical_no_cache_ms) ──');
+  await backfillNxCipeRetryStats(db);
+
+  console.log('\n── Run-tasks backfill ──');
+  await backfillNxRunTasks(db);
+
+  db.close();
+}
+
+main().catch((e) => {
+  console.error('Fatal:', e);
+  process.exit(1);
+});
diff --git a/test-storybooks/portable-stories-kitchen-sink/react-vitest-3/project.json b/test-storybooks/portable-stories-kitchen-sink/react-vitest-3/project.json
index d1879b795c01..68bf501705be 100644
--- a/test-storybooks/portable-stories-kitchen-sink/react-vitest-3/project.json
+++ b/test-storybooks/portable-stories-kitchen-sink/react-vitest-3/project.json
@@ -5,5 +5,5 @@
   "targets": {
     "e2e-ui": {}
   },
-  "tags": ["ci:normal", "ci:merged", "ci:daily"]
+  "tags": ["ci:merged", "ci:daily"]
 }
diff --git a/test-storybooks/yarn-pnp/project.json b/test-storybooks/yarn-pnp/project.json
index c689271c8c74..8ecaf2c09ed1 100644
--- a/test-storybooks/yarn-pnp/project.json
+++ b/test-storybooks/yarn-pnp/project.json
@@ -15,5 +15,5 @@
       }
     }
   },
-  "tags": ["ci:normal", "ci:merged", "ci:daily"]
+
"tags": ["ci:daily"] } diff --git a/yarn.lock b/yarn.lock index 05874291102d..8a9b3fa33fe2 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3610,13 +3610,6 @@ __metadata: languageName: node linkType: hard -"@ltd/j-toml@npm:^1.38.0": - version: 1.38.0 - resolution: "@ltd/j-toml@npm:1.38.0" - checksum: 10c0/6db2b203e21c6bc2e781ad76f3e798ae9c4b9d83205070f482602a30f3d1c37dde8c8c72575bdc505c1b8c228c5e81171a4557f0d76b814da0fb91cdd21f6194 - languageName: node - linkType: hard - "@mdn/browser-compat-data@npm:^5.5.35, @mdn/browser-compat-data@npm:^5.6.19": version: 5.7.6 resolution: "@mdn/browser-compat-data@npm:5.7.6" @@ -4096,12 +4089,12 @@ __metadata: languageName: node linkType: hard -"@nx/devkit@npm:22.6.1": - version: 22.6.1 - resolution: "@nx/devkit@npm:22.6.1" +"@nx/devkit@npm:22.6.5": + version: 22.6.5 + resolution: "@nx/devkit@npm:22.6.5" dependencies: "@zkochan/js-yaml": "npm:0.0.7" - ejs: "npm:^3.1.7" + ejs: "npm:5.0.1" enquirer: "npm:~2.3.6" minimatch: "npm:10.2.4" semver: "npm:^7.6.3" @@ -4109,94 +4102,94 @@ __metadata: yargs-parser: "npm:21.1.1" peerDependencies: nx: ">= 21 <= 23 || ^22.0.0-0" - checksum: 10c0/f469beb58ebcdcd27db088a0dbd0dcf796796a349db61902eaa202a6e28f2c1dee51ac24cf44f180d162403974f1c1cd6497a05552dfd6ebea19ab4d396f8fe1 + checksum: 10c0/bff5f51cf2041f2d0f08ff1e93984c0e1def2e554b5f2edbcc5240a18a3b434ef6059898c87bbd2a5f9b84b5ef04e66f8607e5dcf7f26bf44e0009796ea7a628 languageName: node linkType: hard -"@nx/nx-darwin-arm64@npm:22.6.1": - version: 22.6.1 - resolution: "@nx/nx-darwin-arm64@npm:22.6.1" +"@nx/nx-darwin-arm64@npm:22.6.5": + version: 22.6.5 + resolution: "@nx/nx-darwin-arm64@npm:22.6.5" conditions: os=darwin & cpu=arm64 languageName: node linkType: hard -"@nx/nx-darwin-x64@npm:22.6.1": - version: 22.6.1 - resolution: "@nx/nx-darwin-x64@npm:22.6.1" +"@nx/nx-darwin-x64@npm:22.6.5": + version: 22.6.5 + resolution: "@nx/nx-darwin-x64@npm:22.6.5" conditions: os=darwin & cpu=x64 languageName: node linkType: hard -"@nx/nx-freebsd-x64@npm:22.6.1": - 
version: 22.6.1 - resolution: "@nx/nx-freebsd-x64@npm:22.6.1" +"@nx/nx-freebsd-x64@npm:22.6.5": + version: 22.6.5 + resolution: "@nx/nx-freebsd-x64@npm:22.6.5" conditions: os=freebsd & cpu=x64 languageName: node linkType: hard -"@nx/nx-linux-arm-gnueabihf@npm:22.6.1": - version: 22.6.1 - resolution: "@nx/nx-linux-arm-gnueabihf@npm:22.6.1" +"@nx/nx-linux-arm-gnueabihf@npm:22.6.5": + version: 22.6.5 + resolution: "@nx/nx-linux-arm-gnueabihf@npm:22.6.5" conditions: os=linux & cpu=arm languageName: node linkType: hard -"@nx/nx-linux-arm64-gnu@npm:22.6.1": - version: 22.6.1 - resolution: "@nx/nx-linux-arm64-gnu@npm:22.6.1" +"@nx/nx-linux-arm64-gnu@npm:22.6.5": + version: 22.6.5 + resolution: "@nx/nx-linux-arm64-gnu@npm:22.6.5" conditions: os=linux & cpu=arm64 & libc=glibc languageName: node linkType: hard -"@nx/nx-linux-arm64-musl@npm:22.6.1": - version: 22.6.1 - resolution: "@nx/nx-linux-arm64-musl@npm:22.6.1" +"@nx/nx-linux-arm64-musl@npm:22.6.5": + version: 22.6.5 + resolution: "@nx/nx-linux-arm64-musl@npm:22.6.5" conditions: os=linux & cpu=arm64 & libc=musl languageName: node linkType: hard -"@nx/nx-linux-x64-gnu@npm:22.6.1": - version: 22.6.1 - resolution: "@nx/nx-linux-x64-gnu@npm:22.6.1" +"@nx/nx-linux-x64-gnu@npm:22.6.5": + version: 22.6.5 + resolution: "@nx/nx-linux-x64-gnu@npm:22.6.5" conditions: os=linux & cpu=x64 & libc=glibc languageName: node linkType: hard -"@nx/nx-linux-x64-musl@npm:22.6.1": - version: 22.6.1 - resolution: "@nx/nx-linux-x64-musl@npm:22.6.1" +"@nx/nx-linux-x64-musl@npm:22.6.5": + version: 22.6.5 + resolution: "@nx/nx-linux-x64-musl@npm:22.6.5" conditions: os=linux & cpu=x64 & libc=musl languageName: node linkType: hard -"@nx/nx-win32-arm64-msvc@npm:22.6.1": - version: 22.6.1 - resolution: "@nx/nx-win32-arm64-msvc@npm:22.6.1" +"@nx/nx-win32-arm64-msvc@npm:22.6.5": + version: 22.6.5 + resolution: "@nx/nx-win32-arm64-msvc@npm:22.6.5" conditions: os=win32 & cpu=arm64 languageName: node linkType: hard -"@nx/nx-win32-x64-msvc@npm:22.6.1": - 
version: 22.6.1 - resolution: "@nx/nx-win32-x64-msvc@npm:22.6.1" +"@nx/nx-win32-x64-msvc@npm:22.6.5": + version: 22.6.5 + resolution: "@nx/nx-win32-x64-msvc@npm:22.6.5" conditions: os=win32 & cpu=x64 languageName: node linkType: hard -"@nx/workspace@npm:^22.6.1": - version: 22.6.1 - resolution: "@nx/workspace@npm:22.6.1" +"@nx/workspace@npm:^22.6.5": + version: 22.6.5 + resolution: "@nx/workspace@npm:22.6.5" dependencies: - "@nx/devkit": "npm:22.6.1" + "@nx/devkit": "npm:22.6.5" "@zkochan/js-yaml": "npm:0.0.7" chalk: "npm:^4.1.0" enquirer: "npm:~2.3.6" - nx: "npm:22.6.1" - picomatch: "npm:4.0.2" + nx: "npm:22.6.5" + picomatch: "npm:4.0.4" semver: "npm:^7.6.3" tslib: "npm:^2.3.0" yargs-parser: "npm:21.1.1" - checksum: 10c0/0f25f86d60cecac4e8e9024f870df2e4fad0c095b8511c4c2c6d37d7d9dd75ff42b903943f08d50440ac49873cd99acfe6b2d62129c25e032ba16bdd407c544a + checksum: 10c0/9f7749103df1af31e15a2ed7832914e9c595c3c98e9fe9ed65f5f5d955ddf7168a7938e0af9abf48ebdb072d9e51f38b59c7b02e93c2088219a3fcb59369cf0f languageName: node linkType: hard @@ -8696,7 +8689,7 @@ __metadata: version: 0.0.0-use.local resolution: "@storybook/root@workspace:." 
dependencies: - "@nx/workspace": "npm:^22.6.1" + "@nx/workspace": "npm:^22.6.5" "@playwright/test": "npm:^1.58.2" "@types/kill-port": "npm:^2.0.3" http-server: "npm:^14.1.1" @@ -8704,7 +8697,7 @@ __metadata: jiti: "npm:^2.6.1" kill-port: "npm:^2.0.1" lint-staged: "npm:^16.4.0" - nx: "npm:^22.6.1" + nx: "npm:^22.6.5" oxfmt: "npm:^0.41.0" std-env: "npm:^4.0.0" vite: "npm:^7.0.4" @@ -12433,14 +12426,14 @@ __metadata: languageName: node linkType: hard -"axios@npm:^1.12.0, axios@npm:^1.8.2": - version: 1.13.2 - resolution: "axios@npm:1.13.2" +"axios@npm:1.15.0, axios@npm:^1.8.2": + version: 1.15.0 + resolution: "axios@npm:1.15.0" dependencies: - follow-redirects: "npm:^1.15.6" - form-data: "npm:^4.0.4" - proxy-from-env: "npm:^1.1.0" - checksum: 10c0/e8a42e37e5568ae9c7a28c348db0e8cf3e43d06fcbef73f0048669edfe4f71219664da7b6cc991b0c0f01c28a48f037c515263cb79be1f1ae8ff034cd813867b + follow-redirects: "npm:^1.15.11" + form-data: "npm:^4.0.5" + proxy-from-env: "npm:^2.1.0" + checksum: 10c0/47e0f860e98d4d7aa145e89ce0cae00e1fb0f1d2485f065c21fce955ddb1dba4103a46bd0e47acd18a27208a7f62c96249e620db575521b92a968619ab133409 languageName: node linkType: hard @@ -15535,7 +15528,7 @@ __metadata: languageName: node linkType: hard -"ejs@npm:3.1.10, ejs@npm:^3.1.10, ejs@npm:^3.1.7": +"ejs@npm:3.1.10, ejs@npm:^3.1.10": version: 3.1.10 resolution: "ejs@npm:3.1.10" dependencies: @@ -15546,6 +15539,15 @@ __metadata: languageName: node linkType: hard +"ejs@npm:5.0.1": + version: 5.0.1 + resolution: "ejs@npm:5.0.1" + bin: + ejs: bin/cli.js + checksum: 10c0/7791e4d621e1c050b4310b87b75b43bb18de20cbe4560ee4640693ec052a19ae884df838ed4e391c26ec25530af90b58c35a3465462b6b1734e4b084ce45f872 + languageName: node + linkType: hard + "electron-to-chromium@npm:^1.5.249": version: 1.5.255 resolution: "electron-to-chromium@npm:1.5.255" @@ -17770,13 +17772,13 @@ __metadata: languageName: node linkType: hard -"follow-redirects@npm:^1.0.0, follow-redirects@npm:^1.15.6": - version: 1.15.6 - resolution: 
"follow-redirects@npm:1.15.6" +"follow-redirects@npm:^1.0.0, follow-redirects@npm:^1.15.11": + version: 1.16.0 + resolution: "follow-redirects@npm:1.16.0" peerDependenciesMeta: debug: optional: true - checksum: 10c0/9ff767f0d7be6aa6870c82ac79cf0368cd73e01bbc00e9eb1c2a16fbb198ec105e3c9b6628bb98e9f3ac66fe29a957b9645bcb9a490bb7aa0d35f908b6b85071 + checksum: 10c0/a1e2900163e6f1b4d1ed5c221b607f41decbab65534c63fe7e287e40a5d552a6496e7d9d7d976fa4ba77b4c51c11e5e9f683f10b43011ea11e442ff128d0e181 languageName: node linkType: hard @@ -17839,7 +17841,7 @@ __metadata: languageName: node linkType: hard -"form-data@npm:^4.0.4, form-data@npm:~4.0.0": +"form-data@npm:^4.0.5, form-data@npm:~4.0.0": version: 4.0.5 resolution: "form-data@npm:4.0.5" dependencies: @@ -23599,32 +23601,31 @@ __metadata: languageName: node linkType: hard -"nx@npm:22.6.1, nx@npm:^22.6.1": - version: 22.6.1 - resolution: "nx@npm:22.6.1" +"nx@npm:22.6.5, nx@npm:^22.6.5": + version: 22.6.5 + resolution: "nx@npm:22.6.5" dependencies: - "@ltd/j-toml": "npm:^1.38.0" "@napi-rs/wasm-runtime": "npm:0.2.4" - "@nx/nx-darwin-arm64": "npm:22.6.1" - "@nx/nx-darwin-x64": "npm:22.6.1" - "@nx/nx-freebsd-x64": "npm:22.6.1" - "@nx/nx-linux-arm-gnueabihf": "npm:22.6.1" - "@nx/nx-linux-arm64-gnu": "npm:22.6.1" - "@nx/nx-linux-arm64-musl": "npm:22.6.1" - "@nx/nx-linux-x64-gnu": "npm:22.6.1" - "@nx/nx-linux-x64-musl": "npm:22.6.1" - "@nx/nx-win32-arm64-msvc": "npm:22.6.1" - "@nx/nx-win32-x64-msvc": "npm:22.6.1" + "@nx/nx-darwin-arm64": "npm:22.6.5" + "@nx/nx-darwin-x64": "npm:22.6.5" + "@nx/nx-freebsd-x64": "npm:22.6.5" + "@nx/nx-linux-arm-gnueabihf": "npm:22.6.5" + "@nx/nx-linux-arm64-gnu": "npm:22.6.5" + "@nx/nx-linux-arm64-musl": "npm:22.6.5" + "@nx/nx-linux-x64-gnu": "npm:22.6.5" + "@nx/nx-linux-x64-musl": "npm:22.6.5" + "@nx/nx-win32-arm64-msvc": "npm:22.6.5" + "@nx/nx-win32-x64-msvc": "npm:22.6.5" "@yarnpkg/lockfile": "npm:^1.1.0" "@yarnpkg/parsers": "npm:3.0.2" "@zkochan/js-yaml": "npm:0.0.7" - axios: "npm:^1.12.0" + axios: 
"npm:1.15.0" cli-cursor: "npm:3.1.0" cli-spinners: "npm:2.6.1" cliui: "npm:^8.0.1" dotenv: "npm:~16.4.5" dotenv-expand: "npm:~11.0.6" - ejs: "npm:^3.1.7" + ejs: "npm:5.0.1" enquirer: "npm:~2.3.6" figures: "npm:3.2.0" flat: "npm:^5.0.2" @@ -23640,6 +23641,7 @@ __metadata: picocolors: "npm:^1.1.0" resolve.exports: "npm:2.0.3" semver: "npm:^7.6.3" + smol-toml: "npm:1.6.1" string-width: "npm:^4.2.3" tar-stream: "npm:~2.2.0" tmp: "npm:~0.2.1" @@ -23681,7 +23683,7 @@ __metadata: bin: nx: bin/nx.js nx-cloud: bin/nx-cloud.js - checksum: 10c0/4988ac32fece277439397837f56755d5b62b865acd15e0e18b47ea134f4dca4988dadff82d512e32e8d9e365e5cb3463fe46a465ba41434831e09b3e65436283 + checksum: 10c0/f83c002c7f7b5312ab05fdf503f6a107bbf11eabe85df0e8426f7e172d9adbc2bd24f887fb7e1aa08b28b1a27b36ed115c19c8531cd06a3d1cabdbef25c0e0c9 languageName: node linkType: hard @@ -24735,6 +24737,13 @@ __metadata: languageName: node linkType: hard +"picomatch@npm:4.0.4, picomatch@npm:^4.0.1, picomatch@npm:^4.0.2, picomatch@npm:^4.0.3": + version: 4.0.4 + resolution: "picomatch@npm:4.0.4" + checksum: 10c0/e2c6023372cc7b5764719a5ffb9da0f8e781212fa7ca4bd0562db929df8e117460f00dff3cb7509dacfc06b86de924b247f504d0ce1806a37fac4633081466b0 + languageName: node + linkType: hard + "picomatch@npm:^2.0.4, picomatch@npm:^2.2.1, picomatch@npm:^2.2.3, picomatch@npm:^2.3.0, picomatch@npm:^2.3.1": version: 2.3.1 resolution: "picomatch@npm:2.3.1" @@ -24742,13 +24751,6 @@ __metadata: languageName: node linkType: hard -"picomatch@npm:^4.0.1, picomatch@npm:^4.0.2, picomatch@npm:^4.0.3": - version: 4.0.3 - resolution: "picomatch@npm:4.0.3" - checksum: 10c0/9582c951e95eebee5434f59e426cddd228a7b97a0161a375aed4be244bd3fe8e3a31b846808ea14ef2c8a2527a6eeab7b3946a67d5979e81694654f939473ae2 - languageName: node - linkType: hard - "picoquery@npm:^1.4.0": version: 1.5.0 resolution: "picoquery@npm:1.5.0" @@ -25471,10 +25473,10 @@ __metadata: languageName: node linkType: hard -"proxy-from-env@npm:^1.1.0": - version: 1.1.0 - resolution: 
"proxy-from-env@npm:1.1.0" - checksum: 10c0/fe7dd8b1bdbbbea18d1459107729c3e4a2243ca870d26d34c2c1bcd3e4425b7bcc5112362df2d93cc7fb9746f6142b5e272fd1cc5c86ddf8580175186f6ad42b +"proxy-from-env@npm:^2.1.0": + version: 2.1.0 + resolution: "proxy-from-env@npm:2.1.0" + checksum: 10c0/ed01729fd4d094eab619cd7e17ce3698b3413b31eb102c4904f9875e677cd207392795d5b4adee9cec359dfd31c44d5ad7595a3a3ad51c40250e141512281c58 languageName: node linkType: hard @@ -28116,10 +28118,10 @@ __metadata: languageName: node linkType: hard -"smol-toml@npm:^1.5.2": - version: 1.5.2 - resolution: "smol-toml@npm:1.5.2" - checksum: 10c0/ccfe5dda80c1d0c45869140b1e695a13a81ba7c57c1ca083146fe2f475d6f57031c12410f95d53a5acb3a1504e8e8e12cab36871909e8c8ce0c7011ccd22a2ac +"smol-toml@npm:1.6.1, smol-toml@npm:^1.5.2": + version: 1.6.1 + resolution: "smol-toml@npm:1.6.1" + checksum: 10c0/511a78722f99c7616fdb46af708de3d7e81434b5a3d58061166da73f28bfc6cae4f0cd04683f60515b9c490cd10152fce72287c960b337419c0299cc1f0f2a22 languageName: node linkType: hard