From 878cceee0985f58c42cbe238ec17e78b247b1de3 Mon Sep 17 00:00:00 2001 From: Sebastien Lavoie Date: Wed, 7 Jan 2026 14:10:26 -0500 Subject: [PATCH] chore(dev): Add a script to analyze cross-team PR reviews --- .cspell.json | 1 + .github/workflows/label-cross-team.yml | 54 ++ scripts/reports/pr-review-times/.gitignore | 2 + scripts/reports/pr-review-times/README.md | 100 +++ scripts/reports/pr-review-times/aggregate.mjs | 492 ++++++++++++++ .../pr-review-times/backfill-label.mjs | 70 ++ .../reports/pr-review-times/codeowners.mjs | 129 ++++ scripts/reports/pr-review-times/constants.mjs | 15 + scripts/reports/pr-review-times/data/.keep | 0 scripts/reports/pr-review-times/fetch-pr.mjs | 341 ++++++++++ scripts/reports/pr-review-times/holidays.mjs | 268 ++++++++ scripts/reports/pr-review-times/index.mjs | 139 ++++ scripts/reports/pr-review-times/list-prs.mjs | 24 + scripts/reports/pr-review-times/run.sh | 17 + scripts/reports/pr-review-times/stats.mjs | 633 ++++++++++++++++++ scripts/reports/pr-review-times/storage.mjs | 75 +++ 16 files changed, 2360 insertions(+) create mode 100644 .github/workflows/label-cross-team.yml create mode 100644 scripts/reports/pr-review-times/.gitignore create mode 100644 scripts/reports/pr-review-times/README.md create mode 100755 scripts/reports/pr-review-times/aggregate.mjs create mode 100644 scripts/reports/pr-review-times/backfill-label.mjs create mode 100644 scripts/reports/pr-review-times/codeowners.mjs create mode 100644 scripts/reports/pr-review-times/constants.mjs create mode 100644 scripts/reports/pr-review-times/data/.keep create mode 100644 scripts/reports/pr-review-times/fetch-pr.mjs create mode 100644 scripts/reports/pr-review-times/holidays.mjs create mode 100755 scripts/reports/pr-review-times/index.mjs create mode 100644 scripts/reports/pr-review-times/list-prs.mjs create mode 100755 scripts/reports/pr-review-times/run.sh create mode 100644 scripts/reports/pr-review-times/stats.mjs create mode 100644 
scripts/reports/pr-review-times/storage.mjs diff --git a/.cspell.json b/.cspell.json index ded3baa37b5..f0af7541d8d 100644 --- a/.cspell.json +++ b/.cspell.json @@ -46,6 +46,7 @@ "Debouncer", "debouncers", "dupl", + "dxui", "dynamicscrmitem", "Ecommerce", "electronicscoveodemocomo", diff --git a/.github/workflows/label-cross-team.yml b/.github/workflows/label-cross-team.yml new file mode 100644 index 00000000000..d5f30dacf47 --- /dev/null +++ b/.github/workflows/label-cross-team.yml @@ -0,0 +1,54 @@ +# Label PRs that require cross-team reviews +# Used by scripts/reports/pr-review-times to generate reports +name: Label Cross Team +on: + pull_request_target: + types: [review_requested, opened] + +permissions: + pull-requests: write + +jobs: + label: + runs-on: ubuntu-latest + steps: + - name: Harden Runner + uses: step-security/harden-runner@ec9f2d5744a09debf3a187a3f4f675c53b671911 # v2.13.0 + with: + egress-policy: audit + + - name: Label Cross Team + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + try { + const { data: pr } = await github.rest.pulls.get({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: context.issue.number, + }); + + const teams = pr.requested_teams || []; + console.log(`Requested teams: ${teams.map((t) => t.slug).join(', ')}`); + + if (teams.length > 1) { + const hasLabel = pr.labels.some((l) => l.name === 'cross-team'); + if (hasLabel) { + console.log(`'cross-team' label already present. Skipping.`); + } else { + console.log(`Found ${teams.length} teams requested. Adding 'cross-team' label.`); + await github.rest.issues.addLabels({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + labels: ['cross-team'], + }); + } + } else { + console.log(`Only ${teams.length} teams requested. 
No label needed.`); + } + } catch (error) { + console.error('Error in label-cross-team action:', error); + core.setFailed(error.message); + } diff --git a/scripts/reports/pr-review-times/.gitignore b/scripts/reports/pr-review-times/.gitignore new file mode 100644 index 00000000000..f40e6330db0 --- /dev/null +++ b/scripts/reports/pr-review-times/.gitignore @@ -0,0 +1,2 @@ +*.json +*.csv diff --git a/scripts/reports/pr-review-times/README.md b/scripts/reports/pr-review-times/README.md new file mode 100644 index 00000000000..c1c5ef92a2b --- /dev/null +++ b/scripts/reports/pr-review-times/README.md @@ -0,0 +1,100 @@ +# PR Review Time Analysis Tools + +A suite of vibe-coded scripts to fetch, analyze, and report on Pull Request review performance and collaboration patterns in the `coveo/ui-kit` repository. + +## Intent + +The primary goal of this toolset is to provide visibility into the code review process. It helps answer questions like: +- How long does it take for different teams to Pick Up and Approve PRs? +- How much "cross-team" collaboration is happening? +- Are we improving over time? + +By distinguishing between **Business Days** and calendar days, and filtering out holidays, the metrics aim to be fair and accurate representations of team responsiveness. + +## Design Choices + +### 1. Modular Architecture +The logic is split into distinct modules to separate concerns: +- **`index.mjs`**: The CLI entry point that orchestrates the workflow. +- **`fetch-pr.mjs`**: Fetches raw data from GitHub and performs per-PR analysis (calculating durations, identifying teams). +- **`aggregate.mjs`**: Consolidates analyzed PRs into high-level metrics (averages, medians, percentiles) and generates reports (JSON, CSV, Console). +- **`holidays.mjs`**: Contains the logic for "Business Days". It ignores weekends and fixed/moving holidays (New Year's, Patriot's Day, etc.) to calculate accurate durations. 
+- **`codeowners.mjs`**: Parses the repository's `CODEOWNERS` file to map files to teams. + +### 2. Local Caching & Differential Updates +GitHub API rate limits can be a bottleneck. The script uses a local filesystem cache (`data/*.json`): +- When running `fetch`, the script checks the `updated_at` timestamp of the PR. +- It only re-fetches data from GitHub if the PR has changed since the last run. +- This allows for fast incremental updates and offline reporting. + +### 3. Identifying Cross-Team PRs + +A PR is determined to be cross-team if a review is requested from more than one team. + +A [GitHub action](../../../.github/workflows/label-cross-team.yml) automatically adds a `cross-team` label for easier filtering. + +PRs created after November 1st 2025 were [backfilled](backfill-label.mjs). + +### 4. Author Confidence + +The report distinguishes between: + +- `high-confidence`: The system is blocking from merging due to CODEOWNERS, but the author is rather confident about the change and thinks they should be able to merge without an additional review. +- `low-confidence`: The author is not that confident and actually wants another team to review the change before merging. + +### 5. Metrics +All duration metrics are reported in **Business Days (8h)**. +- **Pickup Time**: Time from "Ready for Review" -> First Review or Comment. +- **Review Time**: Time from First Review -> Approval. +- **Total Time**: Time from "Ready for Review" -> Merge. + + +## Prerequisites +- Node.js environment. +- `GITHUB_TOKEN` environment variable with read access to the repository. + +## Usage + +From this directory, run `./run.sh`. Look into [the script](run.sh) for details. + +Outputs: +- **Console Summary**: A text-based overview. +- **`aggregated.json`**: Full dataset for external analysis. +- **`aggregated.csv`**: Flattened data for spreadsheet import. 
+ +## Example Console Output + +``` +========================================= + PR Review Aggregation Summary + (Values in Business Days) +========================================= + +Global Stats: + Reviewed: count=5 / avg=3.23 / p95=6.89 + Pending: count=15 / avg=3.73 / p95=9.98 + +----------------------------------------- +By Team (Sorted by Volume) +----------------------------------------- + +@coveo/dxui + Aggregated (count=33): + Reviewed: count=10 / avg=2.71 / p95=6.86 + Pending: count=12 / avg=4.1 / p95=9.98 + + High Confidence (count=6): + Reviewed: count=4 / avg=2.94 / p95=5.1 + Pending: count=2 / avg=3.16 / p95=3.22 + + Low Confidence (count=2): + Pending: count=2 / avg=2.16 / p95=4.14 + +@coveo/dx + Aggregated (count=15): + Reviewed: count=6 / avg=1.99 / p95=5.78 + Pending: count=4 / avg=3.51 / p95=8.16 + + Low Confidence (count=1): + Reviewed: count=1 / avg=0 / p95=0 +``` diff --git a/scripts/reports/pr-review-times/aggregate.mjs b/scripts/reports/pr-review-times/aggregate.mjs new file mode 100755 index 00000000000..f0321e5af56 --- /dev/null +++ b/scripts/reports/pr-review-times/aggregate.mjs @@ -0,0 +1,492 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import {calculateBusinessHours} from './holidays.mjs'; +import {DATA_DIR, loadAllPrData} from './storage.mjs'; + +/** + * Main Aggregation Function + */ +export function aggregateReports() { + console.log('Starting PR Review Aggregation...'); + + const prs = loadAllPrData(); + console.log(`Loaded ${prs.length} PR records.`); + + const aggReport = { + global: calculateAggregate(prs, '_combined'), + byConfidence: {}, + byTeam: {}, + openOverTime: calculateDailyOpenCounts(prs), + }; + + // Group by Confidence + const confidenceLevels = ['high', 'low', 'unknown']; + for (const level of confidenceLevels) { + const levelPrs = prs.filter((p) => p.confidence === level); + if (levelPrs.length > 0) { + aggReport.byConfidence[level] = calculateAggregate(levelPrs, '_combined'); + } + } + + // Group 
by Team + const allTeams = new Set(); + prs.forEach((p) => { + if (p.reviews) { + Object.keys(p.reviews).forEach((t) => { + if (t !== '_combined') allTeams.add(t); + }); + } + }); + + for (const team of allTeams) { + const teamPrs = prs.filter((p) => { + const hasReview = p.reviews && team in p.reviews; + if (!hasReview) return false; + + // Ignore PRs that are in draft or closed (without merging) for per-team stats + if (p.status === 'draft') return false; + if (p.status === 'closed') return false; + + return true; + }); + + // Calculate aggregate and break down by confidence + const aggregated = calculateAggregate(teamPrs, team); + + const confidenceStats = {}; + for (const level of confidenceLevels) { + const levelPrs = teamPrs.filter((p) => p.confidence === level); + if (levelPrs.length > 0) { + confidenceStats[level] = calculateAggregate(levelPrs, team); + } + } + + // Store in new structure for byTeam + aggReport.byTeam[team] = { + aggregated, + byConfidence: confidenceStats, + }; + } + + // Output JSON + const aggPath = path.resolve(DATA_DIR, '../aggregated.json'); + fs.writeFileSync(aggPath, JSON.stringify(aggReport, null, 2)); + console.log(`Aggregation report written to ${aggPath}`); + + // Output CSV + const csvPath = path.resolve(DATA_DIR, '../aggregated.csv'); + generateCsv(aggReport, csvPath); + + // Print Summary to Console + printSummary(aggReport); +} + +function calculateAggregate(prs, teamKey) { + const counts = { + total: 0, + draft: 0, + requested: 0, + pending: 0, + reviewed: 0, + merged: 0, + closed: 0, + }; + + // We separate duration stats into: + // 1. Pending: PR is open and not yet reviewed. Duration = (Now - StartTime) + // 2. Reviewed: PR is reviewed (approved/changes requested). Duration = (ReviewTime - StartTime) + + const pendingDurationValues = []; + const reviewedDurationValues = []; + + for (const p of prs) { + const stats = + teamKey === '_combined' ? 
calculateCombinedStats(p) : p.reviews[teamKey]; + + if (!stats) continue; + + counts.total++; + + if (p.status === 'draft') { + counts.draft++; + continue; + } + + counts.requested++; + + if (p.status === 'merged') { + counts.merged++; + } else if (p.status === 'closed') { + counts.closed++; + } + + // Reviewed Stats (Independent of PR status - if it was reviewed, count it) + const isReviewed = + stats.status === 'approved' || stats.status === 'changesRequested'; + + if (isReviewed) { + counts.reviewed++; + // Use updated businessDays if available, or fallback to durationHours for older data + const days = + stats.businessDays !== undefined + ? stats.businessDays + : stats.durationHours + ? stats.durationHours / 24 + : null; + if (days !== null) { + reviewedDurationValues.push(days); + } + } else { + // Pending Stats (Only if PR is still Open and review was started) + if (p.state === 'open' && stats.startTime) { + counts.pending++; + const pendingHours = calculateBusinessHours( + stats.startTime, + new Date().toISOString() + ); + const pendingDays = pendingHours !== null ? pendingHours / 24 : 0; + pendingDurationValues.push(pendingDays); + } + } + } + + // Verification + if (counts.total !== counts.draft + counts.requested) { + throw new Error( + `Integrity check failed: Total (${counts.total}) != Draft (${counts.draft}) + Requested (${counts.requested})` + ); + } + // Note: Requested != Pending + Reviewed + Merged + Closed anymore because Reviewed interacts with Merged/Closed. 
+ + return { + counts, + pendingDuration: calculateStats(pendingDurationValues), + reviewedDuration: calculateStats(reviewedDurationValues), + }; +} + +/** + * Calculate basic stats (Avg, P95) + * @param {Array} values + */ +function calculateStats(values) { + if (!values || values.length === 0) { + return {count: 0}; + } + + const sorted = [...values].sort((a, b) => a - b); + const sum = sorted.reduce((a, b) => a + b, 0); + const avg = sum / values.length; + const max = sorted[sorted.length - 1]; + + const p95Index = Math.floor(values.length * 0.95); + const p95 = sorted[p95Index]; + + return { + count: values.length, + avg: parseFloat(avg.toFixed(2)), + p95: parseFloat(p95.toFixed(2)), + max: parseFloat(max.toFixed(2)), + }; +} + +/** + * Calculate count of open blocked PRs per day + * @param {Array} prs + */ +function calculateDailyOpenCounts(prs) { + // Use combined start/end times + let minDate = new Date(); + let maxDate = new Date(0); // Epoch + + const validPrs = []; + prs.forEach((p) => { + const stats = calculateCombinedStats(p); + // Attach to p for reuse within this function scope + p._tempCombined = stats; + if (stats.startTime) validPrs.push(p); + }); + + if (validPrs.length === 0) return {}; + + validPrs.forEach((p) => { + const stats = p._tempCombined; + const start = new Date(stats.startTime); + const end = stats.endTime ? 
new Date(stats.endTime) : new Date(); + + if (start < minDate) minDate = start; + if (end > maxDate) maxDate = end; + }); + + const dailyCounts = {}; + const runner = new Date(minDate); + runner.setHours(0, 0, 0, 0); + + const endDate = new Date(maxDate); + endDate.setHours(0, 0, 0, 0); + + while (runner <= endDate) { + const dateStr = runner.toISOString().split('T')[0]; + const eod = new Date(runner); + eod.setHours(23, 59, 59, 999); + + const count = validPrs.filter((p) => { + const stats = p._tempCombined; + const start = new Date(stats.startTime); + + // Started before EOD + if (start > eod) return false; + + // Ended after EOD or not ended + if (stats.endTime) { + const end = new Date(stats.endTime); + if (end < eod) return false; + } + + return true; + }).length; + + dailyCounts[dateStr] = count; + runner.setDate(runner.getDate() + 1); + } + + return dailyCounts; +} + +function printSummary(report) { + console.log('\n========================================='); + console.log(' PR Review Aggregation Summary'); + console.log(' (Values in Business Days)'); + console.log('=========================================\n'); + + console.log('Global Stats:'); + printGlobalStats(report.global, ' '); + + console.log('\n-----------------------------------------'); + console.log('By Team (Sorted by Volume)'); + console.log('-----------------------------------------'); + + const sortedTeams = Object.entries(report.byTeam).sort( + ([, a], [, b]) => + b.aggregated.counts.requested - a.aggregated.counts.requested + ); + + sortedTeams.forEach(([team, data]) => { + // Check if there's anything to report (Aggregated covers all subsections) + if (!hasStats(data.aggregated)) return; + + console.log(`\n${team}`); + + // Aggregated + console.log(` Aggregated:`); + printStatBlock(data.aggregated, ' '); + + // By Confidence + if (data.byConfidence) { + Object.entries(data.byConfidence).forEach(([level, stats]) => { + if (level === 'unknown') return; + if (!hasStats(stats)) return; + + 
console.log(`\n ${capitalize(level)} Confidence:`); + printStatBlock(stats, ' '); + }); + } + }); + console.log('\n=========================================\n'); +} + +function hasStats(stats) { + return stats.counts.reviewed > 0 || stats.counts.pending > 0; +} + +function printGlobalStats(stats, indent) { + const c = stats.counts; + console.log(`${indent}Total: ${c.total}`); + console.log(`${indent}Draft: ${c.draft}`); + console.log(`${indent}Requested: ${c.requested}`); + + // Pending line + let pendingDur = ''; + if (stats.pendingDuration.count > 0) { + pendingDur = ` / avg=${stats.pendingDuration.avg} / p95=${stats.pendingDuration.p95}`; + } + console.log(`${indent}Pending: ${c.pending}${pendingDur}`); + + // Reviewed line + let reviewedDur = ''; + if (stats.reviewedDuration.count > 0) { + reviewedDur = ` / avg=${stats.reviewedDuration.avg} / p95=${stats.reviewedDuration.p95}`; + } + console.log(`${indent}Reviewed: ${c.reviewed}${reviewedDur}`); + + console.log(`${indent}Merged: ${c.merged}`); + console.log(`${indent}Closed: ${c.closed}`); +} + +function printStatBlock(stats, indent) { + if (stats.counts.pending > 0) { + let dur = ''; + if (stats.pendingDuration.count > 0) { + dur = ` / avg=${stats.pendingDuration.avg} / p95=${stats.pendingDuration.p95}`; + } + console.log(`${indent}Pending: ${stats.counts.pending}${dur}`); + } + + if (stats.counts.reviewed > 0) { + let dur = ''; + if (stats.reviewedDuration.count > 0) { + dur = ` / avg=${stats.reviewedDuration.avg} / p95=${stats.reviewedDuration.p95}`; + } + console.log(`${indent}Reviewed: ${stats.counts.reviewed}${dur}`); + } +} + +function capitalize(s) { + return s.charAt(0).toUpperCase() + s.slice(1); +} + +function generateCsv(report, filepath) { + const headers = [ + 'Team', + 'Confidence', + 'Total', + 'Draft', + 'Requested', + 'Pending', + 'Reviewed', + 'Merged', + 'Closed', + 'PendingDuration_Avg_Days', + 'PendingDuration_P95_Days', + 'ReviewedDuration_Avg_Days', + 'ReviewedDuration_P95_Days', + ]; + 
+ const rows = []; + + // Helper to format stats into a CSV row + const addRow = (team, confidence, stats) => { + rows.push( + [ + team, + confidence, + stats.counts.total, + stats.counts.draft, + stats.counts.requested, + stats.counts.pending, + stats.counts.reviewed, + stats.counts.merged, + stats.counts.closed, + stats.pendingDuration.count > 0 ? stats.pendingDuration.avg : '', + stats.pendingDuration.count > 0 ? stats.pendingDuration.p95 : '', + stats.reviewedDuration.count > 0 ? stats.reviewedDuration.avg : '', + stats.reviewedDuration.count > 0 ? stats.reviewedDuration.p95 : '', + ].join(',') + ); + }; + + // Global + addRow('Global', 'All', report.global); + if (report.byConfidence) { + Object.entries(report.byConfidence).forEach(([level, stats]) => { + addRow('Global', capitalize(level), stats); + }); + } + + // By Team + Object.entries(report.byTeam).forEach(([team, data]) => { + // Aggregated for team + addRow(team, 'All', data.aggregated); + + // Breakdown by confidence + if (data.byConfidence) { + Object.entries(data.byConfidence).forEach(([level, stats]) => { + addRow(team, capitalize(level), stats); + }); + } + }); + + const csvContent = `${headers.join(',')}\n${rows.join('\n')}`; + fs.writeFileSync(filepath, csvContent); + console.log(`CSV report written to ${filepath}`); +} + +/** + * Calculate combined stats from all team stats for a PR + * @param {object} pr + */ +function calculateCombinedStats(pr) { + // Extract stats from all teams (excluding any potential _combined key if it exists) + const statsList = Object.keys(pr.reviews || {}) + .filter((k) => k !== '_combined') + .map((k) => pr.reviews[k]); + + if (statsList.length === 0) { + return { + startTime: null, + endTime: null, + businessDays: null, + status: pr.draft ? 
'draft' : 'requested', + }; + } + + // Start Time: Min of all valid start times + const validStartTimes = statsList + .map((s) => s.startTime) + .filter((t) => t) + .map((t) => new Date(t).getTime()); + let startTime = null; + if (validStartTimes.length > 0) { + startTime = new Date(Math.min(...validStartTimes)).toISOString(); + } + + // End Time: Max of end times (if all have end times). If any is null, combined end is null. + const validEndTimes = statsList.map((s) => s.endTime); + let endTime = null; + + if (validEndTimes.every((t) => t !== null)) { + // All finished + const endTimestamps = validEndTimes.map((t) => new Date(t).getTime()); + endTime = new Date(Math.max(...endTimestamps)).toISOString(); + } else { + // If PR is closed, end time is closedAt for the whole PR + // If closedAt is missing (old data), fallback to null (pending) + if (pr.status === 'closed' || pr.status === 'merged') { + // Support legacy _combined.endTime if closedAt is missing + const legacyCombinedEnd = pr.reviews?._combined?.endTime; + endTime = pr.closedAt || pr.mergedAt || legacyCombinedEnd || null; + } else { + endTime = null; + } + } + + // Duration + let businessDays = null; + if (startTime) { + // Use Business Hours + const endIso = endTime ? endTime : new Date().toISOString(); + const hours = calculateBusinessHours(startTime, endIso); + businessDays = hours !== null ? 
hours / 24 : null; + } + + // Combined Status + let status = 'requested'; + if (pr.draft) { + status = 'draft'; + } else if (statsList.some((s) => s.status === 'changesRequested')) { + status = 'changesRequested'; + } else if (statsList.every((s) => s.status === 'approved')) { + status = 'approved'; + } else if (statsList.some((s) => s.status === 'commented')) { + status = 'commented'; + } else if (statsList.some((s) => s.status === 'requested')) { + status = 'requested'; + } + + return { + startTime, + endTime, + businessDays, + status, + }; +} diff --git a/scripts/reports/pr-review-times/backfill-label.mjs b/scripts/reports/pr-review-times/backfill-label.mjs new file mode 100644 index 00000000000..3cbc7f4de48 --- /dev/null +++ b/scripts/reports/pr-review-times/backfill-label.mjs @@ -0,0 +1,70 @@ +// scripts/reports/pr-review-times/backfill-label.mjs +import {PR_REVIEW_LABELS, REPO_NAME, REPO_OWNER} from './constants.mjs'; + +export async function backfillCrossTeamLabel(octokit, query) { + if (!query.includes('repo:')) { + query = `repo:${REPO_OWNER}/${REPO_NAME} ${query}`; + } + if (!query.includes('is:pr')) { + query = `is:pr ${query}`; + } + + console.log(`Searching for PRs with query: "${query}"`); + + const iterator = octokit.paginate.iterator( + octokit.rest.search.issuesAndPullRequests, + { + q: query, + per_page: 100, + } + ); + + let count = 0; + let updated = 0; + + for await (const {data: issues} of iterator) { + for (const issue of issues) { + if (!issue.pull_request) continue; + + count++; + const prNumber = issue.number; + + try { + const {data: pr} = await octokit.rest.pulls.get({ + owner: REPO_OWNER, + repo: REPO_NAME, + pull_number: prNumber, + }); + + const teams = pr.requested_teams || []; + const labelNames = (pr.labels || []).map((l) => l.name); + + process.stdout.write( + `Checking PR #${prNumber}: ${teams.length} teams... 
` + ); + + if (teams.length > 1) { + if (labelNames.includes(PR_REVIEW_LABELS.CROSS_TEAM)) { + console.log('Skipping (already labeled)'); + } else { + console.log('ADDING LABEL'); + await octokit.rest.issues.addLabels({ + owner: REPO_OWNER, + repo: REPO_NAME, + issue_number: prNumber, + labels: [PR_REVIEW_LABELS.CROSS_TEAM], + }); + updated++; + } + } else { + console.log('Skipping (< 2 teams)'); + } + } catch (e) { + console.error(`\nFailed to process PR #${prNumber}: ${e.message}`); + } + } + } + + console.log(`\nProcessed ${count} PRs.`); + console.log(`Updated ${updated} PRs.`); +} diff --git a/scripts/reports/pr-review-times/codeowners.mjs b/scripts/reports/pr-review-times/codeowners.mjs new file mode 100644 index 00000000000..81035883d81 --- /dev/null +++ b/scripts/reports/pr-review-times/codeowners.mjs @@ -0,0 +1,129 @@ +// scripts/reports/pr-review-times/codeowners.mjs +import fs from 'node:fs'; +import path from 'node:path'; + +/** + * Loads and parses CODEOWNERS file + */ +export function loadCodeOwners() { + // We assume the script is run from project root, or we try to find it relative to this file + let codeOwnersPath = path.resolve(process.cwd(), 'CODEOWNERS'); + + if (!fs.existsSync(codeOwnersPath)) { + // Try searching up from this script location + // This script is at scripts/reports/pr-review-times/ + // So root is ../../../ + const __dirname = path.dirname(new URL(import.meta.url).pathname); + codeOwnersPath = path.resolve(__dirname, '../../../CODEOWNERS'); + } + + if (!fs.existsSync(codeOwnersPath)) { + console.warn('CODEOWNERS file not found at', codeOwnersPath); + return []; + } + const content = fs.readFileSync(codeOwnersPath, 'utf-8'); + return parseCodeOwners(content); +} + +/** + * Parses CODEOWNERS content into rules + * @param {string} content + */ +function parseCodeOwners(content) { + const rules = []; + const lines = content.split('\n'); + + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed || 
trimmed.startsWith('#')) continue; + + // Split by whitespace, respecting escaped spaces if any (simplified here) + const parts = trimmed.split(/\s+/); + if (parts.length < 2) continue; // Need at least pattern and one owner + + const pattern = parts[0]; + const owners = parts.slice(1); + + rules.push({ + pattern, + regex: globToRegex(pattern), + owners, + }); + } + + // Return reversed to make "first match wins" logic easier if iterating + // But usually we iterate all and pick the last one. + // Git logic: Last match wins. + return rules; +} + +/** + * Simple Glob to Regex converter for CODEOWNERS + * @param {string} pattern + */ +function globToRegex(pattern) { + let regexStr = pattern; + + // Escape regex special characters except * and ? + regexStr = regexStr.replace(/[.+^${}()|[\]\\]/g, '\\$&'); + + // Handle double star ** + regexStr = regexStr.replace(/\*\*/g, '.*'); + + // Handle single star * (matches non-slash char) + regexStr = regexStr.replace(/(? e.event === 'merged'); + let status = 'unknown'; + + if (isMerged) { + status = 'merged'; + } else if (fullPr.state === 'closed') { + status = 'closed'; + } else if (fullPr.draft) { + status = 'draft'; + } else { + status = 'ready_for_review'; + } + + return { + prNumber, + title: fullPr.title, + url: fullPr.html_url, + reviews: reviewStats, + confidence: getConfidence(fullPr.labels || []), + author: fullPr.user.login, + state: fullPr.state, + status, + createdAt: fullPr.created_at, + closedAt: fullPr.closed_at, + mergedAt: fullPr.merged_at, + baseBranch: fullPr.base.ref, + updatedAt: fullPr.updated_at, + }; +} + +/** + * Fetch timeline events for a PR + * @param {object} octokit + * @param {number} prNumber + */ +async function fetchPREvents(octokit, prNumber) { + return await octokit.paginate(octokit.rest.issues.listEventsForTimeline, { + owner: REPO_OWNER, + repo: REPO_NAME, + issue_number: prNumber, + per_page: 100, + }); +} + +/** + * Fetch reviews for a PR + * @param {object} octokit + * @param {number} 
prNumber + */ +async function fetchPRReviews(octokit, prNumber) { + return await octokit.paginate(octokit.rest.pulls.listReviews, { + owner: REPO_OWNER, + repo: REPO_NAME, + pull_number: prNumber, + per_page: 100, + }); +} + +/** + * Fetch valid files for a PR (needed for CODEOWNERS check) + * @param {object} octokit + * @param {number} prNumber + */ +async function fetchPRFiles(octokit, prNumber) { + const files = await octokit.paginate(octokit.rest.pulls.listFiles, { + owner: REPO_OWNER, + repo: REPO_NAME, + pull_number: prNumber, + per_page: 100, + }); + return files.map((f) => f.filename); +} + +/** + * Analyze stats for a single team + */ +async function analyzeTeamStat( + octokit, + ownerHandle, + pr, + events, + reviews, + teamMembersCache +) { + // 1. Determine Reviewers + let potentialReviewers = []; + // Heuristic for teams vs users + if (ownerHandle.includes('/')) { + potentialReviewers = await getTeamMembers( + octokit, + ownerHandle, + teamMembersCache + ); + } else { + potentialReviewers = [ownerHandle.replace('@', '')]; + } + + // 2. Find Start Time: Earliest review_requested event for this team/user + const requestEvents = events.filter((e) => e.event === 'review_requested'); + const filteredRequests = idxRequestEvents( + requestEvents, + ownerHandle, + potentialReviewers + ); + + let startTime = null; + if (filteredRequests.length > 0) { + // Sort by earliest + filteredRequests.sort( + (a, b) => new Date(a.created_at) - new Date(b.created_at) + ); + const firstRequest = filteredRequests[0].created_at; + + // Check Draft Status + const readyEvents = events.filter((e) => e.event === 'ready_for_review'); + if (pr.draft) { + return { + startTime: null, + endTime: null, + businessDays: null, + status: 'draft', + }; + } else { + const latestReady = + readyEvents.length > 0 + ? 
readyEvents.sort( + (a, b) => new Date(b.created_at) - new Date(a.created_at) + )[0].created_at + : null; + + startTime = firstRequest; + if (latestReady && new Date(latestReady) > new Date(firstRequest)) { + startTime = latestReady; + } + } + } else { + return { + startTime: null, + endTime: null, + businessDays: null, + status: 'requested', + }; + } + + // 3. Status and End Time + const relevantReviews = reviews.filter( + (r) => + potentialReviewers.includes(r.user.login) && + new Date(r.submitted_at) > new Date(startTime) + ); + relevantReviews.sort( + (a, b) => new Date(a.submitted_at) - new Date(b.submitted_at) + ); + + // Latest review per user + const latestByUser = new Map(); + for (const r of relevantReviews) { + latestByUser.set(r.user.login, r); + } + const finalReviews = Array.from(latestByUser.values()); + + let status = 'requested'; + let endTime = null; + + const hasChangesRequested = finalReviews.some( + (r) => r.state === 'CHANGES_REQUESTED' + ); + const hasApproval = finalReviews.some((r) => r.state === 'APPROVED'); + const hasComment = finalReviews.some((r) => r.state === 'COMMENTED'); + + if (hasChangesRequested) { + status = 'changesRequested'; + const changes = relevantReviews.filter( + (r) => r.state === 'CHANGES_REQUESTED' + ); + if (changes.length > 0) { + endTime = changes[0].submitted_at; + } + } else if (hasApproval) { + status = 'approved'; + const approvals = relevantReviews.filter((r) => r.state === 'APPROVED'); + if (approvals.length > 0) { + endTime = approvals[0].submitted_at; + } + } else if (hasComment) { + status = 'commented'; + endTime = null; + } + + // 4. 
// NOTE(review): in the original patch this span began with the tail of
// fetch-pr.mjs's analyzeTeamStat (merged/closed status override and the
// businessDays computation); its opening lines fall outside this chunk and
// cannot be reconstructed here. Only complete definitions follow.

/**
 * Filter review_request timeline events down to the ones aimed at a given
 * code owner: either the owner's team slug directly, or an individual
 * reviewer who belongs to that team.
 *
 * @param {Array<object>} events - GitHub `review_requested` timeline events.
 * @param {string} ownerHandle - CODEOWNERS handle, e.g. "@coveo/dxui".
 * @param {Array<string>} members - Logins belonging to the owner's team.
 * @returns {Array<object>} Events requesting this owner (team or member).
 */
function idxRequestEvents(events, ownerHandle, members) {
  // "@coveo/dxui" -> "dxui"
  const slug = ownerHandle.replace('@', '').split('/').pop();

  return events.filter((e) => {
    // Direct team request
    if (e.requested_team && e.requested_team.slug === slug) return true;
    // Individual request for a member of the team
    if (e.requested_reviewer && members.includes(e.requested_reviewer.login))
      return true;
    return false;
  });
}

/**
 * Fetch members of a GitHub team, caching results. Failures are cached as an
 * empty list so a bad slug only hits the API once per run.
 *
 * @param {object} octokit - Authenticated Octokit instance.
 * @param {string} teamSlug - "@org/team", "org/team", or bare "team".
 * @param {Map<string, Array<string>>} cache - teamSlug -> member logins.
 * @returns {Promise<Array<string>>} Member logins ([] on API failure).
 */
async function getTeamMembers(octokit, teamSlug, cache) {
  if (cache.has(teamSlug)) {
    return cache.get(teamSlug);
  }

  // Parse org and slug from "@org/team"; the org defaults to the repo owner.
  let org = REPO_OWNER;
  let slug = teamSlug;

  if (teamSlug.startsWith('@')) {
    const parts = teamSlug.substring(1).split('/');
    if (parts.length === 2) {
      org = parts[0];
      slug = parts[1];
    } else {
      slug = parts[0];
    }
  }

  try {
    const members = await octokit.paginate(
      octokit.rest.teams.listMembersInOrg,
      {
        org,
        team_slug: slug,
        per_page: 100,
      }
    );
    const logins = members.map((m) => m.login);
    cache.set(teamSlug, logins);
    return logins;
  } catch (e) {
    console.warn(`Failed to fetch members for team ${teamSlug}:`, e.message);
    cache.set(teamSlug, []);
    return [];
  }
}

/**
 * Distinct CODEOWNERS owners responsible for a set of changed files.
 *
 * @param {Array<string>} files - Changed file paths.
 * @param {Array<object>} codeOwners - Parsed CODEOWNERS rules.
 * @returns {Array<string>} Unique owner handles, in first-seen order.
 */
function getResponsibleTeams(files, codeOwners) {
  const teams = new Set();
  for (const file of files) {
    for (const owner of getOwnersForFile(file, codeOwners)) {
      teams.add(owner);
    }
  }
  return Array.from(teams);
}

/**
 * Extract the reviewer-confidence level from a PR's labels.
 *
 * @param {Array<{name: string}>} labels - Label objects from the GitHub API.
 * @returns {'high'|'low'|'unknown'} Confidence bucket used in reports.
 */
function getConfidence(labels) {
  const names = labels.map((l) => l.name);
  if (names.includes(PR_REVIEW_LABELS.HIGH_CONFIDENCE)) return 'high';
  if (names.includes(PR_REVIEW_LABELS.LOW_CONFIDENCE)) return 'low';
  return 'unknown';
}

// --- scripts/reports/pr-review-times/holidays.mjs (new file in the patch) ---

// Fixed-date holidays (MM-DD) observed every year, covering both CA and US
// offices. "Generous" policy: common bridge days are included.
const FIXED_HOLIDAYS_MM_DD = new Set([
  '01-01', // New Year
  '01-02', // Day after New Year (Generous)
  '06-19', // Juneteenth (US)
  '06-24', // St-Jean (QC)
  '07-01', // Canada Day
  '07-04', // Independence Day (US)
  '09-30', // Truth & Reconciliation (CA - Generous)
  '12-24', // Christmas Eve
  '12-25', // Christmas
  '12-26', // Boxing Day
  '12-31', // New Year's Eve
]);

// Easter-derived holidays move irregularly, so they are hand-picked per year:
// Good Friday (Easter - 2 days) and Easter Monday (Easter + 1 day).
const IRREGULAR_HOLIDAYS = [
  // 2023 (Easter Apr 9)
  '2023-04-07',
  '2023-04-10',
  // 2024 (Easter Mar 31)
  '2024-03-29',
  '2024-04-01',
  // 2025 (Easter Apr 20)
  '2025-04-18',
  '2025-04-21',
  // 2026 (Easter Apr 5)
  '2026-04-03',
  '2026-04-06',
  // 2027 (Easter Mar 28)
  '2027-03-26',
  '2027-03-29',
];

/**
 * Returns a Set of holiday strings (YYYY-MM-DD) for CA and US, 2022-2027.
 * Includes fixed dates, rule-based dates, and the hardcoded irregular dates.
 * A fresh Set is returned on each call; prefer isHoliday() for repeated
 * lookups (it caches).
 */
export function getHolidays() {
  const holidays = new Set(IRREGULAR_HOLIDAYS);
  const startYear = 2022;
  const endYear = 2027;

  for (let year = startYear; year <= endYear; year++) {
    // 1. Fixed dates
    for (const mmdd of FIXED_HOLIDAYS_MM_DD) {
      holidays.add(`${year}-${mmdd}`);
    }

    // 2. Rule-based dates
    // Family Day (CA) / Presidents Day (US): 3rd Monday of February
    holidays.add(getNthWeekdayOfMonth(year, 1, 1, 3));

    // Victoria Day (CA): the Monday preceding May 25 (falls May 18-24)
    holidays.add(getVictoriaDay(year));

    // Memorial Day (US): last Monday of May
    holidays.add(getLastWeekdayOfMonth(year, 4, 1));

    // Labor Day: 1st Monday of September
    holidays.add(getNthWeekdayOfMonth(year, 8, 1, 1));

    // Thanksgiving (CA): 2nd Monday of October
    holidays.add(getNthWeekdayOfMonth(year, 9, 1, 2));

    // Thanksgiving (US): 4th Thursday of November
    const thanksgivingUS = getNthWeekdayOfMonth(year, 10, 4, 4);
    holidays.add(thanksgivingUS);

    // Day after Thanksgiving (Black Friday - Generous).
    // FIX: the previous code did `new Date('YYYY-MM-DD')`, which parses as UTC
    // midnight; in any negative-UTC-offset timezone the subsequent local-time
    // setDate(+1)/formatDate round-tripped back to Thanksgiving itself and
    // Black Friday was never added. Build the date from local components so
    // the result is timezone-independent.
    const [ty, tm, td] = thanksgivingUS.split('-').map(Number);
    holidays.add(formatDate(new Date(ty, tm - 1, td + 1)));
  }

  return holidays;
}

/**
 * YYYY-MM-DD of the nth given weekday of a month (local time).
 *
 * @param {number} year
 * @param {number} monthIndex 0=Jan ... 11=Dec
 * @param {number} dayOfWeek 0=Sun, 1=Mon, ..., 6=Sat
 * @param {number} n 1-based occurrence (1=1st, 2=2nd, ...)
 * @returns {string|null} null when the month has no nth such weekday.
 */
function getNthWeekdayOfMonth(year, monthIndex, dayOfWeek, n) {
  const date = new Date(year, monthIndex, 1);
  let count = 0;
  while (date.getMonth() === monthIndex) {
    if (date.getDay() === dayOfWeek) {
      count++;
      if (count === n) {
        return formatDate(date);
      }
    }
    date.setDate(date.getDate() + 1);
  }
  return null;
}

/**
 * YYYY-MM-DD of the last given weekday of a month (local time).
 */
function getLastWeekdayOfMonth(year, monthIndex, dayOfWeek) {
  // Day 0 of the next month is the last day of this month.
  const date = new Date(year, monthIndex + 1, 0);
  while (date.getMonth() === monthIndex) {
    if (date.getDay() === dayOfWeek) {
      return formatDate(date);
    }
    date.setDate(date.getDate() - 1);
  }
  return null;
}

/**
 * Victoria Day: the Monday on or before May 24 (i.e. strictly before May 25),
 * so the result always falls between May 18 and May 24.
 */
function getVictoriaDay(year) {
  const date = new Date(year, 4, 24); // May 24
  while (date.getDay() !== 1) {
    // 1 = Monday
    date.setDate(date.getDate() - 1);
  }
  return formatDate(date);
}

/** Format a Date as local-time YYYY-MM-DD. */
function formatDate(date) {
  const yyyy = date.getFullYear();
  const mm = String(date.getMonth() + 1).padStart(2, '0');
  const dd = String(date.getDate()).padStart(2, '0');
  return `${yyyy}-${mm}-${dd}`;
}

// isHoliday() runs once per calendar day inside calculateBusinessHours's
// loops; cache the Set instead of rebuilding ~6 years of holidays per call.
let holidaySetCache = null;

/** True when the (local) date is a company holiday. */
export function isHoliday(date) {
  if (!holidaySetCache) {
    holidaySetCache = getHolidays();
  }
  return holidaySetCache.has(formatDate(date));
}

/** True for Saturday/Sunday (local time). */
export function isWeekend(date) {
  const day = date.getDay();
  return day === 0 || day === 6; // 0=Sun, 6=Sat
}

/**
 * Business hours elapsed between two ISO timestamps.
 *
 * SLA-style rules:
 * - Weekends and holidays contribute 0 hours.
 * - A start on non-business time slides forward to the next business day at
 *   00:00 (the review clock does not start ticking over a weekend).
 * - An end on non-business time contributes nothing past the last business
 *   day's midnight (a review submitted on Sunday counts as Friday close).
 *
 * @param {string} startIso - Interval start (ISO 8601).
 * @param {string} endIso - Interval end (ISO 8601).
 * @returns {number|null} Business hours, 0 if the interval is empty or fully
 *   non-business, or null when either bound is missing.
 */
export function calculateBusinessHours(startIso, endIso) {
  if (!startIso || !endIso) return null;

  const start = new Date(startIso);
  const end = new Date(endIso);

  if (start > end) return 0;

  // Slide the start forward to the next business day at 00:00.
  while (isWeekend(start) || isHoliday(start)) {
    start.setDate(start.getDate() + 1);
    start.setHours(0, 0, 0, 0);
    if (start > end) return 0; // the whole interval was non-business time
  }

  const MS_PER_HOUR = 1000 * 60 * 60;
  const MS_PER_DAY = 24 * MS_PER_HOUR;

  // Same calendar day: count the raw span (start is a business day here).
  if (isSameDay(start, end)) {
    if (isWeekend(start) || isHoliday(start)) {
      return 0;
    }
    return (end - start) / MS_PER_HOUR;
  }

  let totalMs = 0;

  // Partial first day: from `start` up to the following midnight.
  // FIX: measure to the next midnight rather than 23:59:59.999, which
  // silently dropped 1ms for every multi-day interval.
  const cursor = new Date(start);
  cursor.setDate(cursor.getDate() + 1);
  cursor.setHours(0, 0, 0, 0);
  if (!isWeekend(start) && !isHoliday(start)) {
    totalMs += cursor - start;
  }

  // Full intermediate days.
  while (!isSameDay(cursor, end)) {
    if (!isWeekend(cursor) && !isHoliday(cursor)) {
      totalMs += MS_PER_DAY;
    }
    cursor.setDate(cursor.getDate() + 1);
    if (cursor > end) break; // safety against pathological inputs
  }

  // Partial last day: from midnight to `end` (0 on a non-business day — the
  // clock stopped at the preceding business close).
  if (!isWeekend(end) && !isHoliday(end)) {
    const dayStart = new Date(end);
    dayStart.setHours(0, 0, 0, 0);
    totalMs += end - dayStart;
  }

  return totalMs / MS_PER_HOUR;
}

/** True when both Dates fall on the same local calendar day. */
function isSameDay(d1, d2) {
  return (
    d1.getFullYear() === d2.getFullYear() &&
    d1.getMonth() === d2.getMonth() &&
    d1.getDate() === d2.getDate()
  );
}

// --- scripts/reports/pr-review-times/index.mjs continues on the next patch line ---
'@actions/github'; // (completes `import {getOctokit} from`, split by the collapsed patch line above)
import {aggregateReports} from './aggregate.mjs';
import {backfillCrossTeamLabel} from './backfill-label.mjs';
import {loadCodeOwners} from './codeowners.mjs';
import {fetchAndAnalyzePr} from './fetch-pr.mjs';
import {listPrs} from './list-prs.mjs';
import {getStoredPrMetadata, savePrData} from './storage.mjs';

const USAGE = `
Usage:
  node index.mjs fetch [--force] Fetch PRs matching query and save data.
  node index.mjs report Generate aggregation report from local data.
  node index.mjs backfill-labels Backfill 'cross-team' labels.

  Example:
  node index.mjs fetch "repo:coveo/ui-kit is:pr is:merged created:>2023-01-01"
  node index.mjs backfill-labels "is:pr created:>2025-11-01"
`;

// CLI entry point. Dispatches on the first argument:
//   fetch <query> [--force]  - search PRs, analyze each, save to local storage
//   report                   - aggregate previously saved data
//   backfill-labels <query>  - retro-apply the 'cross-team' label
// Exits non-zero on missing arguments, missing token, or unrecoverable errors.
async function main() {
  const args = process.argv.slice(2);
  const command = args[0];

  if (!command) {
    console.log(USAGE);
    process.exit(1);
  }

  if (command === 'fetch') {
    const query = args[1];
    const force = args.includes('--force');

    if (!query) {
      console.error('Error: Missing query argument.');
      console.log(USAGE);
      process.exit(1);
    }

    // GITHUB_CREDENTIALS takes precedence over GITHUB_TOKEN.
    const token = process.env.GITHUB_CREDENTIALS || process.env.GITHUB_TOKEN;
    if (!token) {
      console.error(
        'Error: GITHUB_CREDENTIALS or GITHUB_TOKEN environment variable is required.'
      );
      process.exit(1);
    }

    const octokit = getOctokit(token);
    // Shared across all PRs in this run to avoid re-fetching team rosters.
    const teamMembersCache = new Map();

    console.log(`Fetching PRs with query: "${query}"...`);

    // Load Codeowners once
    const codeOwnersRules = loadCodeOwners();

    try {
      const prList = await listPrs(octokit, query);
      console.log(`Found ${prList.length} PRs.`);

      for (const [index, pr] of prList.entries()) {
        const prNumber = pr.number;

        // Incremental fetch: skip PRs whose stored copy is at least as new as
        // the remote `updated_at`, unless --force was passed.
        let shouldFetch = false;
        if (force) {
          shouldFetch = true;
        } else {
          const localMeta = getStoredPrMetadata(prNumber);
          if (!localMeta || !localMeta.updatedAt) {
            shouldFetch = true;
          } else if (new Date(pr.updated_at) > new Date(localMeta.updatedAt)) {
            shouldFetch = true;
          }
        }

        if (!shouldFetch) {
          // One dot per skipped PR keeps progress visible without log spam.
          process.stdout.write('.');
          continue;
        }

        console.log(
          `\n[${index + 1}/${prList.length}] Analyzing PR #${prNumber}...`
        );
        try {
          const analysis = await fetchAndAnalyzePr(
            octokit,
            pr,
            codeOwnersRules,
            teamMembersCache
          );
          savePrData(analysis);
          console.log(`Saved data for PR #${prNumber}`);
        } catch (err) {
          // A single bad PR must not abort the whole batch.
          console.error(`Failed to analyze PR #${prNumber}:`, err.message);
        }
      }
      console.log('\nFetch complete.');
    } catch (err) {
      console.error('Error fetching PRs:', err);
      process.exit(1);
    }
  } else if (command === 'report') {
    try {
      aggregateReports();
    } catch (err) {
      console.error('Error generating report:', err);
      process.exit(1);
    }
  } else if (command === 'backfill-labels') {
    const query = args[1];
    if (!query) {
      console.error('Error: Missing query argument.');
      console.log(USAGE);
      process.exit(1);
    }

    const token = process.env.GITHUB_CREDENTIALS || process.env.GITHUB_TOKEN;
    if (!token) {
      console.error(
        'Error: GITHUB_CREDENTIALS or GITHUB_TOKEN environment variable is required.'
      );
      process.exit(1);
    }
    const octokit = getOctokit(token);

    try {
      await backfillCrossTeamLabel(octokit, query);
    } catch (err) {
      console.error('Error in backfill:', err);
      process.exit(1);
    }
  } else {
    console.error(`Unknown command: ${command}`);
    console.log(USAGE);
    process.exit(1);
  }
}

main();
diff --git a/scripts/reports/pr-review-times/list-prs.mjs b/scripts/reports/pr-review-times/list-prs.mjs new file mode 100644 index 00000000000..742116a6635 --- /dev/null +++ b/scripts/reports/pr-review-times/list-prs.mjs @@ -0,0 +1,24 @@
// scripts/reports/pr-review-times/list-prs.mjs
import {REPO_NAME, REPO_OWNER} from './constants.mjs';

/**
 * Fetch all PRs matching the query
 * @param {object} octokit
 * @param {string} query
 * @returns {Promise<Array<object>>} Search results (issue-shaped objects;
 *   `is:pr` restricts them to pull requests).
 */
export async function listPrs(octokit, query) {
  console.log(`Searching for PRs with query: "${query}"...`);
  // If the query doesn't contain 'repo:', prepend the default repo
  let q = query;
  if (!q.includes('repo:')) {
    q = `repo:${REPO_OWNER}/${REPO_NAME} ${q}`;
  }

  const options = octokit.rest.search.issuesAndPullRequests.endpoint.merge({
    q,
    per_page: 100,
  });

  // paginate() walks every page of search results.
  const results = await octokit.paginate(options);
  return results;
}
diff --git a/scripts/reports/pr-review-times/run.sh b/scripts/reports/pr-review-times/run.sh new file mode 100755 index 00000000000..2b0cceadaa5 --- /dev/null +++ b/scripts/reports/pr-review-times/run.sh @@ -0,0 +1,17 @@
#!/bin/bash

# Convenience wrapper: resolves a GitHub token (from the environment or from
# 1Password) and then runs the fetch + report pipeline.
if [ -z "$GITHUB_TOKEN" ]; then
  echo "GITHUB_TOKEN is not set. Fetching from 1Password..." >&2
  export GITHUB_TOKEN="$(op --account "coveo.1password.com" item get "github coveo token" --fields label=password --reveal)"

  if [ -z "$GITHUB_TOKEN" ]; then
    echo "Failed to retrieve GITHUB_TOKEN from 1Password." >&2
    echo "In your Coveo Vault, make sure there exists an item named 'github coveo token' with the token in the password field."
>&2 + exit 1 + fi +fi + +current_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +node "$current_dir/index.mjs" fetch "is:pr label:cross-team base:main created:>2025-11-01" +node "$current_dir/index.mjs" report diff --git a/scripts/reports/pr-review-times/stats.mjs b/scripts/reports/pr-review-times/stats.mjs new file mode 100644 index 00000000000..23cf04f173f --- /dev/null +++ b/scripts/reports/pr-review-times/stats.mjs @@ -0,0 +1,633 @@ +#!/usr/bin/env node + +import fs from 'node:fs'; +import path from 'node:path'; +import {getOctokit} from '@actions/github'; +import { + PR_REVIEW_LABELS, + REPO_NAME, + REPO_OWNER, + REPORT_OUTPUT_PATH, +} from './constants.mjs'; + +/** + * Main execution function + */ +async function main() { + console.log('Starting PR Review Stats Analysis...'); + + const token = process.env.GITHUB_CREDENTIALS || process.env.GITHUB_TOKEN; + if (!token) { + console.error( + 'Error: GITHUB_CREDENTIALS or GITHUB_TOKEN environment variable is required.' + ); + process.exit(1); + } + + const octokit = getOctokit(token); + + // Cache for team members to avoid rate limits + const teamMembersCache = new Map(); + + try { + // Phase 2: CODEOWNERS parsing + console.log('Loading CODEOWNERS...'); + const codeOwners = loadCodeOwners(); + console.log(`Loaded ${codeOwners.length} CODEOWNERS rules.`); + + // Phase 3: Fetch Data + console.log(`Fetching PRs with label: ${PR_REVIEW_LABELS.CROSS_TEAM}...`); + const prs = await fetchPRsWithLabel(octokit, PR_REVIEW_LABELS.CROSS_TEAM); + console.log(`Found ${prs.length} PRs.`); + + // Phase 3: Analysis + console.log('Analyzing PR blockage duration...'); + const rawBlockages = []; + + for (const pr of prs) { + console.log(`Analyzing PR #${pr.number}: ${pr.title}`); + try { + const blockage = await analyzePR( + octokit, + pr, + codeOwners, + teamMembersCache + ); + if (blockage) { + rawBlockages.push(blockage); + } + } catch (e) { + console.error(`Failed to analyze PR #${pr.number}`, e); + } + } + + // Phase 4: 
// Report — tail of stats.mjs main()'s try block; the function opens on an
// earlier patch line ("Phase 4" comment continues here).
    console.log(
      `Generating JSON report for ${rawBlockages.length} blockages...`
    );
    const reportPath = path.resolve(process.cwd(), REPORT_OUTPUT_PATH);
    const reportData = JSON.stringify(rawBlockages, null, 2);
    fs.writeFileSync(reportPath, reportData);
    console.log(`Report written to ${reportPath}`);

    console.log('Done.');
  } catch (error) {
    console.error('An error occurred:', error);
    process.exit(1);
  }
}

/**
 * Fetch all PRs (open and closed) with a specific label
 * @param {object} octokit
 * @param {string} label
 * @returns {Promise<Array<object>>} Search results for every matching PR.
 */
async function fetchPRsWithLabel(octokit, label) {
  // The label is quoted so multi-word label names survive the search syntax.
  const q = `repo:${REPO_OWNER}/${REPO_NAME} is:pr label:"${label}"`;
  const options = octokit.rest.search.issuesAndPullRequests.endpoint.merge({
    q,
    per_page: 100,
  });

  const results = await octokit.paginate(options);
  // Search returns issues, need to filter/map if necessary, but is:pr handles it.
  // Note: search results might not have all fields?
  // They have `pull_request` property.
  return results;
}

/**
 * Fetch timeline events for a PR
 * @param {object} octokit
 * @param {number} prNumber
 * @returns {Promise<Array<object>>} All timeline events, fully paginated.
 */
async function fetchPREvents(octokit, prNumber) {
  return await octokit.paginate(octokit.rest.issues.listEventsForTimeline, {
    owner: REPO_OWNER,
    repo: REPO_NAME,
    issue_number: prNumber,
    per_page: 100,
  });
}

/**
 * Fetch reviews for a PR
 * @param {object} octokit
 * @param {number} prNumber
 * @returns {Promise<Array<object>>} All submitted reviews, fully paginated.
 */
async function fetchPRReviews(octokit, prNumber) {
  return await octokit.paginate(octokit.rest.pulls.listReviews, {
    owner: REPO_OWNER,
    repo: REPO_NAME,
    pull_number: prNumber,
    per_page: 100,
  });
}

/**
 * Fetch valid files for a PR (needed for CODEOWNERS check)
 * @param {object} octokit
 * @param {number} prNumber
 * @returns {Promise<Array<string>>} Changed file paths only (not file objects).
 */
async function fetchPRFiles(octokit, prNumber) {
  const files = await octokit.paginate(octokit.rest.pulls.listFiles, {
    owner: REPO_OWNER,
    repo: REPO_NAME,
    pull_number: prNumber,
    per_page: 100,
  });
  return files.map((f) => f.filename);
}

/**
 * Analyze a single PR: per-CODEOWNER review stats plus a combined rollup.
 * @param {object} octokit
 * @param {object} pr - Search-result PR object.
 * @param {Array} codeOwners - Parsed CODEOWNERS rules.
 * @param {Map} teamMembersCache - Shared team-roster cache.
 * @returns {Promise<object>} Record with per-owner stats under `reviews`.
 */
async function analyzePR(octokit, pr, codeOwners, teamMembersCache) {
  const prNumber = pr.number;

  // Fetch timeline events, reviews, and files
  const events = await fetchPREvents(octokit, prNumber);
  const reviews = await fetchPRReviews(octokit, prNumber);
  const files = await fetchPRFiles(octokit, prNumber);

  // Identify Responsible Teams/Owners
  const responsibleOwners = getResponsibleTeams(files, codeOwners);

  // Calculate Per-Team Stats
  const reviewStats = {};
  const teamStatsList = [];

  for (const owner of responsibleOwners) {
    const stats = await analyzeTeamStat(
      octokit,
      owner,
      pr,
      events,
      reviews,
      teamMembersCache
    );
    reviewStats[owner] = stats;
    teamStatsList.push(stats);
  }

  // Calculate Combined Stats
  // NOTE(review): "_combined" shares the reviews map with owner handles;
  // handles always start with "@" so the key cannot collide today.
  const combined = calculateCombinedStats(teamStatsList, pr);
  reviewStats._combined = combined;

  return {
    prNumber,
    title: pr.title,
    url: pr.html_url,
    reviews: reviewStats,
    confidence: getConfidence(pr.labels || []),
    author: pr.user.login,
    state: pr.state,
  };
}

/**
 * Calculate combined stats from all team stats
 * @param {Array<object>} statsList - Per-team stat records.
 * @param {object} pr - The PR (for draft/closed fallbacks).
 */
function calculateCombinedStats(statsList, pr) {
  if (statsList.length === 0) {
    return {
      startTime: null,
      endTime: null,
      durationHours: null,
      status: pr.draft ? 'draft' : 'requested',
    };
  }

  // Start Time: Min of all valid start times
  const validStartTimes = statsList
    .map((s) => s.startTime)
    .filter((t) => t)
    .map((t) => new Date(t).getTime());
  let startTime = null;
  if (validStartTimes.length > 0) {
    startTime = new Date(Math.min(...validStartTimes)).toISOString();
  }

  // End Time: Max of end times (if all have end times). If any is null, combined end is null.
  // Exception: If PR is closed/merged, end time might be forced?
  // User says: "endTime: maximum of per-team endTimes."
  // User JSON example shows null combined endTime when one team is null.

  const validEndTimes = statsList.map((s) => s.endTime);
  let endTime = null;

  if (validEndTimes.every((t) => t !== null)) {
    // All finished
    const endTimestamps = validEndTimes.map((t) => new Date(t).getTime());
    endTime = new Date(Math.max(...endTimestamps)).toISOString();
  } else {
    // If not all finished, check if PR is closed.
    // If PR is closed, we might consider the blockage "ended" regardless of approval?
    // But if we stick to the user example: one null -> combined null.
    if (pr.state === 'closed') {
      // If closed, the blockage is over. Use closed_at as fallback max?
      endTime = pr.closed_at;
    } else {
      endTime = null;
    }
  }

  // Duration (open-ended blockages are measured up to "now")
  let durationHours = null;
  if (startTime) {
    const start = new Date(startTime);
    const end = endTime ?
new Date(endTime) : new Date(); // (continues the ternary split by the collapsed patch line above)
    const diffMs = end - start;
    durationHours = diffMs / (1000 * 60 * 60);
  }

  // Combined Status
  // Priority: draft > changesRequested > asked/commented > approved?
  // User example: "_combined status: changesRequested" when one team is approved, one changesRequested.

  let status = 'requested';
  if (pr.draft) {
    status = 'draft';
  } else if (statsList.some((s) => s.status === 'changesRequested')) {
    status = 'changesRequested';
  } else if (statsList.every((s) => s.status === 'approved')) {
    status = 'approved';
  } else if (statsList.some((s) => s.status === 'commented')) {
    status = 'commented';
  } else if (statsList.some((s) => s.status === 'requested')) {
    status = 'requested';
  } else if (statsList.length === 0) {
    // NOTE(review): unreachable — the empty-list case already returned at the
    // top of this function.
    status = 'requested';
  } else {
    status = 'requested'; // Fallback
  }

  return {
    startTime,
    endTime,
    durationHours,
    status,
  };
}

/**
 * Analyze stats for a single team: when its review clock started, whether it
 * approved/blocked/commented, and how long the PR was waiting on it.
 */
async function analyzeTeamStat(
  octokit,
  ownerHandle,
  pr,
  events,
  reviews,
  teamMembersCache
) {
  // 1. Determine Reviewers
  let potentialReviewers = [];
  let _isTeam = false; // NOTE(review): assigned but never read — dead flag.
  // Heuristic for teams vs users: handles with "/" are teams.
  if (ownerHandle.includes('/')) {
    potentialReviewers = await getTeamMembers(
      octokit,
      ownerHandle,
      teamMembersCache
    );
    _isTeam = true;
  } else {
    potentialReviewers = [ownerHandle.replace('@', '')];
  }

  // 2. Find Start Time: Earliest review_requested event for this team/user
  // If PR was draft, we take the later of (Request Time, Ready For Review Time)
  const requestEvents = events.filter((e) => e.event === 'review_requested');
  const filteredRequests = idxRequestEvents(
    requestEvents,
    ownerHandle,
    potentialReviewers
  );

  let startTime = null;
  if (filteredRequests.length > 0) {
    // Sort by earliest
    filteredRequests.sort(
      (a, b) => new Date(a.created_at) - new Date(b.created_at)
    );
    const firstRequest = filteredRequests[0].created_at;

    // Check Draft Status
    const readyEvents = events.filter((e) => e.event === 'ready_for_review');
    if (pr.draft) {
      // It is CURRENTLY draft.
      // If filteredRequests exists, it means we requested review while in draft?
      // Or before it went back to draft?
      // User says: "draft: PR in draft, do not count review as started".
      // If currently draft, result should be "not started"? But we need to
      // return a structure.
      return {
        startTime: null,
        endTime: null,
        durationHours: null,
        status: 'draft',
      };
    } else {
      // It is NOT currently draft.
      // Did the request happen while it WAS draft?
      // Simple heuristic: If request < last ready_for_review, use valid ready_for_review time.
      const latestReady =
        readyEvents.length > 0
          ? readyEvents.sort(
              (a, b) => new Date(b.created_at) - new Date(a.created_at)
            )[0].created_at
          : null;

      startTime = firstRequest;
      if (latestReady && new Date(latestReady) > new Date(firstRequest)) {
        startTime = latestReady;
      }
    }
  } else {
    // No explicit request found.
    // If it's a code owner, maybe the 'labeled' event (legacy heuristic)?
    // Or if PR is open and older than some date?
    // Default: If no request event, not started.
    // UNLESS we fallback to PR creation if map is empty?
    // Let's return nulls if no request found.
    return {
      startTime: null,
      endTime: null,
      durationHours: null,
      status: 'requested',
    };
  }

  // 3. Status and End Time
  // Calculate Status first: only reviews by this team's members, submitted
  // after the clock started, are relevant.
  const relevantReviews = reviews.filter(
    (r) =>
      potentialReviewers.includes(r.user.login) &&
      new Date(r.submitted_at) > new Date(startTime)
  );
  // Sort reviews chronologically
  relevantReviews.sort(
    (a, b) => new Date(a.submitted_at) - new Date(b.submitted_at)
  );

  // Latest review per user (a later review supersedes that user's earlier one)
  const latestByUser = new Map();
  for (const r of relevantReviews) {
    latestByUser.set(r.user.login, r);
  }
  const finalReviews = Array.from(latestByUser.values());

  let status = 'requested';
  let endTime = null;

  const hasChangesRequested = finalReviews.some(
    (r) => r.state === 'CHANGES_REQUESTED'
  );
  const hasApproval = finalReviews.some((r) => r.state === 'APPROVED');
  const hasComment = finalReviews.some((r) => r.state === 'COMMENTED');

  // End Time Determination:
  // If Approved: End Time is the timestamp of the *latest* necessary approval?
  // For a Team, ONE approval is usually enough.
  // So find the FIRST approval that happened?

  if (hasChangesRequested) {
    status = 'changesRequested';
    // NOTE(review): here a change request keeps the clock running
    // (endTime = null), but the fetch-pr.mjs copy of this function ends the
    // interval at the first CHANGES_REQUESTED review — confirm which
    // behavior is intended and unify.
    endTime = null; // Blocked
  } else if (hasApproval) {
    status = 'approved';
    // Find the first approval in the list
    const approvals = relevantReviews.filter((r) => r.state === 'APPROVED');
    if (approvals.length > 0) {
      endTime = approvals[0].submitted_at;
    }
  } else if (hasComment) {
    status = 'commented';
    endTime = null;
  }

  // 4. Duration (open-ended intervals are measured up to "now")
  let durationHours = null;
  if (startTime) {
    const start = new Date(startTime);
    const end = endTime ? new Date(endTime) : new Date();
    const diffMs = end - start;
    durationHours = diffMs / (1000 * 60 * 60);
  }

  return {
    startTime,
    endTime,
    durationHours,
    status,
  };
}

/**
 * Filter review_request events for a specific owner/team
 * @param {Array<object>} events - review_requested timeline events.
 * @param {string} ownerHandle - CODEOWNERS handle.
 * @param {Array<string>} members - Logins in the owner's team.
 */
function idxRequestEvents(events, ownerHandle, members) {
  const slug = ownerHandle.replace('@', '').split('/').pop(); // "dxui" from "@coveo/dxui"

  return events.filter((e) => {
    // Direct team request
    if (e.requested_team && e.requested_team.slug === slug) return true;

    // Member request
    if (e.requested_reviewer && members.includes(e.requested_reviewer.login))
      return true;

    return false;
  });
}

/**
 * Fetch members of a GitHub team with caching
 * (API failures are cached as an empty list so a bad slug is hit only once).
 */
async function getTeamMembers(octokit, teamSlug, cache) {
  if (cache.has(teamSlug)) {
    return cache.get(teamSlug);
  }

  // Parse org and slug from @org/team or similar; org defaults to repo owner.
  let org = REPO_OWNER;
  let slug = teamSlug;

  if (teamSlug.startsWith('@')) {
    const parts = teamSlug.substring(1).split('/');
    if (parts.length === 2) {
      org = parts[0];
      slug = parts[1];
    } else {
      slug = parts[0];
    }
  }

  try {
    const members = await octokit.paginate(
      octokit.rest.teams.listMembersInOrg,
      {
        org,
        team_slug: slug,
        per_page: 100,
      }
    );
    const logins = members.map((m) => m.login);
    cache.set(teamSlug, logins);
    return logins;
  } catch (e) {
    console.warn(`Failed to fetch members for team ${teamSlug}:`, e.message);
    cache.set(teamSlug, []);
    return [];
  }
}

/**
 * Get distinct responsible teams for the files
 * @param {Array} files
 * @param {Array} codeOwners
 */
function getResponsibleTeams(files, codeOwners) {
  const teams = new Set();
  for (const file of files) {
    const owners = getOwnersForFile(file, codeOwners);
    owners.forEach((o) => {
      teams.add(o);
    });
  }
  return Array.from(teams);
}

/**
 * Extract confidence level from labels
 * @param {Array} labels
 */
function getConfidence(labels) {
const names = labels.map((l) => l.name); + if (names.includes(PR_REVIEW_LABELS.HIGH_CONFIDENCE)) return 'high'; + if (names.includes(PR_REVIEW_LABELS.LOW_CONFIDENCE)) return 'low'; + return 'unknown'; +} + +/** + * Loads and parses CODEOWNERS file + */ +function loadCodeOwners() { + const codeOwnersPath = path.resolve(process.cwd(), 'CODEOWNERS'); + if (!fs.existsSync(codeOwnersPath)) { + console.warn('CODEOWNERS file not found at', codeOwnersPath); + return []; + } + const content = fs.readFileSync(codeOwnersPath, 'utf-8'); + return parseCodeOwners(content); +} + +/** + * Parses CODEOWNERS content into rules + * @param {string} content + */ +function parseCodeOwners(content) { + const rules = []; + const lines = content.split('\n'); + + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith('#')) continue; + + // Split by whitespace, respecting escaped spaces if any (simplified here) + const parts = trimmed.split(/\s+/); + if (parts.length < 2) continue; // Need at least pattern and one owner + + const pattern = parts[0]; + const owners = parts.slice(1); + + rules.push({ + pattern, + regex: globToRegex(pattern), + owners, + }); + } + + // Return reversed to make "first match wins" logic easier if iterating + // But usually we iterate all and pick the last one. + // Git logic: Last match wins. + return rules; +} + +/** + * Simple Glob to Regex converter for CODEOWNERS + * @param {string} pattern + */ +function globToRegex(pattern) { + let regexStr = pattern; + + // Escape regex special characters except * and ? + regexStr = regexStr.replace(/[.+^${}()|[\]\\]/g, '\\$&'); + + // Handle double star ** + regexStr = regexStr.replace(/\*\*/g, '.*'); + + // Handle single star * (matches non-slash char) + regexStr = regexStr.replace(/(? f.endsWith('.json')); + return files.map((f) => + JSON.parse(fs.readFileSync(path.join(DATA_DIR, f), 'utf-8')) + ); +}