diff --git a/.github/workflows/gh-aw-fragments/pr-context.md b/.github/workflows/gh-aw-fragments/pr-context.md index 31b8ee07..0084f329 100644 --- a/.github/workflows/gh-aw-fragments/pr-context.md +++ b/.github/workflows/gh-aw-fragments/pr-context.md @@ -37,6 +37,12 @@ steps: jq -r '[.[] | {filename, size: ((.additions // 0) + (.deletions // 0))}] | sort_by(-.size) | .[].filename' /tmp/pr-context/files.json \ > /tmp/pr-context/file_order_largest.txt + # Compute PR size metrics for review fan-out decisions + FILE_COUNT=$(jq 'length' /tmp/pr-context/files.json) + DIFF_LINES=$(wc -l < /tmp/pr-context/pr.diff | tr -d ' ') + echo "${FILE_COUNT} files, ${DIFF_LINES} diff lines" > /tmp/pr-context/pr-size.txt + echo "PR size: ${FILE_COUNT} files, ${DIFF_LINES} diff lines" + # Existing reviews gh api "repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/reviews" --paginate \ | jq -s 'add // []' > /tmp/pr-context/reviews.json @@ -132,6 +138,7 @@ steps: | `threads/.json` | Per-file review threads — one file per changed file with existing threads, mirroring the repo path under `threads/` | | `comments.json` | PR discussion comments (not inline) | | `issue-{N}.json` | Linked issue details (one file per linked issue, if any) | + | `pr-size.txt` | PR size metrics: `{N} files, {M} diff lines` — used by the agent to decide review fan-out | | `agents.md` | Repository conventions from `generate_agents_md` (if written by agent) | | `review-instructions.md` | Review instructions, criteria, and calibration examples (if written by review-process fragment) | MANIFEST diff --git a/.github/workflows/gh-aw-mention-in-pr-by-id.lock.yml b/.github/workflows/gh-aw-mention-in-pr-by-id.lock.yml index 98adb33e..2e240e9a 100644 --- a/.github/workflows/gh-aw-mention-in-pr-by-id.lock.yml +++ b/.github/workflows/gh-aw-mention-in-pr-by-id.lock.yml @@ -43,7 +43,7 @@ # # inlined-imports: true # -# gh-aw-metadata: {"schema_version":"v1","frontmatter_hash":"6d4152d2e4782ed202c539121d0370f9cedcf1bad67f7e8361a984c5c0fa8c8e"} +# gh-aw-metadata: {"schema_version":"v1","frontmatter_hash":"b39cbb0733269e20ce82625c69d50913b9939478dd0ceb435904b3aa6329261a"} name: "Mention in PR by ID" "on": @@ -690,7 +690,7 @@ jobs: GH_TOKEN: ${{ github.token }} PR_NUMBER: ${{ github.event.pull_request.number || inputs.target-pr-number || github.event.issue.number }} name: Fetch PR context to disk - run: "set -euo pipefail\nmkdir -p /tmp/pr-context\n\n# PR metadata\ngh pr view \"$PR_NUMBER\" --json title,body,author,baseRefName,headRefName,headRefOid,url \\\n > /tmp/pr-context/pr.json\n\n# Full diff\nif ! gh pr diff \"$PR_NUMBER\" > /tmp/pr-context/pr.diff; then\n echo \"::warning::Failed to fetch full PR diff; per-file diffs from files.json are still available.\"\n : > /tmp/pr-context/pr.diff\nfi\n\n# Changed files list (--paginate may output concatenated arrays; jq -s 'add' merges them)\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/files\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/files.json\n\n# Per-file diffs\njq -c '.[]' /tmp/pr-context/files.json | while IFS= read -r entry; do\n filename=$(echo \"$entry\" | jq -r '.filename')\n mkdir -p \"/tmp/pr-context/diffs/$(dirname \"$filename\")\"\n echo \"$entry\" | jq -r '.patch // empty' > \"/tmp/pr-context/diffs/${filename}.diff\"\ndone\n\n# File orderings for sub-agent review (3 strategies)\njq -r '[.[] | .filename] | sort | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_az.txt\njq -r '[.[] | .filename] | sort | reverse | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_za.txt\njq -r '[.[] | {filename, size: ((.additions // 0) + (.deletions // 0))}] | sort_by(-.size) | .[].filename' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_largest.txt\n\n# Existing reviews\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/reviews\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/reviews.json\n\n# Review threads with resolution status (GraphQL — REST lacks isResolved/isOutdated)\ngh api graphql --paginate -f query='\n query($owner: String!, $repo: String!, $number: Int!, $endCursor: String) {\n repository(owner: $owner, name: $repo) {\n pullRequest(number: $number) {\n reviewThreads(first: 100, after: $endCursor) {\n pageInfo { hasNextPage endCursor }\n nodes {\n id\n isResolved\n isOutdated\n isCollapsed\n path\n line\n startLine\n comments(first: 100) {\n nodes {\n id\n databaseId\n body\n author { login }\n createdAt\n }\n }\n }\n }\n }\n }\n }\n' -F owner=\"${GITHUB_REPOSITORY%/*}\" -F repo=\"${GITHUB_REPOSITORY#*/}\" -F \"number=$PR_NUMBER\" \\\n --jq '.data.repository.pullRequest.reviewThreads.nodes' \\\n | jq -s 'add // []' > /tmp/pr-context/review_comments.json\n\n# Filtered review thread views (pre-computed so agents don't need to parse review_comments.json)\njq '[.[] | select(.isResolved == false)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/unresolved_threads.json\njq '[.[] | select(.isResolved == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/resolved_threads.json\njq '[.[] | select(.isOutdated == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/outdated_threads.json\n\n# Per-file review threads (mirrors diffs/ structure)\njq -c '.[]' /tmp/pr-context/review_comments.json | while IFS= read -r thread; do\n filepath=$(echo \"$thread\" | jq -r '.path // empty')\n [ -z \"$filepath\" ] && continue\n mkdir -p \"/tmp/pr-context/threads/$(dirname \"$filepath\")\"\n echo \"$thread\" >> \"/tmp/pr-context/threads/${filepath}.jsonl\"\ndone\n# Convert per-file JSONL to proper JSON arrays\nmkdir -p /tmp/pr-context/threads\nfind /tmp/pr-context/threads -name '*.jsonl' 2>/dev/null | while IFS= read -r jsonl; do\n jq -s '.' \"$jsonl\" > \"${jsonl%.jsonl}.json\"\n rm \"$jsonl\"\ndone\n\n# PR discussion comments\ngh api \"repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/comments.json\n\n# Linked issues\njq -r '.body // \"\"' /tmp/pr-context/pr.json 2>/dev/null \\\n | grep -oiE '(fixes|closes|resolves)\\s+#[0-9]+' \\\n | grep -oE '[0-9]+$' \\\n | sort -u \\\n | while read -r issue; do\n gh api \"repos/$GITHUB_REPOSITORY/issues/$issue\" > \"/tmp/pr-context/issue-${issue}.json\" || true\n done || true\n\n# Write manifest\ncat > /tmp/pr-context/README.md << 'MANIFEST'\n# PR Context\n\nPre-fetched PR data. All files are in `/tmp/pr-context/`.\n\n| File | Description |\n| --- | --- |\n| `pr.json` | PR metadata — title, body, author, base/head branches, head commit SHA (`headRefOid`), URL |\n| `pr.diff` | Full unified diff of all changes |\n| `files.json` | Changed files array — each entry has `filename`, `status`, `additions`, `deletions`, `patch` |\n| `diffs/.diff` | Per-file diffs — one file per changed file, mirroring the repo path under `diffs/` |\n| `file_order_az.txt` | Changed files sorted alphabetically (A→Z), one filename per line |\n| `file_order_za.txt` | Changed files sorted reverse-alphabetically (Z→A), one filename per line |\n| `file_order_largest.txt` | Changed files sorted by diff size descending (largest first), one filename per line |\n| `reviews.json` | Prior review submissions — author, state (APPROVED/CHANGES_REQUESTED/COMMENTED), body |\n| `review_comments.json` | All review threads (GraphQL) — each thread has `id` (node ID for resolving), `isResolved`, `isOutdated`, `path`, `line`, and nested `comments` with `id`, `databaseId` (numeric REST ID for replies), body/author |\n| `unresolved_threads.json` | Unresolved review threads — subset of `review_comments.json` where `isResolved` is false |\n| `resolved_threads.json` | Resolved review threads — subset of `review_comments.json` where `isResolved` is true |\n| `outdated_threads.json` | Outdated review threads — subset of `review_comments.json` where `isOutdated` is true (code changed since comment) |\n| `threads/.json` | Per-file review threads — one file per changed file with existing threads, mirroring the repo path under `threads/` |\n| `comments.json` | PR discussion comments (not inline) |\n| `issue-{N}.json` | Linked issue details (one file per linked issue, if any) |\n| `agents.md` | Repository conventions from `generate_agents_md` (if written by agent) |\n| `review-instructions.md` | Review instructions, criteria, and calibration examples (if written by review-process fragment) |\nMANIFEST\n\necho \"PR context written to /tmp/pr-context/\"\nls -la /tmp/pr-context/" + run: "set -euo pipefail\nmkdir -p /tmp/pr-context\n\n# PR metadata\ngh pr view \"$PR_NUMBER\" --json title,body,author,baseRefName,headRefName,headRefOid,url \\\n > /tmp/pr-context/pr.json\n\n# Full diff\nif ! gh pr diff \"$PR_NUMBER\" > /tmp/pr-context/pr.diff; then\n echo \"::warning::Failed to fetch full PR diff; per-file diffs from files.json are still available.\"\n : > /tmp/pr-context/pr.diff\nfi\n\n# Changed files list (--paginate may output concatenated arrays; jq -s 'add' merges them)\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/files\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/files.json\n\n# Per-file diffs\njq -c '.[]' /tmp/pr-context/files.json | while IFS= read -r entry; do\n filename=$(echo \"$entry\" | jq -r '.filename')\n mkdir -p \"/tmp/pr-context/diffs/$(dirname \"$filename\")\"\n echo \"$entry\" | jq -r '.patch // empty' > \"/tmp/pr-context/diffs/${filename}.diff\"\ndone\n\n# File orderings for sub-agent review (3 strategies)\njq -r '[.[] | .filename] | sort | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_az.txt\njq -r '[.[] | .filename] | sort | reverse | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_za.txt\njq -r '[.[] | {filename, size: ((.additions // 0) + (.deletions // 0))}] | sort_by(-.size) | .[].filename' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_largest.txt\n\n# Compute PR size metrics for review fan-out decisions\nFILE_COUNT=$(jq 'length' /tmp/pr-context/files.json)\nDIFF_LINES=$(wc -l < /tmp/pr-context/pr.diff | tr -d ' ')\necho \"${FILE_COUNT} files, ${DIFF_LINES} diff lines\" > /tmp/pr-context/pr-size.txt\necho \"PR size: ${FILE_COUNT} files, ${DIFF_LINES} diff lines\"\n\n# Existing reviews\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/reviews\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/reviews.json\n\n# Review threads with resolution status (GraphQL — REST lacks isResolved/isOutdated)\ngh api graphql --paginate -f query='\n query($owner: String!, $repo: String!, $number: Int!, $endCursor: String) {\n repository(owner: $owner, name: $repo) {\n pullRequest(number: $number) {\n reviewThreads(first: 100, after: $endCursor) {\n pageInfo { hasNextPage endCursor }\n nodes {\n id\n isResolved\n isOutdated\n isCollapsed\n path\n line\n startLine\n comments(first: 100) {\n nodes {\n id\n databaseId\n body\n author { login }\n createdAt\n }\n }\n }\n }\n }\n }\n }\n' -F owner=\"${GITHUB_REPOSITORY%/*}\" -F repo=\"${GITHUB_REPOSITORY#*/}\" -F \"number=$PR_NUMBER\" \\\n --jq '.data.repository.pullRequest.reviewThreads.nodes' \\\n | jq -s 'add // []' > /tmp/pr-context/review_comments.json\n\n# Filtered review thread views (pre-computed so agents don't need to parse review_comments.json)\njq '[.[] | select(.isResolved == false)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/unresolved_threads.json\njq '[.[] | select(.isResolved == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/resolved_threads.json\njq '[.[] | select(.isOutdated == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/outdated_threads.json\n\n# Per-file review threads (mirrors diffs/ structure)\njq -c '.[]' /tmp/pr-context/review_comments.json | while IFS= read -r thread; do\n filepath=$(echo \"$thread\" | jq -r '.path // empty')\n [ -z \"$filepath\" ] && continue\n mkdir -p \"/tmp/pr-context/threads/$(dirname \"$filepath\")\"\n echo \"$thread\" >> \"/tmp/pr-context/threads/${filepath}.jsonl\"\ndone\n# Convert per-file JSONL to proper JSON arrays\nmkdir -p /tmp/pr-context/threads\nfind /tmp/pr-context/threads -name '*.jsonl' 2>/dev/null | while IFS= read -r jsonl; do\n jq -s '.' \"$jsonl\" > \"${jsonl%.jsonl}.json\"\n rm \"$jsonl\"\ndone\n\n# PR discussion comments\ngh api \"repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/comments.json\n\n# Linked issues\njq -r '.body // \"\"' /tmp/pr-context/pr.json 2>/dev/null \\\n | grep -oiE '(fixes|closes|resolves)\\s+#[0-9]+' \\\n | grep -oE '[0-9]+$' \\\n | sort -u \\\n | while read -r issue; do\n gh api \"repos/$GITHUB_REPOSITORY/issues/$issue\" > \"/tmp/pr-context/issue-${issue}.json\" || true\n done || true\n\n# Write manifest\ncat > /tmp/pr-context/README.md << 'MANIFEST'\n# PR Context\n\nPre-fetched PR data. All files are in `/tmp/pr-context/`.\n\n| File | Description |\n| --- | --- |\n| `pr.json` | PR metadata — title, body, author, base/head branches, head commit SHA (`headRefOid`), URL |\n| `pr.diff` | Full unified diff of all changes |\n| `files.json` | Changed files array — each entry has `filename`, `status`, `additions`, `deletions`, `patch` |\n| `diffs/.diff` | Per-file diffs — one file per changed file, mirroring the repo path under `diffs/` |\n| `file_order_az.txt` | Changed files sorted alphabetically (A→Z), one filename per line |\n| `file_order_za.txt` | Changed files sorted reverse-alphabetically (Z→A), one filename per line |\n| `file_order_largest.txt` | Changed files sorted by diff size descending (largest first), one filename per line |\n| `reviews.json` | Prior review submissions — author, state (APPROVED/CHANGES_REQUESTED/COMMENTED), body |\n| `review_comments.json` | All review threads (GraphQL) — each thread has `id` (node ID for resolving), `isResolved`, `isOutdated`, `path`, `line`, and nested `comments` with `id`, `databaseId` (numeric REST ID for replies), body/author |\n| `unresolved_threads.json` | Unresolved review threads — subset of `review_comments.json` where `isResolved` is false |\n| `resolved_threads.json` | Resolved review threads — subset of `review_comments.json` where `isResolved` is true |\n| `outdated_threads.json` | Outdated review threads — subset of `review_comments.json` where `isOutdated` is true (code changed since comment) |\n| `threads/.json` | Per-file review threads — one file per changed file with existing threads, mirroring the repo path under `threads/` |\n| `comments.json` | PR discussion comments (not inline) |\n| `issue-{N}.json` | Linked issue details (one file per linked issue, if any) |\n| `pr-size.txt` | PR size metrics: `{N} files, {M} diff lines` — used by the agent to decide review fan-out |\n| `agents.md` | Repository conventions from `generate_agents_md` (if written by agent) |\n| `review-instructions.md` | Review instructions, criteria, and calibration examples (if written by review-process fragment) |\nMANIFEST\n\necho \"PR context written to /tmp/pr-context/\"\nls -la /tmp/pr-context/" - name: Write review instructions to disk run: "mkdir -p /tmp/pr-context\ncat > /tmp/pr-context/review-instructions.md << 'REVIEW_EOF'\n# Review Instructions for Sub-agents\n\nYou are a code review sub-agent. Read these instructions, then review the PR files in the order provided in your prompt.\n\n## Context\n\nBefore reviewing files, read these to understand the PR:\n\n1. `/tmp/pr-context/pr.json` — PR title, description, author, and branches. Understand what the PR is trying to accomplish.\n2. `/tmp/pr-context/agents.md` — Repository coding conventions and guidelines (if it exists).\n3. `/tmp/pr-context/review_comments.json` — Existing review threads. Note which files already have threads so you don't duplicate.\n4. `/tmp/pr-context/issue-*.json` — Linked issue details (if any). Understand the motivation and acceptance criteria.\n\n## Process\n\nReview the PR diff file by file in your assigned order. For each changed file:\n\n1. **Read the diff** for this file from `/tmp/pr-context/diffs/.diff` to understand what changed. If the diff is empty or truncated (e.g., binary files or very large changes), fall back to reading the full file from the workspace and comparing against context.\n2. **Read the full file from the workspace.** The PR branch is checked out locally — open the file directly to get complete contents with line numbers.\n3. **Check existing threads** for this file from `/tmp/pr-context/threads/.json` (if it exists). Skip issues that are already under discussion — each thread has `isResolved` and `isOutdated` fields.\n4. **Identify potential issues** matching the review criteria below.\n5. **Quick-check each issue** before including it:\n - What specific code pattern or change triggers this concern?\n - Is there an obvious guard, handler, or mitigation visible in the immediate context?\n - Can you describe a concrete failure scenario (the `evidence` field)? If you cannot articulate what specific input or state triggers the problem, drop the finding.\n - If the issue is clearly handled, skip it. If you're unsure, include it — the parent will verify.\n6. **Add to your findings list.** Do NOT leave inline comments — you don't have that tool. Return findings in this format:\n\n```\n- file: path/to/file\n line: 42\n severity: HIGH\n title: Brief title\n description: What the issue is and why it matters\n evidence: The specific code pattern and failure scenario\n suggestion: corrected code here (optional — only if you can provide a concrete fix)\n```\n\n**Review every file in your assigned order.** Files reviewed earlier get more attention, which is why different sub-agents use different orderings.\n\n**Check existing threads** — per-file threads are at `/tmp/pr-context/threads/.json` (step 3 above). The full list is at `/tmp/pr-context/review_comments.json`. Do not flag issues that are already under discussion (resolved or unresolved). For outdated threads, only re-flag if the issue still applies to the current diff.\n\n**Return your full findings list** when done, or an empty list if no issues were found.\n\n## Review Criteria\n\nFocus on these categories in priority order:\n\n1. Security vulnerabilities (injection, XSS, auth bypass, secrets exposure)\n2. Logic bugs that could cause runtime failures or incorrect behavior\n3. Data integrity issues (race conditions, missing transactions, corruption risk)\n4. Performance bottlenecks (N+1 queries, memory leaks, blocking operations)\n5. Error handling gaps (unhandled exceptions, missing validation)\n6. Breaking changes to public APIs without migration path\n7. Missing or incorrect test coverage for critical paths\n\n## What NOT to Flag\n\nOnly review the diff — do not flag issues in unchanged code, pre-existing problems not introduced by this PR, or style preferences handled by linters or formatters.\n\n**Common false positives** — these patterns look like issues but usually aren't. Before flagging anything in these categories, confirm the problem is real by reading the surrounding code:\n\n- **Security — input already sanitized:** Don't flag injection or XSS risks when inputs are sanitized upstream, parameterized queries are used, or the framework auto-escapes output.\n- **Null/undefined — guarded elsewhere:** Don't flag potential null dereferences if the value is guaranteed by a type guard, assertion, schema validation, or upstream null check.\n- **Error handling — handled at a different layer:** Don't flag missing try/catch if the caller, middleware, or framework catches and handles the error (e.g., Express error middleware, React error boundaries).\n- **Performance — theoretical, not practical:** Don't flag algorithmic complexity (e.g., O(n^2)) unless N is demonstrably large enough to matter in the actual usage context. \"This could be slow\" without evidence is not actionable.\n- **Validation — exists at another layer:** Don't flag missing input validation if it's handled by an API gateway, middleware, schema validator, or type system.\n- **Test coverage — trivial or generated code:** Don't flag missing tests for trivial getters/setters, auto-generated code, or simple delegation methods.\n- **Style or naming — not in coding guidelines:** Don't flag naming conventions or code style unless they violate the repository's documented coding guidelines (from `generate_agents_md` or CONTRIBUTING docs).\n\n**Existing review threads** — check BEFORE flagging any issue:\n\n- **Resolved with reviewer reply** (e.g. \"This is intentional\") — reviewer's decision is final. Do NOT re-flag.\n- **Resolved without reply** — author likely fixed it. Do NOT re-raise unless the fix introduced a new problem.\n- **Unresolved** — already flagged. Do NOT duplicate.\n- **Outdated** — only re-flag if the issue still applies to the current diff.\n\nWhen in doubt, do not duplicate. Redundant comments erode trust.\n\nFinding no issues is a valid and valuable outcome. An empty findings list is better than findings that waste the author's time or erode trust. Do not manufacture findings to justify your review — if the code is sound, return an empty list.\n\n## Severity Classification\n\nDetermine severity AFTER investigating the issue, not before. First identify the problem and trace through the code, then assign a severity based on the evidence you found.\n\n- 🔴 CRITICAL — Must fix before merge (security vulnerabilities, data corruption, production-breaking bugs)\n- 🟠 HIGH — Should fix before merge (logic errors, missing validation, significant performance issues)\n- 🟡 MEDIUM — Address soon, non-blocking (error handling gaps, suboptimal patterns, missing edge cases)\n- ⚪ LOW — Author discretion (minor improvements, documentation gaps)\n- 💬 NITPICK — Truly optional (stylistic preferences, alternative approaches)\n\n## Review Intensity\n\nThe review intensity is `${{ inputs.intensity || 'balanced' }}`.\n\n- **conservative**: High evidence bar. Only flag when you can demonstrate a concrete failure scenario. If you can construct a reasonable counterargument, do not flag. Approval with zero findings is the expected outcome for most PRs.\n- **balanced**: Standard evidence bar. Flag when you can point to specific code that would fail. If the issue is ambiguous, lean toward not flagging.\n- **aggressive**: Lower evidence bar. Flag when evidence exists even if the failure scenario is not fully confirmed. Improvement suggestions welcome but must cite specific code.\n\n## Calibration Examples\n\nUse these examples to calibrate your judgment. Each pair shows a real issue and a similar-looking pattern that is NOT an issue.\n\n### Example 1: Null/Undefined Access\n\n**True positive — flag this:**\n\n```js\n// PR adds this handler\napp.get('/user/:id', async (req, res) => {\n const user = await db.findUser(req.params.id);\n res.json({ name: user.name, email: user.email });\n});\n```\n\nWhy flag: `db.findUser()` can return `null` when no user matches the ID. Accessing `user.name` will throw a TypeError at runtime. No upstream guard exists — the route handler is the entry point.\n\n**False positive — do NOT flag this:**\n\n```ts\n// PR adds this line inside an existing function\nconst settings = user.getSettings();\n```\n\nWhy skip: Reading the full file reveals `user` is typed as `User` (not `User | null`), and the calling function only runs after `authenticateUser()` middleware which guarantees a valid user object. The null case is handled at a different layer.\n\n### Example 2: SQL Injection\n\n**True positive — flag this:**\n\n```python\n# PR adds this query\ncursor.execute(f\"SELECT * FROM orders WHERE customer_id = '{customer_id}'\")\n```\n\nWhy flag: String interpolation in a SQL query with user-controlled input (`customer_id` comes from the request). No parameterization or sanitization anywhere in the call chain.\n\n**False positive — do NOT flag this:**\n\n```python\n# PR adds this query\ncursor.execute(f\"SELECT * FROM orders WHERE status = '{OrderStatus.PENDING.value}'\")\n```\n\nWhy skip: The interpolated value is a hardcoded enum constant (`OrderStatus.PENDING`), not user input. There is no injection vector.\n\n### Example 3: Borderline — Do NOT Flag\n\n```go\n// PR adds this function\nfunc processItems(items []Item) []Result {\n results := make([]Result, 0)\n for _, item := range items {\n for _, tag := range item.Tags {\n results = append(results, process(item, tag))\n }\n }\n return results\n}\n```\n\nThis looks like an O(n*m) performance concern. But without evidence that `items` or `Tags` are large in practice, this is speculative. The function processes a bounded dataset (items from a single user request). Do not flag theoretical performance issues without evidence of real-world impact.\nREVIEW_EOF" - env: diff --git a/.github/workflows/gh-aw-mention-in-pr-no-sandbox.lock.yml b/.github/workflows/gh-aw-mention-in-pr-no-sandbox.lock.yml index 3c4d5a2d..d759b900 100644 --- a/.github/workflows/gh-aw-mention-in-pr-no-sandbox.lock.yml +++ b/.github/workflows/gh-aw-mention-in-pr-no-sandbox.lock.yml @@ -44,7 +44,7 @@ # # inlined-imports: true # -# gh-aw-metadata: {"schema_version":"v1","frontmatter_hash":"df91e320ec660e442d65ff1a17f5c9467ebcc66175aa4ca2b59661d126eeba33"} +# gh-aw-metadata: {"schema_version":"v1","frontmatter_hash":"13b24e850bc77ffcfe39226d9a0276409e995be86fdc18794304cad952bfb4e0"} name: "Mention in PR (no sandbox)" "on": @@ -756,7 +756,7 @@ jobs: GH_TOKEN: ${{ github.token }} PR_NUMBER: ${{ github.event.pull_request.number || inputs.target-pr-number || github.event.issue.number }} name: Fetch PR context to disk - run: "set -euo pipefail\nmkdir -p /tmp/pr-context\n\n# PR metadata\ngh pr view \"$PR_NUMBER\" --json title,body,author,baseRefName,headRefName,headRefOid,url \\\n > /tmp/pr-context/pr.json\n\n# Full diff\nif ! gh pr diff \"$PR_NUMBER\" > /tmp/pr-context/pr.diff; then\n echo \"::warning::Failed to fetch full PR diff; per-file diffs from files.json are still available.\"\n : > /tmp/pr-context/pr.diff\nfi\n\n# Changed files list (--paginate may output concatenated arrays; jq -s 'add' merges them)\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/files\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/files.json\n\n# Per-file diffs\njq -c '.[]' /tmp/pr-context/files.json | while IFS= read -r entry; do\n filename=$(echo \"$entry\" | jq -r '.filename')\n mkdir -p \"/tmp/pr-context/diffs/$(dirname \"$filename\")\"\n echo \"$entry\" | jq -r '.patch // empty' > \"/tmp/pr-context/diffs/${filename}.diff\"\ndone\n\n# File orderings for sub-agent review (3 strategies)\njq -r '[.[] | .filename] | sort | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_az.txt\njq -r '[.[] | .filename] | sort | reverse | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_za.txt\njq -r '[.[] | {filename, size: ((.additions // 0) + (.deletions // 0))}] | sort_by(-.size) | .[].filename' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_largest.txt\n\n# Existing reviews\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/reviews\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/reviews.json\n\n# Review threads with resolution status (GraphQL — REST lacks isResolved/isOutdated)\ngh api graphql --paginate -f query='\n query($owner: String!, $repo: String!, $number: Int!, $endCursor: String) {\n repository(owner: $owner, name: $repo) {\n pullRequest(number: $number) {\n reviewThreads(first: 100, after: $endCursor) {\n pageInfo { hasNextPage endCursor }\n nodes {\n id\n isResolved\n isOutdated\n isCollapsed\n path\n line\n startLine\n comments(first: 100) {\n nodes {\n id\n databaseId\n body\n author { login }\n createdAt\n }\n }\n }\n }\n }\n }\n }\n' -F owner=\"${GITHUB_REPOSITORY%/*}\" -F repo=\"${GITHUB_REPOSITORY#*/}\" -F \"number=$PR_NUMBER\" \\\n --jq '.data.repository.pullRequest.reviewThreads.nodes' \\\n | jq -s 'add // []' > /tmp/pr-context/review_comments.json\n\n# Filtered review thread views (pre-computed so agents don't need to parse review_comments.json)\njq '[.[] | select(.isResolved == false)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/unresolved_threads.json\njq '[.[] | select(.isResolved == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/resolved_threads.json\njq '[.[] | select(.isOutdated == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/outdated_threads.json\n\n# Per-file review threads (mirrors diffs/ structure)\njq -c '.[]' /tmp/pr-context/review_comments.json | while IFS= read -r thread; do\n filepath=$(echo \"$thread\" | jq -r '.path // empty')\n [ -z \"$filepath\" ] && continue\n mkdir -p \"/tmp/pr-context/threads/$(dirname \"$filepath\")\"\n echo \"$thread\" >> \"/tmp/pr-context/threads/${filepath}.jsonl\"\ndone\n# Convert per-file JSONL to proper JSON arrays\nmkdir -p /tmp/pr-context/threads\nfind /tmp/pr-context/threads -name '*.jsonl' 2>/dev/null | while IFS= read -r jsonl; do\n jq -s '.' \"$jsonl\" > \"${jsonl%.jsonl}.json\"\n rm \"$jsonl\"\ndone\n\n# PR discussion comments\ngh api \"repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/comments.json\n\n# Linked issues\njq -r '.body // \"\"' /tmp/pr-context/pr.json 2>/dev/null \\\n | grep -oiE '(fixes|closes|resolves)\\s+#[0-9]+' \\\n | grep -oE '[0-9]+$' \\\n | sort -u \\\n | while read -r issue; do\n gh api \"repos/$GITHUB_REPOSITORY/issues/$issue\" > \"/tmp/pr-context/issue-${issue}.json\" || true\n done || true\n\n# Write manifest\ncat > /tmp/pr-context/README.md << 'MANIFEST'\n# PR Context\n\nPre-fetched PR data. All files are in `/tmp/pr-context/`.\n\n| File | Description |\n| --- | --- |\n| `pr.json` | PR metadata — title, body, author, base/head branches, head commit SHA (`headRefOid`), URL |\n| `pr.diff` | Full unified diff of all changes |\n| `files.json` | Changed files array — each entry has `filename`, `status`, `additions`, `deletions`, `patch` |\n| `diffs/.diff` | Per-file diffs — one file per changed file, mirroring the repo path under `diffs/` |\n| `file_order_az.txt` | Changed files sorted alphabetically (A→Z), one filename per line |\n| `file_order_za.txt` | Changed files sorted reverse-alphabetically (Z→A), one filename per line |\n| `file_order_largest.txt` | Changed files sorted by diff size descending (largest first), one filename per line |\n| `reviews.json` | Prior review submissions — author, state (APPROVED/CHANGES_REQUESTED/COMMENTED), body |\n| `review_comments.json` | All review threads (GraphQL) — each thread has `id` (node ID for resolving), `isResolved`, `isOutdated`, `path`, `line`, and nested `comments` with `id`, `databaseId` (numeric REST ID for replies), body/author |\n| `unresolved_threads.json` | Unresolved review threads — subset of `review_comments.json` where `isResolved` is false |\n| `resolved_threads.json` | Resolved review threads — subset of `review_comments.json` where `isResolved` is true |\n| `outdated_threads.json` | Outdated review threads — subset of `review_comments.json` where `isOutdated` is true (code changed since comment) |\n| `threads/.json` | Per-file review threads — one file per changed file with existing threads, mirroring the repo path under `threads/` |\n| `comments.json` | PR discussion comments (not inline) |\n| `issue-{N}.json` | Linked issue details (one file per linked issue, if any) |\n| `agents.md` | Repository conventions from `generate_agents_md` (if written by agent) |\n| `review-instructions.md` | Review instructions, criteria, and calibration examples (if written by review-process fragment) |\nMANIFEST\n\necho \"PR context written to /tmp/pr-context/\"\nls -la /tmp/pr-context/" + run: "set -euo pipefail\nmkdir -p /tmp/pr-context\n\n# PR metadata\ngh pr view \"$PR_NUMBER\" --json title,body,author,baseRefName,headRefName,headRefOid,url \\\n > /tmp/pr-context/pr.json\n\n# Full diff\nif ! gh pr diff \"$PR_NUMBER\" > /tmp/pr-context/pr.diff; then\n echo \"::warning::Failed to fetch full PR diff; per-file diffs from files.json are still available.\"\n : > /tmp/pr-context/pr.diff\nfi\n\n# Changed files list (--paginate may output concatenated arrays; jq -s 'add' merges them)\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/files\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/files.json\n\n# Per-file diffs\njq -c '.[]' /tmp/pr-context/files.json | while IFS= read -r entry; do\n filename=$(echo \"$entry\" | jq -r '.filename')\n mkdir -p \"/tmp/pr-context/diffs/$(dirname \"$filename\")\"\n echo \"$entry\" | jq -r '.patch // empty' > \"/tmp/pr-context/diffs/${filename}.diff\"\ndone\n\n# File orderings for sub-agent review (3 strategies)\njq -r '[.[] | .filename] | sort | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_az.txt\njq -r '[.[] | .filename] | sort | reverse | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_za.txt\njq -r '[.[] | {filename, size: ((.additions // 0) + (.deletions // 0))}] | sort_by(-.size) | .[].filename' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_largest.txt\n\n# Compute PR size metrics for review fan-out decisions\nFILE_COUNT=$(jq 'length' /tmp/pr-context/files.json)\nDIFF_LINES=$(wc -l < /tmp/pr-context/pr.diff | tr -d ' ')\necho \"${FILE_COUNT} files, ${DIFF_LINES} diff lines\" > /tmp/pr-context/pr-size.txt\necho \"PR size: ${FILE_COUNT} files, ${DIFF_LINES} diff lines\"\n\n# Existing reviews\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/reviews\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/reviews.json\n\n# Review threads with resolution status (GraphQL — REST lacks isResolved/isOutdated)\ngh api graphql --paginate -f query='\n query($owner: String!, $repo: String!, $number: Int!, $endCursor: String) {\n repository(owner: $owner, name: $repo) {\n pullRequest(number: $number) {\n reviewThreads(first: 100, after: $endCursor) {\n pageInfo { hasNextPage endCursor }\n nodes {\n id\n isResolved\n isOutdated\n isCollapsed\n path\n line\n startLine\n comments(first: 100) {\n nodes {\n id\n databaseId\n body\n author { login }\n createdAt\n }\n }\n }\n }\n }\n }\n }\n' -F owner=\"${GITHUB_REPOSITORY%/*}\" -F repo=\"${GITHUB_REPOSITORY#*/}\" -F \"number=$PR_NUMBER\" \\\n --jq '.data.repository.pullRequest.reviewThreads.nodes' \\\n | jq -s 'add // []' > /tmp/pr-context/review_comments.json\n\n# Filtered review thread views (pre-computed so agents don't need to parse review_comments.json)\njq '[.[] | select(.isResolved == false)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/unresolved_threads.json\njq '[.[] | select(.isResolved == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/resolved_threads.json\njq '[.[] | select(.isOutdated == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/outdated_threads.json\n\n# Per-file review threads (mirrors diffs/ structure)\njq -c '.[]' /tmp/pr-context/review_comments.json | while IFS= read -r thread; do\n filepath=$(echo \"$thread\" | jq -r '.path // empty')\n [ -z \"$filepath\" ] && continue\n mkdir -p \"/tmp/pr-context/threads/$(dirname \"$filepath\")\"\n echo \"$thread\" >> \"/tmp/pr-context/threads/${filepath}.jsonl\"\ndone\n# Convert per-file JSONL to proper JSON arrays\nmkdir -p /tmp/pr-context/threads\nfind /tmp/pr-context/threads -name '*.jsonl' 2>/dev/null | while IFS= read -r jsonl; do\n jq -s '.' \"$jsonl\" > \"${jsonl%.jsonl}.json\"\n rm \"$jsonl\"\ndone\n\n# PR discussion comments\ngh api \"repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/comments.json\n\n# Linked issues\njq -r '.body // \"\"' /tmp/pr-context/pr.json 2>/dev/null \\\n | grep -oiE '(fixes|closes|resolves)\\s+#[0-9]+' \\\n | grep -oE '[0-9]+$' \\\n | sort -u \\\n | while read -r issue; do\n gh api \"repos/$GITHUB_REPOSITORY/issues/$issue\" > \"/tmp/pr-context/issue-${issue}.json\" || true\n done || true\n\n# Write manifest\ncat > /tmp/pr-context/README.md << 'MANIFEST'\n# PR Context\n\nPre-fetched PR data. All files are in `/tmp/pr-context/`.\n\n| File | Description |\n| --- | --- |\n| `pr.json` | PR metadata — title, body, author, base/head branches, head commit SHA (`headRefOid`), URL |\n| `pr.diff` | Full unified diff of all changes |\n| `files.json` | Changed files array — each entry has `filename`, `status`, `additions`, `deletions`, `patch` |\n| `diffs/.diff` | Per-file diffs — one file per changed file, mirroring the repo path under `diffs/` |\n| `file_order_az.txt` | Changed files sorted alphabetically (A→Z), one filename per line |\n| `file_order_za.txt` | Changed files sorted reverse-alphabetically (Z→A), one filename per line |\n| `file_order_largest.txt` | Changed files sorted by diff size descending (largest first), one filename per line |\n| `reviews.json` | Prior review submissions — author, state (APPROVED/CHANGES_REQUESTED/COMMENTED), body |\n| `review_comments.json` | All review threads (GraphQL) — each thread has `id` (node ID for resolving), `isResolved`, `isOutdated`, `path`, `line`, and nested `comments` with `id`, `databaseId` (numeric REST ID for replies), body/author |\n| `unresolved_threads.json` | Unresolved review threads — subset of `review_comments.json` where `isResolved` is false |\n| `resolved_threads.json` | Resolved review threads — subset of `review_comments.json` where `isResolved` is true |\n| `outdated_threads.json` | Outdated review threads — subset of `review_comments.json` where `isOutdated` is true (code changed since comment) |\n| `threads/.json` | Per-file review threads — one file per changed file with existing threads, mirroring the repo path under `threads/` |\n| `comments.json` | PR discussion comments (not inline) |\n| `issue-{N}.json` | Linked issue details (one file per linked issue, if any) |\n| `pr-size.txt` | PR size metrics: `{N} files, {M} diff lines` — used by the agent to decide review fan-out |\n| `agents.md` | Repository conventions from `generate_agents_md` (if written by agent) |\n| `review-instructions.md` | Review instructions, criteria, and calibration examples (if written by review-process fragment) |\nMANIFEST\n\necho \"PR context written to /tmp/pr-context/\"\nls -la /tmp/pr-context/" - name: Write review instructions to disk run: "mkdir -p /tmp/pr-context\ncat > /tmp/pr-context/review-instructions.md << 'REVIEW_EOF'\n# Review Instructions for Sub-agents\n\nYou are a code review sub-agent. Read these instructions, then review the PR files in the order provided in your prompt.\n\n## Context\n\nBefore reviewing files, read these to understand the PR:\n\n1. `/tmp/pr-context/pr.json` — PR title, description, author, and branches. Understand what the PR is trying to accomplish.\n2. `/tmp/pr-context/agents.md` — Repository coding conventions and guidelines (if it exists).\n3. `/tmp/pr-context/review_comments.json` — Existing review threads. Note which files already have threads so you don't duplicate.\n4. `/tmp/pr-context/issue-*.json` — Linked issue details (if any). Understand the motivation and acceptance criteria.\n\n## Process\n\nReview the PR diff file by file in your assigned order. For each changed file:\n\n1. **Read the diff** for this file from `/tmp/pr-context/diffs/.diff` to understand what changed. If the diff is empty or truncated (e.g., binary files or very large changes), fall back to reading the full file from the workspace and comparing against context.\n2. **Read the full file from the workspace.** The PR branch is checked out locally — open the file directly to get complete contents with line numbers.\n3. **Check existing threads** for this file from `/tmp/pr-context/threads/.json` (if it exists). Skip issues that are already under discussion — each thread has `isResolved` and `isOutdated` fields.\n4. **Identify potential issues** matching the review criteria below.\n5. **Quick-check each issue** before including it:\n - What specific code pattern or change triggers this concern?\n - Is there an obvious guard, handler, or mitigation visible in the immediate context?\n - Can you describe a concrete failure scenario (the `evidence` field)? If you cannot articulate what specific input or state triggers the problem, drop the finding.\n - If the issue is clearly handled, skip it. If you're unsure, include it — the parent will verify.\n6. **Add to your findings list.** Do NOT leave inline comments — you don't have that tool. Return findings in this format:\n\n```\n- file: path/to/file\n line: 42\n severity: HIGH\n title: Brief title\n description: What the issue is and why it matters\n evidence: The specific code pattern and failure scenario\n suggestion: corrected code here (optional — only if you can provide a concrete fix)\n```\n\n**Review every file in your assigned order.** Files reviewed earlier get more attention, which is why different sub-agents use different orderings.\n\n**Check existing threads** — per-file threads are at `/tmp/pr-context/threads/.json` (step 3 above). The full list is at `/tmp/pr-context/review_comments.json`. Do not flag issues that are already under discussion (resolved or unresolved). For outdated threads, only re-flag if the issue still applies to the current diff.\n\n**Return your full findings list** when done, or an empty list if no issues were found.\n\n## Review Criteria\n\nFocus on these categories in priority order:\n\n1. Security vulnerabilities (injection, XSS, auth bypass, secrets exposure)\n2. Logic bugs that could cause runtime failures or incorrect behavior\n3. Data integrity issues (race conditions, missing transactions, corruption risk)\n4. Performance bottlenecks (N+1 queries, memory leaks, blocking operations)\n5. Error handling gaps (unhandled exceptions, missing validation)\n6. Breaking changes to public APIs without migration path\n7. Missing or incorrect test coverage for critical paths\n\n## What NOT to Flag\n\nOnly review the diff — do not flag issues in unchanged code, pre-existing problems not introduced by this PR, or style preferences handled by linters or formatters.\n\n**Common false positives** — these patterns look like issues but usually aren't. Before flagging anything in these categories, confirm the problem is real by reading the surrounding code:\n\n- **Security — input already sanitized:** Don't flag injection or XSS risks when inputs are sanitized upstream, parameterized queries are used, or the framework auto-escapes output.\n- **Null/undefined — guarded elsewhere:** Don't flag potential null dereferences if the value is guaranteed by a type guard, assertion, schema validation, or upstream null check.\n- **Error handling — handled at a different layer:** Don't flag missing try/catch if the caller, middleware, or framework catches and handles the error (e.g., Express error middleware, React error boundaries).\n- **Performance — theoretical, not practical:** Don't flag algorithmic complexity (e.g., O(n^2)) unless N is demonstrably large enough to matter in the actual usage context. \"This could be slow\" without evidence is not actionable.\n- **Validation — exists at another layer:** Don't flag missing input validation if it's handled by an API gateway, middleware, schema validator, or type system.\n- **Test coverage — trivial or generated code:** Don't flag missing tests for trivial getters/setters, auto-generated code, or simple delegation methods.\n- **Style or naming — not in coding guidelines:** Don't flag naming conventions or code style unless they violate the repository's documented coding guidelines (from `generate_agents_md` or CONTRIBUTING docs).\n\n**Existing review threads** — check BEFORE flagging any issue:\n\n- **Resolved with reviewer reply** (e.g. \"This is intentional\") — reviewer's decision is final. Do NOT re-flag.\n- **Resolved without reply** — author likely fixed it. Do NOT re-raise unless the fix introduced a new problem.\n- **Unresolved** — already flagged. Do NOT duplicate.\n- **Outdated** — only re-flag if the issue still applies to the current diff.\n\nWhen in doubt, do not duplicate. Redundant comments erode trust.\n\nFinding no issues is a valid and valuable outcome. An empty findings list is better than findings that waste the author's time or erode trust. Do not manufacture findings to justify your review — if the code is sound, return an empty list.\n\n## Severity Classification\n\nDetermine severity AFTER investigating the issue, not before. First identify the problem and trace through the code, then assign a severity based on the evidence you found.\n\n- 🔴 CRITICAL — Must fix before merge (security vulnerabilities, data corruption, production-breaking bugs)\n- 🟠 HIGH — Should fix before merge (logic errors, missing validation, significant performance issues)\n- 🟡 MEDIUM — Address soon, non-blocking (error handling gaps, suboptimal patterns, missing edge cases)\n- ⚪ LOW — Author discretion (minor improvements, documentation gaps)\n- 💬 NITPICK — Truly optional (stylistic preferences, alternative approaches)\n\n## Review Intensity\n\nThe review intensity is `${{ inputs.intensity || 'balanced' }}`.\n\n- **conservative**: High evidence bar. Only flag when you can demonstrate a concrete failure scenario. If you can construct a reasonable counterargument, do not flag. Approval with zero findings is the expected outcome for most PRs.\n- **balanced**: Standard evidence bar. Flag when you can point to specific code that would fail. If the issue is ambiguous, lean toward not flagging.\n- **aggressive**: Lower evidence bar. Flag when evidence exists even if the failure scenario is not fully confirmed. Improvement suggestions welcome but must cite specific code.\n\n## Calibration Examples\n\nUse these examples to calibrate your judgment. Each pair shows a real issue and a similar-looking pattern that is NOT an issue.\n\n### Example 1: Null/Undefined Access\n\n**True positive — flag this:**\n\n```js\n// PR adds this handler\napp.get('/user/:id', async (req, res) => {\n const user = await db.findUser(req.params.id);\n res.json({ name: user.name, email: user.email });\n});\n```\n\nWhy flag: `db.findUser()` can return `null` when no user matches the ID. Accessing `user.name` will throw a TypeError at runtime. No upstream guard exists — the route handler is the entry point.\n\n**False positive — do NOT flag this:**\n\n```ts\n// PR adds this line inside an existing function\nconst settings = user.getSettings();\n```\n\nWhy skip: Reading the full file reveals `user` is typed as `User` (not `User | null`), and the calling function only runs after `authenticateUser()` middleware which guarantees a valid user object. The null case is handled at a different layer.\n\n### Example 2: SQL Injection\n\n**True positive — flag this:**\n\n```python\n# PR adds this query\ncursor.execute(f\"SELECT * FROM orders WHERE customer_id = '{customer_id}'\")\n```\n\nWhy flag: String interpolation in a SQL query with user-controlled input (`customer_id` comes from the request). No parameterization or sanitization anywhere in the call chain.\n\n**False positive — do NOT flag this:**\n\n```python\n# PR adds this query\ncursor.execute(f\"SELECT * FROM orders WHERE status = '{OrderStatus.PENDING.value}'\")\n```\n\nWhy skip: The interpolated value is a hardcoded enum constant (`OrderStatus.PENDING`), not user input. There is no injection vector.\n\n### Example 3: Borderline — Do NOT Flag\n\n```go\n// PR adds this function\nfunc processItems(items []Item) []Result {\n results := make([]Result, 0)\n for _, item := range items {\n for _, tag := range item.Tags {\n results = append(results, process(item, tag))\n }\n }\n return results\n}\n```\n\nThis looks like an O(n*m) performance concern. But without evidence that `items` or `Tags` are large in practice, this is speculative. The function processes a bounded dataset (items from a single user request). Do not flag theoretical performance issues without evidence of real-world impact.\nREVIEW_EOF" - env: diff --git a/.github/workflows/gh-aw-mention-in-pr.lock.yml b/.github/workflows/gh-aw-mention-in-pr.lock.yml index d951d8db..d113bcfd 100644 --- a/.github/workflows/gh-aw-mention-in-pr.lock.yml +++ b/.github/workflows/gh-aw-mention-in-pr.lock.yml @@ -44,7 +44,7 @@ # # inlined-imports: true # -# gh-aw-metadata: {"schema_version":"v1","frontmatter_hash":"95302ccffa65ba0db480f26ae1e7941b73d43d8850ed1f043520bf4518c6c6b8"} +# gh-aw-metadata: {"schema_version":"v1","frontmatter_hash":"1dae2e9dacf4d3c04ba650954917689968d34fcca61df10c73c48d7f8524a8b8"} name: "Mention in PR" "on": @@ -784,7 +784,7 @@ jobs: GH_TOKEN: ${{ github.token }} PR_NUMBER: ${{ github.event.pull_request.number || inputs.target-pr-number || github.event.issue.number }} name: Fetch PR context to disk - run: "set -euo pipefail\nmkdir -p /tmp/pr-context\n\n# PR metadata\ngh pr view \"$PR_NUMBER\" --json title,body,author,baseRefName,headRefName,headRefOid,url \\\n > /tmp/pr-context/pr.json\n\n# Full diff\nif ! gh pr diff \"$PR_NUMBER\" > /tmp/pr-context/pr.diff; then\n echo \"::warning::Failed to fetch full PR diff; per-file diffs from files.json are still available.\"\n : > /tmp/pr-context/pr.diff\nfi\n\n# Changed files list (--paginate may output concatenated arrays; jq -s 'add' merges them)\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/files\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/files.json\n\n# Per-file diffs\njq -c '.[]' /tmp/pr-context/files.json | while IFS= read -r entry; do\n filename=$(echo \"$entry\" | jq -r '.filename')\n mkdir -p \"/tmp/pr-context/diffs/$(dirname \"$filename\")\"\n echo \"$entry\" | jq -r '.patch // empty' > \"/tmp/pr-context/diffs/${filename}.diff\"\ndone\n\n# File orderings for sub-agent review (3 strategies)\njq -r '[.[] | .filename] | sort | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_az.txt\njq -r '[.[] | .filename] | sort | reverse | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_za.txt\njq -r '[.[] | {filename, size: ((.additions // 0) + (.deletions // 0))}] | sort_by(-.size) | .[].filename' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_largest.txt\n\n# Existing reviews\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/reviews\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/reviews.json\n\n# Review threads with resolution status (GraphQL — REST lacks isResolved/isOutdated)\ngh api graphql --paginate -f query='\n query($owner: String!, $repo: String!, $number: Int!, $endCursor: String) {\n repository(owner: $owner, name: $repo) {\n pullRequest(number: $number) {\n reviewThreads(first: 100, after: $endCursor) {\n pageInfo { hasNextPage endCursor }\n nodes {\n id\n isResolved\n isOutdated\n isCollapsed\n path\n line\n startLine\n comments(first: 100) {\n nodes {\n id\n databaseId\n body\n author { login }\n createdAt\n }\n }\n }\n }\n }\n }\n }\n' -F owner=\"${GITHUB_REPOSITORY%/*}\" -F repo=\"${GITHUB_REPOSITORY#*/}\" -F \"number=$PR_NUMBER\" \\\n --jq '.data.repository.pullRequest.reviewThreads.nodes' \\\n | jq -s 'add // []' > /tmp/pr-context/review_comments.json\n\n# Filtered review thread views (pre-computed so agents don't need to parse review_comments.json)\njq '[.[] | select(.isResolved == false)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/unresolved_threads.json\njq '[.[] | select(.isResolved == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/resolved_threads.json\njq '[.[] | select(.isOutdated == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/outdated_threads.json\n\n# Per-file review threads (mirrors diffs/ structure)\njq -c '.[]' /tmp/pr-context/review_comments.json | while IFS= read -r thread; do\n filepath=$(echo \"$thread\" | jq -r '.path // empty')\n [ -z \"$filepath\" ] && continue\n mkdir -p \"/tmp/pr-context/threads/$(dirname \"$filepath\")\"\n echo \"$thread\" >> \"/tmp/pr-context/threads/${filepath}.jsonl\"\ndone\n# Convert per-file JSONL to proper JSON arrays\nmkdir -p /tmp/pr-context/threads\nfind /tmp/pr-context/threads -name '*.jsonl' 2>/dev/null | while IFS= read -r jsonl; do\n jq -s '.' \"$jsonl\" > \"${jsonl%.jsonl}.json\"\n rm \"$jsonl\"\ndone\n\n# PR discussion comments\ngh api \"repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/comments.json\n\n# Linked issues\njq -r '.body // \"\"' /tmp/pr-context/pr.json 2>/dev/null \\\n | grep -oiE '(fixes|closes|resolves)\\s+#[0-9]+' \\\n | grep -oE '[0-9]+$' \\\n | sort -u \\\n | while read -r issue; do\n gh api \"repos/$GITHUB_REPOSITORY/issues/$issue\" > \"/tmp/pr-context/issue-${issue}.json\" || true\n done || true\n\n# Write manifest\ncat > /tmp/pr-context/README.md << 'MANIFEST'\n# PR Context\n\nPre-fetched PR data. All files are in `/tmp/pr-context/`.\n\n| File | Description |\n| --- | --- |\n| `pr.json` | PR metadata — title, body, author, base/head branches, head commit SHA (`headRefOid`), URL |\n| `pr.diff` | Full unified diff of all changes |\n| `files.json` | Changed files array — each entry has `filename`, `status`, `additions`, `deletions`, `patch` |\n| `diffs/.diff` | Per-file diffs — one file per changed file, mirroring the repo path under `diffs/` |\n| `file_order_az.txt` | Changed files sorted alphabetically (A→Z), one filename per line |\n| `file_order_za.txt` | Changed files sorted reverse-alphabetically (Z→A), one filename per line |\n| `file_order_largest.txt` | Changed files sorted by diff size descending (largest first), one filename per line |\n| `reviews.json` | Prior review submissions — author, state (APPROVED/CHANGES_REQUESTED/COMMENTED), body |\n| `review_comments.json` | All review threads (GraphQL) — each thread has `id` (node ID for resolving), `isResolved`, `isOutdated`, `path`, `line`, and nested `comments` with `id`, `databaseId` (numeric REST ID for replies), body/author |\n| `unresolved_threads.json` | Unresolved review threads — subset of `review_comments.json` where `isResolved` is false |\n| `resolved_threads.json` | Resolved review threads — subset of `review_comments.json` where `isResolved` is true |\n| `outdated_threads.json` | Outdated review threads — subset of `review_comments.json` where `isOutdated` is true (code changed since comment) |\n| `threads/.json` | Per-file review threads — one file per changed file with existing threads, mirroring the repo path under `threads/` |\n| `comments.json` | PR discussion comments (not inline) |\n| `issue-{N}.json` | Linked issue details (one file per linked issue, if any) |\n| `agents.md` | Repository conventions from `generate_agents_md` (if written by agent) |\n| `review-instructions.md` | Review instructions, criteria, and calibration examples (if written by review-process fragment) |\nMANIFEST\n\necho \"PR context written to /tmp/pr-context/\"\nls -la /tmp/pr-context/" + run: "set -euo pipefail\nmkdir -p /tmp/pr-context\n\n# PR metadata\ngh pr view \"$PR_NUMBER\" --json title,body,author,baseRefName,headRefName,headRefOid,url \\\n > /tmp/pr-context/pr.json\n\n# Full diff\nif ! gh pr diff \"$PR_NUMBER\" > /tmp/pr-context/pr.diff; then\n echo \"::warning::Failed to fetch full PR diff; per-file diffs from files.json are still available.\"\n : > /tmp/pr-context/pr.diff\nfi\n\n# Changed files list (--paginate may output concatenated arrays; jq -s 'add' merges them)\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/files\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/files.json\n\n# Per-file diffs\njq -c '.[]' /tmp/pr-context/files.json | while IFS= read -r entry; do\n filename=$(echo \"$entry\" | jq -r '.filename')\n mkdir -p \"/tmp/pr-context/diffs/$(dirname \"$filename\")\"\n echo \"$entry\" | jq -r '.patch // empty' > \"/tmp/pr-context/diffs/${filename}.diff\"\ndone\n\n# File orderings for sub-agent review (3 strategies)\njq -r '[.[] | .filename] | sort | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_az.txt\njq -r '[.[] | .filename] | sort | reverse | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_za.txt\njq -r '[.[] | {filename, size: ((.additions // 0) + (.deletions // 0))}] | sort_by(-.size) | .[].filename' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_largest.txt\n\n# Compute PR size metrics for review fan-out decisions\nFILE_COUNT=$(jq 'length' /tmp/pr-context/files.json)\nDIFF_LINES=$(wc -l < /tmp/pr-context/pr.diff | tr -d ' ')\necho \"${FILE_COUNT} files, ${DIFF_LINES} diff lines\" > /tmp/pr-context/pr-size.txt\necho \"PR size: ${FILE_COUNT} files, ${DIFF_LINES} diff lines\"\n\n# Existing reviews\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/reviews\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/reviews.json\n\n# Review threads with resolution status (GraphQL — REST lacks isResolved/isOutdated)\ngh api graphql --paginate -f query='\n query($owner: String!, $repo: String!, $number: Int!, $endCursor: String) {\n repository(owner: $owner, name: $repo) {\n pullRequest(number: $number) {\n reviewThreads(first: 100, after: $endCursor) {\n pageInfo { hasNextPage endCursor }\n nodes {\n id\n isResolved\n isOutdated\n isCollapsed\n path\n line\n startLine\n comments(first: 100) {\n nodes {\n id\n databaseId\n body\n author { login }\n createdAt\n }\n }\n }\n }\n }\n }\n }\n' -F owner=\"${GITHUB_REPOSITORY%/*}\" -F repo=\"${GITHUB_REPOSITORY#*/}\" -F \"number=$PR_NUMBER\" \\\n --jq '.data.repository.pullRequest.reviewThreads.nodes' \\\n | jq -s 'add // []' > /tmp/pr-context/review_comments.json\n\n# Filtered review thread views (pre-computed so agents don't need to parse review_comments.json)\njq '[.[] | select(.isResolved == false)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/unresolved_threads.json\njq '[.[] | select(.isResolved == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/resolved_threads.json\njq '[.[] | select(.isOutdated == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/outdated_threads.json\n\n# Per-file review threads (mirrors diffs/ structure)\njq -c '.[]' /tmp/pr-context/review_comments.json | while IFS= read -r thread; do\n filepath=$(echo \"$thread\" | jq -r '.path // empty')\n [ -z \"$filepath\" ] && continue\n mkdir -p \"/tmp/pr-context/threads/$(dirname \"$filepath\")\"\n echo \"$thread\" >> \"/tmp/pr-context/threads/${filepath}.jsonl\"\ndone\n# Convert per-file JSONL to proper JSON arrays\nmkdir -p /tmp/pr-context/threads\nfind /tmp/pr-context/threads -name '*.jsonl' 2>/dev/null | while IFS= read -r jsonl; do\n jq -s '.' \"$jsonl\" > \"${jsonl%.jsonl}.json\"\n rm \"$jsonl\"\ndone\n\n# PR discussion comments\ngh api \"repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/comments.json\n\n# Linked issues\njq -r '.body // \"\"' /tmp/pr-context/pr.json 2>/dev/null \\\n | grep -oiE '(fixes|closes|resolves)\\s+#[0-9]+' \\\n | grep -oE '[0-9]+$' \\\n | sort -u \\\n | while read -r issue; do\n gh api \"repos/$GITHUB_REPOSITORY/issues/$issue\" > \"/tmp/pr-context/issue-${issue}.json\" || true\n done || true\n\n# Write manifest\ncat > /tmp/pr-context/README.md << 'MANIFEST'\n# PR Context\n\nPre-fetched PR data. All files are in `/tmp/pr-context/`.\n\n| File | Description |\n| --- | --- |\n| `pr.json` | PR metadata — title, body, author, base/head branches, head commit SHA (`headRefOid`), URL |\n| `pr.diff` | Full unified diff of all changes |\n| `files.json` | Changed files array — each entry has `filename`, `status`, `additions`, `deletions`, `patch` |\n| `diffs/.diff` | Per-file diffs — one file per changed file, mirroring the repo path under `diffs/` |\n| `file_order_az.txt` | Changed files sorted alphabetically (A→Z), one filename per line |\n| `file_order_za.txt` | Changed files sorted reverse-alphabetically (Z→A), one filename per line |\n| `file_order_largest.txt` | Changed files sorted by diff size descending (largest first), one filename per line |\n| `reviews.json` | Prior review submissions — author, state (APPROVED/CHANGES_REQUESTED/COMMENTED), body |\n| `review_comments.json` | All review threads (GraphQL) — each thread has `id` (node ID for resolving), `isResolved`, `isOutdated`, `path`, `line`, and nested `comments` with `id`, `databaseId` (numeric REST ID for replies), body/author |\n| `unresolved_threads.json` | Unresolved review threads — subset of `review_comments.json` where `isResolved` is false |\n| `resolved_threads.json` | Resolved review threads — subset of `review_comments.json` where `isResolved` is true |\n| `outdated_threads.json` | Outdated review threads — subset of `review_comments.json` where `isOutdated` is true (code changed since comment) |\n| `threads/.json` | Per-file review threads — one file per changed file with existing threads, mirroring the repo path under `threads/` |\n| `comments.json` | PR discussion comments (not inline) |\n| `issue-{N}.json` | Linked issue details (one file per linked issue, if any) |\n| `pr-size.txt` | PR size metrics: `{N} files, {M} diff lines` — used by the agent to decide review fan-out |\n| `agents.md` | Repository conventions from `generate_agents_md` (if written by agent) |\n| `review-instructions.md` | Review instructions, criteria, and calibration examples (if written by review-process fragment) |\nMANIFEST\n\necho \"PR context written to /tmp/pr-context/\"\nls -la /tmp/pr-context/" - name: Write review instructions to disk run: "mkdir -p /tmp/pr-context\ncat > /tmp/pr-context/review-instructions.md << 'REVIEW_EOF'\n# Review Instructions for Sub-agents\n\nYou are a code review sub-agent. Read these instructions, then review the PR files in the order provided in your prompt.\n\n## Context\n\nBefore reviewing files, read these to understand the PR:\n\n1. `/tmp/pr-context/pr.json` — PR title, description, author, and branches. Understand what the PR is trying to accomplish.\n2. `/tmp/pr-context/agents.md` — Repository coding conventions and guidelines (if it exists).\n3. `/tmp/pr-context/review_comments.json` — Existing review threads. Note which files already have threads so you don't duplicate.\n4. `/tmp/pr-context/issue-*.json` — Linked issue details (if any). Understand the motivation and acceptance criteria.\n\n## Process\n\nReview the PR diff file by file in your assigned order. For each changed file:\n\n1. **Read the diff** for this file from `/tmp/pr-context/diffs/.diff` to understand what changed. If the diff is empty or truncated (e.g., binary files or very large changes), fall back to reading the full file from the workspace and comparing against context.\n2. **Read the full file from the workspace.** The PR branch is checked out locally — open the file directly to get complete contents with line numbers.\n3. **Check existing threads** for this file from `/tmp/pr-context/threads/.json` (if it exists). Skip issues that are already under discussion — each thread has `isResolved` and `isOutdated` fields.\n4. **Identify potential issues** matching the review criteria below.\n5. **Quick-check each issue** before including it:\n - What specific code pattern or change triggers this concern?\n - Is there an obvious guard, handler, or mitigation visible in the immediate context?\n - Can you describe a concrete failure scenario (the `evidence` field)? If you cannot articulate what specific input or state triggers the problem, drop the finding.\n - If the issue is clearly handled, skip it. If you're unsure, include it — the parent will verify.\n6. **Add to your findings list.** Do NOT leave inline comments — you don't have that tool. Return findings in this format:\n\n```\n- file: path/to/file\n line: 42\n severity: HIGH\n title: Brief title\n description: What the issue is and why it matters\n evidence: The specific code pattern and failure scenario\n suggestion: corrected code here (optional — only if you can provide a concrete fix)\n```\n\n**Review every file in your assigned order.** Files reviewed earlier get more attention, which is why different sub-agents use different orderings.\n\n**Check existing threads** — per-file threads are at `/tmp/pr-context/threads/.json` (step 3 above). The full list is at `/tmp/pr-context/review_comments.json`. Do not flag issues that are already under discussion (resolved or unresolved). For outdated threads, only re-flag if the issue still applies to the current diff.\n\n**Return your full findings list** when done, or an empty list if no issues were found.\n\n## Review Criteria\n\nFocus on these categories in priority order:\n\n1. Security vulnerabilities (injection, XSS, auth bypass, secrets exposure)\n2. Logic bugs that could cause runtime failures or incorrect behavior\n3. Data integrity issues (race conditions, missing transactions, corruption risk)\n4. Performance bottlenecks (N+1 queries, memory leaks, blocking operations)\n5. Error handling gaps (unhandled exceptions, missing validation)\n6. Breaking changes to public APIs without migration path\n7. Missing or incorrect test coverage for critical paths\n\n## What NOT to Flag\n\nOnly review the diff — do not flag issues in unchanged code, pre-existing problems not introduced by this PR, or style preferences handled by linters or formatters.\n\n**Common false positives** — these patterns look like issues but usually aren't. Before flagging anything in these categories, confirm the problem is real by reading the surrounding code:\n\n- **Security — input already sanitized:** Don't flag injection or XSS risks when inputs are sanitized upstream, parameterized queries are used, or the framework auto-escapes output.\n- **Null/undefined — guarded elsewhere:** Don't flag potential null dereferences if the value is guaranteed by a type guard, assertion, schema validation, or upstream null check.\n- **Error handling — handled at a different layer:** Don't flag missing try/catch if the caller, middleware, or framework catches and handles the error (e.g., Express error middleware, React error boundaries).\n- **Performance — theoretical, not practical:** Don't flag algorithmic complexity (e.g., O(n^2)) unless N is demonstrably large enough to matter in the actual usage context. \"This could be slow\" without evidence is not actionable.\n- **Validation — exists at another layer:** Don't flag missing input validation if it's handled by an API gateway, middleware, schema validator, or type system.\n- **Test coverage — trivial or generated code:** Don't flag missing tests for trivial getters/setters, auto-generated code, or simple delegation methods.\n- **Style or naming — not in coding guidelines:** Don't flag naming conventions or code style unless they violate the repository's documented coding guidelines (from `generate_agents_md` or CONTRIBUTING docs).\n\n**Existing review threads** — check BEFORE flagging any issue:\n\n- **Resolved with reviewer reply** (e.g. \"This is intentional\") — reviewer's decision is final. Do NOT re-flag.\n- **Resolved without reply** — author likely fixed it. Do NOT re-raise unless the fix introduced a new problem.\n- **Unresolved** — already flagged. Do NOT duplicate.\n- **Outdated** — only re-flag if the issue still applies to the current diff.\n\nWhen in doubt, do not duplicate. Redundant comments erode trust.\n\nFinding no issues is a valid and valuable outcome. An empty findings list is better than findings that waste the author's time or erode trust. Do not manufacture findings to justify your review — if the code is sound, return an empty list.\n\n## Severity Classification\n\nDetermine severity AFTER investigating the issue, not before. First identify the problem and trace through the code, then assign a severity based on the evidence you found.\n\n- 🔴 CRITICAL — Must fix before merge (security vulnerabilities, data corruption, production-breaking bugs)\n- 🟠 HIGH — Should fix before merge (logic errors, missing validation, significant performance issues)\n- 🟡 MEDIUM — Address soon, non-blocking (error handling gaps, suboptimal patterns, missing edge cases)\n- ⚪ LOW — Author discretion (minor improvements, documentation gaps)\n- 💬 NITPICK — Truly optional (stylistic preferences, alternative approaches)\n\n## Review Intensity\n\nThe review intensity is `${{ inputs.intensity || 'balanced' }}`.\n\n- **conservative**: High evidence bar. Only flag when you can demonstrate a concrete failure scenario. If you can construct a reasonable counterargument, do not flag. Approval with zero findings is the expected outcome for most PRs.\n- **balanced**: Standard evidence bar. Flag when you can point to specific code that would fail. If the issue is ambiguous, lean toward not flagging.\n- **aggressive**: Lower evidence bar. Flag when evidence exists even if the failure scenario is not fully confirmed. Improvement suggestions welcome but must cite specific code.\n\n## Calibration Examples\n\nUse these examples to calibrate your judgment. Each pair shows a real issue and a similar-looking pattern that is NOT an issue.\n\n### Example 1: Null/Undefined Access\n\n**True positive — flag this:**\n\n```js\n// PR adds this handler\napp.get('/user/:id', async (req, res) => {\n const user = await db.findUser(req.params.id);\n res.json({ name: user.name, email: user.email });\n});\n```\n\nWhy flag: `db.findUser()` can return `null` when no user matches the ID. Accessing `user.name` will throw a TypeError at runtime. No upstream guard exists — the route handler is the entry point.\n\n**False positive — do NOT flag this:**\n\n```ts\n// PR adds this line inside an existing function\nconst settings = user.getSettings();\n```\n\nWhy skip: Reading the full file reveals `user` is typed as `User` (not `User | null`), and the calling function only runs after `authenticateUser()` middleware which guarantees a valid user object. The null case is handled at a different layer.\n\n### Example 2: SQL Injection\n\n**True positive — flag this:**\n\n```python\n# PR adds this query\ncursor.execute(f\"SELECT * FROM orders WHERE customer_id = '{customer_id}'\")\n```\n\nWhy flag: String interpolation in a SQL query with user-controlled input (`customer_id` comes from the request). No parameterization or sanitization anywhere in the call chain.\n\n**False positive — do NOT flag this:**\n\n```python\n# PR adds this query\ncursor.execute(f\"SELECT * FROM orders WHERE status = '{OrderStatus.PENDING.value}'\")\n```\n\nWhy skip: The interpolated value is a hardcoded enum constant (`OrderStatus.PENDING`), not user input. There is no injection vector.\n\n### Example 3: Borderline — Do NOT Flag\n\n```go\n// PR adds this function\nfunc processItems(items []Item) []Result {\n results := make([]Result, 0)\n for _, item := range items {\n for _, tag := range item.Tags {\n results = append(results, process(item, tag))\n }\n }\n return results\n}\n```\n\nThis looks like an O(n*m) performance concern. But without evidence that `items` or `Tags` are large in practice, this is speculative. The function processes a bounded dataset (items from a single user request). Do not flag theoretical performance issues without evidence of real-world impact.\nREVIEW_EOF" - env: diff --git a/.github/workflows/gh-aw-pr-review-addresser.lock.yml b/.github/workflows/gh-aw-pr-review-addresser.lock.yml index 986c0337..aee86115 100644 --- a/.github/workflows/gh-aw-pr-review-addresser.lock.yml +++ b/.github/workflows/gh-aw-pr-review-addresser.lock.yml @@ -40,7 +40,7 @@ # # inlined-imports: true # -# gh-aw-metadata: {"schema_version":"v1","frontmatter_hash":"09978e81da842ac2a86b1f66b69cfe315f50f46d6da032b6eb857cae510f3d3b"} +# gh-aw-metadata: {"schema_version":"v1","frontmatter_hash":"54ab6ea22d092d1d705183e80677330ce4568b345f16a555867f04f55a7e286f"} name: "PR Review Addresser" "on": @@ -599,7 +599,7 @@ jobs: GH_TOKEN: ${{ github.token }} PR_NUMBER: ${{ github.event.pull_request.number || inputs.target-pr-number || github.event.issue.number }} name: Fetch PR context to disk - run: "set -euo pipefail\nmkdir -p /tmp/pr-context\n\n# PR metadata\ngh pr view \"$PR_NUMBER\" --json title,body,author,baseRefName,headRefName,headRefOid,url \\\n > /tmp/pr-context/pr.json\n\n# Full diff\nif ! gh pr diff \"$PR_NUMBER\" > /tmp/pr-context/pr.diff; then\n echo \"::warning::Failed to fetch full PR diff; per-file diffs from files.json are still available.\"\n : > /tmp/pr-context/pr.diff\nfi\n\n# Changed files list (--paginate may output concatenated arrays; jq -s 'add' merges them)\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/files\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/files.json\n\n# Per-file diffs\njq -c '.[]' /tmp/pr-context/files.json | while IFS= read -r entry; do\n filename=$(echo \"$entry\" | jq -r '.filename')\n mkdir -p \"/tmp/pr-context/diffs/$(dirname \"$filename\")\"\n echo \"$entry\" | jq -r '.patch // empty' > \"/tmp/pr-context/diffs/${filename}.diff\"\ndone\n\n# File orderings for sub-agent review (3 strategies)\njq -r '[.[] | .filename] | sort | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_az.txt\njq -r '[.[] | .filename] | sort | reverse | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_za.txt\njq -r '[.[] | {filename, size: ((.additions // 0) + (.deletions // 0))}] | sort_by(-.size) | .[].filename' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_largest.txt\n\n# Existing reviews\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/reviews\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/reviews.json\n\n# Review threads with resolution status (GraphQL — REST lacks isResolved/isOutdated)\ngh api graphql --paginate -f query='\n query($owner: String!, $repo: String!, $number: Int!, $endCursor: String) {\n repository(owner: $owner, name: $repo) {\n pullRequest(number: $number) {\n reviewThreads(first: 100, after: $endCursor) {\n pageInfo { hasNextPage endCursor }\n nodes {\n id\n isResolved\n isOutdated\n isCollapsed\n path\n line\n startLine\n comments(first: 100) {\n nodes {\n id\n databaseId\n body\n author { login }\n createdAt\n }\n }\n }\n }\n }\n }\n }\n' -F owner=\"${GITHUB_REPOSITORY%/*}\" -F repo=\"${GITHUB_REPOSITORY#*/}\" -F \"number=$PR_NUMBER\" \\\n --jq '.data.repository.pullRequest.reviewThreads.nodes' \\\n | jq -s 'add // []' > /tmp/pr-context/review_comments.json\n\n# Filtered review thread views (pre-computed so agents don't need to parse review_comments.json)\njq '[.[] | select(.isResolved == false)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/unresolved_threads.json\njq '[.[] | select(.isResolved == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/resolved_threads.json\njq '[.[] | select(.isOutdated == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/outdated_threads.json\n\n# Per-file review threads (mirrors diffs/ structure)\njq -c '.[]' /tmp/pr-context/review_comments.json | while IFS= read -r thread; do\n filepath=$(echo \"$thread\" | jq -r '.path // empty')\n [ -z \"$filepath\" ] && continue\n mkdir -p \"/tmp/pr-context/threads/$(dirname \"$filepath\")\"\n echo \"$thread\" >> \"/tmp/pr-context/threads/${filepath}.jsonl\"\ndone\n# Convert per-file JSONL to proper JSON arrays\nmkdir -p /tmp/pr-context/threads\nfind /tmp/pr-context/threads -name '*.jsonl' 2>/dev/null | while IFS= read -r jsonl; do\n jq -s '.' \"$jsonl\" > \"${jsonl%.jsonl}.json\"\n rm \"$jsonl\"\ndone\n\n# PR discussion comments\ngh api \"repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/comments.json\n\n# Linked issues\njq -r '.body // \"\"' /tmp/pr-context/pr.json 2>/dev/null \\\n | grep -oiE '(fixes|closes|resolves)\\s+#[0-9]+' \\\n | grep -oE '[0-9]+$' \\\n | sort -u \\\n | while read -r issue; do\n gh api \"repos/$GITHUB_REPOSITORY/issues/$issue\" > \"/tmp/pr-context/issue-${issue}.json\" || true\n done || true\n\n# Write manifest\ncat > /tmp/pr-context/README.md << 'MANIFEST'\n# PR Context\n\nPre-fetched PR data. All files are in `/tmp/pr-context/`.\n\n| File | Description |\n| --- | --- |\n| `pr.json` | PR metadata — title, body, author, base/head branches, head commit SHA (`headRefOid`), URL |\n| `pr.diff` | Full unified diff of all changes |\n| `files.json` | Changed files array — each entry has `filename`, `status`, `additions`, `deletions`, `patch` |\n| `diffs/.diff` | Per-file diffs — one file per changed file, mirroring the repo path under `diffs/` |\n| `file_order_az.txt` | Changed files sorted alphabetically (A→Z), one filename per line |\n| `file_order_za.txt` | Changed files sorted reverse-alphabetically (Z→A), one filename per line |\n| `file_order_largest.txt` | Changed files sorted by diff size descending (largest first), one filename per line |\n| `reviews.json` | Prior review submissions — author, state (APPROVED/CHANGES_REQUESTED/COMMENTED), body |\n| `review_comments.json` | All review threads (GraphQL) — each thread has `id` (node ID for resolving), `isResolved`, `isOutdated`, `path`, `line`, and nested `comments` with `id`, `databaseId` (numeric REST ID for replies), body/author |\n| `unresolved_threads.json` | Unresolved review threads — subset of `review_comments.json` where `isResolved` is false |\n| `resolved_threads.json` | Resolved review threads — subset of `review_comments.json` where `isResolved` is true |\n| `outdated_threads.json` | Outdated review threads — subset of `review_comments.json` where `isOutdated` is true (code changed since comment) |\n| `threads/.json` | Per-file review threads — one file per changed file with existing threads, mirroring the repo path under `threads/` |\n| `comments.json` | PR discussion comments (not inline) |\n| `issue-{N}.json` | Linked issue details (one file per linked issue, if any) |\n| `agents.md` | Repository conventions from `generate_agents_md` (if written by agent) |\n| `review-instructions.md` | Review instructions, criteria, and calibration examples (if written by review-process fragment) |\nMANIFEST\n\necho \"PR context written to /tmp/pr-context/\"\nls -la /tmp/pr-context/" + run: "set -euo pipefail\nmkdir -p /tmp/pr-context\n\n# PR metadata\ngh pr view \"$PR_NUMBER\" --json title,body,author,baseRefName,headRefName,headRefOid,url \\\n > /tmp/pr-context/pr.json\n\n# Full diff\nif ! gh pr diff \"$PR_NUMBER\" > /tmp/pr-context/pr.diff; then\n echo \"::warning::Failed to fetch full PR diff; per-file diffs from files.json are still available.\"\n : > /tmp/pr-context/pr.diff\nfi\n\n# Changed files list (--paginate may output concatenated arrays; jq -s 'add' merges them)\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/files\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/files.json\n\n# Per-file diffs\njq -c '.[]' /tmp/pr-context/files.json | while IFS= read -r entry; do\n filename=$(echo \"$entry\" | jq -r '.filename')\n mkdir -p \"/tmp/pr-context/diffs/$(dirname \"$filename\")\"\n echo \"$entry\" | jq -r '.patch // empty' > \"/tmp/pr-context/diffs/${filename}.diff\"\ndone\n\n# File orderings for sub-agent review (3 strategies)\njq -r '[.[] | .filename] | sort | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_az.txt\njq -r '[.[] | .filename] | sort | reverse | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_za.txt\njq -r '[.[] | {filename, size: ((.additions // 0) + (.deletions // 0))}] | sort_by(-.size) | .[].filename' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_largest.txt\n\n# Compute PR size metrics for review fan-out decisions\nFILE_COUNT=$(jq 'length' /tmp/pr-context/files.json)\nDIFF_LINES=$(wc -l < /tmp/pr-context/pr.diff | tr -d ' ')\necho \"${FILE_COUNT} files, ${DIFF_LINES} diff lines\" > /tmp/pr-context/pr-size.txt\necho \"PR size: ${FILE_COUNT} files, ${DIFF_LINES} diff lines\"\n\n# Existing reviews\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/reviews\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/reviews.json\n\n# Review threads with resolution status (GraphQL — REST lacks isResolved/isOutdated)\ngh api graphql --paginate -f query='\n query($owner: String!, $repo: String!, $number: Int!, $endCursor: String) {\n repository(owner: $owner, name: $repo) {\n pullRequest(number: $number) {\n reviewThreads(first: 100, after: $endCursor) {\n pageInfo { hasNextPage endCursor }\n nodes {\n id\n isResolved\n isOutdated\n isCollapsed\n path\n line\n startLine\n comments(first: 100) {\n nodes {\n id\n databaseId\n body\n author { login }\n createdAt\n }\n }\n }\n }\n }\n }\n }\n' -F owner=\"${GITHUB_REPOSITORY%/*}\" -F repo=\"${GITHUB_REPOSITORY#*/}\" -F \"number=$PR_NUMBER\" \\\n --jq '.data.repository.pullRequest.reviewThreads.nodes' \\\n | jq -s 'add // []' > /tmp/pr-context/review_comments.json\n\n# Filtered review thread views (pre-computed so agents don't need to parse review_comments.json)\njq '[.[] | select(.isResolved == false)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/unresolved_threads.json\njq '[.[] | select(.isResolved == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/resolved_threads.json\njq '[.[] | select(.isOutdated == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/outdated_threads.json\n\n# Per-file review threads (mirrors diffs/ structure)\njq -c '.[]' /tmp/pr-context/review_comments.json | while IFS= read -r thread; do\n filepath=$(echo \"$thread\" | jq -r '.path // empty')\n [ -z \"$filepath\" ] && continue\n mkdir -p \"/tmp/pr-context/threads/$(dirname \"$filepath\")\"\n echo \"$thread\" >> \"/tmp/pr-context/threads/${filepath}.jsonl\"\ndone\n# Convert per-file JSONL to proper JSON arrays\nmkdir -p /tmp/pr-context/threads\nfind /tmp/pr-context/threads -name '*.jsonl' 2>/dev/null | while IFS= read -r jsonl; do\n jq -s '.' \"$jsonl\" > \"${jsonl%.jsonl}.json\"\n rm \"$jsonl\"\ndone\n\n# PR discussion comments\ngh api \"repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/comments.json\n\n# Linked issues\njq -r '.body // \"\"' /tmp/pr-context/pr.json 2>/dev/null \\\n | grep -oiE '(fixes|closes|resolves)\\s+#[0-9]+' \\\n | grep -oE '[0-9]+$' \\\n | sort -u \\\n | while read -r issue; do\n gh api \"repos/$GITHUB_REPOSITORY/issues/$issue\" > \"/tmp/pr-context/issue-${issue}.json\" || true\n done || true\n\n# Write manifest\ncat > /tmp/pr-context/README.md << 'MANIFEST'\n# PR Context\n\nPre-fetched PR data. All files are in `/tmp/pr-context/`.\n\n| File | Description |\n| --- | --- |\n| `pr.json` | PR metadata — title, body, author, base/head branches, head commit SHA (`headRefOid`), URL |\n| `pr.diff` | Full unified diff of all changes |\n| `files.json` | Changed files array — each entry has `filename`, `status`, `additions`, `deletions`, `patch` |\n| `diffs/.diff` | Per-file diffs — one file per changed file, mirroring the repo path under `diffs/` |\n| `file_order_az.txt` | Changed files sorted alphabetically (A→Z), one filename per line |\n| `file_order_za.txt` | Changed files sorted reverse-alphabetically (Z→A), one filename per line |\n| `file_order_largest.txt` | Changed files sorted by diff size descending (largest first), one filename per line |\n| `reviews.json` | Prior review submissions — author, state (APPROVED/CHANGES_REQUESTED/COMMENTED), body |\n| `review_comments.json` | All review threads (GraphQL) — each thread has `id` (node ID for resolving), `isResolved`, `isOutdated`, `path`, `line`, and nested `comments` with `id`, `databaseId` (numeric REST ID for replies), body/author |\n| `unresolved_threads.json` | Unresolved review threads — subset of `review_comments.json` where `isResolved` is false |\n| `resolved_threads.json` | Resolved review threads — subset of `review_comments.json` where `isResolved` is true |\n| `outdated_threads.json` | Outdated review threads — subset of `review_comments.json` where `isOutdated` is true (code changed since comment) |\n| `threads/.json` | Per-file review threads — one file per changed file with existing threads, mirroring the repo path under `threads/` |\n| `comments.json` | PR discussion comments (not inline) |\n| `issue-{N}.json` | Linked issue details (one file per linked issue, if any) |\n| `pr-size.txt` | PR size metrics: `{N} files, {M} diff lines` — used by the agent to decide review fan-out |\n| `agents.md` | Repository conventions from `generate_agents_md` (if written by agent) |\n| `review-instructions.md` | Review instructions, criteria, and calibration examples (if written by review-process fragment) |\nMANIFEST\n\necho \"PR context written to /tmp/pr-context/\"\nls -la /tmp/pr-context/" - env: GITHUB_TOKEN: ${{ github.token }} REPO_NAME: ${{ github.repository }} diff --git a/.github/workflows/gh-aw-pr-review.lock.yml b/.github/workflows/gh-aw-pr-review.lock.yml index be4ad4d8..099907e3 100644 --- a/.github/workflows/gh-aw-pr-review.lock.yml +++ b/.github/workflows/gh-aw-pr-review.lock.yml @@ -40,7 +40,7 @@ # # inlined-imports: true # -# gh-aw-metadata: {"schema_version":"v1","frontmatter_hash":"f7adb4454527b9d061ef790aece9abe299347d6fc5caa2d0d62cb5797e7392e9"} +# gh-aw-metadata: {"schema_version":"v1","frontmatter_hash":"2b55424743c0138975298375ab50c49188a8c8b6c99333daddefe42fcba82d9b"} name: "PR Review" "on": @@ -480,25 +480,33 @@ jobs: 4. Read `/tmp/pr-context/reviews.json` to check prior review submissions from this bot. Note any prior verdicts to avoid redundant reviews. 5. Read `/tmp/pr-context/review_comments.json` to check existing review threads. Note which files already have threads and whether they are resolved, unresolved, or outdated. - ### Step 2: Sub-agent Review + ### Step 2: Review - **File orderings are pre-computed** at `/tmp/pr-context/`: - - **Agent 1**: `/tmp/pr-context/file_order_az.txt` — alphabetical (A → Z) - - **Agent 2**: `/tmp/pr-context/file_order_za.txt` — reverse alphabetical (Z → A) - - **Agent 3**: `/tmp/pr-context/file_order_largest.txt` — by diff size descending + Read `/tmp/pr-context/pr-size.txt` for the PR size (file count and diff lines). Use it to decide your review approach: - **Spawn sub-agents:** Follow the **Pick Three, Keep Many** process — spawn 3 `code-review` sub-agents to review the PR diff in parallel. Each sub-agent prompt must include: + - **Small PRs (under 200 diff lines):** Review directly — no sub-agents. Read files in the order from `/tmp/pr-context/file_order_az.txt`. For each file, read the diff from `/tmp/pr-context/diffs/.diff`, read the full file from the workspace, check existing threads from `/tmp/pr-context/threads/.json`, and identify issues per the Code Review Reference criteria. Then proceed to Step 3 with your findings. + + - **Medium PRs (200–800 diff lines):** Follow the **Pick Three, Keep Many** process — spawn 2 `code-review` sub-agents in parallel: + - **Agent 1**: file ordering from `/tmp/pr-context/file_order_az.txt` (A → Z) + - **Agent 2**: file ordering from `/tmp/pr-context/file_order_za.txt` (Z → A) + + - **Large PRs (over 800 diff lines):** Follow the **Pick Three, Keep Many** process — spawn 3 `code-review` sub-agents in parallel: + - **Agent 1**: file ordering from `/tmp/pr-context/file_order_az.txt` (A → Z) + - **Agent 2**: file ordering from `/tmp/pr-context/file_order_za.txt` (Z → A) + - **Agent 3**: file ordering from `/tmp/pr-context/file_order_largest.txt` (largest diff first) + + **When spawning sub-agents**, each prompt must include: - Instruction to read `/tmp/pr-context/review-instructions.md` for the review process, criteria, and calibration examples - Instruction to read `/tmp/pr-context/README.md` for a manifest of all available context files - The review intensity (`__GH_AW_INPUTS_INTENSITY__`) and minimum severity (`__GH_AW_INPUTS_MINIMUM_SEVERITY__`) - - The path to that sub-agent's file ordering (e.g., `/tmp/pr-context/file_order_az.txt`) — tell it to read the file for its ordered list (per-file diffs are at `/tmp/pr-context/diffs/.diff`) + - The path to that sub-agent's file ordering — tell it to read the file for its ordered list (per-file diffs are at `/tmp/pr-context/diffs/.diff`) - Instruction to read changed files from the workspace (the PR branch is checked out) Each sub-agent returns a structured findings list. They do NOT leave inline comments. ### Step 3: Verify and Comment - After merging and deduplicating sub-agent findings per the Pick Three, Keep Many process, verify each finding before leaving a comment. For every finding: + If sub-agents were used, merge and deduplicate findings per the Pick Three, Keep Many process. Verify each finding before leaving a comment. For every finding: 1. **Read the file and surrounding context** — open the full file, not just the diff. Understand the broader code. 2. **Construct a concrete failure scenario** — what specific input or state causes the bug? If you cannot describe one, drop the finding. @@ -710,7 +718,7 @@ jobs: GH_TOKEN: ${{ github.token }} PR_NUMBER: ${{ github.event.pull_request.number || inputs.target-pr-number || github.event.issue.number }} name: Fetch PR context to disk - run: "set -euo pipefail\nmkdir -p /tmp/pr-context\n\n# PR metadata\ngh pr view \"$PR_NUMBER\" --json title,body,author,baseRefName,headRefName,headRefOid,url \\\n > /tmp/pr-context/pr.json\n\n# Full diff\nif ! gh pr diff \"$PR_NUMBER\" > /tmp/pr-context/pr.diff; then\n echo \"::warning::Failed to fetch full PR diff; per-file diffs from files.json are still available.\"\n : > /tmp/pr-context/pr.diff\nfi\n\n# Changed files list (--paginate may output concatenated arrays; jq -s 'add' merges them)\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/files\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/files.json\n\n# Per-file diffs\njq -c '.[]' /tmp/pr-context/files.json | while IFS= read -r entry; do\n filename=$(echo \"$entry\" | jq -r '.filename')\n mkdir -p \"/tmp/pr-context/diffs/$(dirname \"$filename\")\"\n echo \"$entry\" | jq -r '.patch // empty' > \"/tmp/pr-context/diffs/${filename}.diff\"\ndone\n\n# File orderings for sub-agent review (3 strategies)\njq -r '[.[] | .filename] | sort | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_az.txt\njq -r '[.[] | .filename] | sort | reverse | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_za.txt\njq -r '[.[] | {filename, size: ((.additions // 0) + (.deletions // 0))}] | sort_by(-.size) | .[].filename' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_largest.txt\n\n# Existing reviews\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/reviews\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/reviews.json\n\n# Review threads with resolution status (GraphQL — REST lacks isResolved/isOutdated)\ngh api graphql --paginate -f query='\n query($owner: String!, $repo: String!, $number: Int!, $endCursor: String) {\n repository(owner: $owner, name: $repo) {\n pullRequest(number: $number) {\n reviewThreads(first: 100, after: $endCursor) {\n pageInfo { hasNextPage endCursor }\n nodes {\n id\n isResolved\n isOutdated\n isCollapsed\n path\n line\n startLine\n comments(first: 100) {\n nodes {\n id\n databaseId\n body\n author { login }\n createdAt\n }\n }\n }\n }\n }\n }\n }\n' -F owner=\"${GITHUB_REPOSITORY%/*}\" -F repo=\"${GITHUB_REPOSITORY#*/}\" -F \"number=$PR_NUMBER\" \\\n --jq '.data.repository.pullRequest.reviewThreads.nodes' \\\n | jq -s 'add // []' > /tmp/pr-context/review_comments.json\n\n# Filtered review thread views (pre-computed so agents don't need to parse review_comments.json)\njq '[.[] | select(.isResolved == false)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/unresolved_threads.json\njq '[.[] | select(.isResolved == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/resolved_threads.json\njq '[.[] | select(.isOutdated == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/outdated_threads.json\n\n# Per-file review threads (mirrors diffs/ structure)\njq -c '.[]' /tmp/pr-context/review_comments.json | while IFS= read -r thread; do\n filepath=$(echo \"$thread\" | jq -r '.path // empty')\n [ -z \"$filepath\" ] && continue\n mkdir -p \"/tmp/pr-context/threads/$(dirname \"$filepath\")\"\n echo \"$thread\" >> \"/tmp/pr-context/threads/${filepath}.jsonl\"\ndone\n# Convert per-file JSONL to proper JSON arrays\nmkdir -p /tmp/pr-context/threads\nfind /tmp/pr-context/threads -name '*.jsonl' 2>/dev/null | while IFS= read -r jsonl; do\n jq -s '.' \"$jsonl\" > \"${jsonl%.jsonl}.json\"\n rm \"$jsonl\"\ndone\n\n# PR discussion comments\ngh api \"repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/comments.json\n\n# Linked issues\njq -r '.body // \"\"' /tmp/pr-context/pr.json 2>/dev/null \\\n | grep -oiE '(fixes|closes|resolves)\\s+#[0-9]+' \\\n | grep -oE '[0-9]+$' \\\n | sort -u \\\n | while read -r issue; do\n gh api \"repos/$GITHUB_REPOSITORY/issues/$issue\" > \"/tmp/pr-context/issue-${issue}.json\" || true\n done || true\n\n# Write manifest\ncat > /tmp/pr-context/README.md << 'MANIFEST'\n# PR Context\n\nPre-fetched PR data. All files are in `/tmp/pr-context/`.\n\n| File | Description |\n| --- | --- |\n| `pr.json` | PR metadata — title, body, author, base/head branches, head commit SHA (`headRefOid`), URL |\n| `pr.diff` | Full unified diff of all changes |\n| `files.json` | Changed files array — each entry has `filename`, `status`, `additions`, `deletions`, `patch` |\n| `diffs/.diff` | Per-file diffs — one file per changed file, mirroring the repo path under `diffs/` |\n| `file_order_az.txt` | Changed files sorted alphabetically (A→Z), one filename per line |\n| `file_order_za.txt` | Changed files sorted reverse-alphabetically (Z→A), one filename per line |\n| `file_order_largest.txt` | Changed files sorted by diff size descending (largest first), one filename per line |\n| `reviews.json` | Prior review submissions — author, state (APPROVED/CHANGES_REQUESTED/COMMENTED), body |\n| `review_comments.json` | All review threads (GraphQL) — each thread has `id` (node ID for resolving), `isResolved`, `isOutdated`, `path`, `line`, and nested `comments` with `id`, `databaseId` (numeric REST ID for replies), body/author |\n| `unresolved_threads.json` | Unresolved review threads — subset of `review_comments.json` where `isResolved` is false |\n| `resolved_threads.json` | Resolved review threads — subset of `review_comments.json` where `isResolved` is true |\n| `outdated_threads.json` | Outdated review threads — subset of `review_comments.json` where `isOutdated` is true (code changed since comment) |\n| `threads/.json` | Per-file review threads — one file per changed file with existing threads, mirroring the repo path under `threads/` |\n| `comments.json` | PR discussion comments (not inline) |\n| `issue-{N}.json` | Linked issue details (one file per linked issue, if any) |\n| `agents.md` | Repository conventions from `generate_agents_md` (if written by agent) |\n| `review-instructions.md` | Review instructions, criteria, and calibration examples (if written by review-process fragment) |\nMANIFEST\n\necho \"PR context written to /tmp/pr-context/\"\nls -la /tmp/pr-context/" + run: "set -euo pipefail\nmkdir -p /tmp/pr-context\n\n# PR metadata\ngh pr view \"$PR_NUMBER\" --json title,body,author,baseRefName,headRefName,headRefOid,url \\\n > /tmp/pr-context/pr.json\n\n# Full diff\nif ! gh pr diff \"$PR_NUMBER\" > /tmp/pr-context/pr.diff; then\n echo \"::warning::Failed to fetch full PR diff; per-file diffs from files.json are still available.\"\n : > /tmp/pr-context/pr.diff\nfi\n\n# Changed files list (--paginate may output concatenated arrays; jq -s 'add' merges them)\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/files\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/files.json\n\n# Per-file diffs\njq -c '.[]' /tmp/pr-context/files.json | while IFS= read -r entry; do\n filename=$(echo \"$entry\" | jq -r '.filename')\n mkdir -p \"/tmp/pr-context/diffs/$(dirname \"$filename\")\"\n echo \"$entry\" | jq -r '.patch // empty' > \"/tmp/pr-context/diffs/${filename}.diff\"\ndone\n\n# File orderings for sub-agent review (3 strategies)\njq -r '[.[] | .filename] | sort | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_az.txt\njq -r '[.[] | .filename] | sort | reverse | .[]' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_za.txt\njq -r '[.[] | {filename, size: ((.additions // 0) + (.deletions // 0))}] | sort_by(-.size) | .[].filename' /tmp/pr-context/files.json \\\n > /tmp/pr-context/file_order_largest.txt\n\n# Compute PR size metrics for review fan-out decisions\nFILE_COUNT=$(jq 'length' /tmp/pr-context/files.json)\nDIFF_LINES=$(wc -l < /tmp/pr-context/pr.diff | tr -d ' ')\necho \"${FILE_COUNT} files, ${DIFF_LINES} diff lines\" > /tmp/pr-context/pr-size.txt\necho \"PR size: ${FILE_COUNT} files, ${DIFF_LINES} diff lines\"\n\n# Existing reviews\ngh api \"repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/reviews\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/reviews.json\n\n# Review threads with resolution status (GraphQL — REST lacks isResolved/isOutdated)\ngh api graphql --paginate -f query='\n query($owner: String!, $repo: String!, $number: Int!, $endCursor: String) {\n repository(owner: $owner, name: $repo) {\n pullRequest(number: $number) {\n reviewThreads(first: 100, after: $endCursor) {\n pageInfo { hasNextPage endCursor }\n nodes {\n id\n isResolved\n isOutdated\n isCollapsed\n path\n line\n startLine\n comments(first: 100) {\n nodes {\n id\n databaseId\n body\n author { login }\n createdAt\n }\n }\n }\n }\n }\n }\n }\n' -F owner=\"${GITHUB_REPOSITORY%/*}\" -F repo=\"${GITHUB_REPOSITORY#*/}\" -F \"number=$PR_NUMBER\" \\\n --jq '.data.repository.pullRequest.reviewThreads.nodes' \\\n | jq -s 'add // []' > /tmp/pr-context/review_comments.json\n\n# Filtered review thread views (pre-computed so agents don't need to parse review_comments.json)\njq '[.[] | select(.isResolved == false)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/unresolved_threads.json\njq '[.[] | select(.isResolved == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/resolved_threads.json\njq '[.[] | select(.isOutdated == true)]' /tmp/pr-context/review_comments.json \\\n > /tmp/pr-context/outdated_threads.json\n\n# Per-file review threads (mirrors diffs/ structure)\njq -c '.[]' /tmp/pr-context/review_comments.json | while IFS= read -r thread; do\n filepath=$(echo \"$thread\" | jq -r '.path // empty')\n [ -z \"$filepath\" ] && continue\n mkdir -p \"/tmp/pr-context/threads/$(dirname \"$filepath\")\"\n echo \"$thread\" >> \"/tmp/pr-context/threads/${filepath}.jsonl\"\ndone\n# Convert per-file JSONL to proper JSON arrays\nmkdir -p /tmp/pr-context/threads\nfind /tmp/pr-context/threads -name '*.jsonl' 2>/dev/null | while IFS= read -r jsonl; do\n jq -s '.' \"$jsonl\" > \"${jsonl%.jsonl}.json\"\n rm \"$jsonl\"\ndone\n\n# PR discussion comments\ngh api \"repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments\" --paginate \\\n | jq -s 'add // []' > /tmp/pr-context/comments.json\n\n# Linked issues\njq -r '.body // \"\"' /tmp/pr-context/pr.json 2>/dev/null \\\n | grep -oiE '(fixes|closes|resolves)\\s+#[0-9]+' \\\n | grep -oE '[0-9]+$' \\\n | sort -u \\\n | while read -r issue; do\n gh api \"repos/$GITHUB_REPOSITORY/issues/$issue\" > \"/tmp/pr-context/issue-${issue}.json\" || true\n done || true\n\n# Write manifest\ncat > /tmp/pr-context/README.md << 'MANIFEST'\n# PR Context\n\nPre-fetched PR data. All files are in `/tmp/pr-context/`.\n\n| File | Description |\n| --- | --- |\n| `pr.json` | PR metadata — title, body, author, base/head branches, head commit SHA (`headRefOid`), URL |\n| `pr.diff` | Full unified diff of all changes |\n| `files.json` | Changed files array — each entry has `filename`, `status`, `additions`, `deletions`, `patch` |\n| `diffs/.diff` | Per-file diffs — one file per changed file, mirroring the repo path under `diffs/` |\n| `file_order_az.txt` | Changed files sorted alphabetically (A→Z), one filename per line |\n| `file_order_za.txt` | Changed files sorted reverse-alphabetically (Z→A), one filename per line |\n| `file_order_largest.txt` | Changed files sorted by diff size descending (largest first), one filename per line |\n| `reviews.json` | Prior review submissions — author, state (APPROVED/CHANGES_REQUESTED/COMMENTED), body |\n| `review_comments.json` | All review threads (GraphQL) — each thread has `id` (node ID for resolving), `isResolved`, `isOutdated`, `path`, `line`, and nested `comments` with `id`, `databaseId` (numeric REST ID for replies), body/author |\n| `unresolved_threads.json` | Unresolved review threads — subset of `review_comments.json` where `isResolved` is false |\n| `resolved_threads.json` | Resolved review threads — subset of `review_comments.json` where `isResolved` is true |\n| `outdated_threads.json` | Outdated review threads — subset of `review_comments.json` where `isOutdated` is true (code changed since comment) |\n| `threads/.json` | Per-file review threads — one file per changed file with existing threads, mirroring the repo path under `threads/` |\n| `comments.json` | PR discussion comments (not inline) |\n| `issue-{N}.json` | Linked issue details (one file per linked issue, if any) |\n| `pr-size.txt` | PR size metrics: `{N} files, {M} diff lines` — used by the agent to decide review fan-out |\n| `agents.md` | Repository conventions from `generate_agents_md` (if written by agent) |\n| `review-instructions.md` | Review instructions, criteria, and calibration examples (if written by review-process fragment) |\nMANIFEST\n\necho \"PR context written to /tmp/pr-context/\"\nls -la /tmp/pr-context/" - name: Write review instructions to disk run: "mkdir -p /tmp/pr-context\ncat > /tmp/pr-context/review-instructions.md << 'REVIEW_EOF'\n# Review Instructions for Sub-agents\n\nYou are a code review sub-agent. Read these instructions, then review the PR files in the order provided in your prompt.\n\n## Context\n\nBefore reviewing files, read these to understand the PR:\n\n1. `/tmp/pr-context/pr.json` — PR title, description, author, and branches. Understand what the PR is trying to accomplish.\n2. `/tmp/pr-context/agents.md` — Repository coding conventions and guidelines (if it exists).\n3. `/tmp/pr-context/review_comments.json` — Existing review threads. Note which files already have threads so you don't duplicate.\n4. `/tmp/pr-context/issue-*.json` — Linked issue details (if any). Understand the motivation and acceptance criteria.\n\n## Process\n\nReview the PR diff file by file in your assigned order. For each changed file:\n\n1. **Read the diff** for this file from `/tmp/pr-context/diffs/.diff` to understand what changed. If the diff is empty or truncated (e.g., binary files or very large changes), fall back to reading the full file from the workspace and comparing against context.\n2. **Read the full file from the workspace.** The PR branch is checked out locally — open the file directly to get complete contents with line numbers.\n3. **Check existing threads** for this file from `/tmp/pr-context/threads/.json` (if it exists). Skip issues that are already under discussion — each thread has `isResolved` and `isOutdated` fields.\n4. **Identify potential issues** matching the review criteria below.\n5. **Quick-check each issue** before including it:\n - What specific code pattern or change triggers this concern?\n - Is there an obvious guard, handler, or mitigation visible in the immediate context?\n - Can you describe a concrete failure scenario (the `evidence` field)? If you cannot articulate what specific input or state triggers the problem, drop the finding.\n - If the issue is clearly handled, skip it. If you're unsure, include it — the parent will verify.\n6. **Add to your findings list.** Do NOT leave inline comments — you don't have that tool. Return findings in this format:\n\n```\n- file: path/to/file\n line: 42\n severity: HIGH\n title: Brief title\n description: What the issue is and why it matters\n evidence: The specific code pattern and failure scenario\n suggestion: corrected code here (optional — only if you can provide a concrete fix)\n```\n\n**Review every file in your assigned order.** Files reviewed earlier get more attention, which is why different sub-agents use different orderings.\n\n**Check existing threads** — per-file threads are at `/tmp/pr-context/threads/.json` (step 3 above). The full list is at `/tmp/pr-context/review_comments.json`. Do not flag issues that are already under discussion (resolved or unresolved). For outdated threads, only re-flag if the issue still applies to the current diff.\n\n**Return your full findings list** when done, or an empty list if no issues were found.\n\n## Review Criteria\n\nFocus on these categories in priority order:\n\n1. Security vulnerabilities (injection, XSS, auth bypass, secrets exposure)\n2. Logic bugs that could cause runtime failures or incorrect behavior\n3. Data integrity issues (race conditions, missing transactions, corruption risk)\n4. Performance bottlenecks (N+1 queries, memory leaks, blocking operations)\n5. Error handling gaps (unhandled exceptions, missing validation)\n6. Breaking changes to public APIs without migration path\n7. Missing or incorrect test coverage for critical paths\n\n## What NOT to Flag\n\nOnly review the diff — do not flag issues in unchanged code, pre-existing problems not introduced by this PR, or style preferences handled by linters or formatters.\n\n**Common false positives** — these patterns look like issues but usually aren't. Before flagging anything in these categories, confirm the problem is real by reading the surrounding code:\n\n- **Security — input already sanitized:** Don't flag injection or XSS risks when inputs are sanitized upstream, parameterized queries are used, or the framework auto-escapes output.\n- **Null/undefined — guarded elsewhere:** Don't flag potential null dereferences if the value is guaranteed by a type guard, assertion, schema validation, or upstream null check.\n- **Error handling — handled at a different layer:** Don't flag missing try/catch if the caller, middleware, or framework catches and handles the error (e.g., Express error middleware, React error boundaries).\n- **Performance — theoretical, not practical:** Don't flag algorithmic complexity (e.g., O(n^2)) unless N is demonstrably large enough to matter in the actual usage context. \"This could be slow\" without evidence is not actionable.\n- **Validation — exists at another layer:** Don't flag missing input validation if it's handled by an API gateway, middleware, schema validator, or type system.\n- **Test coverage — trivial or generated code:** Don't flag missing tests for trivial getters/setters, auto-generated code, or simple delegation methods.\n- **Style or naming — not in coding guidelines:** Don't flag naming conventions or code style unless they violate the repository's documented coding guidelines (from `generate_agents_md` or CONTRIBUTING docs).\n\n**Existing review threads** — check BEFORE flagging any issue:\n\n- **Resolved with reviewer reply** (e.g. \"This is intentional\") — reviewer's decision is final. Do NOT re-flag.\n- **Resolved without reply** — author likely fixed it. Do NOT re-raise unless the fix introduced a new problem.\n- **Unresolved** — already flagged. Do NOT duplicate.\n- **Outdated** — only re-flag if the issue still applies to the current diff.\n\nWhen in doubt, do not duplicate. Redundant comments erode trust.\n\nFinding no issues is a valid and valuable outcome. An empty findings list is better than findings that waste the author's time or erode trust. Do not manufacture findings to justify your review — if the code is sound, return an empty list.\n\n## Severity Classification\n\nDetermine severity AFTER investigating the issue, not before. First identify the problem and trace through the code, then assign a severity based on the evidence you found.\n\n- 🔴 CRITICAL — Must fix before merge (security vulnerabilities, data corruption, production-breaking bugs)\n- 🟠 HIGH — Should fix before merge (logic errors, missing validation, significant performance issues)\n- 🟡 MEDIUM — Address soon, non-blocking (error handling gaps, suboptimal patterns, missing edge cases)\n- ⚪ LOW — Author discretion (minor improvements, documentation gaps)\n- 💬 NITPICK — Truly optional (stylistic preferences, alternative approaches)\n\n## Review Intensity\n\nThe review intensity is `${{ inputs.intensity || 'balanced' }}`.\n\n- **conservative**: High evidence bar. Only flag when you can demonstrate a concrete failure scenario. If you can construct a reasonable counterargument, do not flag. Approval with zero findings is the expected outcome for most PRs.\n- **balanced**: Standard evidence bar. Flag when you can point to specific code that would fail. If the issue is ambiguous, lean toward not flagging.\n- **aggressive**: Lower evidence bar. Flag when evidence exists even if the failure scenario is not fully confirmed. Improvement suggestions welcome but must cite specific code.\n\n## Calibration Examples\n\nUse these examples to calibrate your judgment. Each pair shows a real issue and a similar-looking pattern that is NOT an issue.\n\n### Example 1: Null/Undefined Access\n\n**True positive — flag this:**\n\n```js\n// PR adds this handler\napp.get('/user/:id', async (req, res) => {\n const user = await db.findUser(req.params.id);\n res.json({ name: user.name, email: user.email });\n});\n```\n\nWhy flag: `db.findUser()` can return `null` when no user matches the ID. Accessing `user.name` will throw a TypeError at runtime. No upstream guard exists — the route handler is the entry point.\n\n**False positive — do NOT flag this:**\n\n```ts\n// PR adds this line inside an existing function\nconst settings = user.getSettings();\n```\n\nWhy skip: Reading the full file reveals `user` is typed as `User` (not `User | null`), and the calling function only runs after `authenticateUser()` middleware which guarantees a valid user object. The null case is handled at a different layer.\n\n### Example 2: SQL Injection\n\n**True positive — flag this:**\n\n```python\n# PR adds this query\ncursor.execute(f\"SELECT * FROM orders WHERE customer_id = '{customer_id}'\")\n```\n\nWhy flag: String interpolation in a SQL query with user-controlled input (`customer_id` comes from the request). No parameterization or sanitization anywhere in the call chain.\n\n**False positive — do NOT flag this:**\n\n```python\n# PR adds this query\ncursor.execute(f\"SELECT * FROM orders WHERE status = '{OrderStatus.PENDING.value}'\")\n```\n\nWhy skip: The interpolated value is a hardcoded enum constant (`OrderStatus.PENDING`), not user input. There is no injection vector.\n\n### Example 3: Borderline — Do NOT Flag\n\n```go\n// PR adds this function\nfunc processItems(items []Item) []Result {\n results := make([]Result, 0)\n for _, item := range items {\n for _, tag := range item.Tags {\n results = append(results, process(item, tag))\n }\n }\n return results\n}\n```\n\nThis looks like an O(n*m) performance concern. But without evidence that `items` or `Tags` are large in practice, this is speculative. The function processes a bounded dataset (items from a single user request). Do not flag theoretical performance issues without evidence of real-world impact.\nREVIEW_EOF" - env: diff --git a/.github/workflows/gh-aw-pr-review.md b/.github/workflows/gh-aw-pr-review.md index aaab7018..73ed58d7 100644 --- a/.github/workflows/gh-aw-pr-review.md +++ b/.github/workflows/gh-aw-pr-review.md @@ -120,25 +120,33 @@ Follow these steps in order. 4. Read `/tmp/pr-context/reviews.json` to check prior review submissions from this bot. Note any prior verdicts to avoid redundant reviews. 5. Read `/tmp/pr-context/review_comments.json` to check existing review threads. Note which files already have threads and whether they are resolved, unresolved, or outdated. -### Step 2: Sub-agent Review +### Step 2: Review -**File orderings are pre-computed** at `/tmp/pr-context/`: -- **Agent 1**: `/tmp/pr-context/file_order_az.txt` — alphabetical (A → Z) -- **Agent 2**: `/tmp/pr-context/file_order_za.txt` — reverse alphabetical (Z → A) -- **Agent 3**: `/tmp/pr-context/file_order_largest.txt` — by diff size descending +Read `/tmp/pr-context/pr-size.txt` for the PR size (file count and diff lines). Use it to decide your review approach: -**Spawn sub-agents:** Follow the **Pick Three, Keep Many** process — spawn 3 `code-review` sub-agents to review the PR diff in parallel. Each sub-agent prompt must include: +- **Small PRs (under 200 diff lines):** Review directly — no sub-agents. Read files in the order from `/tmp/pr-context/file_order_az.txt`. For each file, read the diff from `/tmp/pr-context/diffs/.diff`, read the full file from the workspace, check existing threads from `/tmp/pr-context/threads/.json`, and identify issues per the Code Review Reference criteria. Then proceed to Step 3 with your findings. + +- **Medium PRs (200–800 diff lines):** Follow the **Pick Three, Keep Many** process — spawn 2 `code-review` sub-agents in parallel: + - **Agent 1**: file ordering from `/tmp/pr-context/file_order_az.txt` (A → Z) + - **Agent 2**: file ordering from `/tmp/pr-context/file_order_za.txt` (Z → A) + +- **Large PRs (over 800 diff lines):** Follow the **Pick Three, Keep Many** process — spawn 3 `code-review` sub-agents in parallel: + - **Agent 1**: file ordering from `/tmp/pr-context/file_order_az.txt` (A → Z) + - **Agent 2**: file ordering from `/tmp/pr-context/file_order_za.txt` (Z → A) + - **Agent 3**: file ordering from `/tmp/pr-context/file_order_largest.txt` (largest diff first) + +**When spawning sub-agents**, each prompt must include: - Instruction to read `/tmp/pr-context/review-instructions.md` for the review process, criteria, and calibration examples - Instruction to read `/tmp/pr-context/README.md` for a manifest of all available context files - The review intensity (`${{ inputs.intensity }}`) and minimum severity (`${{ inputs.minimum_severity }}`) -- The path to that sub-agent's file ordering (e.g., `/tmp/pr-context/file_order_az.txt`) — tell it to read the file for its ordered list (per-file diffs are at `/tmp/pr-context/diffs/.diff`) +- The path to that sub-agent's file ordering — tell it to read the file for its ordered list (per-file diffs are at `/tmp/pr-context/diffs/.diff`) - Instruction to read changed files from the workspace (the PR branch is checked out) Each sub-agent returns a structured findings list. They do NOT leave inline comments. ### Step 3: Verify and Comment -After merging and deduplicating sub-agent findings per the Pick Three, Keep Many process, verify each finding before leaving a comment. For every finding: +If sub-agents were used, merge and deduplicate findings per the Pick Three, Keep Many process. Verify each finding before leaving a comment. For every finding: 1. **Read the file and surrounding context** — open the full file, not just the diff. Understand the broader code. 2. **Construct a concrete failure scenario** — what specific input or state causes the bug? If you cannot describe one, drop the finding.