diff --git a/.github/scripts/verifier_issue_formatter.js b/.github/scripts/verifier_issue_formatter.js index 086492371..ecfdb9beb 100644 --- a/.github/scripts/verifier_issue_formatter.js +++ b/.github/scripts/verifier_issue_formatter.js @@ -4,6 +4,51 @@ const { parseScopeTasksAcceptanceSections, } = require('./issue_scope_parser.js'); +/** + * Check if content is a placeholder (missing section marker). + * These placeholders are generated by the bot when PR/issue lacks structured content. + * + * @param {string} text - Text to check + * @returns {boolean} True if text is a placeholder + */ +function isPlaceholderContent(text) { + const normalized = String(text || '').toLowerCase().trim(); + // Match common placeholder patterns; the checkbox prefix is optional because callers pass text with it already stripped + return /^[-*]?\s*(?:\[[\sx]\]\s*)?(scope|tasks?|acceptance\s*criteria?)\s+(section\s+)?missing\s+from\s+source\s+issue\.?$/.test(normalized) || + /section\s+missing\s+from\s+source\s+issue/.test(normalized) || + /^n\s*\/?\s*a$/.test(normalized) || + normalized === ''; +} + +/** + * Check if an item looks like a markdown section header that was incorrectly + * captured as a list item (e.g., "## Related" or "### Notes"). + * + * @param {string} text - Text to check + * @returns {boolean} True if text looks like a section header + */ +function looksLikeSectionHeader(text) { + const trimmed = String(text || '').trim(); + // Match markdown headers at start + return /^#{1,6}\s+\w/.test(trimmed); +} + +/** + * Check if an item looks like a PR/Issue reference link rather than actual criteria. 
+ * Matches patterns like: + * - "- PR #123 - Title" + * - "- Issue #456 - Description" + * - "PR #789 - Some fix" + * + * @param {string} text - Text to check + * @returns {boolean} True if text looks like a reference link + */ +function looksLikeReferenceLink(text) { + const trimmed = String(text || '').trim(); + // Match "- PR #N" or "- Issue #N" or just "PR #N" / "Issue #N" at start + return /^[-–•]?\s*(PR|Issue|Pull\s+Request)\s*#\d+/i.test(trimmed); +} + /** * Simple similarity score between two strings (0-1). * Uses Jaccard similarity on word sets for fuzzy matching. @@ -171,6 +216,9 @@ function parseVerifierFindings(verifierOutput) { /** * Extract unchecked items from a checkbox section. + * Filters out generic noise such as placeholder content, section headers, and + * reference links so it can be reused for any checkbox-based content + * (e.g., criteria, tasks, or other checklist items). * * @param {string} content - Section content with checkboxes * @returns {string[]} Array of unchecked item texts @@ -184,14 +232,22 @@ function extractUncheckedItems(content) { const match = line.match(/^\s*[-*]\s+\[\s\]\s+(.+)$/); if (match) { const text = match[1].trim(); - // Skip markdown section headers that were incorrectly captured as criteria - if (text.startsWith('#') || text.startsWith('##')) { + + // Skip placeholder content (e.g., "Tasks section missing from source issue.") + if (isPlaceholderContent(text)) { continue; } - // Skip items that look like markdown links to sections (e.g., "- PR #123") - if (/^[-–]\s*(PR|Issue)\s*#\d+/i.test(text)) { + + // Skip markdown section headers (e.g., "## Related") + if (looksLikeSectionHeader(text)) { continue; } + + // Skip PR/Issue reference links (e.g., "- PR #123 - Title") + if (looksLikeReferenceLink(text)) { + continue; + } + items.push(text); } } @@ -452,12 +508,21 @@ function formatFollowUpIssue({ ? 
`[Follow-up] Unmet criteria from PR #${prNumber}` : '[Follow-up] Verifier failure - unmet acceptance criteria'; + // Determine if this issue has substantive content worth creating + // Skip if we have no real tasks/criteria (just defaults) and no verifier gaps + const hasSubstantiveContent = + (newTasks.length > 0 && !newTasks.every(t => isPlaceholderContent(t))) || + (refinedUnmetCriteria.length > 0 && !refinedUnmetCriteria.every(c => isPlaceholderContent(c))) || + findings.gaps.length > 0 || + findings.unmetCriteria.length > 0; + return { title, body: sections.join('\n\n'), findings, unmetCriteria: refinedUnmetCriteria, newTasks, + hasSubstantiveContent, }; } @@ -516,10 +581,18 @@ function formatSimpleFollowUpIssue({ ? `Verifier failure for PR #${prNumber}` : 'Verifier failure on merged commit'; + // Simple format is normally used only when verifier output exists; the output + // is coerced via String() so the flag is a strict boolean even if it is empty or missing + const hasSubstantiveContent = + findings.gaps.length > 0 || + findings.unmetCriteria.length > 0 || + String(verifierOutput || '').trim().length > 0; + return { title, body: lines.join('\n'), findings, + hasSubstantiveContent, }; } @@ -531,4 +604,8 @@ module.exports = { extractUncheckedItems, extractCheckedItems, buildChecklist, + // Helper functions exported for testing + isPlaceholderContent, + looksLikeSectionHeader, + looksLikeReferenceLink, }; diff --git a/.github/workflows/agents-verifier.yml b/.github/workflows/agents-verifier.yml index f6a2ad5ec..ff028993c 100644 --- a/.github/workflows/agents-verifier.yml +++ b/.github/workflows/agents-verifier.yml @@ -353,6 +353,15 @@ jobs: }); } + // Skip issue creation if there's no substantive content + // (e.g., all sections are placeholder text or empty) + if (!result.hasSubstantiveContent) { + core.info('Skipping issue creation: no substantive content to act on (all placeholder sections)'); + core.setOutput('issue_number', ''); + core.setOutput('skipped', 'true'); + return; + } 
+ const { data: issue } = await github.rest.issues.create({ ...context.repo, title: result.title, @@ -360,6 +369,7 @@ jobs: labels: ['agent:codex'], }); core.setOutput('issue_number', issue?.number ? String(issue.number) : ''); + core.setOutput('skipped', 'false'); core.info(`Created follow-up issue #${issue.number}: ${result.title}`); - name: Collect verifier metrics diff --git a/.github/workflows/reusable-agents-verifier.yml b/.github/workflows/reusable-agents-verifier.yml index 8e38c0fa6..491cac932 100644 --- a/.github/workflows/reusable-agents-verifier.yml +++ b/.github/workflows/reusable-agents-verifier.yml @@ -320,6 +320,15 @@ jobs: }); } + // Skip issue creation if there's no substantive content + // (e.g., all sections are placeholder text or empty) + if (!result.hasSubstantiveContent) { + core.info('Skipping issue creation: no substantive content to act on (all placeholder sections)'); + core.setOutput('issue_number', ''); + core.setOutput('skipped', 'true'); + return; + } + const { data: issue } = await github.rest.issues.create({ ...context.repo, title: result.title, @@ -327,6 +336,7 @@ jobs: labels: ['agent:codex'], }); core.setOutput('issue_number', issue?.number ? String(issue.number) : ''); + core.setOutput('skipped', 'false'); core.info(`Created follow-up issue #${issue.number}: ${result.title}`); - name: Collect verifier metrics diff --git a/docs/workflows/SystemEvaluation.md b/docs/workflows/SystemEvaluation.md index 0713eae7c..074559cad 100644 --- a/docs/workflows/SystemEvaluation.md +++ b/docs/workflows/SystemEvaluation.md @@ -71,8 +71,8 @@ This document evaluates the current GitHub Actions workflow architecture against **Goal**: Identify gaps in the current workflow suite and opportunities for speed optimization. * **Performance**: - * **Sequential Testing**: `reusable-10-ci-python.yml` runs `pytest` without parallelism. `pytest-xdist` is missing from `pyproject.toml`. 
- * **Impact**: Tests run one by one, slowing down feedback loops as the test suite grows. + * **Parallel Testing**: ✅ `reusable-10-ci-python.yml` now runs `pytest -n auto --dist=loadgroup` when `pytest-xdist` is detected. The workflow automatically installs `pytest-xdist` and enables parallelism. + * **Status**: Implemented. Tests run in parallel across available CPU cores. * **Artifact Overload**: The CI workflow uploads a massive amount of artifacts (coverage XML/JSON/HTML, metrics, history, classification, delta, trend) for *every* run. * **Impact**: Increases storage costs and workflow runtime (upload/download time). * **Fail-Slow Strategy**: The CI workflow uses `continue-on-error: true` for all checks (lint, format, typecheck, test) to gather a full report. @@ -88,7 +88,7 @@ This document evaluates the current GitHub Actions workflow architecture against * **Gap**: It does not check for "Stale PRs" or "Dependency Freshness" (beyond what Dependabot might do silently). * **Recommendations**: - * **Enable Parallel Testing**: Add `pytest-xdist` to `pyproject.toml` and update `reusable-10-ci-python.yml` to use `pytest -n auto`. + * ~~**Enable Parallel Testing**: Add `pytest-xdist` to `pyproject.toml` and update `reusable-10-ci-python.yml` to use `pytest -n auto`.~~ ✅ Done * **Optimize Artifacts**: Only upload full coverage reports on `main` or when explicitly requested. Use summary comments for PRs. * **Add Security Workflow**: Create `security.yml` running `bandit` and `safety`. * **Create Release Workflow**: Add `release.yml` for automated publishing.