Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 81 additions & 4 deletions .github/scripts/verifier_issue_formatter.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,51 @@ const {
parseScopeTasksAcceptanceSections,
} = require('./issue_scope_parser.js');

/**
* Check if content is a placeholder (missing section marker).
* These placeholders are generated by the bot when PR/issue lacks structured content.
*
* @param {string} text - Text to check
* @returns {boolean} True if text is a placeholder
*/
function isPlaceholderContent(text) {
const normalized = String(text || '').toLowerCase().trim();
// Match common placeholder patterns
return /^[-*]?\s*\[[\sx]\]\s*(scope|tasks?|acceptance\s*criteria?)\s+(section\s+)?missing\s+from\s+source\s+issue\.?$/.test(normalized) ||
/section\s+missing\s+from\s+source\s+issue/.test(normalized) ||
/^n\s*\/?\s*a$/.test(normalized) ||
normalized === '';
}

/**
* Check if an item looks like a markdown section header that was incorrectly
* captured as a list item (e.g., "## Related" or "### Notes").
*
* @param {string} text - Text to check
* @returns {boolean} True if text looks like a section header
*/
function looksLikeSectionHeader(text) {
const trimmed = String(text || '').trim();
// Match markdown headers at start
return /^#{1,6}\s+\w/.test(trimmed);
}

/**
* Check if an item looks like a PR/Issue reference link rather than actual criteria.
* Matches patterns like:
* - "- PR #123 - Title"
* - "- Issue #456 - Description"
* - "PR #789 - Some fix"
*
* @param {string} text - Text to check
* @returns {boolean} True if text looks like a reference link
*/
function looksLikeReferenceLink(text) {
const trimmed = String(text || '').trim();
// Match "- PR #N" or "- Issue #N" or just "PR #N" / "Issue #N" at start
return /^[-–•]?\s*(PR|Issue|Pull\s+Request)\s*#\d+/i.test(trimmed);
}

/**
* Simple similarity score between two strings (0-1).
* Uses Jaccard similarity on word sets for fuzzy matching.
Expand Down Expand Up @@ -171,6 +216,9 @@ function parseVerifierFindings(verifierOutput) {

/**
* Extract unchecked items from a checkbox section.
* Filters out generic noise such as placeholder content, section headers, and
* reference links so it can be reused for any checkbox-based content
* (e.g., criteria, tasks, or other checklist items).
*
* @param {string} content - Section content with checkboxes
* @returns {string[]} Array of unchecked item texts
Expand All @@ -184,14 +232,22 @@ function extractUncheckedItems(content) {
const match = line.match(/^\s*[-*]\s+\[\s\]\s+(.+)$/);
if (match) {
const text = match[1].trim();
// Skip markdown section headers that were incorrectly captured as criteria
if (text.startsWith('#') || text.startsWith('##')) {

// Skip placeholder content (e.g., "Tasks section missing from source issue.")
if (isPlaceholderContent(text)) {
continue;
}
// Skip items that look like markdown links to sections (e.g., "- PR #123")
if (/^[-–]\s*(PR|Issue)\s*#\d+/i.test(text)) {

// Skip markdown section headers (e.g., "## Related")
if (looksLikeSectionHeader(text)) {
continue;
}

// Skip PR/Issue reference links (e.g., "- PR #123 - Title")
if (looksLikeReferenceLink(text)) {
continue;
}

items.push(text);
}
}
Expand Down Expand Up @@ -452,12 +508,21 @@ function formatFollowUpIssue({
? `[Follow-up] Unmet criteria from PR #${prNumber}`
: '[Follow-up] Verifier failure - unmet acceptance criteria';

// Determine if this issue has substantive content worth creating
// Skip if we have no real tasks/criteria (just defaults) and no verifier gaps
const hasSubstantiveContent =
(newTasks.length > 0 && !newTasks.every(t => isPlaceholderContent(t))) ||
(refinedUnmetCriteria.length > 0 && !refinedUnmetCriteria.every(c => isPlaceholderContent(c))) ||
findings.gaps.length > 0 ||
findings.unmetCriteria.length > 0;

return {
title,
body: sections.join('\n\n'),
findings,
unmetCriteria: refinedUnmetCriteria,
newTasks,
hasSubstantiveContent,
};
}

Expand Down Expand Up @@ -516,10 +581,18 @@ function formatSimpleFollowUpIssue({
? `Verifier failure for PR #${prNumber}`
: 'Verifier failure on merged commit';

// Simple format always has substantive content (verifier output)
// since we only use it when we have actual verifier output to display
const hasSubstantiveContent =
findings.gaps.length > 0 ||
findings.unmetCriteria.length > 0 ||
(verifierOutput && verifierOutput.trim().length > 0);

return {
title,
body: lines.join('\n'),
findings,
hasSubstantiveContent,
};
}

Expand All @@ -531,4 +604,8 @@ module.exports = {
extractUncheckedItems,
extractCheckedItems,
buildChecklist,
// Helper functions exported for testing
isPlaceholderContent,
looksLikeSectionHeader,
looksLikeReferenceLink,
};
10 changes: 10 additions & 0 deletions .github/workflows/agents-verifier.yml
Original file line number Diff line number Diff line change
Expand Up @@ -353,13 +353,23 @@ jobs:
});
}

// Skip issue creation if there's no substantive content
// (e.g., all sections are placeholder text or empty)
if (!result.hasSubstantiveContent) {
core.info('Skipping issue creation: no substantive content to act on (all placeholder sections)');
core.setOutput('issue_number', '');
core.setOutput('skipped', 'true');
return;
}

const { data: issue } = await github.rest.issues.create({
...context.repo,
title: result.title,
body: result.body,
labels: ['agent:codex'],
});
core.setOutput('issue_number', issue?.number ? String(issue.number) : '');
core.setOutput('skipped', 'false');
core.info(`Created follow-up issue #${issue.number}: ${result.title}`);

- name: Collect verifier metrics
Expand Down
10 changes: 10 additions & 0 deletions .github/workflows/reusable-agents-verifier.yml
Original file line number Diff line number Diff line change
Expand Up @@ -320,13 +320,23 @@ jobs:
});
}

// Skip issue creation if there's no substantive content
// (e.g., all sections are placeholder text or empty)
if (!result.hasSubstantiveContent) {
core.info('Skipping issue creation: no substantive content to act on (all placeholder sections)');
core.setOutput('issue_number', '');
core.setOutput('skipped', 'true');
return;
}

const { data: issue } = await github.rest.issues.create({
...context.repo,
title: result.title,
body: result.body,
labels: ['agent:codex'],
});
core.setOutput('issue_number', issue?.number ? String(issue.number) : '');
core.setOutput('skipped', 'false');
core.info(`Created follow-up issue #${issue.number}: ${result.title}`);

- name: Collect verifier metrics
Expand Down
6 changes: 3 additions & 3 deletions docs/workflows/SystemEvaluation.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ This document evaluates the current GitHub Actions workflow architecture against
**Goal**: Identify gaps in the current workflow suite and opportunities for speed optimization.

* **Performance**:
* **Sequential Testing**: `reusable-10-ci-python.yml` runs `pytest` without parallelism. `pytest-xdist` is missing from `pyproject.toml`.
* **Impact**: Tests run one by one, slowing down feedback loops as the test suite grows.
* **Parallel Testing**: `reusable-10-ci-python.yml` now runs `pytest -n auto --dist=loadgroup` when `pytest-xdist` is detected. The workflow automatically installs `pytest-xdist` and enables parallelism.
* **Status**: Implemented. Tests run in parallel across available CPU cores.
* **Artifact Overload**: The CI workflow uploads a massive amount of artifacts (coverage XML/JSON/HTML, metrics, history, classification, delta, trend) for *every* run.
* **Impact**: Increases storage costs and workflow runtime (upload/download time).
* **Fail-Slow Strategy**: The CI workflow uses `continue-on-error: true` for all checks (lint, format, typecheck, test) to gather a full report.
Expand All @@ -88,7 +88,7 @@ This document evaluates the current GitHub Actions workflow architecture against
* **Gap**: It does not check for "Stale PRs" or "Dependency Freshness" (beyond what Dependabot might do silently).

* **Recommendations**:
* **Enable Parallel Testing**: Add `pytest-xdist` to `pyproject.toml` and update `reusable-10-ci-python.yml` to use `pytest -n auto`.
* ~~**Enable Parallel Testing**: Add `pytest-xdist` to `pyproject.toml` and update `reusable-10-ci-python.yml` to use `pytest -n auto`.~~ ✅ Done
* **Optimize Artifacts**: Only upload full coverage reports on `main` or when explicitly requested. Use summary comments for PRs.
* **Add Security Workflow**: Create `security.yml` running `bandit` and `safety`.
* **Create Release Workflow**: Add `release.yml` for automated publishing.
Expand Down
Loading