diff --git a/.github/workflows/autofix.yml b/.github/workflows/autofix.yml index bb2b7a34b..9c8676e37 100644 --- a/.github/workflows/autofix.yml +++ b/.github/workflows/autofix.yml @@ -45,6 +45,7 @@ jobs: id: context uses: actions/github-script@v8 with: + github-token: ${{ secrets.AGENTS_AUTOMATION_PAT || secrets.ACTIONS_BOT_PAT || github.token }} script: | const path = require('path'); const { paginateWithBackoff } = require( @@ -100,12 +101,24 @@ jobs: // File check with rate limit retry const { owner, repo } = context.repo; - const files = await paginateWithBackoff( - github, - github.rest.pulls.listFiles, - { owner, repo, pull_number: pr.number, per_page: 100 }, - { maxRetries: 3, core } - ); + let files = []; + try { + files = await paginateWithBackoff( + github, + github.rest.pulls.listFiles, + { owner, repo, pull_number: pr.number, per_page: 100 }, + { maxRetries: 3, core } + ); + } catch (error) { + const message = String(error?.message || error || ''); + const status = Number(error?.status || error?.response?.status || 0); + if (status === 403 && message.toLowerCase().includes('rate limit exceeded')) { + core.warning(`Rate limited while listing PR files; skipping autofix run. ${message}`); + core.setOutput('should_run', 'false'); + return; + } + throw error; + } const hasPython = files.some(f => f.filename.endsWith('.py') || f.filename.endsWith('.pyi')); if (!hasPython) { diff --git a/agents/codex-483.md b/agents/codex-483.md new file mode 100644 index 000000000..df8589d4a --- /dev/null +++ b/agents/codex-483.md @@ -0,0 +1 @@ + diff --git a/autofix_report_enriched.json b/autofix_report_enriched.json index 2bcd8d43a..cb19d2195 100644 --- a/autofix_report_enriched.json +++ b/autofix_report_enriched.json @@ -1 +1 @@ -{"changed": true, "classification": {"total": 0, "new": 0, "allowed": 0}, "timestamp": "2026-01-05T06:00:00Z", "files": [".github/scripts/agents_pr_meta_update_body.js", ".github/scripts/__tests__/agents-pr-meta-update-body.test.js", "scripts/langchain/context_extractor.py", "scripts/langchain/prompts/context_extract.md", "tests/scripts/test_context_extractor.py"]} +{"changed": true, "classification": {"total": 0, "new": 0, "allowed": 0}, "timestamp": "2026-01-05T08:45:34Z", "files": ["scripts/issue_pattern_feedback.py", "scripts/issue_pattern_report.py", "scripts/langchain/issue_formatter.py", "tests/scripts/test_issue_pattern_feedback.py", "tests/scripts/test_issue_pattern_report.py"]} \ No newline at end of file diff --git a/docs/keepalive/METRICS_SCHEMA.md b/docs/keepalive/METRICS_SCHEMA.md index bea5982dc..df8fe91c7 100644 --- a/docs/keepalive/METRICS_SCHEMA.md +++ b/docs/keepalive/METRICS_SCHEMA.md @@ -4,7 +4,14 @@ This document defines the structured metrics record written per keepalive iteration. Records are newline-delimited JSON (NDJSON); one JSON object per line. -## Fields +## Record Types + +The metrics log supports two record types: + +- Keepalive iteration records (`metric_type: "keepalive"` or omitted) +- Post-merge summary records (`metric_type: "post-merge"`) + +## Keepalive Iteration Fields - pr_number: Integer PR number. - iteration: Integer iteration count for the keepalive loop (1-based). @@ -14,9 +21,28 @@ line. - duration_ms: Integer duration in milliseconds for the iteration. - tasks_total: Integer total tasks detected for the PR. - tasks_complete: Integer completed tasks detected for the PR. +- metric_type: Optional string. When present, set to `"keepalive"`. + +## Post-Merge Summary Fields + +- metric_type: String literal `"post-merge"`. 
+- pr_number: Integer PR number.
+- timestamp: ISO 8601 UTC timestamp for when the summary record was emitted.
+- merged_at: ISO 8601 UTC timestamp for when the PR was merged.
+- iteration_count: Integer total keepalive iterations for the PR.
+- tasks_total: Integer total tasks detected for the PR.
+- tasks_complete: Integer completed tasks detected for the PR.
+- completion_rate: Float between 0.0 and 1.0 representing task completion.
+- human_interventions: Integer count of human interventions (comments, manual edits).
 
 ## Example Record
 
 ```json
 {"pr_number":1234,"iteration":2,"timestamp":"2025-01-15T12:34:56Z","action":"retry","error_category":"none","duration_ms":4821,"tasks_total":14,"tasks_complete":6}
 ```
+
+## Example Post-Merge Record
+
+```json
+{"metric_type":"post-merge","pr_number":1234,"timestamp":"2025-01-16T09:00:00Z","merged_at":"2025-01-16T08:55:12Z","iteration_count":3,"tasks_total":14,"tasks_complete":14,"completion_rate":1.0,"human_interventions":2}
+```
diff --git a/pr_body.md b/pr_body.md
index ead998d67..64483595d 100644
--- a/pr_body.md
+++ b/pr_body.md
@@ -1,5 +1,5 @@
-> **Source:** Issue #482
+> **Source:** Issue #483
@@ -10,22 +10,30 @@
 _Scope section missing from source issue._
 
 ## Context for Agent
-- Pending extraction from linked issue context.
+
+### Related Issues/PRs
+- [#5](https://github.com/stranske/Workflows/issues/5)
+- [#483](https://github.com/stranske/Workflows/issues/483)
+
+### References
+- https://github.com/stranske/Workflows/blob/main/docs/plans/langchain-issue-intake-proposal.md
+- https://github.com/stranske/Workflows/compare/main...codex/issue-483?expand=1
+
+### Blockers & Dependencies
+- After PR merges, capture what worked/didn't for future issue formatting improvements.
 
 #### Tasks
-- [x] Create context extraction chain with `CONTEXT_EXTRACTOR_PROMPT`
-- [x] Run during PR creation in `reusable-agents-issue-bridge.yml`
-- [x] Insert context into PR body after Scope, before Tasks
-- [x] Preserve in `<!-- context:start -->...<!-- context:end -->` markers
-- [x] Modify `agents_pr_meta_update_body.js` to include context section
-- [x] Optional: fetch linked issue comments for richer context
-- [x] Add tests for context extraction
+- [x] Design data collection schema for post-merge metrics
+- [x] Track iteration count, completion rate, human interventions
+- [x] Build corpus of successful issue patterns
+- [ ] Create feedback loop to improve formatting prompts
+- [ ] Add visualization/reporting for patterns
 
 #### Acceptance criteria
-- [x] Context section added to PR body when relevant
-- [x] Related issues/PRs linked
-- [x] Design decisions captured
-- [x] Markers allow programmatic identification
+- [x] Post-merge metrics captured automatically
+- [x] Successful patterns identified
+- [ ] Formatting prompts improve over time
+- [ ] Dashboard or report available for review
diff --git a/scripts/issue_pattern_corpus.py b/scripts/issue_pattern_corpus.py
new file mode 100755
index 000000000..087bc9d3b
--- /dev/null
+++ b/scripts/issue_pattern_corpus.py
@@ -0,0 +1,391 @@
+#!/usr/bin/env python3
+"""Build a corpus of successful issue patterns from issue and metrics logs."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from collections.abc import Iterable
+from dataclasses import dataclass
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any
+
+from scripts.langchain import issue_formatter
+
+SUCCESS_PLACEHOLDERS = {
+    "why": 
"_Not provided._", + "scope": "_Not provided._", + "non_goals": "_Not provided._", + "implementation": "_Not provided._", + "tasks": "- [ ] _Not provided._", + "acceptance": "- [ ] _Not provided._", +} + +SECTION_HEADERS = { + "why": "## Why", + "scope": "## Scope", + "non_goals": "## Non-Goals", + "tasks": "## Tasks", + "acceptance": "## Acceptance Criteria", + "implementation": "## Implementation Notes", +} + + +@dataclass(frozen=True) +class CorpusCriteria: + """Filter criteria for successful issue patterns.""" + + min_completion_rate: float + max_human_interventions: int | None + min_tasks_total: int | None + + +def _safe_float(value: Any) -> float | None: + if value is None or value == "": + return None + try: + return float(value) + except (TypeError, ValueError): + return None + + +def _safe_int(value: Any) -> int | None: + if value is None or value == "": + return None + try: + return int(value) + except (TypeError, ValueError): + return None + + +def _parse_timestamp(value: Any) -> datetime | None: + if value is None: + return None + if isinstance(value, str): + text = value.strip() + if not text: + return None + if text.endswith("Z"): + text = text[:-1] + "+00:00" + try: + return datetime.fromisoformat(text) + except ValueError: + return None + return None + + +def _read_json_or_ndjson(path: Path) -> tuple[list[dict[str, Any]], int]: + errors = 0 + try: + content = path.read_text(encoding="utf-8") + except OSError: + return [], 1 + + stripped = content.strip() + if not stripped: + return [], 0 + + try: + parsed = json.loads(stripped) + except json.JSONDecodeError: + parsed = None + + if parsed is not None: + if isinstance(parsed, list): + entries = [item for item in parsed if isinstance(item, dict)] + errors = len(parsed) - len(entries) + return entries, errors + if isinstance(parsed, dict): + return [parsed], 0 + return [], 1 + + entries: list[dict[str, Any]] = [] + for line in content.splitlines(): + raw = line.strip() + if not raw: + continue + try: + parsed_line = json.loads(raw) + except json.JSONDecodeError: + errors += 1 + continue + if isinstance(parsed_line, dict): + entries.append(parsed_line) + else: + errors += 1 + return entries, errors + + +def _extract_pr_number(entry: dict[str, Any]) -> int | None: + for key in ("pr_number", "pr", "pull_request_number"): + value = _safe_int(entry.get(key)) + if value is not None: + return value + pull_request = entry.get("pull_request") + if isinstance(pull_request, dict): + return _safe_int(pull_request.get("number")) + return None + + +def _index_post_merge(records: Iterable[dict[str, Any]]) -> dict[int, dict[str, Any]]: + indexed: dict[int, dict[str, Any]] = {} + for record in records: + metric_type = str(record.get("metric_type") or "").strip().lower() + if metric_type != "post-merge": + continue + pr_number = _safe_int(record.get("pr_number")) + if pr_number is None: + continue + existing = indexed.get(pr_number) + if existing is None: + indexed[pr_number] = record + continue + current_ts = _parse_timestamp(record.get("timestamp")) + existing_ts = _parse_timestamp(existing.get("timestamp")) + if existing_ts is None and current_ts is not None: + indexed[pr_number] = record + continue + if current_ts is None or existing_ts is None: + continue + if current_ts > existing_ts: + indexed[pr_number] = record + return indexed + + +def _split_sections(formatted_body: str) -> dict[str, list[str]]: + sections = {key: [] for key in SECTION_HEADERS} + current: str | None = None + for line in formatted_body.splitlines(): + heading = 
line.strip()
+        for key, header in SECTION_HEADERS.items():
+            if heading == header:
+                current = key
+                break
+        else:
+            if current:
+                sections[current].append(line)
+    return sections
+
+
+def _section_has_content(section_key: str, lines: list[str]) -> bool:
+    text = "\n".join(lines).strip()
+    if not text:
+        return False
+    placeholder = SUCCESS_PLACEHOLDERS.get(section_key)
+    return not (placeholder and text == placeholder)
+
+
+def _count_checklist_items(lines: list[str]) -> int:
+    count = 0
+    for line in lines:
+        stripped = line.strip()
+        if stripped == SUCCESS_PLACEHOLDERS["tasks"]:
+            return 0
+        if stripped == SUCCESS_PLACEHOLDERS["acceptance"]:
+            return 0
+        if stripped.startswith("- ["):
+            count += 1
+    return count
+
+
+def _bucket_count(value: int) -> str:
+    if value <= 0:
+        return "0"
+    if value <= 2:
+        return "1-2"
+    if value <= 5:
+        return "3-5"
+    if value <= 10:
+        return "6-10"
+    return "11+"
+
+
+def _pattern_key(task_count: int, acceptance_count: int, flags: dict[str, bool]) -> str:
+    sections = ",".join(
+        key for key in ("why", "scope", "non_goals", "implementation") if flags.get(key)
+    )
+    if not sections:
+        sections = "none"
+    return (
+        f"tasks={_bucket_count(task_count)}|"
+        f"acceptance={_bucket_count(acceptance_count)}|"
+        f"sections={sections}"
+    )
+
+
+def _meets_success_criteria(metrics: dict[str, Any], criteria: CorpusCriteria) -> bool:
+    completion_rate = _safe_float(metrics.get("completion_rate"))
+    if completion_rate is None or completion_rate < criteria.min_completion_rate:
+        return False
+    if criteria.max_human_interventions is not None:
+        interventions = _safe_int(metrics.get("human_interventions"))
+        if interventions is None or interventions > criteria.max_human_interventions:
+            return False
+    if criteria.min_tasks_total is not None:
+        tasks_total = _safe_int(metrics.get("tasks_total"))
+        if tasks_total is None or tasks_total < criteria.min_tasks_total:
+            return False
+    return True
+
+
+def _build_issue_pattern(
+    issue: dict[str, Any], metrics: dict[str, Any], include_formatted: bool
+) -> dict[str, Any]:
+    title = str(issue.get("title") or issue.get("issue_title") or "").strip()
+    body = str(issue.get("body") or issue.get("issue_body") or "").strip()
+    formatted = issue_formatter.format_issue_body(body, use_llm=False)["formatted_body"]
+    sections = _split_sections(formatted)
+    flags = {key: _section_has_content(key, sections[key]) for key in SECTION_HEADERS}
+    task_count = _count_checklist_items(sections["tasks"])
+    acceptance_count = _count_checklist_items(sections["acceptance"])
+
+    pattern = {
+        "issue_number": issue.get("issue_number") or issue.get("number"),
+        "pr_number": _extract_pr_number(issue),
+        "title": title,
+        "task_count": task_count,
+        "acceptance_count": acceptance_count,
+        "sections": {key: flags[key] for key in ("why", "scope", "non_goals", "implementation")},
+        "completion_rate": metrics.get("completion_rate"),
+        "iteration_count": metrics.get("iteration_count"),
+        "human_interventions": metrics.get("human_interventions"),
+        "pattern_key": _pattern_key(task_count, acceptance_count, flags),
+    }
+    if include_formatted:
+        pattern["formatted_body"] = formatted
+    return pattern
+
+
+def build_corpus(
+    issues: Iterable[dict[str, Any]],
+    metrics: Iterable[dict[str, Any]],
+    criteria: CorpusCriteria,
+    *,
+    include_formatted: bool = False,
+) -> dict[str, Any]:
+    metrics_index = _index_post_merge(metrics)
+    successful: list[dict[str, Any]] = []
+
+    for issue in issues:
+        pr_number = 
_extract_pr_number(issue) + if pr_number is None: + continue + metric = metrics_index.get(pr_number) + if not metric or not _meets_success_criteria(metric, criteria): + continue + successful.append(_build_issue_pattern(issue, metric, include_formatted)) + + pattern_index: dict[str, dict[str, Any]] = {} + for entry in successful: + key = entry["pattern_key"] + grouped = pattern_index.setdefault( + key, + { + "pattern_key": key, + "count": 0, + "issue_numbers": [], + "avg_task_count": 0.0, + "avg_acceptance_count": 0.0, + }, + ) + grouped["count"] += 1 + if entry.get("issue_number") is not None: + grouped["issue_numbers"].append(entry["issue_number"]) + grouped["avg_task_count"] += entry["task_count"] + grouped["avg_acceptance_count"] += entry["acceptance_count"] + + patterns = [] + for grouped in pattern_index.values(): + count = grouped["count"] + grouped["avg_task_count"] = grouped["avg_task_count"] / count + grouped["avg_acceptance_count"] = grouped["avg_acceptance_count"] / count + patterns.append(grouped) + + patterns.sort(key=lambda item: item["count"], reverse=True) + return { + "generated_at": datetime.now(UTC).replace(microsecond=0).isoformat(), + "criteria": { + "min_completion_rate": criteria.min_completion_rate, + "max_human_interventions": criteria.max_human_interventions, + "min_tasks_total": criteria.min_tasks_total, + }, + "successful_issues": successful, + "patterns": patterns, + } + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description="Build corpus of successful issue patterns.") + parser.add_argument("--issues-path", required=True, help="Path to issue JSON/NDJSON.") + parser.add_argument("--metrics-path", required=True, help="Path to metrics NDJSON.") + parser.add_argument( + "--output", + default="issue-pattern-corpus.json", + help="Output JSON path", + ) + parser.add_argument( + "--min-completion-rate", + type=float, + default=1.0, + help="Minimum completion_rate to treat as successful.", + ) + parser.add_argument( + "--max-human-interventions", + type=int, + default=None, + help="Max allowed human_interventions to include (omit to disable filter).", + ) + parser.add_argument( + "--min-tasks-total", + type=int, + default=None, + help="Minimum tasks_total to include (omit to disable filter).", + ) + parser.add_argument( + "--include-formatted-body", + action="store_true", + help="Include formatted issue body in output.", + ) + return parser + + +def main(argv: list[str]) -> int: + parser = _build_parser() + args = parser.parse_args(argv) + + issues_path = Path(args.issues_path) + metrics_path = Path(args.metrics_path) + + issue_entries, issue_errors = _read_json_or_ndjson(issues_path) + metric_entries, metric_errors = _read_json_or_ndjson(metrics_path) + + criteria = CorpusCriteria( + min_completion_rate=args.min_completion_rate, + max_human_interventions=args.max_human_interventions, + min_tasks_total=args.min_tasks_total, + ) + + corpus = build_corpus( + issue_entries, + metric_entries, + criteria, + include_formatted=args.include_formatted_body, + ) + + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(json.dumps(corpus, indent=2, ensure_ascii=True), encoding="utf-8") + + if issue_errors or metric_errors: + print( + "issue_pattern_corpus: parse errors " + f"(issues={issue_errors}, metrics={metric_errors})", + file=sys.stderr, + ) + return 1 + return 0 + + +if __name__ == "__main__": # pragma: no cover - CLI entry point + raise SystemExit(main(sys.argv[1:])) diff 
--git a/scripts/issue_pattern_feedback.py b/scripts/issue_pattern_feedback.py new file mode 100644 index 000000000..306460d65 --- /dev/null +++ b/scripts/issue_pattern_feedback.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 +"""Generate feedback snippets from issue pattern corpora for prompt tuning.""" + +from __future__ import annotations + +import argparse +import json +import sys +from collections import Counter +from pathlib import Path +from typing import Any + + +def _safe_float(value: Any) -> float | None: + if value is None or value == "": + return None + try: + return float(value) + except (TypeError, ValueError): + return None + + +def _parse_pattern_key(key: str) -> dict[str, str]: + parts = key.split("|") + parsed: dict[str, str] = {} + for part in parts: + if "=" not in part: + continue + label, value = part.split("=", 1) + parsed[label.strip()] = value.strip() + return parsed + + +def _format_ratio(count: int, total: int) -> str: + if total <= 0: + return "n/a" + rate = (count / total) * 100 + return f"{count}/{total} ({rate:.0f}%)" + + +def _summarize_sections(successful: list[dict[str, Any]]) -> list[str]: + section_counts: Counter[str] = Counter() + total = len(successful) + for entry in successful: + sections = entry.get("sections") + if not isinstance(sections, dict): + continue + for key, value in sections.items(): + if value: + section_counts[key] += 1 + + lines = [] + for key in ("why", "scope", "non_goals", "implementation"): + label = key.replace("_", " ").title() + lines.append(f"- {label}: {_format_ratio(section_counts.get(key, 0), total)}") + return lines + + +def _summarize_buckets(patterns: list[dict[str, Any]], label: str) -> str: + bucket_counts: Counter[str] = Counter() + for pattern in patterns: + key = str(pattern.get("pattern_key") or "") + parsed = _parse_pattern_key(key) + bucket = parsed.get(label) + if bucket: + bucket_counts[bucket] += int(pattern.get("count") or 0) + if not bucket_counts: + return "n/a" + bucket, _ = bucket_counts.most_common(1)[0] + return bucket + + +def build_feedback(corpus: dict[str, Any], *, max_patterns: int = 5) -> str: + successful = corpus.get("successful_issues") + patterns = corpus.get("patterns") + + successful_list = successful if isinstance(successful, list) else [] + patterns_list = patterns if isinstance(patterns, list) else [] + + if not successful_list: + return "\n".join( + [ + "# Issue Formatting Feedback", + "", + "No successful issues available yet. 
Keep using the standard formatting rules.", + ] + ) + + avg_tasks = _safe_float( + sum(entry.get("task_count", 0) for entry in successful_list) / len(successful_list) + ) + avg_acceptance = _safe_float( + sum(entry.get("acceptance_count", 0) for entry in successful_list) / len(successful_list) + ) + avg_tasks_text = f"{avg_tasks:.1f}" if avg_tasks is not None else "n/a" + avg_acceptance_text = f"{avg_acceptance:.1f}" if avg_acceptance is not None else "n/a" + + task_bucket = _summarize_buckets(patterns_list, "tasks") + acceptance_bucket = _summarize_buckets(patterns_list, "acceptance") + + lines = [ + "# Issue Formatting Feedback", + "", + f"Successful issue sample size: {len(successful_list)}", + f"Typical task count: avg {avg_tasks_text} (most common bucket: {task_bucket})", + f"Typical acceptance count: avg {avg_acceptance_text} (most common bucket: {acceptance_bucket})", + "", + "Common sections present:", + ] + lines.extend(_summarize_sections(successful_list)) + lines.append("") + lines.append("Top patterns:") + + if patterns_list: + for pattern in patterns_list[:max_patterns]: + key = pattern.get("pattern_key", "unknown") + count = pattern.get("count", 0) + avg_task = _safe_float(pattern.get("avg_task_count")) + avg_accept = _safe_float(pattern.get("avg_acceptance_count")) + avg_task_text = f"{avg_task:.1f}" if avg_task is not None else "n/a" + avg_accept_text = f"{avg_accept:.1f}" if avg_accept is not None else "n/a" + lines.append( + f"- {key} (count={count}, avg_tasks={avg_task_text}, avg_acceptance={avg_accept_text})" + ) + else: + lines.append("- n/a") + + return "\n".join(lines).strip() + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="Generate issue formatting feedback from issue pattern corpus." 
+ ) + parser.add_argument( + "--corpus-path", + default="issue-pattern-corpus.json", + help="Path to issue pattern corpus JSON.", + ) + parser.add_argument( + "--output", + default=str(Path("scripts") / "langchain" / "prompts" / "format_issue_feedback.md"), + help="Output path for feedback snippet.", + ) + parser.add_argument("--max-patterns", type=int, default=5, help="Max patterns to list.") + return parser + + +def main(argv: list[str]) -> int: + parser = _build_parser() + args = parser.parse_args(argv) + + corpus_path = Path(args.corpus_path) + if not corpus_path.exists(): + print(f"issue_pattern_feedback: corpus not found: {corpus_path}", file=sys.stderr) + return 1 + + try: + corpus = json.loads(corpus_path.read_text(encoding="utf-8")) + except json.JSONDecodeError: + print(f"issue_pattern_feedback: invalid JSON in {corpus_path}", file=sys.stderr) + return 1 + + if not isinstance(corpus, dict): + print("issue_pattern_feedback: corpus must be a JSON object", file=sys.stderr) + return 1 + + feedback = build_feedback(corpus, max_patterns=args.max_patterns) + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(feedback, encoding="utf-8") + print(f"Wrote issue format feedback to {output_path}") + return 0 + + +if __name__ == "__main__": # pragma: no cover - CLI entry point + raise SystemExit(main(sys.argv[1:])) diff --git a/scripts/issue_pattern_report.py b/scripts/issue_pattern_report.py new file mode 100644 index 000000000..a8f80db4f --- /dev/null +++ b/scripts/issue_pattern_report.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 +"""Generate a markdown report from issue pattern corpora.""" + +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path +from typing import Any + +from scripts.metrics_format_utils import format_percentage, truncate_string + + +def _safe_float(value: Any) -> float | None: + if value is None or value == "": + return None + try: + return float(value) + except (TypeError, ValueError): + return None + + +def _format_float(value: Any, default: str = "n/a") -> str: + parsed = _safe_float(value) + return f"{parsed:.1f}" if parsed is not None else default + + +def _format_completion(value: Any) -> str: + parsed = _safe_float(value) + if parsed is None: + return "n/a" + return format_percentage(parsed * 100, decimals=1) + + +def build_report(corpus: dict[str, Any], *, max_patterns: int = 10, max_issues: int = 10) -> str: + generated_at = corpus.get("generated_at") or "n/a" + criteria = corpus.get("criteria") if isinstance(corpus.get("criteria"), dict) else {} + patterns = corpus.get("patterns") if isinstance(corpus.get("patterns"), list) else [] + issues = ( + corpus.get("successful_issues") if isinstance(corpus.get("successful_issues"), list) else [] + ) + + lines = [ + "# Issue Pattern Report", + "", + f"Generated at: {generated_at}", + f"Successful issues: {len(issues)}", + f"Distinct patterns: {len(patterns)}", + "", + "## Criteria", + f"- min_completion_rate: {criteria.get('min_completion_rate', 'n/a')}", + f"- max_human_interventions: {criteria.get('max_human_interventions', 'n/a')}", + f"- min_tasks_total: {criteria.get('min_tasks_total', 'n/a')}", + "", + ] + + lines.extend( + [ + "## Patterns", + "", + "| Pattern | Count | Avg tasks | Avg acceptance | Issues |", + "| --- | --- | --- | --- | --- |", + ] + ) + if patterns: + for pattern in patterns[:max_patterns]: + issue_numbers = pattern.get("issue_numbers") + if isinstance(issue_numbers, list) and 
issue_numbers: + issue_list = ", ".join(str(value) for value in issue_numbers[:5]) + else: + issue_list = "n/a" + lines.append( + "| {pattern} | {count} | {avg_tasks} | {avg_acceptance} | {issues} |".format( + pattern=pattern.get("pattern_key", "n/a"), + count=pattern.get("count", 0), + avg_tasks=_format_float(pattern.get("avg_task_count")), + avg_acceptance=_format_float(pattern.get("avg_acceptance_count")), + issues=issue_list, + ) + ) + else: + lines.append("| n/a | 0 | n/a | n/a | n/a |") + + lines.extend(["", "## Successful Issues (sample)", ""]) + lines.extend( + [ + "| PR | Issue | Title | Completion | Iterations | Interventions | Tasks | Acceptance |", + "| --- | --- | --- | --- | --- | --- | --- | --- |", + ] + ) + if issues: + for issue in issues[:max_issues]: + lines.append( + "| {pr} | {issue} | {title} | {completion} | {iterations} | {interventions} | {tasks} | {acceptance} |".format( + pr=issue.get("pr_number", "n/a"), + issue=issue.get("issue_number", "n/a"), + title=truncate_string(str(issue.get("title") or "n/a"), max_length=40), + completion=_format_completion(issue.get("completion_rate")), + iterations=issue.get("iteration_count", "n/a"), + interventions=issue.get("human_interventions", "n/a"), + tasks=issue.get("task_count", "n/a"), + acceptance=issue.get("acceptance_count", "n/a"), + ) + ) + else: + lines.append("| n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a |") + + return "\n".join(lines).strip() + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="Generate issue pattern report from issue pattern corpus." + ) + parser.add_argument( + "--corpus-path", + default="issue-pattern-corpus.json", + help="Path to issue pattern corpus JSON.", + ) + parser.add_argument( + "--output", + default="issue-pattern-report.md", + help="Output markdown path.", + ) + parser.add_argument("--max-patterns", type=int, default=10, help="Max patterns to list.") + parser.add_argument("--max-issues", type=int, default=10, help="Max issues to list.") + return parser + + +def main(argv: list[str]) -> int: + parser = _build_parser() + args = parser.parse_args(argv) + + corpus_path = Path(args.corpus_path) + if not corpus_path.exists(): + print(f"issue_pattern_report: corpus not found: {corpus_path}", file=sys.stderr) + return 1 + + try: + corpus = json.loads(corpus_path.read_text(encoding="utf-8")) + except json.JSONDecodeError: + print(f"issue_pattern_report: invalid JSON in {corpus_path}", file=sys.stderr) + return 1 + + if not isinstance(corpus, dict): + print("issue_pattern_report: corpus must be a JSON object", file=sys.stderr) + return 1 + + report = build_report(corpus, max_patterns=args.max_patterns, max_issues=args.max_issues) + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(report, encoding="utf-8") + print(f"Wrote issue pattern report to {output_path}") + return 0 + + +if __name__ == "__main__": # pragma: no cover - CLI entry point + raise SystemExit(main(sys.argv[1:])) diff --git a/scripts/keepalive_metrics_collector.py b/scripts/keepalive_metrics_collector.py index 4ace003cc..c4896cf95 100755 --- a/scripts/keepalive_metrics_collector.py +++ b/scripts/keepalive_metrics_collector.py @@ -12,7 +12,7 @@ from pathlib import Path from typing import Any -REQUIRED_FIELDS = ( +KEEPALIVE_REQUIRED_FIELDS = ( "pr_number", "iteration", "timestamp", @@ -23,6 +23,18 @@ "tasks_complete", ) +POST_MERGE_REQUIRED_FIELDS = ( + "metric_type", + "pr_number", + "timestamp", + "merged_at", + 
"iteration_count", + "tasks_total", + "tasks_complete", + "completion_rate", + "human_interventions", +) + @dataclass(frozen=True) class ValidationError(Exception): @@ -38,6 +50,10 @@ def _is_int(value: Any) -> bool: return isinstance(value, int) and not isinstance(value, bool) +def _is_number(value: Any) -> bool: + return isinstance(value, (int, float)) and not isinstance(value, bool) + + def _parse_timestamp(value: str) -> datetime: if not value: raise ValidationError("timestamp is required") @@ -53,9 +69,8 @@ def _parse_timestamp(value: str) -> datetime: return parsed -def validate_record(record: dict[str, Any]) -> None: - """Validate required fields and types for a metrics record.""" - missing = [field for field in REQUIRED_FIELDS if field not in record] +def _validate_keepalive(record: dict[str, Any]) -> None: + missing = [field for field in KEEPALIVE_REQUIRED_FIELDS if field not in record] if missing: raise ValidationError(f"missing fields: {', '.join(missing)}") @@ -77,6 +92,44 @@ def validate_record(record: dict[str, Any]) -> None: _parse_timestamp(str(record["timestamp"])) +def _validate_post_merge(record: dict[str, Any]) -> None: + missing = [field for field in POST_MERGE_REQUIRED_FIELDS if field not in record] + if missing: + raise ValidationError(f"missing fields: {', '.join(missing)}") + + metric_type = str(record.get("metric_type", "")).strip().lower() + if metric_type != "post-merge": + raise ValidationError("metric_type must be 'post-merge'") + if not _is_int(record["pr_number"]): + raise ValidationError("pr_number must be an integer") + if not _is_int(record["iteration_count"]): + raise ValidationError("iteration_count must be an integer") + if not _is_int(record["tasks_total"]): + raise ValidationError("tasks_total must be an integer") + if not _is_int(record["tasks_complete"]): + raise ValidationError("tasks_complete must be an integer") + if not _is_number(record["completion_rate"]): + raise ValidationError("completion_rate must be a number") + if not _is_int(record["human_interventions"]): + raise ValidationError("human_interventions must be an integer") + + completion_rate = float(record["completion_rate"]) + if not (0.0 <= completion_rate <= 1.0): + raise ValidationError("completion_rate must be between 0.0 and 1.0") + + _parse_timestamp(str(record["timestamp"])) + _parse_timestamp(str(record["merged_at"])) + + +def validate_record(record: dict[str, Any]) -> None: + """Validate required fields and types for a metrics record.""" + metric_type = record.get("metric_type") + if metric_type is None or str(metric_type).strip().lower() == "keepalive": + _validate_keepalive(record) + return + _validate_post_merge(record) + + def _utc_now_iso() -> str: return datetime.now(UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z") diff --git a/scripts/keepalive_metrics_dashboard.py b/scripts/keepalive_metrics_dashboard.py index 72ddeb0d1..6d6d3cc47 100755 --- a/scripts/keepalive_metrics_dashboard.py +++ b/scripts/keepalive_metrics_dashboard.py @@ -65,6 +65,11 @@ def _summarise(records: Iterable[dict[str, Any]]) -> dict[str, Any]: pr_iterations: dict[int, int] = {} for record in records: + metric_type = record.get("metric_type") + if metric_type is not None: + metric_value = str(metric_type).strip().lower() + if metric_value and metric_value != "keepalive": + continue total += 1 error_category_raw = record.get("error_category") error_category = str(error_category_raw).strip() if error_category_raw is not None else "" diff --git a/scripts/keepalive_post_merge_metrics.py 
b/scripts/keepalive_post_merge_metrics.py new file mode 100755 index 000000000..a36aad1b0 --- /dev/null +++ b/scripts/keepalive_post_merge_metrics.py @@ -0,0 +1,236 @@ +#!/usr/bin/env python3 +"""Build post-merge metrics records from keepalive metrics logs.""" + +from __future__ import annotations + +import argparse +import json +import sys +from collections.abc import Iterable +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from scripts import keepalive_metrics_collector as collector + + +def _utc_now_iso() -> str: + return datetime.now(UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z") + + +def _safe_int(value: Any) -> int | None: + if value is None or value == "": + return None + try: + return int(value) + except (TypeError, ValueError): + return None + + +def _parse_timestamp(value: Any) -> datetime | None: + if value is None: + return None + if isinstance(value, str): + text = value.strip() + if not text: + return None + if text.endswith("Z"): + text = text[:-1] + "+00:00" + try: + parsed = datetime.fromisoformat(text) + except ValueError: + return None + if parsed.tzinfo is None: + parsed = parsed.replace(tzinfo=UTC) + return parsed + return None + + +def _read_ndjson(path: Path) -> tuple[list[dict[str, Any]], int]: + entries: list[dict[str, Any]] = [] + errors = 0 + try: + content = path.read_text(encoding="utf-8") + except OSError: + return entries, 1 + for line in content.splitlines(): + raw = line.strip() + if not raw: + continue + try: + parsed = json.loads(raw) + except json.JSONDecodeError: + errors += 1 + continue + if isinstance(parsed, dict): + entries.append(parsed) + else: + errors += 1 + return entries, errors + + +def _is_keepalive_record(record: dict[str, Any]) -> bool: + metric_type = record.get("metric_type") + if metric_type is None: + return True + return str(metric_type).strip().lower() == "keepalive" + + +def _filter_keepalive_records( + records: Iterable[dict[str, Any]], + pr_number: int, +) -> list[dict[str, Any]]: + filtered: list[dict[str, Any]] = [] + for record in records: + if not _is_keepalive_record(record): + continue + record_pr = _safe_int(record.get("pr_number")) + if record_pr == pr_number: + filtered.append(record) + return filtered + + +def _latest_record(records: Iterable[dict[str, Any]]) -> dict[str, Any] | None: + best: dict[str, Any] | None = None + best_ts: datetime | None = None + best_iteration = -1 + + for record in records: + timestamp = _parse_timestamp(record.get("timestamp")) + iteration = _safe_int(record.get("iteration")) or -1 + if timestamp is not None: + if best_ts is None or timestamp > best_ts: + best = record + best_ts = timestamp + best_iteration = iteration + continue + if best_ts is None and iteration > best_iteration: + best = record + best_iteration = iteration + + return best + + +def build_post_merge_record( + records: Iterable[dict[str, Any]], + *, + pr_number: int, + merged_at: str, + human_interventions: int, + timestamp: str | None = None, + iteration_count: int | None = None, + tasks_total: int | None = None, + tasks_complete: int | None = None, +) -> dict[str, Any]: + keepalive = _filter_keepalive_records(records, pr_number) + latest = _latest_record(keepalive) if keepalive else None + + if iteration_count is None: + iterations = [_safe_int(record.get("iteration")) for record in keepalive] + iteration_values = [value for value in iterations if value is not None] + if iteration_values: + iteration_count = max(iteration_values) + else: + raise ValueError(f"no 
keepalive iterations found for PR #{pr_number}") + + if tasks_total is None: + tasks_total = _safe_int(latest.get("tasks_total") if latest else None) + if tasks_complete is None: + tasks_complete = _safe_int(latest.get("tasks_complete") if latest else None) + if tasks_total is None or tasks_complete is None: + raise ValueError(f"missing task counts for PR #{pr_number}") + + completion_rate = float(tasks_complete) / float(tasks_total) if tasks_total > 0 else 0.0 + record = { + "metric_type": "post-merge", + "pr_number": pr_number, + "timestamp": timestamp or _utc_now_iso(), + "merged_at": merged_at, + "iteration_count": iteration_count, + "tasks_total": tasks_total, + "tasks_complete": tasks_complete, + "completion_rate": completion_rate, + "human_interventions": human_interventions, + } + return record + + +def _coerce_int(value: str, field: str) -> int: + try: + return int(value) + except (TypeError, ValueError) as exc: + raise ValueError(f"{field} must be an integer") from exc + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="Build post-merge metrics records from keepalive logs." + ) + parser.add_argument( + "--metrics-path", default="keepalive-metrics.ndjson", help="Keepalive NDJSON log path" + ) + parser.add_argument( + "--output-path", + help="NDJSON output path (defaults to metrics path)", + ) + parser.add_argument("--pr-number", required=True, help="Pull request number") + parser.add_argument("--merged-at", required=True, help="Merged timestamp (ISO 8601)") + parser.add_argument( + "--human-interventions", default="0", help="Human intervention count (integer)" + ) + parser.add_argument("--timestamp", help="Record timestamp (ISO 8601, defaults to now)") + parser.add_argument("--iteration-count", help="Override iteration count") + parser.add_argument("--tasks-total", help="Override tasks total") + parser.add_argument("--tasks-complete", help="Override tasks complete") + return parser + + +def main(argv: list[str]) -> int: + parser = _build_parser() + args = parser.parse_args(argv) + + metrics_path = Path(args.metrics_path) + output_path = Path(args.output_path) if args.output_path else metrics_path + + pr_number = _coerce_int(args.pr_number, "pr_number") + human_interventions = _coerce_int(args.human_interventions, "human_interventions") + iteration_override = ( + _coerce_int(args.iteration_count, "iteration_count") if args.iteration_count else None + ) + tasks_total_override = ( + _coerce_int(args.tasks_total, "tasks_total") if args.tasks_total else None + ) + tasks_complete_override = ( + _coerce_int(args.tasks_complete, "tasks_complete") if args.tasks_complete else None + ) + + records, errors = _read_ndjson(metrics_path) + if errors: + print( + f"keepalive_post_merge_metrics: {errors} parse error(s) in {metrics_path}", + file=sys.stderr, + ) + + try: + record = build_post_merge_record( + records, + pr_number=pr_number, + merged_at=args.merged_at, + human_interventions=human_interventions, + timestamp=args.timestamp, + iteration_count=iteration_override, + tasks_total=tasks_total_override, + tasks_complete=tasks_complete_override, + ) + collector.validate_record(record) + collector.append_record(output_path, record) + except Exception as exc: + print(f"keepalive_post_merge_metrics: {exc}", file=sys.stderr) + return 1 + + print(f"Wrote post-merge metrics record to {output_path}") + return 0 + + +if __name__ == "__main__": # pragma: no cover - CLI entry point + raise SystemExit(main(sys.argv[1:])) diff --git 
a/scripts/langchain/context_extractor.py b/scripts/langchain/context_extractor.py old mode 100644 new mode 100755 diff --git a/scripts/langchain/issue_formatter.py b/scripts/langchain/issue_formatter.py index bcee41616..121fb2fc1 100755 --- a/scripts/langchain/issue_formatter.py +++ b/scripts/langchain/issue_formatter.py @@ -40,6 +40,7 @@ """.strip() PROMPT_PATH = Path(__file__).resolve().parent / "prompts" / "format_issue.md" +FEEDBACK_PROMPT_PATH = Path(__file__).resolve().parent / "prompts" / "format_issue_feedback.md" SECTION_ALIASES = { "why": ["why", "motivation", "summary", "goals"], @@ -77,8 +78,15 @@ def _load_prompt() -> str: if PROMPT_PATH.is_file(): - return PROMPT_PATH.read_text(encoding="utf-8").strip() - return ISSUE_FORMATTER_PROMPT + base_prompt = PROMPT_PATH.read_text(encoding="utf-8").strip() + else: + base_prompt = ISSUE_FORMATTER_PROMPT + + if FEEDBACK_PROMPT_PATH.is_file(): + feedback = FEEDBACK_PROMPT_PATH.read_text(encoding="utf-8").strip() + if feedback: + return f"{base_prompt}\n\n{feedback}\n" + return base_prompt def _get_llm_client() -> tuple[object, str] | None: diff --git a/tests/scripts/test_issue_formatter.py b/tests/scripts/test_issue_formatter.py index b06123e4a..0f2cbc642 100644 --- a/tests/scripts/test_issue_formatter.py +++ b/tests/scripts/test_issue_formatter.py @@ -70,3 +70,18 @@ def test_format_issue_fallback_uses_placeholders() -> None: assert tasks == "- [ ] _Not provided._" assert acceptance == "- [ ] _Not provided._" + + +def test_load_prompt_appends_feedback(tmp_path, monkeypatch) -> None: + prompt_path = tmp_path / "format_issue.md" + feedback_path = tmp_path / "format_issue_feedback.md" + prompt_path.write_text("Base prompt.", encoding="utf-8") + feedback_path.write_text("Feedback notes.", encoding="utf-8") + + monkeypatch.setattr(issue_formatter, "PROMPT_PATH", prompt_path) + monkeypatch.setattr(issue_formatter, "FEEDBACK_PROMPT_PATH", feedback_path) + + prompt = issue_formatter._load_prompt() + + assert "Base prompt." in prompt + assert "Feedback notes." 
in prompt diff --git a/tests/scripts/test_issue_pattern_corpus.py b/tests/scripts/test_issue_pattern_corpus.py new file mode 100644 index 000000000..b3aad0f9a --- /dev/null +++ b/tests/scripts/test_issue_pattern_corpus.py @@ -0,0 +1,103 @@ +import json +from pathlib import Path + +from scripts import issue_pattern_corpus as corpus + + +def _write_ndjson(path: Path, records: list[dict]) -> None: + payload = "\n".join(json.dumps(record) for record in records) + "\n" + path.write_text(payload, encoding="utf-8") + + +def test_build_corpus_filters_successful(tmp_path: Path) -> None: + issues = [ + { + "issue_number": 10, + "pr_number": 101, + "title": "Issue A", + "body": "## Tasks\n- [ ] One\n## Acceptance Criteria\n- [ ] Done", + }, + { + "issue_number": 11, + "pr_number": 102, + "title": "Issue B", + "body": "## Tasks\n- [ ] Two\n## Acceptance Criteria\n- [ ] Done", + }, + ] + metrics = [ + { + "metric_type": "post-merge", + "pr_number": 101, + "completion_rate": 1.0, + "human_interventions": 0, + "tasks_total": 2, + "iteration_count": 2, + }, + { + "metric_type": "post-merge", + "pr_number": 102, + "completion_rate": 0.5, + "human_interventions": 0, + "tasks_total": 2, + "iteration_count": 3, + }, + ] + + issues_path = tmp_path / "issues.ndjson" + metrics_path = tmp_path / "metrics.ndjson" + _write_ndjson(issues_path, issues) + _write_ndjson(metrics_path, metrics) + + criteria = corpus.CorpusCriteria( + min_completion_rate=1.0, max_human_interventions=None, min_tasks_total=None + ) + issue_entries, _ = corpus._read_json_or_ndjson(issues_path) + metric_entries, _ = corpus._read_json_or_ndjson(metrics_path) + result = corpus.build_corpus(issue_entries, metric_entries, criteria) + + assert len(result["successful_issues"]) == 1 + assert result["successful_issues"][0]["issue_number"] == 10 + + +def test_build_corpus_groups_patterns() -> None: + issues = [ + { + "issue_number": 20, + "pr_number": 201, + "title": "Issue C", + "body": "## Tasks\n- [ ] One\n- [ ] Two\n## Acceptance Criteria\n- [ ] Done", + }, + { + "issue_number": 21, + "pr_number": 202, + "title": "Issue D", + "body": "## Tasks\n- [ ] Three\n- [ ] Four\n## Acceptance Criteria\n- [ ] Done", + }, + ] + metrics = [ + { + "metric_type": "post-merge", + "pr_number": 201, + "completion_rate": 1.0, + "human_interventions": 0, + "tasks_total": 2, + "iteration_count": 1, + }, + { + "metric_type": "post-merge", + "pr_number": 202, + "completion_rate": 1.0, + "human_interventions": 0, + "tasks_total": 2, + "iteration_count": 1, + }, + ] + + criteria = corpus.CorpusCriteria( + min_completion_rate=1.0, max_human_interventions=0, min_tasks_total=1 + ) + result = corpus.build_corpus(issues, metrics, criteria) + + assert len(result["successful_issues"]) == 2 + assert len(result["patterns"]) == 1 + assert result["patterns"][0]["count"] == 2 diff --git a/tests/scripts/test_issue_pattern_feedback.py b/tests/scripts/test_issue_pattern_feedback.py new file mode 100644 index 000000000..97513b055 --- /dev/null +++ b/tests/scripts/test_issue_pattern_feedback.py @@ -0,0 +1,80 @@ +import json + +from scripts import issue_pattern_feedback as feedback + + +def test_build_feedback_with_patterns() -> None: + corpus = { + "successful_issues": [ + { + "task_count": 3, + "acceptance_count": 2, + "sections": { + "why": True, + "scope": True, + "non_goals": False, + "implementation": True, + }, + }, + { + "task_count": 4, + "acceptance_count": 1, + "sections": { + "why": True, + "scope": False, + "non_goals": True, + "implementation": True, + }, + }, + ], + 
"patterns": [ + { + "pattern_key": "tasks=3-5|acceptance=1-2|sections=why,scope,implementation", + "count": 2, + "avg_task_count": 3.5, + "avg_acceptance_count": 1.5, + } + ], + } + + output = feedback.build_feedback(corpus) + + assert "Successful issue sample size: 2" in output + assert "Typical task count" in output + assert "Top patterns" in output + assert "tasks=3-5" in output + + +def test_main_writes_feedback(tmp_path, capsys) -> None: + corpus_path = tmp_path / "corpus.json" + corpus_path.write_text( + json.dumps( + { + "successful_issues": [ + { + "task_count": 1, + "acceptance_count": 1, + "sections": {"why": True, "scope": True}, + } + ], + "patterns": [], + } + ), + encoding="utf-8", + ) + output_path = tmp_path / "feedback.md" + + result = feedback.main(["--corpus-path", str(corpus_path), "--output", str(output_path)]) + + captured = capsys.readouterr() + assert result == 0 + assert output_path.exists() + assert "Wrote issue format feedback to" in captured.out + + +def test_main_missing_corpus_returns_error(capsys) -> None: + result = feedback.main(["--corpus-path", "missing.json"]) + + captured = capsys.readouterr() + assert result == 1 + assert "issue_pattern_feedback: corpus not found" in captured.err diff --git a/tests/scripts/test_issue_pattern_report.py b/tests/scripts/test_issue_pattern_report.py new file mode 100644 index 000000000..12317ca6f --- /dev/null +++ b/tests/scripts/test_issue_pattern_report.py @@ -0,0 +1,61 @@ +import json + +from scripts import issue_pattern_report as report + + +def test_build_report_with_patterns() -> None: + corpus = { + "generated_at": "2025-01-01T00:00:00Z", + "criteria": {"min_completion_rate": 1.0}, + "patterns": [ + { + "pattern_key": "tasks=1-2|acceptance=1-2|sections=why", + "count": 3, + "avg_task_count": 2.0, + "avg_acceptance_count": 1.5, + "issue_numbers": [10, 11, 12], + } + ], + "successful_issues": [ + { + "issue_number": 10, + "pr_number": 101, + "title": "Add tests for parser", + "completion_rate": 1.0, + "iteration_count": 2, + "human_interventions": 0, + "task_count": 2, + "acceptance_count": 1, + } + ], + } + + output = report.build_report(corpus, max_patterns=5, max_issues=5) + + assert "# Issue Pattern Report" in output + assert "tasks=1-2|acceptance=1-2|sections=why" in output + assert "Add tests for parser" in output + + +def test_main_writes_report(tmp_path, capsys) -> None: + corpus_path = tmp_path / "corpus.json" + corpus_path.write_text( + json.dumps({"patterns": [], "successful_issues": []}), + encoding="utf-8", + ) + output_path = tmp_path / "report.md" + + result = report.main(["--corpus-path", str(corpus_path), "--output", str(output_path)]) + + captured = capsys.readouterr() + assert result == 0 + assert output_path.exists() + assert "Wrote issue pattern report to" in captured.out + + +def test_main_missing_corpus_returns_error(capsys) -> None: + result = report.main(["--corpus-path", "missing.json"]) + + captured = capsys.readouterr() + assert result == 1 + assert "issue_pattern_report: corpus not found" in captured.err diff --git a/tests/scripts/test_keepalive_metrics_collector.py b/tests/scripts/test_keepalive_metrics_collector.py index a05cc0819..4d1bebb0f 100644 --- a/tests/scripts/test_keepalive_metrics_collector.py +++ b/tests/scripts/test_keepalive_metrics_collector.py @@ -19,12 +19,32 @@ def _sample_record() -> dict: } +def _sample_post_merge_record() -> dict: + return { + "metric_type": "post-merge", + "pr_number": 202, + "timestamp": datetime(2025, 2, 1, tzinfo=UTC).isoformat().replace("+00:00", 
"Z"), + "merged_at": datetime(2025, 2, 1, 1, tzinfo=UTC).isoformat().replace("+00:00", "Z"), + "iteration_count": 4, + "tasks_total": 6, + "tasks_complete": 6, + "completion_rate": 1.0, + "human_interventions": 2, + } + + def test_validate_record_accepts_valid_payload() -> None: record = _sample_record() collector.validate_record(record) +def test_validate_record_accepts_post_merge_payload() -> None: + record = _sample_post_merge_record() + + collector.validate_record(record) + + def test_validate_record_rejects_missing_fields() -> None: record = {"pr_number": 101} @@ -66,6 +86,12 @@ def test_is_int_rejects_bool() -> None: assert collector._is_int(True) is False +def test_is_number_rejects_bool() -> None: + assert collector._is_number(1.5) is True + assert collector._is_number(2) is True + assert collector._is_number(False) is False + + def test_parse_timestamp_validates_timezone() -> None: parsed = collector._parse_timestamp("2025-02-03T04:05:06Z") @@ -146,6 +172,33 @@ def test_validate_record_rejects_specific_fields(field: str, value: object, mess collector.validate_record(record) +@pytest.mark.parametrize( + ("field", "value", "message"), + [ + ("metric_type", "unknown", "metric_type must be 'post-merge'"), + ("iteration_count", "two", "iteration_count must be an integer"), + ("completion_rate", "full", "completion_rate must be a number"), + ("human_interventions", None, "human_interventions must be an integer"), + ], +) +def test_validate_post_merge_rejects_specific_fields( + field: str, value: object, message: str +) -> None: + record = _sample_post_merge_record() + record[field] = value + + with pytest.raises(collector.ValidationError, match=message): + collector.validate_record(record) + + +def test_validate_post_merge_rejects_out_of_range_completion_rate() -> None: + record = _sample_post_merge_record() + record["completion_rate"] = 1.5 + + with pytest.raises(collector.ValidationError, match="completion_rate must be between"): + collector.validate_record(record) + + def test_utc_now_iso_format() -> None: stamp = collector._utc_now_iso() diff --git a/tests/scripts/test_keepalive_metrics_dashboard.py b/tests/scripts/test_keepalive_metrics_dashboard.py index 84989479f..71468e6cb 100644 --- a/tests/scripts/test_keepalive_metrics_dashboard.py +++ b/tests/scripts/test_keepalive_metrics_dashboard.py @@ -62,6 +62,21 @@ def test_summarise_normalizes_categories_and_iterations() -> None: assert summary["avg_iterations"] == 2.5 +def test_summarise_skips_post_merge_records() -> None: + summary = dashboard._summarise( + [ + {"metric_type": "post-merge", "pr_number": 1, "iteration": 9}, + {"metric_type": "keepalive", "pr_number": 2, "iteration": 1, "error_category": "none"}, + {"pr_number": 3, "iteration": 2, "error_category": "none"}, + ] + ) + + assert summary["total"] == 2 + assert summary["successes"] == 2 + assert summary["iteration_counts"]["1"] == 1 + assert summary["iteration_counts"]["2"] == 1 + + def test_build_parser_defaults() -> None: parser = dashboard._build_parser() args = parser.parse_args([]) diff --git a/tests/scripts/test_keepalive_post_merge_metrics.py b/tests/scripts/test_keepalive_post_merge_metrics.py new file mode 100644 index 000000000..127ec959a --- /dev/null +++ b/tests/scripts/test_keepalive_post_merge_metrics.py @@ -0,0 +1,116 @@ +from datetime import UTC, datetime +from pathlib import Path + +from scripts import keepalive_post_merge_metrics as post_merge + + +def _sample_keepalive( + pr_number: int, iteration: int, timestamp: str, total: int, complete: int +) -> 
dict: + return { + "pr_number": pr_number, + "iteration": iteration, + "timestamp": timestamp, + "tasks_total": total, + "tasks_complete": complete, + "action": "run", + "error_category": "none", + "duration_ms": 100, + } + + +def test_build_post_merge_record_computes_from_keepalive_records() -> None: + records = [ + _sample_keepalive( + 7, + 1, + datetime(2025, 1, 1, tzinfo=UTC).isoformat().replace("+00:00", "Z"), + 4, + 1, + ), + _sample_keepalive( + 7, + 3, + datetime(2025, 1, 2, tzinfo=UTC).isoformat().replace("+00:00", "Z"), + 4, + 4, + ), + _sample_keepalive( + 9, + 2, + datetime(2025, 1, 3, tzinfo=UTC).isoformat().replace("+00:00", "Z"), + 6, + 2, + ), + ] + + record = post_merge.build_post_merge_record( + records, + pr_number=7, + merged_at="2025-01-03T00:00:00Z", + human_interventions=2, + timestamp="2025-01-03T01:00:00Z", + ) + + assert record["iteration_count"] == 3 + assert record["tasks_total"] == 4 + assert record["tasks_complete"] == 4 + assert record["completion_rate"] == 1.0 + assert record["human_interventions"] == 2 + + +def test_build_post_merge_record_rejects_missing_records() -> None: + try: + post_merge.build_post_merge_record( + [], + pr_number=11, + merged_at="2025-01-04T00:00:00Z", + human_interventions=0, + ) + except ValueError as exc: + assert "no keepalive iterations" in str(exc) + else: + raise AssertionError("Expected ValueError for missing records") + + +def test_main_writes_post_merge_record(tmp_path: Path, capsys) -> None: + log_path = tmp_path / "metrics.ndjson" + log_path.write_text( + "\n".join( + [ + '{"pr_number": 12, "iteration": 1, "timestamp": "2025-02-01T00:00:00Z",' + ' "tasks_total": 3, "tasks_complete": 1, "action": "run",' + ' "error_category": "none", "duration_ms": 100}', + '{"pr_number": 12, "iteration": 2, "timestamp": "2025-02-02T00:00:00Z",' + ' "tasks_total": 3, "tasks_complete": 3, "action": "run",' + ' "error_category": "none", "duration_ms": 100}', + ] + ), + encoding="utf-8", + ) + output_path = tmp_path / "out.ndjson" + + result = post_merge.main( + [ + "--metrics-path", + str(log_path), + "--output-path", + str(output_path), + "--pr-number", + "12", + "--merged-at", + "2025-02-03T00:00:00Z", + "--human-interventions", + "1", + "--timestamp", + "2025-02-03T01:00:00Z", + ] + ) + + captured = capsys.readouterr() + assert result == 0 + assert output_path.exists() + output = output_path.read_text(encoding="utf-8").splitlines() + assert len(output) == 1 + assert "post-merge" in output[0] + assert "Wrote post-merge metrics record" in captured.out
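
---

Taken together, the new scripts form a post-merge feedback pipeline: the keepalive log feeds `keepalive_post_merge_metrics.py`, whose `post-merge` records drive `issue_pattern_corpus.py`, which in turn feeds both the prompt feedback snippet (picked up by `_load_prompt()` in `issue_formatter.py`) and the markdown report. A minimal end-to-end sketch follows, assuming the repo root is importable; `issues.ndjson` and the PR/merge values are hypothetical, and the in-process `main([...])` calls stand in for the equivalent CLI invocations.

```python
# Hypothetical end-to-end run of the feedback loop added in this PR.
# Assumes issues.ndjson contains one {"issue_number": ..., "pr_number": ...,
# "title": ..., "body": ...} object per line; all paths and PR values here
# are illustrative, not part of the diff.
from scripts import (
    issue_pattern_corpus,
    issue_pattern_feedback,
    issue_pattern_report,
    keepalive_post_merge_metrics,
)

# 1. Summarise PR #1234 after merge; appends a metric_type="post-merge"
#    record derived from the keepalive iterations already in the log.
keepalive_post_merge_metrics.main([
    "--metrics-path", "keepalive-metrics.ndjson",
    "--pr-number", "1234",
    "--merged-at", "2025-01-16T08:55:12Z",
    "--human-interventions", "2",
])

# 2. Join issues to their post-merge records and keep the successful ones
#    (defaults: completion_rate >= 1.0, no intervention/task-count filters).
issue_pattern_corpus.main([
    "--issues-path", "issues.ndjson",
    "--metrics-path", "keepalive-metrics.ndjson",
    "--output", "issue-pattern-corpus.json",
])

# 3. Regenerate the feedback snippet that _load_prompt() appends to the
#    base formatting prompt on the next formatter run.
issue_pattern_feedback.main(["--corpus-path", "issue-pattern-corpus.json"])

# 4. Render the reviewer-facing markdown report.
issue_pattern_report.main(["--corpus-path", "issue-pattern-corpus.json"])
```

Note that `issue_pattern_corpus.main` writes its output before returning non-zero on parse errors, so the downstream feedback and report steps can still proceed with a partial corpus.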