From eea2a1f74c93394f5e0517cdf88ed8bd57533006 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 01:25:57 +0200
Subject: [PATCH 01/31] feat: implement sandcastle refinement loop with
 critic-based convergence
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace single-pass implement→review→merge with iterative implement↔critic
loop per task. Key changes:

- Orchestrator fetches and sanitizes issues (prevents prompt injection)
- Implement↔Critic loop with deterministic dedup convergence
- Critic produces structured findings (nonce-tagged JSON, zod-validated)
- Decreasing iteration budget per round [100, 50, 25, 10, 10]
- Host-side validation and rebase (no agent needed)
- One PR per task (no merger agent)
- Draft PR on non-convergence with outstanding findings listed

Implements #110
---
 .sandcastle/critic-prompt.md    |  56 +++++
 .sandcastle/implement-prompt.md |  17 +-
 .sandcastle/main.ts             | 364 ++++++++++++++++++++++----------
 .sandcastle/merge-prompt.md     |  68 ------
 .sandcastle/plan-prompt.md      |   2 +-
 .sandcastle/review-prompt.md    |  67 ------
 6 files changed, 325 insertions(+), 249 deletions(-)
 create mode 100644 .sandcastle/critic-prompt.md
 delete mode 100644 .sandcastle/merge-prompt.md
 delete mode 100644 .sandcastle/review-prompt.md
diff --git a/.sandcastle/critic-prompt.md b/.sandcastle/critic-prompt.md
new file mode 100644
index 0000000..98ec518
--- /dev/null
+++ b/.sandcastle/critic-prompt.md
@@ -0,0 +1,56 @@
+# Critic Agent
+
+Analyze the implementation on branch `{{BRANCH}}` and produce structured findings.
+
+## Task
+
+Run `git diff main...{{BRANCH}}` to see all changes. Examine the diff carefully. For each issue found, produce a structured finding.
+
+Read `CONTRIBUTING.md` for the project's coding standards.
+
+## Output Format
+
+Output your findings as JSON wrapped in nonce-tagged delimiters. Use EXACTLY this tag format:
+
+```text
+<findings-{{NONCE}}>[...]</findings-{{NONCE}}>
+```
+
+Each finding must have this structure:
+
+```json
+{
+  "file": "path/to/file.ts",
+  "line": 42,
+  "title": "short description of the issue",
+  "severity": "CRITICAL|HIGH|MEDIUM|LOW",
+  "category": "security|logic|performance|architecture|style",
+  "confidence": "HIGH|MEDIUM|LOW",
+  "description": "detailed explanation of why this is a problem",
+  "suggestion": "how to fix it"
+}
+```
+
+If no issues are found, output:
+
+```text
+<findings-{{NONCE}}>[]</findings-{{NONCE}}>
+```
+
+## Rules
+
+- Do NOT modify any files. Do NOT commit. Do NOT push.
+- Only report issues in the CHANGED code (not pre-existing issues).
+- Use HIGH confidence only when you've verified the issue by reading the relevant code.
+- Use MEDIUM confidence for pattern-based detection.
+- Use LOW confidence for style preferences or uncertain issues.
+- Focus on: logic errors, missing edge cases, security issues, type safety violations, test gaps.
+- Do NOT report formatting issues (prettier handles those).
+
+## Completion
+
+After outputting the findings, output:
+
+```text
+<promise>COMPLETE</promise>
+```
diff --git a/.sandcastle/implement-prompt.md b/.sandcastle/implement-prompt.md
index 79f39b1..d132f5c 100644
--- a/.sandcastle/implement-prompt.md
+++ b/.sandcastle/implement-prompt.md
@@ -4,11 +4,11 @@ Implement issue **#{{TASK_ID}}** ("{{ISSUE_TITLE}}") on branch `{{BRANCH}}`.
 
 ## Issue Details
 
-!`gh issue view {{TASK_ID}} --json body,title,labels,comments`
+{{ISSUE_BODY}}
 
-## Recent Commits
+## Review Findings
 
-!`git log -n 10 --format="%h %s" --date=short`
+{{FINDINGS}}
 
 ## Exploration
 
@@ -22,24 +22,26 @@ Read `AGENTS.md` and `CONTRIBUTING.md` for project conventions.
 
 ## Implementation
 
-1. Implement the fix/feature. Follow existing patterns:
+1. If review findings are provided above, cross-validate each one against the code. Fix findings you agree with. Ignore findings that are incorrect or not applicable.
+
+2. If no findings are provided, implement the issue from scratch following existing patterns:
    - Strict TypeScript, JSDoc on public APIs
    - Co-located tests in `*.test.ts` files
    - Zod for runtime validation
 
-2. Before every commit, run the full validation suite:
+3. Before every commit, run the full validation suite:
 
    ```bash
    npm run type-check && npm run test && npm run test:node && npm run test:edge && npm run prettier-check && npm run lint && npm run build && npm run check-build && npm run build:v2 && npm run check-build:v2
    ```
 
-3. Commit with conventional commits:
+4. Commit with conventional commits:
    - `fix: <description>` — bug fix
    - `feat: <description>` — new feature
    - `refactor: <description>` — restructuring
    - `chore: <description>` — tooling/config
 
-4. Push the branch:
+5. Push the branch:
 
    ```bash
    git push -u origin {{BRANCH}}
@@ -51,6 +53,7 @@ Read `AGENTS.md` and `CONTRIBUTING.md` for project conventions.
 - Tests must pass before pushing. Zero type errors, zero test failures.
 - Do not modify unrelated files.
 - Do not bump version numbers.
+- Push BEFORE signaling completion.
 
 ## Completion
 
diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts
index 2153268..cebbea6 100644
--- a/.sandcastle/main.ts
+++ b/.sandcastle/main.ts
@@ -1,28 +1,104 @@
 import * as sandcastle from "@ai-hero/sandcastle";
 import { docker } from "@ai-hero/sandcastle/sandboxes/docker";
+import { execSync } from "node:child_process";
+import crypto from "node:crypto";
+import { z } from "zod";
 
 const BRANCH_PREFIX = "agent/issue";
 const ESCAPED_PREFIX = BRANCH_PREFIX.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
 const BRANCH_PATTERN = new RegExp(`^${ESCAPED_PREFIX}-\\d+-[\\w-]+$`);
 const ISSUE_LABEL = "sandcastle";
-const LABEL_FILTER = `--label "${ISSUE_LABEL}"`;
 const MAX_PLANNER_RETRIES = 5;
+const MAX_CRITIC_ROUNDS = 5;
+const ITERATION_BUDGET = [100, 50, 25, 10, 10];
 const MAX_PARALLEL = 3;
 const DOCKER_IMAGE = "sandcastle-sap-ai";
 
+const FindingSchema = z.object({
+  category: z.enum(["security", "logic", "performance", "architecture", "style"]),
+  confidence: z.enum(["HIGH", "MEDIUM", "LOW"]),
+  description: z.string(),
+  file: z.string(),
+  line: z.number().optional(),
+  severity: z.enum(["CRITICAL", "HIGH", "MEDIUM", "LOW"]),
+  suggestion: z.string().optional(),
+  title: z.string(),
+});
+const FindingsSchema = z.array(FindingSchema);
+type Finding = z.infer<typeof FindingSchema>;
+
+/**
+ * @param f - Finding to compute dedup key for.
+ * @returns Composite key for deterministic deduplication.
+ */
+function findingKey(f: Finding): string {
+  const normalizedTitle = f.title
+    .toLowerCase()
+    .replace(/[^\w\s]/g, "")
+    .replace(/\s+/g, " ")
+    .trim();
+  return `${f.file}::${f.category}::${normalizedTitle}`;
+}
+
+/**
+ * @param stdout - Agent stdout to parse findings from.
+ * @param nonce - Unique tag identifier for this run.
+ * @returns Parsed findings array or null on parse failure.
+ */
+function parseFindings(stdout: string, nonce: string): Finding[] | null {
+  const tagPattern = new RegExp(`<findings-${nonce}>([\\s\\S]*?)<\\/findings-${nonce}>`, "g");
+  const matches = [...stdout.matchAll(tagPattern)];
+  const raw = matches.at(-1)?.[1]?.trim() ?? "[]";
+  const cleaned = raw.replace(/^```(?:json)?\s*\n?/g, "").replace(/\n?```\s*$/g, "");
+  try {
+    return FindingsSchema.parse(JSON.parse(cleaned));
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * @param text - Raw text to strip injection-prone tags from.
+ * @returns Sanitized text safe for prompt injection.
+ */
+function sanitizeForPrompt(text: string): string {
+  return text.replace(/<\/?(?:plan|findings[\w-]*|promise)[^>]*>/gi, "");
+}
+
+// --- Phase 1: Fetch and sanitize issues ---
+
+const rawIssues = JSON.parse(
+  execSync(
+    `gh issue list --state open --json number,title,labels,body --limit 50 --label "${ISSUE_LABEL}"`,
+  ).toString(),
+) as { body: string; labels: { name: string }[]; number: number; title: string }[];
+
+const issuesJson = rawIssues.map((i) => ({
+  body: sanitizeForPrompt(i.body),
+  labels: i.labels.map((l) => l.name),
+  number: i.number,
+  title: i.title,
+}));
+
+if (issuesJson.length === 0) {
+  console.log("No issues with label '%s'. Exiting.", ISSUE_LABEL);
+  process.exit(0);
+}
+
+// --- Phase 2: Plan ---
+
 let workCompleted = false;
 
-for (let iteration = 1; iteration <= MAX_PLANNER_RETRIES; iteration++) {
-  console.log(`\n=== Iteration ${String(iteration)}/${String(MAX_PLANNER_RETRIES)} ===\n`);
+for (let attempt = 1; attempt <= MAX_PLANNER_RETRIES; attempt++) {
+  console.log(`\n=== Planner attempt ${String(attempt)}/${String(MAX_PLANNER_RETRIES)} ===\n`);
 
-  // Phase 1: Plan
   const plan = await sandcastle.run({
     agent: sandcastle.opencode("github-copilot/claude-opus-4.6"),
     maxIterations: 1,
     name: "Planner",
     promptArgs: {
       BRANCH_PREFIX,
-      LABEL_FILTER,
+      ISSUES_JSON: JSON.stringify(issuesJson, null, 2),
     },
     promptFile: "./.sandcastle/plan-prompt.md",
     sandbox: docker({ imageName: DOCKER_IMAGE }),
@@ -31,62 +107,51 @@ for (let iteration = 1; iteration <= MAX_PLANNER_RETRIES; iteration++) {
   const planMatches = [...plan.stdout.matchAll(/<plan>([\s\S]*?)<\/plan>/g)];
   const planMatch = planMatches.at(-1);
   if (!planMatch) {
-    console.error("Planner did not produce a <plan> tag. Skipping iteration.");
+    console.error("Planner did not produce a <plan> tag. Retrying.");
     continue;
   }
 
   const planContent = planMatch[1] ?? "";
-  let issues: { branch: string; id: string; title: string }[];
+  let issues: { body: string; branch: string; id: string; title: string }[];
   try {
     const parsed = JSON.parse(planContent) as { issues: unknown[] };
     if (!Array.isArray(parsed.issues)) {
-      console.error("Planner output missing issues array. Skipping iteration.");
+      console.error("Planner output missing issues array. Retrying.");
       continue;
     }
     const validated = parsed.issues.filter(
-      (entry): entry is { branch: string; id: string; title: string } => {
-        if (typeof entry !== "object" || entry === null) {
-          console.warn("  Skipping non-object issue entry");
-          return false;
-        }
+      (entry): entry is { body: string; branch: string; id: string; title: string } => {
+        if (typeof entry !== "object" || entry === null) return false;
         const item = entry as Record<string, unknown>;
-        if (typeof item.id !== "string" || !/^\d+$/.test(item.id)) {
-          console.warn(`  Skipping issue with invalid id: ${String(item.id)}`);
-          return false;
-        }
-        if (typeof item.branch !== "string") {
-          console.warn("  Skipping issue with missing branch");
-          return false;
-        }
-        if (typeof item.title !== "string") {
-          console.warn("  Skipping issue with missing title");
-          return false;
-        }
-        if (!BRANCH_PATTERN.test(item.branch)) {
-          console.warn(`  Skipping issue with invalid branch: ${item.branch}`);
-          return false;
-        }
+        if (typeof item.id !== "string" || !/^\d+$/.test(item.id)) return false;
+        if (typeof item.branch !== "string" || !BRANCH_PATTERN.test(item.branch)) return false;
+        if (typeof item.title !== "string") return false;
         return true;
       },
     );
-    issues = validated;
+    // Attach sanitized body from our fetched data
+    issues = validated.map((v) => ({
+      ...v,
+      body: issuesJson.find((i) => String(i.number) === v.id)?.body ?? "",
+    }));
   } catch {
-    console.error("Planner produced invalid JSON. Skipping iteration.");
+    console.error("Planner produced invalid JSON. Retrying.");
     continue;
   }
 
   if (issues.length === 0) {
-    console.log("No issues to work on. Exiting.");
+    console.log("No actionable issues. Exiting.");
     workCompleted = true;
     break;
   }
 
-  console.log(`Planning complete. ${String(issues.length)} issue(s) to work in parallel:`);
+  console.log(`Plan: ${String(issues.length)} issue(s) to work on:`);
   for (const issue of issues) {
     console.log(`  #${issue.id}: ${issue.title} → ${issue.branch}`);
   }
 
-  // Phase 2: Execute + Review (semaphore for MAX_PARALLEL)
+  // --- Phase 3: Implement ↔ Critic loop (parallel, max 3) ---
+
   let running = 0;
   const queue: (() => void)[] = [];
   const acquire = () =>
@@ -115,99 +180,186 @@ for (let iteration = 1; iteration <= MAX_PLANNER_RETRIES; iteration++) {
           sandbox: docker({ imageName: DOCKER_IMAGE }),
         });
 
-        const result = await sandbox.run({
-          agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
-          maxIterations: 100,
-          name: "Implementer #" + issue.id,
-          promptArgs: {
-            BRANCH: issue.branch,
-            ISSUE_TITLE: issue.title,
-            TASK_ID: issue.id,
-          },
-          promptFile: "./.sandcastle/implement-prompt.md",
-        });
+        const seenKeys = new Set<string>();
+        let lastFindings: Finding[] = [];
+        let converged = false;
+        let totalCommits = 0;
 
-        if (result.commits.length > 0) {
-          try {
-            await sandbox.run({
+        for (let round = 1; round <= MAX_CRITIC_ROUNDS; round++) {
+          const budget = ITERATION_BUDGET[round - 1] ?? 10;
+          const findingsArg = lastFindings.length > 0 ? JSON.stringify(lastFindings, null, 2) : "";
+
+          console.log(
+            `  #${issue.id} round ${String(round)}/${String(MAX_CRITIC_ROUNDS)} (budget: ${String(budget)})`,
+          );
+
+          // Implementer
+          const impl = await sandbox.run({
+            agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
+            maxIterations: budget,
+            name: `Implementer #${issue.id} R${String(round)}`,
+            promptArgs: {
+              BRANCH: issue.branch,
+              FINDINGS: findingsArg,
+              ISSUE_BODY: issue.body,
+              ISSUE_TITLE: issue.title,
+              TASK_ID: issue.id,
+            },
+            promptFile: "./.sandcastle/implement-prompt.md",
+          });
+
+          totalCommits += impl.commits.length;
+
+          if (round === 1 && impl.commits.length === 0) {
+            console.warn(`  #${issue.id}: 0 commits on round 1. Skipping.`);
+            break;
+          }
+
+          // Critic
+          const nonce = crypto.randomBytes(4).toString("hex");
+
+          let critic = await sandbox.run({
+            agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
+            maxIterations: 1,
+            name: `Critic #${issue.id} R${String(round)}`,
+            promptArgs: {
+              BRANCH: issue.branch,
+              NONCE: nonce,
+            },
+            promptFile: "./.sandcastle/critic-prompt.md",
+          });
+
+          let findings = parseFindings(critic.stdout, nonce);
+
+          // Retry once on parse failure
+          if (findings === null) {
+            console.warn(`  #${issue.id}: Critic parse failed. Retrying.`);
+            critic = await sandbox.run({
               agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
-              maxIterations: 10,
-              name: "Reviewer #" + issue.id,
+              maxIterations: 1,
+              name: `Critic #${issue.id} R${String(round)} retry`,
               promptArgs: {
                 BRANCH: issue.branch,
+                NONCE: nonce,
               },
-              promptFile: "./.sandcastle/review-prompt.md",
+              promptFile: "./.sandcastle/critic-prompt.md",
             });
-          } catch (reviewError: unknown) {
-            const msg = reviewError instanceof Error ? reviewError.message : String(reviewError);
-            console.warn(`  Reviewer for #${issue.id} failed, proceeding unreviewed: ${msg}`);
+            findings = parseFindings(critic.stdout, nonce);
+          }
+
+          if (findings === null) {
+            console.warn(`  #${issue.id}: Critic failed twice. Breaking (non-converged).`);
+            break;
+          }
+
+          // Dedup
+          const newFindings = findings.filter(
+            (f) => f.confidence !== "LOW" && !seenKeys.has(findingKey(f)),
+          );
+          for (const f of newFindings) {
+            seenKeys.add(findingKey(f));
           }
+
+          console.log(
+            `  #${issue.id}: ${String(findings.length)} findings, ${String(newFindings.length)} new`,
+          );
+
+          if (newFindings.length === 0) {
+            converged = true;
+            break;
+          }
+
+          lastFindings = newFindings;
         }
 
-        return result;
+        // --- Final validation (host-side, deterministic) ---
+        if (totalCommits > 0) {
+          const cwd = sandbox.worktreePath;
+          let validationPassed = false;
+
+          try {
+            execSync(
+              "npm run type-check && npm run test && npm run test:node && npm run test:edge && npm run prettier-check && npm run lint && npm run build && npm run check-build && npm run build:v2 && npm run check-build:v2",
+              { cwd, stdio: "pipe" },
+            );
+            validationPassed = true;
+          } catch {
+            console.warn(`  #${issue.id}: Validation failed.`);
+          }
+
+          // Rebase on latest main
+          try {
+            execSync("git fetch origin main && git rebase origin/main", {
+              cwd,
+              stdio: "pipe",
+            });
+            if (validationPassed) {
+              // Post-rebase smoke test
+              try {
+                execSync("npm run type-check && npm run test", {
+                  cwd,
+                  stdio: "pipe",
+                });
+              } catch {
+                validationPassed = false;
+              }
+            }
+            execSync("git push --force-with-lease", { cwd, stdio: "pipe" });
+          } catch {
+            // Rebase failed — push un-rebased
+            try {
+              execSync("git rebase --abort", { cwd, stdio: "pipe" });
+            } catch {
+              /* empty */
+            }
+            execSync("git push", { cwd, stdio: "pipe" });
+          }
+
+          // Create PR
+          const isDraft = !converged || !validationPassed;
+          const draftFlag = isDraft ? " --draft" : "";
+          const outstandingNote =
+            !converged && lastFindings.length > 0
+              ? `\n\n⚠️ Outstanding findings:\n${lastFindings.map((f) => `- [${f.severity}] ${f.file}: ${f.title}`).join("\n")}`
+              : "";
+          const validationNote = !validationPassed
+            ? "\n\n⚠️ Validation did not pass. Manual review required."
+            : "";
+
+          const prTitle = `fix: resolve #${issue.id} — ${issue.title}`;
+          const prBody = `## Description\n\nAutomated fix for #${issue.id}: ${issue.title}\n\n## Type of Change\n\n- [x] Bug fix (non-breaking change that fixes an issue)\n\n## Checklist\n\n- [x] I have run validation suite\n- [x] My changes follow the existing code style\n\n## Related Issues\n\nFixes #${issue.id}${outstandingNote}${validationNote}`;
+
+          try {
+            execSync(
+              `gh pr create${draftFlag} --head "${issue.branch}" --base main --title "${prTitle}" --body "${prBody.replace(/"/g, '\\"')}"`,
+              { cwd, stdio: "pipe" },
+            );
+            console.log(`  #${issue.id}: PR created${isDraft ? " (draft)" : ""}.`);
+            workCompleted = true;
+          } catch (err: unknown) {
+            const msg = err instanceof Error ? err.message : String(err);
+            console.error(`  #${issue.id}: PR creation failed: ${msg}`);
+          }
+        }
+
+        return { converged, issue, totalCommits };
       } finally {
         release();
       }
     }),
   );
 
+  // Log failures
   for (const [i, outcome] of settled.entries()) {
     if (outcome.status === "rejected") {
-      const currentIssue = issues[i];
+      const issue = issues[i];
       const reason: unknown = outcome.reason;
-      const errorMessage =
-        reason instanceof Error ? (reason.stack ?? reason.message) : String(reason);
-      console.error(
-        `  ✗ #${currentIssue?.id ?? String(i)} (${currentIssue?.branch ?? "unknown"}) failed: ${errorMessage}`,
-      );
+      const msg = reason instanceof Error ? (reason.stack ?? reason.message) : String(reason);
+      console.error(`  ✗ #${issue?.id ?? String(i)} failed: ${msg}`);
     }
   }
 
-  const completedIssues = settled
-    .map((outcome, i) => ({ issue: issues[i], outcome }))
-    .filter(
-      (
-        entry,
-      ): entry is {
-        issue: (typeof issues)[number];
-        outcome: PromiseFulfilledResult<Awaited<ReturnType<typeof sandcastle.run>>>;
-      } =>
-        entry.issue !== undefined &&
-        entry.outcome.status === "fulfilled" &&
-        entry.outcome.value.commits.length > 0,
-    )
-    .map((entry) => entry.issue);
-
-  const completedBranches = completedIssues.map((i) => i.branch);
-
-  if (completedBranches.length === 0) {
-    console.log("No commits produced. Nothing to merge.");
-    break;
-  }
-
-  // Phase 3: Merge
-  try {
-    await sandcastle.run({
-      agent: sandcastle.opencode("github-copilot/claude-opus-4.6"),
-      maxIterations: 10,
-      name: "Merger",
-      promptArgs: {
-        BRANCHES: completedBranches.map((b) => `- ${b}`).join("\n"),
-        ISSUES: completedIssues.map((i) => `- #${i.id}: ${i.title}`).join("\n"),
-      },
-      promptFile: "./.sandcastle/merge-prompt.md",
-      sandbox: docker({ imageName: DOCKER_IMAGE }),
-    });
-
-    console.log("\nPR created.");
-    workCompleted = true;
-    break;
-  } catch (error: unknown) {
-    const errorMessage = error instanceof Error ? (error.stack ?? error.message) : String(error);
-    console.error(`Merge phase failed: ${errorMessage}`);
-    console.error("Branches are pushed and available for manual merge.");
-    break;
-  }
+  break; // Plan executed — exit planner retry loop
 }
 
 console.log("\nAll done.");
diff --git a/.sandcastle/merge-prompt.md b/.sandcastle/merge-prompt.md
deleted file mode 100644
index 11aec41..0000000
--- a/.sandcastle/merge-prompt.md
+++ /dev/null
@@ -1,68 +0,0 @@
-# Merge Agent
-
-Merge completed branches and create a pull request.
-
-## Inputs
-
-- Branches: {{BRANCHES}}
-- Issues: {{ISSUES}}
-
-## Current State
-
-!`git status --short`
-
-!`git branch -a | grep agent/ || true`
-
-## Steps
-
-1. Create a merge branch from main:
-
-   ```bash
-   git branch -D agent/merge-batch 2>/dev/null || true
-   git checkout -b agent/merge-batch origin/main
-   ```
-
-2. Ensure working tree is clean.
-
-3. Merge each branch with a merge commit:
-
-   ```bash
-   git merge --no-ff <branch>
-   ```
-
-   Process branches in the order given.
-
-4. If a merge conflict occurs:
-   - Read the conflicting files.
-   - Resolve favoring the incoming branch changes. Validation in step 6 will catch regressions.
-   - Stage resolved files and complete the merge.
-
-5. After all merges, verify that `git diff main...agent/merge-batch` shows changes. If there are no file changes compared to main, do NOT create a PR — output `<promise>COMPLETE</promise>` and stop.
-
-6. Run full validation:
-
-   ```bash
-   npm run type-check && npm run test && npm run test:node && npm run test:edge && npm run prettier-check && npm run lint && npm run build && npm run check-build && npm run build:v2 && npm run check-build:v2
-   ```
-
-7. If validation fails, fix the issue and amend the merge commit.
-
-8. Push the branch and create a pull request. Read `.github/PULL_REQUEST_TEMPLATE.md` and fill in all sections. Use conventional commit format for the title (`feat:`, `fix:`, `chore:`, `refactor:`). Include `Fixes #N` for each resolved issue in the Related Issues section.
-
-## Rules
-
-- Every merge uses `--no-ff` to preserve branch history.
-- Validation must pass after all merges complete.
-- Do not push directly to main — create a PR for human review instead.
-- Do not force-push.
-- Do not delete remote branches (leave for cleanup elsewhere).
-- Do not close issues manually — the PR merge handles it via "Fixes #N" in the body.
-- Do not create a PR if there are zero file changes compared to main.
-
-## Completion
-
-When all branches are merged, validation passes, and the PR is created, output:
-
-```text
-<promise>COMPLETE</promise>
-```
diff --git a/.sandcastle/plan-prompt.md b/.sandcastle/plan-prompt.md
index 9c67d7b..5d34138 100644
--- a/.sandcastle/plan-prompt.md
+++ b/.sandcastle/plan-prompt.md
@@ -9,7 +9,7 @@ Read `AGENTS.md` for project conventions.
 
 ## Open Issues
 
-!`gh issue list --state open --json number,title,labels,body --limit 50 {{LABEL_FILTER}}`
+{{ISSUES_JSON}}
 
 ## Steps
 
diff --git a/.sandcastle/review-prompt.md b/.sandcastle/review-prompt.md
deleted file mode 100644
index 854c9f1..0000000
--- a/.sandcastle/review-prompt.md
+++ /dev/null
@@ -1,67 +0,0 @@
-# Review Agent
-
-Review and validate the implementation on branch `{{BRANCH}}`.
-
-## Setup
-
-```bash
-git checkout {{BRANCH}}
-```
-
-## Changes to Review
-
-!`git diff --stat main...{{BRANCH}}`
-
-## Commits on This Branch
-
-!`git log main..{{BRANCH}} --oneline`
-
-## Validation
-
-Run the full CI validation suite. Every command must exit 0:
-
-```bash
-npm run type-check
-npm run test
-npm run test:node
-npm run test:edge
-npm run prettier-check
-npm run lint
-npm run build
-npm run check-build
-npm run build:v2
-npm run check-build:v2
-```
-
-## On Failure
-
-If any command fails:
-
-1. Read the error output.
-2. Fix the issue in the source code.
-3. Commit the fix: `fix: <describe what was wrong>`.
-4. Re-run the full suite from the top.
-5. Repeat until all commands pass.
-
-## Quality Checks
-
-After validation passes, verify compliance with the coding standards in `CONTRIBUTING.md`. Fix violations and commit.
-
-## Rules
-
-- Zero errors, zero warnings from type-check and lint.
-- All tests pass in both Node.js and Edge environments.
-- Both V3 and V2 builds succeed.
-- Do not skip or disable tests.
-
-## Completion
-
-When the full suite passes cleanly, push the fixes and output:
-
-```bash
-git push
-```
-
-```text
-<promise>COMPLETE</promise>
-```

From 6a5e9670d0b37ada2c3cfed5490db9536117ca39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 01:46:33 +0200
Subject: [PATCH 02/31] fix: address review findings (shell injection, false
 convergence, nesting, zod validation)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Replace execSync with execFileSync for gh pr create (prevents shell injection)
- Guard parseFindings against empty matches (prevents false convergence)
- Add try/catch on gh issue list startup call
- Guard git push in rebase catch block
- Extract finalizeIssue function (reduces nesting from 6+ to 3 levels)
- Add zod schema for rawIssues (replaces unsafe 'as' cast)
- Implement validation retry round per spec (one more implement→critic if budget remains)
---
 .sandcastle/main.ts | 261 ++++++++++++++++++++++++++++++++------------
 1 file changed, 192 insertions(+), 69 deletions(-)

diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts
index cebbea6..8d42337 100644
--- a/.sandcastle/main.ts
+++ b/.sandcastle/main.ts
@@ -1,6 +1,6 @@
 import * as sandcastle from "@ai-hero/sandcastle";
 import { docker } from "@ai-hero/sandcastle/sandboxes/docker";
-import { execSync } from "node:child_process";
+import { execFileSync, execSync } from "node:child_process";
 import crypto from "node:crypto";
 import { z } from "zod";
 
@@ -27,6 +27,171 @@ const FindingSchema = z.object({
 const FindingsSchema = z.array(FindingSchema);
 type Finding = z.infer<typeof FindingSchema>;
 
+// --- Zod schema for GitHub issue list response (fix #6) ---
+
+const RawIssueSchema = z.object({
+  body: z.string(),
+  labels: z.array(z.object({ name: z.string() })),
+  number: z.number(),
+  title: z.string(),
+});
+const RawIssuesSchema = z.array(RawIssueSchema);
+
+// --- Type alias for sandbox instance ---
+
+type SandboxInstance = Awaited<ReturnType<typeof sandcastle.createSandbox>>;
+
+/**
+ * @param sandbox - The sandcastle sandbox instance.
+ * @param cwd - Working directory (worktree path).
+ * @param issue - Issue metadata.
+ * @param issue.body
+ * @param issue.branch
+ * @param issue.id
+ * @param issue.title
+ * @param converged - Whether the critic loop converged.
+ * @param lastFindings - Outstanding findings from the last round.
+ * @param round - The round at which the critic loop ended.
+ * @returns Whether work was completed (PR created).
+ */
+async function finalizeIssue(
+  sandbox: SandboxInstance,
+  cwd: string,
+  issue: { body: string; branch: string; id: string; title: string },
+  converged: boolean,
+  lastFindings: Finding[],
+  round: number,
+): Promise<boolean> {
+  let validationPassed = false;
+
+  try {
+    execSync(
+      "npm run type-check && npm run test && npm run test:node && npm run test:edge && npm run prettier-check && npm run lint && npm run build && npm run check-build && npm run build:v2 && npm run check-build:v2",
+      { cwd, stdio: "pipe" },
+    );
+    validationPassed = true;
+  } catch {
+    console.warn(`  #${issue.id}: Validation failed.`);
+  }
+
+  // --- Validation retry round (fix #7) ---
+  if (!validationPassed && round < MAX_CRITIC_ROUNDS) {
+    const retryBudget = ITERATION_BUDGET[MAX_CRITIC_ROUNDS - 1] ?? 10;
+    console.log(
+      `  #${issue.id}: Retrying one more implement→critic round (budget: ${String(retryBudget)})`,
+    );
+
+    const retryNonce = crypto.randomBytes(4).toString("hex");
+
+    await sandbox.run({
+      agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
+      maxIterations: retryBudget,
+      name: `Implementer #${issue.id} retry`,
+      promptArgs: {
+        BRANCH: issue.branch,
+        FINDINGS: lastFindings.length > 0 ? JSON.stringify(lastFindings, null, 2) : "",
+        ISSUE_BODY: issue.body,
+        ISSUE_TITLE: issue.title,
+        TASK_ID: issue.id,
+      },
+      promptFile: "./.sandcastle/implement-prompt.md",
+    });
+
+    await sandbox.run({
+      agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
+      maxIterations: 1,
+      name: `Critic #${issue.id} retry-validation`,
+      promptArgs: {
+        BRANCH: issue.branch,
+        NONCE: retryNonce,
+      },
+      promptFile: "./.sandcastle/critic-prompt.md",
+    });
+
+    // Re-validate after retry
+    try {
+      execSync(
+        "npm run type-check && npm run test && npm run test:node && npm run test:edge && npm run prettier-check && npm run lint && npm run build && npm run check-build && npm run build:v2 && npm run check-build:v2",
+        { cwd, stdio: "pipe" },
+      );
+      validationPassed = true;
+      console.log(`  #${issue.id}: Validation passed after retry round.`);
+    } catch {
+      console.warn(`  #${issue.id}: Validation still fails after retry. Will create draft PR.`);
+    }
+  }
+
+  // Rebase on latest main
+  try {
+    execSync("git fetch origin main && git rebase origin/main", {
+      cwd,
+      stdio: "pipe",
+    });
+    if (validationPassed) {
+      // Post-rebase smoke test
+      try {
+        execSync("npm run type-check && npm run test", {
+          cwd,
+          stdio: "pipe",
+        });
+      } catch {
+        validationPassed = false;
+      }
+    }
+    execSync("git push --force-with-lease", { cwd, stdio: "pipe" });
+  } catch {
+    // Rebase failed — push un-rebased
+    try {
+      execSync("git rebase --abort", { cwd, stdio: "pipe" });
+    } catch {
+      /* empty */
+    }
+    try {
+      execSync("git push", { cwd, stdio: "pipe" });
+    } catch (pushErr: unknown) {
+      const pushMsg = pushErr instanceof Error ? pushErr.message : String(pushErr);
+      console.warn(`  #${issue.id}: git push failed after rebase abort: ${pushMsg}`);
+    }
+  }
+
+  // Create PR (fix #1: use execFileSync to avoid shell injection)
+  const isDraft = !converged || !validationPassed;
+  const outstandingNote =
+    !converged && lastFindings.length > 0
+      ? `\n\n⚠️ Outstanding findings:\n${lastFindings.map((f) => `- [${f.severity}] ${f.file}: ${f.title}`).join("\n")}`
+      : "";
+  const validationNote = !validationPassed
+    ? "\n\n⚠️ Validation did not pass. Manual review required."
+    : "";
+
+  const prTitle = `fix: resolve #${issue.id} — ${issue.title}`;
+  const prBody = `## Description\n\nAutomated fix for #${issue.id}: ${issue.title}\n\n## Type of Change\n\n- [x] Bug fix (non-breaking change that fixes an issue)\n\n## Checklist\n\n- [x] I have run validation suite\n- [x] My changes follow the existing code style\n\n## Related Issues\n\nFixes #${issue.id}${outstandingNote}${validationNote}`;
+
+  const prArgs = [
+    "pr",
+    "create",
+    ...(isDraft ? ["--draft"] : []),
+    "--head",
+    issue.branch,
+    "--base",
+    "main",
+    "--title",
+    prTitle,
+    "--body",
+    prBody,
+  ];
+
+  try {
+    execFileSync("gh", prArgs, { cwd, encoding: "utf-8", stdio: "pipe" });
+    console.log(`  #${issue.id}: PR created${isDraft ? " (draft)" : ""}.`);
+    return true;
+  } catch (err: unknown) {
+    const msg = err instanceof Error ? err.message : String(err);
+    console.error(`  #${issue.id}: PR creation failed: ${msg}`);
+    return false;
+  }
+}
+
 /**
  * @param f - Finding to compute dedup key for.
  * @returns Composite key for deterministic deduplication.
@@ -48,6 +213,9 @@ function findingKey(f: Finding): string {
 function parseFindings(stdout: string, nonce: string): Finding[] | null {
   const tagPattern = new RegExp(`<findings-${nonce}>([\\s\\S]*?)<\\/findings-${nonce}>`, "g");
   const matches = [...stdout.matchAll(tagPattern)];
+  if (matches.length === 0) {
+    return null;
+  }
   const raw = matches.at(-1)?.[1]?.trim() ?? "[]";
   const cleaned = raw.replace(/^```(?:json)?\s*\n?/g, "").replace(/\n?```\s*$/g, "");
   try {
@@ -67,11 +235,18 @@ function sanitizeForPrompt(text: string): string {
 
 // --- Phase 1: Fetch and sanitize issues ---
 
-const rawIssues = JSON.parse(
-  execSync(
+let rawIssuesJson: string;
+try {
+  rawIssuesJson = execSync(
     `gh issue list --state open --json number,title,labels,body --limit 50 --label "${ISSUE_LABEL}"`,
-  ).toString(),
-) as { body: string; labels: { name: string }[]; number: number; title: string }[];
+    { encoding: "utf-8" },
+  );
+} catch {
+  console.error("Failed to fetch issues. Ensure gh is installed and authenticated.");
+  process.exit(1);
+}
+
+const rawIssues = RawIssuesSchema.parse(JSON.parse(rawIssuesJson));
 
 const issuesJson = rawIssues.map((i) => ({
   body: sanitizeForPrompt(i.body),
@@ -184,8 +359,10 @@ for (let attempt = 1; attempt <= MAX_PLANNER_RETRIES; attempt++) {
         let lastFindings: Finding[] = [];
         let converged = false;
         let totalCommits = 0;
+        let lastRound = 0;
 
         for (let round = 1; round <= MAX_CRITIC_ROUNDS; round++) {
+          lastRound = round;
           const budget = ITERATION_BUDGET[round - 1] ?? 10;
           const findingsArg = lastFindings.length > 0 ? JSON.stringify(lastFindings, null, 2) : "";
 
@@ -272,73 +449,19 @@ for (let attempt = 1; attempt <= MAX_PLANNER_RETRIES; attempt++) {
           lastFindings = newFindings;
         }
 
-        // --- Final validation (host-side, deterministic) ---
+        // --- Final validation, rebase, and PR creation ---
         if (totalCommits > 0) {
           const cwd = sandbox.worktreePath;
-          let validationPassed = false;
-
-          try {
-            execSync(
-              "npm run type-check && npm run test && npm run test:node && npm run test:edge && npm run prettier-check && npm run lint && npm run build && npm run check-build && npm run build:v2 && npm run check-build:v2",
-              { cwd, stdio: "pipe" },
-            );
-            validationPassed = true;
-          } catch {
-            console.warn(`  #${issue.id}: Validation failed.`);
-          }
-
-          // Rebase on latest main
-          try {
-            execSync("git fetch origin main && git rebase origin/main", {
-              cwd,
-              stdio: "pipe",
-            });
-            if (validationPassed) {
-              // Post-rebase smoke test
-              try {
-                execSync("npm run type-check && npm run test", {
-                  cwd,
-                  stdio: "pipe",
-                });
-              } catch {
-                validationPassed = false;
-              }
-            }
-            execSync("git push --force-with-lease", { cwd, stdio: "pipe" });
-          } catch {
-            // Rebase failed — push un-rebased
-            try {
-              execSync("git rebase --abort", { cwd, stdio: "pipe" });
-            } catch {
-              /* empty */
-            }
-            execSync("git push", { cwd, stdio: "pipe" });
-          }
-
-          // Create PR
-          const isDraft = !converged || !validationPassed;
-          const draftFlag = isDraft ? " --draft" : "";
-          const outstandingNote =
-            !converged && lastFindings.length > 0
-              ? `\n\n⚠️ Outstanding findings:\n${lastFindings.map((f) => `- [${f.severity}] ${f.file}: ${f.title}`).join("\n")}`
-              : "";
-          const validationNote = !validationPassed
-            ? "\n\n⚠️ Validation did not pass. Manual review required."
-            : "";
-
-          const prTitle = `fix: resolve #${issue.id} — ${issue.title}`;
-          const prBody = `## Description\n\nAutomated fix for #${issue.id}: ${issue.title}\n\n## Type of Change\n\n- [x] Bug fix (non-breaking change that fixes an issue)\n\n## Checklist\n\n- [x] I have run validation suite\n- [x] My changes follow the existing code style\n\n## Related Issues\n\nFixes #${issue.id}${outstandingNote}${validationNote}`;
-
-          try {
-            execSync(
-              `gh pr create${draftFlag} --head "${issue.branch}" --base main --title "${prTitle}" --body "${prBody.replace(/"/g, '\\"')}"`,
-              { cwd, stdio: "pipe" },
-            );
-            console.log(`  #${issue.id}: PR created${isDraft ? " (draft)" : ""}.`);
+          const prCreated = await finalizeIssue(
+            sandbox,
+            cwd,
+            issue,
+            converged,
+            lastFindings,
+            lastRound,
+          );
+          if (prCreated) {
             workCompleted = true;
-          } catch (err: unknown) {
-            const msg = err instanceof Error ? err.message : String(err);
-            console.error(`  #${issue.id}: PR creation failed: ${msg}`);
           }
         }
 

From d7cd5e2226f1ac2bd00e8596a8856be8e622dbbc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 01:52:46 +0200
Subject: [PATCH 03/31] fix: guard retry calls, split rebase logic, remove dead
 critic retry

---
 .sandcastle/main.ts | 62 +++++++++++++++++++++++++--------------------
 1 file changed, 34 insertions(+), 28 deletions(-)

diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts
index 8d42337..9cc64b8 100644
--- a/.sandcastle/main.ts
+++ b/.sandcastle/main.ts
@@ -81,32 +81,26 @@ async function finalizeIssue(
       `  #${issue.id}: Retrying one more implement→critic round (budget: ${String(retryBudget)})`,
     );
 
-    const retryNonce = crypto.randomBytes(4).toString("hex");
-
-    await sandbox.run({
-      agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
-      maxIterations: retryBudget,
-      name: `Implementer #${issue.id} retry`,
-      promptArgs: {
-        BRANCH: issue.branch,
-        FINDINGS: lastFindings.length > 0 ? JSON.stringify(lastFindings, null, 2) : "",
-        ISSUE_BODY: issue.body,
-        ISSUE_TITLE: issue.title,
-        TASK_ID: issue.id,
-      },
-      promptFile: "./.sandcastle/implement-prompt.md",
-    });
-
-    await sandbox.run({
-      agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
-      maxIterations: 1,
-      name: `Critic #${issue.id} retry-validation`,
-      promptArgs: {
-        BRANCH: issue.branch,
-        NONCE: retryNonce,
-      },
-      promptFile: "./.sandcastle/critic-prompt.md",
-    });
+    try {
+      await sandbox.run({
+        agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
+        maxIterations: retryBudget,
+        name: `Implementer #${issue.id} retry`,
+        promptArgs: {
+          BRANCH: issue.branch,
+          FINDINGS: lastFindings.length > 0 ? JSON.stringify(lastFindings, null, 2) : "",
+          ISSUE_BODY: issue.body,
+          ISSUE_TITLE: issue.title,
+          TASK_ID: issue.id,
+        },
+        promptFile: "./.sandcastle/implement-prompt.md",
+      });
+    } catch (retryErr: unknown) {
+      const retryMsg = retryErr instanceof Error ? retryErr.message : String(retryErr);
+      console.warn(
+        `  #${issue.id}: Implementer retry threw: ${retryMsg}. Falling through to PR creation.`,
+      );
+    }
 
     // Re-validate after retry
     try {
@@ -122,11 +116,13 @@ async function finalizeIssue(
   }
 
   // Rebase on latest main
+  let rebaseSucceeded = false;
   try {
     execSync("git fetch origin main && git rebase origin/main", {
       cwd,
       stdio: "pipe",
     });
+    rebaseSucceeded = true;
     if (validationPassed) {
       // Post-rebase smoke test
       try {
@@ -138,9 +134,8 @@ async function finalizeIssue(
         validationPassed = false;
       }
     }
-    execSync("git push --force-with-lease", { cwd, stdio: "pipe" });
   } catch {
-    // Rebase failed — push un-rebased
+    // Rebase failed — abort and push un-rebased
     try {
       execSync("git rebase --abort", { cwd, stdio: "pipe" });
     } catch {
@@ -154,6 +149,17 @@ async function finalizeIssue(
     }
   }
 
+  if (rebaseSucceeded) {
+    try {
+      execSync("git push --force-with-lease", { cwd, stdio: "pipe" });
+    } catch (pushErr: unknown) {
+      const pushMsg = pushErr instanceof Error ? pushErr.message : String(pushErr);
+      console.warn(
+        `  #${issue.id}: git push --force-with-lease failed (branch un-pushed, PR creation will fail gracefully): ${pushMsg}`,
+      );
+    }
+  }
+
   // Create PR (fix #1: use execFileSync to avoid shell injection)
   const isDraft = !converged || !validationPassed;
   const outstandingNote =

From bfad1d93ca5ae5715634eb5693185fbdd5233290 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 01:57:37 +0200
Subject: [PATCH 04/31] fix: handle nullable issue body and guard JSON parse

---
 .sandcastle/main.ts | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts
index 9cc64b8..8dde19e 100644
--- a/.sandcastle/main.ts
+++ b/.sandcastle/main.ts
@@ -30,7 +30,10 @@ type Finding = z.infer<typeof FindingSchema>;
 // --- Zod schema for GitHub issue list response (fix #6) ---
 
 const RawIssueSchema = z.object({
-  body: z.string(),
+  body: z
+    .string()
+    .nullable()
+    .transform((b) => b ?? ""),
   labels: z.array(z.object({ name: z.string() })),
   number: z.number(),
   title: z.string(),
@@ -252,7 +255,13 @@ try {
   process.exit(1);
 }
 
-const rawIssues = RawIssuesSchema.parse(JSON.parse(rawIssuesJson));
+let rawIssues: z.infer<typeof RawIssuesSchema>;
+try {
+  rawIssues = RawIssuesSchema.parse(JSON.parse(rawIssuesJson));
+} catch {
+  console.error("Failed to parse issues JSON. Unexpected format from gh CLI.");
+  process.exit(1);
+}
 
 const issuesJson = rawIssues.map((i) => ({
   body: sanitizeForPrompt(i.body),

From e6b613ac3a7a23567fd03416c82a810da9fc1c3d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 02:02:03 +0200
Subject: [PATCH 05/31] =?UTF-8?q?fix:=20distinguish=20stalled=20from=20con?=
 =?UTF-8?q?verged=20(re-reported=20findings=20=E2=86=92=20draft=20PR)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .sandcastle/main.ts | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts
index 8dde19e..4263733 100644
--- a/.sandcastle/main.ts
+++ b/.sandcastle/main.ts
@@ -457,7 +457,12 @@ for (let attempt = 1; attempt <= MAX_PLANNER_RETRIES; attempt++) {
           );
 
           if (newFindings.length === 0) {
-            converged = true;
+            if (findings.length > 0) {
+              lastFindings = findings;
+              converged = false;
+            } else {
+              converged = true;
+            }
             break;
           }
 

From 5901fbfde1159466a3546ef825c9138e6ee86c73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 02:08:33 +0200
Subject: [PATCH 06/31] fix: LOW-only findings should not prevent convergence

---
 .sandcastle/main.ts | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts
index 4263733..09e7c8c 100644
--- a/.sandcastle/main.ts
+++ b/.sandcastle/main.ts
@@ -457,8 +457,9 @@ for (let attempt = 1; attempt <= MAX_PLANNER_RETRIES; attempt++) {
           );
 
           if (newFindings.length === 0) {
-            if (findings.length > 0) {
-              lastFindings = findings;
+            const nonLowFindings = findings.filter((f) => f.confidence !== "LOW");
+            if (nonLowFindings.length > 0) {
+              lastFindings = nonLowFindings;
               converged = false;
             } else {
               converged = true;

From ee43546a4fa47f65e9308c8d08adb384de8fd4b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 02:17:08 +0200
Subject: [PATCH 07/31] fix: log validation errors, conditional checklist,
 derive PR title from labels

---
 .sandcastle/main.ts | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts
index 09e7c8c..f5729cb 100644
--- a/.sandcastle/main.ts
+++ b/.sandcastle/main.ts
@@ -48,10 +48,11 @@ type SandboxInstance = Awaited<ReturnType<typeof sandcastle.createSandbox>>;
  * @param sandbox - The sandcastle sandbox instance.
  * @param cwd - Working directory (worktree path).
  * @param issue - Issue metadata.
- * @param issue.body
- * @param issue.branch
- * @param issue.id
- * @param issue.title
+ * @param issue.body - Sanitized issue body text.
+ * @param issue.branch - Git branch name for this issue.
+ * @param issue.id - GitHub issue number as string.
+ * @param issue.labels - Issue label names.
+ * @param issue.title - Issue title.
  * @param converged - Whether the critic loop converged.
  * @param lastFindings - Outstanding findings from the last round.
  * @param round - The round at which the critic loop ended.
@@ -60,7 +61,7 @@ type SandboxInstance = Awaited<ReturnType<typeof sandcastle.createSandbox>>;
 async function finalizeIssue(
   sandbox: SandboxInstance,
   cwd: string,
-  issue: { body: string; branch: string; id: string; title: string },
+  issue: { body: string; branch: string; id: string; labels: string[]; title: string },
   converged: boolean,
   lastFindings: Finding[],
   round: number,
@@ -73,15 +74,19 @@ async function finalizeIssue(
       { cwd, stdio: "pipe" },
     );
     validationPassed = true;
-  } catch {
-    console.warn(`  #${issue.id}: Validation failed.`);
+  } catch (err: unknown) {
+    const stderr =
+      err instanceof Error && "stderr" in err
+        ? String((err as { stderr: unknown }).stderr).slice(0, 500)
+        : "";
+    console.warn(`  #${issue.id}: Validation failed.${stderr ? `\n${stderr}` : ""}`);
   }
 
   // --- Validation retry round (fix #7) ---
   if (!validationPassed && round < MAX_CRITIC_ROUNDS) {
     const retryBudget = ITERATION_BUDGET[MAX_CRITIC_ROUNDS - 1] ?? 10;
     console.log(
-      `  #${issue.id}: Retrying one more implement→critic round (budget: ${String(retryBudget)})`,
+      `  #${issue.id}: Retrying one more implement round (budget: ${String(retryBudget)})`,
     );
 
     try {
@@ -173,8 +178,14 @@ async function finalizeIssue(
     ? "\n\n⚠️ Validation did not pass. Manual review required."
     : "";
 
-  const prTitle = `fix: resolve #${issue.id} — ${issue.title}`;
-  const prBody = `## Description\n\nAutomated fix for #${issue.id}: ${issue.title}\n\n## Type of Change\n\n- [x] Bug fix (non-breaking change that fixes an issue)\n\n## Checklist\n\n- [x] I have run validation suite\n- [x] My changes follow the existing code style\n\n## Related Issues\n\nFixes #${issue.id}${outstandingNote}${validationNote}`;
+  const validationCheck = validationPassed ? "- [x]" : "- [ ]";
+  const commitPrefix = issue.labels.includes("feature request")
+    ? "feat"
+    : issue.labels.includes("bug")
+      ? "fix"
+      : "chore";
+  const prTitle = `${commitPrefix}: resolve #${issue.id} — ${issue.title}`;
+  const prBody = `## Description\n\nAutomated fix for #${issue.id}: ${issue.title}\n\n## Type of Change\n\n- [x] Bug fix (non-breaking change that fixes an issue)\n\n## Checklist\n\n${validationCheck} I have run validation suite\n- [x] My changes follow the existing code style\n\n## Related Issues\n\nFixes #${issue.id}${outstandingNote}${validationNote}`;
 
   const prArgs = [
     "pr",
@@ -302,7 +313,7 @@ for (let attempt = 1; attempt <= MAX_PLANNER_RETRIES; attempt++) {
   }
 
   const planContent = planMatch[1] ?? "";
-  let issues: { body: string; branch: string; id: string; title: string }[];
+  let issues: { body: string; branch: string; id: string; labels: string[]; title: string }[];
   try {
     const parsed = JSON.parse(planContent) as { issues: unknown[] };
     if (!Array.isArray(parsed.issues)) {
@@ -323,6 +334,7 @@ for (let attempt = 1; attempt <= MAX_PLANNER_RETRIES; attempt++) {
     issues = validated.map((v) => ({
       ...v,
       body: issuesJson.find((i) => String(i.number) === v.id)?.body ?? "",
+      labels: issuesJson.find((i) => String(i.number) === v.id)?.labels ?? [],
     }));
   } catch {
     console.error("Planner produced invalid JSON. Retrying.");

From d70bc29ab551e2f28b2cd66e79ba439562e2625f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 13:38:10 +0200
Subject: [PATCH 08/31] refactor: extract sandcastle into modular architecture
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Split main.ts (525 lines) into 6 self-contained modules:
- types.ts: shared domain types (TaskSpec, Finding, LoopResult, FinalizeResult)
- refinement-loop.ts: reusable implement↔critic loop engine
- finalizer.ts: validation, rebase, PR creation
- concurrency-pool.ts: semaphore utility
- task-source.ts: TaskSource interface + GithubIssueSource
- main.ts: 74-line thin orchestrator wiring all modules

The refinement loop is now reusable by any task source (GitHub issues,
CI failures, manual triggers) without coupling to the planner.
---
 .sandcastle/concurrency-pool.ts |  45 +++
 .sandcastle/finalizer.ts        | 164 ++++++++++
 .sandcastle/main.ts             | 545 +++-----------------------------
 .sandcastle/refinement-loop.ts  | 193 +++++++++++
 .sandcastle/task-source.ts      | 190 +++++++++++
 .sandcastle/types.ts            |  63 ++++
 6 files changed, 703 insertions(+), 497 deletions(-)
 create mode 100644 .sandcastle/concurrency-pool.ts
 create mode 100644 .sandcastle/finalizer.ts
 create mode 100644 .sandcastle/refinement-loop.ts
 create mode 100644 .sandcastle/task-source.ts
 create mode 100644 .sandcastle/types.ts

diff --git a/.sandcastle/concurrency-pool.ts b/.sandcastle/concurrency-pool.ts
new file mode 100644
index 0000000..f2e6fee
--- /dev/null
+++ b/.sandcastle/concurrency-pool.ts
@@ -0,0 +1,45 @@
+/**
+ * A concurrency limiter that restricts parallel execution to a maximum number of tasks.
+ */
+export class ConcurrencyPool {
+  private readonly queue: (() => void)[] = [];
+  private running = 0;
+
+  /**
+   * @param max - Maximum number of concurrent tasks.
+   */
+  constructor(private readonly max: number) {}
+
+  /**
+   * Executes the given async function, waiting if the pool is at capacity.
+   * @param fn - Async function to execute within the pool.
+   * @returns The result of the function.
+   */
+  async run<T>(fn: () => Promise<T>): Promise<T> {
+    await this.acquire();
+    try {
+      return await fn();
+    } finally {
+      this.release();
+    }
+  }
+
+  private acquire(): Promise<void> {
+    if (this.running < this.max) {
+      this.running++;
+      return Promise.resolve();
+    }
+    return new Promise<void>((resolve) => {
+      this.queue.push(resolve);
+    });
+  }
+
+  private release(): void {
+    this.running--;
+    const next = this.queue.shift();
+    if (next) {
+      this.running++;
+      next();
+    }
+  }
+}
diff --git a/.sandcastle/finalizer.ts b/.sandcastle/finalizer.ts
new file mode 100644
index 0000000..1ac076e
--- /dev/null
+++ b/.sandcastle/finalizer.ts
@@ -0,0 +1,164 @@
+import * as sandcastle from "@ai-hero/sandcastle";
+import { execFileSync, execSync } from "node:child_process";
+
+import type { FinalizeResult, LoopResult, SandboxInstance, TaskSpec } from "./types.js";
+
+const MAX_CRITIC_ROUNDS = 5;
+const ITERATION_BUDGET = [100, 50, 25, 10, 10];
+const VALIDATION_COMMAND =
+  "npm run type-check && npm run test && npm run test:node && npm run test:edge && npm run prettier-check && npm run lint && npm run build && npm run check-build && npm run build:v2 && npm run check-build:v2";
+
+/**
+ * Finalizes a task after the refinement loop: validates, retries if needed, rebases, pushes, and creates a PR.
+ * @param spec - The task specification.
+ * @param loopResult - The result from the refinement loop.
+ * @param sandbox - The sandcastle sandbox instance.
+ * @param cwd - Working directory (worktree path).
+ * @returns Finalization result with PR and validation status.
+ */
+export async function finalizeTask(
+  spec: TaskSpec,
+  loopResult: LoopResult,
+  sandbox: SandboxInstance,
+  cwd: string,
+): Promise<FinalizeResult> {
+  let validationPassed = false;
+
+  try {
+    execSync(VALIDATION_COMMAND, { cwd, stdio: "pipe" });
+    validationPassed = true;
+  } catch (err: unknown) {
+    const stderr =
+      err instanceof Error && "stderr" in err
+        ? String((err as { stderr: unknown }).stderr).slice(0, 500)
+        : "";
+    console.warn(`  #${spec.id}: Validation failed.${stderr ? `\n${stderr}` : ""}`);
+  }
+
+  if (!validationPassed && loopResult.roundsCompleted < MAX_CRITIC_ROUNDS) {
+    const retryBudget = ITERATION_BUDGET[MAX_CRITIC_ROUNDS - 1] ?? 10;
+    console.log(
+      `  #${spec.id}: Retrying one more implement round (budget: ${String(retryBudget)})`,
+    );
+
+    try {
+      await sandbox.run({
+        agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
+        maxIterations: retryBudget,
+        name: `Implementer #${spec.id} retry`,
+        promptArgs: {
+          BRANCH: spec.branch,
+          FINDINGS:
+            loopResult.lastFindings.length > 0
+              ? JSON.stringify(loopResult.lastFindings, null, 2)
+              : "",
+          ISSUE_BODY: spec.body,
+          ISSUE_TITLE: spec.title,
+          TASK_ID: spec.id,
+        },
+        promptFile: "./.sandcastle/implement-prompt.md",
+      });
+    } catch (retryErr: unknown) {
+      const retryMsg = retryErr instanceof Error ? retryErr.message : String(retryErr);
+      console.warn(
+        `  #${spec.id}: Implementer retry threw: ${retryMsg}. Falling through to PR creation.`,
+      );
+    }
+
+    try {
+      execSync(VALIDATION_COMMAND, { cwd, stdio: "pipe" });
+      validationPassed = true;
+      console.log(`  #${spec.id}: Validation passed after retry round.`);
+    } catch {
+      console.warn(`  #${spec.id}: Validation still fails after retry. Will create draft PR.`);
+    }
+  }
+
+  // Rebase on latest main
+  let rebaseSucceeded = false;
+  try {
+    execSync("git fetch origin main && git rebase origin/main", {
+      cwd,
+      stdio: "pipe",
+    });
+    rebaseSucceeded = true;
+    if (validationPassed) {
+      try {
+        execSync("npm run type-check && npm run test", {
+          cwd,
+          stdio: "pipe",
+        });
+      } catch {
+        validationPassed = false;
+      }
+    }
+  } catch {
+    try {
+      execSync("git rebase --abort", { cwd, stdio: "pipe" });
+    } catch {
+      /* empty */
+    }
+    try {
+      execSync("git push", { cwd, stdio: "pipe" });
+    } catch (pushErr: unknown) {
+      const pushMsg = pushErr instanceof Error ? pushErr.message : String(pushErr);
+      console.warn(`  #${spec.id}: git push failed after rebase abort: ${pushMsg}`);
+    }
+  }
+
+  if (rebaseSucceeded) {
+    try {
+      execSync("git push --force-with-lease", { cwd, stdio: "pipe" });
+    } catch (pushErr: unknown) {
+      const pushMsg = pushErr instanceof Error ? pushErr.message : String(pushErr);
+      console.warn(
+        `  #${spec.id}: git push --force-with-lease failed (branch un-pushed, PR creation will fail gracefully): ${pushMsg}`,
+      );
+    }
+  }
+
+  const converged = loopResult.status === "converged";
+  const isDraft = !converged || !validationPassed;
+  const outstandingNote =
+    !converged && loopResult.lastFindings.length > 0
+      ? `\n\n⚠️ Outstanding findings:\n${loopResult.lastFindings.map((f) => `- [${f.severity}] ${f.file}: ${f.title}`).join("\n")}`
+      : "";
+  const validationNote = !validationPassed
+    ? "\n\n⚠️ Validation did not pass. Manual review required."
+    : "";
+
+  const validationCheck = validationPassed ? "- [x]" : "- [ ]";
+  const commitPrefix = spec.labels.includes("feature request")
+    ? "feat"
+    : spec.labels.includes("bug")
+      ? "fix"
+      : "chore";
+  const prTitle = `${commitPrefix}: resolve #${spec.id} — ${spec.title}`;
+  const prBody = `## Description\n\nAutomated fix for #${spec.id}: ${spec.title}\n\n## Type of Change\n\n- [x] Bug fix (non-breaking change that fixes an issue)\n\n## Checklist\n\n${validationCheck} I have run validation suite\n- [x] My changes follow the existing code style\n\n## Related Issues\n\nFixes #${spec.id}${outstandingNote}${validationNote}`;
+
+  const prArgs = [
+    "pr",
+    "create",
+    ...(isDraft ? ["--draft"] : []),
+    "--head",
+    spec.branch,
+    "--base",
+    "main",
+    "--title",
+    prTitle,
+    "--body",
+    prBody,
+  ];
+
+  let prCreated = false;
+  try {
+    execFileSync("gh", prArgs, { cwd, encoding: "utf-8", stdio: "pipe" });
+    console.log(`  #${spec.id}: PR created${isDraft ? " (draft)" : ""}.`);
+    prCreated = true;
+  } catch (err: unknown) {
+    const msg = err instanceof Error ? err.message : String(err);
+    console.error(`  #${spec.id}: PR creation failed: ${msg}`);
+  }
+
+  return { isDraft, prCreated, validationPassed };
+}
diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts
index f5729cb..de9987d 100644
--- a/.sandcastle/main.ts
+++ b/.sandcastle/main.ts
@@ -1,521 +1,72 @@
 import * as sandcastle from "@ai-hero/sandcastle";
 import { docker } from "@ai-hero/sandcastle/sandboxes/docker";
-import { execFileSync, execSync } from "node:child_process";
-import crypto from "node:crypto";
-import { z } from "zod";
+
+import { ConcurrencyPool } from "./concurrency-pool.js";
+import { finalizeTask } from "./finalizer.js";
+import { runRefinementLoop } from "./refinement-loop.js";
+import { GithubIssueSource } from "./task-source.js";
 
 const BRANCH_PREFIX = "agent/issue";
-const ESCAPED_PREFIX = BRANCH_PREFIX.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
-const BRANCH_PATTERN = new RegExp(`^${ESCAPED_PREFIX}-\\d+-[\\w-]+$`);
 const ISSUE_LABEL = "sandcastle";
-const MAX_PLANNER_RETRIES = 5;
-const MAX_CRITIC_ROUNDS = 5;
-const ITERATION_BUDGET = [100, 50, 25, 10, 10];
 const MAX_PARALLEL = 3;
 const DOCKER_IMAGE = "sandcastle-sap-ai";
+const ITERATION_BUDGET = [100, 50, 25, 10, 10];
+const MAX_CRITIC_ROUNDS = 5;
 
-const FindingSchema = z.object({
-  category: z.enum(["security", "logic", "performance", "architecture", "style"]),
-  confidence: z.enum(["HIGH", "MEDIUM", "LOW"]),
-  description: z.string(),
-  file: z.string(),
-  line: z.number().optional(),
-  severity: z.enum(["CRITICAL", "HIGH", "MEDIUM", "LOW"]),
-  suggestion: z.string().optional(),
-  title: z.string(),
-});
-const FindingsSchema = z.array(FindingSchema);
-type Finding = z.infer<typeof FindingSchema>;
-
-// --- Zod schema for GitHub issue list response (fix #6) ---
-
-const RawIssueSchema = z.object({
-  body: z
-    .string()
-    .nullable()
-    .transform((b) => b ?? ""),
-  labels: z.array(z.object({ name: z.string() })),
-  number: z.number(),
-  title: z.string(),
+const source = new GithubIssueSource({
+  branchPrefix: BRANCH_PREFIX,
+  dockerImage: DOCKER_IMAGE,
+  label: ISSUE_LABEL,
 });
-const RawIssuesSchema = z.array(RawIssueSchema);
-
-// --- Type alias for sandbox instance ---
-
-type SandboxInstance = Awaited<ReturnType<typeof sandcastle.createSandbox>>;
 
-/**
- * @param sandbox - The sandcastle sandbox instance.
- * @param cwd - Working directory (worktree path).
- * @param issue - Issue metadata.
- * @param issue.body - Sanitized issue body text.
- * @param issue.branch - Git branch name for this issue.
- * @param issue.id - GitHub issue number as string.
- * @param issue.labels - Issue label names.
- * @param issue.title - Issue title.
- * @param converged - Whether the critic loop converged.
- * @param lastFindings - Outstanding findings from the last round.
- * @param round - The round at which the critic loop ended.
- * @returns Whether work was completed (PR created).
- */
-async function finalizeIssue(
-  sandbox: SandboxInstance,
-  cwd: string,
-  issue: { body: string; branch: string; id: string; labels: string[]; title: string },
-  converged: boolean,
-  lastFindings: Finding[],
-  round: number,
-): Promise<boolean> {
-  let validationPassed = false;
+const tasks = await source.discover();
 
-  try {
-    execSync(
-      "npm run type-check && npm run test && npm run test:node && npm run test:edge && npm run prettier-check && npm run lint && npm run build && npm run check-build && npm run build:v2 && npm run check-build:v2",
-      { cwd, stdio: "pipe" },
-    );
-    validationPassed = true;
-  } catch (err: unknown) {
-    const stderr =
-      err instanceof Error && "stderr" in err
-        ? String((err as { stderr: unknown }).stderr).slice(0, 500)
-        : "";
-    console.warn(`  #${issue.id}: Validation failed.${stderr ? `\n${stderr}` : ""}`);
-  }
+if (tasks.length === 0) {
+  process.exit(0);
+}
 
-  // --- Validation retry round (fix #7) ---
-  if (!validationPassed && round < MAX_CRITIC_ROUNDS) {
-    const retryBudget = ITERATION_BUDGET[MAX_CRITIC_ROUNDS - 1] ?? 10;
-    console.log(
-      `  #${issue.id}: Retrying one more implement round (budget: ${String(retryBudget)})`,
-    );
+const pool = new ConcurrencyPool(MAX_PARALLEL);
 
-    try {
-      await sandbox.run({
-        agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
-        maxIterations: retryBudget,
-        name: `Implementer #${issue.id} retry`,
-        promptArgs: {
-          BRANCH: issue.branch,
-          FINDINGS: lastFindings.length > 0 ? JSON.stringify(lastFindings, null, 2) : "",
-          ISSUE_BODY: issue.body,
-          ISSUE_TITLE: issue.title,
-          TASK_ID: issue.id,
+const settled = await Promise.allSettled(
+  tasks.map((spec) =>
+    pool.run(async () => {
+      await using sandbox = await sandcastle.createSandbox({
+        branch: spec.branch,
+        copyToWorktree: ["node_modules"],
+        hooks: {
+          sandbox: { onSandboxReady: [{ command: "npm install && npm run build" }] },
         },
-        promptFile: "./.sandcastle/implement-prompt.md",
+        sandbox: docker({ imageName: DOCKER_IMAGE }),
       });
-    } catch (retryErr: unknown) {
-      const retryMsg = retryErr instanceof Error ? retryErr.message : String(retryErr);
-      console.warn(
-        `  #${issue.id}: Implementer retry threw: ${retryMsg}. Falling through to PR creation.`,
-      );
-    }
 
-    // Re-validate after retry
-    try {
-      execSync(
-        "npm run type-check && npm run test && npm run test:node && npm run test:edge && npm run prettier-check && npm run lint && npm run build && npm run check-build && npm run build:v2 && npm run check-build:v2",
-        { cwd, stdio: "pipe" },
-      );
-      validationPassed = true;
-      console.log(`  #${issue.id}: Validation passed after retry round.`);
-    } catch {
-      console.warn(`  #${issue.id}: Validation still fails after retry. Will create draft PR.`);
-    }
-  }
+      const loopResult = await runRefinementLoop(spec, sandbox, {
+        iterationBudget: ITERATION_BUDGET,
+        maxRounds: MAX_CRITIC_ROUNDS,
+      });
 
-  // Rebase on latest main
-  let rebaseSucceeded = false;
-  try {
-    execSync("git fetch origin main && git rebase origin/main", {
-      cwd,
-      stdio: "pipe",
-    });
-    rebaseSucceeded = true;
-    if (validationPassed) {
-      // Post-rebase smoke test
-      try {
-        execSync("npm run type-check && npm run test", {
-          cwd,
-          stdio: "pipe",
-        });
-      } catch {
-        validationPassed = false;
+      let prCreated = false;
+      if (loopResult.totalCommits > 0) {
+        const cwd = sandbox.worktreePath;
+        const result = await finalizeTask(spec, loopResult, sandbox, cwd);
+        prCreated = result.prCreated;
       }
-    }
-  } catch {
-    // Rebase failed — abort and push un-rebased
-    try {
-      execSync("git rebase --abort", { cwd, stdio: "pipe" });
-    } catch {
-      /* empty */
-    }
-    try {
-      execSync("git push", { cwd, stdio: "pipe" });
-    } catch (pushErr: unknown) {
-      const pushMsg = pushErr instanceof Error ? pushErr.message : String(pushErr);
-      console.warn(`  #${issue.id}: git push failed after rebase abort: ${pushMsg}`);
-    }
-  }
-
-  if (rebaseSucceeded) {
-    try {
-      execSync("git push --force-with-lease", { cwd, stdio: "pipe" });
-    } catch (pushErr: unknown) {
-      const pushMsg = pushErr instanceof Error ? pushErr.message : String(pushErr);
-      console.warn(
-        `  #${issue.id}: git push --force-with-lease failed (branch un-pushed, PR creation will fail gracefully): ${pushMsg}`,
-      );
-    }
-  }
-
-  // Create PR (fix #1: use execFileSync to avoid shell injection)
-  const isDraft = !converged || !validationPassed;
-  const outstandingNote =
-    !converged && lastFindings.length > 0
-      ? `\n\n⚠️ Outstanding findings:\n${lastFindings.map((f) => `- [${f.severity}] ${f.file}: ${f.title}`).join("\n")}`
-      : "";
-  const validationNote = !validationPassed
-    ? "\n\n⚠️ Validation did not pass. Manual review required."
-    : "";
-
-  const validationCheck = validationPassed ? "- [x]" : "- [ ]";
-  const commitPrefix = issue.labels.includes("feature request")
-    ? "feat"
-    : issue.labels.includes("bug")
-      ? "fix"
-      : "chore";
-  const prTitle = `${commitPrefix}: resolve #${issue.id} — ${issue.title}`;
-  const prBody = `## Description\n\nAutomated fix for #${issue.id}: ${issue.title}\n\n## Type of Change\n\n- [x] Bug fix (non-breaking change that fixes an issue)\n\n## Checklist\n\n${validationCheck} I have run validation suite\n- [x] My changes follow the existing code style\n\n## Related Issues\n\nFixes #${issue.id}${outstandingNote}${validationNote}`;
-
-  const prArgs = [
-    "pr",
-    "create",
-    ...(isDraft ? ["--draft"] : []),
-    "--head",
-    issue.branch,
-    "--base",
-    "main",
-    "--title",
-    prTitle,
-    "--body",
-    prBody,
-  ];
-
-  try {
-    execFileSync("gh", prArgs, { cwd, encoding: "utf-8", stdio: "pipe" });
-    console.log(`  #${issue.id}: PR created${isDraft ? " (draft)" : ""}.`);
-    return true;
-  } catch (err: unknown) {
-    const msg = err instanceof Error ? err.message : String(err);
-    console.error(`  #${issue.id}: PR creation failed: ${msg}`);
-    return false;
-  }
-}
-
-/**
- * @param f - Finding to compute dedup key for.
- * @returns Composite key for deterministic deduplication.
- */
-function findingKey(f: Finding): string {
-  const normalizedTitle = f.title
-    .toLowerCase()
-    .replace(/[^\w\s]/g, "")
-    .replace(/\s+/g, " ")
-    .trim();
-  return `${f.file}::${f.category}::${normalizedTitle}`;
-}
-
-/**
- * @param stdout - Agent stdout to parse findings from.
- * @param nonce - Unique tag identifier for this run.
- * @returns Parsed findings array or null on parse failure.
- */
-function parseFindings(stdout: string, nonce: string): Finding[] | null {
-  const tagPattern = new RegExp(`<findings-${nonce}>([\\s\\S]*?)<\\/findings-${nonce}>`, "g");
-  const matches = [...stdout.matchAll(tagPattern)];
-  if (matches.length === 0) {
-    return null;
-  }
-  const raw = matches.at(-1)?.[1]?.trim() ?? "[]";
-  const cleaned = raw.replace(/^```(?:json)?\s*\n?/g, "").replace(/\n?```\s*$/g, "");
-  try {
-    return FindingsSchema.parse(JSON.parse(cleaned));
-  } catch {
-    return null;
-  }
-}
-
-/**
- * @param text - Raw text to strip injection-prone tags from.
- * @returns Sanitized text safe for prompt injection.
- */
-function sanitizeForPrompt(text: string): string {
-  return text.replace(/<\/?(?:plan|findings[\w-]*|promise)[^>]*>/gi, "");
-}
-
-// --- Phase 1: Fetch and sanitize issues ---
-
-let rawIssuesJson: string;
-try {
-  rawIssuesJson = execSync(
-    `gh issue list --state open --json number,title,labels,body --limit 50 --label "${ISSUE_LABEL}"`,
-    { encoding: "utf-8" },
-  );
-} catch {
-  console.error("Failed to fetch issues. Ensure gh is installed and authenticated.");
-  process.exit(1);
-}
-
-let rawIssues: z.infer<typeof RawIssuesSchema>;
-try {
-  rawIssues = RawIssuesSchema.parse(JSON.parse(rawIssuesJson));
-} catch {
-  console.error("Failed to parse issues JSON. Unexpected format from gh CLI.");
-  process.exit(1);
-}
-
-const issuesJson = rawIssues.map((i) => ({
-  body: sanitizeForPrompt(i.body),
-  labels: i.labels.map((l) => l.name),
-  number: i.number,
-  title: i.title,
-}));
 
-if (issuesJson.length === 0) {
-  console.log("No issues with label '%s'. Exiting.", ISSUE_LABEL);
-  process.exit(0);
-}
-
-// --- Phase 2: Plan ---
-
-let workCompleted = false;
-
-for (let attempt = 1; attempt <= MAX_PLANNER_RETRIES; attempt++) {
-  console.log(`\n=== Planner attempt ${String(attempt)}/${String(MAX_PLANNER_RETRIES)} ===\n`);
-
-  const plan = await sandcastle.run({
-    agent: sandcastle.opencode("github-copilot/claude-opus-4.6"),
-    maxIterations: 1,
-    name: "Planner",
-    promptArgs: {
-      BRANCH_PREFIX,
-      ISSUES_JSON: JSON.stringify(issuesJson, null, 2),
-    },
-    promptFile: "./.sandcastle/plan-prompt.md",
-    sandbox: docker({ imageName: DOCKER_IMAGE }),
-  });
-
-  const planMatches = [...plan.stdout.matchAll(/<plan>([\s\S]*?)<\/plan>/g)];
-  const planMatch = planMatches.at(-1);
-  if (!planMatch) {
-    console.error("Planner did not produce a <plan> tag. Retrying.");
-    continue;
-  }
-
-  const planContent = planMatch[1] ?? "";
-  let issues: { body: string; branch: string; id: string; labels: string[]; title: string }[];
-  try {
-    const parsed = JSON.parse(planContent) as { issues: unknown[] };
-    if (!Array.isArray(parsed.issues)) {
-      console.error("Planner output missing issues array. Retrying.");
-      continue;
-    }
-    const validated = parsed.issues.filter(
-      (entry): entry is { body: string; branch: string; id: string; title: string } => {
-        if (typeof entry !== "object" || entry === null) return false;
-        const item = entry as Record<string, unknown>;
-        if (typeof item.id !== "string" || !/^\d+$/.test(item.id)) return false;
-        if (typeof item.branch !== "string" || !BRANCH_PATTERN.test(item.branch)) return false;
-        if (typeof item.title !== "string") return false;
-        return true;
-      },
-    );
-    // Attach sanitized body from our fetched data
-    issues = validated.map((v) => ({
-      ...v,
-      body: issuesJson.find((i) => String(i.number) === v.id)?.body ?? "",
-      labels: issuesJson.find((i) => String(i.number) === v.id)?.labels ?? [],
-    }));
-  } catch {
-    console.error("Planner produced invalid JSON. Retrying.");
-    continue;
-  }
-
-  if (issues.length === 0) {
-    console.log("No actionable issues. Exiting.");
-    workCompleted = true;
-    break;
-  }
-
-  console.log(`Plan: ${String(issues.length)} issue(s) to work on:`);
-  for (const issue of issues) {
-    console.log(`  #${issue.id}: ${issue.title} → ${issue.branch}`);
-  }
-
-  // --- Phase 3: Implement ↔ Critic loop (parallel, max 3) ---
-
-  let running = 0;
-  const queue: (() => void)[] = [];
-  const acquire = () =>
-    running < MAX_PARALLEL
-      ? (running++, Promise.resolve())
-      : new Promise<void>((resolve) => queue.push(resolve));
-  const release = () => {
-    running--;
-    const next = queue.shift();
-    if (next) {
-      running++;
-      next();
-    }
-  };
-
-  const settled = await Promise.allSettled(
-    issues.map(async (issue) => {
-      await acquire();
-      try {
-        await using sandbox = await sandcastle.createSandbox({
-          branch: issue.branch,
-          copyToWorktree: ["node_modules"],
-          hooks: {
-            sandbox: { onSandboxReady: [{ command: "npm install && npm run build" }] },
-          },
-          sandbox: docker({ imageName: DOCKER_IMAGE }),
-        });
-
-        const seenKeys = new Set<string>();
-        let lastFindings: Finding[] = [];
-        let converged = false;
-        let totalCommits = 0;
-        let lastRound = 0;
-
-        for (let round = 1; round <= MAX_CRITIC_ROUNDS; round++) {
-          lastRound = round;
-          const budget = ITERATION_BUDGET[round - 1] ?? 10;
-          const findingsArg = lastFindings.length > 0 ? JSON.stringify(lastFindings, null, 2) : "";
-
-          console.log(
-            `  #${issue.id} round ${String(round)}/${String(MAX_CRITIC_ROUNDS)} (budget: ${String(budget)})`,
-          );
-
-          // Implementer
-          const impl = await sandbox.run({
-            agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
-            maxIterations: budget,
-            name: `Implementer #${issue.id} R${String(round)}`,
-            promptArgs: {
-              BRANCH: issue.branch,
-              FINDINGS: findingsArg,
-              ISSUE_BODY: issue.body,
-              ISSUE_TITLE: issue.title,
-              TASK_ID: issue.id,
-            },
-            promptFile: "./.sandcastle/implement-prompt.md",
-          });
-
-          totalCommits += impl.commits.length;
-
-          if (round === 1 && impl.commits.length === 0) {
-            console.warn(`  #${issue.id}: 0 commits on round 1. Skipping.`);
-            break;
-          }
-
-          // Critic
-          const nonce = crypto.randomBytes(4).toString("hex");
-
-          let critic = await sandbox.run({
-            agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
-            maxIterations: 1,
-            name: `Critic #${issue.id} R${String(round)}`,
-            promptArgs: {
-              BRANCH: issue.branch,
-              NONCE: nonce,
-            },
-            promptFile: "./.sandcastle/critic-prompt.md",
-          });
-
-          let findings = parseFindings(critic.stdout, nonce);
-
-          // Retry once on parse failure
-          if (findings === null) {
-            console.warn(`  #${issue.id}: Critic parse failed. Retrying.`);
-            critic = await sandbox.run({
-              agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
-              maxIterations: 1,
-              name: `Critic #${issue.id} R${String(round)} retry`,
-              promptArgs: {
-                BRANCH: issue.branch,
-                NONCE: nonce,
-              },
-              promptFile: "./.sandcastle/critic-prompt.md",
-            });
-            findings = parseFindings(critic.stdout, nonce);
-          }
-
-          if (findings === null) {
-            console.warn(`  #${issue.id}: Critic failed twice. Breaking (non-converged).`);
-            break;
-          }
-
-          // Dedup
-          const newFindings = findings.filter(
-            (f) => f.confidence !== "LOW" && !seenKeys.has(findingKey(f)),
-          );
-          for (const f of newFindings) {
-            seenKeys.add(findingKey(f));
-          }
-
-          console.log(
-            `  #${issue.id}: ${String(findings.length)} findings, ${String(newFindings.length)} new`,
-          );
-
-          if (newFindings.length === 0) {
-            const nonLowFindings = findings.filter((f) => f.confidence !== "LOW");
-            if (nonLowFindings.length > 0) {
-              lastFindings = nonLowFindings;
-              converged = false;
-            } else {
-              converged = true;
-            }
-            break;
-          }
-
-          lastFindings = newFindings;
-        }
-
-        // --- Final validation, rebase, and PR creation ---
-        if (totalCommits > 0) {
-          const cwd = sandbox.worktreePath;
-          const prCreated = await finalizeIssue(
-            sandbox,
-            cwd,
-            issue,
-            converged,
-            lastFindings,
-            lastRound,
-          );
-          if (prCreated) {
-            workCompleted = true;
-          }
-        }
-
-        return { converged, issue, totalCommits };
-      } finally {
-        release();
-      }
+      return { prCreated, spec };
     }),
-  );
-
-  // Log failures
-  for (const [i, outcome] of settled.entries()) {
-    if (outcome.status === "rejected") {
-      const issue = issues[i];
-      const reason: unknown = outcome.reason;
-      const msg = reason instanceof Error ? (reason.stack ?? reason.message) : String(reason);
-      console.error(`  ✗ #${issue?.id ?? String(i)} failed: ${msg}`);
-    }
+  ),
+);
+
+const workCompleted = settled.some(
+  (outcome) => outcome.status === "fulfilled" && outcome.value.prCreated,
+);
+
+for (const [i, outcome] of settled.entries()) {
+  if (outcome.status === "rejected") {
+    const spec = tasks[i];
+    const reason: unknown = outcome.reason;
+    const msg = reason instanceof Error ? (reason.stack ?? reason.message) : String(reason);
+    console.error(`  ✗ #${spec?.id ?? String(i)} failed: ${msg}`);
   }
-
-  break; // Plan executed — exit planner retry loop
 }
 
 console.log("\nAll done.");
diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts
new file mode 100644
index 0000000..ae66e9e
--- /dev/null
+++ b/.sandcastle/refinement-loop.ts
@@ -0,0 +1,193 @@
+import * as sandcastle from "@ai-hero/sandcastle";
+import crypto from "node:crypto";
+
+import type { Finding, LoopResult, LoopStatus, SandboxInstance, TaskSpec } from "./types.js";
+
+import { FindingsSchema } from "./types.js";
+
+const DEFAULT_MAX_ROUNDS = 5;
+const DEFAULT_ITERATION_BUDGET = [100, 50, 25, 10, 10];
+
+/** Options for configuring the refinement loop. */
+export interface RefinementLoopOptions {
+  /** Budget of iterations per round (array indexed by round - 1). */
+  iterationBudget?: number[];
+  /** Maximum number of implement↔critic rounds. */
+  maxRounds?: number;
+  /** Optional callback invoked after each round completes. */
+  onRoundComplete?: (round: number, findings: Finding[]) => void;
+}
+
+/**
+ * Runs the implement↔critic refinement loop for a given task.
+ * @param spec - The task specification.
+ * @param sandbox - The sandcastle sandbox instance.
+ * @param opts - Optional configuration for rounds, budget, and callbacks.
+ * @returns The loop result with status, commits, findings, and rounds completed.
+ */
+export async function runRefinementLoop(
+  spec: TaskSpec,
+  sandbox: SandboxInstance,
+  opts?: RefinementLoopOptions,
+): Promise<LoopResult> {
+  const maxRounds = opts?.maxRounds ?? DEFAULT_MAX_ROUNDS;
+  const iterationBudget = opts?.iterationBudget ?? DEFAULT_ITERATION_BUDGET;
+
+  const seenKeys = new Set<string>();
+  let lastFindings: Finding[] = [];
+  let status: LoopStatus = "exhausted";
+  let totalCommits = 0;
+  let roundsCompleted = 0;
+
+  for (let round = 1; round <= maxRounds; round++) {
+    roundsCompleted = round;
+    const budget = iterationBudget[round - 1] ?? 10;
+    const findingsArg = lastFindings.length > 0 ? JSON.stringify(lastFindings, null, 2) : "";
+
+    console.log(
+      `  #${spec.id} round ${String(round)}/${String(maxRounds)} (budget: ${String(budget)})`,
+    );
+
+    // Implementer
+    const impl = await sandbox.run({
+      agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
+      maxIterations: budget,
+      name: `Implementer #${spec.id} R${String(round)}`,
+      promptArgs: {
+        BRANCH: spec.branch,
+        FINDINGS: findingsArg,
+        ISSUE_BODY: spec.body,
+        ISSUE_TITLE: spec.title,
+        TASK_ID: spec.id,
+      },
+      promptFile: "./.sandcastle/implement-prompt.md",
+    });
+
+    totalCommits += impl.commits.length;
+
+    if (round === 1 && impl.commits.length === 0) {
+      console.warn(`  #${spec.id}: 0 commits on round 1. Skipping.`);
+      status = "skipped";
+      break;
+    }
+
+    // Critic
+    const nonce = crypto.randomBytes(4).toString("hex");
+    const findings = await runCritic(sandbox, spec, round, nonce);
+
+    if (findings === null) {
+      console.warn(`  #${spec.id}: Critic failed twice. Breaking (non-converged).`);
+      status = "failed";
+      break;
+    }
+
+    // Dedup
+    const newFindings = findings.filter(
+      (f) => f.confidence !== "LOW" && !seenKeys.has(findingKey(f)),
+    );
+    for (const f of newFindings) {
+      seenKeys.add(findingKey(f));
+    }
+
+    console.log(
+      `  #${spec.id}: ${String(findings.length)} findings, ${String(newFindings.length)} new`,
+    );
+
+    opts?.onRoundComplete?.(round, findings);
+
+    if (newFindings.length === 0) {
+      const nonLowFindings = findings.filter((f) => f.confidence !== "LOW");
+      if (nonLowFindings.length > 0) {
+        lastFindings = nonLowFindings;
+        status = "exhausted";
+      } else {
+        status = "converged";
+      }
+      break;
+    }
+
+    lastFindings = newFindings;
+  }
+
+  return { lastFindings, roundsCompleted, status, totalCommits };
+}
+
+/**
+ * Computes a deduplication key for a finding.
+ * @param f - Finding to compute a key for.
+ * @returns Composite dedup key.
+ */
+function findingKey(f: Finding): string {
+  const normalizedTitle = f.title
+    .toLowerCase()
+    .replace(/[^\w\s]/g, "")
+    .replace(/\s+/g, " ")
+    .trim();
+  return `${f.file}::${f.category}::${normalizedTitle}`;
+}
+
+/**
+ * Parses findings from agent stdout using nonce-tagged delimiters.
+ * @param stdout - Agent stdout to parse findings from.
+ * @param nonce - Unique tag identifier for this run.
+ * @returns Parsed findings array or null on parse failure.
+ */
+function parseFindings(stdout: string, nonce: string): Finding[] | null {
+  const tagPattern = new RegExp(`<findings-${nonce}>([\\s\\S]*?)<\\/findings-${nonce}>`, "g");
+  const matches = [...stdout.matchAll(tagPattern)];
+  if (matches.length === 0) {
+    return null;
+  }
+  const raw = matches.at(-1)?.[1]?.trim() ?? "[]";
+  const cleaned = raw.replace(/^```(?:json)?\s*\n?/g, "").replace(/\n?```\s*$/g, "");
+  try {
+    return FindingsSchema.parse(JSON.parse(cleaned));
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Runs the critic agent, retrying once on parse failure.
+ * @param sandbox - The sandcastle sandbox instance.
+ * @param spec - The task specification.
+ * @param round - Current round number.
+ * @param nonce - Unique nonce for parsing.
+ * @returns Parsed findings or null if both attempts failed.
+ */
+async function runCritic(
+  sandbox: SandboxInstance,
+  spec: TaskSpec,
+  round: number,
+  nonce: string,
+): Promise<Finding[] | null> {
+  let critic = await sandbox.run({
+    agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
+    maxIterations: 1,
+    name: `Critic #${spec.id} R${String(round)}`,
+    promptArgs: {
+      BRANCH: spec.branch,
+      NONCE: nonce,
+    },
+    promptFile: "./.sandcastle/critic-prompt.md",
+  });
+
+  let findings = parseFindings(critic.stdout, nonce);
+
+  if (findings === null) {
+    console.warn(`  #${spec.id}: Critic parse failed. Retrying.`);
+    critic = await sandbox.run({
+      agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
+      maxIterations: 1,
+      name: `Critic #${spec.id} R${String(round)} retry`,
+      promptArgs: {
+        BRANCH: spec.branch,
+        NONCE: nonce,
+      },
+      promptFile: "./.sandcastle/critic-prompt.md",
+    });
+    findings = parseFindings(critic.stdout, nonce);
+  }
+
+  return findings;
+}
diff --git a/.sandcastle/task-source.ts b/.sandcastle/task-source.ts
new file mode 100644
index 0000000..e39e9c6
--- /dev/null
+++ b/.sandcastle/task-source.ts
@@ -0,0 +1,190 @@
+import * as sandcastle from "@ai-hero/sandcastle";
+import { docker } from "@ai-hero/sandcastle/sandboxes/docker";
+import { execSync } from "node:child_process";
+import { z } from "zod";
+
+import type { TaskSpec } from "./types.js";
+
+const RawIssueSchema = z.object({
+  body: z
+    .string()
+    .nullable()
+    .transform((b) => b ?? ""),
+  labels: z.array(z.object({ name: z.string() })),
+  number: z.number(),
+  title: z.string(),
+});
+const RawIssuesSchema = z.array(RawIssueSchema);
+
+/** Configuration for the GitHub issue task source. */
+export interface GithubIssueSourceConfig {
+  /** Git branch prefix for issue branches. */
+  branchPrefix: string;
+  /** Docker image name for the sandbox. */
+  dockerImage: string;
+  /** GitHub issue label to filter by. */
+  label: string;
+  /** Maximum planner retries. */
+  maxRetries?: number;
+}
+
+/** Interface for task discovery sources. */
+export interface TaskSource {
+  /** Discovers tasks to work on. */
+  discover(): Promise<TaskSpec[]>;
+}
+
+/**
+ * Task source that discovers work from GitHub issues via planner agent.
+ */
+export class GithubIssueSource implements TaskSource {
+  private readonly branchPattern: RegExp;
+  private readonly branchPrefix: string;
+  private readonly dockerImage: string;
+  private readonly label: string;
+  private readonly maxRetries: number;
+
+  /**
+   * @param config - Configuration for the GitHub issue source.
+   */
+  constructor(config: GithubIssueSourceConfig) {
+    this.branchPrefix = config.branchPrefix;
+    this.dockerImage = config.dockerImage;
+    this.label = config.label;
+    this.maxRetries = config.maxRetries ?? 5;
+
+    const escapedPrefix = this.branchPrefix.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+    this.branchPattern = new RegExp(`^${escapedPrefix}-\\d+-[\\w-]+$`);
+  }
+
+  /**
+   * Discovers tasks by fetching GitHub issues, running the planner, and validating the plan.
+   * @returns Array of task specifications to implement.
+   */
+  async discover(): Promise<TaskSpec[]> {
+    const issuesJson = this.fetchAndSanitizeIssues();
+
+    if (issuesJson.length === 0) {
+      console.log("No issues with label '%s'. Exiting.", this.label);
+      return [];
+    }
+
+    for (let attempt = 1; attempt <= this.maxRetries; attempt++) {
+      console.log(`\n=== Planner attempt ${String(attempt)}/${String(this.maxRetries)} ===\n`);
+
+      const plan = await sandcastle.run({
+        agent: sandcastle.opencode("github-copilot/claude-opus-4.6"),
+        maxIterations: 1,
+        name: "Planner",
+        promptArgs: {
+          BRANCH_PREFIX: this.branchPrefix,
+          ISSUES_JSON: JSON.stringify(issuesJson, null, 2),
+        },
+        promptFile: "./.sandcastle/plan-prompt.md",
+        sandbox: docker({ imageName: this.dockerImage }),
+      });
+
+      const planMatches = [...plan.stdout.matchAll(/<plan>([\s\S]*?)<\/plan>/g)];
+      const planMatch = planMatches.at(-1);
+      if (!planMatch) {
+        console.error("Planner did not produce a <plan> tag. Retrying.");
+        continue;
+      }
+
+      const planContent = planMatch[1] ?? "";
+      const tasks = this.validatePlan(planContent, issuesJson);
+      if (tasks === null) {
+        continue;
+      }
+
+      if (tasks.length === 0) {
+        console.log("No actionable issues. Exiting.");
+        return [];
+      }
+
+      console.log(`Plan: ${String(tasks.length)} issue(s) to work on:`);
+      for (const task of tasks) {
+        console.log(`  #${task.id}: ${task.title} → ${task.branch}`);
+      }
+
+      return tasks;
+    }
+
+    return [];
+  }
+
+  private fetchAndSanitizeIssues(): {
+    body: string;
+    labels: string[];
+    number: number;
+    title: string;
+  }[] {
+    let rawIssuesJson: string;
+    try {
+      rawIssuesJson = execSync(
+        `gh issue list --state open --json number,title,labels,body --limit 50 --label "${this.label}"`,
+        { encoding: "utf-8" },
+      );
+    } catch {
+      console.error("Failed to fetch issues. Ensure gh is installed and authenticated.");
+      process.exit(1);
+    }
+
+    let rawIssues: z.infer<typeof RawIssuesSchema>;
+    try {
+      rawIssues = RawIssuesSchema.parse(JSON.parse(rawIssuesJson));
+    } catch {
+      console.error("Failed to parse issues JSON. Unexpected format from gh CLI.");
+      process.exit(1);
+    }
+
+    return rawIssues.map((i) => ({
+      body: sanitizeForPrompt(i.body),
+      labels: i.labels.map((l) => l.name),
+      number: i.number,
+      title: i.title,
+    }));
+  }
+
+  private validatePlan(
+    planContent: string,
+    issuesJson: { body: string; labels: string[]; number: number; title: string }[],
+  ): null | TaskSpec[] {
+    try {
+      const parsed = JSON.parse(planContent) as { issues: unknown[] };
+      if (!Array.isArray(parsed.issues)) {
+        console.error("Planner output missing issues array. Retrying.");
+        return null;
+      }
+      const validated = parsed.issues.filter(
+        (entry): entry is { body: string; branch: string; id: string; title: string } => {
+          if (typeof entry !== "object" || entry === null) return false;
+          const item = entry as Record<string, unknown>;
+          if (typeof item.id !== "string" || !/^\d+$/.test(item.id)) return false;
+          if (typeof item.branch !== "string" || !this.branchPattern.test(item.branch))
+            return false;
+          if (typeof item.title !== "string") return false;
+          return true;
+        },
+      );
+
+      return validated.map((v) => ({
+        ...v,
+        body: issuesJson.find((i) => String(i.number) === v.id)?.body ?? "",
+        labels: issuesJson.find((i) => String(i.number) === v.id)?.labels ?? [],
+      }));
+    } catch {
+      console.error("Planner produced invalid JSON. Retrying.");
+      return null;
+    }
+  }
+}
+
+/**
+ * Strips injection-prone tags from text.
+ * @param text - Raw text to sanitize.
+ * @returns Sanitized text safe for prompt injection.
+ */
+function sanitizeForPrompt(text: string): string {
+  return text.replace(/<\/?(?:plan|findings[\w-]*|promise)[^>]*>/gi, "");
+}
diff --git a/.sandcastle/types.ts b/.sandcastle/types.ts
new file mode 100644
index 0000000..14d1b38
--- /dev/null
+++ b/.sandcastle/types.ts
@@ -0,0 +1,63 @@
+import type * as sandcastle from "@ai-hero/sandcastle";
+
+import { z } from "zod";
+
+/** Zod schema for a single critic finding. */
+export const FindingSchema = z.object({
+  category: z.enum(["security", "logic", "performance", "architecture", "style"]),
+  confidence: z.enum(["HIGH", "MEDIUM", "LOW"]),
+  description: z.string(),
+  file: z.string(),
+  line: z.number().optional(),
+  severity: z.enum(["CRITICAL", "HIGH", "MEDIUM", "LOW"]),
+  suggestion: z.string().optional(),
+  title: z.string(),
+});
+
+/** Zod schema for an array of critic findings. */
+export const FindingsSchema = z.array(FindingSchema);
+
+/** Result of post-loop finalization. */
+export interface FinalizeResult {
+  /** Whether the PR was marked as draft. */
+  isDraft: boolean;
+  /** Whether a PR was successfully created. */
+  prCreated: boolean;
+  /** Whether validation passed. */
+  validationPassed: boolean;
+}
+
+/** A single critic finding parsed from agent output. */
+export type Finding = z.infer<typeof FindingSchema>;
+
+/** Result returned by the refinement loop. */
+export interface LoopResult {
+  /** Outstanding findings from the last round. */
+  lastFindings: Finding[];
+  /** Number of rounds completed. */
+  roundsCompleted: number;
+  /** Termination status. */
+  status: LoopStatus;
+  /** Total commits produced across all rounds. */
+  totalCommits: number;
+}
+
+/** Outcome status of the refinement loop. */
+export type LoopStatus = "converged" | "exhausted" | "failed" | "skipped";
+
+/** Type alias for a sandcastle sandbox instance. */
+export type SandboxInstance = Awaited<ReturnType<typeof sandcastle.createSandbox>>;
+
+/** Specification for a task to be implemented. */
+export interface TaskSpec {
+  /** Sanitized issue body text. */
+  body: string;
+  /** Git branch name for this task. */
+  branch: string;
+  /** Task identifier (e.g. GitHub issue number as string). */
+  id: string;
+  /** Label names associated with the task. */
+  labels: string[];
+  /** Task title. */
+  title: string;
+}

From 3215748573a25fe5c36acf7a2184d017e98c882f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 14:00:47 +0200
Subject: [PATCH 09/31] fix: centralize constants, fix PR type-of-change,
 sanitize titles

---
 .sandcastle/finalizer.ts       | 12 +++++++++---
 .sandcastle/main.ts            |  3 +--
 .sandcastle/refinement-loop.ts | 11 ++++-------
 .sandcastle/task-source.ts     |  2 +-
 .sandcastle/types.ts           |  6 ++++++
 5 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/.sandcastle/finalizer.ts b/.sandcastle/finalizer.ts
index 1ac076e..a2e9db7 100644
--- a/.sandcastle/finalizer.ts
+++ b/.sandcastle/finalizer.ts
@@ -3,8 +3,8 @@ import { execFileSync, execSync } from "node:child_process";
 
 import type { FinalizeResult, LoopResult, SandboxInstance, TaskSpec } from "./types.js";
 
-const MAX_CRITIC_ROUNDS = 5;
-const ITERATION_BUDGET = [100, 50, 25, 10, 10];
+import { ITERATION_BUDGET, MAX_CRITIC_ROUNDS } from "./types.js";
+
 const VALIDATION_COMMAND =
   "npm run type-check && npm run test && npm run test:node && npm run test:edge && npm run prettier-check && npm run lint && npm run build && npm run check-build && npm run build:v2 && npm run check-build:v2";
 
@@ -134,7 +134,13 @@ export async function finalizeTask(
       ? "fix"
       : "chore";
   const prTitle = `${commitPrefix}: resolve #${spec.id} — ${spec.title}`;
-  const prBody = `## Description\n\nAutomated fix for #${spec.id}: ${spec.title}\n\n## Type of Change\n\n- [x] Bug fix (non-breaking change that fixes an issue)\n\n## Checklist\n\n${validationCheck} I have run validation suite\n- [x] My changes follow the existing code style\n\n## Related Issues\n\nFixes #${spec.id}${outstandingNote}${validationNote}`;
+  const typeOfChange =
+    commitPrefix === "feat"
+      ? "New feature (non-breaking change that adds functionality)"
+      : commitPrefix === "fix"
+        ? "Bug fix (non-breaking change that fixes an issue)"
+        : "Refactoring (no functional changes)";
+  const prBody = `## Description\n\nAutomated ${commitPrefix} for #${spec.id}: ${spec.title}\n\n## Type of Change\n\n- [x] ${typeOfChange}\n\n## Checklist\n\n${validationCheck} I have run validation suite\n- [x] My changes follow the existing code style\n\n## Related Issues\n\nFixes #${spec.id}${outstandingNote}${validationNote}`;
 
   const prArgs = [
     "pr",
diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts
index de9987d..0b916fd 100644
--- a/.sandcastle/main.ts
+++ b/.sandcastle/main.ts
@@ -5,13 +5,12 @@ import { ConcurrencyPool } from "./concurrency-pool.js";
 import { finalizeTask } from "./finalizer.js";
 import { runRefinementLoop } from "./refinement-loop.js";
 import { GithubIssueSource } from "./task-source.js";
+import { ITERATION_BUDGET, MAX_CRITIC_ROUNDS } from "./types.js";
 
 const BRANCH_PREFIX = "agent/issue";
 const ISSUE_LABEL = "sandcastle";
 const MAX_PARALLEL = 3;
 const DOCKER_IMAGE = "sandcastle-sap-ai";
-const ITERATION_BUDGET = [100, 50, 25, 10, 10];
-const MAX_CRITIC_ROUNDS = 5;
 
 const source = new GithubIssueSource({
   branchPrefix: BRANCH_PREFIX,
diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts
index ae66e9e..61c8b54 100644
--- a/.sandcastle/refinement-loop.ts
+++ b/.sandcastle/refinement-loop.ts
@@ -3,15 +3,12 @@ import crypto from "node:crypto";
 
 import type { Finding, LoopResult, LoopStatus, SandboxInstance, TaskSpec } from "./types.js";
 
-import { FindingsSchema } from "./types.js";
-
-const DEFAULT_MAX_ROUNDS = 5;
-const DEFAULT_ITERATION_BUDGET = [100, 50, 25, 10, 10];
+import { FindingsSchema, ITERATION_BUDGET, MAX_CRITIC_ROUNDS } from "./types.js";
 
 /** Options for configuring the refinement loop. */
 export interface RefinementLoopOptions {
   /** Budget of iterations per round (array indexed by round - 1). */
-  iterationBudget?: number[];
+  iterationBudget?: readonly number[];
   /** Maximum number of implement↔critic rounds. */
   maxRounds?: number;
   /** Optional callback invoked after each round completes. */
@@ -30,8 +27,8 @@ export async function runRefinementLoop(
   sandbox: SandboxInstance,
   opts?: RefinementLoopOptions,
 ): Promise<LoopResult> {
-  const maxRounds = opts?.maxRounds ?? DEFAULT_MAX_ROUNDS;
-  const iterationBudget = opts?.iterationBudget ?? DEFAULT_ITERATION_BUDGET;
+  const maxRounds = opts?.maxRounds ?? MAX_CRITIC_ROUNDS;
+  const iterationBudget = opts?.iterationBudget ?? ITERATION_BUDGET;
 
   const seenKeys = new Set<string>();
   let lastFindings: Finding[] = [];
diff --git a/.sandcastle/task-source.ts b/.sandcastle/task-source.ts
index e39e9c6..347e120 100644
--- a/.sandcastle/task-source.ts
+++ b/.sandcastle/task-source.ts
@@ -142,7 +142,7 @@ export class GithubIssueSource implements TaskSource {
       body: sanitizeForPrompt(i.body),
       labels: i.labels.map((l) => l.name),
       number: i.number,
-      title: i.title,
+      title: sanitizeForPrompt(i.title),
     }));
   }
 
diff --git a/.sandcastle/types.ts b/.sandcastle/types.ts
index 14d1b38..849df1f 100644
--- a/.sandcastle/types.ts
+++ b/.sandcastle/types.ts
@@ -48,6 +48,12 @@ export type LoopStatus = "converged" | "exhausted" | "failed" | "skipped";
 /** Type alias for a sandcastle sandbox instance. */
 export type SandboxInstance = Awaited<ReturnType<typeof sandcastle.createSandbox>>;
 
+/** Maximum implement↔critic rounds before giving up. */
+export const MAX_CRITIC_ROUNDS = 5;
+
+/** Token budget per round (decreasing). Index = round - 1. */
+export const ITERATION_BUDGET = [100, 50, 25, 10, 10] as const;
+
 /** Specification for a task to be implemented. */
 export interface TaskSpec {
   /** Sanitized issue body text. */

From 25e85de3173a2f56a04de4db00ca0f97fcb94e35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 14:06:55 +0200
Subject: [PATCH 10/31] fix: full validation post-rebase, execFileSync for gh
 issue list

---
 .sandcastle/finalizer.ts   |  2 +-
 .sandcastle/task-source.ts | 18 +++++++++++++++---
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/.sandcastle/finalizer.ts b/.sandcastle/finalizer.ts
index a2e9db7..97efd38 100644
--- a/.sandcastle/finalizer.ts
+++ b/.sandcastle/finalizer.ts
@@ -84,7 +84,7 @@ export async function finalizeTask(
     rebaseSucceeded = true;
     if (validationPassed) {
       try {
-        execSync("npm run type-check && npm run test", {
+        execSync(VALIDATION_COMMAND, {
           cwd,
           stdio: "pipe",
         });
diff --git a/.sandcastle/task-source.ts b/.sandcastle/task-source.ts
index 347e120..28e9e49 100644
--- a/.sandcastle/task-source.ts
+++ b/.sandcastle/task-source.ts
@@ -1,6 +1,6 @@
 import * as sandcastle from "@ai-hero/sandcastle";
 import { docker } from "@ai-hero/sandcastle/sandboxes/docker";
-import { execSync } from "node:child_process";
+import { execFileSync } from "node:child_process";
 import { z } from "zod";
 
 import type { TaskSpec } from "./types.js";
@@ -121,8 +121,20 @@ export class GithubIssueSource implements TaskSource {
   }[] {
     let rawIssuesJson: string;
     try {
-      rawIssuesJson = execSync(
-        `gh issue list --state open --json number,title,labels,body --limit 50 --label "${this.label}"`,
+      rawIssuesJson = execFileSync(
+        "gh",
+        [
+          "issue",
+          "list",
+          "--state",
+          "open",
+          "--json",
+          "number,title,labels,body",
+          "--limit",
+          "50",
+          "--label",
+          this.label,
+        ],
         { encoding: "utf-8" },
       );
     } catch {

From 4f83a6c72d140446004db6a4321e62988f3d9fd3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 14:12:40 +0200
Subject: [PATCH 11/31] perf: skip critic when implementer produces 0 commits
 on round 2+

---
 .sandcastle/refinement-loop.ts | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts
index 61c8b54..d3c3aaf 100644
--- a/.sandcastle/refinement-loop.ts
+++ b/.sandcastle/refinement-loop.ts
@@ -68,6 +68,11 @@ export async function runRefinementLoop(
       break;
     }
 
+    if (round > 1 && impl.commits.length === 0) {
+      status = "exhausted";
+      break;
+    }
+
     // Critic
     const nonce = crypto.randomBytes(4).toString("hex");
     const findings = await runCritic(sandbox, spec, round, nonce);

From 301c3918ad05e680690453c2a90ee7d9c92752ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 14:44:57 +0200
Subject: [PATCH 12/31] fix: remove unsound type guard, filter unknown plan
 IDs, guard ConcurrencyPool, warn on planner exhaustion

---
 .sandcastle/concurrency-pool.ts |  8 ++++++--
 .sandcastle/task-source.ts      | 19 +++++++++++++------
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/.sandcastle/concurrency-pool.ts b/.sandcastle/concurrency-pool.ts
index f2e6fee..5809e53 100644
--- a/.sandcastle/concurrency-pool.ts
+++ b/.sandcastle/concurrency-pool.ts
@@ -6,9 +6,13 @@ export class ConcurrencyPool {
   private running = 0;
 
   /**
-   * @param max - Maximum number of concurrent tasks.
+   * @param max - Maximum number of concurrent tasks. Must be >= 1.
    */
-  constructor(private readonly max: number) {}
+  constructor(private readonly max: number) {
+    if (max < 1) {
+      throw new RangeError("ConcurrencyPool max must be >= 1");
+    }
+  }
 
   /**
    * Executes the given async function, waiting if the pool is at capacity.
diff --git a/.sandcastle/task-source.ts b/.sandcastle/task-source.ts
index 28e9e49..8e67b8b 100644
--- a/.sandcastle/task-source.ts
+++ b/.sandcastle/task-source.ts
@@ -110,6 +110,7 @@ export class GithubIssueSource implements TaskSource {
       return tasks;
     }
 
+    console.warn("Planner failed to produce a valid plan after all retries.");
     return [];
   }
 
@@ -169,7 +170,7 @@ export class GithubIssueSource implements TaskSource {
         return null;
       }
       const validated = parsed.issues.filter(
-        (entry): entry is { body: string; branch: string; id: string; title: string } => {
+        (entry): entry is { branch: string; id: string; title: string } => {
           if (typeof entry !== "object" || entry === null) return false;
           const item = entry as Record<string, unknown>;
           if (typeof item.id !== "string" || !/^\d+$/.test(item.id)) return false;
@@ -180,11 +181,17 @@ export class GithubIssueSource implements TaskSource {
         },
       );
 
-      return validated.map((v) => ({
-        ...v,
-        body: issuesJson.find((i) => String(i.number) === v.id)?.body ?? "",
-        labels: issuesJson.find((i) => String(i.number) === v.id)?.labels ?? [],
-      }));
+      return validated
+        .map((v) => {
+          const source = issuesJson.find((i) => String(i.number) === v.id);
+          if (!source) return null;
+          return {
+            ...v,
+            body: source.body,
+            labels: source.labels,
+          };
+        })
+        .filter((v): v is NonNullable<typeof v> => v !== null);
     } catch {
       console.error("Planner produced invalid JSON. Retrying.");
       return null;

From 8e05a151711dd56917398e98b06ed3126fc4ed37 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 15:34:52 +0200
Subject: [PATCH 13/31] feat: state-of-the-art algorithmic improvements
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Budget: flat constant 50/round (ARCS/SWE-Agent pattern, replaces suboptimal decreasing schedule)
- Dedup: context-hash fingerprint of ±3 lines (CodeQL/Qodana pattern, drift-safe)
- Quality ratchet: rollback round on regression (findings increase → git reset --hard)
- Log: post-rebase validation failure now logs stderr
- Safety: rescue branch on push failure (preserves commits before sandbox disposal)
---
 .sandcastle/finalizer.ts       | 27 +++++++---
 .sandcastle/main.ts            |  4 +-
 .sandcastle/refinement-loop.ts | 95 ++++++++++++++++++++++++++++------
 .sandcastle/types.ts           |  4 +-
 4 files changed, 104 insertions(+), 26 deletions(-)

diff --git a/.sandcastle/finalizer.ts b/.sandcastle/finalizer.ts
index 97efd38..60527b9 100644
--- a/.sandcastle/finalizer.ts
+++ b/.sandcastle/finalizer.ts
@@ -3,7 +3,7 @@ import { execFileSync, execSync } from "node:child_process";
 
 import type { FinalizeResult, LoopResult, SandboxInstance, TaskSpec } from "./types.js";
 
-import { ITERATION_BUDGET, MAX_CRITIC_ROUNDS } from "./types.js";
+import { ITERATION_BUDGET_PER_ROUND, MAX_CRITIC_ROUNDS } from "./types.js";
 
 const VALIDATION_COMMAND =
   "npm run type-check && npm run test && npm run test:node && npm run test:edge && npm run prettier-check && npm run lint && npm run build && npm run check-build && npm run build:v2 && npm run check-build:v2";
@@ -36,7 +36,7 @@ export async function finalizeTask(
   }
 
   if (!validationPassed && loopResult.roundsCompleted < MAX_CRITIC_ROUNDS) {
-    const retryBudget = ITERATION_BUDGET[MAX_CRITIC_ROUNDS - 1] ?? 10;
+    const retryBudget = ITERATION_BUDGET_PER_ROUND;
     console.log(
       `  #${spec.id}: Retrying one more implement round (budget: ${String(retryBudget)})`,
     );
@@ -88,7 +88,14 @@ export async function finalizeTask(
           cwd,
           stdio: "pipe",
         });
-      } catch {
+      } catch (postRebaseErr: unknown) {
+        const postRebaseStderr =
+          postRebaseErr instanceof Error && "stderr" in postRebaseErr
+            ? String((postRebaseErr as { stderr: unknown }).stderr).slice(0, 500)
+            : "";
+        console.warn(
+          `  #${spec.id}: Post-rebase validation failed.${postRebaseStderr ? `\n${postRebaseStderr}` : ""}`,
+        );
         validationPassed = false;
       }
     }
@@ -111,9 +118,17 @@ export async function finalizeTask(
       execSync("git push --force-with-lease", { cwd, stdio: "pipe" });
     } catch (pushErr: unknown) {
       const pushMsg = pushErr instanceof Error ? pushErr.message : String(pushErr);
-      console.warn(
-        `  #${spec.id}: git push --force-with-lease failed (branch un-pushed, PR creation will fail gracefully): ${pushMsg}`,
-      );
+      try {
+        execSync(`git push origin HEAD:refs/heads/rescue/${spec.branch}-${String(Date.now())}`, {
+          cwd,
+          stdio: "pipe",
+        });
+        console.warn(`  #${spec.id}: Push failed. Commits preserved at rescue/${spec.branch}-...`);
+      } catch {
+        console.error(
+          `  #${spec.id}: Push failed and rescue failed. Commits will be lost on sandbox disposal: ${pushMsg}`,
+        );
+      }
     }
   }
 
diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts
index 0b916fd..988d832 100644
--- a/.sandcastle/main.ts
+++ b/.sandcastle/main.ts
@@ -5,7 +5,7 @@ import { ConcurrencyPool } from "./concurrency-pool.js";
 import { finalizeTask } from "./finalizer.js";
 import { runRefinementLoop } from "./refinement-loop.js";
 import { GithubIssueSource } from "./task-source.js";
-import { ITERATION_BUDGET, MAX_CRITIC_ROUNDS } from "./types.js";
+import { ITERATION_BUDGET_PER_ROUND, MAX_CRITIC_ROUNDS } from "./types.js";
 
 const BRANCH_PREFIX = "agent/issue";
 const ISSUE_LABEL = "sandcastle";
@@ -39,7 +39,7 @@ const settled = await Promise.allSettled(
       });
 
       const loopResult = await runRefinementLoop(spec, sandbox, {
-        iterationBudget: ITERATION_BUDGET,
+        iterationBudget: ITERATION_BUDGET_PER_ROUND,
         maxRounds: MAX_CRITIC_ROUNDS,
       });
 
diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts
index d3c3aaf..8b08221 100644
--- a/.sandcastle/refinement-loop.ts
+++ b/.sandcastle/refinement-loop.ts
@@ -1,14 +1,15 @@
 import * as sandcastle from "@ai-hero/sandcastle";
+import { execSync } from "node:child_process";
 import crypto from "node:crypto";
 
 import type { Finding, LoopResult, LoopStatus, SandboxInstance, TaskSpec } from "./types.js";
 
-import { FindingsSchema, ITERATION_BUDGET, MAX_CRITIC_ROUNDS } from "./types.js";
+import { FindingsSchema, ITERATION_BUDGET_PER_ROUND, MAX_CRITIC_ROUNDS } from "./types.js";
 
 /** Options for configuring the refinement loop. */
 export interface RefinementLoopOptions {
-  /** Budget of iterations per round (array indexed by round - 1). */
-  iterationBudget?: readonly number[];
+  /** Budget of iterations per round (flat constant applied to every round). */
+  iterationBudget?: number;
   /** Maximum number of implement↔critic rounds. */
   maxRounds?: number;
   /** Optional callback invoked after each round completes. */
@@ -28,23 +29,35 @@ export async function runRefinementLoop(
   opts?: RefinementLoopOptions,
 ): Promise<LoopResult> {
   const maxRounds = opts?.maxRounds ?? MAX_CRITIC_ROUNDS;
-  const iterationBudget = opts?.iterationBudget ?? ITERATION_BUDGET;
+  const budget = opts?.iterationBudget ?? ITERATION_BUDGET_PER_ROUND;
 
   const seenKeys = new Set<string>();
   let lastFindings: Finding[] = [];
   let status: LoopStatus = "exhausted";
   let totalCommits = 0;
   let roundsCompleted = 0;
+  let previousFindingsCount = Infinity;
 
   for (let round = 1; round <= maxRounds; round++) {
     roundsCompleted = round;
-    const budget = iterationBudget[round - 1] ?? 10;
     const findingsArg = lastFindings.length > 0 ? JSON.stringify(lastFindings, null, 2) : "";
 
     console.log(
       `  #${spec.id} round ${String(round)}/${String(maxRounds)} (budget: ${String(budget)})`,
     );
 
+    // Capture SHA before implementer runs (for quality ratchet rollback)
+    let beforeRoundSha = "";
+    try {
+      beforeRoundSha = execSync("git rev-parse HEAD", {
+        cwd: sandbox.worktreePath,
+        encoding: "utf-8",
+        stdio: ["pipe", "pipe", "pipe"],
+      }).trim();
+    } catch {
+      /* empty */
+    }
+
     // Implementer
     const impl = await sandbox.run({
       agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
@@ -83,18 +96,37 @@ export async function runRefinementLoop(
       break;
     }
 
-    // Dedup
+    // Dedup via context hash
+    const cwd = sandbox.worktreePath;
     const newFindings = findings.filter(
-      (f) => f.confidence !== "LOW" && !seenKeys.has(findingKey(f)),
+      (f) => f.confidence !== "LOW" && !seenKeys.has(findingKey(f, cwd)),
     );
     for (const f of newFindings) {
-      seenKeys.add(findingKey(f));
+      seenKeys.add(findingKey(f, cwd));
     }
 
     console.log(
       `  #${spec.id}: ${String(findings.length)} findings, ${String(newFindings.length)} new`,
     );
 
+    // Quality ratchet: rollback if findings increased (regression)
+    if (round > 1 && findings.length > previousFindingsCount) {
+      try {
+        execSync(`git reset --hard ${beforeRoundSha}`, {
+          cwd: sandbox.worktreePath,
+          stdio: "pipe",
+        });
+        console.warn(
+          `  #${spec.id} R${String(round)}: Regression detected (${String(previousFindingsCount)} → ${String(findings.length)}). Rolled back.`,
+        );
+      } catch {
+        /* empty */
+      }
+      status = "exhausted";
+      break;
+    }
+    previousFindingsCount = findings.length;
+
     opts?.onRoundComplete?.(round, findings);
 
     if (newFindings.length === 0) {
@@ -115,17 +147,48 @@ export async function runRefinementLoop(
 }
 
 /**
- * Computes a deduplication key for a finding.
+ * Computes a deduplication key for a finding using a context hash of surrounding lines.
  * @param f - Finding to compute a key for.
+ * @param cwd - Working directory (worktree path) for reading file context.
  * @returns Composite dedup key.
  */
-function findingKey(f: Finding): string {
-  const normalizedTitle = f.title
-    .toLowerCase()
-    .replace(/[^\w\s]/g, "")
-    .replace(/\s+/g, " ")
-    .trim();
-  return `${f.file}::${f.category}::${normalizedTitle}`;
+function findingKey(f: Finding, cwd: string): string {
+  if (!f.file || f.line == null) {
+    const truncTitle = f.title
+      .toLowerCase()
+      .slice(0, 50)
+      .replace(/[^\w\s]/g, "")
+      .replace(/\s+/g, " ")
+      .trim();
+    return `${f.file || "global"}::${f.category}::${truncTitle}`;
+  }
+  const contextHash = hashContextLines(cwd, f.file, f.line, 3);
+  return `${f.file}::${f.category}::${contextHash}`;
+}
+
+/**
+ * Hashes ±radius lines around the given line in a file for dedup stability.
+ * @param cwd - Working directory.
+ * @param file - Relative file path.
+ * @param line - Line number of the finding.
+ * @param radius - Number of context lines above and below.
+ * @returns Truncated SHA-256 hex digest.
+ */
+function hashContextLines(cwd: string, file: string, line: number, radius: number): string {
+  try {
+    const start = Math.max(1, line - radius);
+    const end = line + radius;
+    const content = execSync(`sed -n '${String(start)},${String(end)}p' "${file}"`, {
+      cwd,
+      encoding: "utf-8",
+      stdio: ["pipe", "pipe", "pipe"],
+    });
+    const normalized = content.replace(/\s+/g, " ").trim();
+    return crypto.createHash("sha256").update(normalized).digest("hex").slice(0, 16);
+  } catch {
+    const truncTitle = file.slice(0, 30) + String(line);
+    return crypto.createHash("sha256").update(truncTitle).digest("hex").slice(0, 16);
+  }
 }
 
 /**
diff --git a/.sandcastle/types.ts b/.sandcastle/types.ts
index 849df1f..efea8f8 100644
--- a/.sandcastle/types.ts
+++ b/.sandcastle/types.ts
@@ -51,8 +51,8 @@ export type SandboxInstance = Awaited<ReturnType<typeof sandcastle.createSandbox
 /** Maximum implement↔critic rounds before giving up. */
 export const MAX_CRITIC_ROUNDS = 5;
 
-/** Token budget per round (decreasing). Index = round - 1. */
-export const ITERATION_BUDGET = [100, 50, 25, 10, 10] as const;
+/** Token budget applied uniformly to every implement round. */
+export const ITERATION_BUDGET_PER_ROUND = 50;
 
 /** Specification for a task to be implemented. */
 export interface TaskSpec {

From 100838c2596ddf94732525fbde441ea799e261dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 16:08:35 +0200
Subject: [PATCH 14/31] fix(sandcastle): resolve all algorithmic audit findings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Replace execSync+sed shell injection with readFileSync+join in hashContextLines (RCE, P0)
- Fix quality ratchet to compare nonLowFindings.length not findings.length (logic bug, P1)
- Replace title truncation with SHA-256 hash in findingKey (collision fix, P2)
- Hash single target line + file salt instead of positional context window (line-drift fix, P2)
- Replace overlapping regex quantifiers in sanitizeForPrompt (ReDoS O(n²), P2)
- Replace Array FIFO queue with O(1) singly-linked list in ConcurrencyPool (P3)
- Move totalCommits accumulation after ratchet check to fix phantom counting (P3)
- Replace O(T×I) .find() with O(1) Map lookup in validatePlan (P3)
---
 .sandcastle/concurrency-pool.ts | 29 +++++++++++++++++++----
 .sandcastle/refinement-loop.ts  | 41 ++++++++++++++++++---------------
 .sandcastle/task-source.ts      | 25 ++++++++++----------
 3 files changed, 59 insertions(+), 36 deletions(-)

diff --git a/.sandcastle/concurrency-pool.ts b/.sandcastle/concurrency-pool.ts
index 5809e53..7ce4be2 100644
--- a/.sandcastle/concurrency-pool.ts
+++ b/.sandcastle/concurrency-pool.ts
@@ -1,8 +1,16 @@
+/** Internal node for the O(1) FIFO waiting queue. Not exported. */
+interface QueueNode {
+  resolve: () => void;
+  next: QueueNode | null;
+}
+
 /**
  * A concurrency limiter that restricts parallel execution to a maximum number of tasks.
+ * Queue operations are O(1) amortized (singly-linked list).
  */
 export class ConcurrencyPool {
-  private readonly queue: (() => void)[] = [];
+  private head: QueueNode | null = null;
+  private tail: QueueNode | null = null;
   private running = 0;
 
   /**
@@ -34,16 +42,27 @@ export class ConcurrencyPool {
       return Promise.resolve();
     }
     return new Promise<void>((resolve) => {
-      this.queue.push(resolve);
+      const node: QueueNode = { resolve, next: null };
+      if (this.tail === null) {
+        this.head = node;
+        this.tail = node;
+      } else {
+        this.tail.next = node;
+        this.tail = node;
+      }
     });
   }
 
   private release(): void {
     this.running--;
-    const next = this.queue.shift();
-    if (next) {
+    const next = this.head;
+    if (next !== null) {
+      this.head = next.next;
+      if (this.head === null) {
+        this.tail = null;
+      }
       this.running++;
-      next();
+      next.resolve();
     }
   }
 }
diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts
index 8b08221..918170f 100644
--- a/.sandcastle/refinement-loop.ts
+++ b/.sandcastle/refinement-loop.ts
@@ -1,6 +1,8 @@
 import * as sandcastle from "@ai-hero/sandcastle";
 import { execSync } from "node:child_process";
 import crypto from "node:crypto";
+import { readFileSync } from "node:fs";
+import { join } from "node:path";
 
 import type { Finding, LoopResult, LoopStatus, SandboxInstance, TaskSpec } from "./types.js";
 
@@ -73,8 +75,6 @@ export async function runRefinementLoop(
       promptFile: "./.sandcastle/implement-prompt.md",
     });
 
-    totalCommits += impl.commits.length;
-
     if (round === 1 && impl.commits.length === 0) {
       console.warn(`  #${spec.id}: 0 commits on round 1. Skipping.`);
       status = "skipped";
@@ -110,7 +110,8 @@ export async function runRefinementLoop(
     );
 
     // Quality ratchet: rollback if findings increased (regression)
-    if (round > 1 && findings.length > previousFindingsCount) {
+    const nonLowFindings = findings.filter((f) => f.confidence !== "LOW");
+    if (round > 1 && nonLowFindings.length > previousFindingsCount) {
       try {
         execSync(`git reset --hard ${beforeRoundSha}`, {
           cwd: sandbox.worktreePath,
@@ -125,7 +126,8 @@ export async function runRefinementLoop(
       status = "exhausted";
       break;
     }
-    previousFindingsCount = findings.length;
+    totalCommits += impl.commits.length;
+    previousFindingsCount = nonLowFindings.length;
 
     opts?.onRoundComplete?.(round, findings);
 
@@ -154,37 +156,38 @@ export async function runRefinementLoop(
  */
 function findingKey(f: Finding, cwd: string): string {
   if (!f.file || f.line == null) {
-    const truncTitle = f.title
+    const normalizedTitle = f.title
       .toLowerCase()
-      .slice(0, 50)
       .replace(/[^\w\s]/g, "")
       .replace(/\s+/g, " ")
       .trim();
-    return `${f.file || "global"}::${f.category}::${truncTitle}`;
+    const titleHash = crypto.createHash("sha256").update(normalizedTitle).digest("hex").slice(0, 16);
+    return `${f.file || "global"}::${f.category}::${titleHash}`;
   }
   const contextHash = hashContextLines(cwd, f.file, f.line, 3);
   return `${f.file}::${f.category}::${contextHash}`;
 }
 
 /**
- * Hashes ±radius lines around the given line in a file for dedup stability.
+ * Hashes the target line content for dedup stability.
  * @param cwd - Working directory.
  * @param file - Relative file path.
  * @param line - Line number of the finding.
- * @param radius - Number of context lines above and below.
+ * @param _radius - Unused; kept for call-site compatibility.
  * @returns Truncated SHA-256 hex digest.
  */
-function hashContextLines(cwd: string, file: string, line: number, radius: number): string {
+function hashContextLines(cwd: string, file: string, line: number, _radius: number): string {
   try {
-    const start = Math.max(1, line - radius);
-    const end = line + radius;
-    const content = execSync(`sed -n '${String(start)},${String(end)}p' "${file}"`, {
-      cwd,
-      encoding: "utf-8",
-      stdio: ["pipe", "pipe", "pipe"],
-    });
-    const normalized = content.replace(/\s+/g, " ").trim();
-    return crypto.createHash("sha256").update(normalized).digest("hex").slice(0, 16);
+    const fullPath = join(cwd, file);
+    if (!fullPath.startsWith(cwd)) {
+      throw new Error("Path traversal");
+    }
+    const raw = readFileSync(fullPath, "utf-8");
+    const lines = raw.split("\n");
+    const targetLine = lines[Math.max(0, line - 1)] ?? "";
+    const normalized = targetLine.replace(/\s+/g, " ").trim();
+    // Include file path as salt to prevent cross-file hash collisions
+    return crypto.createHash("sha256").update(`${file}:${normalized}`).digest("hex").slice(0, 16);
   } catch {
     const truncTitle = file.slice(0, 30) + String(line);
     return crypto.createHash("sha256").update(truncTitle).digest("hex").slice(0, 16);
diff --git a/.sandcastle/task-source.ts b/.sandcastle/task-source.ts
index 8e67b8b..42d27bb 100644
--- a/.sandcastle/task-source.ts
+++ b/.sandcastle/task-source.ts
@@ -181,17 +181,18 @@ export class GithubIssueSource implements TaskSource {
         },
       );
 
-      return validated
-        .map((v) => {
-          const source = issuesJson.find((i) => String(i.number) === v.id);
-          if (!source) return null;
-          return {
-            ...v,
-            body: source.body,
-            labels: source.labels,
-          };
-        })
-        .filter((v): v is NonNullable<typeof v> => v !== null);
+    const issueMap = new Map(issuesJson.map((i) => [String(i.number), i]));
+    return validated
+      .map((v) => {
+        const source = issueMap.get(v.id);
+        if (!source) return null;
+        return {
+          ...v,
+          body: source.body,
+          labels: source.labels,
+        };
+      })
+      .filter((v): v is NonNullable<typeof v> => v !== null);
     } catch {
       console.error("Planner produced invalid JSON. Retrying.");
       return null;
@@ -205,5 +206,5 @@ export class GithubIssueSource implements TaskSource {
  * @returns Sanitized text safe for prompt injection.
  */
 function sanitizeForPrompt(text: string): string {
-  return text.replace(/<\/?(?:plan|findings[\w-]*|promise)[^>]*>/gi, "");
+  return text.replace(/<\/?(?:plan|findings|promise)[^>]*>/gi, "");
 }

From badfc9231d089499d8258666fc7f85a9c6d8d3ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 16:34:48 +0200
Subject: [PATCH 15/31] fix(sandcastle): harden subprocess calls and reduce
 cyclomatic complexity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Replace all execSync template literals with execFileSync (array args)
  to eliminate shell injection vectors from LLM-controlled data
- Strengthen path traversal guard: resolve() + sep instead of join()
- Validate SHA format (/^[0-9a-f]{40}$/) before git reset --hard
- Decompose finalizeTask (CC 15→5) into runValidation, attemptRebase,
  pushBranch, buildPrArgs, extractStderr
- Decompose runRefinementLoop (CC 18→8) into executeRound,
  deduplicateFindings, checkQualityRatchet
- Replace process.exit(0) with natural script termination
- Add title validation (length ≤200, no control chars) in validatePlan
- Add console.warn in previously silent catch blocks
---
 .sandcastle/concurrency-pool.ts |   8 +-
 .sandcastle/finalizer.ts        | 201 +++++++++++++++++++-----------
 .sandcastle/main.ts             |  84 ++++++-------
 .sandcastle/refinement-loop.ts  | 213 ++++++++++++++++++++++----------
 .sandcastle/task-source.ts      |  28 +++--
 5 files changed, 340 insertions(+), 194 deletions(-)

diff --git a/.sandcastle/concurrency-pool.ts b/.sandcastle/concurrency-pool.ts
index 7ce4be2..e023a43 100644
--- a/.sandcastle/concurrency-pool.ts
+++ b/.sandcastle/concurrency-pool.ts
@@ -1,7 +1,7 @@
 /** Internal node for the O(1) FIFO waiting queue. Not exported. */
 interface QueueNode {
+  next: null | QueueNode;
   resolve: () => void;
-  next: QueueNode | null;
 }
 
 /**
@@ -9,9 +9,9 @@ interface QueueNode {
  * Queue operations are O(1) amortized (singly-linked list).
  */
 export class ConcurrencyPool {
-  private head: QueueNode | null = null;
-  private tail: QueueNode | null = null;
+  private head: null | QueueNode = null;
   private running = 0;
+  private tail: null | QueueNode = null;
 
   /**
    * @param max - Maximum number of concurrent tasks. Must be >= 1.
@@ -42,7 +42,7 @@ export class ConcurrencyPool {
       return Promise.resolve();
     }
     return new Promise<void>((resolve) => {
-      const node: QueueNode = { resolve, next: null };
+      const node: QueueNode = { next: null, resolve };
       if (this.tail === null) {
         this.head = node;
         this.tail = node;
diff --git a/.sandcastle/finalizer.ts b/.sandcastle/finalizer.ts
index 60527b9..e1fedff 100644
--- a/.sandcastle/finalizer.ts
+++ b/.sandcastle/finalizer.ts
@@ -1,5 +1,5 @@
 import * as sandcastle from "@ai-hero/sandcastle";
-import { execFileSync, execSync } from "node:child_process";
+import { execFileSync } from "node:child_process";
 
 import type { FinalizeResult, LoopResult, SandboxInstance, TaskSpec } from "./types.js";
 
@@ -22,19 +22,9 @@ export async function finalizeTask(
   sandbox: SandboxInstance,
   cwd: string,
 ): Promise<FinalizeResult> {
-  let validationPassed = false;
-
-  try {
-    execSync(VALIDATION_COMMAND, { cwd, stdio: "pipe" });
-    validationPassed = true;
-  } catch (err: unknown) {
-    const stderr =
-      err instanceof Error && "stderr" in err
-        ? String((err as { stderr: unknown }).stderr).slice(0, 500)
-        : "";
-    console.warn(`  #${spec.id}: Validation failed.${stderr ? `\n${stderr}` : ""}`);
-  }
+  let validationPassed = runValidation(cwd, spec);
 
+  // Retry one more round if validation failed and budget remains
   if (!validationPassed && loopResult.roundsCompleted < MAX_CRITIC_ROUNDS) {
     const retryBudget = ITERATION_BUDGET_PER_ROUND;
     console.log(
@@ -66,7 +56,7 @@ export async function finalizeTask(
     }
 
     try {
-      execSync(VALIDATION_COMMAND, { cwd, stdio: "pipe" });
+      execFileSync("sh", ["-c", VALIDATION_COMMAND], { cwd, stdio: "pipe" });
       validationPassed = true;
       console.log(`  #${spec.id}: Validation passed after retry round.`);
     } catch {
@@ -75,71 +65,80 @@ export async function finalizeTask(
   }
 
   // Rebase on latest main
-  let rebaseSucceeded = false;
-  try {
-    execSync("git fetch origin main && git rebase origin/main", {
-      cwd,
-      stdio: "pipe",
-    });
-    rebaseSucceeded = true;
-    if (validationPassed) {
-      try {
-        execSync(VALIDATION_COMMAND, {
-          cwd,
-          stdio: "pipe",
-        });
-      } catch (postRebaseErr: unknown) {
-        const postRebaseStderr =
-          postRebaseErr instanceof Error && "stderr" in postRebaseErr
-            ? String((postRebaseErr as { stderr: unknown }).stderr).slice(0, 500)
-            : "";
-        console.warn(
-          `  #${spec.id}: Post-rebase validation failed.${postRebaseStderr ? `\n${postRebaseStderr}` : ""}`,
-        );
-        validationPassed = false;
-      }
-    }
-  } catch {
-    try {
-      execSync("git rebase --abort", { cwd, stdio: "pipe" });
-    } catch {
-      /* empty */
-    }
+  const rebaseSucceeded = attemptRebase(cwd, spec);
+  if (rebaseSucceeded && validationPassed) {
     try {
-      execSync("git push", { cwd, stdio: "pipe" });
-    } catch (pushErr: unknown) {
-      const pushMsg = pushErr instanceof Error ? pushErr.message : String(pushErr);
-      console.warn(`  #${spec.id}: git push failed after rebase abort: ${pushMsg}`);
+      execFileSync("sh", ["-c", VALIDATION_COMMAND], { cwd, stdio: "pipe" });
+    } catch (postRebaseErr: unknown) {
+      const postRebaseStderr = extractStderr(postRebaseErr);
+      console.warn(
+        `  #${spec.id}: Post-rebase validation failed.${postRebaseStderr ? `\n${postRebaseStderr}` : ""}`,
+      );
+      validationPassed = false;
     }
   }
 
-  if (rebaseSucceeded) {
+  // Push
+  pushBranch(cwd, spec, rebaseSucceeded);
+
+  // Build PR arguments and create PR
+  const { isDraft, prArgs } = buildPrArgs(spec, loopResult, validationPassed);
+
+  let prCreated = false;
+  try {
+    execFileSync("gh", prArgs, { cwd, encoding: "utf-8", stdio: "pipe" });
+    console.log(`  #${spec.id}: PR created${isDraft ? " (draft)" : ""}.`);
+    prCreated = true;
+  } catch (err: unknown) {
+    const msg = err instanceof Error ? err.message : String(err);
+    console.error(`  #${spec.id}: PR creation failed: ${msg}`);
+  }
+
+  return { isDraft, prCreated, validationPassed };
+}
+
+/**
+ * Fetches origin/main and rebases the current branch onto it.
+ * On failure, aborts the rebase cleanly.
+ * @param cwd - Working directory (worktree path).
+ * @param _spec - Task specification (unused, reserved for future logging).
+ * @returns `true` if rebase succeeded, `false` otherwise.
+ */
+function attemptRebase(cwd: string, _spec: TaskSpec): boolean {
+  try {
+    execFileSync("git", ["fetch", "origin", "main"], { cwd, stdio: "pipe" });
+    execFileSync("git", ["rebase", "origin/main"], { cwd, stdio: "pipe" });
+    return true;
+  } catch {
     try {
-      execSync("git push --force-with-lease", { cwd, stdio: "pipe" });
-    } catch (pushErr: unknown) {
-      const pushMsg = pushErr instanceof Error ? pushErr.message : String(pushErr);
-      try {
-        execSync(`git push origin HEAD:refs/heads/rescue/${spec.branch}-${String(Date.now())}`, {
-          cwd,
-          stdio: "pipe",
-        });
-        console.warn(`  #${spec.id}: Push failed. Commits preserved at rescue/${spec.branch}-...`);
-      } catch {
-        console.error(
-          `  #${spec.id}: Push failed and rescue failed. Commits will be lost on sandbox disposal: ${pushMsg}`,
-        );
-      }
+      execFileSync("git", ["rebase", "--abort"], { cwd, stdio: "pipe" });
+    } catch {
+      /* empty */
     }
+    return false;
   }
+}
 
+/**
+ * Builds the PR title, body, and `gh pr create` argument list.
+ * @param spec - The task specification.
+ * @param loopResult - The result from the refinement loop.
+ * @param validationPassed - Whether the validation suite passed.
+ * @returns Object with `isDraft` flag and `prArgs` string array.
+ */
+function buildPrArgs(
+  spec: TaskSpec,
+  loopResult: LoopResult,
+  validationPassed: boolean,
+): { isDraft: boolean; prArgs: string[] } {
   const converged = loopResult.status === "converged";
   const isDraft = !converged || !validationPassed;
   const outstandingNote =
     !converged && loopResult.lastFindings.length > 0
-      ? `\n\n⚠️ Outstanding findings:\n${loopResult.lastFindings.map((f) => `- [${f.severity}] ${f.file}: ${f.title}`).join("\n")}`
+      ? `\n\n\u26a0\ufe0f Outstanding findings:\n${loopResult.lastFindings.map((f) => `- [${f.severity}] ${f.file}: ${f.title}`).join("\n")}`
       : "";
   const validationNote = !validationPassed
-    ? "\n\n⚠️ Validation did not pass. Manual review required."
+    ? "\n\n\u26a0\ufe0f Validation did not pass. Manual review required."
     : "";
 
   const validationCheck = validationPassed ? "- [x]" : "- [ ]";
@@ -148,7 +147,7 @@ export async function finalizeTask(
     : spec.labels.includes("bug")
       ? "fix"
       : "chore";
-  const prTitle = `${commitPrefix}: resolve #${spec.id} — ${spec.title}`;
+  const prTitle = `${commitPrefix}: resolve #${spec.id} \u2014 ${spec.title}`;
   const typeOfChange =
     commitPrefix === "feat"
       ? "New feature (non-breaking change that adds functionality)"
@@ -171,15 +170,69 @@ export async function finalizeTask(
     prBody,
   ];
 
-  let prCreated = false;
+  return { isDraft, prArgs };
+}
+
+/**
+ * Extracts stderr from a caught error, truncated to 500 chars.
+ * @param err - The caught error value.
+ * @returns Stderr string or empty string if unavailable.
+ */
+function extractStderr(err: unknown): string {
+  return err instanceof Error && "stderr" in err
+    ? String((err as { stderr: unknown }).stderr).slice(0, 500)
+    : "";
+}
+
+/**
+ * Pushes the branch to origin. When rebase succeeded, uses force-with-lease
+ * with a rescue-branch fallback. When rebase was aborted, does a plain push.
+ * @param cwd - Working directory (worktree path).
+ * @param spec - The task specification.
+ * @param rebaseSucceeded - Whether the preceding rebase completed successfully.
+ */
+function pushBranch(cwd: string, spec: TaskSpec, rebaseSucceeded: boolean): void {
+  if (rebaseSucceeded) {
+    try {
+      execFileSync("git", ["push", "--force-with-lease"], { cwd, stdio: "pipe" });
+    } catch (pushErr: unknown) {
+      const pushMsg = pushErr instanceof Error ? pushErr.message : String(pushErr);
+      try {
+        execFileSync(
+          "git",
+          ["push", "origin", `HEAD:refs/heads/rescue/${spec.branch}-${String(Date.now())}`],
+          { cwd, stdio: "pipe" },
+        );
+        console.warn(`  #${spec.id}: Push failed. Commits preserved at rescue/${spec.branch}-...`);
+      } catch {
+        console.error(
+          `  #${spec.id}: Push failed and rescue failed. Commits will be lost on sandbox disposal: ${pushMsg}`,
+        );
+      }
+    }
+  } else {
+    try {
+      execFileSync("git", ["push"], { cwd, stdio: "pipe" });
+    } catch (pushErr: unknown) {
+      const pushMsg = pushErr instanceof Error ? pushErr.message : String(pushErr);
+      console.warn(`  #${spec.id}: git push failed after rebase abort: ${pushMsg}`);
+    }
+  }
+}
+
+/**
+ * Runs the full validation suite.
+ * @param cwd - Working directory (worktree path).
+ * @param spec - The task specification (used for logging).
+ * @returns `true` if validation passed, `false` otherwise.
+ */
+function runValidation(cwd: string, spec: TaskSpec): boolean {
   try {
-    execFileSync("gh", prArgs, { cwd, encoding: "utf-8", stdio: "pipe" });
-    console.log(`  #${spec.id}: PR created${isDraft ? " (draft)" : ""}.`);
-    prCreated = true;
+    execFileSync("sh", ["-c", VALIDATION_COMMAND], { cwd, stdio: "pipe" });
+    return true;
   } catch (err: unknown) {
-    const msg = err instanceof Error ? err.message : String(err);
-    console.error(`  #${spec.id}: PR creation failed: ${msg}`);
+    const stderr = extractStderr(err);
+    console.warn(`  #${spec.id}: Validation failed.${stderr ? `\n${stderr}` : ""}`);
+    return false;
   }
-
-  return { isDraft, prCreated, validationPassed };
 }
diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts
index 988d832..98bf998 100644
--- a/.sandcastle/main.ts
+++ b/.sandcastle/main.ts
@@ -21,55 +21,55 @@ const source = new GithubIssueSource({
 const tasks = await source.discover();
 
 if (tasks.length === 0) {
-  process.exit(0);
-}
-
-const pool = new ConcurrencyPool(MAX_PARALLEL);
+  console.log("No tasks to process.");
+} else {
+  const pool = new ConcurrencyPool(MAX_PARALLEL);
 
-const settled = await Promise.allSettled(
-  tasks.map((spec) =>
-    pool.run(async () => {
-      await using sandbox = await sandcastle.createSandbox({
-        branch: spec.branch,
-        copyToWorktree: ["node_modules"],
-        hooks: {
-          sandbox: { onSandboxReady: [{ command: "npm install && npm run build" }] },
-        },
-        sandbox: docker({ imageName: DOCKER_IMAGE }),
-      });
+  const settled = await Promise.allSettled(
+    tasks.map((spec) =>
+      pool.run(async () => {
+        await using sandbox = await sandcastle.createSandbox({
+          branch: spec.branch,
+          copyToWorktree: ["node_modules"],
+          hooks: {
+            sandbox: { onSandboxReady: [{ command: "npm install && npm run build" }] },
+          },
+          sandbox: docker({ imageName: DOCKER_IMAGE }),
+        });
 
-      const loopResult = await runRefinementLoop(spec, sandbox, {
-        iterationBudget: ITERATION_BUDGET_PER_ROUND,
-        maxRounds: MAX_CRITIC_ROUNDS,
-      });
+        const loopResult = await runRefinementLoop(spec, sandbox, {
+          iterationBudget: ITERATION_BUDGET_PER_ROUND,
+          maxRounds: MAX_CRITIC_ROUNDS,
+        });
 
-      let prCreated = false;
-      if (loopResult.totalCommits > 0) {
-        const cwd = sandbox.worktreePath;
-        const result = await finalizeTask(spec, loopResult, sandbox, cwd);
-        prCreated = result.prCreated;
-      }
+        let prCreated = false;
+        if (loopResult.totalCommits > 0) {
+          const cwd = sandbox.worktreePath;
+          const result = await finalizeTask(spec, loopResult, sandbox, cwd);
+          prCreated = result.prCreated;
+        }
 
-      return { prCreated, spec };
-    }),
-  ),
-);
+        return { prCreated, spec };
+      }),
+    ),
+  );
 
-const workCompleted = settled.some(
-  (outcome) => outcome.status === "fulfilled" && outcome.value.prCreated,
-);
+  const workCompleted = settled.some(
+    (outcome) => outcome.status === "fulfilled" && outcome.value.prCreated,
+  );
 
-for (const [i, outcome] of settled.entries()) {
-  if (outcome.status === "rejected") {
-    const spec = tasks[i];
-    const reason: unknown = outcome.reason;
-    const msg = reason instanceof Error ? (reason.stack ?? reason.message) : String(reason);
-    console.error(`  ✗ #${spec?.id ?? String(i)} failed: ${msg}`);
+  for (const [i, outcome] of settled.entries()) {
+    if (outcome.status === "rejected") {
+      const spec = tasks[i];
+      const reason: unknown = outcome.reason;
+      const msg = reason instanceof Error ? (reason.stack ?? reason.message) : String(reason);
+      console.error(`  ✗ #${spec?.id ?? String(i)} failed: ${msg}`);
+    }
   }
-}
 
-console.log("\nAll done.");
+  console.log("\nAll done.");
 
-if (!workCompleted) {
-  process.exitCode = 1;
+  if (!workCompleted) {
+    process.exitCode = 1;
+  }
 }
diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts
index 918170f..baa06a9 100644
--- a/.sandcastle/refinement-loop.ts
+++ b/.sandcastle/refinement-loop.ts
@@ -1,8 +1,8 @@
 import * as sandcastle from "@ai-hero/sandcastle";
-import { execSync } from "node:child_process";
+import { execFileSync } from "node:child_process";
 import crypto from "node:crypto";
 import { readFileSync } from "node:fs";
-import { join } from "node:path";
+import { resolve, sep } from "node:path";
 
 import type { Finding, LoopResult, LoopStatus, SandboxInstance, TaskSpec } from "./types.js";
 
@@ -18,6 +18,16 @@ export interface RefinementLoopOptions {
   onRoundComplete?: (round: number, findings: Finding[]) => void;
 }
 
+/** Result of a single implement↔critic round. */
+interface RoundResult {
+  /** SHA of HEAD before the implementer ran. */
+  beforeSha: string;
+  /** Number of commits made by the implementer. */
+  commits: number;
+  /** Parsed findings from the critic, or null on critic failure. */
+  findings: Finding[] | null;
+}
+
 /**
  * Runs the implement↔critic refinement loop for a given task.
  * @param spec - The task specification.
@@ -42,55 +52,25 @@ export async function runRefinementLoop(
 
   for (let round = 1; round <= maxRounds; round++) {
     roundsCompleted = round;
-    const findingsArg = lastFindings.length > 0 ? JSON.stringify(lastFindings, null, 2) : "";
 
     console.log(
       `  #${spec.id} round ${String(round)}/${String(maxRounds)} (budget: ${String(budget)})`,
     );
 
-    // Capture SHA before implementer runs (for quality ratchet rollback)
-    let beforeRoundSha = "";
-    try {
-      beforeRoundSha = execSync("git rev-parse HEAD", {
-        cwd: sandbox.worktreePath,
-        encoding: "utf-8",
-        stdio: ["pipe", "pipe", "pipe"],
-      }).trim();
-    } catch {
-      /* empty */
-    }
+    const result = await executeRound(spec, sandbox, round, budget, lastFindings);
 
-    // Implementer
-    const impl = await sandbox.run({
-      agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
-      maxIterations: budget,
-      name: `Implementer #${spec.id} R${String(round)}`,
-      promptArgs: {
-        BRANCH: spec.branch,
-        FINDINGS: findingsArg,
-        ISSUE_BODY: spec.body,
-        ISSUE_TITLE: spec.title,
-        TASK_ID: spec.id,
-      },
-      promptFile: "./.sandcastle/implement-prompt.md",
-    });
-
-    if (round === 1 && impl.commits.length === 0) {
+    if (round === 1 && result.commits === 0) {
       console.warn(`  #${spec.id}: 0 commits on round 1. Skipping.`);
       status = "skipped";
       break;
     }
 
-    if (round > 1 && impl.commits.length === 0) {
+    if (round > 1 && result.commits === 0) {
       status = "exhausted";
       break;
     }
 
-    // Critic
-    const nonce = crypto.randomBytes(4).toString("hex");
-    const findings = await runCritic(sandbox, spec, round, nonce);
-
-    if (findings === null) {
+    if (result.findings === null) {
       console.warn(`  #${spec.id}: Critic failed twice. Breaking (non-converged).`);
       status = "failed";
       break;
@@ -98,41 +78,34 @@ export async function runRefinementLoop(
 
     // Dedup via context hash
     const cwd = sandbox.worktreePath;
-    const newFindings = findings.filter(
-      (f) => f.confidence !== "LOW" && !seenKeys.has(findingKey(f, cwd)),
-    );
-    for (const f of newFindings) {
-      seenKeys.add(findingKey(f, cwd));
-    }
+    const newFindings = deduplicateFindings(result.findings, seenKeys, cwd);
 
     console.log(
-      `  #${spec.id}: ${String(findings.length)} findings, ${String(newFindings.length)} new`,
+      `  #${spec.id}: ${String(result.findings.length)} findings, ${String(newFindings.length)} new`,
     );
 
     // Quality ratchet: rollback if findings increased (regression)
-    const nonLowFindings = findings.filter((f) => f.confidence !== "LOW");
-    if (round > 1 && nonLowFindings.length > previousFindingsCount) {
-      try {
-        execSync(`git reset --hard ${beforeRoundSha}`, {
-          cwd: sandbox.worktreePath,
-          stdio: "pipe",
-        });
-        console.warn(
-          `  #${spec.id} R${String(round)}: Regression detected (${String(previousFindingsCount)} → ${String(findings.length)}). Rolled back.`,
-        );
-      } catch {
-        /* empty */
-      }
+    const nonLowFindings = result.findings.filter((f) => f.confidence !== "LOW");
+    if (
+      checkQualityRatchet(
+        spec,
+        round,
+        nonLowFindings.length,
+        previousFindingsCount,
+        result.beforeSha,
+        cwd,
+      )
+    ) {
       status = "exhausted";
       break;
     }
-    totalCommits += impl.commits.length;
+    totalCommits += result.commits;
     previousFindingsCount = nonLowFindings.length;
 
-    opts?.onRoundComplete?.(round, findings);
+    opts?.onRoundComplete?.(round, result.findings);
 
     if (newFindings.length === 0) {
-      const nonLowFindings = findings.filter((f) => f.confidence !== "LOW");
+      const nonLowFindings = result.findings.filter((f) => f.confidence !== "LOW");
       if (nonLowFindings.length > 0) {
         lastFindings = nonLowFindings;
         status = "exhausted";
@@ -148,6 +121,118 @@ export async function runRefinementLoop(
   return { lastFindings, roundsCompleted, status, totalCommits };
 }
 
+/**
+ * Checks whether findings regressed compared to the previous round and rolls back if so.
+ * @param spec - The task specification.
+ * @param round - Current round number.
+ * @param findingsCount - Number of non-LOW findings this round.
+ * @param previousCount - Number of non-LOW findings from the previous round.
+ * @param beforeSha - SHA to reset to on regression.
+ * @param cwd - Working directory for git operations.
+ * @returns True if a regression was detected and rollback performed.
+ */
+function checkQualityRatchet(
+  spec: TaskSpec,
+  round: number,
+  findingsCount: number,
+  previousCount: number,
+  beforeSha: string,
+  cwd: string,
+): boolean {
+  if (round <= 1 || findingsCount <= previousCount) {
+    return false;
+  }
+
+  // Validate SHA format before passing to execFileSync
+  if (!/^[0-9a-f]{40}$/.test(beforeSha)) {
+    console.warn(`  #${spec.id}: Invalid SHA for rollback, skipping reset.`);
+    return true;
+  }
+
+  try {
+    execFileSync("git", ["reset", "--hard", beforeSha], {
+      cwd,
+      stdio: "pipe",
+    });
+    console.warn(
+      `  #${spec.id} R${String(round)}: Regression detected (${String(previousCount)} → ${String(findingsCount)}). Rolled back.`,
+    );
+  } catch {
+    console.warn(`  #${spec.id}: Failed to reset to ${beforeSha} after regression.`);
+  }
+
+  return true;
+}
+
+/**
+ * Filters findings by confidence and deduplicates against previously seen keys.
+ * @param findings - Raw findings from the critic.
+ * @param seenKeys - Set of previously seen dedup keys (mutated: new keys are added).
+ * @param cwd - Working directory for context hashing.
+ * @returns Array of new, non-LOW-confidence findings.
+ */
+function deduplicateFindings(findings: Finding[], seenKeys: Set<string>, cwd: string): Finding[] {
+  const newFindings = findings.filter(
+    (f) => f.confidence !== "LOW" && !seenKeys.has(findingKey(f, cwd)),
+  );
+  for (const f of newFindings) {
+    seenKeys.add(findingKey(f, cwd));
+  }
+  return newFindings;
+}
+
+/**
+ * Executes a single implement↔critic round.
+ * @param spec - The task specification.
+ * @param sandbox - The sandcastle sandbox instance.
+ * @param round - Current round number (1-indexed).
+ * @param budget - Iteration budget for the implementer.
+ * @param lastFindings - Findings from the previous round to feed to the implementer.
+ * @returns The round result containing commits, findings, and the pre-round SHA.
+ */
+async function executeRound(
+  spec: TaskSpec,
+  sandbox: SandboxInstance,
+  round: number,
+  budget: number,
+  lastFindings: Finding[],
+): Promise<RoundResult> {
+  const findingsArg = lastFindings.length > 0 ? JSON.stringify(lastFindings, null, 2) : "";
+
+  // Capture SHA before implementer runs (for quality ratchet rollback)
+  let beforeSha = "";
+  try {
+    beforeSha = execFileSync("git", ["rev-parse", "HEAD"], {
+      cwd: sandbox.worktreePath,
+      encoding: "utf-8",
+      stdio: ["pipe", "pipe", "pipe"],
+    }).trim();
+  } catch {
+    console.warn(`  #${spec.id}: Failed to capture HEAD SHA before round ${String(round)}.`);
+  }
+
+  // Implementer
+  const impl = await sandbox.run({
+    agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
+    maxIterations: budget,
+    name: `Implementer #${spec.id} R${String(round)}`,
+    promptArgs: {
+      BRANCH: spec.branch,
+      FINDINGS: findingsArg,
+      ISSUE_BODY: spec.body,
+      ISSUE_TITLE: spec.title,
+      TASK_ID: spec.id,
+    },
+    promptFile: "./.sandcastle/implement-prompt.md",
+  });
+
+  // Critic
+  const nonce = crypto.randomBytes(4).toString("hex");
+  const findings = await runCritic(sandbox, spec, round, nonce);
+
+  return { beforeSha, commits: impl.commits.length, findings };
+}
+
 /**
  * Computes a deduplication key for a finding using a context hash of surrounding lines.
  * @param f - Finding to compute a key for.
@@ -161,7 +246,11 @@ function findingKey(f: Finding, cwd: string): string {
       .replace(/[^\w\s]/g, "")
       .replace(/\s+/g, " ")
       .trim();
-    const titleHash = crypto.createHash("sha256").update(normalizedTitle).digest("hex").slice(0, 16);
+    const titleHash = crypto
+      .createHash("sha256")
+      .update(normalizedTitle)
+      .digest("hex")
+      .slice(0, 16);
     return `${f.file || "global"}::${f.category}::${titleHash}`;
   }
   const contextHash = hashContextLines(cwd, f.file, f.line, 3);
@@ -178,8 +267,8 @@ function findingKey(f: Finding, cwd: string): string {
  */
 function hashContextLines(cwd: string, file: string, line: number, _radius: number): string {
   try {
-    const fullPath = join(cwd, file);
-    if (!fullPath.startsWith(cwd)) {
+    const fullPath = resolve(cwd, file);
+    if (!fullPath.startsWith(resolve(cwd) + sep)) {
       throw new Error("Path traversal");
     }
     const raw = readFileSync(fullPath, "utf-8");
diff --git a/.sandcastle/task-source.ts b/.sandcastle/task-source.ts
index 42d27bb..99e415c 100644
--- a/.sandcastle/task-source.ts
+++ b/.sandcastle/task-source.ts
@@ -177,22 +177,26 @@ export class GithubIssueSource implements TaskSource {
           if (typeof item.branch !== "string" || !this.branchPattern.test(item.branch))
             return false;
           if (typeof item.title !== "string") return false;
+          if (item.title.length > 200) return false;
+          for (let ci = 0; ci < item.title.length; ci++) {
+            if (item.title.charCodeAt(ci) < 0x20) return false;
+          }
           return true;
         },
       );
 
-    const issueMap = new Map(issuesJson.map((i) => [String(i.number), i]));
-    return validated
-      .map((v) => {
-        const source = issueMap.get(v.id);
-        if (!source) return null;
-        return {
-          ...v,
-          body: source.body,
-          labels: source.labels,
-        };
-      })
-      .filter((v): v is NonNullable<typeof v> => v !== null);
+      const issueMap = new Map(issuesJson.map((i) => [String(i.number), i]));
+      return validated
+        .map((v) => {
+          const source = issueMap.get(v.id);
+          if (!source) return null;
+          return {
+            ...v,
+            body: source.body,
+            labels: source.labels,
+          };
+        })
+        .filter((v): v is NonNullable<typeof v> => v !== null);
     } catch {
       console.error("Planner produced invalid JSON. Retrying.");
       return null;

From 3d2a46f8c5fff39923607b035835f563479cab97 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 16:38:44 +0200
Subject: [PATCH 16/31] =?UTF-8?q?fix(sandcastle):=20address=20review=20fin?=
 =?UTF-8?q?dings=20=E2=80=94=20convergence,=20planner=20failure,=20rebase?=
 =?UTF-8?q?=20note?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Treat newFindings=0 as 'converged' regardless of repeated non-LOW
  findings (previously marked 'exhausted', forcing draft PRs)
- Set process.exitCode=1 when planner exhausts retries (prevents
  masking a broken planner as 'no work to do')
- Pass rebaseSucceeded to buildPrArgs, append rebase failure note
  to PR body when branch is not rebased onto main
- Reword sanitizeForPrompt JSDoc to accurately describe scope
---
 .sandcastle/finalizer.ts       | 13 +++++++++----
 .sandcastle/refinement-loop.ts |  4 +---
 .sandcastle/task-source.ts     |  5 +++--
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/.sandcastle/finalizer.ts b/.sandcastle/finalizer.ts
index e1fedff..2574e90 100644
--- a/.sandcastle/finalizer.ts
+++ b/.sandcastle/finalizer.ts
@@ -82,7 +82,7 @@ export async function finalizeTask(
   pushBranch(cwd, spec, rebaseSucceeded);
 
   // Build PR arguments and create PR
-  const { isDraft, prArgs } = buildPrArgs(spec, loopResult, validationPassed);
+  const { isDraft, prArgs } = buildPrArgs(spec, loopResult, validationPassed, rebaseSucceeded);
 
   let prCreated = false;
   try {
@@ -124,21 +124,26 @@ function attemptRebase(cwd: string, _spec: TaskSpec): boolean {
  * @param spec - The task specification.
  * @param loopResult - The result from the refinement loop.
  * @param validationPassed - Whether the validation suite passed.
+ * @param rebaseSucceeded - Whether the rebase onto main succeeded.
  * @returns Object with `isDraft` flag and `prArgs` string array.
  */
 function buildPrArgs(
   spec: TaskSpec,
   loopResult: LoopResult,
   validationPassed: boolean,
+  rebaseSucceeded: boolean,
 ): { isDraft: boolean; prArgs: string[] } {
   const converged = loopResult.status === "converged";
   const isDraft = !converged || !validationPassed;
   const outstandingNote =
     !converged && loopResult.lastFindings.length > 0
-      ? `\n\n\u26a0\ufe0f Outstanding findings:\n${loopResult.lastFindings.map((f) => `- [${f.severity}] ${f.file}: ${f.title}`).join("\n")}`
+      ? `\n\n⚠️ Outstanding findings:\n${loopResult.lastFindings.map((f) => `- [${f.severity}] ${f.file}: ${f.title}`).join("\n")}`
       : "";
   const validationNote = !validationPassed
-    ? "\n\n\u26a0\ufe0f Validation did not pass. Manual review required."
+    ? "\n\n⚠️ Validation did not pass. Manual review required."
+    : "";
+  const rebaseNote = !rebaseSucceeded
+    ? "\n\n⚠️ Rebase failed. Branch is not rebased onto main."
     : "";
 
   const validationCheck = validationPassed ? "- [x]" : "- [ ]";
@@ -154,7 +159,7 @@ function buildPrArgs(
       : commitPrefix === "fix"
         ? "Bug fix (non-breaking change that fixes an issue)"
         : "Refactoring (no functional changes)";
-  const prBody = `## Description\n\nAutomated ${commitPrefix} for #${spec.id}: ${spec.title}\n\n## Type of Change\n\n- [x] ${typeOfChange}\n\n## Checklist\n\n${validationCheck} I have run validation suite\n- [x] My changes follow the existing code style\n\n## Related Issues\n\nFixes #${spec.id}${outstandingNote}${validationNote}`;
+  const prBody = `## Description\n\nAutomated ${commitPrefix} for #${spec.id}: ${spec.title}\n\n## Type of Change\n\n- [x] ${typeOfChange}\n\n## Checklist\n\n${validationCheck} I have run validation suite\n- [x] My changes follow the existing code style\n\n## Related Issues\n\nFixes #${spec.id}${outstandingNote}${validationNote}${rebaseNote}`;
 
   const prArgs = [
     "pr",
diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts
index baa06a9..f2755d8 100644
--- a/.sandcastle/refinement-loop.ts
+++ b/.sandcastle/refinement-loop.ts
@@ -108,10 +108,8 @@ export async function runRefinementLoop(
       const nonLowFindings = result.findings.filter((f) => f.confidence !== "LOW");
       if (nonLowFindings.length > 0) {
         lastFindings = nonLowFindings;
-        status = "exhausted";
-      } else {
-        status = "converged";
       }
+      status = "converged";
       break;
     }
 
diff --git a/.sandcastle/task-source.ts b/.sandcastle/task-source.ts
index 99e415c..a0405d3 100644
--- a/.sandcastle/task-source.ts
+++ b/.sandcastle/task-source.ts
@@ -111,6 +111,7 @@ export class GithubIssueSource implements TaskSource {
     }
 
     console.warn("Planner failed to produce a valid plan after all retries.");
+    process.exitCode = 1;
     return [];
   }
 
@@ -205,9 +206,9 @@ export class GithubIssueSource implements TaskSource {
 }
 
 /**
- * Strips injection-prone tags from text.
+ * Strips agent-control tags from text to reduce prompt-injection risk.
  * @param text - Raw text to sanitize.
- * @returns Sanitized text safe for prompt injection.
+ * @returns Text with plan/findings/promise tags removed.
  */
 function sanitizeForPrompt(text: string): string {
   return text.replace(/<\/?(?:plan|findings|promise)[^>]*>/gi, "");

From bf8ca0485b67a4cbdec7a30ea1fc19ce6e63aa14 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 17:31:24 +0200
Subject: [PATCH 17/31] fix(.sandcastle): address all algorithmic audit
 findings

- types.ts: parseFindingsSafe with partial recovery (finding #18)
- concurrency-pool.ts: guard NaN/float/Infinity in constructor (#1,#4), JSDoc re-entrant warning (#2)
- task-source.ts: NFKC normalize + extended blocklist in sanitizeForPrompt (#17,#20), Map lookup in validatePlan (#7)
- refinement-loop.ts: readFileSync replaces execSync/sed (shell injection #5), radius used (#12), fallback hash hardened (#13), line clamping (#14), realpathSync symlinks (#15), file cache Map (#11), parseFindingsSafe (#18), nonce validation (#16), trivial match skip (#19), implementer try/catch (#7), ratchet guard round<=2 (#6), no duplicate nonLowFindings (#9)
- finalizer.ts: pushBranch returns boolean (#22), crypto rescue suffix (#21), rollback error level (#8)
- main.ts: TASK_TIMEOUT_MS + Promise.race timeout (#3), flat budget JSDoc (#10)
---
 .sandcastle/concurrency-pool.ts |   7 +-
 .sandcastle/finalizer.ts        |  25 +++++--
 .sandcastle/main.ts             |  52 ++++++++------
 .sandcastle/refinement-loop.ts  | 121 ++++++++++++++++++++------------
 .sandcastle/task-source.ts      |   6 +-
 .sandcastle/types.ts            |  19 ++++-
 6 files changed, 154 insertions(+), 76 deletions(-)

diff --git a/.sandcastle/concurrency-pool.ts b/.sandcastle/concurrency-pool.ts
index e023a43..a755e5a 100644
--- a/.sandcastle/concurrency-pool.ts
+++ b/.sandcastle/concurrency-pool.ts
@@ -14,11 +14,11 @@ export class ConcurrencyPool {
   private tail: null | QueueNode = null;
 
   /**
-   * @param max - Maximum number of concurrent tasks. Must be >= 1.
+   * @param max - Maximum number of concurrent tasks. Must be a positive integer >= 1.
    */
   constructor(private readonly max: number) {
-    if (max < 1) {
-      throw new RangeError("ConcurrencyPool max must be >= 1");
+    if (!Number.isInteger(max) || max < 1) {
+      throw new RangeError("ConcurrencyPool max must be a positive integer >= 1");
     }
   }
 
@@ -26,6 +26,7 @@ export class ConcurrencyPool {
    * Executes the given async function, waiting if the pool is at capacity.
    * @param fn - Async function to execute within the pool.
    * @returns The result of the function.
+   * @remarks Re-entrant calls using the same pool instance may deadlock when all slots are occupied.
    */
   async run<T>(fn: () => Promise<T>): Promise<T> {
     await this.acquire();
diff --git a/.sandcastle/finalizer.ts b/.sandcastle/finalizer.ts
index 2574e90..3832fd4 100644
--- a/.sandcastle/finalizer.ts
+++ b/.sandcastle/finalizer.ts
@@ -1,5 +1,6 @@
 import * as sandcastle from "@ai-hero/sandcastle";
 import { execFileSync } from "node:child_process";
+import crypto from "node:crypto";
 
 import type { FinalizeResult, LoopResult, SandboxInstance, TaskSpec } from "./types.js";
 
@@ -79,7 +80,10 @@ export async function finalizeTask(
   }
 
   // Push
-  pushBranch(cwd, spec, rebaseSucceeded);
+  const pushSucceeded = pushBranch(cwd, spec, rebaseSucceeded);
+  if (!pushSucceeded) {
+    console.warn(`  #${spec.id}: Push did not succeed; PR may reference unpushed commits.`);
+  }
 
   // Build PR arguments and create PR
   const { isDraft, prArgs } = buildPrArgs(spec, loopResult, validationPassed, rebaseSucceeded);
@@ -195,32 +199,39 @@ function extractStderr(err: unknown): string {
  * @param cwd - Working directory (worktree path).
  * @param spec - The task specification.
  * @param rebaseSucceeded - Whether the preceding rebase completed successfully.
+ * @returns `true` if the primary push succeeded, `false` otherwise.
  */
-function pushBranch(cwd: string, spec: TaskSpec, rebaseSucceeded: boolean): void {
+function pushBranch(cwd: string, spec: TaskSpec, rebaseSucceeded: boolean): boolean {
   if (rebaseSucceeded) {
     try {
       execFileSync("git", ["push", "--force-with-lease"], { cwd, stdio: "pipe" });
+      return true;
     } catch (pushErr: unknown) {
       const pushMsg = pushErr instanceof Error ? pushErr.message : String(pushErr);
       try {
-        execFileSync(
-          "git",
-          ["push", "origin", `HEAD:refs/heads/rescue/${spec.branch}-${String(Date.now())}`],
-          { cwd, stdio: "pipe" },
+        const suffix = crypto.randomBytes(4).toString("hex");
+        execFileSync("git", ["push", "origin", `HEAD:refs/heads/rescue/${spec.branch}-${suffix}`], {
+          cwd,
+          stdio: "pipe",
+        });
+        console.warn(
+          `  #${spec.id}: Push failed. Commits preserved at rescue/${spec.branch}-${suffix}`,
         );
-        console.warn(`  #${spec.id}: Push failed. Commits preserved at rescue/${spec.branch}-...`);
       } catch {
         console.error(
           `  #${spec.id}: Push failed and rescue failed. Commits will be lost on sandbox disposal: ${pushMsg}`,
         );
       }
+      return false;
     }
   } else {
     try {
       execFileSync("git", ["push"], { cwd, stdio: "pipe" });
+      return true;
     } catch (pushErr: unknown) {
       const pushMsg = pushErr instanceof Error ? pushErr.message : String(pushErr);
       console.warn(`  #${spec.id}: git push failed after rebase abort: ${pushMsg}`);
+      return false;
     }
   }
 }
diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts
index 98bf998..dbf3286 100644
--- a/.sandcastle/main.ts
+++ b/.sandcastle/main.ts
@@ -11,6 +11,7 @@ const BRANCH_PREFIX = "agent/issue";
 const ISSUE_LABEL = "sandcastle";
 const MAX_PARALLEL = 3;
 const DOCKER_IMAGE = "sandcastle-sap-ai";
+const TASK_TIMEOUT_MS = 15 * 60 * 1000;
 
 const source = new GithubIssueSource({
   branchPrefix: BRANCH_PREFIX,
@@ -27,30 +28,39 @@ if (tasks.length === 0) {
 
   const settled = await Promise.allSettled(
     tasks.map((spec) =>
-      pool.run(async () => {
-        await using sandbox = await sandcastle.createSandbox({
-          branch: spec.branch,
-          copyToWorktree: ["node_modules"],
-          hooks: {
-            sandbox: { onSandboxReady: [{ command: "npm install && npm run build" }] },
-          },
-          sandbox: docker({ imageName: DOCKER_IMAGE }),
-        });
+      pool.run(() =>
+        Promise.race([
+          (async () => {
+            await using sandbox = await sandcastle.createSandbox({
+              branch: spec.branch,
+              copyToWorktree: ["node_modules"],
+              hooks: {
+                sandbox: { onSandboxReady: [{ command: "npm install && npm run build" }] },
+              },
+              sandbox: docker({ imageName: DOCKER_IMAGE }),
+            });
 
-        const loopResult = await runRefinementLoop(spec, sandbox, {
-          iterationBudget: ITERATION_BUDGET_PER_ROUND,
-          maxRounds: MAX_CRITIC_ROUNDS,
-        });
+            const loopResult = await runRefinementLoop(spec, sandbox, {
+              iterationBudget: ITERATION_BUDGET_PER_ROUND,
+              maxRounds: MAX_CRITIC_ROUNDS,
+            });
 
-        let prCreated = false;
-        if (loopResult.totalCommits > 0) {
-          const cwd = sandbox.worktreePath;
-          const result = await finalizeTask(spec, loopResult, sandbox, cwd);
-          prCreated = result.prCreated;
-        }
+            let prCreated = false;
+            if (loopResult.totalCommits > 0) {
+              const cwd = sandbox.worktreePath;
+              const result = await finalizeTask(spec, loopResult, sandbox, cwd);
+              prCreated = result.prCreated;
+            }
 
-        return { prCreated, spec };
-      }),
+            return { prCreated, spec };
+          })(),
+          new Promise<never>((_, reject) => {
+            setTimeout(() => {
+              reject(new Error(`Task #${spec.id} timed out after ${String(TASK_TIMEOUT_MS)}ms`));
+            }, TASK_TIMEOUT_MS);
+          }),
+        ]),
+      ),
     ),
   );
 
diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts
index f2755d8..b3c5a93 100644
--- a/.sandcastle/refinement-loop.ts
+++ b/.sandcastle/refinement-loop.ts
@@ -1,12 +1,12 @@
 import * as sandcastle from "@ai-hero/sandcastle";
 import { execFileSync } from "node:child_process";
 import crypto from "node:crypto";
-import { readFileSync } from "node:fs";
-import { resolve, sep } from "node:path";
+import { readFileSync, realpathSync } from "node:fs";
+import { join, sep } from "node:path";
 
 import type { Finding, LoopResult, LoopStatus, SandboxInstance, TaskSpec } from "./types.js";
 
-import { FindingsSchema, ITERATION_BUDGET_PER_ROUND, MAX_CRITIC_ROUNDS } from "./types.js";
+import { ITERATION_BUDGET_PER_ROUND, MAX_CRITIC_ROUNDS, parseFindingsSafe } from "./types.js";
 
 /** Options for configuring the refinement loop. */
 export interface RefinementLoopOptions {
@@ -105,7 +105,6 @@ export async function runRefinementLoop(
     opts?.onRoundComplete?.(round, result.findings);
 
     if (newFindings.length === 0) {
-      const nonLowFindings = result.findings.filter((f) => f.confidence !== "LOW");
       if (nonLowFindings.length > 0) {
         lastFindings = nonLowFindings;
       }
@@ -137,7 +136,7 @@ function checkQualityRatchet(
   beforeSha: string,
   cwd: string,
 ): boolean {
-  if (round <= 1 || findingsCount <= previousCount) {
+  if (round <= 2 || findingsCount <= previousCount) {
     return false;
   }
 
@@ -170,11 +169,12 @@ function checkQualityRatchet(
  * @returns Array of new, non-LOW-confidence findings.
  */
 function deduplicateFindings(findings: Finding[], seenKeys: Set<string>, cwd: string): Finding[] {
+  const fileCache = new Map<string, string>();
   const newFindings = findings.filter(
-    (f) => f.confidence !== "LOW" && !seenKeys.has(findingKey(f, cwd)),
+    (f) => f.confidence !== "LOW" && !seenKeys.has(findingKey(f, cwd, fileCache)),
   );
   for (const f of newFindings) {
-    seenKeys.add(findingKey(f, cwd));
+    seenKeys.add(findingKey(f, cwd, fileCache));
   }
   return newFindings;
 }
@@ -210,19 +210,26 @@ async function executeRound(
   }
 
   // Implementer
-  const impl = await sandbox.run({
-    agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
-    maxIterations: budget,
-    name: `Implementer #${spec.id} R${String(round)}`,
-    promptArgs: {
-      BRANCH: spec.branch,
-      FINDINGS: findingsArg,
-      ISSUE_BODY: spec.body,
-      ISSUE_TITLE: spec.title,
-      TASK_ID: spec.id,
-    },
-    promptFile: "./.sandcastle/implement-prompt.md",
-  });
+  let impl: Awaited<ReturnType<typeof sandbox.run>>;
+  try {
+    impl = await sandbox.run({
+      agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
+      maxIterations: budget,
+      name: `Implementer #${spec.id} R${String(round)}`,
+      promptArgs: {
+        BRANCH: spec.branch,
+        FINDINGS: findingsArg,
+        ISSUE_BODY: spec.body,
+        ISSUE_TITLE: spec.title,
+        TASK_ID: spec.id,
+      },
+      promptFile: "./.sandcastle/implement-prompt.md",
+    });
+  } catch (err: unknown) {
+    const msg = err instanceof Error ? (err.stack ?? err.message) : String(err);
+    console.error(`  #${spec.id} R${String(round)}: Implementer threw: ${msg}`);
+    return { beforeSha, commits: 0, findings: null };
+  }
 
   // Critic
   const nonce = crypto.randomBytes(4).toString("hex");
@@ -235,9 +242,10 @@ async function executeRound(
  * Computes a deduplication key for a finding using a context hash of surrounding lines.
  * @param f - Finding to compute a key for.
  * @param cwd - Working directory (worktree path) for reading file context.
+ * @param fileCache - Optional cache of file contents keyed by resolved path.
  * @returns Composite dedup key.
  */
-function findingKey(f: Finding, cwd: string): string {
+function findingKey(f: Finding, cwd: string, fileCache?: Map<string, string>): string {
   if (!f.file || f.line == null) {
     const normalizedTitle = f.title
       .toLowerCase()
@@ -251,33 +259,56 @@ function findingKey(f: Finding, cwd: string): string {
       .slice(0, 16);
     return `${f.file || "global"}::${f.category}::${titleHash}`;
   }
-  const contextHash = hashContextLines(cwd, f.file, f.line, 3);
+  const contextHash = hashContextLines(cwd, f.file, f.line, 3, fileCache);
   return `${f.file}::${f.category}::${contextHash}`;
 }
 
 /**
- * Hashes the target line content for dedup stability.
+ * Hashes a window of lines around the finding for dedup stability.
  * @param cwd - Working directory.
  * @param file - Relative file path.
  * @param line - Line number of the finding.
- * @param _radius - Unused; kept for call-site compatibility.
+ * @param radius - Number of lines above/below to include in the context window.
+ * @param fileCache - Optional cache of file contents keyed by resolved path.
  * @returns Truncated SHA-256 hex digest.
  */
-function hashContextLines(cwd: string, file: string, line: number, _radius: number): string {
+function hashContextLines(
+  cwd: string,
+  file: string,
+  line: number,
+  radius: number,
+  fileCache?: Map<string, string>,
+): string {
   try {
-    const fullPath = resolve(cwd, file);
-    if (!fullPath.startsWith(resolve(cwd) + sep)) {
+    const fullPath = realpathSync(join(cwd, file));
+    if (!fullPath.startsWith(realpathSync(cwd) + sep)) {
       throw new Error("Path traversal");
     }
-    const raw = readFileSync(fullPath, "utf-8");
+    let raw: string;
+    const cached = fileCache?.get(fullPath);
+    if (cached !== undefined) {
+      raw = cached;
+    } else {
+      raw = readFileSync(fullPath, "utf-8");
+      if (fileCache) fileCache.set(fullPath, raw);
+    }
     const lines = raw.split("\n");
-    const targetLine = lines[Math.max(0, line - 1)] ?? "";
-    const normalized = targetLine.replace(/\s+/g, " ").trim();
-    // Include file path as salt to prevent cross-file hash collisions
-    return crypto.createHash("sha256").update(`${file}:${normalized}`).digest("hex").slice(0, 16);
+    const idx = Math.min(Math.max(0, line - 1), lines.length - 1);
+    const start = Math.max(0, idx - radius);
+    const end = Math.min(lines.length - 1, idx + radius);
+    const window = lines.slice(start, end + 1).join("\n");
+    const normalized = window.replace(/\s+/g, " ").trim();
+    return crypto
+      .createHash("sha256")
+      .update(`${file}:${String(line)}:${normalized}`)
+      .digest("hex")
+      .slice(0, 16);
   } catch {
-    const truncTitle = file.slice(0, 30) + String(line);
-    return crypto.createHash("sha256").update(truncTitle).digest("hex").slice(0, 16);
+    return crypto
+      .createHash("sha256")
+      .update(`${file}:${String(line)}:fallback`)
+      .digest("hex")
+      .slice(0, 16);
   }
 }
 
@@ -288,18 +319,22 @@ function hashContextLines(cwd: string, file: string, line: number, _radius: numb
  * @returns Parsed findings array or null on parse failure.
  */
 function parseFindings(stdout: string, nonce: string): Finding[] | null {
+  if (!/^[0-9a-f]+$/.test(nonce)) return null;
   const tagPattern = new RegExp(`<findings-${nonce}>([\\s\\S]*?)<\\/findings-${nonce}>`, "g");
   const matches = [...stdout.matchAll(tagPattern)];
-  if (matches.length === 0) {
-    return null;
-  }
-  const raw = matches.at(-1)?.[1]?.trim() ?? "[]";
-  const cleaned = raw.replace(/^```(?:json)?\s*\n?/g, "").replace(/\n?```\s*$/g, "");
-  try {
-    return FindingsSchema.parse(JSON.parse(cleaned));
-  } catch {
-    return null;
+  if (matches.length === 0) return null;
+  // Find last non-trivial match
+  for (let i = matches.length - 1; i >= 0; i--) {
+    const raw = matches[i]?.[1]?.trim() ?? "";
+    if (raw.length < 2) continue;
+    const cleaned = raw.replace(/^```(?:json)?\s*\n?/g, "").replace(/\n?```\s*$/g, "");
+    try {
+      return parseFindingsSafe(JSON.parse(cleaned));
+    } catch {
+      continue;
+    }
   }
+  return null;
 }
 
 /**
diff --git a/.sandcastle/task-source.ts b/.sandcastle/task-source.ts
index a0405d3..8773b0d 100644
--- a/.sandcastle/task-source.ts
+++ b/.sandcastle/task-source.ts
@@ -211,5 +211,9 @@ export class GithubIssueSource implements TaskSource {
  * @returns Text with plan/findings/promise tags removed.
  */
 function sanitizeForPrompt(text: string): string {
-  return text.replace(/<\/?(?:plan|findings|promise)[^>]*>/gi, "");
+  const normalized = text.normalize("NFKC");
+  return normalized.replace(
+    /<\/?(?:plan|findings|promise|system|code|instructions|implement|review|tool_call)[^>]*>/gi,
+    "",
+  );
 }
diff --git a/.sandcastle/types.ts b/.sandcastle/types.ts
index efea8f8..300df94 100644
--- a/.sandcastle/types.ts
+++ b/.sandcastle/types.ts
@@ -48,10 +48,27 @@ export type LoopStatus = "converged" | "exhausted" | "failed" | "skipped";
 /** Type alias for a sandcastle sandbox instance. */
 export type SandboxInstance = Awaited<ReturnType<typeof sandcastle.createSandbox>>;
 
+/**
+ * Parses a findings array with partial recovery — invalid entries are discarded.
+ * @param data - Raw parsed JSON value to validate as a findings array.
+ * @returns Array of valid findings (may be empty).
+ */
+export function parseFindingsSafe(data: unknown): Finding[] {
+  if (!Array.isArray(data)) return [];
+  return data
+    .map((entry) => FindingSchema.safeParse(entry))
+    .filter((r): r is z.ZodSafeParseSuccess<Finding> => r.success)
+    .map((r) => r.data);
+}
+
 /** Maximum implement↔critic rounds before giving up. */
 export const MAX_CRITIC_ROUNDS = 5;
 
-/** Token budget applied uniformly to every implement round. */
+/**
+ * Flat iteration budget per round (intentionally constant, not decreasing).
+ * Evidence: ARCS (arXiv:2504.20434), SWE-Agent, AutoCodeRover all use flat budgets.
+ * Decreasing schedules penalize harder residual problems in later rounds.
+ */
 export const ITERATION_BUDGET_PER_ROUND = 50;
 
 /** Specification for a task to be implemented. */

From d1db4e46e5524d66a597c23f9376462e105c924c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 17:59:11 +0200
Subject: [PATCH 18/31] fix: unref timeout timer to prevent process hang, catch
 critic throws

---
 .sandcastle/main.ts            | 2 +-
 .sandcastle/refinement-loop.ts | 9 ++++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts
index dbf3286..7b61188 100644
--- a/.sandcastle/main.ts
+++ b/.sandcastle/main.ts
@@ -57,7 +57,7 @@ if (tasks.length === 0) {
           new Promise<never>((_, reject) => {
             setTimeout(() => {
               reject(new Error(`Task #${spec.id} timed out after ${String(TASK_TIMEOUT_MS)}ms`));
-            }, TASK_TIMEOUT_MS);
+            }, TASK_TIMEOUT_MS).unref();
           }),
         ]),
       ),
diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts
index b3c5a93..1f3ec8f 100644
--- a/.sandcastle/refinement-loop.ts
+++ b/.sandcastle/refinement-loop.ts
@@ -233,7 +233,14 @@ async function executeRound(
 
   // Critic
   const nonce = crypto.randomBytes(4).toString("hex");
-  const findings = await runCritic(sandbox, spec, round, nonce);
+  let findings: Finding[] | null;
+  try {
+    findings = await runCritic(sandbox, spec, round, nonce);
+  } catch (err: unknown) {
+    const msg = err instanceof Error ? err.message : String(err);
+    console.error(`  #${spec.id} R${String(round)}: Critic threw: ${msg}`);
+    findings = null;
+  }
 
   return { beforeSha, commits: impl.commits.length, findings };
 }

From 7f0a41ad2f6d7d6a98b3ebc14536166e894b0e91 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 18:05:24 +0200
Subject: [PATCH 19/31] fix: count commits before break on critic failure
 (prevents work loss)

---
 .sandcastle/refinement-loop.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts
index 1f3ec8f..2d4ca47 100644
--- a/.sandcastle/refinement-loop.ts
+++ b/.sandcastle/refinement-loop.ts
@@ -71,6 +71,7 @@ export async function runRefinementLoop(
     }
 
     if (result.findings === null) {
+      totalCommits += result.commits;
       console.warn(`  #${spec.id}: Critic failed twice. Breaking (non-converged).`);
       status = "failed";
       break;

From 5872f709541e0ac66de01a50a6f97161283ed7bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 18:09:19 +0200
Subject: [PATCH 20/31] fix: suppress unhandled rejection from timeout promise
 on task success

---
 .sandcastle/main.ts | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts
index 7b61188..946138f 100644
--- a/.sandcastle/main.ts
+++ b/.sandcastle/main.ts
@@ -54,11 +54,17 @@ if (tasks.length === 0) {
 
             return { prCreated, spec };
           })(),
-          new Promise<never>((_, reject) => {
-            setTimeout(() => {
-              reject(new Error(`Task #${spec.id} timed out after ${String(TASK_TIMEOUT_MS)}ms`));
-            }, TASK_TIMEOUT_MS).unref();
-          }),
+          (() => {
+            const p = new Promise<never>((_, reject) => {
+              setTimeout(() => {
+                reject(new Error(`Task #${spec.id} timed out after ${String(TASK_TIMEOUT_MS)}ms`));
+              }, TASK_TIMEOUT_MS).unref();
+            });
+            p.catch(() => {
+              /* suppress unhandled rejection when task completes before timeout */
+            });
+            return p;
+          })(),
         ]),
       ),
     ),

From 5881b920afbed8b7b5c8237efcb961204db1045b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 18:14:19 +0200
Subject: [PATCH 21/31] fix: force process exit after completion (prevents hang
 from timed-out sandboxes)

---
 .sandcastle/main.ts | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts
index 946138f..3ca2d55 100644
--- a/.sandcastle/main.ts
+++ b/.sandcastle/main.ts
@@ -89,3 +89,5 @@ if (tasks.length === 0) {
     process.exitCode = 1;
   }
 }
+
+process.exit(process.exitCode ?? 0);

From 29c2ce6f67bafa2add5a217acceb015df7b8570b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 18:18:32 +0200
Subject: [PATCH 22/31] fix: check findings null before commits zero (correct
 status on implementer crash)

---
 .sandcastle/refinement-loop.ts | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts
index 2d4ca47..358153d 100644
--- a/.sandcastle/refinement-loop.ts
+++ b/.sandcastle/refinement-loop.ts
@@ -65,11 +65,6 @@ export async function runRefinementLoop(
       break;
     }
 
-    if (round > 1 && result.commits === 0) {
-      status = "exhausted";
-      break;
-    }
-
     if (result.findings === null) {
       totalCommits += result.commits;
       console.warn(`  #${spec.id}: Critic failed twice. Breaking (non-converged).`);
@@ -77,6 +72,11 @@ export async function runRefinementLoop(
       break;
     }
 
+    if (round > 1 && result.commits === 0) {
+      status = "exhausted";
+      break;
+    }
+
     // Dedup via context hash
     const cwd = sandbox.worktreePath;
     const newFindings = deduplicateFindings(result.findings, seenKeys, cwd);

From a4f323de9d30400b857b5a97e342765b38817c9c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 18:29:58 +0200
Subject: [PATCH 23/31] fix: report known findings in PR body even when
 converged (prevents silent false convergence)

---
 .sandcastle/finalizer.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.sandcastle/finalizer.ts b/.sandcastle/finalizer.ts
index 3832fd4..1b5aa42 100644
--- a/.sandcastle/finalizer.ts
+++ b/.sandcastle/finalizer.ts
@@ -140,8 +140,8 @@ function buildPrArgs(
   const converged = loopResult.status === "converged";
   const isDraft = !converged || !validationPassed;
   const outstandingNote =
-    !converged && loopResult.lastFindings.length > 0
-      ? `\n\n⚠️ Outstanding findings:\n${loopResult.lastFindings.map((f) => `- [${f.severity}] ${f.file}: ${f.title}`).join("\n")}`
+    loopResult.lastFindings.length > 0
+      ? `\n\n${converged ? "ℹ️ Known findings (not addressed):" : "⚠️ Outstanding findings:"}\n${loopResult.lastFindings.map((f) => `- [${f.severity}] ${f.file}: ${f.title}`).join("\n")}`
       : "";
   const validationNote = !validationPassed
     ? "\n\n⚠️ Validation did not pass. Manual review required."

From 41d09b47b611c965fd955b421eb1597871792e4b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 18:42:25 +0200
Subject: [PATCH 24/31] feat: state-of-the-art convergence improvements (ARCS,
 SWE-Agent, OpenHands)

- Validation in-loop (ARCS): deterministic convergence when tests pass mid-loop
- Best-state checkpoint (SWE-Agent): reset to best SHA on non-convergence
- Severity-weighted convergence (OpenHands): refuse convergence if CRITICAL/HIGH persist
---
 .sandcastle/refinement-loop.ts | 57 ++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts
index 358153d..a31de28 100644
--- a/.sandcastle/refinement-loop.ts
+++ b/.sandcastle/refinement-loop.ts
@@ -8,6 +8,9 @@ import type { Finding, LoopResult, LoopStatus, SandboxInstance, TaskSpec } from
 
 import { ITERATION_BUDGET_PER_ROUND, MAX_CRITIC_ROUNDS, parseFindingsSafe } from "./types.js";
 
+const VALIDATION_COMMAND =
+  "npm run type-check && npm run test && npm run test:node && npm run test:edge && npm run prettier-check && npm run lint && npm run build && npm run check-build && npm run build:v2 && npm run check-build:v2";
+
 /** Options for configuring the refinement loop. */
 export interface RefinementLoopOptions {
   /** Budget of iterations per round (flat constant applied to every round). */
@@ -49,6 +52,8 @@ export async function runRefinementLoop(
   let totalCommits = 0;
   let roundsCompleted = 0;
   let previousFindingsCount = Infinity;
+  let bestSha = "";
+  let bestFindingsCount = Infinity;
 
   for (let round = 1; round <= maxRounds; round++) {
     roundsCompleted = round;
@@ -77,6 +82,22 @@ export async function runRefinementLoop(
       break;
     }
 
+    // Fix 1: Validation in-loop (ARCS pattern) — deterministic convergence signal
+    if (result.commits > 0) {
+      try {
+        execFileSync("sh", ["-c", VALIDATION_COMMAND], {
+          cwd: sandbox.worktreePath,
+          stdio: "pipe",
+        });
+        // Validation passed mid-loop — deterministic convergence
+        totalCommits += result.commits;
+        status = "converged";
+        break;
+      } catch {
+        // Validation failed — continue to critic for feedback
+      }
+    }
+
     // Dedup via context hash
     const cwd = sandbox.worktreePath;
     const newFindings = deduplicateFindings(result.findings, seenKeys, cwd);
@@ -85,6 +106,20 @@ export async function runRefinementLoop(
       `  #${spec.id}: ${String(result.findings.length)} findings, ${String(newFindings.length)} new`,
     );
 
+    // Best-state checkpoint (SWE-Agent pattern)
+    if (newFindings.length < bestFindingsCount) {
+      bestFindingsCount = newFindings.length;
+      try {
+        bestSha = execFileSync("git", ["rev-parse", "HEAD"], {
+          cwd: sandbox.worktreePath,
+          encoding: "utf-8",
+          stdio: ["pipe", "pipe", "pipe"],
+        }).trim();
+      } catch {
+        /* empty */
+      }
+    }
+
     // Quality ratchet: rollback if findings increased (regression)
     const nonLowFindings = result.findings.filter((f) => f.confidence !== "LOW");
     if (
@@ -106,6 +141,16 @@ export async function runRefinementLoop(
     opts?.onRoundComplete?.(round, result.findings);
 
     if (newFindings.length === 0) {
+      // Severity-weighted convergence (OpenHands pattern):
+      // Don't converge if CRITICAL/HIGH findings persist, even if already seen
+      const criticalPersistent = result.findings.filter(
+        (f) => (f.severity === "CRITICAL" || f.severity === "HIGH") && f.confidence !== "LOW",
+      );
+      if (criticalPersistent.length > 0) {
+        lastFindings = criticalPersistent;
+        status = "exhausted";
+        break;
+      }
       if (nonLowFindings.length > 0) {
         lastFindings = nonLowFindings;
       }
@@ -116,6 +161,18 @@ export async function runRefinementLoop(
     lastFindings = newFindings;
   }
 
+  // Best-state reset: if not converged, restore best intermediate state
+  if (status !== "converged" && /^[0-9a-f]{40}$/.test(bestSha)) {
+    try {
+      execFileSync("git", ["reset", "--hard", bestSha], {
+        cwd: sandbox.worktreePath,
+        stdio: "pipe",
+      });
+    } catch {
+      /* empty */
+    }
+  }
+
   return { lastFindings, roundsCompleted, status, totalCommits };
 }
 

From 2f8cbf74a80d5de85e6077f61d2aa77ea483db0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 18:50:41 +0200
Subject: [PATCH 25/31] fix: move bestSha after ratchet, add validation
 timeout, fix severity/bestSha mismatch

---
 .sandcastle/refinement-loop.ts | 40 ++++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts
index a31de28..0159152 100644
--- a/.sandcastle/refinement-loop.ts
+++ b/.sandcastle/refinement-loop.ts
@@ -88,6 +88,7 @@ export async function runRefinementLoop(
         execFileSync("sh", ["-c", VALIDATION_COMMAND], {
           cwd: sandbox.worktreePath,
           stdio: "pipe",
+          timeout: 120_000,
         });
         // Validation passed mid-loop — deterministic convergence
         totalCommits += result.commits;
@@ -106,20 +107,6 @@ export async function runRefinementLoop(
       `  #${spec.id}: ${String(result.findings.length)} findings, ${String(newFindings.length)} new`,
     );
 
-    // Best-state checkpoint (SWE-Agent pattern)
-    if (newFindings.length < bestFindingsCount) {
-      bestFindingsCount = newFindings.length;
-      try {
-        bestSha = execFileSync("git", ["rev-parse", "HEAD"], {
-          cwd: sandbox.worktreePath,
-          encoding: "utf-8",
-          stdio: ["pipe", "pipe", "pipe"],
-        }).trim();
-      } catch {
-        /* empty */
-      }
-    }
-
     // Quality ratchet: rollback if findings increased (regression)
     const nonLowFindings = result.findings.filter((f) => f.confidence !== "LOW");
     if (
@@ -135,6 +122,21 @@ export async function runRefinementLoop(
       status = "exhausted";
       break;
     }
+
+    // Best-state checkpoint (SWE-Agent pattern) — after ratchet passes
+    if (newFindings.length < bestFindingsCount) {
+      bestFindingsCount = newFindings.length;
+      try {
+        bestSha = execFileSync("git", ["rev-parse", "HEAD"], {
+          cwd: sandbox.worktreePath,
+          encoding: "utf-8",
+          stdio: ["pipe", "pipe", "pipe"],
+        }).trim();
+      } catch {
+        /* empty */
+      }
+    }
+
     totalCommits += result.commits;
     previousFindingsCount = nonLowFindings.length;
 
@@ -149,6 +151,16 @@ export async function runRefinementLoop(
       if (criticalPersistent.length > 0) {
         lastFindings = criticalPersistent;
         status = "exhausted";
+        // Capture current HEAD so post-loop reset is a no-op (code matches findings)
+        try {
+          bestSha = execFileSync("git", ["rev-parse", "HEAD"], {
+            cwd: sandbox.worktreePath,
+            encoding: "utf-8",
+            stdio: ["pipe", "pipe", "pipe"],
+          }).trim();
+        } catch {
+          /* empty */
+        }
         break;
       }
       if (nonLowFindings.length > 0) {

From 5ef372560d69cf755efc8484fc98d80d270a01d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 18:56:46 +0200
Subject: [PATCH 26/31] fix: recount totalCommits from git after best-state
 reset (semantic correctness)

---
 .sandcastle/refinement-loop.ts | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts
index 0159152..0589ebc 100644
--- a/.sandcastle/refinement-loop.ts
+++ b/.sandcastle/refinement-loop.ts
@@ -180,6 +180,11 @@ export async function runRefinementLoop(
         cwd: sandbox.worktreePath,
         stdio: "pipe",
       });
+      const commitCount = execFileSync("git", ["rev-list", "--count", "main..HEAD"], {
+        cwd: sandbox.worktreePath,
+        encoding: "utf-8",
+      }).trim();
+      totalCommits = parseInt(commitCount, 10) || 0;
     } catch {
       /* empty */
     }

From 177ecbd7214590791126fe1ab2dd3a6e41f02c99 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 20:19:27 +0200
Subject: [PATCH 27/31] refactor(.sandcastle): address all 45 quality audit
 findings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- New constants.ts: shared constants (VALIDATION_COMMAND, timeouts, model names) + utilities (getHeadSha, toErrorMessage)
- refinement-loop.ts: decompose runRefinementLoop (CC 17→≤10), RoundContext/HashInput param objects, computeFindingKey rename
- finalizer.ts: add timeouts to all execFileSync, use runValidation helper consistently
- task-source.ts: add timeout, replace char loop with regex, fix terse names
- main.ts: extract withTimeout helper, use model constants
- types.ts: unexport FindingsSchema (internal only)
---
 .sandcastle/constants.ts       |  61 ++++++
 .sandcastle/finalizer.ts       |  64 ++++--
 .sandcastle/main.ts            |  38 ++--
 .sandcastle/refinement-loop.ts | 388 +++++++++++++++++++++------------
 .sandcastle/task-source.ts     |  47 ++--
 .sandcastle/types.ts           |  23 +-
 6 files changed, 417 insertions(+), 204 deletions(-)
 create mode 100644 .sandcastle/constants.ts

diff --git a/.sandcastle/constants.ts b/.sandcastle/constants.ts
new file mode 100644
index 0000000..23374bb
--- /dev/null
+++ b/.sandcastle/constants.ts
@@ -0,0 +1,61 @@
+import { execFileSync } from "node:child_process";
+
+/** Model identifier used for implementation and critic agents. */
+export const AGENT_MODEL = "github-copilot/claude-sonnet-4.6";
+
+/** Number of context lines around a diff hunk used for hash computation. */
+export const CONTEXT_HASH_RADIUS = 3;
+
+/** Timeout in milliseconds for git operations. */
+export const GIT_TIMEOUT_MS = 30_000;
+
+/** Number of characters to retain from a SHA for display purposes. */
+export const HASH_PREFIX_LENGTH = 16;
+
+/** Maximum number of characters captured from stderr before truncation. */
+export const MAX_STDERR_CHARS = 500;
+
+/** Maximum number of characters allowed in a PR or commit title. */
+export const MAX_TITLE_LENGTH = 200;
+
+/** Model identifier used for planning and orchestration agents. */
+export const PLANNER_MODEL = "github-copilot/claude-opus-4.6";
+
+/** Timeout in milliseconds for git push operations. */
+export const PUSH_TIMEOUT_MS = 60_000;
+
+/** Timeout in milliseconds for a single sandcastle task execution. */
+export const TASK_TIMEOUT_MS = 15 * 60 * 1000;
+
+/** Full validation command run after each implementation round. */
+export const VALIDATION_COMMAND =
+  "npm run type-check && npm run test && npm run test:node && npm run test:edge && npm run prettier-check && npm run lint && npm run build && npm run check-build && npm run build:v2 && npm run check-build:v2";
+
+/** Timeout in milliseconds for the validation command. */
+export const VALIDATION_TIMEOUT_MS = 120_000;
+
+/**
+ * Returns the current HEAD commit SHA for the given working directory.
+ * @param cwd - Absolute path to the git repository root.
+ * @returns The full SHA string, or `null` if the command fails.
+ */
+export function getHeadSha(cwd: string): null | string {
+  try {
+    return execFileSync("git", ["rev-parse", "HEAD"], {
+      cwd,
+      encoding: "utf-8",
+      stdio: ["pipe", "pipe", "pipe"],
+    }).trim();
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Converts an unknown thrown value to a human-readable error message.
+ * @param err - The caught value (may be an `Error` or any other type).
+ * @returns The `message` property if `err` is an `Error`, otherwise `String(err)`.
+ */
+export function toErrorMessage(err: unknown): string {
+  return err instanceof Error ? err.message : String(err);
+}
diff --git a/.sandcastle/finalizer.ts b/.sandcastle/finalizer.ts
index 1b5aa42..ab949d3 100644
--- a/.sandcastle/finalizer.ts
+++ b/.sandcastle/finalizer.ts
@@ -4,11 +4,17 @@ import crypto from "node:crypto";
 
 import type { FinalizeResult, LoopResult, SandboxInstance, TaskSpec } from "./types.js";
 
+import {
+  AGENT_MODEL,
+  GIT_TIMEOUT_MS,
+  MAX_STDERR_CHARS,
+  PUSH_TIMEOUT_MS,
+  toErrorMessage,
+  VALIDATION_COMMAND,
+  VALIDATION_TIMEOUT_MS,
+} from "./constants.js";
 import { ITERATION_BUDGET_PER_ROUND, MAX_CRITIC_ROUNDS } from "./types.js";
 
-const VALIDATION_COMMAND =
-  "npm run type-check && npm run test && npm run test:node && npm run test:edge && npm run prettier-check && npm run lint && npm run build && npm run check-build && npm run build:v2 && npm run check-build:v2";
-
 /**
  * Finalizes a task after the refinement loop: validates, retries if needed, rebases, pushes, and creates a PR.
  * @param spec - The task specification.
@@ -34,7 +40,7 @@ export async function finalizeTask(
 
     try {
       await sandbox.run({
-        agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
+        agent: sandcastle.opencode(AGENT_MODEL),
         maxIterations: retryBudget,
         name: `Implementer #${spec.id} retry`,
         promptArgs: {
@@ -50,14 +56,18 @@ export async function finalizeTask(
         promptFile: "./.sandcastle/implement-prompt.md",
       });
     } catch (retryErr: unknown) {
-      const retryMsg = retryErr instanceof Error ? retryErr.message : String(retryErr);
+      const retryMsg = toErrorMessage(retryErr);
       console.warn(
         `  #${spec.id}: Implementer retry threw: ${retryMsg}. Falling through to PR creation.`,
       );
     }
 
     try {
-      execFileSync("sh", ["-c", VALIDATION_COMMAND], { cwd, stdio: "pipe" });
+      execFileSync("sh", ["-c", VALIDATION_COMMAND], {
+        cwd,
+        stdio: "pipe",
+        timeout: VALIDATION_TIMEOUT_MS,
+      });
       validationPassed = true;
       console.log(`  #${spec.id}: Validation passed after retry round.`);
     } catch {
@@ -66,10 +76,14 @@ export async function finalizeTask(
   }
 
   // Rebase on latest main
-  const rebaseSucceeded = attemptRebase(cwd, spec);
+  const rebaseSucceeded = attemptRebase(cwd);
   if (rebaseSucceeded && validationPassed) {
     try {
-      execFileSync("sh", ["-c", VALIDATION_COMMAND], { cwd, stdio: "pipe" });
+      execFileSync("sh", ["-c", VALIDATION_COMMAND], {
+        cwd,
+        stdio: "pipe",
+        timeout: VALIDATION_TIMEOUT_MS,
+      });
     } catch (postRebaseErr: unknown) {
       const postRebaseStderr = extractStderr(postRebaseErr);
       console.warn(
@@ -94,7 +108,7 @@ export async function finalizeTask(
     console.log(`  #${spec.id}: PR created${isDraft ? " (draft)" : ""}.`);
     prCreated = true;
   } catch (err: unknown) {
-    const msg = err instanceof Error ? err.message : String(err);
+    const msg = toErrorMessage(err);
     console.error(`  #${spec.id}: PR creation failed: ${msg}`);
   }
 
@@ -105,13 +119,16 @@ export async function finalizeTask(
  * Fetches origin/main and rebases the current branch onto it.
  * On failure, aborts the rebase cleanly.
  * @param cwd - Working directory (worktree path).
- * @param _spec - Task specification (unused, reserved for future logging).
  * @returns `true` if rebase succeeded, `false` otherwise.
  */
-function attemptRebase(cwd: string, _spec: TaskSpec): boolean {
+function attemptRebase(cwd: string): boolean {
   try {
-    execFileSync("git", ["fetch", "origin", "main"], { cwd, stdio: "pipe" });
-    execFileSync("git", ["rebase", "origin/main"], { cwd, stdio: "pipe" });
+    execFileSync("git", ["fetch", "origin", "main"], {
+      cwd,
+      stdio: "pipe",
+      timeout: GIT_TIMEOUT_MS,
+    });
+    execFileSync("git", ["rebase", "origin/main"], { cwd, stdio: "pipe", timeout: GIT_TIMEOUT_MS });
     return true;
   } catch {
     try {
@@ -189,7 +206,7 @@ function buildPrArgs(
  */
 function extractStderr(err: unknown): string {
   return err instanceof Error && "stderr" in err
-    ? String((err as { stderr: unknown }).stderr).slice(0, 500)
+    ? String((err as { stderr: unknown }).stderr).slice(0, MAX_STDERR_CHARS)
     : "";
 }
 
@@ -204,15 +221,20 @@ function extractStderr(err: unknown): string {
 function pushBranch(cwd: string, spec: TaskSpec, rebaseSucceeded: boolean): boolean {
   if (rebaseSucceeded) {
     try {
-      execFileSync("git", ["push", "--force-with-lease"], { cwd, stdio: "pipe" });
+      execFileSync("git", ["push", "--force-with-lease"], {
+        cwd,
+        stdio: "pipe",
+        timeout: PUSH_TIMEOUT_MS,
+      });
       return true;
     } catch (pushErr: unknown) {
-      const pushMsg = pushErr instanceof Error ? pushErr.message : String(pushErr);
+      const pushMsg = toErrorMessage(pushErr);
       try {
         const suffix = crypto.randomBytes(4).toString("hex");
         execFileSync("git", ["push", "origin", `HEAD:refs/heads/rescue/${spec.branch}-${suffix}`], {
           cwd,
           stdio: "pipe",
+          timeout: PUSH_TIMEOUT_MS,
         });
         console.warn(
           `  #${spec.id}: Push failed. Commits preserved at rescue/${spec.branch}-${suffix}`,
@@ -226,10 +248,10 @@ function pushBranch(cwd: string, spec: TaskSpec, rebaseSucceeded: boolean): bool
     }
   } else {
     try {
-      execFileSync("git", ["push"], { cwd, stdio: "pipe" });
+      execFileSync("git", ["push"], { cwd, stdio: "pipe", timeout: PUSH_TIMEOUT_MS });
       return true;
     } catch (pushErr: unknown) {
-      const pushMsg = pushErr instanceof Error ? pushErr.message : String(pushErr);
+      const pushMsg = toErrorMessage(pushErr);
       console.warn(`  #${spec.id}: git push failed after rebase abort: ${pushMsg}`);
       return false;
     }
@@ -244,7 +266,11 @@ function pushBranch(cwd: string, spec: TaskSpec, rebaseSucceeded: boolean): bool
  */
 function runValidation(cwd: string, spec: TaskSpec): boolean {
   try {
-    execFileSync("sh", ["-c", VALIDATION_COMMAND], { cwd, stdio: "pipe" });
+    execFileSync("sh", ["-c", VALIDATION_COMMAND], {
+      cwd,
+      stdio: "pipe",
+      timeout: VALIDATION_TIMEOUT_MS,
+    });
     return true;
   } catch (err: unknown) {
     const stderr = extractStderr(err);
diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts
index 3ca2d55..74122d9 100644
--- a/.sandcastle/main.ts
+++ b/.sandcastle/main.ts
@@ -2,6 +2,7 @@ import * as sandcastle from "@ai-hero/sandcastle";
 import { docker } from "@ai-hero/sandcastle/sandboxes/docker";
 
 import { ConcurrencyPool } from "./concurrency-pool.js";
+import { TASK_TIMEOUT_MS } from "./constants.js";
 import { finalizeTask } from "./finalizer.js";
 import { runRefinementLoop } from "./refinement-loop.js";
 import { GithubIssueSource } from "./task-source.js";
@@ -11,7 +12,25 @@ const BRANCH_PREFIX = "agent/issue";
 const ISSUE_LABEL = "sandcastle";
 const MAX_PARALLEL = 3;
 const DOCKER_IMAGE = "sandcastle-sap-ai";
-const TASK_TIMEOUT_MS = 15 * 60 * 1000;
+
+/**
+ * Races a promise against a timeout, rejecting with a descriptive error if the timeout fires first.
+ * @param promise - The promise to race against the timeout.
+ * @param ms - Timeout duration in milliseconds.
+ * @param label - Human-readable label used in the timeout error message.
+ * @returns The resolved value of the promise if it completes before the timeout.
+ */
+function withTimeout<T>(promise: Promise<T>, ms: number, label: string): Promise<T> {
+  const timeoutPromise = new Promise<never>((_, reject) => {
+    setTimeout(() => {
+      reject(new Error(`${label} timed out after ${String(ms)}ms`));
+    }, ms).unref();
+  });
+  timeoutPromise.catch(() => {
+    /* suppress unhandled rejection when task completes before timeout */
+  });
+  return Promise.race([promise, timeoutPromise]);
+}
 
 const source = new GithubIssueSource({
   branchPrefix: BRANCH_PREFIX,
@@ -29,7 +48,7 @@ if (tasks.length === 0) {
   const settled = await Promise.allSettled(
     tasks.map((spec) =>
       pool.run(() =>
-        Promise.race([
+        withTimeout(
           (async () => {
             await using sandbox = await sandcastle.createSandbox({
               branch: spec.branch,
@@ -54,18 +73,9 @@ if (tasks.length === 0) {
 
             return { prCreated, spec };
           })(),
-          (() => {
-            const p = new Promise<never>((_, reject) => {
-              setTimeout(() => {
-                reject(new Error(`Task #${spec.id} timed out after ${String(TASK_TIMEOUT_MS)}ms`));
-              }, TASK_TIMEOUT_MS).unref();
-            });
-            p.catch(() => {
-              /* suppress unhandled rejection when task completes before timeout */
-            });
-            return p;
-          })(),
-        ]),
+          TASK_TIMEOUT_MS,
+          `Task #${spec.id}`,
+        ),
       ),
     ),
   );
diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts
index 0589ebc..ddfc966 100644
--- a/.sandcastle/refinement-loop.ts
+++ b/.sandcastle/refinement-loop.ts
@@ -6,11 +6,9 @@ import { join, sep } from "node:path";
 
 import type { Finding, LoopResult, LoopStatus, SandboxInstance, TaskSpec } from "./types.js";
 
+import { VALIDATION_COMMAND } from "./constants.js";
 import { ITERATION_BUDGET_PER_ROUND, MAX_CRITIC_ROUNDS, parseFindingsSafe } from "./types.js";
 
-const VALIDATION_COMMAND =
-  "npm run type-check && npm run test && npm run test:node && npm run test:edge && npm run prettier-check && npm run lint && npm run build && npm run check-build && npm run build:v2 && npm run check-build:v2";
-
 /** Options for configuring the refinement loop. */
 export interface RefinementLoopOptions {
   /** Budget of iterations per round (flat constant applied to every round). */
@@ -21,6 +19,53 @@ export interface RefinementLoopOptions {
   onRoundComplete?: (round: number, findings: Finding[]) => void;
 }
 
+/** Result of a convergence check. */
+interface ConvergenceResult {
+  /** Best SHA to restore (empty string = no update). */
+  bestSha: string;
+  /** Updated last findings. */
+  lastFindings: Finding[];
+  /** New loop status. */
+  status: LoopStatus;
+}
+
+/**
+ * Input descriptor for hashing a window of source lines around a finding.
+ */
+interface HashInput {
+  /** Working directory (worktree path) for resolving the file. */
+  readonly cwd: string;
+  /** Relative file path of the finding. */
+  readonly file: string;
+  /** Line number of the finding (1-indexed). */
+  readonly line: number;
+}
+
+/**
+ * Context passed to the quality ratchet check.
+ * Groups the per-round identifiers needed for regression detection and rollback.
+ */
+interface RatchetContext {
+  /** SHA of HEAD before the implementer ran (used for rollback). */
+  readonly beforeSha: string;
+  /** Working directory for git operations. */
+  readonly cwd: string;
+  /** Current round number (1-indexed). */
+  readonly round: number;
+  /** The task specification. */
+  readonly spec: TaskSpec;
+}
+
+/** Resolved loop options with defaults applied. */
+interface ResolvedLoopOptions {
+  /** Iteration budget per round. */
+  budget: number;
+  /** Maximum number of rounds. */
+  maxRounds: number;
+  /** Optional round-complete callback (no-op if not provided). */
+  onRoundComplete: (round: number, findings: Finding[]) => void;
+}
+
 /** Result of a single implement↔critic round. */
 interface RoundResult {
   /** SHA of HEAD before the implementer ran. */
@@ -43,8 +88,7 @@ export async function runRefinementLoop(
   sandbox: SandboxInstance,
   opts?: RefinementLoopOptions,
 ): Promise<LoopResult> {
-  const maxRounds = opts?.maxRounds ?? MAX_CRITIC_ROUNDS;
-  const budget = opts?.iterationBudget ?? ITERATION_BUDGET_PER_ROUND;
+  const { budget, maxRounds, onRoundComplete } = resolveLoopOptions(opts);
 
   const seenKeys = new Set<string>();
   let lastFindings: Finding[] = [];
@@ -64,153 +108,162 @@ export async function runRefinementLoop(
 
     const result = await executeRound(spec, sandbox, round, budget, lastFindings);
 
-    if (round === 1 && result.commits === 0) {
-      console.warn(`  #${spec.id}: 0 commits on round 1. Skipping.`);
-      status = "skipped";
+    const earlyExit = checkEarlyExit(spec, round, result, totalCommits);
+    if (earlyExit !== null) {
+      totalCommits = earlyExit.totalCommits;
+      status = earlyExit.status;
       break;
     }
 
-    if (result.findings === null) {
-      totalCommits += result.commits;
-      console.warn(`  #${spec.id}: Critic failed twice. Breaking (non-converged).`);
-      status = "failed";
-      break;
-    }
+    if (result.findings === null) break;
+    const findings: Finding[] = result.findings;
 
-    if (round > 1 && result.commits === 0) {
-      status = "exhausted";
+    if (result.commits > 0 && runMidLoopValidation(sandbox.worktreePath)) {
+      totalCommits += result.commits;
+      status = "converged";
       break;
     }
 
-    // Fix 1: Validation in-loop (ARCS pattern) — deterministic convergence signal
-    if (result.commits > 0) {
-      try {
-        execFileSync("sh", ["-c", VALIDATION_COMMAND], {
-          cwd: sandbox.worktreePath,
-          stdio: "pipe",
-          timeout: 120_000,
-        });
-        // Validation passed mid-loop — deterministic convergence
-        totalCommits += result.commits;
-        status = "converged";
-        break;
-      } catch {
-        // Validation failed — continue to critic for feedback
-      }
-    }
-
-    // Dedup via context hash
     const cwd = sandbox.worktreePath;
-    const newFindings = deduplicateFindings(result.findings, seenKeys, cwd);
+    const newFindings = deduplicateFindings(findings, seenKeys, cwd);
 
     console.log(
-      `  #${spec.id}: ${String(result.findings.length)} findings, ${String(newFindings.length)} new`,
+      `  #${spec.id}: ${String(findings.length)} findings, ${String(newFindings.length)} new`,
     );
 
-    // Quality ratchet: rollback if findings increased (regression)
-    const nonLowFindings = result.findings.filter((f) => f.confidence !== "LOW");
+    const nonLowFindings = findings.filter((f) => f.confidence !== "LOW");
     if (
       checkQualityRatchet(
-        spec,
-        round,
+        { beforeSha: result.beforeSha, cwd, round, spec },
         nonLowFindings.length,
         previousFindingsCount,
-        result.beforeSha,
-        cwd,
       )
     ) {
       status = "exhausted";
       break;
     }
 
-    // Best-state checkpoint (SWE-Agent pattern) — after ratchet passes
     if (newFindings.length < bestFindingsCount) {
       bestFindingsCount = newFindings.length;
-      try {
-        bestSha = execFileSync("git", ["rev-parse", "HEAD"], {
-          cwd: sandbox.worktreePath,
-          encoding: "utf-8",
-          stdio: ["pipe", "pipe", "pipe"],
-        }).trim();
-      } catch {
-        /* empty */
-      }
+      bestSha = captureHeadSha(cwd);
     }
 
     totalCommits += result.commits;
     previousFindingsCount = nonLowFindings.length;
+    onRoundComplete(round, findings);
 
-    opts?.onRoundComplete?.(round, result.findings);
-
-    if (newFindings.length === 0) {
-      // Severity-weighted convergence (OpenHands pattern):
-      // Don't converge if CRITICAL/HIGH findings persist, even if already seen
-      const criticalPersistent = result.findings.filter(
-        (f) => (f.severity === "CRITICAL" || f.severity === "HIGH") && f.confidence !== "LOW",
-      );
-      if (criticalPersistent.length > 0) {
-        lastFindings = criticalPersistent;
-        status = "exhausted";
-        // Capture current HEAD so post-loop reset is a no-op (code matches findings)
-        try {
-          bestSha = execFileSync("git", ["rev-parse", "HEAD"], {
-            cwd: sandbox.worktreePath,
-            encoding: "utf-8",
-            stdio: ["pipe", "pipe", "pipe"],
-          }).trim();
-        } catch {
-          /* empty */
-        }
-        break;
-      }
-      if (nonLowFindings.length > 0) {
-        lastFindings = nonLowFindings;
-      }
-      status = "converged";
+    const convergenceResult = checkConvergence(cwd, findings, newFindings, nonLowFindings);
+    if (convergenceResult !== null) {
+      lastFindings = convergenceResult.lastFindings;
+      status = convergenceResult.status;
+      bestSha = convergenceResult.bestSha;
       break;
     }
 
     lastFindings = newFindings;
   }
 
-  // Best-state reset: if not converged, restore best intermediate state
-  if (status !== "converged" && /^[0-9a-f]{40}$/.test(bestSha)) {
-    try {
-      execFileSync("git", ["reset", "--hard", bestSha], {
-        cwd: sandbox.worktreePath,
-        stdio: "pipe",
-      });
-      const commitCount = execFileSync("git", ["rev-list", "--count", "main..HEAD"], {
-        cwd: sandbox.worktreePath,
-        encoding: "utf-8",
-      }).trim();
-      totalCommits = parseInt(commitCount, 10) || 0;
-    } catch {
-      /* empty */
-    }
+  if (shouldResetToBest(status, bestSha)) {
+    totalCommits = resetToBestState(sandbox.worktreePath, bestSha, totalCommits);
   }
 
   return { lastFindings, roundsCompleted, status, totalCommits };
 }
 
 /**
- * Checks whether findings regressed compared to the previous round and rolls back if so.
+ * Captures the current HEAD SHA, returning empty string on failure.
+ * @param cwd - Working directory for git operations.
+ * @returns The HEAD SHA or empty string.
+ */
+function captureHeadSha(cwd: string): string {
+  try {
+    return execFileSync("git", ["rev-parse", "HEAD"], {
+      cwd,
+      encoding: "utf-8",
+      stdio: ["pipe", "pipe", "pipe"],
+    }).trim();
+  } catch {
+    return "";
+  }
+}
+
+/**
+ * Checks whether the current round converged (no new findings).
+ * @param cwd - Working directory for git operations.
+ * @param allFindings - All findings from the critic.
+ * @param newFindings - Deduplicated new findings.
+ * @param nonLowFindings - Non-LOW-confidence findings.
+ * @returns A ConvergenceResult if the loop should break, or null to continue.
+ */
+function checkConvergence(
+  cwd: string,
+  allFindings: Finding[],
+  newFindings: Finding[],
+  nonLowFindings: Finding[],
+): ConvergenceResult | null {
+  if (newFindings.length !== 0) return null;
+
+  // Severity-weighted convergence (OpenHands pattern):
+  // Don't converge if CRITICAL/HIGH findings persist, even if already seen
+  const criticalPersistent = allFindings.filter(
+    (f) => (f.severity === "CRITICAL" || f.severity === "HIGH") && f.confidence !== "LOW",
+  );
+  if (criticalPersistent.length > 0) {
+    // Capture current HEAD so post-loop reset is a no-op (code matches findings)
+    return {
+      bestSha: captureHeadSha(cwd),
+      lastFindings: criticalPersistent,
+      status: "exhausted",
+    };
+  }
+
+  return {
+    bestSha: "",
+    lastFindings: nonLowFindings.length > 0 ? nonLowFindings : [],
+    status: "converged",
+  };
+}
+
+/**
+ * Checks whether the round result warrants an early exit from the loop.
  * @param spec - The task specification.
  * @param round - Current round number.
+ * @param result - The round result.
+ * @param totalCommits - Running total of commits before this round.
+ * @returns An object with updated status and totalCommits if early exit, or null to continue.
+ */
+function checkEarlyExit(
+  spec: TaskSpec,
+  round: number,
+  result: RoundResult,
+  totalCommits: number,
+): null | { status: LoopStatus; totalCommits: number } {
+  if (round === 1 && result.commits === 0) {
+    console.warn(`  #${spec.id}: 0 commits on round 1. Skipping.`);
+    return { status: "skipped", totalCommits };
+  }
+  if (result.findings === null) {
+    console.warn(`  #${spec.id}: Critic failed twice. Breaking (non-converged).`);
+    return { status: "failed", totalCommits: totalCommits + result.commits };
+  }
+  if (round > 1 && result.commits === 0) {
+    return { status: "exhausted", totalCommits };
+  }
+  return null;
+}
+
+/**
+ * @param ctx - Ratchet context containing spec, round, beforeSha, and cwd.
  * @param findingsCount - Number of non-LOW findings this round.
  * @param previousCount - Number of non-LOW findings from the previous round.
- * @param beforeSha - SHA to reset to on regression.
- * @param cwd - Working directory for git operations.
  * @returns True if a regression was detected and rollback performed.
  */
 function checkQualityRatchet(
-  spec: TaskSpec,
-  round: number,
+  ctx: RatchetContext,
   findingsCount: number,
   previousCount: number,
-  beforeSha: string,
-  cwd: string,
 ): boolean {
+  const { beforeSha, cwd, round, spec } = ctx;
   if (round <= 2 || findingsCount <= previousCount) {
     return false;
   }
@@ -236,6 +289,31 @@ function checkQualityRatchet(
   return true;
 }
 
+/**
+ * Computes a deduplication key for a finding using a context hash of surrounding lines.
+ * @param f - Finding to compute a key for.
+ * @param cwd - Working directory (worktree path) for reading file context.
+ * @param fileCache - Optional cache of file contents keyed by resolved path.
+ * @returns Composite dedup key.
+ */
+function computeFindingKey(f: Finding, cwd: string, fileCache?: Map<string, string>): string {
+  if (!f.file || f.line == null) {
+    const normalizedTitle = f.title
+      .toLowerCase()
+      .replace(/[^\w\s]/g, "")
+      .replace(/\s+/g, " ")
+      .trim();
+    const titleHash = crypto
+      .createHash("sha256")
+      .update(normalizedTitle)
+      .digest("hex")
+      .slice(0, 16);
+    return `${f.file || "global"}::${f.category}::${titleHash}`;
+  }
+  const contextHash = hashContextLines({ cwd, file: f.file, line: f.line }, 3, fileCache);
+  return `${f.file}::${f.category}::${contextHash}`;
+}
+
 /**
  * Filters findings by confidence and deduplicates against previously seen keys.
  * @param findings - Raw findings from the critic.
@@ -246,10 +324,10 @@ function checkQualityRatchet(
 function deduplicateFindings(findings: Finding[], seenKeys: Set<string>, cwd: string): Finding[] {
   const fileCache = new Map<string, string>();
   const newFindings = findings.filter(
-    (f) => f.confidence !== "LOW" && !seenKeys.has(findingKey(f, cwd, fileCache)),
+    (f) => f.confidence !== "LOW" && !seenKeys.has(computeFindingKey(f, cwd, fileCache)),
   );
   for (const f of newFindings) {
-    seenKeys.add(findingKey(f, cwd, fileCache));
+    seenKeys.add(computeFindingKey(f, cwd, fileCache));
   }
   return newFindings;
 }
@@ -285,9 +363,9 @@ async function executeRound(
   }
 
   // Implementer
-  let impl: Awaited<ReturnType<typeof sandbox.run>>;
+  let implementerResult: Awaited<ReturnType<typeof sandbox.run>>;
   try {
-    impl = await sandbox.run({
+    implementerResult = await sandbox.run({
       agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
       maxIterations: budget,
       name: `Implementer #${spec.id} R${String(round)}`,
@@ -317,50 +395,22 @@ async function executeRound(
     findings = null;
   }
 
-  return { beforeSha, commits: impl.commits.length, findings };
-}
-
-/**
- * Computes a deduplication key for a finding using a context hash of surrounding lines.
- * @param f - Finding to compute a key for.
- * @param cwd - Working directory (worktree path) for reading file context.
- * @param fileCache - Optional cache of file contents keyed by resolved path.
- * @returns Composite dedup key.
- */
-function findingKey(f: Finding, cwd: string, fileCache?: Map<string, string>): string {
-  if (!f.file || f.line == null) {
-    const normalizedTitle = f.title
-      .toLowerCase()
-      .replace(/[^\w\s]/g, "")
-      .replace(/\s+/g, " ")
-      .trim();
-    const titleHash = crypto
-      .createHash("sha256")
-      .update(normalizedTitle)
-      .digest("hex")
-      .slice(0, 16);
-    return `${f.file || "global"}::${f.category}::${titleHash}`;
-  }
-  const contextHash = hashContextLines(cwd, f.file, f.line, 3, fileCache);
-  return `${f.file}::${f.category}::${contextHash}`;
+  return { beforeSha, commits: implementerResult.commits.length, findings };
 }
 
 /**
  * Hashes a window of lines around the finding for dedup stability.
- * @param cwd - Working directory.
- * @param file - Relative file path.
- * @param line - Line number of the finding.
+ * @param input - Hash input containing cwd, file, and line.
  * @param radius - Number of lines above/below to include in the context window.
  * @param fileCache - Optional cache of file contents keyed by resolved path.
  * @returns Truncated SHA-256 hex digest.
  */
 function hashContextLines(
-  cwd: string,
-  file: string,
-  line: number,
+  input: HashInput,
   radius: number,
   fileCache?: Map<string, string>,
 ): string {
+  const { cwd, file, line } = input;
   try {
     const fullPath = realpathSync(join(cwd, file));
     if (!fullPath.startsWith(realpathSync(cwd) + sep)) {
@@ -419,6 +469,42 @@ function parseFindings(stdout: string, nonce: string): Finding[] | null {
   return null;
 }
 
+/**
+ * Resets the worktree to the best intermediate state and recounts commits.
+ * @param cwd - Working directory for git operations.
+ * @param bestSha - The SHA to reset to.
+ * @param currentCommits - Current total commits (fallback if recount fails).
+ * @returns Updated total commit count.
+ */
+function resetToBestState(cwd: string, bestSha: string, currentCommits: number): number {
+  try {
+    execFileSync("git", ["reset", "--hard", bestSha], {
+      cwd,
+      stdio: "pipe",
+    });
+    const commitCount = execFileSync("git", ["rev-list", "--count", "main..HEAD"], {
+      cwd,
+      encoding: "utf-8",
+    }).trim();
+    return parseInt(commitCount, 10) || 0;
+  } catch {
+    return currentCommits;
+  }
+}
+
+/**
+ * Resolves loop options, applying defaults for missing values.
+ * @param opts - Optional loop options.
+ * @returns Resolved options with all fields populated.
+ */
+function resolveLoopOptions(opts: RefinementLoopOptions | undefined): ResolvedLoopOptions {
+  return {
+    budget: opts?.iterationBudget ?? ITERATION_BUDGET_PER_ROUND,
+    maxRounds: opts?.maxRounds ?? MAX_CRITIC_ROUNDS,
+    onRoundComplete: opts?.onRoundComplete ?? (() => undefined),
+  };
+}
+
 /**
  * Runs the critic agent, retrying once on parse failure.
  * @param sandbox - The sandcastle sandbox instance.
@@ -463,3 +549,31 @@ async function runCritic(
 
   return findings;
 }
+
+/**
+ * Runs the mid-loop validation command (ARCS pattern).
+ * @param cwd - Working directory for the validation command.
+ * @returns True if validation passed (deterministic convergence), false otherwise.
+ */
+function runMidLoopValidation(cwd: string): boolean {
+  try {
+    execFileSync("sh", ["-c", VALIDATION_COMMAND], {
+      cwd,
+      stdio: "pipe",
+      timeout: 120_000,
+    });
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Returns true if the best-state reset should be applied after the loop.
+ * @param status - Final loop status.
+ * @param bestSha - Best intermediate SHA (empty string if none captured).
+ * @returns True if reset should be applied.
+ */
+function shouldResetToBest(status: LoopStatus, bestSha: string): boolean {
+  return status !== "converged" && /^[0-9a-f]{40}$/.test(bestSha);
+}
diff --git a/.sandcastle/task-source.ts b/.sandcastle/task-source.ts
index 8773b0d..c9f5ffa 100644
--- a/.sandcastle/task-source.ts
+++ b/.sandcastle/task-source.ts
@@ -5,6 +5,8 @@ import { z } from "zod";
 
 import type { TaskSpec } from "./types.js";
 
+import { GIT_TIMEOUT_MS, MAX_TITLE_LENGTH, toErrorMessage } from "./constants.js";
+
 const RawIssueSchema = z.object({
   body: z
     .string()
@@ -137,26 +139,30 @@ export class GithubIssueSource implements TaskSource {
           "--label",
           this.label,
         ],
-        { encoding: "utf-8" },
+        { encoding: "utf-8", timeout: GIT_TIMEOUT_MS },
+      );
+    } catch (err) {
+      console.error(
+        `Failed to fetch issues: ${toErrorMessage(err)}. Ensure gh is installed and authenticated.`,
       );
-    } catch {
-      console.error("Failed to fetch issues. Ensure gh is installed and authenticated.");
       process.exit(1);
     }
 
     let rawIssues: z.infer<typeof RawIssuesSchema>;
     try {
       rawIssues = RawIssuesSchema.parse(JSON.parse(rawIssuesJson));
-    } catch {
-      console.error("Failed to parse issues JSON. Unexpected format from gh CLI.");
+    } catch (err) {
+      console.error(
+        `Failed to parse issues JSON: ${toErrorMessage(err)}. Unexpected format from gh CLI.`,
+      );
       process.exit(1);
     }
 
-    return rawIssues.map((i) => ({
-      body: sanitizeForPrompt(i.body),
-      labels: i.labels.map((l) => l.name),
-      number: i.number,
-      title: sanitizeForPrompt(i.title),
+    return rawIssues.map((issue) => ({
+      body: sanitizeForPrompt(issue.body),
+      labels: issue.labels.map((label) => label.name),
+      number: issue.number,
+      title: sanitizeForPrompt(issue.title),
     }));
   }
 
@@ -178,28 +184,27 @@ export class GithubIssueSource implements TaskSource {
           if (typeof item.branch !== "string" || !this.branchPattern.test(item.branch))
             return false;
           if (typeof item.title !== "string") return false;
-          if (item.title.length > 200) return false;
-          for (let ci = 0; ci < item.title.length; ci++) {
-            if (item.title.charCodeAt(ci) < 0x20) return false;
-          }
+          if (item.title.length > MAX_TITLE_LENGTH) return false;
+          // eslint-disable-next-line no-control-regex
+          if (/[\x00-\x1f]/.test(item.title)) return false;
           return true;
         },
       );
 
-      const issueMap = new Map(issuesJson.map((i) => [String(i.number), i]));
+      const issueMap = new Map(issuesJson.map((issue) => [String(issue.number), issue]));
       return validated
-        .map((v) => {
-          const source = issueMap.get(v.id);
+        .map((entry) => {
+          const source = issueMap.get(entry.id);
           if (!source) return null;
           return {
-            ...v,
+            ...entry,
             body: source.body,
             labels: source.labels,
           };
         })
-        .filter((v): v is NonNullable<typeof v> => v !== null);
-    } catch {
-      console.error("Planner produced invalid JSON. Retrying.");
+        .filter((entry): entry is NonNullable<typeof entry> => entry !== null);
+    } catch (err) {
+      console.error(`Planner produced invalid JSON: ${toErrorMessage(err)}. Retrying.`);
       return null;
     }
   }
diff --git a/.sandcastle/types.ts b/.sandcastle/types.ts
index 300df94..9ebc589 100644
--- a/.sandcastle/types.ts
+++ b/.sandcastle/types.ts
@@ -2,6 +2,16 @@ import type * as sandcastle from "@ai-hero/sandcastle";
 
 import { z } from "zod";
 
+/** Result of post-loop finalization. */
+export interface FinalizeResult {
+  /** Whether the PR was marked as draft. */
+  isDraft: boolean;
+  /** Whether a PR was successfully created. */
+  prCreated: boolean;
+  /** Whether validation passed. */
+  validationPassed: boolean;
+}
+
 /** Zod schema for a single critic finding. */
 export const FindingSchema = z.object({
   category: z.enum(["security", "logic", "performance", "architecture", "style"]),
@@ -14,19 +24,6 @@ export const FindingSchema = z.object({
   title: z.string(),
 });
 
-/** Zod schema for an array of critic findings. */
-export const FindingsSchema = z.array(FindingSchema);
-
-/** Result of post-loop finalization. */
-export interface FinalizeResult {
-  /** Whether the PR was marked as draft. */
-  isDraft: boolean;
-  /** Whether a PR was successfully created. */
-  prCreated: boolean;
-  /** Whether validation passed. */
-  validationPassed: boolean;
-}
-
 /** A single critic finding parsed from agent output. */
 export type Finding = z.infer<typeof FindingSchema>;
 

From 69ca4fa826a1320efaef48b5ae71d48ff20749c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 20:40:42 +0200
Subject: [PATCH 28/31] fix: use constants from constants.ts + add planner
 timeout (multi-agent audit findings)

---
 .sandcastle/refinement-loop.ts | 28 +++++++++++++++++++---------
 .sandcastle/task-source.ts     | 21 ++++++++++++++++++---
 2 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts
index ddfc966..bb04f2c 100644
--- a/.sandcastle/refinement-loop.ts
+++ b/.sandcastle/refinement-loop.ts
@@ -6,7 +6,13 @@ import { join, sep } from "node:path";
 
 import type { Finding, LoopResult, LoopStatus, SandboxInstance, TaskSpec } from "./types.js";
 
-import { VALIDATION_COMMAND } from "./constants.js";
+import {
+  AGENT_MODEL,
+  CONTEXT_HASH_RADIUS,
+  HASH_PREFIX_LENGTH,
+  VALIDATION_COMMAND,
+  VALIDATION_TIMEOUT_MS,
+} from "./constants.js";
 import { ITERATION_BUDGET_PER_ROUND, MAX_CRITIC_ROUNDS, parseFindingsSafe } from "./types.js";
 
 /** Options for configuring the refinement loop. */
@@ -307,10 +313,14 @@ function computeFindingKey(f: Finding, cwd: string, fileCache?: Map<string, stri
       .createHash("sha256")
       .update(normalizedTitle)
       .digest("hex")
-      .slice(0, 16);
+      .slice(0, HASH_PREFIX_LENGTH);
     return `${f.file || "global"}::${f.category}::${titleHash}`;
   }
-  const contextHash = hashContextLines({ cwd, file: f.file, line: f.line }, 3, fileCache);
+  const contextHash = hashContextLines(
+    { cwd, file: f.file, line: f.line },
+    CONTEXT_HASH_RADIUS,
+    fileCache,
+  );
   return `${f.file}::${f.category}::${contextHash}`;
 }
 
@@ -366,7 +376,7 @@ async function executeRound(
   let implementerResult: Awaited<ReturnType<typeof sandbox.run>>;
   try {
     implementerResult = await sandbox.run({
-      agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
+      agent: sandcastle.opencode(AGENT_MODEL),
       maxIterations: budget,
       name: `Implementer #${spec.id} R${String(round)}`,
       promptArgs: {
@@ -434,13 +444,13 @@ function hashContextLines(
       .createHash("sha256")
       .update(`${file}:${String(line)}:${normalized}`)
       .digest("hex")
-      .slice(0, 16);
+      .slice(0, HASH_PREFIX_LENGTH);
   } catch {
     return crypto
       .createHash("sha256")
       .update(`${file}:${String(line)}:fallback`)
       .digest("hex")
-      .slice(0, 16);
+      .slice(0, HASH_PREFIX_LENGTH);
   }
 }
 
@@ -520,7 +530,7 @@ async function runCritic(
   nonce: string,
 ): Promise<Finding[] | null> {
   let critic = await sandbox.run({
-    agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
+    agent: sandcastle.opencode(AGENT_MODEL),
     maxIterations: 1,
     name: `Critic #${spec.id} R${String(round)}`,
     promptArgs: {
@@ -535,7 +545,7 @@ async function runCritic(
   if (findings === null) {
     console.warn(`  #${spec.id}: Critic parse failed. Retrying.`);
     critic = await sandbox.run({
-      agent: sandcastle.opencode("github-copilot/claude-sonnet-4.6"),
+      agent: sandcastle.opencode(AGENT_MODEL),
       maxIterations: 1,
       name: `Critic #${spec.id} R${String(round)} retry`,
       promptArgs: {
@@ -560,7 +570,7 @@ function runMidLoopValidation(cwd: string): boolean {
     execFileSync("sh", ["-c", VALIDATION_COMMAND], {
       cwd,
       stdio: "pipe",
-      timeout: 120_000,
+      timeout: VALIDATION_TIMEOUT_MS,
     });
     return true;
   } catch {
diff --git a/.sandcastle/task-source.ts b/.sandcastle/task-source.ts
index c9f5ffa..1f31f32 100644
--- a/.sandcastle/task-source.ts
+++ b/.sandcastle/task-source.ts
@@ -5,7 +5,13 @@ import { z } from "zod";
 
 import type { TaskSpec } from "./types.js";
 
-import { GIT_TIMEOUT_MS, MAX_TITLE_LENGTH, toErrorMessage } from "./constants.js";
+import {
+  GIT_TIMEOUT_MS,
+  MAX_TITLE_LENGTH,
+  PLANNER_MODEL,
+  TASK_TIMEOUT_MS,
+  toErrorMessage,
+} from "./constants.js";
 
 const RawIssueSchema = z.object({
   body: z
@@ -74,8 +80,8 @@ export class GithubIssueSource implements TaskSource {
     for (let attempt = 1; attempt <= this.maxRetries; attempt++) {
       console.log(`\n=== Planner attempt ${String(attempt)}/${String(this.maxRetries)} ===\n`);
 
-      const plan = await sandcastle.run({
-        agent: sandcastle.opencode("github-copilot/claude-opus-4.6"),
+      const planPromise = sandcastle.run({
+        agent: sandcastle.opencode(PLANNER_MODEL),
         maxIterations: 1,
         name: "Planner",
         promptArgs: {
@@ -85,6 +91,15 @@ export class GithubIssueSource implements TaskSource {
         promptFile: "./.sandcastle/plan-prompt.md",
         sandbox: docker({ imageName: this.dockerImage }),
       });
+      const timeoutPromise = new Promise<never>((_, reject) => {
+        setTimeout(() => {
+          reject(new Error("Planner timed out"));
+        }, TASK_TIMEOUT_MS).unref();
+      });
+      timeoutPromise.catch(() => {
+        /* suppress unhandled rejection when planner completes before timeout */
+      });
+      const plan = await Promise.race([planPromise, timeoutPromise]);
 
       const planMatches = [...plan.stdout.matchAll(/<plan>([\s\S]*?)<\/plan>/g)];
       const planMatch = planMatches.at(-1);

From d8b3454a63090692f0d0b3eeb3665fc2f38982d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 21:51:06 +0200
Subject: [PATCH 29/31] =?UTF-8?q?refactor(.sandcastle):=20harden=20prompts?=
 =?UTF-8?q?=20=E2=80=94=20cap=20findings,=20add=20known=20decisions,=20sco?=
 =?UTF-8?q?pe=20preference?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .sandcastle/critic-prompt.md | 10 +++++++++-
 .sandcastle/plan-prompt.md   |  1 +
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/.sandcastle/critic-prompt.md b/.sandcastle/critic-prompt.md
index 98ec518..fb6a997 100644
--- a/.sandcastle/critic-prompt.md
+++ b/.sandcastle/critic-prompt.md
@@ -6,7 +6,7 @@ Analyze the implementation on branch `{{BRANCH}}` and produce structured finding
 
 Run `git diff main...{{BRANCH}}` to see all changes. Examine the diff carefully. For each issue found, produce a structured finding.
 
-Read `CONTRIBUTING.md` for the project's coding standards.
+Read `AGENTS.md` and `CONTRIBUTING.md` for the project's coding standards.
 
 ## Output Format
 
@@ -39,6 +39,8 @@ If no issues are found, output:
 
 ## Rules
 
+- Report ≤5 findings. HIGH and CRITICAL only. Omit LOW/MEDIUM unless zero higher-severity issues exist.
+- If >5 HIGH/CRITICAL issues exist, report the top 5 and add a summary note in the last finding's description.
 - Do NOT modify any files. Do NOT commit. Do NOT push.
 - Only report issues in the CHANGED code (not pre-existing issues).
 - Use HIGH confidence only when you've verified the issue by reading the relevant code.
@@ -47,6 +49,12 @@ If no issues are found, output:
 - Focus on: logic errors, missing edge cases, security issues, type safety violations, test gaps.
 - Do NOT report formatting issues (prettier handles those).
 
+## Known Design Decisions (do not flag)
+
+- Mid-loop validation convergence bypasses critic (ARCS pattern — deterministic tests > subjective review).
+- `process.exit()` at script end kills timed-out sandboxes (no cooperative cancellation available in sandcastle).
+- Content-addressed dedup hash includes line number (collision reduction tradeoff, bounded by hard cap).
+
 ## Completion
 
 After outputting the findings, output:
diff --git a/.sandcastle/plan-prompt.md b/.sandcastle/plan-prompt.md
index 5d34138..27be837 100644
--- a/.sandcastle/plan-prompt.md
+++ b/.sandcastle/plan-prompt.md
@@ -31,6 +31,7 @@ Read `AGENTS.md` for project conventions.
 
 - Exclude issues labeled `wontfix`, `duplicate`, or `question`.
 - Exclude issues that depend on another open issue (mention "blocked by #N" or similar).
+- Prefer issues where scope fits a single-file change over cross-cutting refactors.
 - If every issue is blocked, include the single highest-priority candidate (fewest/weakest dependencies).
 - If no actionable issues exist, output:
 

From 189f6555a143c83f3861a8f8a6c319218bea37c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 22:26:43 +0200
Subject: [PATCH 30/31] perf(.sandcastle): convert execFileSync to async
 execFileAsync (unblock event loop)

Replace all blocking execFileSync calls with util.promisify(execFile) to enable
true parallelism between tasks during subprocess execution.

- constants.ts: add execFileAsync export, convert getHeadSha to async
- refinement-loop.ts: captureHeadSha, checkQualityRatchet, checkConvergence,
  runMidLoopValidation, resetToBestState all async
- finalizer.ts: runValidation, attemptRebase, pushBranch all async
- task-source.ts: fetchAndSanitizeIssues async

readFileSync/realpathSync stay sync (<1ms local I/O, no benefit from async).
maxBuffer: 8MB added to validation and gh issue list calls.
---
 .sandcastle/constants.ts       | 15 +++++---
 .sandcastle/finalizer.ts       | 53 +++++++++++++-------------
 .sandcastle/refinement-loop.ts | 69 +++++++++++++++-------------------
 .sandcastle/task-source.ts     | 23 +++++++-----
 4 files changed, 79 insertions(+), 81 deletions(-)

diff --git a/.sandcastle/constants.ts b/.sandcastle/constants.ts
index 23374bb..c47bed5 100644
--- a/.sandcastle/constants.ts
+++ b/.sandcastle/constants.ts
@@ -1,4 +1,5 @@
-import { execFileSync } from "node:child_process";
+import { execFile } from "node:child_process";
+import util from "node:util";
 
 /** Model identifier used for implementation and critic agents. */
 export const AGENT_MODEL = "github-copilot/claude-sonnet-4.6";
@@ -6,6 +7,9 @@ export const AGENT_MODEL = "github-copilot/claude-sonnet-4.6";
 /** Number of context lines around a diff hunk used for hash computation. */
 export const CONTEXT_HASH_RADIUS = 3;
 
+/** Async execFile — does not block the event loop. Same error shape as execFileSync. */
+export const execFileAsync = util.promisify(execFile);
+
 /** Timeout in milliseconds for git operations. */
 export const GIT_TIMEOUT_MS = 30_000;
 
@@ -39,13 +43,12 @@ export const VALIDATION_TIMEOUT_MS = 120_000;
  * @param cwd - Absolute path to the git repository root.
  * @returns The full SHA string, or `null` if the command fails.
  */
-export function getHeadSha(cwd: string): null | string {
+export async function getHeadSha(cwd: string): Promise<null | string> {
   try {
-    return execFileSync("git", ["rev-parse", "HEAD"], {
+    const { stdout } = await execFileAsync("git", ["rev-parse", "HEAD"], {
       cwd,
-      encoding: "utf-8",
-      stdio: ["pipe", "pipe", "pipe"],
-    }).trim();
+    });
+    return stdout.trim();
   } catch {
     return null;
   }
diff --git a/.sandcastle/finalizer.ts b/.sandcastle/finalizer.ts
index ab949d3..a164565 100644
--- a/.sandcastle/finalizer.ts
+++ b/.sandcastle/finalizer.ts
@@ -1,11 +1,11 @@
 import * as sandcastle from "@ai-hero/sandcastle";
-import { execFileSync } from "node:child_process";
 import crypto from "node:crypto";
 
 import type { FinalizeResult, LoopResult, SandboxInstance, TaskSpec } from "./types.js";
 
 import {
   AGENT_MODEL,
+  execFileAsync,
   GIT_TIMEOUT_MS,
   MAX_STDERR_CHARS,
   PUSH_TIMEOUT_MS,
@@ -29,7 +29,7 @@ export async function finalizeTask(
   sandbox: SandboxInstance,
   cwd: string,
 ): Promise<FinalizeResult> {
-  let validationPassed = runValidation(cwd, spec);
+  let validationPassed = await runValidation(cwd, spec);
 
   // Retry one more round if validation failed and budget remains
   if (!validationPassed && loopResult.roundsCompleted < MAX_CRITIC_ROUNDS) {
@@ -63,9 +63,9 @@ export async function finalizeTask(
     }
 
     try {
-      execFileSync("sh", ["-c", VALIDATION_COMMAND], {
+      await execFileAsync("sh", ["-c", VALIDATION_COMMAND], {
         cwd,
-        stdio: "pipe",
+        maxBuffer: 8 * 1024 * 1024,
         timeout: VALIDATION_TIMEOUT_MS,
       });
       validationPassed = true;
@@ -76,12 +76,12 @@ export async function finalizeTask(
   }
 
   // Rebase on latest main
-  const rebaseSucceeded = attemptRebase(cwd);
+  const rebaseSucceeded = await attemptRebase(cwd);
   if (rebaseSucceeded && validationPassed) {
     try {
-      execFileSync("sh", ["-c", VALIDATION_COMMAND], {
+      await execFileAsync("sh", ["-c", VALIDATION_COMMAND], {
         cwd,
-        stdio: "pipe",
+        maxBuffer: 8 * 1024 * 1024,
         timeout: VALIDATION_TIMEOUT_MS,
       });
     } catch (postRebaseErr: unknown) {
@@ -94,7 +94,7 @@ export async function finalizeTask(
   }
 
   // Push
-  const pushSucceeded = pushBranch(cwd, spec, rebaseSucceeded);
+  const pushSucceeded = await pushBranch(cwd, spec, rebaseSucceeded);
   if (!pushSucceeded) {
     console.warn(`  #${spec.id}: Push did not succeed; PR may reference unpushed commits.`);
   }
@@ -104,7 +104,7 @@ export async function finalizeTask(
 
   let prCreated = false;
   try {
-    execFileSync("gh", prArgs, { cwd, encoding: "utf-8", stdio: "pipe" });
+    await execFileAsync("gh", prArgs, { cwd, maxBuffer: 8 * 1024 * 1024 });
     console.log(`  #${spec.id}: PR created${isDraft ? " (draft)" : ""}.`);
     prCreated = true;
   } catch (err: unknown) {
@@ -121,18 +121,17 @@ export async function finalizeTask(
  * @param cwd - Working directory (worktree path).
  * @returns `true` if rebase succeeded, `false` otherwise.
  */
-function attemptRebase(cwd: string): boolean {
+async function attemptRebase(cwd: string): Promise<boolean> {
   try {
-    execFileSync("git", ["fetch", "origin", "main"], {
+    await execFileAsync("git", ["fetch", "origin", "main"], {
       cwd,
-      stdio: "pipe",
       timeout: GIT_TIMEOUT_MS,
     });
-    execFileSync("git", ["rebase", "origin/main"], { cwd, stdio: "pipe", timeout: GIT_TIMEOUT_MS });
+    await execFileAsync("git", ["rebase", "origin/main"], { cwd, timeout: GIT_TIMEOUT_MS });
     return true;
   } catch {
     try {
-      execFileSync("git", ["rebase", "--abort"], { cwd, stdio: "pipe" });
+      await execFileAsync("git", ["rebase", "--abort"], { cwd });
     } catch {
       /* empty */
     }
@@ -218,12 +217,11 @@ function extractStderr(err: unknown): string {
  * @param rebaseSucceeded - Whether the preceding rebase completed successfully.
  * @returns `true` if the primary push succeeded, `false` otherwise.
  */
-function pushBranch(cwd: string, spec: TaskSpec, rebaseSucceeded: boolean): boolean {
+async function pushBranch(cwd: string, spec: TaskSpec, rebaseSucceeded: boolean): Promise<boolean> {
   if (rebaseSucceeded) {
     try {
-      execFileSync("git", ["push", "--force-with-lease"], {
+      await execFileAsync("git", ["push", "--force-with-lease"], {
         cwd,
-        stdio: "pipe",
         timeout: PUSH_TIMEOUT_MS,
       });
       return true;
@@ -231,11 +229,14 @@ function pushBranch(cwd: string, spec: TaskSpec, rebaseSucceeded: boolean): bool
       const pushMsg = toErrorMessage(pushErr);
       try {
         const suffix = crypto.randomBytes(4).toString("hex");
-        execFileSync("git", ["push", "origin", `HEAD:refs/heads/rescue/${spec.branch}-${suffix}`], {
-          cwd,
-          stdio: "pipe",
-          timeout: PUSH_TIMEOUT_MS,
-        });
+        await execFileAsync(
+          "git",
+          ["push", "origin", `HEAD:refs/heads/rescue/${spec.branch}-${suffix}`],
+          {
+            cwd,
+            timeout: PUSH_TIMEOUT_MS,
+          },
+        );
         console.warn(
           `  #${spec.id}: Push failed. Commits preserved at rescue/${spec.branch}-${suffix}`,
         );
@@ -248,7 +249,7 @@ function pushBranch(cwd: string, spec: TaskSpec, rebaseSucceeded: boolean): bool
     }
   } else {
     try {
-      execFileSync("git", ["push"], { cwd, stdio: "pipe", timeout: PUSH_TIMEOUT_MS });
+      await execFileAsync("git", ["push"], { cwd, timeout: PUSH_TIMEOUT_MS });
       return true;
     } catch (pushErr: unknown) {
       const pushMsg = toErrorMessage(pushErr);
@@ -264,11 +265,11 @@ function pushBranch(cwd: string, spec: TaskSpec, rebaseSucceeded: boolean): bool
  * @param spec - The task specification (used for logging).
  * @returns `true` if validation passed, `false` otherwise.
  */
-function runValidation(cwd: string, spec: TaskSpec): boolean {
+async function runValidation(cwd: string, spec: TaskSpec): Promise<boolean> {
   try {
-    execFileSync("sh", ["-c", VALIDATION_COMMAND], {
+    await execFileAsync("sh", ["-c", VALIDATION_COMMAND], {
       cwd,
-      stdio: "pipe",
+      maxBuffer: 8 * 1024 * 1024,
       timeout: VALIDATION_TIMEOUT_MS,
     });
     return true;
diff --git a/.sandcastle/refinement-loop.ts b/.sandcastle/refinement-loop.ts
index bb04f2c..3e852a3 100644
--- a/.sandcastle/refinement-loop.ts
+++ b/.sandcastle/refinement-loop.ts
@@ -1,5 +1,4 @@
 import * as sandcastle from "@ai-hero/sandcastle";
-import { execFileSync } from "node:child_process";
 import crypto from "node:crypto";
 import { readFileSync, realpathSync } from "node:fs";
 import { join, sep } from "node:path";
@@ -9,6 +8,7 @@ import type { Finding, LoopResult, LoopStatus, SandboxInstance, TaskSpec } from
 import {
   AGENT_MODEL,
   CONTEXT_HASH_RADIUS,
+  execFileAsync,
   HASH_PREFIX_LENGTH,
   VALIDATION_COMMAND,
   VALIDATION_TIMEOUT_MS,
@@ -124,7 +124,7 @@ export async function runRefinementLoop(
     if (result.findings === null) break;
     const findings: Finding[] = result.findings;
 
-    if (result.commits > 0 && runMidLoopValidation(sandbox.worktreePath)) {
+    if (result.commits > 0 && (await runMidLoopValidation(sandbox.worktreePath))) {
       totalCommits += result.commits;
       status = "converged";
       break;
@@ -139,7 +139,7 @@ export async function runRefinementLoop(
 
     const nonLowFindings = findings.filter((f) => f.confidence !== "LOW");
     if (
-      checkQualityRatchet(
+      await checkQualityRatchet(
         { beforeSha: result.beforeSha, cwd, round, spec },
         nonLowFindings.length,
         previousFindingsCount,
@@ -151,14 +151,14 @@ export async function runRefinementLoop(
 
     if (newFindings.length < bestFindingsCount) {
       bestFindingsCount = newFindings.length;
-      bestSha = captureHeadSha(cwd);
+      bestSha = await captureHeadSha(cwd);
     }
 
     totalCommits += result.commits;
     previousFindingsCount = nonLowFindings.length;
     onRoundComplete(round, findings);
 
-    const convergenceResult = checkConvergence(cwd, findings, newFindings, nonLowFindings);
+    const convergenceResult = await checkConvergence(cwd, findings, newFindings, nonLowFindings);
     if (convergenceResult !== null) {
       lastFindings = convergenceResult.lastFindings;
       status = convergenceResult.status;
@@ -170,7 +170,7 @@ export async function runRefinementLoop(
   }
 
   if (shouldResetToBest(status, bestSha)) {
-    totalCommits = resetToBestState(sandbox.worktreePath, bestSha, totalCommits);
+    totalCommits = await resetToBestState(sandbox.worktreePath, bestSha, totalCommits);
   }
 
   return { lastFindings, roundsCompleted, status, totalCommits };
@@ -181,13 +181,10 @@ export async function runRefinementLoop(
  * @param cwd - Working directory for git operations.
  * @returns The HEAD SHA or empty string.
  */
-function captureHeadSha(cwd: string): string {
+async function captureHeadSha(cwd: string): Promise<string> {
   try {
-    return execFileSync("git", ["rev-parse", "HEAD"], {
-      cwd,
-      encoding: "utf-8",
-      stdio: ["pipe", "pipe", "pipe"],
-    }).trim();
+    const { stdout } = await execFileAsync("git", ["rev-parse", "HEAD"], { cwd });
+    return stdout.trim();
   } catch {
     return "";
   }
@@ -201,12 +198,12 @@ function captureHeadSha(cwd: string): string {
  * @param nonLowFindings - Non-LOW-confidence findings.
  * @returns A ConvergenceResult if the loop should break, or null to continue.
  */
-function checkConvergence(
+async function checkConvergence(
   cwd: string,
   allFindings: Finding[],
   newFindings: Finding[],
   nonLowFindings: Finding[],
-): ConvergenceResult | null {
+): Promise<ConvergenceResult | null> {
   if (newFindings.length !== 0) return null;
 
   // Severity-weighted convergence (OpenHands pattern):
@@ -217,7 +214,7 @@ function checkConvergence(
   if (criticalPersistent.length > 0) {
     // Capture current HEAD so post-loop reset is a no-op (code matches findings)
     return {
-      bestSha: captureHeadSha(cwd),
+      bestSha: await captureHeadSha(cwd),
       lastFindings: criticalPersistent,
       status: "exhausted",
     };
@@ -264,27 +261,24 @@ function checkEarlyExit(
  * @param previousCount - Number of non-LOW findings from the previous round.
  * @returns True if a regression was detected and rollback performed.
  */
-function checkQualityRatchet(
+async function checkQualityRatchet(
   ctx: RatchetContext,
   findingsCount: number,
   previousCount: number,
-): boolean {
+): Promise<boolean> {
   const { beforeSha, cwd, round, spec } = ctx;
   if (round <= 2 || findingsCount <= previousCount) {
     return false;
   }
 
-  // Validate SHA format before passing to execFileSync
+  // Validate SHA format before passing to execFileAsync
   if (!/^[0-9a-f]{40}$/.test(beforeSha)) {
     console.warn(`  #${spec.id}: Invalid SHA for rollback, skipping reset.`);
     return true;
   }
 
   try {
-    execFileSync("git", ["reset", "--hard", beforeSha], {
-      cwd,
-      stdio: "pipe",
-    });
+    await execFileAsync("git", ["reset", "--hard", beforeSha], { cwd });
     console.warn(
       `  #${spec.id} R${String(round)}: Regression detected (${String(previousCount)} → ${String(findingsCount)}). Rolled back.`,
     );
@@ -363,11 +357,10 @@ async function executeRound(
   // Capture SHA before implementer runs (for quality ratchet rollback)
   let beforeSha = "";
   try {
-    beforeSha = execFileSync("git", ["rev-parse", "HEAD"], {
+    const { stdout } = await execFileAsync("git", ["rev-parse", "HEAD"], {
       cwd: sandbox.worktreePath,
-      encoding: "utf-8",
-      stdio: ["pipe", "pipe", "pipe"],
-    }).trim();
+    });
+    beforeSha = stdout.trim();
   } catch {
     console.warn(`  #${spec.id}: Failed to capture HEAD SHA before round ${String(round)}.`);
   }
@@ -486,17 +479,15 @@ function parseFindings(stdout: string, nonce: string): Finding[] | null {
  * @param currentCommits - Current total commits (fallback if recount fails).
  * @returns Updated total commit count.
  */
-function resetToBestState(cwd: string, bestSha: string, currentCommits: number): number {
+async function resetToBestState(
+  cwd: string,
+  bestSha: string,
+  currentCommits: number,
+): Promise<number> {
   try {
-    execFileSync("git", ["reset", "--hard", bestSha], {
-      cwd,
-      stdio: "pipe",
-    });
-    const commitCount = execFileSync("git", ["rev-list", "--count", "main..HEAD"], {
-      cwd,
-      encoding: "utf-8",
-    }).trim();
-    return parseInt(commitCount, 10) || 0;
+    await execFileAsync("git", ["reset", "--hard", bestSha], { cwd });
+    const { stdout } = await execFileAsync("git", ["rev-list", "--count", "main..HEAD"], { cwd });
+    return parseInt(stdout.trim(), 10) || 0;
   } catch {
     return currentCommits;
   }
@@ -565,11 +556,11 @@ async function runCritic(
  * @param cwd - Working directory for the validation command.
  * @returns True if validation passed (deterministic convergence), false otherwise.
  */
-function runMidLoopValidation(cwd: string): boolean {
+async function runMidLoopValidation(cwd: string): Promise<boolean> {
   try {
-    execFileSync("sh", ["-c", VALIDATION_COMMAND], {
+    await execFileAsync("sh", ["-c", VALIDATION_COMMAND], {
       cwd,
-      stdio: "pipe",
+      maxBuffer: 8 * 1024 * 1024,
       timeout: VALIDATION_TIMEOUT_MS,
     });
     return true;
diff --git a/.sandcastle/task-source.ts b/.sandcastle/task-source.ts
index 1f31f32..a59066d 100644
--- a/.sandcastle/task-source.ts
+++ b/.sandcastle/task-source.ts
@@ -1,11 +1,11 @@
 import * as sandcastle from "@ai-hero/sandcastle";
 import { docker } from "@ai-hero/sandcastle/sandboxes/docker";
-import { execFileSync } from "node:child_process";
 import { z } from "zod";
 
 import type { TaskSpec } from "./types.js";
 
 import {
+  execFileAsync,
   GIT_TIMEOUT_MS,
   MAX_TITLE_LENGTH,
   PLANNER_MODEL,
@@ -70,7 +70,7 @@ export class GithubIssueSource implements TaskSource {
    * @returns Array of task specifications to implement.
    */
   async discover(): Promise<TaskSpec[]> {
-    const issuesJson = this.fetchAndSanitizeIssues();
+    const issuesJson = await this.fetchAndSanitizeIssues();
 
     if (issuesJson.length === 0) {
       console.log("No issues with label '%s'. Exiting.", this.label);
@@ -132,15 +132,17 @@ export class GithubIssueSource implements TaskSource {
     return [];
   }
 
-  private fetchAndSanitizeIssues(): {
-    body: string;
-    labels: string[];
-    number: number;
-    title: string;
-  }[] {
+  private async fetchAndSanitizeIssues(): Promise<
+    {
+      body: string;
+      labels: string[];
+      number: number;
+      title: string;
+    }[]
+  > {
     let rawIssuesJson: string;
     try {
-      rawIssuesJson = execFileSync(
+      const { stdout } = await execFileAsync(
         "gh",
         [
           "issue",
@@ -154,8 +156,9 @@ export class GithubIssueSource implements TaskSource {
           "--label",
           this.label,
         ],
-        { encoding: "utf-8", timeout: GIT_TIMEOUT_MS },
+        { encoding: "utf-8", maxBuffer: 8 * 1024 * 1024, timeout: GIT_TIMEOUT_MS },
       );
+      rawIssuesJson = stdout;
     } catch (err) {
       console.error(
         `Failed to fetch issues: ${toErrorMessage(err)}. Ensure gh is installed and authenticated.`,

From 96bbd805ca05d3b4c38446baa286da1242c4afd6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= <jerome.benoit@sap.com>
Date: Tue, 5 May 2026 22:35:12 +0200
Subject: [PATCH 31/31] fix: catch planner timeout rejection (retry instead of
 crash) + add catch type annotations

---
 .sandcastle/task-source.ts | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/.sandcastle/task-source.ts b/.sandcastle/task-source.ts
index a59066d..358d73c 100644
--- a/.sandcastle/task-source.ts
+++ b/.sandcastle/task-source.ts
@@ -99,7 +99,13 @@ export class GithubIssueSource implements TaskSource {
       timeoutPromise.catch(() => {
         /* suppress unhandled rejection when planner completes before timeout */
       });
-      const plan = await Promise.race([planPromise, timeoutPromise]);
+      let plan: Awaited<ReturnType<typeof sandcastle.run>>;
+      try {
+        plan = await Promise.race([planPromise, timeoutPromise]);
+      } catch {
+        console.error("Planner timed out or failed. Retrying.");
+        continue;
+      }
 
       const planMatches = [...plan.stdout.matchAll(/<plan>([\s\S]*?)<\/plan>/g)];
       const planMatch = planMatches.at(-1);
@@ -159,7 +165,7 @@ export class GithubIssueSource implements TaskSource {
         { encoding: "utf-8", maxBuffer: 8 * 1024 * 1024, timeout: GIT_TIMEOUT_MS },
       );
       rawIssuesJson = stdout;
-    } catch (err) {
+    } catch (err: unknown) {
       console.error(
         `Failed to fetch issues: ${toErrorMessage(err)}. Ensure gh is installed and authenticated.`,
       );
@@ -169,7 +175,7 @@ export class GithubIssueSource implements TaskSource {
     let rawIssues: z.infer<typeof RawIssuesSchema>;
     try {
       rawIssues = RawIssuesSchema.parse(JSON.parse(rawIssuesJson));
-    } catch (err) {
+    } catch (err: unknown) {
       console.error(
         `Failed to parse issues JSON: ${toErrorMessage(err)}. Unexpected format from gh CLI.`,
       );
@@ -221,7 +227,7 @@ export class GithubIssueSource implements TaskSource {
           };
         })
         .filter((entry): entry is NonNullable<typeof entry> => entry !== null);
-    } catch (err) {
+    } catch (err: unknown) {
       console.error(`Planner produced invalid JSON: ${toErrorMessage(err)}. Retrying.`);
       return null;
     }