marcusquinn · marcusquinn · Feb 8, 2026 · Feb 8, 2026 · coderabbitai · Feb 8, 2026
diff --git a/.agents/scripts/supervisor-helper.sh b/.agents/scripts/supervisor-helper.sh
@@ -34,6 +34,8 @@
 #   supervisor-helper.sh pr-check <task_id>             Check PR CI/review status
 #   supervisor-helper.sh pr-merge <task_id> [--dry-run]  Merge PR (squash)
 #   supervisor-helper.sh self-heal <task_id>            Create diagnostic subtask for failed/blocked task
+#   supervisor-helper.sh backup [reason]               Backup supervisor database (t162)
+#   supervisor-helper.sh restore [backup_file]         Restore from backup (lists if no file) (t162)
 #   supervisor-helper.sh db [sql]                      Direct SQLite access
 #   supervisor-helper.sh help
 #
@@ -449,6 +451,108 @@ db() {
     sqlite3 -cmd ".timeout 5000" "$@"
 }
 
+#######################################
+# Backup supervisor database before destructive operations (t162)
+# Creates a timestamped copy in the supervisor dir. Keeps the newest 5 backups.
+# Usage: backup_db [reason]
+# Globals:   SUPERVISOR_DB (read), SUPERVISOR_DIR (read; backups are written here)
+# Arguments: $1 - optional label embedded in the backup filename
+#                 (default: "manual"); any character outside [A-Za-z0-9._-]
+#                 is replaced with "_"
+# Outputs:   path of the new backup file as the last line on stdout
+# Returns:   0 on success, 1 if there is no database or the copy failed
+#######################################
+backup_db() {
+    local reason="${1:-manual}"
+    # The reason ends up in a filename and inside a single-quoted sqlite3
+    # dot-command, so neutralise path separators, quotes and whitespace.
+    reason="${reason//[^A-Za-z0-9._-]/_}"
+
+    if [[ ! -f "$SUPERVISOR_DB" ]]; then
+        log_warn "No database to backup at: $SUPERVISOR_DB"
+        return 1
+    fi
+
+    local timestamp
+    timestamp=$(date -u +%Y%m%dT%H%M%SZ)
+    local backup_file="$SUPERVISOR_DIR/supervisor-backup-${timestamp}-${reason}.db"
+
+    # Use SQLite .backup for consistency (handles WAL correctly)
+    if sqlite3 "$SUPERVISOR_DB" ".backup '$backup_file'" 2>/dev/null; then
+        log_success "Database backed up: $backup_file"
+    else
+        # Discard any partial output of the failed .backup before falling back
+        rm -f "$backup_file" 2>/dev/null || true
+        # Fallback to file copy if .backup fails
+        if cp "$SUPERVISOR_DB" "$backup_file" 2>/dev/null; then
+            # Also copy WAL/SHM if present (best effort)
+            local suffix
+            for suffix in -wal -shm; do
+                if [[ -f "${SUPERVISOR_DB}${suffix}" ]]; then
+                    cp "${SUPERVISOR_DB}${suffix}" "${backup_file}${suffix}" 2>/dev/null || true
+                fi
+            done
+            log_success "Database backed up (file copy): $backup_file"
+        else
+            # Do not leave a truncated file behind to be counted as a backup
+            rm -f "$backup_file" 2>/dev/null || true
+            log_error "Failed to backup database"
+            return 1
+        fi
+    fi
+
+    # Prune old backups: keep last 5.
+    # Glob expansion is sorted by name, and names start with a fixed-width UTC
+    # timestamp, so the array is ordered oldest -> newest without parsing ls.
+    local -a backups=("$SUPERVISOR_DIR"/supervisor-backup-*.db)
+    local excess=$(( ${#backups[@]} - 5 ))
+    if (( excess > 0 )); then
+        local pruned=0
+        local old_backup
+        for old_backup in "${backups[@]:0:excess}"; do
+            # Never delete the backup that was just written
+            [[ "$old_backup" == "$backup_file" ]] && continue
+            rm -f "$old_backup" "${old_backup}-wal" "${old_backup}-shm" 2>/dev/null || true
+            pruned=$((pruned + 1))
+        done
+        log_info "Pruned $pruned old backup(s)"
+    fi
+
+    printf '%s\n' "$backup_file"
+    return 0
+}
+
+#######################################
+# Restore supervisor database from backup (t162)
+# Usage: restore_db [backup_file]
+# If no file specified, lists available backups (newest first) and returns 0.
+# Globals:   SUPERVISOR_DB (overwritten on restore), SUPERVISOR_DIR (read)
+# Returns:   0 after listing or a successful restore; 1 if the file is
+#            missing, has no readable tasks table, or cannot be copied
+#######################################
+restore_db() {
+    local backup_file="${1:-}"
+
+    if [[ -z "$backup_file" ]]; then
+        log_info "Available backups:"
+        # shellcheck disable=SC2012
+        ls -1t "$SUPERVISOR_DIR"/supervisor-backup-*.db 2>/dev/null | while IFS= read -r f; do
+            local size
+            size=$(du -h "$f" 2>/dev/null | cut -f1)
+            local task_count
+            task_count=$(sqlite3 "$f" "SELECT count(*) FROM tasks;" 2>/dev/null || echo "?")
+            echo "  $f ($size, $task_count tasks)"
+        done
+        return 0
+    fi
+
+    if [[ ! -f "$backup_file" ]]; then
+        log_error "Backup file not found: $backup_file"
+        return 1
+    fi
+
+    # Verify backup is valid SQLite
+    if ! sqlite3 "$backup_file" "SELECT count(*) FROM tasks;" >/dev/null 2>&1; then
+        log_error "Backup file is not a valid supervisor database: $backup_file"
+        return 1
+    fi
+
+    # Stage a private copy BEFORE taking the pre-restore backup: backup_db
+    # prunes old backups and could otherwise delete the file being restored.
+    local staged
+    if ! staged=$(mktemp "${SUPERVISOR_DB}.restore.XXXXXX"); then
+        log_error "Failed to restore database from: $backup_file"
+        return 1
+    fi
+    if ! cp "$backup_file" "$staged"; then
+        rm -f "$staged" 2>/dev/null || true
+        log_error "Failed to restore database from: $backup_file"
+        return 1
+    fi
+    local suffix
+    for suffix in -wal -shm; do
+        # Sidecar files only exist for file-copy backups; copying is best effort
+        if [[ -f "${backup_file}${suffix}" ]]; then
+            cp "${backup_file}${suffix}" "${staged}${suffix}" 2>/dev/null || true
+        fi
+    done
+
+    # Backup current DB before overwriting
+    if [[ -f "$SUPERVISOR_DB" ]]; then
+        backup_db "pre-restore" >/dev/null 2>&1 || log_warn "Pre-restore backup failed, proceeding with restore"
+    fi
+
+    # Remove stale WAL/SHM to avoid replaying old frames
+    rm -f "${SUPERVISOR_DB}-wal" "${SUPERVISOR_DB}-shm" 2>/dev/null || true
+
+    if ! cp "$staged" "$SUPERVISOR_DB"; then
+        rm -f "$staged" "${staged}-wal" "${staged}-shm" 2>/dev/null || true
+        log_error "Failed to restore database from: $backup_file"
+        return 1
+    fi
+    for suffix in -wal -shm; do
+        if [[ -f "${staged}${suffix}" ]]; then
+            cp "${staged}${suffix}" "${SUPERVISOR_DB}${suffix}" 2>/dev/null || true
+        fi
+    done
+    rm -f "$staged" "${staged}-wal" "${staged}-shm" 2>/dev/null || true
+
+    # The restore itself is done; a failing count query must not abort the
+    # script, so fall back to "?" as the listing mode does.
+    local task_count
+    task_count=$(db "$SUPERVISOR_DB" "SELECT count(*) FROM tasks;") || task_count="?"
+    local batch_count
+    batch_count=$(db "$SUPERVISOR_DB" "SELECT count(*) FROM batches;") || batch_count="?"
+
+    log_success "Database restored from: $backup_file"
+    log_info "Tasks: $task_count | Batches: $batch_count"
+    return 0
+}
-restore_db() {
-    local backup_file="${1:-}"
-
-    if [[ -z "$backup_file" ]]; then
-        log_info "Available backups:"
-        # shellcheck disable=SC2012
-        ls -1t "$SUPERVISOR_DIR"/supervisor-backup-*.db 2>/dev/null | while IFS= read -r f; do
-            local size
-            size=$(du -h "$f" 2>/dev/null | cut -f1)
-            local task_count
-            task_count=$(sqlite3 "$f" "SELECT count(*) FROM tasks;" 2>/dev/null || echo "?")
-            echo "  $f ($size, $task_count tasks)"
-        done
-        return 0
-    fi
-
-    if [[ ! -f "$backup_file" ]]; then
-        log_error "Backup file not found: $backup_file"
-        return 1
-    fi
-
-    # Verify backup is valid SQLite
-    if ! sqlite3 "$backup_file" "SELECT count(*) FROM tasks;" >/dev/null 2>&1; then
-        log_error "Backup file is not a valid supervisor database: $backup_file"
-        return 1
-    fi
-
-    # Backup current DB before overwriting
-    if [[ -f "$SUPERVISOR_DB" ]]; then
-        backup_db "pre-restore" >/dev/null 2>&1 || true
-    fi
-
-    cp "$backup_file" "$SUPERVISOR_DB"
-    [[ -f "${backup_file}-wal" ]] && cp "${backup_file}-wal" "${SUPERVISOR_DB}-wal" 2>/dev/null || true
-    [[ -f "${backup_file}-shm" ]] && cp "${backup_file}-shm" "${SUPERVISOR_DB}-shm" 2>/dev/null || true
-
-    local task_count
-    task_count=$(db "$SUPERVISOR_DB" "SELECT count(*) FROM tasks;")
-    local batch_count
-    batch_count=$(db "$SUPERVISOR_DB" "SELECT count(*) FROM batches;")
-
-    log_success "Database restored from: $backup_file"
-    log_info "Tasks: $task_count | Batches: $batch_count"
-    return 0
-}
+#######################################
+# Restore supervisor database from backup (t162)
+# Usage: restore_db [backup_file]
+# If no file specified, lists available backups (newest first) and returns 0.
+# Globals:   SUPERVISOR_DB (overwritten on restore), SUPERVISOR_DIR (read)
+# Returns:   0 after listing or a successful restore; 1 if the file is
+#            missing, has no readable tasks table, or cannot be copied
+#######################################
+restore_db() {
+    local backup_file="${1:-}"
+
+    if [[ -z "$backup_file" ]]; then
+        log_info "Available backups:"
+        # shellcheck disable=SC2012
+        ls -1t "$SUPERVISOR_DIR"/supervisor-backup-*.db 2>/dev/null | while IFS= read -r f; do
+            local size
+            size=$(du -h "$f" 2>/dev/null | cut -f1)
+            local task_count
+            task_count=$(sqlite3 "$f" "SELECT count(*) FROM tasks;" 2>/dev/null || echo "?")
+            echo "  $f ($size, $task_count tasks)"
+        done
+        return 0
+    fi
+
+    if [[ ! -f "$backup_file" ]]; then
+        log_error "Backup file not found: $backup_file"
+        return 1
+    fi
+
+    # Verify backup is valid SQLite
+    if ! sqlite3 "$backup_file" "SELECT count(*) FROM tasks;" >/dev/null 2>&1; then
+        log_error "Backup file is not a valid supervisor database: $backup_file"
+        return 1
+    fi
+
+    # Stage a private copy BEFORE taking the pre-restore backup: backup_db
+    # prunes old backups and could otherwise delete the file being restored.
+    local staged
+    if ! staged=$(mktemp "${SUPERVISOR_DB}.restore.XXXXXX"); then
+        log_error "Failed to restore database from: $backup_file"
+        return 1
+    fi
+    if ! cp "$backup_file" "$staged"; then
+        rm -f "$staged" 2>/dev/null || true
+        log_error "Failed to restore database from: $backup_file"
+        return 1
+    fi
+    local suffix
+    for suffix in -wal -shm; do
+        # Sidecar files only exist for file-copy backups; copying is best effort
+        if [[ -f "${backup_file}${suffix}" ]]; then
+            cp "${backup_file}${suffix}" "${staged}${suffix}" 2>/dev/null || true
+        fi
+    done
+
+    # Backup current DB before overwriting
+    if [[ -f "$SUPERVISOR_DB" ]]; then
+        backup_db "pre-restore" >/dev/null 2>&1 || log_warn "Pre-restore backup failed, proceeding with restore"
+    fi
+
+    # Remove stale WAL/SHM to avoid replaying old frames
+    rm -f "${SUPERVISOR_DB}-wal" "${SUPERVISOR_DB}-shm" 2>/dev/null || true
+
+    if ! cp "$staged" "$SUPERVISOR_DB"; then
+        rm -f "$staged" "${staged}-wal" "${staged}-shm" 2>/dev/null || true
+        log_error "Failed to restore database from: $backup_file"
+        return 1
+    fi
+    for suffix in -wal -shm; do
+        if [[ -f "${staged}${suffix}" ]]; then
+            cp "${staged}${suffix}" "${SUPERVISOR_DB}${suffix}" 2>/dev/null || true
+        fi
+    done
+    rm -f "$staged" "${staged}-wal" "${staged}-shm" 2>/dev/null || true
+
+    # The restore itself is done; a failing count query must not abort the
+    # script, so fall back to "?" as the listing mode does.
+    local task_count
+    task_count=$(db "$SUPERVISOR_DB" "SELECT count(*) FROM tasks;") || task_count="?"
+    local batch_count
+    batch_count=$(db "$SUPERVISOR_DB" "SELECT count(*) FROM batches;") || batch_count="?"
+
+    log_success "Database restored from: $backup_file"
+    log_info "Tasks: $task_count | Batches: $batch_count"
+    return 0
+}
-restore_db() {
-    local backup_file="${1:-}"
-
-    if [[ -z "$backup_file" ]]; then
-        log_info "Available backups:"
-        # shellcheck disable=SC2012
-        ls -1t "$SUPERVISOR_DIR"/supervisor-backup-*.db 2>/dev/null | while IFS= read -r f; do
-            local size
-            size=$(du -h "$f" 2>/dev/null | cut -f1)
-            local task_count
-            task_count=$(sqlite3 "$f" "SELECT count(*) FROM tasks;" 2>/dev/null || echo "?")
-            echo "  $f ($size, $task_count tasks)"
-        done
-        return 0
-    fi
-
-    if [[ ! -f "$backup_file" ]]; then
-        log_error "Backup file not found: $backup_file"
-        return 1
-    fi
-
-    # Verify backup is valid SQLite
-    if ! sqlite3 "$backup_file" "SELECT count(*) FROM tasks;" >/dev/null 2>&1; then
-        log_error "Backup file is not a valid supervisor database: $backup_file"
-        return 1
-    fi
-
-    # Backup current DB before overwriting
-    if [[ -f "$SUPERVISOR_DB" ]]; then
-        backup_db "pre-restore" >/dev/null 2>&1 || true
-    fi
-
-    cp "$backup_file" "$SUPERVISOR_DB"
-    [[ -f "${backup_file}-wal" ]] && cp "${backup_file}-wal" "${SUPERVISOR_DB}-wal" 2>/dev/null || true
-    [[ -f "${backup_file}-shm" ]] && cp "${backup_file}-shm" "${SUPERVISOR_DB}-shm" 2>/dev/null || true
-
-    local task_count
-    task_count=$(db "$SUPERVISOR_DB" "SELECT count(*) FROM tasks;")
-    local batch_count
-    batch_count=$(db "$SUPERVISOR_DB" "SELECT count(*) FROM batches;")
-
-    log_success "Database restored from: $backup_file"
-    log_info "Tasks: $task_count | Batches: $batch_count"
-    return 0
-}
+#######################################
+# Restore supervisor database from backup (t162)
+# Usage: restore_db [backup_file]
+# If no file specified, lists available backups (newest first) and returns 0.
+# Globals:   SUPERVISOR_DB (overwritten on restore), SUPERVISOR_DIR (read)
+# Returns:   0 after listing or a successful restore; 1 if the file is
+#            missing, has no readable tasks table, or cannot be copied
+#######################################
+restore_db() {
+    local backup_file="${1:-}"
+
+    if [[ -z "$backup_file" ]]; then
+        log_info "Available backups:"
+        # shellcheck disable=SC2012
+        ls -1t "$SUPERVISOR_DIR"/supervisor-backup-*.db 2>/dev/null | while IFS= read -r f; do
+            local size
+            size=$(du -h "$f" 2>/dev/null | cut -f1)
+            local task_count
+            task_count=$(sqlite3 "$f" "SELECT count(*) FROM tasks;" 2>/dev/null || echo "?")
+            echo "  $f ($size, $task_count tasks)"
+        done
+        return 0
+    fi
+
+    if [[ ! -f "$backup_file" ]]; then
+        log_error "Backup file not found: $backup_file"
+        return 1
+    fi
+
+    # Verify backup is valid SQLite
+    if ! sqlite3 "$backup_file" "SELECT count(*) FROM tasks;" >/dev/null 2>&1; then
+        log_error "Backup file is not a valid supervisor database: $backup_file"
+        return 1
+    fi
+
+    # Stage a private copy BEFORE taking the pre-restore backup: backup_db
+    # prunes old backups and could otherwise delete the file being restored.
+    local staged
+    if ! staged=$(mktemp "${SUPERVISOR_DB}.restore.XXXXXX"); then
+        log_error "Failed to restore database from: $backup_file"
+        return 1
+    fi
+    if ! cp "$backup_file" "$staged"; then
+        rm -f "$staged" 2>/dev/null || true
+        log_error "Failed to restore database from: $backup_file"
+        return 1
+    fi
+    local suffix
+    for suffix in -wal -shm; do
+        # Sidecar files only exist for file-copy backups; copying is best effort
+        if [[ -f "${backup_file}${suffix}" ]]; then
+            cp "${backup_file}${suffix}" "${staged}${suffix}" 2>/dev/null || true
+        fi
+    done
+
+    # Backup current DB before overwriting
+    if [[ -f "$SUPERVISOR_DB" ]]; then
+        backup_db "pre-restore" >/dev/null 2>&1 || log_warn "Pre-restore backup failed, proceeding with restore"
+    fi
+
+    # Remove stale WAL/SHM to avoid replaying old frames
+    rm -f "${SUPERVISOR_DB}-wal" "${SUPERVISOR_DB}-shm" 2>/dev/null || true
+
+    if ! cp "$staged" "$SUPERVISOR_DB"; then
+        rm -f "$staged" "${staged}-wal" "${staged}-shm" 2>/dev/null || true
+        log_error "Failed to restore database from: $backup_file"
+        return 1
+    fi
+    for suffix in -wal -shm; do
+        if [[ -f "${staged}${suffix}" ]]; then
+            cp "${staged}${suffix}" "${SUPERVISOR_DB}${suffix}" 2>/dev/null || true
+        fi
+    done
+    rm -f "$staged" "${staged}-wal" "${staged}-shm" 2>/dev/null || true
+
+    # The restore itself is done; a failing count query must not abort the
+    # script, so fall back to "?" as the listing mode does.
+    local task_count
+    task_count=$(db "$SUPERVISOR_DB" "SELECT count(*) FROM tasks;") || task_count="?"
+    local batch_count
+    batch_count=$(db "$SUPERVISOR_DB" "SELECT count(*) FROM batches;") || batch_count="?"
+
+    log_success "Database restored from: $backup_file"
+    log_info "Tasks: $task_count | Batches: $batch_count"
+    return 0
+}
+
 #######################################
 # Ensure supervisor directory and DB exist
 #######################################
@@ -475,6 +579,7 @@ ensure_db() {
     check_sql=$(db "$SUPERVISOR_DB" "SELECT sql FROM sqlite_master WHERE type='table' AND name='tasks';" 2>/dev/null || echo "")
     if [[ -n "$check_sql" ]] && ! echo "$check_sql" | grep -q 'pr_review'; then
         log_info "Migrating database schema for post-PR lifecycle states (t128.8)..."
+        backup_db "pre-migrate-t128.8" >/dev/null 2>&1 || log_warn "Backup failed, proceeding with migration"
         db "$SUPERVISOR_DB" << 'MIGRATE'
 PRAGMA foreign_keys=OFF;
 BEGIN TRANSACTION;
@@ -499,7 +604,13 @@ CREATE TABLE tasks (
     completed_at    TEXT,
     updated_at      TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ','now'))
 );
-INSERT INTO tasks SELECT * FROM tasks_old;
+INSERT INTO tasks (id, repo, description, status, session_id, worktree, branch,
+    log_file, retries, max_retries, model, error, pr_url,
+    created_at, started_at, completed_at, updated_at)
+SELECT id, repo, description, status, session_id, worktree, branch,
+    log_file, retries, max_retries, model, error, pr_url,
+    created_at, started_at, completed_at, updated_at
+FROM tasks_old;
 DROP TABLE tasks_old;
 CREATE INDEX IF NOT EXISTS idx_tasks_status ON tasks(status);
 CREATE INDEX IF NOT EXISTS idx_tasks_repo ON tasks(repo);
@@ -565,6 +676,7 @@ MIGRATE
     check_sql_t148=$(db "$SUPERVISOR_DB" "SELECT sql FROM sqlite_master WHERE type='table' AND name='tasks';" 2>/dev/null || echo "")
     if [[ -n "$check_sql_t148" ]] && ! echo "$check_sql_t148" | grep -q 'review_triage'; then
         log_info "Migrating database schema for review_triage state (t148)..."
+        backup_db "pre-migrate-t148" >/dev/null 2>&1 || log_warn "Backup failed, proceeding with migration"
         db "$SUPERVISOR_DB" << 'MIGRATE_T148'
 PRAGMA foreign_keys=OFF;
 BEGIN TRANSACTION;
@@ -740,6 +852,22 @@ cmd_init() {
     return 0
 }
 
+#######################################
+# CLI entry point for the `backup` command (t162)
+# Arguments: $1 - optional reason label, forwarded to backup_db
+#                 (falls back to "manual" when absent or empty)
+# Returns:   exit status of backup_db
+#######################################
+cmd_backup() {
+    backup_db "${1:-manual}"
+}
+
+#######################################
+# CLI entry point for the `restore` command (t162)
+# Arguments: $1 - optional backup file path, forwarded to restore_db
+#                 (an empty string is passed when absent)
+# Returns:   exit status of restore_db
+#######################################
+cmd_restore() {
+    restore_db "${1:-}"
+}
+
 #######################################
 # Add a task to the supervisor
 #######################################
@@ -7132,6 +7260,8 @@ Usage:
   supervisor-helper.sh running-count [batch_id]      Count active tasks
   supervisor-helper.sh reset <task_id>               Reset task to queued
   supervisor-helper.sh cancel <task_id|batch_id>     Cancel task or batch
+  supervisor-helper.sh backup [reason]               Backup supervisor database
+  supervisor-helper.sh restore [backup_file]         Restore from backup (lists if no file)
   supervisor-helper.sh auto-pickup [--repo path]      Scan TODO.md for auto-dispatch tasks
   supervisor-helper.sh cron [install|uninstall|status] Manage cron-based pulse scheduling
   supervisor-helper.sh watch [--repo path]            Watch TODO.md for changes (fswatch)
@@ -7416,6 +7546,8 @@ main() {
         running-count) cmd_running_count "$@" ;;
         reset) cmd_reset "$@" ;;
         cancel) cmd_cancel "$@" ;;
+        backup) cmd_backup "$@" ;;
+        restore) cmd_restore "$@" ;;
         db) cmd_db "$@" ;;
         help|--help|-h) show_usage ;;
         *) log_error "Unknown command: $command"; show_usage; return 1 ;;

diff --git a/TODO.md b/TODO.md
@@ -59,6 +59,8 @@
   - Notes: Problem: supervisor adds tasks to its DB but not to TODO.md, so workers running /full-loop cannot find the task description. Fix: 1) build_dispatch_cmd now passes task description inline in the prompt (` -- description`), 2) full-loop.md Step 0 has 3-tier resolution: inline desc > TODO.md > supervisor DB, 3) full-loop-helper.sh PR creation queries supervisor DB as fallback. Also adds headless worker rules (no user prompts, no TODO.md edits, graceful auth failure handling) and PR creation hardening (gh auth check, rebase before push, proper title/body).
 - [ ] t160 fix: supervisor TODO.md push fails under concurrent workers, add reconcile-todo command #bugfix #supervisor ~1h (ai:45m test:15m) logged:2026-02-08 started:2026-02-08
   - Notes: Root cause: update_todo_on_complete() commits TODO.md locally but git push fails when multiple concurrent workers push to main simultaneously (non-fast-forward). The function logs a warning but treats it as non-blocking, so tasks transition to deployed even though TODO.md was never pushed. Fix: add commit_and_push_todo() helper with pull-rebase retry logic (3 attempts with backoff). Add reconcile-todo command for bulk reconciliation. Wire reconcile into pulse cycle Phase 7 (runs when no workers active). Affected 12 tasks from batch-20260208.
+- [ ] t162 fix: supervisor DB safety - add backup-before-migrate and explicit column migrations #bugfix #supervisor ~30m (ai:25m test:5m) logged:2026-02-08 started:2026-02-08
+  - Notes: Root cause: t128.8 migration uses INSERT INTO tasks SELECT * FROM tasks_old which fails on column count mismatch if migrations run out of order. No backup before destructive table rename/recreate migrations. Fix: 1) Add backup_db() helper with SQLite .backup, timestamped copies, auto-prune to 5. 2) Add backup before t128.8 and t148 destructive migrations. 3) Fix t128.8 SELECT * to use explicit column list. 4) Add backup/restore commands for manual recovery.
 
 - [x] t152 Fix `((cleaned++))` arithmetic exit code bug in setup.sh causing silent abort under `set -e` #bug #setup ~30m actual:15m (ai:15m) ref:GH#548 logged:2026-02-08 started:2026-02-08 completed:2026-02-08