diff --git a/tests/docker/run-tests.sh b/tests/docker/run-tests.sh
index f7c19022..447f3ee7 100755
--- a/tests/docker/run-tests.sh
+++ b/tests/docker/run-tests.sh
@@ -2,7 +2,8 @@
 # Setup script test runner
 set -uo pipefail
 
-SCRIPTS_DIR="${HOME}/git/aidevops/.agents/scripts"
+REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+SCRIPTS_DIR="$REPO_DIR/.agents/scripts"
 PASS=0
 FAIL=0
 SKIP=0
diff --git a/tests/test-memory-mail.sh b/tests/test-memory-mail.sh
new file mode 100644
index 00000000..bdd643e3
--- /dev/null
+++ b/tests/test-memory-mail.sh
@@ -0,0 +1,425 @@
+#!/usr/bin/env bash
+# test-memory-mail.sh
+#
+# Unit tests for memory-helper.sh and mail-helper.sh:
+# - Memory: store, recall (FTS5), stats, prune, namespaces, relational versioning
+# - Mail: send, check, read, archive, prune, register/deregister agents
+#
+# Uses isolated temp directories to avoid touching production data.
+#
+# Usage: bash tests/test-memory-mail.sh [--verbose]
+#
+# Exit codes: 0 = all pass, 1 = failures found
+
+set -euo pipefail
+
+REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+SCRIPTS_DIR="$REPO_DIR/.agents/scripts"
+MEMORY_SCRIPT="$SCRIPTS_DIR/memory-helper.sh"
+MAIL_SCRIPT="$SCRIPTS_DIR/mail-helper.sh"
+VERBOSE="${1:-}"
+
+# --- Test Framework ---
+PASS_COUNT=0
+FAIL_COUNT=0
+SKIP_COUNT=0
+TOTAL_COUNT=0
+
+pass() { # $1 = test name; bumps the pass and total tallies, prints a green line
+    PASS_COUNT=$((PASS_COUNT + 1)) TOTAL_COUNT=$((TOTAL_COUNT + 1))
+    local name="$1"
+    printf "  \033[0;32mPASS\033[0m %s\n" "$name"
+}
+
+fail() {
+    # $1 = test name, $2 = optional detail shown indented under the FAIL line.
+    FAIL_COUNT=$((FAIL_COUNT + 1)) TOTAL_COUNT=$((TOTAL_COUNT + 1))
+    local name="$1" detail="${2:-}"
+    printf "  \033[0;31mFAIL\033[0m %s\n" "$name"
+    # An if (not `[[ ]] && printf`) keeps the return status 0 when no detail is given.
+    if [[ -n "$detail" ]]; then printf "       %s\n" "$detail"; fi
+}
+
+skip() { # $1 = test name, $2 = optional detail line, printed indented like fail()'s
+    SKIP_COUNT=$((SKIP_COUNT + 1)) TOTAL_COUNT=$((TOTAL_COUNT + 1))
+    printf "  \033[0;33mSKIP\033[0m %s\n" "$1"
+    if [[ -n "${2:-}" ]]; then printf "       %s\n" "$2"; fi
+}
+
+section() { # Print a blank line, then $1 as a bold "=== title ===" banner.
+    local title="$1"
+    printf "\n\033[1m=== %s ===\033[0m\n" "$title"
+}
+
+# --- Isolated Test Environment ---
+TEST_DIR=$(mktemp -d)
+export AIDEVOPS_MEMORY_DIR="$TEST_DIR/memory"
+export AIDEVOPS_MAIL_DIR="$TEST_DIR/mail"
+trap 'rm -rf "$TEST_DIR"' EXIT
+
+# mem ARGS...: run memory-helper.sh with ARGS, merging stderr into stdout so a
+# caller can capture or grep one stream. The child inherits the exported
+# AIDEVOPS_MEMORY_DIR; its exit status is returned unchanged.
+mem() { bash "$MEMORY_SCRIPT" "$@" 2>&1; }
+
+# mail_cmd ARGS...: run mail-helper.sh with ARGS, merging stderr into stdout so
+# a caller can capture or grep one stream. The child inherits the exported
+# AIDEVOPS_MAIL_DIR; its exit status is returned unchanged.
+mail_cmd() { bash "$MAIL_SCRIPT" "$@" 2>&1; }
+
+# mem_db ARGS...: run the sqlite3 CLI on the test memory database, passing
+# ARGS (normally one SQL string) through. ".timeout 5000" sets sqlite3's
+# busy timeout to 5000 ms before the query runs.
+mem_db() { sqlite3 -cmd ".timeout 5000" "$AIDEVOPS_MEMORY_DIR/memory.db" "$@"; }
+
+# mail_db ARGS...: run the sqlite3 CLI on the test mailbox database, passing
+# ARGS (normally one SQL string) through. ".timeout 5000" sets sqlite3's
+# busy timeout to 5000 ms before the query runs.
+mail_db() { sqlite3 -cmd ".timeout 5000" "$AIDEVOPS_MAIL_DIR/mailbox.db" "$@"; }
+
+# ============================================================
+# MEMORY TESTS
+# ============================================================
+
+section "Memory: Database Initialization"
+
+# Test: first store creates database (|| true: the file check is the verdict)
+mem store --content "Test memory entry" --type "WORKING_SOLUTION" --tags "test,init" >/dev/null || true
+if [[ -f "$AIDEVOPS_MEMORY_DIR/memory.db" ]]; then
+    pass "memory store creates database"
+else
+    fail "memory store did not create database"
+fi
+
+# Test: FTS5 table exists (a failed query leaves an empty value, which compares as 0)
+fts_check=$(mem_db "SELECT count(*) FROM sqlite_master WHERE type='table' AND name='learnings';") || true
+if [[ "$fts_check" -ge 1 ]]; then
+    pass "FTS5 learnings table exists"
+else
+    fail "FTS5 learnings table missing"
+fi
+
+# Test: WAL mode (|| true: a failed query must reach fail(), not trip set -e)
+journal=$(mem_db "PRAGMA journal_mode;") || true
+if [[ "$journal" == "wal" ]]; then
+    pass "Memory DB uses WAL mode"
+else
+    fail "Memory DB journal mode is '$journal', expected 'wal'"
+fi
+
+section "Memory: Store and Recall"
+# "|| true" keeps set -e from aborting before a verdict; here-strings avoid echo|grep -q SIGPIPE under pipefail.
+# Test: store returns success
+store_output=$(mem store --content "Bash arrays need declare -a for indexed arrays" --type "CODEBASE_PATTERN" --tags "bash,arrays") || true
+if grep -qi "stored\|ok\|success" <<<"$store_output"; then
+    pass "memory store reports success"
+else
+    fail "memory store output unexpected" "$store_output"
+fi
+
+# Test: recall finds stored content
+recall_output=$(mem recall --query "bash arrays") || true
+if grep -qi "arrays\|bash" <<<"$recall_output"; then
+    pass "memory recall finds stored content by keyword"
+else
+    fail "memory recall did not find stored content" "$recall_output"
+fi
+
+# Test: recall with type filter
+mem store --content "User prefers dark mode in terminal" --type "USER_PREFERENCE" --tags "ui,terminal" >/dev/null || true
+recall_typed=$(mem recall --query "dark mode" --type "USER_PREFERENCE") || true
+if grep -qi "dark mode" <<<"$recall_typed"; then
+    pass "memory recall with --type filter works"
+else
+    fail "memory recall with --type filter failed" "$recall_typed"
+fi
+
+# Test: FTS5 hyphenated query (t139 regression)
+mem store --content "Fixed pre-commit hook for shellcheck" --type "WORKING_SOLUTION" --tags "pre-commit,shellcheck" >/dev/null || true
+recall_hyphen=$(mem recall --query "pre-commit hook" 2>&1) || true
+if grep -qiE "error.*column|fts5.*syntax" <<<"$recall_hyphen"; then
+    fail "FTS5 hyphenated query causes error (t139 regression)" "$recall_hyphen"
+else
+    pass "FTS5 hyphenated query works without error (t139)"
+fi
+
+# Test: recall with limit
+mem store --content "Memory test entry A" --type "CONTEXT" --tags "test" >/dev/null || true
+mem store --content "Memory test entry B" --type "CONTEXT" --tags "test" >/dev/null || true
+mem store --content "Memory test entry C" --type "CONTEXT" --tags "test" >/dev/null || true
+recall_limited=$(mem recall --query "memory test entry" --limit 2) || true
+# Count result entries (each has a type marker like [CONTEXT])
+result_count=$(grep -c '\[CONTEXT\]' <<<"$recall_limited" || true)
+if [[ "$result_count" -le 2 ]]; then
+    pass "memory recall --limit restricts results"
+else
+    fail "memory recall --limit did not restrict (got $result_count, expected <= 2)"
+fi
+
+section "Memory: Stats"
+# "|| true" keeps set -e from aborting before a verdict; here-strings avoid echo|grep -q SIGPIPE under pipefail.
+stats_output=$(mem stats) || true
+if grep -qiE "total|memories|entries|count" <<<"$stats_output"; then
+    pass "memory stats produces output"
+else
+    fail "memory stats output unexpected" "$stats_output"
+fi
+
+section "Memory: Relational Versioning"
+
+# Store a memory, then update it; the first mem_... token in the output is taken as its ID
+original_output=$(mem store --content "Favorite color is blue" --type "USER_PREFERENCE" --tags "preference") || true
+original_id=$(grep -oE 'mem_[a-z0-9_]+' <<<"$original_output" | head -1 || true)
+
+if [[ -n "$original_id" ]]; then
+    # Store an update that supersedes the original
+    update_output=$(mem store --content "Favorite color is now green" --type "USER_PREFERENCE" --tags "preference" --supersedes "$original_id" --relation updates 2>&1 || true)
+    if grep -qi "stored\|ok\|success" <<<"$update_output"; then
+        pass "Relational versioning: store with --supersedes works"
+    else
+        # May not support --supersedes flag yet, that's OK
+        skip "Relational versioning: --supersedes may not be implemented yet"
+    fi
+else
+    skip "Could not extract memory ID for relational test"
+fi
+
+section "Memory: Namespace Isolation"
+# "|| true" lets a failed store reach the skip branch below instead of tripping set -e.
+# Store in a namespace
+ns_output=$(mem --namespace test-runner store --content "Runner-specific config" --type "TOOL_CONFIG" --tags "runner" 2>&1) || true
+if grep -qi "stored\|ok\|success" <<<"$ns_output"; then
+    pass "Namespace store works"
+
+    # Verify namespace directory created
+    if [[ -d "$AIDEVOPS_MEMORY_DIR/namespaces/test-runner" ]]; then
+        pass "Namespace directory created"
+    else
+        fail "Namespace directory not created"
+    fi
+
+    # Recall from namespace
+    ns_recall=$(mem --namespace test-runner recall --query "runner config" 2>&1) || true
+    if grep -qi "runner\|config" <<<"$ns_recall"; then
+        pass "Namespace recall finds namespace-specific content"
+    else
+        fail "Namespace recall failed" "$ns_recall"
+    fi
+else
+    skip "Namespace store failed" "$ns_output"
+fi
+
+# Invalid namespace name
+invalid_ns=$(mem --namespace "invalid namespace!" store --content "test" --type "CONTEXT" 2>&1 || true)
+if grep -qi "invalid" <<<"$invalid_ns"; then
+    pass "Invalid namespace name rejected"
+else
+    fail "Invalid namespace name was not rejected"
+fi
+
+section "Memory: Prune"
+# Greps read here-strings: with pipefail, "echo | grep -q" can fail on SIGPIPE even when the pattern matched.
+# Prune with dry-run (should not delete anything)
+prune_output=$(mem prune --dry-run 2>&1 || true)
+if grep -qiE "prune|would|dry|entries|0" <<<"$prune_output"; then
+    pass "memory prune --dry-run works"
+else
+    skip "memory prune --dry-run output unexpected" "$prune_output"
+fi
+
+section "Memory: Help"
+# "|| true": help may exit non-zero; the output check below is the verdict.
+help_output=$(mem help 2>&1) || true
+if grep -qiE "usage|store|recall|memory|COMMANDS" <<<"$help_output"; then
+    pass "memory help shows usage information"
+else
+    fail "memory help output unexpected" "$(head -3 <<<"$help_output")"
+fi
+
+# ============================================================
+# MAIL TESTS
+# ============================================================
+
+section "Mail: Database Initialization"
+
+# Test: first command creates database
+mail_cmd status >/dev/null 2>&1 || true
+if [[ -f "$AIDEVOPS_MAIL_DIR/mailbox.db" ]]; then
+    pass "mail command creates database"
+else
+    fail "mail command did not create database"
+fi
+
+# Test: tables exist (|| true: with pipefail a failed query would otherwise abort under set -e)
+mail_tables=$(mail_db "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;" | tr '\n' ',') || true
+if [[ "$mail_tables" == *"messages"* && "$mail_tables" == *"agents"* ]]; then
+    pass "Mail tables exist (messages, agents)"
+else
+    fail "Missing mail tables" "Found: $mail_tables"
+fi
+
+section "Mail: Agent Registration"
+# "|| true" keeps set -e from aborting before a verdict; here-strings avoid echo|grep -q SIGPIPE under pipefail.
+# Register an agent
+reg_output=$(mail_cmd register --agent "test-agent-1" --role "worker" 2>&1) || true
+if grep -qiE "register|success|ok" <<<"$reg_output"; then
+    pass "Agent registration works"
+else
+    fail "Agent registration failed" "$(head -3 <<<"$reg_output")"
+fi
+
+# Register second agent
+mail_cmd register --agent "test-agent-2" --role "orchestrator" >/dev/null 2>&1 || true
+
+# List agents
+agents_output=$(mail_cmd agents 2>&1) || true
+if grep -q "test-agent-1" <<<"$agents_output"; then
+    pass "Registered agent appears in agent list"
+else
+    fail "Registered agent not in list" "$agents_output"
+fi
+
+section "Mail: Send and Receive"
+# "|| true" keeps set -e from aborting before a verdict; here-strings avoid echo|grep -q SIGPIPE under pipefail.
+# Send a message
+send_output=$(mail_cmd send --from "test-agent-1" --to "test-agent-2" --type "task_dispatch" --payload "Please process task t001" 2>&1) || true
+if grep -qiE "sent|success|ok|msg-" <<<"$send_output"; then
+    pass "mail send works"
+else
+    fail "mail send failed" "$(head -3 <<<"$send_output")"
+fi
+
+# Check inbox
+check_output=$(mail_cmd check --agent "test-agent-2" 2>&1) || true
+if grep -qiE "1|unread|message" <<<"$check_output"; then
+    pass "mail check shows unread messages"
+else
+    fail "mail check did not show unread messages" "$check_output"
+fi
+
+# Read message
+# First get the message ID
+msg_id=$(mail_db "SELECT id FROM messages WHERE to_agent = 'test-agent-2' LIMIT 1;" 2>/dev/null || echo "")
+if [[ -n "$msg_id" ]]; then
+    read_output=$(mail_cmd read "$msg_id" 2>&1) || true
+    if grep -qiE "task t001|process|payload" <<<"$read_output"; then
+        pass "mail read shows message content"
+    else
+        fail "mail read did not show content" "$(head -3 <<<"$read_output")"
+    fi
+
+    # Verify message marked as read
+    msg_status=$(mail_db "SELECT status FROM messages WHERE id = '$msg_id';") || true
+    if [[ "$msg_status" == "read" ]]; then
+        pass "Message marked as 'read' after reading"
+    else
+        fail "Message status is '$msg_status', expected 'read'"
+    fi
+else
+    fail "Could not find message ID in database"
+fi
+
+section "Mail: Archive"
+# Runs only when the earlier lookup found a message; "|| true" keeps set -e from aborting before a verdict.
+if [[ -n "$msg_id" ]]; then
+    archive_output=$(mail_cmd archive "$msg_id" 2>&1) || true
+    if grep -qiE "archived|success|ok" <<<"$archive_output"; then
+        pass "mail archive works"
+    else
+        fail "mail archive failed" "$(head -3 <<<"$archive_output")"
+    fi
+
+    # Verify archived
+    archived_status=$(mail_db "SELECT status FROM messages WHERE id = '$msg_id';") || true
+    if [[ "$archived_status" == "archived" ]]; then
+        pass "Message status is 'archived' after archiving"
+    else
+        fail "Message status is '$archived_status', expected 'archived'"
+    fi
+fi
+
+section "Mail: Message Types"
+# "|| true" lets one rejected type reach fail() and the loop continue instead of tripping set -e.
+# Test all valid message types
+for msg_type in task_dispatch status_report discovery request broadcast; do
+    type_output=$(mail_cmd send --from "test-agent-1" --to "test-agent-2" --type "$msg_type" --payload "Test $msg_type" 2>&1) || true
+    if grep -qiE "sent|success|ok|msg-" <<<"$type_output"; then
+        pass "mail send type=$msg_type"
+    else
+        fail "mail send type=$msg_type failed" "$(head -3 <<<"$type_output")"
+    fi
+done
+
+# Test invalid message type
+invalid_type_output=$(mail_cmd send --from "test-agent-1" --to "test-agent-2" --type "invalid_type" --payload "Test" 2>&1 || true)
+if grep -qiE "invalid|error|constraint" <<<"$invalid_type_output"; then
+    pass "Invalid message type rejected"
+else
+    fail "Invalid message type was not rejected" "$invalid_type_output"
+fi
+
+section "Mail: Priority"
+# "|| true" keeps set -e from aborting before a verdict; here-strings avoid echo|grep -q SIGPIPE under pipefail.
+# Send with priority
+priority_output=$(mail_cmd send --from "test-agent-1" --to "test-agent-2" --type "request" --priority "high" --payload "Urgent request" 2>&1) || true
+if grep -qiE "sent|success|ok|msg-" <<<"$priority_output"; then
+    pass "mail send with --priority works"
+else
+    fail "mail send with --priority failed" "$(head -3 <<<"$priority_output")"
+fi
+
+section "Mail: Status"
+
+status_output=$(mail_cmd status 2>&1) || true
+if grep -qiE "message|agent|total|unread|mail" <<<"$status_output"; then
+    pass "mail status produces summary"
+else
+    fail "mail status output unexpected" "$status_output"
+fi
+
+section "Mail: Deregister"
+
+dereg_output=$(mail_cmd deregister --agent "test-agent-1" 2>&1) || true
+if grep -qiE "deregister|removed|success|ok|inactive" <<<"$dereg_output"; then
+    pass "Agent deregistration works"
+else
+    fail "Agent deregistration failed" "$(head -3 <<<"$dereg_output")"
+fi
+
+section "Mail: Prune"
+
+prune_mail_output=$(mail_cmd prune 2>&1 || true)
+if grep -qiE "prune|storage|archived|messages|0" <<<"$prune_mail_output"; then
+    pass "mail prune works"
+else
+    skip "mail prune output unexpected" "$prune_mail_output"
+fi
+
+section "Mail: Help"
+
+# mail-helper.sh doesn't have a cmd_help but main() should show usage on unknown command
+help_mail=$(mail_cmd help 2>&1 || true)
+if grep -qiE "usage|send|check|read|mail|commands" <<<"$help_mail"; then
+    pass "mail help shows usage information"
+else
+    fail "mail help output unexpected" "$(head -3 <<<"$help_mail")"
+fi
+
+# ============================================================
+# SUMMARY
+# ============================================================
+printf '\n%s\n' "========================================"
+# Totals line: bold label, then green / red / yellow counts.
+printf "  \033[1mResults: %d total, \033[0;32m%d passed\033[0m, \033[0;31m%d failed\033[0m, \033[0;33m%d skipped\033[0m\n" \
+    "$TOTAL_COUNT" "$PASS_COUNT" "$FAIL_COUNT" "$SKIP_COUNT"
+printf '%s\n' "========================================"
+
+# Exit status mirrors the result: 0 when nothing failed, 1 otherwise.
+if (( FAIL_COUNT == 0 )); then
+    printf "\n\033[0;32mAll tests passed.\033[0m\n"
+    exit 0
+fi
+
+# Reached only when at least one test called fail().
+printf "\n\033[0;31mFAILURES DETECTED - review output above\033[0m\n"
+exit 1
diff --git a/tests/test-smoke-help.sh b/tests/test-smoke-help.sh
new file mode 100644
index 00000000..fedf9c90
--- /dev/null
+++ b/tests/test-smoke-help.sh
@@ -0,0 +1,257 @@
+#!/usr/bin/env bash
+# test-smoke-help.sh
+#
+# Smoke tests: bash -n syntax check for ALL scripts, plus help command
+# validation for scripts that define a help function.
+#
+# Usage: bash tests/test-smoke-help.sh [--verbose]
+#
+# Exit codes: 0 = all pass, 1 = failures found
+
+set -euo pipefail
+
+REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+SCRIPTS_DIR="$REPO_DIR/.agents/scripts"
+VERBOSE="${1:-}"
+
+# --- Test Framework ---
+PASS_COUNT=0
+FAIL_COUNT=0
+SKIP_COUNT=0
+TOTAL_COUNT=0
+
+pass() {
+    # Count a passing test ($1 = name). The PASS line is printed only when
+    # the script was started with --verbose; the function always returns 0.
+    PASS_COUNT=$((PASS_COUNT + 1)) TOTAL_COUNT=$((TOTAL_COUNT + 1))
+    [[ "$VERBOSE" != "--verbose" ]] && return 0
+    printf "  \033[0;32mPASS\033[0m %s\n" "$1"
+    return 0
+}
+
+fail() {
+    # Count a failing test and always report it, verbose or not.
+    # $1 = test name, $2 = optional detail printed indented underneath.
+    FAIL_COUNT=$((FAIL_COUNT + 1)) TOTAL_COUNT=$((TOTAL_COUNT + 1))
+    local detail="${2:-}"
+    printf "  \033[0;31mFAIL\033[0m %s\n" "$1"
+    [[ -z "$detail" ]] || printf "       %s\n" "$detail"
+    return 0
+}
+
+skip() {
+    # Count a skipped test ($1 = name). The SKIP line is printed only when
+    # the script was started with --verbose; the function always returns 0.
+    SKIP_COUNT=$((SKIP_COUNT + 1)) TOTAL_COUNT=$((TOTAL_COUNT + 1))
+    [[ "$VERBOSE" != "--verbose" ]] && return 0
+    printf "  \033[0;33mSKIP\033[0m %s\n" "$1"
+    return 0
+}
+
+section() { # Print a blank line, then $1 as a bold "=== title ===" banner.
+    local title="$1"
+    printf "\n\033[1m=== %s ===\033[0m\n" "$title"
+}
+
+# ============================================================
+# SECTION 1: bash -n syntax check for ALL scripts
+# ============================================================
+section "Syntax Check (bash -n) - All Scripts"
+
+syntax_pass=0
+syntax_fail=0
+
+# Parse-check (bash -n, no execution) every tracked script outside _archive/.
+# git ls-files emits repo-relative paths, so each one is anchored at REPO_DIR.
+while IFS= read -r rel_path; do
+    script_name=${rel_path##*/}
+    if ! bash -n "$REPO_DIR/$rel_path" 2>/dev/null; then
+        fail "syntax: $script_name" "bash -n failed"
+        syntax_fail=$((syntax_fail + 1))
+    else
+        pass "syntax: $script_name"
+        syntax_pass=$((syntax_pass + 1))
+    fi
+done < <(git -C "$REPO_DIR" ls-files '.agents/scripts/*.sh' | grep -v '_archive/')
+
+printf "  Syntax: %d passed, %d failed (of %d non-archived scripts)\n" \
+    "$syntax_pass" "$syntax_fail" "$((syntax_pass + syntax_fail))"
+
+# ============================================================
+# SECTION 2: Help command smoke tests
+# ============================================================
+section "Help Command Smoke Tests"
+
+# Scripts known to NOT support a help subcommand (libraries, hooks, utilities)
+# These are sourced or run without arguments, not invoked with "help"
+SKIP_HELP=(
+    "shared-constants.sh"
+    "loop-common.sh"
+    "pre-commit-hook.sh"
+    "cron-dispatch.sh"
+    "aidevops-update-check.sh"
+    "auto-version-bump.sh"
+    "validate-version-consistency.sh"
+    "extract-opencode-prompts.sh"
+    "generate-opencode-commands.sh"
+    "generate-skills.sh"
+    "opencode-prompt-drift-check.sh"
+    "find-missing-returns.sh"
+    "mass-fix-returns.sh"
+    "efficient-return-fix.sh"
+    "comprehensive-quality-fix.sh"
+    "quality-fix.sh"
+    "sonarcloud-autofix.sh"
+    "monitor-code-review.sh"
+    "code-audit-helper.sh"
+    "session-time-helper.sh"
+    "planning-commit-helper.sh"
+    "log-issue-helper.sh"
+    "humanise-update-helper.sh"
+    "dns-helper.sh"
+    "closte-helper.sh"
+    "cloudron-helper.sh"
+    "hetzner-helper.sh"
+    "hostinger-helper.sh"
+    "coolify-helper.sh"
+    "ses-helper.sh"
+    "servers-helper.sh"
+    "pagespeed-helper.sh"
+    "tool-version-check.sh"
+    "todo-ready.sh"
+    "mcp-diagnose.sh"
+    "localhost-helper.sh"
+    "linters-local.sh"
+    "markdown-lint-fix.sh"
+    "setup-mcp-integrations.sh"
+    "generate-opencode-agents.sh"
+    "setup-local-api-keys.sh"
+    "stagehand-setup.sh"
+    "stagehand-python-setup.sh"
+    "test-stagehand-integration.sh"
+    "test-stagehand-python-integration.sh"
+    "test-stagehand-both-integration.sh"
+    "crawl4ai-examples.sh"
+    "ampcode-cli.sh"
+    "agno-setup.sh"
+    "sonarscanner-cli.sh"
+    "codacy-cli.sh"
+    "codacy-cli-chunked.sh"
+    "coderabbit-pro-analysis.sh"
+    "snyk-helper.sh"
+    "verify-mirrors.sh"
+    "webhosting-verify.sh"
+)
+
+is_skip_help() { # Return 0 if basename $1 is listed in SKIP_HELP, else 1.
+    local name="$1" s # s is local so the loop variable does not leak into the script
+    for s in "${SKIP_HELP[@]}"; do
+        [[ "$name" == "$s" ]] && return 0
+    done
+    return 1
+}
+
+help_pass=0
+help_fail=0
+help_skip=0
+
+while IFS= read -r script; do
+    abs_path="$REPO_DIR/$script"
+    name=$(basename "$script")
+
+    # Skip archived scripts
+    [[ "$script" == *"_archive/"* ]] && continue
+
+    # Skip scripts that don't support help
+    if is_skip_help "$name"; then
+        skip "help: $name (not a help-command script)"
+        help_skip=$((help_skip + 1))
+        continue
+    fi
+
+    # Check if script defines a help function
+    if ! grep -qE 'cmd_help\(\)|show_help\(\)|show_usage\(\)|usage\(\)' "$abs_path" 2>/dev/null; then
+        skip "help: $name (no help function defined)"
+        help_skip=$((help_skip + 1))
+        continue
+    fi
+
+    # Run help with a 5s cap. stdin is /dev/null so the child cannot eat this
+    # loop's file list; "|| help_exit=$?" keeps timeout's 124 (|| true zeroed it).
+    help_exit=0
+    help_output=$(timeout 5 bash "$abs_path" help 2>&1 </dev/null) || help_exit=$?
+
+    # Exit 0 and exit 1 (usage error) are both fine; a timeout or silence is not.
+    if [[ $help_exit -eq 124 ]]; then
+        fail "help: $name" "Timed out after 5 seconds"
+        help_fail=$((help_fail + 1))
+    elif [[ -n "$help_output" ]]; then
+        pass "help: $name"
+        help_pass=$((help_pass + 1))
+    else
+        fail "help: $name" "No output produced (exit=$help_exit)"
+        help_fail=$((help_fail + 1))
+    fi
+done < <(git -C "$REPO_DIR" ls-files '.agents/scripts/*.sh')
+
+printf "  Help: %d passed, %d failed, %d skipped\n" \
+    "$help_pass" "$help_fail" "$help_skip"
+
+# ============================================================
+# SECTION 3: ShellCheck on critical scripts (errors only)
+# ============================================================
+section "ShellCheck (errors only) - Critical Scripts"
+
+CRITICAL_SCRIPTS=(
+    "supervisor-helper.sh"
+    "memory-helper.sh"
+    "mail-helper.sh"
+    "runner-helper.sh"
+    "full-loop-helper.sh"
+    "ralph-loop-helper.sh"
+    "quality-loop-helper.sh"
+    "pre-edit-check.sh"
+    "worktree-helper.sh"
+    "credential-helper.sh"
+    "secret-helper.sh"
+)
+
+if command -v shellcheck &>/dev/null; then
+    for name in "${CRITICAL_SCRIPTS[@]}"; do
+        script_path="$SCRIPTS_DIR/$name"
+        if [[ ! -f "$script_path" ]]; then
+            skip "shellcheck: $name (not found)"
+            continue
+        fi
+        # gcc format prints one "file:line:col: error: ..." line per finding, so the count is exact.
+        sc_output=$(shellcheck -S error -f gcc "$script_path" 2>&1 || true)
+        sc_errors=$(grep -c ': error: ' <<<"$sc_output" || true)
+        if [[ "$sc_errors" -eq 0 ]]; then
+            pass "shellcheck: $name (0 errors)"
+        else
+            fail "shellcheck: $name ($sc_errors errors)" \
+                "$(head -5 <<<"$sc_output")"
+        fi
+    done
+else
+    skip "shellcheck not installed"
+fi
+
+# ============================================================
+# SUMMARY
+# ============================================================
+printf '\n%s\n' "========================================"
+# Totals line: bold label, then green / red / yellow counts.
+printf "  \033[1mResults: %d total, \033[0;32m%d passed\033[0m, \033[0;31m%d failed\033[0m, \033[0;33m%d skipped\033[0m\n" \
+    "$TOTAL_COUNT" "$PASS_COUNT" "$FAIL_COUNT" "$SKIP_COUNT"
+printf '%s\n' "========================================"
+
+# Exit status mirrors the result: 0 when nothing failed, 1 otherwise.
+if (( FAIL_COUNT == 0 )); then
+    printf "\n\033[0;32mAll tests passed.\033[0m\n"
+    exit 0
+fi
+
+# Reached only when at least one test called fail().
+printf "\n\033[0;31mFAILURES DETECTED - review output above\033[0m\n"
+exit 1
diff --git a/tests/test-supervisor-state-machine.sh b/tests/test-supervisor-state-machine.sh
new file mode 100644
index 00000000..2837a78b
--- /dev/null
+++ b/tests/test-supervisor-state-machine.sh
@@ -0,0 +1,582 @@
+#!/usr/bin/env bash
+# test-supervisor-state-machine.sh
+#
+# Unit tests for supervisor-helper.sh state machine:
+# - Valid/invalid state transitions
+# - Task lifecycle (add -> dispatch -> run -> evaluate -> complete)
+# - Retry logic
+# - Batch completion detection
+# - Post-PR lifecycle (complete -> pr_review -> merging -> merged -> deployed)
+#
+# Uses an isolated temp DB to avoid touching production data.
+#
+# Usage: bash tests/test-supervisor-state-machine.sh [--verbose]
+#
+# Exit codes: 0 = all pass, 1 = failures found
+
+set -euo pipefail
+
+REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+SCRIPTS_DIR="$REPO_DIR/.agents/scripts"
+SUPERVISOR_SCRIPT="$SCRIPTS_DIR/supervisor-helper.sh"
+VERBOSE="${1:-}"
+
+# --- Test Framework ---
+PASS_COUNT=0
+FAIL_COUNT=0
+SKIP_COUNT=0
+TOTAL_COUNT=0
+
+pass() { # $1 = test name; bumps the pass and total tallies, prints a green line
+    PASS_COUNT=$((PASS_COUNT + 1)) TOTAL_COUNT=$((TOTAL_COUNT + 1))
+    local name="$1"
+    printf "  \033[0;32mPASS\033[0m %s\n" "$name"
+}
+
+fail() {
+    # $1 = test name, $2 = optional detail shown indented under the FAIL line.
+    FAIL_COUNT=$((FAIL_COUNT + 1)) TOTAL_COUNT=$((TOTAL_COUNT + 1))
+    local name="$1" detail="${2:-}"
+    printf "  \033[0;31mFAIL\033[0m %s\n" "$name"
+    # An if (not `[[ ]] && printf`) keeps the return status 0 when no detail is given.
+    if [[ -n "$detail" ]]; then printf "       %s\n" "$detail"; fi
+}
+
+skip() { # $1 = test name; bumps the skip and total tallies, prints a yellow line
+    SKIP_COUNT=$((SKIP_COUNT + 1)) TOTAL_COUNT=$((TOTAL_COUNT + 1))
+    local name="$1"
+    printf "  \033[0;33mSKIP\033[0m %s\n" "$name"
+}
+
+section() { # Print a blank line, then $1 as a bold "=== title ===" banner.
+    local title="$1"
+    printf "\n\033[1m=== %s ===\033[0m\n" "$title"
+}
+
+# --- Test DB Setup ---
+TEST_DIR=$(mktemp -d)
+export AIDEVOPS_SUPERVISOR_DIR="$TEST_DIR"
+trap 'rm -rf "$TEST_DIR"' EXIT
+
+# sup ARGS...: run supervisor-helper.sh with ARGS, merging stderr into stdout
+# so a caller can capture or grep one stream. The child inherits the exported
+# AIDEVOPS_SUPERVISOR_DIR (the temp dir); its exit status is returned unchanged.
+sup() { bash "$SUPERVISOR_SCRIPT" "$@" 2>&1; }
+
+# test_db ARGS...: run the sqlite3 CLI on $TEST_DIR/supervisor.db, passing
+# ARGS (normally one SQL string) through. ".timeout 5000" sets sqlite3's
+# busy timeout to 5000 ms before the query runs.
+test_db() { sqlite3 -cmd ".timeout 5000" "$TEST_DIR/supervisor.db" "$@"; }
+
+# get_status TASK_ID: print the status column of that task's row in the test
+# DB (nothing when no row matches). TASK_ID is spliced into the SQL text
+# unescaped, so pass only trusted, quote-free ids.
+get_status() { local task_id="$1"; test_db "SELECT status FROM tasks WHERE id = '$task_id';"; }
+
+# get_field TASK_ID COLUMN: print COLUMN of that task's row in the test DB.
+# Both arguments are spliced into the SQL text unescaped (COLUMN as a bare
+# expression), so pass only trusted values.
+get_field() { local task_id="$1" column="$2"; test_db "SELECT $column FROM tasks WHERE id = '$task_id';"; }
+
+# ============================================================
+# SECTION 1: Database Initialization
+# ============================================================
+section "Database Initialization"
+
+# Test: init creates database (|| true: the file check is the verdict)
+sup init >/dev/null || true
+if [[ -f "$TEST_DIR/supervisor.db" ]]; then
+    pass "init creates supervisor.db"
+else
+    fail "init did not create supervisor.db"
+fi
+
+# Test: tables exist. Names are matched between commas so "tasks" is not satisfied by "batch_tasks".
+tables=$(test_db "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;" | tr '\n' ',') || true
+if [[ ",$tables" == *",tasks,"* && ",$tables" == *",batches,"* && ",$tables" == *",state_log,"* && ",$tables" == *",batch_tasks,"* ]]; then
+    pass "Required tables exist (tasks, batches, state_log, batch_tasks)"
+else
+    fail "Missing required tables" "Found: $tables"
+fi
+
+# Test: WAL mode is set
+journal_mode=$(test_db "PRAGMA journal_mode;") || true
+if [[ "$journal_mode" == "wal" ]]; then
+    pass "WAL journal mode is set"
+else
+    fail "Journal mode is '$journal_mode', expected 'wal'"
+fi
+
+# ============================================================
+# SECTION 2: Task Addition
+# ============================================================
+section "Task Addition"
+# "|| true" keeps set -e from aborting before a verdict; here-strings avoid echo|grep -q SIGPIPE under pipefail.
+# Test: add a task
+sup add test-t001 --repo /tmp/test --description "Test task 1" >/dev/null || true
+status=$(get_status "test-t001") || true
+if [[ "$status" == "queued" ]]; then
+    pass "Added task starts in 'queued' state"
+else
+    fail "Added task has status '$status', expected 'queued'"
+fi
+
+# Test: duplicate task rejected
+dup_output=$(sup add test-t001 --repo /tmp/test 2>&1 || true)
+if grep -qi "already exists" <<<"$dup_output"; then
+    pass "Duplicate task ID is rejected"
+else
+    fail "Duplicate task was not rejected" "$dup_output"
+fi
+
+# Test: state_log records initial state
+log_entry=$(test_db "SELECT to_state FROM state_log WHERE task_id = 'test-t001' ORDER BY id LIMIT 1;") || true
+if [[ "$log_entry" == "queued" ]]; then
+    pass "State log records initial 'queued' entry"
+else
+    fail "State log initial entry is '$log_entry', expected 'queued'"
+fi
+
+# ============================================================
+# SECTION 3: Valid State Transitions (Happy Path)
+# ============================================================
+section "Valid State Transitions (Happy Path)"
+# Each transition ends in "|| true": a rejected one must reach fail(), not trip set -e.
+# queued -> dispatched
+sup transition test-t001 dispatched >/dev/null || true
+if [[ "$(get_status test-t001)" == "dispatched" ]]; then
+    pass "queued -> dispatched"
+else
+    fail "queued -> dispatched failed"
+fi
+
+# Test: started_at is set on first dispatch
+started=$(get_field "test-t001" "started_at") || true
+if [[ -n "$started" ]]; then
+    pass "started_at set on first dispatch"
+else
+    fail "started_at not set on dispatch"
+fi
+
+# dispatched -> running
+sup transition test-t001 running >/dev/null || true
+if [[ "$(get_status test-t001)" == "running" ]]; then
+    pass "dispatched -> running"
+else
+    fail "dispatched -> running failed"
+fi
+
+# running -> evaluating
+sup transition test-t001 evaluating >/dev/null || true
+if [[ "$(get_status test-t001)" == "evaluating" ]]; then
+    pass "running -> evaluating"
+else
+    fail "running -> evaluating failed"
+fi
+
+# evaluating -> complete
+sup transition test-t001 complete >/dev/null || true
+if [[ "$(get_status test-t001)" == "complete" ]]; then
+    pass "evaluating -> complete"
+else
+    fail "evaluating -> complete failed"
+fi
+
+# Test: completed_at is set
+completed=$(get_field "test-t001" "completed_at") || true
+if [[ -n "$completed" ]]; then
+    pass "completed_at set on terminal state"
+else
+    fail "completed_at not set on complete"
+fi
+
+# ============================================================
+# SECTION 4: Post-PR Lifecycle Transitions
+# ============================================================
+section "Post-PR Lifecycle Transitions"
+# Each transition ends in "|| true": a rejected one must reach fail(), not trip set -e.
+# complete -> pr_review
+sup transition test-t001 pr_review >/dev/null || true
+if [[ "$(get_status test-t001)" == "pr_review" ]]; then
+    pass "complete -> pr_review"
+else
+    fail "complete -> pr_review failed"
+fi
+
+# pr_review -> merging
+sup transition test-t001 merging >/dev/null || true
+if [[ "$(get_status test-t001)" == "merging" ]]; then
+    pass "pr_review -> merging"
+else
+    fail "pr_review -> merging failed"
+fi
+
+# merging -> merged
+sup transition test-t001 merged >/dev/null || true
+if [[ "$(get_status test-t001)" == "merged" ]]; then
+    pass "merging -> merged"
+else
+    fail "merging -> merged failed"
+fi
+
+# merged -> deploying
+sup transition test-t001 deploying >/dev/null || true
+if [[ "$(get_status test-t001)" == "deploying" ]]; then
+    pass "merged -> deploying"
+else
+    fail "merged -> deploying failed"
+fi
+
+# deploying -> deployed
+sup transition test-t001 deployed >/dev/null || true
+if [[ "$(get_status test-t001)" == "deployed" ]]; then
+    pass "deploying -> deployed"
+else
+    fail "deploying -> deployed failed"
+fi
+
+# ============================================================
+# SECTION 5: Invalid State Transitions
+# ============================================================
+section "Invalid State Transitions"
+# Greps read here-strings: with pipefail, "echo | grep -q" can fail on SIGPIPE even when the pattern matched.
+# Add a fresh task for invalid transition tests (|| true: later checks report a failed add)
+sup add test-t002 --repo /tmp/test --description "Invalid transition test" >/dev/null || true
+
+# queued -> running (must go through dispatched first)
+invalid_output=$(sup transition test-t002 running 2>&1 || true)
+if grep -qi "invalid transition" <<<"$invalid_output"; then
+    pass "queued -> running rejected (must go through dispatched)"
+else
+    fail "queued -> running was not rejected" "$invalid_output"
+fi
+
+# Verify state didn't change
+if [[ "$(get_status test-t002)" == "queued" ]]; then
+    pass "State unchanged after invalid transition"
+else
+    fail "State changed despite invalid transition: $(get_status test-t002)"
+fi
+
+# queued -> complete (skipping intermediate states)
+invalid_output2=$(sup transition test-t002 complete 2>&1 || true)
+if grep -qi "invalid transition" <<<"$invalid_output2"; then
+    pass "queued -> complete rejected (skipping intermediate states)"
+else
+    fail "queued -> complete was not rejected"
+fi
+
+# queued -> deployed (skipping all states)
+invalid_output3=$(sup transition test-t002 deployed 2>&1 || true)
+if grep -qi "invalid transition" <<<"$invalid_output3"; then
+    pass "queued -> deployed rejected"
+else
+    fail "queued -> deployed was not rejected"
+fi
+
+# Invalid state name
+invalid_output4=$(sup transition test-t002 nonexistent_state 2>&1 || true)
+if grep -qi "invalid state" <<<"$invalid_output4"; then
+    pass "Nonexistent state name rejected"
+else
+    fail "Nonexistent state name was not rejected"
+fi
+
+# ============================================================
+# SECTION 6: Retry Logic
+# ============================================================
+section "Retry Logic"
+# Every step ends in "|| true": a rejected command must reach fail(), not trip set -e.
+# Add task and move to evaluating
+sup add test-t003 --repo /tmp/test --description "Retry test" >/dev/null || true
+sup transition test-t003 dispatched >/dev/null || true
+sup transition test-t003 running >/dev/null || true
+sup transition test-t003 evaluating >/dev/null || true
+
+# evaluating -> retrying
+sup transition test-t003 retrying >/dev/null || true
+if [[ "$(get_status test-t003)" == "retrying" ]]; then
+    pass "evaluating -> retrying"
+else
+    fail "evaluating -> retrying failed"
+fi
+
+# Test: retries counter incremented
+retries=$(get_field "test-t003" "retries") || true
+if [[ "$retries" -eq 1 ]]; then
+    pass "Retry counter incremented to 1"
+else
+    fail "Retry counter is $retries, expected 1"
+fi
+
+# retrying -> dispatched (re-dispatch)
+sup transition test-t003 dispatched >/dev/null || true
+if [[ "$(get_status test-t003)" == "dispatched" ]]; then
+    pass "retrying -> dispatched (re-dispatch)"
+else
+    fail "retrying -> dispatched failed"
+fi
+
+# Second retry cycle
+sup transition test-t003 running >/dev/null || true
+sup transition test-t003 evaluating >/dev/null || true
+sup transition test-t003 retrying >/dev/null || true
+retries2=$(get_field "test-t003" "retries") || true
+if [[ "$retries2" -eq 2 ]]; then
+    pass "Retry counter incremented to 2 on second retry"
+else
+    fail "Retry counter is $retries2, expected 2"
+fi
+
+# ============================================================
+# SECTION 7: Error Handling
+# ============================================================
+section "Error Handling"
+
+# Seed a task and advance it to 'running' so it can be failed
+sup add test-t004 --repo /tmp/test --description "Error test" >/dev/null
+sup transition test-t004 dispatched >/dev/null
+sup transition test-t004 running >/dev/null
+
+# running -> failed, supplying the reason through --error
+sup transition test-t004 failed --error "Timeout after 30 minutes" >/dev/null
+if [[ "$(get_status test-t004)" != "failed" ]]; then
+    fail "running -> failed transition failed"
+else
+    pass "running -> failed with error"
+fi
+
+# The 'error' field must hold exactly the text passed above
+error_msg=$(get_field "test-t004" "error")
+if [[ "$error_msg" != "Timeout after 30 minutes" ]]; then
+    fail "Error message is '$error_msg', expected 'Timeout after 30 minutes'"
+else
+    pass "Error message stored correctly"
+fi
+
+# 'completed_at' must be non-empty once the task is in 'failed'
+completed_fail=$(get_field "test-t004" "completed_at")
+if [[ -z "$completed_fail" ]]; then
+    fail "completed_at not set on failed state"
+else
+    pass "completed_at set on failed state"
+fi
+
+# A failed task can be re-queued: failed -> queued
+sup transition test-t004 queued >/dev/null
+if [[ "$(get_status test-t004)" != "queued" ]]; then
+    fail "failed -> queued failed"
+else
+    pass "failed -> queued (re-queue)"
+fi
+
+# ============================================================
+# SECTION 8: Cancellation
+# ============================================================
+section "Cancellation"
+
+# Cancel straight from the queue: queued -> cancelled
+sup add test-t005 --repo /tmp/test --description "Cancel test" >/dev/null
+sup transition test-t005 cancelled >/dev/null
+if [[ "$(get_status test-t005)" != "cancelled" ]]; then
+    fail "queued -> cancelled failed"
+else
+    pass "queued -> cancelled"
+fi
+
+# Cancel after dispatch: dispatched -> cancelled
+sup add test-t006 --repo /tmp/test --description "Cancel dispatched" >/dev/null
+sup transition test-t006 dispatched >/dev/null
+sup transition test-t006 cancelled >/dev/null
+if [[ "$(get_status test-t006)" != "cancelled" ]]; then
+    fail "dispatched -> cancelled failed"
+else
+    pass "dispatched -> cancelled"
+fi
+
+# Cancel while running: running -> cancelled
+sup add test-t007 --repo /tmp/test --description "Cancel running" >/dev/null
+for _step in dispatched running cancelled; do
+    sup transition test-t007 "$_step" >/dev/null
+done
+if [[ "$(get_status test-t007)" != "cancelled" ]]; then
+    fail "running -> cancelled failed"
+else
+    pass "running -> cancelled"
+fi
+
+# ============================================================
+# SECTION 9: Blocked State
+# ============================================================
+section "Blocked State"
+
+# evaluating -> blocked (the loop is setup; the last transition is the one under test)
+sup add test-t008 --repo /tmp/test --description "Blocked test" >/dev/null
+for _step in dispatched running evaluating; do
+    sup transition test-t008 "$_step" >/dev/null
+done
+sup transition test-t008 blocked >/dev/null
+if [[ "$(get_status test-t008)" != "blocked" ]]; then
+    fail "evaluating -> blocked failed"
+else
+    pass "evaluating -> blocked"
+fi
+
+# Unblock the same task: blocked -> queued
+sup transition test-t008 queued >/dev/null
+if [[ "$(get_status test-t008)" != "queued" ]]; then
+    fail "blocked -> queued failed"
+else
+    pass "blocked -> queued (unblock)"
+fi
+
+# blocked -> cancelled, using a second task driven into 'blocked'
+sup add test-t009 --repo /tmp/test --description "Blocked cancel" >/dev/null
+for _step in dispatched running evaluating blocked; do
+    sup transition test-t009 "$_step" >/dev/null
+done
+# Transition under test
+sup transition test-t009 cancelled >/dev/null
+if [[ "$(get_status test-t009)" != "cancelled" ]]; then
+    fail "blocked -> cancelled failed"
+else
+    pass "blocked -> cancelled"
+fi
+
+# ============================================================
+# SECTION 10: State Log Audit Trail
+# ============================================================
+section "State Log Audit Trail"
+
+# test-t001 (driven through its lifecycle earlier in the file) should have >= 8 state_log rows
+log_count=$(test_db "SELECT count(*) FROM state_log WHERE task_id = 'test-t001';")
+if ! [[ "$log_count" -ge 8 ]]; then
+    fail "State log has only $log_count entries, expected >= 8"
+else
+    pass "State log has $log_count entries for full lifecycle task"
+fi
+
+# The row with the lowest id must be the initial entry into 'queued' (empty from_state)
+first_transition=$(test_db "SELECT from_state || '->' || to_state FROM state_log WHERE task_id = 'test-t001' ORDER BY id LIMIT 1;")
+if [[ "$first_transition" != "->queued" ]]; then
+    fail "First state log entry is '$first_transition', expected '->queued'"
+else
+    pass "First state log entry is initial queued"
+fi
+
+# ============================================================
+# SECTION 11: Metadata Fields
+# ============================================================
+section "Metadata Fields"
+
+# Dispatch with --session, --branch and --worktree, then read each back (--pr-url is not exercised here)
+sup add test-t010 --repo /tmp/test --description "Metadata test" >/dev/null
+sup transition test-t010 dispatched --session "ses_abc123" --branch "feature/test" --worktree "/tmp/wt" >/dev/null
+
+session_id=$(get_field "test-t010" "session_id")
+branch=$(get_field "test-t010" "branch")
+worktree=$(get_field "test-t010" "worktree")
+
+if [[ "$session_id" != "ses_abc123" ]]; then
+    fail "session_id is '$session_id', expected 'ses_abc123'"
+else
+    pass "session_id stored on transition"
+fi
+
+if [[ "$branch" != "feature/test" ]]; then
+    fail "branch is '$branch', expected 'feature/test'"
+else
+    pass "branch stored on transition"
+fi
+
+if [[ "$worktree" != "/tmp/wt" ]]; then
+    fail "worktree is '$worktree', expected '/tmp/wt'"
+else
+    pass "worktree stored on transition"
+fi
+
+# ============================================================
+# SECTION 12: Batch Completion Detection
+# ============================================================
+section "Batch Completion Detection"
+
+# Two tasks, grouped into a single batch (batch creation is best-effort)
+sup add test-b001 --repo /tmp/test --description "Batch task 1" >/dev/null
+sup add test-b002 --repo /tmp/test --description "Batch task 2" >/dev/null
+sup batch test-batch --tasks "test-b001,test-b002" >/dev/null 2>&1 || true
+
+# The batch assertions only run when the batch row exists and reads 'active'
+batch_status=$(test_db "SELECT status FROM batches WHERE name = 'test-batch';" 2>/dev/null || echo "")
+if [[ "$batch_status" != "active" ]]; then
+    skip "Batch creation may require different syntax (status: '$batch_status')"
+else
+    pass "Batch created in 'active' state"
+
+    # Walk the first task through to 'complete'
+    for _step in dispatched running evaluating complete; do
+        sup transition test-b001 "$_step" >/dev/null
+    done
+
+    # test-b002 is still outstanding at this point,
+    # so the batch is expected to remain 'active'
+    batch_after_one=$(test_db "SELECT status FROM batches WHERE name = 'test-batch';")
+    if [[ "$batch_after_one" != "active" ]]; then
+        fail "Batch status is '$batch_after_one' after one task complete, expected 'active'"
+    else
+        pass "Batch stays active with incomplete tasks"
+    fi
+
+    # Walk the second task through to 'complete'
+    for _step in dispatched running evaluating complete; do
+        sup transition test-b002 "$_step" >/dev/null
+    done
+
+    # Both tasks are complete now,
+    # so the batch is expected to read 'complete'
+    batch_after_all=$(test_db "SELECT status FROM batches WHERE name = 'test-batch';")
+    if [[ "$batch_after_all" != "complete" ]]; then
+        fail "Batch status is '$batch_after_all' after all tasks complete, expected 'complete'"
+    else
+        pass "Batch auto-completes when all tasks finish"
+    fi
+fi
+
+# ============================================================
+# SECTION 13: Edge Cases
+# ============================================================
+section "Edge Cases"
+
+# Transition on nonexistent task: the captured output must mention "not found"
+nonexist_output=$(sup transition nonexistent-task dispatched 2>&1 || true)
+if grep -qi "not found" <<<"$nonexist_output"; then
+    pass "Transition on nonexistent task returns error"
+else
+    fail "Transition on nonexistent task did not return error" "$nonexist_output"
+fi
+
+# Missing arguments: expects a usage/requires hint; output is reported on failure
+missing_output=$(sup transition 2>&1 || true)
+if grep -qiE "usage|requires" <<<"$missing_output"; then
+    pass "Missing arguments shows usage"
+else
+    fail "Missing arguments did not show usage" "$missing_output"
+fi
+
+# ============================================================
+# SUMMARY
+# ============================================================
+printf '\n'
+printf '%s\n' "========================================"
+printf "  \033[1mResults: %d total, \033[0;32m%d passed\033[0m, \033[0;31m%d failed\033[0m, \033[0;33m%d skipped\033[0m\n" \
+    "$TOTAL_COUNT" "$PASS_COUNT" "$FAIL_COUNT" "$SKIP_COUNT"
+printf '%s\n' "========================================"
+
+# Exit status mirrors the outcome: 0 when nothing failed, 1 otherwise
+if ! [[ "$FAIL_COUNT" -gt 0 ]]; then
+    printf '\n'
+    printf "\033[0;32mAll tests passed.\033[0m\n"
+    exit 0
+fi
+printf '\n'
+printf "\033[0;31mFAILURES DETECTED - review output above\033[0m\n"
+exit 1