diff --git a/tests/docker/run-tests.sh b/tests/docker/run-tests.sh index f7c19022..447f3ee7 100755 --- a/tests/docker/run-tests.sh +++ b/tests/docker/run-tests.sh @@ -2,7 +2,8 @@ # Setup script test runner set -uo pipefail -SCRIPTS_DIR="${HOME}/git/aidevops/.agents/scripts" +REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +SCRIPTS_DIR="$REPO_DIR/.agents/scripts" PASS=0 FAIL=0 SKIP=0 diff --git a/tests/test-memory-mail.sh b/tests/test-memory-mail.sh new file mode 100644 index 00000000..bdd643e3 --- /dev/null +++ b/tests/test-memory-mail.sh @@ -0,0 +1,425 @@ +#!/usr/bin/env bash +# test-memory-mail.sh +# +# Unit tests for memory-helper.sh and mail-helper.sh: +# - Memory: store, recall (FTS5), stats, prune, namespaces, relational versioning +# - Mail: send, check, read, archive, prune, register/deregister agents +# +# Uses isolated temp directories to avoid touching production data. +# +# Usage: bash tests/test-memory-mail.sh [--verbose] +# +# Exit codes: 0 = all pass, 1 = failures found + +set -euo pipefail + +REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)"
+SCRIPTS_DIR="$REPO_DIR/.agents/scripts"
+MEMORY_SCRIPT="$SCRIPTS_DIR/memory-helper.sh"
+MAIL_SCRIPT="$SCRIPTS_DIR/mail-helper.sh"
+VERBOSE="${1:-}"
+
+# --- Test Framework ---
+# Counters updated by pass/fail/skip and printed in the final summary.
+PASS_COUNT=0
+FAIL_COUNT=0
+SKIP_COUNT=0
+TOTAL_COUNT=0
+
+# pass <description>: record a passing test and print it in green.
+pass() {
+    PASS_COUNT=$((PASS_COUNT + 1))
+    TOTAL_COUNT=$((TOTAL_COUNT + 1))
+    printf " \033[0;32mPASS\033[0m %s\n" "$1"
+}
+
+# fail <description> [detail]: record a failing test and print it in red.
+# The optional detail (usually captured command output) goes on the next line.
+fail() {
+    FAIL_COUNT=$((FAIL_COUNT + 1))
+    TOTAL_COUNT=$((TOTAL_COUNT + 1))
+    printf " \033[0;31mFAIL\033[0m %s\n" "$1"
+    if [[ -n "${2:-}" ]]; then
+        printf " %s\n" "$2"
+    fi
+}
+
+# skip <description> [detail]: record a skipped test and print it in yellow.
+# The optional detail is printed like fail's, so callers that hand over
+# captured output no longer have it silently dropped.
+skip() {
+    SKIP_COUNT=$((SKIP_COUNT + 1))
+    TOTAL_COUNT=$((TOTAL_COUNT + 1))
+    printf " \033[0;33mSKIP\033[0m %s\n" "$1"
+    if [[ -n "${2:-}" ]]; then
+        printf " %s\n" "$2"
+    fi
+}
+
+# section <title>: print a bold heading preceded by a blank line.
+section() {
+    echo ""
+    printf "\033[1m=== %s ===\033[0m\n" "$1"
+}
+
+# --- Isolated Test Environment ---
+# Both helpers are pointed at a throwaway directory that is removed on exit.
+TEST_DIR=$(mktemp -d)
+export AIDEVOPS_MEMORY_DIR="$TEST_DIR/memory"
+export AIDEVOPS_MAIL_DIR="$TEST_DIR/mail"
+trap 'rm -rf "$TEST_DIR"' EXIT
+
+# Helper: run memory command (stderr is folded into stdout so tests can grep it)
+mem() {
+    bash "$MEMORY_SCRIPT" "$@" 2>&1
+}
+
+# Helper: run mail command (stderr is folded into stdout so tests can grep it)
+mail_cmd() {
+    bash "$MAIL_SCRIPT" "$@" 2>&1
+}
+
+# Helper: query memory DB
+mem_db() {
+    sqlite3 -cmd ".timeout 5000" "$AIDEVOPS_MEMORY_DIR/memory.db" "$@"
+}
+
+# Helper: query mail DB
+mail_db() {
+    sqlite3 -cmd ".timeout 5000" "$AIDEVOPS_MAIL_DIR/mailbox.db" "$@"
+}
+
+# ============================================================
+# MEMORY TESTS
+# ============================================================
+
+section "Memory: Database Initialization"
+
+# Test: first store creates database
+# Guarded: under `set -e` a non-zero exit here would end the run (with the
+# output already discarded) before the check below can record a FAIL.
+mem store --content "Test memory entry" --type "WORKING_SOLUTION" --tags "test,init" >/dev/null || true
+if [[ -f "$AIDEVOPS_MEMORY_DIR/memory.db" ]]; then
+    pass "memory store creates database"
+else
+    fail "memory store did not create database"
+fi
+
+# Test: FTS5 table exists
+# A failed query counts as "table not found" instead of aborting the script.
+fts_check=$(mem_db "SELECT count(*) FROM sqlite_master WHERE type='table' AND name='learnings';") || fts_check=0
+if [[ "$fts_check" -ge 1 ]]; then
+    pass "FTS5 learnings table exists"
+else
+ 
fail "FTS5 learnings table missing"
+fi
+
+# Test: WAL mode
+# Guarded: under `set -e` a failing command substitution in a plain assignment
+# ends the run before the check below can record a FAIL. The same guard is
+# applied to every command under test in this part of the script.
+journal=$(mem_db "PRAGMA journal_mode;") || true
+if [[ "$journal" == "wal" ]]; then
+    pass "Memory DB uses WAL mode"
+else
+    fail "Memory DB journal mode is '$journal', expected 'wal'"
+fi
+
+section "Memory: Store and Recall"
+
+# Test: store returns success
+store_output=$(mem store --content "Bash arrays need declare -a for indexed arrays" --type "CODEBASE_PATTERN" --tags "bash,arrays") || true
+if echo "$store_output" | grep -qi "stored\|ok\|success"; then
+    pass "memory store reports success"
+else
+    fail "memory store output unexpected" "$store_output"
+fi
+
+# Test: recall finds stored content
+recall_output=$(mem recall --query "bash arrays") || true
+if echo "$recall_output" | grep -qi "arrays\|bash"; then
+    pass "memory recall finds stored content by keyword"
+else
+    fail "memory recall did not find stored content" "$recall_output"
+fi
+
+# Test: recall with type filter
+mem store --content "User prefers dark mode in terminal" --type "USER_PREFERENCE" --tags "ui,terminal" >/dev/null || true
+recall_typed=$(mem recall --query "dark mode" --type "USER_PREFERENCE") || true
+if echo "$recall_typed" | grep -qi "dark mode"; then
+    pass "memory recall with --type filter works"
+else
+    fail "memory recall with --type filter failed" "$recall_typed"
+fi
+
+# Test: FTS5 hyphenated query (t139 regression)
+# The guard matters most here: if the regression makes recall exit non-zero,
+# an unguarded assignment would abort before the fail branch could report it.
+mem store --content "Fixed pre-commit hook for shellcheck" --type "WORKING_SOLUTION" --tags "pre-commit,shellcheck" >/dev/null || true
+recall_hyphen=$(mem recall --query "pre-commit hook" 2>&1) || true
+if echo "$recall_hyphen" | grep -qiE "error.*column|fts5.*syntax"; then
+    fail "FTS5 hyphenated query causes error (t139 regression)" "$recall_hyphen"
+else
+    pass "FTS5 hyphenated query works without error (t139)"
+fi
+
+# Test: recall with limit
+mem store --content "Memory test entry A" --type "CONTEXT" --tags "test" >/dev/null || true
+mem store --content "Memory test entry B" --type "CONTEXT" --tags "test" >/dev/null || true
+mem store --content "Memory test entry C" 
--type "CONTEXT" --tags "test" >/dev/null || true
+# Guarded with `|| true` (as are the other captures below): under `set -e` a
+# non-zero exit would end the run before the following check records a result.
+recall_limited=$(mem recall --query "memory test entry" --limit 2) || true
+# Count result entries (each has a type marker like [CONTEXT])
+result_count=$(echo "$recall_limited" | grep -c '\[CONTEXT\]' || true)
+if [[ "$result_count" -le 2 ]]; then
+    pass "memory recall --limit restricts results"
+else
+    fail "memory recall --limit did not restrict (got $result_count, expected <= 2)"
+fi
+
+section "Memory: Stats"
+
+stats_output=$(mem stats) || true
+if echo "$stats_output" | grep -qiE "total|memories|entries|count"; then
+    pass "memory stats produces output"
+else
+    fail "memory stats output unexpected" "$stats_output"
+fi
+
+section "Memory: Relational Versioning"
+
+# Store a memory, then update it
+original_output=$(mem store --content "Favorite color is blue" --type "USER_PREFERENCE" --tags "preference") || true
+# The ID is scraped from the store output; empty when no mem_* token is printed.
+original_id=$(echo "$original_output" | grep -oE 'mem_[a-z0-9_]+' | head -1 || true)
+
+if [[ -n "$original_id" ]]; then
+    # Store an update that supersedes the original
+    update_output=$(mem store --content "Favorite color is now green" --type "USER_PREFERENCE" --tags "preference" --supersedes "$original_id" --relation updates 2>&1 || true)
+    if echo "$update_output" | grep -qi "stored\|ok\|success"; then
+        pass "Relational versioning: store with --supersedes works"
+    else
+        # May not support --supersedes flag yet, that's OK
+        skip "Relational versioning: --supersedes may not be implemented yet"
+    fi
+else
+    skip "Could not extract memory ID for relational test"
+fi
+
+section "Memory: Namespace Isolation"
+
+# Store in a namespace
+# Guarded so that a failing store reaches the else/skip branch of this check
+# instead of aborting the script.
+ns_output=$(mem --namespace test-runner store --content "Runner-specific config" --type "TOOL_CONFIG" --tags "runner" 2>&1) || true
+if echo "$ns_output" | grep -qi "stored\|ok\|success"; then
+    pass "Namespace store works"
+
+    # Verify namespace directory created
+    if [[ -d "$AIDEVOPS_MEMORY_DIR/namespaces/test-runner" ]]; then
+        pass "Namespace directory created"
+    else
+        fail "Namespace directory not 
created"
+    fi
+
+    # Recall from namespace
+    # Guarded: under `set -e` a non-zero exit would end the run before the
+    # check below can record a FAIL.
+    ns_recall=$(mem --namespace test-runner recall --query "runner config" 2>&1) || true
+    if echo "$ns_recall" | grep -qi "runner\|config"; then
+        pass "Namespace recall finds namespace-specific content"
+    else
+        fail "Namespace recall failed" "$ns_recall"
+    fi
+else
+    skip "Namespace store failed" "$ns_output"
+fi
+
+# Invalid namespace name
+invalid_ns=$(mem --namespace "invalid namespace!" store --content "test" --type "CONTEXT" 2>&1 || true)
+if echo "$invalid_ns" | grep -qi "invalid"; then
+    pass "Invalid namespace name rejected"
+else
+    fail "Invalid namespace name was not rejected"
+fi
+
+section "Memory: Prune"
+
+# Prune with dry-run (should not delete anything)
+prune_output=$(mem prune --dry-run 2>&1 || true)
+if echo "$prune_output" | grep -qiE "prune|would|dry|entries|0"; then
+    pass "memory prune --dry-run works"
+else
+    skip "memory prune --dry-run output unexpected" "$prune_output"
+fi
+
+section "Memory: Help"
+
+# Guarded: a help command that exits non-zero is judged on its output.
+help_output=$(mem help 2>&1) || true
+if echo "$help_output" | grep -qiE "usage|store|recall|memory|COMMANDS"; then
+    pass "memory help shows usage information"
+else
+    fail "memory help output unexpected" "$(echo "$help_output" | head -3)"
+fi
+
+# ============================================================
+# MAIL TESTS
+# ============================================================
+
+section "Mail: Database Initialization"
+
+# Test: first command creates database
+mail_cmd status >/dev/null 2>&1 || true
+if [[ -f "$AIDEVOPS_MAIL_DIR/mailbox.db" ]]; then
+    pass "mail command creates database"
+else
+    fail "mail command did not create database"
+fi
+
+# Test: tables exist
+# Guarded: with `pipefail` a failing sqlite3 fails the whole pipeline, which
+# would otherwise abort the run right after the FAIL above.
+mail_tables=$(mail_db "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;" | tr '\n' ',') || true
+if [[ "$mail_tables" == *"messages"* && "$mail_tables" == *"agents"* ]]; then
+    pass "Mail tables exist (messages, agents)"
+else
+    fail "Missing mail tables" "Found: $mail_tables"
+fi
+
+section "Mail: Agent Registration"
+
+# Register an 
agent +reg_output=$(mail_cmd register --agent "test-agent-1" --role "worker" 2>&1) +if echo "$reg_output" | grep -qiE "register|success|ok"; then + pass "Agent registration works" +else + fail "Agent registration failed" "$(echo "$reg_output" | head -3)" +fi + +# Register second agent +mail_cmd register --agent "test-agent-2" --role "orchestrator" >/dev/null 2>&1 + +# List agents +agents_output=$(mail_cmd agents 2>&1) +if echo "$agents_output" | grep -q "test-agent-1"; then + pass "Registered agent appears in agent list" +else + fail "Registered agent not in list" "$agents_output" +fi + +section "Mail: Send and Receive" + +# Send a message +send_output=$(mail_cmd send --from "test-agent-1" --to "test-agent-2" --type "task_dispatch" --payload "Please process task t001" 2>&1) +if echo "$send_output" | grep -qiE "sent|success|ok|msg-"; then + pass "mail send works" +else + fail "mail send failed" "$(echo "$send_output" | head -3)" +fi + +# Check inbox +check_output=$(mail_cmd check --agent "test-agent-2" 2>&1) +if echo "$check_output" | grep -qiE "1|unread|message"; then + pass "mail check shows unread messages" +else + fail "mail check did not show unread messages" "$check_output" +fi + +# Read message +# First get the message ID +msg_id=$(mail_db "SELECT id FROM messages WHERE to_agent = 'test-agent-2' LIMIT 1;" 2>/dev/null || echo "") +if [[ -n "$msg_id" ]]; then + read_output=$(mail_cmd read "$msg_id" 2>&1) + if echo "$read_output" | grep -qiE "task t001|process|payload"; then + pass "mail read shows message content" + else + fail "mail read did not show content" "$(echo "$read_output" | head -3)" + fi + + # Verify message marked as read + msg_status=$(mail_db "SELECT status FROM messages WHERE id = '$msg_id';") + if [[ "$msg_status" == "read" ]]; then + pass "Message marked as 'read' after reading" + else + fail "Message status is '$msg_status', expected 'read'" + fi +else + fail "Could not find message ID in database" +fi + +section "Mail: Archive" + +if [[ -n 
"$msg_id" ]]; then + archive_output=$(mail_cmd archive "$msg_id" 2>&1) + if echo "$archive_output" | grep -qiE "archived|success|ok"; then + pass "mail archive works" + else + fail "mail archive failed" "$(echo "$archive_output" | head -3)" + fi + + # Verify archived + archived_status=$(mail_db "SELECT status FROM messages WHERE id = '$msg_id';") + if [[ "$archived_status" == "archived" ]]; then + pass "Message status is 'archived' after archiving" + else + fail "Message status is '$archived_status', expected 'archived'" + fi +fi + +section "Mail: Message Types" + +# Test all valid message types +for msg_type in task_dispatch status_report discovery request broadcast; do + type_output=$(mail_cmd send --from "test-agent-1" --to "test-agent-2" --type "$msg_type" --payload "Test $msg_type" 2>&1) + if echo "$type_output" | grep -qiE "sent|success|ok|msg-"; then + pass "mail send type=$msg_type" + else + fail "mail send type=$msg_type failed" "$(echo "$type_output" | head -3)" + fi +done + +# Test invalid message type +invalid_type_output=$(mail_cmd send --from "test-agent-1" --to "test-agent-2" --type "invalid_type" --payload "Test" 2>&1 || true) +if echo "$invalid_type_output" | grep -qiE "invalid|error|constraint"; then + pass "Invalid message type rejected" +else + fail "Invalid message type was not rejected" "$invalid_type_output" +fi + +section "Mail: Priority" + +# Send with priority +priority_output=$(mail_cmd send --from "test-agent-1" --to "test-agent-2" --type "request" --priority "high" --payload "Urgent request" 2>&1) +if echo "$priority_output" | grep -qiE "sent|success|ok|msg-"; then + pass "mail send with --priority works" +else + fail "mail send with --priority failed" "$(echo "$priority_output" | head -3)" +fi + +section "Mail: Status" + +status_output=$(mail_cmd status 2>&1) +if echo "$status_output" | grep -qiE "message|agent|total|unread|mail"; then + pass "mail status produces summary" +else + fail "mail status output unexpected" "$status_output" 
+fi + +section "Mail: Deregister" + +dereg_output=$(mail_cmd deregister --agent "test-agent-1" 2>&1) +if echo "$dereg_output" | grep -qiE "deregister|removed|success|ok|inactive"; then + pass "Agent deregistration works" +else + fail "Agent deregistration failed" "$(echo "$dereg_output" | head -3)" +fi + +section "Mail: Prune" + +prune_mail_output=$(mail_cmd prune 2>&1 || true) +if echo "$prune_mail_output" | grep -qiE "prune|storage|archived|messages|0"; then + pass "mail prune works" +else + skip "mail prune output unexpected" "$prune_mail_output" +fi + +section "Mail: Help" + +# mail-helper.sh doesn't have a cmd_help but main() should show usage on unknown command +help_mail=$(mail_cmd help 2>&1 || true) +if echo "$help_mail" | grep -qiE "usage|send|check|read|mail|commands"; then + pass "mail help shows usage information" +else + fail "mail help output unexpected" "$(echo "$help_mail" | head -3)" +fi + +# ============================================================ +# SUMMARY +# ============================================================ +echo "" +echo "========================================" +printf " \033[1mResults: %d total, \033[0;32m%d passed\033[0m, \033[0;31m%d failed\033[0m, \033[0;33m%d skipped\033[0m\n" \ + "$TOTAL_COUNT" "$PASS_COUNT" "$FAIL_COUNT" "$SKIP_COUNT" +echo "========================================" + +if [[ "$FAIL_COUNT" -gt 0 ]]; then + echo "" + printf "\033[0;31mFAILURES DETECTED - review output above\033[0m\n" + exit 1 +else + echo "" + printf "\033[0;32mAll tests passed.\033[0m\n" + exit 0 +fi diff --git a/tests/test-smoke-help.sh b/tests/test-smoke-help.sh new file mode 100644 index 00000000..fedf9c90 --- /dev/null +++ b/tests/test-smoke-help.sh @@ -0,0 +1,257 @@ +#!/usr/bin/env bash +# test-smoke-help.sh +# +# Smoke tests: bash -n syntax check for ALL scripts, plus help command +# validation for scripts that define a help function. 
+# +# Usage: bash tests/test-smoke-help.sh [--verbose] +# +# Exit codes: 0 = all pass, 1 = failures found + +set -euo pipefail + +REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +SCRIPTS_DIR="$REPO_DIR/.agents/scripts" +VERBOSE="${1:-}" + +# --- Test Framework --- +PASS_COUNT=0 +FAIL_COUNT=0 +SKIP_COUNT=0 +TOTAL_COUNT=0 + +pass() { + PASS_COUNT=$((PASS_COUNT + 1)) + TOTAL_COUNT=$((TOTAL_COUNT + 1)) + if [[ "$VERBOSE" == "--verbose" ]]; then + printf " \033[0;32mPASS\033[0m %s\n" "$1" + fi + return 0 +} + +fail() { + FAIL_COUNT=$((FAIL_COUNT + 1)) + TOTAL_COUNT=$((TOTAL_COUNT + 1)) + printf " \033[0;31mFAIL\033[0m %s\n" "$1" + if [[ -n "${2:-}" ]]; then + printf " %s\n" "$2" + fi + return 0 +} + +skip() { + SKIP_COUNT=$((SKIP_COUNT + 1)) + TOTAL_COUNT=$((TOTAL_COUNT + 1)) + if [[ "$VERBOSE" == "--verbose" ]]; then + printf " \033[0;33mSKIP\033[0m %s\n" "$1" + fi + return 0 +} + +section() { + echo "" + printf "\033[1m=== %s ===\033[0m\n" "$1" +} + +# ============================================================ +# SECTION 1: bash -n syntax check for ALL scripts +# ============================================================ +section "Syntax Check (bash -n) - All Scripts" + +syntax_pass=0 +syntax_fail=0 + +while IFS= read -r script; do + abs_path="$REPO_DIR/$script" + name=$(basename "$script") + + if bash -n "$abs_path" 2>/dev/null; then + pass "syntax: $name" + syntax_pass=$((syntax_pass + 1)) + else + fail "syntax: $name" "bash -n failed" + syntax_fail=$((syntax_fail + 1)) + fi +done < <(git -C "$REPO_DIR" ls-files '.agents/scripts/*.sh' | grep -v '_archive/') + +printf " Syntax: %d passed, %d failed (of %d non-archived scripts)\n" \ + "$syntax_pass" "$syntax_fail" "$((syntax_pass + syntax_fail))" + +# ============================================================ +# SECTION 2: Help command smoke tests +# ============================================================ +section "Help Command Smoke Tests" + +# Scripts known to NOT support a help subcommand 
(libraries, hooks, utilities) +# These are sourced or run without arguments, not invoked with "help" +SKIP_HELP=( + "shared-constants.sh" + "loop-common.sh" + "pre-commit-hook.sh" + "cron-dispatch.sh" + "aidevops-update-check.sh" + "auto-version-bump.sh" + "validate-version-consistency.sh" + "extract-opencode-prompts.sh" + "generate-opencode-commands.sh" + "generate-skills.sh" + "opencode-prompt-drift-check.sh" + "find-missing-returns.sh" + "mass-fix-returns.sh" + "efficient-return-fix.sh" + "comprehensive-quality-fix.sh" + "quality-fix.sh" + "sonarcloud-autofix.sh" + "monitor-code-review.sh" + "code-audit-helper.sh" + "session-time-helper.sh" + "planning-commit-helper.sh" + "log-issue-helper.sh" + "humanise-update-helper.sh" + "dns-helper.sh" + "closte-helper.sh" + "cloudron-helper.sh" + "hetzner-helper.sh" + "hostinger-helper.sh" + "coolify-helper.sh" + "ses-helper.sh" + "servers-helper.sh" + "pagespeed-helper.sh" + "tool-version-check.sh" + "todo-ready.sh" + "mcp-diagnose.sh" + "localhost-helper.sh" + "linters-local.sh" + "markdown-lint-fix.sh" + "setup-mcp-integrations.sh" + "generate-opencode-agents.sh" + "setup-local-api-keys.sh" + "stagehand-setup.sh" + "stagehand-python-setup.sh" + "test-stagehand-integration.sh" + "test-stagehand-python-integration.sh" + "test-stagehand-both-integration.sh" + "crawl4ai-examples.sh" + "ampcode-cli.sh" + "agno-setup.sh" + "sonarscanner-cli.sh" + "codacy-cli.sh" + "codacy-cli-chunked.sh" + "coderabbit-pro-analysis.sh" + "snyk-helper.sh" + "verify-mirrors.sh" + "webhosting-verify.sh" +) + +is_skip_help() { + local name="$1" + for s in "${SKIP_HELP[@]}"; do + [[ "$name" == "$s" ]] && return 0 + done + return 1 +} + +help_pass=0 +help_fail=0 +help_skip=0 + +while IFS= read -r script; do + abs_path="$REPO_DIR/$script" + name=$(basename "$script") + + # Skip archived scripts + [[ "$script" == *"_archive/"* ]] && continue + + # Skip scripts that don't support help + if is_skip_help "$name"; then + skip "help: $name (not a 
help-command script)"
+        help_skip=$((help_skip + 1))
+        continue
+    fi
+
+    # Check if script defines a help function
+    if ! grep -qE 'cmd_help\(\)|show_help\(\)|show_usage\(\)|usage\(\)' "$abs_path" 2>/dev/null; then
+        skip "help: $name (no help function defined)"
+        help_skip=$((help_skip + 1))
+        continue
+    fi
+
+    # Run help command with timeout (5s max) and capture output.
+    # - The exit status is captured in the `||` branch; reading $? after
+    #   `|| true` always yields 0, which made the timeout check unreachable.
+    # - stdin comes from /dev/null so a script that reads input cannot consume
+    #   the file list feeding this loop.
+    help_exit=0
+    help_output=$(timeout 5 bash "$abs_path" help 2>&1 </dev/null) || help_exit=$?
+
+    # Some scripts exit 0 on help, some exit 1 (usage error) - both are acceptable
+    # as long as they produce output and don't hang/crash.
+    # timeout exits with 124 when it had to kill the command, so a hang is
+    # reported even if the script printed something first.
+    if [[ $help_exit -eq 124 ]]; then
+        fail "help: $name" "Timed out after 5 seconds"
+        help_fail=$((help_fail + 1))
+    elif [[ -n "$help_output" ]]; then
+        pass "help: $name"
+        help_pass=$((help_pass + 1))
+    else
+        fail "help: $name" "No output produced (exit=$help_exit)"
+        help_fail=$((help_fail + 1))
+    fi
+done < <(git -C "$REPO_DIR" ls-files '.agents/scripts/*.sh')
+
+printf " Help: %d passed, %d failed, %d skipped\n" \
+    "$help_pass" "$help_fail" "$help_skip"
+
+# ============================================================
+# SECTION 3: ShellCheck on critical scripts (errors only)
+# ============================================================
+section "ShellCheck (errors only) - Critical Scripts"
+
+# Basenames (under $SCRIPTS_DIR) that get the ShellCheck error-level pass.
+CRITICAL_SCRIPTS=(
+    "supervisor-helper.sh"
+    "memory-helper.sh"
+    "mail-helper.sh"
+    "runner-helper.sh"
+    "full-loop-helper.sh"
+    "ralph-loop-helper.sh"
+    "quality-loop-helper.sh"
+    "pre-edit-check.sh"
+    "worktree-helper.sh"
+    "credential-helper.sh"
+    "secret-helper.sh"
+)
+
+if command -v shellcheck &>/dev/null; then
+    for name in "${CRITICAL_SCRIPTS[@]}"; do
+        script_path="$SCRIPTS_DIR/$name"
+        if [[ ! 
-f "$script_path" ]]; then
+            skip "shellcheck: $name (not found)"
+            continue
+        fi
+
+        # Decide pass/fail from shellcheck's exit status rather than from
+        # grepping its human-readable output for the word "error": that text
+        # format is not a stable interface, and source lines quoted in the
+        # report can contain "error" themselves.
+        sc_status=0
+        sc_output=$(shellcheck -S error "$script_path" 2>&1) || sc_status=$?
+        if [[ "$sc_status" -eq 0 ]]; then
+            pass "shellcheck: $name (0 errors)"
+        else
+            # The gcc format prints one "file:line:col: error: ..." line per
+            # finding, which gives a reliable count for the message.
+            sc_errors=$(shellcheck -S error -f gcc "$script_path" 2>/dev/null | grep -c ': error: ' || true)
+            fail "shellcheck: $name ($sc_errors errors)" \
+                "$(echo "$sc_output" | head -5)"
+        fi
+    done
+else
+    skip "shellcheck not installed"
+fi
+
+# ============================================================
+# SUMMARY
+# ============================================================
+# Print the totals and exit 1 if any test failed, 0 otherwise.
+echo ""
+echo "========================================"
+printf " \033[1mResults: %d total, \033[0;32m%d passed\033[0m, \033[0;31m%d failed\033[0m, \033[0;33m%d skipped\033[0m\n" \
+    "$TOTAL_COUNT" "$PASS_COUNT" "$FAIL_COUNT" "$SKIP_COUNT"
+echo "========================================"
+
+if [[ "$FAIL_COUNT" -gt 0 ]]; then
+    echo ""
+    printf "\033[0;31mFAILURES DETECTED - review output above\033[0m\n"
+    exit 1
+else
+    echo ""
+    printf "\033[0;32mAll tests passed.\033[0m\n"
+    exit 0
+fi
diff --git a/tests/test-supervisor-state-machine.sh b/tests/test-supervisor-state-machine.sh
new file mode 100644
index 00000000..2837a78b
--- /dev/null
+++ b/tests/test-supervisor-state-machine.sh
@@ -0,0 +1,582 @@
+#!/usr/bin/env bash
+# test-supervisor-state-machine.sh
+#
+# Unit tests for supervisor-helper.sh state machine:
+# - Valid/invalid state transitions
+# - Task lifecycle (add -> dispatch -> run -> evaluate -> complete)
+# - Retry logic
+# - Batch completion detection
+# - Post-PR lifecycle (complete -> pr_review -> merging -> merged -> deployed)
+#
+# Uses an isolated temp DB to avoid touching production data.
+#
+# Usage: bash tests/test-supervisor-state-machine.sh [--verbose]
+#
+# Exit codes: 0 = all pass, 1 = failures found
+
+set -euo pipefail
+
+REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)"
+SCRIPTS_DIR="$REPO_DIR/.agents/scripts"
+SUPERVISOR_SCRIPT="$SCRIPTS_DIR/supervisor-helper.sh"
+VERBOSE="${1:-}"
+
+# --- Test Framework ---
+# Counters updated by pass/fail/skip and printed in the final summary.
+PASS_COUNT=0
+FAIL_COUNT=0
+SKIP_COUNT=0
+TOTAL_COUNT=0
+
+# pass <description>: record a passing test and print it in green.
+pass() {
+    PASS_COUNT=$((PASS_COUNT + 1))
+    TOTAL_COUNT=$((TOTAL_COUNT + 1))
+    printf " \033[0;32mPASS\033[0m %s\n" "$1"
+}
+
+# fail <description> [detail]: record a failing test and print it in red.
+# The optional detail (usually captured command output) goes on the next line.
+fail() {
+    FAIL_COUNT=$((FAIL_COUNT + 1))
+    TOTAL_COUNT=$((TOTAL_COUNT + 1))
+    printf " \033[0;31mFAIL\033[0m %s\n" "$1"
+    if [[ -n "${2:-}" ]]; then
+        printf " %s\n" "$2"
+    fi
+}
+
+# skip <description>: record a skipped test and print it in yellow.
+skip() {
+    SKIP_COUNT=$((SKIP_COUNT + 1))
+    TOTAL_COUNT=$((TOTAL_COUNT + 1))
+    printf " \033[0;33mSKIP\033[0m %s\n" "$1"
+}
+
+# section <title>: print a bold heading preceded by a blank line.
+section() {
+    echo ""
+    printf "\033[1m=== %s ===\033[0m\n" "$1"
+}
+
+# --- Test DB Setup ---
+# The supervisor is pointed at a throwaway directory that is removed on exit.
+TEST_DIR=$(mktemp -d)
+export AIDEVOPS_SUPERVISOR_DIR="$TEST_DIR"
+trap 'rm -rf "$TEST_DIR"' EXIT
+
+# Helper: run supervisor command with isolated DB
+# (stderr is folded into stdout so tests can grep it)
+sup() {
+    bash "$SUPERVISOR_SCRIPT" "$@" 2>&1
+}
+
+# Helper: query the test DB directly
+test_db() {
+    sqlite3 -cmd ".timeout 5000" "$TEST_DIR/supervisor.db" "$@"
+}
+
+# Helper: get task status
+# $1 = task id, interpolated into the SQL text; callers pass literal test ids.
+get_status() {
+    test_db "SELECT status FROM tasks WHERE id = '$1';"
+}
+
+# Helper: get task field
+# $1 = task id, $2 = column name; both are interpolated into the SQL text.
+get_field() {
+    test_db "SELECT $2 FROM tasks WHERE id = '$1';"
+}
+
+# ============================================================
+# SECTION 1: Database Initialization
+# ============================================================
+section "Database Initialization"
+
+# Test: init creates database
+# Guarded: under `set -e` a non-zero exit here would end the run (with the
+# output already discarded) before the check below can record a FAIL.
+sup init >/dev/null || true
+if [[ -f "$TEST_DIR/supervisor.db" ]]; then
+    pass "init creates supervisor.db"
+else
+    fail "init did not create supervisor.db"
+fi
+
+# Test: tables exist
+tables=$(test_db "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;" | tr '\n' ',') || true
+# Match whole names between commas: a bare *"tasks"* pattern is also satisfied
+# by "batch_tasks", so a missing tasks table could go unnoticed. batch_tasks is
+# checked as well because the pass message names it.
+tables_csv=",$tables"
+if [[ "$tables_csv" == *",tasks,"* && "$tables_csv" == *",batches,"* && "$tables_csv" == *",state_log,"* && "$tables_csv" == *",batch_tasks,"* ]]; then
+    pass "Required tables exist (tasks, batches, state_log, batch_tasks)"
+else
+    fail "Missing required tables" "Found: 
$tables"
+fi
+
+# Test: WAL mode is set
+# Guarded with `|| true` (as are the other commands under test below): under
+# `set -e` a non-zero exit would end the run before the following check can
+# record a FAIL.
+journal_mode=$(test_db "PRAGMA journal_mode;") || true
+if [[ "$journal_mode" == "wal" ]]; then
+    pass "WAL journal mode is set"
+else
+    fail "Journal mode is '$journal_mode', expected 'wal'"
+fi
+
+# ============================================================
+# SECTION 2: Task Addition
+# ============================================================
+section "Task Addition"
+
+# Test: add a task
+sup add test-t001 --repo /tmp/test --description "Test task 1" >/dev/null || true
+status=$(get_status "test-t001") || true
+if [[ "$status" == "queued" ]]; then
+    pass "Added task starts in 'queued' state"
+else
+    fail "Added task has status '$status', expected 'queued'"
+fi
+
+# Test: duplicate task rejected
+dup_output=$(sup add test-t001 --repo /tmp/test 2>&1 || true)
+if echo "$dup_output" | grep -qi "already exists"; then
+    pass "Duplicate task ID is rejected"
+else
+    fail "Duplicate task was not rejected" "$dup_output"
+fi
+
+# Test: state_log records initial state
+log_entry=$(test_db "SELECT to_state FROM state_log WHERE task_id = 'test-t001' ORDER BY id LIMIT 1;") || true
+if [[ "$log_entry" == "queued" ]]; then
+    pass "State log records initial 'queued' entry"
+else
+    fail "State log initial entry is '$log_entry', expected 'queued'"
+fi
+
+# ============================================================
+# SECTION 3: Valid State Transitions (Happy Path)
+# ============================================================
+section "Valid State Transitions (Happy Path)"
+
+# queued -> dispatched
+sup transition test-t001 dispatched >/dev/null || true
+if [[ "$(get_status test-t001)" == "dispatched" ]]; then
+    pass "queued -> dispatched"
+else
+    fail "queued -> dispatched failed"
+fi
+
+# Test: started_at is set on first dispatch
+started=$(get_field "test-t001" "started_at") || true
+if [[ -n "$started" ]]; then
+    pass "started_at set on first dispatch"
+else
+    fail "started_at not set on dispatch"
+fi
+
+# dispatched -> running
+sup transition test-t001 running >/dev/null || true
+if [[ "$(get_status test-t001)" == "running" ]]; then
+    pass "dispatched -> running"
+else
+    fail "dispatched -> running failed"
+fi
+
+# running -> evaluating
+# Guarded with `|| true` (as are the transitions below): under `set -e` a
+# rejected transition would end the run, with its output already discarded,
+# before the status check can record a FAIL.
+sup transition test-t001 evaluating >/dev/null || true
+if [[ "$(get_status test-t001)" == "evaluating" ]]; then
+    pass "running -> evaluating"
+else
+    fail "running -> evaluating failed"
+fi
+
+# evaluating -> complete
+sup transition test-t001 complete >/dev/null || true
+if [[ "$(get_status test-t001)" == "complete" ]]; then
+    pass "evaluating -> complete"
+else
+    fail "evaluating -> complete failed"
+fi
+
+# Test: completed_at is set
+completed=$(get_field "test-t001" "completed_at") || true
+if [[ -n "$completed" ]]; then
+    pass "completed_at set on terminal state"
+else
+    fail "completed_at not set on complete"
+fi
+
+# ============================================================
+# SECTION 4: Post-PR Lifecycle Transitions
+# ============================================================
+section "Post-PR Lifecycle Transitions"
+
+# complete -> pr_review
+sup transition test-t001 pr_review >/dev/null || true
+if [[ "$(get_status test-t001)" == "pr_review" ]]; then
+    pass "complete -> pr_review"
+else
+    fail "complete -> pr_review failed"
+fi
+
+# pr_review -> merging
+sup transition test-t001 merging >/dev/null || true
+if [[ "$(get_status test-t001)" == "merging" ]]; then
+    pass "pr_review -> merging"
+else
+    fail "pr_review -> merging failed"
+fi
+
+# merging -> merged
+sup transition test-t001 merged >/dev/null || true
+if [[ "$(get_status test-t001)" == "merged" ]]; then
+    pass "merging -> merged"
+else
+    fail "merging -> merged failed"
+fi
+
+# merged -> deploying
+sup transition test-t001 deploying >/dev/null || true
+if [[ "$(get_status test-t001)" == "deploying" ]]; then
+    pass "merged -> deploying"
+else
+    fail "merged -> deploying failed"
+fi
+
+# deploying -> deployed
+sup transition test-t001 deployed >/dev/null || true
+if [[ "$(get_status test-t001)" == "deployed" ]]; then
+    pass "deploying -> deployed"
+else
+    fail "deploying -> deployed failed"
+fi
+
+# ============================================================
+# SECTION 5: Invalid State Transitions
+# ============================================================
+section "Invalid State Transitions"
+
+# Add a fresh task for invalid transition tests
+# Guarded with `|| true` (as are the other setup and transition commands
+# below): under `set -e` a non-zero exit would end the run, with its output
+# already discarded, before the following check can record a FAIL.
+sup add test-t002 --repo /tmp/test --description "Invalid transition test" >/dev/null || true
+
+# queued -> running (must go through dispatched first)
+invalid_output=$(sup transition test-t002 running 2>&1 || true)
+if echo "$invalid_output" | grep -qi "invalid transition"; then
+    pass "queued -> running rejected (must go through dispatched)"
+else
+    fail "queued -> running was not rejected" "$invalid_output"
+fi
+
+# Verify state didn't change
+if [[ "$(get_status test-t002)" == "queued" ]]; then
+    pass "State unchanged after invalid transition"
+else
+    fail "State changed despite invalid transition: $(get_status test-t002)"
+fi
+
+# queued -> complete (skipping intermediate states)
+# The captured output is passed as failure detail here and below, as the
+# first rejection check already does.
+invalid_output2=$(sup transition test-t002 complete 2>&1 || true)
+if echo "$invalid_output2" | grep -qi "invalid transition"; then
+    pass "queued -> complete rejected (skipping intermediate states)"
+else
+    fail "queued -> complete was not rejected" "$invalid_output2"
+fi
+
+# queued -> deployed (skipping all states)
+invalid_output3=$(sup transition test-t002 deployed 2>&1 || true)
+if echo "$invalid_output3" | grep -qi "invalid transition"; then
+    pass "queued -> deployed rejected"
+else
+    fail "queued -> deployed was not rejected" "$invalid_output3"
+fi
+
+# Invalid state name
+invalid_output4=$(sup transition test-t002 nonexistent_state 2>&1 || true)
+if echo "$invalid_output4" | grep -qi "invalid state"; then
+    pass "Nonexistent state name rejected"
+else
+    fail "Nonexistent state name was not rejected" "$invalid_output4"
+fi
+
+# ============================================================
+# SECTION 6: Retry Logic
+# ============================================================
+section "Retry Logic"
+
+# Add task and move to evaluating
+sup add test-t003 --repo /tmp/test --description "Retry test" >/dev/null || true
+sup transition test-t003 dispatched >/dev/null || true
+sup transition test-t003 running >/dev/null || true
+sup transition test-t003 evaluating >/dev/null || true
+
+# evaluating -> retrying
+sup transition test-t003 retrying >/dev/null || true
+if [[ "$(get_status test-t003)" == "retrying" ]]; then
+    pass "evaluating -> retrying"
+else
+    fail "evaluating -> retrying failed"
+fi
+
+# Test: retries counter incremented
+retries=$(get_field "test-t003" "retries") || true
+if [[ "$retries" -eq 1 ]]; then
+    pass "Retry counter incremented to 1"
+else
+    fail "Retry counter is $retries, expected 1"
+fi
+
+# retrying -> dispatched (re-dispatch)
+sup transition test-t003 dispatched >/dev/null || true
+if [[ "$(get_status test-t003)" == "dispatched" ]]; then
+    pass "retrying -> dispatched (re-dispatch)"
+else
+    fail "retrying -> dispatched failed"
+fi
+
+# Second retry cycle
+sup transition test-t003 running >/dev/null || true
+sup transition test-t003 evaluating >/dev/null || true
+sup transition test-t003 retrying >/dev/null || true
+retries2=$(get_field "test-t003" "retries") || true
+if [[ "$retries2" -eq 2 ]]; then
+    pass "Retry counter incremented to 2 on second retry"
+else
+    fail "Retry counter is $retries2, expected 2"
+fi
+
+# ============================================================
+# SECTION 7: Error Handling
+# ============================================================
+section "Error Handling"
+
+# Add task and move to running, then fail
+sup add test-t004 --repo /tmp/test --description "Error test" >/dev/null || true
+sup transition test-t004 dispatched >/dev/null || true
+sup transition test-t004 running >/dev/null || true
+
+# running -> failed with error message
+sup transition test-t004 failed --error "Timeout after 30 minutes" >/dev/null || true
+if [[ "$(get_status test-t004)" == "failed" ]]; then
+    pass "running -> failed with error"
+else
+    fail "running -> failed transition failed"
+fi
+
+# Test: error message stored
+error_msg=$(get_field "test-t004" "error") || true
+if [[ "$error_msg" == "Timeout after 30 minutes" ]]; then
+    pass "Error message stored correctly"
+else
+    fail "Error message is '$error_msg', expected 'Timeout after 30 minutes'"
+fi
+
+# Test: completed_at set on failure
+completed_fail=$(get_field "test-t004" "completed_at") || true
+if [[ -n "$completed_fail" ]]; then
+    pass "completed_at set on failed state"
+else
+    fail "completed_at not set on failed state"
+fi
+
+# Test: failed -> queued (re-queue after failure)
+sup transition test-t004 queued >/dev/null || true
+if [[ "$(get_status test-t004)" == "queued" ]]; then
+    pass "failed -> queued (re-queue)"
+else
+    fail "failed -> queued failed"
+fi
+
+# ============================================================
+# SECTION 8: Cancellation
+# ============================================================
+section "Cancellation"
+
+# queued -> cancelled
+sup add test-t005 --repo /tmp/test --description "Cancel test" >/dev/null || true
+sup transition test-t005 cancelled >/dev/null || true
+if [[ "$(get_status test-t005)" == "cancelled" ]]; then
+    pass "queued -> cancelled"
+else
+    fail "queued -> cancelled failed"
+fi
+
+# dispatched -> cancelled
+sup add test-t006 --repo /tmp/test --description "Cancel dispatched" >/dev/null || true
+sup transition test-t006 dispatched >/dev/null || true
+sup transition test-t006 cancelled >/dev/null || true
+if [[ "$(get_status test-t006)" == "cancelled" ]]; then
+    pass "dispatched -> cancelled"
+else
+    fail "dispatched -> cancelled failed"
+fi
+
+# running -> cancelled
+sup add test-t007 --repo /tmp/test --description "Cancel running" >/dev/null || true
+sup transition test-t007 dispatched >/dev/null || true
+sup transition test-t007 running >/dev/null || true
+sup transition test-t007 cancelled >/dev/null || true
+if [[ "$(get_status test-t007)" == "cancelled" ]]; then
+    pass "running -> cancelled"
+else
+    fail "running -> cancelled failed"
+fi
+
+# ============================================================
+# SECTION 9: Blocked State
+# ============================================================
+section "Blocked State"
+
+# evaluating -> blocked
+sup add test-t008 --repo 
/tmp/test --description "Blocked test" >/dev/null +sup transition test-t008 dispatched >/dev/null +sup transition test-t008 running >/dev/null +sup transition test-t008 evaluating >/dev/null +sup transition test-t008 blocked >/dev/null +if [[ "$(get_status test-t008)" == "blocked" ]]; then + pass "evaluating -> blocked" +else + fail "evaluating -> blocked failed" +fi + +# blocked -> queued (unblock) +sup transition test-t008 queued >/dev/null +if [[ "$(get_status test-t008)" == "queued" ]]; then + pass "blocked -> queued (unblock)" +else + fail "blocked -> queued failed" +fi + +# blocked -> cancelled +sup add test-t009 --repo /tmp/test --description "Blocked cancel" >/dev/null +sup transition test-t009 dispatched >/dev/null +sup transition test-t009 running >/dev/null +sup transition test-t009 evaluating >/dev/null +sup transition test-t009 blocked >/dev/null +sup transition test-t009 cancelled >/dev/null +if [[ "$(get_status test-t009)" == "cancelled" ]]; then + pass "blocked -> cancelled" +else + fail "blocked -> cancelled failed" +fi + +# ============================================================ +# SECTION 10: State Log Audit Trail +# ============================================================ +section "State Log Audit Trail" + +# Count state log entries for test-t001 (went through full lifecycle) +log_count=$(test_db "SELECT count(*) FROM state_log WHERE task_id = 'test-t001';") +if [[ "$log_count" -ge 8 ]]; then + pass "State log has $log_count entries for full lifecycle task" +else + fail "State log has only $log_count entries, expected >= 8" +fi + +# Verify log entries are in order +first_transition=$(test_db "SELECT from_state || '->' || to_state FROM state_log WHERE task_id = 'test-t001' ORDER BY id LIMIT 1;") +if [[ "$first_transition" == "->queued" ]]; then + pass "First state log entry is initial queued" +else + fail "First state log entry is '$first_transition', expected '->queued'" +fi + +# 
============================================================ +# SECTION 11: Metadata Fields +# ============================================================ +section "Metadata Fields" + +# Test: transition with --session, --branch, --worktree, --pr-url +sup add test-t010 --repo /tmp/test --description "Metadata test" >/dev/null +sup transition test-t010 dispatched --session "ses_abc123" --branch "feature/test" --worktree "/tmp/wt" >/dev/null + +session_id=$(get_field "test-t010" "session_id") +branch=$(get_field "test-t010" "branch") +worktree=$(get_field "test-t010" "worktree") + +if [[ "$session_id" == "ses_abc123" ]]; then + pass "session_id stored on transition" +else + fail "session_id is '$session_id', expected 'ses_abc123'" +fi + +if [[ "$branch" == "feature/test" ]]; then + pass "branch stored on transition" +else + fail "branch is '$branch', expected 'feature/test'" +fi + +if [[ "$worktree" == "/tmp/wt" ]]; then + pass "worktree stored on transition" +else + fail "worktree is '$worktree', expected '/tmp/wt'" +fi + +# ============================================================ +# SECTION 12: Batch Completion Detection +# ============================================================ +section "Batch Completion Detection" + +# Create a batch with two tasks +sup add test-b001 --repo /tmp/test --description "Batch task 1" >/dev/null +sup add test-b002 --repo /tmp/test --description "Batch task 2" >/dev/null +sup batch test-batch --tasks "test-b001,test-b002" >/dev/null 2>&1 || true + +# Check if batch was created +batch_status=$(test_db "SELECT status FROM batches WHERE name = 'test-batch';" 2>/dev/null || echo "") +if [[ "$batch_status" == "active" ]]; then + pass "Batch created in 'active' state" + + # Complete first task + sup transition test-b001 dispatched >/dev/null + sup transition test-b001 running >/dev/null + sup transition test-b001 evaluating >/dev/null + sup transition test-b001 complete >/dev/null + + # Batch should still be active (one task 
remaining) + batch_after_one=$(test_db "SELECT status FROM batches WHERE name = 'test-batch';") + if [[ "$batch_after_one" == "active" ]]; then + pass "Batch stays active with incomplete tasks" + else + fail "Batch status is '$batch_after_one' after one task complete, expected 'active'" + fi + + # Complete second task + sup transition test-b002 dispatched >/dev/null + sup transition test-b002 running >/dev/null + sup transition test-b002 evaluating >/dev/null + sup transition test-b002 complete >/dev/null + + # Batch should now be complete + batch_after_all=$(test_db "SELECT status FROM batches WHERE name = 'test-batch';") + if [[ "$batch_after_all" == "complete" ]]; then + pass "Batch auto-completes when all tasks finish" + else + fail "Batch status is '$batch_after_all' after all tasks complete, expected 'complete'" + fi +else + skip "Batch creation may require different syntax (status: '$batch_status')" +fi + +# ============================================================ +# SECTION 13: Nonexistent Task +# ============================================================ +section "Edge Cases" + +# Transition on nonexistent task +nonexist_output=$(sup transition nonexistent-task dispatched 2>&1 || true) +if echo "$nonexist_output" | grep -qi "not found"; then + pass "Transition on nonexistent task returns error" +else + fail "Transition on nonexistent task did not return error" "$nonexist_output" +fi + +# Missing arguments +missing_output=$(sup transition 2>&1 || true) +if echo "$missing_output" | grep -qiE "usage|requires"; then + pass "Missing arguments shows usage" +else + fail "Missing arguments did not show usage" +fi + +# ============================================================ +# SUMMARY +# ============================================================ +echo "" +echo "========================================" +printf " \033[1mResults: %d total, \033[0;32m%d passed\033[0m, \033[0;31m%d failed\033[0m, \033[0;33m%d skipped\033[0m\n" \ + "$TOTAL_COUNT" 
"$PASS_COUNT" "$FAIL_COUNT" "$SKIP_COUNT" +echo "========================================" + +if [[ "$FAIL_COUNT" -gt 0 ]]; then + echo "" + printf "\033[0;31mFAILURES DETECTED - review output above\033[0m\n" + exit 1 +else + echo "" + printf "\033[0;32mAll tests passed.\033[0m\n" + exit 0 +fi