Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions .agents/scripts/supervisor/ai-actions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -712,8 +712,21 @@ validate_action_fields() {
echo "missing required field: task_id"
return 0
fi
# new_priority is no longer strictly required — the executor infers it
# from reasoning text if missing (see _exec_adjust_priority)
# new_priority is required — the AI prompt documents this explicitly (t1126, t1201).
# The executor can infer from reasoning text as a fallback, but the field must be
# present to ensure the AI is producing structured output as intended.
if [[ -z "$new_priority" || "$new_priority" == "null" ]]; then
echo "missing required field: new_priority (must be high|medium|low|critical)"
return 0
fi
# Validate new_priority value — must be one of the known values (t1197, t1201)
case "$new_priority" in
high | medium | low | critical) ;;
*)
echo "invalid new_priority: $new_priority (must be high|medium|low|critical)"
return 0
;;
esac
;;
close_verified)
local issue_number pr_number
Expand Down
65 changes: 55 additions & 10 deletions .agents/scripts/supervisor/ai-reason.sh
Original file line number Diff line number Diff line change
Expand Up @@ -307,30 +307,58 @@ ${user_prompt}"
local action_plan
action_plan=$(extract_action_plan "$ai_result")

# Retry once if parse failed on a non-empty response (t1187)
# Retry once if parse failed on a non-empty response (t1187, t1201)
# The AI model occasionally produces malformed or truncated JSON on the first
# attempt. A single retry resolves most transient failures without adding
# significant latency (the retry only fires when the first attempt fails).
# attempt (e.g., markdown-fenced JSON, empty response, preamble text before the
# array). A single retry with a simplified prompt resolves most transient failures
# without adding significant latency (the retry only fires when the first attempt
# fails). The simplified prompt strips the large context and explicitly reinforces
# "respond with ONLY a JSON array, no markdown fencing" (t1201).
if [[ -z "$action_plan" || "$action_plan" == "null" ]]; then
log_warn "AI Reasoning: parse failed on first attempt — retrying AI call once (t1187)"
log_warn "AI Reasoning: parse failed on first attempt — retrying with simplified JSON-only prompt (t1187, t1201)"
{
echo "## Parse Attempt 1"
echo ""
echo "Status: FAILED — retrying AI call"
echo "Status: FAILED — retrying with simplified JSON-only prompt"
} >>"$reason_log"

# Simplified retry prompt: strip the large context, keep only the output
# format requirement with explicit reinforcement against markdown fencing.
# This is more likely to produce clean JSON when the model returned empty
# output or markdown-wrapped JSON on the first attempt (t1201).
local simplified_retry_prompt
simplified_retry_prompt="$(
cat <<'SIMPLIFIED_PROMPT'
You are an AI Engineering Manager. Your previous response could not be parsed as a JSON array.

Respond with ONLY a JSON array of actions. No markdown fencing (no ```json or ```), no explanation, no preamble — just the raw JSON array starting with [ and ending with ].

If you have no actions to propose, respond with exactly: []

Valid action types: comment_on_issue, create_task, create_subtasks, flag_for_review, adjust_priority, close_verified, request_info, create_improvement, escalate_model, propose_auto_dispatch

Example of correct output (raw JSON, no fencing):
[{"type":"comment_on_issue","issue_number":123,"body":"Status update","reasoning":"Issue needs acknowledgment"}]

Or if nothing needs attention:
[]

Respond with ONLY the JSON array. No markdown, no explanation, no code fences.
SIMPLIFIED_PROMPT
)"

local ai_result_retry=""
if [[ "$ai_cli" == "opencode" ]]; then
ai_result_retry=$(portable_timeout "$ai_timeout" opencode run \
-m "$ai_model" \
--format default \
--title "ai-supervisor-${timestamp}-retry" \
"$full_prompt" 2>/dev/null || echo "")
"$simplified_retry_prompt" 2>/dev/null || echo "")
ai_result_retry=$(printf '%s' "$ai_result_retry" | sed 's/\x1b\[[0-9;]*[mGKHF]//g; s/\x1b\[[0-9;]*[A-Za-z]//g; s/\x1b\]//g; s/\x07//g')
else
local claude_model_retry="${ai_model#*/}"
ai_result_retry=$(portable_timeout "$ai_timeout" claude \
-p "$full_prompt" \
-p "$simplified_retry_prompt" \
--model "$claude_model_retry" \
--output-format text 2>/dev/null || echo "")
fi
Expand All @@ -341,7 +369,7 @@ ${user_prompt}"
action_plan=$(extract_action_plan "$ai_result_retry")
{
echo ""
echo "## Parse Attempt 2 (retry)"
echo "## Parse Attempt 2 (simplified JSON-only prompt retry, t1201)"
echo ""
echo "Response length: $(printf '%s' "$ai_result_retry" | wc -c | tr -d ' ') bytes"
echo "Parse result: $([ -n "$action_plan" ] && echo "SUCCESS" || echo "FAILED")"
Expand All @@ -350,7 +378,7 @@ ${user_prompt}"
log_info "AI Reasoning: retry also returned empty response"
{
echo ""
echo "## Parse Attempt 2 (retry)"
echo "## Parse Attempt 2 (simplified JSON-only prompt retry, t1201)"
echo ""
echo "Response length: 0 bytes (empty)"
echo "Parse result: EMPTY — treating as empty action plan []"
Expand Down Expand Up @@ -631,7 +659,24 @@ extract_action_plan() {
fi
fi

# Try 4: Find the last JSON array in the response (between [ and ])
# Try 4a: Single-line JSON array — grep for lines starting with [ and parse directly.
# This handles the common case where the AI returns a single-line array possibly
# surrounded by preamble/postamble text (t1201).
local single_line_json
single_line_json=$(printf '%s' "$response" | grep -E '^[[:space:]]*\[' | tail -1)
if [[ -n "$single_line_json" ]]; then
parsed=$(printf '%s' "$single_line_json" | jq '.' 2>/dev/null)
if [[ $? -eq 0 && -n "$parsed" ]]; then
local sl_type
sl_type=$(printf '%s' "$parsed" | jq 'type' 2>/dev/null || echo "")
if [[ "$sl_type" == '"array"' ]]; then
printf '%s' "$parsed"
return 0
fi
fi
fi

# Try 4b: Find the last multi-line JSON array in the response (between [ and ])
# Handles both column-0 and indented arrays.
local bracket_json
bracket_json=$(printf '%s' "$response" | awk '
Expand Down
174 changes: 174 additions & 0 deletions tests/test-ai-actions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1051,6 +1051,180 @@ else
fail "dedup record state_hash storage has errors"
fi

# ─── Test 18: extract_action_plan parsing edge cases (t1201) ────────
# Announce the test before the harness below is defined and run.
printf '%s\n' "Test 18: extract_action_plan — markdown fence stripping and fallback parsing"

# Test 18 harness: feeds extract_action_plan (sourced from
# supervisor/ai-reason.sh) a series of representative AI responses and counts
# every expectation that is not met.
# Globals:  REPO_DIR, ACTIONS_SCRIPT (read)
# Outputs:  one "FAIL: ..." line on stdout per unmet expectation
# Returns:  the number of unmet expectations (0 means every case passed)
_test_extract_action_plan() {
  (
    # Subshell: the stubs, the sourced definitions and the final `exit` stay
    # isolated from the calling test script.
    BLUE=''
    GREEN=''
    YELLOW=''
    RED=''
    NC=''
    SUPERVISOR_DB="/tmp/test-extract-$$.db"
    SUPERVISOR_LOG="/dev/null"
    SCRIPT_DIR="$REPO_DIR/.agents/scripts"
    REPO_PATH="$REPO_DIR"
    AI_ACTIONS_LOG_DIR="/tmp/test-ai-actions-logs-$$"

    # Silent stand-ins for the database and logging helpers, declared before
    # the scripts are sourced.
    db() { :; }
    log_info() { :; }
    log_success() { :; }
    log_warn() { :; }
    log_error() { :; }
    log_verbose() { :; }
    sql_escape() { printf '%s' "$1" | sed "s/'/''/g"; }

    # extract_action_plan lives in ai-reason.sh, so pull that in first.
    # shellcheck source=../.agents/scripts/supervisor/ai-reason.sh
    source "$REPO_DIR/.agents/scripts/supervisor/ai-reason.sh"

    source "$ACTIONS_SCRIPT"

    local unmet=0 got
    # Single-action payload reused by every case that needs a non-empty array.
    local sample='[{"type":"comment_on_issue","issue_number":1,"body":"test","reasoning":"r"}]'

    # Case 1: empty response yields nothing.
    got=$(extract_action_plan "")
    [[ -z "$got" ]] || {
      echo "FAIL: empty response should return empty string, got: $got"
      ((unmet += 1))
    }

    # Case 2: a response consisting of a single newline yields nothing.
    got=$(extract_action_plan $'\n')
    [[ -z "$got" ]] || {
      echo "FAIL: whitespace-only response should return empty string, got: $got"
      ((unmet += 1))
    }

    # Case 3: a bare JSON array is accepted as-is.
    got=$(extract_action_plan "$sample")
    [[ -n "$got" ]] || {
      echo "FAIL: pure JSON array should be parsed successfully"
      ((unmet += 1))
    }

    # Case 4: the array wrapped in a ```json ... ``` fence (three lines).
    got=$(extract_action_plan $'```json\n'"$sample"$'\n```')
    [[ -n "$got" ]] || {
      echo "FAIL: markdown-fenced JSON should be extracted and parsed"
      ((unmet += 1))
    }

    # Case 5: plain prose without any JSON yields nothing.
    got=$(extract_action_plan "Here is my analysis of the project state...")
    [[ -z "$got" ]] || {
      echo "FAIL: non-JSON response should return empty string, got: $got"
      ((unmet += 1))
    }

    # Case 6: an empty array is a valid "no actions" plan and must round-trip.
    got=$(extract_action_plan '[]')
    [[ "$got" == "[]" ]] || {
      echo "FAIL: empty array should parse to '[]', got: $got"
      ((unmet += 1))
    }

    # Case 7: the array sits on its own line between preamble and postamble.
    got=$(extract_action_plan $'Here is my action plan:\n'"$sample"$'\nThat is all.')
    [[ -n "$got" ]] || {
      echo "FAIL: array embedded in text should be extracted via bracket fallback"
      ((unmet += 1))
    }

    rm -rf "/tmp/test-ai-actions-logs-$$" "$SUPERVISOR_DB"
    exit "$unmet"
  )
}

# Run the Test 18 harness with its stderr discarded and record the outcome:
# a non-zero status (at least one unmet expectation) counts as a failure.
if ! _test_extract_action_plan 2>/dev/null; then
  fail "extract_action_plan edge case handling has errors"
else
  pass "extract_action_plan handles empty, whitespace, fenced, and embedded JSON"
fi

# ─── Test 19: adjust_priority requires a valid new_priority (t1126, t1201) ─
# Announce the test before the harness below is defined and run.
printf '%s\n' "Test 19: adjust_priority — new_priority required and validated"

# Test 19 harness: checks that validate_action_fields (sourced from
# $ACTIONS_SCRIPT) prints nothing for adjust_priority actions carrying each
# accepted priority and prints a rejection message when new_priority is
# missing or unknown, or when task_id is missing.
# Globals:  REPO_DIR, ACTIONS_SCRIPT (read)
# Outputs:  one "FAIL: ..." line on stdout per unmet expectation
# Returns:  the number of unmet expectations (0 means every case passed)
_test_adjust_priority_validation() {
  (
    # Subshell: the stubs, the sourced definitions and the final `exit` stay
    # isolated from the calling test script.
    BLUE=''
    GREEN=''
    YELLOW=''
    RED=''
    NC=''
    SUPERVISOR_DB="/tmp/test-adj-$$.db"
    SUPERVISOR_LOG="/dev/null"
    SCRIPT_DIR="$REPO_DIR/.agents/scripts"
    REPO_PATH="$REPO_DIR"
    AI_ACTIONS_LOG_DIR="/tmp/test-ai-actions-logs-$$"

    # Silent stand-ins for the database and logging helpers, declared before
    # the script is sourced.
    db() { :; }
    log_info() { :; }
    log_success() { :; }
    log_warn() { :; }
    log_error() { :; }
    log_verbose() { :; }
    sql_escape() { printf '%s' "$1" | sed "s/'/''/g"; }

    source "$ACTIONS_SCRIPT"

    local unmet=0 verdict prio

    # Accepted values: the validator must stay silent for each of them.
    for prio in high medium low critical; do
      verdict=$(validate_action_fields "{\"type\":\"adjust_priority\",\"task_id\":\"t100\",\"new_priority\":\"${prio}\"}" "adjust_priority")
      [[ -z "$verdict" ]] || {
        echo "FAIL: valid adjust_priority (${prio}) rejected: $verdict"
        ((unmet += 1))
      }
    done

    # Rejected: new_priority absent altogether (t1126, t1201).
    verdict=$(validate_action_fields '{"type":"adjust_priority","task_id":"t100"}' "adjust_priority")
    [[ -n "$verdict" ]] || {
      echo "FAIL: adjust_priority without new_priority should be rejected"
      ((unmet += 1))
    }

    # Rejected: new_priority present but not one of the accepted values.
    verdict=$(validate_action_fields '{"type":"adjust_priority","task_id":"t100","new_priority":"urgent"}' "adjust_priority")
    [[ -n "$verdict" ]] || {
      echo "FAIL: adjust_priority with invalid new_priority 'urgent' should be rejected"
      ((unmet += 1))
    }

    # Rejected: task_id absent even though new_priority is valid.
    verdict=$(validate_action_fields '{"type":"adjust_priority","new_priority":"high"}' "adjust_priority")
    [[ -n "$verdict" ]] || {
      echo "FAIL: adjust_priority without task_id should be rejected"
      ((unmet += 1))
    }

    rm -rf "/tmp/test-ai-actions-logs-$$" "$SUPERVISOR_DB"
    exit "$unmet"
  )
}

# Run the Test 19 harness with its stderr discarded and record the outcome:
# a non-zero status (at least one unmet expectation) counts as a failure.
if ! _test_adjust_priority_validation 2>/dev/null; then
  fail "adjust_priority new_priority validation has errors"
else
  pass "adjust_priority new_priority validation works correctly"
fi

# ─── Summary ────────────────────────────────────────────────────────
# One blank separator line followed by the aggregate pass/total/fail counters.
printf '\n=== Results: %s/%s passed, %s failed ===\n' "$PASS" "$TOTAL" "$FAIL"
Expand Down