From 1d979c7d3e6886d938bbe22c7c39b9087759b609 Mon Sep 17 00:00:00 2001
From: marcusquinn <6428977+marcusquinn@users.noreply.github.com>
Date: Wed, 18 Feb 2026 21:30:05 +0000
Subject: [PATCH] fix: harden AI pipeline JSON parser against whitespace
 responses and improve failure logging (t1184)

- extract_action_plan: add whitespace-only response check (model returning
  only newlines/spaces now treated as empty, not a parse failure)
- extract_action_plan Try 3: verify extracted generic code block is a JSON
  array before returning (prevents returning non-JSON content when outer
  fences wrap the response)
- extract_action_plan Try 4: handle indented arrays (not just column-0
  brackets) and verify extracted content is a JSON array
- run_ai_reasoning: log full raw LLM response to reason log on parse failure
  (previously only first/last 100 bytes; now full response + expanded to 200
  bytes for diagnostics) so intermittent failures can be fully diagnosed
- run_ai_reasoning: log warning with response length and json block count
  when parse fails, making failures visible in supervisor log without reading
  log file
- run_ai_actions_pipeline: add whitespace-only check alongside empty check
  (belt-and-suspenders against whitespace-only output from reasoning)
- run_ai_actions_pipeline: improve 'expected array' error message to include
  JSON type and first 100 chars of content for faster diagnosis

Root causes of the 15:53-19:15 errors were already fixed by t1157
(concurrency guard) and t1123 (multi-block parser). This PR adds defensive
hardening and better observability for any future parse failures.
ref:GH#1789 --- .agents/scripts/supervisor/ai-actions.sh | 10 +++--- .agents/scripts/supervisor/ai-reason.sh | 44 +++++++++++++++++------- 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/.agents/scripts/supervisor/ai-actions.sh b/.agents/scripts/supervisor/ai-actions.sh index e28460318..fc7fa4f41 100755 --- a/.agents/scripts/supervisor/ai-actions.sh +++ b/.agents/scripts/supervisor/ai-actions.sh @@ -1594,8 +1594,10 @@ run_ai_actions_pipeline() { return 1 fi - # Handle empty output — concurrency guard or other silent skip - if [[ -z "$action_plan" ]]; then + # Handle empty or whitespace-only output — concurrency guard or other silent skip + local _trimmed_plan + _trimmed_plan=$(printf '%s' "$action_plan" | tr -d '[:space:]') + if [[ -z "$action_plan" || -z "$_trimmed_plan" ]]; then log_info "AI Actions Pipeline: reasoning returned empty output (skipped)" echo '{"executed":0,"failed":0,"skipped":0,"actions":[]}' return 0 @@ -1628,11 +1630,11 @@ run_ai_actions_pipeline() { local plan_type plan_type=$(printf '%s' "$action_plan" | jq 'type' 2>/dev/null || echo "") if [[ "$plan_type" != '"array"' ]]; then - # Log raw content for debugging (t1182: helps diagnose parse failures) + # Log raw content for debugging (t1182/t1184: helps diagnose parse failures) local plan_len plan_head plan_len=$(printf '%s' "$action_plan" | wc -c | tr -d ' ') plan_head=$(printf '%s' "$action_plan" | head -c 200 | tr '\n' ' ') - log_warn "AI Actions Pipeline: expected array, got $plan_type (len=${plan_len} head='${plan_head}')" + log_warn "AI Actions Pipeline: expected array, got ${plan_type:-} (len=${plan_len} head='${plan_head}')" echo '{"error":"invalid_plan_type","actions":[]}' return 1 fi diff --git a/.agents/scripts/supervisor/ai-reason.sh b/.agents/scripts/supervisor/ai-reason.sh index 4689f9ed7..17980f49a 100755 --- a/.agents/scripts/supervisor/ai-reason.sh +++ b/.agents/scripts/supervisor/ai-reason.sh @@ -313,8 +313,8 @@ ${user_prompt}" local response_len 
json_block_count first_bytes last_bytes raw_hex_head response_len=$(printf '%s' "$ai_result" | wc -c | tr -d ' ') json_block_count=$(printf '%s' "$ai_result" | grep -c '^```json' 2>/dev/null || echo 0) - first_bytes=$(printf '%s' "$ai_result" | head -c 100 | tr '\n' ' ') - last_bytes=$(printf '%s' "$ai_result" | tail -c 100 | tr '\n' ' ') + first_bytes=$(printf '%s' "$ai_result" | head -c 200 | tr '\n' ' ') + last_bytes=$(printf '%s' "$ai_result" | tail -c 200 | tr '\n' ' ') raw_hex_head=$(printf '%s' "$ai_result" | head -c 32 | od -An -tx1 | tr -d ' \n' | head -c 64) { echo "## Parsing Result" @@ -324,16 +324,18 @@ ${user_prompt}" echo "### Debug Diagnostics" echo "- Response length: $response_len bytes" echo "- \`\`\`json blocks found: $json_block_count" - echo "- First 100 bytes: \`$first_bytes\`" - echo "- Last 100 bytes: \`$last_bytes\`" + echo "- First 200 bytes: \`$first_bytes\`" + echo "- Last 200 bytes: \`$last_bytes\`" echo "- First 32 bytes (hex): \`$raw_hex_head\`" echo "" - echo "### Raw Response (first 500 bytes)" + echo "### Raw Response (for debugging)" + echo "" echo '```' - printf '%s' "$ai_result" | head -c 500 + printf '%s' "$ai_result" echo "" echo '```' } >>"$reason_log" + log_warn "AI Reasoning: raw response logged to $reason_log (${response_len} bytes, ${json_block_count} json blocks)" echo '{"error":"no_action_plan","actions":[]}' _release_ai_lock return 1 @@ -519,6 +521,14 @@ extract_action_plan() { return 0 fi + # Handle whitespace-only responses (e.g., model returned only newlines/spaces) + local trimmed + trimmed=$(printf '%s' "$response" | tr -d '[:space:]') + if [[ -z "$trimmed" ]]; then + echo "" + return 0 + fi + # Try 1: Direct JSON parse (response is pure JSON) local parsed parsed=$(printf '%s' "$response" | jq '.' 2>/dev/null) @@ -551,6 +561,7 @@ extract_action_plan() { fi # Try 3: Extract from any generic code block (last one, handles unclosed) + # Only accept if the extracted content is a valid JSON array. 
json_block=$(printf '%s' "$response" | awk ' /^```/ && !capture { capture=1; block=""; next } /^```$/ && capture { capture=0; last_block=block; next } @@ -560,24 +571,33 @@ extract_action_plan() { if [[ -n "$json_block" ]]; then parsed=$(printf '%s' "$json_block" | jq '.' 2>/dev/null) if [[ $? -eq 0 && -n "$parsed" ]]; then - printf '%s' "$parsed" - return 0 + local block_type + block_type=$(printf '%s' "$parsed" | jq 'type' 2>/dev/null || echo "") + if [[ "$block_type" == '"array"' ]]; then + printf '%s' "$parsed" + return 0 + fi fi fi # Try 4: Find the last JSON array in the response (between [ and ]) + # Handles both column-0 and indented arrays. local bracket_json bracket_json=$(printf '%s' "$response" | awk ' - /^\[/ { capture=1; block="" } + /^[[:space:]]*\[/ { capture=1; block="" } capture { block = block (block ? "\n" : "") $0 } - /^\]/ && capture { capture=0; last_block=block } + /^[[:space:]]*\]/ && capture { capture=0; last_block=block } END { if (last_block) print last_block } ') if [[ -n "$bracket_json" ]]; then parsed=$(printf '%s' "$bracket_json" | jq '.' 2>/dev/null) if [[ $? -eq 0 && -n "$parsed" ]]; then - printf '%s' "$parsed" - return 0 + local arr_type + arr_type=$(printf '%s' "$parsed" | jq 'type' 2>/dev/null || echo "") + if [[ "$arr_type" == '"array"' ]]; then + printf '%s' "$parsed" + return 0 + fi fi fi