From 1d979c7d3e6886d938bbe22c7c39b9087759b609 Mon Sep 17 00:00:00 2001
From: marcusquinn <6428977+marcusquinn@users.noreply.github.com>
Date: Wed, 18 Feb 2026 21:30:05 +0000
Subject: [PATCH] fix: harden AI pipeline JSON parser against whitespace
 responses and improve failure logging (t1184)

- extract_action_plan: add whitespace-only response check (model returning only
  newlines/spaces now treated as empty, not a parse failure)
- extract_action_plan Try 3: verify extracted generic code block is a JSON array
  before returning (prevents returning non-JSON content when outer fences wrap
  the response)
- extract_action_plan Try 4: handle indented arrays (not just column-0 brackets)
  and verify extracted content is a JSON array
- run_ai_reasoning: log full raw LLM response to reason log on parse failure
  (previously only the first 500 bytes were logged) and expand the first/last
  byte previews from 100 to 200 bytes, so intermittent failures can be fully
  diagnosed
- run_ai_reasoning: log warning with response length and json block count when
  parse fails, making failures visible in supervisor log without reading log file
- run_ai_actions_pipeline: add whitespace-only check alongside empty check
  (belt-and-suspenders against whitespace-only output from reasoning)
- run_ai_actions_pipeline: improve 'expected array' warning to report
  '<invalid JSON>' when jq cannot determine the plan type (the message already
  includes the length and first 200 bytes of content) for faster diagnosis

Root causes of the 15:53-19:15 errors were already fixed by t1157 (concurrency
guard) and t1123 (multi-block parser). This PR adds defensive hardening and
better observability for any future parse failures.

ref:GH#1789
---
 .agents/scripts/supervisor/ai-actions.sh | 10 +++---
 .agents/scripts/supervisor/ai-reason.sh  | 44 +++++++++++++++++-------
 2 files changed, 38 insertions(+), 16 deletions(-)
diff --git a/.agents/scripts/supervisor/ai-actions.sh b/.agents/scripts/supervisor/ai-actions.sh
index e28460318..fc7fa4f41 100755
--- a/.agents/scripts/supervisor/ai-actions.sh
+++ b/.agents/scripts/supervisor/ai-actions.sh
@@ -1594,8 +1594,10 @@ run_ai_actions_pipeline() {
 		return 1
 	fi
 
-	# Handle empty output — concurrency guard or other silent skip
-	if [[ -z "$action_plan" ]]; then
+	# Handle empty or whitespace-only output — concurrency guard or other silent skip
+	local _trimmed_plan
+	_trimmed_plan=$(printf '%s' "$action_plan" | tr -d '[:space:]')
+	if [[ -z "$action_plan" || -z "$_trimmed_plan" ]]; then
 		log_info "AI Actions Pipeline: reasoning returned empty output (skipped)"
 		echo '{"executed":0,"failed":0,"skipped":0,"actions":[]}'
 		return 0
@@ -1628,11 +1630,11 @@ run_ai_actions_pipeline() {
 	local plan_type
 	plan_type=$(printf '%s' "$action_plan" | jq 'type' 2>/dev/null || echo "")
 	if [[ "$plan_type" != '"array"' ]]; then
-		# Log raw content for debugging (t1182: helps diagnose parse failures)
+		# Log raw content for debugging (t1182/t1184: helps diagnose parse failures)
 		local plan_len plan_head
 		plan_len=$(printf '%s' "$action_plan" | wc -c | tr -d ' ')
 		plan_head=$(printf '%s' "$action_plan" | head -c 200 | tr '\n' ' ')
-		log_warn "AI Actions Pipeline: expected array, got $plan_type (len=${plan_len} head='${plan_head}')"
+		log_warn "AI Actions Pipeline: expected array, got ${plan_type:-<invalid JSON>} (len=${plan_len} head='${plan_head}')"
 		echo '{"error":"invalid_plan_type","actions":[]}'
 		return 1
 	fi
diff --git a/.agents/scripts/supervisor/ai-reason.sh b/.agents/scripts/supervisor/ai-reason.sh
index 4689f9ed7..17980f49a 100755
--- a/.agents/scripts/supervisor/ai-reason.sh
+++ b/.agents/scripts/supervisor/ai-reason.sh
@@ -313,8 +313,8 @@ ${user_prompt}"
 		local response_len json_block_count first_bytes last_bytes raw_hex_head
 		response_len=$(printf '%s' "$ai_result" | wc -c | tr -d ' ')
 		json_block_count=$(printf '%s' "$ai_result" | grep -c '^```json' 2>/dev/null || echo 0)
-		first_bytes=$(printf '%s' "$ai_result" | head -c 100 | tr '\n' ' ')
-		last_bytes=$(printf '%s' "$ai_result" | tail -c 100 | tr '\n' ' ')
+		first_bytes=$(printf '%s' "$ai_result" | head -c 200 | tr '\n' ' ')
+		last_bytes=$(printf '%s' "$ai_result" | tail -c 200 | tr '\n' ' ')
 		raw_hex_head=$(printf '%s' "$ai_result" | head -c 32 | od -An -tx1 | tr -d ' \n' | head -c 64)
 		{
 			echo "## Parsing Result"
@@ -324,16 +324,18 @@ ${user_prompt}"
 			echo "### Debug Diagnostics"
 			echo "- Response length: $response_len bytes"
 			echo "- \`\`\`json blocks found: $json_block_count"
-			echo "- First 100 bytes: \`$first_bytes\`"
-			echo "- Last 100 bytes: \`$last_bytes\`"
+			echo "- First 200 bytes: \`$first_bytes\`"
+			echo "- Last 200 bytes: \`$last_bytes\`"
 			echo "- First 32 bytes (hex): \`$raw_hex_head\`"
 			echo ""
-			echo "### Raw Response (first 500 bytes)"
+			echo "### Raw Response (for debugging)"
+			echo ""
 			echo '```'
-			printf '%s' "$ai_result" | head -c 500
+			printf '%s' "$ai_result"
 			echo ""
 			echo '```'
 		} >>"$reason_log"
+		log_warn "AI Reasoning: raw response logged to $reason_log (${response_len} bytes, ${json_block_count} json blocks)"
 		echo '{"error":"no_action_plan","actions":[]}'
 		_release_ai_lock
 		return 1
@@ -519,6 +521,14 @@ extract_action_plan() {
 		return 0
 	fi
 
+	# Handle whitespace-only responses (e.g., model returned only newlines/spaces)
+	local trimmed
+	trimmed=$(printf '%s' "$response" | tr -d '[:space:]')
+	if [[ -z "$trimmed" ]]; then
+		echo ""
+		return 0
+	fi
+
 	# Try 1: Direct JSON parse (response is pure JSON)
 	local parsed
 	parsed=$(printf '%s' "$response" | jq '.' 2>/dev/null)
@@ -551,6 +561,7 @@ extract_action_plan() {
 	fi
 
 	# Try 3: Extract from any generic code block (last one, handles unclosed)
+	# Only accept if the extracted content is a valid JSON array.
 	json_block=$(printf '%s' "$response" | awk '
 		/^```/ && !capture { capture=1; block=""; next }
 		/^```$/ && capture { capture=0; last_block=block; next }
@@ -560,24 +571,33 @@ extract_action_plan() {
 	if [[ -n "$json_block" ]]; then
 		parsed=$(printf '%s' "$json_block" | jq '.' 2>/dev/null)
 		if [[ $? -eq 0 && -n "$parsed" ]]; then
-			printf '%s' "$parsed"
-			return 0
+			local block_type
+			block_type=$(printf '%s' "$parsed" | jq 'type' 2>/dev/null || echo "")
+			if [[ "$block_type" == '"array"' ]]; then
+				printf '%s' "$parsed"
+				return 0
+			fi
 		fi
 	fi
 
 	# Try 4: Find the last JSON array in the response (between [ and ])
+	# Handles both column-0 and indented arrays.
 	local bracket_json
 	bracket_json=$(printf '%s' "$response" | awk '
-		/^\[/ { capture=1; block="" }
+		/^[[:space:]]*\[/ { capture=1; block="" }
 		capture { block = block (block ? "\n" : "") $0 }
-		/^\]/ && capture { capture=0; last_block=block }
+		/^[[:space:]]*\]/ && capture { capture=0; last_block=block }
 		END { if (last_block) print last_block }
 	')
 	if [[ -n "$bracket_json" ]]; then
 		parsed=$(printf '%s' "$bracket_json" | jq '.' 2>/dev/null)
 		if [[ $? -eq 0 && -n "$parsed" ]]; then
-			printf '%s' "$parsed"
-			return 0
+			local arr_type
+			arr_type=$(printf '%s' "$parsed" | jq 'type' 2>/dev/null || echo "")
+			if [[ "$arr_type" == '"array"' ]]; then
+				printf '%s' "$parsed"
+				return 0
+			fi
 		fi
 	fi